otterly 0.1.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,246 +1,177 @@
1
1
  # otterly
2
2
 
3
- Drop a Claude Code agent into your app in one line. Like Ollama, but instead of running a model for inference, you get a full coding agent that reads, writes, and runs code on your local machine.
3
+ Local AI inference for your apps. Use Claude Code instead of paying for API tokens.
4
4
 
5
5
  ```bash
6
6
  npm install otterly @anthropic-ai/claude-code
7
7
  ```
8
8
 
9
+ ## What is this?
10
+
11
+ You have Claude Code on your machine. You're already paying for it (Max subscription or org access). Otterly lets your apps — backends, Electron apps, scripts, local tools — call Claude Code directly instead of making API calls and paying per token.
12
+
13
+ **Instead of this:**
9
14
  ```typescript
10
- import { claude } from 'otterly';
15
+ import OpenAI from "openai";
16
+ const client = new OpenAI({ apiKey: "sk-..." }); // costs per token
17
+ ```
11
18
 
12
- const result = await claude.run("Add error handling to server.ts", {
13
- cwd: "./my-project",
14
- });
19
+ **Do this:**
20
+ ```typescript
21
+ import { claude } from "otterly";
22
+ const result = await claude.run("Fix the bug in server.ts", { cwd: "./my-app" });
23
+ // uses your local Claude Code — no API key, no per-token cost
24
+ ```
25
+
26
+ Or run it as a server and point any OpenAI-compatible client at it:
15
27
 
16
- console.log(result.text); // "I've added try-catch blocks to all route handlers..."
17
- console.log(result.cost); // 0.03
28
+ ```bash
29
+ npx otterly serve
30
+ # now http://localhost:11434 speaks the OpenAI protocol
18
31
  ```
19
32
 
20
33
  ## Requirements
21
34
 
22
35
  - Node.js 18+
23
36
  - Claude Code installed and authenticated (`claude login`)
24
- - `@anthropic-ai/claude-code` installed as a peer dependency
25
37
 
26
- ## Usage
38
+ ## Two ways to use it
27
39
 
28
- ### One-shot
29
-
30
- Run a task, get the result. Simplest way to use it.
40
+ ### 1. As a library (import into your app)
31
41
 
32
42
  ```typescript
33
- import { claude } from 'otterly';
43
+ import { claude } from "otterly";
34
44
 
35
- const result = await claude.run("Fix the login bug in auth.ts", {
45
+ // Simple: run a prompt, get a result
46
+ const result = await claude.run("Add input validation to user.ts", {
36
47
  cwd: "./my-project",
37
48
  });
49
+ console.log(result.text);
38
50
 
39
- console.log(result.text); // Final output text
40
- console.log(result.cost); // Cost in USD
41
- console.log(result.duration); // Duration in ms
42
- console.log(result.sessionId); // Save this to resume later
43
- console.log(result.tools); // Every tool that was used
44
- ```
45
-
46
- ### Streaming
47
-
48
- Get real-time events as Claude works.
49
-
50
- ```typescript
51
- import { claude } from 'otterly';
52
-
53
- for await (const event of claude.stream("Refactor the auth module", { cwd: "." })) {
54
- switch (event.type) {
55
- case "text_delta":
56
- process.stdout.write(event.delta);
57
- break;
58
- case "tool_use":
59
- console.log(`\n> ${event.description}`);
60
- break;
61
- case "tool_result":
62
- if (event.isError) console.error(`Tool error: ${event.output}`);
63
- break;
64
- case "result":
65
- console.log(`\nDone! Cost: $${event.cost}`);
66
- break;
67
- }
51
+ // Streaming: get tokens as they arrive
52
+ for await (const event of claude.stream("Refactor auth", { cwd: "." })) {
53
+ if (event.type === "text_delta") process.stdout.write(event.delta);
68
54
  }
69
- ```
70
-
71
- ### Multi-turn Sessions
72
-
73
- Keep conversation context alive across multiple messages.
74
-
75
- ```typescript
76
- import { claude } from 'otterly';
77
55
 
56
+ // Multi-turn: context persists between messages
78
57
  const session = claude.session({ cwd: "./my-project" });
79
-
80
- const r1 = await session.send("Create a REST API for users");
81
- console.log(r1.text);
82
-
83
- const r2 = await session.send("Now add authentication to it");
84
- console.log(r2.text);
85
-
86
- const r3 = await session.send("Write tests for the auth middleware");
87
- console.log(r3.text);
88
-
89
- // Save the session ID to resume later
90
- console.log(session.id);
91
-
58
+ await session.send("Create a REST API");
59
+ await session.send("Now add auth to it"); // remembers the API it just built
92
60
  session.close();
93
61
  ```
94
62
 
95
- Resume a previous session:
96
-
97
- ```typescript
98
- const session = claude.session({
99
- cwd: "./my-project",
100
- resume: "previous-session-id",
101
- });
63
+ ### 2. As a server (any app can call it over HTTP)
102
64
 
103
- await session.send("What did we work on last time?");
65
+ ```bash
66
+ npx otterly serve --port 11434
104
67
  ```
105
68
 
106
- ### Custom Permissions
107
-
108
- By default, otterly runs in autopilot mode (no permission prompts). You can control what Claude is allowed to do.
69
+ Now any app that speaks the OpenAI protocol works out of the box:
109
70
 
110
71
  ```typescript
111
- import { claude, READONLY } from 'otterly';
72
+ import OpenAI from "openai";
112
73
 
113
- // Read-only: Claude can read files but can't modify anything
114
- const analysis = await claude.run("Analyze the codebase architecture", {
115
- cwd: ".",
116
- onPermission: READONLY,
74
+ const ai = new OpenAI({
75
+ baseURL: "http://localhost:11434/v1",
76
+ apiKey: "not-needed",
117
77
  });
118
78
 
119
- // Custom: fine-grained control
120
- const result = await claude.run("Deploy to staging", {
121
- cwd: ".",
122
- onPermission: ({ tool, input }) => {
123
- // Allow file reads and edits
124
- if (["Read", "Edit", "Write", "Glob", "Grep"].includes(tool)) {
125
- return { allow: true };
126
- }
127
- // Allow specific commands only
128
- if (tool === "Bash" && input.command?.includes("npm run deploy")) {
129
- return { allow: true };
130
- }
131
- // Deny everything else
132
- return { allow: false, message: `${tool} not allowed in this context` };
133
- },
79
+ const response = await ai.chat.completions.create({
80
+ model: "claude-sonnet-4-20250514",
81
+ messages: [{ role: "user", content: "Fix the failing tests" }],
134
82
  });
135
83
  ```
136
84
 
137
- ### Custom Engine Instance
85
+ This works with any OpenAI SDK (Python, Go, Rust, whatever), Cursor, Continue, or any tool that lets you set a custom base URL.
138
86
 
139
- Set defaults for all calls.
87
+ ## Server endpoints
140
88
 
141
- ```typescript
142
- import { ClaudeEngine } from 'otterly';
89
+ | Endpoint | Protocol | Use case |
90
+ |----------|----------|----------|
91
+ | `POST /v1/chat/completions` | OpenAI-compatible | Drop-in for any OpenAI client |
92
+ | `POST /api/run` | Native JSON | One-shot, returns full result with cost/tools |
93
+ | `POST /api/stream` | Native NDJSON | Streaming with rich events |
94
+ | `WS /ws` | WebSocket | Multi-turn sessions for chat UIs |
95
+ | `GET /api/status` | JSON | Health check, queue depth, circuit state |
143
96
 
144
- const engine = new ClaudeEngine({
145
- cwd: "./my-project",
146
- model: "claude-sonnet-4-20250514",
147
- maxTurns: 10,
148
- });
97
+ ## Server options
149
98
 
150
- // All calls inherit the defaults
151
- const r1 = await engine.run("Fix lint errors");
152
- const r2 = await engine.run("Add missing types");
99
+ ```bash
100
+ npx otterly serve \
101
+ --port 11434 \
102
+ --dir ./my-project \
103
+ --max-concurrent 3 \
104
+ --max-queue 20 \
105
+ --rate-limit 30
153
106
  ```
154
107
 
155
- ### Abort / Timeout
108
+ | Flag | Default | What it does |
109
+ |------|---------|--------------|
110
+ | `--port` | 11434 | Port to listen on |
111
+ | `--dir` | cwd | Working directory for Claude |
112
+ | `--max-concurrent` | 5 | Max simultaneous Claude processes |
113
+ | `--max-queue` | 50 | Max waiting requests before rejecting |
114
+ | `--rate-limit` | 60 | Requests per minute per client |
156
115
 
157
- ```typescript
158
- const controller = new AbortController();
159
- setTimeout(() => controller.abort(), 30_000); // 30s timeout
116
+ Set `OTTERLY_API_KEY` to require Bearer auth on all requests.
160
117
 
161
- const result = await claude.run("Refactor the entire test suite", {
162
- cwd: ".",
163
- signal: controller.signal,
164
- });
165
- ```
118
+ ## Features
166
119
 
167
- ## Event Types
120
+ **Production safety** — concurrency limits, rate limiting, request timeouts, circuit breaker (stops calling the API when it's down), graceful shutdown on SIGTERM, structured JSON logging with request IDs.
168
121
 
169
- When using `stream()` or `session.sendStream()`, you receive these events:
122
+ **OpenAI compatibility** — streaming (SSE), `response_format: { type: "json_object" }`, `tools` parameter for filtering which tools Claude can use.
170
123
 
171
- | Event | Fields | Description |
172
- |-------|--------|-------------|
173
- | `text` | `text` | Complete text from an assistant message block |
174
- | `text_delta` | `delta` | Streaming text chunk (arrives in real-time) |
175
- | `tool_use` | `id`, `tool`, `input`, `description` | Claude is using a tool |
176
- | `tool_result` | `toolUseId`, `tool`, `output`, `isError` | Tool execution result |
177
- | `system` | `sessionId`, `model`, `cwd`, `tools` | Session initialized |
178
- | `result` | `text`, `cost`, `duration`, `sessionId`, `usage` | Turn complete |
179
- | `error` | `error` | Something went wrong |
124
+ **Session reuse** — pass `X-Session-Id` header on HTTP requests to continue a conversation across multiple calls.
180
125
 
181
- ## Error Handling
126
+ **WebSocket heartbeats** — dead connections detected and cleaned up within 30s.
182
127
 
183
- Errors are classified with a `code` field for programmatic handling:
128
+ ## Library API
184
129
 
185
130
  ```typescript
186
- import { claude, AgentError } from 'otterly';
187
-
188
- try {
189
- await claude.run("Do something");
190
- } catch (err) {
191
- if (err instanceof AgentError) {
192
- switch (err.code) {
193
- case "NOT_AUTHENTICATED":
194
- console.log("Run `claude login` to authenticate");
195
- break;
196
- case "RATE_LIMITED":
197
- console.log("Wait and retry");
198
- break;
199
- case "SDK_NOT_FOUND":
200
- console.log("npm install @anthropic-ai/claude-code");
201
- break;
202
- case "BILLING":
203
- console.log("Check your Anthropic account");
204
- break;
205
- case "NETWORK":
206
- console.log("Check your internet connection");
207
- break;
208
- case "ABORTED":
209
- console.log("Operation was cancelled");
210
- break;
211
- }
212
- }
213
- }
131
+ import { claude, ClaudeEngine, READONLY } from "otterly";
132
+
133
+ // One-shot
134
+ const result = await claude.run(prompt, options);
135
+ // result: { text, cost, duration, sessionId, usage, tools }
136
+
137
+ // Stream
138
+ for await (const event of claude.stream(prompt, options)) { ... }
139
+ // events: text_delta, tool_use, tool_result, result, error
140
+
141
+ // Session
142
+ const session = claude.session(options);
143
+ await session.send(message);
144
+ session.close();
145
+
146
+ // Custom engine with defaults
147
+ const engine = new ClaudeEngine({ model: "claude-sonnet-4-20250514", maxTurns: 10 });
148
+
149
+ // Read-only mode (no file writes, no commands)
150
+ await claude.run("Analyze this code", { onPermission: READONLY });
214
151
  ```
215
152
 
216
- ## Options
153
+ ### Options
217
154
 
218
155
  ```typescript
219
- interface EngineOptions {
220
- cwd?: string; // Working directory (default: process.cwd())
221
- model?: string; // Model to use
222
- permissionMode?: PermissionMode; // "default" | "acceptEdits" | "bypassPermissions" | "plan"
223
- systemPrompt?: string; // Custom system prompt
224
- maxTurns?: number; // Max agent turns
225
- allowedTools?: string[]; // Tool whitelist
226
- disallowedTools?: string[]; // Tool blacklist
227
- mcpServers?: Record<string, any>;// MCP server configs
228
- signal?: AbortSignal; // Cancellation signal
229
- onPermission?: PermissionHandler;// Custom permission handler
230
- resume?: string; // Session ID to resume
231
- effort?: "low" | "medium" | "high"; // Reasoning effort
156
+ {
157
+ cwd?: string; // working directory
158
+ model?: string; // model name
159
+ systemPrompt?: string; // custom system prompt
160
+ maxTurns?: number; // max agent turns
161
+ allowedTools?: string[]; // tool whitelist
162
+ disallowedTools?: string[];// tool blacklist
163
+ signal?: AbortSignal; // cancellation
164
+ onPermission?: handler; // custom permission logic
165
+ resume?: string; // resume a previous session
166
+ effort?: "low" | "medium" | "high";
232
167
  }
233
168
  ```
234
169
 
235
- ## How It Works
236
-
237
- otterly wraps the `@anthropic-ai/claude-code` SDK's `query()` function. It piggybacks on your existing Claude Code installation — if you've run `claude login`, you're already authenticated. No API keys to manage.
170
+ ## How it works
238
171
 
239
- 1. **`run()`** calls `query()` with your prompt, collects all events, returns the final result
240
- 2. **`stream()`** calls `query()` and yields normalized events as they arrive
241
- 3. **`session()`** uses the SDK's streaming input mode — an async generator that yields user messages on demand, keeping conversation context alive across turns in a single long-lived `query()` call
172
+ Otterly wraps the `@anthropic-ai/claude-code` SDK. Each call spawns a Claude Code subprocess — the same thing that runs in your terminal. It uses your existing `claude login` auth. No separate API keys.
242
173
 
243
- No API keys. No server. No HTTP. No WebSocket. The SDK runs in-process using your local Claude Code auth.
174
+ The server adds production concerns on top: concurrency control (so you don't fork-bomb your machine), rate limiting, circuit breaking, timeouts, and format translation so OpenAI clients can talk to it.
244
175
 
245
176
  ## License
246
177
 
package/dist/cli.js CHANGED
@@ -12,9 +12,14 @@ const { values, positionals } = parseArgs({
12
12
  "max-queue": { type: "string", default: "50" },
13
13
  "rate-limit": { type: "string", default: "60" },
14
14
  help: { type: "boolean", short: "h", default: false },
15
+ version: { type: "boolean", short: "v", default: false },
15
16
  },
16
17
  });
17
18
  const command = positionals[0] || "serve";
19
+ if (values.version) {
20
+ console.log("0.3.1");
21
+ process.exit(0);
22
+ }
18
23
  if (values.help || command === "help") {
19
24
  console.log(`
20
25
  otterly — local inference server for Claude Code
@@ -29,6 +34,7 @@ if (values.help || command === "help") {
29
34
  --max-concurrent <number> Max concurrent requests (default: 5)
30
35
  --max-queue <number> Max queued requests (default: 50)
31
36
  --rate-limit <number> Requests per minute per client (default: 60)
37
+ -v, --version Print version
32
38
  -h, --help Show this help
33
39
 
34
40
  Environment:
@@ -39,6 +45,7 @@ if (values.help || command === "help") {
39
45
  POST /api/run Native one-shot execution
40
46
  POST /api/stream Native NDJSON streaming
41
47
  GET /api/status Health check + queue/circuit stats
48
+ GET /playground Interactive API playground
42
49
  WS /ws Multi-turn WebSocket sessions
43
50
  `);
44
51
  process.exit(0);
@@ -11,6 +11,8 @@ import { RequestQueue, QueueFullError, QueueTimeoutError } from "./request-queue
11
11
  import { checkAuth, RateLimiter, sendAuthError, sendRateLimitError } from "./middleware.js";
12
12
  import { generateRequestId, logRequest, logResponse, logError } from "./logger.js";
13
13
  import { CircuitBreaker } from "./circuit-breaker.js";
14
+ import { openApiSpec } from "./swagger.js";
15
+ import { getPlaygroundHtml } from "./playground.js";
14
16
  /**
15
17
  * Parse JSON body from an incoming request. Returns parsed object or null on failure.
16
18
  */
@@ -98,6 +100,23 @@ export async function startApiServer(opts = {}) {
98
100
  handleStatus(req, res, queue, circuitBreaker);
99
101
  return;
100
102
  }
103
+ // GET /swagger.json — OpenAPI spec, no auth
104
+ if (req.method === "GET" && path === "/swagger.json") {
105
+ jsonResponse(res, 200, openApiSpec);
106
+ return;
107
+ }
108
+ // GET /playground — interactive API playground
109
+ if (req.method === "GET" && path === "/playground") {
110
+ const html = getPlaygroundHtml(port);
111
+ res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
112
+ res.end(html);
113
+ return;
114
+ }
115
+ // GET / — server info
116
+ if (req.method === "GET" && path === "/") {
117
+ jsonResponse(res, 200, { name: "otterly", version: "0.3.1", playground: "/playground" });
118
+ return;
119
+ }
101
120
  // ── POST routes: auth → rate limit → circuit breaker → queue ──
102
121
  if (req.method !== "POST") {
103
122
  jsonResponse(res, 404, { error: "Not found" });
@@ -237,6 +256,7 @@ export async function startApiServer(opts = {}) {
237
256
  console.log(` Streaming : http://localhost:${port}/api/stream`);
238
257
  console.log(` WebSocket : ws://localhost:${port}/ws`);
239
258
  console.log(` Health : http://localhost:${port}/api/status`);
259
+ console.log(` Playground : http://localhost:${port}/playground`);
240
260
  console.log(` Working dir : ${workingDir}`);
241
261
  if (apiKey) {
242
262
  console.log(` Auth : API key required (OTTERLY_API_KEY)`);
@@ -0,0 +1 @@
1
+ export declare function getPlaygroundHtml(port: number): string;