otterly 0.1.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +109 -178
- package/dist/cli.js +7 -0
- package/dist/server/index.js +20 -0
- package/dist/server/playground.d.ts +1 -0
- package/dist/server/playground.js +1227 -0
- package/dist/server/routes-native.js +1 -1
- package/dist/server/swagger.d.ts +503 -0
- package/dist/server/swagger.js +320 -0
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -1,246 +1,177 @@
|
|
|
1
1
|
# otterly
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Local AI inference for your apps. Use Claude Code instead of paying for API tokens.
|
|
4
4
|
|
|
5
5
|
```bash
|
|
6
6
|
npm install otterly @anthropic-ai/claude-code
|
|
7
7
|
```
|
|
8
8
|
|
|
9
|
+
## What is this?
|
|
10
|
+
|
|
11
|
+
You have Claude Code on your machine. You're already paying for it (Max subscription or org access). Otterly lets your apps — backends, Electron apps, scripts, local tools — call Claude Code directly instead of making API calls and paying per token.
|
|
12
|
+
|
|
13
|
+
**Instead of this:**
|
|
9
14
|
```typescript
|
|
10
|
-
import
|
|
15
|
+
import OpenAI from "openai";
|
|
16
|
+
const client = new OpenAI({ apiKey: "sk-..." }); // costs per token
|
|
17
|
+
```
|
|
11
18
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
}
|
|
19
|
+
**Do this:**
|
|
20
|
+
```typescript
|
|
21
|
+
import { claude } from "otterly";
|
|
22
|
+
const result = await claude.run("Fix the bug in server.ts", { cwd: "./my-app" });
|
|
23
|
+
// uses your local Claude Code — no API key, no per-token cost
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Or run it as a server and point any OpenAI-compatible client at it:
|
|
15
27
|
|
|
16
|
-
|
|
17
|
-
|
|
28
|
+
```bash
|
|
29
|
+
npx otterly serve
|
|
30
|
+
# now http://localhost:11434 speaks the OpenAI protocol
|
|
18
31
|
```
|
|
19
32
|
|
|
20
33
|
## Requirements
|
|
21
34
|
|
|
22
35
|
- Node.js 18+
|
|
23
36
|
- Claude Code installed and authenticated (`claude login`)
|
|
24
|
-
- `@anthropic-ai/claude-code` installed as a peer dependency
|
|
25
37
|
|
|
26
|
-
##
|
|
38
|
+
## Two ways to use it
|
|
27
39
|
|
|
28
|
-
###
|
|
29
|
-
|
|
30
|
-
Run a task, get the result. Simplest way to use it.
|
|
40
|
+
### 1. As a library (import into your app)
|
|
31
41
|
|
|
32
42
|
```typescript
|
|
33
|
-
import { claude } from
|
|
43
|
+
import { claude } from "otterly";
|
|
34
44
|
|
|
35
|
-
|
|
45
|
+
// Simple: run a prompt, get a result
|
|
46
|
+
const result = await claude.run("Add input validation to user.ts", {
|
|
36
47
|
cwd: "./my-project",
|
|
37
48
|
});
|
|
49
|
+
console.log(result.text);
|
|
38
50
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
console.log(result.sessionId); // Save this to resume later
|
|
43
|
-
console.log(result.tools); // Every tool that was used
|
|
44
|
-
```
|
|
45
|
-
|
|
46
|
-
### Streaming
|
|
47
|
-
|
|
48
|
-
Get real-time events as Claude works.
|
|
49
|
-
|
|
50
|
-
```typescript
|
|
51
|
-
import { claude } from 'otterly';
|
|
52
|
-
|
|
53
|
-
for await (const event of claude.stream("Refactor the auth module", { cwd: "." })) {
|
|
54
|
-
switch (event.type) {
|
|
55
|
-
case "text_delta":
|
|
56
|
-
process.stdout.write(event.delta);
|
|
57
|
-
break;
|
|
58
|
-
case "tool_use":
|
|
59
|
-
console.log(`\n> ${event.description}`);
|
|
60
|
-
break;
|
|
61
|
-
case "tool_result":
|
|
62
|
-
if (event.isError) console.error(`Tool error: ${event.output}`);
|
|
63
|
-
break;
|
|
64
|
-
case "result":
|
|
65
|
-
console.log(`\nDone! Cost: $${event.cost}`);
|
|
66
|
-
break;
|
|
67
|
-
}
|
|
51
|
+
// Streaming: get tokens as they arrive
|
|
52
|
+
for await (const event of claude.stream("Refactor auth", { cwd: "." })) {
|
|
53
|
+
if (event.type === "text_delta") process.stdout.write(event.delta);
|
|
68
54
|
}
|
|
69
|
-
```
|
|
70
|
-
|
|
71
|
-
### Multi-turn Sessions
|
|
72
|
-
|
|
73
|
-
Keep conversation context alive across multiple messages.
|
|
74
|
-
|
|
75
|
-
```typescript
|
|
76
|
-
import { claude } from 'otterly';
|
|
77
55
|
|
|
56
|
+
// Multi-turn: context persists between messages
|
|
78
57
|
const session = claude.session({ cwd: "./my-project" });
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
console.log(r1.text);
|
|
82
|
-
|
|
83
|
-
const r2 = await session.send("Now add authentication to it");
|
|
84
|
-
console.log(r2.text);
|
|
85
|
-
|
|
86
|
-
const r3 = await session.send("Write tests for the auth middleware");
|
|
87
|
-
console.log(r3.text);
|
|
88
|
-
|
|
89
|
-
// Save the session ID to resume later
|
|
90
|
-
console.log(session.id);
|
|
91
|
-
|
|
58
|
+
await session.send("Create a REST API");
|
|
59
|
+
await session.send("Now add auth to it"); // remembers the API it just built
|
|
92
60
|
session.close();
|
|
93
61
|
```
|
|
94
62
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
```typescript
|
|
98
|
-
const session = claude.session({
|
|
99
|
-
cwd: "./my-project",
|
|
100
|
-
resume: "previous-session-id",
|
|
101
|
-
});
|
|
63
|
+
### 2. As a server (any app can call it over HTTP)
|
|
102
64
|
|
|
103
|
-
|
|
65
|
+
```bash
|
|
66
|
+
npx otterly serve --port 11434
|
|
104
67
|
```
|
|
105
68
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
By default, otterly runs in autopilot mode (no permission prompts). You can control what Claude is allowed to do.
|
|
69
|
+
Now any app that speaks the OpenAI protocol works out of the box:
|
|
109
70
|
|
|
110
71
|
```typescript
|
|
111
|
-
import
|
|
72
|
+
import OpenAI from "openai";
|
|
112
73
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
onPermission: READONLY,
|
|
74
|
+
const ai = new OpenAI({
|
|
75
|
+
baseURL: "http://localhost:11434/v1",
|
|
76
|
+
apiKey: "not-needed",
|
|
117
77
|
});
|
|
118
78
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
onPermission: ({ tool, input }) => {
|
|
123
|
-
// Allow file reads and edits
|
|
124
|
-
if (["Read", "Edit", "Write", "Glob", "Grep"].includes(tool)) {
|
|
125
|
-
return { allow: true };
|
|
126
|
-
}
|
|
127
|
-
// Allow specific commands only
|
|
128
|
-
if (tool === "Bash" && input.command?.includes("npm run deploy")) {
|
|
129
|
-
return { allow: true };
|
|
130
|
-
}
|
|
131
|
-
// Deny everything else
|
|
132
|
-
return { allow: false, message: `${tool} not allowed in this context` };
|
|
133
|
-
},
|
|
79
|
+
const response = await ai.chat.completions.create({
|
|
80
|
+
model: "claude-sonnet-4-20250514",
|
|
81
|
+
messages: [{ role: "user", content: "Fix the failing tests" }],
|
|
134
82
|
});
|
|
135
83
|
```
|
|
136
84
|
|
|
137
|
-
|
|
85
|
+
This works with any OpenAI SDK (Python, Go, Rust, whatever), Cursor, Continue, or any tool that lets you set a custom base URL.
|
|
138
86
|
|
|
139
|
-
|
|
87
|
+
## Server endpoints
|
|
140
88
|
|
|
141
|
-
|
|
142
|
-
|
|
89
|
+
| Endpoint | Protocol | Use case |
|
|
90
|
+
|----------|----------|----------|
|
|
91
|
+
| `POST /v1/chat/completions` | OpenAI-compatible | Drop-in for any OpenAI client |
|
|
92
|
+
| `POST /api/run` | Native JSON | One-shot, returns full result with cost/tools |
|
|
93
|
+
| `POST /api/stream` | Native NDJSON | Streaming with rich events |
|
|
94
|
+
| `WS /ws` | WebSocket | Multi-turn sessions for chat UIs |
|
|
95
|
+
| `GET /api/status` | JSON | Health check, queue depth, circuit state |
|
|
143
96
|
|
|
144
|
-
|
|
145
|
-
cwd: "./my-project",
|
|
146
|
-
model: "claude-sonnet-4-20250514",
|
|
147
|
-
maxTurns: 10,
|
|
148
|
-
});
|
|
97
|
+
## Server options
|
|
149
98
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
99
|
+
```bash
|
|
100
|
+
npx otterly serve \
|
|
101
|
+
--port 11434 \
|
|
102
|
+
--dir ./my-project \
|
|
103
|
+
--max-concurrent 3 \
|
|
104
|
+
--max-queue 20 \
|
|
105
|
+
--rate-limit 30
|
|
153
106
|
```
|
|
154
107
|
|
|
155
|
-
|
|
108
|
+
| Flag | Default | What it does |
|
|
109
|
+
|------|---------|--------------|
|
|
110
|
+
| `--port` | 11434 | Port to listen on |
|
|
111
|
+
| `--dir` | cwd | Working directory for Claude |
|
|
112
|
+
| `--max-concurrent` | 5 | Max simultaneous Claude processes |
|
|
113
|
+
| `--max-queue` | 50 | Max waiting requests before rejecting |
|
|
114
|
+
| `--rate-limit` | 60 | Requests per minute per client |
|
|
156
115
|
|
|
157
|
-
|
|
158
|
-
const controller = new AbortController();
|
|
159
|
-
setTimeout(() => controller.abort(), 30_000); // 30s timeout
|
|
116
|
+
Set `OTTERLY_API_KEY` to require Bearer auth on API requests (`GET /swagger.json` is served without auth).
|
|
160
117
|
|
|
161
|
-
|
|
162
|
-
cwd: ".",
|
|
163
|
-
signal: controller.signal,
|
|
164
|
-
});
|
|
165
|
-
```
|
|
118
|
+
## Features
|
|
166
119
|
|
|
167
|
-
|
|
120
|
+
**Production safety** — concurrency limits, rate limiting, request timeouts, circuit breaker (stops calling a down API), graceful shutdown on SIGTERM, structured JSON logging with request IDs.
|
|
168
121
|
|
|
169
|
-
|
|
122
|
+
**OpenAI compatibility** — streaming (SSE), `response_format: { type: "json_object" }`, `tools` parameter for filtering which tools Claude can use.
|
|
170
123
|
|
|
171
|
-
|
|
172
|
-
|-------|--------|-------------|
|
|
173
|
-
| `text` | `text` | Complete text from an assistant message block |
|
|
174
|
-
| `text_delta` | `delta` | Streaming text chunk (arrives in real-time) |
|
|
175
|
-
| `tool_use` | `id`, `tool`, `input`, `description` | Claude is using a tool |
|
|
176
|
-
| `tool_result` | `toolUseId`, `tool`, `output`, `isError` | Tool execution result |
|
|
177
|
-
| `system` | `sessionId`, `model`, `cwd`, `tools` | Session initialized |
|
|
178
|
-
| `result` | `text`, `cost`, `duration`, `sessionId`, `usage` | Turn complete |
|
|
179
|
-
| `error` | `error` | Something went wrong |
|
|
124
|
+
**Session reuse** — pass `X-Session-Id` header on HTTP requests to continue a conversation across multiple calls.
|
|
180
125
|
|
|
181
|
-
|
|
126
|
+
**WebSocket heartbeats** — dead connections are detected and cleaned up within 30s.
|
|
182
127
|
|
|
183
|
-
|
|
128
|
+
## Library API
|
|
184
129
|
|
|
185
130
|
```typescript
|
|
186
|
-
import { claude,
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
console.log("Check your internet connection");
|
|
207
|
-
break;
|
|
208
|
-
case "ABORTED":
|
|
209
|
-
console.log("Operation was cancelled");
|
|
210
|
-
break;
|
|
211
|
-
}
|
|
212
|
-
}
|
|
213
|
-
}
|
|
131
|
+
import { claude, ClaudeEngine, READONLY } from "otterly";
|
|
132
|
+
|
|
133
|
+
// One-shot
|
|
134
|
+
const result = await claude.run(prompt, options);
|
|
135
|
+
// result: { text, cost, duration, sessionId, usage, tools }
|
|
136
|
+
|
|
137
|
+
// Stream
|
|
138
|
+
for await (const event of claude.stream(prompt, options)) { ... }
|
|
139
|
+
// events: text_delta, tool_use, tool_result, result, error
|
|
140
|
+
|
|
141
|
+
// Session
|
|
142
|
+
const session = claude.session(options);
|
|
143
|
+
await session.send(message);
|
|
144
|
+
session.close();
|
|
145
|
+
|
|
146
|
+
// Custom engine with defaults
|
|
147
|
+
const engine = new ClaudeEngine({ model: "claude-sonnet-4-20250514", maxTurns: 10 });
|
|
148
|
+
|
|
149
|
+
// Read-only mode (no file writes, no commands)
|
|
150
|
+
await claude.run("Analyze this code", { onPermission: READONLY });
|
|
214
151
|
```
|
|
215
152
|
|
|
216
|
-
|
|
153
|
+
### Options
|
|
217
154
|
|
|
218
155
|
```typescript
|
|
219
|
-
|
|
220
|
-
cwd?: string;
|
|
221
|
-
model?: string;
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
resume?: string; // Session ID to resume
|
|
231
|
-
effort?: "low" | "medium" | "high"; // Reasoning effort
|
|
156
|
+
{
|
|
157
|
+
cwd?: string; // working directory
|
|
158
|
+
model?: string; // model name
|
|
159
|
+
systemPrompt?: string; // custom system prompt
|
|
160
|
+
maxTurns?: number; // max agent turns
|
|
161
|
+
allowedTools?: string[]; // tool whitelist
|
|
162
|
+
disallowedTools?: string[];// tool blacklist
|
|
163
|
+
signal?: AbortSignal; // cancellation
|
|
164
|
+
onPermission?: handler; // custom permission logic
|
|
165
|
+
resume?: string; // resume a previous session
|
|
166
|
+
effort?: "low" | "medium" | "high";
|
|
232
167
|
}
|
|
233
168
|
```
|
|
234
169
|
|
|
235
|
-
## How
|
|
236
|
-
|
|
237
|
-
otterly wraps the `@anthropic-ai/claude-code` SDK's `query()` function. It piggybacks on your existing Claude Code installation — if you've run `claude login`, you're already authenticated. No API keys to manage.
|
|
170
|
+
## How it works
|
|
238
171
|
|
|
239
|
-
|
|
240
|
-
2. **`stream()`** calls `query()` and yields normalized events as they arrive
|
|
241
|
-
3. **`session()`** uses the SDK's streaming input mode — an async generator that yields user messages on demand, keeping conversation context alive across turns in a single long-lived `query()` call
|
|
172
|
+
Otterly wraps the `@anthropic-ai/claude-code` SDK. Each call spawns a Claude Code subprocess — the same thing that runs in your terminal. It uses your existing `claude login` auth. No separate API keys.
|
|
242
173
|
|
|
243
|
-
|
|
174
|
+
The server adds production concerns on top: concurrency control (so you don't fork-bomb your machine), rate limiting, circuit breaking, timeouts, and format translation so OpenAI clients can talk to it.
|
|
244
175
|
|
|
245
176
|
## License
|
|
246
177
|
|
package/dist/cli.js
CHANGED
|
@@ -12,9 +12,14 @@ const { values, positionals } = parseArgs({
|
|
|
12
12
|
"max-queue": { type: "string", default: "50" },
|
|
13
13
|
"rate-limit": { type: "string", default: "60" },
|
|
14
14
|
help: { type: "boolean", short: "h", default: false },
|
|
15
|
+
version: { type: "boolean", short: "v", default: false },
|
|
15
16
|
},
|
|
16
17
|
});
|
|
17
18
|
const command = positionals[0] || "serve";
|
|
19
|
+
if (values.version) {
|
|
20
|
+
console.log("0.3.1");
|
|
21
|
+
process.exit(0);
|
|
22
|
+
}
|
|
18
23
|
if (values.help || command === "help") {
|
|
19
24
|
console.log(`
|
|
20
25
|
otterly — local inference server for Claude Code
|
|
@@ -29,6 +34,7 @@ if (values.help || command === "help") {
|
|
|
29
34
|
--max-concurrent <number> Max concurrent requests (default: 5)
|
|
30
35
|
--max-queue <number> Max queued requests (default: 50)
|
|
31
36
|
--rate-limit <number> Requests per minute per client (default: 60)
|
|
37
|
+
-v, --version Print version
|
|
32
38
|
-h, --help Show this help
|
|
33
39
|
|
|
34
40
|
Environment:
|
|
@@ -39,6 +45,7 @@ if (values.help || command === "help") {
|
|
|
39
45
|
POST /api/run Native one-shot execution
|
|
40
46
|
POST /api/stream Native NDJSON streaming
|
|
41
47
|
GET /api/status Health check + queue/circuit stats
|
|
48
|
+
GET /playground Interactive API playground
|
|
42
49
|
WS /ws Multi-turn WebSocket sessions
|
|
43
50
|
`);
|
|
44
51
|
process.exit(0);
|
package/dist/server/index.js
CHANGED
|
@@ -11,6 +11,8 @@ import { RequestQueue, QueueFullError, QueueTimeoutError } from "./request-queue
|
|
|
11
11
|
import { checkAuth, RateLimiter, sendAuthError, sendRateLimitError } from "./middleware.js";
|
|
12
12
|
import { generateRequestId, logRequest, logResponse, logError } from "./logger.js";
|
|
13
13
|
import { CircuitBreaker } from "./circuit-breaker.js";
|
|
14
|
+
import { openApiSpec } from "./swagger.js";
|
|
15
|
+
import { getPlaygroundHtml } from "./playground.js";
|
|
14
16
|
/**
|
|
15
17
|
* Parse JSON body from an incoming request. Returns parsed object or null on failure.
|
|
16
18
|
*/
|
|
@@ -98,6 +100,23 @@ export async function startApiServer(opts = {}) {
|
|
|
98
100
|
handleStatus(req, res, queue, circuitBreaker);
|
|
99
101
|
return;
|
|
100
102
|
}
|
|
103
|
+
// GET /swagger.json — OpenAPI spec, no auth
|
|
104
|
+
if (req.method === "GET" && path === "/swagger.json") {
|
|
105
|
+
jsonResponse(res, 200, openApiSpec);
|
|
106
|
+
return;
|
|
107
|
+
}
|
|
108
|
+
// GET /playground — interactive API playground
|
|
109
|
+
if (req.method === "GET" && path === "/playground") {
|
|
110
|
+
const html = getPlaygroundHtml(port);
|
|
111
|
+
res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
|
|
112
|
+
res.end(html);
|
|
113
|
+
return;
|
|
114
|
+
}
|
|
115
|
+
// GET / — server info
|
|
116
|
+
if (req.method === "GET" && path === "/") {
|
|
117
|
+
jsonResponse(res, 200, { name: "otterly", version: "0.3.1", playground: "/playground" });
|
|
118
|
+
return;
|
|
119
|
+
}
|
|
101
120
|
// ── POST routes: auth → rate limit → circuit breaker → queue ──
|
|
102
121
|
if (req.method !== "POST") {
|
|
103
122
|
jsonResponse(res, 404, { error: "Not found" });
|
|
@@ -237,6 +256,7 @@ export async function startApiServer(opts = {}) {
|
|
|
237
256
|
console.log(` Streaming : http://localhost:${port}/api/stream`);
|
|
238
257
|
console.log(` WebSocket : ws://localhost:${port}/ws`);
|
|
239
258
|
console.log(` Health : http://localhost:${port}/api/status`);
|
|
259
|
+
console.log(` Playground : http://localhost:${port}/playground`);
|
|
240
260
|
console.log(` Working dir : ${workingDir}`);
|
|
241
261
|
if (apiKey) {
|
|
242
262
|
console.log(` Auth : API key required (OTTERLY_API_KEY)`);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function getPlaygroundHtml(port: number): string;
|