npm - otterly - Versions diffs - 0.1.0 → 0.3.1 - Mend

otterly 0.1.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/README.md +109 -178
package/dist/cli.js +7 -0
package/dist/server/index.js +20 -0
package/dist/server/playground.d.ts +1 -0
package/dist/server/playground.js +1227 -0
package/dist/server/routes-native.js +1 -1
package/dist/server/swagger.d.ts +503 -0
package/dist/server/swagger.js +320 -0
package/package.json +2 -2

package/README.md CHANGED Viewed

@@ -1,246 +1,177 @@
 # otterly
-Drop a Claude Code agent into your app in one line. Like Ollama, but instead of running a model for inference, you get a full coding agent that reads, writes, and runs code on your local machine.
+Local AI inference for your apps. Use Claude Code instead of paying for API tokens.
 ```bash
 npm install otterly @anthropic-ai/claude-code
 ```
+## What is this?
+You have Claude Code on your machine. You're already paying for it (Max subscription or org access). Otterly lets your apps — backends, Electron apps, scripts, local tools — call Claude Code directly instead of making API calls and paying per token.
+**Instead of this:**
 ```typescript
-import { claude } from 'otterly';
+import OpenAI from "openai";
+const client = new OpenAI({ apiKey: "sk-..." }); // costs per token
+```
-const result = await claude.run("Add error handling to server.ts", {
-  cwd: "./my-project",
-});
+**Do this:**
+```typescript
+import { claude } from "otterly";
+const result = await claude.run("Fix the bug in server.ts", { cwd: "./my-app" });
+// uses your local Claude Code — no API key, no per-token cost
+```
+Or run it as a server and point any OpenAI-compatible client at it:
-console.log(result.text);   // "I've added try-catch blocks to all route handlers..."
-console.log(result.cost);   // 0.03
+```bash
+npx otterly serve
+# now http://localhost:11434 speaks the OpenAI protocol
 ```
 ## Requirements
 - Node.js 18+
 - Claude Code installed and authenticated (`claude login`)
-- `@anthropic-ai/claude-code` installed as a peer dependency
-## Usage
+## Two ways to use it
-### One-shot
-Run a task, get the result. Simplest way to use it.
+### 1. As a library (import into your app)
 ```typescript
-import { claude } from 'otterly';
+import { claude } from "otterly";
-const result = await claude.run("Fix the login bug in auth.ts", {
+// Simple: run a prompt, get a result
+const result = await claude.run("Add input validation to user.ts", {
   cwd: "./my-project",
 });
+console.log(result.text);
-console.log(result.text);       // Final output text
-console.log(result.cost);       // Cost in USD
-console.log(result.duration);   // Duration in ms
-console.log(result.sessionId);  // Save this to resume later
-console.log(result.tools);      // Every tool that was used
-```
-### Streaming
-Get real-time events as Claude works.
-```typescript
-import { claude } from 'otterly';
-for await (const event of claude.stream("Refactor the auth module", { cwd: "." })) {
-  switch (event.type) {
-    case "text_delta":
-      process.stdout.write(event.delta);
-      break;
-    case "tool_use":
-      console.log(`\n> ${event.description}`);
-      break;
-    case "tool_result":
-      if (event.isError) console.error(`Tool error: ${event.output}`);
-      break;
-    case "result":
-      console.log(`\nDone! Cost: $${event.cost}`);
-      break;
-  }
+// Streaming: get tokens as they arrive
+for await (const event of claude.stream("Refactor auth", { cwd: "." })) {
+  if (event.type === "text_delta") process.stdout.write(event.delta);
 }
-```
-### Multi-turn Sessions
-Keep conversation context alive across multiple messages.
-```typescript
-import { claude } from 'otterly';
+// Multi-turn: context persists between messages
 const session = claude.session({ cwd: "./my-project" });
-const r1 = await session.send("Create a REST API for users");
-console.log(r1.text);
-const r2 = await session.send("Now add authentication to it");
-console.log(r2.text);
-const r3 = await session.send("Write tests for the auth middleware");
-console.log(r3.text);
-// Save the session ID to resume later
-console.log(session.id);
+await session.send("Create a REST API");
+await session.send("Now add auth to it"); // remembers the API it just built
 session.close();
 ```
-Resume a previous session:
-```typescript
-const session = claude.session({
-  cwd: "./my-project",
-  resume: "previous-session-id",
-});
+### 2. As a server (any app can call it over HTTP)
-await session.send("What did we work on last time?");
+```bash
+npx otterly serve --port 11434
 ```
-### Custom Permissions
-By default, otterly runs in autopilot mode (no permission prompts). You can control what Claude is allowed to do.
+Now any app that speaks the OpenAI protocol works out of the box:
 ```typescript
-import { claude, READONLY } from 'otterly';
+import OpenAI from "openai";
-// Read-only: Claude can read files but can't modify anything
-const analysis = await claude.run("Analyze the codebase architecture", {
-  cwd: ".",
-  onPermission: READONLY,
+const ai = new OpenAI({
+  baseURL: "http://localhost:11434/v1",
+  apiKey: "not-needed",
 });
-// Custom: fine-grained control
-const result = await claude.run("Deploy to staging", {
-  cwd: ".",
-  onPermission: ({ tool, input }) => {
-    // Allow file reads and edits
-    if (["Read", "Edit", "Write", "Glob", "Grep"].includes(tool)) {
-      return { allow: true };
-    }
-    // Allow specific commands only
-    if (tool === "Bash" && input.command?.includes("npm run deploy")) {
-      return { allow: true };
-    }
-    // Deny everything else
-    return { allow: false, message: `${tool} not allowed in this context` };
-  },
+const response = await ai.chat.completions.create({
+  model: "claude-sonnet-4-20250514",
+  messages: [{ role: "user", content: "Fix the failing tests" }],
 });
 ```
-### Custom Engine Instance
+This works with any OpenAI SDK (Python, Go, Rust, whatever), Cursor, Continue, or any tool that lets you set a custom base URL.
-Set defaults for all calls.
+## Server endpoints
-```typescript
-import { ClaudeEngine } from 'otterly';
+| Endpoint | Protocol | Use case |
+|----------|----------|----------|
+| `POST /v1/chat/completions` | OpenAI-compatible | Drop-in for any OpenAI client |
+| `POST /api/run` | Native JSON | One-shot, returns full result with cost/tools |
+| `POST /api/stream` | Native NDJSON | Streaming with rich events |
+| `WS /ws` | WebSocket | Multi-turn sessions for chat UIs |
+| `GET /api/status` | JSON | Health check, queue depth, circuit state |
-const engine = new ClaudeEngine({
-  cwd: "./my-project",
-  model: "claude-sonnet-4-20250514",
-  maxTurns: 10,
-});
+## Server options
-// All calls inherit the defaults
-const r1 = await engine.run("Fix lint errors");
-const r2 = await engine.run("Add missing types");
+```bash
+npx otterly serve \
+  --port 11434 \
+  --dir ./my-project \
+  --max-concurrent 3 \
+  --max-queue 20 \
+  --rate-limit 30
 ```
-### Abort / Timeout
+| Flag | Default | What it does |
+|------|---------|--------------|
+| `--port` | 11434 | Port to listen on |
+| `--dir` | cwd | Working directory for Claude |
+| `--max-concurrent` | 5 | Max simultaneous Claude processes |
+| `--max-queue` | 50 | Max waiting requests before rejecting |
+| `--rate-limit` | 60 | Requests per minute per client |
-```typescript
-const controller = new AbortController();
-setTimeout(() => controller.abort(), 30_000); // 30s timeout
+Set `OTTERLY_API_KEY` to require Bearer auth on all requests.
-const result = await claude.run("Refactor the entire test suite", {
-  cwd: ".",
-  signal: controller.signal,
-});
-```
+## Features
-## Event Types
+**Production safety** — concurrency limits, rate limiting, request timeouts, circuit breaker (stops calling a down API), graceful shutdown on SIGTERM, structured JSON logging with request IDs.
-When using `stream()` or `session.sendStream()`, you receive these events:
+**OpenAI compatibility** — streaming (SSE), `response_format: { type: "json_object" }`, `tools` parameter for filtering which tools Claude can use.
-| Event | Fields | Description |
-|-------|--------|-------------|
-| `text` | `text` | Complete text from an assistant message block |
-| `text_delta` | `delta` | Streaming text chunk (arrives in real-time) |
-| `tool_use` | `id`, `tool`, `input`, `description` | Claude is using a tool |
-| `tool_result` | `toolUseId`, `tool`, `output`, `isError` | Tool execution result |
-| `system` | `sessionId`, `model`, `cwd`, `tools` | Session initialized |
-| `result` | `text`, `cost`, `duration`, `sessionId`, `usage` | Turn complete |
-| `error` | `error` | Something went wrong |
+**Session reuse** — pass `X-Session-Id` header on HTTP requests to continue a conversation across multiple calls.
-## Error Handling
+**WebSocket heartbeats** — dead connections detected and cleaned up within 30s.
-Errors are classified with a `code` field for programmatic handling:
+## Library API
 ```typescript
-import { claude, AgentError } from 'otterly';
-try {
-  await claude.run("Do something");
-} catch (err) {
-  if (err instanceof AgentError) {
-    switch (err.code) {
-      case "NOT_AUTHENTICATED":
-        console.log("Run `claude login` to authenticate");
-        break;
-      case "RATE_LIMITED":
-        console.log("Wait and retry");
-        break;
-      case "SDK_NOT_FOUND":
-        console.log("npm install @anthropic-ai/claude-code");
-        break;
-      case "BILLING":
-        console.log("Check your Anthropic account");
-        break;
-      case "NETWORK":
-        console.log("Check your internet connection");
-        break;
-      case "ABORTED":
-        console.log("Operation was cancelled");
-        break;
-    }
-  }
-}
+import { claude, ClaudeEngine, READONLY } from "otterly";
+// One-shot
+const result = await claude.run(prompt, options);
+// result: { text, cost, duration, sessionId, usage, tools }
+// Stream
+for await (const event of claude.stream(prompt, options)) { ... }
+// events: text_delta, tool_use, tool_result, result, error
+// Session
+const session = claude.session(options);
+await session.send(message);
+session.close();
+// Custom engine with defaults
+const engine = new ClaudeEngine({ model: "claude-sonnet-4-20250514", maxTurns: 10 });
+// Read-only mode (no file writes, no commands)
+await claude.run("Analyze this code", { onPermission: READONLY });
 ```
-## Options
+### Options
 ```typescript
-interface EngineOptions {
-  cwd?: string;                    // Working directory (default: process.cwd())
-  model?: string;                  // Model to use
-  permissionMode?: PermissionMode; // "default" | "acceptEdits" | "bypassPermissions" | "plan"
-  systemPrompt?: string;           // Custom system prompt
-  maxTurns?: number;               // Max agent turns
-  allowedTools?: string[];         // Tool whitelist
-  disallowedTools?: string[];      // Tool blacklist
-  mcpServers?: Record<string, any>;// MCP server configs
-  signal?: AbortSignal;            // Cancellation signal
-  onPermission?: PermissionHandler;// Custom permission handler
-  resume?: string;                 // Session ID to resume
-  effort?: "low" | "medium" | "high"; // Reasoning effort
+{
+  cwd?: string;              // working directory
+  model?: string;            // model name
+  systemPrompt?: string;     // custom system prompt
+  maxTurns?: number;         // max agent turns
+  allowedTools?: string[];   // tool whitelist
+  disallowedTools?: string[];// tool blacklist
+  signal?: AbortSignal;      // cancellation
+  onPermission?: handler;    // custom permission logic
+  resume?: string;           // resume a previous session
+  effort?: "low" | "medium" | "high";
 }
 ```
-## How It Works
-otterly wraps the `@anthropic-ai/claude-code` SDK's `query()` function. It piggybacks on your existing Claude Code installation — if you've run `claude login`, you're already authenticated. No API keys to manage.
+## How it works
-1. **`run()`** calls `query()` with your prompt, collects all events, returns the final result
-2. **`stream()`** calls `query()` and yields normalized events as they arrive
-3. **`session()`** uses the SDK's streaming input mode — an async generator that yields user messages on demand, keeping conversation context alive across turns in a single long-lived `query()` call
+Otterly wraps the `@anthropic-ai/claude-code` SDK. Each call spawns a Claude Code subprocess — the same thing that runs in your terminal. It uses your existing `claude login` auth. No separate API keys.
-No API keys. No server. No HTTP. No WebSocket. The SDK runs in-process using your local Claude Code auth.
+The server adds production concerns on top: concurrency control (so you don't fork-bomb your machine), rate limiting, circuit breaking, timeouts, and format translation so OpenAI clients can talk to it.
 ## License

package/dist/cli.js CHANGED Viewed

@@ -12,9 +12,14 @@ const { values, positionals } = parseArgs({
         "max-queue": { type: "string", default: "50" },
         "rate-limit": { type: "string", default: "60" },
         help: { type: "boolean", short: "h", default: false },
+        version: { type: "boolean", short: "v", default: false },
     },
 });
 const command = positionals[0] || "serve";
+if (values.version) {
+    console.log("0.3.1");
+    process.exit(0);
+}
 if (values.help || command === "help") {
     console.log(`
   otterly — local inference server for Claude Code
@@ -29,6 +34,7 @@ if (values.help || command === "help") {
     --max-concurrent <number>   Max concurrent requests (default: 5)
     --max-queue <number>        Max queued requests (default: 50)
     --rate-limit <number>       Requests per minute per client (default: 60)
+    -v, --version               Print version
     -h, --help                  Show this help
   Environment:
@@ -39,6 +45,7 @@ if (values.help || command === "help") {
     POST /api/run               Native one-shot execution
     POST /api/stream            Native NDJSON streaming
     GET  /api/status            Health check + queue/circuit stats
+    GET  /playground            Interactive API playground
     WS   /ws                    Multi-turn WebSocket sessions
 `);
     process.exit(0);

package/dist/server/index.js CHANGED Viewed

@@ -11,6 +11,8 @@ import { RequestQueue, QueueFullError, QueueTimeoutError } from "./request-queue
 import { checkAuth, RateLimiter, sendAuthError, sendRateLimitError } from "./middleware.js";
 import { generateRequestId, logRequest, logResponse, logError } from "./logger.js";
 import { CircuitBreaker } from "./circuit-breaker.js";
+import { openApiSpec } from "./swagger.js";
+import { getPlaygroundHtml } from "./playground.js";
 /**
  * Parse JSON body from an incoming request. Returns parsed object or null on failure.
  */
@@ -98,6 +100,23 @@ export async function startApiServer(opts = {}) {
             handleStatus(req, res, queue, circuitBreaker);
             return;
         }
+        // GET /swagger.json — OpenAPI spec, no auth
+        if (req.method === "GET" && path === "/swagger.json") {
+            jsonResponse(res, 200, openApiSpec);
+            return;
+        }
+        // GET /playground — interactive API playground
+        if (req.method === "GET" && path === "/playground") {
+            const html = getPlaygroundHtml(port);
+            res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
+            res.end(html);
+            return;
+        }
+        // GET / — server info
+        if (req.method === "GET" && path === "/") {
+            jsonResponse(res, 200, { name: "otterly", version: "0.3.1", playground: "/playground" });
+            return;
+        }
         // ── POST routes: auth → rate limit → circuit breaker → queue ──
         if (req.method !== "POST") {
             jsonResponse(res, 404, { error: "Not found" });
@@ -237,6 +256,7 @@ export async function startApiServer(opts = {}) {
             console.log(`  Streaming     : http://localhost:${port}/api/stream`);
             console.log(`  WebSocket     : ws://localhost:${port}/ws`);
             console.log(`  Health        : http://localhost:${port}/api/status`);
+            console.log(`  Playground    : http://localhost:${port}/playground`);
             console.log(`  Working dir   : ${workingDir}`);
             if (apiKey) {
                 console.log(`  Auth          : API key required (OTTERLY_API_KEY)`);

package/dist/server/playground.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export declare function getPlaygroundHtml(port: number): string;