@shardworks/claude-code-apparatus 0.1.269 → 0.1.271
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -12
- package/dist/babysitter.d.ts +10 -142
- package/dist/babysitter.d.ts.map +1 -1
- package/dist/babysitter.js +23 -344
- package/dist/babysitter.js.map +1 -1
- package/dist/index.d.ts +17 -50
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +30 -90
- package/dist/index.js.map +1 -1
- package/dist/runtime.d.ts +165 -0
- package/dist/runtime.d.ts.map +1 -0
- package/dist/runtime.js +357 -0
- package/dist/runtime.js.map +1 -0
- package/package.json +5 -5
package/README.md
CHANGED
|
@@ -55,24 +55,20 @@ import {
|
|
|
55
55
|
|
|
56
56
|
### Rate-Limit Detection
|
|
57
57
|
|
|
58
|
-
The provider runs a
|
|
58
|
+
The provider runs a **two-branch NDJSON detector** to identify rate-limited terminations and attach a structured `terminationTag` to the session result. The Animator's back-off state machine consumes the tag and transitions its pause-state doc accordingly.
|
|
59
59
|
|
|
60
60
|
```typescript
|
|
61
|
-
import {
|
|
62
|
-
detectRateLimitFromNdjson,
|
|
63
|
-
detectRateLimitFromStderr,
|
|
64
|
-
detectRateLimitFromExitCode,
|
|
65
|
-
RATE_LIMIT_EXIT_CODE,
|
|
66
|
-
} from '@shardworks/claude-code-apparatus';
|
|
61
|
+
import { detectRateLimitFromNdjson } from '@shardworks/claude-code-apparatus';
|
|
67
62
|
```
|
|
68
63
|
|
|
69
|
-
|
|
64
|
+
Active detector branches (first-wins):
|
|
65
|
+
|
|
66
|
+
1. **Structural `subtype`** — `parseStreamJsonMessage` inspects every NDJSON message and, when its `subtype` contains `rate_limit` / `rate-limit`, emits a tag with `source: 'ndjson-result'`.
|
|
67
|
+
2. **Structural `is_error`** — if `msg.is_error === true` and the carried error text matches the rate-limit phrasing regex, the same tag is produced.
|
|
70
68
|
|
|
71
|
-
|
|
72
|
-
2. **Stderr pattern match** — the babysitter's stderr observer samples each chunk for a forgiving rate-limit regex (`rate limit`, `429`, `usage limit`, `quota exceeded`, `too many requests` — case-insensitive). Scope is narrow: detection only; the babysitter does not forward the full stderr buffer to the guild.
|
|
73
|
-
3. **Distinguished exit code** — if neither of the above fired and `claude` exits with `RATE_LIMIT_EXIT_CODE`, the provider still promotes the result to `rate-limited`.
|
|
69
|
+
Everything else surfaces as plain `failed`. The previous stderr-pattern and exit-code branches were retired after two production incidents where an assistant's prose summary or a generic non-zero exit code tripped a false-positive pause. Generic non-zero exit codes surface as `'failed'`; the babysitter no longer samples claude's stderr for pattern matches; it only forwards it to the per-session log file.
|
|
74
70
|
|
|
75
|
-
|
|
71
|
+
When a non-zero exit arrives without an NDJSON termination tag, the babysitter captures a `terminationDiagnostic: { exitCode, stderrExcerpt? }` on the session-record payload so operators can review the signal that fell through — without the Animator widening its pause gate on it.
|
|
76
72
|
|
|
77
73
|
## Session Babysitter
|
|
78
74
|
|
package/dist/babysitter.d.ts
CHANGED
|
@@ -13,71 +13,25 @@
|
|
|
13
13
|
* The babysitter is a detached process: it survives guild restarts.
|
|
14
14
|
* All guild communication is via HTTP (tool server) and SQLite (transcripts).
|
|
15
15
|
*
|
|
16
|
+
* The single-purpose primitives (stdin parsing, retrying HTTP, DLQ writes,
|
|
17
|
+
* the SQLite trio, lifecycle reporters, stderr redirect) live in
|
|
18
|
+
* `runtime.ts`. This file owns the orchestrator (`runBabysitter`), the
|
|
19
|
+
* MCP/SSE proxy, and the script entry point. The previously-exported
|
|
20
|
+
* primitives are re-exported below to preserve the package's public
|
|
21
|
+
* surface.
|
|
22
|
+
*
|
|
16
23
|
* See: docs/architecture/detached-sessions.md
|
|
17
24
|
*/
|
|
18
25
|
import { spawn } from 'node:child_process';
|
|
19
|
-
import { type
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
export interface SerializedTool {
|
|
23
|
-
/** Tool name (e.g. 'writ-list'). */
|
|
24
|
-
name: string;
|
|
25
|
-
/** Tool description. */
|
|
26
|
-
description: string;
|
|
27
|
-
/** JSON Schema for the tool's input parameters. */
|
|
28
|
-
params: Record<string, unknown>;
|
|
29
|
-
/**
|
|
30
|
-
* HTTP method the guild tool server routes this tool under. The MCP proxy
|
|
31
|
-
* uses this to avoid POSTing to a GET-only read-tool route (which would
|
|
32
|
-
* 404). Derived from the tool's `permission` by `permissionToMethod()`.
|
|
33
|
-
*/
|
|
34
|
-
method: 'GET' | 'POST' | 'DELETE';
|
|
35
|
-
}
|
|
36
|
-
/** Config written to the babysitter's stdin by the spawning process. */
|
|
37
|
-
export interface BabysitterConfig {
|
|
38
|
-
sessionId: string;
|
|
39
|
-
guildToolUrl: string;
|
|
40
|
-
dbPath: string;
|
|
41
|
-
logDir: string;
|
|
42
|
-
claudeArgs: string[];
|
|
43
|
-
cwd: string;
|
|
44
|
-
env: Record<string, string>;
|
|
45
|
-
prompt: string;
|
|
46
|
-
tools: SerializedTool[];
|
|
47
|
-
startedAt: string;
|
|
48
|
-
provider: string;
|
|
49
|
-
metadata?: Record<string, unknown>;
|
|
50
|
-
/** Temp directory for the system prompt file. Cleaned up in finally block. */
|
|
51
|
-
systemPromptTmpDir?: string;
|
|
52
|
-
}
|
|
53
|
-
/**
|
|
54
|
-
* Walk an error's cause chain looking for a retryable error code.
|
|
55
|
-
* Returns the first retryable code found, or null if none.
|
|
56
|
-
* Caps traversal depth to prevent infinite loops from circular cause chains.
|
|
57
|
-
*/
|
|
58
|
-
export declare function findRetryableCode(err: unknown, maxDepth?: number): string | null;
|
|
26
|
+
import { type BabysitterConfig, type SerializedTool, type TranscriptDb } from './runtime.ts';
|
|
27
|
+
export { callGuildHttpApi, findRetryableCode, initTranscriptDb, openTranscriptDb, readConfigFromStdin, redirectStderrToFile, reportResult, reportRunning, resolveTerminalStatus, STDERR_DIAGNOSTIC_TAIL_LIMIT, writeToDlq, writeTranscript, } from './runtime.ts';
|
|
28
|
+
export type { BabysitterConfig, SerializedTool, TranscriptDb, } from './runtime.ts';
|
|
59
29
|
export interface McpProxyHandle {
|
|
60
30
|
/** URL for --mcp-config (e.g. "http://127.0.0.1:PORT/sse"). */
|
|
61
31
|
url: string;
|
|
62
32
|
/** Shut down the HTTP server and MCP transport. */
|
|
63
33
|
close(): Promise<void>;
|
|
64
34
|
}
|
|
65
|
-
/**
|
|
66
|
-
* Read the babysitter config from stdin.
|
|
67
|
-
*
|
|
68
|
-
* Reads stdin to completion, parses the JSON, and validates required fields.
|
|
69
|
-
* The spawning process writes config and closes the write end.
|
|
70
|
-
*/
|
|
71
|
-
export declare function readConfigFromStdin(stream?: NodeJS.ReadableStream): Promise<BabysitterConfig>;
|
|
72
|
-
export declare function callGuildHttpApi(url: string, sessionId: string, body: unknown, timeoutMs?: number, method?: 'GET' | 'POST' | 'DELETE'): Promise<unknown>;
|
|
73
|
-
/**
|
|
74
|
-
* Write a payload to the Dead Letter Queue.
|
|
75
|
-
*
|
|
76
|
-
* Creates the DLQ directory if it doesn't exist. Writes the payload as
|
|
77
|
-
* pretty-printed JSON. Used as a fallback when the guild HTTP API is
|
|
78
|
-
* unreachable for lifecycle calls.
|
|
79
|
-
*/
|
|
80
|
-
export declare function writeToDlq(cwd: string, filename: string, payload: unknown): void;
|
|
81
35
|
/**
|
|
82
36
|
* Create an MCP/SSE HTTP server that proxies tool calls to the guild.
|
|
83
37
|
*
|
|
@@ -88,92 +42,6 @@ export declare function writeToDlq(cwd: string, filename: string, payload: unkno
|
|
|
88
42
|
* JSON Schema (the serialized params from the config).
|
|
89
43
|
*/
|
|
90
44
|
export declare function createProxyMcpHttpServer(tools: SerializedTool[], guildToolUrl: string, sessionId: string): Promise<McpProxyHandle>;
|
|
91
|
-
/** Minimal interface for the SQLite database used by the babysitter. */
|
|
92
|
-
export interface TranscriptDb {
|
|
93
|
-
/** Write a transcript entry (id, content JSON). */
|
|
94
|
-
writeTranscript(sessionId: string, content: string): void;
|
|
95
|
-
/** Close the database connection. */
|
|
96
|
-
close(): void;
|
|
97
|
-
}
|
|
98
|
-
/**
|
|
99
|
-
* Open the guild's SQLite database for transcript streaming.
|
|
100
|
-
*
|
|
101
|
-
* Creates the database file and table if they don't exist.
|
|
102
|
-
* Enables WAL mode for concurrent read access by other processes
|
|
103
|
-
* (Oculus, CLI queries, other agents).
|
|
104
|
-
*
|
|
105
|
-
* Uses dynamic import() to load better-sqlite3 at runtime. This avoids
|
|
106
|
-
* requiring the native module at import time (beneficial for type-checking
|
|
107
|
-
* and testing).
|
|
108
|
-
*/
|
|
109
|
-
export declare function openTranscriptDb(dbPath: string): Promise<TranscriptDb>;
|
|
110
|
-
/**
|
|
111
|
-
* Initialize a TranscriptDb from a Database constructor.
|
|
112
|
-
*
|
|
113
|
-
* Shared logic between openTranscriptDb() and test injection.
|
|
114
|
-
* Exported for testing — allows injecting a mock Database constructor.
|
|
115
|
-
*/
|
|
116
|
-
export declare function initTranscriptDb(DatabaseConstructor: new (path: string) => {
|
|
117
|
-
pragma(stmt: string): unknown;
|
|
118
|
-
prepare(sql: string): {
|
|
119
|
-
run(...params: unknown[]): void;
|
|
120
|
-
};
|
|
121
|
-
exec(sql: string): void;
|
|
122
|
-
close(): void;
|
|
123
|
-
}, dbPath: string): TranscriptDb;
|
|
124
|
-
/**
|
|
125
|
-
* Write the current transcript to SQLite.
|
|
126
|
-
*/
|
|
127
|
-
export declare function writeTranscript(db: TranscriptDb, sessionId: string, messages: Record<string, unknown>[]): void;
|
|
128
|
-
/**
|
|
129
|
-
* Report "running" status to the guild via the session-running tool.
|
|
130
|
-
*
|
|
131
|
-
* If the guild is unreachable, writes the payload to the DLQ.
|
|
132
|
-
*/
|
|
133
|
-
export declare function reportRunning(config: BabysitterConfig, cancelHandle: Record<string, unknown>, timeoutMs?: number): Promise<void>;
|
|
134
|
-
/**
|
|
135
|
-
* Resolve the terminal status and error text for a terminated session,
|
|
136
|
-
* giving rate-limit detection precedence over the generic exit-code
|
|
137
|
-
* mapping.
|
|
138
|
-
*
|
|
139
|
-
* Cascade order (D5):
|
|
140
|
-
* 1. A `'cancelled'` override (SIGTERM path) — short-circuits.
|
|
141
|
-
* 2. A `terminationTag` already carried on the StreamJsonResult —
|
|
142
|
-
* set by the NDJSON-level cascade (first-wins across NDJSON and
|
|
143
|
-
* stderr observations in the babysitter).
|
|
144
|
-
* 3. A distinguished rate-limit exit code (RATE_LIMIT_EXIT_CODE).
|
|
145
|
-
* 4. Generic exit-code mapping (0 → completed, non-zero → failed).
|
|
146
|
-
*
|
|
147
|
-
* Returns both the payload status, a human-readable error string (only
|
|
148
|
-
* populated for the failed branches), and the tag that informed the
|
|
149
|
-
* decision (if any). The tag is forwarded to the guild so the Animator's
|
|
150
|
-
* back-off machine can disambiguate rate-limit terminations without
|
|
151
|
-
* pattern-matching on error text.
|
|
152
|
-
*/
|
|
153
|
-
export declare function resolveTerminalStatus(result: StreamJsonResult, statusOverride?: 'cancelled'): {
|
|
154
|
-
status: 'completed' | 'failed' | 'cancelled' | 'rate-limited';
|
|
155
|
-
error?: string;
|
|
156
|
-
terminationTag?: SessionTerminationTag;
|
|
157
|
-
};
|
|
158
|
-
/**
|
|
159
|
-
* Report the final session result to the guild via the session-record tool.
|
|
160
|
-
*
|
|
161
|
-
* If the guild is unreachable, writes the payload to the DLQ.
|
|
162
|
-
*/
|
|
163
|
-
export declare function reportResult(config: BabysitterConfig, result: StreamJsonResult, transcript: Record<string, unknown>[], timeoutMs?: number, statusOverride?: 'cancelled'): Promise<void>;
|
|
164
|
-
/**
|
|
165
|
-
* Open a per-session log file and redirect process.stderr.write to it.
|
|
166
|
-
*
|
|
167
|
-
* Creates the logDir (recursive) and opens `<logDir>/<sessionId>.log`
|
|
168
|
-
* for append-writing. Replaces process.stderr.write with a function
|
|
169
|
-
* that calls fs.writeSync on the owned fd. Writes the startup banner
|
|
170
|
-
* as the first line.
|
|
171
|
-
*
|
|
172
|
-
* Returns the owned fd so the caller can close it in a finally block.
|
|
173
|
-
*
|
|
174
|
-
* @internal Exported for testing only.
|
|
175
|
-
*/
|
|
176
|
-
export declare function redirectStderrToFile(logDir: string, sessionId: string): number;
|
|
177
45
|
/**
|
|
178
46
|
* Run the session babysitter.
|
|
179
47
|
*
|
package/dist/babysitter.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"babysitter.d.ts","sourceRoot":"","sources":["../src/babysitter.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"babysitter.d.ts","sourceRoot":"","sources":["../src/babysitter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,EAAE,KAAK,EAAqB,MAAM,oBAAoB,CAAC;AAwB9D,OAAO,EAUL,KAAK,gBAAgB,EACrB,KAAK,cAAc,EACnB,KAAK,YAAY,EAClB,MAAM,cAAc,CAAC;AAItB,OAAO,EACL,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,gBAAgB,EAChB,mBAAmB,EACnB,oBAAoB,EACpB,YAAY,EACZ,aAAa,EACb,qBAAqB,EACrB,4BAA4B,EAC5B,UAAU,EACV,eAAe,GAChB,MAAM,cAAc,CAAC;AACtB,YAAY,EACV,gBAAgB,EAChB,cAAc,EACd,YAAY,GACb,MAAM,cAAc,CAAC;AAItB,MAAM,WAAW,cAAc;IAC7B,+DAA+D;IAC/D,GAAG,EAAE,MAAM,CAAC;IACZ,mDAAmD;IACnD,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB;AAID;;;;;;;;GAQG;AACH,wBAAsB,wBAAwB,CAC5C,KAAK,EAAE,cAAc,EAAE,EACvB,YAAY,EAAE,MAAM,EACpB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,cAAc,CAAC,CAmLzB;AAID;;;;;;;;;;;;GAYG;AACH,wBAAsB,aAAa,CACjC,MAAM,EAAE,gBAAgB,EACxB,IAAI,CAAC,EAAE;IACL,yEAAyE;IACzE,EAAE,CAAC,EAAE,YAAY,CAAC;IAClB,kCAAkC;IAClC,OAAO,CAAC,EAAE,OAAO,KAAK,CAAC;IACvB,8DAA8D;IAC9D,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB,GACA,OAAO,CAAC,IAAI,CAAC,CAqOf"}
|
package/dist/babysitter.js
CHANGED
|
@@ -13,6 +13,13 @@
|
|
|
13
13
|
* The babysitter is a detached process: it survives guild restarts.
|
|
14
14
|
* All guild communication is via HTTP (tool server) and SQLite (transcripts).
|
|
15
15
|
*
|
|
16
|
+
* The single-purpose primitives (stdin parsing, retrying HTTP, DLQ writes,
|
|
17
|
+
* the SQLite trio, lifecycle reporters, stderr redirect) live in
|
|
18
|
+
* `runtime.ts`. This file owns the orchestrator (`runBabysitter`), the
|
|
19
|
+
* MCP/SSE proxy, and the script entry point. The previously-exported
|
|
20
|
+
* primitives are re-exported below to preserve the package's public
|
|
21
|
+
* surface.
|
|
22
|
+
*
|
|
16
23
|
* See: docs/architecture/detached-sessions.md
|
|
17
24
|
*/
|
|
18
25
|
import { spawn } from 'node:child_process';
|
|
@@ -25,153 +32,10 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
|
25
32
|
import { ListToolsRequestSchema, CallToolRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
|
|
26
33
|
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
|
|
27
34
|
import { toolNameToRoute } from '@shardworks/tools-apparatus';
|
|
28
|
-
import { processNdjsonBuffer, parseStreamJsonMessage,
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
const RETRY_TIMEOUT_MS = 60_000;
|
|
33
|
-
const RETRYABLE_CODES = new Set(['ECONNREFUSED', 'ECONNRESET', 'ETIMEDOUT']);
|
|
34
|
-
/**
|
|
35
|
-
* Walk an error's cause chain looking for a retryable error code.
|
|
36
|
-
* Returns the first retryable code found, or null if none.
|
|
37
|
-
* Caps traversal depth to prevent infinite loops from circular cause chains.
|
|
38
|
-
*/
|
|
39
|
-
export function findRetryableCode(err, maxDepth = 5) {
|
|
40
|
-
let current = err;
|
|
41
|
-
for (let i = 0; i < maxDepth && current != null; i++) {
|
|
42
|
-
const code = current.code;
|
|
43
|
-
if (code && RETRYABLE_CODES.has(code)) {
|
|
44
|
-
return code;
|
|
45
|
-
}
|
|
46
|
-
current = current.cause;
|
|
47
|
-
}
|
|
48
|
-
return null;
|
|
49
|
-
}
|
|
50
|
-
// ── stdin config reader ─────────────────────────────────────────────────
|
|
51
|
-
/**
|
|
52
|
-
* Read the babysitter config from stdin.
|
|
53
|
-
*
|
|
54
|
-
* Reads stdin to completion, parses the JSON, and validates required fields.
|
|
55
|
-
* The spawning process writes config and closes the write end.
|
|
56
|
-
*/
|
|
57
|
-
export async function readConfigFromStdin(stream = process.stdin) {
|
|
58
|
-
const chunks = [];
|
|
59
|
-
for await (const chunk of stream) {
|
|
60
|
-
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
|
|
61
|
-
}
|
|
62
|
-
const raw = Buffer.concat(chunks).toString('utf-8');
|
|
63
|
-
if (!raw.trim()) {
|
|
64
|
-
throw new Error('Empty config received on stdin');
|
|
65
|
-
}
|
|
66
|
-
let parsed;
|
|
67
|
-
try {
|
|
68
|
-
parsed = JSON.parse(raw);
|
|
69
|
-
}
|
|
70
|
-
catch {
|
|
71
|
-
throw new Error(`Invalid JSON config on stdin: ${raw.slice(0, 200)}`);
|
|
72
|
-
}
|
|
73
|
-
const config = parsed;
|
|
74
|
-
// Validate required fields
|
|
75
|
-
const required = [
|
|
76
|
-
'sessionId', 'guildToolUrl', 'dbPath', 'logDir', 'claudeArgs',
|
|
77
|
-
'cwd', 'env', 'prompt', 'tools', 'startedAt', 'provider',
|
|
78
|
-
];
|
|
79
|
-
for (const field of required) {
|
|
80
|
-
if (config[field] === undefined || config[field] === null) {
|
|
81
|
-
throw new Error(`Missing required config field: ${field}`);
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
return config;
|
|
85
|
-
}
|
|
86
|
-
// ── HTTP retry helper ───────────────────────────────────────────────────
|
|
87
|
-
/**
|
|
88
|
-
* Call a guild HTTP API endpoint with exponential backoff retry.
|
|
89
|
-
*
|
|
90
|
-
* Retries on connection errors (ECONNREFUSED, ECONNRESET, ETIMEDOUT).
|
|
91
|
-
* Returns the parsed JSON response on success.
|
|
92
|
-
* Throws after RETRY_TIMEOUT_MS of retrying.
|
|
93
|
-
*/
|
|
94
|
-
/**
|
|
95
|
-
* Encode a params object as a query string for GET requests.
|
|
96
|
-
*
|
|
97
|
-
* Scalars (string/number/boolean) become their string form. Arrays and
|
|
98
|
-
* objects are JSON-encoded so the tool-server can still parse them after
|
|
99
|
-
* its param coercion pass (though read-tools generally take scalar
|
|
100
|
-
* inputs). null/undefined values are skipped.
|
|
101
|
-
*/
|
|
102
|
-
function encodeParamsAsQuery(params) {
|
|
103
|
-
if (params == null || typeof params !== 'object')
|
|
104
|
-
return '';
|
|
105
|
-
const usp = new URLSearchParams();
|
|
106
|
-
for (const [key, value] of Object.entries(params)) {
|
|
107
|
-
if (value == null)
|
|
108
|
-
continue;
|
|
109
|
-
if (typeof value === 'string') {
|
|
110
|
-
usp.set(key, value);
|
|
111
|
-
}
|
|
112
|
-
else if (typeof value === 'number' || typeof value === 'boolean') {
|
|
113
|
-
usp.set(key, String(value));
|
|
114
|
-
}
|
|
115
|
-
else {
|
|
116
|
-
usp.set(key, JSON.stringify(value));
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
const s = usp.toString();
|
|
120
|
-
return s.length > 0 ? `?${s}` : '';
|
|
121
|
-
}
|
|
122
|
-
export async function callGuildHttpApi(url, sessionId, body, timeoutMs = RETRY_TIMEOUT_MS, method = 'POST') {
|
|
123
|
-
const startTime = Date.now();
|
|
124
|
-
let delay = RETRY_INITIAL_DELAY_MS;
|
|
125
|
-
let lastError;
|
|
126
|
-
// GET can't carry a body — encode params as query string instead.
|
|
127
|
-
const targetUrl = method === 'GET' ? `${url}${encodeParamsAsQuery(body)}` : url;
|
|
128
|
-
const requestBody = method === 'GET' ? undefined : JSON.stringify(body);
|
|
129
|
-
while (Date.now() - startTime < timeoutMs) {
|
|
130
|
-
try {
|
|
131
|
-
const response = await fetch(targetUrl, {
|
|
132
|
-
method,
|
|
133
|
-
headers: {
|
|
134
|
-
'Content-Type': 'application/json',
|
|
135
|
-
'X-Session-Id': sessionId,
|
|
136
|
-
},
|
|
137
|
-
...(requestBody !== undefined ? { body: requestBody } : {}),
|
|
138
|
-
});
|
|
139
|
-
if (!response.ok) {
|
|
140
|
-
const text = await response.text().catch(() => '');
|
|
141
|
-
throw new Error(`HTTP ${response.status}: ${text.slice(0, 500)}`);
|
|
142
|
-
}
|
|
143
|
-
return await response.json();
|
|
144
|
-
}
|
|
145
|
-
catch (err) {
|
|
146
|
-
lastError = err instanceof Error ? err : new Error(String(err));
|
|
147
|
-
// Check if the error is retryable (connection-level error in the cause chain)
|
|
148
|
-
const isRetryable = findRetryableCode(err) !== null;
|
|
149
|
-
if (!isRetryable) {
|
|
150
|
-
throw lastError;
|
|
151
|
-
}
|
|
152
|
-
// Wait before retrying
|
|
153
|
-
const remaining = timeoutMs - (Date.now() - startTime);
|
|
154
|
-
if (remaining <= 0)
|
|
155
|
-
break;
|
|
156
|
-
await new Promise((resolve) => setTimeout(resolve, Math.min(delay, remaining)));
|
|
157
|
-
delay = Math.min(delay * 2, RETRY_MAX_DELAY_MS);
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
throw new Error(`Guild HTTP API unreachable after ${timeoutMs}ms: ${lastError?.message ?? 'unknown error'}`);
|
|
161
|
-
}
|
|
162
|
-
// ── DLQ writer ──────────────────────────────────────────────────────────
|
|
163
|
-
/**
|
|
164
|
-
* Write a payload to the Dead Letter Queue.
|
|
165
|
-
*
|
|
166
|
-
* Creates the DLQ directory if it doesn't exist. Writes the payload as
|
|
167
|
-
* pretty-printed JSON. Used as a fallback when the guild HTTP API is
|
|
168
|
-
* unreachable for lifecycle calls.
|
|
169
|
-
*/
|
|
170
|
-
export function writeToDlq(cwd, filename, payload) {
|
|
171
|
-
const dlqDir = path.join(cwd, '.nexus', 'dlq');
|
|
172
|
-
fs.mkdirSync(dlqDir, { recursive: true });
|
|
173
|
-
fs.writeFileSync(path.join(dlqDir, filename), JSON.stringify(payload, null, 2));
|
|
174
|
-
}
|
|
35
|
+
import { processNdjsonBuffer, parseStreamJsonMessage, } from "./index.js";
|
|
36
|
+
import { callGuildHttpApi, openTranscriptDb, readConfigFromStdin, redirectStderrToFile, reportRunning, reportResult, STDERR_DIAGNOSTIC_TAIL_LIMIT, writeToDlq, writeTranscript, } from "./runtime.js";
|
|
37
|
+
// ── Re-exports (preserves the pre-extraction public surface) ────────────
|
|
38
|
+
export { callGuildHttpApi, findRetryableCode, initTranscriptDb, openTranscriptDb, readConfigFromStdin, redirectStderrToFile, reportResult, reportRunning, resolveTerminalStatus, STDERR_DIAGNOSTIC_TAIL_LIMIT, writeToDlq, writeTranscript, } from "./runtime.js";
|
|
175
39
|
// ── MCP proxy server ────────────────────────────────────────────────────
|
|
176
40
|
/**
|
|
177
41
|
* Create an MCP/SSE HTTP server that proxies tool calls to the guild.
|
|
@@ -342,190 +206,6 @@ export async function createProxyMcpHttpServer(tools, guildToolUrl, sessionId) {
|
|
|
342
206
|
},
|
|
343
207
|
};
|
|
344
208
|
}
|
|
345
|
-
/**
|
|
346
|
-
* Open the guild's SQLite database for transcript streaming.
|
|
347
|
-
*
|
|
348
|
-
* Creates the database file and table if they don't exist.
|
|
349
|
-
* Enables WAL mode for concurrent read access by other processes
|
|
350
|
-
* (Oculus, CLI queries, other agents).
|
|
351
|
-
*
|
|
352
|
-
* Uses dynamic import() to load better-sqlite3 at runtime. This avoids
|
|
353
|
-
* requiring the native module at import time (beneficial for type-checking
|
|
354
|
-
* and testing).
|
|
355
|
-
*/
|
|
356
|
-
export async function openTranscriptDb(dbPath) {
|
|
357
|
-
const { default: Database } = await import('better-sqlite3');
|
|
358
|
-
return initTranscriptDb(Database, dbPath);
|
|
359
|
-
}
|
|
360
|
-
/**
|
|
361
|
-
* Initialize a TranscriptDb from a Database constructor.
|
|
362
|
-
*
|
|
363
|
-
* Shared logic between openTranscriptDb() and test injection.
|
|
364
|
-
* Exported for testing — allows injecting a mock Database constructor.
|
|
365
|
-
*/
|
|
366
|
-
export function initTranscriptDb(DatabaseConstructor, dbPath) {
|
|
367
|
-
const raw = new DatabaseConstructor(dbPath);
|
|
368
|
-
raw.pragma('journal_mode = WAL');
|
|
369
|
-
raw.exec(`
|
|
370
|
-
CREATE TABLE IF NOT EXISTS books_animator_transcripts (
|
|
371
|
-
id TEXT PRIMARY KEY,
|
|
372
|
-
content TEXT NOT NULL
|
|
373
|
-
)
|
|
374
|
-
`);
|
|
375
|
-
const stmt = raw.prepare('INSERT OR REPLACE INTO books_animator_transcripts (id, content) VALUES (?, ?)');
|
|
376
|
-
return {
|
|
377
|
-
writeTranscript(sessionId, content) {
|
|
378
|
-
stmt.run(sessionId, content);
|
|
379
|
-
},
|
|
380
|
-
close() {
|
|
381
|
-
raw.close();
|
|
382
|
-
},
|
|
383
|
-
};
|
|
384
|
-
}
|
|
385
|
-
/**
|
|
386
|
-
* Write the current transcript to SQLite.
|
|
387
|
-
*/
|
|
388
|
-
export function writeTranscript(db, sessionId, messages) {
|
|
389
|
-
const content = JSON.stringify({ id: sessionId, messages });
|
|
390
|
-
db.writeTranscript(sessionId, content);
|
|
391
|
-
}
|
|
392
|
-
// ── Session lifecycle reporting ─────────────────────────────────────────
|
|
393
|
-
/**
|
|
394
|
-
* Report "running" status to the guild via the session-running tool.
|
|
395
|
-
*
|
|
396
|
-
* If the guild is unreachable, writes the payload to the DLQ.
|
|
397
|
-
*/
|
|
398
|
-
export async function reportRunning(config, cancelHandle, timeoutMs) {
|
|
399
|
-
const route = toolNameToRoute('session-running');
|
|
400
|
-
const url = `${config.guildToolUrl}${route}`;
|
|
401
|
-
const payload = {
|
|
402
|
-
sessionId: config.sessionId,
|
|
403
|
-
startedAt: config.startedAt,
|
|
404
|
-
provider: config.provider,
|
|
405
|
-
metadata: config.metadata,
|
|
406
|
-
cancelHandle,
|
|
407
|
-
};
|
|
408
|
-
try {
|
|
409
|
-
await callGuildHttpApi(url, config.sessionId, payload, timeoutMs);
|
|
410
|
-
}
|
|
411
|
-
catch {
|
|
412
|
-
writeToDlq(config.cwd, `${config.sessionId}-running.json`, payload);
|
|
413
|
-
}
|
|
414
|
-
}
|
|
415
|
-
/**
|
|
416
|
-
* Resolve the terminal status and error text for a terminated session,
|
|
417
|
-
* giving rate-limit detection precedence over the generic exit-code
|
|
418
|
-
* mapping.
|
|
419
|
-
*
|
|
420
|
-
* Cascade order (D5):
|
|
421
|
-
* 1. A `'cancelled'` override (SIGTERM path) — short-circuits.
|
|
422
|
-
* 2. A `terminationTag` already carried on the StreamJsonResult —
|
|
423
|
-
* set by the NDJSON-level cascade (first-wins across NDJSON and
|
|
424
|
-
* stderr observations in the babysitter).
|
|
425
|
-
* 3. A distinguished rate-limit exit code (RATE_LIMIT_EXIT_CODE).
|
|
426
|
-
* 4. Generic exit-code mapping (0 → completed, non-zero → failed).
|
|
427
|
-
*
|
|
428
|
-
* Returns both the payload status, a human-readable error string (only
|
|
429
|
-
* populated for the failed branches), and the tag that informed the
|
|
430
|
-
* decision (if any). The tag is forwarded to the guild so the Animator's
|
|
431
|
-
* back-off machine can disambiguate rate-limit terminations without
|
|
432
|
-
* pattern-matching on error text.
|
|
433
|
-
*/
|
|
434
|
-
export function resolveTerminalStatus(result, statusOverride) {
|
|
435
|
-
if (statusOverride === 'cancelled') {
|
|
436
|
-
return { status: 'cancelled' };
|
|
437
|
-
}
|
|
438
|
-
// Second priority: a structural tag observed by the NDJSON/stderr
|
|
439
|
-
// cascades. Fire even on exit code 0 because claude may emit the
|
|
440
|
-
// rate-limit signal and still exit cleanly.
|
|
441
|
-
if (result.terminationTag) {
|
|
442
|
-
return {
|
|
443
|
-
status: 'rate-limited',
|
|
444
|
-
error: result.terminationTag.detail ?? `Anima provider reported a rate limit (source: ${result.terminationTag.source})`,
|
|
445
|
-
terminationTag: result.terminationTag,
|
|
446
|
-
};
|
|
447
|
-
}
|
|
448
|
-
// Third priority: distinguished exit code.
|
|
449
|
-
const exitCodeTag = detectRateLimitFromExitCode(result.exitCode);
|
|
450
|
-
if (exitCodeTag) {
|
|
451
|
-
return {
|
|
452
|
-
status: 'rate-limited',
|
|
453
|
-
error: exitCodeTag.detail,
|
|
454
|
-
terminationTag: exitCodeTag,
|
|
455
|
-
};
|
|
456
|
-
}
|
|
457
|
-
if (result.exitCode === 0) {
|
|
458
|
-
return { status: 'completed' };
|
|
459
|
-
}
|
|
460
|
-
return {
|
|
461
|
-
status: 'failed',
|
|
462
|
-
error: `claude exited with code ${result.exitCode}`,
|
|
463
|
-
};
|
|
464
|
-
}
|
|
465
|
-
/**
|
|
466
|
-
* Report the final session result to the guild via the session-record tool.
|
|
467
|
-
*
|
|
468
|
-
* If the guild is unreachable, writes the payload to the DLQ.
|
|
469
|
-
*/
|
|
470
|
-
export async function reportResult(config, result, transcript, timeoutMs, statusOverride) {
|
|
471
|
-
const route = toolNameToRoute('session-record');
|
|
472
|
-
const url = `${config.guildToolUrl}${route}`;
|
|
473
|
-
const resolved = resolveTerminalStatus(result, statusOverride);
|
|
474
|
-
const output = extractFinalAssistantText(transcript);
|
|
475
|
-
const payload = {
|
|
476
|
-
sessionId: config.sessionId,
|
|
477
|
-
status: resolved.status,
|
|
478
|
-
exitCode: result.exitCode,
|
|
479
|
-
signal: result.signal,
|
|
480
|
-
error: resolved.error,
|
|
481
|
-
costUsd: result.costUsd,
|
|
482
|
-
tokenUsage: result.tokenUsage,
|
|
483
|
-
output,
|
|
484
|
-
providerSessionId: result.providerSessionId,
|
|
485
|
-
transcript,
|
|
486
|
-
...(resolved.terminationTag ? { terminationTag: resolved.terminationTag } : {}),
|
|
487
|
-
};
|
|
488
|
-
try {
|
|
489
|
-
await callGuildHttpApi(url, config.sessionId, payload, timeoutMs);
|
|
490
|
-
}
|
|
491
|
-
catch {
|
|
492
|
-
writeToDlq(config.cwd, `${config.sessionId}.json`, payload);
|
|
493
|
-
}
|
|
494
|
-
}
|
|
495
|
-
// ── Stderr redirect ────────────────────────────────────────────────────
|
|
496
|
-
/**
|
|
497
|
-
* Open a per-session log file and redirect process.stderr.write to it.
|
|
498
|
-
*
|
|
499
|
-
* Creates the logDir (recursive) and opens `<logDir>/<sessionId>.log`
|
|
500
|
-
* for append-writing. Replaces process.stderr.write with a function
|
|
501
|
-
* that calls fs.writeSync on the owned fd. Writes the startup banner
|
|
502
|
-
* as the first line.
|
|
503
|
-
*
|
|
504
|
-
* Returns the owned fd so the caller can close it in a finally block.
|
|
505
|
-
*
|
|
506
|
-
* @internal Exported for testing only.
|
|
507
|
-
*/
|
|
508
|
-
export function redirectStderrToFile(logDir, sessionId) {
|
|
509
|
-
fs.mkdirSync(logDir, { recursive: true });
|
|
510
|
-
const logFilePath = path.join(logDir, `${sessionId}.log`);
|
|
511
|
-
const fd = fs.openSync(logFilePath, 'a');
|
|
512
|
-
// Replace process.stderr.write with a function that writes to our fd.
|
|
513
|
-
process.stderr.write = function (chunk, encodingOrCallback, callback) {
|
|
514
|
-
const encoding = typeof encodingOrCallback === 'string' ? encodingOrCallback : 'utf8';
|
|
515
|
-
const cb = typeof encodingOrCallback === 'function' ? encodingOrCallback : callback;
|
|
516
|
-
const buffer = typeof chunk === 'string'
|
|
517
|
-
? Buffer.from(chunk, encoding)
|
|
518
|
-
: chunk;
|
|
519
|
-
fs.writeSync(fd, buffer);
|
|
520
|
-
if (cb)
|
|
521
|
-
cb();
|
|
522
|
-
return true;
|
|
523
|
-
};
|
|
524
|
-
// Write the startup banner (now goes to the log file).
|
|
525
|
-
const pgid = process.getgid?.() ?? process.pid;
|
|
526
|
-
process.stderr.write(`[babysitter] session=${sessionId} pid=${process.pid} pgid=${pgid} log=${logFilePath} started at ${new Date().toISOString()}\n`);
|
|
527
|
-
return fd;
|
|
528
|
-
}
|
|
529
209
|
// ── Main babysitter function ────────────────────────────────────────────
|
|
530
210
|
/**
|
|
531
211
|
* Run the session babysitter.
|
|
@@ -581,21 +261,20 @@ export async function runBabysitter(config, deps) {
|
|
|
581
261
|
claudeProc.stdin.write(config.prompt);
|
|
582
262
|
}
|
|
583
263
|
claudeProc.stdin.end();
|
|
584
|
-
//
|
|
585
|
-
//
|
|
586
|
-
//
|
|
264
|
+
// Forward claude's stderr bytes to the babysitter's redirected
|
|
265
|
+
// stderr log. No detection happens here — rate-limit signals are
|
|
266
|
+
// detected only on structured NDJSON messages inside
|
|
267
|
+
// parseStreamJsonMessage.
|
|
587
268
|
//
|
|
588
|
-
//
|
|
589
|
-
//
|
|
590
|
-
//
|
|
591
|
-
//
|
|
269
|
+
// Also maintain a rolling tail buffer (last
|
|
270
|
+
// STDERR_DIAGNOSTIC_TAIL_LIMIT chars) — used as the `stderrExcerpt`
|
|
271
|
+
// of the passive `terminationDiagnostic` attached when the session
|
|
272
|
+
// ends with `'failed'`. O(1) per chunk: append then slice the tail.
|
|
273
|
+
let stderrTail = '';
|
|
592
274
|
claudeProc.stderr?.on('data', (chunk) => {
|
|
593
|
-
if (!acc.terminationTag) {
|
|
594
|
-
const tag = detectRateLimitFromStderr(chunk.toString());
|
|
595
|
-
if (tag)
|
|
596
|
-
acc.terminationTag = tag;
|
|
597
|
-
}
|
|
598
275
|
process.stderr.write(chunk);
|
|
276
|
+
const text = chunk.toString('utf8');
|
|
277
|
+
stderrTail = (stderrTail + text).slice(-STDERR_DIAGNOSTIC_TAIL_LIMIT);
|
|
599
278
|
});
|
|
600
279
|
// 5. Report "running" status (don't await — fire and forget with retry)
|
|
601
280
|
const cancelHandle = { kind: 'local-pgid', pgid: process.pid };
|
|
@@ -684,7 +363,7 @@ export async function runBabysitter(config, deps) {
|
|
|
684
363
|
...(acc.terminationTag ? { terminationTag: acc.terminationTag } : {}),
|
|
685
364
|
};
|
|
686
365
|
// 8. Report result
|
|
687
|
-
await reportResult(config, result, acc.transcript, retryTimeoutMs, cancelledBySignal ? 'cancelled' : undefined);
|
|
366
|
+
await reportResult(config, result, acc.transcript, retryTimeoutMs, cancelledBySignal ? 'cancelled' : undefined, stderrTail);
|
|
688
367
|
}
|
|
689
368
|
catch (err) {
|
|
690
369
|
// Top-level error: attempt to report failure
|