qwen-agent-server 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +211 -0
- package/dist/backends.js +444 -0
- package/dist/backends.js.map +1 -0
- package/dist/embed.js +92 -0
- package/dist/embed.js.map +1 -0
- package/dist/extensions.js +497 -0
- package/dist/extensions.js.map +1 -0
- package/dist/log.js +21 -0
- package/dist/log.js.map +1 -0
- package/dist/openai-compat.js +147 -0
- package/dist/openai-compat.js.map +1 -0
- package/dist/permissions.js +71 -0
- package/dist/permissions.js.map +1 -0
- package/dist/pool.js +155 -0
- package/dist/pool.js.map +1 -0
- package/dist/rerank.js +93 -0
- package/dist/rerank.js.map +1 -0
- package/dist/server.js +1050 -0
- package/dist/server.js.map +1 -0
- package/dist/session.js +649 -0
- package/dist/session.js.map +1 -0
- package/dist/shutdown.js +68 -0
- package/dist/shutdown.js.map +1 -0
- package/dist/threads.js +218 -0
- package/dist/threads.js.map +1 -0
- package/dist/tokenize.js +90 -0
- package/dist/tokenize.js.map +1 -0
- package/dist/types.js +8 -0
- package/dist/types.js.map +1 -0
- package/dist/version.js +13 -0
- package/dist/version.js.map +1 -0
- package/dist/vision.js +293 -0
- package/dist/vision.js.map +1 -0
- package/package.json +42 -0
package/dist/session.js
ADDED
|
@@ -0,0 +1,649 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// QwenSession — per-task state machine wrapping one @qwen-code/sdk query().
|
|
4
|
+
//
|
|
5
|
+
// Conversation model: multi-turn via an async-generator prompt that the
|
|
6
|
+
// supervisor controls. The generator yields each user message as it
|
|
7
|
+
// arrives via send(); when the queue is empty, it awaits a resolver
|
|
8
|
+
// that send() / stop() flip. After each turn the SDK emits a `result`
|
|
9
|
+
// message; we transition state running → idle and stay there until the
|
|
10
|
+
// caller calls send() (push the next user message) or stop() (terminate).
|
|
11
|
+
//
|
|
12
|
+
// Critical pins (RDR-001):
|
|
13
|
+
// §Q1 ask_user_question is EXCLUDED from the inner Qwen's tool surface
|
|
14
|
+
// by default. Answer delivery happens via streamInput user turns,
|
|
15
|
+
// not via canUseTool. The original deny-with-message and
|
|
16
|
+
// deny-then-streamInput-tool_result patterns were both empirically
|
|
17
|
+
// confirmed to fail (probe-tool-result.mjs, 2026-05-04).
|
|
18
|
+
// §Q3 KV-cache affinity: session.backend is pinned at construction and
|
|
19
|
+
// NEVER reassigned (phase-6 review gate greps for re-assignment).
|
|
20
|
+
// §S4 permissionMode='yolo' only when write_authority===true; otherwise
|
|
21
|
+
// 'default' with canUseTool callback emitting permission_denied
|
|
22
|
+
// events for write-tool denials (visible to the caller via poll).
|
|
23
|
+
import { randomBytes } from "node:crypto";
|
|
24
|
+
import { query } from "@qwen-code/sdk";
|
|
25
|
+
import { createLogger } from "./log.js";
|
|
26
|
+
import { makeCanUseTool } from "./permissions.js";
|
|
27
|
+
const log = createLogger("qwen-session");
|
|
28
|
+
// ─────────────────────────────────────────────────────────────────
|
|
29
|
+
// Constants
|
|
30
|
+
/** Capacity of the per-session event ring buffer; once exceeded, the
 *  oldest event is dropped for every new one pushed. */
const RING_BUFFER_CAP = 1000;
/** Page size used by poll() when the caller does not pass max_events. */
const DEFAULT_MAX_EVENTS = 16;
/** canUseTool callback timeout in milliseconds (10 minutes — RDR
 *  §Critical Pins). The SDK default of 60 s is too tight for
 *  human-in-the-loop approval. */
const CAN_USE_TOOL_TIMEOUT_MS = 10 * 60 * 1000;
/** Tools that are excluded from the inner Qwen's tool surface by default.
 *  - 'agent': blocks recursive sub-agent spawning (the supervisor IS the
 *    orchestration layer; nested Qwen sub-agents would be invisible to
 *    it). Callers can lift this one via opts.allow_subagents.
 *  - 'ask_user_question': RDR §Q1. Multi-turn input goes through
 *    `qwen_send`; tool-based asks have no working answer-delivery channel
 *    given the SDK's deny semantics, so the model is told via the system
 *    prompt to ask in plain text and wait. */
const DEFAULT_EXCLUDED_TOOLS = [
    "agent",
    "ask_user_question",
];
|
|
46
|
+
/** System prompt fragment instructing the inner Qwen on the
|
|
47
|
+
* multi-turn / no-ask_user_question contract. Always prepended. */
|
|
48
|
+
const COPROCESSOR_PREAMBLE = `You are operating as a coprocessor under a supervisor that runs you in
|
|
49
|
+
multi-turn mode. Important contract:
|
|
50
|
+
|
|
51
|
+
- The 'ask_user_question' tool is NOT available to you. If you need
|
|
52
|
+
clarification from the user, ask in plain text in your response and
|
|
53
|
+
stop generating. The user will see your question and reply in their
|
|
54
|
+
next message. Do not loop indefinitely on hypotheticals — when you
|
|
55
|
+
need input, ask and wait.
|
|
56
|
+
- Each user message you receive may be a fresh task or a follow-up to
|
|
57
|
+
a prior turn. Treat the conversation as continuous.
|
|
58
|
+
- If you have completed the user's request, simply say so and stop.`;
|
|
59
|
+
// ─────────────────────────────────────────────────────────────────
|
|
60
|
+
// Process-wide event sequence. IDs only ever grow, so they are unique
// and ordered across every session living in this process.
let _eventSeq = 0;
/**
 * Allocate the next event ID.
 * @returns {string} the incremented counter rendered as a decimal string
 */
function nextEventId() {
    _eventSeq += 1;
    return `${_eventSeq}`;
}
/**
 * Test-only hook: rewind the event sequence so the next allocated ID is "1".
 */
export function _resetEventSeq() {
    _eventSeq = 0;
}
|
|
69
|
+
// ─────────────────────────────────────────────────────────────────
|
|
70
|
+
// QwenSession
|
|
71
|
+
/**
 * Per-task state machine wrapping one `@qwen-code/sdk` `query()` call.
 *
 * The session feeds the SDK through an async-generator prompt
 * (`_inputGenerator`) and consumes SDK messages in `_run()`. Observable
 * states are "running", "idle" (a turn finished, waiting for `send()`),
 * "complete" and "error"; the last two are terminal for `send()`.
 *
 * Events are recorded in a bounded ring buffer and exposed through
 * `poll()`. A zero-disabled budget (estimated tokens from tool_result
 * size, and tool-call count) aborts the session when exceeded.
 */
export class QwenSession {
    /** Task identifier: `q-` followed by 8 random hex characters. */
    task_id;
    // §Q3 KV-cache affinity: backend pinned at construction; NEVER reassigned.
    backend; // eslint-disable-line -- single assignment; phase-6 grep gate
    /** Lifecycle state: "running" | "idle" | "complete" | "error". */
    _state = "running";
    /** Event ring buffer, oldest first; capped at RING_BUFFER_CAP. */
    _events = [];
    /** Full text of the latest assistant message / turn result. */
    _last_message;
    /** Latest user text as passed by the caller (without /no_think prefix). */
    _last_user_message;
    /** First 120 chars of the latest assistant text. */
    _last_assistant_summary;
    /** Result text reported by poll() once the session is complete. */
    _result;
    /** `{ code, message }` describing why the session entered "error". */
    _error;
    /** Incremented on every SDK `result` message. */
    _turns_completed = 0;
    /** Async iterator returned by the SDK's query(). */
    _sdkIter = null;
    /** Cancel signal handed to the SDK; aborted by stop(). */
    _abortController;
    // ── Session budget (RDR-002 §Session budget, 2026-05-09 amendment) ─
    // Caps are zero-disabled; defaults are applied in the wiring layer
    // (server.ts / config), not here. The budget tracks accumulated
    // tool_result content and tool_call count — the two knobs that
    // actually correlate with the Prime-Mover-style ECONNRESET crash we
    // saw in the 2026-05-09 shakeout.
    _maxContextTokens;
    _maxToolCalls;
    _accumulatedToolResultChars = 0;
    _toolCallCount = 0;
    /** Pressure levels ("warn" | "high" | "critical") already emitted. */
    _emittedPressure = new Set();
    // v0.8: thinking-mode and JSON-schema controls. See SpawnOpts.
    _thinkingMode;
    // Multi-turn input queue + waker for the async-generator prompt.
    _inputQueue = [];
    _inputResolver = null;
    _inputClosed = false;
    /**
     * Create the session and immediately start consuming the SDK stream.
     *
     * @param backend  object providing `url` and `model`; pinned for the
     *                 session's lifetime.
     * @param prompt   initial user message text.
     * @param opts     spawn options read here: write_authority,
     *                 max_context_tokens, max_tool_calls, thinking_mode,
     *                 allow_subagents, system, prior_context, json_schema,
     *                 max_output_tokens, home, cwd.
     * @param infra    optional `{ qwenRealBin, wrapperPath }`; the wrapper
     *                 bridge is used only when both are non-empty strings.
     * @param resolvedExtensions optional `{ resolved, envValue }`; when
     *                 given, an `extensions_loaded` event is recorded first.
     */
    constructor(backend, prompt, opts, infra, resolvedExtensions) {
        this.task_id = `q-${randomBytes(4).toString("hex")}`;
        this.backend = backend;
        this.write_authority = opts.write_authority === true;
        this._abortController = new AbortController();
        this._last_user_message = prompt;
        // Zero/undefined disables the cap; defaults flow in from server.ts.
        this._maxContextTokens = opts.max_context_tokens ?? 0;
        this._maxToolCalls = opts.max_tool_calls ?? 0;
        // Default thinking_mode to false (RDR-002 v0.8 amendment) — Qwen3.6
        // ships with thinking ON which causes ~6× output bloat in dispatch
        // workloads (Artificial Analysis 2026-04). Caller can opt back in.
        this._thinkingMode = opts.thinking_mode === true;
        // RDR-002 step 11: extensions_loaded is the first event in the
        // session's log when a resolution is provided. Populating before
        // _run() means qwen_poll surfaces it immediately, even before the
        // SDK emits its first message.
        if (resolvedExtensions !== undefined) {
            this.pushEvent("extensions_loaded", describeResolvedExtensions(resolvedExtensions.resolved), { resolved: resolvedExtensions.resolved });
        }
        // Seed the queue with the initial user message. _mkUserMessage
        // applies the /no_think prefix when thinking_mode is disabled.
        this._inputQueue.push(this._mkUserMessage(prompt));
        // Build excludeTools list. allow_subagents removes 'agent' from the
        // default exclude list; ask_user_question is always excluded.
        const excludeTools = opts.allow_subagents === true
            ? DEFAULT_EXCLUDED_TOOLS.filter((t) => t !== "agent")
            : [...DEFAULT_EXCLUDED_TOOLS];
        // System prompt: coprocessor preamble, then prior_context, then the
        // caller's system text, then the optional JSON-schema directive
        // (RDR-002 v0.8 amendment).
        const systemPrompt = buildSystemPrompt(opts.system, opts.prior_context, opts.json_schema);
        // §S4 Permission mode: yolo only when write_authority===true.
        const permissionMode = opts.write_authority === true ? "yolo" : "default";
        // RDR-002 wrapper bridge: when both fields are populated, route the
        // SDK's qwen invocation through the wrapper script and forward the
        // resolved real-binary path via env. Empty OR missing values fall
        // through to default SDK behaviour, so a partially filled infra
        // object can never hand the SDK an undefined executable path.
        const bridgeActive = typeof infra?.qwenRealBin === "string" && infra.qwenRealBin !== ""
            && typeof infra?.wrapperPath === "string" && infra.wrapperPath !== "";
        const env = {
            OPENAI_BASE_URL: backend.url,
            OPENAI_API_KEY: process.env["OPENAI_API_KEY"] ?? "sk-local",
            QWEN_MODEL: backend.model,
        };
        if (bridgeActive) {
            env["QWEN_REAL_BIN"] = infra.qwenRealBin;
        }
        // RDR-006 4yx: forward a per-turn output-token floor to the inner qwen-code
        // so Arm A (via supervisor) isn't output-starved relative to Arm B (which
        // sets this env directly). Distinct from max_context_tokens. >0 guard so a
        // 0/unset never writes a starving cap.
        if (opts.max_output_tokens !== undefined && opts.max_output_tokens > 0) {
            env["QWEN_CODE_MAX_OUTPUT_TOKENS"] = String(opts.max_output_tokens);
        }
        // RDR-006 40v.13: isolate the INNER qwen's HOME (clean throwaway config)
        // without touching the supervisor's own HOME, which resolves its backend
        // registry. The SDK merges this env over process.env, so setting HOME here
        // overrides the inherited one for the inner process only.
        if (opts.home !== undefined && opts.home !== "") {
            env["HOME"] = opts.home;
        }
        // RDR-002 step 8: render the resolved extension set into the env
        // var the wrapper reads. envValue===null means "leave-defaults"
        // (wrapper drops --extensions). Setting QWEN_AGENT_EXTENSIONS only
        // when bridgeActive avoids leaking it to non-bridged tests.
        if (bridgeActive && resolvedExtensions?.envValue !== undefined && resolvedExtensions.envValue !== null) {
            env["QWEN_AGENT_EXTENSIONS"] = resolvedExtensions.envValue;
        }
        const queryOptions = {
            cwd: opts.cwd ?? process.cwd(),
            model: backend.model,
            env,
            authType: "openai",
            permissionMode,
            excludeTools,
            abortController: this._abortController,
            timeout: { canUseTool: CAN_USE_TOOL_TIMEOUT_MS },
            // canUseTool only registered in 'default' mode; yolo ignores it.
            ...(permissionMode === "default"
                ? { canUseTool: makeCanUseTool(this) }
                : {}),
            systemPrompt,
            ...(bridgeActive ? { pathToQwenExecutable: infra.wrapperPath } : {}),
        };
        this._sdkIter = query({
            prompt: this._inputGenerator(),
            options: queryOptions,
        });
        // Fire-and-forget: _run() catches its own errors and records them
        // as session state, so the promise never rejects.
        void this._run();
    }
    // ── Public accessors ──────────────────────────────────────────
    /** Current lifecycle state. */
    get state() {
        return this._state;
    }
    /** Bound at construction; mirrors the permissionMode decision. */
    write_authority;
    /** Number of SDK `result` messages seen so far. Read by
     *  `qwen_sessions` for operator overviews; the same counter appears
     *  in `last_known` on error PollResults. */
    get turns_completed() {
        return this._turns_completed;
    }
    /** Live budget snapshot — same shape that `poll()` embeds in its
     *  result. Exposed independently so `qwen_sessions` can build a
     *  multi-session overview without producing a full PollResult per
     *  session. */
    budgetStats() {
        return {
            est_tokens: this._estTokens(),
            max_tokens: this._maxContextTokens,
            tool_calls: this._toolCallCount,
            max_tool_calls: this._maxToolCalls,
        };
    }
    // ── Event ring buffer ─────────────────────────────────────────
    /**
     * Append an event to the ring buffer, evicting the oldest entry when
     * the buffer grows past RING_BUFFER_CAP.
     * @returns the stored event `{ id, type, ts, summary, data }`.
     */
    pushEvent(type, summary, data) {
        const ev = {
            id: nextEventId(),
            type,
            ts: Date.now(),
            summary,
            data,
        };
        this._events.push(ev);
        if (this._events.length > RING_BUFFER_CAP) {
            this._events.shift();
        }
        return ev;
    }
    // ── State transitions ─────────────────────────────────────────
    /** Push a new user message into the conversation.
     *  Wakes the input generator so the SDK pulls the message and starts
     *  the next turn.
     *
     *  Throws if the session is `complete` or `error` (terminal).
     *  Permitted in `running` (queues for after current turn) or `idle`
     *  (immediate next turn). */
    send(answer) {
        if (this._state === "complete" || this._state === "error") {
            throw new Error(`session ${this.task_id} is ${this._state}; cannot send`);
        }
        this._inputQueue.push(this._mkUserMessage(answer));
        this._last_user_message = answer;
        this._state = "running";
        this._wakeInput();
    }
    /** Cancel the running SDK iterator and close the input stream.
     *  Idempotent — safe to call repeatedly. Leaves an existing "error"
     *  state untouched; otherwise moves the session to "complete". */
    stop() {
        this._inputClosed = true;
        this._abortController.abort();
        if (this._sdkIter) {
            // AbortController.abort() is the real cancel signal; this
            // return() is belt-and-suspenders. Its outcome is normalised
            // through Promise.resolve() so a synchronous (non-promise)
            // return value cannot throw on `.catch`, and both rejections
            // and synchronous throws land in the structured log instead
            // of bubbling to unhandledRejection (which can terminate the
            // process in newer Node versions) or aborting stop() midway.
            const logReturnFailure = (err) => {
                log.warn({ task_id: this.task_id, err: err instanceof Error ? err.message : String(err) }, "sdkIter.return() rejected during stop()");
            };
            try {
                Promise.resolve(this._sdkIter.return?.()).catch(logReturnFailure);
            }
            catch (err) {
                logReturnFailure(err);
            }
        }
        this._wakeInput();
        if (this._state !== "error") {
            this._state = "complete";
            if (this._result === undefined) {
                this._result = this._last_message ?? "";
            }
        }
    }
    // ── Poll ──────────────────────────────────────────────────────
    /**
     * Snapshot the session for a caller.
     *
     * @param opts.max_events  page size (default DEFAULT_MAX_EVENTS).
     *                         Zero or negative yields an empty page.
     * @param opts.since       event-ID cursor; only events with a
     *                         numerically greater ID are returned, oldest
     *                         first. Without a cursor the newest
     *                         `max_events` events are returned.
     * @returns `{ state, recent_events, more_events_available,
     *            latest_event_id, budget }` plus `last_message` (idle or
     *            complete), `result` (complete), and `error` /
     *            `last_known` (error).
     */
    poll(opts = {}) {
        // Normalise the page size. A raw 0 must not reach slice(-0), which
        // is slice(0) and would return the whole buffer.
        const requested = Math.floor(Number(opts.max_events ?? DEFAULT_MAX_EVENTS));
        const maxEvents = Number.isNaN(requested) ? DEFAULT_MAX_EVENTS : Math.max(0, requested);
        // Event IDs are numeric strings (String(++_eventSeq)) — compare
        // numerically to avoid the lexicographic 9→10 boundary trap where
        // "10" < "9" silently breaks incremental polling.
        const sinceNum = opts.since !== undefined ? Number(opts.since) : undefined;
        let eligible;
        let slice;
        if (sinceNum === undefined) {
            eligible = this._events;
            slice = maxEvents > 0 ? eligible.slice(-maxEvents) : [];
        }
        else {
            // Locate the cursor once; both the page and the "more" flag
            // derive from the same tail.
            const startIdx = this._events.findIndex((e) => Number(e.id) > sinceNum);
            eligible = startIdx === -1 ? [] : this._events.slice(startIdx);
            slice = eligible.slice(0, maxEvents);
        }
        const hasMore = eligible.length > maxEvents;
        const latestId = this._events.length > 0 ? (this._events[this._events.length - 1].id) : "0";
        const result = {
            state: this._state,
            recent_events: slice,
            more_events_available: hasMore,
            latest_event_id: latestId,
            // Live budget counters (RDR-002 v0.6 amendment). Always set so
            // pollers don't have to special-case post-abort sessions; both
            // caps are zero-disabled per the SessionBudgetStats contract.
            budget: this.budgetStats(),
        };
        if ((this._state === "idle" || this._state === "complete") && this._last_message !== undefined) {
            result.last_message = this._last_message;
        }
        if (this._state === "complete" && this._result !== undefined) {
            result.result = this._result;
        }
        if (this._state === "error") {
            if (this._error !== undefined) {
                result.error = this._error;
            }
            const lastKnown = {
                turns_completed: this._turns_completed,
            };
            if (this._last_user_message !== undefined) {
                lastKnown.last_user_message = this._last_user_message;
            }
            if (this._last_assistant_summary !== undefined) {
                lastKnown.last_assistant_summary = this._last_assistant_summary;
            }
            result.last_known = lastKnown;
        }
        return result;
    }
    // ── Internals ─────────────────────────────────────────────────
    /** Wrap caller text in the SDK user-message envelope. */
    _mkUserMessage(text) {
        // RDR-002 v0.8: Qwen3.6's documented mechanism for skipping
        // chain-of-thought is the `/no_think` directive on the user
        // message. Apply per-message rather than once-per-session because
        // the chat template renders directives on the message they
        // accompany; subsequent turns need their own prefix.
        const out = this._thinkingMode ? text : `/no_think\n\n${text}`;
        return {
            type: "user",
            session_id: this.task_id,
            parent_tool_use_id: null,
            message: {
                role: "user",
                content: [{ type: "text", text: out }],
            },
        };
    }
    // Single-slot resolver. The current send() → _wakeInput() →
    // _inputGenerator-resumes chain is race-free because JS micro-task
    // ordering guarantees the generator awakens (and drops the resolver
    // back to null) before any second send() can fire-and-set it.
    // If send() ever becomes async, or a message-batch API is added, this
    // single-slot design must become a queue of resolvers or a proper
    // semaphore — otherwise back-to-back wakes between yields silently
    // collapse to one.
    _wakeInput() {
        if (this._inputResolver) {
            const resolve = this._inputResolver;
            this._inputResolver = null;
            resolve();
        }
    }
    /** Async generator that the SDK consumes as `prompt`. Yields queued
     *  messages, blocks on a resolver when empty, and returns when
     *  stop() flips _inputClosed. */
    async *_inputGenerator() {
        while (true) {
            // Drain queue first.
            while (this._inputQueue.length > 0) {
                const msg = this._inputQueue.shift();
                if (msg !== undefined)
                    yield msg;
            }
            if (this._inputClosed)
                return;
            // Wait for someone to push a message or close the input.
            await new Promise((resolve) => {
                this._inputResolver = resolve;
            });
        }
    }
    // ── Session budget ────────────────────────────────────────────
    //
    // The chars/4 token estimate is intentionally crude — the SDK doesn't
    // expose a tokenizer and we don't want to ship one. It runs ~25–30%
    // hot for English prose against tiktoken, so 0.85 * ctx_size as the
    // default cap leaves comfortable headroom even when the estimate is
    // optimistic. The point is to fire visibly before the HTTP layer
    // panics, not to be precise.
    /** Returns the current chars/4 token estimate. */
    _estTokens() {
        return Math.floor(this._accumulatedToolResultChars / 4);
    }
    /** Emit a `context_pressure` event when the estimate first crosses
     *  50%, 75%, or 90% of max_context_tokens. Each level fires once. */
    _maybeEmitPressure() {
        if (this._maxContextTokens <= 0)
            return;
        const est = this._estTokens();
        const thresholds = [
            [0.5, "warn"],
            [0.75, "high"],
            [0.9, "critical"],
        ];
        for (const [pct, level] of thresholds) {
            if (this._emittedPressure.has(level))
                continue;
            if (est >= this._maxContextTokens * pct) {
                this._emittedPressure.add(level);
                this.pushEvent("context_pressure", `context_pressure ${level}: ${est}/${this._maxContextTokens} est tokens, ${this._toolCallCount} tool calls`, {
                    level,
                    est_tokens: est,
                    max_tokens: this._maxContextTokens,
                    tool_calls: this._toolCallCount,
                    max_tool_calls: this._maxToolCalls,
                });
            }
        }
    }
    /** If either cap is exceeded, transition to error and stop the SDK
     *  iterator. Returns true when an abort was triggered so callers in
     *  `_run` can break out of their loop. Idempotent. */
    _enforceBudget() {
        if (this._state === "error")
            return true; // already aborted
        const est = this._estTokens();
        const overTokens = this._maxContextTokens > 0 && est > this._maxContextTokens;
        const overCalls = this._maxToolCalls > 0 && this._toolCallCount > this._maxToolCalls;
        if (!overTokens && !overCalls)
            return false;
        const message = `session exceeded budget: est_tokens=${est}/${this._maxContextTokens || "off"}, tool_calls=${this._toolCallCount}/${this._maxToolCalls || "off"}`;
        this._state = "error";
        this._error = { code: "context_exceeded", message };
        this.pushEvent("error", `context budget exceeded: ${message}`);
        log.warn({
            task_id: this.task_id,
            event_type: "context_exceeded",
            est_tokens: est,
            max_tokens: this._maxContextTokens,
            tool_calls: this._toolCallCount,
            max_tool_calls: this._maxToolCalls,
        }, "session aborted: budget exceeded");
        // stop() preserves _state when it is already "error" — by design,
        // see the guard in stop().
        this.stop();
        return true;
    }
    // ── SDK event loop ────────────────────────────────────────────
    /**
     * Consume the SDK message stream until it ends or the session fails.
     *
     * - `assistant`: records a text summary and one `tool_call` event per
     *   tool_use block (budget-checked).
     * - `user`: records `tool_result` blocks and feeds the token estimate.
     * - `result`: ends a turn; success moves to "idle", `is_error` moves
     *   to "error".
     *
     * Never rejects: exceptions from the iterator are converted into the
     * "error" state. Every error exit goes through stop() so the input
     * generator is closed and the abort signal fires.
     */
    async _run() {
        if (!this._sdkIter)
            return;
        try {
            for await (const msg of this._sdkIter) {
                if (msg.type === "assistant") {
                    // Capture & emit text content for model_message_summary.
                    const textBlocks = msg.message.content
                        .filter((b) => b.type === "text")
                        .map((b) => b.text)
                        .join(" ");
                    if (textBlocks.trim()) {
                        const summary = textBlocks.slice(0, 120);
                        this.pushEvent("model_message_summary", summary);
                        this._last_message = textBlocks;
                        this._last_assistant_summary = summary;
                    }
                    // Tool-use events.
                    for (const block of msg.message.content) {
                        if (block.type === "tool_use") {
                            this.pushEvent("tool_call", `tool_call: ${block.name}`, { name: block.name, id: block.id, input: block.input });
                            this._toolCallCount++;
                            if (this._enforceBudget())
                                return;
                        }
                    }
                }
                else if (msg.type === "user") {
                    // Tool results coming back from the SDK's internal tool execution.
                    const content = msg.message.content;
                    if (Array.isArray(content)) {
                        for (const block of content) {
                            if (block.type === "tool_result") {
                                this.pushEvent("tool_result", `tool_result: ${block.tool_use_id}`, block);
                                this._accumulatedToolResultChars += measureToolResultChars(block);
                                this._maybeEmitPressure();
                                if (this._enforceBudget())
                                    return;
                            }
                        }
                    }
                }
                else if (msg.type === "result") {
                    // §Observability — log cache_read_input_tokens (RDR §Observability).
                    const usage = msg.usage;
                    if (usage?.cache_read_input_tokens !== undefined) {
                        log.info({
                            task_id: this.task_id,
                            cache_read_input_tokens: usage.cache_read_input_tokens,
                            input_tokens: usage.input_tokens,
                            output_tokens: usage.output_tokens,
                        }, "sdk result usage");
                    }
                    this._turns_completed++;
                    if (msg.is_error) {
                        this._state = "error";
                        const errMsg = msg.error?.message ?? msg.subtype;
                        this._error = { code: "backend_internal", message: errMsg };
                        this.pushEvent("error", `sdk error: ${errMsg}`);
                        // Release the input generator and fire the abort
                        // signal, exactly as the budget-abort path does;
                        // stop() keeps the "error" state.
                        this.stop();
                        return;
                    }
                    // Successful turn end — capture result text and transition to idle.
                    // Don't return; the for-await continues, the SDK pulls the next
                    // user message from our input generator (which blocks until
                    // send() pushes one or stop() closes).
                    const turnResult = msg.result;
                    if (turnResult !== undefined && turnResult !== "") {
                        this._last_message = turnResult;
                        this._result = turnResult;
                    }
                    this.pushEvent("turn_complete", `turn ${this._turns_completed} complete`);
                    this._state = "idle";
                }
            }
            // Iterator naturally exhausted (only happens after stop()).
            if (this._state === "running" || this._state === "idle") {
                this._state = "complete";
                if (this._result === undefined) {
                    this._result = this._last_message ?? "";
                }
            }
        }
        catch (err) {
            // Exceptions raised after a deliberate stop() / budget abort are
            // expected fallout and must not overwrite the terminal state.
            if (this._state === "running" || this._state === "idle") {
                const message = err instanceof Error ? err.message : String(err);
                this._state = "error";
                this._error = { code: "backend_internal", message };
                this.pushEvent("error", `sdk exception: ${message}`);
                log.error({ task_id: this.task_id, err }, "sdk iterator error");
                // Close the input stream and abort so nothing stays parked
                // on the never-resolving input promise.
                this.stop();
            }
        }
    }
}
|
|
552
|
+
// ─────────────────────────────────────────────────────────────────
|
|
553
|
+
// Helpers
|
|
554
|
+
/**
 * Best-effort char count of a tool_result block's content. The SDK
 * permits either a plain string or an array of typed sub-blocks
 * (`{type:"text",text}` or other content blocks); text lengths are summed
 * across both shapes and anything else is measured by its JSON string.
 * Slight over-counting is fine — the budget is a guardrail, not an
 * accountant.
 *
 * Never throws: values that cannot be serialised (cycles, BigInt) or that
 * serialise to nothing (`undefined`, functions) count as 0, both at the
 * top level and for individual array parts.
 *
 * @param block  tool_result block; only `block.content` is read.
 * @returns {number} approximate character count, 0 when there is no content.
 */
function measureToolResultChars(block) {
    const content = block?.content;
    if (content === undefined || content === null)
        return 0;
    if (typeof content === "string")
        return content.length;
    if (!Array.isArray(content))
        return jsonLengthOrZero(content);
    let total = 0;
    for (const part of content) {
        if (typeof part === "string") {
            total += part.length;
        }
        else if (part && typeof part.text === "string") {
            total += part.text.length;
        }
        else {
            // Guarded: one odd part must not crash the session loop.
            total += jsonLengthOrZero(part);
        }
    }
    return total;
}
/** Length of JSON.stringify(value), or 0 when it throws or yields undefined. */
function jsonLengthOrZero(value) {
    try {
        return JSON.stringify(value)?.length ?? 0;
    }
    catch {
        return 0;
    }
}
|
|
593
|
+
/**
 * Produce the one-line `summary` text for the extensions_loaded event.
 * The structured payload travels separately in `data.resolved`.
 *
 * @param resolved  the sentinel "leave-defaults", the sentinel "none",
 *                  or an array of extension names.
 * @returns {string} human-readable summary line.
 */
function describeResolvedExtensions(resolved) {
    switch (resolved) {
        case "leave-defaults":
            return "extensions: leave-defaults (CLI defaults apply)";
        case "none":
            return "extensions: none (explicitly disabled)";
        default:
            break;
    }
    // Array form: an empty list reads as "none", otherwise list the names.
    const isEmpty = resolved.length === 0;
    return isEmpty ? "extensions: none" : `extensions: ${resolved.join(", ")}`;
}
|
|
607
|
+
/**
 * Assemble the session's system prompt. Sections are joined with a blank
 * line, in this order: coprocessor preamble, prior-session context (when
 * given), the caller's system text (when given), and a JSON-only output
 * contract (when a schema is given).
 *
 * Prior context synthesis is text-faithful but lossy for prior tool
 * calls (tool call history cannot be replayed against a new backend —
 * see RDR §S2).
 *
 * @param system        caller-supplied system text; skipped when falsy.
 * @param priorContext  optional `{ conversation_summary,
 *                      last_user_message?, prior_session_id? }`.
 * @param jsonSchema    optional JSON Schema; any value other than
 *                      `undefined` adds the output contract.
 * @returns {string} the combined system prompt.
 */
function buildSystemPrompt(system, priorContext, jsonSchema) {
    const sections = [COPROCESSOR_PREAMBLE];
    if (priorContext) {
        const { conversation_summary, last_user_message, prior_session_id } = priorContext;
        sections.push(`[Resuming prior session context]`, `Conversation summary:\n${conversation_summary}`);
        if (last_user_message) {
            sections.push(`Last user message:\n${last_user_message}`);
        }
        if (prior_session_id) {
            sections.push(`Prior session ID: ${prior_session_id}`);
        }
    }
    if (system) {
        sections.push(system);
    }
    // RDR-002 v0.8: schema-as-system-prompt. Nothing grammar-enforces the
    // output yet (v0.9 candidate); the model is instructed instead, and
    // qwen_oneshot layers spawn + JSON.parse + retry on top for callers
    // that want a Result-shaped return.
    if (jsonSchema !== undefined) {
        const contractLines = [
            "[Output contract — JSON only]",
            "Your final assistant message must START with `{` or `[` and END",
            "with `}` or `]`. No preamble, no closing remarks, no explanatory",
            "text. ABSOLUTELY no markdown code fences (no triple backticks,",
            "no ```json wrappers). The very first character of your response",
            "must be `{` or `[`.",
            "",
            "The JSON must conform to this JSON Schema:",
            "",
            `${JSON.stringify(jsonSchema, null, 2)}`,
            "",
            "If the task cannot be completed, return a JSON object with",
            '`{"error": "<one-line explanation>"}` rather than free text.',
        ];
        sections.push(contractLines.join("\n"));
    }
    return sections.join("\n\n");
}
|
|
649
|
+
//# sourceMappingURL=session.js.map
|