@copilotkit/aimock 1.27.3 → 1.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +12 -0
- package/dist/config-loader.d.ts.map +1 -1
- package/dist/gemini.cjs +18 -8
- package/dist/gemini.cjs.map +1 -1
- package/dist/gemini.d.cts.map +1 -1
- package/dist/gemini.d.ts.map +1 -1
- package/dist/gemini.js +18 -8
- package/dist/gemini.js.map +1 -1
- package/dist/harmony.cjs +419 -0
- package/dist/harmony.cjs.map +1 -0
- package/dist/harmony.js +417 -0
- package/dist/harmony.js.map +1 -0
- package/dist/recorder.cjs +57 -13
- package/dist/recorder.cjs.map +1 -1
- package/dist/recorder.d.cts +6 -1
- package/dist/recorder.d.cts.map +1 -1
- package/dist/recorder.d.ts +6 -1
- package/dist/recorder.d.ts.map +1 -1
- package/dist/recorder.js +57 -13
- package/dist/recorder.js.map +1 -1
- package/dist/stream-collapse.cjs +219 -57
- package/dist/stream-collapse.cjs.map +1 -1
- package/dist/stream-collapse.d.cts +16 -0
- package/dist/stream-collapse.d.cts.map +1 -1
- package/dist/stream-collapse.d.ts +16 -0
- package/dist/stream-collapse.d.ts.map +1 -1
- package/dist/stream-collapse.js +219 -57
- package/dist/stream-collapse.js.map +1 -1
- package/dist/types.d.cts +9 -0
- package/dist/types.d.cts.map +1 -1
- package/dist/types.d.ts +9 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/vector-types.d.cts.map +1 -1
- package/dist/vector-types.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/harmony.cjs
ADDED
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
|
|
2
|
+
//#region src/harmony.ts
|
|
3
|
+
const START_TOKEN = "<|start|>";
const END_TOKEN = "<|end|>";
const RETURN_TOKEN = "<|return|>";
const CALL_TOKEN = "<|call|>";
const CHANNEL_TOKEN = "<|channel|>";
const MESSAGE_TOKEN = "<|message|>";
const CONSTRAIN_TOKEN = "<|constrain|>";
/** Literal text of each control token, keyed by its token type. */
const CONTROL_LITERAL = {
  START: START_TOKEN,
  END: END_TOKEN,
  RETURN: RETURN_TOKEN,
  CALL: CALL_TOKEN,
  CHANNEL: CHANNEL_TOKEN,
  MESSAGE: MESSAGE_TOKEN,
  CONSTRAIN: CONSTRAIN_TOKEN
};
/**
 * The same table as an ordered `{ type, literal }` list (the shape the lexer
 * iterates). Derived from CONTROL_LITERAL so the two can never drift apart;
 * Object.entries keeps the insertion order of the keys above.
 */
const CONTROL_TOKENS = Object.entries(CONTROL_LITERAL).map(([type, literal]) => ({
  type,
  literal
}));
/** Captures NAME from a `to=functions.NAME` routing marker. */
const RECIPIENT_RE = /to=functions\.([A-Za-z_][\w.-]*)/;
|
|
50
|
+
/**
 * Cheap detection guard: report whether `content` looks like it carries a
 * harmony structure, so callers only attempt a full parse when it is worth it.
 *
 * The check passes when a `<|message|>` token appears somewhere after the end
 * of the first `<|channel|>` token, OR somewhere after the end of the first
 * `<|start|>` token. A single token mentioned on its own therefore does not
 * trip the guard.
 *
 * This is a fast-path gate only; it says nothing about well-formedness.
 *
 * @param {string} content Accumulated assistant content.
 * @returns {boolean} `true` when an opener/message pairing is present;
 *   `false` otherwise, including for any non-string input.
 */
function isHarmonyContent(content) {
  // Non-strings (e.g. a null content field) cannot contain harmony tokens.
  if (typeof content !== "string") return false;
  const hasMessageAfter = (opener) => {
    const openerIdx = content.indexOf(opener);
    if (openerIdx === -1) return false;
    // Search from just past the opener; the offset comes from the token itself
    // rather than a hand-counted literal length.
    return content.indexOf(MESSAGE_TOKEN, openerIdx + opener.length) !== -1;
  };
  return hasMessageAfter(CHANNEL_TOKEN) || hasMessageAfter(START_TOKEN);
}
|
|
71
|
+
/**
 * Split `raw` into an ordered stream of tokens in one left-to-right pass.
 *
 * Every exact occurrence of a control-token literal becomes
 * `{ kind: "control", type }`; each non-empty run of characters between two
 * control tokens (or between a control token and either end of the input)
 * becomes `{ kind: "text", value }`. A `<|` that does not begin a known
 * literal is ordinary text.
 *
 * Scanning always resumes after the end of a matched literal, so characters
 * already consumed are never examined again.
 *
 * @param {string} raw Input to tokenize.
 * @returns {Array<object>} Tokens in source order; empty for an empty string.
 */
function lex(raw) {
  const out = [];
  // Start of the text run that has been scanned but not yet emitted.
  let textFrom = 0;
  const emitText = (upTo) => {
    if (upTo > textFrom) {
      out.push({
        kind: "text",
        value: raw.slice(textFrom, upTo)
      });
    }
  };
  // First control token (in table order) whose literal starts at `pos`.
  const controlAt = (pos) => {
    for (const candidate of CONTROL_TOKENS) {
      if (raw.startsWith(candidate.literal, pos)) return candidate;
    }
    return undefined;
  };
  let scanFrom = 0;
  // Every control literal begins with "<|", so only those positions can match.
  for (let at = raw.indexOf("<|", scanFrom); at !== -1; at = raw.indexOf("<|", scanFrom)) {
    const hit = controlAt(at);
    if (hit === undefined) {
      // Not a known literal: leave it in the pending text run and move on.
      scanFrom = at + 1;
      continue;
    }
    emitText(at);
    out.push({
      kind: "control",
      type: hit.type
    });
    scanFrom = at + hit.literal.length;
    textFrom = scanFrom;
  }
  emitText(raw.length);
  return out;
}
|
|
112
|
+
/**
 * Whether `s` has no visible characters.
 *
 * @param {string} s Text to inspect.
 * @returns {boolean} `true` for the empty string or a whitespace-only string.
 */
function isBlank(s) {
  return s.trim() === "";
}
|
|
116
|
+
/**
 * Whether `s` is JSON text whose top-level value is a plain object.
 *
 * Text that fails to parse, and text that parses to `null`, an array, or a
 * scalar (number / boolean / string), all yield `false`. Only the top-level
 * value is classified; whatever the object contains is not inspected.
 *
 * @param {string} s Candidate JSON text.
 * @returns {boolean} `true` only for a non-null, non-array JSON object.
 */
function isToolArgsObject(s) {
  try {
    const parsed = JSON.parse(s);
    if (parsed === null || Array.isArray(parsed)) return false;
    return typeof parsed === "object";
  } catch {
    // Unparseable text is simply "not a tool-args object".
    return false;
  }
}
|
|
135
|
+
/**
 * Pull the channel name out of the header text that follows `<|channel|>`.
 *
 * The name is the first whitespace-delimited word of the header; anything
 * after it (such as `to=functions.NAME` routing) is ignored here.
 *
 * @param {string} headerText Raw header text.
 * @returns {string} The leading word, or `""` when the header is empty or
 *   whitespace-only.
 */
function headerChannel(headerText) {
  const firstWord = /\S+/.exec(headerText);
  return firstWord === null ? "" : firstWord[0];
}
|
|
144
|
+
/** Channel names accepted in a `<|channel|>` header; any other name is rejected. */
const KNOWN_CHANNELS = new Set(["analysis", "commentary", "final"]);
|
|
150
|
+
/**
 * Lookahead: does the token at `idx` begin a harmony message header?
 *
 * Walks the optional prefix
 * `START? role-TEXT? CHANNEL? header-TEXT? CONSTRAIN? constraint-TEXT?`
 * and answers `true` only if the very next token is `<|message|>`. The
 * CONSTRAIN segment is only considered after a CHANNEL token.
 *
 * When a CHANNEL token is followed by header text, the channel name in that
 * text must be one of KNOWN_CHANNELS; an unknown name answers `false`
 * immediately. A header with no CHANNEL token, or a CHANNEL token with no
 * header text, is not subject to that check.
 *
 * @param {Array<object>} tokens Lexed token stream.
 * @param {number} idx Index of the first token of the candidate header.
 * @returns {boolean} `true` when the prefix leads directly to `<|message|>`.
 */
function looksLikeMessageStart(tokens, idx) {
  const isControl = (pos, type) => {
    const tok = tokens[pos];
    return tok?.kind === "control" && tok.type === type;
  };
  const isText = (pos) => tokens[pos]?.kind === "text";
  let pos = idx;
  if (isControl(pos, "START")) {
    pos += 1;
    // Optional role text.
    if (isText(pos)) pos += 1;
  }
  if (isControl(pos, "CHANNEL")) {
    pos += 1;
    if (isText(pos)) {
      if (!KNOWN_CHANNELS.has(headerChannel(tokens[pos].value))) return false;
      pos += 1;
    }
    if (isControl(pos, "CONSTRAIN")) {
      pos += 1;
      // Optional constraint text (e.g. the format name).
      if (isText(pos)) pos += 1;
    }
  }
  return isControl(pos, "MESSAGE");
}
|
|
185
|
+
/**
 * Decide whether position `idx` (the token right after an END/RETURN/CALL
 * candidate) is a genuine message boundary.
 *
 * Whitespace-only text tokens are skipped first. The position is a boundary
 * when nothing is left after them, or when what follows passes
 * {@link looksLikeMessageStart}.
 *
 * @param {Array<object>} tokens Lexed token stream.
 * @param {number} idx Index of the token following the terminator candidate.
 * @returns {boolean} `true` at end of input or at the start of a next message.
 */
function isRealBoundaryAfter(tokens, idx) {
  let pos = idx;
  for (let tok = tokens[pos]; tok?.kind === "text" && tok.value.trim() === ""; tok = tokens[pos]) {
    pos += 1;
  }
  return pos >= tokens.length || looksLikeMessageStart(tokens, pos);
}
|
|
198
|
+
/**
 * Whether the token at `idx` is real prose, i.e. a text token containing at
 * least one non-whitespace character.
 *
 * Callers use this to tell a control-token literal quoted inside prose (it is
 * followed by more prose) from one that is acting as structure (it is followed
 * by another control token, by whitespace only, or by nothing).
 *
 * @param {Array<object>} tokens Lexed token stream.
 * @param {number} idx Index of the token to test.
 * @returns {boolean} `false` when `idx` is past the end, when the token is a
 *   control token, or when the text is blank; `true` otherwise.
 */
function hasProseFollower(tokens, idx) {
  const follower = tokens[idx];
  if (follower === undefined || follower.kind !== "text") return false;
  return follower.value.trim() !== "";
}
|
|
215
|
+
/** Sentinel thrown internally to unwind to the uniform fail-safe path. */
|
|
216
|
+
// A Symbol is unique by identity, so this thrown value cannot be confused with an ordinary Error.
const FAIL = Symbol("harmony-fail");
|
|
217
|
+
/**
 * Walk the token stream against the harmony grammar and route each message by
 * channel. All-or-nothing: any deviation from the grammar throws {@link FAIL}.
 *
 * Grammar handled, per message:
 *   `START? role-TEXT? (CHANNEL header-TEXT? (CONSTRAIN TEXT?)?)? MESSAGE body`
 * At least one of START / CHANNEL is required, and a CHANNEL header must name
 * one of KNOWN_CHANNELS.
 *
 * Routing:
 *  - `commentary` channel WITH a `to=functions.NAME` recipient -> a tool call;
 *    the body up to the first `<|call|>` at which the accumulated text is a
 *    JSON object becomes its `arguments` (kept as a string).
 *  - `analysis` channel -> appended to `reasoning`.
 *  - anything else -> appended to `content`.
 * A non-blank text token that comes first and is directly followed by START
 * or CHANNEL is treated as a preamble and prepended to `content`.
 *
 * @param {Array<object>} tokens Token stream produced by the lexer.
 * @returns {{content: string, reasoning: string, toolCalls: Array<{name: string, arguments: string}>}}
 *   The fully routed channels.
 * @throws {symbol} {@link FAIL} when the stream does not match the grammar or
 *   contains no message at all.
 */
function parseTokens(tokens) {
  const fail = () => {
    throw FAIL;
  };
  const isControl = (pos, type) => {
    const tok = tokens[pos];
    return tok !== undefined && tok.kind === "control" && tok.type === type;
  };
  const isText = (pos) => tokens[pos]?.kind === "text";

  /** Consume a message header starting at `pos`, up to and including MESSAGE. */
  const readHeader = (pos) => {
    let at = pos;
    let recipient;
    let channel = "";
    let sawHeader = false;
    if (isControl(at, "START")) {
      sawHeader = true;
      at += 1;
      if (isText(at)) {
        // The role segment may already carry the routing marker.
        recipient = tokens[at].value.match(RECIPIENT_RE)?.[1];
        at += 1;
      }
    }
    if (isControl(at, "CHANNEL")) {
      sawHeader = true;
      at += 1;
      if (isText(at)) {
        const headerText = tokens[at].value;
        channel = headerChannel(headerText);
        // A recipient in the channel header wins over one in the role segment.
        recipient = headerText.match(RECIPIENT_RE)?.[1] ?? recipient;
        at += 1;
      }
      if (isControl(at, "CONSTRAIN")) {
        at += 1;
        if (isText(at)) at += 1;
      }
      if (!KNOWN_CHANNELS.has(channel)) fail();
    }
    if (!sawHeader || !isControl(at, "MESSAGE")) fail();
    return {
      recipient,
      channel,
      bodyStart: at + 1
    };
  };

  /** Accumulate a tool-call body until a CALL token closes a JSON object. */
  const readToolArgs = (bodyStart) => {
    let acc = "";
    for (let j = bodyStart; j < tokens.length; j++) {
      const t = tokens[j];
      if (t.kind === "control" && t.type === "CALL") {
        const candidate = acc.trim();
        if (isToolArgsObject(candidate)) return {
          args: candidate,
          next: j + 1
        };
      }
      // Anything else (including a CALL that does not yet close valid JSON)
      // is part of the argument text and is restored to its literal form.
      acc += t.kind === "control" ? CONTROL_LITERAL[t.type] : t.value;
    }
    return fail();
  };

  /** Accumulate a non-tool body until a real boundary or end of input. */
  const readProseBody = (bodyStart, channel, fromQuotedSplit) => {
    let body = "";
    let absorbedControlLiteral = false;
    for (let j = bodyStart; j < tokens.length; j++) {
      const t = tokens[j];
      if (t.kind !== "control") {
        body += t.value;
        continue;
      }
      switch (t.type) {
        case "END":
        case "RETURN":
        case "CALL":
          if (isRealBoundaryAfter(tokens, j + 1)) return {
            body,
            next: j + 1,
            endedOnStart: false
          };
          break;
        case "START":
          // The START itself belongs to the next message, so it is not consumed.
          if (looksLikeMessageStart(tokens, j)) return {
            body,
            next: j,
            endedOnStart: true
          };
          break;
        case "CHANNEL":
        case "MESSAGE":
          return fail();
        default:
          break;
      }
      // Not structure: keep the literal only when real prose follows it, and
      // never inside a message that was itself split off by a bare START.
      if (fromQuotedSplit || !hasProseFollower(tokens, j + 1)) fail();
      absorbedControlLiteral = true;
      body += CONTROL_LITERAL[t.type];
    }
    // Ran off the end without a terminator.
    if (channel === "analysis" || absorbedControlLiteral) fail();
    return {
      body,
      next: tokens.length,
      endedOnStart: false
    };
  };

  let content = "";
  let reasoning = "";
  const toolCalls = [];
  let i = 0;
  const first = tokens[0];
  const second = tokens[1];
  if (first?.kind === "text" && second?.kind === "control" && (second.type === "START" || second.type === "CHANNEL")) {
    if (!isBlank(first.value)) content += first.value;
    i = 1;
  }
  let sawMessage = false;
  let nextIsQuotedSplit = false;
  while (i < tokens.length) {
    const tok = tokens[i];
    if (tok.kind === "text") {
      // Only whitespace may sit between messages.
      if (!isBlank(tok.value)) fail();
      i += 1;
      continue;
    }
    const fromQuotedSplit = nextIsQuotedSplit;
    nextIsQuotedSplit = false;
    const { recipient, channel, bodyStart } = readHeader(i);
    if (recipient !== undefined && channel === "commentary") {
      const call = readToolArgs(bodyStart);
      toolCalls.push({
        name: recipient,
        arguments: call.args
      });
      i = call.next;
    } else {
      const prose = readProseBody(bodyStart, channel, fromQuotedSplit);
      if (channel === "analysis") reasoning += prose.body;
      else content += prose.body;
      nextIsQuotedSplit = prose.endedOnStart;
      i = prose.next;
    }
    sawMessage = true;
  }
  if (!sawMessage) fail();
  return {
    content,
    reasoning,
    toolCalls
  };
}
|
|
378
|
+
/**
 * Parse harmony channel tokens out of an accumulated assistant `content`
 * string, splitting it into content, reasoning, and tool calls. Performs no
 * I/O of its own: it lexes `raw` and hands the tokens to the grammar walker.
 *
 * Uniform all-or-nothing fail-safe: if lexing or parsing throws for ANY
 * reason, the result is `{ content: raw, reasoning: "", toolCalls: [],
 * failed: true }`, so the input is handed back verbatim and `failed` lets the
 * caller tell an unparsed result apart from a successfully routed one. This
 * function itself never throws.
 *
 * @param {string} raw Accumulated assistant content.
 * @returns {{content: *, reasoning: string, toolCalls: Array<object>, failed: boolean}}
 *   Routed channels with `failed: false`, or the verbatim input with
 *   `failed: true`.
 */
function parseHarmonyContent(raw) {
  try {
    // Lexing happens inside the try so the never-throw contract covers it too.
    const { content, reasoning, toolCalls } = parseTokens(lex(raw));
    return {
      content,
      reasoning,
      toolCalls,
      failed: false
    };
  } catch {
    // Deliberate catch-all: a grammar failure and an unexpected error are
    // reported identically, by returning the original input untouched.
    return {
      content: raw,
      reasoning: "",
      toolCalls: [],
      failed: true
    };
  }
}
|
|
415
|
+
|
|
416
|
+
//#endregion
|
|
417
|
+
exports.isHarmonyContent = isHarmonyContent;
|
|
418
|
+
exports.parseHarmonyContent = parseHarmonyContent;
|
|
419
|
+
//# sourceMappingURL=harmony.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"harmony.cjs","names":[],"sources":["../src/harmony.ts"],"sourcesContent":["/**\n * OpenAI harmony channel parsing for open-weight gpt-oss models.\n *\n * Hosted api.openai.com pre-parses harmony output into structured\n * `tool_calls` / `message.content`, but open-weight gpt-oss models served via\n * Ollama / vLLM / OpenRouter (i.e. whenever OPENAI_BASE_URL points at a\n * local/open-weights backend) stream tool calls as RAW harmony channel tokens\n * INSIDE `delta.content`. Without parsing, the recorded fixture leaks the\n * tool-call routing marker (`to=functions.NAME`) and its args JSON as plain\n * text content instead of capturing a structured tool call.\n *\n * Harmony grammar (authoritative, from OpenAI's harmony spec):\n * Special tokens: <|start|> <|end|> <|message|> <|channel|> <|constrain|>\n * <|return|> <|call|>\n * A message is laid out as:\n * <|start|>{role/recipient header}<|channel|>{channel header}<|message|>{body}{terminator}\n * where the leading <|start|> and/or <|channel|> may be absent on the very\n * first message of a stream, and the channel header carries the channel name\n * plus optional `to=functions.NAME` routing and `<|constrain|>json`.\n * Channels:\n * - analysis chain-of-thought -> reasoning\n * - commentary function/tool calls + preambles\n * - final user-facing answer -> content\n * A tool call is a `commentary`-channel message whose header (role segment OR\n * channel header) carries recipient routing `to=functions.NAME`; its args are\n * the JSON body after `<|message|>`, terminated by `<|call|>`. Example:\n * <|channel|>analysis<|message|>Need to call the tool.<|end|>\n * <|start|>assistant<|channel|>commentary to=functions.generate_a2ui\n * <|constrain|>json<|message|>{\"component\":\"card\",\"props\":{}}<|call|>\n * <|start|>assistant<|channel|>final<|message|>Here you go.<|return|>\n *\n * Implementation: a TWO-PHASE parser, NOT an indexOf scanner.\n *\n * Phase 1 — LEXER ({@link lex}). 
One left-to-right pass over the accumulated\n * content producing an ordered {@link Token}[]: each element is either a\n * CONTROL token (matched by exact prefix at the cursor) or a TEXT span (the\n * literal run between control tokens). Once bytes are consumed into a TEXT\n * span they are NEVER re-scanned for control tokens — so a literal\n * \"<|call|>\"/\"<|channel|>\" substring inside a JSON string or prose can never\n * be mistaken for structure. The lexer NEVER throws; it always returns a\n * complete token stream.\n *\n * Phase 2 — STATE MACHINE ({@link parseTokens}). Walks the token stream\n * against the harmony grammar:\n * Stream := TEXT? Message+ TEXT?\n * Message := START? Header? MESSAGE Body Terminator\n * Header := role-TEXT? CHANNEL header-TEXT?\n * Body := the token span following MESSAGE up to its real Terminator\n * Terminator := END | RETURN | CALL | (lookahead START) | EOF (final only)\n * The Terminator is located over the TOKEN STREAM, never via indexOf: a body\n * may re-materialize embedded control-token literals as prose (e.g. a final\n * answer that quotes \"`<|end|>`\"), so the real terminator is the first\n * END/RETURN/CALL (or a START that begins a well-formed next message) whose\n * follower is a real message boundary — EOF or a parseable next header. A\n * commentary tool body additionally requires Terminator==CALL AND a body that\n * parses as JSON; the \"first CALL whose preceding TEXT parses as valid JSON,\n * else fail-safe\" rule is preserved but operates over the token stream. A\n * CHANNEL header must name a known channel (analysis/commentary/final) and a\n * dangling CHANNEL/MESSAGE inside a body is a grammar deviation (fail-safe).\n *\n * Fail-safe contract: parsing is UNIFORM all-or-nothing. 
{@link\n * parseHarmonyContent} returns `failed:true` with `content` set to the ORIGINAL\n * raw input VERBATIM on ANY grammar deviation (TEXT-only / prose mention with\n * no Message, CHANNEL with no following MESSAGE, a tool body that is not valid\n * JSON or not CALL-terminated, an unterminated non-final body, a body\n * terminator followed by trailing non-message junk, or any leftover unexpected\n * token). There is EXACTLY ONE success path that strips tokens; it never\n * partial-strips and never leaks a control token into content/reasoning.\n * Harmony-present-but-unparseable is NOT transport loss — the caller preserves\n * the bytes verbatim and surfaces a distinct `harmonyUnparsed` signal rather\n * than `droppedChunks`/`truncated`.\n *\n * KNOWN LIMITATION — quoted whole-message ambiguity. Harmony tokens arrive as\n * detokenized TEXT, so a body that QUOTES a COMPLETE, well-formed harmony\n * message is structurally indistinguishable from two real messages. Example:\n * <|channel|>final<|message|>To emit write <|start|>assistant<|channel|>\n * final<|message|>hello<|return|>\n * The lexer cannot know the inner `<|start|>...<|message|>hello<|return|>` is a\n * quotation rather than a real second message, so this parses as TWO final\n * messages and the quoted control tokens are stripped (content \"To emit write\n * hello\"). This is the irreducible quoted-vs-real ambiguity; the parser does\n * NOT over-engineer a guess. The fail-safe contract still holds at its edges:\n * the split is only accepted when it yields cleanly well-formed messages — if\n * any resulting message is malformed (e.g. the quoted message is followed by\n * trailing junk, \"...hello<|return|> and then stop\"), the body terminator /\n * trailing-junk rule fails the WHOLE input safe (verbatim) rather than emit a\n * mangled middle. 
So the behavior is always verbatim-or-clean, never mangled.\n */\n\nimport type { ToolCall } from \"./types.js\";\n\n// Harmony special tokens.\nconst START_TOKEN = \"<|start|>\";\nconst END_TOKEN = \"<|end|>\";\nconst RETURN_TOKEN = \"<|return|>\";\nconst CALL_TOKEN = \"<|call|>\";\nconst CHANNEL_TOKEN = \"<|channel|>\";\nconst MESSAGE_TOKEN = \"<|message|>\";\nconst CONSTRAIN_TOKEN = \"<|constrain|>\";\n\n/** The seven harmony control-token kinds. */\ntype ControlType = \"START\" | \"END\" | \"RETURN\" | \"CALL\" | \"CHANNEL\" | \"MESSAGE\" | \"CONSTRAIN\";\n\n// Control tokens ordered for prefix matching at the cursor. All seven literals\n// are distinct prefixes, so match order is irrelevant for correctness; the\n// array simply drives the single cursor scan in the lexer.\nconst CONTROL_TOKENS: ReadonlyArray<{ type: ControlType; literal: string }> = [\n { type: \"START\", literal: START_TOKEN },\n { type: \"END\", literal: END_TOKEN },\n { type: \"RETURN\", literal: RETURN_TOKEN },\n { type: \"CALL\", literal: CALL_TOKEN },\n { type: \"CHANNEL\", literal: CHANNEL_TOKEN },\n { type: \"MESSAGE\", literal: MESSAGE_TOKEN },\n { type: \"CONSTRAIN\", literal: CONSTRAIN_TOKEN },\n];\n\n// Reverse map: control-token kind -> its literal. 
Used by the state machine to\n// re-materialize a control token's literal text when reconstructing a JSON\n// tool-call body that legitimately contains \"<|call|>\"-shaped substrings.\nconst CONTROL_LITERAL: Record<ControlType, string> = {\n START: START_TOKEN,\n END: END_TOKEN,\n RETURN: RETURN_TOKEN,\n CALL: CALL_TOKEN,\n CHANNEL: CHANNEL_TOKEN,\n MESSAGE: MESSAGE_TOKEN,\n CONSTRAIN: CONSTRAIN_TOKEN,\n};\n\n// Recipient routing marker carried by the role segment or the channel header.\n// Requires a valid identifier after `to=functions.` — must START with a letter\n// or underscore (so `to=functions.-` / `to=functions.` are NOT recipients),\n// then allow word chars, dots, and dashes.\nconst RECIPIENT_RE = /to=functions\\.([A-Za-z_][\\w.-]*)/;\n\n/**\n * Cheap detection guard — only ATTEMPT a parse when a harmony structure looks\n * present, i.e. a `<|channel|>` followed (anywhere after it) by a `<|message|>`,\n * OR a `<|message|>` appearing after a `<|start|>`.\n *\n * This is a fast-path gate, NOT the authority on well-formedness: the state\n * machine in {@link parseHarmonyContent} makes the real decision and is itself\n * fully fail-safe. 
Requiring the token pairing keeps hosted/structured answers\n * that merely MENTION a single token as prose out of the parser entirely.\n */\nexport function isHarmonyContent(content: string): boolean {\n const channelIdx = content.indexOf(CHANNEL_TOKEN);\n if (channelIdx !== -1) {\n if (content.indexOf(MESSAGE_TOKEN, channelIdx + CHANNEL_TOKEN.length) !== -1) {\n return true;\n }\n }\n const startIdx = content.indexOf(START_TOKEN);\n if (startIdx !== -1) {\n if (content.indexOf(MESSAGE_TOKEN, startIdx + START_TOKEN.length) !== -1) {\n return true;\n }\n }\n return false;\n}\n\nexport interface HarmonyParseResult {\n content: string;\n reasoning: string;\n toolCalls: ToolCall[];\n /**\n * True when the input could NOT be parsed as a complete, valid harmony\n * structure and the ORIGINAL content was returned VERBATIM (fail-safe). The\n * bytes are preserved, so this is NOT transport loss — the caller surfaces it\n * via a distinct `harmonyUnparsed` signal, not `droppedChunks`/`truncated`.\n */\n failed: boolean;\n}\n\n// ---------------------------------------------------------------------------\n// Phase 1: Lexer\n// ---------------------------------------------------------------------------\n\n/** A control token (one of the seven harmony special tokens). */\ninterface ControlToken {\n kind: \"control\";\n type: ControlType;\n}\n\n/** A literal text span between control tokens. Never empty. */\ninterface TextToken {\n kind: \"text\";\n value: string;\n}\n\ntype Token = ControlToken | TextToken;\n\n/**\n * Lex the accumulated content into an ordered token stream via a single\n * left-to-right cursor walk. At each position, match a control token by exact\n * prefix; otherwise accumulate bytes into the current TEXT run until the next\n * control token or EOF.\n *\n * Bytes consumed into a TEXT span are NEVER re-scanned for control tokens, so\n * an embedded literal \"<|call|>\"/\"<|channel|>\" inside a JSON string or prose is\n * inert. 
The lexer NEVER throws; it always returns a complete token stream.\n */\nfunction lex(raw: string): Token[] {\n const tokens: Token[] = [];\n let cursor = 0;\n let textStart = 0;\n\n const flushText = (end: number): void => {\n if (end > textStart) {\n tokens.push({ kind: \"text\", value: raw.slice(textStart, end) });\n }\n };\n\n while (cursor < raw.length) {\n let matched: { type: ControlType; literal: string } | undefined;\n // A control token only begins at \"<|\"; cheap reject avoids scanning the\n // literal list on every plain character.\n if (raw.startsWith(\"<|\", cursor)) {\n for (const tok of CONTROL_TOKENS) {\n if (raw.startsWith(tok.literal, cursor)) {\n matched = tok;\n break;\n }\n }\n }\n if (matched) {\n flushText(cursor);\n tokens.push({ kind: \"control\", type: matched.type });\n cursor += matched.literal.length;\n textStart = cursor;\n } else {\n cursor += 1;\n }\n }\n flushText(raw.length);\n\n return tokens;\n}\n\n// ---------------------------------------------------------------------------\n// Phase 2: State machine\n// ---------------------------------------------------------------------------\n\n/** True when `s` is empty or only whitespace. */\nfunction isBlank(s: string): boolean {\n return s.trim().length === 0;\n}\n\n/**\n * True when `s` parses as a JSON OBJECT — a non-null, non-array `{...}` value.\n *\n * Harmony tool-call arguments are JSON OBJECTS. A bare JSON SCALAR (number /\n * boolean / string / null) or ARRAY parses as valid JSON but is NOT a valid\n * tool-call argument, so it must not terminate a tool call (fail-safe verbatim\n * per the uniform contract). 
The object check (and ONLY the object check) is\n * what makes a commentary `<|call|>` body a tool call; embedded token-shaped\n * substrings INSIDE the object's string values remain valid data (matrix 13/14).\n */\nfunction isToolArgsObject(s: string): boolean {\n let value: unknown;\n try {\n value = JSON.parse(s);\n } catch {\n return false;\n }\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\n/**\n * Extract the channel name from the header text that follows `<|channel|>`. The\n * channel name is the leading token, delimited by whitespace (the rest of the\n * header carries optional `to=functions.NAME` routing). A `<|constrain|>` token\n * is lexed separately, so it never appears inside this text.\n */\nfunction headerChannel(headerText: string): string {\n return headerText.trim().split(/\\s+/)[0] ?? \"\";\n}\n\n/** The harmony channels a real `<|channel|>` header may name. */\nconst KNOWN_CHANNELS = new Set([\"analysis\", \"commentary\", \"final\"]);\n\n/**\n * True when token index `idx` begins a well-formed harmony message header —\n * used as lookahead to decide whether a `<|start|>` is a real message boundary\n * (terminating the current body) or a literal `<|start|>` quoted inside a prose\n * body. A real message header reaches a `<|message|>` token via the optional\n * `START? role-TEXT? CHANNEL? header-TEXT? CONSTRAIN? constraint-TEXT?` prefix\n * WITHOUT first crossing a body terminator (END/RETURN/CALL) or EOF.\n *\n * When the lookahead carries a `<|channel|>` header, the channel name it names\n * must be a KNOWN harmony channel (analysis/commentary/final). A lookahead like\n * `<|start|>...<|channel|>X<|message|>` whose X is unknown is NOT a real\n * boundary — it narrows the quoted-message ambiguity so a body quoting a\n * bogus-channel pseudo-message is not split on it. 
A channel-LESS header\n * (`<|start|>role<|message|>...`) is unaffected (KNOWN_CHANNELS only gates a\n * present `<|channel|>` name).\n */\nfunction looksLikeMessageStart(tokens: Token[], idx: number): boolean {\n let k = idx;\n if (tokens[k]?.kind === \"control\" && (tokens[k] as ControlToken).type === \"START\") {\n k += 1;\n if (tokens[k]?.kind === \"text\") k += 1; // optional role-TEXT\n }\n if (tokens[k]?.kind === \"control\" && (tokens[k] as ControlToken).type === \"CHANNEL\") {\n k += 1;\n if (tokens[k]?.kind === \"text\") {\n // The channel name must be a known harmony channel for this to be a real\n // message boundary; an unknown channel header is not a true boundary.\n if (!KNOWN_CHANNELS.has(headerChannel((tokens[k] as TextToken).value))) return false;\n k += 1; // header-TEXT\n }\n if (tokens[k]?.kind === \"control\" && (tokens[k] as ControlToken).type === \"CONSTRAIN\") {\n k += 1;\n if (tokens[k]?.kind === \"text\") k += 1; // optional constraint-name TEXT\n }\n }\n return tokens[k]?.kind === \"control\" && (tokens[k] as ControlToken).type === \"MESSAGE\";\n}\n\n/**\n * True when the position right after a body terminator candidate (END/RETURN/\n * CALL at the token before `idx`) is a REAL message boundary: either EOF\n * (optionally preceded by whitespace-only TEXT spans) or the start of a\n * well-formed next message. When false, the terminator candidate is a literal\n * control token embedded in a prose body.\n */\nfunction isRealBoundaryAfter(tokens: Token[], idx: number): boolean {\n let k = idx;\n // Skip whitespace-only TEXT spans (inter-message / trailing whitespace).\n while (tokens[k]?.kind === \"text\" && (tokens[k] as TextToken).value.trim().length === 0) {\n k += 1;\n }\n if (k >= tokens.length) return true; // EOF (final message)\n return looksLikeMessageStart(tokens, k);\n}\n\n/**\n * True when the token at index `idx` is a NON-BLANK TEXT span — i.e. real prose\n * follows. 
A control-token literal embedded in a non-tool body is only\n * LEGITIMATELY prose when it is bracketed by real text (e.g. a final answer that\n * quotes \"the `<|end|>` token\"); the lexer will have tokenized the quoted\n * literal, and its immediate follower being non-blank prose is what makes it\n * inert body text rather than structure. When the follower is instead another\n * control token or EOF (or only whitespace), the literal is NOT embedded prose —\n * it is a control token that would leak into routed content/reasoning, so the\n * body must fail safe. This is the STRUCTURAL fail-safe invariant: it fires at\n * absorption time on EVERY exit path, not per-exit, so a leak-shaped body can\n * never reach the routing step regardless of how its loop terminates.\n */\nfunction hasProseFollower(tokens: Token[], idx: number): boolean {\n const next = tokens[idx];\n return next !== undefined && next.kind === \"text\" && next.value.trim().length > 0;\n}\n\n/** Sentinel thrown internally to unwind to the uniform fail-safe path. */\nconst FAIL = Symbol(\"harmony-fail\");\n\n/**\n * Walk the token stream against the harmony grammar and route each message by\n * channel. Throws {@link FAIL} on ANY grammar deviation so {@link\n * parseHarmonyContent} returns the original bytes verbatim (uniform\n * all-or-nothing fail-safe). 
On success returns fully-routed channels.\n */\nfunction parseTokens(tokens: Token[]): {\n content: string;\n reasoning: string;\n toolCalls: ToolCall[];\n} {\n let content = \"\";\n let reasoning = \"\";\n const toolCalls: ToolCall[] = [];\n\n let i = 0;\n const peek = (): Token | undefined => tokens[i];\n const fail = (): never => {\n throw FAIL;\n };\n\n // ----- Leading channel-less TEXT (before the first Message) -----\n // Whitespace-only leading text is absorbed; non-whitespace leading text is\n // channel-less content (a pre-channel preamble).\n if (peek()?.kind === \"text\") {\n const t = tokens[i] as TextToken;\n // Only treat this as leading content when a real Message header actually\n // follows (START / CHANNEL). A bare MESSAGE is NOT a message header — a\n // legitimate message always opens with START or CHANNEL before MESSAGE — so\n // leading text followed by a bare <|message|> is a grammar deviation, left\n // for the main loop to fail safe (verbatim) rather than glued to the body.\n // Otherwise the text is handled by the trailing / no-message rules below\n // (which fail-safe when no message exists).\n const next = tokens[i + 1];\n const nextStartsMessage =\n next !== undefined &&\n next.kind === \"control\" &&\n (next.type === \"START\" || next.type === \"CHANNEL\");\n if (nextStartsMessage) {\n if (!isBlank(t.value)) content += t.value;\n i += 1;\n }\n }\n\n // A well-formed stream has at least one Message.\n let sawMessage = false;\n\n // Set when the PREVIOUS body terminated on a START-lookahead — i.e. a body\n // ran (without an intervening real terminator) into a `<|start|>...` that\n // looks like a message header, so the parser SPLIT it off as a separate\n // message. This is the irreducible quoted-whole-message ambiguity: in\n // detokenized TEXT a body that QUOTES a complete well-formed message is\n // indistinguishable from two real messages. 
The split is only accepted when\n // BOTH resulting messages are cleanly well-formed (matrix-doc \"verbatim-or-\n // clean, never mangled\"). A quoted-split message whose OWN body would have to\n // absorb an embedded control literal (e.g. the quoted body\n // \"hello<|return|> and then stop\") is NOT clean — absorbing it would leak the\n // token into routed content/reasoning — so it fails the WHOLE input safe.\n let nextIsQuotedSplit = false;\n\n while (i < tokens.length) {\n const tok = peek();\n if (tok === undefined) break;\n\n // Absorb whitespace-only inter-message / trailing TEXT spans. A non-blank\n // stray TEXT span at message position is a grammar deviation.\n if (tok.kind === \"text\") {\n if (isBlank(tok.value)) {\n i += 1;\n continue;\n }\n // Non-blank text where a message (or EOF) was expected: this is leftover,\n // unexpected token content — fail safe.\n fail();\n }\n\n // Capture-and-reset the quoted-split marker for THIS message.\n const fromQuotedSplit = nextIsQuotedSplit;\n nextIsQuotedSplit = false;\n\n // tok is a control token: the start of a Message.\n let recipient: string | undefined;\n let channel = \"\";\n // A well-formed message ALWAYS opens with a real header (START and/or\n // CHANNEL) before <|message|>. 
Track whether such a header was seen so a\n // bare <|message|> at message position (no preceding START/CHANNEL) fails\n // safe instead of being silently accepted as a channel-less message (which\n // would strip control tokens and glue bodies together).\n let sawHeader = false;\n\n // ----- optional START + role-TEXT -----\n if (tok.kind === \"control\" && tok.type === \"START\") {\n sawHeader = true;\n i += 1;\n // Optional role header text carrying `to=functions.NAME`.\n if (peek()?.kind === \"text\") {\n const roleText = (tokens[i] as TextToken).value;\n recipient = roleText.match(RECIPIENT_RE)?.[1];\n i += 1;\n }\n }\n\n // ----- optional CHANNEL + header-TEXT (+ optional CONSTRAIN) -----\n if (peek()?.kind === \"control\" && (peek() as ControlToken).type === \"CHANNEL\") {\n sawHeader = true;\n i += 1;\n // Optional header text carrying the channel name + optional routing.\n if (peek()?.kind === \"text\") {\n const headerText = (tokens[i] as TextToken).value;\n channel = headerChannel(headerText);\n const headerRecipient = headerText.match(RECIPIENT_RE)?.[1];\n if (headerRecipient !== undefined) recipient = headerRecipient;\n i += 1;\n }\n // An optional <|constrain|> token (e.g. <|constrain|>json) may sit\n // between the channel header and <|message|>. It carries a constraint\n // hint only — consume it and any following constraint-name text. It does\n // NOT make a body <|call|>-terminated on its own (only a commentary\n // recipient does).\n if (peek()?.kind === \"control\" && (peek() as ControlToken).type === \"CONSTRAIN\") {\n i += 1;\n if (peek()?.kind === \"text\") {\n // e.g. \"json\" — discard; it is a constraint hint, not body content.\n i += 1;\n }\n }\n // A real <|channel|> header names a KNOWN channel (analysis / commentary\n // / final). If it does not, this is not harmony structure — it is a prose\n // mention of the literal token (e.g. \"use `<|channel|>` to pick a\n // channel\"). 
Fail safe so the original bytes are preserved verbatim.\n if (!KNOWN_CHANNELS.has(channel)) fail();\n }\n\n // ----- mandatory MESSAGE -----\n // A message must be introduced by a real header (START and/or CHANNEL)\n // before <|message|> is consumed. A bare <|message|> at message position\n // — with no preceding START/CHANNEL in this message — is a grammar\n // deviation (not a channel-less message): accepting it would silently strip\n // control tokens and glue bodies together. Fail safe (uniform verbatim),\n // mirroring the bare CHANNEL/MESSAGE-inside-a-non-tool-body rule below.\n if (!sawHeader) fail();\n if (!(peek()?.kind === \"control\" && (peek() as ControlToken).type === \"MESSAGE\")) {\n // A header (START and/or CHANNEL) with no following <|message|> is an\n // incomplete message — fail safe.\n fail();\n }\n i += 1; // consume MESSAGE\n const bodyStart = i; // token index of the first body token\n\n const isCommentaryToolCall = recipient !== undefined && channel === \"commentary\";\n\n if (isCommentaryToolCall) {\n // A commentary tool-call body is a JSON value terminated by <|call|>. The\n // literal substring \"<|call|>\" can legitimately appear INSIDE a JSON\n // string, and the lexer will have tokenized it as a CALL control token.\n // So scan CALL tokens left-to-right, re-materializing the body text from\n // tokens between <|message|> and each CALL, and pick the FIRST CALL whose\n // accumulated preceding text parses as a COMPLETE JSON OBJECT (A2). A bare\n // JSON SCALAR/array (e.g. 
`123`, `true`, `[1,2]`, `null`) is valid JSON but\n // is NOT a valid tool-call argument, so it does NOT terminate the call.\n // If no CALL closes a valid JSON object, fail safe.\n let acc = \"\";\n let j = bodyStart;\n let parsed: string | undefined;\n for (; j < tokens.length; j++) {\n const t = tokens[j];\n if (t.kind === \"control\" && t.type === \"CALL\") {\n // Canonicalize the captured args: leading/trailing whitespace around\n // the JSON value is not part of the value (e.g. \"<|message|> {...} \")\n // — trim it so the recorded arguments are the canonical JSON. Interior\n // whitespace inside the JSON is preserved. JSON.parse already tolerates\n // surrounding whitespace, so validate the TRIMMED form to pick the\n // terminator correctly.\n const candidate = acc.trim();\n if (isToolArgsObject(candidate)) {\n parsed = candidate;\n break;\n }\n // Not a complete JSON object yet (incomplete, or a scalar/array that is\n // not a valid tool-call argument) — the embedded \"<|call|>\" is part of\n // the JSON string; keep accumulating.\n acc += CONTROL_LITERAL.CALL;\n continue;\n }\n if (t.kind === \"control\") {\n acc += CONTROL_LITERAL[t.type];\n } else {\n acc += t.value;\n }\n }\n if (parsed === undefined) fail();\n i = j + 1; // consume body tokens + the terminating CALL\n toolCalls.push({ name: recipient!, arguments: parsed! });\n sawMessage = true;\n continue;\n }\n\n // ----- Non-tool Body + Terminator -----\n // The body runs from MESSAGE to its REAL terminator. A literal control\n // token can legitimately appear in a prose body (e.g. \"the `<|end|>`\n // token\"), and the lexer will have tokenized it. So scan forward,\n // re-materializing control literals into the body text, and stop at the\n // FIRST END/RETURN/CALL whose follower is a real message boundary — i.e.\n // EOF (optionally preceded by whitespace-only TEXT) or the start of a\n // well-formed next message ({@link looksLikeMessageStart}). 
A bare START\n // that begins a parseable message is also a (lookahead) terminator.\n //\n // STRUCTURAL FAIL-SAFE INVARIANT. A control-token literal may only be\n // ABSORBED into a routed (content/reasoning) body when it is genuinely\n // embedded prose — i.e. its immediate follower is real text ({@link\n // hasProseFollower}), as in a final answer quoting \"the `<|end|>` token\"\n // (matrix 10-12). When an embedded terminator-shaped literal (END/RETURN/\n // CALL), a non-boundary START, or a stray CONSTRAIN is followed by another\n // control token or by EOF (i.e. NOT bracketed by prose), it is not embedded\n // prose — it is a control token that would LEAK into routed output. Rather\n // than a per-exit guard (which the old code only applied on the EOF exit,\n // leaking on the `terminated` exit), the check fires HERE, at absorption\n // time, so a leak-shaped body fails safe uniformly no matter how its loop\n // ends. This single invariant subsumes the terminated-exit leak\n // (`A<|return|><|return|>`), the trailing `<|start|>` absorption leak\n // (`answer <|start|>`), and the stray-CONSTRAIN re-materialization. Tool\n // (commentary+recipient) bodies are handled separately above and are NOT\n // subject to this rule (embedded tokens inside a JSON string arg are valid\n // data validated by JSON.parse). `absorbedControlLiteral` records that a\n // literal was legitimately absorbed mid-prose so the EOF branch can reject a\n // body that runs past such a token straight to EOF with no real terminator.\n let body = \"\";\n let terminated = false;\n let reachedEof = false;\n let absorbedControlLiteral = false;\n let j = i;\n for (; j < tokens.length; j++) {\n const t = tokens[j];\n if (t.kind === \"control\" && (t.type === \"END\" || t.type === \"RETURN\" || t.type === \"CALL\")) {\n if (isRealBoundaryAfter(tokens, j + 1)) {\n terminated = true;\n break;\n }\n // Embedded terminator-shaped literal. 
It is inert body prose ONLY when\n // bracketed by real text AND this message is not itself a quoted split\n // (a quoted-split body that must absorb a literal is not clean — it\n // would leak the token); otherwise fail safe.\n if (fromQuotedSplit || !hasProseFollower(tokens, j + 1)) fail();\n absorbedControlLiteral = true;\n body += CONTROL_LITERAL[t.type];\n continue;\n }\n if (t.kind === \"control\" && t.type === \"START\") {\n if (looksLikeMessageStart(tokens, j)) {\n // Lookahead terminator: the NEXT message begins here. Do NOT consume.\n // The next message is a quoted-message split (see nextIsQuotedSplit).\n terminated = true;\n break;\n }\n // Embedded <|start|> inside prose — inert body text ONLY when bracketed\n // by real text and not a quoted split; a START with no prose after it\n // (e.g. trailing \"answer <|start|>\" or \"<|start|>\" before another\n // control token) would leak — fail safe.\n if (fromQuotedSplit || !hasProseFollower(tokens, j + 1)) fail();\n absorbedControlLiteral = true;\n body += CONTROL_LITERAL.START;\n continue;\n }\n if (t.kind === \"control\" && (t.type === \"CHANNEL\" || t.type === \"MESSAGE\")) {\n // A bare CHANNEL / MESSAGE inside a non-tool body is structural, not\n // prose: in a well-formed stream the next message's CHANNEL is always\n // introduced by a START (caught above as a real boundary), so a\n // dangling CHANNEL/MESSAGE here means the structure is malformed (e.g.\n // \"<|message|>body<|channel|>analysis\"). 
Fail safe.\n fail();\n }\n if (t.kind === \"control\") {\n // A stray CONSTRAIN inside a body is an inert hint, but its literal\n // would leak unless it is bracketed by prose (and not a quoted split) —\n // fail safe otherwise.\n if (fromQuotedSplit || !hasProseFollower(tokens, j + 1)) fail();\n absorbedControlLiteral = true;\n body += CONTROL_LITERAL[t.type];\n continue;\n }\n // TEXT span — part of the body.\n body += t.value;\n }\n if (j >= tokens.length) reachedEof = true;\n\n if (terminated) {\n const term = tokens[j] as ControlToken;\n if (term.type === \"START\") {\n // Lookahead: leave START in place for the next loop iteration, and flag\n // that the next message is a quoted-message split (the current body ran\n // into a START without a real terminator of its own).\n i = j;\n nextIsQuotedSplit = true;\n } else {\n i = j + 1; // consume END/RETURN/CALL\n }\n routeBody(channel, body);\n sawMessage = true;\n continue;\n }\n if (reachedEof) {\n // EOF terminates the FINAL message only. A content-routing channel\n // (final / commentary-preamble-without-recipient / channel-less) may\n // legitimately run to EOF with no explicit terminator, so it is accepted\n // verbatim. 
But:\n // - An `analysis` body is a terminator-expecting reasoning body\n // (closed by <|end|>); an UNTERMINATED analysis body at EOF is a\n // grammar deviation (B-A3) — fail safe rather than surface dangling\n // reasoning.\n // - If the body legitimately absorbed a mid-prose control literal and\n // then ran to EOF, the message was never properly terminated and the\n // control token would leak into the output (B-A1) — fail safe rather\n // than mangle.\n if (channel === \"analysis\" || absorbedControlLiteral) fail();\n i = j;\n routeBody(channel, body);\n sawMessage = true;\n break;\n }\n // Unreachable in practice (loop only exits via terminator or EOF), but keep\n // the uniform fail-safe for any unexpected fallthrough.\n fail();\n }\n\n if (!sawMessage) fail();\n\n return { content, reasoning, toolCalls };\n\n // Route a non-tool body by channel. Only two channel shapes reach this\n // function: `analysis` (-> reasoning) and `final` / commentary-without-\n // recipient (preamble) / channel-less (-> content). An UNKNOWN <|channel|>\n // name never reaches here — it fail-safes upstream at the\n // `if (!KNOWN_CHANNELS.has(channel)) fail()` guard during header parsing — so\n // there is no unknown-channel case to route.\n function routeBody(ch: string, body: string): void {\n if (ch === \"analysis\") {\n reasoning += body;\n } else {\n // final, commentary-without-recipient (preamble), and channel-less bodies\n // all surface as user-facing content.\n content += body;\n }\n }\n}\n\n// ---------------------------------------------------------------------------\n// Public entry point\n// ---------------------------------------------------------------------------\n\n/**\n * Parse harmony channel tokens out of an accumulated assistant `content`\n * string, splitting them into final-channel content, analysis-channel\n * reasoning, and commentary-channel tool calls. 
Pure function — no I/O.\n *\n * Callers should gate this behind {@link isHarmonyContent} so ordinary\n * (already-structured) streams are never touched. Even so, this function is\n * itself UNIFORM all-or-nothing fail-safe: on ANY structural/validation failure\n * it returns `{ content: raw, reasoning: \"\", toolCalls: [], failed: true }` so\n * the original content is preserved VERBATIM and the caller can surface a\n * distinct `harmonyUnparsed` signal (NOT a dropped/truncated chunk).\n */\nexport function parseHarmonyContent(raw: string): HarmonyParseResult {\n const tokens = lex(raw);\n try {\n const { content, reasoning, toolCalls } = parseTokens(tokens);\n return { content, reasoning, toolCalls, failed: false };\n } catch (err) {\n if (err === FAIL) {\n return { content: raw, reasoning: \"\", toolCalls: [], failed: true };\n }\n // Unexpected error — still fail safe rather than throw to the caller.\n return { content: raw, reasoning: \"\", toolCalls: [], failed: true };\n }\n}\n"],"mappings":";;AA4FA,MAAM,cAAc;AACpB,MAAM,YAAY;AAClB,MAAM,eAAe;AACrB,MAAM,aAAa;AACnB,MAAM,gBAAgB;AACtB,MAAM,gBAAgB;AACtB,MAAM,kBAAkB;AAQxB,MAAM,iBAAwE;CAC5E;EAAE,MAAM;EAAS,SAAS;EAAa;CACvC;EAAE,MAAM;EAAO,SAAS;EAAW;CACnC;EAAE,MAAM;EAAU,SAAS;EAAc;CACzC;EAAE,MAAM;EAAQ,SAAS;EAAY;CACrC;EAAE,MAAM;EAAW,SAAS;EAAe;CAC3C;EAAE,MAAM;EAAW,SAAS;EAAe;CAC3C;EAAE,MAAM;EAAa,SAAS;EAAiB;CAChD;AAKD,MAAM,kBAA+C;CACnD,OAAO;CACP,KAAK;CACL,QAAQ;CACR,MAAM;CACN,SAAS;CACT,SAAS;CACT,WAAW;CACZ;AAMD,MAAM,eAAe;;;;;;;;;;;AAYrB,SAAgB,iBAAiB,SAA0B;CACzD,MAAM,aAAa,QAAQ,QAAQ,cAAc;AACjD,KAAI,eAAe,IACjB;MAAI,QAAQ,QAAQ,eAAe,aAAa,GAAqB,KAAK,GACxE,QAAO;;CAGX,MAAM,WAAW,QAAQ,QAAQ,YAAY;AAC7C,KAAI,aAAa,IACf;MAAI,QAAQ,QAAQ,eAAe,WAAW,EAAmB,KAAK,GACpE,QAAO;;AAGX,QAAO;;;;;;;;;;;;AA4CT,SAAS,IAAI,KAAsB;CACjC,MAAM,SAAkB,EAAE;CAC1B,IAAI,SAAS;CACb,IAAI,YAAY;CAEhB,MAAM,aAAa,QAAsB;AACvC,MAAI,MAAM,UACR,QAAO,KAAK;GAAE,MAAM;GAAQ,OAAO,IAAI,MAAM,WAAW,IAAI;GAAE,CAAC;;AAInE,QAAO,SAAS,IAAI,QAAQ;EAC1B,IAAI;AAGJ,MAAI,IAAI,WAAW,MAAM,OAAO,EAC9B;QAAK,MAAM
,OAAO,eAChB,KAAI,IAAI,WAAW,IAAI,SAAS,OAAO,EAAE;AACvC,cAAU;AACV;;;AAIN,MAAI,SAAS;AACX,aAAU,OAAO;AACjB,UAAO,KAAK;IAAE,MAAM;IAAW,MAAM,QAAQ;IAAM,CAAC;AACpD,aAAU,QAAQ,QAAQ;AAC1B,eAAY;QAEZ,WAAU;;AAGd,WAAU,IAAI,OAAO;AAErB,QAAO;;;AAQT,SAAS,QAAQ,GAAoB;AACnC,QAAO,EAAE,MAAM,CAAC,WAAW;;;;;;;;;;;;AAa7B,SAAS,iBAAiB,GAAoB;CAC5C,IAAI;AACJ,KAAI;AACF,UAAQ,KAAK,MAAM,EAAE;SACf;AACN,SAAO;;AAET,QAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,CAAC,MAAM,QAAQ,MAAM;;;;;;;;AAS7E,SAAS,cAAc,YAA4B;AACjD,QAAO,WAAW,MAAM,CAAC,MAAM,MAAM,CAAC,MAAM;;;AAI9C,MAAM,iBAAiB,IAAI,IAAI;CAAC;CAAY;CAAc;CAAQ,CAAC;;;;;;;;;;;;;;;;;AAkBnE,SAAS,sBAAsB,QAAiB,KAAsB;CACpE,IAAI,IAAI;AACR,KAAI,OAAO,IAAI,SAAS,aAAc,OAAO,GAAoB,SAAS,SAAS;AACjF,OAAK;AACL,MAAI,OAAO,IAAI,SAAS,OAAQ,MAAK;;AAEvC,KAAI,OAAO,IAAI,SAAS,aAAc,OAAO,GAAoB,SAAS,WAAW;AACnF,OAAK;AACL,MAAI,OAAO,IAAI,SAAS,QAAQ;AAG9B,OAAI,CAAC,eAAe,IAAI,cAAe,OAAO,GAAiB,MAAM,CAAC,CAAE,QAAO;AAC/E,QAAK;;AAEP,MAAI,OAAO,IAAI,SAAS,aAAc,OAAO,GAAoB,SAAS,aAAa;AACrF,QAAK;AACL,OAAI,OAAO,IAAI,SAAS,OAAQ,MAAK;;;AAGzC,QAAO,OAAO,IAAI,SAAS,aAAc,OAAO,GAAoB,SAAS;;;;;;;;;AAU/E,SAAS,oBAAoB,QAAiB,KAAsB;CAClE,IAAI,IAAI;AAER,QAAO,OAAO,IAAI,SAAS,UAAW,OAAO,GAAiB,MAAM,MAAM,CAAC,WAAW,EACpF,MAAK;AAEP,KAAI,KAAK,OAAO,OAAQ,QAAO;AAC/B,QAAO,sBAAsB,QAAQ,EAAE;;;;;;;;;;;;;;;AAgBzC,SAAS,iBAAiB,QAAiB,KAAsB;CAC/D,MAAM,OAAO,OAAO;AACpB,QAAO,SAAS,UAAa,KAAK,SAAS,UAAU,KAAK,MAAM,MAAM,CAAC,SAAS;;;AAIlF,MAAM,OAAO,OAAO,eAAe;;;;;;;AAQnC,SAAS,YAAY,QAInB;CACA,IAAI,UAAU;CACd,IAAI,YAAY;CAChB,MAAM,YAAwB,EAAE;CAEhC,IAAI,IAAI;CACR,MAAM,aAAgC,OAAO;CAC7C,MAAM,aAAoB;AACxB,QAAM;;AAMR,KAAI,MAAM,EAAE,SAAS,QAAQ;EAC3B,MAAM,IAAI,OAAO;EAQjB,MAAM,OAAO,OAAO,IAAI;AAKxB,MAHE,SAAS,UACT,KAAK,SAAS,cACb,KAAK,SAAS,WAAW,KAAK,SAAS,YACnB;AACrB,OAAI,CAAC,QAAQ,EAAE,MAAM,CAAE,YAAW,EAAE;AACpC,QAAK;;;CAKT,IAAI,aAAa;CAajB,IAAI,oBAAoB;AAExB,QAAO,IAAI,OAAO,QAAQ;EACxB,MAAM,MAAM,MAAM;AAClB,MAAI,QAAQ,OAAW;AAIvB,MAAI,IAAI,SAAS,QAAQ;AACvB,OAAI,QAAQ,IAAI,MAAM,EAAE;AACtB,SAAK;AACL;;AAIF,SAAM;;EAIR,MAAM,kBAAkB;AACxB,sBAAoB;EAGpB,IAAI;EACJ,IAAI,UAAU;EAMd,IAAI,YAAY;AAGhB,MAAI,IAAI,SAAS,aAAa
,IAAI,SAAS,SAAS;AAClD,eAAY;AACZ,QAAK;AAEL,OAAI,MAAM,EAAE,SAAS,QAAQ;AAE3B,gBADkB,OAAO,GAAiB,MACrB,MAAM,aAAa,GAAG;AAC3C,SAAK;;;AAKT,MAAI,MAAM,EAAE,SAAS,aAAc,MAAM,CAAkB,SAAS,WAAW;AAC7E,eAAY;AACZ,QAAK;AAEL,OAAI,MAAM,EAAE,SAAS,QAAQ;IAC3B,MAAM,aAAc,OAAO,GAAiB;AAC5C,cAAU,cAAc,WAAW;IACnC,MAAM,kBAAkB,WAAW,MAAM,aAAa,GAAG;AACzD,QAAI,oBAAoB,OAAW,aAAY;AAC/C,SAAK;;AAOP,OAAI,MAAM,EAAE,SAAS,aAAc,MAAM,CAAkB,SAAS,aAAa;AAC/E,SAAK;AACL,QAAI,MAAM,EAAE,SAAS,OAEnB,MAAK;;AAOT,OAAI,CAAC,eAAe,IAAI,QAAQ,CAAE,OAAM;;AAU1C,MAAI,CAAC,UAAW,OAAM;AACtB,MAAI,EAAE,MAAM,EAAE,SAAS,aAAc,MAAM,CAAkB,SAAS,WAGpE,OAAM;AAER,OAAK;EACL,MAAM,YAAY;AAIlB,MAF6B,cAAc,UAAa,YAAY,cAE1C;GAUxB,IAAI,MAAM;GACV,IAAI,IAAI;GACR,IAAI;AACJ,UAAO,IAAI,OAAO,QAAQ,KAAK;IAC7B,MAAM,IAAI,OAAO;AACjB,QAAI,EAAE,SAAS,aAAa,EAAE,SAAS,QAAQ;KAO7C,MAAM,YAAY,IAAI,MAAM;AAC5B,SAAI,iBAAiB,UAAU,EAAE;AAC/B,eAAS;AACT;;AAKF,YAAO,gBAAgB;AACvB;;AAEF,QAAI,EAAE,SAAS,UACb,QAAO,gBAAgB,EAAE;QAEzB,QAAO,EAAE;;AAGb,OAAI,WAAW,OAAW,OAAM;AAChC,OAAI,IAAI;AACR,aAAU,KAAK;IAAE,MAAM;IAAY,WAAW;IAAS,CAAC;AACxD,gBAAa;AACb;;EAgCF,IAAI,OAAO;EACX,IAAI,aAAa;EACjB,IAAI,aAAa;EACjB,IAAI,yBAAyB;EAC7B,IAAI,IAAI;AACR,SAAO,IAAI,OAAO,QAAQ,KAAK;GAC7B,MAAM,IAAI,OAAO;AACjB,OAAI,EAAE,SAAS,cAAc,EAAE,SAAS,SAAS,EAAE,SAAS,YAAY,EAAE,SAAS,SAAS;AAC1F,QAAI,oBAAoB,QAAQ,IAAI,EAAE,EAAE;AACtC,kBAAa;AACb;;AAMF,QAAI,mBAAmB,CAAC,iBAAiB,QAAQ,IAAI,EAAE,CAAE,OAAM;AAC/D,6BAAyB;AACzB,YAAQ,gBAAgB,EAAE;AAC1B;;AAEF,OAAI,EAAE,SAAS,aAAa,EAAE,SAAS,SAAS;AAC9C,QAAI,sBAAsB,QAAQ,EAAE,EAAE;AAGpC,kBAAa;AACb;;AAMF,QAAI,mBAAmB,CAAC,iBAAiB,QAAQ,IAAI,EAAE,CAAE,OAAM;AAC/D,6BAAyB;AACzB,YAAQ,gBAAgB;AACxB;;AAEF,OAAI,EAAE,SAAS,cAAc,EAAE,SAAS,aAAa,EAAE,SAAS,WAM9D,OAAM;AAER,OAAI,EAAE,SAAS,WAAW;AAIxB,QAAI,mBAAmB,CAAC,iBAAiB,QAAQ,IAAI,EAAE,CAAE,OAAM;AAC/D,6BAAyB;AACzB,YAAQ,gBAAgB,EAAE;AAC1B;;AAGF,WAAQ,EAAE;;AAEZ,MAAI,KAAK,OAAO,OAAQ,cAAa;AAErC,MAAI,YAAY;AAEd,OADa,OAAO,GACX,SAAS,SAAS;AAIzB,QAAI;AACJ,wBAAoB;SAEpB,KAAI,IAAI;AAEV,aAAU,SAAS,KAAK;AACxB,gBAAa;AACb;;AAEF,MAAI,YAAY;AAad,OAAI,YAAY,cAAc,uBAAwB,OAAM;AAC5D,OAAI;AACJ,aAAU,SAAS,KAAK
;AACxB,gBAAa;AACb;;AAIF,QAAM;;AAGR,KAAI,CAAC,WAAY,OAAM;AAEvB,QAAO;EAAE;EAAS;EAAW;EAAW;CAQxC,SAAS,UAAU,IAAY,MAAoB;AACjD,MAAI,OAAO,WACT,cAAa;MAIb,YAAW;;;;;;;;;;;;;;;AAqBjB,SAAgB,oBAAoB,KAAiC;CACnE,MAAM,SAAS,IAAI,IAAI;AACvB,KAAI;EACF,MAAM,EAAE,SAAS,WAAW,cAAc,YAAY,OAAO;AAC7D,SAAO;GAAE;GAAS;GAAW;GAAW,QAAQ;GAAO;UAChD,KAAK;AACZ,MAAI,QAAQ,KACV,QAAO;GAAE,SAAS;GAAK,WAAW;GAAI,WAAW,EAAE;GAAE,QAAQ;GAAM;AAGrE,SAAO;GAAE,SAAS;GAAK,WAAW;GAAI,WAAW,EAAE;GAAE,QAAQ;GAAM"}
|