@warmdrift/kgauto-compiler 2.0.0-alpha.3 → 2.0.0-alpha.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2458 @@
1
+ "use strict";
2
// Bundler-generated CommonJS interop helpers. Short aliases for the reflection
// primitives used below.
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Installs every entry of `all` on `target` as an enumerable getter.
 *
 * @param {object} target - Object that receives the accessors.
 * @param {Object<string, Function>} all - Map of export name to getter function.
 */
function __export(target, all) {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
}

/**
 * Mirrors the own properties of `from` onto `to` as live getters.
 * Keys already owned by `to`, and the key equal to `except`, are left alone.
 * Enumerability follows the source descriptor (enumerable when none is found).
 *
 * @param {object} to - Destination object.
 * @param {*} from - Source; anything that is not an object or function is ignored.
 * @param {string} [except] - Single key to skip.
 * @param {*} [desc] - Unused by callers; kept so the signature stays unchanged.
 * @returns {object} `to`, for chaining.
 */
function __copyProps(to, from, except, desc) {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    const descriptor = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !descriptor || descriptor.enumerable,
    });
  }
  return to;
}

/**
 * Builds a fresh object flagged with a non-enumerable `__esModule: true`
 * and copies the properties of `mod` onto it.
 *
 * @param {object} mod - Export table to expose.
 * @returns {object} The flagged copy.
 */
function __toCommonJS(mod) {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
}
19
+
20
+ // src/glassbox-routes/index.ts
21
// Public surface of the glassbox-routes entry point, published through
// module.exports as a CommonJS-compatible object.
const glassbox_routes_exports = {};
__export(glassbox_routes_exports, {
  // Resolved on access rather than at registration time; createGlassboxRoutes
  // itself is defined outside this part of the file.
  createGlassboxRoutes: () => createGlassboxRoutes,
});
module.exports = __toCommonJS(glassbox_routes_exports);
26
+
27
+ // src/glassbox-routes/auth.ts
28
// Header set attached to every JSON error response built below.
const JSON_HEADERS = { "Content-Type": "application/json" };

/**
 * Builds a JSON error response whose body is `{"error": <code>}`.
 *
 * @param {number} status - HTTP status for the response.
 * @param {string} code - Error code placed under the `error` key.
 * @returns {Response} Response carrying the JSON body and JSON content type.
 */
function jsonError(status, code) {
  const body = JSON.stringify({ error: code });
  return new Response(body, { status, headers: JSON_HEADERS });
}
35
/**
 * Compares two secret strings without short-circuiting on the first
 * difference or on a length mismatch.
 *
 * Non-string input (for example an unset configured token) is reported as a
 * mismatch instead of throwing, so callers fail closed.
 *
 * @param {string} a - Candidate value (typically supplied by the request).
 * @param {string} b - Expected value (typically from configuration).
 * @returns {boolean} True only when both are strings with identical content.
 */
function tokensEqual(a, b) {
  if (typeof a !== "string" || typeof b !== "string") {
    return false;
  }
  // Seed the accumulator with the length difference so unequal lengths can
  // never compare equal, while the loop below still runs over all of `a`.
  let mismatch = a.length ^ b.length;
  // Wrap the index into `b` so every iteration reads a real position; the
  // `|| 1` avoids a modulo by zero when `b` is empty.
  const span = b.length || 1;
  for (let i = 0; i < a.length; i++) {
    mismatch |= a.charCodeAt(i) ^ b.charCodeAt(i % span);
  }
  return mismatch === 0;
}
43
/**
 * Authorizes a request against the configured install token and extension id.
 *
 * The request must carry `Authorization: Bearer <token>` matching
 * `config.installToken`; otherwise a 401 "unauthorized" response is returned.
 * It must then either send an `Origin` equal to
 * `chrome-extension://<config.extensionId>` or an `X-Glassbox-Extension-Id`
 * header matching `config.extensionId`; otherwise a 403 "forbidden_origin"
 * response is returned.
 *
 * @param {{headers: {get(name: string): (string|null)}}} req - Incoming request.
 * @param {{installToken: string, extensionId: string}} config - Expected credentials.
 * @returns {Response|null} An error response, or null when the request is allowed.
 */
function checkAuth(req, config) {
  const { headers } = req;

  const authorization = headers.get("Authorization") ?? "";
  const bearerToken = /^Bearer\s+(.+)$/i.exec(authorization)?.[1]?.trim() ?? "";
  if (bearerToken === "" || !tokensEqual(bearerToken, config.installToken)) {
    return jsonError(401, "unauthorized");
  }

  const requestOrigin = headers.get("Origin") ?? "";
  const claimedExtensionId = headers.get("X-Glassbox-Extension-Id") ?? "";
  // Both checks are always evaluated before deciding; either one is sufficient.
  const originMatches = requestOrigin === `chrome-extension://${config.extensionId}`;
  const headerMatches =
    claimedExtensionId.length > 0 && tokensEqual(claimedExtensionId, config.extensionId);

  return originMatches || headerMatches ? null : jsonError(403, "forbidden_origin");
}
60
+
61
+ // src/profiles.ts
62
// Lowering settings used as the `lowering` value of the Anthropic model
// profiles in this file.
const ANTHROPIC_LOWERING_BASE = {
  // System prompt is carried inline.
  system: { mode: "inline" },
  cache: {
    strategy: "cache_control",
    minTokens: 1024,
    // Presumably the price multiplier for cached input (0.1 is described as a
    // 10x cache discount elsewhere in this file) — confirm against consumers.
    discount: 0.1,
    ttlSeconds: 5 * 60,
  },
  tools: { format: "anthropic" },
};
72
// Lowering settings that the Google model profiles in this file spread into
// their own `lowering` objects, overriding individual fields as needed.
const GOOGLE_LOWERING_BASE = {
  // System prompt travels in its own request field rather than inline.
  system: { mode: "separate", field: "systemInstruction" },
  cache: {
    strategy: "cachedContent",
    minTokens: 4 * 1024,
    // Presumably the price multiplier for cached input (0.25 is described as a
    // 4x cache discount elsewhere in this file) — confirm against consumers.
    discount: 0.25,
    ttlSeconds: 60 * 60,
  },
  tools: { format: "google" },
};
82
+ var PROFILES_RAW = [
83
+ // ── Anthropic ──
84
+ {
85
+ id: "claude-opus-4-7",
86
+ verifiedAgainstDocs: "2026-05-08",
87
+ provider: "anthropic",
88
+ status: "current",
89
+ maxContextTokens: 1e6,
90
+ maxOutputTokens: 128e3,
91
+ maxTools: 64,
92
+ parallelToolCalls: true,
93
+ structuredOutput: "grammar",
94
+ systemPromptMode: "inline",
95
+ streaming: true,
96
+ cliffs: [],
97
+ costInputPer1m: 5,
98
+ costOutputPer1m: 25,
99
+ lowering: ANTHROPIC_LOWERING_BASE,
100
+ recovery: [
101
+ {
102
+ signal: "rate_limit",
103
+ action: "escalate",
104
+ reason: "429 from Anthropic \u2014 escalate to fallback chain"
105
+ },
106
+ {
107
+ signal: "model_not_found",
108
+ action: "escalate",
109
+ reason: "Model deprecated/renamed \u2014 escalate (L-061)"
110
+ }
111
+ ],
112
+ strengths: ["reasoning", "agentic_coding", "long_context", "reliable_tool_use", "structured_output"],
113
+ weaknesses: ["cost", "latency"],
114
+ notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output.",
115
+ // Frontier perf. Drops on archetypes where parallel-tool throughput
116
+ // (hunt) or low-budget cost-sensitivity (classify/summarize) matters
117
+ // more than reasoning depth.
118
+ archetypePerf: {
119
+ critique: 10,
120
+ plan: 10,
121
+ generate: 9,
122
+ ask: 9,
123
+ extract: 9,
124
+ transform: 9,
125
+ hunt: 8,
126
+ // strong but Flash dominates parallel tool throughput
127
+ summarize: 8,
128
+ // overkill for tolerant archetype; cost-out of frontier
129
+ classify: 8
130
+ // overkill; brain-validated cheaper models cover this
131
+ }
132
+ },
133
+ {
134
+ id: "claude-opus-4-6",
135
+ verifiedAgainstDocs: "2026-05-08",
136
+ provider: "anthropic",
137
+ status: "legacy",
138
+ maxContextTokens: 1e6,
139
+ maxOutputTokens: 128e3,
140
+ maxTools: 64,
141
+ parallelToolCalls: true,
142
+ structuredOutput: "grammar",
143
+ systemPromptMode: "inline",
144
+ streaming: true,
145
+ cliffs: [],
146
+ costInputPer1m: 5,
147
+ costOutputPer1m: 25,
148
+ lowering: ANTHROPIC_LOWERING_BASE,
149
+ recovery: [
150
+ {
151
+ signal: "rate_limit",
152
+ action: "escalate",
153
+ reason: "429 from Anthropic \u2014 escalate to fallback chain"
154
+ },
155
+ {
156
+ signal: "model_not_found",
157
+ action: "escalate",
158
+ reason: "Model deprecated/renamed \u2014 escalate (L-061)"
159
+ }
160
+ ],
161
+ strengths: ["reasoning", "long_context", "reliable_tool_use", "structured_output", "extended_thinking"],
162
+ weaknesses: ["cost", "latency"],
163
+ notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only).",
164
+ // One notch below 4.7 across the board — extended-thinking edge does
165
+ // not flip any archetype ranking. Legacy: chains should prefer 4.7.
166
+ archetypePerf: {
167
+ critique: 9,
168
+ plan: 9,
169
+ generate: 9,
170
+ ask: 9,
171
+ extract: 9,
172
+ transform: 9,
173
+ hunt: 7,
174
+ summarize: 8,
175
+ classify: 8
176
+ }
177
+ },
178
+ {
179
+ id: "claude-sonnet-4-6",
180
+ verifiedAgainstDocs: "2026-05-08",
181
+ provider: "anthropic",
182
+ status: "current",
183
+ maxContextTokens: 1e6,
184
+ maxOutputTokens: 64e3,
185
+ maxTools: 64,
186
+ parallelToolCalls: true,
187
+ structuredOutput: "grammar",
188
+ systemPromptMode: "inline",
189
+ streaming: true,
190
+ cliffs: [],
191
+ costInputPer1m: 3,
192
+ costOutputPer1m: 15,
193
+ lowering: ANTHROPIC_LOWERING_BASE,
194
+ recovery: [
195
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" },
196
+ { signal: "model_not_found", action: "escalate", reason: "Deprecated \u2014 escalate (L-061)" }
197
+ ],
198
+ strengths: ["quality", "tool_use", "long_context", "cache_friendly", "extended_thinking"],
199
+ weaknesses: [],
200
+ notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output.",
201
+ // Master plan §6.2 anchor. Tier 0 for plan/generate/ask/extract/transform
202
+ // in starter chains; tier 1 cross-provider for hunt/summarize/classify.
203
+ archetypePerf: {
204
+ ask: 9,
205
+ generate: 9,
206
+ plan: 9,
207
+ critique: 9,
208
+ extract: 9,
209
+ transform: 9,
210
+ hunt: 7,
211
+ // strong but Flash beats on parallel tool throughput
212
+ summarize: 8,
213
+ // overkill for tolerant archetype
214
+ classify: 8
215
+ // overkill
216
+ }
217
+ },
218
+ {
219
+ id: "claude-haiku-4-5",
220
+ verifiedAgainstDocs: "2026-05-08",
221
+ provider: "anthropic",
222
+ status: "current",
223
+ maxContextTokens: 2e5,
224
+ maxOutputTokens: 64e3,
225
+ maxTools: 32,
226
+ parallelToolCalls: true,
227
+ structuredOutput: "grammar",
228
+ systemPromptMode: "inline",
229
+ streaming: true,
230
+ cliffs: [
231
+ {
232
+ metric: "tool_count",
233
+ threshold: 16,
234
+ action: "drop_to_top_relevant",
235
+ reason: "Haiku reliability degrades above ~16 tools"
236
+ }
237
+ ],
238
+ costInputPer1m: 1,
239
+ costOutputPer1m: 5,
240
+ lowering: ANTHROPIC_LOWERING_BASE,
241
+ recovery: [
242
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to Sonnet" }
243
+ ],
244
+ strengths: ["speed", "cost", "classification", "cache_friendly", "extended_thinking"],
245
+ weaknesses: ["complex_reasoning", "large_tool_sets"],
246
+ notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`.",
247
+ // Tier 1 cross-provider anchor for short-output chains (classify/
248
+ // summarize/extract/transform). Falls off on plan/critique where
249
+ // reasoning depth matters; competes with Pro on cost+latency.
250
+ archetypePerf: {
251
+ classify: 8,
252
+ summarize: 8,
253
+ ask: 7,
254
+ transform: 7,
255
+ extract: 7,
256
+ hunt: 6,
257
+ // tool reliability drops at 16 — cliff guard fires
258
+ generate: 6,
259
+ plan: 5,
260
+ critique: 4
261
+ // reasoning depth gap vs Sonnet/Opus
262
+ }
263
+ },
264
+ // ── Google ──
265
+ {
266
+ id: "gemini-2.5-flash",
267
+ verifiedAgainstDocs: "2026-05-08",
268
+ provider: "google",
269
+ status: "current",
270
+ maxContextTokens: 1048576,
271
+ maxOutputTokens: 65535,
272
+ maxTools: 128,
273
+ parallelToolCalls: true,
274
+ structuredOutput: "native",
275
+ systemPromptMode: "separate",
276
+ streaming: true,
277
+ cliffs: [
278
+ {
279
+ metric: "input_tokens",
280
+ threshold: 8e3,
281
+ action: "downgrade_quality_warning",
282
+ reason: "Quality degrades significantly above ~8K context tokens"
283
+ },
284
+ {
285
+ metric: "tool_count",
286
+ threshold: 20,
287
+ action: "drop_to_top_relevant",
288
+ reason: "Tool reliability drops above ~20 tools (despite 128 hard limit)"
289
+ },
290
+ {
291
+ metric: "thinking_with_short_output",
292
+ threshold: 1,
293
+ action: "force_thinking_budget_zero",
294
+ reason: "Thinking tokens consume maxOutputTokens \u2014 empty response if drained"
295
+ },
296
+ {
297
+ // s11 trust artifact (2026-05-02): brain showed 5/5 empty rate on
298
+ // tt-intelligence/summarize/gemini-2.5-flash with tools offered.
299
+ // v1's disable_thinking_for_short_output already fired and didn't
300
+ // help — disabling thinking is necessary but not sufficient. Tools
301
+ // present + summarize intent confuses Flash into a no-output state
302
+ // (likely tool-decision purgatory). Strip tools entirely for this
303
+ // archetype on this model.
304
+ metric: "tool_count",
305
+ threshold: 1,
306
+ whenIntent: "summarize",
307
+ action: "strip_tools",
308
+ reason: "Gemini Flash returns empty when summarize intent has tools offered (5/5 empty rate observed in v1 prod 2026-04-19, replayed into v2 brain 2026-04-29)"
309
+ }
310
+ ],
311
+ costInputPer1m: 0.3,
312
+ costOutputPer1m: 2.5,
313
+ lowering: {
314
+ ...GOOGLE_LOWERING_BASE,
315
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
316
+ },
317
+ recovery: [
318
+ {
319
+ signal: "empty_response_after_tool",
320
+ action: "retry_with_params",
321
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
322
+ maxRetries: 1,
323
+ reason: "Known: empty after tool result \u2014 retry with thinking off"
324
+ },
325
+ {
326
+ signal: "empty_response",
327
+ action: "retry_with_params",
328
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
329
+ maxRetries: 1,
330
+ reason: "Empty response \u2014 try with thinking off"
331
+ },
332
+ {
333
+ signal: "malformed_function_call",
334
+ action: "escalate",
335
+ reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
336
+ }
337
+ ],
338
+ strengths: ["speed", "volume", "classification", "1m_context", "cost"],
339
+ weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
340
+ notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs.",
341
+ // Master plan §6.2 anchor. Tier 0 for hunt (parallel tool throughput
342
+ // 15-75 calls/step beats Sonnet — L-040), summarize, classify.
343
+ archetypePerf: {
344
+ hunt: 9,
345
+ // L-040: parallel tool throughput 15-75/step
346
+ classify: 7,
347
+ // brain-validated, 218 rows
348
+ summarize: 7,
349
+ // brain-validated; cliff strips tools when present
350
+ transform: 7,
351
+ ask: 7,
352
+ generate: 6,
353
+ plan: 5,
354
+ extract: 6,
355
+ // alpha.8 MAX_TOKENS history on structured output
356
+ critique: 4
357
+ // reasoning shallower than Sonnet/Opus
358
+ }
359
+ },
360
+ {
361
+ // ── Gemini 2.5 Flash-Lite ──
362
+ // Onboarded 2026-05-13 (s22) after the model-release watcher surfaced
363
+ // it as a UNREGISTERED + NEW candidate. Released by Google July 2025,
364
+ // stable. Positioned BELOW Flash on the cost/perf frontier:
365
+ // input $0.10/M (Flash $0.30/M) — 3× cheaper
366
+ // output $0.40/M (Flash $2.50/M) — 6× cheaper
367
+ // cache $0.01/M — 1/10 of input (vs Flash 0.25 discount)
368
+ // Cliffs are HYPOTHESIZED from Flash's known failure modes — Flash-Lite
369
+ // is a smaller sibling, so we inherit Flash's cliffs at equal-or-tighter
370
+ // thresholds. The brain will validate/relax these as evidence accumulates
371
+ // per (archetype, model) tuple. Currently ZERO brain rows for this model.
372
+ id: "gemini-2.5-flash-lite",
373
+ verifiedAgainstDocs: "2026-05-13",
374
+ provider: "google",
375
+ status: "current",
376
+ maxContextTokens: 1048576,
377
+ maxOutputTokens: 65536,
378
+ maxTools: 128,
379
+ parallelToolCalls: true,
380
+ structuredOutput: "native",
381
+ systemPromptMode: "separate",
382
+ streaming: true,
383
+ cliffs: [
384
+ {
385
+ metric: "input_tokens",
386
+ threshold: 8e3,
387
+ action: "downgrade_quality_warning",
388
+ reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
389
+ },
390
+ {
391
+ metric: "tool_count",
392
+ threshold: 10,
393
+ action: "drop_to_top_relevant",
394
+ reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
395
+ },
396
+ {
397
+ metric: "thinking_with_short_output",
398
+ threshold: 1,
399
+ action: "force_thinking_budget_zero",
400
+ reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
401
+ },
402
+ {
403
+ // Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
404
+ // trust artifact, kgauto commit 3872832). Flash-Lite shares the
405
+ // same architectural family — almost certainly inherits this cliff.
406
+ // Ship the guard preemptively; brain telemetry confirms or relaxes.
407
+ metric: "tool_count",
408
+ threshold: 1,
409
+ whenIntent: "summarize",
410
+ action: "strip_tools",
411
+ reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
412
+ }
413
+ ],
414
+ costInputPer1m: 0.1,
415
+ costOutputPer1m: 0.4,
416
+ lowering: {
417
+ ...GOOGLE_LOWERING_BASE,
418
+ // Cache discount 10× (vs Flash 4×) — Google's spec is $0.01/M cache vs
419
+ // $0.10/M input. Material for repeat-prompt workloads (classify shape).
420
+ cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
421
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
422
+ },
423
+ recovery: [
424
+ {
425
+ signal: "empty_response_after_tool",
426
+ action: "retry_with_params",
427
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
428
+ maxRetries: 1,
429
+ reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
430
+ },
431
+ {
432
+ signal: "empty_response",
433
+ action: "retry_with_params",
434
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
435
+ maxRetries: 1,
436
+ reason: "Empty response \u2014 try with thinking off."
437
+ },
438
+ {
439
+ signal: "malformed_function_call",
440
+ action: "escalate",
441
+ reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
442
+ }
443
+ ],
444
+ strengths: ["lowest_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
445
+ weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
446
+ notes: "Bottom-frontier anchor on cost: $0.10/$0.40 per 1M tokens, 1M context, 65K max output. Released July 2025 (stable). Positioned for classify / summarize / transform archetypes where quality bar is forgiving. Cliffs inherited from Flash at equal-or-tighter thresholds \u2014 re-tune per (archetype) once brain has n\u226520 rows. Alpha.8 contract layer handles MAX_TOKENS-on-structured-output via fallback chain, so structuredOutput=native is safe to declare even though Flash had alpha.8 history. Cache discount in spec: $0.01/M = 1/10 of input (richer than Flash 25%) \u2014 meaningful for repeat-prompt workloads.",
447
+ // Tier 3 emergency floor for summarize/classify chains. ZERO brain
448
+ // rows — all values are starter hypotheses anchored to "smaller
449
+ // sibling of Flash, at-or-below Flash perf on every archetype." The
450
+ // first 50 brain rows per archetype will validate or relax these.
451
+ archetypePerf: {
452
+ classify: 6,
453
+ // starter hypothesis — verify (Flash is 7, lite likely ≤)
454
+ summarize: 6,
455
+ // starter hypothesis — verify; cliff strips tools
456
+ transform: 6,
457
+ // starter hypothesis — verify
458
+ ask: 5,
459
+ hunt: 5,
460
+ generate: 4,
461
+ extract: 4,
462
+ plan: 3,
463
+ critique: 3
464
+ }
465
+ },
466
+ {
467
+ id: "gemini-2.5-pro",
468
+ verifiedAgainstDocs: "2026-05-08",
469
+ provider: "google",
470
+ status: "current",
471
+ maxContextTokens: 1048576,
472
+ maxOutputTokens: 65535,
473
+ maxTools: 128,
474
+ parallelToolCalls: true,
475
+ structuredOutput: "native",
476
+ systemPromptMode: "separate",
477
+ streaming: true,
478
+ cliffs: [
479
+ {
480
+ metric: "input_tokens",
481
+ threshold: 2e5,
482
+ action: "downgrade_quality_warning",
483
+ reason: "Pricing doubles above 200K: input $1.25\u2192$2.50/M, output $10\u2192$15/M"
484
+ }
485
+ ],
486
+ costInputPer1m: 1.25,
487
+ costOutputPer1m: 10,
488
+ lowering: {
489
+ ...GOOGLE_LOWERING_BASE,
490
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
491
+ },
492
+ recovery: [
493
+ {
494
+ signal: "malformed_function_call",
495
+ action: "escalate",
496
+ reason: "MALFORMED_FUNCTION_CALL \u2014 escalate"
497
+ }
498
+ ],
499
+ strengths: ["reasoning", "1m_context", "structured_output", "tool_use"],
500
+ weaknesses: ["pricing_above_200k"],
501
+ // Master plan §3.3 anchor: tier-2 cross-provider in almost every chain.
502
+ // Sits on the frontier at perf-9 — close to Sonnet but cheaper input.
503
+ archetypePerf: {
504
+ critique: 9,
505
+ plan: 9,
506
+ ask: 8,
507
+ generate: 8,
508
+ extract: 8,
509
+ transform: 8,
510
+ hunt: 8,
511
+ // tier 1 cross-provider for hunt chain
512
+ summarize: 7,
513
+ classify: 7
514
+ }
515
+ },
516
+ {
517
+ id: "gemini-3.1-pro-preview",
518
+ verifiedAgainstDocs: "2026-05-08",
519
+ provider: "google",
520
+ status: "preview",
521
+ maxContextTokens: 1048576,
522
+ maxOutputTokens: 65535,
523
+ maxTools: 128,
524
+ parallelToolCalls: true,
525
+ structuredOutput: "native",
526
+ systemPromptMode: "separate",
527
+ streaming: true,
528
+ cliffs: [
529
+ {
530
+ metric: "input_tokens",
531
+ threshold: 2e5,
532
+ action: "downgrade_quality_warning",
533
+ reason: "Pricing doubles above 200K: input $2\u2192$4/M, output $12\u2192$18/M"
534
+ }
535
+ ],
536
+ costInputPer1m: 2,
537
+ costOutputPer1m: 12,
538
+ lowering: {
539
+ ...GOOGLE_LOWERING_BASE,
540
+ cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
541
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
542
+ },
543
+ recovery: [
544
+ {
545
+ signal: "malformed_function_call",
546
+ action: "escalate",
547
+ reason: "MALFORMED_FUNCTION_CALL \u2014 escalate"
548
+ }
549
+ ],
550
+ strengths: ["reasoning", "1m_context", "agentic_coding", "structured_output", "tool_use"],
551
+ weaknesses: ["cost", "preview_status", "pricing_above_200k"],
552
+ notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA.",
553
+ // Frontier-Gemini preview — bumped one notch over 2.5 Pro on agentic
554
+ // coding / reasoning per Google's release notes. Preview status:
555
+ // chains should stay on 2.5 Pro until GA. Starter hypothesis.
556
+ archetypePerf: {
557
+ critique: 10,
558
+ // Google claims step-change on reasoning
559
+ plan: 10,
560
+ ask: 9,
561
+ generate: 9,
562
+ extract: 9,
563
+ transform: 8,
564
+ hunt: 9,
565
+ // step-change agentic per Google
566
+ summarize: 8,
567
+ classify: 7
568
+ }
569
+ },
570
+ // ── DeepSeek ──
571
+ // 2026-05-08 audit (L-073): DeepSeek's `deepseek-chat` was silently aliased
572
+ // to `deepseek-v4-flash` non-thinking mode. Old kgauto profile claimed 64k
573
+ // context + $0.27/$1.10 — actual is 1M context + $0.14/$0.28. Now modeled
574
+ // as: V4-Flash + V4-Pro as canonical profiles; deepseek-chat and
575
+ // deepseek-reasoner registered as aliases (see ALIASES below).
576
+ {
577
+ id: "deepseek-v4-flash",
578
+ verifiedAgainstDocs: "2026-05-08",
579
+ provider: "deepseek",
580
+ status: "current",
581
+ maxContextTokens: 1e6,
582
+ maxOutputTokens: 384e3,
583
+ maxTools: 16,
584
+ parallelToolCalls: false,
585
+ structuredOutput: "native",
586
+ systemPromptMode: "inline",
587
+ streaming: true,
588
+ cliffs: [
589
+ {
590
+ metric: "tool_count",
591
+ threshold: 1,
592
+ action: "drop_to_top_relevant",
593
+ reason: "Sequential tool calls only \u2014 L-040"
594
+ }
595
+ ],
596
+ costInputPer1m: 0.14,
597
+ costOutputPer1m: 0.28,
598
+ lowering: {
599
+ system: { mode: "inline" },
600
+ cache: { strategy: "unsupported" },
601
+ tools: { format: "deepseek" }
602
+ },
603
+ recovery: [
604
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" }
605
+ ],
606
+ strengths: ["cost", "1m_context", "json_output", "code", "reasoning"],
607
+ weaknesses: ["parallel_tools", "large_tool_sets"],
608
+ notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES.",
609
+ // Master plan §6.2 anchor. Brain-validated tier 1 cross-provider for
610
+ // classify (169 rows, 0% empty). Tier 0 for summarize-with-no-tools.
611
+ // Falls off on hunt (sequential tools — L-040) and reasoning depth.
612
+ archetypePerf: {
613
+ classify: 7,
614
+ // brain-validated, 169 rows
615
+ summarize: 7,
616
+ // archetype-tolerant, no brain evidence yet
617
+ ask: 6,
618
+ transform: 6,
619
+ generate: 5,
620
+ plan: 5,
621
+ extract: 5,
622
+ critique: 4,
623
+ hunt: 4
624
+ // sequential tool calls only — L-040
625
+ }
626
+ },
627
+ {
628
+ id: "deepseek-v4-pro",
629
+ verifiedAgainstDocs: "2026-05-08",
630
+ provider: "deepseek",
631
+ status: "current",
632
+ maxContextTokens: 1e6,
633
+ maxOutputTokens: 384e3,
634
+ maxTools: 16,
635
+ parallelToolCalls: false,
636
+ structuredOutput: "native",
637
+ systemPromptMode: "inline",
638
+ streaming: true,
639
+ cliffs: [
640
+ {
641
+ metric: "tool_count",
642
+ threshold: 1,
643
+ action: "drop_to_top_relevant",
644
+ reason: "Sequential tool calls only \u2014 L-040"
645
+ }
646
+ ],
647
+ // Profile carries REGULAR pricing, not the 75%-off promo (ends 2026-05-31).
648
+ // Under-estimating cost is worse than over-estimating for budget caps.
649
+ costInputPer1m: 1.74,
650
+ costOutputPer1m: 3.48,
651
+ lowering: {
652
+ system: { mode: "inline" },
653
+ cache: { strategy: "unsupported" },
654
+ tools: { format: "deepseek" }
655
+ },
656
+ recovery: [
657
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" }
658
+ ],
659
+ strengths: ["quality", "reasoning", "1m_context", "json_output", "code", "extended_thinking"],
660
+ weaknesses: ["parallel_tools", "large_tool_sets"],
661
+ notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking.",
662
+ // Master plan §3.3: tier 3 cross-provider for plan chain. Reasoning
663
+ // bumped one notch over V4-Flash; same parallel-tool ceiling.
664
+ archetypePerf: {
665
+ plan: 7,
666
+ // §3.3 tier 3 for plan
667
+ critique: 6,
668
+ ask: 7,
669
+ generate: 6,
670
+ classify: 7,
671
+ summarize: 7,
672
+ extract: 6,
673
+ transform: 6,
674
+ hunt: 4
675
+ // sequential tools — same as V4-Flash
676
+ }
677
+ },
678
+ // ── OpenAI ──
679
+ // alpha.16 (2026-05-17): close the half-supported provider gap. env.ts
680
+ // already registered OPENAI_API_KEY + executeOpenAI + normalizeOpenAILike
681
+ // + lowerOpenAI all existed; profile entries were missing, so the
682
+ // alpha.10 auto-filter would mark openai-keyed models reachable but
683
+ // there were no profiles to filter IN. Half-supported is now fully
684
+ // supported. PB request `openai-provider-profiles` (2026-05-16).
685
+ //
686
+ // Profile data verified against developers.openai.com/api/docs/pricing
687
+ // + per-model pages 2026-05-17. L-049/L-081 step-zero: no AI-trained
688
+ // numbers — fetched live from OpenAI's docs. As of 2026-05, OpenAI's
689
+ // current flagship is gpt-5.5 (2025-12 cutoff); gpt-5.4-{base,mini,nano}
690
+ // are the workhorse family. gpt-4.1 + gpt-4o are legacy.
691
+ //
692
+ // Both 5.5 and 5.4 carry a 272K input-token pricing cliff (2x input,
693
+ // 1.5x output beyond that). Modeled as a `downgrade_quality_warning`
694
+ // cliff because it ranks the model down at large-context shapes — the
695
+ // semantics of "this model is now 2x more expensive" map onto the
696
+ // existing penalty mechanism. Cost-watcher will catch high-context
697
+ // spikes empirically; the cliff prevents naive routing into the doubled
698
+ // pricing zone.
699
+ {
700
+ id: "gpt-5.5",
701
+ verifiedAgainstDocs: "2026-05-17",
702
+ provider: "openai",
703
+ status: "current",
704
+ maxContextTokens: 105e4,
705
+ maxOutputTokens: 128e3,
706
+ maxTools: 64,
707
+ parallelToolCalls: true,
708
+ structuredOutput: "native",
709
+ systemPromptMode: "inline",
710
+ streaming: true,
711
+ cliffs: [
712
+ {
713
+ metric: "input_tokens",
714
+ threshold: 272e3,
715
+ action: "downgrade_quality_warning",
716
+ reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
717
+ }
718
+ ],
719
+ costInputPer1m: 5,
720
+ costOutputPer1m: 30,
721
+ lowering: {
722
+ system: { mode: "inline" },
723
+ // OpenAI caching is implicit (auto-applied to repeated prefixes
724
+ // ≥1024 tokens for prompt_tokens_details.cached_tokens). No
725
+ // wire-format marker. Discount: 10x for cached input ($0.50/$5.00).
726
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
727
+ tools: { format: "openai" }
728
+ },
729
+ recovery: [
730
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
731
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
732
+ ],
733
+ strengths: ["reasoning", "agentic_coding", "long_context", "structured_output", "reliable_tool_use", "reasoning_effort_knob"],
734
+ weaknesses: ["cost", "pricing_cliff_at_272k"],
735
+ notes: "OpenAI frontier (2026-05). 1M context (1.05M total), 128K max output, 2025-12 cutoff. Reasoning effort knob (none/low/medium/high/xhigh). Pricing cliff at 272K input.",
736
+ // Frontier-tier perf hypothesis. Anchored to Opus 4.7 row (similar
737
+ // price/positioning). Brain evidence will refine; no telemetry yet.
738
+ archetypePerf: {
739
+ critique: 9,
740
+ plan: 9,
741
+ generate: 9,
742
+ ask: 9,
743
+ extract: 9,
744
+ transform: 9,
745
+ hunt: 8,
746
+ // parallel tool support good but cliff at 272K hurts deep multi-step
747
+ summarize: 7,
748
+ // overkill for tolerant archetype
749
+ classify: 7
750
+ // overkill; cheaper models cover this
751
+ }
752
+ },
753
+ {
754
+ id: "gpt-5.4",
755
+ verifiedAgainstDocs: "2026-05-17",
756
+ provider: "openai",
757
+ status: "current",
758
+ maxContextTokens: 105e4,
759
+ maxOutputTokens: 128e3,
760
+ maxTools: 64,
761
+ parallelToolCalls: true,
762
+ structuredOutput: "native",
763
+ systemPromptMode: "inline",
764
+ streaming: true,
765
+ cliffs: [
766
+ {
767
+ metric: "input_tokens",
768
+ threshold: 272e3,
769
+ action: "downgrade_quality_warning",
770
+ reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
771
+ }
772
+ ],
773
+ costInputPer1m: 2.5,
774
+ costOutputPer1m: 15,
775
+ lowering: {
776
+ system: { mode: "inline" },
777
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
778
+ tools: { format: "openai" }
779
+ },
780
+ recovery: [
781
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
782
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
783
+ ],
784
+ strengths: ["reasoning", "long_context", "structured_output", "reliable_tool_use"],
785
+ weaknesses: ["pricing_cliff_at_272k"],
786
+ notes: "OpenAI workhorse (2026-05). 1M context (1.05M total), 128K max output, 2025-08 cutoff. Pricing cliff at 272K input. Pairs cleanly with Sonnet 4.6 on cost ($2.50/$15.00 vs $3.00/$15.00).",
787
+ // Anchored to Sonnet 4.6 row (similar price/positioning). Slight
788
+ // anthropic-side edge on agentic coding per master plan vibe.
789
+ archetypePerf: {
790
+ critique: 8,
791
+ plan: 8,
792
+ generate: 8,
793
+ ask: 8,
794
+ extract: 8,
795
+ transform: 8,
796
+ hunt: 7,
797
+ summarize: 7,
798
+ classify: 7
799
+ }
800
+ },
801
+ {
802
+ id: "gpt-5.4-mini",
803
+ verifiedAgainstDocs: "2026-05-17",
804
+ provider: "openai",
805
+ status: "current",
806
+ maxContextTokens: 4e5,
807
+ maxOutputTokens: 128e3,
808
+ maxTools: 64,
809
+ parallelToolCalls: true,
810
+ structuredOutput: "native",
811
+ systemPromptMode: "inline",
812
+ streaming: true,
813
+ cliffs: [],
814
+ costInputPer1m: 0.75,
815
+ costOutputPer1m: 4.5,
816
+ lowering: {
817
+ system: { mode: "inline" },
818
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
819
+ tools: { format: "openai" }
820
+ },
821
+ recovery: [
822
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
823
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
824
+ ],
825
+ strengths: ["cost", "speed", "agentic_coding", "structured_output", "reliable_tool_use"],
826
+ weaknesses: ["reasoning_depth"],
827
+ notes: "OpenAI mini-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. OpenAI describes as 'strongest mini model for coding, computer use, subagents.' Cache discount 10x ($0.075 input).",
828
+ // Mini-tier hypothesis. Anchored to Haiku 4.5 + Flash row pricing.
829
+ // Cost is slightly lower than Haiku ($0.75 vs $1.00 input), and
830
+ // OpenAI claims strong coding/subagent perf.
831
+ archetypePerf: {
832
+ ask: 7,
833
+ generate: 7,
834
+ extract: 7,
835
+ transform: 7,
836
+ classify: 7,
837
+ summarize: 7,
838
+ hunt: 7,
839
+ plan: 6,
840
+ critique: 5
841
+ // reasoning depth gap — frontier models handle this
842
+ }
843
+ },
844
+ {
845
+ id: "gpt-5.4-nano",
846
+ verifiedAgainstDocs: "2026-05-17",
847
+ provider: "openai",
848
+ status: "current",
849
+ maxContextTokens: 4e5,
850
+ maxOutputTokens: 128e3,
851
+ maxTools: 64,
852
+ parallelToolCalls: true,
853
+ structuredOutput: "native",
854
+ systemPromptMode: "inline",
855
+ streaming: true,
856
+ cliffs: [],
857
+ costInputPer1m: 0.2,
858
+ costOutputPer1m: 1.25,
859
+ lowering: {
860
+ system: { mode: "inline" },
861
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
862
+ tools: { format: "openai" }
863
+ },
864
+ recovery: [
865
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
866
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
867
+ ],
868
+ strengths: ["cost", "speed", "volume", "structured_output"],
869
+ weaknesses: ["reasoning_depth", "no_computer_use"],
870
+ notes: "OpenAI nano-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. 'Cheapest GPT-5.4-class for simple high-volume tasks.' No fine-tuning, no computer-use tools. Cache discount 10x.",
871
+ // Nano-tier. Anchored to Flash-Lite row ($0.10/$0.40 vs nano's
872
+ // $0.20/$1.25). Slightly more expensive than Flash-Lite but with
873
+ // OpenAI brand reliability. Good fit for classify/summarize floor.
874
+ archetypePerf: {
875
+ classify: 7,
876
+ summarize: 6,
877
+ ask: 6,
878
+ transform: 6,
879
+ extract: 6,
880
+ generate: 5,
881
+ hunt: 5,
882
+ plan: 4,
883
+ critique: 3
884
+ // not for reasoning archetypes
885
+ }
886
+ },
887
+ // ── Auto-onboarded (UNVERIFIED) ──
888
+ // Cloned by scripts/auto-onboard-models.mjs from a same-family template.
889
+ // Each entry's pricing/context/cliffs/lowering reflects the template, NOT
890
+ // provider docs. Verify before promoting status to 'current' (L-049/L-081).
891
+ {
892
+ // s37 (2026-05-21): UNVERIFIED-AUTO-ONBOARD → verified against
893
+ // ai.google.dev/gemini-api/docs/models/gemini-3-flash-preview +
894
+ // ai.google.dev/gemini-api/docs/pricing. L-081 catches:
895
+ // maxOutputTokens 65_535 → 65_536 (off-by-one)
896
+ // costInputPer1m 0.30 → 0.50 (template-cloned from 2.5-flash; actual is 1.67× more expensive)
897
+ // costOutputPer1m 2.50 → 3.00 (template-cloned; actual 1.2× more expensive)
898
+ // cache discount default 0.25 → 0.10 (10× discount, $0.05/$0.50 per docs)
899
+ // Cliffs inherited from 2.5-flash conservatively. The 8K-context-quality
900
+ // cliff was a 2.5-Flash observation — Google positions Gemini 3 as
901
+ // sustained-frontier-on-long-context; brain evidence will validate/relax.
902
+ // Kept as guard for now.
903
+ id: "gemini-3-flash-preview",
904
+ verifiedAgainstDocs: "2026-05-21",
905
+ provider: "google",
906
+ status: "preview",
907
+ maxContextTokens: 1048576,
908
+ maxOutputTokens: 65536,
909
+ maxTools: 128,
910
+ parallelToolCalls: true,
911
+ structuredOutput: "native",
912
+ systemPromptMode: "separate",
913
+ streaming: true,
914
+ cliffs: [
915
+ {
916
+ metric: "input_tokens",
917
+ threshold: 8e3,
918
+ action: "downgrade_quality_warning",
919
+ reason: "Inherited from 2.5-flash guard; brain evidence on Gemini 3 long-context quality will validate/relax"
920
+ },
921
+ {
922
+ metric: "tool_count",
923
+ threshold: 20,
924
+ action: "drop_to_top_relevant",
925
+ reason: "Tool reliability drops above ~20 tools (despite 128 hard limit) \u2014 inherited from Flash family"
926
+ },
927
+ {
928
+ metric: "thinking_with_short_output",
929
+ threshold: 1,
930
+ action: "force_thinking_budget_zero",
931
+ reason: "Thinking tokens consume maxOutputTokens \u2014 empty response if drained"
932
+ },
933
+ {
934
+ // Inherited from gemini-2.5-flash s11 trust artifact. Family-likely
935
+ // failure mode for Flash architecture. Keep preemptively until brain
936
+ // evidence on Gemini 3 specifically.
937
+ metric: "tool_count",
938
+ threshold: 1,
939
+ whenIntent: "summarize",
940
+ action: "strip_tools",
941
+ reason: "Inherited from 2.5-flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on 3-flash-preview specifically."
942
+ }
943
+ ],
944
+ costInputPer1m: 0.5,
945
+ costOutputPer1m: 3,
946
+ lowering: {
947
+ ...GOOGLE_LOWERING_BASE,
948
+ // 10× cache discount per Google pricing: $0.05/M cached vs $0.50/M input.
949
+ cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
950
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
951
+ },
952
+ recovery: [
953
+ {
954
+ signal: "empty_response_after_tool",
955
+ action: "retry_with_params",
956
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
957
+ maxRetries: 1,
958
+ reason: "Known: empty after tool result \u2014 retry with thinking off"
959
+ },
960
+ {
961
+ signal: "empty_response",
962
+ action: "retry_with_params",
963
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
964
+ maxRetries: 1,
965
+ reason: "Empty response \u2014 try with thinking off"
966
+ },
967
+ {
968
+ signal: "malformed_function_call",
969
+ action: "escalate",
970
+ reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
971
+ }
972
+ ],
973
+ strengths: ["speed", "volume", "classification", "1m_context", "cost"],
974
+ weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
975
+ notes: "Verified s37 (2026-05-21) against Google docs. Step-change positioning vs 2.5-flash on agentic loops per Google's release notes (Dec 2025). Pricing 1.67\xD7/1.2\xD7 higher than 2.5-flash; cache discount 10\xD7 (vs 4\xD7 for 2.5). Status=preview until brain evidence accumulates.",
976
+ // Anchored to 2.5-flash archetypePerf as starter, with judgment adjustments
977
+ // for Google's "step-change on agentic" positioning. Brain evidence (zero
978
+ // rows today) will replace these starter values.
979
+ archetypePerf: {
980
+ hunt: 9,
981
+ // Inherits 2.5-flash L-040 parallel-tool tier; Google positions 3 as agentic-loop upgrade
982
+ classify: 7,
983
+ // Inherits 2.5-flash brain-validated tier (218 rows on 2.5)
984
+ summarize: 7,
985
+ // Inherits 2.5-flash; cliff strips tools when present
986
+ transform: 7,
987
+ ask: 8,
988
+ // +1 vs 2.5-flash — sustained-frontier positioning
989
+ generate: 7,
990
+ // +1 vs 2.5-flash — agentic coding upgrade per Google
991
+ plan: 6,
992
+ // +1 vs 2.5-flash — complex iterations per positioning
993
+ extract: 6,
994
+ critique: 5
995
+ // +1 vs 2.5-flash — but still below Sonnet/Opus reasoning floor
996
+ }
997
+ },
998
+ {
999
+ // ── Gemini 3.1 Flash-Lite ──
1000
+ // Onboarded 2026-05-16 by auto-onboarder; s37 (2026-05-21) verified
1001
+ // against ai.google.dev/gemini-api/docs/pricing.
1002
+ //
1003
+ // L-081 CATCHES (template clone from 2.5-flash-lite was 2.5-3.75× too cheap):
1004
+ // costInputPer1m 0.10 → 0.25 (template clone undervalued by 2.5×)
1005
+ // costOutputPer1m 0.40 → 1.50 (template clone undervalued by 3.75×)
1006
+ //
1007
+ // Real 3.1-flash-lite is NOT a cost-equivalent successor to 2.5-flash-lite —
1008
+ // it sits between 2.5-flash-lite ($0.10/$0.40) and 2.5-flash ($0.30/$2.50).
1009
+ // Cache discount 10× verified ($0.025/M cached vs $0.25/M input).
1010
+ //
1011
+ // Cliffs are HYPOTHESIZED from 2.5-flash family; brain evidence pending.
1012
+ id: "gemini-3.1-flash-lite",
1013
+ verifiedAgainstDocs: "2026-05-21",
1014
+ provider: "google",
1015
+ status: "preview",
1016
+ maxContextTokens: 1048576,
1017
+ maxOutputTokens: 65536,
1018
+ maxTools: 128,
1019
+ parallelToolCalls: true,
1020
+ structuredOutput: "native",
1021
+ systemPromptMode: "separate",
1022
+ streaming: true,
1023
+ cliffs: [
1024
+ {
1025
+ metric: "input_tokens",
1026
+ threshold: 8e3,
1027
+ action: "downgrade_quality_warning",
1028
+ reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
1029
+ },
1030
+ {
1031
+ metric: "tool_count",
1032
+ threshold: 10,
1033
+ action: "drop_to_top_relevant",
1034
+ reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
1035
+ },
1036
+ {
1037
+ metric: "thinking_with_short_output",
1038
+ threshold: 1,
1039
+ action: "force_thinking_budget_zero",
1040
+ reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
1041
+ },
1042
+ {
1043
+ // Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
1044
+ // trust artifact, kgauto commit 3872832). Flash-Lite shares the
1045
+ // same architectural family — almost certainly inherits this cliff.
1046
+ // Ship the guard preemptively; brain telemetry confirms or relaxes.
1047
+ metric: "tool_count",
1048
+ threshold: 1,
1049
+ whenIntent: "summarize",
1050
+ action: "strip_tools",
1051
+ reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
1052
+ }
1053
+ ],
1054
+ costInputPer1m: 0.25,
1055
+ costOutputPer1m: 1.5,
1056
+ lowering: {
1057
+ ...GOOGLE_LOWERING_BASE,
1058
+ // Cache discount 10× (vs Flash 4×) — Google docs s37: $0.025/M cached vs
1059
+ // $0.25/M input. Material for repeat-prompt workloads (classify shape).
1060
+ cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
1061
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
1062
+ },
1063
+ recovery: [
1064
+ {
1065
+ signal: "empty_response_after_tool",
1066
+ action: "retry_with_params",
1067
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
1068
+ maxRetries: 1,
1069
+ reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
1070
+ },
1071
+ {
1072
+ signal: "empty_response",
1073
+ action: "retry_with_params",
1074
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
1075
+ maxRetries: 1,
1076
+ reason: "Empty response \u2014 try with thinking off."
1077
+ },
1078
+ {
1079
+ signal: "malformed_function_call",
1080
+ action: "escalate",
1081
+ reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
1082
+ }
1083
+ ],
1084
+ strengths: ["low_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
1085
+ weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
1086
+ notes: "Verified s37 (2026-05-21) against Google docs. Sits between 2.5-flash-lite (cheaper) and 2.5-flash (more expensive) on cost frontier; 2.5\xD7 more expensive than initial template-clone. Cliffs hypothesized from Flash family \u2014 brain evidence pending.",
1087
+ // Tier 2-3 floor for summarize/classify chains at the new (verified) price
1088
+ // point. ZERO brain rows — values are starter hypotheses anchored to
1089
+ // "smaller sibling of Flash at higher cost than 2.5-flash-lite." The first
1090
+ // 50 brain rows per archetype will validate or relax these.
1091
+ archetypePerf: {
1092
+ classify: 6,
1093
+ // starter hypothesis — verify (Flash is 7, lite likely ≤)
1094
+ summarize: 6,
1095
+ // starter hypothesis — verify; cliff strips tools
1096
+ transform: 6,
1097
+ // starter hypothesis — verify
1098
+ ask: 5,
1099
+ hunt: 5,
1100
+ generate: 4,
1101
+ extract: 4,
1102
+ plan: 3,
1103
+ critique: 3
1104
+ }
1105
+ },
1106
+ // ─────────────────────────────────────────────────────────────────────────
1107
+ // Gemini 3.5 Flash — hand-onboarded s37 (2026-05-21)
1108
+ //
1109
+ // Google positioning ("Most intelligent for sustained frontier performance
1110
+ // on agentic and coding tasks" / "particularly effective for rapid agentic
1111
+ // loops involving complex coding cycles and iterations") suggests this is
1112
+ // the Flash-family upgrade specifically aimed at hunt-shape workloads.
1113
+ // Pricing 5× input / 3.6× output vs 2.5-flash — material cost premium.
1114
+ // archetypePerf adjusted +1 vs 2.5-flash on ask/generate/plan/critique
1115
+ // (sustained-frontier positioning); hunt held at 9 inherited from L-040
1116
+ // family parallel-tool tier; brain evidence will validate within 50 rows.
1117
+ //
1118
+ // Cliffs inherited conservatively from 2.5-flash. Google's "sustained
1119
+ // frontier on long-context" positioning suggests the 8K cliff may not
1120
+ // apply to 3.5 — keep as guard until brain evidence shows otherwise.
1121
+ //
1122
+ // Specs verified against:
1123
+ // ai.google.dev/gemini-api/docs/models/gemini-3.5-flash
1124
+ // ai.google.dev/gemini-api/docs/pricing (Standard tier)
1125
+ // ─────────────────────────────────────────────────────────────────────────
1126
+ {
1127
+ id: "gemini-3.5-flash",
1128
+ verifiedAgainstDocs: "2026-05-21",
1129
+ provider: "google",
1130
+ status: "current",
1131
+ maxContextTokens: 1048576,
1132
+ maxOutputTokens: 65536,
1133
+ maxTools: 128,
1134
+ parallelToolCalls: true,
1135
+ structuredOutput: "native",
1136
+ systemPromptMode: "separate",
1137
+ streaming: true,
1138
+ cliffs: [
1139
+ {
1140
+ metric: "input_tokens",
1141
+ threshold: 8e3,
1142
+ action: "downgrade_quality_warning",
1143
+ reason: "Inherited from 2.5-flash guard; Google positions 3.5 as sustained-frontier-on-long-context but brain evidence pending"
1144
+ },
1145
+ {
1146
+ metric: "tool_count",
1147
+ threshold: 20,
1148
+ action: "drop_to_top_relevant",
1149
+ reason: "Inherited from Flash family: tool reliability drops above ~20 (despite 128 hard limit). Validate per (archetype, model) after n\u226520."
1150
+ },
1151
+ {
1152
+ metric: "thinking_with_short_output",
1153
+ threshold: 1,
1154
+ action: "force_thinking_budget_zero",
1155
+ reason: "Thinking mode supported per Google docs; same drain risk as 2.5-flash \u2014 thinking tokens consume maxOutputTokens"
1156
+ },
1157
+ {
1158
+ // Inherited from 2.5-flash s11 trust artifact (5/5 empty rate on
1159
+ // tt-intelligence/summarize/gemini-2.5-flash with tools offered).
1160
+ // Family-likely failure mode for Flash architecture across versions.
1161
+ // Keep preemptively until brain evidence on 3.5-flash specifically.
1162
+ metric: "tool_count",
1163
+ threshold: 1,
1164
+ whenIntent: "summarize",
1165
+ action: "strip_tools",
1166
+ reason: "Inherited from 2.5-flash s11 cliff (kgauto commit 3872832): summarize+tools \u2192 empty response. Preemptive guard until brain evidence on 3.5-flash specifically."
1167
+ }
1168
+ ],
1169
+ costInputPer1m: 1.5,
1170
+ costOutputPer1m: 9,
1171
+ lowering: {
1172
+ ...GOOGLE_LOWERING_BASE,
1173
+ // 10× cache discount per Google pricing: $0.15/M cached vs $1.50/M input.
1174
+ cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
1175
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
1176
+ },
1177
+ recovery: [
1178
+ {
1179
+ signal: "empty_response_after_tool",
1180
+ action: "retry_with_params",
1181
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
1182
+ maxRetries: 1,
1183
+ reason: "Inherited Flash-family pattern: empty after tool result \u2014 retry with thinking off"
1184
+ },
1185
+ {
1186
+ signal: "empty_response",
1187
+ action: "retry_with_params",
1188
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
1189
+ maxRetries: 1,
1190
+ reason: "Empty response \u2014 try with thinking off"
1191
+ },
1192
+ {
1193
+ signal: "malformed_function_call",
1194
+ action: "escalate",
1195
+ reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
1196
+ }
1197
+ ],
1198
+ strengths: ["agentic_loops", "coding", "1m_context", "parallel_tools", "thinking_mode", "sustained_frontier"],
1199
+ weaknesses: ["cost_vs_2_5_flash", "no_brain_evidence_yet"],
1200
+ notes: "Hand-onboarded s37 (2026-05-21) verified against Google docs. Stable status; positioned as Flash-family upgrade for agentic loops and coding. 5\xD7/3.6\xD7 more expensive than 2.5-flash but Google claims step-change on sustained frontier work. archetypePerf adjustments are judgment-grounded starter hypotheses \u2014 brain evidence will validate within ~50 rows per archetype.",
1201
+ // Starter hypothesis: anchored to 2.5-flash archetypePerf with +1
1202
+ // adjustments where Google's positioning explicitly supports
1203
+ // (agentic/coding/sustained). Hunt held at 9 inherited from L-040 family
1204
+ // parallel-tool tier. Brain evidence will replace.
1205
+ archetypePerf: {
1206
+ hunt: 9,
1207
+ // Inherited from 2.5-flash L-040 parallel-tool tier; Google positions 3.5 as agentic-loop champion
1208
+ classify: 7,
1209
+ // Inherited from 2.5-flash brain-validated tier (218 rows on 2.5)
1210
+ summarize: 7,
1211
+ // Inherited from 2.5-flash; cliff strips tools when present
1212
+ transform: 7,
1213
+ ask: 8,
1214
+ // +1 vs 2.5-flash — sustained-frontier positioning
1215
+ generate: 8,
1216
+ // +2 vs 2.5-flash (6→8) — Google: "complex coding cycles and iterations"
1217
+ plan: 7,
1218
+ // +2 vs 2.5-flash (5→7) — "complex iterations" positioning
1219
+ extract: 7,
1220
+ // +1 vs 2.5-flash — sustained-frontier on structured tasks
1221
+ critique: 5
1222
+ // +1 vs 2.5-flash — but still below Sonnet/Opus reasoning floor
1223
+ }
1224
+ }
1225
+ ];
1226
// Static alias table: maps legacy / provider-side model names to the id used
// by the bundled profiles.
var ALIASES = {
  // DeepSeek's own model routing — both names served by V4-Flash.
  "deepseek-chat": "deepseek-v4-flash",
  "deepseek-reasoner": "deepseek-v4-flash",
  // Legacy kgauto typo — actual API alias is dash-form (alpha.1 had dot).
  "claude-haiku-4.5": "claude-haiku-4-5"
};

// Optional runtime overrides. `resolveAlias` is consulted by canonicalId and
// `getProfile` by tryGetProfile; both are optional and may be left unset.
var brainHook = {};

/**
 * Resolve a model id to its canonical form.
 *
 * Resolution order: `brainHook.resolveAlias` (when set and it returns a
 * non-nullish value), then the static ALIASES table, then the id unchanged.
 *
 * @param {string} id - model id or alias
 * @returns {string} canonical id, or `id` itself when no alias applies
 */
function canonicalId(id) {
  const hooked = brainHook.resolveAlias?.(id);
  if (hooked !== void 0 && hooked !== null) return hooked;
  // Own-property check: a bare `ALIASES[id]` would return inherited
  // Object.prototype members for ids such as "constructor" or "__proto__".
  return Object.prototype.hasOwnProperty.call(ALIASES, id) ? ALIASES[id] : id;
}
1237
// Lookup table from profile id to the bundled profile object.
var PROFILE_INDEX = new Map();
for (const bundledProfile of PROFILES_RAW) {
  PROFILE_INDEX.set(bundledProfile.id, bundledProfile);
}

/**
 * Look up a model profile without throwing.
 *
 * The id is canonicalised first; a profile supplied by `brainHook.getProfile`
 * takes precedence over the bundled index.
 *
 * @param {string} id - model id or alias
 * @returns {object|undefined} the profile, or undefined when none is known
 */
function tryGetProfile(id) {
  const resolvedId = canonicalId(id);
  const hookedProfile = brainHook.getProfile?.(resolvedId);
  return hookedProfile ?? PROFILE_INDEX.get(resolvedId);
}
1244
+
1245
+ // src/env.ts
1246
// Frozen list of provider names this module can resolve API keys for.
var SUPPORTED_PROVIDERS = Object.freeze(["anthropic", "google", "openai", "deepseek"]);

/**
 * Report whether `p` is one of the SUPPORTED_PROVIDERS entries.
 *
 * @param {*} p - candidate provider name
 * @returns {boolean}
 */
function isSupportedProvider(p) {
  return SUPPORTED_PROVIDERS.some((known) => known === p);
}
1255
// Environment variable names checked for each provider, in priority order.
// Both the outer object and every name list are frozen.
var PROVIDER_ENV_KEYS = Object.freeze(
  Object.fromEntries(
    [
      ["anthropic", ["ANTHROPIC_API_KEY"]],
      ["google", ["GOOGLE_API_KEY", "GEMINI_API_KEY", "GOOGLE_GENERATIVE_AI_API_KEY"]],
      ["openai", ["OPENAI_API_KEY"]],
      ["deepseek", ["DEEPSEEK_API_KEY"]]
    ].map(([provider, envNames]) => [provider, Object.freeze(envNames)])
  )
);
1265
/**
 * Return the ambient environment map.
 *
 * @returns {object} `process.env` when a `process` global with a truthy `env`
 *   exists, otherwise a fresh empty object
 */
function defaultEnv() {
  if (typeof process === "undefined" || !process.env) {
    return {};
  }
  return process.env;
}
1268
/**
 * Normalise a raw API-key value.
 *
 * @param {*} raw - candidate value (explicit option or environment entry)
 * @returns {string|undefined} the trimmed string, or undefined when `raw` is
 *   not a string or is empty / whitespace-only
 */
function readKeyValue(raw) {
  // Guard on type rather than `=== undefined`: a null or numeric value
  // would otherwise throw on `.trim()`.
  if (typeof raw !== "string") return void 0;
  const trimmed = raw.trim();
  return trimmed.length > 0 ? trimmed : void 0;
}
1273
/**
 * Find an API key for `provider`.
 *
 * An explicit `opts.apiKeys[provider]` wins; otherwise each name in
 * PROVIDER_ENV_KEYS[provider] is tried in order against `opts.envSource`
 * (or the default environment when none is given).
 *
 * @param {string} provider - provider name
 * @param {object} [opts] - optional `apiKeys` and `envSource`
 * @returns {string|undefined} the first non-empty key, or undefined when the
 *   provider is unsupported or no key is found
 */
function resolveProviderKey(provider, opts = {}) {
  if (!isSupportedProvider(provider)) return void 0;

  const override = readKeyValue(opts.apiKeys?.[provider]);
  if (override) return override;

  const source = opts.envSource ?? defaultEnv();
  const envNames = PROVIDER_ENV_KEYS[provider];
  for (let i = 0; i < envNames.length; i++) {
    const candidate = readKeyValue(source[envNames[i]]);
    if (candidate) return candidate;
  }
  return void 0;
}
1284
/**
 * Report whether an API key can be resolved for `provider`.
 *
 * @param {string} provider - provider name
 * @param {object} [opts] - forwarded to resolveProviderKey
 * @returns {boolean}
 */
function isProviderReachable(provider, opts = {}) {
  const resolvedKey = resolveProviderKey(provider, opts);
  return typeof resolvedKey !== "undefined";
}
1287
/**
 * Report whether `modelId` has a known profile whose provider is reachable.
 *
 * @param {string} modelId - model id or alias
 * @param {object} [opts] - forwarded to isProviderReachable
 * @returns {boolean} false when no profile is found for the model
 */
function isModelReachable(modelId, opts = {}) {
  const knownProfile = tryGetProfile(modelId);
  return knownProfile ? isProviderReachable(knownProfile.provider, opts) : false;
}
1292
+
1293
+ // src/brain-query.ts
1294
// Template for an empty cache state: no data, already expired, no refresh in
// flight, and no failure warning issued yet.
var FRESH_SNAPSHOT = { data: null, expiresAt: 0, refreshing: false, warned: false };

// Live cache state shared by every reader created by createBrainQueryCache.
// Starts as a shallow copy so the template itself is never mutated.
var snapshot = Object.assign({}, FRESH_SNAPSHOT);

// Active brain-query runtime; stays undefined until one is assigned elsewhere.
var runtime;
1302
/**
 * Build a synchronous reader for one brain table.
 *
 * Each call of the returned function:
 *  - returns `opts.bundledFallback()` when no runtime is set or the table is
 *    not in `runtime.enabledTables`;
 *  - starts a background refresh when the shared snapshot has expired and no
 *    refresh is already in flight;
 *  - returns `opts.mapRows(rows)` when the snapshot holds a non-empty array
 *    for the table, and the bundled fallback otherwise (including when
 *    `mapRows` throws).
 *
 * @param {{table: string, mapRows: Function, bundledFallback: Function}} opts
 * @returns {Function} zero-argument reader
 */
function createBrainQueryCache(opts) {
  return () => {
    const activeRuntime = runtime;
    if (!(activeRuntime && activeRuntime.enabledTables.has(opts.table))) {
      return opts.bundledFallback();
    }

    const isExpired = snapshot.expiresAt <= Date.now();
    if (isExpired && !snapshot.refreshing) {
      // Mark first so concurrent readers do not start a second refresh.
      snapshot.refreshing = true;
      void asyncRefresh(activeRuntime);
    }

    const cachedRows = snapshot.data ? snapshot.data[opts.table] : void 0;
    if (!Array.isArray(cachedRows) || cachedRows.length === 0) {
      return opts.bundledFallback();
    }
    try {
      return opts.mapRows(cachedRows);
    } catch {
      // A mapper failure degrades to the bundled data instead of throwing.
      return opts.bundledFallback();
    }
  };
}
1327
// Promise of the most recently started refresh; undefined when none is running.
var pendingRefresh;

/**
 * Run one refresh and expose its promise through `pendingRefresh` while it is
 * in flight.
 *
 * @param {object} rt - runtime passed through to doRefresh
 * @returns {Promise<void>}
 */
async function asyncRefresh(rt) {
  const inFlight = doRefresh(rt);
  pendingRefresh = inFlight;
  try {
    await inFlight;
  } finally {
    // Clear only if a newer refresh has not replaced this one meanwhile.
    if (pendingRefresh === inFlight) {
      pendingRefresh = void 0;
    }
  }
}
1337
// Endpoint used when the runtime does not supply `configEndpoint`.
var DEFAULT_CONFIG_URL = "https://kgauto-dashboard.vercel.app/api/kgauto-v2/config";

/**
 * Fetch the brain config and store it in the shared snapshot.
 *
 * On success the snapshot is replaced with the response body and a new expiry
 * of now + `rt.ttlMs`. On failure the refreshing flag is cleared, the expiry
 * is still pushed out by `rt.ttlMs`, and the error is reported once through
 * `rt.onError` (or the default handler). In both cases the result is dropped
 * when `runtime` no longer equals `rt`.
 *
 * This function never rejects: it is started fire-and-forget, so a rejection
 * would surface as an unhandled promise rejection.
 *
 * @param {object} rt - runtime providing `fetchImpl`, `ttlMs`, and optional
 *   `configEndpoint` / `onError`
 * @returns {Promise<void>}
 */
async function doRefresh(rt) {
  const url = rt.configEndpoint ?? DEFAULT_CONFIG_URL;
  try {
    const res = await rt.fetchImpl(url, { method: "GET" });
    if (!res.ok) {
      throw new Error(`brain-query ${res.status}: ${res.statusText}`);
    }
    const body = await res.json();
    // The runtime was swapped while the request was in flight: discard.
    if (runtime !== rt) return;
    snapshot = {
      data: body,
      expiresAt: Date.now() + rt.ttlMs,
      refreshing: false,
      warned: snapshot.warned
    };
  } catch (err) {
    if (runtime !== rt) return;
    snapshot.refreshing = false;
    snapshot.expiresAt = Date.now() + rt.ttlMs;
    if (!snapshot.warned) {
      snapshot.warned = true;
      try {
        (rt.onError ?? defaultOnError)(err);
      } catch (handlerErr) {
        // A throwing error handler must not reject this promise.
        console.warn("[kgauto] brain-query onError handler threw:", handlerErr);
      }
    }
  }
}
1363
/**
 * Fallback error reporter: writes the failure to `console.warn`.
 *
 * @param {*} err - the error raised by the refresh
 */
function defaultOnError(err) {
  const prefix = "[kgauto] brain-query failed (using bundled fallback):";
  console.warn(prefix, err);
}
1366
+
1367
+ // src/chains-brain.ts
1368
/**
 * Type guard for one chains-table row.
 *
 * @param {*} x - candidate row
 * @returns {boolean} true when `x` is a non-null object with a string
 *   `archetype`, a number `tier`, and a string `model_id`
 */
function isChainsRow(x) {
  if (x === null || typeof x !== "object") return false;
  const { archetype, tier, model_id: modelId } = x;
  if (typeof archetype !== "string") return false;
  if (typeof tier !== "number") return false;
  return typeof modelId === "string";
}
1373
/**
 * Convert chains-table rows into an `{ archetype: [modelId, ...] }` map.
 *
 * Rows that fail isChainsRow are ignored. Within each archetype the model ids
 * are ordered by ascending `tier`. Archetypes present in the bundled starter
 * chains but missing from the rows are filled in from the bundle.
 *
 * @param {Array} rows - raw rows for the chains table
 * @returns {object} chain per archetype
 */
function mapRowsToChains(rows) {
  const rowsByArchetype = /* @__PURE__ */ new Map();
  for (const candidate of rows) {
    if (!isChainsRow(candidate)) continue;
    if (!rowsByArchetype.has(candidate.archetype)) {
      rowsByArchetype.set(candidate.archetype, []);
    }
    rowsByArchetype.get(candidate.archetype).push(candidate);
  }

  const chains = {};
  rowsByArchetype.forEach((tierRows, archetype) => {
    tierRows.sort((left, right) => left.tier - right.tier);
    chains[archetype] = tierRows.map((entry) => entry.model_id);
  });

  // Backfill archetypes the brain did not cover.
  for (const [archetype, starterChain] of Object.entries(getAllStarterChains())) {
    if (!chains[archetype]) chains[archetype] = starterChain;
  }
  return chains;
}
1392
// Reader for the "kgauto_chains" table: serves brain rows mapped through
// mapRowsToChains, or the bundled starter chains when no rows are available.
var loadChainsFromBrain = createBrainQueryCache({
  bundledFallback: getAllStarterChains,
  mapRows: mapRowsToChains,
  table: "kgauto_chains"
});
1397
+
1398
+ // src/fallback.ts
1399
// Bundled fallback chains per archetype. Each entry records the model id, how
// the placement is grounded ("judgment", "capability-fact" or "measured") and
// a free-text reason; measured entries also carry the sample size `n`.
var STARTER_CHAINS_GROUNDED = (() => {
  const judgment = (id, reason) => ({ id, grounding: "judgment", reason });
  const capabilityFact = (id, reason) => ({ id, grounding: "capability-fact", reason });
  const measured = (id, reason, n) => ({ id, grounding: "measured", reason, n });

  return {
    // Reasoning floor: starts at Opus, then Sonnet, then cross-provider.
    critique: [
      judgment("claude-opus-4-7", "Highest reasoning bar, no degradation tier \u2014 engineer pick, awaiting measured backing"),
      judgment("claude-sonnet-4-6", "Same-provider walk-down from Opus on 429"),
      judgment("gemini-2.5-pro", "Cross-provider anchor in similar quality bracket"),
      judgment("gpt-5.5", "alpha.16: third-provider frontier-tier floor (archetypePerf=9)")
    ],
    // Sonnet primary; the second tier walks up to Opus.
    plan: [
      judgment("claude-sonnet-4-6", "Reasoning + cost balance \u2014 engineer pick"),
      judgment("claude-opus-4-7", 'Same-provider walk-UP on 429 (rare exception to "always cheaper")'),
      judgment("gemini-2.5-pro", "Cross-provider anchor"),
      judgment("deepseek-v4-pro", "Tier 3 cost floor \u2014 no brain evidence yet")
    ],
    generate: [
      judgment("claude-sonnet-4-6", "Quality + cost match \u2014 engineer pick"),
      judgment("claude-haiku-4-5", "Same-provider step-down"),
      judgment("gemini-2.5-pro", "Cross-provider anchor"),
      judgment("gpt-5.4-mini", "alpha.16: third-provider tail (archetypePerf=7) \u2014 closes mono-Anthropic gap")
    ],
    // Sonnet placement is still labelled judgment; see the reason text.
    ask: [
      judgment("claude-sonnet-4-6", "Quality + cost match \u2014 engineer pick. NOTE: tt-intel s78 prod showed 27% empty rate; placement awaits measurement validation"),
      judgment("claude-haiku-4-5", "Same-provider step-down"),
      judgment("gemini-2.5-pro", "Cross-provider anchor"),
      judgment("gpt-5.4-mini", "alpha.16: third-provider tail (archetypePerf=7)")
    ],
    // Structured-output archetype: no Flash or DeepSeek entries.
    extract: [
      judgment("claude-sonnet-4-6", "Reliable structured-output anchor \u2014 engineer pick"),
      judgment("claude-haiku-4-5", "Same-provider step-down with native structured output"),
      judgment("gemini-2.5-pro", "Cross-provider anchor with structured-output support"),
      capabilityFact("gpt-5.4", "alpha.16: third-provider floor \u2014 native structured-output capability (archetypePerf=8)")
    ],
    // Forgiving archetype: Flash is accepted as the cost floor.
    transform: [
      judgment("claude-sonnet-4-6", "Quality anchor \u2014 engineer pick"),
      judgment("claude-haiku-4-5", "Same-provider step-down"),
      judgment("gemini-2.5-pro", "Cross-provider anchor"),
      judgment("gemini-2.5-flash", "Cost floor \u2014 forgiving archetype tolerates Flash")
    ],
    // Parallel-tool archetype: Flash leads.
    hunt: [
      capabilityFact("gemini-2.5-flash", "L-040 parallel-tool throughput champion (15-75 calls/step)"),
      capabilityFact("gemini-2.5-pro", "Cross-provider tier 1 with strong parallel-tool support"),
      judgment("claude-sonnet-4-6", "Quality safety net for blocked-Flash case"),
      judgment("claude-haiku-4-5", "Reduced tool budget \u2014 cliff at 16 fires")
    ],
    // Cost-sensitive archetypes: the DeepSeek tier is the only measured one.
    summarize: [
      judgment("gemini-2.5-flash", "Cost-sensitive primary \u2014 engineer pick"),
      measured("deepseek-v4-flash", "Brain-validated tier 1 for cost-sensitive summarize workloads", 169),
      judgment("claude-haiku-4-5", "Quality safety net"),
      judgment("gemini-2.5-flash-lite", "Emergency floor \u2014 onboarded s22, no brain evidence yet")
    ],
    classify: [
      judgment("gemini-2.5-flash", "Cost-sensitive primary \u2014 engineer pick"),
      measured("deepseek-v4-flash", "Brain-validated tier 1 (169 rows, 0% empty rate)", 169),
      judgment("claude-haiku-4-5", "Quality safety net"),
      judgment("gemini-2.5-flash-lite", "Cache-discount 10\xD7 floor for repeat-prompt workloads")
    ]
  };
})();
1468
// Id-only view of STARTER_CHAINS_GROUNDED: archetype -> ordered model ids.
var STARTER_CHAINS = Object.fromEntries(
  Object.entries(STARTER_CHAINS_GROUNDED).map(([archetype, groundedEntries]) => [
    archetype,
    groundedEntries.map((entry) => entry.id)
  ])
);
1475
// Per-orchestration-mode chain overlays, keyed by archetype. Only
// hunt/sequential is defined; entries have the same shape as those in
// STARTER_CHAINS_GROUNDED.
var STARTER_CHAINS_BY_MODE_GROUNDED = (() => {
  const huntSequential = [
    ["deepseek-v4-pro", "alpha.20 E3: cheap + good reasoning at single-step granularity; L-040 cliff silenced when sequential \u2014 hypothesis not yet measured"],
    ["deepseek-v4-flash", "Cheapest viable; sibling-provider fallback"],
    ["claude-sonnet-4-6", "Cross-provider safety net \u2014 Sonnet handles sequential agentic loops cleanly"],
    ["gemini-2.5-pro", "Third-provider tail when no DeepSeek key reachable"]
  ].map(([id, reason]) => ({ id, grounding: "judgment", reason }));

  return { hunt: { sequential: huntSequential } };
})();
1501
// Id-only view of STARTER_CHAINS_BY_MODE_GROUNDED. Archetypes without a
// `sequential` overlay are omitted.
var STARTER_CHAINS_BY_MODE = Object.entries(STARTER_CHAINS_BY_MODE_GROUNDED).reduce(
  (byArchetype, [archetype, modes]) => {
    const sequentialEntries = modes?.sequential;
    if (sequentialEntries) {
      byArchetype[archetype] = { sequential: sequentialEntries.map((entry) => entry.id) };
    }
    return byArchetype;
  },
  {}
);
1512
/**
 * Pick the starter chain for an archetype, honouring the orchestration mode.
 *
 * When `toolOrchestration` is "sequential" and a sequential overlay exists for
 * the archetype, a copy of that overlay is returned. Otherwise the archetype's
 * own entry in `allChains` is returned.
 *
 * @param {string} archetype - archetype name
 * @param {string|undefined} toolOrchestration - orchestration mode
 * @param {object} allChains - map of archetype to model-id chain
 * @returns {string[]|undefined} the chain, or undefined for an unknown archetype
 */
function resolveStarterForMode(archetype, toolOrchestration, allChains) {
  if (toolOrchestration === "sequential") {
    const overlay = STARTER_CHAINS_BY_MODE[archetype]?.sequential;
    if (overlay) return [...overlay];
  }
  // Own-property check: a bare lookup would hand back inherited members
  // (e.g. "constructor"), which callers would then treat as a chain.
  return Object.prototype.hasOwnProperty.call(allChains, archetype) ? allChains[archetype] : void 0;
}
1519
/**
 * Build the ordered fallback chain for an archetype.
 *
 * Steps, in order: resolve the starter chain (brain or bundled, with the
 * sequential overlay when applicable); put `primary` first when given; drop
 * `policy.blockedModels`; de-duplicate keeping first occurrences; when
 * `reachability` is given keep only reachable models; truncate to `maxDepth`.
 *
 * @param {object} opts - `archetype`, optional `primary`, `maxDepth`
 *   (default 3), `policy`, `reachability`, `toolOrchestration`
 * @returns {string[]} model ids, at most `maxDepth` long
 * @throws {Error} when `maxDepth` is below 1 or the archetype is unknown
 */
function getDefaultFallbackChain(opts) {
  const { archetype, primary, policy, reachability, toolOrchestration } = opts;
  const { maxDepth = 3 } = opts;
  if (maxDepth < 1) {
    throw new Error(
      `getDefaultFallbackChain: maxDepth must be >= 1, got ${maxDepth}`
    );
  }

  const knownChains = loadChainsFromBrain();
  const baseChain = resolveStarterForMode(archetype, toolOrchestration, knownChains);
  if (!baseChain) {
    throw new Error(
      `getDefaultFallbackChain: unknown archetype "${archetype}". Known: ${Object.keys(knownChains).join(", ")}`
    );
  }

  const ordered = primary
    ? [primary, ...baseChain.filter((id) => id !== primary)]
    : [...baseChain];

  const blockedModels = policy?.blockedModels;
  const blocked = blockedModels && blockedModels.length > 0 ? new Set(blockedModels) : null;
  const permitted = blocked ? ordered.filter((id) => !blocked.has(id)) : ordered;

  // A Set keeps insertion order, so this retains the first occurrence of each id.
  const unique = [...new Set(permitted)];

  const candidates = reachability
    ? unique.filter((id) => isModelReachable(id, reachability))
    : unique;
  return candidates.slice(0, maxDepth);
}
1557
/**
 * Return the bundled starter chains.
 *
 * @returns {object} a new object mapping each archetype to a fresh copy of
 *   its model-id array
 */
function getAllStarterChains() {
  return Object.fromEntries(
    Object.entries(STARTER_CHAINS).map(([archetype, modelIds]) => [archetype, modelIds.slice()])
  );
}
1564
+
1565
+ // src/compatibility.ts
1566
// Minimum archetypePerf score a model needs to be offered as a counterfactual.
var ARCHETYPE_FLOOR_DEFAULT = 6;

// src/glassbox-routes/counterfactuals.ts
// A counterfactual must save at least this fraction of the served cost.
var COUNTERFACTUAL_MIN_SAVINGS_RATIO = 0.1;
// Maximum number of counterfactuals returned per call.
var COUNTERFACTUAL_MAX_RESULTS = 2;

/**
 * Suggest cheaper models that could have served a call.
 *
 * Walks the fallback chain for the archetype (up to depth 10), skips the
 * served model, models without a profile, and models whose archetypePerf for
 * this archetype is below ARCHETYPE_FLOOR_DEFAULT (a missing score counts
 * as 5). It keeps candidates whose estimated cost saves at least
 * COUNTERFACTUAL_MIN_SAVINGS_RATIO of the served cost and returns the
 * cheapest COUNTERFACTUAL_MAX_RESULTS of them.
 *
 * @param {object} args - `servedModel`, `servedCostUsd`, `archetype`,
 *   `tokensIn`, `tokensOut`, optional `cacheReadInputTokens` (default 0) and
 *   `toolOrchestration`
 * @returns {Array<object>} candidates sorted by ascending estimated cost; empty
 *   when inputs are not positive numbers or the chain cannot be resolved
 */
function computeCounterfactuals(args) {
  const {
    servedModel,
    servedCostUsd,
    archetype,
    tokensIn,
    tokensOut,
    cacheReadInputTokens = 0,
    toolOrchestration
  } = args;
  // `!(x > 0)` also rejects NaN / undefined, which `x <= 0` lets through.
  if (!(tokensIn > 0)) return [];
  if (!(servedCostUsd > 0)) return [];
  let chain;
  try {
    chain = getDefaultFallbackChain({
      archetype,
      posture: "open",
      maxDepth: 10,
      toolOrchestration
    });
  } catch {
    // Unknown archetype (or any chain failure) means no suggestions.
    return [];
  }
  const candidates = [];
  const minSavings = servedCostUsd * COUNTERFACTUAL_MIN_SAVINGS_RATIO;
  for (const modelId of chain) {
    if (modelId === servedModel) continue;
    const profile = tryGetProfile(modelId);
    if (!profile) continue;
    const perf = profile.archetypePerf?.[archetype] ?? 5;
    if (perf < ARCHETYPE_FLOOR_DEFAULT) continue;
    const estimated = estimateCostUsd({
      profile,
      tokensIn,
      tokensOut,
      cacheReadInputTokens
    });
    // Covers undefined as well as NaN / Infinity from incomplete inputs.
    if (!Number.isFinite(estimated)) continue;
    const savings = servedCostUsd - estimated;
    // Written as a positive test so a NaN saving is rejected too.
    if (!(savings >= minSavings)) continue;
    const savingsPercent = Math.round(savings / servedCostUsd * 100);
    const reason = buildReason({
      modelId,
      archetype,
      perf,
      profile
    });
    candidates.push({
      modelId,
      estimatedCostUsd: round6(estimated),
      savingsUsd: round6(savings),
      savingsPercent,
      reason
    });
  }
  candidates.sort((a, b) => a.estimatedCostUsd - b.estimatedCostUsd);
  return candidates.slice(0, COUNTERFACTUAL_MAX_RESULTS);
}
1629
/**
 * Estimate what a call would cost on a given model profile.
 *
 * Input tokens are split into a cached portion (billed at the profile's cache
 * discount multiplier) and a non-cached portion (billed at the full input
 * rate); output tokens are billed at the output rate.
 *
 * @param {object} args
 * @param {object} args.profile - Needs `costInputPer1m` and `costOutputPer1m`;
 *   `lowering.cache.discount` is optional and defaults to 1 (no discount).
 * @param {number} args.tokensIn
 * @param {number} args.tokensOut
 * @param {number} [args.cacheReadInputTokens=0]
 * @returns {number|undefined} Cost in USD, or `undefined` when the inputs do
 *   not produce a finite, non-negative number.
 */
function estimateCostUsd(args) {
  const { profile, tokensIn, tokensOut, cacheReadInputTokens = 0 } = args;

  // Clamp the cached share into [0, tokensIn]: a negative cache count must not
  // inflate the non-cached portion, and cache reads cannot exceed the input.
  const cachedIn = Math.max(0, Math.min(cacheReadInputTokens, tokensIn));
  const freshIn = Math.max(tokensIn - cachedIn, 0);

  // Profiles without cache lowering data are priced as if there were no discount.
  const discount = profile.lowering?.cache?.discount ?? 1;

  const inUsd = freshIn / 1e6 * profile.costInputPer1m + cachedIn / 1e6 * profile.costInputPer1m * discount;
  const outUsd = tokensOut / 1e6 * profile.costOutputPer1m;
  const total = inUsd + outUsd;
  if (!Number.isFinite(total) || total < 0) return undefined;
  return total;
}
1641
/**
 * Round a number to six decimal places.
 *
 * @param {number} n
 * @returns {number}
 */
function round6(n) {
  const scaled = Math.round(n * 1e6);
  return scaled / 1e6;
}
1644
/**
 * Compose the one-line explanation attached to a counterfactual.
 *
 * The profile's first listed strength, if any, is appended with underscores
 * shown as spaces.
 *
 * @param {{modelId: string, archetype: string, perf: number, profile: object}} args
 * @returns {string}
 */
function buildReason(args) {
  const { modelId, archetype, perf, profile } = args;
  const headline = `${modelId} on ${archetype}: archetypePerf=${perf}`;
  const hook = profile.strengths?.[0];
  if (!hook) return headline;
  return `${headline}, ${hook.replace(/_/g, " ")}`;
}
1650
+
1651
+ // src/glassbox-routes/projected-cost.ts
1652
// Minimum average calls per day before computeProjectedDailyCost reports a
// projection; below this it returns undefined.
var INSUFFICIENT_VOLUME_THRESHOLD = 5;

// Number of trailing days counted when averaging call volume.
var WINDOW_DAYS = 7;
1654
/**
 * Project a daily cost for an (app, archetype) pair from recent call volume.
 *
 * Asks the brain's `compile_outcomes` endpoint for an exact row count over the
 * last `WINDOW_DAYS` days (zero rows requested, count read from the
 * Content-Range header), averages it per day, and multiplies by the cost of
 * the served call.
 *
 * @param {object} args
 * @param {string} args.appId
 * @param {string} args.archetype
 * @param {number} args.servedCostUsd
 * @param {string} args.brainEndpoint
 * @param {string} args.brainJwt
 * @param {string} args.brainAnonKey
 * @param {Function} [args.fetch] - Fetch override; defaults to `globalThis.fetch`.
 * @returns {Promise<number|undefined>} Projection rounded to six decimals, or
 *   `undefined` when inputs are unusable, the request fails, the count is
 *   missing, or volume is below `INSUFFICIENT_VOLUME_THRESHOLD` per day.
 */
async function computeProjectedDailyCost(args) {
  const {
    appId,
    archetype,
    servedCostUsd,
    brainEndpoint,
    brainJwt,
    brainAnonKey,
    fetch: fetchImpl
  } = args;

  if (!appId || !archetype) return undefined;
  if (!(Number.isFinite(servedCostUsd) && servedCostUsd > 0)) return undefined;

  const doFetch = fetchImpl ?? ((...a) => globalThis.fetch(...a));
  const base = brainEndpoint.replace(/\/+$/, "");

  const windowMs = WINDOW_DAYS * 24 * 60 * 60 * 1e3;
  const cutoffIso = new Date(Date.now() - windowMs).toISOString();

  const query = new URLSearchParams([
    ["app_id", `eq.${appId}`],
    ["intent_archetype", `eq.${archetype}`],
    ["created_at", `gte.${cutoffIso}`],
    ["select", "handle"],
    ["limit", "0"]
  ]);
  const url = `${base}/rest/v1/compile_outcomes?${query.toString()}`;

  const headers = {
    Authorization: `Bearer ${brainJwt}`,
    apikey: brainAnonKey,
    Accept: "application/json",
    // Triggers PostgREST exact count in Content-Range header.
    Prefer: "count=exact"
  };

  let res;
  try {
    res = await doFetch(url, { method: "GET", headers });
  } catch {
    // The projection is optional; a network failure just omits it.
    return undefined;
  }
  if (!res.ok) return undefined;

  const count = parseContentRangeCount(res.headers.get("content-range"));
  if (count === undefined) return undefined;

  const avgPerDay = count / WINDOW_DAYS;
  if (avgPerDay < INSUFFICIENT_VOLUME_THRESHOLD) return undefined;

  return Math.round(avgPerDay * servedCostUsd * 1e6) / 1e6;
}
1702
/**
 * Extract the total row count from a Content-Range header value.
 *
 * The count is whatever follows the last `/` (for example `0-9/123` or
 * `*​/123`). Only a plain run of decimal digits is accepted, so an unknown
 * total (`*`), an empty tail, signed values, fractions and trailing garbage
 * all yield `undefined` instead of a partially parsed number.
 *
 * @param {string|null|undefined} header
 * @returns {number|undefined} The non-negative integer count, if present.
 */
function parseContentRangeCount(header) {
  if (!header) return undefined;
  const slash = header.lastIndexOf("/");
  if (slash < 0) return undefined;

  const tail = header.slice(slash + 1).trim();
  // parseInt would accept "12abc" or "3.9"; require digits only.
  if (!/^\d+$/.test(tail)) return undefined;

  const count = Number(tail);
  return Number.isSafeInteger(count) ? count : undefined;
}
1712
+
1713
+ // src/glassbox-routes/proxy.ts
1714
// Headers attached to every JSON response built by jsonResponse.
var JSON_HEADERS2 = {
  "Content-Type": "application/json",
  "Cache-Control": "no-store"
};

// Row limit used by parseLimit when the caller gives none or an invalid one.
var DEFAULT_LIMIT = 20;

// Largest row limit parseLimit will honour.
var MAX_LIMIT = 100;
1720
/**
 * Build a JSON `Response` with the shared JSON headers.
 *
 * @param {number} status - HTTP status code.
 * @param {*} body - Value serialised with `JSON.stringify`.
 * @returns {Response}
 */
function jsonResponse(status, body) {
  const payload = JSON.stringify(body);
  const init = { status, headers: JSON_HEADERS2 };
  return new Response(payload, init);
}
1723
/**
 * Build a JSON error response of the form `{ "error": code }`.
 *
 * @param {number} status - HTTP status code.
 * @param {string} code - Machine-readable error code.
 * @returns {Response}
 */
function jsonError2(status, code) {
  const body = { error: code };
  return jsonResponse(status, body);
}
1726
/**
 * Run the optional caller-supplied scrubber over one row.
 *
 * The scrubber is best-effort: if it is absent, the row is not an object, the
 * scrubber throws, or it returns something that is not an object, the original
 * row is returned unchanged. Validating the return value keeps a misbehaving
 * scrubber from handing `undefined` or a primitive to the row mappers.
 *
 * NOTE(review): this fails open, so a broken scrubber lets the unscrubbed row
 * through. That matches the existing throw handling; confirm it is intended.
 *
 * @param {*} row - Row as received from the brain.
 * @param {Function} [scrub] - Receives the row and returns the scrubbed row.
 * @returns {*} The scrubbed row, or the original row as described above.
 */
function applyScrub(row, scrub) {
  if (!scrub || row == null || typeof row !== "object") return row;
  try {
    const scrubbed = scrub(row);
    if (scrubbed !== null && typeof scrubbed === "object") return scrubbed;
    return row;
  } catch {
    return row;
  }
}
1734
/**
 * Parse the `limit` query parameter.
 *
 * @param {string|null|undefined} raw - Raw query value.
 * @returns {number} The parsed positive integer capped at `MAX_LIMIT`, or
 *   `DEFAULT_LIMIT` when the value is missing, non-numeric, or not positive.
 */
function parseLimit(raw) {
  const parsed = raw ? Number.parseInt(raw, 10) : NaN;
  const usable = Number.isFinite(parsed) && parsed > 0;
  return usable ? Math.min(parsed, MAX_LIMIT) : DEFAULT_LIMIT;
}
1740
/**
 * Map a raw `compile_outcomes` row to the trace summary shape.
 *
 * String fields that are missing or not strings become `""`; numeric fields
 * that are missing or not numbers become `0`.
 *
 * @param {object} row
 * @returns {{traceId: string, appId: string, archetype: string, target: string, createdAt: string, tokensIn: number, tokensOut: number, estimatedCostUsd: number}}
 */
function rowToSummary(row) {
  const text = (value) => (typeof value === "string" ? value : "");
  const amount = (value) => (typeof value === "number" ? value : 0);
  return {
    traceId: text(row.handle),
    appId: text(row.app_id),
    archetype: text(row.intent_archetype),
    target: text(row.model),
    createdAt: text(row.created_at),
    tokensIn: amount(row.tokens_in),
    tokensOut: amount(row.tokens_out),
    estimatedCostUsd: amount(row.cost_usd_actual)
  };
}
1752
// Input share of total tokens above which computeHealth reports "yellow".
var INPUT_RATIO_YELLOW = 0.65;

// Input share of total tokens above which computeHealth reports "red".
var INPUT_RATIO_RED = 0.85;

// Cache health is reported as "na" unless cacheable history exceeds this many tokens.
var CACHE_HEALTH_MIN_TOKENS = 1e3;

// Cache hit ratio at or above which cache health is "green".
var CACHE_RATIO_GREEN = 0.5;

// Cache hit ratio at or above which cache health is "yellow"; below it is "red".
var CACHE_RATIO_YELLOW = 0.1;

// Fallback reasons passed through verbatim by asFallbackReason; any other
// string is reported as "provider_error".
var FALLBACK_REASONS = /* @__PURE__ */ new Set([
  "rate_limit",
  "provider_auth_failed",
  "provider_error",
  "cliff",
  "cost_cap",
  "contract_violation"
]);
1765
/**
 * Return `v` when it is a non-empty string, otherwise `undefined`.
 *
 * @param {*} v
 * @returns {string|undefined}
 */
function asString(v) {
  if (typeof v !== "string") return undefined;
  return v.length > 0 ? v : undefined;
}
1768
/**
 * Return `v` when it is a finite number, otherwise `undefined`.
 *
 * @param {*} v
 * @returns {number|undefined}
 */
function asNumber(v) {
  // Number.isFinite is false for every non-number, so no typeof check is needed.
  return Number.isFinite(v) ? v : undefined;
}
1771
/**
 * Return `v` when it is a finite number, otherwise `0`.
 *
 * @param {*} v
 * @returns {number}
 */
function asNumberOrZero(v) {
  // Number.isFinite is false for every non-number, so no typeof check is needed.
  return Number.isFinite(v) ? v : 0;
}
1774
/**
 * Return the string elements of `v`, in order.
 *
 * @param {*} v
 * @returns {string[]} A new array; empty when `v` is not an array.
 */
function asStringArray(v) {
  return Array.isArray(v) ? v.filter((item) => typeof item === "string") : [];
}
1782
/**
 * Normalise a raw fallback reason.
 *
 * @param {*} v
 * @returns {string|undefined} `undefined` for non-strings; the value itself
 *   when it is a member of `FALLBACK_REASONS`; otherwise `"provider_error"`.
 */
function asFallbackReason(v) {
  if (typeof v !== "string") return undefined;
  const recognised = v !== "" && FALLBACK_REASONS.has(v);
  return recognised ? v : "provider_error";
}
1788
/**
 * Validate one raw advisory record and convert it to the advisory shape.
 *
 * `level` must be "info", "warn" or "critical", and `code` and `message` must
 * be strings. `suggestion`, `docsUrl` and `suggestedAdaptation` are copied
 * only when present and valid; the latter two are read from either their
 * snake_case or camelCase key.
 *
 * @param {*} raw
 * @returns {object|undefined} The advisory, or `undefined` when invalid.
 */
function rowToAdvisory(raw) {
  if (!raw || typeof raw !== "object") return undefined;

  const { level, code, message } = raw;
  const levelOk = level === "info" || level === "warn" || level === "critical";
  if (!levelOk || typeof code !== "string" || typeof message !== "string") {
    return undefined;
  }

  const advisory = { level, code, message };

  const suggestion = asString(raw.suggestion);
  if (suggestion) advisory.suggestion = suggestion;

  const docsUrl = asString(raw.docs_url ?? raw.docsUrl);
  if (docsUrl) advisory.docsUrl = docsUrl;

  const adaptation = toAdapter(raw.suggested_adaptation ?? raw.suggestedAdaptation);
  if (adaptation) advisory.suggestedAdaptation = adaptation;

  return advisory;
}
1806
// Section kinds accepted by rowToSectionRewrite; records with any other kind
// are dropped.
var SECTION_KINDS = /* @__PURE__ */ new Set([
  "role_intro",
  "tool_call_contract",
  "narration_contract",
  "user_turn",
  "reference",
  "arbitrary"
]);
1814
/**
 * Produce a human-readable summary for a section rewrite.
 *
 * Known (kind, rule) pairs get a hand-written sentence; everything else gets
 * a generic sentence naming the rule and the section kind.
 *
 * @param {string} kind - Section kind.
 * @param {string} rule - Translator rule that was applied.
 * @returns {string}
 */
function summarizeSectionRewrite(kind, rule) {
  const known = [
    [
      "tool_call_contract",
      "sequential-tool-cliff-below-floor",
      "Sequential tool pattern applied (model cliff cleared at compile time)."
    ],
    [
      "narration_contract",
      "narration-drift-anthropic",
      "Narration tightened for Anthropic dialect (terse-log shape preserved)."
    ],
    [
      "narration_contract",
      "narration-thinking-leak-deepseek",
      "Thinking-block suppression applied (DeepSeek V4 internal reasoning kept off-wire)."
    ]
  ];
  const match = known.find(([knownKind, knownRule]) => knownKind === kind && knownRule === rule);
  return match ? match[2] : `Translator applied rule "${rule}" to ${kind} section.`;
}
1826
/**
 * Validate one raw section-rewrite record and attach a summary.
 *
 * The section id is read from `sectionId` or `section_id` and must be a
 * non-empty string, `kind` must be a member of `SECTION_KINDS`, and `rule`
 * must be a non-empty string.
 *
 * @param {*} raw
 * @returns {{sectionId: string, kind: string, rule: string, summary: string}|undefined}
 */
function rowToSectionRewrite(raw) {
  if (!raw || typeof raw !== "object") return undefined;

  const nonEmptyString = (value) => typeof value === "string" && value.length !== 0;

  const sectionId = raw.sectionId ?? raw.section_id;
  if (!nonEmptyString(sectionId)) return undefined;

  const { kind, rule } = raw;
  if (!(typeof kind === "string" && SECTION_KINDS.has(kind))) return undefined;
  if (!nonEmptyString(rule)) return undefined;

  const summary = summarizeSectionRewrite(kind, rule);
  return { sectionId, kind, rule, summary };
}
1844
/**
 * Validate a raw suggested-adaptation record.
 *
 * Only one adaptation is recognised: setting `toolOrchestration` to
 * `"sequential"`, accompanied by a string `consequence`.
 *
 * @param {*} raw
 * @returns {{parameter: string, value: string, consequence: string}|undefined}
 */
function toAdapter(raw) {
  if (!raw || typeof raw !== "object") return undefined;
  const { parameter, value, consequence } = raw;
  const recognised =
    parameter === "toolOrchestration" &&
    value === "sequential" &&
    typeof consequence === "string";
  if (!recognised) return undefined;
  return { parameter: "toolOrchestration", value: "sequential", consequence };
}
1856
/**
 * Grade a trace on three independent health signals.
 *
 * - `inputRatioStatus`: share of input tokens in the total, graded against
 *   `INPUT_RATIO_YELLOW` / `INPUT_RATIO_RED`.
 * - `cacheStatus`: cache hit ratio graded against `CACHE_RATIO_GREEN` /
 *   `CACHE_RATIO_YELLOW`, or `"na"` when cacheable history does not exceed
 *   `CACHE_HEALTH_MIN_TOKENS`.
 * - `fallbackStatus`: `"red"` when the call fell over from a different model
 *   than the one that served it.
 *
 * @param {object} args
 * @returns {{inputRatioStatus: string, cacheStatus: string, fallbackStatus: string}}
 */
function computeHealth(args) {
  const {
    tokensIn,
    tokensOut,
    historyCacheableTokens,
    inputCacheHitRatio,
    fellOverFrom,
    target
  } = args;

  const totalTokens = tokensIn + tokensOut;
  const inputShare = totalTokens > 0 ? tokensIn / totalTokens : 0;
  const inputRatioStatus =
    inputShare > INPUT_RATIO_RED ? "red" :
    inputShare > INPUT_RATIO_YELLOW ? "yellow" :
    "green";

  const gradeCache = () => {
    if (historyCacheableTokens <= CACHE_HEALTH_MIN_TOKENS) return "na";
    if (inputCacheHitRatio >= CACHE_RATIO_GREEN) return "green";
    if (inputCacheHitRatio >= CACHE_RATIO_YELLOW) return "yellow";
    return "red";
  };
  const cacheStatus = gradeCache();

  const fellOver = fellOverFrom !== undefined && fellOverFrom !== target;
  const fallbackStatus = fellOver ? "red" : "green";

  return { inputRatioStatus, cacheStatus, fallbackStatus };
}
1884
/**
 * Map a raw `compile_outcomes` row to the full trace detail shape.
 *
 * Starts from the summary fields and adds validated advisories, section
 * rewrites, cache statistics, fallback information and the computed health
 * block. Optional fields that are missing or of the wrong type come out as
 * `undefined`.
 *
 * @param {object} row
 * @returns {object} Trace detail record.
 */
function rowToDetail(row) {
  const summary = rowToSummary(row);
  const { tokensIn, tokensOut } = summary;

  // Convert each element of a possibly-missing list and keep the valid results.
  const convertAll = (list, convert) => {
    const kept = [];
    (Array.isArray(list) ? list : []).forEach((item) => {
      const converted = convert(item);
      if (converted) kept.push(converted);
    });
    return kept;
  };

  const cacheReadInputTokens = asNumberOrZero(row.cache_read_input_tokens);
  const cacheCreationInputTokens = asNumberOrZero(row.cache_creation_input_tokens);
  const historyCacheableTokens = asNumberOrZero(row.history_cacheable_tokens);

  let inputCacheHitRatio = 0;
  if (tokensIn > 0) inputCacheHitRatio = cacheReadInputTokens / tokensIn;

  const fellOverFrom = asString(row.fell_over_from);
  // A fallback reason is only reported when a fallback actually happened.
  const fallbackReason = fellOverFrom ? asFallbackReason(row.fallback_reason) : undefined;
  const requestedModel = asString(row.requested_model) ?? fellOverFrom;

  const advisories = convertAll(row.advisories, (item) => rowToAdvisory(item));
  const sectionRewritesApplied = convertAll(
    row.section_rewrites_applied,
    (item) => rowToSectionRewrite(item)
  );

  const health = computeHealth({
    tokensIn,
    tokensOut,
    cacheReadInputTokens,
    historyCacheableTokens,
    inputCacheHitRatio,
    fellOverFrom,
    target: summary.target
  });

  return {
    ...summary,
    mutationsApplied: asStringArray(row.mutations_applied),
    advisories,
    rawRequest: asString(row.prompt_preview),
    rawResponse: asString(row.response_preview),
    requestedModel,
    finishReason: asString(row.finish_reason),
    ttftMs: asNumber(row.ttft_ms),
    totalMs: asNumber(row.total_ms) ?? asNumber(row.latency_ms),
    toolsCount: asNumber(row.tools_count),
    historyDepth: asNumber(row.history_depth),
    systemPromptChars: asNumber(row.system_prompt_chars),
    cacheReadInputTokens,
    cacheCreationInputTokens,
    historyCacheableTokens,
    inputCacheHitRatio,
    fellOverFrom,
    fallbackReason,
    sectionRewritesApplied,
    health
  };
}
1943
/**
 * Create the request handler that proxies trace reads to the brain.
 *
 * Without a `traceId` query parameter the handler returns the most recent
 * trace summaries (`limit`, default and cap applied by `parseLimit`). With a
 * `traceId` it returns the full detail for that trace, enriched with
 * counterfactual suggestions and, when available, a projected daily cost.
 *
 * Responses: 200 on success, 404 `not_found` when the trace does not exist,
 * 400 `bad_request` for other brain 4xx replies, 500 `brain_auth_misconfig`
 * when the brain rejects our credentials, and 502 `brain_unavailable` for
 * network failures, brain 5xx replies, or an unparseable body.
 *
 * @param {object} config
 * @param {string} config.installToken
 * @param {string} config.extensionId
 * @param {string} config.brainEndpoint - Base URL; trailing slashes are ignored.
 * @param {string} config.brainJwt
 * @param {string} config.brainAnonKey
 * @param {string} config.appId - Every query is filtered to this app.
 * @param {Function} [config.scrub] - Optional per-row scrubber.
 * @param {Function} [config.fetch] - Fetch override; defaults to `globalThis.fetch`.
 * @returns {(req: Request) => Promise<Response>}
 */
function createProxyHandler(config) {
  const {
    installToken,
    extensionId,
    brainEndpoint,
    brainJwt,
    brainAnonKey,
    appId,
    scrub,
    fetch: fetchImpl
  } = config;
  const doFetch = fetchImpl ?? ((...args) => globalThis.fetch(...args));
  const base = brainEndpoint.replace(/\/+$/, "");

  // The row mappers dereference properties, so anything that is not an
  // object (null, numbers, strings) must be dropped before it reaches them.
  const isRecord = (row) => row !== null && typeof row === "object";

  return async function proxy(req) {
    const authFail = checkAuth(req, { installToken, extensionId });
    if (authFail) return authFail;

    const url = new URL(req.url);
    const traceId = url.searchParams.get("traceId");
    const limit = parseLimit(url.searchParams.get("limit"));

    const qs = new URLSearchParams();
    qs.set("app_id", `eq.${appId}`);
    if (traceId) {
      qs.set("handle", `eq.${traceId}`);
    } else {
      qs.set("order", "created_at.desc");
      qs.set("limit", String(limit));
    }
    const brainUrl = `${base}/rest/v1/compile_outcomes?${qs.toString()}`;

    let brainRes;
    try {
      brainRes = await doFetch(brainUrl, {
        method: "GET",
        headers: {
          // Authorization carries the scoped JWT — drives RLS via app_id claim.
          Authorization: `Bearer ${brainJwt}`,
          // apikey MUST be one of the project's known keys (anon or
          // service_role). Supabase rejects any other JWT here, even when
          // HS256-signed with the same secret. Pre-alpha.24 this was set to
          // brainJwt and silently 401'd against real Supabase. See L-117.
          apikey: brainAnonKey,
          Accept: "application/json"
        }
      });
    } catch {
      return jsonError2(502, "brain_unavailable");
    }

    if (brainRes.status === 401 || brainRes.status === 403) {
      // Our own credentials were rejected: a server-side configuration fault.
      return jsonError2(500, "brain_auth_misconfig");
    }
    if (brainRes.status >= 500) {
      return jsonError2(502, "brain_unavailable");
    }
    if (!brainRes.ok) {
      return jsonError2(400, "bad_request");
    }

    let rows;
    try {
      rows = await brainRes.json();
    } catch {
      return jsonError2(502, "brain_unavailable");
    }
    if (!Array.isArray(rows)) {
      return jsonError2(502, "brain_unavailable");
    }

    // Filter before and after scrubbing: the payload may contain non-object
    // entries, and a scrubber may return one.
    const scrubbed = rows
      .filter(isRecord)
      .map((row) => applyScrub(row, scrub))
      .filter(isRecord);

    if (traceId) {
      const first = scrubbed[0];
      if (!first) return jsonError2(404, "not_found");

      const detail = rowToDetail(first);
      detail.counterfactuals = computeCounterfactuals({
        servedModel: detail.target,
        servedCostUsd: detail.estimatedCostUsd,
        archetype: detail.archetype,
        tokensIn: detail.tokensIn,
        tokensOut: detail.tokensOut,
        cacheReadInputTokens: detail.cacheReadInputTokens
      });

      if (detail.estimatedCostUsd > 0) {
        const projected = await computeProjectedDailyCost({
          appId: detail.appId,
          archetype: detail.archetype,
          servedCostUsd: detail.estimatedCostUsd,
          brainEndpoint: base,
          brainJwt,
          brainAnonKey,
          fetch: doFetch
        });
        if (projected !== undefined) {
          detail.projectedDailyCostUsd = projected;
        }
      }
      return jsonResponse(200, detail);
    }

    return jsonResponse(200, { traces: scrubbed.map(rowToSummary) });
  };
}
2042
+
2043
+ // src/glassbox-routes/stream.ts
2044
// Response headers for the server-sent-events stream endpoint.
var SSE_HEADERS = {
  "Content-Type": "text/event-stream",
  "Cache-Control": "no-cache, no-transform",
  Connection: "keep-alive",
  "X-Accel-Buffering": "no"
};
2050
/**
 * Run the optional caller-supplied scrubber over one stream event.
 *
 * The scrubbed value is used only if it still looks like an event (an object
 * with a string `kind` and a numeric `at`). If the scrubber is absent, throws,
 * or returns anything else, the original event is returned.
 *
 * @param {object} event
 * @param {Function} [scrub]
 * @returns {object}
 */
function applyScrub2(event, scrub) {
  if (!scrub) return event;
  let candidate;
  try {
    candidate = scrub(event);
  } catch {
    return event;
  }
  const looksLikeEvent =
    Boolean(candidate) &&
    typeof candidate === "object" &&
    typeof candidate.kind === "string" &&
    typeof candidate.at === "number";
  return looksLikeEvent ? candidate : event;
}
2062
/**
 * Serialise one server-sent-events frame.
 *
 * Carriage returns and line feeds are stripped from the event name so it
 * cannot break out of its `event:` line; the payload is JSON-encoded onto a
 * single `data:` line and the frame ends with a blank line.
 *
 * @param {string} eventName
 * @param {*} data
 * @returns {string}
 */
function sseFrame(eventName, data) {
  const safeName = eventName.replace(/[\r\n]/g, "");
  const payload = JSON.stringify(data);
  return `event: ${safeName}\ndata: ${payload}\n\n`;
}
2069
/**
 * Create the request handler that streams events as server-sent events.
 *
 * With a `traceId` query parameter the handler subscribes to that trace;
 * otherwise it subscribes to the whole app. The response starts with a
 * `ready` frame, then relays each source event (after optional scrubbing)
 * under its `kind` as the SSE event name. The stream ends when the source
 * ends, the request is aborted, or the consumer cancels.
 *
 * @param {{installToken: string, extensionId: string, appId: string, scrub?: Function}} config
 * @param {(traceId: string) => ReadableStream} subscribe2 - Per-trace source.
 * @param {(opts: {appId: string}) => ReadableStream} subscribeApp2 - Per-app source.
 * @returns {(req: Request) => Promise<Response>}
 */
function createStreamHandler(config, subscribe2, subscribeApp2) {
  const { installToken, extensionId, appId, scrub } = config;

  // Closing a controller that is already closed or errored throws; every
  // shutdown path tolerates that.
  const closeQuietly = (controller) => {
    try {
      controller.close();
    } catch {
    }
  };

  return async function stream(req) {
    const authFail = checkAuth(req, { installToken, extensionId });
    if (authFail) return authFail;

    const traceId = new URL(req.url).searchParams.get("traceId");
    const source = traceId ? subscribe2(traceId) : subscribeApp2({ appId });
    const encoder = new TextEncoder();
    let sourceReader;
    let cancelled = false;

    // Mark the relay as finished and release the upstream subscription.
    const stopSource = () => {
      cancelled = true;
      sourceReader?.cancel().catch(() => {
      });
    };

    const body = new ReadableStream({
      async start(controller) {
        controller.enqueue(encoder.encode(sseFrame("ready", {})));
        sourceReader = source.getReader();

        const signal = req.signal;
        if (signal?.aborted) {
          // The client went away before streaming began.
          cancelled = true;
          await sourceReader.cancel();
          closeQuietly(controller);
          return;
        }
        signal?.addEventListener(
          "abort",
          () => {
            stopSource();
            closeQuietly(controller);
          },
          { once: true }
        );

        try {
          for (;;) {
            if (cancelled) break;
            const { value, done } = await sourceReader.read();
            if (done) break;
            const scrubbed = applyScrub2(value, scrub);
            controller.enqueue(
              encoder.encode(sseFrame(scrubbed.kind, scrubbed))
            );
          }
        } catch {
          // Source or enqueue failure: fall through and end the stream.
        } finally {
          closeQuietly(controller);
          try {
            sourceReader?.releaseLock();
          } catch {
          }
        }
      },
      cancel() {
        stopSource();
      }
    });

    return new Response(body, { status: 200, headers: SSE_HEADERS });
  };
}
2139
+
2140
+ // src/glassbox/types.ts
2141
// Idle lifetime of a stream subscription in milliseconds (60 seconds). Both
// pub/sub implementations end a subscription after this long without an
// event, and the Upstash stream key expires after the same period.
var GLASSBOX_STREAM_TTL_MS = 60 * 1e3;
2142
+
2143
+ // src/glassbox/pubsub-memory.ts
2144
/**
 * In-process pub/sub. Each channel key maps to a set of subscribers; every
 * subscriber owns a ReadableStream controller and a rolling idle timer that
 * closes the stream after GLASSBOX_STREAM_TTL_MS without an event.
 */
var MemoryPubSub = class {
  subscribers = /* @__PURE__ */ new Map();
  /**
   * Deliver an event to every live subscriber of a channel.
   *
   * A subscriber whose controller rejects the event is dropped immediately,
   * with its timer cleared and its entry removed, so it cannot linger in the
   * fan-out set.
   */
  async publish(channelKey, event) {
    const subs = this.subscribers.get(channelKey);
    if (!subs || subs.size === 0) return;
    // Iterate over a snapshot: failed subscribers are removed during fan-out.
    for (const sub of [...subs]) {
      if (sub.closed) continue;
      try {
        sub.controller.enqueue(event);
      } catch {
        sub.closed = true;
        this.removeSubscriber(channelKey, sub);
        continue;
      }
      this.refreshTtl(channelKey, sub);
    }
  }
  /**
   * Open a stream of events for a channel. The subscriber is registered as
   * soon as the stream is constructed and removed when the consumer cancels.
   */
  subscribe(channelKey) {
    const self = this;
    let sub;
    return new ReadableStream({
      start(controller) {
        sub = {
          controller,
          ttlTimer: setTimeout(() => {
            self.closeSubscriber(channelKey, sub);
          }, GLASSBOX_STREAM_TTL_MS),
          closed: false
        };
        let set = self.subscribers.get(channelKey);
        if (!set) {
          set = /* @__PURE__ */ new Set();
          self.subscribers.set(channelKey, set);
        }
        set.add(sub);
      },
      cancel() {
        if (sub) self.removeSubscriber(channelKey, sub);
      }
    });
  }
  /**
   * Refresh the rolling TTL for a subscriber after an event lands. Replaces
   * the existing timer with a fresh 60s one.
   */
  refreshTtl(channelKey, sub) {
    clearTimeout(sub.ttlTimer);
    sub.ttlTimer = setTimeout(() => {
      this.closeSubscriber(channelKey, sub);
    }, GLASSBOX_STREAM_TTL_MS);
  }
  /**
   * Close the subscriber's stream cleanly and remove from the fan-out set.
   * Idempotent — safe to call multiple times.
   */
  closeSubscriber(channelKey, sub) {
    if (sub.closed) return;
    sub.closed = true;
    clearTimeout(sub.ttlTimer);
    try {
      sub.controller.close();
    } catch {
    }
    this.removeSubscriber(channelKey, sub);
  }
  /**
   * Clear the subscriber's timer and drop it from its channel's set, deleting
   * the channel entry once the set is empty.
   */
  removeSubscriber(channelKey, sub) {
    clearTimeout(sub.ttlTimer);
    const set = this.subscribers.get(channelKey);
    if (!set) return;
    set.delete(sub);
    if (set.size === 0) this.subscribers.delete(channelKey);
  }
  /**
   * Test-only reset. Tears down all subscribers, clears all state. Calling
   * outside of tests is harmless but cancels every active stream.
   */
  _reset() {
    // Snapshots keep iteration stable while closeSubscriber edits the map.
    for (const [channelKey, set] of [...this.subscribers]) {
      for (const sub of [...set]) {
        this.closeSubscriber(channelKey, sub);
      }
    }
    this.subscribers.clear();
  }
};
2228
+
2229
+ // src/glassbox/pubsub-upstash.ts
2230
/**
 * Pub/sub backed by a Redis stream reached through an HTTP command endpoint.
 * Events are appended with XADD and consumed by polling XREAD.
 */
var UpstashPubSub = class {
  url;
  token;
  fetchImpl;
  blockMs;
  maxLen;
  /**
   * @param {object} cfg
   * @param {string} cfg.url - Command endpoint; trailing slashes are stripped.
   * @param {string} cfg.token - Bearer token sent with every command.
   * @param {Function} [cfg.fetchImpl] - Fetch override; defaults to `globalThis.fetch`.
   * @param {number} [cfg.blockMs=100] - XREAD BLOCK duration per poll.
   * @param {number} [cfg.maxLen=100] - Approximate XADD MAXLEN cap.
   */
  constructor(cfg) {
    this.url = cfg.url.replace(/\/+$/, "");
    this.token = cfg.token;
    this.fetchImpl = cfg.fetchImpl ?? globalThis.fetch.bind(globalThis);
    this.blockMs = cfg.blockMs ?? 100;
    this.maxLen = cfg.maxLen ?? 100;
  }
  /**
   * Append an event to the channel's stream and refresh the key's expiry.
   */
  async publish(channelKey, event) {
    const key = channelKey;
    const payload = JSON.stringify(event);
    await this.cmd([
      "XADD",
      key,
      "MAXLEN",
      "~",
      String(this.maxLen),
      "*",
      "event",
      payload
    ]);
    await this.cmd(["EXPIRE", key, String(Math.ceil(GLASSBOX_STREAM_TTL_MS / 1e3))]);
  }
  /**
   * Open a stream of events for a channel.
   *
   * Polls XREAD until the consumer cancels or GLASSBOX_STREAM_TTL_MS passes
   * without an event. The starting cursor is pinned to the newest entry id
   * present when the subscription opens. Repeating `$` on every poll would
   * lose any event published between two polls, because `$` only matches
   * entries added while that particular call is blocking.
   */
  subscribe(channelKey) {
    const key = channelKey;
    const self = this;
    let cancelled = false;
    let ttlDeadline = Date.now() + GLASSBOX_STREAM_TTL_MS;

    // Resolve a concrete start id: the newest existing entry, or "0-0" when
    // the stream is empty or missing. If the lookup fails or returns an
    // unexpected shape, fall back to "$" rather than failing the subscription.
    const resolveStartCursor = async () => {
      try {
        const resp = await self.cmd(["XREVRANGE", key, "+", "-", "COUNT", "1"]);
        if (!Array.isArray(resp.result)) return "$";
        const newest = resp.result[0];
        if (Array.isArray(newest) && newest.length > 0) return String(newest[0]);
        return "0-0";
      } catch {
        return "$";
      }
    };

    return new ReadableStream({
      async start(controller) {
        try {
          let cursor = await resolveStartCursor();
          while (!cancelled && Date.now() < ttlDeadline) {
            const resp = await self.cmd([
              "XREAD",
              "BLOCK",
              String(self.blockMs),
              "STREAMS",
              key,
              cursor
            ]);
            if (cancelled) break;
            const parsed = parseXReadResult(resp.result);
            if (parsed.entries.length === 0) {
              continue;
            }
            for (const entry of parsed.entries) {
              const evt = decodeEvent(entry.fields);
              if (evt) {
                try {
                  controller.enqueue(evt);
                } catch {
                  // The consumer side is gone; stop polling.
                  cancelled = true;
                  break;
                }
              }
              cursor = entry.id;
            }
            // Activity extends the subscription's lifetime.
            ttlDeadline = Date.now() + GLASSBOX_STREAM_TTL_MS;
          }
        } catch (err) {
          if (!cancelled) {
            try {
              controller.error(err);
            } catch {
            }
            return;
          }
        }
        try {
          controller.close();
        } catch {
        }
      },
      cancel() {
        cancelled = true;
      }
    });
  }
  /**
   * Send one command as a JSON array and return the parsed response.
   *
   * @throws {Error} On a non-2xx status or when the response carries `error`.
   */
  async cmd(args) {
    const res = await this.fetchImpl(this.url, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${this.token}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify(args)
    });
    if (!res.ok) {
      throw new Error(`Upstash ${args[0]} failed: HTTP ${res.status}`);
    }
    const json = await res.json();
    if (json.error) {
      throw new Error(`Upstash ${args[0]} failed: ${json.error}`);
    }
    return json;
  }
};
2333
/**
 * Channel key for events belonging to a single trace.
 *
 * @param {string} traceId
 * @returns {string}
 */
function traceChannel(traceId) {
  const channel = `glassbox:trace:${traceId}`;
  return channel;
}
2336
/**
 * Channel key for all events belonging to an app.
 *
 * @param {string} appId
 * @returns {string}
 */
function appChannel(appId) {
  const channel = `glassbox:app:${appId}`;
  return channel;
}
2339
/**
 * Decode the JSON event stored in a stream entry's `event` field.
 *
 * @param {Object<string, string>} fields - Field map of one stream entry.
 * @returns {object|undefined} The parsed event when it has a string `kind`
 *   and a numeric `at`; `undefined` when the field is missing, is not valid
 *   JSON, or does not have that shape.
 */
function decodeEvent(fields) {
  const raw = fields["event"];
  if (!raw) return undefined;
  try {
    const parsed = JSON.parse(raw);
    const wellFormed = typeof parsed.kind === "string" && typeof parsed.at === "number";
    return wellFormed ? parsed : undefined;
  } catch {
    // Invalid JSON, or a JSON `null` whose property access throws.
    return undefined;
  }
}
2352
/**
 * Flatten a raw XREAD reply into a list of entries.
 *
 * The expected shape is an array of `[streamName, entries]` pairs where each
 * entry is `[id, [field1, value1, field2, value2, ...]]`. Anything that does
 * not match that shape is skipped. Field values are stringified, and a
 * missing or null value becomes `""`.
 *
 * @param {*} raw
 * @returns {{entries: Array<{id: string, fields: Object<string, string>}>}}
 */
function parseXReadResult(raw) {
  const entries = [];
  if (!Array.isArray(raw)) return { entries };

  const isPair = (value) => Array.isArray(value) && value.length >= 2;

  raw.forEach((stream) => {
    if (!isPair(stream) || !Array.isArray(stream[1])) return;
    stream[1].forEach((entry) => {
      if (!isPair(entry)) return;
      const id = String(entry[0]);
      const flat = entry[1];
      if (!Array.isArray(flat)) return;

      const fields = {};
      for (let i = 0; i < flat.length; i += 2) {
        const name = flat[i];
        if (typeof name !== "string") continue;
        fields[name] = String(flat[i + 1] ?? "");
      }
      entries.push({ id, fields });
    });
  });

  return { entries };
}
2375
+
2376
+ // src/glassbox/emit.ts
2377
// Lazily created pub/sub instance reused by getPubSub.
var activePubSub;

/**
 * Return the shared pub/sub instance, creating it on first use.
 *
 * The Upstash implementation is chosen when both `UPSTASH_REDIS_URL` and
 * `UPSTASH_REDIS_TOKEN` are set; otherwise the in-memory one is used. The
 * choice is made once and then reused.
 *
 * @returns {object}
 */
function getPubSub() {
  if (!activePubSub) {
    const url = readEnv("UPSTASH_REDIS_URL");
    const token = readEnv("UPSTASH_REDIS_TOKEN");
    activePubSub = url && token
      ? new UpstashPubSub({ url, token })
      : new MemoryPubSub();
  }
  return activePubSub;
}
2389
/**
 * Read an environment variable, treating blank values as unset.
 *
 * Works in runtimes without `process`: any failure while reading yields
 * `undefined`.
 *
 * @param {string} key
 * @returns {string|undefined}
 */
function readEnv(key) {
  try {
    const hasEnv = typeof process !== "undefined" && Boolean(process.env);
    if (!hasEnv) return undefined;
    const value = process.env[key];
    if (!value || value.trim() === "") return undefined;
    return value;
  } catch {
    return undefined;
  }
}
2399
+
2400
+ // src/glassbox/subscribe.ts
2401
/**
 * Create a stream that is already closed and yields nothing.
 *
 * @returns {ReadableStream}
 */
function emptyStream() {
  const closeImmediately = (controller) => controller.close();
  return new ReadableStream({ start: closeImmediately });
}
2408
/**
 * Subscribe to the events of one trace.
 *
 * @param {string} traceId
 * @returns {ReadableStream} The trace's event stream, or an already-closed
 *   stream when `traceId` is empty.
 */
function subscribe(traceId) {
  return traceId
    ? getPubSub().subscribe(traceChannel(traceId))
    : emptyStream();
}
2412
/**
 * Subscribe to every event of one app.
 *
 * @param {{appId: string}} opts
 * @returns {ReadableStream} The app's event stream, or an already-closed
 *   stream when `appId` is empty.
 */
function subscribeApp({ appId }) {
  return appId
    ? getPubSub().subscribe(appChannel(appId))
    : emptyStream();
}
2418
+
2419
+ // src/glassbox-routes/index.ts
2420
/**
 * Assert that a config value is a non-empty string.
 *
 * @param {string} name - Config key, used in the error message.
 * @param {*} value
 * @returns {string} The validated value.
 * @throws {Error} When the value is missing, empty, or not a string.
 */
function requireString(name, value) {
  const present = typeof value === "string" && value.length !== 0;
  if (present) return value;
  throw new Error(`createGlassboxRoutes: ${name} is required`);
}
2426
/**
 * Build the two Glassbox route handlers from one config object.
 *
 * @param {object} config
 * @param {string} config.installToken - Required.
 * @param {string} config.extensionId - Required.
 * @param {string} config.brainEndpoint - Required.
 * @param {string} config.brainJwt - Required.
 * @param {string} config.brainAnonKey - Required.
 * @param {string} config.appId - Required.
 * @param {Function} [config.scrub] - Optional scrubber shared by both handlers.
 * @param {Function} [config.fetch] - Optional fetch override for the proxy.
 * @param {Function} [config.subscribe] - Overrides the per-trace subscription.
 * @param {Function} [config.subscribeApp] - Overrides the per-app subscription.
 * @returns {{proxy: Function, stream: Function}}
 * @throws {Error} When any required string is missing or empty.
 */
function createGlassboxRoutes(config) {
  // Validated in declaration order, so the first missing key is the one reported.
  const required = {
    installToken: requireString("installToken", config.installToken),
    extensionId: requireString("extensionId", config.extensionId),
    brainEndpoint: requireString("brainEndpoint", config.brainEndpoint),
    brainJwt: requireString("brainJwt", config.brainJwt),
    brainAnonKey: requireString("brainAnonKey", config.brainAnonKey),
    appId: requireString("appId", config.appId)
  };

  const proxy = createProxyHandler({
    ...required,
    scrub: config.scrub,
    fetch: config.fetch
  });

  const streamConfig = {
    installToken: required.installToken,
    extensionId: required.extensionId,
    appId: required.appId,
    scrub: config.scrub
  };
  const stream = createStreamHandler(
    streamConfig,
    config.subscribe ?? subscribe,
    config.subscribeApp ?? subscribeApp
  );

  return { proxy, stream };
}
2455
+ // Annotate the CommonJS export names for ESM import in node:
2456
+ 0 && (module.exports = {
2457
+ createGlassboxRoutes
2458
+ });