@warmdrift/kgauto-compiler 2.0.0-alpha.15 → 2.0.0-alpha.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-SFF5EVTL.mjs → chunk-7MTHFSNY.mjs} +209 -0
- package/dist/chunk-NUTC7NUC.mjs +298 -0
- package/dist/glassbox/index.d.mts +159 -0
- package/dist/glassbox/index.d.ts +159 -0
- package/dist/glassbox/index.js +300 -0
- package/dist/glassbox/index.mjs +20 -0
- package/dist/index.d.mts +4 -2
- package/dist/index.d.ts +4 -2
- package/dist/index.js +624 -9
- package/dist/index.mjs +136 -10
- package/dist/{profiles-DTnIzGsA.d.mts → ir-C3P4gDt0.d.mts} +30 -134
- package/dist/{profiles-D0y6aLk0.d.ts → ir-CFHU3BUT.d.ts} +30 -134
- package/dist/profiles.d.mts +137 -2
- package/dist/profiles.d.ts +137 -2
- package/dist/profiles.js +209 -0
- package/dist/profiles.mjs +1 -1
- package/package.json +7 -2
package/dist/index.js
CHANGED
|
@@ -1475,6 +1475,215 @@ var PROFILES_RAW = [
|
|
|
1475
1475
|
// sequential tools — same as V4-Flash
|
|
1476
1476
|
}
|
|
1477
1477
|
},
|
|
1478
|
+
// ── OpenAI ──
|
|
1479
|
+
// alpha.16 (2026-05-17): close the half-supported provider gap. env.ts
|
|
1480
|
+
// already registered OPENAI_API_KEY + executeOpenAI + normalizeOpenAILike
|
|
1481
|
+
// + lowerOpenAI all existed; profile entries were missing, so the
|
|
1482
|
+
// alpha.10 auto-filter would mark openai-keyed models reachable but
|
|
1483
|
+
// there were no profiles to filter IN. Half-supported is now fully
|
|
1484
|
+
// supported. PB request `openai-provider-profiles` (2026-05-16).
|
|
1485
|
+
//
|
|
1486
|
+
// Profile data verified against developers.openai.com/api/docs/pricing
|
|
1487
|
+
// + per-model pages 2026-05-17. L-049/L-081 step-zero: no AI-trained
|
|
1488
|
+
// numbers — fetched live from OpenAI's docs. As of 2026-05, OpenAI's
|
|
1489
|
+
// current flagship is gpt-5.5 (2025-12 cutoff); gpt-5.4-{base,mini,nano}
|
|
1490
|
+
// are the workhorse family. gpt-4.1 + gpt-4o are legacy.
|
|
1491
|
+
//
|
|
1492
|
+
// Both 5.5 and 5.4 carry a 272K input-token pricing cliff (2x input,
|
|
1493
|
+
// 1.5x output beyond that). Modeled as a `downgrade_quality_warning`
|
|
1494
|
+
// cliff because it ranks the model down at large-context shapes — the
|
|
1495
|
+
// semantics of "this model is now 2x more expensive" map onto the
|
|
1496
|
+
// existing penalty mechanism. Cost-watcher will catch high-context
|
|
1497
|
+
// spikes empirically; the cliff prevents naive routing into the doubled
|
|
1498
|
+
// pricing zone.
|
|
1499
|
+
{
|
|
1500
|
+
id: "gpt-5.5",
|
|
1501
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
1502
|
+
provider: "openai",
|
|
1503
|
+
status: "current",
|
|
1504
|
+
maxContextTokens: 105e4,
|
|
1505
|
+
maxOutputTokens: 128e3,
|
|
1506
|
+
maxTools: 64,
|
|
1507
|
+
parallelToolCalls: true,
|
|
1508
|
+
structuredOutput: "native",
|
|
1509
|
+
systemPromptMode: "inline",
|
|
1510
|
+
streaming: true,
|
|
1511
|
+
cliffs: [
|
|
1512
|
+
{
|
|
1513
|
+
metric: "input_tokens",
|
|
1514
|
+
threshold: 272e3,
|
|
1515
|
+
action: "downgrade_quality_warning",
|
|
1516
|
+
reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
|
|
1517
|
+
}
|
|
1518
|
+
],
|
|
1519
|
+
costInputPer1m: 5,
|
|
1520
|
+
costOutputPer1m: 30,
|
|
1521
|
+
lowering: {
|
|
1522
|
+
system: { mode: "inline" },
|
|
1523
|
+
// OpenAI caching is implicit (auto-applied to repeated prefixes
|
|
1524
|
+
// ≥1024 tokens for prompt_tokens_details.cached_tokens). No
|
|
1525
|
+
// wire-format marker. Discount: 10x for cached input ($0.50/$5.00).
|
|
1526
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
1527
|
+
tools: { format: "openai" }
|
|
1528
|
+
},
|
|
1529
|
+
recovery: [
|
|
1530
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
1531
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
1532
|
+
],
|
|
1533
|
+
strengths: ["reasoning", "agentic_coding", "long_context", "structured_output", "reliable_tool_use", "reasoning_effort_knob"],
|
|
1534
|
+
weaknesses: ["cost", "pricing_cliff_at_272k"],
|
|
1535
|
+
notes: "OpenAI frontier (2026-05). 1M context (1.05M total), 128K max output, 2025-12 cutoff. Reasoning effort knob (none/low/medium/high/xhigh). Pricing cliff at 272K input.",
|
|
1536
|
+
// Frontier-tier perf hypothesis. Anchored to Opus 4.7 row (similar
|
|
1537
|
+
// price/positioning). Brain evidence will refine; no telemetry yet.
|
|
1538
|
+
archetypePerf: {
|
|
1539
|
+
critique: 9,
|
|
1540
|
+
plan: 9,
|
|
1541
|
+
generate: 9,
|
|
1542
|
+
ask: 9,
|
|
1543
|
+
extract: 9,
|
|
1544
|
+
transform: 9,
|
|
1545
|
+
hunt: 8,
|
|
1546
|
+
// parallel tool support good but cliff at 272K hurts deep multi-step
|
|
1547
|
+
summarize: 7,
|
|
1548
|
+
// overkill for tolerant archetype
|
|
1549
|
+
classify: 7
|
|
1550
|
+
// overkill; cheaper models cover this
|
|
1551
|
+
}
|
|
1552
|
+
},
|
|
1553
|
+
{
|
|
1554
|
+
id: "gpt-5.4",
|
|
1555
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
1556
|
+
provider: "openai",
|
|
1557
|
+
status: "current",
|
|
1558
|
+
maxContextTokens: 105e4,
|
|
1559
|
+
maxOutputTokens: 128e3,
|
|
1560
|
+
maxTools: 64,
|
|
1561
|
+
parallelToolCalls: true,
|
|
1562
|
+
structuredOutput: "native",
|
|
1563
|
+
systemPromptMode: "inline",
|
|
1564
|
+
streaming: true,
|
|
1565
|
+
cliffs: [
|
|
1566
|
+
{
|
|
1567
|
+
metric: "input_tokens",
|
|
1568
|
+
threshold: 272e3,
|
|
1569
|
+
action: "downgrade_quality_warning",
|
|
1570
|
+
reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
|
|
1571
|
+
}
|
|
1572
|
+
],
|
|
1573
|
+
costInputPer1m: 2.5,
|
|
1574
|
+
costOutputPer1m: 15,
|
|
1575
|
+
lowering: {
|
|
1576
|
+
system: { mode: "inline" },
|
|
1577
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
1578
|
+
tools: { format: "openai" }
|
|
1579
|
+
},
|
|
1580
|
+
recovery: [
|
|
1581
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
1582
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
1583
|
+
],
|
|
1584
|
+
strengths: ["reasoning", "long_context", "structured_output", "reliable_tool_use"],
|
|
1585
|
+
weaknesses: ["pricing_cliff_at_272k"],
|
|
1586
|
+
notes: "OpenAI workhorse (2026-05). 1M context (1.05M total), 128K max output, 2025-08 cutoff. Pricing cliff at 272K input. Pairs cleanly with Sonnet 4.6 on cost ($2.50/$15.00 vs $3.00/$15.00).",
|
|
1587
|
+
// Anchored to Sonnet 4.6 row (similar price/positioning). Slight
|
|
1588
|
+
// anthropic-side edge on agentic coding per master plan vibe.
|
|
1589
|
+
archetypePerf: {
|
|
1590
|
+
critique: 8,
|
|
1591
|
+
plan: 8,
|
|
1592
|
+
generate: 8,
|
|
1593
|
+
ask: 8,
|
|
1594
|
+
extract: 8,
|
|
1595
|
+
transform: 8,
|
|
1596
|
+
hunt: 7,
|
|
1597
|
+
summarize: 7,
|
|
1598
|
+
classify: 7
|
|
1599
|
+
}
|
|
1600
|
+
},
|
|
1601
|
+
{
|
|
1602
|
+
id: "gpt-5.4-mini",
|
|
1603
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
1604
|
+
provider: "openai",
|
|
1605
|
+
status: "current",
|
|
1606
|
+
maxContextTokens: 4e5,
|
|
1607
|
+
maxOutputTokens: 128e3,
|
|
1608
|
+
maxTools: 64,
|
|
1609
|
+
parallelToolCalls: true,
|
|
1610
|
+
structuredOutput: "native",
|
|
1611
|
+
systemPromptMode: "inline",
|
|
1612
|
+
streaming: true,
|
|
1613
|
+
cliffs: [],
|
|
1614
|
+
costInputPer1m: 0.75,
|
|
1615
|
+
costOutputPer1m: 4.5,
|
|
1616
|
+
lowering: {
|
|
1617
|
+
system: { mode: "inline" },
|
|
1618
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
1619
|
+
tools: { format: "openai" }
|
|
1620
|
+
},
|
|
1621
|
+
recovery: [
|
|
1622
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
1623
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
1624
|
+
],
|
|
1625
|
+
strengths: ["cost", "speed", "agentic_coding", "structured_output", "reliable_tool_use"],
|
|
1626
|
+
weaknesses: ["reasoning_depth"],
|
|
1627
|
+
notes: "OpenAI mini-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. OpenAI describes as 'strongest mini model for coding, computer use, subagents.' Cache discount 10x ($0.075 input).",
|
|
1628
|
+
// Mini-tier hypothesis. Anchored to Haiku 4.5 + Flash row pricing.
|
|
1629
|
+
// Cost is slightly higher than Haiku ($0.75 vs $0.50 input) but
|
|
1630
|
+
// OpenAI claims strong coding/subagent perf.
|
|
1631
|
+
archetypePerf: {
|
|
1632
|
+
ask: 7,
|
|
1633
|
+
generate: 7,
|
|
1634
|
+
extract: 7,
|
|
1635
|
+
transform: 7,
|
|
1636
|
+
classify: 7,
|
|
1637
|
+
summarize: 7,
|
|
1638
|
+
hunt: 7,
|
|
1639
|
+
plan: 6,
|
|
1640
|
+
critique: 5
|
|
1641
|
+
// reasoning depth gap — frontier models handle this
|
|
1642
|
+
}
|
|
1643
|
+
},
|
|
1644
|
+
{
|
|
1645
|
+
id: "gpt-5.4-nano",
|
|
1646
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
1647
|
+
provider: "openai",
|
|
1648
|
+
status: "current",
|
|
1649
|
+
maxContextTokens: 4e5,
|
|
1650
|
+
maxOutputTokens: 128e3,
|
|
1651
|
+
maxTools: 64,
|
|
1652
|
+
parallelToolCalls: true,
|
|
1653
|
+
structuredOutput: "native",
|
|
1654
|
+
systemPromptMode: "inline",
|
|
1655
|
+
streaming: true,
|
|
1656
|
+
cliffs: [],
|
|
1657
|
+
costInputPer1m: 0.2,
|
|
1658
|
+
costOutputPer1m: 1.25,
|
|
1659
|
+
lowering: {
|
|
1660
|
+
system: { mode: "inline" },
|
|
1661
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
1662
|
+
tools: { format: "openai" }
|
|
1663
|
+
},
|
|
1664
|
+
recovery: [
|
|
1665
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
1666
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
1667
|
+
],
|
|
1668
|
+
strengths: ["cost", "speed", "volume", "structured_output"],
|
|
1669
|
+
weaknesses: ["reasoning_depth", "no_computer_use"],
|
|
1670
|
+
notes: "OpenAI nano-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. 'Cheapest GPT-5.4-class for simple high-volume tasks.' No fine-tuning, no computer-use tools. Cache discount 10x.",
|
|
1671
|
+
// Nano-tier. Anchored to Flash-Lite row ($0.10/$0.40 vs nano's
|
|
1672
|
+
// $0.20/$1.25). Slightly more expensive than Flash-Lite but with
|
|
1673
|
+
// OpenAI brand reliability. Good fit for classify/summarize floor.
|
|
1674
|
+
archetypePerf: {
|
|
1675
|
+
classify: 7,
|
|
1676
|
+
summarize: 6,
|
|
1677
|
+
ask: 6,
|
|
1678
|
+
transform: 6,
|
|
1679
|
+
extract: 6,
|
|
1680
|
+
generate: 5,
|
|
1681
|
+
hunt: 5,
|
|
1682
|
+
plan: 4,
|
|
1683
|
+
critique: 3
|
|
1684
|
+
// not for reasoning archetypes
|
|
1685
|
+
}
|
|
1686
|
+
},
|
|
1478
1687
|
// ── Auto-onboarded (UNVERIFIED) ──
|
|
1479
1688
|
// Cloned by scripts/auto-onboard-models.mjs from a same-family template.
|
|
1480
1689
|
// Each entry's pricing/context/cliffs/lowering reflects the template, NOT
|
|
@@ -2563,10 +2772,14 @@ var loadChainsFromBrain = createBrainQueryCache({
|
|
|
2563
2772
|
// src/fallback.ts
|
|
2564
2773
|
var STARTER_CHAINS = {
|
|
2565
2774
|
// Reasoning floor — never degrade. Walk UP on 429 to Opus → cross-provider.
|
|
2775
|
+
// alpha.16: gpt-5.5 appended as third-provider critique floor (frontier-tier,
|
|
2776
|
+
// archetypePerf=9). Cross-provider-tail invariant has somewhere to land when
|
|
2777
|
+
// both Anthropic + Google are unreachable (consumer adds only OpenAI key).
|
|
2566
2778
|
critique: [
|
|
2567
2779
|
"claude-opus-4-7",
|
|
2568
2780
|
"claude-sonnet-4-6",
|
|
2569
|
-
"gemini-2.5-pro"
|
|
2781
|
+
"gemini-2.5-pro",
|
|
2782
|
+
"gpt-5.5"
|
|
2570
2783
|
],
|
|
2571
2784
|
// Reasoning matters — Sonnet primary; walk UP to Opus on 429 (rare exception
|
|
2572
2785
|
// to "always cheaper"); cross-provider via Pro; DeepSeek Pro as tier 3 floor.
|
|
@@ -2577,25 +2790,29 @@ var STARTER_CHAINS = {
|
|
|
2577
2790
|
"deepseek-v4-pro"
|
|
2578
2791
|
],
|
|
2579
2792
|
// Quality + cost match. Walk Sonnet → Haiku same-provider, Pro cross,
|
|
2580
|
-
//
|
|
2793
|
+
// gpt-5.4-mini as third-provider tail (alpha.16 — closes the mono-Anthropic
|
|
2794
|
+
// gap when consumer has only ANTHROPIC + OPENAI keys; archetypePerf=7).
|
|
2581
2795
|
generate: [
|
|
2582
2796
|
"claude-sonnet-4-6",
|
|
2583
2797
|
"claude-haiku-4-5",
|
|
2584
2798
|
"gemini-2.5-pro",
|
|
2585
|
-
"
|
|
2799
|
+
"gpt-5.4-mini"
|
|
2586
2800
|
],
|
|
2587
2801
|
ask: [
|
|
2588
2802
|
"claude-sonnet-4-6",
|
|
2589
2803
|
"claude-haiku-4-5",
|
|
2590
2804
|
"gemini-2.5-pro",
|
|
2591
|
-
"
|
|
2805
|
+
"gpt-5.4-mini"
|
|
2592
2806
|
],
|
|
2593
2807
|
// Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff),
|
|
2594
|
-
// DeepSeek skipped (no brain evidence). Floor at Haiku.
|
|
2808
|
+
// DeepSeek skipped (no brain evidence). Floor at Haiku. alpha.16: gpt-5.4
|
|
2809
|
+
// appended as third-provider extract floor (archetypePerf=8, native
|
|
2810
|
+
// structured-output support).
|
|
2595
2811
|
extract: [
|
|
2596
2812
|
"claude-sonnet-4-6",
|
|
2597
2813
|
"claude-haiku-4-5",
|
|
2598
|
-
"gemini-2.5-pro"
|
|
2814
|
+
"gemini-2.5-pro",
|
|
2815
|
+
"gpt-5.4"
|
|
2599
2816
|
],
|
|
2600
2817
|
// Forgiving archetype — Sonnet primary but Flash safely floors it.
|
|
2601
2818
|
transform: [
|
|
@@ -2708,9 +2925,321 @@ function ensureCrossProviderTail(opts) {
|
|
|
2708
2925
|
return { chain };
|
|
2709
2926
|
}
|
|
2710
2927
|
|
|
2928
|
+
// src/glassbox/types.ts
|
|
2929
|
+
// Rolling lifetime of a Glassbox trace stream, in milliseconds (60 seconds).
// Used both for in-memory subscriber timers and for the Redis key EXPIRE.
var GLASSBOX_STREAM_TTL_MS = 60 * 1e3;
|
|
2930
|
+
|
|
2931
|
+
// src/glassbox/pubsub-memory.ts
|
|
2932
|
+
/**
 * In-process pub/sub that fans Glassbox events out to ReadableStream
 * subscribers, keyed by trace id.
 *
 * Each subscriber record is `{ controller, ttlTimer, closed }`. A subscriber
 * is closed automatically when no event has reached it for
 * GLASSBOX_STREAM_TTL_MS; every delivered event restarts that timer.
 */
var MemoryPubSub = class {
  /** Map of traceId -> Set of live subscriber records. */
  subscribers = /* @__PURE__ */ new Map();

  /**
   * Deliver `event` to every live subscriber of `traceId`.
   * A no-op when nobody is subscribed.
   *
   * @param {string} traceId
   * @param {object} event
   * @returns {Promise<void>}
   */
  async publish(traceId, event) {
    const subs = this.subscribers.get(traceId);
    if (!subs || subs.size === 0) return;
    // Deleting the current element while iterating a Set is safe in JS.
    for (const sub of subs) {
      if (sub.closed) continue;
      try {
        sub.controller.enqueue(event);
      } catch {
        // The stream can no longer accept chunks. Drop the record right
        // away: closeSubscriber() returns early once `closed` is set, so
        // leaving the record in the set would leak it (and its timer).
        sub.closed = true;
        this.removeSubscriber(traceId, sub);
        continue;
      }
      this.refreshTtl(traceId, sub);
    }
  }

  /**
   * Open a stream of events for `traceId`.
   *
   * @param {string} traceId
   * @returns {ReadableStream} stream that closes after the rolling TTL elapses
   */
  subscribe(traceId) {
    let sub;
    return new ReadableStream({
      start: (controller) => {
        sub = {
          controller,
          ttlTimer: setTimeout(() => {
            this.closeSubscriber(traceId, sub);
          }, GLASSBOX_STREAM_TTL_MS),
          closed: false
        };
        let set = this.subscribers.get(traceId);
        if (!set) {
          set = /* @__PURE__ */ new Set();
          this.subscribers.set(traceId, set);
        }
        set.add(sub);
      },
      cancel: () => {
        if (sub) this.removeSubscriber(traceId, sub);
      }
    });
  }

  /**
   * Refresh the rolling TTL for a subscriber after an event lands. Replaces
   * the existing timer with a fresh one of GLASSBOX_STREAM_TTL_MS.
   */
  refreshTtl(traceId, sub) {
    clearTimeout(sub.ttlTimer);
    sub.ttlTimer = setTimeout(() => {
      this.closeSubscriber(traceId, sub);
    }, GLASSBOX_STREAM_TTL_MS);
  }

  /**
   * Close the subscriber's stream cleanly and remove it from the fan-out set.
   * Idempotent — safe to call multiple times.
   */
  closeSubscriber(traceId, sub) {
    if (sub.closed) return;
    sub.closed = true;
    clearTimeout(sub.ttlTimer);
    try {
      sub.controller.close();
    } catch {
      // Already closed or errored by the consumer; nothing left to do.
    }
    this.removeSubscriber(traceId, sub);
  }

  /**
   * Forget a subscriber: stop its timer and delete it from the set for
   * `traceId`, dropping the map entry once the set is empty.
   */
  removeSubscriber(traceId, sub) {
    clearTimeout(sub.ttlTimer);
    const set = this.subscribers.get(traceId);
    if (!set) return;
    set.delete(sub);
    if (set.size === 0) this.subscribers.delete(traceId);
  }

  /**
   * Test-only reset. Tears down all subscribers, clears all state. Calling
   * outside of tests is harmless but cancels every active stream.
   */
  _reset() {
    // Snapshot first: closeSubscriber() mutates both the map and the sets.
    for (const [traceId, set] of [...this.subscribers]) {
      for (const sub of [...set]) {
        this.closeSubscriber(traceId, sub);
      }
    }
    this.subscribers.clear();
  }
};
|
|
3016
|
+
|
|
3017
|
+
// src/glassbox/pubsub-upstash.ts
|
|
3018
|
+
/**
 * Pub/sub backed by a Redis stream reached through an HTTP command endpoint
 * (each command is POSTed as a JSON array with a Bearer token).
 *
 * Events for a trace are appended with XADD to the key from streamKey() and
 * read back by polling XREAD.
 */
var UpstashPubSub = class {
  url;
  token;
  fetchImpl;
  blockMs;
  maxLen;

  /**
   * @param {object} cfg
   * @param {string} cfg.url command endpoint; one trailing slash is stripped
   * @param {string} cfg.token bearer token
   * @param {Function} [cfg.fetchImpl] fetch-compatible function (defaults to global fetch)
   * @param {number} [cfg.blockMs=100] XREAD BLOCK timeout per poll
   * @param {number} [cfg.maxLen=100] approximate MAXLEN cap applied on XADD
   * @throws {TypeError} when no usable fetch implementation is available
   */
  constructor(cfg) {
    this.url = cfg.url.replace(/\/$/, "");
    this.token = cfg.token;
    const fetchImpl = cfg.fetchImpl ?? (typeof globalThis.fetch === "function" ? globalThis.fetch.bind(globalThis) : void 0);
    if (typeof fetchImpl !== "function") {
      // Fail with an actionable message instead of "cannot read 'bind' of undefined".
      throw new TypeError("UpstashPubSub: no fetch implementation available; pass cfg.fetchImpl");
    }
    this.fetchImpl = fetchImpl;
    this.blockMs = cfg.blockMs ?? 100;
    this.maxLen = cfg.maxLen ?? 100;
  }

  /**
   * Append `event` (JSON-encoded under the "event" field) to the trace's
   * stream and refresh the key's expiry.
   *
   * @param {string} traceId
   * @param {object} event
   * @returns {Promise<void>}
   */
  async publish(traceId, event) {
    const key = streamKey(traceId);
    const payload = JSON.stringify(event);
    await this.cmd([
      "XADD",
      key,
      "MAXLEN",
      "~",
      String(this.maxLen),
      "*",
      "event",
      payload
    ]);
    await this.cmd(["EXPIRE", key, String(Math.ceil(GLASSBOX_STREAM_TTL_MS / 1e3))]);
  }

  /**
   * Open a stream of events for `traceId`. Only events added after the
   * subscription starts are delivered. The stream closes once no entry has
   * arrived for GLASSBOX_STREAM_TTL_MS, and errors if a command fails.
   *
   * @param {string} traceId
   * @returns {ReadableStream}
   */
  subscribe(traceId) {
    const key = streamKey(traceId);
    let cancelled = false;
    let ttlDeadline = Date.now() + GLASSBOX_STREAM_TTL_MS;

    const pump = async (controller) => {
      try {
        // Pin the cursor to a concrete ID up front. Re-sending "$" on every
        // poll would drop entries added between two XREAD calls.
        let cursor = await this.resolveStartCursor(key);
        while (!cancelled && Date.now() < ttlDeadline) {
          const resp = await this.cmd([
            "XREAD",
            "BLOCK",
            String(this.blockMs),
            "STREAMS",
            key,
            cursor
          ]);
          if (cancelled) break;
          const parsed = parseXReadResult(resp.result);
          if (parsed.entries.length === 0) {
            continue;
          }
          for (const entry of parsed.entries) {
            const evt = decodeEvent(entry.fields);
            if (evt) {
              try {
                controller.enqueue(evt);
              } catch {
                // Consumer side is gone; stop polling.
                cancelled = true;
                break;
              }
            }
            cursor = entry.id;
          }
          // Activity restarts the rolling TTL window.
          ttlDeadline = Date.now() + GLASSBOX_STREAM_TTL_MS;
        }
      } catch (err) {
        if (!cancelled) {
          try {
            controller.error(err);
          } catch {
            // Stream already closed or errored.
          }
          return;
        }
      }
      try {
        controller.close();
      } catch {
        // Stream already closed (e.g. cancelled by the reader).
      }
    };

    return new ReadableStream({
      start: (controller) => pump(controller),
      cancel: () => {
        cancelled = true;
      }
    });
  }

  /**
   * Resolve the ID to start reading after: the newest entry currently in the
   * stream, or "0-0" when the stream is empty or does not exist yet.
   *
   * @param {string} key
   * @returns {Promise<string>}
   */
  async resolveStartCursor(key) {
    const resp = await this.cmd(["XREVRANGE", key, "+", "-", "COUNT", "1"]);
    const newest = Array.isArray(resp.result) ? resp.result[0] : void 0;
    return Array.isArray(newest) && newest.length > 0 ? String(newest[0]) : "0-0";
  }

  /**
   * POST one command to the endpoint and return the decoded JSON body.
   *
   * @param {string[]} args command name followed by its arguments
   * @returns {Promise<object>} response body (callers read `.result`)
   * @throws {Error} on a non-2xx status or when the body carries `error`
   */
  async cmd(args) {
    const res = await this.fetchImpl(this.url, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${this.token}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify(args)
    });
    if (!res.ok) {
      throw new Error(`Upstash ${args[0]} failed: HTTP ${res.status}`);
    }
    const json = await res.json();
    if (json.error) {
      throw new Error(`Upstash ${args[0]} failed: ${json.error}`);
    }
    return json;
  }
};
|
|
3121
|
+
/**
 * Build the Redis stream key under which a trace's events are stored.
 *
 * @param {string} traceId
 * @returns {string} key of the form "glassbox:trace:<traceId>"
 */
function streamKey(traceId) {
  const prefix = "glassbox:trace:";
  return `${prefix}${traceId}`;
}
|
|
3124
|
+
/**
 * Decode the JSON payload stored in a stream entry's "event" field.
 *
 * @param {Record<string, string>} fields field map of one stream entry
 * @returns {object | undefined} the parsed event when it has a string `kind`
 *   and a numeric `at`; otherwise undefined (missing field, invalid JSON, or
 *   wrong shape)
 */
function decodeEvent(fields) {
  const serialized = fields["event"];
  if (!serialized) return void 0;

  let candidate;
  try {
    candidate = JSON.parse(serialized);
  } catch {
    // Not valid JSON — treat as "no event".
    return void 0;
  }

  // JSON.parse may yield null, which has no properties to inspect.
  const wellFormed = candidate !== null && typeof candidate.kind === "string" && typeof candidate.at === "number";
  return wellFormed ? candidate : void 0;
}
|
|
3137
|
+
/**
 * Flatten an XREAD reply into a list of `{ id, fields }` entries.
 *
 * The reply is expected as `[[streamName, [[id, [k1, v1, k2, v2, ...]], ...]], ...]`.
 * Anything that does not match that nesting is skipped rather than raising.
 *
 * @param {unknown} raw the `result` value of an XREAD response
 * @returns {{ entries: Array<{ id: string, fields: Record<string, string> }> }}
 */
function parseXReadResult(raw) {
  const entries = [];
  if (!Array.isArray(raw)) return { entries };

  for (const stream of raw) {
    const rows = Array.isArray(stream) && stream.length >= 2 ? stream[1] : void 0;
    if (!Array.isArray(rows)) continue;

    for (const row of rows) {
      if (!Array.isArray(row) || row.length < 2) continue;
      const id = String(row[0]);
      const pairs = row[1];
      if (!Array.isArray(pairs)) continue;

      // Fields arrive as a flat [name, value, name, value, ...] list.
      const fields = {};
      let at = 0;
      while (at < pairs.length) {
        const name = pairs[at];
        if (typeof name === "string") {
          fields[name] = String(pairs[at + 1] ?? "");
        }
        at += 2;
      }
      entries.push({ id, fields });
    }
  }
  return { entries };
}
|
|
3160
|
+
|
|
3161
|
+
// src/glassbox/emit.ts
|
|
3162
|
+
// Process-wide pub/sub backend, created on first use.
var activePubSub;

/**
 * Return the shared pub/sub backend, creating it on the first call.
 * Picks UpstashPubSub when both UPSTASH_REDIS_URL and UPSTASH_REDIS_TOKEN
 * are set, and falls back to MemoryPubSub otherwise.
 */
function getPubSub() {
  if (!activePubSub) {
    const url = readEnv("UPSTASH_REDIS_URL");
    const token = readEnv("UPSTASH_REDIS_TOKEN");
    activePubSub = url && token ? new UpstashPubSub({ url, token }) : new MemoryPubSub();
  }
  return activePubSub;
}
|
|
3174
|
+
/**
 * Read an environment variable without assuming `process` exists.
 *
 * @param {string} key variable name
 * @returns {string | undefined} the value as stored (not trimmed), or
 *   undefined when it is unset, blank, or the environment is unavailable
 */
function readEnv(key) {
  try {
    const env = typeof process === "undefined" ? void 0 : process.env;
    if (env) {
      const value = env[key];
      const hasContent = Boolean(value) && value.trim() !== "";
      return hasContent ? value : void 0;
    }
  } catch {
    // Environment access failed; treat the variable as unset.
  }
  return void 0;
}
|
|
3184
|
+
/**
 * Publish one Glassbox event for a trace. Best-effort: this never throws and
 * never rejects, so telemetry cannot break the caller.
 *
 * @param {string} traceId trace to publish on; falsy ids are ignored
 * @param {string} kind event kind, e.g. "compile.start"
 * @param {unknown} data event payload
 * @returns {void}
 */
function emitGlassboxEvent(traceId, kind, data) {
  if (!traceId) return;
  const event = { kind, at: Date.now(), data };
  try {
    // Backend lookup stays inside the try: constructing the backend can
    // throw, and that must not escape a best-effort emitter.
    const pending = getPubSub().publish(traceId, event);
    if (pending && typeof pending.then === "function") {
      // Swallow async publish failures; works for any thenable.
      pending.then(void 0, () => {
      });
    }
  } catch {
    // Synchronous backend failure — intentionally ignored.
  }
}
|
|
3197
|
+
// Typed convenience wrappers around emitGlassboxEvent, one per event kind.

/** Emit "compile.start" for the given trace. */
function emitCompileStart(traceId, data) {
  emitGlassboxEvent(traceId, "compile.start", data);
}

/** Emit "compile.done" for the given trace. */
function emitCompileDone(traceId, data) {
  emitGlassboxEvent(traceId, "compile.done", data);
}

/** Emit "execute.attempt" for the given trace. */
function emitExecuteAttempt(traceId, data) {
  emitGlassboxEvent(traceId, "execute.attempt", data);
}

/** Emit "execute.success" for the given trace. */
function emitExecuteSuccess(traceId, data) {
  emitGlassboxEvent(traceId, "execute.success", data);
}

/** Emit "advisory.fired" for the given trace. */
function emitAdvisoryFired(traceId, data) {
  emitGlassboxEvent(traceId, "advisory.fired", data);
}

/** Emit "fallback.walked" for the given trace. */
function emitFallbackWalked(traceId, data) {
  emitGlassboxEvent(traceId, "fallback.walked", data);
}
|
|
3215
|
+
|
|
2711
3216
|
// src/call.ts
|
|
2712
3217
|
async function call(ir, opts = {}) {
|
|
3218
|
+
const traceId = generateTraceId();
|
|
3219
|
+
safeEmit(
|
|
3220
|
+
() => emitCompileStart(traceId, {
|
|
3221
|
+
appId: ir.appId,
|
|
3222
|
+
archetype: ir.intent.archetype,
|
|
3223
|
+
models: ir.models
|
|
3224
|
+
})
|
|
3225
|
+
);
|
|
2713
3226
|
const initial = compileAndRegister(ir, opts);
|
|
3227
|
+
safeEmit(
|
|
3228
|
+
() => emitCompileDone(traceId, {
|
|
3229
|
+
target: initial.target,
|
|
3230
|
+
provider: initial.provider,
|
|
3231
|
+
fallbackChain: initial.fallbackChain,
|
|
3232
|
+
tokensIn: initial.tokensIn,
|
|
3233
|
+
estimatedCostUsd: initial.estimatedCostUsd,
|
|
3234
|
+
mutationsApplied: initial.mutationsApplied,
|
|
3235
|
+
advisories: initial.advisories
|
|
3236
|
+
})
|
|
3237
|
+
);
|
|
3238
|
+
for (const adv of initial.advisories) {
|
|
3239
|
+
safeEmit(
|
|
3240
|
+
() => emitAdvisoryFired(traceId, { code: adv.code, message: adv.message })
|
|
3241
|
+
);
|
|
3242
|
+
}
|
|
2714
3243
|
const start = Date.now();
|
|
2715
3244
|
const attempts = [];
|
|
2716
3245
|
const rawTargets = [initial.target, ...initial.fallbackChain];
|
|
@@ -2765,6 +3294,47 @@ async function call(ir, opts = {}) {
|
|
|
2765
3294
|
}
|
|
2766
3295
|
}
|
|
2767
3296
|
}
|
|
3297
|
+
let policyBlockedFiltered;
|
|
3298
|
+
if (opts.policy?.blockedModels && opts.policy.blockedModels.length > 0) {
|
|
3299
|
+
const blocked = new Set(opts.policy.blockedModels);
|
|
3300
|
+
const filtered = [];
|
|
3301
|
+
const dropped = [];
|
|
3302
|
+
for (const t of targetsToTry) {
|
|
3303
|
+
if (blocked.has(t)) {
|
|
3304
|
+
dropped.push(t);
|
|
3305
|
+
} else {
|
|
3306
|
+
filtered.push(t);
|
|
3307
|
+
}
|
|
3308
|
+
}
|
|
3309
|
+
if (dropped.length > 0) {
|
|
3310
|
+
policyBlockedFiltered = dropped;
|
|
3311
|
+
targetsToTry = filtered;
|
|
3312
|
+
}
|
|
3313
|
+
if (targetsToTry.length === 0) {
|
|
3314
|
+
const latencyMs2 = Date.now() - start;
|
|
3315
|
+
await record({
|
|
3316
|
+
handle: initial.handle,
|
|
3317
|
+
tokensIn: 0,
|
|
3318
|
+
tokensOut: 0,
|
|
3319
|
+
latencyMs: latencyMs2,
|
|
3320
|
+
success: false,
|
|
3321
|
+
errorType: "all_blocked_by_policy",
|
|
3322
|
+
promptPreview: extractPromptPreview(ir)
|
|
3323
|
+
});
|
|
3324
|
+
const blockedAttempts = dropped.map((m) => ({
|
|
3325
|
+
model: m,
|
|
3326
|
+
status: "terminal",
|
|
3327
|
+
errorCode: "blocked_by_policy",
|
|
3328
|
+
message: `Skipped \u2014 model ${m} is in CompilePolicy.blockedModels`
|
|
3329
|
+
}));
|
|
3330
|
+
throw new CallError(
|
|
3331
|
+
`call(): all chain targets blocked by CompilePolicy.blockedModels: [${dropped.join(", ")}]`,
|
|
3332
|
+
blockedAttempts,
|
|
3333
|
+
void 0,
|
|
3334
|
+
"all_blocked_by_policy"
|
|
3335
|
+
);
|
|
3336
|
+
}
|
|
3337
|
+
}
|
|
2768
3338
|
let activeCompile = initial;
|
|
2769
3339
|
let lastErr;
|
|
2770
3340
|
const failedProviders = /* @__PURE__ */ new Set();
|
|
@@ -2800,6 +3370,9 @@ async function call(ir, opts = {}) {
|
|
|
2800
3370
|
continue;
|
|
2801
3371
|
}
|
|
2802
3372
|
}
|
|
3373
|
+
safeEmit(
|
|
3374
|
+
() => emitExecuteAttempt(traceId, { model: targetModel, attemptIndex: i })
|
|
3375
|
+
);
|
|
2803
3376
|
const exec = await execute(activeCompile.request, {
|
|
2804
3377
|
apiKeys: opts.apiKeys,
|
|
2805
3378
|
fetchImpl: opts.fetchImpl,
|
|
@@ -2809,6 +3382,14 @@ async function call(ir, opts = {}) {
|
|
|
2809
3382
|
if (validated.ok) {
|
|
2810
3383
|
attempts.push({ model: targetModel, status: "success" });
|
|
2811
3384
|
const latencyMs2 = Date.now() - start;
|
|
3385
|
+
safeEmit(
|
|
3386
|
+
() => emitExecuteSuccess(traceId, {
|
|
3387
|
+
model: targetModel,
|
|
3388
|
+
tokensIn: validated.response.tokens.input,
|
|
3389
|
+
tokensOut: validated.response.tokens.output,
|
|
3390
|
+
latencyMs: latencyMs2
|
|
3391
|
+
})
|
|
3392
|
+
);
|
|
2812
3393
|
await record({
|
|
2813
3394
|
handle: initial.handle,
|
|
2814
3395
|
tokensIn: validated.response.tokens.input,
|
|
@@ -2825,6 +3406,20 @@ async function call(ir, opts = {}) {
|
|
|
2825
3406
|
cacheCreationInputTokens: validated.response.tokens.cacheCreated
|
|
2826
3407
|
});
|
|
2827
3408
|
const fellOver = targetModel !== initial.target;
|
|
3409
|
+
const fallbackReason = fellOver ? normalizeFallbackReason(attempts) : void 0;
|
|
3410
|
+
if (fellOver) {
|
|
3411
|
+
const firstFailed = attempts.find((a) => a.status !== "success");
|
|
3412
|
+
if (firstFailed) {
|
|
3413
|
+
safeEmit(
|
|
3414
|
+
() => emitFallbackWalked(traceId, {
|
|
3415
|
+
from: initial.target,
|
|
3416
|
+
to: targetModel,
|
|
3417
|
+
reason: fallbackReason ?? "unknown",
|
|
3418
|
+
attempt: firstFailed
|
|
3419
|
+
})
|
|
3420
|
+
);
|
|
3421
|
+
}
|
|
3422
|
+
}
|
|
2828
3423
|
return {
|
|
2829
3424
|
handle: initial.handle,
|
|
2830
3425
|
actualModel: targetModel,
|
|
@@ -2836,8 +3431,10 @@ async function call(ir, opts = {}) {
|
|
|
2836
3431
|
attempts,
|
|
2837
3432
|
servedBy: targetModel,
|
|
2838
3433
|
fellOverFrom: fellOver ? initial.target : void 0,
|
|
2839
|
-
fallbackReason
|
|
2840
|
-
unreachableFiltered
|
|
3434
|
+
fallbackReason,
|
|
3435
|
+
unreachableFiltered,
|
|
3436
|
+
policyBlockedFiltered,
|
|
3437
|
+
traceId
|
|
2841
3438
|
};
|
|
2842
3439
|
}
|
|
2843
3440
|
attempts.push({
|
|
@@ -2866,8 +3463,9 @@ async function call(ir, opts = {}) {
|
|
|
2866
3463
|
promptPreview: extractPromptPreview(ir)
|
|
2867
3464
|
});
|
|
2868
3465
|
const filteredNote = unreachableFiltered && unreachableFiltered.length > 0 ? ` (also auto-filtered: [${unreachableFiltered.join(", ")}] \u2014 no API key)` : "";
|
|
3466
|
+
const blockedNote = policyBlockedFiltered && policyBlockedFiltered.length > 0 ? ` (also policy-blocked: [${policyBlockedFiltered.join(", ")}])` : "";
|
|
2869
3467
|
throw new CallError(
|
|
2870
|
-
`call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}`,
|
|
3468
|
+
`call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}${blockedNote}`,
|
|
2871
3469
|
attempts,
|
|
2872
3470
|
lastErr?.status,
|
|
2873
3471
|
lastErr?.errorCode
|
|
@@ -2933,6 +3531,23 @@ function normalizeFallbackReason(attempts) {
|
|
|
2933
3531
|
if (code === "auth" || code === "auth_inferred") return "provider_auth_failed";
|
|
2934
3532
|
return "provider_error";
|
|
2935
3533
|
}
|
|
3534
|
+
/**
 * Generate a trace id formatted as a version-4 UUID.
 *
 * Prefers `crypto.randomUUID()`. When that is unavailable, 16 random bytes
 * are taken from `crypto.getRandomValues()` (or Math.random as a last resort)
 * and stamped with the RFC 4122 version and variant bits, so the fallback id
 * is also a well-formed v4 UUID.
 *
 * @param {object} [cryptoImpl] optional Web Crypto-like object; defaults to
 *   `globalThis.crypto`. Intended for tests and non-standard runtimes.
 * @returns {string} lowercase id of the form xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx
 */
function generateTraceId(cryptoImpl) {
  let source = cryptoImpl;
  if (source === void 0) {
    try {
      source = globalThis.crypto;
    } catch {
      source = void 0;
    }
  }

  try {
    if (source && typeof source.randomUUID === "function") {
      return source.randomUUID();
    }
  } catch {
    // randomUUID can throw in restricted contexts; use the manual path.
  }

  const bytes = new Uint8Array(16);
  let filled = false;
  try {
    if (source && typeof source.getRandomValues === "function") {
      source.getRandomValues(bytes);
      filled = true;
    }
  } catch {
    filled = false;
  }
  if (!filled) {
    // Not cryptographically strong, but trace ids only need to be unique.
    for (let i = 0; i < bytes.length; i += 1) {
      bytes[i] = Math.floor(Math.random() * 256);
    }
  }

  bytes[6] = bytes[6] & 15 | 64; // version 4
  bytes[8] = bytes[8] & 63 | 128; // RFC 4122 variant

  const hex = Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
  return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20)}`;
}
|
|
3545
|
+
/**
 * Run a telemetry callback and discard anything it throws, so that emitting
 * an event can never interrupt the surrounding call.
 *
 * @param {() => void} fn callback to invoke
 * @returns {void}
 */
function safeEmit(fn) {
  try {
    fn();
  } catch {
    // Telemetry is best-effort; failures are intentionally ignored.
  }
}
|
|
2936
3551
|
|
|
2937
3552
|
// src/oracle.ts
|
|
2938
3553
|
var DEFAULT_DIMENSIONS = ["correctness", "completeness", "conciseness", "format"];
|