@warmdrift/kgauto-compiler 2.0.0-alpha.25 → 2.0.0-alpha.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-7MTHFSNY.mjs → chunk-JQGRWJZO.mjs} +181 -48
- package/dist/{chunk-NUTC7NUC.mjs → chunk-NBO4R5PC.mjs} +122 -107
- package/dist/chunk-RO22VFIF.mjs +29 -0
- package/dist/glassbox/index.d.mts +20 -3
- package/dist/glassbox/index.d.ts +20 -3
- package/dist/glassbox/index.js +42 -30
- package/dist/glassbox/index.mjs +6 -4
- package/dist/glassbox-routes/index.d.mts +12 -3
- package/dist/glassbox-routes/index.d.ts +12 -3
- package/dist/glassbox-routes/index.js +44 -42
- package/dist/glassbox-routes/index.mjs +10 -17
- package/dist/index.js +236 -90
- package/dist/index.mjs +8 -8
- package/dist/profiles.js +181 -48
- package/dist/profiles.mjs +1 -1
- package/dist/{types-DiWBWvxg.d.ts → types-bt0aVJb8.d.ts} +19 -10
- package/dist/{types-D9WndxeD.d.mts → types-o9etg93a.d.mts} +19 -10
- package/package.json +1 -1
- package/dist/chunk-VZGMWKRT.mjs +0 -19
package/dist/index.js
CHANGED
|
@@ -1703,12 +1703,23 @@ var PROFILES_RAW = [
|
|
|
1703
1703
|
// Each entry's pricing/context/cliffs/lowering reflects the template, NOT
|
|
1704
1704
|
// provider docs. Verify before promoting status to 'current' (L-049/L-081).
|
|
1705
1705
|
{
|
|
1706
|
+
// s37 (2026-05-21): UNVERIFIED-AUTO-ONBOARD → verified against
|
|
1707
|
+
// ai.google.dev/gemini-api/docs/models/gemini-3-flash-preview +
|
|
1708
|
+
// ai.google.dev/gemini-api/docs/pricing. L-081 catches:
|
|
1709
|
+
// maxOutputTokens 65_535 → 65_536 (off-by-one)
|
|
1710
|
+
// costInputPer1m 0.30 → 0.50 (template-cloned from 2.5-flash; actual is 1.67× more expensive)
|
|
1711
|
+
// costOutputPer1m 2.50 → 3.00 (template-cloned; actual 1.2× more expensive)
|
|
1712
|
+
// cache discount default 0.25 → 0.10 (10× discount, $0.05/$0.50 per docs)
|
|
1713
|
+
// Cliffs inherited from 2.5-flash conservatively. The 8K-context-quality
|
|
1714
|
+
// cliff was a 2.5-Flash observation — Google positions Gemini 3 as
|
|
1715
|
+
// sustained-frontier-on-long-context; brain evidence will validate/relax.
|
|
1716
|
+
// Kept as guard for now.
|
|
1706
1717
|
id: "gemini-3-flash-preview",
|
|
1707
|
-
verifiedAgainstDocs: "
|
|
1718
|
+
verifiedAgainstDocs: "2026-05-21",
|
|
1708
1719
|
provider: "google",
|
|
1709
1720
|
status: "preview",
|
|
1710
1721
|
maxContextTokens: 1048576,
|
|
1711
|
-
maxOutputTokens:
|
|
1722
|
+
maxOutputTokens: 65536,
|
|
1712
1723
|
maxTools: 128,
|
|
1713
1724
|
parallelToolCalls: true,
|
|
1714
1725
|
structuredOutput: "native",
|
|
@@ -1719,13 +1730,13 @@ var PROFILES_RAW = [
|
|
|
1719
1730
|
metric: "input_tokens",
|
|
1720
1731
|
threshold: 8e3,
|
|
1721
1732
|
action: "downgrade_quality_warning",
|
|
1722
|
-
reason: "
|
|
1733
|
+
reason: "Inherited from 2.5-flash guard; brain evidence on Gemini 3 long-context quality will validate/relax"
|
|
1723
1734
|
},
|
|
1724
1735
|
{
|
|
1725
1736
|
metric: "tool_count",
|
|
1726
1737
|
threshold: 20,
|
|
1727
1738
|
action: "drop_to_top_relevant",
|
|
1728
|
-
reason: "Tool reliability drops above ~20 tools (despite 128 hard limit)"
|
|
1739
|
+
reason: "Tool reliability drops above ~20 tools (despite 128 hard limit) \u2014 inherited from Flash family"
|
|
1729
1740
|
},
|
|
1730
1741
|
{
|
|
1731
1742
|
metric: "thinking_with_short_output",
|
|
@@ -1734,24 +1745,22 @@ var PROFILES_RAW = [
|
|
|
1734
1745
|
reason: "Thinking tokens consume maxOutputTokens \u2014 empty response if drained"
|
|
1735
1746
|
},
|
|
1736
1747
|
{
|
|
1737
|
-
//
|
|
1738
|
-
//
|
|
1739
|
-
//
|
|
1740
|
-
// help — disabling thinking is necessary but not sufficient. Tools
|
|
1741
|
-
// present + summarize intent confuses Flash into a no-output state
|
|
1742
|
-
// (likely tool-decision purgatory). Strip tools entirely for this
|
|
1743
|
-
// archetype on this model.
|
|
1748
|
+
// Inherited from gemini-2.5-flash s11 trust artifact. Family-likely
|
|
1749
|
+
// failure mode for Flash architecture. Keep preemptively until brain
|
|
1750
|
+
// evidence on Gemini 3 specifically.
|
|
1744
1751
|
metric: "tool_count",
|
|
1745
1752
|
threshold: 1,
|
|
1746
1753
|
whenIntent: "summarize",
|
|
1747
1754
|
action: "strip_tools",
|
|
1748
|
-
reason: "
|
|
1755
|
+
reason: "Inherited from 2.5-flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on 3-flash-preview specifically."
|
|
1749
1756
|
}
|
|
1750
1757
|
],
|
|
1751
|
-
costInputPer1m: 0.
|
|
1752
|
-
costOutputPer1m:
|
|
1758
|
+
costInputPer1m: 0.5,
|
|
1759
|
+
costOutputPer1m: 3,
|
|
1753
1760
|
lowering: {
|
|
1754
1761
|
...GOOGLE_LOWERING_BASE,
|
|
1762
|
+
// 10× cache discount per Google pricing: $0.05/M cached vs $0.50/M input.
|
|
1763
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
1755
1764
|
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
1756
1765
|
},
|
|
1757
1766
|
recovery: [
|
|
@@ -1777,40 +1786,45 @@ var PROFILES_RAW = [
|
|
|
1777
1786
|
],
|
|
1778
1787
|
strengths: ["speed", "volume", "classification", "1m_context", "cost"],
|
|
1779
1788
|
weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
|
|
1780
|
-
notes: "
|
|
1781
|
-
//
|
|
1782
|
-
//
|
|
1789
|
+
notes: "Verified s37 (2026-05-21) against Google docs. Step-change positioning vs 2.5-flash on agentic loops per Google's release notes (Dec 2025). Pricing 1.67\xD7/1.2\xD7 higher than 2.5-flash; cache discount 10\xD7 (vs 4\xD7 for 2.5). Status=preview until brain evidence accumulates.",
|
|
1790
|
+
// Anchored to 2.5-flash archetypePerf as starter, with judgment adjustments
|
|
1791
|
+
// for Google's "step-change on agentic" positioning. Brain evidence (zero
|
|
1792
|
+
// rows today) will replace these starter values.
|
|
1783
1793
|
archetypePerf: {
|
|
1784
1794
|
hunt: 9,
|
|
1785
|
-
// L-040
|
|
1795
|
+
// Inherits 2.5-flash L-040 parallel-tool tier; Google positions 3 as agentic-loop upgrade
|
|
1786
1796
|
classify: 7,
|
|
1787
|
-
// brain-validated
|
|
1797
|
+
// Inherits 2.5-flash brain-validated tier (218 rows on 2.5)
|
|
1788
1798
|
summarize: 7,
|
|
1789
|
-
//
|
|
1799
|
+
// Inherits 2.5-flash; cliff strips tools when present
|
|
1790
1800
|
transform: 7,
|
|
1791
|
-
ask:
|
|
1792
|
-
|
|
1793
|
-
|
|
1801
|
+
ask: 8,
|
|
1802
|
+
// +1 vs 2.5-flash — sustained-frontier positioning
|
|
1803
|
+
generate: 7,
|
|
1804
|
+
// +1 vs 2.5-flash — agentic coding upgrade per Google
|
|
1805
|
+
plan: 6,
|
|
1806
|
+
// +1 vs 2.5-flash — complex iterations per positioning
|
|
1794
1807
|
extract: 6,
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
// reasoning shallower than Sonnet/Opus
|
|
1808
|
+
critique: 5
|
|
1809
|
+
// +1 vs 2.5-flash — but still below Sonnet/Opus reasoning floor
|
|
1798
1810
|
}
|
|
1799
1811
|
},
|
|
1800
1812
|
{
|
|
1801
|
-
// ── Gemini
|
|
1802
|
-
// Onboarded 2026-05-
|
|
1803
|
-
//
|
|
1804
|
-
//
|
|
1805
|
-
//
|
|
1806
|
-
//
|
|
1807
|
-
//
|
|
1808
|
-
//
|
|
1809
|
-
//
|
|
1810
|
-
//
|
|
1811
|
-
//
|
|
1813
|
+
// ── Gemini 3.1 Flash-Lite ──
|
|
1814
|
+
// Onboarded 2026-05-16 by auto-onboarder; s37 (2026-05-21) verified
|
|
1815
|
+
// against ai.google.dev/gemini-api/docs/pricing.
|
|
1816
|
+
//
|
|
1817
|
+
// L-081 CATCHES (template clone from 2.5-flash-lite was 2.5-3.75× too cheap):
|
|
1818
|
+
// costInputPer1m 0.10 → 0.25 (template clone undervalued by 2.5×)
|
|
1819
|
+
// costOutputPer1m 0.40 → 1.50 (template clone undervalued by 3.75×)
|
|
1820
|
+
//
|
|
1821
|
+
// Real 3.1-flash-lite is NOT a cost-equivalent successor to 2.5-flash-lite —
|
|
1822
|
+
// it sits between 2.5-flash-lite ($0.10/$0.40) and 2.5-flash ($0.30/$2.50).
|
|
1823
|
+
// Cache discount 10× verified ($0.025/M cached vs $0.25/M input).
|
|
1824
|
+
//
|
|
1825
|
+
// Cliffs are HYPOTHESIZED from 2.5-flash family; brain evidence pending.
|
|
1812
1826
|
id: "gemini-3.1-flash-lite",
|
|
1813
|
-
verifiedAgainstDocs: "
|
|
1827
|
+
verifiedAgainstDocs: "2026-05-21",
|
|
1814
1828
|
provider: "google",
|
|
1815
1829
|
status: "preview",
|
|
1816
1830
|
maxContextTokens: 1048576,
|
|
@@ -1851,12 +1865,12 @@ var PROFILES_RAW = [
|
|
|
1851
1865
|
reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
|
|
1852
1866
|
}
|
|
1853
1867
|
],
|
|
1854
|
-
costInputPer1m: 0.
|
|
1855
|
-
costOutputPer1m:
|
|
1868
|
+
costInputPer1m: 0.25,
|
|
1869
|
+
costOutputPer1m: 1.5,
|
|
1856
1870
|
lowering: {
|
|
1857
1871
|
...GOOGLE_LOWERING_BASE,
|
|
1858
|
-
// Cache discount 10× (vs Flash 4×) — Google
|
|
1859
|
-
// $0.
|
|
1872
|
+
// Cache discount 10× (vs Flash 4×) — Google docs s37: $0.025/M cached vs
|
|
1873
|
+
// $0.25/M input. Material for repeat-prompt workloads (classify shape).
|
|
1860
1874
|
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
1861
1875
|
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
1862
1876
|
},
|
|
@@ -1881,13 +1895,13 @@ var PROFILES_RAW = [
|
|
|
1881
1895
|
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
|
|
1882
1896
|
}
|
|
1883
1897
|
],
|
|
1884
|
-
strengths: ["
|
|
1898
|
+
strengths: ["low_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
|
|
1885
1899
|
weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
|
|
1886
|
-
notes: "
|
|
1887
|
-
// Tier 3
|
|
1888
|
-
// rows —
|
|
1889
|
-
// sibling of Flash
|
|
1890
|
-
//
|
|
1900
|
+
notes: "Verified s37 (2026-05-21) against Google docs. Sits between 2.5-flash-lite (cheaper) and 2.5-flash (more expensive) on cost frontier; 2.5\xD7 more expensive than initial template-clone. Cliffs hypothesized from Flash family \u2014 brain evidence pending.",
|
|
1901
|
+
// Tier 2-3 floor for summarize/classify chains at the new (verified) price
|
|
1902
|
+
// point. ZERO brain rows — values are starter hypotheses anchored to
|
|
1903
|
+
// "smaller sibling of Flash at higher cost than 2.5-flash-lite." The first
|
|
1904
|
+
// 50 brain rows per archetype will validate or relax these.
|
|
1891
1905
|
archetypePerf: {
|
|
1892
1906
|
classify: 6,
|
|
1893
1907
|
// starter hypothesis — verify (Flash is 7, lite likely ≤)
|
|
@@ -1902,6 +1916,125 @@ var PROFILES_RAW = [
|
|
|
1902
1916
|
plan: 3,
|
|
1903
1917
|
critique: 3
|
|
1904
1918
|
}
|
|
1919
|
+
},
|
|
1920
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
1921
|
+
// Gemini 3.5 Flash — hand-onboarded s37 (2026-05-21)
|
|
1922
|
+
//
|
|
1923
|
+
// Google positioning ("Most intelligent for sustained frontier performance
|
|
1924
|
+
// on agentic and coding tasks" / "particularly effective for rapid agentic
|
|
1925
|
+
// loops involving complex coding cycles and iterations") suggests this is
|
|
1926
|
+
// the Flash-family upgrade specifically aimed at hunt-shape workloads.
|
|
1927
|
+
// Pricing 5× input / 3.6× output vs 2.5-flash — material cost premium.
|
|
1928
|
+
// archetypePerf adjusted vs 2.5-flash: +1 on ask/extract/critique, +2 on generate/plan
|
|
1929
|
+
// (sustained-frontier positioning); hunt held at 9 inherited from L-040
|
|
1930
|
+
// family parallel-tool tier; brain evidence will validate within 50 rows.
|
|
1931
|
+
//
|
|
1932
|
+
// Cliffs inherited conservatively from 2.5-flash. Google's "sustained
|
|
1933
|
+
// frontier on long-context" positioning suggests the 8K cliff may not
|
|
1934
|
+
// apply to 3.5 — keep as guard until brain evidence shows otherwise.
|
|
1935
|
+
//
|
|
1936
|
+
// Specs verified against:
|
|
1937
|
+
// ai.google.dev/gemini-api/docs/models/gemini-3.5-flash
|
|
1938
|
+
// ai.google.dev/gemini-api/docs/pricing (Standard tier)
|
|
1939
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
1940
|
+
{
|
|
1941
|
+
id: "gemini-3.5-flash",
|
|
1942
|
+
verifiedAgainstDocs: "2026-05-21",
|
|
1943
|
+
provider: "google",
|
|
1944
|
+
status: "current",
|
|
1945
|
+
maxContextTokens: 1048576,
|
|
1946
|
+
maxOutputTokens: 65536,
|
|
1947
|
+
maxTools: 128,
|
|
1948
|
+
parallelToolCalls: true,
|
|
1949
|
+
structuredOutput: "native",
|
|
1950
|
+
systemPromptMode: "separate",
|
|
1951
|
+
streaming: true,
|
|
1952
|
+
cliffs: [
|
|
1953
|
+
{
|
|
1954
|
+
metric: "input_tokens",
|
|
1955
|
+
threshold: 8e3,
|
|
1956
|
+
action: "downgrade_quality_warning",
|
|
1957
|
+
reason: "Inherited from 2.5-flash guard; Google positions 3.5 as sustained-frontier-on-long-context but brain evidence pending"
|
|
1958
|
+
},
|
|
1959
|
+
{
|
|
1960
|
+
metric: "tool_count",
|
|
1961
|
+
threshold: 20,
|
|
1962
|
+
action: "drop_to_top_relevant",
|
|
1963
|
+
reason: "Inherited from Flash family: tool reliability drops above ~20 (despite 128 hard limit). Validate per (archetype, model) after n\u226520."
|
|
1964
|
+
},
|
|
1965
|
+
{
|
|
1966
|
+
metric: "thinking_with_short_output",
|
|
1967
|
+
threshold: 1,
|
|
1968
|
+
action: "force_thinking_budget_zero",
|
|
1969
|
+
reason: "Thinking mode supported per Google docs; same drain risk as 2.5-flash \u2014 thinking tokens consume maxOutputTokens"
|
|
1970
|
+
},
|
|
1971
|
+
{
|
|
1972
|
+
// Inherited from 2.5-flash s11 trust artifact (5/5 empty rate on
|
|
1973
|
+
// tt-intelligence/summarize/gemini-2.5-flash with tools offered).
|
|
1974
|
+
// Family-likely failure mode for Flash architecture across versions.
|
|
1975
|
+
// Keep preemptively until brain evidence on 3.5-flash specifically.
|
|
1976
|
+
metric: "tool_count",
|
|
1977
|
+
threshold: 1,
|
|
1978
|
+
whenIntent: "summarize",
|
|
1979
|
+
action: "strip_tools",
|
|
1980
|
+
reason: "Inherited from 2.5-flash s11 cliff (kgauto commit 3872832): summarize+tools \u2192 empty response. Preemptive guard until brain evidence on 3.5-flash specifically."
|
|
1981
|
+
}
|
|
1982
|
+
],
|
|
1983
|
+
costInputPer1m: 1.5,
|
|
1984
|
+
costOutputPer1m: 9,
|
|
1985
|
+
lowering: {
|
|
1986
|
+
...GOOGLE_LOWERING_BASE,
|
|
1987
|
+
// 10× cache discount per Google pricing: $0.15/M cached vs $1.50/M input.
|
|
1988
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
1989
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
1990
|
+
},
|
|
1991
|
+
recovery: [
|
|
1992
|
+
{
|
|
1993
|
+
signal: "empty_response_after_tool",
|
|
1994
|
+
action: "retry_with_params",
|
|
1995
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
1996
|
+
maxRetries: 1,
|
|
1997
|
+
reason: "Inherited Flash-family pattern: empty after tool result \u2014 retry with thinking off"
|
|
1998
|
+
},
|
|
1999
|
+
{
|
|
2000
|
+
signal: "empty_response",
|
|
2001
|
+
action: "retry_with_params",
|
|
2002
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
2003
|
+
maxRetries: 1,
|
|
2004
|
+
reason: "Empty response \u2014 try with thinking off"
|
|
2005
|
+
},
|
|
2006
|
+
{
|
|
2007
|
+
signal: "malformed_function_call",
|
|
2008
|
+
action: "escalate",
|
|
2009
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
|
|
2010
|
+
}
|
|
2011
|
+
],
|
|
2012
|
+
strengths: ["agentic_loops", "coding", "1m_context", "parallel_tools", "thinking_mode", "sustained_frontier"],
|
|
2013
|
+
weaknesses: ["cost_vs_2_5_flash", "no_brain_evidence_yet"],
|
|
2014
|
+
notes: "Hand-onboarded s37 (2026-05-21) verified against Google docs. Stable status; positioned as Flash-family upgrade for agentic loops and coding. 5\xD7/3.6\xD7 more expensive than 2.5-flash but Google claims step-change on sustained frontier work. archetypePerf adjustments are judgment-grounded starter hypotheses \u2014 brain evidence will validate within ~50 rows per archetype.",
|
|
2015
|
+
// Starter hypothesis: anchored to 2.5-flash archetypePerf with +1/+2
|
|
2016
|
+
// adjustments where Google's positioning explicitly supports
|
|
2017
|
+
// (agentic/coding/sustained). Hunt held at 9 inherited from L-040 family
|
|
2018
|
+
// parallel-tool tier. Brain evidence will replace.
|
|
2019
|
+
archetypePerf: {
|
|
2020
|
+
hunt: 9,
|
|
2021
|
+
// Inherited from 2.5-flash L-040 parallel-tool tier; Google positions 3.5 as agentic-loop champion
|
|
2022
|
+
classify: 7,
|
|
2023
|
+
// Inherited from 2.5-flash brain-validated tier (218 rows on 2.5)
|
|
2024
|
+
summarize: 7,
|
|
2025
|
+
// Inherited from 2.5-flash; cliff strips tools when present
|
|
2026
|
+
transform: 7,
|
|
2027
|
+
ask: 8,
|
|
2028
|
+
// +1 vs 2.5-flash — sustained-frontier positioning
|
|
2029
|
+
generate: 8,
|
|
2030
|
+
// +2 vs 2.5-flash (6→8) — Google: "complex coding cycles and iterations"
|
|
2031
|
+
plan: 7,
|
|
2032
|
+
// +2 vs 2.5-flash (5→7) — "complex iterations" positioning
|
|
2033
|
+
extract: 7,
|
|
2034
|
+
// +1 vs 2.5-flash — sustained-frontier on structured tasks
|
|
2035
|
+
critique: 5
|
|
2036
|
+
// +1 vs 2.5-flash — but still below Sonnet/Opus reasoning floor
|
|
2037
|
+
}
|
|
1905
2038
|
}
|
|
1906
2039
|
];
|
|
1907
2040
|
var ALIASES = {
|
|
@@ -3473,8 +3606,8 @@ var GLASSBOX_STREAM_TTL_MS = 6e4;
|
|
|
3473
3606
|
// src/glassbox/pubsub-memory.ts
|
|
3474
3607
|
var MemoryPubSub = class {
|
|
3475
3608
|
subscribers = /* @__PURE__ */ new Map();
|
|
3476
|
-
async publish(
|
|
3477
|
-
const subs = this.subscribers.get(
|
|
3609
|
+
async publish(channelKey, event) {
|
|
3610
|
+
const subs = this.subscribers.get(channelKey);
|
|
3478
3611
|
if (!subs || subs.size === 0) return;
|
|
3479
3612
|
for (const sub of subs) {
|
|
3480
3613
|
if (sub.closed) continue;
|
|
@@ -3484,10 +3617,10 @@ var MemoryPubSub = class {
|
|
|
3484
3617
|
sub.closed = true;
|
|
3485
3618
|
continue;
|
|
3486
3619
|
}
|
|
3487
|
-
this.refreshTtl(
|
|
3620
|
+
this.refreshTtl(channelKey, sub);
|
|
3488
3621
|
}
|
|
3489
3622
|
}
|
|
3490
|
-
subscribe(
|
|
3623
|
+
subscribe(channelKey) {
|
|
3491
3624
|
const self = this;
|
|
3492
3625
|
let sub;
|
|
3493
3626
|
return new ReadableStream({
|
|
@@ -3495,19 +3628,19 @@ var MemoryPubSub = class {
|
|
|
3495
3628
|
sub = {
|
|
3496
3629
|
controller,
|
|
3497
3630
|
ttlTimer: setTimeout(() => {
|
|
3498
|
-
self.closeSubscriber(
|
|
3631
|
+
self.closeSubscriber(channelKey, sub);
|
|
3499
3632
|
}, GLASSBOX_STREAM_TTL_MS),
|
|
3500
3633
|
closed: false
|
|
3501
3634
|
};
|
|
3502
|
-
let set = self.subscribers.get(
|
|
3635
|
+
let set = self.subscribers.get(channelKey);
|
|
3503
3636
|
if (!set) {
|
|
3504
3637
|
set = /* @__PURE__ */ new Set();
|
|
3505
|
-
self.subscribers.set(
|
|
3638
|
+
self.subscribers.set(channelKey, set);
|
|
3506
3639
|
}
|
|
3507
3640
|
set.add(sub);
|
|
3508
3641
|
},
|
|
3509
3642
|
cancel() {
|
|
3510
|
-
if (sub) self.removeSubscriber(
|
|
3643
|
+
if (sub) self.removeSubscriber(channelKey, sub);
|
|
3511
3644
|
}
|
|
3512
3645
|
});
|
|
3513
3646
|
}
|
|
@@ -3515,17 +3648,17 @@ var MemoryPubSub = class {
|
|
|
3515
3648
|
* Refresh the rolling TTL for a subscriber after an event lands. Replaces
|
|
3516
3649
|
* the existing timer with a fresh 60s one.
|
|
3517
3650
|
*/
|
|
3518
|
-
refreshTtl(
|
|
3651
|
+
refreshTtl(channelKey, sub) {
|
|
3519
3652
|
clearTimeout(sub.ttlTimer);
|
|
3520
3653
|
sub.ttlTimer = setTimeout(() => {
|
|
3521
|
-
this.closeSubscriber(
|
|
3654
|
+
this.closeSubscriber(channelKey, sub);
|
|
3522
3655
|
}, GLASSBOX_STREAM_TTL_MS);
|
|
3523
3656
|
}
|
|
3524
3657
|
/**
|
|
3525
3658
|
* Close the subscriber's stream cleanly and remove from the fan-out set.
|
|
3526
3659
|
* Idempotent — safe to call multiple times.
|
|
3527
3660
|
*/
|
|
3528
|
-
closeSubscriber(
|
|
3661
|
+
closeSubscriber(channelKey, sub) {
|
|
3529
3662
|
if (sub.closed) return;
|
|
3530
3663
|
sub.closed = true;
|
|
3531
3664
|
clearTimeout(sub.ttlTimer);
|
|
@@ -3533,14 +3666,14 @@ var MemoryPubSub = class {
|
|
|
3533
3666
|
sub.controller.close();
|
|
3534
3667
|
} catch {
|
|
3535
3668
|
}
|
|
3536
|
-
this.removeSubscriber(
|
|
3669
|
+
this.removeSubscriber(channelKey, sub);
|
|
3537
3670
|
}
|
|
3538
|
-
removeSubscriber(
|
|
3671
|
+
removeSubscriber(channelKey, sub) {
|
|
3539
3672
|
clearTimeout(sub.ttlTimer);
|
|
3540
|
-
const set = this.subscribers.get(
|
|
3673
|
+
const set = this.subscribers.get(channelKey);
|
|
3541
3674
|
if (!set) return;
|
|
3542
3675
|
set.delete(sub);
|
|
3543
|
-
if (set.size === 0) this.subscribers.delete(
|
|
3676
|
+
if (set.size === 0) this.subscribers.delete(channelKey);
|
|
3544
3677
|
}
|
|
3545
3678
|
/**
|
|
3546
3679
|
* Test-only reset. Tears down all subscribers, clears all state. Calling
|
|
@@ -3570,8 +3703,8 @@ var UpstashPubSub = class {
|
|
|
3570
3703
|
this.blockMs = cfg.blockMs ?? 100;
|
|
3571
3704
|
this.maxLen = cfg.maxLen ?? 100;
|
|
3572
3705
|
}
|
|
3573
|
-
async publish(
|
|
3574
|
-
const key =
|
|
3706
|
+
async publish(channelKey, event) {
|
|
3707
|
+
const key = channelKey;
|
|
3575
3708
|
const payload = JSON.stringify(event);
|
|
3576
3709
|
await this.cmd([
|
|
3577
3710
|
"XADD",
|
|
@@ -3585,8 +3718,8 @@ var UpstashPubSub = class {
|
|
|
3585
3718
|
]);
|
|
3586
3719
|
await this.cmd(["EXPIRE", key, String(Math.ceil(GLASSBOX_STREAM_TTL_MS / 1e3))]);
|
|
3587
3720
|
}
|
|
3588
|
-
subscribe(
|
|
3589
|
-
const key =
|
|
3721
|
+
subscribe(channelKey) {
|
|
3722
|
+
const key = channelKey;
|
|
3590
3723
|
const self = this;
|
|
3591
3724
|
let cursor = "$";
|
|
3592
3725
|
let cancelled = false;
|
|
@@ -3660,9 +3793,12 @@ var UpstashPubSub = class {
|
|
|
3660
3793
|
return json;
|
|
3661
3794
|
}
|
|
3662
3795
|
};
|
|
3663
|
-
function
|
|
3796
|
+
function traceChannel(traceId) {
|
|
3664
3797
|
return `glassbox:trace:${traceId}`;
|
|
3665
3798
|
}
|
|
3799
|
+
function appChannel(appId) {
|
|
3800
|
+
return `glassbox:app:${appId}`;
|
|
3801
|
+
}
|
|
3666
3802
|
function decodeEvent(fields) {
|
|
3667
3803
|
const raw = fields["event"];
|
|
3668
3804
|
if (!raw) return void 0;
|
|
@@ -3723,43 +3859,53 @@ function readEnv(key) {
|
|
|
3723
3859
|
}
|
|
3724
3860
|
return void 0;
|
|
3725
3861
|
}
|
|
3726
|
-
function emitGlassboxEvent(traceId, kind, data) {
|
|
3862
|
+
function emitGlassboxEvent(traceId, appId, kind, data) {
|
|
3727
3863
|
if (!traceId) return;
|
|
3728
3864
|
const event = { kind, at: Date.now(), data };
|
|
3729
3865
|
const ps = getPubSub();
|
|
3730
3866
|
try {
|
|
3731
|
-
const
|
|
3732
|
-
if (
|
|
3733
|
-
|
|
3867
|
+
const p1 = ps.publish(traceChannel(traceId), event);
|
|
3868
|
+
if (p1 && typeof p1.then === "function") {
|
|
3869
|
+
p1.catch(() => {
|
|
3734
3870
|
});
|
|
3735
3871
|
}
|
|
3736
3872
|
} catch {
|
|
3737
3873
|
}
|
|
3874
|
+
if (appId) {
|
|
3875
|
+
try {
|
|
3876
|
+
const p2 = ps.publish(appChannel(appId), event);
|
|
3877
|
+
if (p2 && typeof p2.then === "function") {
|
|
3878
|
+
p2.catch(() => {
|
|
3879
|
+
});
|
|
3880
|
+
}
|
|
3881
|
+
} catch {
|
|
3882
|
+
}
|
|
3883
|
+
}
|
|
3738
3884
|
}
|
|
3739
|
-
function emitCompileStart(traceId, data) {
|
|
3740
|
-
emitGlassboxEvent(traceId, "compile.start", data);
|
|
3885
|
+
function emitCompileStart(traceId, appId, data) {
|
|
3886
|
+
emitGlassboxEvent(traceId, appId, "compile.start", data);
|
|
3741
3887
|
}
|
|
3742
|
-
function emitCompileDone(traceId, data) {
|
|
3743
|
-
emitGlassboxEvent(traceId, "compile.done", data);
|
|
3888
|
+
function emitCompileDone(traceId, appId, data) {
|
|
3889
|
+
emitGlassboxEvent(traceId, appId, "compile.done", data);
|
|
3744
3890
|
}
|
|
3745
|
-
function emitExecuteAttempt(traceId, data) {
|
|
3746
|
-
emitGlassboxEvent(traceId, "execute.attempt", data);
|
|
3891
|
+
function emitExecuteAttempt(traceId, appId, data) {
|
|
3892
|
+
emitGlassboxEvent(traceId, appId, "execute.attempt", data);
|
|
3747
3893
|
}
|
|
3748
|
-
function emitExecuteSuccess(traceId, data) {
|
|
3749
|
-
emitGlassboxEvent(traceId, "execute.success", data);
|
|
3894
|
+
function emitExecuteSuccess(traceId, appId, data) {
|
|
3895
|
+
emitGlassboxEvent(traceId, appId, "execute.success", data);
|
|
3750
3896
|
}
|
|
3751
|
-
function emitAdvisoryFired(traceId, data) {
|
|
3752
|
-
emitGlassboxEvent(traceId, "advisory.fired", data);
|
|
3897
|
+
function emitAdvisoryFired(traceId, appId, data) {
|
|
3898
|
+
emitGlassboxEvent(traceId, appId, "advisory.fired", data);
|
|
3753
3899
|
}
|
|
3754
|
-
function emitFallbackWalked(traceId, data) {
|
|
3755
|
-
emitGlassboxEvent(traceId, "fallback.walked", data);
|
|
3900
|
+
function emitFallbackWalked(traceId, appId, data) {
|
|
3901
|
+
emitGlassboxEvent(traceId, appId, "fallback.walked", data);
|
|
3756
3902
|
}
|
|
3757
3903
|
|
|
3758
3904
|
// src/call.ts
|
|
3759
3905
|
async function call(ir, opts = {}) {
|
|
3760
3906
|
const traceId = generateTraceId();
|
|
3761
3907
|
safeEmit(
|
|
3762
|
-
() => emitCompileStart(traceId, {
|
|
3908
|
+
() => emitCompileStart(traceId, ir.appId, {
|
|
3763
3909
|
appId: ir.appId,
|
|
3764
3910
|
archetype: ir.intent.archetype,
|
|
3765
3911
|
models: ir.models
|
|
@@ -3767,7 +3913,7 @@ async function call(ir, opts = {}) {
|
|
|
3767
3913
|
);
|
|
3768
3914
|
const initial = compileAndRegister(ir, opts);
|
|
3769
3915
|
safeEmit(
|
|
3770
|
-
() => emitCompileDone(traceId, {
|
|
3916
|
+
() => emitCompileDone(traceId, ir.appId, {
|
|
3771
3917
|
target: initial.target,
|
|
3772
3918
|
provider: initial.provider,
|
|
3773
3919
|
fallbackChain: initial.fallbackChain,
|
|
@@ -3779,7 +3925,7 @@ async function call(ir, opts = {}) {
|
|
|
3779
3925
|
);
|
|
3780
3926
|
for (const adv of initial.advisories) {
|
|
3781
3927
|
safeEmit(
|
|
3782
|
-
() => emitAdvisoryFired(traceId, { code: adv.code, message: adv.message })
|
|
3928
|
+
() => emitAdvisoryFired(traceId, ir.appId, { code: adv.code, message: adv.message })
|
|
3783
3929
|
);
|
|
3784
3930
|
}
|
|
3785
3931
|
const start = Date.now();
|
|
@@ -3913,7 +4059,7 @@ async function call(ir, opts = {}) {
|
|
|
3913
4059
|
}
|
|
3914
4060
|
}
|
|
3915
4061
|
safeEmit(
|
|
3916
|
-
() => emitExecuteAttempt(traceId, { model: targetModel, attemptIndex: i })
|
|
4062
|
+
() => emitExecuteAttempt(traceId, ir.appId, { model: targetModel, attemptIndex: i })
|
|
3917
4063
|
);
|
|
3918
4064
|
const exec = await execute(activeCompile.request, {
|
|
3919
4065
|
apiKeys: opts.apiKeys,
|
|
@@ -3925,7 +4071,7 @@ async function call(ir, opts = {}) {
|
|
|
3925
4071
|
attempts.push({ model: targetModel, status: "success" });
|
|
3926
4072
|
const latencyMs2 = Date.now() - start;
|
|
3927
4073
|
safeEmit(
|
|
3928
|
-
() => emitExecuteSuccess(traceId, {
|
|
4074
|
+
() => emitExecuteSuccess(traceId, ir.appId, {
|
|
3929
4075
|
model: targetModel,
|
|
3930
4076
|
tokensIn: validated.response.tokens.input,
|
|
3931
4077
|
tokensOut: validated.response.tokens.output,
|
|
@@ -3953,7 +4099,7 @@ async function call(ir, opts = {}) {
|
|
|
3953
4099
|
const firstFailed = attempts.find((a) => a.status !== "success");
|
|
3954
4100
|
if (firstFailed) {
|
|
3955
4101
|
safeEmit(
|
|
3956
|
-
() => emitFallbackWalked(traceId, {
|
|
4102
|
+
() => emitFallbackWalked(traceId, ir.appId, {
|
|
3957
4103
|
from: initial.target,
|
|
3958
4104
|
to: targetModel,
|
|
3959
4105
|
reason: fallbackReason ?? "unknown",
|
package/dist/index.mjs
CHANGED
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
getProfile,
|
|
18
18
|
profilesByProvider,
|
|
19
19
|
tryGetProfile
|
|
20
|
-
} from "./chunk-
|
|
20
|
+
} from "./chunk-JQGRWJZO.mjs";
|
|
21
21
|
import {
|
|
22
22
|
emitAdvisoryFired,
|
|
23
23
|
emitCompileDone,
|
|
@@ -25,7 +25,7 @@ import {
|
|
|
25
25
|
emitExecuteAttempt,
|
|
26
26
|
emitExecuteSuccess,
|
|
27
27
|
emitFallbackWalked
|
|
28
|
-
} from "./chunk-
|
|
28
|
+
} from "./chunk-NBO4R5PC.mjs";
|
|
29
29
|
|
|
30
30
|
// src/tokenizer.ts
|
|
31
31
|
var tokenizerImpl = defaultCharBasedCounter;
|
|
@@ -2265,7 +2265,7 @@ function ensureCrossProviderTail(opts) {
|
|
|
2265
2265
|
async function call(ir, opts = {}) {
|
|
2266
2266
|
const traceId = generateTraceId();
|
|
2267
2267
|
safeEmit(
|
|
2268
|
-
() => emitCompileStart(traceId, {
|
|
2268
|
+
() => emitCompileStart(traceId, ir.appId, {
|
|
2269
2269
|
appId: ir.appId,
|
|
2270
2270
|
archetype: ir.intent.archetype,
|
|
2271
2271
|
models: ir.models
|
|
@@ -2273,7 +2273,7 @@ async function call(ir, opts = {}) {
|
|
|
2273
2273
|
);
|
|
2274
2274
|
const initial = compileAndRegister(ir, opts);
|
|
2275
2275
|
safeEmit(
|
|
2276
|
-
() => emitCompileDone(traceId, {
|
|
2276
|
+
() => emitCompileDone(traceId, ir.appId, {
|
|
2277
2277
|
target: initial.target,
|
|
2278
2278
|
provider: initial.provider,
|
|
2279
2279
|
fallbackChain: initial.fallbackChain,
|
|
@@ -2285,7 +2285,7 @@ async function call(ir, opts = {}) {
|
|
|
2285
2285
|
);
|
|
2286
2286
|
for (const adv of initial.advisories) {
|
|
2287
2287
|
safeEmit(
|
|
2288
|
-
() => emitAdvisoryFired(traceId, { code: adv.code, message: adv.message })
|
|
2288
|
+
() => emitAdvisoryFired(traceId, ir.appId, { code: adv.code, message: adv.message })
|
|
2289
2289
|
);
|
|
2290
2290
|
}
|
|
2291
2291
|
const start = Date.now();
|
|
@@ -2419,7 +2419,7 @@ async function call(ir, opts = {}) {
|
|
|
2419
2419
|
}
|
|
2420
2420
|
}
|
|
2421
2421
|
safeEmit(
|
|
2422
|
-
() => emitExecuteAttempt(traceId, { model: targetModel, attemptIndex: i })
|
|
2422
|
+
() => emitExecuteAttempt(traceId, ir.appId, { model: targetModel, attemptIndex: i })
|
|
2423
2423
|
);
|
|
2424
2424
|
const exec = await execute(activeCompile.request, {
|
|
2425
2425
|
apiKeys: opts.apiKeys,
|
|
@@ -2431,7 +2431,7 @@ async function call(ir, opts = {}) {
|
|
|
2431
2431
|
attempts.push({ model: targetModel, status: "success" });
|
|
2432
2432
|
const latencyMs2 = Date.now() - start;
|
|
2433
2433
|
safeEmit(
|
|
2434
|
-
() => emitExecuteSuccess(traceId, {
|
|
2434
|
+
() => emitExecuteSuccess(traceId, ir.appId, {
|
|
2435
2435
|
model: targetModel,
|
|
2436
2436
|
tokensIn: validated.response.tokens.input,
|
|
2437
2437
|
tokensOut: validated.response.tokens.output,
|
|
@@ -2459,7 +2459,7 @@ async function call(ir, opts = {}) {
|
|
|
2459
2459
|
const firstFailed = attempts.find((a) => a.status !== "success");
|
|
2460
2460
|
if (firstFailed) {
|
|
2461
2461
|
safeEmit(
|
|
2462
|
-
() => emitFallbackWalked(traceId, {
|
|
2462
|
+
() => emitFallbackWalked(traceId, ir.appId, {
|
|
2463
2463
|
from: initial.target,
|
|
2464
2464
|
to: targetModel,
|
|
2465
2465
|
reason: fallbackReason ?? "unknown",
|