@warmdrift/kgauto-compiler 2.0.0-alpha.6 → 2.0.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,5 +1,5 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-zm6diETo.mjs';
2
- export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-Py8c7zjJ.mjs';
2
+ export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
3
3
  export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
4
4
 
5
5
  /**
@@ -19,6 +19,15 @@ interface CompileOptions {
19
19
  toolRelevanceThreshold?: number;
20
20
  /** History compression — turns count threshold (default 8). */
21
21
  compressHistoryAfter?: number;
22
+ /**
23
+ * History compression — token threshold (alpha.7). When total history
24
+ * tokens exceed this AND history holds more turns than the recent ones kept,
25
+ * compress even when the turn count does not exceed `compressHistoryAfter`. Catches
26
+ * fat-message bloat (tool-using agents pack many tool-call/result pairs
27
+ * into single assistant messages — count stays low, tokens explode).
28
+ * Default undefined (disabled — backward-compatible).
29
+ */
30
+ compressHistoryAboveTokens?: number;
22
31
  /**
23
32
  * Consumer-declared policy. Filters blocked models, enforces cost
24
33
  * ceiling, boosts preferred. See CompilePolicy in ir.ts.
@@ -114,6 +123,53 @@ declare function clearBrain(): void;
114
123
  * network error is swallowed/forwarded to onError.
115
124
  */
116
125
  declare function record(input: RecordInput): Promise<void>;
126
+ /**
127
+ * Wire shape POSTed by `record()` to the brain proxy's `/outcomes` endpoint.
128
+ *
129
+ * Exported so consumer proxies can `import { OutcomePayload } from
130
+ * '@warmdrift/kgauto-compiler'` instead of redefining the shape — that way
131
+ * TypeScript catches future schema additions (cache fields, advisory
132
+ * telemetry, etc.) at consumer build time, not silently at runtime.
133
+ *
134
+ * **Forward-compat rule:** consumer proxies should pass the body through to
135
+ * Supabase rather than reconstructing field-by-field. The recommended shape
136
+ * is `const row = { ...body }` (or `await supabase.from('compile_outcomes')
137
+ * .insert(body)` directly). Filtering proxies break schema evolution
138
+ * silently — see s17 root-cause investigation 2026-05-10.
139
+ */
140
+ interface OutcomePayload {
141
+ handle: string;
142
+ app_id?: string;
143
+ intent_archetype?: string;
144
+ /** The model that ACTUALLY RAN (post-fallback). */
145
+ model?: string;
146
+ /** The model v2 compile() originally targeted. NULL when no fallback. */
147
+ requested_model?: string;
148
+ provider?: string;
149
+ shape_key?: string;
150
+ learning_key?: string;
151
+ mutations_applied: string[];
152
+ tokens_in: number;
153
+ tokens_out: number;
154
+ estimated_tokens_in?: number;
155
+ latency_ms: number;
156
+ success: boolean;
157
+ empty_response: boolean;
158
+ error_type?: string;
159
+ tools_called?: string[];
160
+ oracle_score?: number;
161
+ oracle_dimensions?: Record<string, number>;
162
+ oracle_rationale?: string;
163
+ prompt_preview?: string;
164
+ response_preview?: string;
165
+ dialect_version: string;
166
+ cache_read_input_tokens?: number;
167
+ cache_creation_input_tokens?: number;
168
+ cost_usd_actual?: number;
169
+ ttft_ms?: number;
170
+ history_cacheable_tokens?: number;
171
+ history_tokens_at_compile?: number;
172
+ }
117
173
 
118
174
  /**
119
175
  * Oracle contract — how an app tells the brain whether a response was good.
@@ -270,4 +326,4 @@ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: Model
270
326
  */
271
327
  declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
272
328
 
273
- export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
329
+ export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-CQnLkQ7b.js';
2
- export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-B3eNQ2py.js';
2
+ export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
3
3
  export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
4
4
 
5
5
  /**
@@ -19,6 +19,15 @@ interface CompileOptions {
19
19
  toolRelevanceThreshold?: number;
20
20
  /** History compression — turns count threshold (default 8). */
21
21
  compressHistoryAfter?: number;
22
+ /**
23
+ * History compression — token threshold (alpha.7). When total history
24
+ * tokens exceed this AND history holds more turns than the recent ones kept,
25
+ * compress even when the turn count does not exceed `compressHistoryAfter`. Catches
26
+ * fat-message bloat (tool-using agents pack many tool-call/result pairs
27
+ * into single assistant messages — count stays low, tokens explode).
28
+ * Default undefined (disabled — backward-compatible).
29
+ */
30
+ compressHistoryAboveTokens?: number;
22
31
  /**
23
32
  * Consumer-declared policy. Filters blocked models, enforces cost
24
33
  * ceiling, boosts preferred. See CompilePolicy in ir.ts.
@@ -114,6 +123,53 @@ declare function clearBrain(): void;
114
123
  * network error is swallowed/forwarded to onError.
115
124
  */
116
125
  declare function record(input: RecordInput): Promise<void>;
126
+ /**
127
+ * Wire shape POSTed by `record()` to the brain proxy's `/outcomes` endpoint.
128
+ *
129
+ * Exported so consumer proxies can `import { OutcomePayload } from
130
+ * '@warmdrift/kgauto-compiler'` instead of redefining the shape — that way
131
+ * TypeScript catches future schema additions (cache fields, advisory
132
+ * telemetry, etc.) at consumer build time, not silently at runtime.
133
+ *
134
+ * **Forward-compat rule:** consumer proxies should pass the body through to
135
+ * Supabase rather than reconstructing field-by-field. The recommended shape
136
+ * is `const row = { ...body }` (or `await supabase.from('compile_outcomes')
137
+ * .insert(body)` directly). Filtering proxies break schema evolution
138
+ * silently — see s17 root-cause investigation 2026-05-10.
139
+ */
140
+ interface OutcomePayload {
141
+ handle: string;
142
+ app_id?: string;
143
+ intent_archetype?: string;
144
+ /** The model that ACTUALLY RAN (post-fallback). */
145
+ model?: string;
146
+ /** The model v2 compile() originally targeted. NULL when no fallback. */
147
+ requested_model?: string;
148
+ provider?: string;
149
+ shape_key?: string;
150
+ learning_key?: string;
151
+ mutations_applied: string[];
152
+ tokens_in: number;
153
+ tokens_out: number;
154
+ estimated_tokens_in?: number;
155
+ latency_ms: number;
156
+ success: boolean;
157
+ empty_response: boolean;
158
+ error_type?: string;
159
+ tools_called?: string[];
160
+ oracle_score?: number;
161
+ oracle_dimensions?: Record<string, number>;
162
+ oracle_rationale?: string;
163
+ prompt_preview?: string;
164
+ response_preview?: string;
165
+ dialect_version: string;
166
+ cache_read_input_tokens?: number;
167
+ cache_creation_input_tokens?: number;
168
+ cost_usd_actual?: number;
169
+ ttft_ms?: number;
170
+ history_cacheable_tokens?: number;
171
+ history_tokens_at_compile?: number;
172
+ }
117
173
 
118
174
  /**
119
175
  * Oracle contract — how an app tells the brain whether a response was good.
@@ -270,4 +326,4 @@ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: Model
270
326
  */
271
327
  declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
272
328
 
273
- export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
329
+ export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
package/dist/index.js CHANGED
@@ -236,20 +236,37 @@ function passToolRelevance(ir, opts = {}) {
236
236
  ]
237
237
  };
238
238
  }
239
+ function totalHistoryTokens(history) {
240
+ let total = 0;
241
+ for (const m of history) {
242
+ if (typeof m.content === "string") total += countTokens(m.content);
243
+ }
244
+ return total;
245
+ }
239
246
  function passCompressHistory(ir, opts = {}) {
240
247
  const history = ir.history;
241
- if (!history || history.length === 0) return { value: ir, mutations: [] };
248
+ if (!history || history.length === 0) {
249
+ return { value: ir, mutations: [], historyTokensTotal: 0 };
250
+ }
242
251
  const keepRecent = opts.keepRecent ?? 4;
243
252
  const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
244
- if (history.length <= summarizeOlderThan) return { value: ir, mutations: [] };
253
+ const summarizeAboveTokens = opts.summarizeAboveTokens;
254
+ const historyTokensTotal = totalHistoryTokens(history);
255
+ const countThresholdHit = history.length > summarizeOlderThan;
256
+ const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
257
+ if (!countThresholdHit && !tokenThresholdHit) {
258
+ return { value: ir, mutations: [], historyTokensTotal };
259
+ }
245
260
  const cutIndex = history.length - keepRecent;
246
261
  const old = history.slice(0, cutIndex);
247
262
  const recent = history.slice(cutIndex);
248
263
  const userTurns = old.filter((m) => m.role === "user");
249
264
  const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
265
+ const oldTokens = totalHistoryTokens(old);
266
+ const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
250
267
  const summary = {
251
268
  role: "system",
252
- content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
269
+ content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
253
270
  };
254
271
  return {
255
272
  value: { ...ir, history: [summary, ...recent] },
@@ -258,9 +275,10 @@ function passCompressHistory(ir, opts = {}) {
258
275
  id: `compress-history-${old.length}`,
259
276
  source: "static_pass",
260
277
  passName: "compress_history",
261
- description: `Compressed ${old.length} old turns into 1 summary line (kept ${keepRecent} recent)`
278
+ description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
262
279
  }
263
- ]
280
+ ],
281
+ historyTokensTotal
264
282
  };
265
283
  }
266
284
  function passApplyCliffs(ir, profile, estimatedInputTokens) {
@@ -506,7 +524,11 @@ function lowerAnthropic(ir, profile, hints) {
506
524
  system: systemBlocks,
507
525
  messages,
508
526
  tools,
509
- max_tokens: hints.forceTerseOutput ? 200 : Math.min(profile.maxOutputTokens, 4096)
527
+ // alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
528
+ // cap surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
529
+ // Profile is the single source of truth; consumers wanting a tighter
530
+ // budget can pass providerOverrides.anthropic.max_tokens explicitly.
531
+ max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens
510
532
  },
511
533
  diagnostics: {
512
534
  cacheableTokens,
@@ -1280,7 +1302,8 @@ function compile(ir, opts = {}) {
1280
1302
  threshold: opts.toolRelevanceThreshold
1281
1303
  });
1282
1304
  const compressed = passCompressHistory(toolFiltered.value, {
1283
- summarizeOlderThan: opts.compressHistoryAfter
1305
+ summarizeOlderThan: opts.compressHistoryAfter,
1306
+ summarizeAboveTokens: opts.compressHistoryAboveTokens
1284
1307
  });
1285
1308
  let workingIR = compressed.value;
1286
1309
  const accumulatedMutations = [
@@ -1324,7 +1347,8 @@ function compile(ir, opts = {}) {
1324
1347
  historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
1325
1348
  cacheableTokens: lowered.diagnostics.cacheableTokens,
1326
1349
  estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
1327
- historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
1350
+ historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
1351
+ historyTokensTotal: compressed.historyTokensTotal
1328
1352
  };
1329
1353
  const advisories = runAdvisor(
1330
1354
  ir,
@@ -1431,7 +1455,8 @@ function registerCompile(appId, archetype, ir, result) {
1431
1455
  estimatedTokensIn: tokens,
1432
1456
  mutationsApplied: result.mutationsApplied.map((m) => m.id),
1433
1457
  startedAt: Date.now(),
1434
- historyCacheableTokens: result.diagnostics.historyCacheableTokens
1458
+ historyCacheableTokens: result.diagnostics.historyCacheableTokens,
1459
+ historyTokensTotal: result.diagnostics.historyTokensTotal
1435
1460
  });
1436
1461
  }
1437
1462
  async function record(input) {
@@ -1505,7 +1530,8 @@ function buildPayload(input, reg) {
1505
1530
  cache_creation_input_tokens: input.cacheCreationInputTokens,
1506
1531
  cost_usd_actual: costUsdActual,
1507
1532
  ttft_ms: input.ttftMs,
1508
- history_cacheable_tokens: reg?.historyCacheableTokens
1533
+ history_cacheable_tokens: reg?.historyCacheableTokens,
1534
+ history_tokens_at_compile: reg?.historyTokensTotal
1509
1535
  };
1510
1536
  }
1511
1537
  function computeCostUsd(modelId, tokensIn, tokensOut) {
@@ -1784,31 +1810,31 @@ async function call(ir, opts = {}) {
1784
1810
  fetchImpl: opts.fetchImpl,
1785
1811
  providerOverrides: opts.providerOverrides
1786
1812
  });
1787
- if (exec.ok) {
1813
+ const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
1814
+ if (validated.ok) {
1788
1815
  attempts.push({ model: targetModel, status: "success" });
1789
1816
  const latencyMs2 = Date.now() - start;
1790
- const responseWithStructured = withStructuredOutput(exec.response, ir);
1791
1817
  await record({
1792
1818
  handle: initial.handle,
1793
- tokensIn: responseWithStructured.tokens.input,
1794
- tokensOut: responseWithStructured.tokens.output,
1819
+ tokensIn: validated.response.tokens.input,
1820
+ tokensOut: validated.response.tokens.output,
1795
1821
  latencyMs: latencyMs2,
1796
1822
  success: true,
1797
- emptyResponse: responseWithStructured.tokens.output === 0,
1798
- toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
1823
+ emptyResponse: validated.response.tokens.output === 0,
1824
+ toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
1799
1825
  actualModel: targetModel !== initial.target ? targetModel : void 0,
1800
1826
  mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
1801
1827
  promptPreview: extractPromptPreview(ir),
1802
- responsePreview: responseWithStructured.text.slice(0, 200),
1803
- cacheReadInputTokens: responseWithStructured.tokens.cached,
1804
- cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
1828
+ responsePreview: validated.response.text.slice(0, 200),
1829
+ cacheReadInputTokens: validated.response.tokens.cached,
1830
+ cacheCreationInputTokens: validated.response.tokens.cacheCreated
1805
1831
  });
1806
1832
  return {
1807
1833
  handle: initial.handle,
1808
1834
  actualModel: targetModel,
1809
1835
  requestedModel: initial.target,
1810
1836
  provider: activeCompile.provider,
1811
- response: responseWithStructured,
1837
+ response: validated.response,
1812
1838
  latencyMs: latencyMs2,
1813
1839
  mutationsApplied: activeCompile.mutationsApplied,
1814
1840
  attempts
@@ -1816,12 +1842,12 @@ async function call(ir, opts = {}) {
1816
1842
  }
1817
1843
  attempts.push({
1818
1844
  model: targetModel,
1819
- status: exec.errorType,
1820
- errorCode: exec.errorCode,
1821
- message: exec.message
1845
+ status: validated.errorType,
1846
+ errorCode: validated.errorCode,
1847
+ message: validated.message
1822
1848
  });
1823
- lastErr = exec;
1824
- if (exec.errorType === "terminal" || opts.noFallback) {
1849
+ lastErr = validated;
1850
+ if (validated.errorType === "terminal" || opts.noFallback) {
1825
1851
  break;
1826
1852
  }
1827
1853
  }
@@ -1858,17 +1884,35 @@ function extractPromptPreview(ir) {
1858
1884
  if (lastHist) return lastHist.slice(0, 200);
1859
1885
  return void 0;
1860
1886
  }
1861
- function withStructuredOutput(response, ir) {
1862
- if (!ir.constraints?.structuredOutput) return response;
1863
- if (!response.text) return response;
1887
+ function validateStructuredContract(exec, ir) {
1888
+ if (!ir.constraints?.structuredOutput) {
1889
+ return { ok: true, response: exec.response };
1890
+ }
1891
+ const finish = (exec.response.finishReason ?? "").toLowerCase();
1892
+ if (finish === "max_tokens" || finish === "length") {
1893
+ return {
1894
+ ok: false,
1895
+ status: exec.status,
1896
+ errorType: "retryable",
1897
+ errorCode: "max_tokens_on_structured_output",
1898
+ message: `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`,
1899
+ raw: exec.response.raw
1900
+ };
1901
+ }
1902
+ if (!exec.response.text) {
1903
+ return { ok: true, response: exec.response };
1904
+ }
1864
1905
  try {
1865
- const parsed = JSON.parse(response.text);
1866
- return { ...response, structuredOutput: parsed };
1906
+ const parsed = JSON.parse(exec.response.text);
1907
+ return { ok: true, response: { ...exec.response, structuredOutput: parsed } };
1867
1908
  } catch (err) {
1868
1909
  return {
1869
- ...response,
1870
- structuredOutput: null,
1871
- parseError: err instanceof Error ? err.message : String(err)
1910
+ ok: false,
1911
+ status: exec.status,
1912
+ errorType: "retryable",
1913
+ errorCode: "structured_output_parse_failed",
1914
+ message: err instanceof Error ? err.message : String(err),
1915
+ raw: exec.response.raw
1872
1916
  };
1873
1917
  }
1874
1918
  }
package/dist/index.mjs CHANGED
@@ -120,20 +120,37 @@ function passToolRelevance(ir, opts = {}) {
120
120
  ]
121
121
  };
122
122
  }
123
+ function totalHistoryTokens(history) {
124
+ let total = 0;
125
+ for (const m of history) {
126
+ if (typeof m.content === "string") total += countTokens(m.content);
127
+ }
128
+ return total;
129
+ }
123
130
  function passCompressHistory(ir, opts = {}) {
124
131
  const history = ir.history;
125
- if (!history || history.length === 0) return { value: ir, mutations: [] };
132
+ if (!history || history.length === 0) {
133
+ return { value: ir, mutations: [], historyTokensTotal: 0 };
134
+ }
126
135
  const keepRecent = opts.keepRecent ?? 4;
127
136
  const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
128
- if (history.length <= summarizeOlderThan) return { value: ir, mutations: [] };
137
+ const summarizeAboveTokens = opts.summarizeAboveTokens;
138
+ const historyTokensTotal = totalHistoryTokens(history);
139
+ const countThresholdHit = history.length > summarizeOlderThan;
140
+ const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
141
+ if (!countThresholdHit && !tokenThresholdHit) {
142
+ return { value: ir, mutations: [], historyTokensTotal };
143
+ }
129
144
  const cutIndex = history.length - keepRecent;
130
145
  const old = history.slice(0, cutIndex);
131
146
  const recent = history.slice(cutIndex);
132
147
  const userTurns = old.filter((m) => m.role === "user");
133
148
  const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
149
+ const oldTokens = totalHistoryTokens(old);
150
+ const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
134
151
  const summary = {
135
152
  role: "system",
136
- content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
153
+ content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
137
154
  };
138
155
  return {
139
156
  value: { ...ir, history: [summary, ...recent] },
@@ -142,9 +159,10 @@ function passCompressHistory(ir, opts = {}) {
142
159
  id: `compress-history-${old.length}`,
143
160
  source: "static_pass",
144
161
  passName: "compress_history",
145
- description: `Compressed ${old.length} old turns into 1 summary line (kept ${keepRecent} recent)`
162
+ description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
146
163
  }
147
- ]
164
+ ],
165
+ historyTokensTotal
148
166
  };
149
167
  }
150
168
  function passApplyCliffs(ir, profile, estimatedInputTokens) {
@@ -390,7 +408,11 @@ function lowerAnthropic(ir, profile, hints) {
390
408
  system: systemBlocks,
391
409
  messages,
392
410
  tools,
393
- max_tokens: hints.forceTerseOutput ? 200 : Math.min(profile.maxOutputTokens, 4096)
411
+ // alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
412
+ // floor surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
413
+ // Profile is the single source of truth; consumers wanting a tighter
414
+ // budget can pass providerOverrides.anthropic.max_tokens explicitly.
415
+ max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens
394
416
  },
395
417
  diagnostics: {
396
418
  cacheableTokens,
@@ -762,7 +784,8 @@ function compile(ir, opts = {}) {
762
784
  threshold: opts.toolRelevanceThreshold
763
785
  });
764
786
  const compressed = passCompressHistory(toolFiltered.value, {
765
- summarizeOlderThan: opts.compressHistoryAfter
787
+ summarizeOlderThan: opts.compressHistoryAfter,
788
+ summarizeAboveTokens: opts.compressHistoryAboveTokens
766
789
  });
767
790
  let workingIR = compressed.value;
768
791
  const accumulatedMutations = [
@@ -806,7 +829,8 @@ function compile(ir, opts = {}) {
806
829
  historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
807
830
  cacheableTokens: lowered.diagnostics.cacheableTokens,
808
831
  estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
809
- historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
832
+ historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
833
+ historyTokensTotal: compressed.historyTokensTotal
810
834
  };
811
835
  const advisories = runAdvisor(
812
836
  ir,
@@ -913,7 +937,8 @@ function registerCompile(appId, archetype, ir, result) {
913
937
  estimatedTokensIn: tokens,
914
938
  mutationsApplied: result.mutationsApplied.map((m) => m.id),
915
939
  startedAt: Date.now(),
916
- historyCacheableTokens: result.diagnostics.historyCacheableTokens
940
+ historyCacheableTokens: result.diagnostics.historyCacheableTokens,
941
+ historyTokensTotal: result.diagnostics.historyTokensTotal
917
942
  });
918
943
  }
919
944
  async function record(input) {
@@ -987,7 +1012,8 @@ function buildPayload(input, reg) {
987
1012
  cache_creation_input_tokens: input.cacheCreationInputTokens,
988
1013
  cost_usd_actual: costUsdActual,
989
1014
  ttft_ms: input.ttftMs,
990
- history_cacheable_tokens: reg?.historyCacheableTokens
1015
+ history_cacheable_tokens: reg?.historyCacheableTokens,
1016
+ history_tokens_at_compile: reg?.historyTokensTotal
991
1017
  };
992
1018
  }
993
1019
  function computeCostUsd(modelId, tokensIn, tokensOut) {
@@ -1266,31 +1292,31 @@ async function call(ir, opts = {}) {
1266
1292
  fetchImpl: opts.fetchImpl,
1267
1293
  providerOverrides: opts.providerOverrides
1268
1294
  });
1269
- if (exec.ok) {
1295
+ const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
1296
+ if (validated.ok) {
1270
1297
  attempts.push({ model: targetModel, status: "success" });
1271
1298
  const latencyMs2 = Date.now() - start;
1272
- const responseWithStructured = withStructuredOutput(exec.response, ir);
1273
1299
  await record({
1274
1300
  handle: initial.handle,
1275
- tokensIn: responseWithStructured.tokens.input,
1276
- tokensOut: responseWithStructured.tokens.output,
1301
+ tokensIn: validated.response.tokens.input,
1302
+ tokensOut: validated.response.tokens.output,
1277
1303
  latencyMs: latencyMs2,
1278
1304
  success: true,
1279
- emptyResponse: responseWithStructured.tokens.output === 0,
1280
- toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
1305
+ emptyResponse: validated.response.tokens.output === 0,
1306
+ toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
1281
1307
  actualModel: targetModel !== initial.target ? targetModel : void 0,
1282
1308
  mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
1283
1309
  promptPreview: extractPromptPreview(ir),
1284
- responsePreview: responseWithStructured.text.slice(0, 200),
1285
- cacheReadInputTokens: responseWithStructured.tokens.cached,
1286
- cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
1310
+ responsePreview: validated.response.text.slice(0, 200),
1311
+ cacheReadInputTokens: validated.response.tokens.cached,
1312
+ cacheCreationInputTokens: validated.response.tokens.cacheCreated
1287
1313
  });
1288
1314
  return {
1289
1315
  handle: initial.handle,
1290
1316
  actualModel: targetModel,
1291
1317
  requestedModel: initial.target,
1292
1318
  provider: activeCompile.provider,
1293
- response: responseWithStructured,
1319
+ response: validated.response,
1294
1320
  latencyMs: latencyMs2,
1295
1321
  mutationsApplied: activeCompile.mutationsApplied,
1296
1322
  attempts
@@ -1298,12 +1324,12 @@ async function call(ir, opts = {}) {
1298
1324
  }
1299
1325
  attempts.push({
1300
1326
  model: targetModel,
1301
- status: exec.errorType,
1302
- errorCode: exec.errorCode,
1303
- message: exec.message
1327
+ status: validated.errorType,
1328
+ errorCode: validated.errorCode,
1329
+ message: validated.message
1304
1330
  });
1305
- lastErr = exec;
1306
- if (exec.errorType === "terminal" || opts.noFallback) {
1331
+ lastErr = validated;
1332
+ if (validated.errorType === "terminal" || opts.noFallback) {
1307
1333
  break;
1308
1334
  }
1309
1335
  }
@@ -1340,17 +1366,35 @@ function extractPromptPreview(ir) {
1340
1366
  if (lastHist) return lastHist.slice(0, 200);
1341
1367
  return void 0;
1342
1368
  }
1343
- function withStructuredOutput(response, ir) {
1344
- if (!ir.constraints?.structuredOutput) return response;
1345
- if (!response.text) return response;
1369
+ function validateStructuredContract(exec, ir) {
1370
+ if (!ir.constraints?.structuredOutput) {
1371
+ return { ok: true, response: exec.response };
1372
+ }
1373
+ const finish = (exec.response.finishReason ?? "").toLowerCase();
1374
+ if (finish === "max_tokens" || finish === "length") {
1375
+ return {
1376
+ ok: false,
1377
+ status: exec.status,
1378
+ errorType: "retryable",
1379
+ errorCode: "max_tokens_on_structured_output",
1380
+ message: `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`,
1381
+ raw: exec.response.raw
1382
+ };
1383
+ }
1384
+ if (!exec.response.text) {
1385
+ return { ok: true, response: exec.response };
1386
+ }
1346
1387
  try {
1347
- const parsed = JSON.parse(response.text);
1348
- return { ...response, structuredOutput: parsed };
1388
+ const parsed = JSON.parse(exec.response.text);
1389
+ return { ok: true, response: { ...exec.response, structuredOutput: parsed } };
1349
1390
  } catch (err) {
1350
1391
  return {
1351
- ...response,
1352
- structuredOutput: null,
1353
- parseError: err instanceof Error ? err.message : String(err)
1392
+ ok: false,
1393
+ status: exec.status,
1394
+ errorType: "retryable",
1395
+ errorCode: "structured_output_parse_failed",
1396
+ message: err instanceof Error ? err.message : String(err),
1397
+ raw: exec.response.raw
1354
1398
  };
1355
1399
  }
1356
1400
  }
@@ -330,6 +330,14 @@ interface CompileResult {
330
330
  * from history caching. alpha.5.
331
331
  */
332
332
  historyCacheableTokens: number;
333
+ /**
334
+ * Total tokens in input `history` (pre-compression). Computed regardless
335
+ * of whether `passCompressHistory` fired — surfaces how close a tuple is
336
+ * to its `compressHistoryAboveTokens` threshold so dashboards / cost-
337
+ * watchers can see the bloat axis the count-based threshold misses.
338
+ * 0 when history is empty. alpha.7.
339
+ */
340
+ historyTokensTotal: number;
333
341
  };
334
342
  }
335
343
  /**
@@ -330,6 +330,14 @@ interface CompileResult {
330
330
  * from history caching. alpha.5.
331
331
  */
332
332
  historyCacheableTokens: number;
333
+ /**
334
+ * Total tokens in input `history` (pre-compression). Computed regardless
335
+ * of whether `passCompressHistory` fired — surfaces how close a tuple is
336
+ * to its `compressHistoryAboveTokens` threshold so dashboards / cost-
337
+ * watchers can see the bloat axis the count-based threshold misses.
338
+ * 0 when history is empty. alpha.7.
339
+ */
340
+ historyTokensTotal: number;
333
341
  };
334
342
  }
335
343
  /**
@@ -1,2 +1,2 @@
1
- export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
1
+ export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
2
2
  import './dialect.mjs';
@@ -1,2 +1,2 @@
1
- export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
1
+ export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
2
2
  import './dialect.js';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@warmdrift/kgauto-compiler",
3
- "version": "2.0.0-alpha.6",
3
+ "version": "2.0.0-alpha.8",
4
4
  "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",