@warmdrift/kgauto-compiler 2.0.0-alpha.7 → 2.0.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -524,7 +524,11 @@ function lowerAnthropic(ir, profile, hints) {
524
524
  system: systemBlocks,
525
525
  messages,
526
526
  tools,
527
- max_tokens: hints.forceTerseOutput ? 200 : Math.min(profile.maxOutputTokens, 4096)
527
+ // alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
528
+ // cap surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
529
+ // Profile is the single source of truth; consumers wanting a tighter
530
+ // budget can pass providerOverrides.anthropic.max_tokens explicitly.
531
+ max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens
528
532
  },
529
533
  diagnostics: {
530
534
  cacheableTokens,
@@ -1806,31 +1810,31 @@ async function call(ir, opts = {}) {
1806
1810
  fetchImpl: opts.fetchImpl,
1807
1811
  providerOverrides: opts.providerOverrides
1808
1812
  });
1809
- if (exec.ok) {
1813
+ const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
1814
+ if (validated.ok) {
1810
1815
  attempts.push({ model: targetModel, status: "success" });
1811
1816
  const latencyMs2 = Date.now() - start;
1812
- const responseWithStructured = withStructuredOutput(exec.response, ir);
1813
1817
  await record({
1814
1818
  handle: initial.handle,
1815
- tokensIn: responseWithStructured.tokens.input,
1816
- tokensOut: responseWithStructured.tokens.output,
1819
+ tokensIn: validated.response.tokens.input,
1820
+ tokensOut: validated.response.tokens.output,
1817
1821
  latencyMs: latencyMs2,
1818
1822
  success: true,
1819
- emptyResponse: responseWithStructured.tokens.output === 0,
1820
- toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
1823
+ emptyResponse: validated.response.tokens.output === 0,
1824
+ toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
1821
1825
  actualModel: targetModel !== initial.target ? targetModel : void 0,
1822
1826
  mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
1823
1827
  promptPreview: extractPromptPreview(ir),
1824
- responsePreview: responseWithStructured.text.slice(0, 200),
1825
- cacheReadInputTokens: responseWithStructured.tokens.cached,
1826
- cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
1828
+ responsePreview: validated.response.text.slice(0, 200),
1829
+ cacheReadInputTokens: validated.response.tokens.cached,
1830
+ cacheCreationInputTokens: validated.response.tokens.cacheCreated
1827
1831
  });
1828
1832
  return {
1829
1833
  handle: initial.handle,
1830
1834
  actualModel: targetModel,
1831
1835
  requestedModel: initial.target,
1832
1836
  provider: activeCompile.provider,
1833
- response: responseWithStructured,
1837
+ response: validated.response,
1834
1838
  latencyMs: latencyMs2,
1835
1839
  mutationsApplied: activeCompile.mutationsApplied,
1836
1840
  attempts
@@ -1838,12 +1842,12 @@ async function call(ir, opts = {}) {
1838
1842
  }
1839
1843
  attempts.push({
1840
1844
  model: targetModel,
1841
- status: exec.errorType,
1842
- errorCode: exec.errorCode,
1843
- message: exec.message
1845
+ status: validated.errorType,
1846
+ errorCode: validated.errorCode,
1847
+ message: validated.message
1844
1848
  });
1845
- lastErr = exec;
1846
- if (exec.errorType === "terminal" || opts.noFallback) {
1849
+ lastErr = validated;
1850
+ if (validated.errorType === "terminal" || opts.noFallback) {
1847
1851
  break;
1848
1852
  }
1849
1853
  }
@@ -1880,17 +1884,35 @@ function extractPromptPreview(ir) {
1880
1884
  if (lastHist) return lastHist.slice(0, 200);
1881
1885
  return void 0;
1882
1886
  }
1883
- function withStructuredOutput(response, ir) {
1884
- if (!ir.constraints?.structuredOutput) return response;
1885
- if (!response.text) return response;
1887
+ function validateStructuredContract(exec, ir) {
1888
+ if (!ir.constraints?.structuredOutput) {
1889
+ return { ok: true, response: exec.response };
1890
+ }
1891
+ const finish = (exec.response.finishReason ?? "").toLowerCase();
1892
+ if (finish === "max_tokens" || finish === "length") {
1893
+ return {
1894
+ ok: false,
1895
+ status: exec.status,
1896
+ errorType: "retryable",
1897
+ errorCode: "max_tokens_on_structured_output",
1898
+ message: `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`,
1899
+ raw: exec.response.raw
1900
+ };
1901
+ }
1902
+ if (!exec.response.text) {
1903
+ return { ok: true, response: exec.response };
1904
+ }
1886
1905
  try {
1887
- const parsed = JSON.parse(response.text);
1888
- return { ...response, structuredOutput: parsed };
1906
+ const parsed = JSON.parse(exec.response.text);
1907
+ return { ok: true, response: { ...exec.response, structuredOutput: parsed } };
1889
1908
  } catch (err) {
1890
1909
  return {
1891
- ...response,
1892
- structuredOutput: null,
1893
- parseError: err instanceof Error ? err.message : String(err)
1910
+ ok: false,
1911
+ status: exec.status,
1912
+ errorType: "retryable",
1913
+ errorCode: "structured_output_parse_failed",
1914
+ message: err instanceof Error ? err.message : String(err),
1915
+ raw: exec.response.raw
1894
1916
  };
1895
1917
  }
1896
1918
  }
package/dist/index.mjs CHANGED
@@ -408,7 +408,11 @@ function lowerAnthropic(ir, profile, hints) {
408
408
  system: systemBlocks,
409
409
  messages,
410
410
  tools,
411
- max_tokens: hints.forceTerseOutput ? 200 : Math.min(profile.maxOutputTokens, 4096)
411
+ // alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
412
+ // cap surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
413
+ // Profile is the single source of truth; consumers wanting a tighter
414
+ // budget can pass providerOverrides.anthropic.max_tokens explicitly.
415
+ max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens
412
416
  },
413
417
  diagnostics: {
414
418
  cacheableTokens,
@@ -1288,31 +1292,31 @@ async function call(ir, opts = {}) {
1288
1292
  fetchImpl: opts.fetchImpl,
1289
1293
  providerOverrides: opts.providerOverrides
1290
1294
  });
1291
- if (exec.ok) {
1295
+ const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
1296
+ if (validated.ok) {
1292
1297
  attempts.push({ model: targetModel, status: "success" });
1293
1298
  const latencyMs2 = Date.now() - start;
1294
- const responseWithStructured = withStructuredOutput(exec.response, ir);
1295
1299
  await record({
1296
1300
  handle: initial.handle,
1297
- tokensIn: responseWithStructured.tokens.input,
1298
- tokensOut: responseWithStructured.tokens.output,
1301
+ tokensIn: validated.response.tokens.input,
1302
+ tokensOut: validated.response.tokens.output,
1299
1303
  latencyMs: latencyMs2,
1300
1304
  success: true,
1301
- emptyResponse: responseWithStructured.tokens.output === 0,
1302
- toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
1305
+ emptyResponse: validated.response.tokens.output === 0,
1306
+ toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
1303
1307
  actualModel: targetModel !== initial.target ? targetModel : void 0,
1304
1308
  mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
1305
1309
  promptPreview: extractPromptPreview(ir),
1306
- responsePreview: responseWithStructured.text.slice(0, 200),
1307
- cacheReadInputTokens: responseWithStructured.tokens.cached,
1308
- cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
1310
+ responsePreview: validated.response.text.slice(0, 200),
1311
+ cacheReadInputTokens: validated.response.tokens.cached,
1312
+ cacheCreationInputTokens: validated.response.tokens.cacheCreated
1309
1313
  });
1310
1314
  return {
1311
1315
  handle: initial.handle,
1312
1316
  actualModel: targetModel,
1313
1317
  requestedModel: initial.target,
1314
1318
  provider: activeCompile.provider,
1315
- response: responseWithStructured,
1319
+ response: validated.response,
1316
1320
  latencyMs: latencyMs2,
1317
1321
  mutationsApplied: activeCompile.mutationsApplied,
1318
1322
  attempts
@@ -1320,12 +1324,12 @@ async function call(ir, opts = {}) {
1320
1324
  }
1321
1325
  attempts.push({
1322
1326
  model: targetModel,
1323
- status: exec.errorType,
1324
- errorCode: exec.errorCode,
1325
- message: exec.message
1327
+ status: validated.errorType,
1328
+ errorCode: validated.errorCode,
1329
+ message: validated.message
1326
1330
  });
1327
- lastErr = exec;
1328
- if (exec.errorType === "terminal" || opts.noFallback) {
1331
+ lastErr = validated;
1332
+ if (validated.errorType === "terminal" || opts.noFallback) {
1329
1333
  break;
1330
1334
  }
1331
1335
  }
@@ -1362,17 +1366,35 @@ function extractPromptPreview(ir) {
1362
1366
  if (lastHist) return lastHist.slice(0, 200);
1363
1367
  return void 0;
1364
1368
  }
1365
- function withStructuredOutput(response, ir) {
1366
- if (!ir.constraints?.structuredOutput) return response;
1367
- if (!response.text) return response;
1369
+ function validateStructuredContract(exec, ir) {
1370
+ if (!ir.constraints?.structuredOutput) {
1371
+ return { ok: true, response: exec.response };
1372
+ }
1373
+ const finish = (exec.response.finishReason ?? "").toLowerCase();
1374
+ if (finish === "max_tokens" || finish === "length") {
1375
+ return {
1376
+ ok: false,
1377
+ status: exec.status,
1378
+ errorType: "retryable",
1379
+ errorCode: "max_tokens_on_structured_output",
1380
+ message: `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`,
1381
+ raw: exec.response.raw
1382
+ };
1383
+ }
1384
+ if (!exec.response.text) {
1385
+ return { ok: true, response: exec.response };
1386
+ }
1368
1387
  try {
1369
- const parsed = JSON.parse(response.text);
1370
- return { ...response, structuredOutput: parsed };
1388
+ const parsed = JSON.parse(exec.response.text);
1389
+ return { ok: true, response: { ...exec.response, structuredOutput: parsed } };
1371
1390
  } catch (err) {
1372
1391
  return {
1373
- ...response,
1374
- structuredOutput: null,
1375
- parseError: err instanceof Error ? err.message : String(err)
1392
+ ok: false,
1393
+ status: exec.status,
1394
+ errorType: "retryable",
1395
+ errorCode: "structured_output_parse_failed",
1396
+ message: err instanceof Error ? err.message : String(err),
1397
+ raw: exec.response.raw
1376
1398
  };
1377
1399
  }
1378
1400
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@warmdrift/kgauto-compiler",
3
- "version": "2.0.0-alpha.7",
3
+ "version": "2.0.0-alpha.8",
4
4
  "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",