@warmdrift/kgauto-compiler 2.0.0-alpha.7 → 2.0.0-alpha.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +46 -24
- package/dist/index.mjs +46 -24
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -524,7 +524,11 @@ function lowerAnthropic(ir, profile, hints) {
|
|
|
524
524
|
system: systemBlocks,
|
|
525
525
|
messages,
|
|
526
526
|
tools,
|
|
527
|
-
|
|
527
|
+
// alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
|
|
528
|
+
// cap surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
|
|
529
|
+
// Profile is the single source of truth; consumers wanting a tighter
|
|
530
|
+
// budget can pass providerOverrides.anthropic.max_tokens explicitly.
|
|
531
|
+
max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens
|
|
528
532
|
},
|
|
529
533
|
diagnostics: {
|
|
530
534
|
cacheableTokens,
|
|
@@ -1806,31 +1810,31 @@ async function call(ir, opts = {}) {
|
|
|
1806
1810
|
fetchImpl: opts.fetchImpl,
|
|
1807
1811
|
providerOverrides: opts.providerOverrides
|
|
1808
1812
|
});
|
|
1809
|
-
|
|
1813
|
+
const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
|
|
1814
|
+
if (validated.ok) {
|
|
1810
1815
|
attempts.push({ model: targetModel, status: "success" });
|
|
1811
1816
|
const latencyMs2 = Date.now() - start;
|
|
1812
|
-
const responseWithStructured = withStructuredOutput(exec.response, ir);
|
|
1813
1817
|
await record({
|
|
1814
1818
|
handle: initial.handle,
|
|
1815
|
-
tokensIn:
|
|
1816
|
-
tokensOut:
|
|
1819
|
+
tokensIn: validated.response.tokens.input,
|
|
1820
|
+
tokensOut: validated.response.tokens.output,
|
|
1817
1821
|
latencyMs: latencyMs2,
|
|
1818
1822
|
success: true,
|
|
1819
|
-
emptyResponse:
|
|
1820
|
-
toolsCalled:
|
|
1823
|
+
emptyResponse: validated.response.tokens.output === 0,
|
|
1824
|
+
toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
|
|
1821
1825
|
actualModel: targetModel !== initial.target ? targetModel : void 0,
|
|
1822
1826
|
mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
|
|
1823
1827
|
promptPreview: extractPromptPreview(ir),
|
|
1824
|
-
responsePreview:
|
|
1825
|
-
cacheReadInputTokens:
|
|
1826
|
-
cacheCreationInputTokens:
|
|
1828
|
+
responsePreview: validated.response.text.slice(0, 200),
|
|
1829
|
+
cacheReadInputTokens: validated.response.tokens.cached,
|
|
1830
|
+
cacheCreationInputTokens: validated.response.tokens.cacheCreated
|
|
1827
1831
|
});
|
|
1828
1832
|
return {
|
|
1829
1833
|
handle: initial.handle,
|
|
1830
1834
|
actualModel: targetModel,
|
|
1831
1835
|
requestedModel: initial.target,
|
|
1832
1836
|
provider: activeCompile.provider,
|
|
1833
|
-
response:
|
|
1837
|
+
response: validated.response,
|
|
1834
1838
|
latencyMs: latencyMs2,
|
|
1835
1839
|
mutationsApplied: activeCompile.mutationsApplied,
|
|
1836
1840
|
attempts
|
|
@@ -1838,12 +1842,12 @@ async function call(ir, opts = {}) {
|
|
|
1838
1842
|
}
|
|
1839
1843
|
attempts.push({
|
|
1840
1844
|
model: targetModel,
|
|
1841
|
-
status:
|
|
1842
|
-
errorCode:
|
|
1843
|
-
message:
|
|
1845
|
+
status: validated.errorType,
|
|
1846
|
+
errorCode: validated.errorCode,
|
|
1847
|
+
message: validated.message
|
|
1844
1848
|
});
|
|
1845
|
-
lastErr =
|
|
1846
|
-
if (
|
|
1849
|
+
lastErr = validated;
|
|
1850
|
+
if (validated.errorType === "terminal" || opts.noFallback) {
|
|
1847
1851
|
break;
|
|
1848
1852
|
}
|
|
1849
1853
|
}
|
|
@@ -1880,17 +1884,35 @@ function extractPromptPreview(ir) {
|
|
|
1880
1884
|
if (lastHist) return lastHist.slice(0, 200);
|
|
1881
1885
|
return void 0;
|
|
1882
1886
|
}
|
|
1883
|
-
function
|
|
1884
|
-
if (!ir.constraints?.structuredOutput)
|
|
1885
|
-
|
|
1887
|
+
function validateStructuredContract(exec, ir) {
|
|
1888
|
+
if (!ir.constraints?.structuredOutput) {
|
|
1889
|
+
return { ok: true, response: exec.response };
|
|
1890
|
+
}
|
|
1891
|
+
const finish = (exec.response.finishReason ?? "").toLowerCase();
|
|
1892
|
+
if (finish === "max_tokens" || finish === "length") {
|
|
1893
|
+
return {
|
|
1894
|
+
ok: false,
|
|
1895
|
+
status: exec.status,
|
|
1896
|
+
errorType: "retryable",
|
|
1897
|
+
errorCode: "max_tokens_on_structured_output",
|
|
1898
|
+
message: `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`,
|
|
1899
|
+
raw: exec.response.raw
|
|
1900
|
+
};
|
|
1901
|
+
}
|
|
1902
|
+
if (!exec.response.text) {
|
|
1903
|
+
return { ok: true, response: exec.response };
|
|
1904
|
+
}
|
|
1886
1905
|
try {
|
|
1887
|
-
const parsed = JSON.parse(response.text);
|
|
1888
|
-
return { ...response, structuredOutput: parsed };
|
|
1906
|
+
const parsed = JSON.parse(exec.response.text);
|
|
1907
|
+
return { ok: true, response: { ...exec.response, structuredOutput: parsed } };
|
|
1889
1908
|
} catch (err) {
|
|
1890
1909
|
return {
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1910
|
+
ok: false,
|
|
1911
|
+
status: exec.status,
|
|
1912
|
+
errorType: "retryable",
|
|
1913
|
+
errorCode: "structured_output_parse_failed",
|
|
1914
|
+
message: err instanceof Error ? err.message : String(err),
|
|
1915
|
+
raw: exec.response.raw
|
|
1894
1916
|
};
|
|
1895
1917
|
}
|
|
1896
1918
|
}
|
package/dist/index.mjs
CHANGED
|
@@ -408,7 +408,11 @@ function lowerAnthropic(ir, profile, hints) {
|
|
|
408
408
|
system: systemBlocks,
|
|
409
409
|
messages,
|
|
410
410
|
tools,
|
|
411
|
-
|
|
411
|
+
// alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
|
|
412
|
+
// cap surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
|
|
413
|
+
// Profile is the single source of truth; consumers wanting a tighter
|
|
414
|
+
// budget can pass providerOverrides.anthropic.max_tokens explicitly.
|
|
415
|
+
max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens
|
|
412
416
|
},
|
|
413
417
|
diagnostics: {
|
|
414
418
|
cacheableTokens,
|
|
@@ -1288,31 +1292,31 @@ async function call(ir, opts = {}) {
|
|
|
1288
1292
|
fetchImpl: opts.fetchImpl,
|
|
1289
1293
|
providerOverrides: opts.providerOverrides
|
|
1290
1294
|
});
|
|
1291
|
-
|
|
1295
|
+
const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
|
|
1296
|
+
if (validated.ok) {
|
|
1292
1297
|
attempts.push({ model: targetModel, status: "success" });
|
|
1293
1298
|
const latencyMs2 = Date.now() - start;
|
|
1294
|
-
const responseWithStructured = withStructuredOutput(exec.response, ir);
|
|
1295
1299
|
await record({
|
|
1296
1300
|
handle: initial.handle,
|
|
1297
|
-
tokensIn:
|
|
1298
|
-
tokensOut:
|
|
1301
|
+
tokensIn: validated.response.tokens.input,
|
|
1302
|
+
tokensOut: validated.response.tokens.output,
|
|
1299
1303
|
latencyMs: latencyMs2,
|
|
1300
1304
|
success: true,
|
|
1301
|
-
emptyResponse:
|
|
1302
|
-
toolsCalled:
|
|
1305
|
+
emptyResponse: validated.response.tokens.output === 0,
|
|
1306
|
+
toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
|
|
1303
1307
|
actualModel: targetModel !== initial.target ? targetModel : void 0,
|
|
1304
1308
|
mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
|
|
1305
1309
|
promptPreview: extractPromptPreview(ir),
|
|
1306
|
-
responsePreview:
|
|
1307
|
-
cacheReadInputTokens:
|
|
1308
|
-
cacheCreationInputTokens:
|
|
1310
|
+
responsePreview: validated.response.text.slice(0, 200),
|
|
1311
|
+
cacheReadInputTokens: validated.response.tokens.cached,
|
|
1312
|
+
cacheCreationInputTokens: validated.response.tokens.cacheCreated
|
|
1309
1313
|
});
|
|
1310
1314
|
return {
|
|
1311
1315
|
handle: initial.handle,
|
|
1312
1316
|
actualModel: targetModel,
|
|
1313
1317
|
requestedModel: initial.target,
|
|
1314
1318
|
provider: activeCompile.provider,
|
|
1315
|
-
response:
|
|
1319
|
+
response: validated.response,
|
|
1316
1320
|
latencyMs: latencyMs2,
|
|
1317
1321
|
mutationsApplied: activeCompile.mutationsApplied,
|
|
1318
1322
|
attempts
|
|
@@ -1320,12 +1324,12 @@ async function call(ir, opts = {}) {
|
|
|
1320
1324
|
}
|
|
1321
1325
|
attempts.push({
|
|
1322
1326
|
model: targetModel,
|
|
1323
|
-
status:
|
|
1324
|
-
errorCode:
|
|
1325
|
-
message:
|
|
1327
|
+
status: validated.errorType,
|
|
1328
|
+
errorCode: validated.errorCode,
|
|
1329
|
+
message: validated.message
|
|
1326
1330
|
});
|
|
1327
|
-
lastErr =
|
|
1328
|
-
if (
|
|
1331
|
+
lastErr = validated;
|
|
1332
|
+
if (validated.errorType === "terminal" || opts.noFallback) {
|
|
1329
1333
|
break;
|
|
1330
1334
|
}
|
|
1331
1335
|
}
|
|
@@ -1362,17 +1366,35 @@ function extractPromptPreview(ir) {
|
|
|
1362
1366
|
if (lastHist) return lastHist.slice(0, 200);
|
|
1363
1367
|
return void 0;
|
|
1364
1368
|
}
|
|
1365
|
-
function
|
|
1366
|
-
if (!ir.constraints?.structuredOutput)
|
|
1367
|
-
|
|
1369
|
+
function validateStructuredContract(exec, ir) {
|
|
1370
|
+
if (!ir.constraints?.structuredOutput) {
|
|
1371
|
+
return { ok: true, response: exec.response };
|
|
1372
|
+
}
|
|
1373
|
+
const finish = (exec.response.finishReason ?? "").toLowerCase();
|
|
1374
|
+
if (finish === "max_tokens" || finish === "length") {
|
|
1375
|
+
return {
|
|
1376
|
+
ok: false,
|
|
1377
|
+
status: exec.status,
|
|
1378
|
+
errorType: "retryable",
|
|
1379
|
+
errorCode: "max_tokens_on_structured_output",
|
|
1380
|
+
message: `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`,
|
|
1381
|
+
raw: exec.response.raw
|
|
1382
|
+
};
|
|
1383
|
+
}
|
|
1384
|
+
if (!exec.response.text) {
|
|
1385
|
+
return { ok: true, response: exec.response };
|
|
1386
|
+
}
|
|
1368
1387
|
try {
|
|
1369
|
-
const parsed = JSON.parse(response.text);
|
|
1370
|
-
return { ...response, structuredOutput: parsed };
|
|
1388
|
+
const parsed = JSON.parse(exec.response.text);
|
|
1389
|
+
return { ok: true, response: { ...exec.response, structuredOutput: parsed } };
|
|
1371
1390
|
} catch (err) {
|
|
1372
1391
|
return {
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1392
|
+
ok: false,
|
|
1393
|
+
status: exec.status,
|
|
1394
|
+
errorType: "retryable",
|
|
1395
|
+
errorCode: "structured_output_parse_failed",
|
|
1396
|
+
message: err instanceof Error ? err.message : String(err),
|
|
1397
|
+
raw: exec.response.raw
|
|
1376
1398
|
};
|
|
1377
1399
|
}
|
|
1378
1400
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@warmdrift/kgauto-compiler",
|
|
3
|
-
"version": "2.0.0-alpha.7",
|
|
3
|
+
"version": "2.0.0-alpha.8",
|
|
4
4
|
"description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|