@warmdrift/kgauto-compiler 2.0.0-alpha.3 → 2.0.0-alpha.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/index.d.mts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +116 -15
- package/dist/index.mjs +116 -15
- package/dist/{profiles-BiyrF36f.d.mts → profiles-DHdCRBVH.d.mts} +82 -0
- package/dist/{profiles-C5lVqF8_.d.ts → profiles-MGq5Tnjv.d.ts} +82 -0
- package/dist/profiles.d.mts +1 -1
- package/dist/profiles.d.ts +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# @warmdrift/kgauto-compiler — v2.0.0-alpha.
|
|
1
|
+
# @warmdrift/kgauto-compiler — v2.0.0-alpha.5
|
|
2
2
|
|
|
3
3
|
> Prompt compiler + central learning brain for multi-model AI apps.
|
|
4
4
|
> **Swap models without rewriting prompts.**
|
|
@@ -18,8 +18,8 @@ mutations.
|
|
|
18
18
|
- **Package:** alpha — coexists with v1 (`@warmdrift/kgauto@1.2.0`) under
|
|
19
19
|
the temporary name `@warmdrift/kgauto-compiler`. Renames to v2 final once
|
|
20
20
|
v1 is fully retired from production.
|
|
21
|
-
- **Tests:**
|
|
22
|
-
- **Build:** clean (
|
|
21
|
+
- **Tests:** 180/180 passing
|
|
22
|
+
- **Build:** clean (47KB ESM, 64KB CJS)
|
|
23
23
|
- **Brain:** schema ready (see `brain/migrations/001_initial_schema.sql`);
|
|
24
24
|
awaiting dedicated Supabase provisioning.
|
|
25
25
|
- **Mutation engine:** v2.1 (after enough outcome data accumulates).
|
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-
|
|
2
|
-
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-DHdCRBVH.mjs';
|
|
2
|
+
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
|
|
3
3
|
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
|
|
4
4
|
|
|
5
5
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-
|
|
2
|
-
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-MGq5Tnjv.js';
|
|
2
|
+
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
|
|
3
3
|
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
|
|
4
4
|
|
|
5
5
|
/**
|
package/dist/index.js
CHANGED
|
@@ -489,10 +489,15 @@ function lower(ir, profile, hints = {}) {
|
|
|
489
489
|
}
|
|
490
490
|
function lowerAnthropic(ir, profile, hints) {
|
|
491
491
|
const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
|
|
492
|
-
const
|
|
492
|
+
const history = ir.history ?? [];
|
|
493
|
+
const policy = ir.historyCachePolicy;
|
|
494
|
+
const markIndex = resolveHistoryMarkIndex(history.length, policy);
|
|
495
|
+
const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
|
|
493
496
|
const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
|
|
494
497
|
const cacheableTokens = computeCacheableTokens(systemBlocks);
|
|
495
|
-
const
|
|
498
|
+
const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
|
|
499
|
+
const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
|
|
500
|
+
const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
|
|
496
501
|
return {
|
|
497
502
|
request: {
|
|
498
503
|
provider: "anthropic",
|
|
@@ -504,6 +509,7 @@ function lowerAnthropic(ir, profile, hints) {
|
|
|
504
509
|
},
|
|
505
510
|
diagnostics: {
|
|
506
511
|
cacheableTokens,
|
|
512
|
+
historyCacheableTokens,
|
|
507
513
|
estimatedCacheSavingsUsd: cacheSavings
|
|
508
514
|
}
|
|
509
515
|
};
|
|
@@ -536,17 +542,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
|
|
|
536
542
|
}
|
|
537
543
|
return blocks;
|
|
538
544
|
}
|
|
539
|
-
function buildAnthropicMessages(history, currentTurn) {
|
|
545
|
+
function buildAnthropicMessages(history, currentTurn, markIndex) {
|
|
540
546
|
const out = [];
|
|
541
|
-
for (
|
|
547
|
+
for (let i = 0; i < history.length; i++) {
|
|
548
|
+
const m = history[i];
|
|
542
549
|
if (m.role === "system") continue;
|
|
543
|
-
|
|
550
|
+
const shouldMark = i === markIndex;
|
|
551
|
+
out.push({
|
|
552
|
+
role: m.role,
|
|
553
|
+
content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
|
|
554
|
+
});
|
|
544
555
|
}
|
|
545
556
|
if (currentTurn && currentTurn.role !== "system") {
|
|
546
557
|
out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
|
|
547
558
|
}
|
|
548
559
|
return out;
|
|
549
560
|
}
|
|
561
|
+
/**
 * Build the Anthropic `content` payload for a history message that should
 * carry the prompt-cache breakpoint.
 *
 * - If the message has a non-empty `parts` array, a shallow copy is returned
 *   whose LAST block additionally carries `cache_control: { type: "ephemeral" }`.
 *   Neither the original array nor its blocks are mutated.
 * - Otherwise the string `content` is wrapped in a single text block that
 *   carries the marker.
 *
 * When there is nothing a marker can be attached to (no parts and an empty or
 * non-string `content`, or a last part that is not an object) the message's
 * ordinary, unmarked payload (`m.parts ?? m.content`) is returned instead of
 * fabricating a block with `text: undefined` / `text: ""`.
 * NOTE(review): the Anthropic prompt-caching docs list empty text blocks as
 * non-cacheable, so emitting a marker there is presumed to be rejected —
 * confirm against the current API behaviour.
 *
 * @param {{ content?: string, parts?: object[] }} m - History message.
 * @returns {object[] | string | undefined} Content payload for the wire request.
 */
function attachAnthropicCacheControl(m) {
  const marker = { type: "ephemeral" };
  if (Array.isArray(m.parts) && m.parts.length > 0) {
    const blocks = m.parts;
    const last = blocks[blocks.length - 1];
    // Spreading a primitive or null would yield a malformed block, so leave
    // the parts untouched when the last entry is not a plain block object.
    if (typeof last !== "object" || last === null) return blocks;
    return [...blocks.slice(0, -1), { ...last, cache_control: marker }];
  }
  if (typeof m.content !== "string" || m.content.length === 0) {
    // Nothing cacheable: fall back to the same payload an unmarked message gets.
    return m.parts ?? m.content;
  }
  return [
    {
      type: "text",
      text: m.content,
      cache_control: marker
    }
  ];
}
|
|
579
|
+
/**
 * Resolve which history index should carry the cache marker.
 *
 * - No policy, or strategy `"none"`: -1 (no marker).
 * - Empty history: -1.
 * - `"all-but-latest"`: the last history index.
 * - Otherwise (`"fixed-suffix"`): `historyLen - 1 - policy.suffix`, or -1 when
 *   that would fall before the start of the history.
 *
 * `policy.suffix` must be a non-negative integer. A negative suffix would
 * produce an index past the end of the history and a fractional one a
 * non-integer index; neither can ever match a message position, yet callers
 * use the result as an upper bound when summing cacheable tokens. Such values
 * therefore resolve to -1 (no marker, zero cacheable history tokens).
 *
 * @param {number} historyLen - Number of messages in the history.
 * @param {{ strategy: string, suffix?: number } | undefined} policy
 * @returns {number} Index into the history, or -1 when nothing is marked.
 */
function resolveHistoryMarkIndex(historyLen, policy) {
  if (!policy || policy.strategy === "none") return -1;
  if (!(historyLen > 0)) return -1;
  if (policy.strategy === "all-but-latest") {
    return historyLen - 1;
  }
  const { suffix } = policy;
  if (!Number.isInteger(suffix) || suffix < 0) return -1;
  const idx = historyLen - 1 - suffix;
  return idx >= 0 ? idx : -1;
}
|
|
588
|
+
/**
 * Add up the token counts of history messages from index 0 through
 * `throughIndex` (inclusive), never reading past the end of `history`.
 *
 * Messages whose role is "system" are skipped. For a message with a `parts`
 * array only parts exposing a string `text` are counted; otherwise a string
 * `content` is counted. Token counts come from `countTokens`.
 *
 * @param {object[]} history - History messages.
 * @param {number} throughIndex - Last index to include.
 * @returns {number} Sum of the counted tokens.
 */
function sumHistoryTokens(history, throughIndex) {
  const lastIndex = Math.min(throughIndex, history.length - 1);
  let tokenCount = 0;
  for (let idx = 0; idx <= lastIndex; idx += 1) {
    const message = history[idx];
    if (message.role === "system") continue;
    if (Array.isArray(message.parts)) {
      for (const part of message.parts) {
        if (typeof part.text === "string") tokenCount += countTokens(part.text);
      }
      continue;
    }
    if (typeof message.content === "string") {
      tokenCount += countTokens(message.content);
    }
  }
  return tokenCount;
}
|
|
550
603
|
function toAnthropicTools(tools) {
|
|
551
604
|
return tools.map((t) => ({
|
|
552
605
|
name: t.name,
|
|
@@ -581,6 +634,9 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
581
634
|
const minTokens = profile.lowering.cache.minTokens ?? 4096;
|
|
582
635
|
const meetsMin = cacheableTokens >= minTokens;
|
|
583
636
|
const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
|
|
637
|
+
const history = ir.history ?? [];
|
|
638
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
639
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
584
640
|
return {
|
|
585
641
|
request: {
|
|
586
642
|
provider: "google",
|
|
@@ -592,6 +648,7 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
592
648
|
},
|
|
593
649
|
diagnostics: {
|
|
594
650
|
cacheableTokens: meetsMin ? cacheableTokens : 0,
|
|
651
|
+
historyCacheableTokens,
|
|
595
652
|
estimatedCacheSavingsUsd: cacheSavings
|
|
596
653
|
}
|
|
597
654
|
};
|
|
@@ -639,6 +696,9 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
639
696
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
640
697
|
});
|
|
641
698
|
}
|
|
699
|
+
const history = ir.history ?? [];
|
|
700
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
701
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
642
702
|
return {
|
|
643
703
|
request: {
|
|
644
704
|
provider: "openai",
|
|
@@ -648,7 +708,11 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
648
708
|
response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
|
|
649
709
|
reasoning_effort: hints.forceTerseOutput ? "low" : void 0
|
|
650
710
|
},
|
|
651
|
-
diagnostics: {
|
|
711
|
+
diagnostics: {
|
|
712
|
+
cacheableTokens: 0,
|
|
713
|
+
historyCacheableTokens,
|
|
714
|
+
estimatedCacheSavingsUsd: 0
|
|
715
|
+
}
|
|
652
716
|
};
|
|
653
717
|
}
|
|
654
718
|
function toOpenAITools(tools) {
|
|
@@ -675,6 +739,9 @@ function lowerDeepSeek(ir, profile) {
|
|
|
675
739
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
676
740
|
});
|
|
677
741
|
}
|
|
742
|
+
const history = ir.history ?? [];
|
|
743
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
744
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
678
745
|
return {
|
|
679
746
|
request: {
|
|
680
747
|
provider: "deepseek",
|
|
@@ -689,7 +756,11 @@ function lowerDeepSeek(ir, profile) {
|
|
|
689
756
|
}
|
|
690
757
|
})) : void 0
|
|
691
758
|
},
|
|
692
|
-
diagnostics: {
|
|
759
|
+
diagnostics: {
|
|
760
|
+
cacheableTokens: 0,
|
|
761
|
+
historyCacheableTokens,
|
|
762
|
+
estimatedCacheSavingsUsd: 0
|
|
763
|
+
}
|
|
693
764
|
};
|
|
694
765
|
}
|
|
695
766
|
function sortSections(sections) {
|
|
@@ -1181,7 +1252,8 @@ function compile(ir, opts = {}) {
|
|
|
1181
1252
|
historyKept: workingIR.history?.length ?? 0,
|
|
1182
1253
|
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
1183
1254
|
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
1184
|
-
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
|
|
1255
|
+
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
|
|
1256
|
+
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
|
|
1185
1257
|
}
|
|
1186
1258
|
};
|
|
1187
1259
|
}
|
|
@@ -1266,7 +1338,8 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
1266
1338
|
learningKey: learningKey(archetype, result.target, shape),
|
|
1267
1339
|
estimatedTokensIn: tokens,
|
|
1268
1340
|
mutationsApplied: result.mutationsApplied.map((m) => m.id),
|
|
1269
|
-
startedAt: Date.now()
|
|
1341
|
+
startedAt: Date.now(),
|
|
1342
|
+
historyCacheableTokens: result.diagnostics.historyCacheableTokens
|
|
1270
1343
|
});
|
|
1271
1344
|
}
|
|
1272
1345
|
async function record(input) {
|
|
@@ -1309,6 +1382,9 @@ function buildPayload(input, reg) {
|
|
|
1309
1382
|
const compileTarget = reg?.model;
|
|
1310
1383
|
const actual = input.actualModel ?? compileTarget;
|
|
1311
1384
|
const requested = input.actualModel && compileTarget && input.actualModel !== compileTarget ? compileTarget : void 0;
|
|
1385
|
+
const mutationsApplied = input.mutationsApplied ?? reg?.mutationsApplied ?? [];
|
|
1386
|
+
const costModel = actual;
|
|
1387
|
+
const costUsdActual = costModel ? computeCostUsd(costModel, input.tokensIn, input.tokensOut) : void 0;
|
|
1312
1388
|
return {
|
|
1313
1389
|
handle: input.handle,
|
|
1314
1390
|
app_id: reg?.appId,
|
|
@@ -1318,7 +1394,7 @@ function buildPayload(input, reg) {
|
|
|
1318
1394
|
provider: reg?.provider,
|
|
1319
1395
|
shape_key: reg?.shapeKey,
|
|
1320
1396
|
learning_key: reg?.learningKey,
|
|
1321
|
-
mutations_applied:
|
|
1397
|
+
mutations_applied: mutationsApplied,
|
|
1322
1398
|
tokens_in: input.tokensIn,
|
|
1323
1399
|
tokens_out: input.tokensOut,
|
|
1324
1400
|
estimated_tokens_in: reg?.estimatedTokensIn,
|
|
@@ -1332,9 +1408,22 @@ function buildPayload(input, reg) {
|
|
|
1332
1408
|
oracle_rationale: input.oracleScore?.rationale,
|
|
1333
1409
|
prompt_preview: input.promptPreview,
|
|
1334
1410
|
response_preview: input.responsePreview,
|
|
1335
|
-
dialect_version: "v1"
|
|
1411
|
+
dialect_version: "v1",
|
|
1412
|
+
cache_read_input_tokens: input.cacheReadInputTokens,
|
|
1413
|
+
cache_creation_input_tokens: input.cacheCreationInputTokens,
|
|
1414
|
+
cost_usd_actual: costUsdActual,
|
|
1415
|
+
ttft_ms: input.ttftMs,
|
|
1416
|
+
history_cacheable_tokens: reg?.historyCacheableTokens
|
|
1336
1417
|
};
|
|
1337
1418
|
}
|
|
1419
|
+
/**
 * Compute the USD cost of a call from its token counts and the model
 * profile's per-million-token prices, rounded to 6 decimal places.
 *
 * Returns `undefined` (rather than `NaN`) when no meaningful cost can be
 * derived: either token count is missing or non-finite, both counts are zero,
 * the model has no known profile, or the profile's prices do not yield a
 * finite number.
 *
 * @param {string} modelId - Model whose profile supplies the prices.
 * @param {number} tokensIn - Input token count.
 * @param {number} tokensOut - Output token count.
 * @returns {number | undefined} Cost in USD, or `undefined` when unknown.
 */
function computeCostUsd(modelId, tokensIn, tokensOut) {
  // Guard first: arithmetic on undefined/NaN counts would record NaN as a cost.
  if (!Number.isFinite(tokensIn) || !Number.isFinite(tokensOut)) return void 0;
  if (tokensIn === 0 && tokensOut === 0) return void 0;
  const profile = tryGetProfile(modelId);
  if (!profile) return void 0;
  const inUsd = tokensIn / 1e6 * profile.costInputPer1m;
  const outUsd = tokensOut / 1e6 * profile.costOutputPer1m;
  const totalUsd = inUsd + outUsd;
  if (!Number.isFinite(totalUsd)) return void 0;
  return Math.round(totalUsd * 1e6) / 1e6;
}
|
|
1338
1427
|
|
|
1339
1428
|
// src/ir.ts
|
|
1340
1429
|
var CallError = class extends Error {
|
|
@@ -1607,7 +1696,7 @@ async function call(ir, opts = {}) {
|
|
|
1607
1696
|
attempts.push({ model: targetModel, status: "success" });
|
|
1608
1697
|
const latencyMs2 = Date.now() - start;
|
|
1609
1698
|
const responseWithStructured = withStructuredOutput(exec.response, ir);
|
|
1610
|
-
|
|
1699
|
+
await record({
|
|
1611
1700
|
handle: initial.handle,
|
|
1612
1701
|
tokensIn: responseWithStructured.tokens.input,
|
|
1613
1702
|
tokensOut: responseWithStructured.tokens.output,
|
|
@@ -1616,7 +1705,11 @@ async function call(ir, opts = {}) {
|
|
|
1616
1705
|
emptyResponse: responseWithStructured.tokens.output === 0,
|
|
1617
1706
|
toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
|
|
1618
1707
|
actualModel: targetModel !== initial.target ? targetModel : void 0,
|
|
1619
|
-
|
|
1708
|
+
mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
|
|
1709
|
+
promptPreview: extractPromptPreview(ir),
|
|
1710
|
+
responsePreview: responseWithStructured.text.slice(0, 200),
|
|
1711
|
+
cacheReadInputTokens: responseWithStructured.tokens.cached,
|
|
1712
|
+
cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
|
|
1620
1713
|
});
|
|
1621
1714
|
return {
|
|
1622
1715
|
handle: initial.handle,
|
|
@@ -1641,13 +1734,14 @@ async function call(ir, opts = {}) {
|
|
|
1641
1734
|
}
|
|
1642
1735
|
}
|
|
1643
1736
|
const latencyMs = Date.now() - start;
|
|
1644
|
-
|
|
1737
|
+
await record({
|
|
1645
1738
|
handle: initial.handle,
|
|
1646
1739
|
tokensIn: 0,
|
|
1647
1740
|
tokensOut: 0,
|
|
1648
1741
|
latencyMs,
|
|
1649
1742
|
success: false,
|
|
1650
|
-
errorType: lastErr?.errorCode
|
|
1743
|
+
errorType: lastErr?.errorCode,
|
|
1744
|
+
promptPreview: extractPromptPreview(ir)
|
|
1651
1745
|
});
|
|
1652
1746
|
throw new CallError(
|
|
1653
1747
|
`call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}`,
|
|
@@ -1665,6 +1759,13 @@ function compileAndRegister(ir, opts) {
|
|
|
1665
1759
|
registerCompile(ir.appId, ir.intent.archetype, ir, result);
|
|
1666
1760
|
return result;
|
|
1667
1761
|
}
|
|
1762
|
+
/**
 * Derive a short prompt preview (at most 200 characters) for telemetry.
 *
 * Prefers the current turn's `content`; if that is absent or empty, falls
 * back to the `content` of the last history message. Only non-empty strings
 * qualify: a non-string `content` is ignored instead of being sliced, which
 * would throw for values without `.slice` or return a non-string for arrays.
 *
 * @param {{ currentTurn?: { content?: unknown }, history?: Array<{ content?: unknown }> }} ir
 * @returns {string | undefined} Preview text, or `undefined` when none exists.
 */
function extractPromptPreview(ir) {
  const PREVIEW_LENGTH = 200;
  const isUsable = (value) => typeof value === "string" && value.length > 0;
  const turn = ir.currentTurn?.content;
  if (isUsable(turn)) return turn.slice(0, PREVIEW_LENGTH);
  const lastHist = ir.history?.[ir.history.length - 1]?.content;
  if (isUsable(lastHist)) return lastHist.slice(0, PREVIEW_LENGTH);
  return void 0;
}
|
|
1668
1769
|
function withStructuredOutput(response, ir) {
|
|
1669
1770
|
if (!ir.constraints?.structuredOutput) return response;
|
|
1670
1771
|
if (!response.text) return response;
|
package/dist/index.mjs
CHANGED
|
@@ -374,10 +374,15 @@ function lower(ir, profile, hints = {}) {
|
|
|
374
374
|
}
|
|
375
375
|
function lowerAnthropic(ir, profile, hints) {
|
|
376
376
|
const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
|
|
377
|
-
const
|
|
377
|
+
const history = ir.history ?? [];
|
|
378
|
+
const policy = ir.historyCachePolicy;
|
|
379
|
+
const markIndex = resolveHistoryMarkIndex(history.length, policy);
|
|
380
|
+
const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
|
|
378
381
|
const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
|
|
379
382
|
const cacheableTokens = computeCacheableTokens(systemBlocks);
|
|
380
|
-
const
|
|
383
|
+
const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
|
|
384
|
+
const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
|
|
385
|
+
const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
|
|
381
386
|
return {
|
|
382
387
|
request: {
|
|
383
388
|
provider: "anthropic",
|
|
@@ -389,6 +394,7 @@ function lowerAnthropic(ir, profile, hints) {
|
|
|
389
394
|
},
|
|
390
395
|
diagnostics: {
|
|
391
396
|
cacheableTokens,
|
|
397
|
+
historyCacheableTokens,
|
|
392
398
|
estimatedCacheSavingsUsd: cacheSavings
|
|
393
399
|
}
|
|
394
400
|
};
|
|
@@ -421,17 +427,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
|
|
|
421
427
|
}
|
|
422
428
|
return blocks;
|
|
423
429
|
}
|
|
424
|
-
function buildAnthropicMessages(history, currentTurn) {
|
|
430
|
+
function buildAnthropicMessages(history, currentTurn, markIndex) {
|
|
425
431
|
const out = [];
|
|
426
|
-
for (
|
|
432
|
+
for (let i = 0; i < history.length; i++) {
|
|
433
|
+
const m = history[i];
|
|
427
434
|
if (m.role === "system") continue;
|
|
428
|
-
|
|
435
|
+
const shouldMark = i === markIndex;
|
|
436
|
+
out.push({
|
|
437
|
+
role: m.role,
|
|
438
|
+
content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
|
|
439
|
+
});
|
|
429
440
|
}
|
|
430
441
|
if (currentTurn && currentTurn.role !== "system") {
|
|
431
442
|
out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
|
|
432
443
|
}
|
|
433
444
|
return out;
|
|
434
445
|
}
|
|
446
|
+
/**
 * Build the Anthropic `content` payload for a history message that should
 * carry the prompt-cache breakpoint.
 *
 * - If the message has a non-empty `parts` array, a shallow copy is returned
 *   whose LAST block additionally carries `cache_control: { type: "ephemeral" }`.
 *   Neither the original array nor its blocks are mutated.
 * - Otherwise the string `content` is wrapped in a single text block that
 *   carries the marker.
 *
 * When there is nothing a marker can be attached to (no parts and an empty or
 * non-string `content`, or a last part that is not an object) the message's
 * ordinary, unmarked payload (`m.parts ?? m.content`) is returned instead of
 * fabricating a block with `text: undefined` / `text: ""`.
 * NOTE(review): the Anthropic prompt-caching docs list empty text blocks as
 * non-cacheable, so emitting a marker there is presumed to be rejected —
 * confirm against the current API behaviour.
 *
 * @param {{ content?: string, parts?: object[] }} m - History message.
 * @returns {object[] | string | undefined} Content payload for the wire request.
 */
function attachAnthropicCacheControl(m) {
  const marker = { type: "ephemeral" };
  if (Array.isArray(m.parts) && m.parts.length > 0) {
    const blocks = m.parts;
    const last = blocks[blocks.length - 1];
    // Spreading a primitive or null would yield a malformed block, so leave
    // the parts untouched when the last entry is not a plain block object.
    if (typeof last !== "object" || last === null) return blocks;
    return [...blocks.slice(0, -1), { ...last, cache_control: marker }];
  }
  if (typeof m.content !== "string" || m.content.length === 0) {
    // Nothing cacheable: fall back to the same payload an unmarked message gets.
    return m.parts ?? m.content;
  }
  return [
    {
      type: "text",
      text: m.content,
      cache_control: marker
    }
  ];
}
|
|
464
|
+
/**
 * Resolve which history index should carry the cache marker.
 *
 * - No policy, or strategy `"none"`: -1 (no marker).
 * - Empty history: -1.
 * - `"all-but-latest"`: the last history index.
 * - Otherwise (`"fixed-suffix"`): `historyLen - 1 - policy.suffix`, or -1 when
 *   that would fall before the start of the history.
 *
 * `policy.suffix` must be a non-negative integer. A negative suffix would
 * produce an index past the end of the history and a fractional one a
 * non-integer index; neither can ever match a message position, yet callers
 * use the result as an upper bound when summing cacheable tokens. Such values
 * therefore resolve to -1 (no marker, zero cacheable history tokens).
 *
 * @param {number} historyLen - Number of messages in the history.
 * @param {{ strategy: string, suffix?: number } | undefined} policy
 * @returns {number} Index into the history, or -1 when nothing is marked.
 */
function resolveHistoryMarkIndex(historyLen, policy) {
  if (!policy || policy.strategy === "none") return -1;
  if (!(historyLen > 0)) return -1;
  if (policy.strategy === "all-but-latest") {
    return historyLen - 1;
  }
  const { suffix } = policy;
  if (!Number.isInteger(suffix) || suffix < 0) return -1;
  const idx = historyLen - 1 - suffix;
  return idx >= 0 ? idx : -1;
}
|
|
473
|
+
/**
 * Add up the token counts of history messages from index 0 through
 * `throughIndex` (inclusive), never reading past the end of `history`.
 *
 * Messages whose role is "system" are skipped. For a message with a `parts`
 * array only parts exposing a string `text` are counted; otherwise a string
 * `content` is counted. Token counts come from `countTokens`.
 *
 * @param {object[]} history - History messages.
 * @param {number} throughIndex - Last index to include.
 * @returns {number} Sum of the counted tokens.
 */
function sumHistoryTokens(history, throughIndex) {
  const lastIndex = Math.min(throughIndex, history.length - 1);
  let tokenCount = 0;
  for (let idx = 0; idx <= lastIndex; idx += 1) {
    const message = history[idx];
    if (message.role === "system") continue;
    if (Array.isArray(message.parts)) {
      for (const part of message.parts) {
        if (typeof part.text === "string") tokenCount += countTokens(part.text);
      }
      continue;
    }
    if (typeof message.content === "string") {
      tokenCount += countTokens(message.content);
    }
  }
  return tokenCount;
}
|
|
435
488
|
function toAnthropicTools(tools) {
|
|
436
489
|
return tools.map((t) => ({
|
|
437
490
|
name: t.name,
|
|
@@ -466,6 +519,9 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
466
519
|
const minTokens = profile.lowering.cache.minTokens ?? 4096;
|
|
467
520
|
const meetsMin = cacheableTokens >= minTokens;
|
|
468
521
|
const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
|
|
522
|
+
const history = ir.history ?? [];
|
|
523
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
524
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
469
525
|
return {
|
|
470
526
|
request: {
|
|
471
527
|
provider: "google",
|
|
@@ -477,6 +533,7 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
477
533
|
},
|
|
478
534
|
diagnostics: {
|
|
479
535
|
cacheableTokens: meetsMin ? cacheableTokens : 0,
|
|
536
|
+
historyCacheableTokens,
|
|
480
537
|
estimatedCacheSavingsUsd: cacheSavings
|
|
481
538
|
}
|
|
482
539
|
};
|
|
@@ -524,6 +581,9 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
524
581
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
525
582
|
});
|
|
526
583
|
}
|
|
584
|
+
const history = ir.history ?? [];
|
|
585
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
586
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
527
587
|
return {
|
|
528
588
|
request: {
|
|
529
589
|
provider: "openai",
|
|
@@ -533,7 +593,11 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
533
593
|
response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
|
|
534
594
|
reasoning_effort: hints.forceTerseOutput ? "low" : void 0
|
|
535
595
|
},
|
|
536
|
-
diagnostics: {
|
|
596
|
+
diagnostics: {
|
|
597
|
+
cacheableTokens: 0,
|
|
598
|
+
historyCacheableTokens,
|
|
599
|
+
estimatedCacheSavingsUsd: 0
|
|
600
|
+
}
|
|
537
601
|
};
|
|
538
602
|
}
|
|
539
603
|
function toOpenAITools(tools) {
|
|
@@ -560,6 +624,9 @@ function lowerDeepSeek(ir, profile) {
|
|
|
560
624
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
561
625
|
});
|
|
562
626
|
}
|
|
627
|
+
const history = ir.history ?? [];
|
|
628
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
629
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
563
630
|
return {
|
|
564
631
|
request: {
|
|
565
632
|
provider: "deepseek",
|
|
@@ -574,7 +641,11 @@ function lowerDeepSeek(ir, profile) {
|
|
|
574
641
|
}
|
|
575
642
|
})) : void 0
|
|
576
643
|
},
|
|
577
|
-
diagnostics: {
|
|
644
|
+
diagnostics: {
|
|
645
|
+
cacheableTokens: 0,
|
|
646
|
+
historyCacheableTokens,
|
|
647
|
+
estimatedCacheSavingsUsd: 0
|
|
648
|
+
}
|
|
578
649
|
};
|
|
579
650
|
}
|
|
580
651
|
function sortSections(sections) {
|
|
@@ -664,7 +735,8 @@ function compile(ir, opts = {}) {
|
|
|
664
735
|
historyKept: workingIR.history?.length ?? 0,
|
|
665
736
|
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
666
737
|
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
667
|
-
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
|
|
738
|
+
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
|
|
739
|
+
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
|
|
668
740
|
}
|
|
669
741
|
};
|
|
670
742
|
}
|
|
@@ -749,7 +821,8 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
749
821
|
learningKey: learningKey(archetype, result.target, shape),
|
|
750
822
|
estimatedTokensIn: tokens,
|
|
751
823
|
mutationsApplied: result.mutationsApplied.map((m) => m.id),
|
|
752
|
-
startedAt: Date.now()
|
|
824
|
+
startedAt: Date.now(),
|
|
825
|
+
historyCacheableTokens: result.diagnostics.historyCacheableTokens
|
|
753
826
|
});
|
|
754
827
|
}
|
|
755
828
|
async function record(input) {
|
|
@@ -792,6 +865,9 @@ function buildPayload(input, reg) {
|
|
|
792
865
|
const compileTarget = reg?.model;
|
|
793
866
|
const actual = input.actualModel ?? compileTarget;
|
|
794
867
|
const requested = input.actualModel && compileTarget && input.actualModel !== compileTarget ? compileTarget : void 0;
|
|
868
|
+
const mutationsApplied = input.mutationsApplied ?? reg?.mutationsApplied ?? [];
|
|
869
|
+
const costModel = actual;
|
|
870
|
+
const costUsdActual = costModel ? computeCostUsd(costModel, input.tokensIn, input.tokensOut) : void 0;
|
|
795
871
|
return {
|
|
796
872
|
handle: input.handle,
|
|
797
873
|
app_id: reg?.appId,
|
|
@@ -801,7 +877,7 @@ function buildPayload(input, reg) {
|
|
|
801
877
|
provider: reg?.provider,
|
|
802
878
|
shape_key: reg?.shapeKey,
|
|
803
879
|
learning_key: reg?.learningKey,
|
|
804
|
-
mutations_applied:
|
|
880
|
+
mutations_applied: mutationsApplied,
|
|
805
881
|
tokens_in: input.tokensIn,
|
|
806
882
|
tokens_out: input.tokensOut,
|
|
807
883
|
estimated_tokens_in: reg?.estimatedTokensIn,
|
|
@@ -815,9 +891,22 @@ function buildPayload(input, reg) {
|
|
|
815
891
|
oracle_rationale: input.oracleScore?.rationale,
|
|
816
892
|
prompt_preview: input.promptPreview,
|
|
817
893
|
response_preview: input.responsePreview,
|
|
818
|
-
dialect_version: "v1"
|
|
894
|
+
dialect_version: "v1",
|
|
895
|
+
cache_read_input_tokens: input.cacheReadInputTokens,
|
|
896
|
+
cache_creation_input_tokens: input.cacheCreationInputTokens,
|
|
897
|
+
cost_usd_actual: costUsdActual,
|
|
898
|
+
ttft_ms: input.ttftMs,
|
|
899
|
+
history_cacheable_tokens: reg?.historyCacheableTokens
|
|
819
900
|
};
|
|
820
901
|
}
|
|
902
|
+
/**
 * Compute the USD cost of a call from its token counts and the model
 * profile's per-million-token prices, rounded to 6 decimal places.
 *
 * Returns `undefined` (rather than `NaN`) when no meaningful cost can be
 * derived: either token count is missing or non-finite, both counts are zero,
 * the model has no known profile, or the profile's prices do not yield a
 * finite number.
 *
 * @param {string} modelId - Model whose profile supplies the prices.
 * @param {number} tokensIn - Input token count.
 * @param {number} tokensOut - Output token count.
 * @returns {number | undefined} Cost in USD, or `undefined` when unknown.
 */
function computeCostUsd(modelId, tokensIn, tokensOut) {
  // Guard first: arithmetic on undefined/NaN counts would record NaN as a cost.
  if (!Number.isFinite(tokensIn) || !Number.isFinite(tokensOut)) return void 0;
  if (tokensIn === 0 && tokensOut === 0) return void 0;
  const profile = tryGetProfile(modelId);
  if (!profile) return void 0;
  const inUsd = tokensIn / 1e6 * profile.costInputPer1m;
  const outUsd = tokensOut / 1e6 * profile.costOutputPer1m;
  const totalUsd = inUsd + outUsd;
  if (!Number.isFinite(totalUsd)) return void 0;
  return Math.round(totalUsd * 1e6) / 1e6;
}
|
|
821
910
|
|
|
822
911
|
// src/ir.ts
|
|
823
912
|
var CallError = class extends Error {
|
|
@@ -1090,7 +1179,7 @@ async function call(ir, opts = {}) {
|
|
|
1090
1179
|
attempts.push({ model: targetModel, status: "success" });
|
|
1091
1180
|
const latencyMs2 = Date.now() - start;
|
|
1092
1181
|
const responseWithStructured = withStructuredOutput(exec.response, ir);
|
|
1093
|
-
|
|
1182
|
+
await record({
|
|
1094
1183
|
handle: initial.handle,
|
|
1095
1184
|
tokensIn: responseWithStructured.tokens.input,
|
|
1096
1185
|
tokensOut: responseWithStructured.tokens.output,
|
|
@@ -1099,7 +1188,11 @@ async function call(ir, opts = {}) {
|
|
|
1099
1188
|
emptyResponse: responseWithStructured.tokens.output === 0,
|
|
1100
1189
|
toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
|
|
1101
1190
|
actualModel: targetModel !== initial.target ? targetModel : void 0,
|
|
1102
|
-
|
|
1191
|
+
mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
|
|
1192
|
+
promptPreview: extractPromptPreview(ir),
|
|
1193
|
+
responsePreview: responseWithStructured.text.slice(0, 200),
|
|
1194
|
+
cacheReadInputTokens: responseWithStructured.tokens.cached,
|
|
1195
|
+
cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
|
|
1103
1196
|
});
|
|
1104
1197
|
return {
|
|
1105
1198
|
handle: initial.handle,
|
|
@@ -1124,13 +1217,14 @@ async function call(ir, opts = {}) {
|
|
|
1124
1217
|
}
|
|
1125
1218
|
}
|
|
1126
1219
|
const latencyMs = Date.now() - start;
|
|
1127
|
-
|
|
1220
|
+
await record({
|
|
1128
1221
|
handle: initial.handle,
|
|
1129
1222
|
tokensIn: 0,
|
|
1130
1223
|
tokensOut: 0,
|
|
1131
1224
|
latencyMs,
|
|
1132
1225
|
success: false,
|
|
1133
|
-
errorType: lastErr?.errorCode
|
|
1226
|
+
errorType: lastErr?.errorCode,
|
|
1227
|
+
promptPreview: extractPromptPreview(ir)
|
|
1134
1228
|
});
|
|
1135
1229
|
throw new CallError(
|
|
1136
1230
|
`call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}`,
|
|
@@ -1148,6 +1242,13 @@ function compileAndRegister(ir, opts) {
|
|
|
1148
1242
|
registerCompile(ir.appId, ir.intent.archetype, ir, result);
|
|
1149
1243
|
return result;
|
|
1150
1244
|
}
|
|
1245
|
+
/**
 * Derive a short prompt preview (at most 200 characters) for telemetry.
 *
 * Prefers the current turn's `content`; if that is absent or empty, falls
 * back to the `content` of the last history message. Only non-empty strings
 * qualify: a non-string `content` is ignored instead of being sliced, which
 * would throw for values without `.slice` or return a non-string for arrays.
 *
 * @param {{ currentTurn?: { content?: unknown }, history?: Array<{ content?: unknown }> }} ir
 * @returns {string | undefined} Preview text, or `undefined` when none exists.
 */
function extractPromptPreview(ir) {
  const PREVIEW_LENGTH = 200;
  const isUsable = (value) => typeof value === "string" && value.length > 0;
  const turn = ir.currentTurn?.content;
  if (isUsable(turn)) return turn.slice(0, PREVIEW_LENGTH);
  const lastHist = ir.history?.[ir.history.length - 1]?.content;
  if (isUsable(lastHist)) return lastHist.slice(0, PREVIEW_LENGTH);
  return void 0;
}
|
|
1151
1252
|
function withStructuredOutput(response, ir) {
|
|
1152
1253
|
if (!ir.constraints?.structuredOutput) return response;
|
|
1153
1254
|
if (!response.text) return response;
|
|
@@ -91,6 +91,40 @@ interface Constraints {
|
|
|
91
91
|
/** Override target model selection — if set, compiler uses this instead of routing. */
|
|
92
92
|
forceModel?: string;
|
|
93
93
|
}
|
|
94
|
+
/**
|
|
95
|
+
* Cache marker policy for the messages array (history + currentTurn).
|
|
96
|
+
*
|
|
97
|
+
* Anthropic positional caching: a `cache_control` marker on a content block
|
|
98
|
+
* tells the API "remember the prefix up through this block." On a subsequent
|
|
99
|
+
* request whose first N tokens match, those N tokens are billed at the cached rate
|
|
100
|
+
* (10% of the input price). Without a marker, every call re-pays for the
|
|
101
|
+
* entire history.
|
|
102
|
+
*
|
|
103
|
+
* - `'none'` (default when omitted): no history cache marker. System-level
|
|
104
|
+
* cache markers from `PromptSection.cacheable=true` still apply.
|
|
105
|
+
* - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
|
|
106
|
+
* (the last history entry). On the next call, that entire history prefix
|
|
107
|
+
* is cacheable. Good fit for chat/agent loops where every prior turn is
|
|
108
|
+
* stable.
|
|
109
|
+
* - `'fixed-suffix'`: marks the message `suffix` positions from the end of
|
|
110
|
+
* `history`. Use when the last few turns are volatile (e.g., scratchpad,
|
|
111
|
+
* draft revisions) but the earlier prefix is stable.
|
|
112
|
+
*
|
|
113
|
+
* For non-Anthropic providers, no wire-format marker is emitted (Gemini /
|
|
114
|
+
* OpenAI / DeepSeek implicit caching takes effect automatically when a
|
|
115
|
+
* stable prefix is reused). The compiler still computes
|
|
116
|
+
* `diagnostics.historyCacheableTokens` for telemetry on every provider.
|
|
117
|
+
*
|
|
118
|
+
* alpha.5.
|
|
119
|
+
*/
|
|
120
|
+
type HistoryCachePolicy =
    | { strategy: 'none' } // no history cache marker
    | { strategy: 'all-but-latest' } // marker on the last history entry
    | { strategy: 'fixed-suffix'; suffix: number }; // marker `suffix` positions from the end of history
|
|
94
128
|
/**
|
|
95
129
|
* Consumer-declared policy for model selection. Lives outside the IR
|
|
96
130
|
* (passed via CompileOptions) because it's a SESSION/APP-level constraint,
|
|
@@ -146,6 +180,12 @@ interface PromptIR {
|
|
|
146
180
|
models: string[];
|
|
147
181
|
/** Compile constraints. */
|
|
148
182
|
constraints?: Constraints;
|
|
183
|
+
/**
|
|
184
|
+
* Cache marker placement policy for the messages array. Default = no
|
|
185
|
+
* history cache markers. See `HistoryCachePolicy` for semantics.
|
|
186
|
+
* alpha.5.
|
|
187
|
+
*/
|
|
188
|
+
historyCachePolicy?: HistoryCachePolicy;
|
|
149
189
|
}
|
|
150
190
|
type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
|
|
151
191
|
/**
|
|
@@ -240,6 +280,16 @@ interface CompileResult {
|
|
|
240
280
|
historyDropped: number;
|
|
241
281
|
cacheableTokens: number;
|
|
242
282
|
estimatedCacheSavingsUsd: number;
|
|
283
|
+
/**
|
|
284
|
+
* Tokens in `history` (and `currentTurn` when before the marker) that
|
|
285
|
+
* fall within the cacheable prefix per `historyCachePolicy`. Always
|
|
286
|
+
* computed; only Anthropic actually emits a wire-format marker. For
|
|
287
|
+
* Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
|
|
288
|
+
* prefix that implicit caching may pick up — useful telemetry for the
|
|
289
|
+
* brain to learn which (app, model, archetype) tuples benefit most
|
|
290
|
+
* from history caching. alpha.5.
|
|
291
|
+
*/
|
|
292
|
+
historyCacheableTokens: number;
|
|
243
293
|
};
|
|
244
294
|
}
|
|
245
295
|
/**
|
|
@@ -386,6 +436,38 @@ interface RecordInput {
|
|
|
386
436
|
* the originally-requested model.
|
|
387
437
|
*/
|
|
388
438
|
actualModel?: string;
|
|
439
|
+
/**
|
|
440
|
+
* Override `mutations_applied` for this outcome. Set by `call()` when
|
|
441
|
+
* fallback fires — the served compile's mutations (which actually shaped
|
|
442
|
+
* the request that went on the wire) replace the initial compile's
|
|
443
|
+
* mutations (registered against the handle). Without this override, fallback
|
|
444
|
+
* traffic is attributed to the initial compile's mutations and the brain's
|
|
445
|
+
* mutation effectiveness stats become misleading.
|
|
446
|
+
*
|
|
447
|
+
* alpha.4: extends s11 truth-in-logging to mutations.
|
|
448
|
+
*/
|
|
449
|
+
mutationsApplied?: string[];
|
|
450
|
+
/**
|
|
451
|
+
* Cache read input tokens, when supported by the provider.
|
|
452
|
+
* - Anthropic: `usage.cache_read_input_tokens`
|
|
453
|
+
* - Google (implicit caching): `usageMetadata.cachedContentTokenCount`
|
|
454
|
+
* - OpenAI: `usage.prompt_tokens_details.cached_tokens`
|
|
455
|
+
*
|
|
456
|
+
* Powers the cost-and-efficiency-watcher (interfaces/kgauto.md, alpha.4):
|
|
457
|
+
* `tokens_in - cache_read_input_tokens` is the un-cached new context per call.
|
|
458
|
+
*/
|
|
459
|
+
cacheReadInputTokens?: number;
|
|
460
|
+
/**
|
|
461
|
+
* Cache creation input tokens (Anthropic-specific).
|
|
462
|
+
* `usage.cache_creation_input_tokens`. Non-zero on the call that pays the 25%
|
|
463
|
+
* upcharge to write a cache marker; subsequent calls hit `cacheRead`.
|
|
464
|
+
*/
|
|
465
|
+
cacheCreationInputTokens?: number;
|
|
466
|
+
/**
|
|
467
|
+
* Time to first token (ms). Optional; populated when the provider/SDK
|
|
468
|
+
* surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
|
|
469
|
+
*/
|
|
470
|
+
ttftMs?: number;
|
|
389
471
|
}
|
|
390
472
|
|
|
391
473
|
/**
|
|
@@ -91,6 +91,40 @@ interface Constraints {
|
|
|
91
91
|
/** Override target model selection — if set, compiler uses this instead of routing. */
|
|
92
92
|
forceModel?: string;
|
|
93
93
|
}
|
|
94
|
+
/**
|
|
95
|
+
* Cache marker policy for the messages array (history + currentTurn).
|
|
96
|
+
*
|
|
97
|
+
* Anthropic positional caching: a `cache_control` marker on a content block
|
|
98
|
+
* tells the API "remember the prefix up through this block." On a subsequent
|
|
99
|
+
* request whose first N tokens match, those N tokens are billed at the cached rate
|
|
100
|
+
* (10% of the input price). Without a marker, every call re-pays for the
|
|
101
|
+
* entire history.
|
|
102
|
+
*
|
|
103
|
+
* - `'none'` (default when omitted): no history cache marker. System-level
|
|
104
|
+
* cache markers from `PromptSection.cacheable=true` still apply.
|
|
105
|
+
* - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
|
|
106
|
+
* (the last history entry). On the next call, that entire history prefix
|
|
107
|
+
* is cacheable. Good fit for chat/agent loops where every prior turn is
|
|
108
|
+
* stable.
|
|
109
|
+
* - `'fixed-suffix'`: marks the message `suffix` positions from the end of
|
|
110
|
+
* `history`. Use when the last few turns are volatile (e.g., scratchpad,
|
|
111
|
+
* draft revisions) but the earlier prefix is stable.
|
|
112
|
+
*
|
|
113
|
+
* For non-Anthropic providers, no wire-format marker is emitted (Gemini /
|
|
114
|
+
* OpenAI / DeepSeek implicit caching takes effect automatically when a
|
|
115
|
+
* stable prefix is reused). The compiler still computes
|
|
116
|
+
* `diagnostics.historyCacheableTokens` for telemetry on every provider.
|
|
117
|
+
*
|
|
118
|
+
* alpha.5.
|
|
119
|
+
*/
|
|
120
|
+
type HistoryCachePolicy =
    | { strategy: 'none' } // no history cache marker
    | { strategy: 'all-but-latest' } // marker on the last history entry
    | { strategy: 'fixed-suffix'; suffix: number }; // marker `suffix` positions from the end of history
|
|
94
128
|
/**
|
|
95
129
|
* Consumer-declared policy for model selection. Lives outside the IR
|
|
96
130
|
* (passed via CompileOptions) because it's a SESSION/APP-level constraint,
|
|
@@ -146,6 +180,12 @@ interface PromptIR {
|
|
|
146
180
|
models: string[];
|
|
147
181
|
/** Compile constraints. */
|
|
148
182
|
constraints?: Constraints;
|
|
183
|
+
/**
|
|
184
|
+
* Cache marker placement policy for the messages array. Default = no
|
|
185
|
+
* history cache markers. See `HistoryCachePolicy` for semantics.
|
|
186
|
+
* alpha.5.
|
|
187
|
+
*/
|
|
188
|
+
historyCachePolicy?: HistoryCachePolicy;
|
|
149
189
|
}
|
|
150
190
|
type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
|
|
151
191
|
/**
|
|
@@ -240,6 +280,16 @@ interface CompileResult {
|
|
|
240
280
|
historyDropped: number;
|
|
241
281
|
cacheableTokens: number;
|
|
242
282
|
estimatedCacheSavingsUsd: number;
|
|
283
|
+
/**
|
|
284
|
+
* Tokens in `history` (and `currentTurn` when before the marker) that
|
|
285
|
+
* fall within the cacheable prefix per `historyCachePolicy`. Always
|
|
286
|
+
* computed; only Anthropic actually emits a wire-format marker. For
|
|
287
|
+
* Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
|
|
288
|
+
* prefix that implicit caching may pick up — useful telemetry for the
|
|
289
|
+
* brain to learn which (app, model, archetype) tuples benefit most
|
|
290
|
+
* from history caching. alpha.5.
|
|
291
|
+
*/
|
|
292
|
+
historyCacheableTokens: number;
|
|
243
293
|
};
|
|
244
294
|
}
|
|
245
295
|
/**
|
|
@@ -386,6 +436,38 @@ interface RecordInput {
|
|
|
386
436
|
* the originally-requested model.
|
|
387
437
|
*/
|
|
388
438
|
actualModel?: string;
|
|
439
|
+
/**
|
|
440
|
+
* Override `mutations_applied` for this outcome. Set by `call()` when
|
|
441
|
+
* fallback fires — the served compile's mutations (which actually shaped
|
|
442
|
+
* the request that went on the wire) replace the initial compile's
|
|
443
|
+
* mutations (registered against the handle). Without this override, fallback
|
|
444
|
+
* traffic is attributed to the initial compile's mutations and the brain's
|
|
445
|
+
* mutation effectiveness stats become misleading.
|
|
446
|
+
*
|
|
447
|
+
* alpha.4: extends s11 truth-in-logging to mutations.
|
|
448
|
+
*/
|
|
449
|
+
mutationsApplied?: string[];
|
|
450
|
+
/**
|
|
451
|
+
* Cache read input tokens, when supported by the provider.
|
|
452
|
+
* - Anthropic: `usage.cache_read_input_tokens`
|
|
453
|
+
* - Google (implicit caching): `usageMetadata.cachedContentTokenCount`
|
|
454
|
+
* - OpenAI: `usage.prompt_tokens_details.cached_tokens`
|
|
455
|
+
*
|
|
456
|
+
* Powers the cost-and-efficiency-watcher (interfaces/kgauto.md, alpha.4):
|
|
457
|
+
* `tokens_in - cache_read_input_tokens` is the un-cached new context per call.
|
|
458
|
+
*/
|
|
459
|
+
cacheReadInputTokens?: number;
|
|
460
|
+
/**
|
|
461
|
+
* Cache creation input tokens (Anthropic-specific).
|
|
462
|
+
* `usage.cache_creation_input_tokens`. Non-zero on the call that pays the 25%
|
|
463
|
+
* upcharge to write a cache marker; subsequent calls hit `cacheRead`.
|
|
464
|
+
*/
|
|
465
|
+
cacheCreationInputTokens?: number;
|
|
466
|
+
/**
|
|
467
|
+
* Time to first token (ms). Optional; populated when the provider/SDK
|
|
468
|
+
* surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
|
|
469
|
+
*/
|
|
470
|
+
ttftMs?: number;
|
|
389
471
|
}
|
|
390
472
|
|
|
391
473
|
/**
|
package/dist/profiles.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BiyrF36f.mjs';
|
|
1
|
+
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
|
|
2
2
|
import './dialect.mjs';
|
package/dist/profiles.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-C5lVqF8_.js';
|
|
1
|
+
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
|
|
2
2
|
import './dialect.js';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@warmdrift/kgauto-compiler",
|
|
3
|
-
"version": "2.0.0-alpha.3",
|
|
3
|
+
"version": "2.0.0-alpha.5",
|
|
4
4
|
"description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|