@warmdrift/kgauto-compiler 2.0.0-alpha.4 → 2.0.0-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # @warmdrift/kgauto-compiler — v2.0.0-alpha.4
1
+ # @warmdrift/kgauto-compiler — v2.0.0-alpha.5
2
2
 
3
3
  > Prompt compiler + central learning brain for multi-model AI apps.
4
4
  > **Swap models without rewriting prompts.**
@@ -18,8 +18,8 @@ mutations.
18
18
  - **Package:** alpha — coexists with v1 (`@warmdrift/kgauto@1.2.0`) under
19
19
  the temporary name `@warmdrift/kgauto-compiler`. Renames to v2 final once
20
20
  v1 is fully retired from production.
21
- - **Tests:** 147/147 passing
22
- - **Build:** clean (43KB ESM, 60KB CJS)
21
+ - **Tests:** 180/180 passing
22
+ - **Build:** clean (47KB ESM, 64KB CJS)
23
23
  - **Brain:** schema ready (see `brain/migrations/001_initial_schema.sql`);
24
24
  awaiting dedicated Supabase provisioning.
25
25
  - **Mutation engine:** v2.1 (after enough outcome data accumulates).
package/dist/index.d.mts CHANGED
@@ -1,5 +1,5 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CH_nKPjp.mjs';
2
- export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-DHdCRBVH.mjs';
2
+ export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
3
3
  export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
4
4
 
5
5
  /**
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CDttLtaD.js';
2
- export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-MGq5Tnjv.js';
2
+ export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
3
3
  export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
4
4
 
5
5
  /**
package/dist/index.js CHANGED
@@ -489,10 +489,15 @@ function lower(ir, profile, hints = {}) {
489
489
  }
490
490
  function lowerAnthropic(ir, profile, hints) {
491
491
  const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
492
- const messages = buildAnthropicMessages(ir.history ?? [], ir.currentTurn);
492
+ const history = ir.history ?? [];
493
+ const policy = ir.historyCachePolicy;
494
+ const markIndex = resolveHistoryMarkIndex(history.length, policy);
495
+ const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
493
496
  const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
494
497
  const cacheableTokens = computeCacheableTokens(systemBlocks);
495
- const cacheSavings = cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
498
+ const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
499
+ const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
500
+ const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
496
501
  return {
497
502
  request: {
498
503
  provider: "anthropic",
@@ -504,6 +509,7 @@ function lowerAnthropic(ir, profile, hints) {
504
509
  },
505
510
  diagnostics: {
506
511
  cacheableTokens,
512
+ historyCacheableTokens,
507
513
  estimatedCacheSavingsUsd: cacheSavings
508
514
  }
509
515
  };
@@ -536,17 +542,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
536
542
  }
537
543
  return blocks;
538
544
  }
539
- function buildAnthropicMessages(history, currentTurn) {
545
+ function buildAnthropicMessages(history, currentTurn, markIndex) {
540
546
  const out = [];
541
- for (const m of history) {
547
+ for (let i = 0; i < history.length; i++) {
548
+ const m = history[i];
542
549
  if (m.role === "system") continue;
543
- out.push({ role: m.role, content: m.parts ?? m.content });
550
+ const shouldMark = i === markIndex;
551
+ out.push({
552
+ role: m.role,
553
+ content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
554
+ });
544
555
  }
545
556
  if (currentTurn && currentTurn.role !== "system") {
546
557
  out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
547
558
  }
548
559
  return out;
549
560
  }
561
+ function attachAnthropicCacheControl(m) {
562
+ if (Array.isArray(m.parts) && m.parts.length > 0) {
563
+ const blocks = m.parts;
564
+ const last = blocks[blocks.length - 1];
565
+ const withMarker = {
566
+ ...last,
567
+ cache_control: { type: "ephemeral" }
568
+ };
569
+ return [...blocks.slice(0, -1), withMarker];
570
+ }
571
+ return [
572
+ {
573
+ type: "text",
574
+ text: m.content,
575
+ cache_control: { type: "ephemeral" }
576
+ }
577
+ ];
578
+ }
579
/**
 * Resolve which `history` index should carry the cache marker.
 *
 * - No policy, or strategy `"none"`: no marker.
 * - `"all-but-latest"`: the last history entry.
 * - Otherwise (`"fixed-suffix"`): the entry `policy.suffix` positions before
 *   the last one.
 *
 * @param historyLen Number of entries in the history array.
 * @param policy History cache policy; may be undefined.
 * @returns An integer index in `[0, historyLen - 1]`, or `-1` when no marker
 *   applies (empty history, suffix reaching past the start, or an invalid
 *   suffix).
 */
function resolveHistoryMarkIndex(historyLen, policy) {
  if (!policy || policy.strategy === "none") return -1;
  if (historyLen === 0) return -1;
  if (policy.strategy === "all-but-latest") {
    return historyLen - 1;
  }
  const suffix = policy.suffix;
  // A negative suffix would give an index past the end of history, and a
  // fractional one a non-integer index. Neither matches any message, so no
  // marker would be emitted, yet token sums taken "through" that index would
  // still be reported as cacheable. Treat both as "no marker".
  if (!Number.isInteger(suffix) || suffix < 0) return -1;
  const idx = historyLen - 1 - suffix;
  return idx >= 0 ? idx : -1;
}
588
/**
 * Sum the token counts of the text in `history[0..throughIndex]`.
 *
 * System-role messages are skipped. For a message with a `parts` array, only
 * parts whose `text` is a string are counted; otherwise a string `content`
 * is counted. Token counts come from `countTokens`, which is defined
 * elsewhere in this bundle.
 *
 * @param history Array of history messages.
 * @param throughIndex Inclusive upper index; values past the end of
 *   `history` are clamped to the last entry.
 * @returns Total token count over the selected messages.
 */
function sumHistoryTokens(history, throughIndex) {
  let total = 0;
  const addText = (text) => {
    if (typeof text === "string") total += countTokens(text);
  };
  // Clamp once up front instead of re-checking the array bound on every pass.
  const lastIndex = Math.min(throughIndex, history.length - 1);
  for (let pos = 0; pos <= lastIndex; pos += 1) {
    const entry = history[pos];
    if (entry.role === "system") continue;
    if (Array.isArray(entry.parts)) {
      for (const part of entry.parts) addText(part.text);
    } else {
      addText(entry.content);
    }
  }
  return total;
}
550
603
  function toAnthropicTools(tools) {
551
604
  return tools.map((t) => ({
552
605
  name: t.name,
@@ -581,6 +634,9 @@ function lowerGoogle(ir, profile, hints) {
581
634
  const minTokens = profile.lowering.cache.minTokens ?? 4096;
582
635
  const meetsMin = cacheableTokens >= minTokens;
583
636
  const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
637
+ const history = ir.history ?? [];
638
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
639
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
584
640
  return {
585
641
  request: {
586
642
  provider: "google",
@@ -592,6 +648,7 @@ function lowerGoogle(ir, profile, hints) {
592
648
  },
593
649
  diagnostics: {
594
650
  cacheableTokens: meetsMin ? cacheableTokens : 0,
651
+ historyCacheableTokens,
595
652
  estimatedCacheSavingsUsd: cacheSavings
596
653
  }
597
654
  };
@@ -639,6 +696,9 @@ function lowerOpenAI(ir, profile, hints) {
639
696
  content: ir.currentTurn.parts ?? ir.currentTurn.content
640
697
  });
641
698
  }
699
+ const history = ir.history ?? [];
700
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
701
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
642
702
  return {
643
703
  request: {
644
704
  provider: "openai",
@@ -648,7 +708,11 @@ function lowerOpenAI(ir, profile, hints) {
648
708
  response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
649
709
  reasoning_effort: hints.forceTerseOutput ? "low" : void 0
650
710
  },
651
- diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
711
+ diagnostics: {
712
+ cacheableTokens: 0,
713
+ historyCacheableTokens,
714
+ estimatedCacheSavingsUsd: 0
715
+ }
652
716
  };
653
717
  }
654
718
  function toOpenAITools(tools) {
@@ -675,6 +739,9 @@ function lowerDeepSeek(ir, profile) {
675
739
  content: ir.currentTurn.parts ?? ir.currentTurn.content
676
740
  });
677
741
  }
742
+ const history = ir.history ?? [];
743
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
744
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
678
745
  return {
679
746
  request: {
680
747
  provider: "deepseek",
@@ -689,7 +756,11 @@ function lowerDeepSeek(ir, profile) {
689
756
  }
690
757
  })) : void 0
691
758
  },
692
- diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
759
+ diagnostics: {
760
+ cacheableTokens: 0,
761
+ historyCacheableTokens,
762
+ estimatedCacheSavingsUsd: 0
763
+ }
693
764
  };
694
765
  }
695
766
  function sortSections(sections) {
@@ -1181,7 +1252,8 @@ function compile(ir, opts = {}) {
1181
1252
  historyKept: workingIR.history?.length ?? 0,
1182
1253
  historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
1183
1254
  cacheableTokens: lowered.diagnostics.cacheableTokens,
1184
- estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
1255
+ estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
1256
+ historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
1185
1257
  }
1186
1258
  };
1187
1259
  }
@@ -1266,7 +1338,8 @@ function registerCompile(appId, archetype, ir, result) {
1266
1338
  learningKey: learningKey(archetype, result.target, shape),
1267
1339
  estimatedTokensIn: tokens,
1268
1340
  mutationsApplied: result.mutationsApplied.map((m) => m.id),
1269
- startedAt: Date.now()
1341
+ startedAt: Date.now(),
1342
+ historyCacheableTokens: result.diagnostics.historyCacheableTokens
1270
1343
  });
1271
1344
  }
1272
1345
  async function record(input) {
@@ -1339,7 +1412,8 @@ function buildPayload(input, reg) {
1339
1412
  cache_read_input_tokens: input.cacheReadInputTokens,
1340
1413
  cache_creation_input_tokens: input.cacheCreationInputTokens,
1341
1414
  cost_usd_actual: costUsdActual,
1342
- ttft_ms: input.ttftMs
1415
+ ttft_ms: input.ttftMs,
1416
+ history_cacheable_tokens: reg?.historyCacheableTokens
1343
1417
  };
1344
1418
  }
1345
1419
  function computeCostUsd(modelId, tokensIn, tokensOut) {
package/dist/index.mjs CHANGED
@@ -374,10 +374,15 @@ function lower(ir, profile, hints = {}) {
374
374
  }
375
375
  function lowerAnthropic(ir, profile, hints) {
376
376
  const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
377
- const messages = buildAnthropicMessages(ir.history ?? [], ir.currentTurn);
377
+ const history = ir.history ?? [];
378
+ const policy = ir.historyCachePolicy;
379
+ const markIndex = resolveHistoryMarkIndex(history.length, policy);
380
+ const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
378
381
  const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
379
382
  const cacheableTokens = computeCacheableTokens(systemBlocks);
380
- const cacheSavings = cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
383
+ const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
384
+ const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
385
+ const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
381
386
  return {
382
387
  request: {
383
388
  provider: "anthropic",
@@ -389,6 +394,7 @@ function lowerAnthropic(ir, profile, hints) {
389
394
  },
390
395
  diagnostics: {
391
396
  cacheableTokens,
397
+ historyCacheableTokens,
392
398
  estimatedCacheSavingsUsd: cacheSavings
393
399
  }
394
400
  };
@@ -421,17 +427,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
421
427
  }
422
428
  return blocks;
423
429
  }
424
- function buildAnthropicMessages(history, currentTurn) {
430
+ function buildAnthropicMessages(history, currentTurn, markIndex) {
425
431
  const out = [];
426
- for (const m of history) {
432
+ for (let i = 0; i < history.length; i++) {
433
+ const m = history[i];
427
434
  if (m.role === "system") continue;
428
- out.push({ role: m.role, content: m.parts ?? m.content });
435
+ const shouldMark = i === markIndex;
436
+ out.push({
437
+ role: m.role,
438
+ content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
439
+ });
429
440
  }
430
441
  if (currentTurn && currentTurn.role !== "system") {
431
442
  out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
432
443
  }
433
444
  return out;
434
445
  }
446
+ function attachAnthropicCacheControl(m) {
447
+ if (Array.isArray(m.parts) && m.parts.length > 0) {
448
+ const blocks = m.parts;
449
+ const last = blocks[blocks.length - 1];
450
+ const withMarker = {
451
+ ...last,
452
+ cache_control: { type: "ephemeral" }
453
+ };
454
+ return [...blocks.slice(0, -1), withMarker];
455
+ }
456
+ return [
457
+ {
458
+ type: "text",
459
+ text: m.content,
460
+ cache_control: { type: "ephemeral" }
461
+ }
462
+ ];
463
+ }
464
/**
 * Resolve which `history` index should carry the cache marker.
 *
 * - No policy, or strategy `"none"`: no marker.
 * - `"all-but-latest"`: the last history entry.
 * - Otherwise (`"fixed-suffix"`): the entry `policy.suffix` positions before
 *   the last one.
 *
 * @param historyLen Number of entries in the history array.
 * @param policy History cache policy; may be undefined.
 * @returns An integer index in `[0, historyLen - 1]`, or `-1` when no marker
 *   applies (empty history, suffix reaching past the start, or an invalid
 *   suffix).
 */
function resolveHistoryMarkIndex(historyLen, policy) {
  if (!policy || policy.strategy === "none") return -1;
  if (historyLen === 0) return -1;
  if (policy.strategy === "all-but-latest") {
    return historyLen - 1;
  }
  const suffix = policy.suffix;
  // A negative suffix would give an index past the end of history, and a
  // fractional one a non-integer index. Neither matches any message, so no
  // marker would be emitted, yet token sums taken "through" that index would
  // still be reported as cacheable. Treat both as "no marker".
  if (!Number.isInteger(suffix) || suffix < 0) return -1;
  const idx = historyLen - 1 - suffix;
  return idx >= 0 ? idx : -1;
}
473
/**
 * Sum the token counts of the text in `history[0..throughIndex]`.
 *
 * System-role messages are skipped. For a message with a `parts` array, only
 * parts whose `text` is a string are counted; otherwise a string `content`
 * is counted. Token counts come from `countTokens`, which is defined
 * elsewhere in this bundle.
 *
 * @param history Array of history messages.
 * @param throughIndex Inclusive upper index; values past the end of
 *   `history` are clamped to the last entry.
 * @returns Total token count over the selected messages.
 */
function sumHistoryTokens(history, throughIndex) {
  let total = 0;
  const addText = (text) => {
    if (typeof text === "string") total += countTokens(text);
  };
  // Clamp once up front instead of re-checking the array bound on every pass.
  const lastIndex = Math.min(throughIndex, history.length - 1);
  for (let pos = 0; pos <= lastIndex; pos += 1) {
    const entry = history[pos];
    if (entry.role === "system") continue;
    if (Array.isArray(entry.parts)) {
      for (const part of entry.parts) addText(part.text);
    } else {
      addText(entry.content);
    }
  }
  return total;
}
435
488
  function toAnthropicTools(tools) {
436
489
  return tools.map((t) => ({
437
490
  name: t.name,
@@ -466,6 +519,9 @@ function lowerGoogle(ir, profile, hints) {
466
519
  const minTokens = profile.lowering.cache.minTokens ?? 4096;
467
520
  const meetsMin = cacheableTokens >= minTokens;
468
521
  const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
522
+ const history = ir.history ?? [];
523
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
524
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
469
525
  return {
470
526
  request: {
471
527
  provider: "google",
@@ -477,6 +533,7 @@ function lowerGoogle(ir, profile, hints) {
477
533
  },
478
534
  diagnostics: {
479
535
  cacheableTokens: meetsMin ? cacheableTokens : 0,
536
+ historyCacheableTokens,
480
537
  estimatedCacheSavingsUsd: cacheSavings
481
538
  }
482
539
  };
@@ -524,6 +581,9 @@ function lowerOpenAI(ir, profile, hints) {
524
581
  content: ir.currentTurn.parts ?? ir.currentTurn.content
525
582
  });
526
583
  }
584
+ const history = ir.history ?? [];
585
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
586
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
527
587
  return {
528
588
  request: {
529
589
  provider: "openai",
@@ -533,7 +593,11 @@ function lowerOpenAI(ir, profile, hints) {
533
593
  response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
534
594
  reasoning_effort: hints.forceTerseOutput ? "low" : void 0
535
595
  },
536
- diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
596
+ diagnostics: {
597
+ cacheableTokens: 0,
598
+ historyCacheableTokens,
599
+ estimatedCacheSavingsUsd: 0
600
+ }
537
601
  };
538
602
  }
539
603
  function toOpenAITools(tools) {
@@ -560,6 +624,9 @@ function lowerDeepSeek(ir, profile) {
560
624
  content: ir.currentTurn.parts ?? ir.currentTurn.content
561
625
  });
562
626
  }
627
+ const history = ir.history ?? [];
628
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
629
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
563
630
  return {
564
631
  request: {
565
632
  provider: "deepseek",
@@ -574,7 +641,11 @@ function lowerDeepSeek(ir, profile) {
574
641
  }
575
642
  })) : void 0
576
643
  },
577
- diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
644
+ diagnostics: {
645
+ cacheableTokens: 0,
646
+ historyCacheableTokens,
647
+ estimatedCacheSavingsUsd: 0
648
+ }
578
649
  };
579
650
  }
580
651
  function sortSections(sections) {
@@ -664,7 +735,8 @@ function compile(ir, opts = {}) {
664
735
  historyKept: workingIR.history?.length ?? 0,
665
736
  historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
666
737
  cacheableTokens: lowered.diagnostics.cacheableTokens,
667
- estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
738
+ estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
739
+ historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
668
740
  }
669
741
  };
670
742
  }
@@ -749,7 +821,8 @@ function registerCompile(appId, archetype, ir, result) {
749
821
  learningKey: learningKey(archetype, result.target, shape),
750
822
  estimatedTokensIn: tokens,
751
823
  mutationsApplied: result.mutationsApplied.map((m) => m.id),
752
- startedAt: Date.now()
824
+ startedAt: Date.now(),
825
+ historyCacheableTokens: result.diagnostics.historyCacheableTokens
753
826
  });
754
827
  }
755
828
  async function record(input) {
@@ -822,7 +895,8 @@ function buildPayload(input, reg) {
822
895
  cache_read_input_tokens: input.cacheReadInputTokens,
823
896
  cache_creation_input_tokens: input.cacheCreationInputTokens,
824
897
  cost_usd_actual: costUsdActual,
825
- ttft_ms: input.ttftMs
898
+ ttft_ms: input.ttftMs,
899
+ history_cacheable_tokens: reg?.historyCacheableTokens
826
900
  };
827
901
  }
828
902
  function computeCostUsd(modelId, tokensIn, tokensOut) {
@@ -91,6 +91,40 @@ interface Constraints {
91
91
  /** Override target model selection — if set, compiler uses this instead of routing. */
92
92
  forceModel?: string;
93
93
  }
94
+ /**
95
+ * Cache marker policy for the messages array (history + currentTurn).
96
+ *
97
+ * Anthropic positional caching: a `cache_control` marker on a content block
98
+ * tells the API "remember the prefix up through this block." On a subsequent
99
+ * request whose first N tokens match, those N are billed at the cached rate
100
+ * (10% of the input price). Without a marker, every call re-pays for the
101
+ * entire history.
102
+ *
103
+ * - `'none'` (default when omitted): no history cache marker. System-level
104
+ * cache markers from `PromptSection.cacheable=true` still apply.
105
+ * - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
106
+ * (the last history entry). On the next call, that entire history prefix
107
+ * is cacheable. Good fit for chat/agent loops where every prior turn is
108
+ * stable.
109
+ * - `'fixed-suffix'`: marks the message `suffix` positions from the end of
110
+ * `history`. Use when the last few turns are volatile (e.g., scratchpad,
111
+ * draft revisions) but the earlier prefix is stable.
112
+ *
113
+ * For non-Anthropic providers, no wire-format marker is emitted (Gemini /
114
+ * OpenAI / DeepSeek implicit caching takes effect automatically when a
115
+ * stable prefix is reused). The compiler still computes
116
+ * `diagnostics.historyCacheableTokens` for telemetry on every provider.
117
+ *
118
+ * alpha.5.
119
+ */
120
+ type HistoryCachePolicy = {
121
+ strategy: 'none';
122
+ } | {
123
+ strategy: 'all-but-latest';
124
+ } | {
125
+ strategy: 'fixed-suffix';
126
+ suffix: number;
127
+ };
94
128
  /**
95
129
  * Consumer-declared policy for model selection. Lives outside the IR
96
130
  * (passed via CompileOptions) because it's a SESSION/APP-level constraint,
@@ -146,6 +180,12 @@ interface PromptIR {
146
180
  models: string[];
147
181
  /** Compile constraints. */
148
182
  constraints?: Constraints;
183
+ /**
184
+ * Cache marker placement policy for the messages array. Default = no
185
+ * history cache markers. See `HistoryCachePolicy` for semantics.
186
+ * alpha.5.
187
+ */
188
+ historyCachePolicy?: HistoryCachePolicy;
149
189
  }
150
190
  type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
151
191
  /**
@@ -240,6 +280,16 @@ interface CompileResult {
240
280
  historyDropped: number;
241
281
  cacheableTokens: number;
242
282
  estimatedCacheSavingsUsd: number;
283
+ /**
284
+ * Tokens in `history` (`currentTurn` is never counted) that
285
+ * fall within the cacheable prefix per `historyCachePolicy`. Always
286
+ * computed; only Anthropic actually emits a wire-format marker. For
287
+ * Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
288
+ * prefix that implicit caching may pick up — useful telemetry for the
289
+ * brain to learn which (app, model, archetype) tuples benefit most
290
+ * from history caching. alpha.5.
291
+ */
292
+ historyCacheableTokens: number;
243
293
  };
244
294
  }
245
295
  /**
@@ -91,6 +91,40 @@ interface Constraints {
91
91
  /** Override target model selection — if set, compiler uses this instead of routing. */
92
92
  forceModel?: string;
93
93
  }
94
+ /**
95
+ * Cache marker policy for the messages array (history + currentTurn).
96
+ *
97
+ * Anthropic positional caching: a `cache_control` marker on a content block
98
+ * tells the API "remember the prefix up through this block." On a subsequent
99
+ * request whose first N tokens match, those N are billed at the cached rate
100
+ * (10% of the input price). Without a marker, every call re-pays for the
101
+ * entire history.
102
+ *
103
+ * - `'none'` (default when omitted): no history cache marker. System-level
104
+ * cache markers from `PromptSection.cacheable=true` still apply.
105
+ * - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
106
+ * (the last history entry). On the next call, that entire history prefix
107
+ * is cacheable. Good fit for chat/agent loops where every prior turn is
108
+ * stable.
109
+ * - `'fixed-suffix'`: marks the message `suffix` positions from the end of
110
+ * `history`. Use when the last few turns are volatile (e.g., scratchpad,
111
+ * draft revisions) but the earlier prefix is stable.
112
+ *
113
+ * For non-Anthropic providers, no wire-format marker is emitted (Gemini /
114
+ * OpenAI / DeepSeek implicit caching takes effect automatically when a
115
+ * stable prefix is reused). The compiler still computes
116
+ * `diagnostics.historyCacheableTokens` for telemetry on every provider.
117
+ *
118
+ * alpha.5.
119
+ */
120
+ type HistoryCachePolicy = {
121
+ strategy: 'none';
122
+ } | {
123
+ strategy: 'all-but-latest';
124
+ } | {
125
+ strategy: 'fixed-suffix';
126
+ suffix: number;
127
+ };
94
128
  /**
95
129
  * Consumer-declared policy for model selection. Lives outside the IR
96
130
  * (passed via CompileOptions) because it's a SESSION/APP-level constraint,
@@ -146,6 +180,12 @@ interface PromptIR {
146
180
  models: string[];
147
181
  /** Compile constraints. */
148
182
  constraints?: Constraints;
183
+ /**
184
+ * Cache marker placement policy for the messages array. Default = no
185
+ * history cache markers. See `HistoryCachePolicy` for semantics.
186
+ * alpha.5.
187
+ */
188
+ historyCachePolicy?: HistoryCachePolicy;
149
189
  }
150
190
  type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
151
191
  /**
@@ -240,6 +280,16 @@ interface CompileResult {
240
280
  historyDropped: number;
241
281
  cacheableTokens: number;
242
282
  estimatedCacheSavingsUsd: number;
283
+ /**
284
+ * Tokens in `history` (`currentTurn` is never counted) that
285
+ * fall within the cacheable prefix per `historyCachePolicy`. Always
286
+ * computed; only Anthropic actually emits a wire-format marker. For
287
+ * Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
288
+ * prefix that implicit caching may pick up — useful telemetry for the
289
+ * brain to learn which (app, model, archetype) tuples benefit most
290
+ * from history caching. alpha.5.
291
+ */
292
+ historyCacheableTokens: number;
243
293
  };
244
294
  }
245
295
  /**
@@ -1,2 +1,2 @@
1
- export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
1
+ export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
2
2
  import './dialect.mjs';
@@ -1,2 +1,2 @@
1
- export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
1
+ export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
2
2
  import './dialect.js';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@warmdrift/kgauto-compiler",
3
- "version": "2.0.0-alpha.4",
3
+ "version": "2.0.0-alpha.5",
4
4
  "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",