@warmdrift/kgauto-compiler 2.0.0-alpha.4 → 2.0.0-alpha.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/index.d.mts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +84 -10
- package/dist/index.mjs +84 -10
- package/dist/{profiles-CH_nKPjp.d.mts → profiles-DHdCRBVH.d.mts} +50 -0
- package/dist/{profiles-CDttLtaD.d.ts → profiles-MGq5Tnjv.d.ts} +50 -0
- package/dist/profiles.d.mts +1 -1
- package/dist/profiles.d.ts +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# @warmdrift/kgauto-compiler — v2.0.0-alpha.4
|
|
1
|
+
# @warmdrift/kgauto-compiler — v2.0.0-alpha.5
|
|
2
2
|
|
|
3
3
|
> Prompt compiler + central learning brain for multi-model AI apps.
|
|
4
4
|
> **Swap models without rewriting prompts.**
|
|
@@ -18,8 +18,8 @@ mutations.
|
|
|
18
18
|
- **Package:** alpha — coexists with v1 (`@warmdrift/kgauto@1.2.0`) under
|
|
19
19
|
the temporary name `@warmdrift/kgauto-compiler`. Renames to v2 final once
|
|
20
20
|
v1 is fully retired from production.
|
|
21
|
-
- **Tests:**
|
|
22
|
-
- **Build:** clean (
|
|
21
|
+
- **Tests:** 180/180 passing
|
|
22
|
+
- **Build:** clean (47KB ESM, 64KB CJS)
|
|
23
23
|
- **Brain:** schema ready (see `brain/migrations/001_initial_schema.sql`);
|
|
24
24
|
awaiting dedicated Supabase provisioning.
|
|
25
25
|
- **Mutation engine:** v2.1 (after enough outcome data accumulates).
|
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CH_nKPjp.mjs';
|
|
2
|
-
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-DHdCRBVH.mjs';
|
|
2
|
+
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
|
|
3
3
|
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
|
|
4
4
|
|
|
5
5
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CDttLtaD.js';
|
|
2
|
-
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-MGq5Tnjv.js';
|
|
2
|
+
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
|
|
3
3
|
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
|
|
4
4
|
|
|
5
5
|
/**
|
package/dist/index.js
CHANGED
|
@@ -489,10 +489,15 @@ function lower(ir, profile, hints = {}) {
|
|
|
489
489
|
}
|
|
490
490
|
function lowerAnthropic(ir, profile, hints) {
|
|
491
491
|
const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
|
|
492
|
-
const
|
|
492
|
+
const history = ir.history ?? [];
|
|
493
|
+
const policy = ir.historyCachePolicy;
|
|
494
|
+
const markIndex = resolveHistoryMarkIndex(history.length, policy);
|
|
495
|
+
const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
|
|
493
496
|
const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
|
|
494
497
|
const cacheableTokens = computeCacheableTokens(systemBlocks);
|
|
495
|
-
const
|
|
498
|
+
const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
|
|
499
|
+
const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
|
|
500
|
+
const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
|
|
496
501
|
return {
|
|
497
502
|
request: {
|
|
498
503
|
provider: "anthropic",
|
|
@@ -504,6 +509,7 @@ function lowerAnthropic(ir, profile, hints) {
|
|
|
504
509
|
},
|
|
505
510
|
diagnostics: {
|
|
506
511
|
cacheableTokens,
|
|
512
|
+
historyCacheableTokens,
|
|
507
513
|
estimatedCacheSavingsUsd: cacheSavings
|
|
508
514
|
}
|
|
509
515
|
};
|
|
@@ -536,17 +542,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
|
|
|
536
542
|
}
|
|
537
543
|
return blocks;
|
|
538
544
|
}
|
|
539
|
-
function buildAnthropicMessages(history, currentTurn) {
|
|
545
|
+
function buildAnthropicMessages(history, currentTurn, markIndex) {
|
|
540
546
|
const out = [];
|
|
541
|
-
for (
|
|
547
|
+
for (let i = 0; i < history.length; i++) {
|
|
548
|
+
const m = history[i];
|
|
542
549
|
if (m.role === "system") continue;
|
|
543
|
-
|
|
550
|
+
const shouldMark = i === markIndex;
|
|
551
|
+
out.push({
|
|
552
|
+
role: m.role,
|
|
553
|
+
content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
|
|
554
|
+
});
|
|
544
555
|
}
|
|
545
556
|
if (currentTurn && currentTurn.role !== "system") {
|
|
546
557
|
out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
|
|
547
558
|
}
|
|
548
559
|
return out;
|
|
549
560
|
}
|
|
561
|
+
/**
 * Build the Anthropic `content` array for a message that should carry the
 * history cache breakpoint (`cache_control: { type: "ephemeral" }`).
 *
 * The marker is placed on the LAST content block of the message. The input
 * message and its blocks are never mutated; a new array (and a shallow copy
 * of the last block) is returned.
 *
 * @param {{parts?: Array<object>, content?: unknown}} m - History message.
 * @returns {unknown} Content payload for the message. An array of blocks with
 *   the marker on the final block when there is something to mark; otherwise
 *   the same payload an unmarked message would send (`m.parts ?? m.content`).
 */
function attachAnthropicCacheControl(m) {
  const marker = { type: "ephemeral" };

  // Prefer explicit parts; otherwise accept an array-valued `content`, which
  // the unmarked path forwards verbatim as content blocks.
  let blocks = null;
  if (Array.isArray(m.parts) && m.parts.length > 0) {
    blocks = m.parts;
  } else if (Array.isArray(m.content) && m.content.length > 0) {
    blocks = m.content;
  }

  if (blocks !== null) {
    const last = blocks[blocks.length - 1];
    return [...blocks.slice(0, -1), { ...last, cache_control: marker }];
  }

  if (typeof m.content === "string" && m.content.length > 0) {
    return [{ type: "text", text: m.content, cache_control: marker }];
  }

  // Nothing markable (empty / missing / non-text content): do not fabricate a
  // text block with an empty or non-string `text`; send what the unmarked
  // path would have sent.
  return m.parts ?? m.content;
}
|
|
579
|
+
/**
 * Resolve which history entry should carry the cache marker.
 *
 * @param {number} historyLen - Number of entries in the history array.
 * @param {{strategy: string, suffix?: number}|undefined|null} policy -
 *   History cache policy; omitted or `"none"` means no marker.
 * @returns {number} Zero-based index into history, or -1 when no entry
 *   should be marked.
 */
function resolveHistoryMarkIndex(historyLen, policy) {
  if (!policy || policy.strategy === "none") return -1;
  if (historyLen === 0) return -1;
  if (policy.strategy === "all-but-latest") {
    return historyLen - 1;
  }
  // Unknown strategies must not fall through into the suffix arithmetic.
  if (policy.strategy !== "fixed-suffix") return -1;

  // A negative or fractional suffix would produce an index that matches no
  // message (so no marker is emitted) while callers still sum tokens up to
  // that index and report them as cacheable. Treat it as "no marker".
  const { suffix } = policy;
  if (!Number.isInteger(suffix) || suffix < 0) return -1;

  const idx = historyLen - 1 - suffix;
  return idx >= 0 ? idx : -1;
}
|
|
588
|
+
/**
 * Total the token counts of history messages from index 0 through
 * `throughIndex` (inclusive), skipping system-role messages.
 *
 * For a message with an array `parts`, only parts whose `text` is a string
 * are counted; otherwise a string `content` is counted. Anything else
 * contributes nothing.
 *
 * @param {Array<object>} history - History messages.
 * @param {number} throughIndex - Last index to include.
 * @returns {number} Sum of `countTokens` over the counted text.
 */
function sumHistoryTokens(history, throughIndex) {
  let tokens = 0;
  let idx = 0;
  while (idx <= throughIndex && idx < history.length) {
    const message = history[idx];
    idx += 1;
    if (message.role === "system") continue;
    if (Array.isArray(message.parts)) {
      tokens = message.parts.reduce(
        (acc, part) => (typeof part.text === "string" ? acc + countTokens(part.text) : acc),
        tokens
      );
    } else if (typeof message.content === "string") {
      tokens += countTokens(message.content);
    }
  }
  return tokens;
}
|
|
550
603
|
function toAnthropicTools(tools) {
|
|
551
604
|
return tools.map((t) => ({
|
|
552
605
|
name: t.name,
|
|
@@ -581,6 +634,9 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
581
634
|
const minTokens = profile.lowering.cache.minTokens ?? 4096;
|
|
582
635
|
const meetsMin = cacheableTokens >= minTokens;
|
|
583
636
|
const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
|
|
637
|
+
const history = ir.history ?? [];
|
|
638
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
639
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
584
640
|
return {
|
|
585
641
|
request: {
|
|
586
642
|
provider: "google",
|
|
@@ -592,6 +648,7 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
592
648
|
},
|
|
593
649
|
diagnostics: {
|
|
594
650
|
cacheableTokens: meetsMin ? cacheableTokens : 0,
|
|
651
|
+
historyCacheableTokens,
|
|
595
652
|
estimatedCacheSavingsUsd: cacheSavings
|
|
596
653
|
}
|
|
597
654
|
};
|
|
@@ -639,6 +696,9 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
639
696
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
640
697
|
});
|
|
641
698
|
}
|
|
699
|
+
const history = ir.history ?? [];
|
|
700
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
701
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
642
702
|
return {
|
|
643
703
|
request: {
|
|
644
704
|
provider: "openai",
|
|
@@ -648,7 +708,11 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
648
708
|
response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
|
|
649
709
|
reasoning_effort: hints.forceTerseOutput ? "low" : void 0
|
|
650
710
|
},
|
|
651
|
-
diagnostics: {
|
|
711
|
+
diagnostics: {
|
|
712
|
+
cacheableTokens: 0,
|
|
713
|
+
historyCacheableTokens,
|
|
714
|
+
estimatedCacheSavingsUsd: 0
|
|
715
|
+
}
|
|
652
716
|
};
|
|
653
717
|
}
|
|
654
718
|
function toOpenAITools(tools) {
|
|
@@ -675,6 +739,9 @@ function lowerDeepSeek(ir, profile) {
|
|
|
675
739
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
676
740
|
});
|
|
677
741
|
}
|
|
742
|
+
const history = ir.history ?? [];
|
|
743
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
744
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
678
745
|
return {
|
|
679
746
|
request: {
|
|
680
747
|
provider: "deepseek",
|
|
@@ -689,7 +756,11 @@ function lowerDeepSeek(ir, profile) {
|
|
|
689
756
|
}
|
|
690
757
|
})) : void 0
|
|
691
758
|
},
|
|
692
|
-
diagnostics: {
|
|
759
|
+
diagnostics: {
|
|
760
|
+
cacheableTokens: 0,
|
|
761
|
+
historyCacheableTokens,
|
|
762
|
+
estimatedCacheSavingsUsd: 0
|
|
763
|
+
}
|
|
693
764
|
};
|
|
694
765
|
}
|
|
695
766
|
function sortSections(sections) {
|
|
@@ -1181,7 +1252,8 @@ function compile(ir, opts = {}) {
|
|
|
1181
1252
|
historyKept: workingIR.history?.length ?? 0,
|
|
1182
1253
|
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
1183
1254
|
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
1184
|
-
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
|
|
1255
|
+
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
|
|
1256
|
+
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
|
|
1185
1257
|
}
|
|
1186
1258
|
};
|
|
1187
1259
|
}
|
|
@@ -1266,7 +1338,8 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
1266
1338
|
learningKey: learningKey(archetype, result.target, shape),
|
|
1267
1339
|
estimatedTokensIn: tokens,
|
|
1268
1340
|
mutationsApplied: result.mutationsApplied.map((m) => m.id),
|
|
1269
|
-
startedAt: Date.now()
|
|
1341
|
+
startedAt: Date.now(),
|
|
1342
|
+
historyCacheableTokens: result.diagnostics.historyCacheableTokens
|
|
1270
1343
|
});
|
|
1271
1344
|
}
|
|
1272
1345
|
async function record(input) {
|
|
@@ -1339,7 +1412,8 @@ function buildPayload(input, reg) {
|
|
|
1339
1412
|
cache_read_input_tokens: input.cacheReadInputTokens,
|
|
1340
1413
|
cache_creation_input_tokens: input.cacheCreationInputTokens,
|
|
1341
1414
|
cost_usd_actual: costUsdActual,
|
|
1342
|
-
ttft_ms: input.ttftMs
|
|
1415
|
+
ttft_ms: input.ttftMs,
|
|
1416
|
+
history_cacheable_tokens: reg?.historyCacheableTokens
|
|
1343
1417
|
};
|
|
1344
1418
|
}
|
|
1345
1419
|
function computeCostUsd(modelId, tokensIn, tokensOut) {
|
package/dist/index.mjs
CHANGED
|
@@ -374,10 +374,15 @@ function lower(ir, profile, hints = {}) {
|
|
|
374
374
|
}
|
|
375
375
|
function lowerAnthropic(ir, profile, hints) {
|
|
376
376
|
const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
|
|
377
|
-
const
|
|
377
|
+
const history = ir.history ?? [];
|
|
378
|
+
const policy = ir.historyCachePolicy;
|
|
379
|
+
const markIndex = resolveHistoryMarkIndex(history.length, policy);
|
|
380
|
+
const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
|
|
378
381
|
const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
|
|
379
382
|
const cacheableTokens = computeCacheableTokens(systemBlocks);
|
|
380
|
-
const
|
|
383
|
+
const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
|
|
384
|
+
const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
|
|
385
|
+
const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
|
|
381
386
|
return {
|
|
382
387
|
request: {
|
|
383
388
|
provider: "anthropic",
|
|
@@ -389,6 +394,7 @@ function lowerAnthropic(ir, profile, hints) {
|
|
|
389
394
|
},
|
|
390
395
|
diagnostics: {
|
|
391
396
|
cacheableTokens,
|
|
397
|
+
historyCacheableTokens,
|
|
392
398
|
estimatedCacheSavingsUsd: cacheSavings
|
|
393
399
|
}
|
|
394
400
|
};
|
|
@@ -421,17 +427,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
|
|
|
421
427
|
}
|
|
422
428
|
return blocks;
|
|
423
429
|
}
|
|
424
|
-
function buildAnthropicMessages(history, currentTurn) {
|
|
430
|
+
function buildAnthropicMessages(history, currentTurn, markIndex) {
|
|
425
431
|
const out = [];
|
|
426
|
-
for (
|
|
432
|
+
for (let i = 0; i < history.length; i++) {
|
|
433
|
+
const m = history[i];
|
|
427
434
|
if (m.role === "system") continue;
|
|
428
|
-
|
|
435
|
+
const shouldMark = i === markIndex;
|
|
436
|
+
out.push({
|
|
437
|
+
role: m.role,
|
|
438
|
+
content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
|
|
439
|
+
});
|
|
429
440
|
}
|
|
430
441
|
if (currentTurn && currentTurn.role !== "system") {
|
|
431
442
|
out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
|
|
432
443
|
}
|
|
433
444
|
return out;
|
|
434
445
|
}
|
|
446
|
+
/**
 * Build the Anthropic `content` array for a message that should carry the
 * history cache breakpoint (`cache_control: { type: "ephemeral" }`).
 *
 * The marker is placed on the LAST content block of the message. The input
 * message and its blocks are never mutated; a new array (and a shallow copy
 * of the last block) is returned.
 *
 * @param {{parts?: Array<object>, content?: unknown}} m - History message.
 * @returns {unknown} Content payload for the message. An array of blocks with
 *   the marker on the final block when there is something to mark; otherwise
 *   the same payload an unmarked message would send (`m.parts ?? m.content`).
 */
function attachAnthropicCacheControl(m) {
  const marker = { type: "ephemeral" };

  // Prefer explicit parts; otherwise accept an array-valued `content`, which
  // the unmarked path forwards verbatim as content blocks.
  let blocks = null;
  if (Array.isArray(m.parts) && m.parts.length > 0) {
    blocks = m.parts;
  } else if (Array.isArray(m.content) && m.content.length > 0) {
    blocks = m.content;
  }

  if (blocks !== null) {
    const last = blocks[blocks.length - 1];
    return [...blocks.slice(0, -1), { ...last, cache_control: marker }];
  }

  if (typeof m.content === "string" && m.content.length > 0) {
    return [{ type: "text", text: m.content, cache_control: marker }];
  }

  // Nothing markable (empty / missing / non-text content): do not fabricate a
  // text block with an empty or non-string `text`; send what the unmarked
  // path would have sent.
  return m.parts ?? m.content;
}
|
|
464
|
+
/**
 * Resolve which history entry should carry the cache marker.
 *
 * @param {number} historyLen - Number of entries in the history array.
 * @param {{strategy: string, suffix?: number}|undefined|null} policy -
 *   History cache policy; omitted or `"none"` means no marker.
 * @returns {number} Zero-based index into history, or -1 when no entry
 *   should be marked.
 */
function resolveHistoryMarkIndex(historyLen, policy) {
  if (!policy || policy.strategy === "none") return -1;
  if (historyLen === 0) return -1;
  if (policy.strategy === "all-but-latest") {
    return historyLen - 1;
  }
  // Unknown strategies must not fall through into the suffix arithmetic.
  if (policy.strategy !== "fixed-suffix") return -1;

  // A negative or fractional suffix would produce an index that matches no
  // message (so no marker is emitted) while callers still sum tokens up to
  // that index and report them as cacheable. Treat it as "no marker".
  const { suffix } = policy;
  if (!Number.isInteger(suffix) || suffix < 0) return -1;

  const idx = historyLen - 1 - suffix;
  return idx >= 0 ? idx : -1;
}
|
|
473
|
+
/**
 * Total the token counts of history messages from index 0 through
 * `throughIndex` (inclusive), skipping system-role messages.
 *
 * For a message with an array `parts`, only parts whose `text` is a string
 * are counted; otherwise a string `content` is counted. Anything else
 * contributes nothing.
 *
 * @param {Array<object>} history - History messages.
 * @param {number} throughIndex - Last index to include.
 * @returns {number} Sum of `countTokens` over the counted text.
 */
function sumHistoryTokens(history, throughIndex) {
  let tokens = 0;
  let idx = 0;
  while (idx <= throughIndex && idx < history.length) {
    const message = history[idx];
    idx += 1;
    if (message.role === "system") continue;
    if (Array.isArray(message.parts)) {
      tokens = message.parts.reduce(
        (acc, part) => (typeof part.text === "string" ? acc + countTokens(part.text) : acc),
        tokens
      );
    } else if (typeof message.content === "string") {
      tokens += countTokens(message.content);
    }
  }
  return tokens;
}
|
|
435
488
|
function toAnthropicTools(tools) {
|
|
436
489
|
return tools.map((t) => ({
|
|
437
490
|
name: t.name,
|
|
@@ -466,6 +519,9 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
466
519
|
const minTokens = profile.lowering.cache.minTokens ?? 4096;
|
|
467
520
|
const meetsMin = cacheableTokens >= minTokens;
|
|
468
521
|
const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
|
|
522
|
+
const history = ir.history ?? [];
|
|
523
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
524
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
469
525
|
return {
|
|
470
526
|
request: {
|
|
471
527
|
provider: "google",
|
|
@@ -477,6 +533,7 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
477
533
|
},
|
|
478
534
|
diagnostics: {
|
|
479
535
|
cacheableTokens: meetsMin ? cacheableTokens : 0,
|
|
536
|
+
historyCacheableTokens,
|
|
480
537
|
estimatedCacheSavingsUsd: cacheSavings
|
|
481
538
|
}
|
|
482
539
|
};
|
|
@@ -524,6 +581,9 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
524
581
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
525
582
|
});
|
|
526
583
|
}
|
|
584
|
+
const history = ir.history ?? [];
|
|
585
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
586
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
527
587
|
return {
|
|
528
588
|
request: {
|
|
529
589
|
provider: "openai",
|
|
@@ -533,7 +593,11 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
533
593
|
response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
|
|
534
594
|
reasoning_effort: hints.forceTerseOutput ? "low" : void 0
|
|
535
595
|
},
|
|
536
|
-
diagnostics: {
|
|
596
|
+
diagnostics: {
|
|
597
|
+
cacheableTokens: 0,
|
|
598
|
+
historyCacheableTokens,
|
|
599
|
+
estimatedCacheSavingsUsd: 0
|
|
600
|
+
}
|
|
537
601
|
};
|
|
538
602
|
}
|
|
539
603
|
function toOpenAITools(tools) {
|
|
@@ -560,6 +624,9 @@ function lowerDeepSeek(ir, profile) {
|
|
|
560
624
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
561
625
|
});
|
|
562
626
|
}
|
|
627
|
+
const history = ir.history ?? [];
|
|
628
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
629
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
563
630
|
return {
|
|
564
631
|
request: {
|
|
565
632
|
provider: "deepseek",
|
|
@@ -574,7 +641,11 @@ function lowerDeepSeek(ir, profile) {
|
|
|
574
641
|
}
|
|
575
642
|
})) : void 0
|
|
576
643
|
},
|
|
577
|
-
diagnostics: {
|
|
644
|
+
diagnostics: {
|
|
645
|
+
cacheableTokens: 0,
|
|
646
|
+
historyCacheableTokens,
|
|
647
|
+
estimatedCacheSavingsUsd: 0
|
|
648
|
+
}
|
|
578
649
|
};
|
|
579
650
|
}
|
|
580
651
|
function sortSections(sections) {
|
|
@@ -664,7 +735,8 @@ function compile(ir, opts = {}) {
|
|
|
664
735
|
historyKept: workingIR.history?.length ?? 0,
|
|
665
736
|
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
666
737
|
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
667
|
-
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
|
|
738
|
+
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
|
|
739
|
+
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
|
|
668
740
|
}
|
|
669
741
|
};
|
|
670
742
|
}
|
|
@@ -749,7 +821,8 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
749
821
|
learningKey: learningKey(archetype, result.target, shape),
|
|
750
822
|
estimatedTokensIn: tokens,
|
|
751
823
|
mutationsApplied: result.mutationsApplied.map((m) => m.id),
|
|
752
|
-
startedAt: Date.now()
|
|
824
|
+
startedAt: Date.now(),
|
|
825
|
+
historyCacheableTokens: result.diagnostics.historyCacheableTokens
|
|
753
826
|
});
|
|
754
827
|
}
|
|
755
828
|
async function record(input) {
|
|
@@ -822,7 +895,8 @@ function buildPayload(input, reg) {
|
|
|
822
895
|
cache_read_input_tokens: input.cacheReadInputTokens,
|
|
823
896
|
cache_creation_input_tokens: input.cacheCreationInputTokens,
|
|
824
897
|
cost_usd_actual: costUsdActual,
|
|
825
|
-
ttft_ms: input.ttftMs
|
|
898
|
+
ttft_ms: input.ttftMs,
|
|
899
|
+
history_cacheable_tokens: reg?.historyCacheableTokens
|
|
826
900
|
};
|
|
827
901
|
}
|
|
828
902
|
function computeCostUsd(modelId, tokensIn, tokensOut) {
|
package/dist/{profiles-CH_nKPjp.d.mts → profiles-DHdCRBVH.d.mts}
CHANGED
|
@@ -91,6 +91,40 @@ interface Constraints {
|
|
|
91
91
|
/** Override target model selection — if set, compiler uses this instead of routing. */
|
|
92
92
|
forceModel?: string;
|
|
93
93
|
}
|
|
94
|
+
/**
|
|
95
|
+
* Cache marker policy for the messages array (history + currentTurn).
|
|
96
|
+
*
|
|
97
|
+
* Anthropic positional caching: a `cache_control` marker on a content block
|
|
98
|
+
* tells the API "remember the prefix up through this block." On a subsequent
|
|
99
|
+
* request whose first N tokens match, those N tokens are billed at the cached rate
|
|
100
|
+
* (10% of the input price). Without a marker, every call re-pays for the
|
|
101
|
+
* entire history.
|
|
102
|
+
*
|
|
103
|
+
* - `'none'` (default when omitted): no history cache marker. System-level
|
|
104
|
+
* cache markers from `PromptSection.cacheable=true` still apply.
|
|
105
|
+
* - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
|
|
106
|
+
* (the last history entry). On the next call, that entire history prefix
|
|
107
|
+
* is cacheable. Good fit for chat/agent loops where every prior turn is
|
|
108
|
+
* stable.
|
|
109
|
+
* - `'fixed-suffix'`: marks the message `suffix` positions from the end of
|
|
110
|
+
* `history`. Use when the last few turns are volatile (e.g., scratchpad,
|
|
111
|
+
* draft revisions) but the earlier prefix is stable.
|
|
112
|
+
*
|
|
113
|
+
* For non-Anthropic providers, no wire-format marker is emitted (Gemini /
|
|
114
|
+
* OpenAI / DeepSeek implicit caching takes effect automatically when a
|
|
115
|
+
* stable prefix is reused). The compiler still computes
|
|
116
|
+
* `diagnostics.historyCacheableTokens` for telemetry on every provider.
|
|
117
|
+
*
|
|
118
|
+
* alpha.5.
|
|
119
|
+
*/
|
|
120
|
+
type HistoryCachePolicy = {
|
|
121
|
+
strategy: 'none';
|
|
122
|
+
} | {
|
|
123
|
+
strategy: 'all-but-latest';
|
|
124
|
+
} | {
|
|
125
|
+
strategy: 'fixed-suffix';
|
|
126
|
+
suffix: number;
|
|
127
|
+
};
|
|
94
128
|
/**
|
|
95
129
|
* Consumer-declared policy for model selection. Lives outside the IR
|
|
96
130
|
* (passed via CompileOptions) because it's a SESSION/APP-level constraint,
|
|
@@ -146,6 +180,12 @@ interface PromptIR {
|
|
|
146
180
|
models: string[];
|
|
147
181
|
/** Compile constraints. */
|
|
148
182
|
constraints?: Constraints;
|
|
183
|
+
/**
|
|
184
|
+
* Cache marker placement policy for the messages array. Default = no
|
|
185
|
+
* history cache markers. See `HistoryCachePolicy` for semantics.
|
|
186
|
+
* alpha.5.
|
|
187
|
+
*/
|
|
188
|
+
historyCachePolicy?: HistoryCachePolicy;
|
|
149
189
|
}
|
|
150
190
|
type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
|
|
151
191
|
/**
|
|
@@ -240,6 +280,16 @@ interface CompileResult {
|
|
|
240
280
|
historyDropped: number;
|
|
241
281
|
cacheableTokens: number;
|
|
242
282
|
estimatedCacheSavingsUsd: number;
|
|
283
|
+
/**
|
|
284
|
+
* Tokens in `history` (up to and including the marked message) that
|
|
285
|
+
* fall within the cacheable prefix per `historyCachePolicy`. Always
|
|
286
|
+
* computed; only Anthropic actually emits a wire-format marker. For
|
|
287
|
+
* Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
|
|
288
|
+
* prefix that implicit caching may pick up — useful telemetry for the
|
|
289
|
+
* brain to learn which (app, model, archetype) tuples benefit most
|
|
290
|
+
* from history caching. alpha.5.
|
|
291
|
+
*/
|
|
292
|
+
historyCacheableTokens: number;
|
|
243
293
|
};
|
|
244
294
|
}
|
|
245
295
|
/**
|
package/dist/{profiles-CDttLtaD.d.ts → profiles-MGq5Tnjv.d.ts}
CHANGED
|
@@ -91,6 +91,40 @@ interface Constraints {
|
|
|
91
91
|
/** Override target model selection — if set, compiler uses this instead of routing. */
|
|
92
92
|
forceModel?: string;
|
|
93
93
|
}
|
|
94
|
+
/**
|
|
95
|
+
* Cache marker policy for the messages array (history + currentTurn).
|
|
96
|
+
*
|
|
97
|
+
* Anthropic positional caching: a `cache_control` marker on a content block
|
|
98
|
+
* tells the API "remember the prefix up through this block." On a subsequent
|
|
99
|
+
* request whose first N tokens match, those N tokens are billed at the cached rate
|
|
100
|
+
* (10% of the input price). Without a marker, every call re-pays for the
|
|
101
|
+
* entire history.
|
|
102
|
+
*
|
|
103
|
+
* - `'none'` (default when omitted): no history cache marker. System-level
|
|
104
|
+
* cache markers from `PromptSection.cacheable=true` still apply.
|
|
105
|
+
* - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
|
|
106
|
+
* (the last history entry). On the next call, that entire history prefix
|
|
107
|
+
* is cacheable. Good fit for chat/agent loops where every prior turn is
|
|
108
|
+
* stable.
|
|
109
|
+
* - `'fixed-suffix'`: marks the message `suffix` positions from the end of
|
|
110
|
+
* `history`. Use when the last few turns are volatile (e.g., scratchpad,
|
|
111
|
+
* draft revisions) but the earlier prefix is stable.
|
|
112
|
+
*
|
|
113
|
+
* For non-Anthropic providers, no wire-format marker is emitted (Gemini /
|
|
114
|
+
* OpenAI / DeepSeek implicit caching takes effect automatically when a
|
|
115
|
+
* stable prefix is reused). The compiler still computes
|
|
116
|
+
* `diagnostics.historyCacheableTokens` for telemetry on every provider.
|
|
117
|
+
*
|
|
118
|
+
* alpha.5.
|
|
119
|
+
*/
|
|
120
|
+
type HistoryCachePolicy = {
|
|
121
|
+
strategy: 'none';
|
|
122
|
+
} | {
|
|
123
|
+
strategy: 'all-but-latest';
|
|
124
|
+
} | {
|
|
125
|
+
strategy: 'fixed-suffix';
|
|
126
|
+
suffix: number;
|
|
127
|
+
};
|
|
94
128
|
/**
|
|
95
129
|
* Consumer-declared policy for model selection. Lives outside the IR
|
|
96
130
|
* (passed via CompileOptions) because it's a SESSION/APP-level constraint,
|
|
@@ -146,6 +180,12 @@ interface PromptIR {
|
|
|
146
180
|
models: string[];
|
|
147
181
|
/** Compile constraints. */
|
|
148
182
|
constraints?: Constraints;
|
|
183
|
+
/**
|
|
184
|
+
* Cache marker placement policy for the messages array. Default = no
|
|
185
|
+
* history cache markers. See `HistoryCachePolicy` for semantics.
|
|
186
|
+
* alpha.5.
|
|
187
|
+
*/
|
|
188
|
+
historyCachePolicy?: HistoryCachePolicy;
|
|
149
189
|
}
|
|
150
190
|
type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
|
|
151
191
|
/**
|
|
@@ -240,6 +280,16 @@ interface CompileResult {
|
|
|
240
280
|
historyDropped: number;
|
|
241
281
|
cacheableTokens: number;
|
|
242
282
|
estimatedCacheSavingsUsd: number;
|
|
283
|
+
/**
|
|
284
|
+
* Tokens in `history` (up to and including the marked message) that
|
|
285
|
+
* fall within the cacheable prefix per `historyCachePolicy`. Always
|
|
286
|
+
* computed; only Anthropic actually emits a wire-format marker. For
|
|
287
|
+
* Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
|
|
288
|
+
* prefix that implicit caching may pick up — useful telemetry for the
|
|
289
|
+
* brain to learn which (app, model, archetype) tuples benefit most
|
|
290
|
+
* from history caching. alpha.5.
|
|
291
|
+
*/
|
|
292
|
+
historyCacheableTokens: number;
|
|
243
293
|
};
|
|
244
294
|
}
|
|
245
295
|
/**
|
package/dist/profiles.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
|
|
1
|
+
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
|
|
2
2
|
import './dialect.mjs';
|
package/dist/profiles.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
|
|
1
|
+
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
|
|
2
2
|
import './dialect.js';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@warmdrift/kgauto-compiler",
|
|
3
|
-
"version": "2.0.0-alpha.4",
|
|
3
|
+
"version": "2.0.0-alpha.5",
|
|
4
4
|
"description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|