@warmdrift/kgauto-compiler 2.0.0-alpha.3 → 2.0.0-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # @warmdrift/kgauto-compiler — v2.0.0-alpha.3
1
+ # @warmdrift/kgauto-compiler — v2.0.0-alpha.4
2
2
 
3
3
  > Prompt compiler + central learning brain for multi-model AI apps.
4
4
  > **Swap models without rewriting prompts.**
@@ -18,7 +18,7 @@ mutations.
18
18
  - **Package:** alpha — coexists with v1 (`@warmdrift/kgauto@1.2.0`) under
19
19
  the temporary name `@warmdrift/kgauto-compiler`. Renames to v2 final once
20
20
  v1 is fully retired from production.
21
- - **Tests:** 132/132 passing
21
+ - **Tests:** 147/147 passing
22
22
  - **Build:** clean (43KB ESM, 60KB CJS)
23
23
  - **Brain:** schema ready (see `brain/migrations/001_initial_schema.sql`);
24
24
  awaiting dedicated Supabase provisioning.
package/dist/index.d.mts CHANGED
@@ -1,5 +1,5 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-BiyrF36f.mjs';
2
- export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BiyrF36f.mjs';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CH_nKPjp.mjs';
2
+ export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
3
3
  export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
4
4
 
5
5
  /**
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-C5lVqF8_.js';
2
- export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-C5lVqF8_.js';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CDttLtaD.js';
2
+ export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
3
3
  export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
4
4
 
5
5
  /**
package/dist/index.js CHANGED
@@ -1309,6 +1309,9 @@ function buildPayload(input, reg) {
1309
1309
  const compileTarget = reg?.model;
1310
1310
  const actual = input.actualModel ?? compileTarget;
1311
1311
  const requested = input.actualModel && compileTarget && input.actualModel !== compileTarget ? compileTarget : void 0;
1312
+ const mutationsApplied = input.mutationsApplied ?? reg?.mutationsApplied ?? [];
1313
+ const costModel = actual;
1314
+ const costUsdActual = costModel ? computeCostUsd(costModel, input.tokensIn, input.tokensOut) : void 0;
1312
1315
  return {
1313
1316
  handle: input.handle,
1314
1317
  app_id: reg?.appId,
@@ -1318,7 +1321,7 @@ function buildPayload(input, reg) {
1318
1321
  provider: reg?.provider,
1319
1322
  shape_key: reg?.shapeKey,
1320
1323
  learning_key: reg?.learningKey,
1321
- mutations_applied: reg?.mutationsApplied ?? [],
1324
+ mutations_applied: mutationsApplied,
1322
1325
  tokens_in: input.tokensIn,
1323
1326
  tokens_out: input.tokensOut,
1324
1327
  estimated_tokens_in: reg?.estimatedTokensIn,
@@ -1332,9 +1335,21 @@ function buildPayload(input, reg) {
1332
1335
  oracle_rationale: input.oracleScore?.rationale,
1333
1336
  prompt_preview: input.promptPreview,
1334
1337
  response_preview: input.responsePreview,
1335
- dialect_version: "v1"
1338
+ dialect_version: "v1",
1339
+ cache_read_input_tokens: input.cacheReadInputTokens,
1340
+ cache_creation_input_tokens: input.cacheCreationInputTokens,
1341
+ cost_usd_actual: costUsdActual,
1342
+ ttft_ms: input.ttftMs
1336
1343
  };
1337
1344
  }
1345
+ function computeCostUsd(modelId, tokensIn, tokensOut) {
1346
+ if (tokensIn === 0 && tokensOut === 0) return void 0;
1347
+ const profile = tryGetProfile(modelId);
1348
+ if (!profile) return void 0;
1349
+ const inUsd = tokensIn / 1e6 * profile.costInputPer1m;
1350
+ const outUsd = tokensOut / 1e6 * profile.costOutputPer1m;
1351
+ return Math.round((inUsd + outUsd) * 1e6) / 1e6;
1352
+ }
1338
1353
 
1339
1354
  // src/ir.ts
1340
1355
  var CallError = class extends Error {
@@ -1607,7 +1622,7 @@ async function call(ir, opts = {}) {
1607
1622
  attempts.push({ model: targetModel, status: "success" });
1608
1623
  const latencyMs2 = Date.now() - start;
1609
1624
  const responseWithStructured = withStructuredOutput(exec.response, ir);
1610
- void record({
1625
+ await record({
1611
1626
  handle: initial.handle,
1612
1627
  tokensIn: responseWithStructured.tokens.input,
1613
1628
  tokensOut: responseWithStructured.tokens.output,
@@ -1616,7 +1631,11 @@ async function call(ir, opts = {}) {
1616
1631
  emptyResponse: responseWithStructured.tokens.output === 0,
1617
1632
  toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
1618
1633
  actualModel: targetModel !== initial.target ? targetModel : void 0,
1619
- responsePreview: responseWithStructured.text.slice(0, 200)
1634
+ mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
1635
+ promptPreview: extractPromptPreview(ir),
1636
+ responsePreview: responseWithStructured.text.slice(0, 200),
1637
+ cacheReadInputTokens: responseWithStructured.tokens.cached,
1638
+ cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
1620
1639
  });
1621
1640
  return {
1622
1641
  handle: initial.handle,
@@ -1641,13 +1660,14 @@ async function call(ir, opts = {}) {
1641
1660
  }
1642
1661
  }
1643
1662
  const latencyMs = Date.now() - start;
1644
- void record({
1663
+ await record({
1645
1664
  handle: initial.handle,
1646
1665
  tokensIn: 0,
1647
1666
  tokensOut: 0,
1648
1667
  latencyMs,
1649
1668
  success: false,
1650
- errorType: lastErr?.errorCode
1669
+ errorType: lastErr?.errorCode,
1670
+ promptPreview: extractPromptPreview(ir)
1651
1671
  });
1652
1672
  throw new CallError(
1653
1673
  `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}`,
@@ -1665,6 +1685,13 @@ function compileAndRegister(ir, opts) {
1665
1685
  registerCompile(ir.appId, ir.intent.archetype, ir, result);
1666
1686
  return result;
1667
1687
  }
1688
+ function extractPromptPreview(ir) {
1689
+ const turn = ir.currentTurn?.content;
1690
+ if (turn) return turn.slice(0, 200);
1691
+ const lastHist = ir.history?.[ir.history.length - 1]?.content;
1692
+ if (lastHist) return lastHist.slice(0, 200);
1693
+ return void 0;
1694
+ }
1668
1695
  function withStructuredOutput(response, ir) {
1669
1696
  if (!ir.constraints?.structuredOutput) return response;
1670
1697
  if (!response.text) return response;
package/dist/index.mjs CHANGED
@@ -792,6 +792,9 @@ function buildPayload(input, reg) {
792
792
  const compileTarget = reg?.model;
793
793
  const actual = input.actualModel ?? compileTarget;
794
794
  const requested = input.actualModel && compileTarget && input.actualModel !== compileTarget ? compileTarget : void 0;
795
+ const mutationsApplied = input.mutationsApplied ?? reg?.mutationsApplied ?? [];
796
+ const costModel = actual;
797
+ const costUsdActual = costModel ? computeCostUsd(costModel, input.tokensIn, input.tokensOut) : void 0;
795
798
  return {
796
799
  handle: input.handle,
797
800
  app_id: reg?.appId,
@@ -801,7 +804,7 @@ function buildPayload(input, reg) {
801
804
  provider: reg?.provider,
802
805
  shape_key: reg?.shapeKey,
803
806
  learning_key: reg?.learningKey,
804
- mutations_applied: reg?.mutationsApplied ?? [],
807
+ mutations_applied: mutationsApplied,
805
808
  tokens_in: input.tokensIn,
806
809
  tokens_out: input.tokensOut,
807
810
  estimated_tokens_in: reg?.estimatedTokensIn,
@@ -815,9 +818,21 @@ function buildPayload(input, reg) {
815
818
  oracle_rationale: input.oracleScore?.rationale,
816
819
  prompt_preview: input.promptPreview,
817
820
  response_preview: input.responsePreview,
818
- dialect_version: "v1"
821
+ dialect_version: "v1",
822
+ cache_read_input_tokens: input.cacheReadInputTokens,
823
+ cache_creation_input_tokens: input.cacheCreationInputTokens,
824
+ cost_usd_actual: costUsdActual,
825
+ ttft_ms: input.ttftMs
819
826
  };
820
827
  }
828
+ function computeCostUsd(modelId, tokensIn, tokensOut) {
829
+ if (tokensIn === 0 && tokensOut === 0) return void 0;
830
+ const profile = tryGetProfile(modelId);
831
+ if (!profile) return void 0;
832
+ const inUsd = tokensIn / 1e6 * profile.costInputPer1m;
833
+ const outUsd = tokensOut / 1e6 * profile.costOutputPer1m;
834
+ return Math.round((inUsd + outUsd) * 1e6) / 1e6;
835
+ }
821
836
 
822
837
  // src/ir.ts
823
838
  var CallError = class extends Error {
@@ -1090,7 +1105,7 @@ async function call(ir, opts = {}) {
1090
1105
  attempts.push({ model: targetModel, status: "success" });
1091
1106
  const latencyMs2 = Date.now() - start;
1092
1107
  const responseWithStructured = withStructuredOutput(exec.response, ir);
1093
- void record({
1108
+ await record({
1094
1109
  handle: initial.handle,
1095
1110
  tokensIn: responseWithStructured.tokens.input,
1096
1111
  tokensOut: responseWithStructured.tokens.output,
@@ -1099,7 +1114,11 @@ async function call(ir, opts = {}) {
1099
1114
  emptyResponse: responseWithStructured.tokens.output === 0,
1100
1115
  toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
1101
1116
  actualModel: targetModel !== initial.target ? targetModel : void 0,
1102
- responsePreview: responseWithStructured.text.slice(0, 200)
1117
+ mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
1118
+ promptPreview: extractPromptPreview(ir),
1119
+ responsePreview: responseWithStructured.text.slice(0, 200),
1120
+ cacheReadInputTokens: responseWithStructured.tokens.cached,
1121
+ cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
1103
1122
  });
1104
1123
  return {
1105
1124
  handle: initial.handle,
@@ -1124,13 +1143,14 @@ async function call(ir, opts = {}) {
1124
1143
  }
1125
1144
  }
1126
1145
  const latencyMs = Date.now() - start;
1127
- void record({
1146
+ await record({
1128
1147
  handle: initial.handle,
1129
1148
  tokensIn: 0,
1130
1149
  tokensOut: 0,
1131
1150
  latencyMs,
1132
1151
  success: false,
1133
- errorType: lastErr?.errorCode
1152
+ errorType: lastErr?.errorCode,
1153
+ promptPreview: extractPromptPreview(ir)
1134
1154
  });
1135
1155
  throw new CallError(
1136
1156
  `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}`,
@@ -1148,6 +1168,13 @@ function compileAndRegister(ir, opts) {
1148
1168
  registerCompile(ir.appId, ir.intent.archetype, ir, result);
1149
1169
  return result;
1150
1170
  }
1171
+ function extractPromptPreview(ir) {
1172
+ const turn = ir.currentTurn?.content;
1173
+ if (turn) return turn.slice(0, 200);
1174
+ const lastHist = ir.history?.[ir.history.length - 1]?.content;
1175
+ if (lastHist) return lastHist.slice(0, 200);
1176
+ return void 0;
1177
+ }
1151
1178
  function withStructuredOutput(response, ir) {
1152
1179
  if (!ir.constraints?.structuredOutput) return response;
1153
1180
  if (!response.text) return response;
@@ -386,6 +386,38 @@ interface RecordInput {
386
386
  * the originally-requested model.
387
387
  */
388
388
  actualModel?: string;
389
+ /**
390
+ * Override `mutations_applied` for this outcome. Set by `call()` when
391
+ * fallback fires — the served compile's mutations (which actually shaped
392
+ * the request that went on the wire) replace the initial compile's
393
+ * mutations (registered against the handle). Without this override, fallback
394
+ * traffic is attributed to the initial compile's mutations and the brain's
395
+ * mutation effectiveness stats become misleading.
396
+ *
397
+ * alpha.4: extends s11 truth-in-logging to mutations.
398
+ */
399
+ mutationsApplied?: string[];
400
+ /**
401
+ * Cache read input tokens, when supported by the provider.
402
+ * - Anthropic: `usage.cache_read_input_tokens`
403
+ * - Google (implicit caching): `usageMetadata.cachedContentTokenCount`
404
+ * - OpenAI: `usage.prompt_tokens_details.cached_tokens`
405
+ *
406
+ * Powers the cost-and-efficiency-watcher (interfaces/kgauto.md, alpha.4):
407
+ * `tokens_in - cache_read_input_tokens` is the un-cached new context per call.
408
+ */
409
+ cacheReadInputTokens?: number;
410
+ /**
411
+ * Cache creation input tokens (Anthropic-specific).
412
+ * Source: `usage.cache_creation_input_tokens`. Populated on the first call, which pays the 25%
413
+ * upcharge to write a cache marker; subsequent calls hit the cache and report `cacheReadInputTokens`.
414
+ */
415
+ cacheCreationInputTokens?: number;
416
+ /**
417
+ * Time to first token (ms). Optional; populated when the provider/SDK
418
+ * surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
419
+ */
420
+ ttftMs?: number;
389
421
  }
390
422
 
391
423
  /**
@@ -386,6 +386,38 @@ interface RecordInput {
386
386
  * the originally-requested model.
387
387
  */
388
388
  actualModel?: string;
389
+ /**
390
+ * Override `mutations_applied` for this outcome. Set by `call()` when
391
+ * fallback fires — the served compile's mutations (which actually shaped
392
+ * the request that went on the wire) replace the initial compile's
393
+ * mutations (registered against the handle). Without this override, fallback
394
+ * traffic is attributed to the initial compile's mutations and the brain's
395
+ * mutation effectiveness stats become misleading.
396
+ *
397
+ * alpha.4: extends s11 truth-in-logging to mutations.
398
+ */
399
+ mutationsApplied?: string[];
400
+ /**
401
+ * Cache read input tokens, when supported by the provider.
402
+ * - Anthropic: `usage.cache_read_input_tokens`
403
+ * - Google (implicit caching): `usageMetadata.cachedContentTokenCount`
404
+ * - OpenAI: `usage.prompt_tokens_details.cached_tokens`
405
+ *
406
+ * Powers the cost-and-efficiency-watcher (interfaces/kgauto.md, alpha.4):
407
+ * `tokens_in - cache_read_input_tokens` is the un-cached new context per call.
408
+ */
409
+ cacheReadInputTokens?: number;
410
+ /**
411
+ * Cache creation input tokens (Anthropic-specific).
412
+ * Source: `usage.cache_creation_input_tokens`. Populated on the first call, which pays the 25%
413
+ * upcharge to write a cache marker; subsequent calls hit the cache and report `cacheReadInputTokens`.
414
+ */
415
+ cacheCreationInputTokens?: number;
416
+ /**
417
+ * Time to first token (ms). Optional; populated when the provider/SDK
418
+ * surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
419
+ */
420
+ ttftMs?: number;
389
421
  }
390
422
 
391
423
  /**
@@ -1,2 +1,2 @@
1
- export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BiyrF36f.mjs';
1
+ export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
2
2
  import './dialect.mjs';
@@ -1,2 +1,2 @@
1
- export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-C5lVqF8_.js';
1
+ export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
2
2
  import './dialect.js';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@warmdrift/kgauto-compiler",
3
- "version": "2.0.0-alpha.3",
3
+ "version": "2.0.0-alpha.4",
4
4
  "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",