npm - @warmdrift/kgauto-compiler - Versions diffs - 2.0.0-alpha.3 → 2.0.0-alpha.4 - Mend

@warmdrift/kgauto-compiler 2.0.0-alpha.3 → 2.0.0-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +2 -2
package/dist/index.d.mts +2 -2
package/dist/index.d.ts +2 -2
package/dist/index.js +33 -6
package/dist/index.mjs +33 -6
package/dist/{profiles-C5lVqF8_.d.ts → profiles-CDttLtaD.d.ts} +32 -0
package/dist/{profiles-BiyrF36f.d.mts → profiles-CH_nKPjp.d.mts} +32 -0
package/dist/profiles.d.mts +1 -1
package/dist/profiles.d.ts +1 -1
package/package.json +1 -1

package/README.md CHANGED

@@ -1,4 +1,4 @@
-# @warmdrift/kgauto-compiler — v2.0.0-alpha.3
+# @warmdrift/kgauto-compiler — v2.0.0-alpha.4
 > Prompt compiler + central learning brain for multi-model AI apps.
 > **Swap models without rewriting prompts.**
@@ -18,7 +18,7 @@ mutations.
 - **Package:** alpha — coexists with v1 (`@warmdrift/kgauto@1.2.0`) under
   the temporary name `@warmdrift/kgauto-compiler`. Renames to v2 final once
   v1 is fully retired from production.
-- **Tests:** 132/132 passing
+- **Tests:** 147/147 passing
 - **Build:** clean (43KB ESM, 60KB CJS)
 - **Brain:** schema ready (see `brain/migrations/001_initial_schema.sql`);
   awaiting dedicated Supabase provisioning.

package/dist/index.d.mts CHANGED

@@ -1,5 +1,5 @@
-import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-BiyrF36f.mjs';
-export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BiyrF36f.mjs';
+import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CH_nKPjp.mjs';
+export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
 export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
 /**

package/dist/index.d.ts CHANGED

@@ -1,5 +1,5 @@
-import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-C5lVqF8_.js';
-export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-C5lVqF8_.js';
+import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CDttLtaD.js';
+export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
 export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
 /**

package/dist/index.js CHANGED

@@ -1309,6 +1309,9 @@ function buildPayload(input, reg) {
   const compileTarget = reg?.model;
   const actual = input.actualModel ?? compileTarget;
   const requested = input.actualModel && compileTarget && input.actualModel !== compileTarget ? compileTarget : void 0;
+  const mutationsApplied = input.mutationsApplied ?? reg?.mutationsApplied ?? [];
+  const costModel = actual;
+  const costUsdActual = costModel ? computeCostUsd(costModel, input.tokensIn, input.tokensOut) : void 0;
   return {
     handle: input.handle,
     app_id: reg?.appId,
@@ -1318,7 +1321,7 @@ function buildPayload(input, reg) {
     provider: reg?.provider,
     shape_key: reg?.shapeKey,
     learning_key: reg?.learningKey,
-    mutations_applied: reg?.mutationsApplied ?? [],
+    mutations_applied: mutationsApplied,
     tokens_in: input.tokensIn,
     tokens_out: input.tokensOut,
     estimated_tokens_in: reg?.estimatedTokensIn,
@@ -1332,9 +1335,21 @@ function buildPayload(input, reg) {
     oracle_rationale: input.oracleScore?.rationale,
     prompt_preview: input.promptPreview,
     response_preview: input.responsePreview,
-    dialect_version: "v1"
+    dialect_version: "v1",
+    cache_read_input_tokens: input.cacheReadInputTokens,
+    cache_creation_input_tokens: input.cacheCreationInputTokens,
+    cost_usd_actual: costUsdActual,
+    ttft_ms: input.ttftMs
   };
 }
+function computeCostUsd(modelId, tokensIn, tokensOut) {
+  if (tokensIn === 0 && tokensOut === 0) return void 0;
+  const profile = tryGetProfile(modelId);
+  if (!profile) return void 0;
+  const inUsd = tokensIn / 1e6 * profile.costInputPer1m;
+  const outUsd = tokensOut / 1e6 * profile.costOutputPer1m;
+  return Math.round((inUsd + outUsd) * 1e6) / 1e6;
+}
 // src/ir.ts
 var CallError = class extends Error {
@@ -1607,7 +1622,7 @@ async function call(ir, opts = {}) {
       attempts.push({ model: targetModel, status: "success" });
       const latencyMs2 = Date.now() - start;
       const responseWithStructured = withStructuredOutput(exec.response, ir);
-      void record({
+      await record({
         handle: initial.handle,
         tokensIn: responseWithStructured.tokens.input,
         tokensOut: responseWithStructured.tokens.output,
@@ -1616,7 +1631,11 @@ async function call(ir, opts = {}) {
         emptyResponse: responseWithStructured.tokens.output === 0,
         toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
         actualModel: targetModel !== initial.target ? targetModel : void 0,
-        responsePreview: responseWithStructured.text.slice(0, 200)
+        mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
+        promptPreview: extractPromptPreview(ir),
+        responsePreview: responseWithStructured.text.slice(0, 200),
+        cacheReadInputTokens: responseWithStructured.tokens.cached,
+        cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
       });
       return {
         handle: initial.handle,
@@ -1641,13 +1660,14 @@ async function call(ir, opts = {}) {
     }
   }
   const latencyMs = Date.now() - start;
-  void record({
+  await record({
     handle: initial.handle,
     tokensIn: 0,
     tokensOut: 0,
     latencyMs,
     success: false,
-    errorType: lastErr?.errorCode
+    errorType: lastErr?.errorCode,
+    promptPreview: extractPromptPreview(ir)
   });
   throw new CallError(
     `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}`,
@@ -1665,6 +1685,13 @@ function compileAndRegister(ir, opts) {
   registerCompile(ir.appId, ir.intent.archetype, ir, result);
   return result;
 }
+function extractPromptPreview(ir) {
+  const turn = ir.currentTurn?.content;
+  if (turn) return turn.slice(0, 200);
+  const lastHist = ir.history?.[ir.history.length - 1]?.content;
+  if (lastHist) return lastHist.slice(0, 200);
+  return void 0;
+}
 function withStructuredOutput(response, ir) {
   if (!ir.constraints?.structuredOutput) return response;
   if (!response.text) return response;

package/dist/index.mjs CHANGED

@@ -792,6 +792,9 @@ function buildPayload(input, reg) {
   const compileTarget = reg?.model;
   const actual = input.actualModel ?? compileTarget;
   const requested = input.actualModel && compileTarget && input.actualModel !== compileTarget ? compileTarget : void 0;
+  const mutationsApplied = input.mutationsApplied ?? reg?.mutationsApplied ?? [];
+  const costModel = actual;
+  const costUsdActual = costModel ? computeCostUsd(costModel, input.tokensIn, input.tokensOut) : void 0;
   return {
     handle: input.handle,
     app_id: reg?.appId,
@@ -801,7 +804,7 @@ function buildPayload(input, reg) {
     provider: reg?.provider,
     shape_key: reg?.shapeKey,
     learning_key: reg?.learningKey,
-    mutations_applied: reg?.mutationsApplied ?? [],
+    mutations_applied: mutationsApplied,
     tokens_in: input.tokensIn,
     tokens_out: input.tokensOut,
     estimated_tokens_in: reg?.estimatedTokensIn,
@@ -815,9 +818,21 @@ function buildPayload(input, reg) {
     oracle_rationale: input.oracleScore?.rationale,
     prompt_preview: input.promptPreview,
     response_preview: input.responsePreview,
-    dialect_version: "v1"
+    dialect_version: "v1",
+    cache_read_input_tokens: input.cacheReadInputTokens,
+    cache_creation_input_tokens: input.cacheCreationInputTokens,
+    cost_usd_actual: costUsdActual,
+    ttft_ms: input.ttftMs
   };
 }
+function computeCostUsd(modelId, tokensIn, tokensOut) {
+  if (tokensIn === 0 && tokensOut === 0) return void 0;
+  const profile = tryGetProfile(modelId);
+  if (!profile) return void 0;
+  const inUsd = tokensIn / 1e6 * profile.costInputPer1m;
+  const outUsd = tokensOut / 1e6 * profile.costOutputPer1m;
+  return Math.round((inUsd + outUsd) * 1e6) / 1e6;
+}
 // src/ir.ts
 var CallError = class extends Error {
@@ -1090,7 +1105,7 @@ async function call(ir, opts = {}) {
       attempts.push({ model: targetModel, status: "success" });
       const latencyMs2 = Date.now() - start;
       const responseWithStructured = withStructuredOutput(exec.response, ir);
-      void record({
+      await record({
         handle: initial.handle,
         tokensIn: responseWithStructured.tokens.input,
         tokensOut: responseWithStructured.tokens.output,
@@ -1099,7 +1114,11 @@ async function call(ir, opts = {}) {
         emptyResponse: responseWithStructured.tokens.output === 0,
         toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
         actualModel: targetModel !== initial.target ? targetModel : void 0,
-        responsePreview: responseWithStructured.text.slice(0, 200)
+        mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
+        promptPreview: extractPromptPreview(ir),
+        responsePreview: responseWithStructured.text.slice(0, 200),
+        cacheReadInputTokens: responseWithStructured.tokens.cached,
+        cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
       });
       return {
         handle: initial.handle,
@@ -1124,13 +1143,14 @@ async function call(ir, opts = {}) {
     }
   }
   const latencyMs = Date.now() - start;
-  void record({
+  await record({
     handle: initial.handle,
     tokensIn: 0,
     tokensOut: 0,
     latencyMs,
     success: false,
-    errorType: lastErr?.errorCode
+    errorType: lastErr?.errorCode,
+    promptPreview: extractPromptPreview(ir)
   });
   throw new CallError(
     `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}`,
@@ -1148,6 +1168,13 @@ function compileAndRegister(ir, opts) {
   registerCompile(ir.appId, ir.intent.archetype, ir, result);
   return result;
 }
+function extractPromptPreview(ir) {
+  const turn = ir.currentTurn?.content;
+  if (turn) return turn.slice(0, 200);
+  const lastHist = ir.history?.[ir.history.length - 1]?.content;
+  if (lastHist) return lastHist.slice(0, 200);
+  return void 0;
+}
 function withStructuredOutput(response, ir) {
   if (!ir.constraints?.structuredOutput) return response;
   if (!response.text) return response;

package/dist/{profiles-C5lVqF8_.d.ts → profiles-CDttLtaD.d.ts} RENAMED

@@ -386,6 +386,38 @@ interface RecordInput {
      * the originally-requested model.
      */
     actualModel?: string;
+    /**
+     * Override `mutations_applied` for this outcome. Set by `call()` when
+     * fallback fires — the served compile's mutations (which actually shaped
+     * the request that went on the wire) replace the initial compile's
+     * mutations (registered against the handle). Without this override, fallback
+     * traffic is attributed to the initial compile's mutations and the brain's
+     * mutation effectiveness stats become misleading.
+     *
+     * alpha.4: extends s11 truth-in-logging to mutations.
+     */
+    mutationsApplied?: string[];
+    /**
+     * Cache read input tokens, when supported by the provider.
+     * - Anthropic: `usage.cache_read_input_tokens`
+     * - Google (implicit caching): `usageMetadata.cachedContentTokenCount`
+     * - OpenAI: `usage.prompt_tokens_details.cached_tokens`
+     *
+     * Powers the cost-and-efficiency-watcher (interfaces/kgauto.md, alpha.4):
+     * `tokens_in - cache_read_input_tokens` is the un-cached new context per call.
+     */
+    cacheReadInputTokens?: number;
+    /**
+     * Cache creation input tokens (Anthropic-specific).
+     * `usage.cache_creation_input_tokens`. The first call that pays the 25%
+     * upcharge to write a cache marker; subsequent calls hit `cacheRead`.
+     */
+    cacheCreationInputTokens?: number;
+    /**
+     * Time to first token (ms). Optional; populated when the provider/SDK
+     * surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
+     */
+    ttftMs?: number;
 }
 /**

package/dist/{profiles-BiyrF36f.d.mts → profiles-CH_nKPjp.d.mts} RENAMED

@@ -386,6 +386,38 @@ interface RecordInput {
      * the originally-requested model.
      */
     actualModel?: string;
+    /**
+     * Override `mutations_applied` for this outcome. Set by `call()` when
+     * fallback fires — the served compile's mutations (which actually shaped
+     * the request that went on the wire) replace the initial compile's
+     * mutations (registered against the handle). Without this override, fallback
+     * traffic is attributed to the initial compile's mutations and the brain's
+     * mutation effectiveness stats become misleading.
+     *
+     * alpha.4: extends s11 truth-in-logging to mutations.
+     */
+    mutationsApplied?: string[];
+    /**
+     * Cache read input tokens, when supported by the provider.
+     * - Anthropic: `usage.cache_read_input_tokens`
+     * - Google (implicit caching): `usageMetadata.cachedContentTokenCount`
+     * - OpenAI: `usage.prompt_tokens_details.cached_tokens`
+     *
+     * Powers the cost-and-efficiency-watcher (interfaces/kgauto.md, alpha.4):
+     * `tokens_in - cache_read_input_tokens` is the un-cached new context per call.
+     */
+    cacheReadInputTokens?: number;
+    /**
+     * Cache creation input tokens (Anthropic-specific).
+     * `usage.cache_creation_input_tokens`. The first call that pays the 25%
+     * upcharge to write a cache marker; subsequent calls hit `cacheRead`.
+     */
+    cacheCreationInputTokens?: number;
+    /**
+     * Time to first token (ms). Optional; populated when the provider/SDK
+     * surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
+     */
+    ttftMs?: number;
 }
 /**

package/dist/profiles.d.mts CHANGED

@@ -1,2 +1,2 @@
-export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BiyrF36f.mjs';
+export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
 import './dialect.mjs';

package/dist/profiles.d.ts CHANGED

@@ -1,2 +1,2 @@
-export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-C5lVqF8_.js';
+export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
 import './dialect.js';

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@warmdrift/kgauto-compiler",
-  "version": "2.0.0-alpha.3",
+  "version": "2.0.0-alpha.4",
   "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",