npm - @warmdrift/kgauto-compiler - Versions diffs - 2.0.0-alpha.3 - Mend

@warmdrift/kgauto-compiler 2.0.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/README.md +198 -0
package/dist/chunk-5TI6PNSK.mjs +95 -0
package/dist/chunk-MBEI5UOM.mjs +409 -0
package/dist/dialect.d.mts +99 -0
package/dist/dialect.d.ts +99 -0
package/dist/dialect.js +127 -0
package/dist/dialect.mjs +22 -0
package/dist/index.d.mts +238 -0
package/dist/index.d.ts +238 -0
package/dist/index.js +1804 -0
package/dist/index.mjs +1286 -0
package/dist/profiles-BiyrF36f.d.mts +489 -0
package/dist/profiles-C5lVqF8_.d.ts +489 -0
package/dist/profiles.d.mts +2 -0
package/dist/profiles.d.ts +2 -0
package/dist/profiles.js +437 -0
package/dist/profiles.mjs +14 -0
package/package.json +59 -0

package/dist/profiles.js ADDED Viewed

@@ -0,0 +1,437 @@
+"use strict";
+var __defProp = Object.defineProperty; // Short aliases for the Object built-ins used by the helpers below.
+var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+var __getOwnPropNames = Object.getOwnPropertyNames;
+var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __export = (target, all) => { // Defines an enumerable getter on `target` for every key in `all`; each value in `all` is used as the getter, so it is only evaluated on access.
+  for (var name in all)
+    __defProp(target, name, { get: all[name], enumerable: true });
+};
+var __copyProps = (to, from, except, desc) => { // Mirrors the own properties of `from` onto `to` as getters, skipping keys `to` already owns and the key named by `except`; returns `to`.
+  if (from && typeof from === "object" || typeof from === "function") {
+    for (let key of __getOwnPropNames(from))
+      if (!__hasOwnProp.call(to, key) && key !== except)
+        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); // `desc` is assigned inline: the copy is enumerable when the source has no descriptor or the source property is itself enumerable.
+  }
+  return to;
+};
+var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // Creates a fresh object carrying `__esModule: true` and copies `mod`'s properties onto it.
+// src/profiles.ts
+var profiles_exports = {}; // Namespace object that carries this module's public exports.
+__export(profiles_exports, { // Registered as getters, so the bindings declared further down the file are only read when a consumer accesses them.
+  ALIASES: () => ALIASES,
+  allProfiles: () => allProfiles,
+  getProfile: () => getProfile,
+  profilesByProvider: () => profilesByProvider,
+  tryGetProfile: () => tryGetProfile
+});
+module.exports = __toCommonJS(profiles_exports); // Publishes a copy of the namespace, flagged with __esModule, as the CommonJS export.
+var ANTHROPIC_LOWERING_BASE = { // Shared `lowering` settings; every Anthropic entry in PROFILES_RAW references this same object.
+  system: { mode: "inline" },
+  cache: {
+    strategy: "cache_control",
+    minTokens: 1024,
+    discount: 0.1, // presumably the multiplier applied to cached input cost (a profile note describes 0.1 as a "10x" cache discount) — TODO confirm
+    ttlSeconds: 300
+  },
+  tools: { format: "anthropic" }
+};
+var GOOGLE_LOWERING_BASE = { // Base `lowering` settings spread into each Google profile, which then adds `thinking` (gemini-3.1-pro-preview also overrides cache.discount).
+  system: { mode: "separate", field: "systemInstruction" },
+  cache: {
+    strategy: "cachedContent",
+    minTokens: 4096,
+    discount: 0.25, // presumably the multiplier applied to cached input cost (a profile note describes 0.25 as a "4x" cache discount) — TODO confirm
+    ttlSeconds: 3600
+  },
+  tools: { format: "google" }
+};
+var PROFILES_RAW = [ // Table of per-model profile records; it backs PROFILE_INDEX (keyed by `id`), allProfiles() and profilesByProvider().
+  // ── Anthropic ──
+  {
+    id: "claude-opus-4-7",
+    verifiedAgainstDocs: "2026-05-08",
+    provider: "anthropic",
+    status: "current",
+    maxContextTokens: 1e6, // 1,000,000
+    maxOutputTokens: 128e3, // 128,000
+    maxTools: 64,
+    parallelToolCalls: true,
+    structuredOutput: "grammar",
+    systemPromptMode: "inline",
+    streaming: true,
+    cliffs: [],
+    costInputPer1m: 5, // presumably USD per 1M input tokens (the Gemini cliff reasons quote matching "$/M" figures) — TODO confirm
+    costOutputPer1m: 25,
+    lowering: ANTHROPIC_LOWERING_BASE, // shared object reference, not a per-profile copy
+    recovery: [
+      {
+        signal: "rate_limit",
+        action: "escalate",
+        reason: "429 from Anthropic \u2014 escalate to fallback chain"
+      },
+      {
+        signal: "model_not_found",
+        action: "escalate",
+        reason: "Model deprecated/renamed \u2014 escalate (L-061)"
+      }
+    ],
+    strengths: ["reasoning", "agentic_coding", "long_context", "reliable_tool_use", "structured_output"],
+    weaknesses: ["cost", "latency"],
+    notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output."
+  },
+  {
+    id: "claude-opus-4-6",
+    verifiedAgainstDocs: "2026-05-08",
+    provider: "anthropic",
+    status: "legacy",
+    maxContextTokens: 1e6,
+    maxOutputTokens: 128e3,
+    maxTools: 64,
+    parallelToolCalls: true,
+    structuredOutput: "grammar",
+    systemPromptMode: "inline",
+    streaming: true,
+    cliffs: [],
+    costInputPer1m: 5,
+    costOutputPer1m: 25,
+    lowering: ANTHROPIC_LOWERING_BASE,
+    recovery: [
+      {
+        signal: "rate_limit",
+        action: "escalate",
+        reason: "429 from Anthropic \u2014 escalate to fallback chain"
+      },
+      {
+        signal: "model_not_found",
+        action: "escalate",
+        reason: "Model deprecated/renamed \u2014 escalate (L-061)"
+      }
+    ],
+    strengths: ["reasoning", "long_context", "reliable_tool_use", "structured_output", "extended_thinking"],
+    weaknesses: ["cost", "latency"],
+    notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only)."
+  },
+  {
+    id: "claude-sonnet-4-6",
+    verifiedAgainstDocs: "2026-05-08",
+    provider: "anthropic",
+    status: "current",
+    maxContextTokens: 1e6,
+    maxOutputTokens: 64e3,
+    maxTools: 64,
+    parallelToolCalls: true,
+    structuredOutput: "grammar",
+    systemPromptMode: "inline",
+    streaming: true,
+    cliffs: [],
+    costInputPer1m: 3,
+    costOutputPer1m: 15,
+    lowering: ANTHROPIC_LOWERING_BASE,
+    recovery: [
+      { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" },
+      { signal: "model_not_found", action: "escalate", reason: "Deprecated \u2014 escalate (L-061)" }
+    ],
+    strengths: ["quality", "tool_use", "long_context", "cache_friendly", "extended_thinking"],
+    weaknesses: [],
+    notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output."
+  },
+  {
+    id: "claude-haiku-4-5",
+    verifiedAgainstDocs: "2026-05-08",
+    provider: "anthropic",
+    status: "current",
+    maxContextTokens: 2e5,
+    maxOutputTokens: 64e3,
+    maxTools: 32,
+    parallelToolCalls: true,
+    structuredOutput: "grammar",
+    systemPromptMode: "inline",
+    streaming: true,
+    cliffs: [
+      {
+        metric: "tool_count",
+        threshold: 16,
+        action: "drop_to_top_relevant",
+        reason: "Haiku reliability degrades above ~16 tools"
+      }
+    ],
+    costInputPer1m: 1,
+    costOutputPer1m: 5,
+    lowering: ANTHROPIC_LOWERING_BASE,
+    recovery: [
+      { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to Sonnet" }
+    ],
+    strengths: ["speed", "cost", "classification", "cache_friendly", "extended_thinking"],
+    weaknesses: ["complex_reasoning", "large_tool_sets"],
+    notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`."
+  },
+  // ── Google ──
+  {
+    id: "gemini-2.5-flash",
+    verifiedAgainstDocs: "2026-05-08",
+    provider: "google",
+    status: "current",
+    maxContextTokens: 1048576,
+    maxOutputTokens: 65535,
+    maxTools: 128,
+    parallelToolCalls: true,
+    structuredOutput: "native",
+    systemPromptMode: "separate",
+    streaming: true,
+    cliffs: [
+      {
+        metric: "input_tokens",
+        threshold: 8e3, // 8,000
+        action: "downgrade_quality_warning",
+        reason: "Quality degrades significantly above ~8K context tokens"
+      },
+      {
+        metric: "tool_count",
+        threshold: 20,
+        action: "drop_to_top_relevant",
+        reason: "Tool reliability drops above ~20 tools (despite 128 hard limit)"
+      },
+      {
+        metric: "thinking_with_short_output",
+        threshold: 1,
+        action: "force_thinking_budget_zero",
+        reason: "Thinking tokens consume maxOutputTokens \u2014 empty response if drained"
+      },
+      {
+        // s11 trust artifact (2026-05-02): brain showed 5/5 empty rate on
+        // tt-intelligence/summarize/gemini-2.5-flash with tools offered.
+        // v1's disable_thinking_for_short_output already fired and didn't
+        // help — disabling thinking is necessary but not sufficient. Tools
+        // present + summarize intent confuses Flash into a no-output state
+        // (likely tool-decision purgatory). Strip tools entirely for this
+        // archetype on this model.
+        metric: "tool_count",
+        threshold: 1,
+        whenIntent: "summarize",
+        action: "strip_tools",
+        reason: "Gemini Flash returns empty when summarize intent has tools offered (5/5 empty rate observed in v1 prod 2026-04-19, replayed into v2 brain 2026-04-29)"
+      }
+    ],
+    costInputPer1m: 0.3,
+    costOutputPer1m: 2.5,
+    lowering: {
+      ...GOOGLE_LOWERING_BASE,
+      thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
+    },
+    recovery: [
+      {
+        signal: "empty_response_after_tool",
+        action: "retry_with_params",
+        retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
+        maxRetries: 1,
+        reason: "Known: empty after tool result \u2014 retry with thinking off"
+      },
+      {
+        signal: "empty_response",
+        action: "retry_with_params",
+        retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
+        maxRetries: 1,
+        reason: "Empty response \u2014 try with thinking off"
+      },
+      {
+        signal: "malformed_function_call",
+        action: "escalate",
+        reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
+      }
+    ],
+    strengths: ["speed", "volume", "classification", "1m_context", "cost"],
+    weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
+    notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs."
+  },
+  {
+    id: "gemini-2.5-pro",
+    verifiedAgainstDocs: "2026-05-08",
+    provider: "google",
+    status: "current",
+    maxContextTokens: 1048576,
+    maxOutputTokens: 65535,
+    maxTools: 128,
+    parallelToolCalls: true,
+    structuredOutput: "native",
+    systemPromptMode: "separate",
+    streaming: true,
+    cliffs: [
+      {
+        metric: "input_tokens",
+        threshold: 2e5, // 200,000
+        action: "downgrade_quality_warning",
+        reason: "Pricing doubles above 200K: input $1.25\u2192$2.50/M, output $10\u2192$15/M"
+      }
+    ],
+    costInputPer1m: 1.25,
+    costOutputPer1m: 10,
+    lowering: {
+      ...GOOGLE_LOWERING_BASE,
+      thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
+    },
+    recovery: [
+      {
+        signal: "malformed_function_call",
+        action: "escalate",
+        reason: "MALFORMED_FUNCTION_CALL \u2014 escalate"
+      }
+    ],
+    strengths: ["reasoning", "1m_context", "structured_output", "tool_use"],
+    weaknesses: ["pricing_above_200k"]
+  },
+  {
+    id: "gemini-3.1-pro-preview",
+    verifiedAgainstDocs: "2026-05-08",
+    provider: "google",
+    status: "preview",
+    maxContextTokens: 1048576,
+    maxOutputTokens: 65535,
+    maxTools: 128,
+    parallelToolCalls: true,
+    structuredOutput: "native",
+    systemPromptMode: "separate",
+    streaming: true,
+    cliffs: [
+      {
+        metric: "input_tokens",
+        threshold: 2e5,
+        action: "downgrade_quality_warning",
+        reason: "Pricing doubles above 200K: input $2\u2192$4/M, output $12\u2192$18/M"
+      }
+    ],
+    costInputPer1m: 2,
+    costOutputPer1m: 12,
+    lowering: {
+      ...GOOGLE_LOWERING_BASE,
+      cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 }, // keeps the base cache settings and overrides only `discount` (0.25 -> 0.1)
+      thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
+    },
+    recovery: [
+      {
+        signal: "malformed_function_call",
+        action: "escalate",
+        reason: "MALFORMED_FUNCTION_CALL \u2014 escalate"
+      }
+    ],
+    strengths: ["reasoning", "1m_context", "agentic_coding", "structured_output", "tool_use"],
+    weaknesses: ["cost", "preview_status", "pricing_above_200k"],
+    notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA."
+  },
+  // ── DeepSeek ──
+  // 2026-05-08 audit (L-073): DeepSeek's `deepseek-chat` was silently aliased
+  // to `deepseek-v4-flash` non-thinking mode. Old kgauto profile claimed 64k
+  // context + $0.27/$1.10 — actual is 1M context + $0.14/$0.28. Now modeled
+  // as: V4-Flash + V4-Pro as canonical profiles; deepseek-chat and
+  // deepseek-reasoner registered as aliases (see ALIASES below).
+  {
+    id: "deepseek-v4-flash",
+    verifiedAgainstDocs: "2026-05-08",
+    provider: "deepseek",
+    status: "current",
+    maxContextTokens: 1e6,
+    maxOutputTokens: 384e3,
+    maxTools: 16,
+    parallelToolCalls: false,
+    structuredOutput: "native",
+    systemPromptMode: "inline",
+    streaming: true,
+    cliffs: [
+      {
+        metric: "tool_count",
+        threshold: 1,
+        action: "drop_to_top_relevant",
+        reason: "Sequential tool calls only \u2014 L-040"
+      }
+    ],
+    costInputPer1m: 0.14,
+    costOutputPer1m: 0.28,
+    lowering: {
+      system: { mode: "inline" },
+      cache: { strategy: "unsupported" },
+      tools: { format: "deepseek" }
+    },
+    recovery: [
+      { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" }
+    ],
+    strengths: ["cost", "1m_context", "json_output", "code", "reasoning"],
+    weaknesses: ["parallel_tools", "large_tool_sets"],
+    notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES."
+  },
+  {
+    id: "deepseek-v4-pro",
+    verifiedAgainstDocs: "2026-05-08",
+    provider: "deepseek",
+    status: "current",
+    maxContextTokens: 1e6,
+    maxOutputTokens: 384e3,
+    maxTools: 16,
+    parallelToolCalls: false,
+    structuredOutput: "native",
+    systemPromptMode: "inline",
+    streaming: true,
+    cliffs: [
+      {
+        metric: "tool_count",
+        threshold: 1,
+        action: "drop_to_top_relevant",
+        reason: "Sequential tool calls only \u2014 L-040"
+      }
+    ],
+    // Profile carries REGULAR pricing, not the 75%-off promo (ends 2026-05-31).
+    // Under-estimating cost is worse than over-estimating for budget caps.
+    costInputPer1m: 1.74,
+    costOutputPer1m: 3.48,
+    lowering: {
+      system: { mode: "inline" },
+      cache: { strategy: "unsupported" },
+      tools: { format: "deepseek" }
+    },
+    recovery: [
+      { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" }
+    ],
+    strengths: ["quality", "reasoning", "1m_context", "json_output", "code", "extended_thinking"],
+    weaknesses: ["parallel_tools", "large_tool_sets"],
+    notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking."
+  }
+];
+var ALIASES = { // Maps alternate model ids to the `id` of a profile in PROFILES_RAW; consulted by canonicalId() before the index lookup.
+  // DeepSeek's own model routing — both names served by V4-Flash.
+  "deepseek-chat": "deepseek-v4-flash",
+  "deepseek-reasoner": "deepseek-v4-flash",
+  // Legacy kgauto typo — actual API alias is dash-form (alpha.1 had dot).
+  "claude-haiku-4.5": "claude-haiku-4-5"
+};
+function canonicalId(id) {
+  return ALIASES[id] ?? id;
+}
+var PROFILE_INDEX = new Map(
+  PROFILES_RAW.map((p) => [p.id, p])
+);
+function getProfile(id) {
+  const canonical = canonicalId(id);
+  const p = PROFILE_INDEX.get(canonical);
+  if (!p) {
+    const known = [...PROFILE_INDEX.keys(), ...Object.keys(ALIASES)].join(", ");
+    throw new Error(`Unknown model id: "${id}". Known: ${known}`);
+  }
+  return p;
+}
+function tryGetProfile(id) {
+  return PROFILE_INDEX.get(canonicalId(id));
+}
+function allProfiles() {
+  return PROFILES_RAW;
+}
+function profilesByProvider(provider) {
+  return PROFILES_RAW.filter((p) => p.provider === provider);
+}
+// Annotate the CommonJS export names for ESM import in node (the leading `0 &&` short-circuits, so this assignment never actually runs):
+0 && (module.exports = {
+  ALIASES,
+  allProfiles,
+  getProfile,
+  profilesByProvider,
+  tryGetProfile
+});

package/dist/profiles.mjs ADDED Viewed

@@ -0,0 +1,14 @@
+import {
+  ALIASES,
+  allProfiles,
+  getProfile,
+  profilesByProvider,
+  tryGetProfile
+} from "./chunk-MBEI5UOM.mjs";
+export {
+  ALIASES,
+  allProfiles,
+  getProfile,
+  profilesByProvider,
+  tryGetProfile
+};

package/package.json ADDED Viewed

@@ -0,0 +1,59 @@
+{
+  "name": "@warmdrift/kgauto-compiler",
+  "version": "2.0.0-alpha.3",
+  "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
+  "main": "./dist/index.js",
+  "module": "./dist/index.mjs",
+  "types": "./dist/index.d.ts",
+  "exports": {
+    ".": {
+      "import": {
+        "types": "./dist/index.d.mts",
+        "default": "./dist/index.mjs"
+      },
+      "require": {
+        "types": "./dist/index.d.ts",
+        "default": "./dist/index.js"
+      }
+    },
+    "./dialect": {
+      "import": {
+        "types": "./dist/dialect.d.mts",
+        "default": "./dist/dialect.mjs"
+      },
+      "require": {
+        "types": "./dist/dialect.d.ts",
+        "default": "./dist/dialect.js"
+      }
+    },
+    "./profiles": {
+      "import": {
+        "types": "./dist/profiles.d.mts",
+        "default": "./dist/profiles.mjs"
+      },
+      "require": {
+        "types": "./dist/profiles.d.ts",
+        "default": "./dist/profiles.js"
+      }
+    }
+  },
+  "files": ["dist", "README.md"],
+  "scripts": {
+    "build": "tsup src/index.ts src/dialect.ts src/profiles.ts --format cjs,esm --dts --clean",
+    "test": "vitest run",
+    "test:watch": "vitest",
+    "typecheck": "tsc --noEmit",
+    "prepublishOnly": "npm run typecheck && npm run test && npm run build"
+  },
+  "keywords": [
+    "ai",
+    "llm",
+    "prompt-compiler",
+    "model-router",
+    "anthropic",
+    "openai",
+    "gemini",
+    "deepseek",
+    "prompt-cache",
+    "self-improving"
+  ],
+  "license": "MIT",
+  "dependencies": {},
+  "peerDependencies": {
+    "js-tiktoken": ">=1.0.0"
+  },
+  "peerDependenciesMeta": {
+    "js-tiktoken": { "optional": true }
+  },
+  "devDependencies": {
+    "@types/node": "^22.10.0",
+    "tsup": "^8.4.0",
+    "typescript": "^5.7.0",
+    "vitest": "^3.1.0"
+  }
+}