npm - @warmdrift/kgauto-compiler - Versions diffs - 2.0.0-alpha.15 → 2.0.0-alpha.17 - Mend

@warmdrift/kgauto-compiler 2.0.0-alpha.15 → 2.0.0-alpha.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/dist/{chunk-SFF5EVTL.mjs → chunk-7MTHFSNY.mjs} +209 -0
package/dist/chunk-NUTC7NUC.mjs +298 -0
package/dist/glassbox/index.d.mts +159 -0
package/dist/glassbox/index.d.ts +159 -0
package/dist/glassbox/index.js +300 -0
package/dist/glassbox/index.mjs +20 -0
package/dist/index.d.mts +4 -2
package/dist/index.d.ts +4 -2
package/dist/index.js +624 -9
package/dist/index.mjs +136 -10
package/dist/{profiles-DTnIzGsA.d.mts → ir-C3P4gDt0.d.mts} +30 -134
package/dist/{profiles-D0y6aLk0.d.ts → ir-CFHU3BUT.d.ts} +30 -134
package/dist/profiles.d.mts +137 -2
package/dist/profiles.d.ts +137 -2
package/dist/profiles.js +209 -0
package/dist/profiles.mjs +1 -1
package/package.json +7 -2

package/dist/index.js (changed)

@@ -1475,6 +1475,215 @@ var PROFILES_RAW = [
       // sequential tools — same as V4-Flash
     }
   },
+  // ── OpenAI ──
+  // alpha.16 (2026-05-17): close the half-supported provider gap. env.ts
+  // already registered OPENAI_API_KEY + executeOpenAI + normalizeOpenAILike
+  // + lowerOpenAI all existed; profile entries were missing, so the
+  // alpha.10 auto-filter would mark openai-keyed models reachable but
+  // there were no profiles to filter IN. Half-supported is now fully
+  // supported. PB request `openai-provider-profiles` (2026-05-16).
+  //
+  // Profile data verified against developers.openai.com/api/docs/pricing
+  // + per-model pages 2026-05-17. L-049/L-081 step-zero: no AI-trained
+  // numbers — fetched live from OpenAI's docs. As of 2026-05, OpenAI's
+  // current flagship is gpt-5.5 (2025-12 cutoff); gpt-5.4-{base,mini,nano}
+  // are the workhorse family. gpt-4.1 + gpt-4o are legacy.
+  //
+  // Both 5.5 and 5.4 carry a 272K input-token pricing cliff (2x input,
+  // 1.5x output beyond that). Modeled as a `downgrade_quality_warning`
+  // cliff because it ranks the model down at large-context shapes — the
+  // semantics of "this model is now 2x more expensive" map onto the
+  // existing penalty mechanism. Cost-watcher will catch high-context
+  // spikes empirically; the cliff prevents naive routing into the doubled
+  // pricing zone.
+  {
+    id: "gpt-5.5",
+    verifiedAgainstDocs: "2026-05-17",
+    provider: "openai",
+    status: "current",
+    maxContextTokens: 105e4,
+    maxOutputTokens: 128e3,
+    maxTools: 64,
+    parallelToolCalls: true,
+    structuredOutput: "native",
+    systemPromptMode: "inline",
+    streaming: true,
+    cliffs: [
+      {
+        metric: "input_tokens",
+        threshold: 272e3,
+        action: "downgrade_quality_warning",
+        reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
+      }
+    ],
+    costInputPer1m: 5,
+    costOutputPer1m: 30,
+    lowering: {
+      system: { mode: "inline" },
+      // OpenAI caching is implicit (auto-applied to repeated prefixes
+      // ≥1024 tokens for prompt_tokens_details.cached_tokens). No
+      // wire-format marker. Discount: 10x for cached input ($0.50/$5.00).
+      cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
+      tools: { format: "openai" }
+    },
+    recovery: [
+      { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
+      { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
+    ],
+    strengths: ["reasoning", "agentic_coding", "long_context", "structured_output", "reliable_tool_use", "reasoning_effort_knob"],
+    weaknesses: ["cost", "pricing_cliff_at_272k"],
+    notes: "OpenAI frontier (2026-05). 1M context (1.05M total), 128K max output, 2025-12 cutoff. Reasoning effort knob (none/low/medium/high/xhigh). Pricing cliff at 272K input.",
+    // Frontier-tier perf hypothesis. Anchored to Opus 4.7 row (similar
+    // price/positioning). Brain evidence will refine; no telemetry yet.
+    archetypePerf: {
+      critique: 9,
+      plan: 9,
+      generate: 9,
+      ask: 9,
+      extract: 9,
+      transform: 9,
+      hunt: 8,
+      // parallel tool support good but cliff at 272K hurts deep multi-step
+      summarize: 7,
+      // overkill for tolerant archetype
+      classify: 7
+      // overkill; cheaper models cover this
+    }
+  },
+  {
+    id: "gpt-5.4",
+    verifiedAgainstDocs: "2026-05-17",
+    provider: "openai",
+    status: "current",
+    maxContextTokens: 105e4,
+    maxOutputTokens: 128e3,
+    maxTools: 64,
+    parallelToolCalls: true,
+    structuredOutput: "native",
+    systemPromptMode: "inline",
+    streaming: true,
+    cliffs: [
+      {
+        metric: "input_tokens",
+        threshold: 272e3,
+        action: "downgrade_quality_warning",
+        reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
+      }
+    ],
+    costInputPer1m: 2.5,
+    costOutputPer1m: 15,
+    lowering: {
+      system: { mode: "inline" },
+      cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
+      tools: { format: "openai" }
+    },
+    recovery: [
+      { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
+      { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
+    ],
+    strengths: ["reasoning", "long_context", "structured_output", "reliable_tool_use"],
+    weaknesses: ["pricing_cliff_at_272k"],
+    notes: "OpenAI workhorse (2026-05). 1M context (1.05M total), 128K max output, 2025-08 cutoff. Pricing cliff at 272K input. Pairs cleanly with Sonnet 4.6 on cost ($2.50/$15.00 vs $3.00/$15.00).",
+    // Anchored to Sonnet 4.6 row (similar price/positioning). Slight
+    // anthropic-side edge on agentic coding per master plan vibe.
+    archetypePerf: {
+      critique: 8,
+      plan: 8,
+      generate: 8,
+      ask: 8,
+      extract: 8,
+      transform: 8,
+      hunt: 7,
+      summarize: 7,
+      classify: 7
+    }
+  },
+  {
+    id: "gpt-5.4-mini",
+    verifiedAgainstDocs: "2026-05-17",
+    provider: "openai",
+    status: "current",
+    maxContextTokens: 4e5,
+    maxOutputTokens: 128e3,
+    maxTools: 64,
+    parallelToolCalls: true,
+    structuredOutput: "native",
+    systemPromptMode: "inline",
+    streaming: true,
+    cliffs: [],
+    costInputPer1m: 0.75,
+    costOutputPer1m: 4.5,
+    lowering: {
+      system: { mode: "inline" },
+      cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
+      tools: { format: "openai" }
+    },
+    recovery: [
+      { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
+      { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
+    ],
+    strengths: ["cost", "speed", "agentic_coding", "structured_output", "reliable_tool_use"],
+    weaknesses: ["reasoning_depth"],
+    notes: "OpenAI mini-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. OpenAI describes as 'strongest mini model for coding, computer use, subagents.' Cache discount 10x ($0.075 input).",
+    // Mini-tier hypothesis. Anchored to Haiku 4.5 + Flash row pricing.
+    // Cost is slightly higher than Haiku ($0.75 vs $0.50 input) but
+    // OpenAI claims strong coding/subagent perf.
+    archetypePerf: {
+      ask: 7,
+      generate: 7,
+      extract: 7,
+      transform: 7,
+      classify: 7,
+      summarize: 7,
+      hunt: 7,
+      plan: 6,
+      critique: 5
+      // reasoning depth gap — frontier models handle this
+    }
+  },
+  {
+    id: "gpt-5.4-nano",
+    verifiedAgainstDocs: "2026-05-17",
+    provider: "openai",
+    status: "current",
+    maxContextTokens: 4e5,
+    maxOutputTokens: 128e3,
+    maxTools: 64,
+    parallelToolCalls: true,
+    structuredOutput: "native",
+    systemPromptMode: "inline",
+    streaming: true,
+    cliffs: [],
+    costInputPer1m: 0.2,
+    costOutputPer1m: 1.25,
+    lowering: {
+      system: { mode: "inline" },
+      cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
+      tools: { format: "openai" }
+    },
+    recovery: [
+      { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
+      { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
+    ],
+    strengths: ["cost", "speed", "volume", "structured_output"],
+    weaknesses: ["reasoning_depth", "no_computer_use"],
+    notes: "OpenAI nano-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. 'Cheapest GPT-5.4-class for simple high-volume tasks.' No fine-tuning, no computer-use tools. Cache discount 10x.",
+    // Nano-tier. Anchored to Flash-Lite row ($0.10/$0.40 vs nano's
+    // $0.20/$1.25). Slightly more expensive than Flash-Lite but with
+    // OpenAI brand reliability. Good fit for classify/summarize floor.
+    archetypePerf: {
+      classify: 7,
+      summarize: 6,
+      ask: 6,
+      transform: 6,
+      extract: 6,
+      generate: 5,
+      hunt: 5,
+      plan: 4,
+      critique: 3
+      // not for reasoning archetypes
+    }
+  },
   // ── Auto-onboarded (UNVERIFIED) ──
   // Cloned by scripts/auto-onboard-models.mjs from a same-family template.
   // Each entry's pricing/context/cliffs/lowering reflects the template, NOT
@@ -2563,10 +2772,14 @@ var loadChainsFromBrain = createBrainQueryCache({
 // src/fallback.ts
 var STARTER_CHAINS = {
   // Reasoning floor — never degrade. Walk UP on 429 to Opus → cross-provider.
+  // alpha.16: gpt-5.5 appended as third-provider critique floor (frontier-tier,
+  // archetypePerf=9). Cross-provider-tail invariant has somewhere to land when
+  // both Anthropic + Google are unreachable (consumer adds only OpenAI key).
   critique: [
     "claude-opus-4-7",
     "claude-sonnet-4-6",
-    "gemini-2.5-pro"
+    "gemini-2.5-pro",
+    "gpt-5.5"
   ],
   // Reasoning matters — Sonnet primary; walk UP to Opus on 429 (rare exception
   // to "always cheaper"); cross-provider via Pro; DeepSeek Pro as tier 3 floor.
@@ -2577,25 +2790,29 @@ var STARTER_CHAINS = {
     "deepseek-v4-pro"
   ],
   // Quality + cost match. Walk Sonnet → Haiku same-provider, Pro cross,
-  // Flash floor for the open-posture chain.
+  // gpt-5.4-mini as third-provider tail (alpha.16 — closes the mono-Anthropic
+  // gap when consumer has only ANTHROPIC + OPENAI keys; archetypePerf=7).
   generate: [
     "claude-sonnet-4-6",
     "claude-haiku-4-5",
     "gemini-2.5-pro",
-    "gemini-2.5-flash"
+    "gpt-5.4-mini"
   ],
   ask: [
     "claude-sonnet-4-6",
     "claude-haiku-4-5",
     "gemini-2.5-pro",
-    "gemini-2.5-flash"
+    "gpt-5.4-mini"
   ],
   // Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff),
-  // DeepSeek skipped (no brain evidence). Floor at Haiku.
+  // DeepSeek skipped (no brain evidence). Floor at Haiku. alpha.16: gpt-5.4
+  // appended as third-provider extract floor (archetypePerf=8, native
+  // structured-output support).
   extract: [
     "claude-sonnet-4-6",
     "claude-haiku-4-5",
-    "gemini-2.5-pro"
+    "gemini-2.5-pro",
+    "gpt-5.4"
   ],
   // Forgiving archetype — Sonnet primary but Flash safely floors it.
   transform: [
@@ -2708,9 +2925,321 @@ function ensureCrossProviderTail(opts) {
   return { chain };
 }
+// src/glassbox/types.ts
+// Idle time-to-live for a Glassbox stream, in milliseconds (6e4 ms = 60 s).
+// The pub/sub backends below re-arm this window each time an event is delivered.
+var GLASSBOX_STREAM_TTL_MS = 6e4;
+// src/glassbox/pubsub-memory.ts
+/**
+ * In-process pub/sub backend: fans events out to ReadableStream subscribers
+ * keyed by trace id. Each subscriber carries a rolling idle timer of
+ * GLASSBOX_STREAM_TTL_MS that closes its stream when no event arrives in time.
+ */
+var MemoryPubSub = class {
+  /** traceId -> Set of subscriber records ({ controller, ttlTimer, closed }). */
+  subscribers = /* @__PURE__ */ new Map();
+  /**
+   * Deliver `event` to every open subscriber of `traceId`. A no-op when nobody
+   * is subscribed. Each successful delivery re-arms that subscriber's TTL.
+   */
+  async publish(traceId, event) {
+    const subs = this.subscribers.get(traceId);
+    if (!subs || subs.size === 0) return;
+    for (const sub of subs) {
+      if (sub.closed) continue;
+      try {
+        sub.controller.enqueue(event);
+      } catch {
+        // enqueue() throws once the underlying stream is no longer readable.
+        // Flag the record AND evict it here: closeSubscriber() returns early
+        // for records already flagged closed, so without this nothing would
+        // ever remove the record from the map or stop its TTL timer.
+        sub.closed = true;
+        this.removeSubscriber(traceId, sub);
+        continue;
+      }
+      this.refreshTtl(traceId, sub);
+    }
+  }
+  /**
+   * Open a stream of events for `traceId`. The subscriber is registered (and
+   * its first TTL timer armed) in the stream's start() hook; cancelling the
+   * stream unregisters it.
+   * @returns {ReadableStream} stream of event objects passed to publish().
+   */
+  subscribe(traceId) {
+    const self = this;
+    let sub;
+    return new ReadableStream({
+      start(controller) {
+        sub = {
+          controller,
+          ttlTimer: setTimeout(() => {
+            self.closeSubscriber(traceId, sub);
+          }, GLASSBOX_STREAM_TTL_MS),
+          closed: false
+        };
+        let set = self.subscribers.get(traceId);
+        if (!set) {
+          set = /* @__PURE__ */ new Set();
+          self.subscribers.set(traceId, set);
+        }
+        set.add(sub);
+      },
+      cancel() {
+        if (sub) self.removeSubscriber(traceId, sub);
+      }
+    });
+  }
+  /**
+   * Refresh the rolling TTL for a subscriber after an event lands. Replaces
+   * the existing timer with a fresh GLASSBOX_STREAM_TTL_MS one.
+   */
+  refreshTtl(traceId, sub) {
+    clearTimeout(sub.ttlTimer);
+    sub.ttlTimer = setTimeout(() => {
+      this.closeSubscriber(traceId, sub);
+    }, GLASSBOX_STREAM_TTL_MS);
+  }
+  /**
+   * Close the subscriber's stream cleanly and remove it from the fan-out set.
+   * Idempotent: returns immediately for a record already flagged closed.
+   */
+  closeSubscriber(traceId, sub) {
+    if (sub.closed) return;
+    sub.closed = true;
+    clearTimeout(sub.ttlTimer);
+    try {
+      sub.controller.close();
+    } catch {
+      // Stream already closed/errored/cancelled: nothing left to close.
+    }
+    this.removeSubscriber(traceId, sub);
+  }
+  /**
+   * Stop the subscriber's timer and drop it from the set for `traceId`,
+   * deleting the map entry once the set is empty. Does not close the stream.
+   */
+  removeSubscriber(traceId, sub) {
+    clearTimeout(sub.ttlTimer);
+    const set = this.subscribers.get(traceId);
+    if (!set) return;
+    set.delete(sub);
+    if (set.size === 0) this.subscribers.delete(traceId);
+  }
+  /**
+   * Test-only reset. Tears down all subscribers, clears all state. Calling
+   * outside of tests is harmless but cancels every active stream.
+   */
+  _reset() {
+    // Pass the real trace id so each record is removed through the normal
+    // path; deleting from a Map/Set while iterating it is safe in JavaScript.
+    for (const [traceId, set] of this.subscribers) {
+      for (const sub of set) {
+        this.closeSubscriber(traceId, sub);
+      }
+    }
+    this.subscribers.clear();
+  }
+};
+// src/glassbox/pubsub-upstash.ts
+/**
+ * Pub/sub backend over a Redis stream reached through an HTTP command
+ * endpoint: publish() appends with XADD, subscribe() polls with XREAD.
+ */
+var UpstashPubSub = class {
+  url;
+  token;
+  fetchImpl;
+  blockMs;
+  maxLen;
+  /**
+   * @param {object} cfg
+   * @param {string} cfg.url - Command endpoint; one trailing slash is stripped.
+   * @param {string} cfg.token - Sent as a Bearer token on every command.
+   * @param {Function} [cfg.fetchImpl] - fetch-compatible function; defaults to the global fetch.
+   * @param {number} [cfg.blockMs=100] - BLOCK argument (ms) for each XREAD poll.
+   * @param {number} [cfg.maxLen=100] - Approximate MAXLEN cap applied on XADD.
+   */
+  constructor(cfg) {
+    this.url = cfg.url.replace(/\/$/, "");
+    this.token = cfg.token;
+    this.fetchImpl = cfg.fetchImpl ?? globalThis.fetch.bind(globalThis);
+    this.blockMs = cfg.blockMs ?? 100;
+    this.maxLen = cfg.maxLen ?? 100;
+  }
+  /**
+   * Append `event` (JSON-encoded under the "event" field) to the trace's
+   * stream, then reset the key's expiry to the stream TTL in whole seconds.
+   * Rejects if either command fails.
+   */
+  async publish(traceId, event) {
+    const key = streamKey(traceId);
+    const payload = JSON.stringify(event);
+    await this.cmd([
+      "XADD",
+      key,
+      "MAXLEN",
+      "~",
+      String(this.maxLen),
+      "*",
+      "event",
+      payload
+    ]);
+    await this.cmd(["EXPIRE", key, String(Math.ceil(GLASSBOX_STREAM_TTL_MS / 1e3))]);
+  }
+  /**
+   * Stream events appended to the trace's stream after subscription. The
+   * polling loop ends (and the stream closes) on cancel or once
+   * GLASSBOX_STREAM_TTL_MS passes without a delivered batch; a command
+   * failure errors the stream instead.
+   * @returns {ReadableStream} stream of decoded event objects.
+   */
+  subscribe(traceId) {
+    const key = streamKey(traceId);
+    const self = this;
+    let cursor = "$";
+    let cancelled = false;
+    let ttlDeadline = Date.now() + GLASSBOX_STREAM_TTL_MS;
+    return new ReadableStream({
+      async start(controller) {
+        try {
+          // "$" is re-evaluated by the server on every XREAD, so repeating it
+          // on each empty poll drops entries added between two polls. Pin the
+          // cursor to a concrete id once, up front.
+          cursor = await self.resolveTailId(key);
+          while (!cancelled && Date.now() < ttlDeadline) {
+            const resp = await self.cmd([
+              "XREAD",
+              "BLOCK",
+              String(self.blockMs),
+              "STREAMS",
+              key,
+              cursor
+            ]);
+            if (cancelled) break;
+            const parsed = parseXReadResult(resp.result);
+            if (parsed.entries.length === 0) {
+              continue;
+            }
+            for (const entry of parsed.entries) {
+              const evt = decodeEvent(entry.fields);
+              if (evt) {
+                try {
+                  controller.enqueue(evt);
+                } catch {
+                  // Consumer side is gone; stop polling.
+                  cancelled = true;
+                  break;
+                }
+              }
+              cursor = entry.id;
+            }
+            // A delivered batch re-arms the idle deadline.
+            ttlDeadline = Date.now() + GLASSBOX_STREAM_TTL_MS;
+          }
+        } catch (err) {
+          if (!cancelled) {
+            try {
+              controller.error(err);
+            } catch {
+            }
+            return;
+          }
+        }
+        try {
+          controller.close();
+        } catch {
+        }
+      },
+      cancel() {
+        cancelled = true;
+      }
+    });
+  }
+  /**
+   * Resolve the id to start polling from: the newest entry's id, "0-0" when
+   * the stream is empty or absent, or "$" (the previous behaviour) when the
+   * reply has an unexpected shape.
+   */
+  async resolveTailId(key) {
+    const resp = await this.cmd(["XREVRANGE", key, "+", "-", "COUNT", "1"]);
+    const rows = resp.result;
+    if (!Array.isArray(rows)) return "$";
+    if (rows.length === 0) return "0-0";
+    const newest = rows[0];
+    const id = Array.isArray(newest) ? newest[0] : void 0;
+    return typeof id === "string" && /^\d+-\d+$/.test(id) ? id : "$";
+  }
+  /**
+   * POST one command (an array of string arguments) to the endpoint.
+   * @returns {Promise<object>} the parsed JSON reply.
+   * @throws {Error} on a non-2xx status or when the reply carries `error`.
+   */
+  async cmd(args) {
+    const res = await this.fetchImpl(this.url, {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${this.token}`,
+        "Content-Type": "application/json"
+      },
+      body: JSON.stringify(args)
+    });
+    if (!res.ok) {
+      throw new Error(`Upstash ${args[0]} failed: HTTP ${res.status}`);
+    }
+    const json = await res.json();
+    if (json.error) {
+      throw new Error(`Upstash ${args[0]} failed: ${json.error}`);
+    }
+    return json;
+  }
+};
+/** Build the stream key for a trace id: "glassbox:trace:" followed by the id. */
+function streamKey(traceId) {
+  const namespace = "glassbox:trace";
+  return `${namespace}:${traceId}`;
+}
+/**
+ * Decode the JSON stored under a stream entry's "event" field.
+ * @param {object} fields - field/value map of one stream entry.
+ * @returns the parsed object when it has a string `kind` and a numeric `at`;
+ *   otherwise undefined (missing/empty field, invalid JSON, or wrong shape).
+ */
+function decodeEvent(fields) {
+  const serialized = fields["event"];
+  if (!serialized) return void 0;
+  let candidate;
+  try {
+    candidate = JSON.parse(serialized);
+  } catch {
+    return void 0;
+  }
+  // JSON "null" has no properties to inspect; treat it as not-an-event.
+  if (candidate == null) return void 0;
+  const looksLikeEvent = typeof candidate.kind === "string" && typeof candidate.at === "number";
+  return looksLikeEvent ? candidate : void 0;
+}
+/**
+ * Flatten an XREAD-style reply into a list of entries.
+ * Expects `raw` shaped as [[streamName, [[id, [k1, v1, k2, v2, ...]], ...]], ...];
+ * anything that does not match that shape is skipped rather than rejected.
+ * @returns {{ entries: Array<{ id: string, fields: Object<string, string> }> }}
+ */
+function parseXReadResult(raw) {
+  const entries = [];
+  if (!Array.isArray(raw)) return { entries };
+  raw.forEach((streamTuple) => {
+    if (!Array.isArray(streamTuple) || streamTuple.length < 2) return;
+    const rows = streamTuple[1];
+    if (!Array.isArray(rows)) return;
+    rows.forEach((row) => {
+      if (!Array.isArray(row) || row.length < 2) return;
+      const id = String(row[0]);
+      const pairs = row[1];
+      if (!Array.isArray(pairs)) return;
+      const fields = {};
+      let idx = 0;
+      while (idx < pairs.length) {
+        const name = pairs[idx];
+        // Non-string keys are dropped; a missing/null value becomes "".
+        if (typeof name === "string") fields[name] = String(pairs[idx + 1] ?? "");
+        idx += 2;
+      }
+      entries.push({ id, fields });
+    });
+  });
+  return { entries };
+}
+// src/glassbox/emit.ts
+// Lazily created backend, cached after the first getPubSub() call.
+var activePubSub;
+/**
+ * Return the shared pub/sub backend, creating it on first use: the Upstash
+ * backend when both UPSTASH_REDIS_URL and UPSTASH_REDIS_TOKEN are set,
+ * otherwise the in-memory one.
+ */
+function getPubSub() {
+  if (!activePubSub) {
+    const url = readEnv("UPSTASH_REDIS_URL");
+    const token = readEnv("UPSTASH_REDIS_TOKEN");
+    const hasUpstashConfig = Boolean(url && token);
+    activePubSub = hasUpstashConfig ? new UpstashPubSub({ url, token }) : new MemoryPubSub();
+  }
+  return activePubSub;
+}
+/**
+ * Read an environment variable defensively.
+ * @returns the raw (untrimmed) value, or undefined when `process.env` is
+ *   unavailable, the variable is unset or blank, or the lookup throws.
+ */
+function readEnv(key) {
+  let value;
+  try {
+    const env = typeof process === "undefined" ? void 0 : process.env;
+    if (!env) return void 0;
+    value = env[key];
+    if (!value || value.trim() === "") return void 0;
+  } catch {
+    return void 0;
+  }
+  return value;
+}
+/**
+ * Fire-and-forget publish of one Glassbox event. Never throws and never
+ * returns a promise: synchronous failures and asynchronous rejections from
+ * the backend are both discarded, because tracing is best-effort.
+ * @param {string} traceId - Trace to publish on; a falsy id makes this a no-op.
+ * @param {string} kind - Event kind, e.g. "compile.start".
+ * @param {*} data - Payload stored on the event as `data`.
+ */
+function emitGlassboxEvent(traceId, kind, data) {
+  if (!traceId) return;
+  const event = { kind, at: Date.now(), data };
+  try {
+    // Resolve the backend inside the guard too: constructing it can throw
+    // (the Upstash backend binds the global fetch in its constructor), and
+    // that must not escape an emitter whose publish errors are all swallowed.
+    const pending = getPubSub().publish(traceId, event);
+    if (pending && typeof pending.then === "function") {
+      // Promise.resolve adopts any thenable, so a backend returning one
+      // without its own .catch still gets its rejection handled.
+      Promise.resolve(pending).catch(() => {
+      });
+    }
+  } catch {
+  }
+}
+/** Publish a "compile.start" event for the trace. */
+function emitCompileStart(traceId, data) {
+  const kind = "compile.start";
+  emitGlassboxEvent(traceId, kind, data);
+}
+/** Publish a "compile.done" event for the trace. */
+function emitCompileDone(traceId, data) {
+  const kind = "compile.done";
+  emitGlassboxEvent(traceId, kind, data);
+}
+/** Publish an "execute.attempt" event for the trace. */
+function emitExecuteAttempt(traceId, data) {
+  const kind = "execute.attempt";
+  emitGlassboxEvent(traceId, kind, data);
+}
+/** Publish an "execute.success" event for the trace. */
+function emitExecuteSuccess(traceId, data) {
+  const kind = "execute.success";
+  emitGlassboxEvent(traceId, kind, data);
+}
+/** Publish an "advisory.fired" event for the trace. */
+function emitAdvisoryFired(traceId, data) {
+  const kind = "advisory.fired";
+  emitGlassboxEvent(traceId, kind, data);
+}
+/** Publish a "fallback.walked" event for the trace. */
+function emitFallbackWalked(traceId, data) {
+  const kind = "fallback.walked";
+  emitGlassboxEvent(traceId, kind, data);
+}
 // src/call.ts
 async function call(ir, opts = {}) {
+  const traceId = generateTraceId();
+  safeEmit(
+    () => emitCompileStart(traceId, {
+      appId: ir.appId,
+      archetype: ir.intent.archetype,
+      models: ir.models
+    })
+  );
   const initial = compileAndRegister(ir, opts);
+  safeEmit(
+    () => emitCompileDone(traceId, {
+      target: initial.target,
+      provider: initial.provider,
+      fallbackChain: initial.fallbackChain,
+      tokensIn: initial.tokensIn,
+      estimatedCostUsd: initial.estimatedCostUsd,
+      mutationsApplied: initial.mutationsApplied,
+      advisories: initial.advisories
+    })
+  );
+  for (const adv of initial.advisories) {
+    safeEmit(
+      () => emitAdvisoryFired(traceId, { code: adv.code, message: adv.message })
+    );
+  }
   const start = Date.now();
   const attempts = [];
   const rawTargets = [initial.target, ...initial.fallbackChain];
@@ -2765,6 +3294,47 @@ async function call(ir, opts = {}) {
       }
     }
   }
+  let policyBlockedFiltered;
+  if (opts.policy?.blockedModels && opts.policy.blockedModels.length > 0) {
+    const blocked = new Set(opts.policy.blockedModels);
+    const filtered = [];
+    const dropped = [];
+    for (const t of targetsToTry) {
+      if (blocked.has(t)) {
+        dropped.push(t);
+      } else {
+        filtered.push(t);
+      }
+    }
+    if (dropped.length > 0) {
+      policyBlockedFiltered = dropped;
+      targetsToTry = filtered;
+    }
+    if (targetsToTry.length === 0) {
+      const latencyMs2 = Date.now() - start;
+      await record({
+        handle: initial.handle,
+        tokensIn: 0,
+        tokensOut: 0,
+        latencyMs: latencyMs2,
+        success: false,
+        errorType: "all_blocked_by_policy",
+        promptPreview: extractPromptPreview(ir)
+      });
+      const blockedAttempts = dropped.map((m) => ({
+        model: m,
+        status: "terminal",
+        errorCode: "blocked_by_policy",
+        message: `Skipped \u2014 model ${m} is in CompilePolicy.blockedModels`
+      }));
+      throw new CallError(
+        `call(): all chain targets blocked by CompilePolicy.blockedModels: [${dropped.join(", ")}]`,
+        blockedAttempts,
+        void 0,
+        "all_blocked_by_policy"
+      );
+    }
+  }
   let activeCompile = initial;
   let lastErr;
   const failedProviders = /* @__PURE__ */ new Set();
@@ -2800,6 +3370,9 @@ async function call(ir, opts = {}) {
         continue;
       }
     }
+    safeEmit(
+      () => emitExecuteAttempt(traceId, { model: targetModel, attemptIndex: i })
+    );
     const exec = await execute(activeCompile.request, {
       apiKeys: opts.apiKeys,
       fetchImpl: opts.fetchImpl,
@@ -2809,6 +3382,14 @@ async function call(ir, opts = {}) {
     if (validated.ok) {
       attempts.push({ model: targetModel, status: "success" });
       const latencyMs2 = Date.now() - start;
+      safeEmit(
+        () => emitExecuteSuccess(traceId, {
+          model: targetModel,
+          tokensIn: validated.response.tokens.input,
+          tokensOut: validated.response.tokens.output,
+          latencyMs: latencyMs2
+        })
+      );
       await record({
         handle: initial.handle,
         tokensIn: validated.response.tokens.input,
@@ -2825,6 +3406,20 @@ async function call(ir, opts = {}) {
         cacheCreationInputTokens: validated.response.tokens.cacheCreated
       });
       const fellOver = targetModel !== initial.target;
+      const fallbackReason = fellOver ? normalizeFallbackReason(attempts) : void 0;
+      if (fellOver) {
+        const firstFailed = attempts.find((a) => a.status !== "success");
+        if (firstFailed) {
+          safeEmit(
+            () => emitFallbackWalked(traceId, {
+              from: initial.target,
+              to: targetModel,
+              reason: fallbackReason ?? "unknown",
+              attempt: firstFailed
+            })
+          );
+        }
+      }
       return {
         handle: initial.handle,
         actualModel: targetModel,
@@ -2836,8 +3431,10 @@ async function call(ir, opts = {}) {
         attempts,
         servedBy: targetModel,
         fellOverFrom: fellOver ? initial.target : void 0,
-        fallbackReason: fellOver ? normalizeFallbackReason(attempts) : void 0,
-        unreachableFiltered
+        fallbackReason,
+        unreachableFiltered,
+        policyBlockedFiltered,
+        traceId
       };
     }
     attempts.push({
@@ -2866,8 +3463,9 @@ async function call(ir, opts = {}) {
     promptPreview: extractPromptPreview(ir)
   });
   const filteredNote = unreachableFiltered && unreachableFiltered.length > 0 ? ` (also auto-filtered: [${unreachableFiltered.join(", ")}] \u2014 no API key)` : "";
+  const blockedNote = policyBlockedFiltered && policyBlockedFiltered.length > 0 ? ` (also policy-blocked: [${policyBlockedFiltered.join(", ")}])` : "";
   throw new CallError(
-    `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}`,
+    `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}${blockedNote}`,
     attempts,
     lastErr?.status,
     lastErr?.errorCode
@@ -2933,6 +3531,23 @@ function normalizeFallbackReason(attempts) {
   if (code === "auth" || code === "auth_inferred") return "provider_auth_failed";
   return "provider_error";
 }
+/**
+ * Produce a trace id in 8-4-4-4-12 lowercase-hex form. Uses
+ * crypto.randomUUID() when the runtime exposes it; otherwise falls back to
+ * Math.random()-derived hex groups (same layout, no UUID version bits).
+ */
+function generateTraceId() {
+  try {
+    const cryptoApi = globalThis.crypto;
+    if (cryptoApi && typeof cryptoApi.randomUUID === "function") {
+      return cryptoApi.randomUUID();
+    }
+  } catch {
+    // Fall through to the Math.random() path.
+  }
+  const randomHex = (digits) => {
+    const limit = Math.pow(16, digits);
+    return Math.floor(Math.random() * limit).toString(16).padStart(digits, "0");
+  };
+  return [8, 4, 4, 4, 12].map((digits) => randomHex(digits)).join("-");
+}
+/**
+ * Invoke `fn` and discard anything it throws, so a telemetry failure can
+ * never interrupt the caller. The callback's return value is ignored.
+ */
+function safeEmit(fn) {
+  try {
+    void fn();
+  } catch (_ignored) {
+    // Intentionally empty: emission is best-effort.
+  }
+}
 // src/oracle.ts
 var DEFAULT_DIMENSIONS = ["correctness", "completeness", "conciseness", "format"];