npm - @warmdrift/kgauto-compiler - Versions diffs - 2.0.0-alpha.19 → 2.0.0-alpha.21 - Mend

@warmdrift/kgauto-compiler 2.0.0-alpha.19 → 2.0.0-alpha.21

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/glassbox/index.d.mts +3 -3
package/dist/glassbox/index.d.ts +3 -3
package/dist/glassbox-routes/index.d.mts +2 -2
package/dist/glassbox-routes/index.d.ts +2 -2
package/dist/index.d.mts +159 -7
package/dist/index.d.ts +159 -7
package/dist/index.js +381 -62
package/dist/index.mjs +373 -62
package/dist/{ir-C3P4gDt0.d.mts → ir-CruZBtpK.d.mts} +162 -1
package/dist/{ir-CFHU3BUT.d.ts → ir-Wr5lc8Mi.d.ts} +162 -1
package/dist/profiles.d.mts +1 -1
package/dist/profiles.d.ts +1 -1
package/dist/{types-xeklorHU.d.ts → types-BiZKJU41.d.ts} +1 -1
package/dist/{types-DWF6mPGg.d.mts → types-zk238uNL.d.mts} +1 -1
package/package.json +1 -1

package/dist/{ir-C3P4gDt0.d.mts → ir-CruZBtpK.d.mts} RENAMED

@@ -90,6 +90,29 @@ interface Constraints {
     maxResponseWords?: number;
     /** Override target model selection — if set, compiler uses this instead of routing. */
     forceModel?: string;
+    /**
+     * alpha.20: consumer-declared tool-orchestration shape for this call.
+     * - 'parallel': model may fire multiple tool calls per step (current
+     *   default behavior; the L-040 cliff applies — DeepSeek's
+     *   `tool_count >= 1` cliff trims tools because parallel-tool throughput
+     *   collapses to sequential semantics).
+     * - 'sequential': consumer commits to one tool call per step (the agentic
+     *   loop pattern). DeepSeek V4-Flash + V4-Pro can compete cleanly in
+     *   this mode — the L-040 cliff is silenced and the hunt chain shifts
+     *   to a DeepSeek-tier-1 ordering.
+     * - 'either': consumer doesn't care; library picks the parallel chain
+     *   (status-quo default) and may upgrade to brain-driven per-mode perf
+     *   selection in a future release.
+     *
+     * Affects:
+     *   - Chain composition for `archetype: 'hunt'` (see
+     *     `getDefaultFallbackChain` and `STARTER_CHAINS_BY_MODE`).
+     *   - L-040 cliff in `passApplyCliffs` (silent when 'sequential').
+     *
+     * Default (when undefined): equivalent to 'parallel' for back-compat
+     * with every pre-alpha.20 caller.
+     */
+    toolOrchestration?: 'parallel' | 'sequential' | 'either';
 }
 /**
  * Cache marker policy for the messages array (history + currentTurn).
@@ -308,6 +331,21 @@ interface BestPracticeAdvisory {
     suggestion?: string;
     /** Optional: link to docs anchor for more context. */
     docsUrl?: string;
+    /**
+     * alpha.20 — actionable category for routing/dashboard surfacing. When set,
+     * the brain persists this as `recommendation_type` on
+     * `compile_outcome_advisories` so consumers can filter "show me all
+     * client-side issues that are caching-fix recommendations." Optional;
+     * absent on legacy or uncategorized rules.
+     *
+     * - `'model-swap'`          — swap to a different model fixes this
+     * - `'prompt-fix'`          — restructure prompt (sections, tools, format)
+     * - `'caching-fix'`         — add cache markers (system or history)
+     * - `'no-ai-needed'`        — the call shouldn't be using an AI model
+     * - `'tier-down'`           — current model is overkill for this archetype
+     * - `'architecture-change'` — the issue isn't fixable at the kgauto layer
+     */
+    recommendationType?: 'model-swap' | 'prompt-fix' | 'caching-fix' | 'no-ai-needed' | 'tier-down' | 'architecture-change';
 }
 interface CompileResult {
     /** Unique handle for this call — pass to record() to correlate the outcome. */
@@ -359,6 +397,14 @@ interface CompileResult {
          * 0 when history is empty. alpha.7.
          */
         historyTokensTotal: number;
+        /**
+         * alpha.20 E3. Consumer-declared tool-orchestration mode for this call,
+         * mirrored from `ir.constraints.toolOrchestration` for downstream
+         * observability (Glass-Box panel, brain telemetry, advisor logs).
+         * Undefined when the consumer hadn't adopted the constraint yet —
+         * treat as 'parallel' equivalent for back-compat.
+         */
+        toolOrchestration?: 'parallel' | 'sequential' | 'either';
     };
 }
 /**
@@ -634,6 +680,121 @@ interface RecordInput {
      * surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
      */
     ttftMs?: number;
+    /**
+     * alpha.20 — advisories fired at compile() time. Persisted to the brain's
+     * `compile_outcome_advisories` sibling table via a second POST that fires
+     * AFTER the primary outcome insert succeeds. Best-effort: a failed
+     * advisory POST is logged via onError but does NOT throw or roll back the
+     * primary outcome row.
+     *
+     * Pass `result.advisories` from the CompileResult directly. The brain
+     * uses these to compute the `empty_rate_clean` comparator (rows with
+     * zero advisories fired) so consumers can distinguish "model is bad"
+     * from "client sent a bloated/uncached/malformed request."
+     *
+     * Empty array / undefined → no second POST fires.
+     */
+    advisories?: BestPracticeAdvisory[];
+}
+/**
+ * alpha.20 Entry 4: kinds of consumer-declared outcomes feeding the quality
+ * loop. Surfaces in `recordOutcome()` as the verdict the consumer's UX is
+ * forwarding to the brain.
+ *
+ *   - `approved`  user explicitly approved (thumbs up, "looks good", accepted)
+ *   - `rejected`  user explicitly rejected (thumbs down, "redo", discarded)
+ *   - `partial`   accepted with edits or partial use (mixed signal)
+ *   - `engaged`   user engaged with the output (copy/scroll/dwell)
+ *   - `abandoned` user abandoned the response (closed, navigated away)
+ *   - `unknown`   verdict could not be inferred — recorded for completeness
+ */
+type OutcomeKind = 'approved' | 'rejected' | 'partial' | 'engaged' | 'abandoned' | 'unknown';
+/**
+ * Input to `recordOutcome()` — consumer's verdict on a previously-compiled
+ * call. Joins to the original `compile_outcomes` row via outcomeId,
+ * enabling per-(model, archetype) approve-rate measurement once N ≥ 10
+ * outcomes accumulate.
+ */
+interface RecordOutcomeInput {
+    /** Joins to compile_outcomes.id. Returned by compile() via CompileResult.outcomeId. */
+    outcomeId: number | string;
+    /** What did the user / system do with this output? */
+    outcome: OutcomeKind;
+    /** Optional 1-5 user rating (e.g., thumbs up/down with intensity, NPS-style). */
+    rating?: 1 | 2 | 3 | 4 | 5;
+    /** Optional free-text reason (e.g., user-typed feedback, system-inferred cause). */
+    reason?: string;
+    /**
+     * Optional model-reported confidence at compile time (0..1). Used for
+     * Brier-score calibration in later phases (alpha.21+) — pair this with
+     * the actual `outcome` to compute calibration error.
+     */
+    observedConfidence?: number;
+}
+/**
+ * Return shape of `recordOutcome()`. Never throws — persistence failures
+ * surface as `ok: false` with a stable `reason` string.
+ */
+interface OutcomeResult {
+    /** True when the POST landed (2xx). False when brain not configured or POST failed. */
+    ok: boolean;
+    /** Stable reason code when ok=false. One of: 'brain_not_configured' | 'persistence_failed'. */
+    reason?: string;
+}
+/**
+ * alpha.21 (s78 Entry 1): provenance label on a chain entry. Surfaces WHY
+ * an entry sits where it sits so consumers can distinguish:
+ *
+ *   - 'measured'         brain has N>=10 rows with a measurable quality
+ *                        outcome backing this placement. The number lives on
+ *                        `ChainEntry.n`.
+ *   - 'capability-fact'  inclusion or exclusion driven by a published or
+ *                        measured CAPABILITY (L-040 cliff, ctx window cap,
+ *                        structured-output support). Not an opinion — a
+ *                        fact about what the model can/can't do.
+ *   - 'judgment'         engineer's pick, no measured backing yet. Cold-start
+ *                        prior; entirely valid until evidence accumulates.
+ *
+ * "Judgment" is HONEST, not a downgrade. Most of `STARTER_CHAINS` lands here
+ * in alpha.21 — that's the point: consumers can SEE the grounding gap and
+ * prioritize the measurement work that would graduate them to 'measured'.
+ */
+type Grounding = 'measured' | 'capability-fact' | 'judgment';
+/**
+ * alpha.21 (s78 Entry 1): a single position in a fallback chain, carrying its
+ * provenance label and an optional human-readable reason. The shape replaces
+ * the old `string[]` representation everywhere chains are surfaced externally.
+ *
+ * `n` is REQUIRED when `grounding === 'measured'` — the runtime helper
+ * `makeMeasuredEntry()` enforces this. For 'capability-fact' and 'judgment'
+ * entries, `n` is undefined.
+ */
+interface ChainEntry {
+    /** Canonical model id (post-alias). */
+    id: string;
+    /** Why this entry sits in this position. */
+    grounding: Grounding;
+    /**
+     * Optional one-liner explaining the grounding decision. The inline comments
+     * that historically lived next to STARTER_CHAINS entries are now expressed
+     * here as machine-readable text.
+     */
+    reason?: string;
+    /**
+     * When `grounding === 'measured'`, the brain row count that backs this
+     * placement. Undefined for 'capability-fact' and 'judgment' entries.
+     */
+    n?: number;
+}
+/**
+ * alpha.21 introspection shape — a per-archetype chain with grounding on
+ * every position. Consumers reading this never see naked string ids;
+ * everything carries provenance.
+ */
+interface ChainWithGrounding {
+    archetype: IntentArchetypeName;
+    /** Ordered: position 0 = primary, rising index = fallback positions. */
+    entries: ChainEntry[];
 }
-export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type FallbackReason as F, type HistoryCachePolicy as H, type IntentDeclaration as I, type Message as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, type Provider as f, type CallAttempt as g, CallError as h, type Constraints as i, type MutationApplied as j, type NormalizedTokens as k, type PromptSection as l, type ToolDefinition as m };
+export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type FallbackReason as F, type Grounding as G, type HistoryCachePolicy as H, type IntentDeclaration as I, type Message as M, type NormalizedResponse as N, type OutcomeResult as O, type ProviderOverrides as P, type RecordInput as R, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type RecordOutcomeInput as e, type OracleScore as f, type CompileResult as g, type Provider as h, type ChainEntry as i, type CallAttempt as j, CallError as k, type ChainWithGrounding as l, type Constraints as m, type MutationApplied as n, type NormalizedTokens as o, type OutcomeKind as p, type PromptSection as q, type ToolDefinition as r };

package/dist/{ir-CFHU3BUT.d.ts → ir-Wr5lc8Mi.d.ts} RENAMED

@@ -90,6 +90,29 @@ interface Constraints {
     maxResponseWords?: number;
     /** Override target model selection — if set, compiler uses this instead of routing. */
     forceModel?: string;
+    /**
+     * alpha.20: consumer-declared tool-orchestration shape for this call.
+     * - 'parallel': model may fire multiple tool calls per step (current
+     *   default behavior; the L-040 cliff applies — DeepSeek's
+     *   `tool_count >= 1` cliff trims tools because parallel-tool throughput
+     *   collapses to sequential semantics).
+     * - 'sequential': consumer commits to one tool call per step (the agentic
+     *   loop pattern). DeepSeek V4-Flash + V4-Pro can compete cleanly in
+     *   this mode — the L-040 cliff is silenced and the hunt chain shifts
+     *   to a DeepSeek-tier-1 ordering.
+     * - 'either': consumer doesn't care; library picks the parallel chain
+     *   (status-quo default) and may upgrade to brain-driven per-mode perf
+     *   selection in a future release.
+     *
+     * Affects:
+     *   - Chain composition for `archetype: 'hunt'` (see
+     *     `getDefaultFallbackChain` and `STARTER_CHAINS_BY_MODE`).
+     *   - L-040 cliff in `passApplyCliffs` (silent when 'sequential').
+     *
+     * Default (when undefined): equivalent to 'parallel' for back-compat
+     * with every pre-alpha.20 caller.
+     */
+    toolOrchestration?: 'parallel' | 'sequential' | 'either';
 }
 /**
  * Cache marker policy for the messages array (history + currentTurn).
@@ -308,6 +331,21 @@ interface BestPracticeAdvisory {
     suggestion?: string;
     /** Optional: link to docs anchor for more context. */
     docsUrl?: string;
+    /**
+     * alpha.20 — actionable category for routing/dashboard surfacing. When set,
+     * the brain persists this as `recommendation_type` on
+     * `compile_outcome_advisories` so consumers can filter "show me all
+     * client-side issues that are caching-fix recommendations." Optional;
+     * absent on legacy or uncategorized rules.
+     *
+     * - `'model-swap'`          — swap to a different model fixes this
+     * - `'prompt-fix'`          — restructure prompt (sections, tools, format)
+     * - `'caching-fix'`         — add cache markers (system or history)
+     * - `'no-ai-needed'`        — the call shouldn't be using an AI model
+     * - `'tier-down'`           — current model is overkill for this archetype
+     * - `'architecture-change'` — the issue isn't fixable at the kgauto layer
+     */
+    recommendationType?: 'model-swap' | 'prompt-fix' | 'caching-fix' | 'no-ai-needed' | 'tier-down' | 'architecture-change';
 }
 interface CompileResult {
     /** Unique handle for this call — pass to record() to correlate the outcome. */
@@ -359,6 +397,14 @@ interface CompileResult {
          * 0 when history is empty. alpha.7.
          */
         historyTokensTotal: number;
+        /**
+         * alpha.20 E3. Consumer-declared tool-orchestration mode for this call,
+         * mirrored from `ir.constraints.toolOrchestration` for downstream
+         * observability (Glass-Box panel, brain telemetry, advisor logs).
+         * Undefined when the consumer hadn't adopted the constraint yet —
+         * treat as 'parallel' equivalent for back-compat.
+         */
+        toolOrchestration?: 'parallel' | 'sequential' | 'either';
     };
 }
 /**
@@ -634,6 +680,121 @@ interface RecordInput {
      * surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
      */
     ttftMs?: number;
+    /**
+     * alpha.20 — advisories fired at compile() time. Persisted to the brain's
+     * `compile_outcome_advisories` sibling table via a second POST that fires
+     * AFTER the primary outcome insert succeeds. Best-effort: a failed
+     * advisory POST is logged via onError but does NOT throw or roll back the
+     * primary outcome row.
+     *
+     * Pass `result.advisories` from the CompileResult directly. The brain
+     * uses these to compute the `empty_rate_clean` comparator (rows with
+     * zero advisories fired) so consumers can distinguish "model is bad"
+     * from "client sent a bloated/uncached/malformed request."
+     *
+     * Empty array / undefined → no second POST fires.
+     */
+    advisories?: BestPracticeAdvisory[];
+}
+/**
+ * alpha.20 Entry 4: kinds of consumer-declared outcomes feeding the quality
+ * loop. Surfaces in `recordOutcome()` as the verdict the consumer's UX is
+ * forwarding to the brain.
+ *
+ *   - `approved`  user explicitly approved (thumbs up, "looks good", accepted)
+ *   - `rejected`  user explicitly rejected (thumbs down, "redo", discarded)
+ *   - `partial`   accepted with edits or partial use (mixed signal)
+ *   - `engaged`   user engaged with the output (copy/scroll/dwell)
+ *   - `abandoned` user abandoned the response (closed, navigated away)
+ *   - `unknown`   verdict could not be inferred — recorded for completeness
+ */
+type OutcomeKind = 'approved' | 'rejected' | 'partial' | 'engaged' | 'abandoned' | 'unknown';
+/**
+ * Input to `recordOutcome()` — consumer's verdict on a previously-compiled
+ * call. Joins to the original `compile_outcomes` row via outcomeId,
+ * enabling per-(model, archetype) approve-rate measurement once N ≥ 10
+ * outcomes accumulate.
+ */
+interface RecordOutcomeInput {
+    /** Joins to compile_outcomes.id. Returned by compile() via CompileResult.outcomeId. */
+    outcomeId: number | string;
+    /** What did the user / system do with this output? */
+    outcome: OutcomeKind;
+    /** Optional 1-5 user rating (e.g., thumbs up/down with intensity, NPS-style). */
+    rating?: 1 | 2 | 3 | 4 | 5;
+    /** Optional free-text reason (e.g., user-typed feedback, system-inferred cause). */
+    reason?: string;
+    /**
+     * Optional model-reported confidence at compile time (0..1). Used for
+     * Brier-score calibration in later phases (alpha.21+) — pair this with
+     * the actual `outcome` to compute calibration error.
+     */
+    observedConfidence?: number;
+}
+/**
+ * Return shape of `recordOutcome()`. Never throws — persistence failures
+ * surface as `ok: false` with a stable `reason` string.
+ */
+interface OutcomeResult {
+    /** True when the POST landed (2xx). False when brain not configured or POST failed. */
+    ok: boolean;
+    /** Stable reason code when ok=false. One of: 'brain_not_configured' | 'persistence_failed'. */
+    reason?: string;
+}
+/**
+ * alpha.21 (s78 Entry 1): provenance label on a chain entry. Surfaces WHY
+ * an entry sits where it sits so consumers can distinguish:
+ *
+ *   - 'measured'         brain has N>=10 rows with a measurable quality
+ *                        outcome backing this placement. The number lives on
+ *                        `ChainEntry.n`.
+ *   - 'capability-fact'  inclusion or exclusion driven by a published or
+ *                        measured CAPABILITY (L-040 cliff, ctx window cap,
+ *                        structured-output support). Not an opinion — a
+ *                        fact about what the model can/can't do.
+ *   - 'judgment'         engineer's pick, no measured backing yet. Cold-start
+ *                        prior; entirely valid until evidence accumulates.
+ *
+ * "Judgment" is HONEST, not a downgrade. Most of `STARTER_CHAINS` lands here
+ * in alpha.21 — that's the point: consumers can SEE the grounding gap and
+ * prioritize the measurement work that would graduate them to 'measured'.
+ */
+type Grounding = 'measured' | 'capability-fact' | 'judgment';
+/**
+ * alpha.21 (s78 Entry 1): a single position in a fallback chain, carrying its
+ * provenance label and an optional human-readable reason. The shape replaces
+ * the old `string[]` representation everywhere chains are surfaced externally.
+ *
+ * `n` is REQUIRED when `grounding === 'measured'` — the runtime helper
+ * `makeMeasuredEntry()` enforces this. For 'capability-fact' and 'judgment'
+ * entries, `n` is undefined.
+ */
+interface ChainEntry {
+    /** Canonical model id (post-alias). */
+    id: string;
+    /** Why this entry sits in this position. */
+    grounding: Grounding;
+    /**
+     * Optional one-liner explaining the grounding decision. The inline comments
+     * that historically lived next to STARTER_CHAINS entries are now expressed
+     * here as machine-readable text.
+     */
+    reason?: string;
+    /**
+     * When `grounding === 'measured'`, the brain row count that backs this
+     * placement. Undefined for 'capability-fact' and 'judgment' entries.
+     */
+    n?: number;
+}
+/**
+ * alpha.21 introspection shape — a per-archetype chain with grounding on
+ * every position. Consumers reading this never see naked string ids;
+ * everything carries provenance.
+ */
+interface ChainWithGrounding {
+    archetype: IntentArchetypeName;
+    /** Ordered: position 0 = primary, rising index = fallback positions. */
+    entries: ChainEntry[];
 }
-export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type FallbackReason as F, type HistoryCachePolicy as H, type IntentDeclaration as I, type Message as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, type Provider as f, type CallAttempt as g, CallError as h, type Constraints as i, type MutationApplied as j, type NormalizedTokens as k, type PromptSection as l, type ToolDefinition as m };
+export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type FallbackReason as F, type Grounding as G, type HistoryCachePolicy as H, type IntentDeclaration as I, type Message as M, type NormalizedResponse as N, type OutcomeResult as O, type ProviderOverrides as P, type RecordInput as R, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type RecordOutcomeInput as e, type OracleScore as f, type CompileResult as g, type Provider as h, type ChainEntry as i, type CallAttempt as j, CallError as k, type ChainWithGrounding as l, type Constraints as m, type MutationApplied as n, type NormalizedTokens as o, type OutcomeKind as p, type PromptSection as q, type ToolDefinition as r };

package/dist/profiles.d.mts CHANGED

@@ -1,4 +1,4 @@
-import { f as Provider } from './ir-C3P4gDt0.mjs';
+import { h as Provider } from './ir-CruZBtpK.mjs';
 import { IntentArchetypeName } from './dialect.mjs';
 /**

package/dist/profiles.d.ts CHANGED

@@ -1,4 +1,4 @@
-import { f as Provider } from './ir-CFHU3BUT.js';
+import { h as Provider } from './ir-Wr5lc8Mi.js';
 import { IntentArchetypeName } from './dialect.js';
 /**

package/dist/{types-xeklorHU.d.ts → types-BiZKJU41.d.ts} RENAMED

@@ -1,4 +1,4 @@
-import { j as MutationApplied, B as BestPracticeAdvisory, F as FallbackReason, g as CallAttempt } from './ir-CFHU3BUT.js';
+import { n as MutationApplied, B as BestPracticeAdvisory, F as FallbackReason, j as CallAttempt } from './ir-Wr5lc8Mi.js';
 /**
  * Glass-Box observability types (alpha.17).

package/dist/{types-DWF6mPGg.d.mts → types-zk238uNL.d.mts} RENAMED

@@ -1,4 +1,4 @@
-import { j as MutationApplied, B as BestPracticeAdvisory, F as FallbackReason, g as CallAttempt } from './ir-C3P4gDt0.mjs';
+import { n as MutationApplied, B as BestPracticeAdvisory, F as FallbackReason, j as CallAttempt } from './ir-CruZBtpK.mjs';
 /**
  * Glass-Box observability types (alpha.17).

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@warmdrift/kgauto-compiler",
-  "version": "2.0.0-alpha.19",
+  "version": "2.0.0-alpha.21",
   "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",