@warmdrift/kgauto-compiler 2.0.0-alpha.19 → 2.0.0-alpha.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -90,6 +90,29 @@ interface Constraints {
90
90
  maxResponseWords?: number;
91
91
  /** Override target model selection — if set, compiler uses this instead of routing. */
92
92
  forceModel?: string;
93
+ /**
94
+ * alpha.20: consumer-declared tool-orchestration shape for this call.
95
+ * - 'parallel': model may fire multiple tool calls per step (current
96
+ * default behavior; the L-040 cliff applies — DeepSeek's
97
+ * `tool_count >= 1` cliff trims tools because parallel-tool throughput
98
+ * collapses to sequential semantics).
99
+ * - 'sequential': consumer commits to one tool call per step (the agentic
100
+ * loop pattern). DeepSeek V4-Flash + V4-Pro can compete cleanly in
101
+ * this mode — the L-040 cliff is silenced and the hunt chain shifts
102
+ * to a DeepSeek-tier-1 ordering.
103
+ * - 'either': consumer doesn't care; library picks the parallel chain
104
+ * (status-quo default) and may upgrade to brain-driven per-mode perf
105
+ * selection in a future release.
106
+ *
107
+ * Affects:
108
+ * - Chain composition for `archetype: 'hunt'` (see
109
+ * `getDefaultFallbackChain` and `STARTER_CHAINS_BY_MODE`).
110
+ * - L-040 cliff in `passApplyCliffs` (silent when 'sequential').
111
+ *
112
+ * Default (when undefined): equivalent to 'parallel' for back-compat
113
+ * with every pre-alpha.20 caller.
114
+ */
115
+ toolOrchestration?: 'parallel' | 'sequential' | 'either';
93
116
  }
94
117
  /**
95
118
  * Cache marker policy for the messages array (history + currentTurn).
@@ -308,6 +331,21 @@ interface BestPracticeAdvisory {
308
331
  suggestion?: string;
309
332
  /** Optional: link to docs anchor for more context. */
310
333
  docsUrl?: string;
334
+ /**
335
+ * alpha.20 — actionable category for routing/dashboard surfacing. When set,
336
+ * the brain persists this as `recommendation_type` on
337
+ * `compile_outcome_advisories` so consumers can filter "show me all
338
+ * client-side issues that are caching-fix recommendations." Optional;
339
+ * absent on legacy or uncategorized rules.
340
+ *
341
+ * - `'model-swap'` — swap to a different model fixes this
342
+ * - `'prompt-fix'` — restructure prompt (sections, tools, format)
343
+ * - `'caching-fix'` — add cache markers (system or history)
344
+ * - `'no-ai-needed'` — the call shouldn't be using an AI model
345
+ * - `'tier-down'` — current model is overkill for this archetype
346
+ * - `'architecture-change'` — the issue isn't fixable at the kgauto layer
347
+ */
348
+ recommendationType?: 'model-swap' | 'prompt-fix' | 'caching-fix' | 'no-ai-needed' | 'tier-down' | 'architecture-change';
311
349
  }
312
350
  interface CompileResult {
313
351
  /** Unique handle for this call — pass to record() to correlate the outcome. */
@@ -359,6 +397,14 @@ interface CompileResult {
359
397
  * 0 when history is empty. alpha.7.
360
398
  */
361
399
  historyTokensTotal: number;
400
+ /**
401
+ * alpha.20 E3. Consumer-declared tool-orchestration mode for this call,
402
+ * mirrored from `ir.constraints.toolOrchestration` for downstream
403
+ * observability (Glass-Box panel, brain telemetry, advisor logs).
404
+ * Undefined when the consumer has not adopted the constraint yet —
405
+ * treat as 'parallel' equivalent for back-compat.
406
+ */
407
+ toolOrchestration?: 'parallel' | 'sequential' | 'either';
362
408
  };
363
409
  }
364
410
  /**
@@ -634,6 +680,121 @@ interface RecordInput {
634
680
  * surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
635
681
  */
636
682
  ttftMs?: number;
683
+ /**
684
+ * alpha.20 — advisories fired at compile() time. Persisted to the brain's
685
+ * `compile_outcome_advisories` sibling table via a second POST that fires
686
+ * AFTER the primary outcome insert succeeds. Best-effort: a failed
687
+ * advisory POST is logged via onError but does NOT throw or roll back the
688
+ * primary outcome row.
689
+ *
690
+ * Pass `result.advisories` from the CompileResult directly. The brain
691
+ * uses these to compute the `empty_rate_clean` comparator (rows with
692
+ * zero advisories fired) so consumers can distinguish "model is bad"
693
+ * from "client sent a bloated/uncached/malformed request."
694
+ *
695
+ * Empty array / undefined → no second POST fires.
696
+ */
697
+ advisories?: BestPracticeAdvisory[];
698
+ }
699
+ /**
700
+ * alpha.20 Entry 4: kinds of consumer-declared outcomes feeding the quality
701
+ * loop. Surfaces in `recordOutcome()` as the verdict the consumer's UX is
702
+ * forwarding to the brain.
703
+ *
704
+ * - `approved` user explicitly approved (thumbs up, "looks good", accepted)
705
+ * - `rejected` user explicitly rejected (thumbs down, "redo", discarded)
706
+ * - `partial` accepted with edits or partial use (mixed signal)
707
+ * - `engaged` user engaged with the output (copy/scroll/dwell)
708
+ * - `abandoned` user abandoned the response (closed, navigated away)
709
+ * - `unknown` verdict could not be inferred — recorded for completeness
710
+ */
711
+ type OutcomeKind = 'approved' | 'rejected' | 'partial' | 'engaged' | 'abandoned' | 'unknown';
712
+ /**
713
+ * Input to `recordOutcome()` — consumer's verdict on a previously-compiled
714
+ * call. Joins to the original `compile_outcomes` row via outcomeId,
715
+ * enabling per-(model, archetype) approve-rate measurement once N ≥ 10
716
+ * outcomes accumulate.
717
+ */
718
+ interface RecordOutcomeInput {
719
+ /** Joins to compile_outcomes.id. Returned by compile() via CompileResult.outcomeId. */
720
+ outcomeId: number | string;
721
+ /** What did the user / system do with this output? */
722
+ outcome: OutcomeKind;
723
+ /** Optional 1-5 user rating (e.g., thumbs up/down with intensity, NPS-style). */
724
+ rating?: 1 | 2 | 3 | 4 | 5;
725
+ /** Optional free-text reason (e.g., user-typed feedback, system-inferred cause). */
726
+ reason?: string;
727
+ /**
728
+ * Optional model-reported confidence at compile time (0..1). Used for
729
+ * Brier-score calibration in later phases (alpha.21+) — pair this with
730
+ * the actual `outcome` to compute calibration error.
731
+ */
732
+ observedConfidence?: number;
733
+ }
734
+ /**
735
+ * Return shape of `recordOutcome()`. Never throws — persistence failures
736
+ * surface as `ok: false` with a stable `reason` string.
737
+ */
738
+ interface OutcomeResult {
739
+ /** True when the POST landed (2xx). False when the brain is not configured or the POST failed. */
740
+ ok: boolean;
741
+ /** Stable reason code when ok=false. One of: 'brain_not_configured' | 'persistence_failed'. */
742
+ reason?: string;
743
+ }
744
+ /**
745
+ * alpha.21 (s78 Entry 1): provenance label on a chain entry. Surfaces WHY
746
+ * an entry sits where it sits so consumers can distinguish:
747
+ *
748
+ * - 'measured' brain has N>=10 rows with a measurable quality
749
+ * outcome backing this placement. The number lives on
750
+ * `ChainEntry.n`.
751
+ * - 'capability-fact' inclusion or exclusion driven by a published or
752
+ * measured CAPABILITY (L-040 cliff, ctx window cap,
753
+ * structured-output support). Not an opinion — a
754
+ * fact about what the model can/can't do.
755
+ * - 'judgment' engineer's pick, no measured backing yet. Cold-start
756
+ * prior; entirely valid until evidence accumulates.
757
+ *
758
+ * "Judgment" is HONEST, not a downgrade. Most of `STARTER_CHAINS` lands here
759
+ * in alpha.21 — that's the point: consumers can SEE the grounding gap and
760
+ * prioritize the measurement work that would graduate them to 'measured'.
761
+ */
762
+ type Grounding = 'measured' | 'capability-fact' | 'judgment';
763
+ /**
764
+ * alpha.21 (s78 Entry 1): a single position in a fallback chain, carrying its
765
+ * provenance label and an optional human-readable reason. The shape replaces
766
+ * the old `string[]` representation everywhere chains are surfaced externally.
767
+ *
768
+ * `n` is REQUIRED when `grounding === 'measured'` — the runtime helper
769
+ * `makeMeasuredEntry()` enforces this. For 'capability-fact' and 'judgment'
770
+ * entries, `n` is undefined.
771
+ */
772
+ interface ChainEntry {
773
+ /** Canonical model id (post-alias). */
774
+ id: string;
775
+ /** Why this entry sits in this position. */
776
+ grounding: Grounding;
777
+ /**
778
+ * Optional one-liner explaining the grounding decision. The inline comments
779
+ * that historically lived next to STARTER_CHAINS entries are now expressed
780
+ * here as machine-readable text.
781
+ */
782
+ reason?: string;
783
+ /**
784
+ * When `grounding === 'measured'`, the brain row count that backs this
785
+ * placement. Undefined for 'capability-fact' and 'judgment' entries.
786
+ */
787
+ n?: number;
788
+ }
789
+ /**
790
+ * alpha.21 introspection shape — a per-archetype chain with grounding on
791
+ * every position. Consumers reading this never see naked string ids;
792
+ * everything carries provenance.
793
+ */
794
+ interface ChainWithGrounding {
795
+ archetype: IntentArchetypeName;
796
+ /** Ordered: position 0 = primary, rising index = fallback positions. */
797
+ entries: ChainEntry[];
637
798
  }
638
799
 
639
- export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type FallbackReason as F, type HistoryCachePolicy as H, type IntentDeclaration as I, type Message as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, type Provider as f, type CallAttempt as g, CallError as h, type Constraints as i, type MutationApplied as j, type NormalizedTokens as k, type PromptSection as l, type ToolDefinition as m };
800
+ export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type FallbackReason as F, type Grounding as G, type HistoryCachePolicy as H, type IntentDeclaration as I, type Message as M, type NormalizedResponse as N, type OutcomeResult as O, type ProviderOverrides as P, type RecordInput as R, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type RecordOutcomeInput as e, type OracleScore as f, type CompileResult as g, type Provider as h, type ChainEntry as i, type CallAttempt as j, CallError as k, type ChainWithGrounding as l, type Constraints as m, type MutationApplied as n, type NormalizedTokens as o, type OutcomeKind as p, type PromptSection as q, type ToolDefinition as r };
@@ -90,6 +90,29 @@ interface Constraints {
90
90
  maxResponseWords?: number;
91
91
  /** Override target model selection — if set, compiler uses this instead of routing. */
92
92
  forceModel?: string;
93
+ /**
94
+ * alpha.20: consumer-declared tool-orchestration shape for this call.
95
+ * - 'parallel': model may fire multiple tool calls per step (current
96
+ * default behavior; the L-040 cliff applies — DeepSeek's
97
+ * `tool_count >= 1` cliff trims tools because parallel-tool throughput
98
+ * collapses to sequential semantics).
99
+ * - 'sequential': consumer commits to one tool call per step (the agentic
100
+ * loop pattern). DeepSeek V4-Flash + V4-Pro can compete cleanly in
101
+ * this mode — the L-040 cliff is silenced and the hunt chain shifts
102
+ * to a DeepSeek-tier-1 ordering.
103
+ * - 'either': consumer doesn't care; library picks the parallel chain
104
+ * (status-quo default) and may upgrade to brain-driven per-mode perf
105
+ * selection in a future release.
106
+ *
107
+ * Affects:
108
+ * - Chain composition for `archetype: 'hunt'` (see
109
+ * `getDefaultFallbackChain` and `STARTER_CHAINS_BY_MODE`).
110
+ * - L-040 cliff in `passApplyCliffs` (silent when 'sequential').
111
+ *
112
+ * Default (when undefined): equivalent to 'parallel' for back-compat
113
+ * with every pre-alpha.20 caller.
114
+ */
115
+ toolOrchestration?: 'parallel' | 'sequential' | 'either';
93
116
  }
94
117
  /**
95
118
  * Cache marker policy for the messages array (history + currentTurn).
@@ -308,6 +331,21 @@ interface BestPracticeAdvisory {
308
331
  suggestion?: string;
309
332
  /** Optional: link to docs anchor for more context. */
310
333
  docsUrl?: string;
334
+ /**
335
+ * alpha.20 — actionable category for routing/dashboard surfacing. When set,
336
+ * the brain persists this as `recommendation_type` on
337
+ * `compile_outcome_advisories` so consumers can filter "show me all
338
+ * client-side issues that are caching-fix recommendations." Optional;
339
+ * absent on legacy or uncategorized rules.
340
+ *
341
+ * - `'model-swap'` — swap to a different model fixes this
342
+ * - `'prompt-fix'` — restructure prompt (sections, tools, format)
343
+ * - `'caching-fix'` — add cache markers (system or history)
344
+ * - `'no-ai-needed'` — the call shouldn't be using an AI model
345
+ * - `'tier-down'` — current model is overkill for this archetype
346
+ * - `'architecture-change'` — the issue isn't fixable at the kgauto layer
347
+ */
348
+ recommendationType?: 'model-swap' | 'prompt-fix' | 'caching-fix' | 'no-ai-needed' | 'tier-down' | 'architecture-change';
311
349
  }
312
350
  interface CompileResult {
313
351
  /** Unique handle for this call — pass to record() to correlate the outcome. */
@@ -359,6 +397,14 @@ interface CompileResult {
359
397
  * 0 when history is empty. alpha.7.
360
398
  */
361
399
  historyTokensTotal: number;
400
+ /**
401
+ * alpha.20 E3. Consumer-declared tool-orchestration mode for this call,
402
+ * mirrored from `ir.constraints.toolOrchestration` for downstream
403
+ * observability (Glass-Box panel, brain telemetry, advisor logs).
404
+ * Undefined when the consumer has not adopted the constraint yet —
405
+ * treat as 'parallel' equivalent for back-compat.
406
+ */
407
+ toolOrchestration?: 'parallel' | 'sequential' | 'either';
362
408
  };
363
409
  }
364
410
  /**
@@ -634,6 +680,121 @@ interface RecordInput {
634
680
  * surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
635
681
  */
636
682
  ttftMs?: number;
683
+ /**
684
+ * alpha.20 — advisories fired at compile() time. Persisted to the brain's
685
+ * `compile_outcome_advisories` sibling table via a second POST that fires
686
+ * AFTER the primary outcome insert succeeds. Best-effort: a failed
687
+ * advisory POST is logged via onError but does NOT throw or roll back the
688
+ * primary outcome row.
689
+ *
690
+ * Pass `result.advisories` from the CompileResult directly. The brain
691
+ * uses these to compute the `empty_rate_clean` comparator (rows with
692
+ * zero advisories fired) so consumers can distinguish "model is bad"
693
+ * from "client sent a bloated/uncached/malformed request."
694
+ *
695
+ * Empty array / undefined → no second POST fires.
696
+ */
697
+ advisories?: BestPracticeAdvisory[];
698
+ }
699
+ /**
700
+ * alpha.20 Entry 4: kinds of consumer-declared outcomes feeding the quality
701
+ * loop. Surfaces in `recordOutcome()` as the verdict the consumer's UX is
702
+ * forwarding to the brain.
703
+ *
704
+ * - `approved` user explicitly approved (thumbs up, "looks good", accepted)
705
+ * - `rejected` user explicitly rejected (thumbs down, "redo", discarded)
706
+ * - `partial` accepted with edits or partial use (mixed signal)
707
+ * - `engaged` user engaged with the output (copy/scroll/dwell)
708
+ * - `abandoned` user abandoned the response (closed, navigated away)
709
+ * - `unknown` verdict could not be inferred — recorded for completeness
710
+ */
711
+ type OutcomeKind = 'approved' | 'rejected' | 'partial' | 'engaged' | 'abandoned' | 'unknown';
712
+ /**
713
+ * Input to `recordOutcome()` — consumer's verdict on a previously-compiled
714
+ * call. Joins to the original `compile_outcomes` row via outcomeId,
715
+ * enabling per-(model, archetype) approve-rate measurement once N ≥ 10
716
+ * outcomes accumulate.
717
+ */
718
+ interface RecordOutcomeInput {
719
+ /** Joins to compile_outcomes.id. Returned by compile() via CompileResult.outcomeId. */
720
+ outcomeId: number | string;
721
+ /** What did the user / system do with this output? */
722
+ outcome: OutcomeKind;
723
+ /** Optional 1-5 user rating (e.g., thumbs up/down with intensity, NPS-style). */
724
+ rating?: 1 | 2 | 3 | 4 | 5;
725
+ /** Optional free-text reason (e.g., user-typed feedback, system-inferred cause). */
726
+ reason?: string;
727
+ /**
728
+ * Optional model-reported confidence at compile time (0..1). Used for
729
+ * Brier-score calibration in later phases (alpha.21+) — pair this with
730
+ * the actual `outcome` to compute calibration error.
731
+ */
732
+ observedConfidence?: number;
733
+ }
734
+ /**
735
+ * Return shape of `recordOutcome()`. Never throws — persistence failures
736
+ * surface as `ok: false` with a stable `reason` string.
737
+ */
738
+ interface OutcomeResult {
739
+ /** True when the POST landed (2xx). False when the brain is not configured or the POST failed. */
740
+ ok: boolean;
741
+ /** Stable reason code when ok=false. One of: 'brain_not_configured' | 'persistence_failed'. */
742
+ reason?: string;
743
+ }
744
+ /**
745
+ * alpha.21 (s78 Entry 1): provenance label on a chain entry. Surfaces WHY
746
+ * an entry sits where it sits so consumers can distinguish:
747
+ *
748
+ * - 'measured' brain has N>=10 rows with a measurable quality
749
+ * outcome backing this placement. The number lives on
750
+ * `ChainEntry.n`.
751
+ * - 'capability-fact' inclusion or exclusion driven by a published or
752
+ * measured CAPABILITY (L-040 cliff, ctx window cap,
753
+ * structured-output support). Not an opinion — a
754
+ * fact about what the model can/can't do.
755
+ * - 'judgment' engineer's pick, no measured backing yet. Cold-start
756
+ * prior; entirely valid until evidence accumulates.
757
+ *
758
+ * "Judgment" is HONEST, not a downgrade. Most of `STARTER_CHAINS` lands here
759
+ * in alpha.21 — that's the point: consumers can SEE the grounding gap and
760
+ * prioritize the measurement work that would graduate them to 'measured'.
761
+ */
762
+ type Grounding = 'measured' | 'capability-fact' | 'judgment';
763
+ /**
764
+ * alpha.21 (s78 Entry 1): a single position in a fallback chain, carrying its
765
+ * provenance label and an optional human-readable reason. The shape replaces
766
+ * the old `string[]` representation everywhere chains are surfaced externally.
767
+ *
768
+ * `n` is REQUIRED when `grounding === 'measured'` — the runtime helper
769
+ * `makeMeasuredEntry()` enforces this. For 'capability-fact' and 'judgment'
770
+ * entries, `n` is undefined.
771
+ */
772
+ interface ChainEntry {
773
+ /** Canonical model id (post-alias). */
774
+ id: string;
775
+ /** Why this entry sits in this position. */
776
+ grounding: Grounding;
777
+ /**
778
+ * Optional one-liner explaining the grounding decision. The inline comments
779
+ * that historically lived next to STARTER_CHAINS entries are now expressed
780
+ * here as machine-readable text.
781
+ */
782
+ reason?: string;
783
+ /**
784
+ * When `grounding === 'measured'`, the brain row count that backs this
785
+ * placement. Undefined for 'capability-fact' and 'judgment' entries.
786
+ */
787
+ n?: number;
788
+ }
789
+ /**
790
+ * alpha.21 introspection shape — a per-archetype chain with grounding on
791
+ * every position. Consumers reading this never see naked string ids;
792
+ * everything carries provenance.
793
+ */
794
+ interface ChainWithGrounding {
795
+ archetype: IntentArchetypeName;
796
+ /** Ordered: position 0 = primary, rising index = fallback positions. */
797
+ entries: ChainEntry[];
637
798
  }
638
799
 
639
- export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type FallbackReason as F, type HistoryCachePolicy as H, type IntentDeclaration as I, type Message as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, type Provider as f, type CallAttempt as g, CallError as h, type Constraints as i, type MutationApplied as j, type NormalizedTokens as k, type PromptSection as l, type ToolDefinition as m };
800
+ export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type FallbackReason as F, type Grounding as G, type HistoryCachePolicy as H, type IntentDeclaration as I, type Message as M, type NormalizedResponse as N, type OutcomeResult as O, type ProviderOverrides as P, type RecordInput as R, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type RecordOutcomeInput as e, type OracleScore as f, type CompileResult as g, type Provider as h, type ChainEntry as i, type CallAttempt as j, CallError as k, type ChainWithGrounding as l, type Constraints as m, type MutationApplied as n, type NormalizedTokens as o, type OutcomeKind as p, type PromptSection as q, type ToolDefinition as r };
@@ -1,4 +1,4 @@
1
- import { f as Provider } from './ir-C3P4gDt0.mjs';
1
+ import { h as Provider } from './ir-CruZBtpK.mjs';
2
2
  import { IntentArchetypeName } from './dialect.mjs';
3
3
 
4
4
  /**
@@ -1,4 +1,4 @@
1
- import { f as Provider } from './ir-CFHU3BUT.js';
1
+ import { h as Provider } from './ir-Wr5lc8Mi.js';
2
2
  import { IntentArchetypeName } from './dialect.js';
3
3
 
4
4
  /**
@@ -1,4 +1,4 @@
1
- import { j as MutationApplied, B as BestPracticeAdvisory, F as FallbackReason, g as CallAttempt } from './ir-CFHU3BUT.js';
1
+ import { n as MutationApplied, B as BestPracticeAdvisory, F as FallbackReason, j as CallAttempt } from './ir-Wr5lc8Mi.js';
2
2
 
3
3
  /**
4
4
  * Glass-Box observability types (alpha.17).
@@ -1,4 +1,4 @@
1
- import { j as MutationApplied, B as BestPracticeAdvisory, F as FallbackReason, g as CallAttempt } from './ir-C3P4gDt0.mjs';
1
+ import { n as MutationApplied, B as BestPracticeAdvisory, F as FallbackReason, j as CallAttempt } from './ir-CruZBtpK.mjs';
2
2
 
3
3
  /**
4
4
  * Glass-Box observability types (alpha.17).
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@warmdrift/kgauto-compiler",
3
- "version": "2.0.0-alpha.19",
3
+ "version": "2.0.0-alpha.21",
4
4
  "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",