@warmdrift/kgauto-compiler 2.0.0-alpha.13 → 2.0.0-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/profiles.js CHANGED
@@ -644,6 +644,211 @@ var PROFILES_RAW = [
644
644
  hunt: 4
645
645
  // sequential tools — same as V4-Flash
646
646
  }
647
+ },
648
+ // ── Auto-onboarded (UNVERIFIED) ──
649
+ // Cloned by scripts/auto-onboard-models.mjs from a same-family template.
650
+ // Each entry's pricing/context/cliffs/lowering reflects the template, NOT
651
+ // provider docs. Verify before promoting status to 'current' (L-049/L-081).
652
+ {
653
+ id: "gemini-3-flash-preview",
654
+ verifiedAgainstDocs: "UNVERIFIED-AUTO-ONBOARD",
655
+ provider: "google",
656
+ status: "preview",
657
+ maxContextTokens: 1048576,
658
+ maxOutputTokens: 65535,
659
+ maxTools: 128,
660
+ parallelToolCalls: true,
661
+ structuredOutput: "native",
662
+ systemPromptMode: "separate",
663
+ streaming: true,
664
+ cliffs: [
665
+ {
666
+ metric: "input_tokens",
667
+ threshold: 8e3,
668
+ action: "downgrade_quality_warning",
669
+ reason: "Quality degrades significantly above ~8K context tokens"
670
+ },
671
+ {
672
+ metric: "tool_count",
673
+ threshold: 20,
674
+ action: "drop_to_top_relevant",
675
+ reason: "Tool reliability drops above ~20 tools (despite 128 hard limit)"
676
+ },
677
+ {
678
+ metric: "thinking_with_short_output",
679
+ threshold: 1,
680
+ action: "force_thinking_budget_zero",
681
+ reason: "Thinking tokens consume maxOutputTokens \u2014 empty response if drained"
682
+ },
683
+ {
684
+ // s11 trust artifact (2026-05-02): brain showed 5/5 empty rate on
685
+ // tt-intelligence/summarize/gemini-2.5-flash with tools offered.
686
+ // v1's disable_thinking_for_short_output already fired and didn't
687
+ // help — disabling thinking is necessary but not sufficient. Tools
688
+ // present + summarize intent confuses Flash into a no-output state
689
+ // (likely tool-decision purgatory). Strip tools entirely for this
690
+ // archetype on this model.
691
+ metric: "tool_count",
692
+ threshold: 1,
693
+ whenIntent: "summarize",
694
+ action: "strip_tools",
695
+ reason: "Gemini Flash returns empty when summarize intent has tools offered (5/5 empty rate observed in v1 prod 2026-04-19, replayed into v2 brain 2026-04-29)"
696
+ }
697
+ ],
698
+ costInputPer1m: 0.3,
699
+ costOutputPer1m: 2.5,
700
+ lowering: {
701
+ ...GOOGLE_LOWERING_BASE,
702
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
703
+ },
704
+ recovery: [
705
+ {
706
+ signal: "empty_response_after_tool",
707
+ action: "retry_with_params",
708
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
709
+ maxRetries: 1,
710
+ reason: "Known: empty after tool result \u2014 retry with thinking off"
711
+ },
712
+ {
713
+ signal: "empty_response",
714
+ action: "retry_with_params",
715
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
716
+ maxRetries: 1,
717
+ reason: "Empty response \u2014 try with thinking off"
718
+ },
719
+ {
720
+ signal: "malformed_function_call",
721
+ action: "escalate",
722
+ reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
723
+ }
724
+ ],
725
+ strengths: ["speed", "volume", "classification", "1m_context", "cost"],
726
+ weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
727
+ notes: "AUTO-ONBOARDED 2026-05-16 from `gemini-2.5-flash`. Pricing, context, cliffs are template-cloned and UNVERIFIED \u2014 confirm against provider docs before promoting status to 'current'.",
728
+ // Master plan §6.2 anchor. Tier 0 for hunt (parallel tool throughput
729
+ // 15-75 calls/step beats Sonnet — L-040), summarize, classify.
730
+ archetypePerf: {
731
+ hunt: 9,
732
+ // L-040: parallel tool throughput 15-75/step
733
+ classify: 7,
734
+ // brain-validated, 218 rows
735
+ summarize: 7,
736
+ // brain-validated; cliff strips tools when present
737
+ transform: 7,
738
+ ask: 7,
739
+ generate: 6,
740
+ plan: 5,
741
+ extract: 6,
742
+ // alpha.8 MAX_TOKENS history on structured output
743
+ critique: 4
744
+ // reasoning shallower than Sonnet/Opus
745
+ }
746
+ },
747
+ {
748
+ // ── Gemini 3.1 Flash-Lite (cloned from Gemini 2.5 Flash-Lite; the notes below describe that template) ──
749
+ // Onboarded 2026-05-13 (s22) after the model-release watcher surfaced
750
+ // it as an UNREGISTERED + NEW candidate. Released by Google July 2025,
751
+ // stable. Positioned BELOW Flash on the cost/perf frontier:
752
+ // input $0.10/M (Flash $0.30/M) — 3× cheaper
753
+ // output $0.40/M (Flash $2.50/M) — 6× cheaper
754
+ // cache $0.01/M — 1/10 of input (vs Flash 0.25 discount)
755
+ // Cliffs are HYPOTHESIZED from Flash's known failure modes — Flash-Lite
756
+ // is a smaller sibling, so we inherit Flash's cliffs at equal-or-tighter
757
+ // thresholds. The brain will validate/relax these as evidence accumulates
758
+ // per (archetype, model) tuple. Currently ZERO brain rows for this model.
759
+ id: "gemini-3.1-flash-lite",
760
+ verifiedAgainstDocs: "UNVERIFIED-AUTO-ONBOARD",
761
+ provider: "google",
762
+ status: "preview",
763
+ maxContextTokens: 1048576,
764
+ maxOutputTokens: 65536,
765
+ maxTools: 128,
766
+ parallelToolCalls: true,
767
+ structuredOutput: "native",
768
+ systemPromptMode: "separate",
769
+ streaming: true,
770
+ cliffs: [
771
+ {
772
+ metric: "input_tokens",
773
+ threshold: 8e3,
774
+ action: "downgrade_quality_warning",
775
+ reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
776
+ },
777
+ {
778
+ metric: "tool_count",
779
+ threshold: 10,
780
+ action: "drop_to_top_relevant",
781
+ reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
782
+ },
783
+ {
784
+ metric: "thinking_with_short_output",
785
+ threshold: 1,
786
+ action: "force_thinking_budget_zero",
787
+ reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
788
+ },
789
+ {
790
+ // Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
791
+ // trust artifact, kgauto commit 3872832). Flash-Lite shares the
792
+ // same architectural family — almost certainly inherits this cliff.
793
+ // Ship the guard preemptively; brain telemetry confirms or relaxes.
794
+ metric: "tool_count",
795
+ threshold: 1,
796
+ whenIntent: "summarize",
797
+ action: "strip_tools",
798
+ reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
799
+ }
800
+ ],
801
+ costInputPer1m: 0.1,
802
+ costOutputPer1m: 0.4,
803
+ lowering: {
804
+ ...GOOGLE_LOWERING_BASE,
805
+ // Cache discount 10× (vs Flash 4×) — Google's spec is $0.01/M cache vs
806
+ // $0.10/M input. Material for repeat-prompt workloads (classify shape).
807
+ cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
808
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
809
+ },
810
+ recovery: [
811
+ {
812
+ signal: "empty_response_after_tool",
813
+ action: "retry_with_params",
814
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
815
+ maxRetries: 1,
816
+ reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
817
+ },
818
+ {
819
+ signal: "empty_response",
820
+ action: "retry_with_params",
821
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
822
+ maxRetries: 1,
823
+ reason: "Empty response \u2014 try with thinking off."
824
+ },
825
+ {
826
+ signal: "malformed_function_call",
827
+ action: "escalate",
828
+ reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
829
+ }
830
+ ],
831
+ strengths: ["lowest_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
832
+ weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
833
+ notes: "AUTO-ONBOARDED 2026-05-16 from `gemini-2.5-flash-lite`. Pricing, context, cliffs are template-cloned and UNVERIFIED \u2014 confirm against provider docs before promoting status to 'current'.",
834
+ // Tier 3 emergency floor for summarize/classify chains. ZERO brain
835
+ // rows — all values are starter hypotheses anchored to "smaller
836
+ // sibling of Flash, at-or-below Flash perf on every archetype." The
837
+ // first 50 brain rows per archetype will validate or relax these.
838
+ archetypePerf: {
839
+ classify: 6,
840
+ // starter hypothesis — verify (Flash is 7, lite likely ≤)
841
+ summarize: 6,
842
+ // starter hypothesis — verify; cliff strips tools
843
+ transform: 6,
844
+ // starter hypothesis — verify
845
+ ask: 5,
846
+ hunt: 5,
847
+ generate: 4,
848
+ extract: 4,
849
+ plan: 3,
850
+ critique: 3
851
+ }
647
852
  }
648
853
  ];
649
854
  var ALIASES = {
package/dist/profiles.mjs CHANGED
@@ -6,7 +6,7 @@ import {
6
6
  getProfile,
7
7
  profilesByProvider,
8
8
  tryGetProfile
9
- } from "./chunk-DICCTQLG.mjs";
9
+ } from "./chunk-SFF5EVTL.mjs";
10
10
  export {
11
11
  ALIASES,
12
12
  _setProfileBrainHook,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@warmdrift/kgauto-compiler",
3
- "version": "2.0.0-alpha.13",
3
+ "version": "2.0.0-alpha.15",
4
4
  "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",