@warmdrift/kgauto-compiler 2.0.0-alpha.15 → 2.0.0-alpha.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -615,6 +615,215 @@ var PROFILES_RAW = [
615
615
  // sequential tools — same as V4-Flash
616
616
  }
617
617
  },
618
+ // ── OpenAI ──
619
+ // alpha.16 (2026-05-17): close the half-supported provider gap. env.ts
620
+ // already registered OPENAI_API_KEY, and executeOpenAI, normalizeOpenAILike,
621
+ // and lowerOpenAI all existed; only the profile entries were missing, so the
622
+ // alpha.10 auto-filter would mark openai-keyed models reachable but
623
+ // there were no profiles to filter IN. Half-supported is now fully
624
+ // supported. PB request `openai-provider-profiles` (2026-05-16).
625
+ //
626
+ // Profile data verified against developers.openai.com/api/docs/pricing
627
+ // + per-model pages 2026-05-17. L-049/L-081 step-zero: no AI-trained
628
+ // numbers — fetched live from OpenAI's docs. As of 2026-05, OpenAI's
629
+ // current flagship is gpt-5.5 (2025-12 cutoff); gpt-5.4-{base,mini,nano}
630
+ // are the workhorse family. gpt-4.1 + gpt-4o are legacy.
631
+ //
632
+ // Both 5.5 and 5.4 carry a 272K input-token pricing cliff (2x input,
633
+ // 1.5x output beyond that). Modeled as a `downgrade_quality_warning`
634
+ // cliff because it ranks the model down at large-context shapes — the
635
+ // semantics of "this model is now 2x more expensive" map onto the
636
+ // existing penalty mechanism. Cost-watcher will catch high-context
637
+ // spikes empirically; the cliff prevents naive routing into the doubled
638
+ // pricing zone.
639
+ {
640
+ id: "gpt-5.5",
641
+ verifiedAgainstDocs: "2026-05-17",
642
+ provider: "openai",
643
+ status: "current",
644
+ maxContextTokens: 105e4,
645
+ maxOutputTokens: 128e3,
646
+ maxTools: 64,
647
+ parallelToolCalls: true,
648
+ structuredOutput: "native",
649
+ systemPromptMode: "inline",
650
+ streaming: true,
651
+ cliffs: [
652
+ {
653
+ metric: "input_tokens",
654
+ threshold: 272e3,
655
+ action: "downgrade_quality_warning",
656
+ reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
657
+ }
658
+ ],
659
+ costInputPer1m: 5,
660
+ costOutputPer1m: 30,
661
+ lowering: {
662
+ system: { mode: "inline" },
663
+ // OpenAI caching is implicit: it is auto-applied to repeated prefixes
664
+ // of ≥1024 tokens and reported via prompt_tokens_details.cached_tokens.
665
+ // No wire-format marker. Cached input is billed at 0.1x ($0.50 vs $5.00).
666
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
667
+ tools: { format: "openai" }
668
+ },
669
+ recovery: [
670
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
671
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
672
+ ],
673
+ strengths: ["reasoning", "agentic_coding", "long_context", "structured_output", "reliable_tool_use", "reasoning_effort_knob"],
674
+ weaknesses: ["cost", "pricing_cliff_at_272k"],
675
+ notes: "OpenAI frontier (2026-05). 1M context (1.05M total), 128K max output, 2025-12 cutoff. Reasoning effort knob (none/low/medium/high/xhigh). Pricing cliff at 272K input.",
676
+ // Frontier-tier perf hypothesis. Anchored to Opus 4.7 row (similar
677
+ // price/positioning). Brain evidence will refine; no telemetry yet.
678
+ archetypePerf: {
679
+ critique: 9,
680
+ plan: 9,
681
+ generate: 9,
682
+ ask: 9,
683
+ extract: 9,
684
+ transform: 9,
685
+ hunt: 8,
686
+ // parallel tool support good but cliff at 272K hurts deep multi-step
687
+ summarize: 7,
688
+ // overkill for tolerant archetype
689
+ classify: 7
690
+ // overkill; cheaper models cover this
691
+ }
692
+ },
693
+ {
694
+ id: "gpt-5.4",
695
+ verifiedAgainstDocs: "2026-05-17",
696
+ provider: "openai",
697
+ status: "current",
698
+ maxContextTokens: 105e4,
699
+ maxOutputTokens: 128e3,
700
+ maxTools: 64,
701
+ parallelToolCalls: true,
702
+ structuredOutput: "native",
703
+ systemPromptMode: "inline",
704
+ streaming: true,
705
+ cliffs: [
706
+ {
707
+ metric: "input_tokens",
708
+ threshold: 272e3,
709
+ action: "downgrade_quality_warning",
710
+ reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
711
+ }
712
+ ],
713
+ costInputPer1m: 2.5,
714
+ costOutputPer1m: 15,
715
+ lowering: {
716
+ system: { mode: "inline" },
717
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
718
+ tools: { format: "openai" }
719
+ },
720
+ recovery: [
721
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
722
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
723
+ ],
724
+ strengths: ["reasoning", "long_context", "structured_output", "reliable_tool_use"],
725
+ weaknesses: ["pricing_cliff_at_272k"],
726
+ notes: "OpenAI workhorse (2026-05). 1M context (1.05M total), 128K max output, 2025-08 cutoff. Pricing cliff at 272K input. Pairs cleanly with Sonnet 4.6 on cost ($2.50/$15.00 vs $3.00/$15.00).",
727
+ // Anchored to Sonnet 4.6 row (similar price/positioning). Slight
728
+ // anthropic-side edge on agentic coding per master plan vibe.
729
+ archetypePerf: {
730
+ critique: 8,
731
+ plan: 8,
732
+ generate: 8,
733
+ ask: 8,
734
+ extract: 8,
735
+ transform: 8,
736
+ hunt: 7,
737
+ summarize: 7,
738
+ classify: 7
739
+ }
740
+ },
741
+ {
742
+ id: "gpt-5.4-mini",
743
+ verifiedAgainstDocs: "2026-05-17",
744
+ provider: "openai",
745
+ status: "current",
746
+ maxContextTokens: 4e5,
747
+ maxOutputTokens: 128e3,
748
+ maxTools: 64,
749
+ parallelToolCalls: true,
750
+ structuredOutput: "native",
751
+ systemPromptMode: "inline",
752
+ streaming: true,
753
+ cliffs: [],
754
+ costInputPer1m: 0.75,
755
+ costOutputPer1m: 4.5,
756
+ lowering: {
757
+ system: { mode: "inline" },
758
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
759
+ tools: { format: "openai" }
760
+ },
761
+ recovery: [
762
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
763
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
764
+ ],
765
+ strengths: ["cost", "speed", "agentic_coding", "structured_output", "reliable_tool_use"],
766
+ weaknesses: ["reasoning_depth"],
767
+ notes: "OpenAI mini-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. OpenAI describes as 'strongest mini model for coding, computer use, subagents.' Cache discount 10x ($0.075 input).",
768
+ // Mini-tier hypothesis. Anchored to Haiku 4.5 + Flash row pricing.
769
+ // Cost is slightly higher than Haiku ($0.75 vs $0.50 input) but
770
+ // OpenAI claims strong coding/subagent perf.
771
+ archetypePerf: {
772
+ ask: 7,
773
+ generate: 7,
774
+ extract: 7,
775
+ transform: 7,
776
+ classify: 7,
777
+ summarize: 7,
778
+ hunt: 7,
779
+ plan: 6,
780
+ critique: 5
781
+ // reasoning depth gap — frontier models handle this
782
+ }
783
+ },
784
+ {
785
+ id: "gpt-5.4-nano",
786
+ verifiedAgainstDocs: "2026-05-17",
787
+ provider: "openai",
788
+ status: "current",
789
+ maxContextTokens: 4e5,
790
+ maxOutputTokens: 128e3,
791
+ maxTools: 64,
792
+ parallelToolCalls: true,
793
+ structuredOutput: "native",
794
+ systemPromptMode: "inline",
795
+ streaming: true,
796
+ cliffs: [],
797
+ costInputPer1m: 0.2,
798
+ costOutputPer1m: 1.25,
799
+ lowering: {
800
+ system: { mode: "inline" },
801
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
802
+ tools: { format: "openai" }
803
+ },
804
+ recovery: [
805
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
806
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
807
+ ],
808
+ strengths: ["cost", "speed", "volume", "structured_output"],
809
+ weaknesses: ["reasoning_depth", "no_computer_use"],
810
+ notes: "OpenAI nano-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. 'Cheapest GPT-5.4-class for simple high-volume tasks.' No fine-tuning, no computer-use tools. Cache discount 10x.",
811
+ // Nano-tier. Anchored to Flash-Lite row ($0.10/$0.40 vs nano's
812
+ // $0.20/$1.25). Slightly more expensive than Flash-Lite but with
813
+ // OpenAI brand reliability. Good fit for classify/summarize floor.
814
+ archetypePerf: {
815
+ classify: 7,
816
+ summarize: 6,
817
+ ask: 6,
818
+ transform: 6,
819
+ extract: 6,
820
+ generate: 5,
821
+ hunt: 5,
822
+ plan: 4,
823
+ critique: 3
824
+ // not for reasoning archetypes
825
+ }
826
+ },
618
827
  // ── Auto-onboarded (UNVERIFIED) ──
619
828
  // Cloned by scripts/auto-onboard-models.mjs from a same-family template.
620
829
  // Each entry's pricing/context/cliffs/lowering reflects the template, NOT
package/dist/index.d.mts CHANGED
@@ -1,5 +1,5 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory, f as Provider } from './profiles-DTnIzGsA.mjs';
2
- export { g as ALIASES, h as CacheStrategy, i as CallAttempt, j as CallError, k as CliffRule, l as Constraints, F as FallbackReason, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, m as Message, n as MutationApplied, o as NormalizedTokens, p as PromptSection, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DTnIzGsA.mjs';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory, f as Provider } from './profiles-BoLYdl7F.mjs';
2
+ export { g as ALIASES, h as CacheStrategy, i as CallAttempt, j as CallError, k as CliffRule, l as Constraints, F as FallbackReason, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, m as Message, n as MutationApplied, o as NormalizedTokens, p as PromptSection, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BoLYdl7F.mjs';
3
3
  import { IntentArchetypeName } from './dialect.mjs';
4
4
  export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
5
5
 
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory, f as Provider } from './profiles-D0y6aLk0.js';
2
- export { g as ALIASES, h as CacheStrategy, i as CallAttempt, j as CallError, k as CliffRule, l as Constraints, F as FallbackReason, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, m as Message, n as MutationApplied, o as NormalizedTokens, p as PromptSection, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-D0y6aLk0.js';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory, f as Provider } from './profiles-CVB2_5C8.js';
2
+ export { g as ALIASES, h as CacheStrategy, i as CallAttempt, j as CallError, k as CliffRule, l as Constraints, F as FallbackReason, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, m as Message, n as MutationApplied, o as NormalizedTokens, p as PromptSection, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CVB2_5C8.js';
3
3
  import { IntentArchetypeName } from './dialect.js';
4
4
  export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
5
5
 
package/dist/index.js CHANGED
@@ -1475,6 +1475,215 @@ var PROFILES_RAW = [
1475
1475
  // sequential tools — same as V4-Flash
1476
1476
  }
1477
1477
  },
1478
+ // ── OpenAI ──
1479
+ // alpha.16 (2026-05-17): close the half-supported provider gap. env.ts
1480
+ // already registered OPENAI_API_KEY, and executeOpenAI, normalizeOpenAILike,
1481
+ // and lowerOpenAI all existed; only the profile entries were missing, so the
1482
+ // alpha.10 auto-filter would mark openai-keyed models reachable but
1483
+ // there were no profiles to filter IN. Half-supported is now fully
1484
+ // supported. PB request `openai-provider-profiles` (2026-05-16).
1485
+ //
1486
+ // Profile data verified against developers.openai.com/api/docs/pricing
1487
+ // + per-model pages 2026-05-17. L-049/L-081 step-zero: no AI-trained
1488
+ // numbers — fetched live from OpenAI's docs. As of 2026-05, OpenAI's
1489
+ // current flagship is gpt-5.5 (2025-12 cutoff); gpt-5.4-{base,mini,nano}
1490
+ // are the workhorse family. gpt-4.1 + gpt-4o are legacy.
1491
+ //
1492
+ // Both 5.5 and 5.4 carry a 272K input-token pricing cliff (2x input,
1493
+ // 1.5x output beyond that). Modeled as a `downgrade_quality_warning`
1494
+ // cliff because it ranks the model down at large-context shapes — the
1495
+ // semantics of "this model is now 2x more expensive" map onto the
1496
+ // existing penalty mechanism. Cost-watcher will catch high-context
1497
+ // spikes empirically; the cliff prevents naive routing into the doubled
1498
+ // pricing zone.
1499
+ {
1500
+ id: "gpt-5.5",
1501
+ verifiedAgainstDocs: "2026-05-17",
1502
+ provider: "openai",
1503
+ status: "current",
1504
+ maxContextTokens: 105e4,
1505
+ maxOutputTokens: 128e3,
1506
+ maxTools: 64,
1507
+ parallelToolCalls: true,
1508
+ structuredOutput: "native",
1509
+ systemPromptMode: "inline",
1510
+ streaming: true,
1511
+ cliffs: [
1512
+ {
1513
+ metric: "input_tokens",
1514
+ threshold: 272e3,
1515
+ action: "downgrade_quality_warning",
1516
+ reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
1517
+ }
1518
+ ],
1519
+ costInputPer1m: 5,
1520
+ costOutputPer1m: 30,
1521
+ lowering: {
1522
+ system: { mode: "inline" },
1523
+ // OpenAI caching is implicit: it is auto-applied to repeated prefixes
1524
+ // of ≥1024 tokens and reported via prompt_tokens_details.cached_tokens.
1525
+ // No wire-format marker. Cached input is billed at 0.1x ($0.50 vs $5.00).
1526
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
1527
+ tools: { format: "openai" }
1528
+ },
1529
+ recovery: [
1530
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
1531
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
1532
+ ],
1533
+ strengths: ["reasoning", "agentic_coding", "long_context", "structured_output", "reliable_tool_use", "reasoning_effort_knob"],
1534
+ weaknesses: ["cost", "pricing_cliff_at_272k"],
1535
+ notes: "OpenAI frontier (2026-05). 1M context (1.05M total), 128K max output, 2025-12 cutoff. Reasoning effort knob (none/low/medium/high/xhigh). Pricing cliff at 272K input.",
1536
+ // Frontier-tier perf hypothesis. Anchored to Opus 4.7 row (similar
1537
+ // price/positioning). Brain evidence will refine; no telemetry yet.
1538
+ archetypePerf: {
1539
+ critique: 9,
1540
+ plan: 9,
1541
+ generate: 9,
1542
+ ask: 9,
1543
+ extract: 9,
1544
+ transform: 9,
1545
+ hunt: 8,
1546
+ // parallel tool support good but cliff at 272K hurts deep multi-step
1547
+ summarize: 7,
1548
+ // overkill for tolerant archetype
1549
+ classify: 7
1550
+ // overkill; cheaper models cover this
1551
+ }
1552
+ },
1553
+ {
1554
+ id: "gpt-5.4",
1555
+ verifiedAgainstDocs: "2026-05-17",
1556
+ provider: "openai",
1557
+ status: "current",
1558
+ maxContextTokens: 105e4,
1559
+ maxOutputTokens: 128e3,
1560
+ maxTools: 64,
1561
+ parallelToolCalls: true,
1562
+ structuredOutput: "native",
1563
+ systemPromptMode: "inline",
1564
+ streaming: true,
1565
+ cliffs: [
1566
+ {
1567
+ metric: "input_tokens",
1568
+ threshold: 272e3,
1569
+ action: "downgrade_quality_warning",
1570
+ reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
1571
+ }
1572
+ ],
1573
+ costInputPer1m: 2.5,
1574
+ costOutputPer1m: 15,
1575
+ lowering: {
1576
+ system: { mode: "inline" },
1577
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
1578
+ tools: { format: "openai" }
1579
+ },
1580
+ recovery: [
1581
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
1582
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
1583
+ ],
1584
+ strengths: ["reasoning", "long_context", "structured_output", "reliable_tool_use"],
1585
+ weaknesses: ["pricing_cliff_at_272k"],
1586
+ notes: "OpenAI workhorse (2026-05). 1M context (1.05M total), 128K max output, 2025-08 cutoff. Pricing cliff at 272K input. Pairs cleanly with Sonnet 4.6 on cost ($2.50/$15.00 vs $3.00/$15.00).",
1587
+ // Anchored to Sonnet 4.6 row (similar price/positioning). Slight
1588
+ // anthropic-side edge on agentic coding per master plan vibe.
1589
+ archetypePerf: {
1590
+ critique: 8,
1591
+ plan: 8,
1592
+ generate: 8,
1593
+ ask: 8,
1594
+ extract: 8,
1595
+ transform: 8,
1596
+ hunt: 7,
1597
+ summarize: 7,
1598
+ classify: 7
1599
+ }
1600
+ },
1601
+ {
1602
+ id: "gpt-5.4-mini",
1603
+ verifiedAgainstDocs: "2026-05-17",
1604
+ provider: "openai",
1605
+ status: "current",
1606
+ maxContextTokens: 4e5,
1607
+ maxOutputTokens: 128e3,
1608
+ maxTools: 64,
1609
+ parallelToolCalls: true,
1610
+ structuredOutput: "native",
1611
+ systemPromptMode: "inline",
1612
+ streaming: true,
1613
+ cliffs: [],
1614
+ costInputPer1m: 0.75,
1615
+ costOutputPer1m: 4.5,
1616
+ lowering: {
1617
+ system: { mode: "inline" },
1618
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
1619
+ tools: { format: "openai" }
1620
+ },
1621
+ recovery: [
1622
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
1623
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
1624
+ ],
1625
+ strengths: ["cost", "speed", "agentic_coding", "structured_output", "reliable_tool_use"],
1626
+ weaknesses: ["reasoning_depth"],
1627
+ notes: "OpenAI mini-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. OpenAI describes as 'strongest mini model for coding, computer use, subagents.' Cache discount 10x ($0.075 input).",
1628
+ // Mini-tier hypothesis. Anchored to Haiku 4.5 + Flash row pricing.
1629
+ // Cost is slightly higher than Haiku ($0.75 vs $0.50 input) but
1630
+ // OpenAI claims strong coding/subagent perf.
1631
+ archetypePerf: {
1632
+ ask: 7,
1633
+ generate: 7,
1634
+ extract: 7,
1635
+ transform: 7,
1636
+ classify: 7,
1637
+ summarize: 7,
1638
+ hunt: 7,
1639
+ plan: 6,
1640
+ critique: 5
1641
+ // reasoning depth gap — frontier models handle this
1642
+ }
1643
+ },
1644
+ {
1645
+ id: "gpt-5.4-nano",
1646
+ verifiedAgainstDocs: "2026-05-17",
1647
+ provider: "openai",
1648
+ status: "current",
1649
+ maxContextTokens: 4e5,
1650
+ maxOutputTokens: 128e3,
1651
+ maxTools: 64,
1652
+ parallelToolCalls: true,
1653
+ structuredOutput: "native",
1654
+ systemPromptMode: "inline",
1655
+ streaming: true,
1656
+ cliffs: [],
1657
+ costInputPer1m: 0.2,
1658
+ costOutputPer1m: 1.25,
1659
+ lowering: {
1660
+ system: { mode: "inline" },
1661
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
1662
+ tools: { format: "openai" }
1663
+ },
1664
+ recovery: [
1665
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
1666
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
1667
+ ],
1668
+ strengths: ["cost", "speed", "volume", "structured_output"],
1669
+ weaknesses: ["reasoning_depth", "no_computer_use"],
1670
+ notes: "OpenAI nano-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. 'Cheapest GPT-5.4-class for simple high-volume tasks.' No fine-tuning, no computer-use tools. Cache discount 10x.",
1671
+ // Nano-tier. Anchored to Flash-Lite row ($0.10/$0.40 vs nano's
1672
+ // $0.20/$1.25). Slightly more expensive than Flash-Lite but with
1673
+ // OpenAI brand reliability. Good fit for classify/summarize floor.
1674
+ archetypePerf: {
1675
+ classify: 7,
1676
+ summarize: 6,
1677
+ ask: 6,
1678
+ transform: 6,
1679
+ extract: 6,
1680
+ generate: 5,
1681
+ hunt: 5,
1682
+ plan: 4,
1683
+ critique: 3
1684
+ // not for reasoning archetypes
1685
+ }
1686
+ },
1478
1687
  // ── Auto-onboarded (UNVERIFIED) ──
1479
1688
  // Cloned by scripts/auto-onboard-models.mjs from a same-family template.
1480
1689
  // Each entry's pricing/context/cliffs/lowering reflects the template, NOT
@@ -2563,10 +2772,14 @@ var loadChainsFromBrain = createBrainQueryCache({
2563
2772
  // src/fallback.ts
2564
2773
  var STARTER_CHAINS = {
2565
2774
  // Reasoning floor — never degrade. Walk UP on 429 to Opus → cross-provider.
2775
+ // alpha.16: gpt-5.5 appended as third-provider critique floor (frontier-tier,
2776
+ // archetypePerf=9). Cross-provider-tail invariant has somewhere to land when
2777
+ // both Anthropic + Google are unreachable (consumer adds only OpenAI key).
2566
2778
  critique: [
2567
2779
  "claude-opus-4-7",
2568
2780
  "claude-sonnet-4-6",
2569
- "gemini-2.5-pro"
2781
+ "gemini-2.5-pro",
2782
+ "gpt-5.5"
2570
2783
  ],
2571
2784
  // Reasoning matters — Sonnet primary; walk UP to Opus on 429 (rare exception
2572
2785
  // to "always cheaper"); cross-provider via Pro; DeepSeek Pro as tier 3 floor.
@@ -2577,25 +2790,29 @@ var STARTER_CHAINS = {
2577
2790
  "deepseek-v4-pro"
2578
2791
  ],
2579
2792
  // Quality + cost match. Walk Sonnet → Haiku same-provider, Pro cross,
2580
- // Flash floor for the open-posture chain.
2793
+ // gpt-5.4-mini as third-provider tail (alpha.16 — closes the mono-Anthropic
2794
+ // gap when consumer has only ANTHROPIC + OPENAI keys; archetypePerf=7).
2581
2795
  generate: [
2582
2796
  "claude-sonnet-4-6",
2583
2797
  "claude-haiku-4-5",
2584
2798
  "gemini-2.5-pro",
2585
- "gemini-2.5-flash"
2799
+ "gpt-5.4-mini"
2586
2800
  ],
2587
2801
  ask: [
2588
2802
  "claude-sonnet-4-6",
2589
2803
  "claude-haiku-4-5",
2590
2804
  "gemini-2.5-pro",
2591
- "gemini-2.5-flash"
2805
+ "gpt-5.4-mini"
2592
2806
  ],
2593
2807
  // Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff),
2594
- // DeepSeek skipped (no brain evidence). Floor at Haiku.
2808
+ // DeepSeek skipped (no brain evidence). Floor at Haiku. alpha.16: gpt-5.4
2809
+ // appended as third-provider extract floor (archetypePerf=8, native
2810
+ // structured-output support).
2595
2811
  extract: [
2596
2812
  "claude-sonnet-4-6",
2597
2813
  "claude-haiku-4-5",
2598
- "gemini-2.5-pro"
2814
+ "gemini-2.5-pro",
2815
+ "gpt-5.4"
2599
2816
  ],
2600
2817
  // Forgiving archetype — Sonnet primary but Flash safely floors it.
2601
2818
  transform: [
@@ -2765,6 +2982,47 @@ async function call(ir, opts = {}) {
2765
2982
  }
2766
2983
  }
2767
2984
  }
2985
+ let policyBlockedFiltered;
2986
+ if (opts.policy?.blockedModels && opts.policy.blockedModels.length > 0) {
2987
+ const blocked = new Set(opts.policy.blockedModels);
2988
+ const filtered = [];
2989
+ const dropped = [];
2990
+ for (const t of targetsToTry) {
2991
+ if (blocked.has(t)) {
2992
+ dropped.push(t);
2993
+ } else {
2994
+ filtered.push(t);
2995
+ }
2996
+ }
2997
+ if (dropped.length > 0) {
2998
+ policyBlockedFiltered = dropped;
2999
+ targetsToTry = filtered;
3000
+ }
3001
+ if (targetsToTry.length === 0) {
3002
+ const latencyMs2 = Date.now() - start;
3003
+ await record({
3004
+ handle: initial.handle,
3005
+ tokensIn: 0,
3006
+ tokensOut: 0,
3007
+ latencyMs: latencyMs2,
3008
+ success: false,
3009
+ errorType: "all_blocked_by_policy",
3010
+ promptPreview: extractPromptPreview(ir)
3011
+ });
3012
+ const blockedAttempts = dropped.map((m) => ({
3013
+ model: m,
3014
+ status: "terminal",
3015
+ errorCode: "blocked_by_policy",
3016
+ message: `Skipped \u2014 model ${m} is in CompilePolicy.blockedModels`
3017
+ }));
3018
+ throw new CallError(
3019
+ `call(): all chain targets blocked by CompilePolicy.blockedModels: [${dropped.join(", ")}]`,
3020
+ blockedAttempts,
3021
+ void 0,
3022
+ "all_blocked_by_policy"
3023
+ );
3024
+ }
3025
+ }
2768
3026
  let activeCompile = initial;
2769
3027
  let lastErr;
2770
3028
  const failedProviders = /* @__PURE__ */ new Set();
@@ -2837,7 +3095,8 @@ async function call(ir, opts = {}) {
2837
3095
  servedBy: targetModel,
2838
3096
  fellOverFrom: fellOver ? initial.target : void 0,
2839
3097
  fallbackReason: fellOver ? normalizeFallbackReason(attempts) : void 0,
2840
- unreachableFiltered
3098
+ unreachableFiltered,
3099
+ policyBlockedFiltered
2841
3100
  };
2842
3101
  }
2843
3102
  attempts.push({
@@ -2866,8 +3125,9 @@ async function call(ir, opts = {}) {
2866
3125
  promptPreview: extractPromptPreview(ir)
2867
3126
  });
2868
3127
  const filteredNote = unreachableFiltered && unreachableFiltered.length > 0 ? ` (also auto-filtered: [${unreachableFiltered.join(", ")}] \u2014 no API key)` : "";
3128
+ const blockedNote = policyBlockedFiltered && policyBlockedFiltered.length > 0 ? ` (also policy-blocked: [${policyBlockedFiltered.join(", ")}])` : "";
2869
3129
  throw new CallError(
2870
- `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}`,
3130
+ `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}${blockedNote}`,
2871
3131
  attempts,
2872
3132
  lastErr?.status,
2873
3133
  lastErr?.errorCode
package/dist/index.mjs CHANGED
@@ -17,7 +17,7 @@ import {
17
17
  getProfile,
18
18
  profilesByProvider,
19
19
  tryGetProfile
20
- } from "./chunk-SFF5EVTL.mjs";
20
+ } from "./chunk-7MTHFSNY.mjs";
21
21
 
22
22
  // src/tokenizer.ts
23
23
  var tokenizerImpl = defaultCharBasedCounter;
@@ -1568,10 +1568,14 @@ var loadChainsFromBrain = createBrainQueryCache({
1568
1568
  // src/fallback.ts
1569
1569
  var STARTER_CHAINS = {
1570
1570
  // Reasoning floor — never degrade. Walk UP on 429 to Opus → cross-provider.
1571
+ // alpha.16: gpt-5.5 appended as third-provider critique floor (frontier-tier,
1572
+ // archetypePerf=9). Cross-provider-tail invariant has somewhere to land when
1573
+ // both Anthropic + Google are unreachable (consumer adds only OpenAI key).
1571
1574
  critique: [
1572
1575
  "claude-opus-4-7",
1573
1576
  "claude-sonnet-4-6",
1574
- "gemini-2.5-pro"
1577
+ "gemini-2.5-pro",
1578
+ "gpt-5.5"
1575
1579
  ],
1576
1580
  // Reasoning matters — Sonnet primary; walk UP to Opus on 429 (rare exception
1577
1581
  // to "always cheaper"); cross-provider via Pro; DeepSeek Pro as tier 3 floor.
@@ -1582,25 +1586,29 @@ var STARTER_CHAINS = {
1582
1586
  "deepseek-v4-pro"
1583
1587
  ],
1584
1588
  // Quality + cost match. Walk Sonnet → Haiku same-provider, Pro cross,
1585
- // Flash floor for the open-posture chain.
1589
+ // gpt-5.4-mini as third-provider tail (alpha.16 — closes the mono-Anthropic
1590
+ // gap when consumer has only ANTHROPIC + OPENAI keys; archetypePerf=7).
1586
1591
  generate: [
1587
1592
  "claude-sonnet-4-6",
1588
1593
  "claude-haiku-4-5",
1589
1594
  "gemini-2.5-pro",
1590
- "gemini-2.5-flash"
1595
+ "gpt-5.4-mini"
1591
1596
  ],
1592
1597
  ask: [
1593
1598
  "claude-sonnet-4-6",
1594
1599
  "claude-haiku-4-5",
1595
1600
  "gemini-2.5-pro",
1596
- "gemini-2.5-flash"
1601
+ "gpt-5.4-mini"
1597
1602
  ],
1598
1603
  // Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff),
1599
- // DeepSeek skipped (no brain evidence). Floor at Haiku.
1604
+ // DeepSeek skipped (no brain evidence). Floor at Haiku. alpha.16: gpt-5.4
1605
+ // appended as third-provider extract floor (archetypePerf=8, native
1606
+ // structured-output support).
1600
1607
  extract: [
1601
1608
  "claude-sonnet-4-6",
1602
1609
  "claude-haiku-4-5",
1603
- "gemini-2.5-pro"
1610
+ "gemini-2.5-pro",
1611
+ "gpt-5.4"
1604
1612
  ],
1605
1613
  // Forgiving archetype — Sonnet primary but Flash safely floors it.
1606
1614
  transform: [
@@ -1770,6 +1778,47 @@ async function call(ir, opts = {}) {
1770
1778
  }
1771
1779
  }
1772
1780
  }
1781
+ let policyBlockedFiltered;
1782
+ if (opts.policy?.blockedModels && opts.policy.blockedModels.length > 0) {
1783
+ const blocked = new Set(opts.policy.blockedModels);
1784
+ const filtered = [];
1785
+ const dropped = [];
1786
+ for (const t of targetsToTry) {
1787
+ if (blocked.has(t)) {
1788
+ dropped.push(t);
1789
+ } else {
1790
+ filtered.push(t);
1791
+ }
1792
+ }
1793
+ if (dropped.length > 0) {
1794
+ policyBlockedFiltered = dropped;
1795
+ targetsToTry = filtered;
1796
+ }
1797
+ if (targetsToTry.length === 0) {
1798
+ const latencyMs2 = Date.now() - start;
1799
+ await record({
1800
+ handle: initial.handle,
1801
+ tokensIn: 0,
1802
+ tokensOut: 0,
1803
+ latencyMs: latencyMs2,
1804
+ success: false,
1805
+ errorType: "all_blocked_by_policy",
1806
+ promptPreview: extractPromptPreview(ir)
1807
+ });
1808
+ const blockedAttempts = dropped.map((m) => ({
1809
+ model: m,
1810
+ status: "terminal",
1811
+ errorCode: "blocked_by_policy",
1812
+ message: `Skipped \u2014 model ${m} is in CompilePolicy.blockedModels`
1813
+ }));
1814
+ throw new CallError(
1815
+ `call(): all chain targets blocked by CompilePolicy.blockedModels: [${dropped.join(", ")}]`,
1816
+ blockedAttempts,
1817
+ void 0,
1818
+ "all_blocked_by_policy"
1819
+ );
1820
+ }
1821
+ }
1773
1822
  let activeCompile = initial;
1774
1823
  let lastErr;
1775
1824
  const failedProviders = /* @__PURE__ */ new Set();
@@ -1842,7 +1891,8 @@ async function call(ir, opts = {}) {
1842
1891
  servedBy: targetModel,
1843
1892
  fellOverFrom: fellOver ? initial.target : void 0,
1844
1893
  fallbackReason: fellOver ? normalizeFallbackReason(attempts) : void 0,
1845
- unreachableFiltered
1894
+ unreachableFiltered,
1895
+ policyBlockedFiltered
1846
1896
  };
1847
1897
  }
1848
1898
  attempts.push({
@@ -1871,8 +1921,9 @@ async function call(ir, opts = {}) {
1871
1921
  promptPreview: extractPromptPreview(ir)
1872
1922
  });
1873
1923
  const filteredNote = unreachableFiltered && unreachableFiltered.length > 0 ? ` (also auto-filtered: [${unreachableFiltered.join(", ")}] \u2014 no API key)` : "";
1924
+ const blockedNote = policyBlockedFiltered && policyBlockedFiltered.length > 0 ? ` (also policy-blocked: [${policyBlockedFiltered.join(", ")}])` : "";
1874
1925
  throw new CallError(
1875
- `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}`,
1926
+ `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}${blockedNote}`,
1876
1927
  attempts,
1877
1928
  lastErr?.status,
1878
1929
  lastErr?.errorCode
@@ -521,6 +521,23 @@ interface CallResult {
521
521
  * when `noAutoFilter: true`).
522
522
  */
523
523
  unreachableFiltered?: string[];
524
+ /**
525
+ * alpha.16. Models that policy.blockedModels filtering dropped from the
526
+ * fallback walk. Defense-in-depth at the call() boundary — compile()'s
527
+ * passScoreTargets already excludes blocked entries from the initial
528
+ * target + fallbackChain, but if a consumer re-shapes the chain and
529
+ * threads policy through only partially, this filter catches the gap.
530
+ *
531
+ * Resolves TT-40 follow-on `policy-block-not-enforced-on-fallback-chain`
532
+ * (2026-05-15) where mutations_applied recorded the block intent but
533
+ * the call walker landed on the blocked model anyway.
534
+ *
535
+ * Undefined when no filter ran (no blockedModels set). Populated only
536
+ * when filter ran AND dropped at least one entry — empty drops are
537
+ * stored as `undefined` to keep brain telemetry quiet on the common
538
+ * case.
539
+ */
540
+ policyBlockedFiltered?: string[];
524
541
  }
525
542
  /**
526
543
  * Thrown when call() exhausts the fallback chain without success.
@@ -521,6 +521,23 @@ interface CallResult {
521
521
  * when `noAutoFilter: true`).
522
522
  */
523
523
  unreachableFiltered?: string[];
524
+ /**
525
+ * alpha.16. Models that policy.blockedModels filtering dropped from the
526
+ * fallback walk. Defense-in-depth at the call() boundary — compile()'s
527
+ * passScoreTargets already excludes blocked entries from the initial
528
+ * target + fallbackChain, but if a consumer re-shapes the chain and
529
+ * threads policy through only partially, this filter catches the gap.
530
+ *
531
+ * Resolves TT-40 follow-on `policy-block-not-enforced-on-fallback-chain`
532
+ * (2026-05-15) where mutations_applied recorded the block intent but
533
+ * the call walker landed on the blocked model anyway.
534
+ *
535
+ * Undefined when no filter ran (no blockedModels set). Populated only
536
+ * when filter ran AND dropped at least one entry — empty drops are
537
+ * stored as `undefined` to keep brain telemetry quiet on the common
538
+ * case.
539
+ */
540
+ policyBlockedFiltered?: string[];
524
541
  }
525
542
  /**
526
543
  * Thrown when call() exhausts the fallback chain without success.
@@ -1,2 +1,2 @@
1
- export { g as ALIASES, h as CacheStrategy, k as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, _ as _setProfileBrainHook, t as allProfiles, x as allProfilesRaw, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DTnIzGsA.mjs';
1
+ export { g as ALIASES, h as CacheStrategy, k as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, _ as _setProfileBrainHook, t as allProfiles, x as allProfilesRaw, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BoLYdl7F.mjs';
2
2
  import './dialect.mjs';
@@ -1,2 +1,2 @@
1
- export { g as ALIASES, h as CacheStrategy, k as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, _ as _setProfileBrainHook, t as allProfiles, x as allProfilesRaw, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-D0y6aLk0.js';
1
+ export { g as ALIASES, h as CacheStrategy, k as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, _ as _setProfileBrainHook, t as allProfiles, x as allProfilesRaw, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CVB2_5C8.js';
2
2
  import './dialect.js';
package/dist/profiles.js CHANGED
@@ -645,6 +645,215 @@ var PROFILES_RAW = [
645
645
  // sequential tools — same as V4-Flash
646
646
  }
647
647
  },
648
+ // ── OpenAI ──
649
+ // alpha.16 (2026-05-17): close the half-supported provider gap. env.ts
650
+ // already registered OPENAI_API_KEY, and executeOpenAI + normalizeOpenAILike
651
+ // + lowerOpenAI all existed; profile entries were missing, so the
652
+ // alpha.10 auto-filter would mark openai-keyed models reachable but
653
+ // there were no profiles to filter IN. Half-supported is now fully
654
+ // supported. PB request `openai-provider-profiles` (2026-05-16).
655
+ //
656
+ // Profile data verified against developers.openai.com/api/docs/pricing
657
+ // + per-model pages 2026-05-17. L-049/L-081 step-zero: no AI-trained
658
+ // numbers — fetched live from OpenAI's docs. As of 2026-05, OpenAI's
659
+ // current flagship is gpt-5.5 (2025-12 cutoff); gpt-5.4-{base,mini,nano}
660
+ // are the workhorse family. gpt-4.1 + gpt-4o are legacy.
661
+ //
662
+ // Both 5.5 and 5.4 carry a 272K input-token pricing cliff (2x input,
663
+ // 1.5x output beyond that). Modeled as a `downgrade_quality_warning`
664
+ // cliff because it ranks the model down at large-context shapes — the
665
+ // semantics of "this model is now 2x more expensive" map onto the
666
+ // existing penalty mechanism. Cost-watcher will catch high-context
667
+ // spikes empirically; the cliff prevents naive routing into the doubled
668
+ // pricing zone.
669
+ {
670
+ id: "gpt-5.5",
671
+ verifiedAgainstDocs: "2026-05-17",
672
+ provider: "openai",
673
+ status: "current",
674
+ maxContextTokens: 105e4,
675
+ maxOutputTokens: 128e3,
676
+ maxTools: 64,
677
+ parallelToolCalls: true,
678
+ structuredOutput: "native",
679
+ systemPromptMode: "inline",
680
+ streaming: true,
681
+ cliffs: [
682
+ {
683
+ metric: "input_tokens",
684
+ threshold: 272e3,
685
+ action: "downgrade_quality_warning",
686
+ reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
687
+ }
688
+ ],
689
+ costInputPer1m: 5,
690
+ costOutputPer1m: 30,
691
+ lowering: {
692
+ system: { mode: "inline" },
693
+ // OpenAI caching is implicit (auto-applied to repeated prefixes
694
+ // ≥1024 tokens, reported via prompt_tokens_details.cached_tokens). No
695
+ // wire-format marker. Discount: 10x for cached input ($0.50/$5.00).
696
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
697
+ tools: { format: "openai" }
698
+ },
699
+ recovery: [
700
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
701
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
702
+ ],
703
+ strengths: ["reasoning", "agentic_coding", "long_context", "structured_output", "reliable_tool_use", "reasoning_effort_knob"],
704
+ weaknesses: ["cost", "pricing_cliff_at_272k"],
705
+ notes: "OpenAI frontier (2026-05). 1M context (1.05M total), 128K max output, 2025-12 cutoff. Reasoning effort knob (none/low/medium/high/xhigh). Pricing cliff at 272K input.",
706
+ // Frontier-tier perf hypothesis. Anchored to Opus 4.7 row (similar
707
+ // price/positioning). Brain evidence will refine; no telemetry yet.
708
+ archetypePerf: {
709
+ critique: 9,
710
+ plan: 9,
711
+ generate: 9,
712
+ ask: 9,
713
+ extract: 9,
714
+ transform: 9,
715
+ hunt: 8,
716
+ // parallel tool support good but cliff at 272K hurts deep multi-step
717
+ summarize: 7,
718
+ // overkill for tolerant archetype
719
+ classify: 7
720
+ // overkill; cheaper models cover this
721
+ }
722
+ },
723
+ {
724
+ id: "gpt-5.4",
725
+ verifiedAgainstDocs: "2026-05-17",
726
+ provider: "openai",
727
+ status: "current",
728
+ maxContextTokens: 105e4,
729
+ maxOutputTokens: 128e3,
730
+ maxTools: 64,
731
+ parallelToolCalls: true,
732
+ structuredOutput: "native",
733
+ systemPromptMode: "inline",
734
+ streaming: true,
735
+ cliffs: [
736
+ {
737
+ metric: "input_tokens",
738
+ threshold: 272e3,
739
+ action: "downgrade_quality_warning",
740
+ reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
741
+ }
742
+ ],
743
+ costInputPer1m: 2.5,
744
+ costOutputPer1m: 15,
745
+ lowering: {
746
+ system: { mode: "inline" },
747
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
748
+ tools: { format: "openai" }
749
+ },
750
+ recovery: [
751
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
752
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
753
+ ],
754
+ strengths: ["reasoning", "long_context", "structured_output", "reliable_tool_use"],
755
+ weaknesses: ["pricing_cliff_at_272k"],
756
+ notes: "OpenAI workhorse (2026-05). 1M context (1.05M total), 128K max output, 2025-08 cutoff. Pricing cliff at 272K input. Pairs cleanly with Sonnet 4.6 on cost ($2.50/$15.00 vs $3.00/$15.00).",
757
+ // Anchored to Sonnet 4.6 row (similar price/positioning). Slight
758
+ // Anthropic-side edge on agentic coding per master plan vibe.
759
+ archetypePerf: {
760
+ critique: 8,
761
+ plan: 8,
762
+ generate: 8,
763
+ ask: 8,
764
+ extract: 8,
765
+ transform: 8,
766
+ hunt: 7,
767
+ summarize: 7,
768
+ classify: 7
769
+ }
770
+ },
771
+ {
772
+ id: "gpt-5.4-mini",
773
+ verifiedAgainstDocs: "2026-05-17",
774
+ provider: "openai",
775
+ status: "current",
776
+ maxContextTokens: 4e5,
777
+ maxOutputTokens: 128e3,
778
+ maxTools: 64,
779
+ parallelToolCalls: true,
780
+ structuredOutput: "native",
781
+ systemPromptMode: "inline",
782
+ streaming: true,
783
+ cliffs: [],
784
+ costInputPer1m: 0.75,
785
+ costOutputPer1m: 4.5,
786
+ lowering: {
787
+ system: { mode: "inline" },
788
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
789
+ tools: { format: "openai" }
790
+ },
791
+ recovery: [
792
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
793
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
794
+ ],
795
+ strengths: ["cost", "speed", "agentic_coding", "structured_output", "reliable_tool_use"],
796
+ weaknesses: ["reasoning_depth"],
797
+ notes: "OpenAI mini-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. OpenAI describes as 'strongest mini model for coding, computer use, subagents.' Cache discount 10x ($0.075 input).",
798
+ // Mini-tier hypothesis. Anchored to Haiku 4.5 + Flash row pricing.
799
+ // Cost is slightly higher than Haiku ($0.75 vs $0.50 input) but
800
+ // OpenAI claims strong coding/subagent perf.
801
+ archetypePerf: {
802
+ ask: 7,
803
+ generate: 7,
804
+ extract: 7,
805
+ transform: 7,
806
+ classify: 7,
807
+ summarize: 7,
808
+ hunt: 7,
809
+ plan: 6,
810
+ critique: 5
811
+ // reasoning depth gap — frontier models handle this
812
+ }
813
+ },
814
+ {
815
+ id: "gpt-5.4-nano",
816
+ verifiedAgainstDocs: "2026-05-17",
817
+ provider: "openai",
818
+ status: "current",
819
+ maxContextTokens: 4e5,
820
+ maxOutputTokens: 128e3,
821
+ maxTools: 64,
822
+ parallelToolCalls: true,
823
+ structuredOutput: "native",
824
+ systemPromptMode: "inline",
825
+ streaming: true,
826
+ cliffs: [],
827
+ costInputPer1m: 0.2,
828
+ costOutputPer1m: 1.25,
829
+ lowering: {
830
+ system: { mode: "inline" },
831
+ cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
832
+ tools: { format: "openai" }
833
+ },
834
+ recovery: [
835
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
836
+ { signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
837
+ ],
838
+ strengths: ["cost", "speed", "volume", "structured_output"],
839
+ weaknesses: ["reasoning_depth", "no_computer_use"],
840
+ notes: "OpenAI nano-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. 'Cheapest GPT-5.4-class for simple high-volume tasks.' No fine-tuning, no computer-use tools. Cache discount 10x.",
841
+ // Nano-tier. Anchored to Flash-Lite row ($0.10/$0.40 vs nano's
842
+ // $0.20/$1.25). Slightly more expensive than Flash-Lite but with
843
+ // OpenAI brand reliability. Good fit for classify/summarize floor.
844
+ archetypePerf: {
845
+ classify: 7,
846
+ summarize: 6,
847
+ ask: 6,
848
+ transform: 6,
849
+ extract: 6,
850
+ generate: 5,
851
+ hunt: 5,
852
+ plan: 4,
853
+ critique: 3
854
+ // not for reasoning archetypes
855
+ }
856
+ },
648
857
  // ── Auto-onboarded (UNVERIFIED) ──
649
858
  // Cloned by scripts/auto-onboard-models.mjs from a same-family template.
650
859
  // Each entry's pricing/context/cliffs/lowering reflects the template, NOT
package/dist/profiles.mjs CHANGED
@@ -6,7 +6,7 @@ import {
6
6
  getProfile,
7
7
  profilesByProvider,
8
8
  tryGetProfile
9
- } from "./chunk-SFF5EVTL.mjs";
9
+ } from "./chunk-7MTHFSNY.mjs";
10
10
  export {
11
11
  ALIASES,
12
12
  _setProfileBrainHook,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@warmdrift/kgauto-compiler",
3
- "version": "2.0.0-alpha.15",
3
+ "version": "2.0.0-alpha.16",
4
4
  "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",