@warmdrift/kgauto-compiler 2.0.0-alpha.15 → 2.0.0-alpha.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-SFF5EVTL.mjs → chunk-7MTHFSNY.mjs} +209 -0
- package/dist/index.d.mts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +268 -8
- package/dist/index.mjs +60 -9
- package/dist/{profiles-DTnIzGsA.d.mts → profiles-BoLYdl7F.d.mts} +17 -0
- package/dist/{profiles-D0y6aLk0.d.ts → profiles-CVB2_5C8.d.ts} +17 -0
- package/dist/profiles.d.mts +1 -1
- package/dist/profiles.d.ts +1 -1
- package/dist/profiles.js +209 -0
- package/dist/profiles.mjs +1 -1
- package/package.json +1 -1
|
@@ -615,6 +615,215 @@ var PROFILES_RAW = [
|
|
|
615
615
|
// sequential tools — same as V4-Flash
|
|
616
616
|
}
|
|
617
617
|
},
|
|
618
|
+
// ── OpenAI ──
|
|
619
|
+
// alpha.16 (2026-05-17): close the half-supported provider gap. env.ts
|
|
620
|
+
// already registered OPENAI_API_KEY + executeOpenAI + normalizeOpenAILike
|
|
621
|
+
// + lowerOpenAI all existed; profile entries were missing, so the
|
|
622
|
+
// alpha.10 auto-filter would mark openai-keyed models reachable but
|
|
623
|
+
// there were no profiles to filter IN. Half-supported is now fully
|
|
624
|
+
// supported. PB request `openai-provider-profiles` (2026-05-16).
|
|
625
|
+
//
|
|
626
|
+
// Profile data verified against developers.openai.com/api/docs/pricing
|
|
627
|
+
// + per-model pages 2026-05-17. L-049/L-081 step-zero: no AI-trained
|
|
628
|
+
// numbers — fetched live from OpenAI's docs. As of 2026-05, OpenAI's
|
|
629
|
+
// current flagship is gpt-5.5 (2025-12 cutoff); gpt-5.4-{base,mini,nano}
|
|
630
|
+
// are the workhorse family. gpt-4.1 + gpt-4o are legacy.
|
|
631
|
+
//
|
|
632
|
+
// Both 5.5 and 5.4 carry a 272K input-token pricing cliff (2x input,
|
|
633
|
+
// 1.5x output beyond that). Modeled as a `downgrade_quality_warning`
|
|
634
|
+
// cliff because it ranks the model down at large-context shapes — the
|
|
635
|
+
// semantics of "this model is now 2x more expensive" map onto the
|
|
636
|
+
// existing penalty mechanism. Cost-watcher will catch high-context
|
|
637
|
+
// spikes empirically; the cliff prevents naive routing into the doubled
|
|
638
|
+
// pricing zone.
|
|
639
|
+
{
|
|
640
|
+
id: "gpt-5.5",
|
|
641
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
642
|
+
provider: "openai",
|
|
643
|
+
status: "current",
|
|
644
|
+
maxContextTokens: 105e4,
|
|
645
|
+
maxOutputTokens: 128e3,
|
|
646
|
+
maxTools: 64,
|
|
647
|
+
parallelToolCalls: true,
|
|
648
|
+
structuredOutput: "native",
|
|
649
|
+
systemPromptMode: "inline",
|
|
650
|
+
streaming: true,
|
|
651
|
+
cliffs: [
|
|
652
|
+
{
|
|
653
|
+
metric: "input_tokens",
|
|
654
|
+
threshold: 272e3,
|
|
655
|
+
action: "downgrade_quality_warning",
|
|
656
|
+
reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
|
|
657
|
+
}
|
|
658
|
+
],
|
|
659
|
+
costInputPer1m: 5,
|
|
660
|
+
costOutputPer1m: 30,
|
|
661
|
+
lowering: {
|
|
662
|
+
system: { mode: "inline" },
|
|
663
|
+
// OpenAI caching is implicit (auto-applied to repeated prefixes
|
|
664
|
+
// ≥1024 tokens for prompt_tokens_details.cached_tokens). No
|
|
665
|
+
// wire-format marker. Discount: 10x for cached input ($0.50/$5.00).
|
|
666
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
667
|
+
tools: { format: "openai" }
|
|
668
|
+
},
|
|
669
|
+
recovery: [
|
|
670
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
671
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
672
|
+
],
|
|
673
|
+
strengths: ["reasoning", "agentic_coding", "long_context", "structured_output", "reliable_tool_use", "reasoning_effort_knob"],
|
|
674
|
+
weaknesses: ["cost", "pricing_cliff_at_272k"],
|
|
675
|
+
notes: "OpenAI frontier (2026-05). 1M context (1.05M total), 128K max output, 2025-12 cutoff. Reasoning effort knob (none/low/medium/high/xhigh). Pricing cliff at 272K input.",
|
|
676
|
+
// Frontier-tier perf hypothesis. Anchored to Opus 4.7 row (similar
|
|
677
|
+
// price/positioning). Brain evidence will refine; no telemetry yet.
|
|
678
|
+
archetypePerf: {
|
|
679
|
+
critique: 9,
|
|
680
|
+
plan: 9,
|
|
681
|
+
generate: 9,
|
|
682
|
+
ask: 9,
|
|
683
|
+
extract: 9,
|
|
684
|
+
transform: 9,
|
|
685
|
+
hunt: 8,
|
|
686
|
+
// parallel tool support good but cliff at 272K hurts deep multi-step
|
|
687
|
+
summarize: 7,
|
|
688
|
+
// overkill for tolerant archetype
|
|
689
|
+
classify: 7
|
|
690
|
+
// overkill; cheaper models cover this
|
|
691
|
+
}
|
|
692
|
+
},
|
|
693
|
+
{
|
|
694
|
+
id: "gpt-5.4",
|
|
695
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
696
|
+
provider: "openai",
|
|
697
|
+
status: "current",
|
|
698
|
+
maxContextTokens: 105e4,
|
|
699
|
+
maxOutputTokens: 128e3,
|
|
700
|
+
maxTools: 64,
|
|
701
|
+
parallelToolCalls: true,
|
|
702
|
+
structuredOutput: "native",
|
|
703
|
+
systemPromptMode: "inline",
|
|
704
|
+
streaming: true,
|
|
705
|
+
cliffs: [
|
|
706
|
+
{
|
|
707
|
+
metric: "input_tokens",
|
|
708
|
+
threshold: 272e3,
|
|
709
|
+
action: "downgrade_quality_warning",
|
|
710
|
+
reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
|
|
711
|
+
}
|
|
712
|
+
],
|
|
713
|
+
costInputPer1m: 2.5,
|
|
714
|
+
costOutputPer1m: 15,
|
|
715
|
+
lowering: {
|
|
716
|
+
system: { mode: "inline" },
|
|
717
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
718
|
+
tools: { format: "openai" }
|
|
719
|
+
},
|
|
720
|
+
recovery: [
|
|
721
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
722
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
723
|
+
],
|
|
724
|
+
strengths: ["reasoning", "long_context", "structured_output", "reliable_tool_use"],
|
|
725
|
+
weaknesses: ["pricing_cliff_at_272k"],
|
|
726
|
+
notes: "OpenAI workhorse (2026-05). 1M context (1.05M total), 128K max output, 2025-08 cutoff. Pricing cliff at 272K input. Pairs cleanly with Sonnet 4.6 on cost ($2.50/$15.00 vs $3.00/$15.00).",
|
|
727
|
+
// Anchored to Sonnet 4.6 row (similar price/positioning). Slight
|
|
728
|
+
// anthropic-side edge on agentic coding per master plan vibe.
|
|
729
|
+
archetypePerf: {
|
|
730
|
+
critique: 8,
|
|
731
|
+
plan: 8,
|
|
732
|
+
generate: 8,
|
|
733
|
+
ask: 8,
|
|
734
|
+
extract: 8,
|
|
735
|
+
transform: 8,
|
|
736
|
+
hunt: 7,
|
|
737
|
+
summarize: 7,
|
|
738
|
+
classify: 7
|
|
739
|
+
}
|
|
740
|
+
},
|
|
741
|
+
{
|
|
742
|
+
id: "gpt-5.4-mini",
|
|
743
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
744
|
+
provider: "openai",
|
|
745
|
+
status: "current",
|
|
746
|
+
maxContextTokens: 4e5,
|
|
747
|
+
maxOutputTokens: 128e3,
|
|
748
|
+
maxTools: 64,
|
|
749
|
+
parallelToolCalls: true,
|
|
750
|
+
structuredOutput: "native",
|
|
751
|
+
systemPromptMode: "inline",
|
|
752
|
+
streaming: true,
|
|
753
|
+
cliffs: [],
|
|
754
|
+
costInputPer1m: 0.75,
|
|
755
|
+
costOutputPer1m: 4.5,
|
|
756
|
+
lowering: {
|
|
757
|
+
system: { mode: "inline" },
|
|
758
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
759
|
+
tools: { format: "openai" }
|
|
760
|
+
},
|
|
761
|
+
recovery: [
|
|
762
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
763
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
764
|
+
],
|
|
765
|
+
strengths: ["cost", "speed", "agentic_coding", "structured_output", "reliable_tool_use"],
|
|
766
|
+
weaknesses: ["reasoning_depth"],
|
|
767
|
+
notes: "OpenAI mini-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. OpenAI describes as 'strongest mini model for coding, computer use, subagents.' Cache discount 10x ($0.075 input).",
|
|
768
|
+
// Mini-tier hypothesis. Anchored to Haiku 4.5 + Flash row pricing.
|
|
769
|
+
// Cost is slightly higher than Haiku ($0.75 vs $0.50 input) but
|
|
770
|
+
// OpenAI claims strong coding/subagent perf.
|
|
771
|
+
archetypePerf: {
|
|
772
|
+
ask: 7,
|
|
773
|
+
generate: 7,
|
|
774
|
+
extract: 7,
|
|
775
|
+
transform: 7,
|
|
776
|
+
classify: 7,
|
|
777
|
+
summarize: 7,
|
|
778
|
+
hunt: 7,
|
|
779
|
+
plan: 6,
|
|
780
|
+
critique: 5
|
|
781
|
+
// reasoning depth gap — frontier models handle this
|
|
782
|
+
}
|
|
783
|
+
},
|
|
784
|
+
{
|
|
785
|
+
id: "gpt-5.4-nano",
|
|
786
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
787
|
+
provider: "openai",
|
|
788
|
+
status: "current",
|
|
789
|
+
maxContextTokens: 4e5,
|
|
790
|
+
maxOutputTokens: 128e3,
|
|
791
|
+
maxTools: 64,
|
|
792
|
+
parallelToolCalls: true,
|
|
793
|
+
structuredOutput: "native",
|
|
794
|
+
systemPromptMode: "inline",
|
|
795
|
+
streaming: true,
|
|
796
|
+
cliffs: [],
|
|
797
|
+
costInputPer1m: 0.2,
|
|
798
|
+
costOutputPer1m: 1.25,
|
|
799
|
+
lowering: {
|
|
800
|
+
system: { mode: "inline" },
|
|
801
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
802
|
+
tools: { format: "openai" }
|
|
803
|
+
},
|
|
804
|
+
recovery: [
|
|
805
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
806
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
807
|
+
],
|
|
808
|
+
strengths: ["cost", "speed", "volume", "structured_output"],
|
|
809
|
+
weaknesses: ["reasoning_depth", "no_computer_use"],
|
|
810
|
+
notes: "OpenAI nano-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. 'Cheapest GPT-5.4-class for simple high-volume tasks.' No fine-tuning, no computer-use tools. Cache discount 10x.",
|
|
811
|
+
// Nano-tier. Anchored to Flash-Lite row ($0.10/$0.40 vs nano's
|
|
812
|
+
// $0.20/$1.25). Slightly more expensive than Flash-Lite but with
|
|
813
|
+
// OpenAI brand reliability. Good fit for classify/summarize floor.
|
|
814
|
+
archetypePerf: {
|
|
815
|
+
classify: 7,
|
|
816
|
+
summarize: 6,
|
|
817
|
+
ask: 6,
|
|
818
|
+
transform: 6,
|
|
819
|
+
extract: 6,
|
|
820
|
+
generate: 5,
|
|
821
|
+
hunt: 5,
|
|
822
|
+
plan: 4,
|
|
823
|
+
critique: 3
|
|
824
|
+
// not for reasoning archetypes
|
|
825
|
+
}
|
|
826
|
+
},
|
|
618
827
|
// ── Auto-onboarded (UNVERIFIED) ──
|
|
619
828
|
// Cloned by scripts/auto-onboard-models.mjs from a same-family template.
|
|
620
829
|
// Each entry's pricing/context/cliffs/lowering reflects the template, NOT
|
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory, f as Provider } from './profiles-DTnIzGsA.mjs';
|
|
2
|
-
export { g as ALIASES, h as CacheStrategy, i as CallAttempt, j as CallError, k as CliffRule, l as Constraints, F as FallbackReason, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, m as Message, n as MutationApplied, o as NormalizedTokens, p as PromptSection, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DTnIzGsA.mjs';
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory, f as Provider } from './profiles-BoLYdl7F.mjs';
|
|
2
|
+
export { g as ALIASES, h as CacheStrategy, i as CallAttempt, j as CallError, k as CliffRule, l as Constraints, F as FallbackReason, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, m as Message, n as MutationApplied, o as NormalizedTokens, p as PromptSection, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BoLYdl7F.mjs';
|
|
3
3
|
import { IntentArchetypeName } from './dialect.mjs';
|
|
4
4
|
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
|
|
5
5
|
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory, f as Provider } from './profiles-D0y6aLk0.js';
|
|
2
|
-
export { g as ALIASES, h as CacheStrategy, i as CallAttempt, j as CallError, k as CliffRule, l as Constraints, F as FallbackReason, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, m as Message, n as MutationApplied, o as NormalizedTokens, p as PromptSection, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-D0y6aLk0.js';
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory, f as Provider } from './profiles-CVB2_5C8.js';
|
|
2
|
+
export { g as ALIASES, h as CacheStrategy, i as CallAttempt, j as CallError, k as CliffRule, l as Constraints, F as FallbackReason, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, m as Message, n as MutationApplied, o as NormalizedTokens, p as PromptSection, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CVB2_5C8.js';
|
|
3
3
|
import { IntentArchetypeName } from './dialect.js';
|
|
4
4
|
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
|
|
5
5
|
|
package/dist/index.js
CHANGED
|
@@ -1475,6 +1475,215 @@ var PROFILES_RAW = [
|
|
|
1475
1475
|
// sequential tools — same as V4-Flash
|
|
1476
1476
|
}
|
|
1477
1477
|
},
|
|
1478
|
+
// ── OpenAI ──
|
|
1479
|
+
// alpha.16 (2026-05-17): close the half-supported provider gap. env.ts
|
|
1480
|
+
// already registered OPENAI_API_KEY + executeOpenAI + normalizeOpenAILike
|
|
1481
|
+
// + lowerOpenAI all existed; profile entries were missing, so the
|
|
1482
|
+
// alpha.10 auto-filter would mark openai-keyed models reachable but
|
|
1483
|
+
// there were no profiles to filter IN. Half-supported is now fully
|
|
1484
|
+
// supported. PB request `openai-provider-profiles` (2026-05-16).
|
|
1485
|
+
//
|
|
1486
|
+
// Profile data verified against developers.openai.com/api/docs/pricing
|
|
1487
|
+
// + per-model pages 2026-05-17. L-049/L-081 step-zero: no AI-trained
|
|
1488
|
+
// numbers — fetched live from OpenAI's docs. As of 2026-05, OpenAI's
|
|
1489
|
+
// current flagship is gpt-5.5 (2025-12 cutoff); gpt-5.4-{base,mini,nano}
|
|
1490
|
+
// are the workhorse family. gpt-4.1 + gpt-4o are legacy.
|
|
1491
|
+
//
|
|
1492
|
+
// Both 5.5 and 5.4 carry a 272K input-token pricing cliff (2x input,
|
|
1493
|
+
// 1.5x output beyond that). Modeled as a `downgrade_quality_warning`
|
|
1494
|
+
// cliff because it ranks the model down at large-context shapes — the
|
|
1495
|
+
// semantics of "this model is now 2x more expensive" map onto the
|
|
1496
|
+
// existing penalty mechanism. Cost-watcher will catch high-context
|
|
1497
|
+
// spikes empirically; the cliff prevents naive routing into the doubled
|
|
1498
|
+
// pricing zone.
|
|
1499
|
+
{
|
|
1500
|
+
id: "gpt-5.5",
|
|
1501
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
1502
|
+
provider: "openai",
|
|
1503
|
+
status: "current",
|
|
1504
|
+
maxContextTokens: 105e4,
|
|
1505
|
+
maxOutputTokens: 128e3,
|
|
1506
|
+
maxTools: 64,
|
|
1507
|
+
parallelToolCalls: true,
|
|
1508
|
+
structuredOutput: "native",
|
|
1509
|
+
systemPromptMode: "inline",
|
|
1510
|
+
streaming: true,
|
|
1511
|
+
cliffs: [
|
|
1512
|
+
{
|
|
1513
|
+
metric: "input_tokens",
|
|
1514
|
+
threshold: 272e3,
|
|
1515
|
+
action: "downgrade_quality_warning",
|
|
1516
|
+
reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
|
|
1517
|
+
}
|
|
1518
|
+
],
|
|
1519
|
+
costInputPer1m: 5,
|
|
1520
|
+
costOutputPer1m: 30,
|
|
1521
|
+
lowering: {
|
|
1522
|
+
system: { mode: "inline" },
|
|
1523
|
+
// OpenAI caching is implicit (auto-applied to repeated prefixes
|
|
1524
|
+
// ≥1024 tokens for prompt_tokens_details.cached_tokens). No
|
|
1525
|
+
// wire-format marker. Discount: 10x for cached input ($0.50/$5.00).
|
|
1526
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
1527
|
+
tools: { format: "openai" }
|
|
1528
|
+
},
|
|
1529
|
+
recovery: [
|
|
1530
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
1531
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
1532
|
+
],
|
|
1533
|
+
strengths: ["reasoning", "agentic_coding", "long_context", "structured_output", "reliable_tool_use", "reasoning_effort_knob"],
|
|
1534
|
+
weaknesses: ["cost", "pricing_cliff_at_272k"],
|
|
1535
|
+
notes: "OpenAI frontier (2026-05). 1M context (1.05M total), 128K max output, 2025-12 cutoff. Reasoning effort knob (none/low/medium/high/xhigh). Pricing cliff at 272K input.",
|
|
1536
|
+
// Frontier-tier perf hypothesis. Anchored to Opus 4.7 row (similar
|
|
1537
|
+
// price/positioning). Brain evidence will refine; no telemetry yet.
|
|
1538
|
+
archetypePerf: {
|
|
1539
|
+
critique: 9,
|
|
1540
|
+
plan: 9,
|
|
1541
|
+
generate: 9,
|
|
1542
|
+
ask: 9,
|
|
1543
|
+
extract: 9,
|
|
1544
|
+
transform: 9,
|
|
1545
|
+
hunt: 8,
|
|
1546
|
+
// parallel tool support good but cliff at 272K hurts deep multi-step
|
|
1547
|
+
summarize: 7,
|
|
1548
|
+
// overkill for tolerant archetype
|
|
1549
|
+
classify: 7
|
|
1550
|
+
// overkill; cheaper models cover this
|
|
1551
|
+
}
|
|
1552
|
+
},
|
|
1553
|
+
{
|
|
1554
|
+
id: "gpt-5.4",
|
|
1555
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
1556
|
+
provider: "openai",
|
|
1557
|
+
status: "current",
|
|
1558
|
+
maxContextTokens: 105e4,
|
|
1559
|
+
maxOutputTokens: 128e3,
|
|
1560
|
+
maxTools: 64,
|
|
1561
|
+
parallelToolCalls: true,
|
|
1562
|
+
structuredOutput: "native",
|
|
1563
|
+
systemPromptMode: "inline",
|
|
1564
|
+
streaming: true,
|
|
1565
|
+
cliffs: [
|
|
1566
|
+
{
|
|
1567
|
+
metric: "input_tokens",
|
|
1568
|
+
threshold: 272e3,
|
|
1569
|
+
action: "downgrade_quality_warning",
|
|
1570
|
+
reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
|
|
1571
|
+
}
|
|
1572
|
+
],
|
|
1573
|
+
costInputPer1m: 2.5,
|
|
1574
|
+
costOutputPer1m: 15,
|
|
1575
|
+
lowering: {
|
|
1576
|
+
system: { mode: "inline" },
|
|
1577
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
1578
|
+
tools: { format: "openai" }
|
|
1579
|
+
},
|
|
1580
|
+
recovery: [
|
|
1581
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
1582
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
1583
|
+
],
|
|
1584
|
+
strengths: ["reasoning", "long_context", "structured_output", "reliable_tool_use"],
|
|
1585
|
+
weaknesses: ["pricing_cliff_at_272k"],
|
|
1586
|
+
notes: "OpenAI workhorse (2026-05). 1M context (1.05M total), 128K max output, 2025-08 cutoff. Pricing cliff at 272K input. Pairs cleanly with Sonnet 4.6 on cost ($2.50/$15.00 vs $3.00/$15.00).",
|
|
1587
|
+
// Anchored to Sonnet 4.6 row (similar price/positioning). Slight
|
|
1588
|
+
// anthropic-side edge on agentic coding per master plan vibe.
|
|
1589
|
+
archetypePerf: {
|
|
1590
|
+
critique: 8,
|
|
1591
|
+
plan: 8,
|
|
1592
|
+
generate: 8,
|
|
1593
|
+
ask: 8,
|
|
1594
|
+
extract: 8,
|
|
1595
|
+
transform: 8,
|
|
1596
|
+
hunt: 7,
|
|
1597
|
+
summarize: 7,
|
|
1598
|
+
classify: 7
|
|
1599
|
+
}
|
|
1600
|
+
},
|
|
1601
|
+
{
|
|
1602
|
+
id: "gpt-5.4-mini",
|
|
1603
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
1604
|
+
provider: "openai",
|
|
1605
|
+
status: "current",
|
|
1606
|
+
maxContextTokens: 4e5,
|
|
1607
|
+
maxOutputTokens: 128e3,
|
|
1608
|
+
maxTools: 64,
|
|
1609
|
+
parallelToolCalls: true,
|
|
1610
|
+
structuredOutput: "native",
|
|
1611
|
+
systemPromptMode: "inline",
|
|
1612
|
+
streaming: true,
|
|
1613
|
+
cliffs: [],
|
|
1614
|
+
costInputPer1m: 0.75,
|
|
1615
|
+
costOutputPer1m: 4.5,
|
|
1616
|
+
lowering: {
|
|
1617
|
+
system: { mode: "inline" },
|
|
1618
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
1619
|
+
tools: { format: "openai" }
|
|
1620
|
+
},
|
|
1621
|
+
recovery: [
|
|
1622
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
1623
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
1624
|
+
],
|
|
1625
|
+
strengths: ["cost", "speed", "agentic_coding", "structured_output", "reliable_tool_use"],
|
|
1626
|
+
weaknesses: ["reasoning_depth"],
|
|
1627
|
+
notes: "OpenAI mini-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. OpenAI describes as 'strongest mini model for coding, computer use, subagents.' Cache discount 10x ($0.075 input).",
|
|
1628
|
+
// Mini-tier hypothesis. Anchored to Haiku 4.5 + Flash row pricing.
|
|
1629
|
+
// Cost is slightly higher than Haiku ($0.75 vs $0.50 input) but
|
|
1630
|
+
// OpenAI claims strong coding/subagent perf.
|
|
1631
|
+
archetypePerf: {
|
|
1632
|
+
ask: 7,
|
|
1633
|
+
generate: 7,
|
|
1634
|
+
extract: 7,
|
|
1635
|
+
transform: 7,
|
|
1636
|
+
classify: 7,
|
|
1637
|
+
summarize: 7,
|
|
1638
|
+
hunt: 7,
|
|
1639
|
+
plan: 6,
|
|
1640
|
+
critique: 5
|
|
1641
|
+
// reasoning depth gap — frontier models handle this
|
|
1642
|
+
}
|
|
1643
|
+
},
|
|
1644
|
+
{
|
|
1645
|
+
id: "gpt-5.4-nano",
|
|
1646
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
1647
|
+
provider: "openai",
|
|
1648
|
+
status: "current",
|
|
1649
|
+
maxContextTokens: 4e5,
|
|
1650
|
+
maxOutputTokens: 128e3,
|
|
1651
|
+
maxTools: 64,
|
|
1652
|
+
parallelToolCalls: true,
|
|
1653
|
+
structuredOutput: "native",
|
|
1654
|
+
systemPromptMode: "inline",
|
|
1655
|
+
streaming: true,
|
|
1656
|
+
cliffs: [],
|
|
1657
|
+
costInputPer1m: 0.2,
|
|
1658
|
+
costOutputPer1m: 1.25,
|
|
1659
|
+
lowering: {
|
|
1660
|
+
system: { mode: "inline" },
|
|
1661
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
1662
|
+
tools: { format: "openai" }
|
|
1663
|
+
},
|
|
1664
|
+
recovery: [
|
|
1665
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
1666
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
1667
|
+
],
|
|
1668
|
+
strengths: ["cost", "speed", "volume", "structured_output"],
|
|
1669
|
+
weaknesses: ["reasoning_depth", "no_computer_use"],
|
|
1670
|
+
notes: "OpenAI nano-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. 'Cheapest GPT-5.4-class for simple high-volume tasks.' No fine-tuning, no computer-use tools. Cache discount 10x.",
|
|
1671
|
+
// Nano-tier. Anchored to Flash-Lite row ($0.10/$0.40 vs nano's
|
|
1672
|
+
// $0.20/$1.25). Slightly more expensive than Flash-Lite but with
|
|
1673
|
+
// OpenAI brand reliability. Good fit for classify/summarize floor.
|
|
1674
|
+
archetypePerf: {
|
|
1675
|
+
classify: 7,
|
|
1676
|
+
summarize: 6,
|
|
1677
|
+
ask: 6,
|
|
1678
|
+
transform: 6,
|
|
1679
|
+
extract: 6,
|
|
1680
|
+
generate: 5,
|
|
1681
|
+
hunt: 5,
|
|
1682
|
+
plan: 4,
|
|
1683
|
+
critique: 3
|
|
1684
|
+
// not for reasoning archetypes
|
|
1685
|
+
}
|
|
1686
|
+
},
|
|
1478
1687
|
// ── Auto-onboarded (UNVERIFIED) ──
|
|
1479
1688
|
// Cloned by scripts/auto-onboard-models.mjs from a same-family template.
|
|
1480
1689
|
// Each entry's pricing/context/cliffs/lowering reflects the template, NOT
|
|
@@ -2563,10 +2772,14 @@ var loadChainsFromBrain = createBrainQueryCache({
|
|
|
2563
2772
|
// src/fallback.ts
|
|
2564
2773
|
var STARTER_CHAINS = {
|
|
2565
2774
|
// Reasoning floor — never degrade. Walk UP on 429 to Opus → cross-provider.
|
|
2775
|
+
// alpha.16: gpt-5.5 appended as third-provider critique floor (frontier-tier,
|
|
2776
|
+
// archetypePerf=9). Cross-provider-tail invariant has somewhere to land when
|
|
2777
|
+
// both Anthropic + Google are unreachable (consumer adds only OpenAI key).
|
|
2566
2778
|
critique: [
|
|
2567
2779
|
"claude-opus-4-7",
|
|
2568
2780
|
"claude-sonnet-4-6",
|
|
2569
|
-
"gemini-2.5-pro"
|
|
2781
|
+
"gemini-2.5-pro",
|
|
2782
|
+
"gpt-5.5"
|
|
2570
2783
|
],
|
|
2571
2784
|
// Reasoning matters — Sonnet primary; walk UP to Opus on 429 (rare exception
|
|
2572
2785
|
// to "always cheaper"); cross-provider via Pro; DeepSeek Pro as tier 3 floor.
|
|
@@ -2577,25 +2790,29 @@ var STARTER_CHAINS = {
|
|
|
2577
2790
|
"deepseek-v4-pro"
|
|
2578
2791
|
],
|
|
2579
2792
|
// Quality + cost match. Walk Sonnet → Haiku same-provider, Pro cross,
|
|
2580
|
-
//
|
|
2793
|
+
// gpt-5.4-mini as third-provider tail (alpha.16 — closes the mono-Anthropic
|
|
2794
|
+
// gap when consumer has only ANTHROPIC + OPENAI keys; archetypePerf=7).
|
|
2581
2795
|
generate: [
|
|
2582
2796
|
"claude-sonnet-4-6",
|
|
2583
2797
|
"claude-haiku-4-5",
|
|
2584
2798
|
"gemini-2.5-pro",
|
|
2585
|
-
"
|
|
2799
|
+
"gpt-5.4-mini"
|
|
2586
2800
|
],
|
|
2587
2801
|
ask: [
|
|
2588
2802
|
"claude-sonnet-4-6",
|
|
2589
2803
|
"claude-haiku-4-5",
|
|
2590
2804
|
"gemini-2.5-pro",
|
|
2591
|
-
"
|
|
2805
|
+
"gpt-5.4-mini"
|
|
2592
2806
|
],
|
|
2593
2807
|
// Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff),
|
|
2594
|
-
// DeepSeek skipped (no brain evidence). Floor at Haiku.
|
|
2808
|
+
// DeepSeek skipped (no brain evidence). Floor at Haiku. alpha.16: gpt-5.4
|
|
2809
|
+
// appended as third-provider extract floor (archetypePerf=8, native
|
|
2810
|
+
// structured-output support).
|
|
2595
2811
|
extract: [
|
|
2596
2812
|
"claude-sonnet-4-6",
|
|
2597
2813
|
"claude-haiku-4-5",
|
|
2598
|
-
"gemini-2.5-pro"
|
|
2814
|
+
"gemini-2.5-pro",
|
|
2815
|
+
"gpt-5.4"
|
|
2599
2816
|
],
|
|
2600
2817
|
// Forgiving archetype — Sonnet primary but Flash safely floors it.
|
|
2601
2818
|
transform: [
|
|
@@ -2765,6 +2982,47 @@ async function call(ir, opts = {}) {
|
|
|
2765
2982
|
}
|
|
2766
2983
|
}
|
|
2767
2984
|
}
|
|
2985
|
+
let policyBlockedFiltered;
|
|
2986
|
+
if (opts.policy?.blockedModels && opts.policy.blockedModels.length > 0) {
|
|
2987
|
+
const blocked = new Set(opts.policy.blockedModels);
|
|
2988
|
+
const filtered = [];
|
|
2989
|
+
const dropped = [];
|
|
2990
|
+
for (const t of targetsToTry) {
|
|
2991
|
+
if (blocked.has(t)) {
|
|
2992
|
+
dropped.push(t);
|
|
2993
|
+
} else {
|
|
2994
|
+
filtered.push(t);
|
|
2995
|
+
}
|
|
2996
|
+
}
|
|
2997
|
+
if (dropped.length > 0) {
|
|
2998
|
+
policyBlockedFiltered = dropped;
|
|
2999
|
+
targetsToTry = filtered;
|
|
3000
|
+
}
|
|
3001
|
+
if (targetsToTry.length === 0) {
|
|
3002
|
+
const latencyMs2 = Date.now() - start;
|
|
3003
|
+
await record({
|
|
3004
|
+
handle: initial.handle,
|
|
3005
|
+
tokensIn: 0,
|
|
3006
|
+
tokensOut: 0,
|
|
3007
|
+
latencyMs: latencyMs2,
|
|
3008
|
+
success: false,
|
|
3009
|
+
errorType: "all_blocked_by_policy",
|
|
3010
|
+
promptPreview: extractPromptPreview(ir)
|
|
3011
|
+
});
|
|
3012
|
+
const blockedAttempts = dropped.map((m) => ({
|
|
3013
|
+
model: m,
|
|
3014
|
+
status: "terminal",
|
|
3015
|
+
errorCode: "blocked_by_policy",
|
|
3016
|
+
message: `Skipped \u2014 model ${m} is in CompilePolicy.blockedModels`
|
|
3017
|
+
}));
|
|
3018
|
+
throw new CallError(
|
|
3019
|
+
`call(): all chain targets blocked by CompilePolicy.blockedModels: [${dropped.join(", ")}]`,
|
|
3020
|
+
blockedAttempts,
|
|
3021
|
+
void 0,
|
|
3022
|
+
"all_blocked_by_policy"
|
|
3023
|
+
);
|
|
3024
|
+
}
|
|
3025
|
+
}
|
|
2768
3026
|
let activeCompile = initial;
|
|
2769
3027
|
let lastErr;
|
|
2770
3028
|
const failedProviders = /* @__PURE__ */ new Set();
|
|
@@ -2837,7 +3095,8 @@ async function call(ir, opts = {}) {
|
|
|
2837
3095
|
servedBy: targetModel,
|
|
2838
3096
|
fellOverFrom: fellOver ? initial.target : void 0,
|
|
2839
3097
|
fallbackReason: fellOver ? normalizeFallbackReason(attempts) : void 0,
|
|
2840
|
-
unreachableFiltered
|
|
3098
|
+
unreachableFiltered,
|
|
3099
|
+
policyBlockedFiltered
|
|
2841
3100
|
};
|
|
2842
3101
|
}
|
|
2843
3102
|
attempts.push({
|
|
@@ -2866,8 +3125,9 @@ async function call(ir, opts = {}) {
|
|
|
2866
3125
|
promptPreview: extractPromptPreview(ir)
|
|
2867
3126
|
});
|
|
2868
3127
|
const filteredNote = unreachableFiltered && unreachableFiltered.length > 0 ? ` (also auto-filtered: [${unreachableFiltered.join(", ")}] \u2014 no API key)` : "";
|
|
3128
|
+
const blockedNote = policyBlockedFiltered && policyBlockedFiltered.length > 0 ? ` (also policy-blocked: [${policyBlockedFiltered.join(", ")}])` : "";
|
|
2869
3129
|
throw new CallError(
|
|
2870
|
-
`call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}`,
|
|
3130
|
+
`call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}${blockedNote}`,
|
|
2871
3131
|
attempts,
|
|
2872
3132
|
lastErr?.status,
|
|
2873
3133
|
lastErr?.errorCode
|
package/dist/index.mjs
CHANGED
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
getProfile,
|
|
18
18
|
profilesByProvider,
|
|
19
19
|
tryGetProfile
|
|
20
|
-
} from "./chunk-SFF5EVTL.mjs";
|
|
20
|
+
} from "./chunk-7MTHFSNY.mjs";
|
|
21
21
|
|
|
22
22
|
// src/tokenizer.ts
|
|
23
23
|
var tokenizerImpl = defaultCharBasedCounter;
|
|
@@ -1568,10 +1568,14 @@ var loadChainsFromBrain = createBrainQueryCache({
|
|
|
1568
1568
|
// src/fallback.ts
|
|
1569
1569
|
var STARTER_CHAINS = {
|
|
1570
1570
|
// Reasoning floor — never degrade. Walk UP on 429 to Opus → cross-provider.
|
|
1571
|
+
// alpha.16: gpt-5.5 appended as third-provider critique floor (frontier-tier,
|
|
1572
|
+
// archetypePerf=9). Cross-provider-tail invariant has somewhere to land when
|
|
1573
|
+
// both Anthropic + Google are unreachable (consumer adds only OpenAI key).
|
|
1571
1574
|
critique: [
|
|
1572
1575
|
"claude-opus-4-7",
|
|
1573
1576
|
"claude-sonnet-4-6",
|
|
1574
|
-
"gemini-2.5-pro"
|
|
1577
|
+
"gemini-2.5-pro",
|
|
1578
|
+
"gpt-5.5"
|
|
1575
1579
|
],
|
|
1576
1580
|
// Reasoning matters — Sonnet primary; walk UP to Opus on 429 (rare exception
|
|
1577
1581
|
// to "always cheaper"); cross-provider via Pro; DeepSeek Pro as tier 3 floor.
|
|
@@ -1582,25 +1586,29 @@ var STARTER_CHAINS = {
|
|
|
1582
1586
|
"deepseek-v4-pro"
|
|
1583
1587
|
],
|
|
1584
1588
|
// Quality + cost match. Walk Sonnet → Haiku same-provider, Pro cross,
|
|
1585
|
-
//
|
|
1589
|
+
// gpt-5.4-mini as third-provider tail (alpha.16 — closes the mono-Anthropic
|
|
1590
|
+
// gap when consumer has only ANTHROPIC + OPENAI keys; archetypePerf=7).
|
|
1586
1591
|
generate: [
|
|
1587
1592
|
"claude-sonnet-4-6",
|
|
1588
1593
|
"claude-haiku-4-5",
|
|
1589
1594
|
"gemini-2.5-pro",
|
|
1590
|
-
"
|
|
1595
|
+
"gpt-5.4-mini"
|
|
1591
1596
|
],
|
|
1592
1597
|
ask: [
|
|
1593
1598
|
"claude-sonnet-4-6",
|
|
1594
1599
|
"claude-haiku-4-5",
|
|
1595
1600
|
"gemini-2.5-pro",
|
|
1596
|
-
"
|
|
1601
|
+
"gpt-5.4-mini"
|
|
1597
1602
|
],
|
|
1598
1603
|
// Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff),
|
|
1599
|
-
// DeepSeek skipped (no brain evidence). Floor at Haiku.
|
|
1604
|
+
// DeepSeek skipped (no brain evidence). Floor at Haiku. alpha.16: gpt-5.4
|
|
1605
|
+
// appended as third-provider extract floor (archetypePerf=8, native
|
|
1606
|
+
// structured-output support).
|
|
1600
1607
|
extract: [
|
|
1601
1608
|
"claude-sonnet-4-6",
|
|
1602
1609
|
"claude-haiku-4-5",
|
|
1603
|
-
"gemini-2.5-pro"
|
|
1610
|
+
"gemini-2.5-pro",
|
|
1611
|
+
"gpt-5.4"
|
|
1604
1612
|
],
|
|
1605
1613
|
// Forgiving archetype — Sonnet primary but Flash safely floors it.
|
|
1606
1614
|
transform: [
|
|
@@ -1770,6 +1778,47 @@ async function call(ir, opts = {}) {
|
|
|
1770
1778
|
}
|
|
1771
1779
|
}
|
|
1772
1780
|
}
|
|
1781
|
+
let policyBlockedFiltered;
|
|
1782
|
+
if (opts.policy?.blockedModels && opts.policy.blockedModels.length > 0) {
|
|
1783
|
+
const blocked = new Set(opts.policy.blockedModels);
|
|
1784
|
+
const filtered = [];
|
|
1785
|
+
const dropped = [];
|
|
1786
|
+
for (const t of targetsToTry) {
|
|
1787
|
+
if (blocked.has(t)) {
|
|
1788
|
+
dropped.push(t);
|
|
1789
|
+
} else {
|
|
1790
|
+
filtered.push(t);
|
|
1791
|
+
}
|
|
1792
|
+
}
|
|
1793
|
+
if (dropped.length > 0) {
|
|
1794
|
+
policyBlockedFiltered = dropped;
|
|
1795
|
+
targetsToTry = filtered;
|
|
1796
|
+
}
|
|
1797
|
+
if (targetsToTry.length === 0) {
|
|
1798
|
+
const latencyMs2 = Date.now() - start;
|
|
1799
|
+
await record({
|
|
1800
|
+
handle: initial.handle,
|
|
1801
|
+
tokensIn: 0,
|
|
1802
|
+
tokensOut: 0,
|
|
1803
|
+
latencyMs: latencyMs2,
|
|
1804
|
+
success: false,
|
|
1805
|
+
errorType: "all_blocked_by_policy",
|
|
1806
|
+
promptPreview: extractPromptPreview(ir)
|
|
1807
|
+
});
|
|
1808
|
+
const blockedAttempts = dropped.map((m) => ({
|
|
1809
|
+
model: m,
|
|
1810
|
+
status: "terminal",
|
|
1811
|
+
errorCode: "blocked_by_policy",
|
|
1812
|
+
message: `Skipped \u2014 model ${m} is in CompilePolicy.blockedModels`
|
|
1813
|
+
}));
|
|
1814
|
+
throw new CallError(
|
|
1815
|
+
`call(): all chain targets blocked by CompilePolicy.blockedModels: [${dropped.join(", ")}]`,
|
|
1816
|
+
blockedAttempts,
|
|
1817
|
+
void 0,
|
|
1818
|
+
"all_blocked_by_policy"
|
|
1819
|
+
);
|
|
1820
|
+
}
|
|
1821
|
+
}
|
|
1773
1822
|
let activeCompile = initial;
|
|
1774
1823
|
let lastErr;
|
|
1775
1824
|
const failedProviders = /* @__PURE__ */ new Set();
|
|
@@ -1842,7 +1891,8 @@ async function call(ir, opts = {}) {
|
|
|
1842
1891
|
servedBy: targetModel,
|
|
1843
1892
|
fellOverFrom: fellOver ? initial.target : void 0,
|
|
1844
1893
|
fallbackReason: fellOver ? normalizeFallbackReason(attempts) : void 0,
|
|
1845
|
-
unreachableFiltered
|
|
1894
|
+
unreachableFiltered,
|
|
1895
|
+
policyBlockedFiltered
|
|
1846
1896
|
};
|
|
1847
1897
|
}
|
|
1848
1898
|
attempts.push({
|
|
@@ -1871,8 +1921,9 @@ async function call(ir, opts = {}) {
|
|
|
1871
1921
|
promptPreview: extractPromptPreview(ir)
|
|
1872
1922
|
});
|
|
1873
1923
|
const filteredNote = unreachableFiltered && unreachableFiltered.length > 0 ? ` (also auto-filtered: [${unreachableFiltered.join(", ")}] \u2014 no API key)` : "";
|
|
1924
|
+
const blockedNote = policyBlockedFiltered && policyBlockedFiltered.length > 0 ? ` (also policy-blocked: [${policyBlockedFiltered.join(", ")}])` : "";
|
|
1874
1925
|
throw new CallError(
|
|
1875
|
-
`call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}`,
|
|
1926
|
+
`call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}${blockedNote}`,
|
|
1876
1927
|
attempts,
|
|
1877
1928
|
lastErr?.status,
|
|
1878
1929
|
lastErr?.errorCode
|
|
@@ -521,6 +521,23 @@ interface CallResult {
|
|
|
521
521
|
* when `noAutoFilter: true`).
|
|
522
522
|
*/
|
|
523
523
|
unreachableFiltered?: string[];
|
|
524
|
+
/**
|
|
525
|
+
* alpha.16. Models that policy.blockedModels filtering dropped from the
|
|
526
|
+
* fallback walk. Defense-in-depth at the call() boundary — compile()'s
|
|
527
|
+
* passScoreTargets already excludes blocked entries from the initial
|
|
528
|
+
* target + fallbackChain, but if a consumer re-shapes the chain and
|
|
529
|
+
* threads policy through only partially, this filter catches the gap.
|
|
530
|
+
*
|
|
531
|
+
* Resolves TT-40 follow-on `policy-block-not-enforced-on-fallback-chain`
|
|
532
|
+
* (2026-05-15) where mutations_applied recorded the block intent but
|
|
533
|
+
* the call walker landed on the blocked model anyway.
|
|
534
|
+
*
|
|
535
|
+
* Undefined when no filter ran (no blockedModels set). Populated only
|
|
536
|
+
* when filter ran AND dropped at least one entry — empty drops are
|
|
537
|
+
* stored as `undefined` to keep brain telemetry quiet on the common
|
|
538
|
+
* case.
|
|
539
|
+
*/
|
|
540
|
+
policyBlockedFiltered?: string[];
|
|
524
541
|
}
|
|
525
542
|
/**
|
|
526
543
|
* Thrown when call() exhausts the fallback chain without success.
|
|
@@ -521,6 +521,23 @@ interface CallResult {
|
|
|
521
521
|
* when `noAutoFilter: true`).
|
|
522
522
|
*/
|
|
523
523
|
unreachableFiltered?: string[];
|
|
524
|
+
/**
|
|
525
|
+
* alpha.16. Models that policy.blockedModels filtering dropped from the
|
|
526
|
+
* fallback walk. Defense-in-depth at the call() boundary — compile()'s
|
|
527
|
+
* passScoreTargets already excludes blocked entries from the initial
|
|
528
|
+
* target + fallbackChain, but if a consumer re-shapes the chain and
|
|
529
|
+
* threads policy through only partially, this filter catches the gap.
|
|
530
|
+
*
|
|
531
|
+
* Resolves TT-40 follow-on `policy-block-not-enforced-on-fallback-chain`
|
|
532
|
+
* (2026-05-15) where mutations_applied recorded the block intent but
|
|
533
|
+
* the call walker landed on the blocked model anyway.
|
|
534
|
+
*
|
|
535
|
+
* Undefined when no filter ran (no blockedModels set). Populated only
|
|
536
|
+
* when filter ran AND dropped at least one entry — empty drops are
|
|
537
|
+
* stored as `undefined` to keep brain telemetry quiet on the common
|
|
538
|
+
* case.
|
|
539
|
+
*/
|
|
540
|
+
policyBlockedFiltered?: string[];
|
|
524
541
|
}
|
|
525
542
|
/**
|
|
526
543
|
* Thrown when call() exhausts the fallback chain without success.
|
package/dist/profiles.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { g as ALIASES, h as CacheStrategy, k as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, _ as _setProfileBrainHook, t as allProfiles, x as allProfilesRaw, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DTnIzGsA.mjs';
|
|
1
|
+
export { g as ALIASES, h as CacheStrategy, k as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, _ as _setProfileBrainHook, t as allProfiles, x as allProfilesRaw, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BoLYdl7F.mjs';
|
|
2
2
|
import './dialect.mjs';
|
package/dist/profiles.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { g as ALIASES, h as CacheStrategy, k as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, _ as _setProfileBrainHook, t as allProfiles, x as allProfilesRaw, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-D0y6aLk0.js';
|
|
1
|
+
export { g as ALIASES, h as CacheStrategy, k as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, _ as _setProfileBrainHook, t as allProfiles, x as allProfilesRaw, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CVB2_5C8.js';
|
|
2
2
|
import './dialect.js';
|
package/dist/profiles.js
CHANGED
|
@@ -645,6 +645,215 @@ var PROFILES_RAW = [
|
|
|
645
645
|
// sequential tools — same as V4-Flash
|
|
646
646
|
}
|
|
647
647
|
},
|
|
648
|
+
// ── OpenAI ──
|
|
649
|
+
// alpha.16 (2026-05-17): close the half-supported provider gap. env.ts
|
|
650
|
+
// already registered OPENAI_API_KEY + executeOpenAI + normalizeOpenAILike
|
|
651
|
+
// + lowerOpenAI all existed; profile entries were missing, so the
|
|
652
|
+
// alpha.10 auto-filter would mark openai-keyed models reachable but
|
|
653
|
+
// there were no profiles to filter IN. Half-supported is now fully
|
|
654
|
+
// supported. PB request `openai-provider-profiles` (2026-05-16).
|
|
655
|
+
//
|
|
656
|
+
// Profile data verified against developers.openai.com/api/docs/pricing
|
|
657
|
+
// + per-model pages 2026-05-17. L-049/L-081 step-zero: no AI-trained
|
|
658
|
+
// numbers — fetched live from OpenAI's docs. As of 2026-05, OpenAI's
|
|
659
|
+
// current flagship is gpt-5.5 (2025-12 cutoff); gpt-5.4-{base,mini,nano}
|
|
660
|
+
// are the workhorse family. gpt-4.1 + gpt-4o are legacy.
|
|
661
|
+
//
|
|
662
|
+
// Both 5.5 and 5.4 carry a 272K input-token pricing cliff (2x input,
|
|
663
|
+
// 1.5x output beyond that). Modeled as a `downgrade_quality_warning`
|
|
664
|
+
// cliff because it ranks the model down at large-context shapes — the
|
|
665
|
+
// semantics of "this model is now 2x more expensive" map onto the
|
|
666
|
+
// existing penalty mechanism. Cost-watcher will catch high-context
|
|
667
|
+
// spikes empirically; the cliff prevents naive routing into the doubled
|
|
668
|
+
// pricing zone.
|
|
669
|
+
{
|
|
670
|
+
id: "gpt-5.5",
|
|
671
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
672
|
+
provider: "openai",
|
|
673
|
+
status: "current",
|
|
674
|
+
maxContextTokens: 105e4,
|
|
675
|
+
maxOutputTokens: 128e3,
|
|
676
|
+
maxTools: 64,
|
|
677
|
+
parallelToolCalls: true,
|
|
678
|
+
structuredOutput: "native",
|
|
679
|
+
systemPromptMode: "inline",
|
|
680
|
+
streaming: true,
|
|
681
|
+
cliffs: [
|
|
682
|
+
{
|
|
683
|
+
metric: "input_tokens",
|
|
684
|
+
threshold: 272e3,
|
|
685
|
+
action: "downgrade_quality_warning",
|
|
686
|
+
reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
|
|
687
|
+
}
|
|
688
|
+
],
|
|
689
|
+
costInputPer1m: 5,
|
|
690
|
+
costOutputPer1m: 30,
|
|
691
|
+
lowering: {
|
|
692
|
+
system: { mode: "inline" },
|
|
693
|
+
// OpenAI caching is implicit (auto-applied to repeated prefixes
|
|
694
|
+
// ≥1024 tokens for prompt_tokens_details.cached_tokens). No
|
|
695
|
+
// wire-format marker. Discount: 10x for cached input ($0.50/$5.00).
|
|
696
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
697
|
+
tools: { format: "openai" }
|
|
698
|
+
},
|
|
699
|
+
recovery: [
|
|
700
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
701
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
702
|
+
],
|
|
703
|
+
strengths: ["reasoning", "agentic_coding", "long_context", "structured_output", "reliable_tool_use", "reasoning_effort_knob"],
|
|
704
|
+
weaknesses: ["cost", "pricing_cliff_at_272k"],
|
|
705
|
+
notes: "OpenAI frontier (2026-05). 1M context (1.05M total), 128K max output, 2025-12 cutoff. Reasoning effort knob (none/low/medium/high/xhigh). Pricing cliff at 272K input.",
|
|
706
|
+
// Frontier-tier perf hypothesis. Anchored to Opus 4.7 row (similar
|
|
707
|
+
// price/positioning). Brain evidence will refine; no telemetry yet.
|
|
708
|
+
archetypePerf: {
|
|
709
|
+
critique: 9,
|
|
710
|
+
plan: 9,
|
|
711
|
+
generate: 9,
|
|
712
|
+
ask: 9,
|
|
713
|
+
extract: 9,
|
|
714
|
+
transform: 9,
|
|
715
|
+
hunt: 8,
|
|
716
|
+
// parallel tool support good but cliff at 272K hurts deep multi-step
|
|
717
|
+
summarize: 7,
|
|
718
|
+
// overkill for tolerant archetype
|
|
719
|
+
classify: 7
|
|
720
|
+
// overkill; cheaper models cover this
|
|
721
|
+
}
|
|
722
|
+
},
|
|
723
|
+
{
|
|
724
|
+
id: "gpt-5.4",
|
|
725
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
726
|
+
provider: "openai",
|
|
727
|
+
status: "current",
|
|
728
|
+
maxContextTokens: 105e4,
|
|
729
|
+
maxOutputTokens: 128e3,
|
|
730
|
+
maxTools: 64,
|
|
731
|
+
parallelToolCalls: true,
|
|
732
|
+
structuredOutput: "native",
|
|
733
|
+
systemPromptMode: "inline",
|
|
734
|
+
streaming: true,
|
|
735
|
+
cliffs: [
|
|
736
|
+
{
|
|
737
|
+
metric: "input_tokens",
|
|
738
|
+
threshold: 272e3,
|
|
739
|
+
action: "downgrade_quality_warning",
|
|
740
|
+
reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
|
|
741
|
+
}
|
|
742
|
+
],
|
|
743
|
+
costInputPer1m: 2.5,
|
|
744
|
+
costOutputPer1m: 15,
|
|
745
|
+
lowering: {
|
|
746
|
+
system: { mode: "inline" },
|
|
747
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
748
|
+
tools: { format: "openai" }
|
|
749
|
+
},
|
|
750
|
+
recovery: [
|
|
751
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
752
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
753
|
+
],
|
|
754
|
+
strengths: ["reasoning", "long_context", "structured_output", "reliable_tool_use"],
|
|
755
|
+
weaknesses: ["pricing_cliff_at_272k"],
|
|
756
|
+
notes: "OpenAI workhorse (2026-05). 1M context (1.05M total), 128K max output, 2025-08 cutoff. Pricing cliff at 272K input. Pairs cleanly with Sonnet 4.6 on cost ($2.50/$15.00 vs $3.00/$15.00).",
|
|
757
|
+
// Anchored to Sonnet 4.6 row (similar price/positioning). Slight
|
|
758
|
+
// anthropic-side edge on agentic coding per master plan vibe.
|
|
759
|
+
archetypePerf: {
|
|
760
|
+
critique: 8,
|
|
761
|
+
plan: 8,
|
|
762
|
+
generate: 8,
|
|
763
|
+
ask: 8,
|
|
764
|
+
extract: 8,
|
|
765
|
+
transform: 8,
|
|
766
|
+
hunt: 7,
|
|
767
|
+
summarize: 7,
|
|
768
|
+
classify: 7
|
|
769
|
+
}
|
|
770
|
+
},
|
|
771
|
+
{
|
|
772
|
+
id: "gpt-5.4-mini",
|
|
773
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
774
|
+
provider: "openai",
|
|
775
|
+
status: "current",
|
|
776
|
+
maxContextTokens: 4e5,
|
|
777
|
+
maxOutputTokens: 128e3,
|
|
778
|
+
maxTools: 64,
|
|
779
|
+
parallelToolCalls: true,
|
|
780
|
+
structuredOutput: "native",
|
|
781
|
+
systemPromptMode: "inline",
|
|
782
|
+
streaming: true,
|
|
783
|
+
cliffs: [],
|
|
784
|
+
costInputPer1m: 0.75,
|
|
785
|
+
costOutputPer1m: 4.5,
|
|
786
|
+
lowering: {
|
|
787
|
+
system: { mode: "inline" },
|
|
788
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
789
|
+
tools: { format: "openai" }
|
|
790
|
+
},
|
|
791
|
+
recovery: [
|
|
792
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
793
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
794
|
+
],
|
|
795
|
+
strengths: ["cost", "speed", "agentic_coding", "structured_output", "reliable_tool_use"],
|
|
796
|
+
weaknesses: ["reasoning_depth"],
|
|
797
|
+
notes: "OpenAI mini-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. OpenAI describes as 'strongest mini model for coding, computer use, subagents.' Cache discount 10x ($0.075 input).",
|
|
798
|
+
// Mini-tier hypothesis. Anchored to Haiku 4.5 + Flash row pricing.
|
|
799
|
+
// Cost is slightly higher than Haiku ($0.75 vs $0.50 input) but
|
|
800
|
+
// OpenAI claims strong coding/subagent perf.
|
|
801
|
+
archetypePerf: {
|
|
802
|
+
ask: 7,
|
|
803
|
+
generate: 7,
|
|
804
|
+
extract: 7,
|
|
805
|
+
transform: 7,
|
|
806
|
+
classify: 7,
|
|
807
|
+
summarize: 7,
|
|
808
|
+
hunt: 7,
|
|
809
|
+
plan: 6,
|
|
810
|
+
critique: 5
|
|
811
|
+
// reasoning depth gap — frontier models handle this
|
|
812
|
+
}
|
|
813
|
+
},
|
|
814
|
+
{
|
|
815
|
+
id: "gpt-5.4-nano",
|
|
816
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
817
|
+
provider: "openai",
|
|
818
|
+
status: "current",
|
|
819
|
+
maxContextTokens: 4e5,
|
|
820
|
+
maxOutputTokens: 128e3,
|
|
821
|
+
maxTools: 64,
|
|
822
|
+
parallelToolCalls: true,
|
|
823
|
+
structuredOutput: "native",
|
|
824
|
+
systemPromptMode: "inline",
|
|
825
|
+
streaming: true,
|
|
826
|
+
cliffs: [],
|
|
827
|
+
costInputPer1m: 0.2,
|
|
828
|
+
costOutputPer1m: 1.25,
|
|
829
|
+
lowering: {
|
|
830
|
+
system: { mode: "inline" },
|
|
831
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
832
|
+
tools: { format: "openai" }
|
|
833
|
+
},
|
|
834
|
+
recovery: [
|
|
835
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
836
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
837
|
+
],
|
|
838
|
+
strengths: ["cost", "speed", "volume", "structured_output"],
|
|
839
|
+
weaknesses: ["reasoning_depth", "no_computer_use"],
|
|
840
|
+
notes: "OpenAI nano-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. 'Cheapest GPT-5.4-class for simple high-volume tasks.' No fine-tuning, no computer-use tools. Cache discount 10x.",
|
|
841
|
+
// Nano-tier. Anchored to Flash-Lite row ($0.10/$0.40 vs nano's
|
|
842
|
+
// $0.20/$1.25). Slightly more expensive than Flash-Lite but with
|
|
843
|
+
// OpenAI brand reliability. Good fit for classify/summarize floor.
|
|
844
|
+
archetypePerf: {
|
|
845
|
+
classify: 7,
|
|
846
|
+
summarize: 6,
|
|
847
|
+
ask: 6,
|
|
848
|
+
transform: 6,
|
|
849
|
+
extract: 6,
|
|
850
|
+
generate: 5,
|
|
851
|
+
hunt: 5,
|
|
852
|
+
plan: 4,
|
|
853
|
+
critique: 3
|
|
854
|
+
// not for reasoning archetypes
|
|
855
|
+
}
|
|
856
|
+
},
|
|
648
857
|
// ── Auto-onboarded (UNVERIFIED) ──
|
|
649
858
|
// Cloned by scripts/auto-onboard-models.mjs from a same-family template.
|
|
650
859
|
// Each entry's pricing/context/cliffs/lowering reflects the template, NOT
|
package/dist/profiles.mjs
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@warmdrift/kgauto-compiler",
|
|
3
|
-
"version": "2.0.0-alpha.15",
|
|
3
|
+
"version": "2.0.0-alpha.16",
|
|
4
4
|
"description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|