@warmdrift/kgauto-compiler 2.0.0-alpha.15 → 2.0.0-alpha.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-SFF5EVTL.mjs → chunk-7MTHFSNY.mjs} +209 -0
- package/dist/chunk-NUTC7NUC.mjs +298 -0
- package/dist/glassbox/index.d.mts +159 -0
- package/dist/glassbox/index.d.ts +159 -0
- package/dist/glassbox/index.js +300 -0
- package/dist/glassbox/index.mjs +20 -0
- package/dist/index.d.mts +4 -2
- package/dist/index.d.ts +4 -2
- package/dist/index.js +624 -9
- package/dist/index.mjs +136 -10
- package/dist/{profiles-DTnIzGsA.d.mts → ir-C3P4gDt0.d.mts} +30 -134
- package/dist/{profiles-D0y6aLk0.d.ts → ir-CFHU3BUT.d.ts} +30 -134
- package/dist/profiles.d.mts +137 -2
- package/dist/profiles.d.ts +137 -2
- package/dist/profiles.js +209 -0
- package/dist/profiles.mjs +1 -1
- package/package.json +7 -2
package/dist/profiles.d.mts
CHANGED
|
@@ -1,2 +1,137 @@
|
|
|
1
|
-
|
|
2
|
-
import './dialect.mjs';
|
|
1
|
+
import { f as Provider } from './ir-C3P4gDt0.mjs';
|
|
2
|
+
import { IntentArchetypeName } from './dialect.mjs';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Model profiles — executable knowledge about each provider/model.
|
|
6
|
+
*
|
|
7
|
+
* Unlike v1 which carried `known_failures` as prose strings, v2 makes them
|
|
8
|
+
* executable: cliffs trigger guards, lowering describes the wire format,
|
|
9
|
+
* recovery handlers describe what to do after specific failures.
|
|
10
|
+
*
|
|
11
|
+
* Each profile is the answer to "if I want to call THIS model with THIS
|
|
12
|
+
* shape of work, what does it need from me, and what should I do when it
|
|
13
|
+
* fails?"
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
type StructuredOutputCapability = 'native' | 'grammar' | 'none';
|
|
17
|
+
type SystemPromptMode = 'inline' | 'separate' | 'as_developer' | 'unsupported';
|
|
18
|
+
type CacheStrategy = 'cache_control' | 'cachedContent' | 'unsupported';
|
|
19
|
+
interface CliffRule {
|
|
20
|
+
/** What metric triggers this cliff. */
|
|
21
|
+
metric: 'input_tokens' | 'tool_count' | 'history_turns' | 'thinking_with_short_output';
|
|
22
|
+
/** Threshold — meaning depends on metric. */
|
|
23
|
+
threshold: number;
|
|
24
|
+
/** What action to take when triggered. */
|
|
25
|
+
action: 'downgrade_quality_warning' | 'drop_to_top_relevant' | 'force_thinking_budget_zero' | 'force_terse_output' | 'escalate_target' | 'strip_tools';
|
|
26
|
+
/**
|
|
27
|
+
* Optional: only fire this cliff when the IR's intent.archetype matches.
|
|
28
|
+
* Used for archetype-specific failure modes (e.g. Gemini Flash returns
|
|
29
|
+
* empty when summarize is offered tools).
|
|
30
|
+
*/
|
|
31
|
+
whenIntent?: IntentArchetypeName;
|
|
32
|
+
/** Human-readable reason for digest reporting. */
|
|
33
|
+
reason: string;
|
|
34
|
+
}
|
|
35
|
+
interface RecoveryRule {
|
|
36
|
+
/** What signal triggers recovery. */
|
|
37
|
+
signal: 'empty_response_after_tool' | 'empty_response' | 'malformed_function_call' | 'rate_limit' | 'model_not_found' | 'context_overflow';
|
|
38
|
+
/** Action: retry with adjusted params, or escalate to next fallback. */
|
|
39
|
+
action: 'retry_with_params' | 'escalate' | 'log_only';
|
|
40
|
+
/** When action=retry_with_params, the param adjustments to apply. */
|
|
41
|
+
retryParams?: Record<string, unknown>;
|
|
42
|
+
/** Max retries with this rule. */
|
|
43
|
+
maxRetries?: number;
|
|
44
|
+
/** Human-readable reason for digest reporting. */
|
|
45
|
+
reason: string;
|
|
46
|
+
}
|
|
47
|
+
interface LoweringSpec {
|
|
48
|
+
/** Where the system prompt goes. */
|
|
49
|
+
system: {
|
|
50
|
+
mode: SystemPromptMode;
|
|
51
|
+
field?: string;
|
|
52
|
+
};
|
|
53
|
+
/** Cache strategy + parameters. */
|
|
54
|
+
cache: {
|
|
55
|
+
strategy: CacheStrategy;
|
|
56
|
+
/** Min tokens before caching is worth it (provider rules). */
|
|
57
|
+
minTokens?: number;
|
|
58
|
+
/** Discount factor on cached input (0.1 = 10% of normal price). */
|
|
59
|
+
discount?: number;
|
|
60
|
+
/** TTL hint in seconds. */
|
|
61
|
+
ttlSeconds?: number;
|
|
62
|
+
};
|
|
63
|
+
/** Tool format identifier — see lower.ts for supported formats. */
|
|
64
|
+
tools?: {
|
|
65
|
+
format: 'anthropic' | 'google' | 'openai' | 'deepseek';
|
|
66
|
+
};
|
|
67
|
+
/** Thinking config — present iff this model has a thinking knob. */
|
|
68
|
+
thinking?: {
|
|
69
|
+
/** Field path on the request. */
|
|
70
|
+
field: string;
|
|
71
|
+
/** Default value when caller hasn't specified. */
|
|
72
|
+
default?: number | 'auto' | 'off';
|
|
73
|
+
};
|
|
74
|
+
}
|
|
75
|
+
interface ModelProfile {
|
|
76
|
+
id: string;
|
|
77
|
+
provider: Provider;
|
|
78
|
+
status: 'current' | 'preview' | 'legacy';
|
|
79
|
+
maxContextTokens: number;
|
|
80
|
+
maxOutputTokens: number;
|
|
81
|
+
maxTools: number;
|
|
82
|
+
parallelToolCalls: boolean;
|
|
83
|
+
structuredOutput: StructuredOutputCapability;
|
|
84
|
+
systemPromptMode: SystemPromptMode;
|
|
85
|
+
streaming: boolean;
|
|
86
|
+
cliffs: CliffRule[];
|
|
87
|
+
costInputPer1m: number;
|
|
88
|
+
costOutputPer1m: number;
|
|
89
|
+
lowering: LoweringSpec;
|
|
90
|
+
recovery: RecoveryRule[];
|
|
91
|
+
strengths: string[];
|
|
92
|
+
weaknesses: string[];
|
|
93
|
+
notes?: string;
|
|
94
|
+
verifiedAgainstDocs?: string;
|
|
95
|
+
/**
|
|
96
|
+
* Hand-curated per-archetype performance score on a 0-10 scale.
|
|
97
|
+
*
|
|
98
|
+
* 10 = frontier on this archetype (e.g. Opus 4.7 on critique)
|
|
99
|
+
* 8 = strong second tier (Sonnet on plan, Pro on extract)
|
|
100
|
+
* 7 = competent (Haiku on classify, Flash on hunt)
|
|
101
|
+
* 5 = acceptable for tolerant archetypes (Flash-Lite on classify)
|
|
102
|
+
* 3 = degraded (Flash on critique, DeepSeek on hunt)
|
|
103
|
+
*
|
|
104
|
+
* Missing archetypes default to `5` (no data, neutral). Each non-default
|
|
105
|
+
* value should carry a one-line rationale in the profile's note or inline
|
|
106
|
+
* comment citing brain evidence, family prior, or "starter hypothesis —
|
|
107
|
+
* verify with telemetry."
|
|
108
|
+
*
|
|
109
|
+
* Source today: hand-curated from master plan §3.3 + §6.2 starter tables.
|
|
110
|
+
* Source tomorrow (alpha.10+): brain `archetype_model_evidence` view.
|
|
111
|
+
*
|
|
112
|
+
* Anti-hallucination guardrail (master plan §2.5): when the watcher's
|
|
113
|
+
* `--audit-fields` flag marks a profile as stale (>90 days since
|
|
114
|
+
* verifiedAgainstDocs), the archetypePerf values get re-audited
|
|
115
|
+
* alongside capability fields. AI-trained intuition is NOT a valid
|
|
116
|
+
* source — only docs or brain evidence.
|
|
117
|
+
*
|
|
118
|
+
* alpha.9.
|
|
119
|
+
*/
|
|
120
|
+
archetypePerf?: Partial<Record<IntentArchetypeName, number>>;
|
|
121
|
+
}
|
|
122
|
+
declare const ALIASES: Record<string, string>;
|
|
123
|
+
interface ProfileBrainHook {
|
|
124
|
+
getProfile?: (canonicalId: string) => ModelProfile | undefined;
|
|
125
|
+
resolveAlias?: (id: string) => string | undefined;
|
|
126
|
+
}
|
|
127
|
+
/** @internal — called by models-brain.ts at module load. */
|
|
128
|
+
declare function _setProfileBrainHook(hook: ProfileBrainHook): void;
|
|
129
|
+
declare function getProfile(id: string): ModelProfile;
|
|
130
|
+
declare function tryGetProfile(id: string): ModelProfile | undefined;
|
|
131
|
+
declare function allProfiles(): readonly ModelProfile[];
|
|
132
|
+
/** @internal — bundled-only access for adapters that need a non-brain
|
|
133
|
+
* fallback baseline (avoids a brain → profiles → brain re-entry). */
|
|
134
|
+
declare function allProfilesRaw(): readonly ModelProfile[];
|
|
135
|
+
declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
|
|
136
|
+
|
|
137
|
+
export { ALIASES, type CacheStrategy, type CliffRule, type LoweringSpec, type ModelProfile, type RecoveryRule, type StructuredOutputCapability, type SystemPromptMode, _setProfileBrainHook, allProfiles, allProfilesRaw, getProfile, profilesByProvider, tryGetProfile };
|
package/dist/profiles.d.ts
CHANGED
|
@@ -1,2 +1,137 @@
|
|
|
1
|
-
|
|
2
|
-
import './dialect.js';
|
|
1
|
+
import { f as Provider } from './ir-CFHU3BUT.js';
|
|
2
|
+
import { IntentArchetypeName } from './dialect.js';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Model profiles — executable knowledge about each provider/model.
|
|
6
|
+
*
|
|
7
|
+
* Unlike v1 which carried `known_failures` as prose strings, v2 makes them
|
|
8
|
+
* executable: cliffs trigger guards, lowering describes the wire format,
|
|
9
|
+
* recovery handlers describe what to do after specific failures.
|
|
10
|
+
*
|
|
11
|
+
* Each profile is the answer to "if I want to call THIS model with THIS
|
|
12
|
+
* shape of work, what does it need from me, and what should I do when it
|
|
13
|
+
* fails?"
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
type StructuredOutputCapability = 'native' | 'grammar' | 'none';
|
|
17
|
+
type SystemPromptMode = 'inline' | 'separate' | 'as_developer' | 'unsupported';
|
|
18
|
+
type CacheStrategy = 'cache_control' | 'cachedContent' | 'unsupported';
|
|
19
|
+
interface CliffRule {
|
|
20
|
+
/** What metric triggers this cliff. */
|
|
21
|
+
metric: 'input_tokens' | 'tool_count' | 'history_turns' | 'thinking_with_short_output';
|
|
22
|
+
/** Threshold — meaning depends on metric. */
|
|
23
|
+
threshold: number;
|
|
24
|
+
/** What action to take when triggered. */
|
|
25
|
+
action: 'downgrade_quality_warning' | 'drop_to_top_relevant' | 'force_thinking_budget_zero' | 'force_terse_output' | 'escalate_target' | 'strip_tools';
|
|
26
|
+
/**
|
|
27
|
+
* Optional: only fire this cliff when the IR's intent.archetype matches.
|
|
28
|
+
* Used for archetype-specific failure modes (e.g. Gemini Flash returns
|
|
29
|
+
* empty when summarize is offered tools).
|
|
30
|
+
*/
|
|
31
|
+
whenIntent?: IntentArchetypeName;
|
|
32
|
+
/** Human-readable reason for digest reporting. */
|
|
33
|
+
reason: string;
|
|
34
|
+
}
|
|
35
|
+
interface RecoveryRule {
|
|
36
|
+
/** What signal triggers recovery. */
|
|
37
|
+
signal: 'empty_response_after_tool' | 'empty_response' | 'malformed_function_call' | 'rate_limit' | 'model_not_found' | 'context_overflow';
|
|
38
|
+
/** Action: retry with adjusted params, or escalate to next fallback. */
|
|
39
|
+
action: 'retry_with_params' | 'escalate' | 'log_only';
|
|
40
|
+
/** When action=retry_with_params, the param adjustments to apply. */
|
|
41
|
+
retryParams?: Record<string, unknown>;
|
|
42
|
+
/** Max retries with this rule. */
|
|
43
|
+
maxRetries?: number;
|
|
44
|
+
/** Human-readable reason for digest reporting. */
|
|
45
|
+
reason: string;
|
|
46
|
+
}
|
|
47
|
+
interface LoweringSpec {
|
|
48
|
+
/** Where the system prompt goes. */
|
|
49
|
+
system: {
|
|
50
|
+
mode: SystemPromptMode;
|
|
51
|
+
field?: string;
|
|
52
|
+
};
|
|
53
|
+
/** Cache strategy + parameters. */
|
|
54
|
+
cache: {
|
|
55
|
+
strategy: CacheStrategy;
|
|
56
|
+
/** Min tokens before caching is worth it (provider rules). */
|
|
57
|
+
minTokens?: number;
|
|
58
|
+
/** Discount factor on cached input (0.1 = 10% of normal price). */
|
|
59
|
+
discount?: number;
|
|
60
|
+
/** TTL hint in seconds. */
|
|
61
|
+
ttlSeconds?: number;
|
|
62
|
+
};
|
|
63
|
+
/** Tool format identifier — see lower.ts for supported formats. */
|
|
64
|
+
tools?: {
|
|
65
|
+
format: 'anthropic' | 'google' | 'openai' | 'deepseek';
|
|
66
|
+
};
|
|
67
|
+
/** Thinking config — present iff this model has a thinking knob. */
|
|
68
|
+
thinking?: {
|
|
69
|
+
/** Field path on the request. */
|
|
70
|
+
field: string;
|
|
71
|
+
/** Default value when caller hasn't specified. */
|
|
72
|
+
default?: number | 'auto' | 'off';
|
|
73
|
+
};
|
|
74
|
+
}
|
|
75
|
+
interface ModelProfile {
|
|
76
|
+
id: string;
|
|
77
|
+
provider: Provider;
|
|
78
|
+
status: 'current' | 'preview' | 'legacy';
|
|
79
|
+
maxContextTokens: number;
|
|
80
|
+
maxOutputTokens: number;
|
|
81
|
+
maxTools: number;
|
|
82
|
+
parallelToolCalls: boolean;
|
|
83
|
+
structuredOutput: StructuredOutputCapability;
|
|
84
|
+
systemPromptMode: SystemPromptMode;
|
|
85
|
+
streaming: boolean;
|
|
86
|
+
cliffs: CliffRule[];
|
|
87
|
+
costInputPer1m: number;
|
|
88
|
+
costOutputPer1m: number;
|
|
89
|
+
lowering: LoweringSpec;
|
|
90
|
+
recovery: RecoveryRule[];
|
|
91
|
+
strengths: string[];
|
|
92
|
+
weaknesses: string[];
|
|
93
|
+
notes?: string;
|
|
94
|
+
verifiedAgainstDocs?: string;
|
|
95
|
+
/**
|
|
96
|
+
* Hand-curated per-archetype performance score on a 0-10 scale.
|
|
97
|
+
*
|
|
98
|
+
* 10 = frontier on this archetype (e.g. Opus 4.7 on critique)
|
|
99
|
+
* 8 = strong second tier (Sonnet on plan, Pro on extract)
|
|
100
|
+
* 7 = competent (Haiku on classify, Flash on hunt)
|
|
101
|
+
* 5 = acceptable for tolerant archetypes (Flash-Lite on classify)
|
|
102
|
+
* 3 = degraded (Flash on critique, DeepSeek on hunt)
|
|
103
|
+
*
|
|
104
|
+
* Missing archetypes default to `5` (no data, neutral). Each non-default
|
|
105
|
+
* value should carry a one-line rationale in the profile's note or inline
|
|
106
|
+
* comment citing brain evidence, family prior, or "starter hypothesis —
|
|
107
|
+
* verify with telemetry."
|
|
108
|
+
*
|
|
109
|
+
* Source today: hand-curated from master plan §3.3 + §6.2 starter tables.
|
|
110
|
+
* Source tomorrow (alpha.10+): brain `archetype_model_evidence` view.
|
|
111
|
+
*
|
|
112
|
+
* Anti-hallucination guardrail (master plan §2.5): when the watcher's
|
|
113
|
+
* `--audit-fields` flag marks a profile as stale (>90 days since
|
|
114
|
+
* verifiedAgainstDocs), the archetypePerf values get re-audited
|
|
115
|
+
* alongside capability fields. AI-trained intuition is NOT a valid
|
|
116
|
+
* source — only docs or brain evidence.
|
|
117
|
+
*
|
|
118
|
+
* alpha.9.
|
|
119
|
+
*/
|
|
120
|
+
archetypePerf?: Partial<Record<IntentArchetypeName, number>>;
|
|
121
|
+
}
|
|
122
|
+
declare const ALIASES: Record<string, string>;
|
|
123
|
+
interface ProfileBrainHook {
|
|
124
|
+
getProfile?: (canonicalId: string) => ModelProfile | undefined;
|
|
125
|
+
resolveAlias?: (id: string) => string | undefined;
|
|
126
|
+
}
|
|
127
|
+
/** @internal — called by models-brain.ts at module load. */
|
|
128
|
+
declare function _setProfileBrainHook(hook: ProfileBrainHook): void;
|
|
129
|
+
declare function getProfile(id: string): ModelProfile;
|
|
130
|
+
declare function tryGetProfile(id: string): ModelProfile | undefined;
|
|
131
|
+
declare function allProfiles(): readonly ModelProfile[];
|
|
132
|
+
/** @internal — bundled-only access for adapters that need a non-brain
|
|
133
|
+
* fallback baseline (avoids a brain → profiles → brain re-entry). */
|
|
134
|
+
declare function allProfilesRaw(): readonly ModelProfile[];
|
|
135
|
+
declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
|
|
136
|
+
|
|
137
|
+
export { ALIASES, type CacheStrategy, type CliffRule, type LoweringSpec, type ModelProfile, type RecoveryRule, type StructuredOutputCapability, type SystemPromptMode, _setProfileBrainHook, allProfiles, allProfilesRaw, getProfile, profilesByProvider, tryGetProfile };
|
package/dist/profiles.js
CHANGED
|
@@ -645,6 +645,215 @@ var PROFILES_RAW = [
|
|
|
645
645
|
// sequential tools — same as V4-Flash
|
|
646
646
|
}
|
|
647
647
|
},
|
|
648
|
+
// ── OpenAI ──
|
|
649
|
+
// alpha.16 (2026-05-17): close the half-supported provider gap. env.ts
|
|
650
|
+
// already registered OPENAI_API_KEY, and executeOpenAI, normalizeOpenAILike,
|
|
651
|
+
// and lowerOpenAI all already existed; profile entries were missing, so the
|
|
652
|
+
// alpha.10 auto-filter would mark openai-keyed models reachable but
|
|
653
|
+
// there were no profiles to filter IN. Half-supported is now fully
|
|
654
|
+
// supported. PB request `openai-provider-profiles` (2026-05-16).
|
|
655
|
+
//
|
|
656
|
+
// Profile data verified against developers.openai.com/api/docs/pricing
|
|
657
|
+
// + per-model pages 2026-05-17. L-049/L-081 step-zero: no AI-trained
|
|
658
|
+
// numbers — fetched live from OpenAI's docs. As of 2026-05, OpenAI's
|
|
659
|
+
// current flagship is gpt-5.5 (2025-12 cutoff); gpt-5.4-{base,mini,nano}
|
|
660
|
+
// are the workhorse family. gpt-4.1 + gpt-4o are legacy.
|
|
661
|
+
//
|
|
662
|
+
// Both 5.5 and 5.4 carry a 272K input-token pricing cliff (2x input,
|
|
663
|
+
// 1.5x output beyond that). Modeled as a `downgrade_quality_warning`
|
|
664
|
+
// cliff because it ranks the model down at large-context shapes — the
|
|
665
|
+
// semantics of "this model is now 2x more expensive" map onto the
|
|
666
|
+
// existing penalty mechanism. Cost-watcher will catch high-context
|
|
667
|
+
// spikes empirically; the cliff prevents naive routing into the doubled
|
|
668
|
+
// pricing zone.
|
|
669
|
+
{
|
|
670
|
+
id: "gpt-5.5",
|
|
671
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
672
|
+
provider: "openai",
|
|
673
|
+
status: "current",
|
|
674
|
+
maxContextTokens: 105e4,
|
|
675
|
+
maxOutputTokens: 128e3,
|
|
676
|
+
maxTools: 64,
|
|
677
|
+
parallelToolCalls: true,
|
|
678
|
+
structuredOutput: "native",
|
|
679
|
+
systemPromptMode: "inline",
|
|
680
|
+
streaming: true,
|
|
681
|
+
cliffs: [
|
|
682
|
+
{
|
|
683
|
+
metric: "input_tokens",
|
|
684
|
+
threshold: 272e3,
|
|
685
|
+
action: "downgrade_quality_warning",
|
|
686
|
+
reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
|
|
687
|
+
}
|
|
688
|
+
],
|
|
689
|
+
costInputPer1m: 5,
|
|
690
|
+
costOutputPer1m: 30,
|
|
691
|
+
lowering: {
|
|
692
|
+
system: { mode: "inline" },
|
|
693
|
+
// OpenAI caching is implicit (auto-applied to repeated prefixes
|
|
694
|
+
// ≥1024 tokens for prompt_tokens_details.cached_tokens). No
|
|
695
|
+
// wire-format marker. Discount: 10x for cached input ($0.50/$5.00).
|
|
696
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
697
|
+
tools: { format: "openai" }
|
|
698
|
+
},
|
|
699
|
+
recovery: [
|
|
700
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
701
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
702
|
+
],
|
|
703
|
+
strengths: ["reasoning", "agentic_coding", "long_context", "structured_output", "reliable_tool_use", "reasoning_effort_knob"],
|
|
704
|
+
weaknesses: ["cost", "pricing_cliff_at_272k"],
|
|
705
|
+
notes: "OpenAI frontier (2026-05). 1M context (1.05M total), 128K max output, 2025-12 cutoff. Reasoning effort knob (none/low/medium/high/xhigh). Pricing cliff at 272K input.",
|
|
706
|
+
// Frontier-tier perf hypothesis. Anchored to Opus 4.7 row (similar
|
|
707
|
+
// price/positioning). Brain evidence will refine; no telemetry yet.
|
|
708
|
+
archetypePerf: {
|
|
709
|
+
critique: 9,
|
|
710
|
+
plan: 9,
|
|
711
|
+
generate: 9,
|
|
712
|
+
ask: 9,
|
|
713
|
+
extract: 9,
|
|
714
|
+
transform: 9,
|
|
715
|
+
hunt: 8,
|
|
716
|
+
// parallel tool support good but cliff at 272K hurts deep multi-step
|
|
717
|
+
summarize: 7,
|
|
718
|
+
// overkill for tolerant archetype
|
|
719
|
+
classify: 7
|
|
720
|
+
// overkill; cheaper models cover this
|
|
721
|
+
}
|
|
722
|
+
},
|
|
723
|
+
{
|
|
724
|
+
id: "gpt-5.4",
|
|
725
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
726
|
+
provider: "openai",
|
|
727
|
+
status: "current",
|
|
728
|
+
maxContextTokens: 105e4,
|
|
729
|
+
maxOutputTokens: 128e3,
|
|
730
|
+
maxTools: 64,
|
|
731
|
+
parallelToolCalls: true,
|
|
732
|
+
structuredOutput: "native",
|
|
733
|
+
systemPromptMode: "inline",
|
|
734
|
+
streaming: true,
|
|
735
|
+
cliffs: [
|
|
736
|
+
{
|
|
737
|
+
metric: "input_tokens",
|
|
738
|
+
threshold: 272e3,
|
|
739
|
+
action: "downgrade_quality_warning",
|
|
740
|
+
reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
|
|
741
|
+
}
|
|
742
|
+
],
|
|
743
|
+
costInputPer1m: 2.5,
|
|
744
|
+
costOutputPer1m: 15,
|
|
745
|
+
lowering: {
|
|
746
|
+
system: { mode: "inline" },
|
|
747
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
748
|
+
tools: { format: "openai" }
|
|
749
|
+
},
|
|
750
|
+
recovery: [
|
|
751
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
752
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
753
|
+
],
|
|
754
|
+
strengths: ["reasoning", "long_context", "structured_output", "reliable_tool_use"],
|
|
755
|
+
weaknesses: ["pricing_cliff_at_272k"],
|
|
756
|
+
notes: "OpenAI workhorse (2026-05). 1M context (1.05M total), 128K max output, 2025-08 cutoff. Pricing cliff at 272K input. Pairs cleanly with Sonnet 4.6 on cost ($2.50/$15.00 vs $3.00/$15.00).",
|
|
757
|
+
// Anchored to Sonnet 4.6 row (similar price/positioning). Slight
|
|
758
|
+
// anthropic-side edge on agentic coding per master plan vibe.
|
|
759
|
+
archetypePerf: {
|
|
760
|
+
critique: 8,
|
|
761
|
+
plan: 8,
|
|
762
|
+
generate: 8,
|
|
763
|
+
ask: 8,
|
|
764
|
+
extract: 8,
|
|
765
|
+
transform: 8,
|
|
766
|
+
hunt: 7,
|
|
767
|
+
summarize: 7,
|
|
768
|
+
classify: 7
|
|
769
|
+
}
|
|
770
|
+
},
|
|
771
|
+
{
|
|
772
|
+
id: "gpt-5.4-mini",
|
|
773
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
774
|
+
provider: "openai",
|
|
775
|
+
status: "current",
|
|
776
|
+
maxContextTokens: 4e5,
|
|
777
|
+
maxOutputTokens: 128e3,
|
|
778
|
+
maxTools: 64,
|
|
779
|
+
parallelToolCalls: true,
|
|
780
|
+
structuredOutput: "native",
|
|
781
|
+
systemPromptMode: "inline",
|
|
782
|
+
streaming: true,
|
|
783
|
+
cliffs: [],
|
|
784
|
+
costInputPer1m: 0.75,
|
|
785
|
+
costOutputPer1m: 4.5,
|
|
786
|
+
lowering: {
|
|
787
|
+
system: { mode: "inline" },
|
|
788
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
789
|
+
tools: { format: "openai" }
|
|
790
|
+
},
|
|
791
|
+
recovery: [
|
|
792
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
793
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
794
|
+
],
|
|
795
|
+
strengths: ["cost", "speed", "agentic_coding", "structured_output", "reliable_tool_use"],
|
|
796
|
+
weaknesses: ["reasoning_depth"],
|
|
797
|
+
notes: "OpenAI mini-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. OpenAI describes as 'strongest mini model for coding, computer use, subagents.' Cache discount 10x ($0.075 input).",
|
|
798
|
+
// Mini-tier hypothesis. Anchored to Haiku 4.5 + Flash row pricing.
|
|
799
|
+
// Cost is slightly higher than Haiku ($0.75 vs $0.50 input) but
|
|
800
|
+
// OpenAI claims strong coding/subagent perf.
|
|
801
|
+
archetypePerf: {
|
|
802
|
+
ask: 7,
|
|
803
|
+
generate: 7,
|
|
804
|
+
extract: 7,
|
|
805
|
+
transform: 7,
|
|
806
|
+
classify: 7,
|
|
807
|
+
summarize: 7,
|
|
808
|
+
hunt: 7,
|
|
809
|
+
plan: 6,
|
|
810
|
+
critique: 5
|
|
811
|
+
// reasoning depth gap — frontier models handle this
|
|
812
|
+
}
|
|
813
|
+
},
|
|
814
|
+
{
|
|
815
|
+
id: "gpt-5.4-nano",
|
|
816
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
817
|
+
provider: "openai",
|
|
818
|
+
status: "current",
|
|
819
|
+
maxContextTokens: 4e5,
|
|
820
|
+
maxOutputTokens: 128e3,
|
|
821
|
+
maxTools: 64,
|
|
822
|
+
parallelToolCalls: true,
|
|
823
|
+
structuredOutput: "native",
|
|
824
|
+
systemPromptMode: "inline",
|
|
825
|
+
streaming: true,
|
|
826
|
+
cliffs: [],
|
|
827
|
+
costInputPer1m: 0.2,
|
|
828
|
+
costOutputPer1m: 1.25,
|
|
829
|
+
lowering: {
|
|
830
|
+
system: { mode: "inline" },
|
|
831
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
832
|
+
tools: { format: "openai" }
|
|
833
|
+
},
|
|
834
|
+
recovery: [
|
|
835
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
836
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
837
|
+
],
|
|
838
|
+
strengths: ["cost", "speed", "volume", "structured_output"],
|
|
839
|
+
weaknesses: ["reasoning_depth", "no_computer_use"],
|
|
840
|
+
notes: "OpenAI nano-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. 'Cheapest GPT-5.4-class for simple high-volume tasks.' No fine-tuning, no computer-use tools. Cache discount 10x.",
|
|
841
|
+
// Nano-tier. Anchored to Flash-Lite row ($0.10/$0.40 vs nano's
|
|
842
|
+
// $0.20/$1.25). Slightly more expensive than Flash-Lite but with
|
|
843
|
+
// OpenAI brand reliability. Good fit for classify/summarize floor.
|
|
844
|
+
archetypePerf: {
|
|
845
|
+
classify: 7,
|
|
846
|
+
summarize: 6,
|
|
847
|
+
ask: 6,
|
|
848
|
+
transform: 6,
|
|
849
|
+
extract: 6,
|
|
850
|
+
generate: 5,
|
|
851
|
+
hunt: 5,
|
|
852
|
+
plan: 4,
|
|
853
|
+
critique: 3
|
|
854
|
+
// not for reasoning archetypes
|
|
855
|
+
}
|
|
856
|
+
},
|
|
648
857
|
// ── Auto-onboarded (UNVERIFIED) ──
|
|
649
858
|
// Cloned by scripts/auto-onboard-models.mjs from a same-family template.
|
|
650
859
|
// Each entry's pricing/context/cliffs/lowering reflects the template, NOT
|
package/dist/profiles.mjs
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@warmdrift/kgauto-compiler",
|
|
3
|
-
"version": "2.0.0-alpha.
|
|
3
|
+
"version": "2.0.0-alpha.17",
|
|
4
4
|
"description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|
|
@@ -20,11 +20,16 @@
|
|
|
20
20
|
"types": "./dist/profiles.d.ts",
|
|
21
21
|
"import": "./dist/profiles.mjs",
|
|
22
22
|
"require": "./dist/profiles.js"
|
|
23
|
+
},
|
|
24
|
+
"./glassbox": {
|
|
25
|
+
"types": "./dist/glassbox/index.d.ts",
|
|
26
|
+
"import": "./dist/glassbox/index.mjs",
|
|
27
|
+
"require": "./dist/glassbox/index.js"
|
|
23
28
|
}
|
|
24
29
|
},
|
|
25
30
|
"files": ["dist", "README.md"],
|
|
26
31
|
"scripts": {
|
|
27
|
-
"build": "tsup src/index.ts src/dialect.ts src/profiles.ts --format cjs,esm --dts --clean",
|
|
32
|
+
"build": "tsup src/index.ts src/dialect.ts src/profiles.ts src/glassbox/index.ts --format cjs,esm --dts --clean",
|
|
28
33
|
"test": "vitest run",
|
|
29
34
|
"test:watch": "vitest",
|
|
30
35
|
"typecheck": "tsc --noEmit",
|