@animus-labs/cortex 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,7 +26,9 @@ import type { PiEventSource } from './event-bridge.js';
26
26
  import { BudgetGuard } from './budget-guard.js';
27
27
  import { classifyError } from './error-classifier.js';
28
28
  import { parseWorkingTags } from './working-tags.js';
29
- import { UTILITY_MODEL_DEFAULTS } from './provider-registry.js';
29
+ import { getModel as getPiModel, getModels as getPiModels } from '@earendil-works/pi-ai';
30
+ import { UTILITY_MODEL_OVERRIDES } from './provider-registry.js';
31
+ import { inferUtilityModel } from './utility-model-inference.js';
30
32
  import { McpClientManager } from './mcp-client.js';
31
33
  import { CompactionManager, buildCompactionConfig } from './compaction/index.js';
32
34
  import { isContextOverflow } from './compaction/failsafe.js';
@@ -2863,6 +2865,25 @@ export class CortexAgent {
2863
2865
  * If 'default' or undefined, look up the provider default and preserve
2864
2866
  * the raw provider-specific fields from the primary pi-ai model.
2865
2867
  */
2868
/**
 * Choose the default utility model for a provider.
 *
 * A model ID configured in UTILITY_MODEL_OVERRIDES is looked up first. When no
 * override is configured, the lookup yields nothing, or the lookup throws,
 * selection falls back to inferUtilityModel over the provider's model list.
 *
 * @param provider - Provider key used for both the override table and the model lookups.
 * @returns The selected model, or null when the model list cannot be read or
 *   inference finds no candidate.
 */
private inferDefaultUtilityModel(provider: string): PiModel | null {
  const overrideModelId = UTILITY_MODEL_OVERRIDES[provider];
  if (overrideModelId) {
    try {
      const overrideModel = (getPiModel as unknown as (provider: string, modelId: string) => unknown)(provider, overrideModelId);
      if (overrideModel) return overrideModel as PiModel;
    } catch {
      // A failing override lookup is treated like a missing override: fall
      // through to inference instead of giving up on a utility model entirely.
    }
  }

  try {
    const models = (getPiModels as unknown as (provider: string) => PiModel[])(provider);
    return inferUtilityModel(models as unknown as Array<Record<string, unknown>>) as PiModel | null;
  } catch {
    // Best effort: the caller handles null itself.
    return null;
  }
}
2886
+
2866
2887
  private resolveUtilityModels(
2867
2888
  primaryModel: CortexModel,
2868
2889
  primaryPiModel: PiModel,
@@ -2874,26 +2895,24 @@ export class CortexAgent {
2874
2895
  const primaryProvider = primaryModel.provider;
2875
2896
 
2876
2897
  if (!utilityModelConfig || utilityModelConfig === 'default') {
2877
- const defaultModelId = UTILITY_MODEL_DEFAULTS[primaryProvider];
2878
- if (!defaultModelId) {
2898
+ const utilityPiModel = this.inferDefaultUtilityModel(primaryProvider);
2899
+ if (!utilityPiModel) {
2879
2900
  return {
2880
2901
  utilityModel: primaryModel,
2881
2902
  utilityPiModel: primaryPiModel,
2882
2903
  };
2883
2904
  }
2884
2905
 
2885
- const utilityPiModel = {
2886
- ...primaryPiModel,
2887
- name: defaultModelId,
2888
- id: defaultModelId,
2889
- };
2906
+ const rawUtilityId = utilityPiModel['id'];
2907
+ const rawUtilityName = utilityPiModel['name'];
2908
+ const utilityModelId = typeof rawUtilityId === 'string' ? rawUtilityId : String(rawUtilityId ?? rawUtilityName);
2890
2909
 
2891
2910
  return {
2892
2911
  utilityPiModel,
2893
2912
  utilityModel: wrapModel(
2894
2913
  utilityPiModel,
2895
2914
  primaryProvider,
2896
- defaultModelId,
2915
+ utilityModelId,
2897
2916
  utilityPiModel.contextWindow ?? primaryModel.contextWindow,
2898
2917
  ),
2899
2918
  };
package/src/index.ts CHANGED
@@ -260,11 +260,13 @@ export type {
260
260
  // Model Wrapper (Phase 1D)
261
261
  export { wrapModel, unwrapModel, isCortexModel } from './model-wrapper.js';
262
262
  export type { CortexModel } from './model-wrapper.js';
263
+ export { inferUtilityModel, inferUtilityModelId } from './utility-model-inference.js';
263
264
 
264
265
  // Provider Registry (Phase 1D)
265
266
  export {
266
267
  PROVIDER_REGISTRY,
267
268
  OAUTH_PROVIDER_IDS,
269
+ UTILITY_MODEL_OVERRIDES,
268
270
  UTILITY_MODEL_DEFAULTS,
269
271
  PRIMARY_MODEL_DEFAULTS,
270
272
  PROVIDER_CACHE_CONFIG,
@@ -17,13 +17,14 @@
17
17
  import {
18
18
  PROVIDER_REGISTRY,
19
19
  OAUTH_PROVIDER_IDS,
20
- UTILITY_MODEL_DEFAULTS,
20
+ UTILITY_MODEL_OVERRIDES,
21
21
  } from './provider-registry.js';
22
22
  import { createRequire } from 'node:module';
23
23
  import type { IncomingMessage, ServerResponse } from 'node:http';
24
24
  import type { ThinkingLevel } from './types.js';
25
25
  import type { ProviderInfo, ModelInfo } from './provider-registry.js';
26
26
  import { wrapModel } from './model-wrapper.js';
27
+ import { inferUtilityModelId } from './utility-model-inference.js';
27
28
  import type { CortexModel } from './model-wrapper.js';
28
29
 
29
30
  const nodeRequire = createRequire(import.meta.url);
@@ -639,10 +640,12 @@ function mapRawToModelInfo(
639
640
  supportsThinking: supportedThinkingLevels.some(level => level !== 'off')
640
641
  || !!(raw['supportsThinking'] || raw['reasoning']),
641
642
  supportedThinkingLevels,
642
- supportsImages: !!raw['supportsImages'],
643
+ supportsImages: Array.isArray(raw['input'])
644
+ ? raw['input'].includes('image')
645
+ : !!raw['supportsImages'],
643
646
  };
644
647
 
645
- const rawPricing = raw['pricing'];
648
+ const rawPricing = raw['pricing'] ?? raw['cost'];
646
649
  if (rawPricing && typeof rawPricing === 'object') {
647
650
  const pricing = rawPricing as Record<string, unknown>;
648
651
  const inputPrice = pricing['input'];
@@ -830,32 +833,31 @@ export class ProviderManager implements IProviderManager {
830
833
  async validateApiKey(provider: string, apiKey: string): Promise<ApiKeyValidationResult> {
831
834
  const piAi = await loadPiAi();
832
835
 
833
- // Find the cheapest model for this provider to minimize validation cost
834
- const cheapestModelId = this.getSmallestModelId(provider);
835
- if (!cheapestModelId) {
836
- // No known model, try a generic test with the provider's first model
837
- const models = piAi.getModels(provider);
838
- if (models.length === 0) {
839
- return {
840
- provider,
841
- modelId: null,
842
- valid: false,
843
- retryable: false,
844
- status: 'resolution_error',
845
- message: `No models found for provider "${provider}"`,
846
- };
847
- }
848
- const firstRawId = models[0]!['id'];
849
- const firstRawName = models[0]!['name'];
850
- const firstModelId = typeof firstRawId === 'string'
851
- ? firstRawId
852
- : typeof firstRawName === 'string'
853
- ? firstRawName
854
- : String(firstRawId ?? firstRawName);
855
- return this.tryValidation(piAi, provider, firstModelId, apiKey);
836
+ const models = piAi.getModels(provider) ?? [];
837
+ if (models.length === 0) {
838
+ return {
839
+ provider,
840
+ modelId: null,
841
+ valid: false,
842
+ retryable: false,
843
+ status: 'resolution_error',
844
+ message: `No models found for provider "${provider}"`,
845
+ };
846
+ }
847
+
848
+ const modelId = this.getSmallestModelId(provider, models);
849
+ if (!modelId) {
850
+ return {
851
+ provider,
852
+ modelId: null,
853
+ valid: false,
854
+ retryable: false,
855
+ status: 'resolution_error',
856
+ message: `No usable models found for provider "${provider}"`,
857
+ };
856
858
  }
857
859
 
858
- return this.tryValidation(piAi, provider, cheapestModelId, apiKey);
860
+ return this.tryValidation(piAi, provider, modelId, apiKey);
859
861
  }
860
862
 
861
863
  /**
@@ -948,11 +950,10 @@ export class ProviderManager implements IProviderManager {
948
950
  // -----------------------------------------------------------------------
949
951
 
950
952
  /**
951
- * Get the cheapest known model ID for a provider.
952
- * Uses the UTILITY_MODEL_DEFAULTS as a proxy for "smallest model."
953
+ * Get the utility model ID for a provider: the configured override if one exists, otherwise the ID inferred from the provider's model list.
953
954
  */
954
- private getSmallestModelId(provider: string): string | null {
955
- return UTILITY_MODEL_DEFAULTS[provider] ?? null;
955
+ private getSmallestModelId(provider: string, models: Array<Record<string, unknown>>): string | null {
956
+ return UTILITY_MODEL_OVERRIDES[provider] ?? inferUtilityModelId(models);
956
957
  }
957
958
 
958
959
  /**
@@ -4,7 +4,7 @@
4
4
  * This module contains:
5
5
  * 1. PROVIDER_REGISTRY: metadata for all known providers (auth methods, env vars, key prefixes)
6
6
  * 2. OAUTH_PROVIDER_IDS: the subset of providers that support OAuth
7
- * 3. UTILITY_MODEL_DEFAULTS: per-provider cheapest-capable model for utility operations
7
+ * 3. UTILITY_MODEL_OVERRIDES: per-provider utility model overrides for inference exceptions
8
8
  *
9
9
  * OAuth flows are resolved through pi-ai's OAuth provider registry at runtime.
10
10
  *
@@ -270,17 +270,9 @@ export const OAUTH_PROVIDER_IDS: string[] = [
270
270
  ];
271
271
 
272
272
  // ---------------------------------------------------------------------------
273
- // Utility Model Defaults
273
+ // Model Defaults
274
274
  // ---------------------------------------------------------------------------
275
275
 
276
- /**
277
- * Default utility model IDs per provider.
278
- * Used when utilityModel is 'default' or undefined.
279
- *
280
- * These are the cheapest capable models for each provider,
281
- * suitable for internal operations like WebFetch summarization
282
- * and safety classification.
283
- */
284
276
  /**
285
277
  * Default primary model IDs per provider.
286
278
  * Used when a user first connects a provider and no model is explicitly selected.
@@ -289,21 +281,22 @@ export const OAUTH_PROVIDER_IDS: string[] = [
289
281
  export const PRIMARY_MODEL_DEFAULTS: Record<string, string> = {
290
282
  anthropic: 'claude-sonnet-4-6',
291
283
  openai: 'gpt-5.4',
284
+ 'openai-codex': 'gpt-5.5',
292
285
  google: 'gemini-3.1-pro-preview',
286
+ xai: 'grok-4',
293
287
  groq: 'openai/gpt-oss-120b',
294
288
  cerebras: 'gpt-oss-120b',
295
289
  mistral: 'mistral-large-2512',
296
290
  };
297
291
 
298
- export const UTILITY_MODEL_DEFAULTS: Record<string, string> = {
299
- anthropic: 'claude-haiku-4-5-20251001', // $1.00/$5.00 per 1M tokens
300
- openai: 'gpt-4.1-nano', // $0.10/$0.40 per 1M tokens
301
- 'openai-codex': 'gpt-5.4-mini', // Current small Codex-capable model
302
- google: 'gemini-2.5-flash-lite', // $0.10/$0.40 per 1M tokens
303
- groq: 'llama-3.1-8b-instant', // ~$0.05/$0.08 per 1M tokens
304
- cerebras: 'llama3.1-8b', // ~$0.10/$0.10 per 1M tokens
305
- mistral: 'mistral-small-2506', // $0.06/$0.18 per 1M tokens
306
- };
292
+ /**
293
+ * Per-provider utility model overrides for inference exceptions.
294
+ * Leave empty unless dynamic inference picks a bad utility model for a provider.
295
+ */
296
+ export const UTILITY_MODEL_OVERRIDES: Record<string, string> = {};
297
+
298
+ /** @deprecated Backwards-compatible alias of UTILITY_MODEL_OVERRIDES (the same object, empty by default). Prefer UTILITY_MODEL_OVERRIDES for new code. */
299
+ export const UTILITY_MODEL_DEFAULTS = UTILITY_MODEL_OVERRIDES;
307
300
 
308
301
  // ---------------------------------------------------------------------------
309
302
  // Cache Retention
@@ -0,0 +1,203 @@
1
/** Loosely-typed model record; every field is probed defensively at runtime. */
type RawModel = Record<string, unknown>;

/** A model reporting a positive context window below this value is rejected. */
const MIN_UTILITY_CONTEXT_WINDOW = 32_000;

/** Substrings of "<id> <name>" that disqualify a model (case-insensitive). */
const SPECIAL_PURPOSE_MODEL_PATTERN = /(?:embedding|embed|rerank|moderation|whisper|tts|audio|speech|image-generation|vision|live|deep-research|safety|safeguard|guard|search|transcrib)/i;

/**
 * Name fragments that mark a model as a utility model. A model's utility
 * score is the highest score among the patterns that match.
 */
const UTILITY_TERMS: Array<{ pattern: RegExp; score: number }> = [
  { pattern: /(?:^|[\s._/-])flash[\s._/-]?lite(?:$|[\s._/-])/, score: 110 },
  { pattern: /(?:^|[\s._/-])nano(?:$|[\s._/-])/, score: 100 },
  { pattern: /(?:^|[\s._/-])mini(?:$|[\s._/-])/, score: 95 },
  { pattern: /(?:^|[\s._/-])haiku(?:$|[\s._/-])/, score: 95 },
  { pattern: /(?:^|[\s._/-])small(?:$|[\s._/-])/, score: 85 },
  { pattern: /(?:^|[\s._/-])fast(?:$|[\s._/-])/, score: 80 },
  { pattern: /(?:^|[\s._/-])spark(?:$|[\s._/-])/, score: 80 },
  { pattern: /(?:^|[\s._/-])instant(?:$|[\s._/-])/, score: 75 },
  { pattern: /(?:^|[\s._/-])lite(?:$|[\s._/-])/, score: 70 },
  { pattern: /(?:^|[\s._/-])flash(?:$|[\s._/-])/, score: 65 },
  { pattern: /(?:^|[\s._/-])(?:7|8)b(?:$|[\s._/-])/, score: 65 },
  { pattern: /(?:^|[\s._/-])(?:12|20)b(?:$|[\s._/-])/, score: 45 },
  { pattern: /(?:^|[\s._/-])32b(?:$|[\s._/-])/, score: 25 },
];

// Date and version patterns are shared by date scoring and version scrubbing.
// They are global regexes used only through matchAll/replace, so lastIndex
// never leaks between calls.
const COMPACT_DATE_PATTERN = /20\d{6}/g;
const YEAR_MONTH_PATTERN = /(20\d{2})[-_/](0[1-9]|1[0-2])/g;
const MONTH_YEAR_PATTERN = /(0[1-9]|1[0-2])[-_/](20\d{2})/g;
// Lookarounds (not consumed delimiters) so that two groups separated by a
// single character, e.g. "2501-2512", are both found.
const SHORT_YEAR_MONTH_PATTERN = /(?<!\d)(\d{4})(?![\da-z])/g;
const PARAMETER_SIZE_PATTERN = /\b\d+(?:\.\d+)?b\b/g;
const DOTTED_VERSION_PATTERN = /\d+(?:\.\d+)+/g;
const NUMERIC_TOKEN_PATTERN = /^\d+$/;
const TOKEN_SEPARATOR_PATTERN = /[^a-z0-9]+/;

/** Positional weights for up to four version components (major first). */
const VERSION_WEIGHTS = [1_000_000, 10_000, 100, 1];

interface UtilityCandidate {
  model: RawModel;
  id: string;
  name: string;
  utilityScore: number;
  recencyScore: number;
  costScore: number;
}

/**
 * Pick the model best suited for utility work from a provider's model list.
 *
 * Models are first filtered (usable id, not special-purpose, accepts text,
 * context window not below the minimum). Among the survivors, models whose
 * name carries a utility term are preferred and ranked by recency, then cost,
 * then utility score; if none carries such a term, all survivors are ranked by
 * cost, then recency. Ties are broken by id.
 *
 * @param models - Raw model records; null, undefined and non-arrays yield null.
 * @returns The selected record (same object reference), or null if none qualifies.
 */
export function inferUtilityModel(models: readonly RawModel[] | null | undefined): RawModel | null {
  return selectUtilityCandidate(models)?.model ?? null;
}

/**
 * Like inferUtilityModel, but returns the selected model's identifier.
 *
 * The identifier is the one used during selection: the model's non-empty
 * string `id`, otherwise its non-empty string `name`.
 *
 * @returns The identifier, or null if no model qualifies.
 */
export function inferUtilityModelId(models: readonly RawModel[] | null | undefined): string | null {
  return selectUtilityCandidate(models)?.id ?? null;
}

/** Shared selection logic behind both public entry points. */
function selectUtilityCandidate(models: readonly RawModel[] | null | undefined): UtilityCandidate | null {
  if (!Array.isArray(models)) return null;

  const capable: UtilityCandidate[] = [];
  for (const entry of models as readonly unknown[]) {
    const candidate = toUtilityCandidate(entry);
    if (candidate !== null) capable.push(candidate);
  }

  const preferred = capable.filter(candidate => candidate.utilityScore > 0);
  return preferred.length > 0
    ? pickBest(preferred, compareUtilityCandidates)
    : pickBest(capable, compareFallbackCandidates);
}

/**
 * Return the candidate that sorts first under `compare`. On a full tie the
 * earliest candidate in input order wins, matching a stable sort.
 */
function pickBest(
  candidates: readonly UtilityCandidate[],
  compare: (a: UtilityCandidate, b: UtilityCandidate) => number,
): UtilityCandidate | null {
  let best: UtilityCandidate | null = null;
  for (const candidate of candidates) {
    if (best === null || compare(candidate, best) < 0) best = candidate;
  }
  return best;
}

/** Build a scored candidate from a raw entry, or null if the entry is unusable. */
function toUtilityCandidate(entry: unknown): UtilityCandidate | null {
  // Tolerate malformed catalogue entries instead of throwing on property access.
  if (entry === null || typeof entry !== 'object') return null;
  const model = entry as RawModel;

  const id = getString(model['id']) ?? getString(model['name']);
  if (!id) return null;

  const name = getString(model['name']) ?? id;
  const searchable = `${id} ${name}`.toLowerCase();
  if (SPECIAL_PURPOSE_MODEL_PATTERN.test(searchable)) return null;

  if (!supportsText(model)) return null;

  // A missing or non-positive context window is treated as unknown and allowed.
  const contextWindow = getNumber(model['contextWindow']) ?? 0;
  if (contextWindow > 0 && contextWindow < MIN_UTILITY_CONTEXT_WINDOW) return null;

  return {
    model,
    id,
    name,
    utilityScore: inferUtilityScore(searchable),
    recencyScore: inferRecencyScore(searchable),
    costScore: inferCostScore(model),
  };
}

/** Order: newest first, then cheapest, then strongest utility term, then id. */
function compareUtilityCandidates(a: UtilityCandidate, b: UtilityCandidate): number {
  if (b.recencyScore !== a.recencyScore) return b.recencyScore - a.recencyScore;
  if (a.costScore !== b.costScore) return a.costScore - b.costScore;
  if (b.utilityScore !== a.utilityScore) return b.utilityScore - a.utilityScore;
  return a.id.localeCompare(b.id);
}

/** Order: cheapest first, then newest, then id. */
function compareFallbackCandidates(a: UtilityCandidate, b: UtilityCandidate): number {
  if (a.costScore !== b.costScore) return a.costScore - b.costScore;
  if (b.recencyScore !== a.recencyScore) return b.recencyScore - a.recencyScore;
  return a.id.localeCompare(b.id);
}

/** True unless the model declares an `input` array that lacks 'text'. */
function supportsText(model: RawModel): boolean {
  const input = model['input'];
  if (!Array.isArray(input)) return true;
  return input.includes('text');
}

/** Highest score among the UTILITY_TERMS patterns matching `searchable`; 0 if none. */
function inferUtilityScore(searchable: string): number {
  let score = 0;
  for (const term of UTILITY_TERMS) {
    if (term.pattern.test(searchable)) {
      score = Math.max(score, term.score);
    }
  }
  return score;
}

/** The larger of the date-based and version-based scores. */
function inferRecencyScore(searchable: string): number {
  return Math.max(inferDateScore(searchable), inferVersionScore(searchable));
}

/**
 * Score the latest date embedded in `searchable` as YYYYMMDD (day 00 when only
 * year and month are present). Recognises YYYYMMDD, YYYY-MM, MM-YYYY and a
 * standalone YYMM group; returns 0 when no date is found.
 */
function inferDateScore(searchable: string): number {
  let score = 0;

  for (const match of searchable.matchAll(COMPACT_DATE_PATTERN)) {
    const value = Number(match[0]);
    if (isValidDateScore(value)) {
      score = Math.max(score, value);
    }
  }

  for (const match of searchable.matchAll(YEAR_MONTH_PATTERN)) {
    score = Math.max(score, Number(match[1]) * 10_000 + Number(match[2]) * 100);
  }

  for (const match of searchable.matchAll(MONTH_YEAR_PATTERN)) {
    score = Math.max(score, Number(match[2]) * 10_000 + Number(match[1]) * 100);
  }

  for (const match of searchable.matchAll(SHORT_YEAR_MONTH_PATTERN)) {
    const raw = match[1] ?? '';
    const year = Number(raw.slice(0, 2));
    const month = Number(raw.slice(2, 4));
    if (year >= 20 && year <= 40 && month >= 1 && month <= 12) {
      score = Math.max(score, 20_000_000 + year * 10_000 + month * 100);
    }
  }

  return score;
}

/**
 * Score the highest version number embedded in `searchable`, after removing
 * dates and parameter sizes such as "8b". Both dotted versions ("4.1") and
 * runs of numeric tokens ("4-5") count; a lone number does not.
 */
function inferVersionScore(searchable: string): number {
  const scrubbed = searchable
    .replace(COMPACT_DATE_PATTERN, ' ')
    .replace(YEAR_MONTH_PATTERN, ' ')
    .replace(MONTH_YEAR_PATTERN, ' ')
    .replace(SHORT_YEAR_MONTH_PATTERN, ' ')
    .replace(PARAMETER_SIZE_PATTERN, ' ');

  let score = 0;
  for (const match of scrubbed.matchAll(DOTTED_VERSION_PATTERN)) {
    score = Math.max(score, scoreVersionParts(match[0].split('.').map(Number)));
  }

  const tokens = scrubbed.split(TOKEN_SEPARATOR_PATTERN).filter(Boolean);
  let index = 0;
  while (index < tokens.length) {
    if (!isNumericToken(tokens[index])) {
      index++;
      continue;
    }
    let end = index;
    while (end < tokens.length && isNumericToken(tokens[end])) end++;
    // Score each run once from its first component. Scoring suffixes as well
    // would let "1-5-2" count as version 5.2.
    if (end - index >= 2) {
      score = Math.max(score, scoreVersionParts(tokens.slice(index, end).map(Number)));
    }
    index = end;
  }

  return score;
}

/** True for a token consisting solely of decimal digits. */
function isNumericToken(token: string | undefined): boolean {
  return token !== undefined && NUMERIC_TOKEN_PATTERN.test(token);
}

/** Weighted sum of the first four finite components; later components are ignored. */
function scoreVersionParts(parts: number[]): number {
  let score = 0;
  const count = Math.min(parts.length, VERSION_WEIGHTS.length);
  for (let i = 0; i < count; i++) {
    const part = parts[i];
    const weight = VERSION_WEIGHTS[i];
    if (part !== undefined && weight !== undefined && Number.isFinite(part)) {
      score += part * weight;
    }
  }
  return score;
}

/**
 * Cost rank from the model's `cost` (or `pricing`) record: input plus three
 * times output. Models without pricing rank last; models priced at exactly
 * zero rank just ahead of them.
 */
function inferCostScore(model: RawModel): number {
  const cost = model['cost'] ?? model['pricing'];
  if (!cost || typeof cost !== 'object') return Number.MAX_SAFE_INTEGER;

  const rawCost = cost as RawModel;
  const input = getNumber(rawCost['input']) ?? 0;
  const output = getNumber(rawCost['output']) ?? 0;
  if (input === 0 && output === 0) return Number.MAX_SAFE_INTEGER - 1;
  return input + output * 3;
}

/** Range check for a YYYYMMDD number: years 2020-2040, month 1-12, day 1-31. */
function isValidDateScore(value: number): boolean {
  const year = Math.floor(value / 10_000);
  const month = Math.floor((value % 10_000) / 100);
  const day = value % 100;
  return year >= 2020 && year <= 2040 && month >= 1 && month <= 12 && day >= 1 && day <= 31;
}

/** The value if it is a non-empty string, else null. */
function getString(value: unknown): string | null {
  return typeof value === 'string' && value.length > 0 ? value : null;
}

/** The value if it is a finite number, else null. */
function getNumber(value: unknown): number | null {
  return typeof value === 'number' && Number.isFinite(value) ? value : null;
}