@realtimex/folio 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,12 +5,17 @@ import { SDKService } from "./SDKService.js";
5
5
  const logger = createLogger("ModelCapabilityService");
6
6
 
7
7
  export type VisionCapabilityState = "supported" | "unsupported" | "unknown";
8
+ export type VisionCapabilityModality = "image" | "pdf";
9
+ type StoredVisionCapabilityState = "supported" | "unsupported" | "pending_unsupported";
8
10
 
9
11
  interface StoredVisionCapability {
10
- state: "supported" | "unsupported";
12
+ state: StoredVisionCapabilityState;
11
13
  learned_at: string;
12
14
  expires_at?: string;
13
15
  reason?: string;
16
+ failure_count?: number;
17
+ last_failure_at?: string;
18
+ evidence?: string[];
14
19
  }
15
20
 
16
21
  type VisionCapabilityMap = Record<string, StoredVisionCapability>;
@@ -24,38 +29,58 @@ interface SettingsLike {
24
29
  export interface VisionResolution {
25
30
  provider: string;
26
31
  model: string;
32
+ modality: VisionCapabilityModality;
27
33
  state: VisionCapabilityState;
28
34
  shouldAttempt: boolean;
29
35
  }
30
36
 
31
- export class ModelCapabilityService {
32
- private static readonly SUPPORTED_TTL_DAYS = 180;
33
- private static readonly UNSUPPORTED_TTL_DAYS = 30;
37
+ interface VisionFailureSignal {
38
+ message: string;
39
+ statusCodes: Set<number>;
40
+ codes: Set<string>;
41
+ }
42
+
43
+ interface VisionFailureClassification {
44
+ isCapabilityError: boolean;
45
+ reason: string;
46
+ score: number;
47
+ evidence: string[];
48
+ }
34
49
 
35
- static resolveVisionSupport(settingsRow: SettingsLike | null | undefined): VisionResolution {
50
+ export class ModelCapabilityService {
51
+ private static readonly SUPPORTED_TTL_MS = 180 * 24 * 60 * 60 * 1000;
52
+ private static readonly UNSUPPORTED_TTL_MS = 30 * 24 * 60 * 60 * 1000;
53
+ private static readonly PENDING_UNSUPPORTED_TTL_MS = 24 * 60 * 60 * 1000;
54
+ private static readonly UNSUPPORTED_CONFIRMATION_WINDOW_MS = 24 * 60 * 60 * 1000;
55
+ private static readonly UNSUPPORTED_CONFIRMATION_FAILURES = 2;
56
+ private static readonly UNSUPPORTED_SCORE_THRESHOLD = 3;
57
+
58
+ static resolveVisionSupport(
59
+ settingsRow: SettingsLike | null | undefined,
60
+ modality: VisionCapabilityModality = "image"
61
+ ): VisionResolution {
36
62
  const provider = (settingsRow?.llm_provider || SDKService.DEFAULT_LLM_PROVIDER).trim();
37
63
  const model = (settingsRow?.llm_model || SDKService.DEFAULT_LLM_MODEL).trim();
38
- const state = this.getVisionState(settingsRow?.vision_model_capabilities, provider, model);
64
+ const state = this.getVisionState(settingsRow?.vision_model_capabilities, provider, model, modality);
39
65
  return {
40
66
  provider,
41
67
  model,
68
+ modality,
42
69
  state,
43
70
  shouldAttempt: state !== "unsupported",
44
71
  };
45
72
  }
46
73
 
47
- static getVisionState(rawMap: unknown, provider: string, model: string): VisionCapabilityState {
74
+ static getVisionState(
75
+ rawMap: unknown,
76
+ provider: string,
77
+ model: string,
78
+ modality: VisionCapabilityModality = "image"
79
+ ): VisionCapabilityState {
48
80
  const map = this.normalizeCapabilityMap(rawMap);
49
- const entry = map[this.capabilityKey(provider, model)];
50
- if (!entry) return "unknown";
51
-
52
- if (entry.expires_at) {
53
- const expiryTs = Date.parse(entry.expires_at);
54
- if (Number.isFinite(expiryTs) && expiryTs <= Date.now()) {
55
- return "unknown";
56
- }
57
- }
58
-
81
+ const entry = map[this.capabilityKey(provider, model, modality)];
82
+ if (!entry || this.isExpired(entry)) return "unknown";
83
+ if (entry.state === "pending_unsupported") return "unknown";
59
84
  return entry.state;
60
85
  }
61
86
 
@@ -64,12 +89,14 @@ export class ModelCapabilityService {
64
89
  userId: string;
65
90
  provider: string;
66
91
  model: string;
92
+ modality?: VisionCapabilityModality;
67
93
  }): Promise<void> {
68
94
  await this.writeCapability({
69
95
  ...opts,
96
+ modality: opts.modality ?? "image",
70
97
  state: "supported",
71
98
  reason: "vision_request_succeeded",
72
- ttlDays: this.SUPPORTED_TTL_DAYS,
99
+ ttlMs: this.SUPPORTED_TTL_MS,
73
100
  });
74
101
  }
75
102
 
@@ -79,11 +106,50 @@ export class ModelCapabilityService {
79
106
  provider: string;
80
107
  model: string;
81
108
  error: unknown;
109
+ modality?: VisionCapabilityModality;
82
110
  }): Promise<VisionCapabilityState> {
83
- const classification = this.classifyVisionFailure(opts.error);
111
+ const modality = opts.modality ?? "image";
112
+ const classification = this.classifyVisionFailure({
113
+ error: opts.error,
114
+ provider: opts.provider,
115
+ modality,
116
+ });
117
+
84
118
  if (!classification.isCapabilityError) {
85
- logger.info(`Vision failure for ${opts.provider}/${opts.model} treated as transient; leaving capability unknown`, {
119
+ logger.info(
120
+ `Vision failure for ${opts.provider}/${opts.model} (${modality}) treated as non-capability; leaving capability unknown`,
121
+ {
86
122
  reason: classification.reason,
123
+ score: classification.score,
124
+ evidence: classification.evidence,
125
+ }
126
+ );
127
+ return "unknown";
128
+ }
129
+
130
+ const map = await this.readCapabilityMap(opts.supabase, opts.userId);
131
+ if (!map) {
132
+ return "unknown";
133
+ }
134
+
135
+ const key = this.capabilityKey(opts.provider, opts.model, modality);
136
+ const now = new Date();
137
+ const failureCount = this.nextFailureCount(map[key], now.getTime());
138
+
139
+ if (failureCount < this.UNSUPPORTED_CONFIRMATION_FAILURES) {
140
+ await this.writeCapability({
141
+ supabase: opts.supabase,
142
+ userId: opts.userId,
143
+ provider: opts.provider,
144
+ model: opts.model,
145
+ modality,
146
+ state: "pending_unsupported",
147
+ reason: "capability_signal_pending_confirmation",
148
+ ttlMs: this.PENDING_UNSUPPORTED_TTL_MS,
149
+ preloadedMap: map,
150
+ failureCount,
151
+ lastFailureAt: now.toISOString(),
152
+ evidence: classification.evidence,
87
153
  });
88
154
  return "unknown";
89
155
  }
@@ -93,45 +159,36 @@ export class ModelCapabilityService {
93
159
  userId: opts.userId,
94
160
  provider: opts.provider,
95
161
  model: opts.model,
162
+ modality,
96
163
  state: "unsupported",
97
164
  reason: classification.reason,
98
- ttlDays: this.UNSUPPORTED_TTL_DAYS,
165
+ ttlMs: this.UNSUPPORTED_TTL_MS,
166
+ preloadedMap: map,
167
+ failureCount,
168
+ lastFailureAt: now.toISOString(),
169
+ evidence: classification.evidence,
99
170
  });
171
+
100
172
  return "unsupported";
101
173
  }
102
174
 
103
- private static async writeCapability(opts: {
104
- supabase: SupabaseClient;
105
- userId: string;
106
- provider: string;
107
- model: string;
108
- state: "supported" | "unsupported";
109
- reason: string;
110
- ttlDays: number;
111
- }): Promise<void> {
112
- const { supabase, userId, provider, model, state, reason, ttlDays } = opts;
113
- const { data, error: readErr } = await supabase
175
+ private static async readCapabilityMap(supabase: SupabaseClient, userId: string): Promise<VisionCapabilityMap | null> {
176
+ const { data, error } = await supabase
114
177
  .from("user_settings")
115
178
  .select("vision_model_capabilities")
116
179
  .eq("user_id", userId)
117
180
  .maybeSingle();
118
181
 
119
- if (readErr) {
120
- logger.warn("Failed to read user_settings for model capability write", { userId, readErr });
121
- return;
182
+ if (error) {
183
+ logger.warn("Failed to read user_settings for model capability", { userId, error });
184
+ return null;
122
185
  }
123
186
 
124
- const map = this.normalizeCapabilityMap(data?.vision_model_capabilities);
125
- const now = new Date();
126
- const expiresAt = new Date(now.getTime() + ttlDays * 24 * 60 * 60 * 1000).toISOString();
127
- map[this.capabilityKey(provider, model)] = {
128
- state,
129
- learned_at: now.toISOString(),
130
- expires_at: expiresAt,
131
- reason,
132
- };
187
+ return this.normalizeCapabilityMap(data?.vision_model_capabilities);
188
+ }
133
189
 
134
- const { error: writeErr } = await supabase
190
+ private static async persistCapabilityMap(supabase: SupabaseClient, userId: string, map: VisionCapabilityMap): Promise<boolean> {
191
+ const { error } = await supabase
135
192
  .from("user_settings")
136
193
  .upsert(
137
194
  {
@@ -141,12 +198,83 @@ export class ModelCapabilityService {
141
198
  { onConflict: "user_id" }
142
199
  );
143
200
 
144
- if (writeErr) {
145
- logger.warn("Failed to persist model capability state", { userId, provider, model, state, writeErr });
201
+ if (error) {
202
+ logger.warn("Failed to persist model capability state", { userId, error });
203
+ return false;
204
+ }
205
+
206
+ return true;
207
+ }
208
+
209
+ private static async writeCapability(opts: {
210
+ supabase: SupabaseClient;
211
+ userId: string;
212
+ provider: string;
213
+ model: string;
214
+ modality: VisionCapabilityModality;
215
+ state: StoredVisionCapabilityState;
216
+ reason: string;
217
+ ttlMs: number;
218
+ preloadedMap?: VisionCapabilityMap;
219
+ failureCount?: number;
220
+ lastFailureAt?: string;
221
+ evidence?: string[];
222
+ }): Promise<void> {
223
+ const {
224
+ supabase,
225
+ userId,
226
+ provider,
227
+ model,
228
+ modality,
229
+ state,
230
+ reason,
231
+ ttlMs,
232
+ preloadedMap,
233
+ failureCount,
234
+ lastFailureAt,
235
+ evidence,
236
+ } = opts;
237
+
238
+ const map = preloadedMap ?? (await this.readCapabilityMap(supabase, userId));
239
+ if (!map) {
240
+ return;
241
+ }
242
+
243
+ const now = new Date();
244
+ const key = this.capabilityKey(provider, model, modality);
245
+
246
+ const nextEntry: StoredVisionCapability = {
247
+ state,
248
+ learned_at: now.toISOString(),
249
+ expires_at: new Date(now.getTime() + ttlMs).toISOString(),
250
+ reason,
251
+ };
252
+
253
+ if (typeof failureCount === "number" && Number.isFinite(failureCount) && failureCount > 0) {
254
+ nextEntry.failure_count = Math.floor(failureCount);
255
+ }
256
+
257
+ if (typeof lastFailureAt === "string") {
258
+ nextEntry.last_failure_at = lastFailureAt;
259
+ }
260
+
261
+ if (Array.isArray(evidence) && evidence.length > 0) {
262
+ nextEntry.evidence = evidence.slice(0, 5);
263
+ }
264
+
265
+ map[key] = nextEntry;
266
+
267
+ const persisted = await this.persistCapabilityMap(supabase, userId, map);
268
+ if (!persisted) {
146
269
  return;
147
270
  }
148
271
 
149
- logger.info(`Updated model capability for ${provider}/${model}: ${state}`, { reason, ttlDays });
272
+ logger.info(`Updated model capability for ${provider}/${model} (${modality}): ${state}`, {
273
+ reason,
274
+ ttlMs,
275
+ failureCount,
276
+ evidence: nextEntry.evidence,
277
+ });
150
278
  }
151
279
 
152
280
  private static normalizeCapabilityMap(rawMap: unknown): VisionCapabilityMap {
@@ -160,71 +288,249 @@ export class ModelCapabilityService {
160
288
  if (!value || typeof value !== "object" || Array.isArray(value)) {
161
289
  continue;
162
290
  }
163
- const state = String((value as Record<string, unknown>).state || "");
164
- if (state !== "supported" && state !== "unsupported") {
291
+
292
+ const record = value as Record<string, unknown>;
293
+ const state = String(record.state || "");
294
+ if (state !== "supported" && state !== "unsupported" && state !== "pending_unsupported") {
165
295
  continue;
166
296
  }
167
297
 
168
- const learnedAt = (value as Record<string, unknown>).learned_at;
169
- const expiresAt = (value as Record<string, unknown>).expires_at;
170
- const reason = (value as Record<string, unknown>).reason;
298
+ const learnedAt = record.learned_at;
299
+ const expiresAt = record.expires_at;
300
+ const reason = record.reason;
301
+ const failureCount = record.failure_count;
302
+ const lastFailureAt = record.last_failure_at;
303
+ const evidence = record.evidence;
171
304
 
172
- normalized[key] = {
305
+ const normalizedEntry: StoredVisionCapability = {
173
306
  state,
174
307
  learned_at: typeof learnedAt === "string" ? learnedAt : new Date(0).toISOString(),
175
308
  expires_at: typeof expiresAt === "string" ? expiresAt : undefined,
176
309
  reason: typeof reason === "string" ? reason : undefined,
177
310
  };
311
+
312
+ if (typeof failureCount === "number" && Number.isFinite(failureCount) && failureCount > 0) {
313
+ normalizedEntry.failure_count = Math.floor(failureCount);
314
+ }
315
+
316
+ if (typeof lastFailureAt === "string") {
317
+ normalizedEntry.last_failure_at = lastFailureAt;
318
+ }
319
+
320
+ if (Array.isArray(evidence)) {
321
+ normalizedEntry.evidence = evidence
322
+ .filter((item): item is string => typeof item === "string")
323
+ .map((item) => item.trim())
324
+ .filter((item) => item.length > 0)
325
+ .slice(0, 5);
326
+ }
327
+
328
+ normalized[key] = normalizedEntry;
178
329
  }
179
330
 
180
331
  return normalized;
181
332
  }
182
333
 
183
- private static capabilityKey(provider: string, model: string): string {
334
+ private static capabilityBaseKey(provider: string, model: string): string {
184
335
  return `${provider.toLowerCase().trim()}:${model.toLowerCase().trim()}`;
185
336
  }
186
337
 
187
- private static classifyVisionFailure(error: unknown): { isCapabilityError: boolean; reason: string } {
188
- const message = this.errorToMessage(error).toLowerCase();
189
- if (!message) return { isCapabilityError: false, reason: "empty_error" };
338
+ private static capabilityKey(provider: string, model: string, modality: VisionCapabilityModality = "image"): string {
339
+ const base = this.capabilityBaseKey(provider, model);
340
+ if (modality === "image") return base;
341
+ return `${base}:${modality}`;
342
+ }
190
343
 
191
- const hardCapabilityHints = [
192
- "does not support images",
193
- "model does not support image",
194
- "invalid model", // e.g. text-only models fed image payloads in realtimexai provider
195
- ];
344
+ private static isExpired(entry: StoredVisionCapability): boolean {
345
+ if (!entry.expires_at) return false;
346
+ const expiryTs = Date.parse(entry.expires_at);
347
+ return Number.isFinite(expiryTs) && expiryTs <= Date.now();
348
+ }
196
349
 
197
- if (hardCapabilityHints.some((hint) => message.includes(hint))) {
198
- return { isCapabilityError: true, reason: "capability_mismatch" };
350
+ private static nextFailureCount(entry: StoredVisionCapability | undefined, nowTs: number): number {
351
+ if (!entry || entry.state !== "pending_unsupported" || this.isExpired(entry)) {
352
+ return 1;
199
353
  }
200
354
 
201
- const documentSpecificHints = [
202
- "image_url",
203
- "vision",
204
- "multimodal",
205
- "multi-modal",
206
- "unsupported content type",
207
- "unsupported message content",
208
- "invalid content type",
209
- "invalid image",
210
- "unrecognized content type",
211
- "image too large",
212
- "base64",
213
- ];
355
+ const lastFailureTs = entry.last_failure_at ? Date.parse(entry.last_failure_at) : Number.NaN;
356
+ if (!Number.isFinite(lastFailureTs)) {
357
+ return 1;
358
+ }
359
+
360
+ if (nowTs - lastFailureTs > this.UNSUPPORTED_CONFIRMATION_WINDOW_MS) {
361
+ return 1;
362
+ }
363
+
364
+ const currentCount = typeof entry.failure_count === "number" && Number.isFinite(entry.failure_count)
365
+ ? Math.max(1, Math.floor(entry.failure_count))
366
+ : 1;
367
+
368
+ return currentCount + 1;
369
+ }
370
+
371
+ private static classifyVisionFailure(opts: {
372
+ error: unknown;
373
+ provider: string;
374
+ modality: VisionCapabilityModality;
375
+ }): VisionFailureClassification {
376
+ const signal = this.extractVisionFailureSignal(opts.error);
377
+ if (!signal.message && signal.codes.size === 0 && signal.statusCodes.size === 0) {
378
+ return { isCapabilityError: false, reason: "empty_error", score: 0, evidence: [] };
379
+ }
380
+
381
+ const transientEvidence = this.matchTransientOrAuth(signal);
382
+ if (transientEvidence.length > 0) {
383
+ return {
384
+ isCapabilityError: false,
385
+ reason: "transient_or_auth",
386
+ score: 0,
387
+ evidence: transientEvidence,
388
+ };
389
+ }
390
+
391
+ const documentEvidence = this.matchDocumentSpecific(signal, opts.modality);
392
+ if (documentEvidence.length > 0) {
393
+ return {
394
+ isCapabilityError: false,
395
+ reason: "document_specific_failure",
396
+ score: 0,
397
+ evidence: documentEvidence,
398
+ };
399
+ }
400
+
401
+ const capability = this.scoreCapabilitySignal(signal, opts.provider, opts.modality);
402
+ if (capability.score >= this.UNSUPPORTED_SCORE_THRESHOLD) {
403
+ return {
404
+ isCapabilityError: true,
405
+ reason: "capability_mismatch",
406
+ score: capability.score,
407
+ evidence: capability.evidence,
408
+ };
409
+ }
214
410
 
215
- if (documentSpecificHints.some((hint) => message.includes(hint))) {
216
- return { isCapabilityError: false, reason: "document_specific_failure" };
411
+ if (capability.score > 0) {
412
+ return {
413
+ isCapabilityError: false,
414
+ reason: "insufficient_capability_evidence",
415
+ score: capability.score,
416
+ evidence: capability.evidence,
417
+ };
217
418
  }
218
419
 
219
- const transientHints = [
420
+ return {
421
+ isCapabilityError: false,
422
+ reason: "unknown_error_class",
423
+ score: 0,
424
+ evidence: [],
425
+ };
426
+ }
427
+
428
+ private static extractVisionFailureSignal(error: unknown): VisionFailureSignal {
429
+ const messages = new Set<string>();
430
+ const statusCodes = new Set<number>();
431
+ const codes = new Set<string>();
432
+
433
+ const pushMessage = (value: unknown): void => {
434
+ if (typeof value !== "string") return;
435
+ const normalized = value.trim().toLowerCase();
436
+ if (normalized) messages.add(normalized);
437
+ };
438
+
439
+ const pushStatus = (value: unknown): void => {
440
+ const parsed = typeof value === "number" ? value : Number(value);
441
+ if (!Number.isFinite(parsed) || parsed <= 0) return;
442
+ statusCodes.add(Math.floor(parsed));
443
+ };
444
+
445
+ const pushCode = (value: unknown): void => {
446
+ if (typeof value !== "string") return;
447
+ const normalized = value.trim().toLowerCase();
448
+ if (!normalized) return;
449
+ codes.add(normalized);
450
+ codes.add(normalized.replace(/[\s.-]+/g, "_"));
451
+ };
452
+
453
+ pushMessage(this.errorToMessage(error));
454
+
455
+ const queue: Array<{ value: unknown; depth: number }> = [{ value: error, depth: 0 }];
456
+ const visited = new Set<object>();
457
+
458
+ while (queue.length > 0) {
459
+ const current = queue.shift();
460
+ if (!current || current.depth > 2) {
461
+ continue;
462
+ }
463
+
464
+ const { value, depth } = current;
465
+ if (!value || typeof value !== "object") {
466
+ continue;
467
+ }
468
+
469
+ if (visited.has(value)) {
470
+ continue;
471
+ }
472
+ visited.add(value);
473
+
474
+ const candidate = value as Record<string, unknown>;
475
+ pushMessage(candidate.message);
476
+ pushMessage(candidate.details);
477
+ pushMessage(candidate.error_description);
478
+ pushMessage(candidate.detail);
479
+ if (typeof candidate.error === "string") {
480
+ pushMessage(candidate.error);
481
+ }
482
+
483
+ pushStatus(candidate.status);
484
+ pushStatus(candidate.statusCode);
485
+ pushCode(candidate.code);
486
+ pushCode(candidate.type);
487
+ if (typeof candidate.error === "object") {
488
+ const nested = candidate.error as Record<string, unknown>;
489
+ pushCode(nested.code);
490
+ pushCode(nested.type);
491
+ pushStatus(nested.status);
492
+ pushMessage(nested.message);
493
+ }
494
+
495
+ for (const key of ["response", "data", "error", "cause"]) {
496
+ if (candidate[key] !== undefined) {
497
+ queue.push({ value: candidate[key], depth: depth + 1 });
498
+ }
499
+ }
500
+ }
501
+
502
+ return {
503
+ message: Array.from(messages).join(" | "),
504
+ statusCodes,
505
+ codes,
506
+ };
507
+ }
508
+
509
+ private static matchTransientOrAuth(signal: VisionFailureSignal): string[] {
510
+ const statusMatches = Array.from(signal.statusCodes).filter((status) =>
511
+ [401, 403, 408, 429, 500, 502, 503, 504].includes(status)
512
+ );
513
+
514
+ const codeMatches = this.matchCodes(signal.codes, [
515
+ "timeout",
516
+ "timed_out",
517
+ "rate_limit",
518
+ "too_many_requests",
519
+ "temporarily_unavailable",
520
+ "service_unavailable",
521
+ "network_error",
522
+ "connection_error",
523
+ "unauthorized",
524
+ "forbidden",
525
+ "invalid_api_key",
526
+ "insufficient_quota",
527
+ ]);
528
+
529
+ const messageMatches = this.matchMessage(signal.message, [
220
530
  "timeout",
221
531
  "timed out",
222
532
  "rate limit",
223
533
  "too many requests",
224
- "429",
225
- "503",
226
- "502",
227
- "504",
228
534
  "service unavailable",
229
535
  "temporar",
230
536
  "network",
@@ -232,13 +538,285 @@ export class ModelCapabilityService {
232
538
  "unauthorized",
233
539
  "forbidden",
234
540
  "invalid api key",
541
+ "insufficient quota",
542
+ "overloaded",
543
+ ]);
544
+
545
+ return [
546
+ ...statusMatches.map((status) => `status:${status}`),
547
+ ...codeMatches.map((match) => `code:${match}`),
548
+ ...messageMatches.map((match) => `msg:${match}`),
549
+ ];
550
+ }
551
+
552
+ private static matchDocumentSpecific(signal: VisionFailureSignal, modality: VisionCapabilityModality): string[] {
553
+ const imageCodeHints = [
554
+ "image_too_large",
555
+ "invalid_base64",
556
+ "invalid_image",
557
+ "invalid_image_data",
558
+ "malformed_image",
559
+ "invalid_image_url",
560
+ "image_decode_failed",
561
+ ];
562
+ const imageMessageHints = [
563
+ "image too large",
564
+ "invalid base64",
565
+ "malformed image",
566
+ "invalid image data",
567
+ "unable to decode image",
568
+ "failed to decode image",
569
+ "invalid image url",
570
+ ];
571
+ const pdfCodeHints = [
572
+ "invalid_pdf",
573
+ "malformed_pdf",
574
+ "corrupt_pdf",
575
+ "encrypted_pdf",
576
+ "password_protected_pdf",
577
+ "pdf_parse_error",
578
+ "file_too_large",
579
+ ];
580
+ const pdfMessageHints = [
581
+ "invalid pdf",
582
+ "malformed pdf",
583
+ "corrupt pdf",
584
+ "encrypted pdf",
585
+ "password protected pdf",
586
+ "failed to parse pdf",
587
+ "unable to parse pdf",
588
+ "pdf is corrupted",
589
+ "pdf too large",
590
+ "file too large",
235
591
  ];
236
592
 
237
- if (transientHints.some((hint) => message.includes(hint))) {
238
- return { isCapabilityError: false, reason: "transient_or_auth" };
593
+ const codeMatches = this.matchCodes(
594
+ signal.codes,
595
+ modality === "pdf" ? pdfCodeHints : imageCodeHints
596
+ );
597
+ const messageMatches = this.matchMessage(
598
+ signal.message,
599
+ modality === "pdf" ? pdfMessageHints : imageMessageHints
600
+ );
601
+
602
+ const statusMatches = Array.from(signal.statusCodes).filter((status) => {
603
+ if (status === 413) return true;
604
+ if (status === 415 || status === 422) {
605
+ return codeMatches.length > 0 || messageMatches.length > 0;
606
+ }
607
+ return false;
608
+ });
609
+
610
+ return [
611
+ ...statusMatches.map((status) => `status:${status}`),
612
+ ...codeMatches.map((match) => `code:${match}`),
613
+ ...messageMatches.map((match) => `msg:${match}`),
614
+ ];
615
+ }
616
+
617
+ private static scoreCapabilitySignal(
618
+ signal: VisionFailureSignal,
619
+ provider: string,
620
+ modality: VisionCapabilityModality
621
+ ): { score: number; evidence: string[] } {
622
+ const evidence: string[] = [];
623
+ let score = 0;
624
+
625
+ const explicitCapabilityCodes = this.matchCodes(
626
+ signal.codes,
627
+ modality === "pdf"
628
+ ? [
629
+ "pdf_not_supported",
630
+ "unsupported_pdf_input",
631
+ "unsupported_document_input",
632
+ "unsupported_file_input",
633
+ "input_file_not_supported",
634
+ "unsupported_file_type",
635
+ "model_not_document_capable",
636
+ ]
637
+ : [
638
+ "vision_not_supported",
639
+ "unsupported_vision",
640
+ "model_not_vision_capable",
641
+ "image_not_supported",
642
+ "unsupported_message_content",
643
+ "unsupported_content_type_for_model",
644
+ "unsupported_image_input",
645
+ "invalid_model_for_vision",
646
+ ]
647
+ );
648
+
649
+ if (explicitCapabilityCodes.length > 0) {
650
+ score += 3;
651
+ evidence.push(...explicitCapabilityCodes.map((match) => `code:${match}`));
652
+ }
653
+
654
+ const highPrecisionMessageMatches = this.matchMessage(
655
+ signal.message,
656
+ modality === "pdf"
657
+ ? [
658
+ "this model does not support pdf",
659
+ "model does not support pdf",
660
+ "pdf is not supported for this model",
661
+ "file input is not supported for this model",
662
+ "input_file is not supported",
663
+ "unsupported file type: application/pdf",
664
+ "application/pdf is not supported for this model",
665
+ ]
666
+ : [
667
+ "does not support images",
668
+ "does not support image inputs",
669
+ "model does not support image",
670
+ "this model cannot process images",
671
+ "text-only model",
672
+ "images are not supported for this model",
673
+ "vision is not supported for this model",
674
+ "vision is not supported",
675
+ "vision not supported",
676
+ "image_url is only supported by certain models",
677
+ ]
678
+ );
679
+
680
+ if (highPrecisionMessageMatches.length > 0) {
681
+ score += 3;
682
+ evidence.push(...highPrecisionMessageMatches.map((match) => `msg:${match}`));
683
+ }
684
+
685
+ const providerSpecificMatches = this.matchMessage(
686
+ signal.message,
687
+ this.providerCapabilityHints(provider, modality)
688
+ );
689
+ if (providerSpecificMatches.length > 0) {
690
+ score += 2;
691
+ evidence.push(...providerSpecificMatches.map((match) => `provider:${match}`));
692
+ }
693
+
694
+ const weakCapabilityHints = this.matchMessage(
695
+ signal.message,
696
+ modality === "pdf"
697
+ ? [
698
+ "pdf input",
699
+ "pdf support",
700
+ "pdf not supported",
701
+ "application/pdf",
702
+ "input_file",
703
+ "file input",
704
+ "document input",
705
+ "unsupported file type",
706
+ "unsupported content type",
707
+ "invalid content type",
708
+ ]
709
+ : [
710
+ "vision",
711
+ "unsupported content type",
712
+ "unsupported message content",
713
+ "invalid content type",
714
+ "unrecognized content type",
715
+ "image_url",
716
+ "multimodal",
717
+ "multi-modal",
718
+ ]
719
+ );
720
+
721
+ const hasClientValidationStatus = Array.from(signal.statusCodes).some((status) => [400, 415, 422].includes(status));
722
+ if (weakCapabilityHints.length > 0 && hasClientValidationStatus) {
723
+ score += 1;
724
+ evidence.push(...weakCapabilityHints.map((match) => `weak:${match}`));
725
+ }
726
+
727
+ if (Array.from(signal.statusCodes).some((status) => status === 400 || status === 422)) {
728
+ score += 1;
729
+ evidence.push("status:client_validation");
730
+ }
731
+
732
+ return {
733
+ score,
734
+ evidence: Array.from(new Set(evidence)).slice(0, 8),
735
+ };
736
+ }
737
+
738
+ private static providerCapabilityHints(provider: string, modality: VisionCapabilityModality): string[] {
739
+ const normalized = provider.toLowerCase().trim();
740
+
741
+ if (modality === "pdf") {
742
+ if (normalized.includes("openai")) {
743
+ return [
744
+ "input_file is not supported",
745
+ "unsupported file type: application/pdf",
746
+ "application/pdf is not supported for this model",
747
+ ];
748
+ }
749
+ if (normalized.includes("anthropic")) {
750
+ return [
751
+ "pdf is not supported for this model",
752
+ "file input is not supported for this model",
753
+ ];
754
+ }
755
+ if (normalized.includes("google") || normalized.includes("gemini")) {
756
+ return [
757
+ "unsupported document input",
758
+ "pdf input is not supported",
759
+ ];
760
+ }
761
+ if (normalized.includes("realtimex")) {
762
+ return [
763
+ "unsupported file input",
764
+ "invalid model",
765
+ ];
766
+ }
767
+ return [];
768
+ }
769
+
770
+ if (normalized.includes("openai")) {
771
+ return [
772
+ "image_url is only supported by certain models",
773
+ "this model does not support image inputs",
774
+ ];
775
+ }
776
+
777
+ if (normalized.includes("anthropic")) {
778
+ return [
779
+ "only some claude models support vision",
780
+ "images are not supported for this model",
781
+ ];
782
+ }
783
+
784
+ if (normalized.includes("google") || normalized.includes("gemini")) {
785
+ return [
786
+ "model does not support multimodal input",
787
+ "unsupported input modality",
788
+ ];
789
+ }
790
+
791
+ if (normalized.includes("realtimex")) {
792
+ return [
793
+ "invalid model",
794
+ "text-only model",
795
+ ];
796
+ }
797
+
798
+ return [];
799
+ }
800
+
801
+ private static matchMessage(message: string, hints: string[]): string[] {
802
+ if (!message) return [];
803
+ return hints.filter((hint) => message.includes(hint));
804
+ }
805
+
806
+ private static matchCodes(codes: Set<string>, hints: string[]): string[] {
807
+ const matches: string[] = [];
808
+
809
+ for (const code of codes) {
810
+ const normalizedCode = code.replace(/[\s.-]+/g, "_");
811
+ for (const hint of hints) {
812
+ if (normalizedCode === hint || normalizedCode.includes(hint)) {
813
+ matches.push(code);
814
+ break;
815
+ }
816
+ }
239
817
  }
240
818
 
241
- return { isCapabilityError: false, reason: "unknown_error_class" };
819
+ return matches;
242
820
  }
243
821
 
244
822
  private static errorToMessage(error: unknown): string {