@realtimex/folio 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -49,21 +49,42 @@ type ProcessWithPoliciesOptions = {
49
49
  allowLearnedFallback?: boolean;
50
50
  };
51
51
 
52
+ type VlmPayload = {
53
+ kind: "image" | "pdf";
54
+ dataUrl: string;
55
+ supplementalText: string;
56
+ };
57
+
52
58
  /**
53
- * Helper to build LLM message content. If the text contains the VLM marker
54
- * generated by IngestionService, it casts the payload to an OpenAI-compatible
55
- * Vision array structure so the underlying SDK bridge can transmit the image.
59
+ * Helper to build LLM message content. If the text contains a VLM marker
60
+ * generated by IngestionService, it casts the payload to multimodal blocks.
56
61
  */
57
- function extractVlmPayload(text: string): { imageDataUrl: string; supplementalText: string } | null {
58
- const marker = text.match(/\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/);
59
- if (!marker) return null;
62
+ function extractVlmPayload(text: string): VlmPayload | null {
63
+ const imageMarker = text.match(/\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/);
64
+ if (imageMarker) {
65
+ const markerText = imageMarker[0];
66
+ return {
67
+ kind: "image",
68
+ dataUrl: imageMarker[1],
69
+ supplementalText: text.replace(markerText, "").trim().slice(0, 4000),
70
+ };
71
+ }
60
72
 
61
- const markerText = marker[0];
62
- const supplementalText = text.replace(markerText, "").trim().slice(0, 4000);
63
- return {
64
- imageDataUrl: marker[1],
65
- supplementalText,
66
- };
73
+ const pdfMarker = text.match(/\[VLM_PDF_DATA:(data:[^;]+;base64,[^\]]+)\]/);
74
+ if (pdfMarker) {
75
+ const markerText = pdfMarker[0];
76
+ return {
77
+ kind: "pdf",
78
+ dataUrl: pdfMarker[1],
79
+ supplementalText: text.replace(markerText, "").trim().slice(0, 4000),
80
+ };
81
+ }
82
+
83
+ return null;
84
+ }
85
+
86
+ function hasVlmPayload(text: string): boolean {
87
+ return text.includes("[VLM_IMAGE_DATA:") || text.includes("[VLM_PDF_DATA:");
67
88
  }
68
89
 
69
90
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -73,10 +94,13 @@ function buildMessageContent(prompt: string, text: string, textFirst = false): a
73
94
  const textPrompt = vlmPayload.supplementalText
74
95
  ? `${prompt}\n\nSupplemental extracted fields:\n${vlmPayload.supplementalText}`
75
96
  : prompt;
76
- return [
77
- { type: "text", text: textPrompt },
78
- { type: "image_url", image_url: { url: vlmPayload.imageDataUrl } }
79
- ];
97
+ // `input_file` is not provider-agnostic (e.g. Anthropic-style block); providers
98
+ // that don't accept it will fail, and IngestionService will learn unsupported pdf modality.
99
+ const assetBlock = vlmPayload.kind === "pdf"
100
+ ? { type: "input_file", file_url: vlmPayload.dataUrl }
101
+ : { type: "image_url", image_url: { url: vlmPayload.dataUrl } };
102
+
103
+ return [{ type: "text", text: textPrompt }, assetBlock];
80
104
  }
81
105
  // Standard text payload
82
106
  return textFirst
@@ -444,7 +468,7 @@ async function evaluateCondition(condition: MatchCondition, doc: DocumentObject,
444
468
  model,
445
469
  condition_type: condition.type,
446
470
  prompt_preview: prompt.slice(0, 180),
447
- vision_payload: doc.text.includes("[VLM_IMAGE_DATA:")
471
+ vision_payload: hasVlmPayload(doc.text)
448
472
  }
449
473
  });
450
474
  Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Policy Matching", {
@@ -453,7 +477,7 @@ async function evaluateCondition(condition: MatchCondition, doc: DocumentObject,
453
477
  model,
454
478
  condition_type: condition.type,
455
479
  prompt_preview: prompt.slice(0, 180),
456
- vision_payload: doc.text.includes("[VLM_IMAGE_DATA:")
480
+ vision_payload: hasVlmPayload(doc.text)
457
481
  }, doc.supabase);
458
482
  const result = await sdk.llm.chat(
459
483
  [
@@ -563,7 +587,7 @@ Fields to extract:
563
587
  ${fieldDescriptions}`;
564
588
 
565
589
  try {
566
- const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
590
+ const isVlmPayload = hasVlmPayload(doc.text);
567
591
  const mixedPrompt = isVlmPayload
568
592
  ? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
569
593
  : prompt;
@@ -722,7 +746,7 @@ Rules:
722
746
  known_fields_count: Object.keys(contractData).length,
723
747
  }, doc.supabase);
724
748
 
725
- const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
749
+ const isVlmPayload = hasVlmPayload(doc.text);
726
750
  const mixedPrompt = isVlmPayload
727
751
  ? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
728
752
  : prompt;
@@ -1004,7 +1028,9 @@ export class PolicyEngine {
1004
1028
  const allowLearnedFallback = opts.allowLearnedFallback !== false && !forcedPolicyId;
1005
1029
  if (allowLearnedFallback && doc.supabase && policies.length > 0) {
1006
1030
  try {
1007
- const learningText = doc.text.replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "");
1031
+ const learningText = doc.text
1032
+ .replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "")
1033
+ .replace(/\[VLM_PDF_DATA:[^\]]+\]/g, "");
1008
1034
  const learned = await PolicyLearningService.resolveLearnedCandidate({
1009
1035
  supabase: doc.supabase,
1010
1036
  userId: doc.userId,
@@ -1118,7 +1144,7 @@ export class PolicyEngine {
1118
1144
  `No markdown, no explanation — only the JSON object.`;
1119
1145
 
1120
1146
  const userPrompt = `Extract the following fields from the document:\n${fieldList}`;
1121
- const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
1147
+ const isVlmPayload = hasVlmPayload(doc.text);
1122
1148
  const mixedPrompt = isVlmPayload ? `${systemPrompt}\n\n${userPrompt}` : userPrompt;
1123
1149
 
1124
1150
  try {
@@ -135,8 +135,8 @@ export class RAGService {
135
135
  supabase: SupabaseClient,
136
136
  settings?: EmbeddingSettings
137
137
  ): Promise<void> {
138
- if (rawText.startsWith("[VLM_IMAGE_DATA:")) {
139
- logger.info(`Skipping chunking and embedding for VLM base64 image data (Ingestion: ${ingestionId})`);
138
+ if (/^\[VLM_(IMAGE|PDF)_DATA:/.test(rawText)) {
139
+ logger.info(`Skipping chunking and embedding for VLM base64 multimodal data (Ingestion: ${ingestionId})`);
140
140
  return;
141
141
  }
142
142
 
@@ -51,6 +51,8 @@ function isPdfTextExtractable(pdfData) {
51
51
  return true;
52
52
  }
53
53
  export class IngestionService {
54
+ static FAST_EXTS = ["txt", "md", "csv", "json"];
55
+ static IMAGE_EXTS = ["png", "jpg", "jpeg", "webp"];
54
56
  static NON_IDEMPOTENT_ACTION_TYPES = new Set([
55
57
  "append_to_google_sheet",
56
58
  "webhook",
@@ -85,10 +87,10 @@ export class IngestionService {
85
87
  return String(value);
86
88
  }
87
89
  static buildVlmSemanticText(opts) {
88
- const { filename, finalStatus, policyName, extracted, tags } = opts;
90
+ const { filename, finalStatus, policyName, extracted, tags, modality } = opts;
89
91
  const lines = [
90
92
  `Document filename: ${filename}`,
91
- "Document source: VLM image extraction",
93
+ `Document source: VLM ${modality} extraction`,
92
94
  `Processing status: ${finalStatus}`,
93
95
  ];
94
96
  if (policyName) {
@@ -134,6 +136,7 @@ export class IngestionService {
134
136
  policyName: opts.policyName,
135
137
  extracted: opts.extracted,
136
138
  tags: opts.tags,
139
+ modality: opts.modality,
137
140
  });
138
141
  const details = {
139
142
  synthetic_chars: syntheticText.length,
@@ -160,6 +163,15 @@ export class IngestionService {
160
163
  });
161
164
  return details;
162
165
  }
166
+ static buildVlmPayloadMarker(modality, dataUrl) {
167
+ const prefix = modality === "pdf" ? "VLM_PDF_DATA" : "VLM_IMAGE_DATA";
168
+ return `[${prefix}:${dataUrl}]`;
169
+ }
170
+ static async fileToDataUrl(filePath, mimeType) {
171
+ const buffer = await fs.readFile(filePath);
172
+ const base64 = buffer.toString("base64");
173
+ return `data:${mimeType};base64,${base64}`;
174
+ }
163
175
  /**
164
176
  * Ingest a document using Hybrid Routing Architecture.
165
177
  */
@@ -217,32 +229,31 @@ export class IngestionService {
217
229
  Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Ingestion started", source, filename, fileSize, is_high_intent: true }, supabase);
218
230
  // 2. Document Triage
219
231
  let isFastPath = false;
220
- let isVlmFastPath = false;
232
+ let isMultimodalFastPath = false;
233
+ let multimodalModality = null;
221
234
  let extractionContent = content;
222
235
  const ext = filename.toLowerCase().split('.').pop() || '';
223
- const fastExts = ['txt', 'md', 'csv', 'json'];
224
- const imageExts = ['png', 'jpg', 'jpeg', 'webp'];
225
236
  // Pre-fetch settings to decide whether we should attempt VLM.
226
237
  const { data: triageSettingsRow } = await supabase
227
238
  .from("user_settings")
228
239
  .select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
229
240
  .eq("user_id", userId)
230
241
  .maybeSingle();
231
- const visionResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow);
232
- const llmModel = visionResolution.model;
233
- const llmProvider = visionResolution.provider;
234
- if (fastExts.includes(ext)) {
242
+ const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
243
+ const pdfResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "pdf");
244
+ const llmModel = imageResolution.model;
245
+ const llmProvider = imageResolution.provider;
246
+ if (this.FAST_EXTS.includes(ext)) {
235
247
  isFastPath = true;
236
248
  }
237
- else if (imageExts.includes(ext) && visionResolution.shouldAttempt) {
249
+ else if (this.IMAGE_EXTS.includes(ext) && imageResolution.shouldAttempt) {
238
250
  try {
239
- const buffer = await fs.readFile(filePath);
240
- const base64 = buffer.toString('base64');
241
251
  const mimeTypeActual = mimeType || `image/${ext === 'jpg' ? 'jpeg' : ext}`;
242
- // Special marker for PolicyEngine
243
- extractionContent = `[VLM_IMAGE_DATA:data:${mimeTypeActual};base64,${base64}]`;
252
+ const dataUrl = await this.fileToDataUrl(filePath, mimeTypeActual);
253
+ extractionContent = this.buildVlmPayloadMarker("image", dataUrl);
244
254
  isFastPath = true;
245
- isVlmFastPath = true;
255
+ isMultimodalFastPath = true;
256
+ multimodalModality = "image";
246
257
  logger.info(`Smart Triage: Image ${filename} routed to Fast Path using native VLM (${llmModel}).`);
247
258
  Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "VLM Fast Path selected", type: ext, model: llmModel }, supabase);
248
259
  }
@@ -250,7 +261,7 @@ export class IngestionService {
250
261
  logger.warn(`Failed to read VLM image ${filename}. Routing to Heavy Path.`, { err });
251
262
  }
252
263
  }
253
- else if (imageExts.includes(ext)) {
264
+ else if (this.IMAGE_EXTS.includes(ext)) {
254
265
  logger.info(`Smart Triage: Image ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked vision-unsupported.`);
255
266
  Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
256
267
  action: "VLM skipped (model marked unsupported)",
@@ -270,9 +281,30 @@ export class IngestionService {
270
281
  logger.info(`Smart Triage: PDF ${filename} passed text quality check (${pdfData.pages.filter(p => p.text.trim().length > 30).length}/${pdfData.total} pages with text). Routing to Fast Path.`);
271
282
  Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Smart Triage passed", type: "pdf", fast_path: true }, supabase);
272
283
  }
284
+ else if (pdfResolution.shouldAttempt) {
285
+ // Reuse the already-loaded parse buffer; avoid a second readFile in fileToDataUrl.
286
+ const dataUrl = `data:application/pdf;base64,${buffer.toString("base64")}`;
287
+ extractionContent = this.buildVlmPayloadMarker("pdf", dataUrl);
288
+ isFastPath = true;
289
+ isMultimodalFastPath = true;
290
+ multimodalModality = "pdf";
291
+ logger.info(`Smart Triage: PDF ${filename} routed to multimodal Fast Path using native VLM (${llmModel}).`);
292
+ Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
293
+ action: "VLM Fast Path selected",
294
+ type: "pdf",
295
+ modality: "pdf",
296
+ model: llmModel,
297
+ }, supabase);
298
+ }
273
299
  else {
274
- logger.info(`Smart Triage: PDF ${filename} failed text quality check. Routing to Heavy Path.`);
275
- Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Smart Triage failed", type: "pdf", fast_path: false }, supabase);
300
+ logger.info(`Smart Triage: PDF ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked PDF-unsupported.`);
301
+ Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
302
+ action: "VLM skipped (model marked unsupported)",
303
+ type: "pdf",
304
+ modality: "pdf",
305
+ model: llmModel,
306
+ provider: llmProvider,
307
+ }, supabase);
276
308
  }
277
309
  }
278
310
  catch (err) {
@@ -310,7 +342,7 @@ export class IngestionService {
310
342
  details: {
311
343
  provider: llmSettings.llm_provider ?? llmProvider,
312
344
  model: llmSettings.llm_model ?? llmModel,
313
- mode: isVlmFastPath ? "vision" : "text",
345
+ mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
314
346
  }
315
347
  });
316
348
  const baselineResult = await PolicyEngine.extractBaseline(doc, { context: baselineConfig?.context, fields: baselineConfig?.fields }, llmSettings);
@@ -363,7 +395,7 @@ export class IngestionService {
363
395
  .eq("id", ingestion.id)
364
396
  .select()
365
397
  .single();
366
- if (isVlmFastPath) {
398
+ if (isMultimodalFastPath && multimodalModality) {
367
399
  const embeddingMeta = this.queueVlmSemanticEmbedding({
368
400
  ingestionId: ingestion.id,
369
401
  userId,
@@ -372,6 +404,7 @@ export class IngestionService {
372
404
  policyName,
373
405
  extracted: mergedExtracted,
374
406
  tags: autoTags,
407
+ modality: multimodalModality,
375
408
  supabase,
376
409
  embedSettings,
377
410
  });
@@ -388,25 +421,27 @@ export class IngestionService {
388
421
  .update({ trace: finalTrace })
389
422
  .eq("id", ingestion.id);
390
423
  }
391
- if (isVlmFastPath) {
424
+ if (isMultimodalFastPath && multimodalModality) {
392
425
  await ModelCapabilityService.learnVisionSuccess({
393
426
  supabase,
394
427
  userId,
395
428
  provider: llmSettings.llm_provider ?? llmProvider,
396
429
  model: llmSettings.llm_model ?? llmModel,
430
+ modality: multimodalModality,
397
431
  });
398
432
  }
399
433
  return updatedIngestion;
400
434
  }
401
435
  catch (err) {
402
436
  const msg = err instanceof Error ? err.message : String(err);
403
- if (isVlmFastPath) {
437
+ if (isMultimodalFastPath && multimodalModality) {
404
438
  const learnedState = await ModelCapabilityService.learnVisionFailure({
405
439
  supabase,
406
440
  userId,
407
441
  provider: llmProvider,
408
442
  model: llmModel,
409
443
  error: err,
444
+ modality: multimodalModality,
410
445
  });
411
446
  logger.warn(`VLM extraction failed for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
412
447
  Actuator.logEvent(ingestion.id, userId, "error", "Processing", {
@@ -477,31 +512,31 @@ export class IngestionService {
477
512
  if (!filePath)
478
513
  throw new Error("No storage path found for this ingestion");
479
514
  let isFastPath = false;
480
- let isVlmFastPath = false;
515
+ let isMultimodalFastPath = false;
516
+ let multimodalModality = null;
481
517
  let extractionContent = "";
482
518
  const ext = filename.toLowerCase().split('.').pop() || '';
483
- const fastExts = ['txt', 'md', 'csv', 'json'];
484
- const imageExts = ['png', 'jpg', 'jpeg', 'webp'];
485
519
  const { data: triageSettingsRow } = await supabase
486
520
  .from("user_settings")
487
521
  .select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
488
522
  .eq("user_id", userId)
489
523
  .maybeSingle();
490
- const visionResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow);
491
- const llmModel = visionResolution.model;
492
- const llmProvider = visionResolution.provider;
493
- if (fastExts.includes(ext)) {
524
+ const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
525
+ const pdfResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "pdf");
526
+ const llmModel = imageResolution.model;
527
+ const llmProvider = imageResolution.provider;
528
+ if (this.FAST_EXTS.includes(ext)) {
494
529
  isFastPath = true;
495
530
  extractionContent = await fs.readFile(filePath, "utf-8");
496
531
  }
497
- else if (imageExts.includes(ext) && visionResolution.shouldAttempt) {
532
+ else if (this.IMAGE_EXTS.includes(ext) && imageResolution.shouldAttempt) {
498
533
  try {
499
- const buffer = await fs.readFile(filePath);
500
- const base64 = buffer.toString('base64');
501
534
  const mimeTypeActual = `image/${ext === 'jpg' ? 'jpeg' : ext}`;
502
- extractionContent = `[VLM_IMAGE_DATA:data:${mimeTypeActual};base64,${base64}]`;
535
+ const dataUrl = await this.fileToDataUrl(filePath, mimeTypeActual);
536
+ extractionContent = this.buildVlmPayloadMarker("image", dataUrl);
503
537
  isFastPath = true;
504
- isVlmFastPath = true;
538
+ isMultimodalFastPath = true;
539
+ multimodalModality = "image";
505
540
  logger.info(`Smart Triage: Re-run image ${filename} routed to Fast Path using native VLM (${llmModel}).`);
506
541
  Actuator.logEvent(ingestionId, userId, "info", "Triage", { action: "VLM Fast Path selected", type: ext, model: llmModel }, supabase);
507
542
  }
@@ -509,7 +544,7 @@ export class IngestionService {
509
544
  logger.warn(`Failed to read VLM image ${filename} during rerun. Routing to Heavy Path.`, { err });
510
545
  }
511
546
  }
512
- else if (imageExts.includes(ext)) {
547
+ else if (this.IMAGE_EXTS.includes(ext)) {
513
548
  logger.info(`Smart Triage: Re-run image ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked vision-unsupported.`);
514
549
  Actuator.logEvent(ingestionId, userId, "info", "Triage", {
515
550
  action: "VLM skipped (model marked unsupported)",
@@ -527,10 +562,34 @@ export class IngestionService {
527
562
  isFastPath = true;
528
563
  extractionContent = pdfData.text;
529
564
  }
530
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
565
+ else if (pdfResolution.shouldAttempt) {
566
+ // Reuse the already-loaded parse buffer; avoid a second readFile in fileToDataUrl.
567
+ const dataUrl = `data:application/pdf;base64,${buffer.toString("base64")}`;
568
+ extractionContent = this.buildVlmPayloadMarker("pdf", dataUrl);
569
+ isFastPath = true;
570
+ isMultimodalFastPath = true;
571
+ multimodalModality = "pdf";
572
+ logger.info(`Smart Triage: Re-run PDF ${filename} routed to multimodal Fast Path using native VLM (${llmModel}).`);
573
+ Actuator.logEvent(ingestionId, userId, "info", "Triage", {
574
+ action: "VLM Fast Path selected",
575
+ type: "pdf",
576
+ modality: "pdf",
577
+ model: llmModel,
578
+ }, supabase);
579
+ }
580
+ else {
581
+ logger.info(`Smart Triage: Re-run PDF ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked PDF-unsupported.`);
582
+ Actuator.logEvent(ingestionId, userId, "info", "Triage", {
583
+ action: "VLM skipped (model marked unsupported)",
584
+ type: "pdf",
585
+ modality: "pdf",
586
+ model: llmModel,
587
+ provider: llmProvider
588
+ }, supabase);
589
+ }
531
590
  }
532
591
  catch (err) {
533
- // ignore
592
+ logger.warn(`Failed to parse PDF ${filename} during rerun. Routing to Heavy Path.`, { err });
534
593
  }
535
594
  }
536
595
  if (isFastPath) {
@@ -560,7 +619,7 @@ export class IngestionService {
560
619
  details: {
561
620
  provider: llmSettings.llm_provider ?? llmProvider,
562
621
  model: llmSettings.llm_model ?? llmModel,
563
- mode: isVlmFastPath ? "vision" : "text",
622
+ mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
564
623
  }
565
624
  });
566
625
  const baselineResult = await PolicyEngine.extractBaseline(doc, { context: baselineConfig?.context, fields: baselineConfig?.fields }, llmSettings);
@@ -626,7 +685,7 @@ export class IngestionService {
626
685
  baseline_config_id: baselineConfig?.id ?? null,
627
686
  })
628
687
  .eq("id", ingestionId);
629
- if (isVlmFastPath) {
688
+ if (isMultimodalFastPath && multimodalModality) {
630
689
  const embeddingMeta = this.queueVlmSemanticEmbedding({
631
690
  ingestionId,
632
691
  userId,
@@ -635,6 +694,7 @@ export class IngestionService {
635
694
  policyName,
636
695
  extracted: mergedExtracted,
637
696
  tags: mergedTags,
697
+ modality: multimodalModality,
638
698
  supabase,
639
699
  embedSettings,
640
700
  });
@@ -651,25 +711,27 @@ export class IngestionService {
651
711
  .update({ trace: rerunTrace })
652
712
  .eq("id", ingestionId);
653
713
  }
654
- if (isVlmFastPath) {
714
+ if (isMultimodalFastPath && multimodalModality) {
655
715
  await ModelCapabilityService.learnVisionSuccess({
656
716
  supabase,
657
717
  userId,
658
718
  provider: llmSettings.llm_provider ?? llmProvider,
659
719
  model: llmSettings.llm_model ?? llmModel,
720
+ modality: multimodalModality,
660
721
  });
661
722
  }
662
723
  return finalStatus === "matched";
663
724
  }
664
725
  catch (err) {
665
726
  const msg = err instanceof Error ? err.message : String(err);
666
- if (isVlmFastPath) {
727
+ if (isMultimodalFastPath && multimodalModality) {
667
728
  const learnedState = await ModelCapabilityService.learnVisionFailure({
668
729
  supabase,
669
730
  userId,
670
731
  provider: llmProvider,
671
732
  model: llmModel,
672
733
  error: err,
734
+ modality: multimodalModality,
673
735
  });
674
736
  logger.warn(`VLM extraction failed during rerun for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
675
737
  Actuator.logEvent(ingestionId, userId, "error", "Processing", {