@realtimex/folio 0.1.11 → 0.1.12

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -51,6 +51,8 @@ function isPdfTextExtractable(pdfData) {
51
51
  return true;
52
52
  }
53
53
  export class IngestionService {
54
+ static FAST_EXTS = ["txt", "md", "csv", "json"];
55
+ static IMAGE_EXTS = ["png", "jpg", "jpeg", "webp"];
54
56
  static NON_IDEMPOTENT_ACTION_TYPES = new Set([
55
57
  "append_to_google_sheet",
56
58
  "webhook",
@@ -85,10 +87,10 @@ export class IngestionService {
85
87
  return String(value);
86
88
  }
87
89
  static buildVlmSemanticText(opts) {
88
- const { filename, finalStatus, policyName, extracted, tags } = opts;
90
+ const { filename, finalStatus, policyName, extracted, tags, modality } = opts;
89
91
  const lines = [
90
92
  `Document filename: ${filename}`,
91
- "Document source: VLM image extraction",
93
+ `Document source: VLM ${modality} extraction`,
92
94
  `Processing status: ${finalStatus}`,
93
95
  ];
94
96
  if (policyName) {
@@ -134,6 +136,7 @@ export class IngestionService {
134
136
  policyName: opts.policyName,
135
137
  extracted: opts.extracted,
136
138
  tags: opts.tags,
139
+ modality: opts.modality,
137
140
  });
138
141
  const details = {
139
142
  synthetic_chars: syntheticText.length,
@@ -160,6 +163,15 @@ export class IngestionService {
160
163
  });
161
164
  return details;
162
165
  }
166
+ static buildVlmPayloadMarker(modality, dataUrl) {
167
+ const prefix = modality === "pdf" ? "VLM_PDF_DATA" : "VLM_IMAGE_DATA";
168
+ return `[${prefix}:${dataUrl}]`;
169
+ }
170
+ static async fileToDataUrl(filePath, mimeType) {
171
+ const buffer = await fs.readFile(filePath);
172
+ const base64 = buffer.toString("base64");
173
+ return `data:${mimeType};base64,${base64}`;
174
+ }
163
175
  /**
164
176
  * Ingest a document using Hybrid Routing Architecture.
165
177
  */
@@ -217,32 +229,31 @@ export class IngestionService {
217
229
  Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Ingestion started", source, filename, fileSize, is_high_intent: true }, supabase);
218
230
  // 2. Document Triage
219
231
  let isFastPath = false;
220
- let isVlmFastPath = false;
232
+ let isMultimodalFastPath = false;
233
+ let multimodalModality = null;
221
234
  let extractionContent = content;
222
235
  const ext = filename.toLowerCase().split('.').pop() || '';
223
- const fastExts = ['txt', 'md', 'csv', 'json'];
224
- const imageExts = ['png', 'jpg', 'jpeg', 'webp'];
225
236
  // Pre-fetch settings to decide whether we should attempt VLM.
226
237
  const { data: triageSettingsRow } = await supabase
227
238
  .from("user_settings")
228
239
  .select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
229
240
  .eq("user_id", userId)
230
241
  .maybeSingle();
231
- const visionResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow);
232
- const llmModel = visionResolution.model;
233
- const llmProvider = visionResolution.provider;
234
- if (fastExts.includes(ext)) {
242
+ const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
243
+ const pdfResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "pdf");
244
+ const llmModel = imageResolution.model;
245
+ const llmProvider = imageResolution.provider;
246
+ if (this.FAST_EXTS.includes(ext)) {
235
247
  isFastPath = true;
236
248
  }
237
- else if (imageExts.includes(ext) && visionResolution.shouldAttempt) {
249
+ else if (this.IMAGE_EXTS.includes(ext) && imageResolution.shouldAttempt) {
238
250
  try {
239
- const buffer = await fs.readFile(filePath);
240
- const base64 = buffer.toString('base64');
241
251
  const mimeTypeActual = mimeType || `image/${ext === 'jpg' ? 'jpeg' : ext}`;
242
- // Special marker for PolicyEngine
243
- extractionContent = `[VLM_IMAGE_DATA:data:${mimeTypeActual};base64,${base64}]`;
252
+ const dataUrl = await this.fileToDataUrl(filePath, mimeTypeActual);
253
+ extractionContent = this.buildVlmPayloadMarker("image", dataUrl);
244
254
  isFastPath = true;
245
- isVlmFastPath = true;
255
+ isMultimodalFastPath = true;
256
+ multimodalModality = "image";
246
257
  logger.info(`Smart Triage: Image ${filename} routed to Fast Path using native VLM (${llmModel}).`);
247
258
  Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "VLM Fast Path selected", type: ext, model: llmModel }, supabase);
248
259
  }
@@ -250,7 +261,7 @@ export class IngestionService {
250
261
  logger.warn(`Failed to read VLM image ${filename}. Routing to Heavy Path.`, { err });
251
262
  }
252
263
  }
253
- else if (imageExts.includes(ext)) {
264
+ else if (this.IMAGE_EXTS.includes(ext)) {
254
265
  logger.info(`Smart Triage: Image ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked vision-unsupported.`);
255
266
  Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
256
267
  action: "VLM skipped (model marked unsupported)",
@@ -270,9 +281,30 @@ export class IngestionService {
270
281
  logger.info(`Smart Triage: PDF ${filename} passed text quality check (${pdfData.pages.filter(p => p.text.trim().length > 30).length}/${pdfData.total} pages with text). Routing to Fast Path.`);
271
282
  Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Smart Triage passed", type: "pdf", fast_path: true }, supabase);
272
283
  }
284
+ else if (pdfResolution.shouldAttempt) {
285
+ // Reuse the already-loaded parse buffer; avoid a second readFile in fileToDataUrl.
286
+ const dataUrl = `data:application/pdf;base64,${buffer.toString("base64")}`;
287
+ extractionContent = this.buildVlmPayloadMarker("pdf", dataUrl);
288
+ isFastPath = true;
289
+ isMultimodalFastPath = true;
290
+ multimodalModality = "pdf";
291
+ logger.info(`Smart Triage: PDF ${filename} routed to multimodal Fast Path using native VLM (${llmModel}).`);
292
+ Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
293
+ action: "VLM Fast Path selected",
294
+ type: "pdf",
295
+ modality: "pdf",
296
+ model: llmModel,
297
+ }, supabase);
298
+ }
273
299
  else {
274
- logger.info(`Smart Triage: PDF ${filename} failed text quality check. Routing to Heavy Path.`);
275
- Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Smart Triage failed", type: "pdf", fast_path: false }, supabase);
300
+ logger.info(`Smart Triage: PDF ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked PDF-unsupported.`);
301
+ Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
302
+ action: "VLM skipped (model marked unsupported)",
303
+ type: "pdf",
304
+ modality: "pdf",
305
+ model: llmModel,
306
+ provider: llmProvider,
307
+ }, supabase);
276
308
  }
277
309
  }
278
310
  catch (err) {
@@ -310,7 +342,7 @@ export class IngestionService {
310
342
  details: {
311
343
  provider: llmSettings.llm_provider ?? llmProvider,
312
344
  model: llmSettings.llm_model ?? llmModel,
313
- mode: isVlmFastPath ? "vision" : "text",
345
+ mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
314
346
  }
315
347
  });
316
348
  const baselineResult = await PolicyEngine.extractBaseline(doc, { context: baselineConfig?.context, fields: baselineConfig?.fields }, llmSettings);
@@ -363,7 +395,7 @@ export class IngestionService {
363
395
  .eq("id", ingestion.id)
364
396
  .select()
365
397
  .single();
366
- if (isVlmFastPath) {
398
+ if (isMultimodalFastPath && multimodalModality) {
367
399
  const embeddingMeta = this.queueVlmSemanticEmbedding({
368
400
  ingestionId: ingestion.id,
369
401
  userId,
@@ -372,6 +404,7 @@ export class IngestionService {
372
404
  policyName,
373
405
  extracted: mergedExtracted,
374
406
  tags: autoTags,
407
+ modality: multimodalModality,
375
408
  supabase,
376
409
  embedSettings,
377
410
  });
@@ -388,25 +421,27 @@ export class IngestionService {
388
421
  .update({ trace: finalTrace })
389
422
  .eq("id", ingestion.id);
390
423
  }
391
- if (isVlmFastPath) {
424
+ if (isMultimodalFastPath && multimodalModality) {
392
425
  await ModelCapabilityService.learnVisionSuccess({
393
426
  supabase,
394
427
  userId,
395
428
  provider: llmSettings.llm_provider ?? llmProvider,
396
429
  model: llmSettings.llm_model ?? llmModel,
430
+ modality: multimodalModality,
397
431
  });
398
432
  }
399
433
  return updatedIngestion;
400
434
  }
401
435
  catch (err) {
402
436
  const msg = err instanceof Error ? err.message : String(err);
403
- if (isVlmFastPath) {
437
+ if (isMultimodalFastPath && multimodalModality) {
404
438
  const learnedState = await ModelCapabilityService.learnVisionFailure({
405
439
  supabase,
406
440
  userId,
407
441
  provider: llmProvider,
408
442
  model: llmModel,
409
443
  error: err,
444
+ modality: multimodalModality,
410
445
  });
411
446
  logger.warn(`VLM extraction failed for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
412
447
  Actuator.logEvent(ingestion.id, userId, "error", "Processing", {
@@ -477,31 +512,31 @@ export class IngestionService {
477
512
  if (!filePath)
478
513
  throw new Error("No storage path found for this ingestion");
479
514
  let isFastPath = false;
480
- let isVlmFastPath = false;
515
+ let isMultimodalFastPath = false;
516
+ let multimodalModality = null;
481
517
  let extractionContent = "";
482
518
  const ext = filename.toLowerCase().split('.').pop() || '';
483
- const fastExts = ['txt', 'md', 'csv', 'json'];
484
- const imageExts = ['png', 'jpg', 'jpeg', 'webp'];
485
519
  const { data: triageSettingsRow } = await supabase
486
520
  .from("user_settings")
487
521
  .select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
488
522
  .eq("user_id", userId)
489
523
  .maybeSingle();
490
- const visionResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow);
491
- const llmModel = visionResolution.model;
492
- const llmProvider = visionResolution.provider;
493
- if (fastExts.includes(ext)) {
524
+ const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
525
+ const pdfResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "pdf");
526
+ const llmModel = imageResolution.model;
527
+ const llmProvider = imageResolution.provider;
528
+ if (this.FAST_EXTS.includes(ext)) {
494
529
  isFastPath = true;
495
530
  extractionContent = await fs.readFile(filePath, "utf-8");
496
531
  }
497
- else if (imageExts.includes(ext) && visionResolution.shouldAttempt) {
532
+ else if (this.IMAGE_EXTS.includes(ext) && imageResolution.shouldAttempt) {
498
533
  try {
499
- const buffer = await fs.readFile(filePath);
500
- const base64 = buffer.toString('base64');
501
534
  const mimeTypeActual = `image/${ext === 'jpg' ? 'jpeg' : ext}`;
502
- extractionContent = `[VLM_IMAGE_DATA:data:${mimeTypeActual};base64,${base64}]`;
535
+ const dataUrl = await this.fileToDataUrl(filePath, mimeTypeActual);
536
+ extractionContent = this.buildVlmPayloadMarker("image", dataUrl);
503
537
  isFastPath = true;
504
- isVlmFastPath = true;
538
+ isMultimodalFastPath = true;
539
+ multimodalModality = "image";
505
540
  logger.info(`Smart Triage: Re-run image ${filename} routed to Fast Path using native VLM (${llmModel}).`);
506
541
  Actuator.logEvent(ingestionId, userId, "info", "Triage", { action: "VLM Fast Path selected", type: ext, model: llmModel }, supabase);
507
542
  }
@@ -509,7 +544,7 @@ export class IngestionService {
509
544
  logger.warn(`Failed to read VLM image ${filename} during rerun. Routing to Heavy Path.`, { err });
510
545
  }
511
546
  }
512
- else if (imageExts.includes(ext)) {
547
+ else if (this.IMAGE_EXTS.includes(ext)) {
513
548
  logger.info(`Smart Triage: Re-run image ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked vision-unsupported.`);
514
549
  Actuator.logEvent(ingestionId, userId, "info", "Triage", {
515
550
  action: "VLM skipped (model marked unsupported)",
@@ -527,10 +562,34 @@ export class IngestionService {
527
562
  isFastPath = true;
528
563
  extractionContent = pdfData.text;
529
564
  }
530
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
565
+ else if (pdfResolution.shouldAttempt) {
566
+ // Reuse the already-loaded parse buffer; avoid a second readFile in fileToDataUrl.
567
+ const dataUrl = `data:application/pdf;base64,${buffer.toString("base64")}`;
568
+ extractionContent = this.buildVlmPayloadMarker("pdf", dataUrl);
569
+ isFastPath = true;
570
+ isMultimodalFastPath = true;
571
+ multimodalModality = "pdf";
572
+ logger.info(`Smart Triage: Re-run PDF ${filename} routed to multimodal Fast Path using native VLM (${llmModel}).`);
573
+ Actuator.logEvent(ingestionId, userId, "info", "Triage", {
574
+ action: "VLM Fast Path selected",
575
+ type: "pdf",
576
+ modality: "pdf",
577
+ model: llmModel,
578
+ }, supabase);
579
+ }
580
+ else {
581
+ logger.info(`Smart Triage: Re-run PDF ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked PDF-unsupported.`);
582
+ Actuator.logEvent(ingestionId, userId, "info", "Triage", {
583
+ action: "VLM skipped (model marked unsupported)",
584
+ type: "pdf",
585
+ modality: "pdf",
586
+ model: llmModel,
587
+ provider: llmProvider
588
+ }, supabase);
589
+ }
531
590
  }
532
591
  catch (err) {
533
- // ignore
592
+ logger.warn(`Failed to parse PDF ${filename} during rerun. Routing to Heavy Path.`, { err });
534
593
  }
535
594
  }
536
595
  if (isFastPath) {
@@ -560,7 +619,7 @@ export class IngestionService {
560
619
  details: {
561
620
  provider: llmSettings.llm_provider ?? llmProvider,
562
621
  model: llmSettings.llm_model ?? llmModel,
563
- mode: isVlmFastPath ? "vision" : "text",
622
+ mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
564
623
  }
565
624
  });
566
625
  const baselineResult = await PolicyEngine.extractBaseline(doc, { context: baselineConfig?.context, fields: baselineConfig?.fields }, llmSettings);
@@ -626,7 +685,7 @@ export class IngestionService {
626
685
  baseline_config_id: baselineConfig?.id ?? null,
627
686
  })
628
687
  .eq("id", ingestionId);
629
- if (isVlmFastPath) {
688
+ if (isMultimodalFastPath && multimodalModality) {
630
689
  const embeddingMeta = this.queueVlmSemanticEmbedding({
631
690
  ingestionId,
632
691
  userId,
@@ -635,6 +694,7 @@ export class IngestionService {
635
694
  policyName,
636
695
  extracted: mergedExtracted,
637
696
  tags: mergedTags,
697
+ modality: multimodalModality,
638
698
  supabase,
639
699
  embedSettings,
640
700
  });
@@ -651,25 +711,27 @@ export class IngestionService {
651
711
  .update({ trace: rerunTrace })
652
712
  .eq("id", ingestionId);
653
713
  }
654
- if (isVlmFastPath) {
714
+ if (isMultimodalFastPath && multimodalModality) {
655
715
  await ModelCapabilityService.learnVisionSuccess({
656
716
  supabase,
657
717
  userId,
658
718
  provider: llmSettings.llm_provider ?? llmProvider,
659
719
  model: llmSettings.llm_model ?? llmModel,
720
+ modality: multimodalModality,
660
721
  });
661
722
  }
662
723
  return finalStatus === "matched";
663
724
  }
664
725
  catch (err) {
665
726
  const msg = err instanceof Error ? err.message : String(err);
666
- if (isVlmFastPath) {
727
+ if (isMultimodalFastPath && multimodalModality) {
667
728
  const learnedState = await ModelCapabilityService.learnVisionFailure({
668
729
  supabase,
669
730
  userId,
670
731
  provider: llmProvider,
671
732
  model: llmModel,
672
733
  error: err,
734
+ modality: multimodalModality,
673
735
  });
674
736
  logger.warn(`VLM extraction failed during rerun for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
675
737
  Actuator.logEvent(ingestionId, userId, "error", "Processing", {