@realtimex/folio 0.1.11 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/api/src/services/IngestionService.ts +111 -47
- package/api/src/services/ModelCapabilityService.ts +194 -54
- package/api/src/services/PolicyEngine.ts +48 -22
- package/api/src/services/RAGService.ts +2 -2
- package/dist/api/src/services/IngestionService.js +103 -41
- package/dist/api/src/services/ModelCapabilityService.js +148 -52
- package/dist/api/src/services/PolicyEngine.js +38 -22
- package/dist/api/src/services/RAGService.js +2 -2
- package/dist/assets/{index-nxHX9No5.js → index-tVGLBfz6.js} +37 -37
- package/dist/index.html +1 -1
- package/package.json +1 -1
|
@@ -11,7 +11,7 @@ import { Actuator } from "../utils/Actuator.js";
|
|
|
11
11
|
import { extractLlmResponse, previewLlmText } from "../utils/llmResponse.js";
|
|
12
12
|
import { RAGService } from "./RAGService.js";
|
|
13
13
|
import { SDKService } from "./SDKService.js";
|
|
14
|
-
import { ModelCapabilityService } from "./ModelCapabilityService.js";
|
|
14
|
+
import { ModelCapabilityService, type VisionCapabilityModality } from "./ModelCapabilityService.js";
|
|
15
15
|
|
|
16
16
|
const logger = createLogger("IngestionService");
|
|
17
17
|
|
|
@@ -89,6 +89,9 @@ export interface Ingestion {
|
|
|
89
89
|
}
|
|
90
90
|
|
|
91
91
|
export class IngestionService {
|
|
92
|
+
private static readonly FAST_EXTS = ["txt", "md", "csv", "json"] as const;
|
|
93
|
+
private static readonly IMAGE_EXTS = ["png", "jpg", "jpeg", "webp"] as const;
|
|
94
|
+
|
|
92
95
|
private static readonly NON_IDEMPOTENT_ACTION_TYPES = new Set([
|
|
93
96
|
"append_to_google_sheet",
|
|
94
97
|
"webhook",
|
|
@@ -129,11 +132,12 @@ export class IngestionService {
|
|
|
129
132
|
policyName?: string;
|
|
130
133
|
extracted: Record<string, unknown>;
|
|
131
134
|
tags: string[];
|
|
135
|
+
modality: VisionCapabilityModality;
|
|
132
136
|
}): string {
|
|
133
|
-
const { filename, finalStatus, policyName, extracted, tags } = opts;
|
|
137
|
+
const { filename, finalStatus, policyName, extracted, tags, modality } = opts;
|
|
134
138
|
const lines: string[] = [
|
|
135
139
|
`Document filename: ${filename}`,
|
|
136
|
-
|
|
140
|
+
`Document source: VLM ${modality} extraction`,
|
|
137
141
|
`Processing status: ${finalStatus}`,
|
|
138
142
|
];
|
|
139
143
|
|
|
@@ -186,6 +190,7 @@ export class IngestionService {
|
|
|
186
190
|
policyName?: string;
|
|
187
191
|
extracted: Record<string, unknown>;
|
|
188
192
|
tags: string[];
|
|
193
|
+
modality: VisionCapabilityModality;
|
|
189
194
|
supabase: SupabaseClient;
|
|
190
195
|
embedSettings: { embedding_provider?: string; embedding_model?: string };
|
|
191
196
|
}): { synthetic_chars: number; extracted_fields: number; tags_count: number } {
|
|
@@ -195,6 +200,7 @@ export class IngestionService {
|
|
|
195
200
|
policyName: opts.policyName,
|
|
196
201
|
extracted: opts.extracted,
|
|
197
202
|
tags: opts.tags,
|
|
203
|
+
modality: opts.modality,
|
|
198
204
|
});
|
|
199
205
|
const details = {
|
|
200
206
|
synthetic_chars: syntheticText.length,
|
|
@@ -231,6 +237,17 @@ export class IngestionService {
|
|
|
231
237
|
return details;
|
|
232
238
|
}
|
|
233
239
|
|
|
240
|
+
private static buildVlmPayloadMarker(modality: VisionCapabilityModality, dataUrl: string): string {
|
|
241
|
+
const prefix = modality === "pdf" ? "VLM_PDF_DATA" : "VLM_IMAGE_DATA";
|
|
242
|
+
return `[${prefix}:${dataUrl}]`;
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
private static async fileToDataUrl(filePath: string, mimeType: string): Promise<string> {
|
|
246
|
+
const buffer = await fs.readFile(filePath);
|
|
247
|
+
const base64 = buffer.toString("base64");
|
|
248
|
+
return `data:${mimeType};base64,${base64}`;
|
|
249
|
+
}
|
|
250
|
+
|
|
234
251
|
/**
|
|
235
252
|
* Ingest a document using Hybrid Routing Architecture.
|
|
236
253
|
*/
|
|
@@ -303,11 +320,10 @@ export class IngestionService {
|
|
|
303
320
|
|
|
304
321
|
// 2. Document Triage
|
|
305
322
|
let isFastPath = false;
|
|
306
|
-
let
|
|
323
|
+
let isMultimodalFastPath = false;
|
|
324
|
+
let multimodalModality: VisionCapabilityModality | null = null;
|
|
307
325
|
let extractionContent = content;
|
|
308
326
|
const ext = filename.toLowerCase().split('.').pop() || '';
|
|
309
|
-
const fastExts = ['txt', 'md', 'csv', 'json'];
|
|
310
|
-
const imageExts = ['png', 'jpg', 'jpeg', 'webp'];
|
|
311
327
|
|
|
312
328
|
// Pre-fetch settings to decide whether we should attempt VLM.
|
|
313
329
|
const { data: triageSettingsRow } = await supabase
|
|
@@ -315,27 +331,27 @@ export class IngestionService {
|
|
|
315
331
|
.select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
|
|
316
332
|
.eq("user_id", userId)
|
|
317
333
|
.maybeSingle();
|
|
318
|
-
const
|
|
319
|
-
const
|
|
320
|
-
const
|
|
334
|
+
const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
|
|
335
|
+
const pdfResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "pdf");
|
|
336
|
+
const llmModel = imageResolution.model;
|
|
337
|
+
const llmProvider = imageResolution.provider;
|
|
321
338
|
|
|
322
|
-
if (
|
|
339
|
+
if (this.FAST_EXTS.includes(ext as typeof this.FAST_EXTS[number])) {
|
|
323
340
|
isFastPath = true;
|
|
324
|
-
} else if (
|
|
341
|
+
} else if (this.IMAGE_EXTS.includes(ext as typeof this.IMAGE_EXTS[number]) && imageResolution.shouldAttempt) {
|
|
325
342
|
try {
|
|
326
|
-
const buffer = await fs.readFile(filePath);
|
|
327
|
-
const base64 = buffer.toString('base64');
|
|
328
343
|
const mimeTypeActual = mimeType || `image/${ext === 'jpg' ? 'jpeg' : ext}`;
|
|
329
|
-
|
|
330
|
-
extractionContent =
|
|
344
|
+
const dataUrl = await this.fileToDataUrl(filePath, mimeTypeActual);
|
|
345
|
+
extractionContent = this.buildVlmPayloadMarker("image", dataUrl);
|
|
331
346
|
isFastPath = true;
|
|
332
|
-
|
|
347
|
+
isMultimodalFastPath = true;
|
|
348
|
+
multimodalModality = "image";
|
|
333
349
|
logger.info(`Smart Triage: Image ${filename} routed to Fast Path using native VLM (${llmModel}).`);
|
|
334
350
|
Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "VLM Fast Path selected", type: ext, model: llmModel }, supabase);
|
|
335
351
|
} catch (err) {
|
|
336
352
|
logger.warn(`Failed to read VLM image ${filename}. Routing to Heavy Path.`, { err });
|
|
337
353
|
}
|
|
338
|
-
} else if (
|
|
354
|
+
} else if (this.IMAGE_EXTS.includes(ext as typeof this.IMAGE_EXTS[number])) {
|
|
339
355
|
logger.info(`Smart Triage: Image ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked vision-unsupported.`);
|
|
340
356
|
Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
|
|
341
357
|
action: "VLM skipped (model marked unsupported)",
|
|
@@ -353,9 +369,29 @@ export class IngestionService {
|
|
|
353
369
|
extractionContent = pdfData.text;
|
|
354
370
|
logger.info(`Smart Triage: PDF ${filename} passed text quality check (${pdfData.pages.filter(p => p.text.trim().length > 30).length}/${pdfData.total} pages with text). Routing to Fast Path.`);
|
|
355
371
|
Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Smart Triage passed", type: "pdf", fast_path: true }, supabase);
|
|
372
|
+
} else if (pdfResolution.shouldAttempt) {
|
|
373
|
+
// Reuse the already-loaded parse buffer; avoid a second readFile in fileToDataUrl.
|
|
374
|
+
const dataUrl = `data:application/pdf;base64,${buffer.toString("base64")}`;
|
|
375
|
+
extractionContent = this.buildVlmPayloadMarker("pdf", dataUrl);
|
|
376
|
+
isFastPath = true;
|
|
377
|
+
isMultimodalFastPath = true;
|
|
378
|
+
multimodalModality = "pdf";
|
|
379
|
+
logger.info(`Smart Triage: PDF ${filename} routed to multimodal Fast Path using native VLM (${llmModel}).`);
|
|
380
|
+
Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
|
|
381
|
+
action: "VLM Fast Path selected",
|
|
382
|
+
type: "pdf",
|
|
383
|
+
modality: "pdf",
|
|
384
|
+
model: llmModel,
|
|
385
|
+
}, supabase);
|
|
356
386
|
} else {
|
|
357
|
-
logger.info(`Smart Triage: PDF ${filename}
|
|
358
|
-
Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
|
|
387
|
+
logger.info(`Smart Triage: PDF ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked PDF-unsupported.`);
|
|
388
|
+
Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
|
|
389
|
+
action: "VLM skipped (model marked unsupported)",
|
|
390
|
+
type: "pdf",
|
|
391
|
+
modality: "pdf",
|
|
392
|
+
model: llmModel,
|
|
393
|
+
provider: llmProvider,
|
|
394
|
+
}, supabase);
|
|
359
395
|
}
|
|
360
396
|
} catch (err) {
|
|
361
397
|
logger.warn(`Failed to parse PDF ${filename}. Routing to Heavy Path.`, { err });
|
|
@@ -395,7 +431,7 @@ export class IngestionService {
|
|
|
395
431
|
details: {
|
|
396
432
|
provider: llmSettings.llm_provider ?? llmProvider,
|
|
397
433
|
model: llmSettings.llm_model ?? llmModel,
|
|
398
|
-
mode:
|
|
434
|
+
mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
|
|
399
435
|
}
|
|
400
436
|
});
|
|
401
437
|
|
|
@@ -458,7 +494,7 @@ export class IngestionService {
|
|
|
458
494
|
.select()
|
|
459
495
|
.single();
|
|
460
496
|
|
|
461
|
-
if (
|
|
497
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
462
498
|
const embeddingMeta = this.queueVlmSemanticEmbedding({
|
|
463
499
|
ingestionId: ingestion.id,
|
|
464
500
|
userId,
|
|
@@ -467,6 +503,7 @@ export class IngestionService {
|
|
|
467
503
|
policyName,
|
|
468
504
|
extracted: mergedExtracted,
|
|
469
505
|
tags: autoTags,
|
|
506
|
+
modality: multimodalModality,
|
|
470
507
|
supabase,
|
|
471
508
|
embedSettings,
|
|
472
509
|
});
|
|
@@ -484,12 +521,13 @@ export class IngestionService {
|
|
|
484
521
|
.eq("id", ingestion.id);
|
|
485
522
|
}
|
|
486
523
|
|
|
487
|
-
if (
|
|
524
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
488
525
|
await ModelCapabilityService.learnVisionSuccess({
|
|
489
526
|
supabase,
|
|
490
527
|
userId,
|
|
491
528
|
provider: llmSettings.llm_provider ?? llmProvider,
|
|
492
529
|
model: llmSettings.llm_model ?? llmModel,
|
|
530
|
+
modality: multimodalModality,
|
|
493
531
|
});
|
|
494
532
|
}
|
|
495
533
|
|
|
@@ -498,13 +536,14 @@ export class IngestionService {
|
|
|
498
536
|
} catch (err) {
|
|
499
537
|
const msg = err instanceof Error ? err.message : String(err);
|
|
500
538
|
|
|
501
|
-
if (
|
|
539
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
502
540
|
const learnedState = await ModelCapabilityService.learnVisionFailure({
|
|
503
541
|
supabase,
|
|
504
542
|
userId,
|
|
505
543
|
provider: llmProvider,
|
|
506
544
|
model: llmModel,
|
|
507
545
|
error: err,
|
|
546
|
+
modality: multimodalModality,
|
|
508
547
|
});
|
|
509
548
|
logger.warn(`VLM extraction failed for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
|
|
510
549
|
Actuator.logEvent(ingestion.id, userId, "error", "Processing", {
|
|
@@ -587,38 +626,38 @@ export class IngestionService {
|
|
|
587
626
|
if (!filePath) throw new Error("No storage path found for this ingestion");
|
|
588
627
|
|
|
589
628
|
let isFastPath = false;
|
|
590
|
-
let
|
|
629
|
+
let isMultimodalFastPath = false;
|
|
630
|
+
let multimodalModality: VisionCapabilityModality | null = null;
|
|
591
631
|
let extractionContent = "";
|
|
592
632
|
const ext = filename.toLowerCase().split('.').pop() || '';
|
|
593
|
-
const fastExts = ['txt', 'md', 'csv', 'json'];
|
|
594
|
-
const imageExts = ['png', 'jpg', 'jpeg', 'webp'];
|
|
595
633
|
|
|
596
634
|
const { data: triageSettingsRow } = await supabase
|
|
597
635
|
.from("user_settings")
|
|
598
636
|
.select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
|
|
599
637
|
.eq("user_id", userId)
|
|
600
638
|
.maybeSingle();
|
|
601
|
-
const
|
|
602
|
-
const
|
|
603
|
-
const
|
|
639
|
+
const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
|
|
640
|
+
const pdfResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "pdf");
|
|
641
|
+
const llmModel = imageResolution.model;
|
|
642
|
+
const llmProvider = imageResolution.provider;
|
|
604
643
|
|
|
605
|
-
if (
|
|
644
|
+
if (this.FAST_EXTS.includes(ext as typeof this.FAST_EXTS[number])) {
|
|
606
645
|
isFastPath = true;
|
|
607
646
|
extractionContent = await fs.readFile(filePath, "utf-8");
|
|
608
|
-
} else if (
|
|
647
|
+
} else if (this.IMAGE_EXTS.includes(ext as typeof this.IMAGE_EXTS[number]) && imageResolution.shouldAttempt) {
|
|
609
648
|
try {
|
|
610
|
-
const buffer = await fs.readFile(filePath);
|
|
611
|
-
const base64 = buffer.toString('base64');
|
|
612
649
|
const mimeTypeActual = `image/${ext === 'jpg' ? 'jpeg' : ext}`;
|
|
613
|
-
|
|
650
|
+
const dataUrl = await this.fileToDataUrl(filePath, mimeTypeActual);
|
|
651
|
+
extractionContent = this.buildVlmPayloadMarker("image", dataUrl);
|
|
614
652
|
isFastPath = true;
|
|
615
|
-
|
|
653
|
+
isMultimodalFastPath = true;
|
|
654
|
+
multimodalModality = "image";
|
|
616
655
|
logger.info(`Smart Triage: Re-run image ${filename} routed to Fast Path using native VLM (${llmModel}).`);
|
|
617
656
|
Actuator.logEvent(ingestionId, userId, "info", "Triage", { action: "VLM Fast Path selected", type: ext, model: llmModel }, supabase);
|
|
618
657
|
} catch (err) {
|
|
619
658
|
logger.warn(`Failed to read VLM image ${filename} during rerun. Routing to Heavy Path.`, { err });
|
|
620
659
|
}
|
|
621
|
-
} else if (
|
|
660
|
+
} else if (this.IMAGE_EXTS.includes(ext as typeof this.IMAGE_EXTS[number])) {
|
|
622
661
|
logger.info(`Smart Triage: Re-run image ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked vision-unsupported.`);
|
|
623
662
|
Actuator.logEvent(ingestionId, userId, "info", "Triage", {
|
|
624
663
|
action: "VLM skipped (model marked unsupported)",
|
|
@@ -634,10 +673,32 @@ export class IngestionService {
|
|
|
634
673
|
if (isPdfTextExtractable(pdfData)) {
|
|
635
674
|
isFastPath = true;
|
|
636
675
|
extractionContent = pdfData.text;
|
|
676
|
+
} else if (pdfResolution.shouldAttempt) {
|
|
677
|
+
// Reuse the already-loaded parse buffer; avoid a second readFile in fileToDataUrl.
|
|
678
|
+
const dataUrl = `data:application/pdf;base64,${buffer.toString("base64")}`;
|
|
679
|
+
extractionContent = this.buildVlmPayloadMarker("pdf", dataUrl);
|
|
680
|
+
isFastPath = true;
|
|
681
|
+
isMultimodalFastPath = true;
|
|
682
|
+
multimodalModality = "pdf";
|
|
683
|
+
logger.info(`Smart Triage: Re-run PDF ${filename} routed to multimodal Fast Path using native VLM (${llmModel}).`);
|
|
684
|
+
Actuator.logEvent(ingestionId, userId, "info", "Triage", {
|
|
685
|
+
action: "VLM Fast Path selected",
|
|
686
|
+
type: "pdf",
|
|
687
|
+
modality: "pdf",
|
|
688
|
+
model: llmModel,
|
|
689
|
+
}, supabase);
|
|
690
|
+
} else {
|
|
691
|
+
logger.info(`Smart Triage: Re-run PDF ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked PDF-unsupported.`);
|
|
692
|
+
Actuator.logEvent(ingestionId, userId, "info", "Triage", {
|
|
693
|
+
action: "VLM skipped (model marked unsupported)",
|
|
694
|
+
type: "pdf",
|
|
695
|
+
modality: "pdf",
|
|
696
|
+
model: llmModel,
|
|
697
|
+
provider: llmProvider
|
|
698
|
+
}, supabase);
|
|
637
699
|
}
|
|
638
|
-
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
639
700
|
} catch (err) {
|
|
640
|
-
|
|
701
|
+
logger.warn(`Failed to parse PDF ${filename} during rerun. Routing to Heavy Path.`, { err });
|
|
641
702
|
}
|
|
642
703
|
}
|
|
643
704
|
|
|
@@ -667,12 +728,12 @@ export class IngestionService {
|
|
|
667
728
|
baselineTrace.push({
|
|
668
729
|
timestamp: new Date().toISOString(),
|
|
669
730
|
step: "LLM request (baseline extraction)",
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
731
|
+
details: {
|
|
732
|
+
provider: llmSettings.llm_provider ?? llmProvider,
|
|
733
|
+
model: llmSettings.llm_model ?? llmModel,
|
|
734
|
+
mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
|
|
735
|
+
}
|
|
736
|
+
});
|
|
676
737
|
|
|
677
738
|
const baselineResult = await PolicyEngine.extractBaseline(
|
|
678
739
|
doc,
|
|
@@ -754,7 +815,7 @@ export class IngestionService {
|
|
|
754
815
|
})
|
|
755
816
|
.eq("id", ingestionId);
|
|
756
817
|
|
|
757
|
-
if (
|
|
818
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
758
819
|
const embeddingMeta = this.queueVlmSemanticEmbedding({
|
|
759
820
|
ingestionId,
|
|
760
821
|
userId,
|
|
@@ -763,6 +824,7 @@ export class IngestionService {
|
|
|
763
824
|
policyName,
|
|
764
825
|
extracted: mergedExtracted,
|
|
765
826
|
tags: mergedTags,
|
|
827
|
+
modality: multimodalModality,
|
|
766
828
|
supabase,
|
|
767
829
|
embedSettings,
|
|
768
830
|
});
|
|
@@ -780,25 +842,27 @@ export class IngestionService {
|
|
|
780
842
|
.eq("id", ingestionId);
|
|
781
843
|
}
|
|
782
844
|
|
|
783
|
-
if (
|
|
845
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
784
846
|
await ModelCapabilityService.learnVisionSuccess({
|
|
785
847
|
supabase,
|
|
786
848
|
userId,
|
|
787
849
|
provider: llmSettings.llm_provider ?? llmProvider,
|
|
788
850
|
model: llmSettings.llm_model ?? llmModel,
|
|
851
|
+
modality: multimodalModality,
|
|
789
852
|
});
|
|
790
853
|
}
|
|
791
854
|
|
|
792
855
|
return finalStatus === "matched";
|
|
793
856
|
} catch (err: unknown) {
|
|
794
857
|
const msg = err instanceof Error ? err.message : String(err);
|
|
795
|
-
if (
|
|
858
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
796
859
|
const learnedState = await ModelCapabilityService.learnVisionFailure({
|
|
797
860
|
supabase,
|
|
798
861
|
userId,
|
|
799
862
|
provider: llmProvider,
|
|
800
863
|
model: llmModel,
|
|
801
864
|
error: err,
|
|
865
|
+
modality: multimodalModality,
|
|
802
866
|
});
|
|
803
867
|
logger.warn(`VLM extraction failed during rerun for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
|
|
804
868
|
Actuator.logEvent(ingestionId, userId, "error", "Processing", {
|