@realtimex/folio 0.1.10 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/api/src/services/IngestionService.ts +111 -47
- package/api/src/services/ModelCapabilityService.ts +666 -88
- package/api/src/services/PolicyEngine.ts +48 -22
- package/api/src/services/RAGService.ts +2 -2
- package/dist/api/src/services/IngestionService.js +103 -41
- package/dist/api/src/services/ModelCapabilityService.js +521 -77
- package/dist/api/src/services/PolicyEngine.js +38 -22
- package/dist/api/src/services/RAGService.js +2 -2
- package/dist/assets/{index-_NgwdVu8.js → index-tVGLBfz6.js} +37 -37
- package/dist/index.html +1 -1
- package/package.json +1 -1
|
@@ -49,21 +49,42 @@ type ProcessWithPoliciesOptions = {
|
|
|
49
49
|
allowLearnedFallback?: boolean;
|
|
50
50
|
};
|
|
51
51
|
|
|
52
|
+
type VlmPayload = {
|
|
53
|
+
kind: "image" | "pdf";
|
|
54
|
+
dataUrl: string;
|
|
55
|
+
supplementalText: string;
|
|
56
|
+
};
|
|
57
|
+
|
|
52
58
|
/**
|
|
53
|
-
* Helper to build LLM message content. If the text contains
|
|
54
|
-
* generated by IngestionService, it casts the payload to
|
|
55
|
-
* Vision array structure so the underlying SDK bridge can transmit the image.
|
|
59
|
+
* Helper to build LLM message content. If the text contains a VLM marker
|
|
60
|
+
* generated by IngestionService, it casts the payload to multimodal blocks.
|
|
56
61
|
*/
|
|
57
|
-
function extractVlmPayload(text: string):
|
|
58
|
-
const
|
|
59
|
-
if (
|
|
62
|
+
function extractVlmPayload(text: string): VlmPayload | null {
|
|
63
|
+
const imageMarker = text.match(/\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/);
|
|
64
|
+
if (imageMarker) {
|
|
65
|
+
const markerText = imageMarker[0];
|
|
66
|
+
return {
|
|
67
|
+
kind: "image",
|
|
68
|
+
dataUrl: imageMarker[1],
|
|
69
|
+
supplementalText: text.replace(markerText, "").trim().slice(0, 4000),
|
|
70
|
+
};
|
|
71
|
+
}
|
|
60
72
|
|
|
61
|
-
const
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
73
|
+
const pdfMarker = text.match(/\[VLM_PDF_DATA:(data:[^;]+;base64,[^\]]+)\]/);
|
|
74
|
+
if (pdfMarker) {
|
|
75
|
+
const markerText = pdfMarker[0];
|
|
76
|
+
return {
|
|
77
|
+
kind: "pdf",
|
|
78
|
+
dataUrl: pdfMarker[1],
|
|
79
|
+
supplementalText: text.replace(markerText, "").trim().slice(0, 4000),
|
|
80
|
+
};
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
return null;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
function hasVlmPayload(text: string): boolean {
|
|
87
|
+
return text.includes("[VLM_IMAGE_DATA:") || text.includes("[VLM_PDF_DATA:");
|
|
67
88
|
}
|
|
68
89
|
|
|
69
90
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
@@ -73,10 +94,13 @@ function buildMessageContent(prompt: string, text: string, textFirst = false): a
|
|
|
73
94
|
const textPrompt = vlmPayload.supplementalText
|
|
74
95
|
? `${prompt}\n\nSupplemental extracted fields:\n${vlmPayload.supplementalText}`
|
|
75
96
|
: prompt;
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
97
|
+
// `input_file` is not provider-agnostic (it is an OpenAI Responses-style block); providers
|
|
98
|
+
// that don't accept it will fail, and IngestionService will learn unsupported pdf modality.
|
|
99
|
+
const assetBlock = vlmPayload.kind === "pdf"
|
|
100
|
+
? { type: "input_file", file_url: vlmPayload.dataUrl }
|
|
101
|
+
: { type: "image_url", image_url: { url: vlmPayload.dataUrl } };
|
|
102
|
+
|
|
103
|
+
return [{ type: "text", text: textPrompt }, assetBlock];
|
|
80
104
|
}
|
|
81
105
|
// Standard text payload
|
|
82
106
|
return textFirst
|
|
@@ -444,7 +468,7 @@ async function evaluateCondition(condition: MatchCondition, doc: DocumentObject,
|
|
|
444
468
|
model,
|
|
445
469
|
condition_type: condition.type,
|
|
446
470
|
prompt_preview: prompt.slice(0, 180),
|
|
447
|
-
vision_payload: doc.text
|
|
471
|
+
vision_payload: hasVlmPayload(doc.text)
|
|
448
472
|
}
|
|
449
473
|
});
|
|
450
474
|
Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Policy Matching", {
|
|
@@ -453,7 +477,7 @@ async function evaluateCondition(condition: MatchCondition, doc: DocumentObject,
|
|
|
453
477
|
model,
|
|
454
478
|
condition_type: condition.type,
|
|
455
479
|
prompt_preview: prompt.slice(0, 180),
|
|
456
|
-
vision_payload: doc.text
|
|
480
|
+
vision_payload: hasVlmPayload(doc.text)
|
|
457
481
|
}, doc.supabase);
|
|
458
482
|
const result = await sdk.llm.chat(
|
|
459
483
|
[
|
|
@@ -563,7 +587,7 @@ Fields to extract:
|
|
|
563
587
|
${fieldDescriptions}`;
|
|
564
588
|
|
|
565
589
|
try {
|
|
566
|
-
const isVlmPayload = doc.text
|
|
590
|
+
const isVlmPayload = hasVlmPayload(doc.text);
|
|
567
591
|
const mixedPrompt = isVlmPayload
|
|
568
592
|
? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
|
|
569
593
|
: prompt;
|
|
@@ -722,7 +746,7 @@ Rules:
|
|
|
722
746
|
known_fields_count: Object.keys(contractData).length,
|
|
723
747
|
}, doc.supabase);
|
|
724
748
|
|
|
725
|
-
const isVlmPayload = doc.text
|
|
749
|
+
const isVlmPayload = hasVlmPayload(doc.text);
|
|
726
750
|
const mixedPrompt = isVlmPayload
|
|
727
751
|
? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
|
|
728
752
|
: prompt;
|
|
@@ -1004,7 +1028,9 @@ export class PolicyEngine {
|
|
|
1004
1028
|
const allowLearnedFallback = opts.allowLearnedFallback !== false && !forcedPolicyId;
|
|
1005
1029
|
if (allowLearnedFallback && doc.supabase && policies.length > 0) {
|
|
1006
1030
|
try {
|
|
1007
|
-
const learningText = doc.text
|
|
1031
|
+
const learningText = doc.text
|
|
1032
|
+
.replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "")
|
|
1033
|
+
.replace(/\[VLM_PDF_DATA:[^\]]+\]/g, "");
|
|
1008
1034
|
const learned = await PolicyLearningService.resolveLearnedCandidate({
|
|
1009
1035
|
supabase: doc.supabase,
|
|
1010
1036
|
userId: doc.userId,
|
|
@@ -1118,7 +1144,7 @@ export class PolicyEngine {
|
|
|
1118
1144
|
`No markdown, no explanation — only the JSON object.`;
|
|
1119
1145
|
|
|
1120
1146
|
const userPrompt = `Extract the following fields from the document:\n${fieldList}`;
|
|
1121
|
-
const isVlmPayload = doc.text
|
|
1147
|
+
const isVlmPayload = hasVlmPayload(doc.text);
|
|
1122
1148
|
const mixedPrompt = isVlmPayload ? `${systemPrompt}\n\n${userPrompt}` : userPrompt;
|
|
1123
1149
|
|
|
1124
1150
|
try {
|
|
@@ -135,8 +135,8 @@ export class RAGService {
|
|
|
135
135
|
supabase: SupabaseClient,
|
|
136
136
|
settings?: EmbeddingSettings
|
|
137
137
|
): Promise<void> {
|
|
138
|
-
if (
|
|
139
|
-
logger.info(`Skipping chunking and embedding for VLM base64
|
|
138
|
+
if (/^\[VLM_(IMAGE|PDF)_DATA:/.test(rawText)) {
|
|
139
|
+
logger.info(`Skipping chunking and embedding for VLM base64 multimodal data (Ingestion: ${ingestionId})`);
|
|
140
140
|
return;
|
|
141
141
|
}
|
|
142
142
|
|
|
@@ -51,6 +51,8 @@ function isPdfTextExtractable(pdfData) {
|
|
|
51
51
|
return true;
|
|
52
52
|
}
|
|
53
53
|
export class IngestionService {
|
|
54
|
+
static FAST_EXTS = ["txt", "md", "csv", "json"];
|
|
55
|
+
static IMAGE_EXTS = ["png", "jpg", "jpeg", "webp"];
|
|
54
56
|
static NON_IDEMPOTENT_ACTION_TYPES = new Set([
|
|
55
57
|
"append_to_google_sheet",
|
|
56
58
|
"webhook",
|
|
@@ -85,10 +87,10 @@ export class IngestionService {
|
|
|
85
87
|
return String(value);
|
|
86
88
|
}
|
|
87
89
|
static buildVlmSemanticText(opts) {
|
|
88
|
-
const { filename, finalStatus, policyName, extracted, tags } = opts;
|
|
90
|
+
const { filename, finalStatus, policyName, extracted, tags, modality } = opts;
|
|
89
91
|
const lines = [
|
|
90
92
|
`Document filename: ${filename}`,
|
|
91
|
-
|
|
93
|
+
`Document source: VLM ${modality} extraction`,
|
|
92
94
|
`Processing status: ${finalStatus}`,
|
|
93
95
|
];
|
|
94
96
|
if (policyName) {
|
|
@@ -134,6 +136,7 @@ export class IngestionService {
|
|
|
134
136
|
policyName: opts.policyName,
|
|
135
137
|
extracted: opts.extracted,
|
|
136
138
|
tags: opts.tags,
|
|
139
|
+
modality: opts.modality,
|
|
137
140
|
});
|
|
138
141
|
const details = {
|
|
139
142
|
synthetic_chars: syntheticText.length,
|
|
@@ -160,6 +163,15 @@ export class IngestionService {
|
|
|
160
163
|
});
|
|
161
164
|
return details;
|
|
162
165
|
}
|
|
166
|
+
static buildVlmPayloadMarker(modality, dataUrl) {
|
|
167
|
+
const prefix = modality === "pdf" ? "VLM_PDF_DATA" : "VLM_IMAGE_DATA";
|
|
168
|
+
return `[${prefix}:${dataUrl}]`;
|
|
169
|
+
}
|
|
170
|
+
static async fileToDataUrl(filePath, mimeType) {
|
|
171
|
+
const buffer = await fs.readFile(filePath);
|
|
172
|
+
const base64 = buffer.toString("base64");
|
|
173
|
+
return `data:${mimeType};base64,${base64}`;
|
|
174
|
+
}
|
|
163
175
|
/**
|
|
164
176
|
* Ingest a document using Hybrid Routing Architecture.
|
|
165
177
|
*/
|
|
@@ -217,32 +229,31 @@ export class IngestionService {
|
|
|
217
229
|
Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Ingestion started", source, filename, fileSize, is_high_intent: true }, supabase);
|
|
218
230
|
// 2. Document Triage
|
|
219
231
|
let isFastPath = false;
|
|
220
|
-
let
|
|
232
|
+
let isMultimodalFastPath = false;
|
|
233
|
+
let multimodalModality = null;
|
|
221
234
|
let extractionContent = content;
|
|
222
235
|
const ext = filename.toLowerCase().split('.').pop() || '';
|
|
223
|
-
const fastExts = ['txt', 'md', 'csv', 'json'];
|
|
224
|
-
const imageExts = ['png', 'jpg', 'jpeg', 'webp'];
|
|
225
236
|
// Pre-fetch settings to decide whether we should attempt VLM.
|
|
226
237
|
const { data: triageSettingsRow } = await supabase
|
|
227
238
|
.from("user_settings")
|
|
228
239
|
.select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
|
|
229
240
|
.eq("user_id", userId)
|
|
230
241
|
.maybeSingle();
|
|
231
|
-
const
|
|
232
|
-
const
|
|
233
|
-
const
|
|
234
|
-
|
|
242
|
+
const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
|
|
243
|
+
const pdfResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "pdf");
|
|
244
|
+
const llmModel = imageResolution.model;
|
|
245
|
+
const llmProvider = imageResolution.provider;
|
|
246
|
+
if (this.FAST_EXTS.includes(ext)) {
|
|
235
247
|
isFastPath = true;
|
|
236
248
|
}
|
|
237
|
-
else if (
|
|
249
|
+
else if (this.IMAGE_EXTS.includes(ext) && imageResolution.shouldAttempt) {
|
|
238
250
|
try {
|
|
239
|
-
const buffer = await fs.readFile(filePath);
|
|
240
|
-
const base64 = buffer.toString('base64');
|
|
241
251
|
const mimeTypeActual = mimeType || `image/${ext === 'jpg' ? 'jpeg' : ext}`;
|
|
242
|
-
|
|
243
|
-
extractionContent =
|
|
252
|
+
const dataUrl = await this.fileToDataUrl(filePath, mimeTypeActual);
|
|
253
|
+
extractionContent = this.buildVlmPayloadMarker("image", dataUrl);
|
|
244
254
|
isFastPath = true;
|
|
245
|
-
|
|
255
|
+
isMultimodalFastPath = true;
|
|
256
|
+
multimodalModality = "image";
|
|
246
257
|
logger.info(`Smart Triage: Image ${filename} routed to Fast Path using native VLM (${llmModel}).`);
|
|
247
258
|
Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "VLM Fast Path selected", type: ext, model: llmModel }, supabase);
|
|
248
259
|
}
|
|
@@ -250,7 +261,7 @@ export class IngestionService {
|
|
|
250
261
|
logger.warn(`Failed to read VLM image ${filename}. Routing to Heavy Path.`, { err });
|
|
251
262
|
}
|
|
252
263
|
}
|
|
253
|
-
else if (
|
|
264
|
+
else if (this.IMAGE_EXTS.includes(ext)) {
|
|
254
265
|
logger.info(`Smart Triage: Image ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked vision-unsupported.`);
|
|
255
266
|
Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
|
|
256
267
|
action: "VLM skipped (model marked unsupported)",
|
|
@@ -270,9 +281,30 @@ export class IngestionService {
|
|
|
270
281
|
logger.info(`Smart Triage: PDF ${filename} passed text quality check (${pdfData.pages.filter(p => p.text.trim().length > 30).length}/${pdfData.total} pages with text). Routing to Fast Path.`);
|
|
271
282
|
Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Smart Triage passed", type: "pdf", fast_path: true }, supabase);
|
|
272
283
|
}
|
|
284
|
+
else if (pdfResolution.shouldAttempt) {
|
|
285
|
+
// Reuse the already-loaded parse buffer; avoid a second readFile in fileToDataUrl.
|
|
286
|
+
const dataUrl = `data:application/pdf;base64,${buffer.toString("base64")}`;
|
|
287
|
+
extractionContent = this.buildVlmPayloadMarker("pdf", dataUrl);
|
|
288
|
+
isFastPath = true;
|
|
289
|
+
isMultimodalFastPath = true;
|
|
290
|
+
multimodalModality = "pdf";
|
|
291
|
+
logger.info(`Smart Triage: PDF ${filename} routed to multimodal Fast Path using native VLM (${llmModel}).`);
|
|
292
|
+
Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
|
|
293
|
+
action: "VLM Fast Path selected",
|
|
294
|
+
type: "pdf",
|
|
295
|
+
modality: "pdf",
|
|
296
|
+
model: llmModel,
|
|
297
|
+
}, supabase);
|
|
298
|
+
}
|
|
273
299
|
else {
|
|
274
|
-
logger.info(`Smart Triage: PDF ${filename}
|
|
275
|
-
Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
|
|
300
|
+
logger.info(`Smart Triage: PDF ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked PDF-unsupported.`);
|
|
301
|
+
Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
|
|
302
|
+
action: "VLM skipped (model marked unsupported)",
|
|
303
|
+
type: "pdf",
|
|
304
|
+
modality: "pdf",
|
|
305
|
+
model: llmModel,
|
|
306
|
+
provider: llmProvider,
|
|
307
|
+
}, supabase);
|
|
276
308
|
}
|
|
277
309
|
}
|
|
278
310
|
catch (err) {
|
|
@@ -310,7 +342,7 @@ export class IngestionService {
|
|
|
310
342
|
details: {
|
|
311
343
|
provider: llmSettings.llm_provider ?? llmProvider,
|
|
312
344
|
model: llmSettings.llm_model ?? llmModel,
|
|
313
|
-
mode:
|
|
345
|
+
mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
|
|
314
346
|
}
|
|
315
347
|
});
|
|
316
348
|
const baselineResult = await PolicyEngine.extractBaseline(doc, { context: baselineConfig?.context, fields: baselineConfig?.fields }, llmSettings);
|
|
@@ -363,7 +395,7 @@ export class IngestionService {
|
|
|
363
395
|
.eq("id", ingestion.id)
|
|
364
396
|
.select()
|
|
365
397
|
.single();
|
|
366
|
-
if (
|
|
398
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
367
399
|
const embeddingMeta = this.queueVlmSemanticEmbedding({
|
|
368
400
|
ingestionId: ingestion.id,
|
|
369
401
|
userId,
|
|
@@ -372,6 +404,7 @@ export class IngestionService {
|
|
|
372
404
|
policyName,
|
|
373
405
|
extracted: mergedExtracted,
|
|
374
406
|
tags: autoTags,
|
|
407
|
+
modality: multimodalModality,
|
|
375
408
|
supabase,
|
|
376
409
|
embedSettings,
|
|
377
410
|
});
|
|
@@ -388,25 +421,27 @@ export class IngestionService {
|
|
|
388
421
|
.update({ trace: finalTrace })
|
|
389
422
|
.eq("id", ingestion.id);
|
|
390
423
|
}
|
|
391
|
-
if (
|
|
424
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
392
425
|
await ModelCapabilityService.learnVisionSuccess({
|
|
393
426
|
supabase,
|
|
394
427
|
userId,
|
|
395
428
|
provider: llmSettings.llm_provider ?? llmProvider,
|
|
396
429
|
model: llmSettings.llm_model ?? llmModel,
|
|
430
|
+
modality: multimodalModality,
|
|
397
431
|
});
|
|
398
432
|
}
|
|
399
433
|
return updatedIngestion;
|
|
400
434
|
}
|
|
401
435
|
catch (err) {
|
|
402
436
|
const msg = err instanceof Error ? err.message : String(err);
|
|
403
|
-
if (
|
|
437
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
404
438
|
const learnedState = await ModelCapabilityService.learnVisionFailure({
|
|
405
439
|
supabase,
|
|
406
440
|
userId,
|
|
407
441
|
provider: llmProvider,
|
|
408
442
|
model: llmModel,
|
|
409
443
|
error: err,
|
|
444
|
+
modality: multimodalModality,
|
|
410
445
|
});
|
|
411
446
|
logger.warn(`VLM extraction failed for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
|
|
412
447
|
Actuator.logEvent(ingestion.id, userId, "error", "Processing", {
|
|
@@ -477,31 +512,31 @@ export class IngestionService {
|
|
|
477
512
|
if (!filePath)
|
|
478
513
|
throw new Error("No storage path found for this ingestion");
|
|
479
514
|
let isFastPath = false;
|
|
480
|
-
let
|
|
515
|
+
let isMultimodalFastPath = false;
|
|
516
|
+
let multimodalModality = null;
|
|
481
517
|
let extractionContent = "";
|
|
482
518
|
const ext = filename.toLowerCase().split('.').pop() || '';
|
|
483
|
-
const fastExts = ['txt', 'md', 'csv', 'json'];
|
|
484
|
-
const imageExts = ['png', 'jpg', 'jpeg', 'webp'];
|
|
485
519
|
const { data: triageSettingsRow } = await supabase
|
|
486
520
|
.from("user_settings")
|
|
487
521
|
.select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
|
|
488
522
|
.eq("user_id", userId)
|
|
489
523
|
.maybeSingle();
|
|
490
|
-
const
|
|
491
|
-
const
|
|
492
|
-
const
|
|
493
|
-
|
|
524
|
+
const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
|
|
525
|
+
const pdfResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "pdf");
|
|
526
|
+
const llmModel = imageResolution.model;
|
|
527
|
+
const llmProvider = imageResolution.provider;
|
|
528
|
+
if (this.FAST_EXTS.includes(ext)) {
|
|
494
529
|
isFastPath = true;
|
|
495
530
|
extractionContent = await fs.readFile(filePath, "utf-8");
|
|
496
531
|
}
|
|
497
|
-
else if (
|
|
532
|
+
else if (this.IMAGE_EXTS.includes(ext) && imageResolution.shouldAttempt) {
|
|
498
533
|
try {
|
|
499
|
-
const buffer = await fs.readFile(filePath);
|
|
500
|
-
const base64 = buffer.toString('base64');
|
|
501
534
|
const mimeTypeActual = `image/${ext === 'jpg' ? 'jpeg' : ext}`;
|
|
502
|
-
|
|
535
|
+
const dataUrl = await this.fileToDataUrl(filePath, mimeTypeActual);
|
|
536
|
+
extractionContent = this.buildVlmPayloadMarker("image", dataUrl);
|
|
503
537
|
isFastPath = true;
|
|
504
|
-
|
|
538
|
+
isMultimodalFastPath = true;
|
|
539
|
+
multimodalModality = "image";
|
|
505
540
|
logger.info(`Smart Triage: Re-run image ${filename} routed to Fast Path using native VLM (${llmModel}).`);
|
|
506
541
|
Actuator.logEvent(ingestionId, userId, "info", "Triage", { action: "VLM Fast Path selected", type: ext, model: llmModel }, supabase);
|
|
507
542
|
}
|
|
@@ -509,7 +544,7 @@ export class IngestionService {
|
|
|
509
544
|
logger.warn(`Failed to read VLM image ${filename} during rerun. Routing to Heavy Path.`, { err });
|
|
510
545
|
}
|
|
511
546
|
}
|
|
512
|
-
else if (
|
|
547
|
+
else if (this.IMAGE_EXTS.includes(ext)) {
|
|
513
548
|
logger.info(`Smart Triage: Re-run image ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked vision-unsupported.`);
|
|
514
549
|
Actuator.logEvent(ingestionId, userId, "info", "Triage", {
|
|
515
550
|
action: "VLM skipped (model marked unsupported)",
|
|
@@ -527,10 +562,34 @@ export class IngestionService {
|
|
|
527
562
|
isFastPath = true;
|
|
528
563
|
extractionContent = pdfData.text;
|
|
529
564
|
}
|
|
530
|
-
|
|
565
|
+
else if (pdfResolution.shouldAttempt) {
|
|
566
|
+
// Reuse the already-loaded parse buffer; avoid a second readFile in fileToDataUrl.
|
|
567
|
+
const dataUrl = `data:application/pdf;base64,${buffer.toString("base64")}`;
|
|
568
|
+
extractionContent = this.buildVlmPayloadMarker("pdf", dataUrl);
|
|
569
|
+
isFastPath = true;
|
|
570
|
+
isMultimodalFastPath = true;
|
|
571
|
+
multimodalModality = "pdf";
|
|
572
|
+
logger.info(`Smart Triage: Re-run PDF ${filename} routed to multimodal Fast Path using native VLM (${llmModel}).`);
|
|
573
|
+
Actuator.logEvent(ingestionId, userId, "info", "Triage", {
|
|
574
|
+
action: "VLM Fast Path selected",
|
|
575
|
+
type: "pdf",
|
|
576
|
+
modality: "pdf",
|
|
577
|
+
model: llmModel,
|
|
578
|
+
}, supabase);
|
|
579
|
+
}
|
|
580
|
+
else {
|
|
581
|
+
logger.info(`Smart Triage: Re-run PDF ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked PDF-unsupported.`);
|
|
582
|
+
Actuator.logEvent(ingestionId, userId, "info", "Triage", {
|
|
583
|
+
action: "VLM skipped (model marked unsupported)",
|
|
584
|
+
type: "pdf",
|
|
585
|
+
modality: "pdf",
|
|
586
|
+
model: llmModel,
|
|
587
|
+
provider: llmProvider
|
|
588
|
+
}, supabase);
|
|
589
|
+
}
|
|
531
590
|
}
|
|
532
591
|
catch (err) {
|
|
533
|
-
|
|
592
|
+
logger.warn(`Failed to parse PDF ${filename} during rerun. Routing to Heavy Path.`, { err });
|
|
534
593
|
}
|
|
535
594
|
}
|
|
536
595
|
if (isFastPath) {
|
|
@@ -560,7 +619,7 @@ export class IngestionService {
|
|
|
560
619
|
details: {
|
|
561
620
|
provider: llmSettings.llm_provider ?? llmProvider,
|
|
562
621
|
model: llmSettings.llm_model ?? llmModel,
|
|
563
|
-
mode:
|
|
622
|
+
mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
|
|
564
623
|
}
|
|
565
624
|
});
|
|
566
625
|
const baselineResult = await PolicyEngine.extractBaseline(doc, { context: baselineConfig?.context, fields: baselineConfig?.fields }, llmSettings);
|
|
@@ -626,7 +685,7 @@ export class IngestionService {
|
|
|
626
685
|
baseline_config_id: baselineConfig?.id ?? null,
|
|
627
686
|
})
|
|
628
687
|
.eq("id", ingestionId);
|
|
629
|
-
if (
|
|
688
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
630
689
|
const embeddingMeta = this.queueVlmSemanticEmbedding({
|
|
631
690
|
ingestionId,
|
|
632
691
|
userId,
|
|
@@ -635,6 +694,7 @@ export class IngestionService {
|
|
|
635
694
|
policyName,
|
|
636
695
|
extracted: mergedExtracted,
|
|
637
696
|
tags: mergedTags,
|
|
697
|
+
modality: multimodalModality,
|
|
638
698
|
supabase,
|
|
639
699
|
embedSettings,
|
|
640
700
|
});
|
|
@@ -651,25 +711,27 @@ export class IngestionService {
|
|
|
651
711
|
.update({ trace: rerunTrace })
|
|
652
712
|
.eq("id", ingestionId);
|
|
653
713
|
}
|
|
654
|
-
if (
|
|
714
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
655
715
|
await ModelCapabilityService.learnVisionSuccess({
|
|
656
716
|
supabase,
|
|
657
717
|
userId,
|
|
658
718
|
provider: llmSettings.llm_provider ?? llmProvider,
|
|
659
719
|
model: llmSettings.llm_model ?? llmModel,
|
|
720
|
+
modality: multimodalModality,
|
|
660
721
|
});
|
|
661
722
|
}
|
|
662
723
|
return finalStatus === "matched";
|
|
663
724
|
}
|
|
664
725
|
catch (err) {
|
|
665
726
|
const msg = err instanceof Error ? err.message : String(err);
|
|
666
|
-
if (
|
|
727
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
667
728
|
const learnedState = await ModelCapabilityService.learnVisionFailure({
|
|
668
729
|
supabase,
|
|
669
730
|
userId,
|
|
670
731
|
provider: llmProvider,
|
|
671
732
|
model: llmModel,
|
|
672
733
|
error: err,
|
|
734
|
+
modality: multimodalModality,
|
|
673
735
|
});
|
|
674
736
|
logger.warn(`VLM extraction failed during rerun for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
|
|
675
737
|
Actuator.logEvent(ingestionId, userId, "error", "Processing", {
|