@realtimex/folio 0.1.11 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +1 -0
- package/api/src/services/IngestionService.ts +513 -206
- package/api/src/services/ModelCapabilityService.ts +213 -56
- package/api/src/services/PolicyEngine.ts +48 -22
- package/api/src/services/RAGService.ts +2 -2
- package/dist/api/src/services/IngestionService.js +467 -194
- package/dist/api/src/services/ModelCapabilityService.js +165 -54
- package/dist/api/src/services/PolicyEngine.js +38 -22
- package/dist/api/src/services/RAGService.js +2 -2
- package/dist/assets/{index-nxHX9No5.js → index-CLpalZvv.js} +37 -37
- package/dist/index.html +1 -1
- package/package.json +1 -1
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
import fs from "fs/promises";
|
|
2
|
+
import { execFile } from "child_process";
|
|
3
|
+
import os from "os";
|
|
4
|
+
import path from "path";
|
|
2
5
|
import { PDFParse } from "pdf-parse";
|
|
6
|
+
import { promisify } from "util";
|
|
3
7
|
import { createLogger } from "../utils/logger.js";
|
|
4
8
|
import { PolicyLoader } from "./PolicyLoader.js";
|
|
5
9
|
import { PolicyEngine } from "./PolicyEngine.js";
|
|
@@ -11,6 +15,7 @@ import { RAGService } from "./RAGService.js";
|
|
|
11
15
|
import { SDKService } from "./SDKService.js";
|
|
12
16
|
import { ModelCapabilityService } from "./ModelCapabilityService.js";
|
|
13
17
|
const logger = createLogger("IngestionService");
|
|
18
|
+
const execFileAsync = promisify(execFile);
|
|
14
19
|
/**
|
|
15
20
|
* Multi-signal classifier that decides whether pdf-parse extracted enough
|
|
16
21
|
* real text to skip GPU OCR and go straight to the local LLM (Fast Path).
|
|
@@ -51,6 +56,17 @@ function isPdfTextExtractable(pdfData) {
|
|
|
51
56
|
return true;
|
|
52
57
|
}
|
|
53
58
|
export class IngestionService {
|
|
59
|
+
static FAST_EXTS = ["txt", "md", "csv", "json"];
|
|
60
|
+
static IMAGE_EXTS = ["png", "jpg", "jpeg", "webp"];
|
|
61
|
+
static IMAGE_REENCODE_TIMEOUT_MS = 15000;
|
|
62
|
+
static IMAGE_REENCODE_RETRY_ENABLED = (process.env.FOLIO_VLM_IMAGE_REENCODE_RETRY_ENABLED ?? "true").toLowerCase() !== "false";
|
|
63
|
+
static IMAGE_REENCODE_RETRY_METRICS = {
|
|
64
|
+
attempted: 0,
|
|
65
|
+
succeeded: 0,
|
|
66
|
+
failed: 0,
|
|
67
|
+
skipped_disabled: 0,
|
|
68
|
+
skipped_unavailable: 0,
|
|
69
|
+
};
|
|
54
70
|
static NON_IDEMPOTENT_ACTION_TYPES = new Set([
|
|
55
71
|
"append_to_google_sheet",
|
|
56
72
|
"webhook",
|
|
@@ -85,10 +101,10 @@ export class IngestionService {
|
|
|
85
101
|
return String(value);
|
|
86
102
|
}
|
|
87
103
|
static buildVlmSemanticText(opts) {
|
|
88
|
-
const { filename, finalStatus, policyName, extracted, tags } = opts;
|
|
104
|
+
const { filename, finalStatus, policyName, extracted, tags, modality } = opts;
|
|
89
105
|
const lines = [
|
|
90
106
|
`Document filename: ${filename}`,
|
|
91
|
-
|
|
107
|
+
`Document source: VLM ${modality} extraction`,
|
|
92
108
|
`Processing status: ${finalStatus}`,
|
|
93
109
|
];
|
|
94
110
|
if (policyName) {
|
|
@@ -134,6 +150,7 @@ export class IngestionService {
|
|
|
134
150
|
policyName: opts.policyName,
|
|
135
151
|
extracted: opts.extracted,
|
|
136
152
|
tags: opts.tags,
|
|
153
|
+
modality: opts.modality,
|
|
137
154
|
});
|
|
138
155
|
const details = {
|
|
139
156
|
synthetic_chars: syntheticText.length,
|
|
@@ -160,6 +177,76 @@ export class IngestionService {
|
|
|
160
177
|
});
|
|
161
178
|
return details;
|
|
162
179
|
}
|
|
180
|
+
static buildVlmPayloadMarker(modality, dataUrl) {
|
|
181
|
+
const prefix = modality === "pdf" ? "VLM_PDF_DATA" : "VLM_IMAGE_DATA";
|
|
182
|
+
return `[${prefix}:${dataUrl}]`;
|
|
183
|
+
}
|
|
184
|
+
static async fileToDataUrl(filePath, mimeType) {
|
|
185
|
+
const buffer = await fs.readFile(filePath);
|
|
186
|
+
const base64 = buffer.toString("base64");
|
|
187
|
+
return `data:${mimeType};base64,${base64}`;
|
|
188
|
+
}
|
|
189
|
+
static errorToMessage(error) {
|
|
190
|
+
if (error instanceof Error)
|
|
191
|
+
return error.message;
|
|
192
|
+
if (typeof error === "string")
|
|
193
|
+
return error;
|
|
194
|
+
if (error && typeof error === "object") {
|
|
195
|
+
const candidate = error;
|
|
196
|
+
if (typeof candidate.message === "string")
|
|
197
|
+
return candidate.message;
|
|
198
|
+
}
|
|
199
|
+
return String(error ?? "");
|
|
200
|
+
}
|
|
201
|
+
static isInvalidModelError(error) {
|
|
202
|
+
const message = this.errorToMessage(error).toLowerCase();
|
|
203
|
+
return message.includes("invalid model");
|
|
204
|
+
}
|
|
205
|
+
static async reencodeImageToPngDataUrl(filePath) {
|
|
206
|
+
const tempOutputPath = path.join(os.tmpdir(), `folio-vlm-reencode-${Date.now()}-${Math.random().toString(16).slice(2)}.png`);
|
|
207
|
+
try {
|
|
208
|
+
await execFileAsync("sips", ["-s", "format", "png", filePath, "--out", tempOutputPath], {
|
|
209
|
+
timeout: this.IMAGE_REENCODE_TIMEOUT_MS,
|
|
210
|
+
maxBuffer: 1024 * 1024,
|
|
211
|
+
});
|
|
212
|
+
const pngBuffer = await fs.readFile(tempOutputPath);
|
|
213
|
+
return `data:image/png;base64,${pngBuffer.toString("base64")}`;
|
|
214
|
+
}
|
|
215
|
+
catch {
|
|
216
|
+
return null;
|
|
217
|
+
}
|
|
218
|
+
finally {
|
|
219
|
+
await fs.unlink(tempOutputPath).catch(() => undefined);
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
static async maybeBuildImageRetryMarker(opts) {
|
|
223
|
+
if (!this.isInvalidModelError(opts.error))
|
|
224
|
+
return null;
|
|
225
|
+
if (!this.IMAGE_REENCODE_RETRY_ENABLED) {
|
|
226
|
+
this.bumpImageReencodeRetryMetric("skipped_disabled", opts);
|
|
227
|
+
logger.info(`VLM ${opts.phase} retry skipped for ${opts.filename}: re-encode retry disabled (${opts.provider}/${opts.model}).`);
|
|
228
|
+
return null;
|
|
229
|
+
}
|
|
230
|
+
const retryDataUrl = await this.reencodeImageToPngDataUrl(opts.filePath);
|
|
231
|
+
if (!retryDataUrl) {
|
|
232
|
+
this.bumpImageReencodeRetryMetric("skipped_unavailable", opts);
|
|
233
|
+
logger.warn(`VLM ${opts.phase} retry skipped for ${opts.filename}: image re-encode unavailable (${opts.provider}/${opts.model}).`);
|
|
234
|
+
return null;
|
|
235
|
+
}
|
|
236
|
+
logger.warn(`VLM ${opts.phase} failed for ${opts.filename} with invalid model. Retrying once with re-encoded image payload (${opts.provider}/${opts.model}).`);
|
|
237
|
+
return this.buildVlmPayloadMarker("image", retryDataUrl);
|
|
238
|
+
}
|
|
239
|
+
static bumpImageReencodeRetryMetric(outcome, meta) {
|
|
240
|
+
this.IMAGE_REENCODE_RETRY_METRICS[outcome] += 1;
|
|
241
|
+
logger.info("VLM image re-encode retry metric", {
|
|
242
|
+
outcome,
|
|
243
|
+
phase: meta.phase,
|
|
244
|
+
provider: meta.provider,
|
|
245
|
+
model: meta.model,
|
|
246
|
+
filename: meta.filename,
|
|
247
|
+
counters: { ...this.IMAGE_REENCODE_RETRY_METRICS },
|
|
248
|
+
});
|
|
249
|
+
}
|
|
163
250
|
/**
|
|
164
251
|
* Ingest a document using Hybrid Routing Architecture.
|
|
165
252
|
*/
|
|
@@ -217,32 +304,31 @@ export class IngestionService {
|
|
|
217
304
|
Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Ingestion started", source, filename, fileSize, is_high_intent: true }, supabase);
|
|
218
305
|
// 2. Document Triage
|
|
219
306
|
let isFastPath = false;
|
|
220
|
-
let
|
|
307
|
+
let isMultimodalFastPath = false;
|
|
308
|
+
let multimodalModality = null;
|
|
221
309
|
let extractionContent = content;
|
|
222
310
|
const ext = filename.toLowerCase().split('.').pop() || '';
|
|
223
|
-
const fastExts = ['txt', 'md', 'csv', 'json'];
|
|
224
|
-
const imageExts = ['png', 'jpg', 'jpeg', 'webp'];
|
|
225
311
|
// Pre-fetch settings to decide whether we should attempt VLM.
|
|
226
312
|
const { data: triageSettingsRow } = await supabase
|
|
227
313
|
.from("user_settings")
|
|
228
314
|
.select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
|
|
229
315
|
.eq("user_id", userId)
|
|
230
316
|
.maybeSingle();
|
|
231
|
-
const
|
|
232
|
-
const
|
|
233
|
-
const
|
|
234
|
-
|
|
317
|
+
const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
|
|
318
|
+
const pdfResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "pdf");
|
|
319
|
+
const llmModel = imageResolution.model;
|
|
320
|
+
const llmProvider = imageResolution.provider;
|
|
321
|
+
if (this.FAST_EXTS.includes(ext)) {
|
|
235
322
|
isFastPath = true;
|
|
236
323
|
}
|
|
237
|
-
else if (
|
|
324
|
+
else if (this.IMAGE_EXTS.includes(ext) && imageResolution.shouldAttempt) {
|
|
238
325
|
try {
|
|
239
|
-
const buffer = await fs.readFile(filePath);
|
|
240
|
-
const base64 = buffer.toString('base64');
|
|
241
326
|
const mimeTypeActual = mimeType || `image/${ext === 'jpg' ? 'jpeg' : ext}`;
|
|
242
|
-
|
|
243
|
-
extractionContent =
|
|
327
|
+
const dataUrl = await this.fileToDataUrl(filePath, mimeTypeActual);
|
|
328
|
+
extractionContent = this.buildVlmPayloadMarker("image", dataUrl);
|
|
244
329
|
isFastPath = true;
|
|
245
|
-
|
|
330
|
+
isMultimodalFastPath = true;
|
|
331
|
+
multimodalModality = "image";
|
|
246
332
|
logger.info(`Smart Triage: Image ${filename} routed to Fast Path using native VLM (${llmModel}).`);
|
|
247
333
|
Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "VLM Fast Path selected", type: ext, model: llmModel }, supabase);
|
|
248
334
|
}
|
|
@@ -250,7 +336,7 @@ export class IngestionService {
|
|
|
250
336
|
logger.warn(`Failed to read VLM image ${filename}. Routing to Heavy Path.`, { err });
|
|
251
337
|
}
|
|
252
338
|
}
|
|
253
|
-
else if (
|
|
339
|
+
else if (this.IMAGE_EXTS.includes(ext)) {
|
|
254
340
|
logger.info(`Smart Triage: Image ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked vision-unsupported.`);
|
|
255
341
|
Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
|
|
256
342
|
action: "VLM skipped (model marked unsupported)",
|
|
@@ -270,9 +356,30 @@ export class IngestionService {
|
|
|
270
356
|
logger.info(`Smart Triage: PDF ${filename} passed text quality check (${pdfData.pages.filter(p => p.text.trim().length > 30).length}/${pdfData.total} pages with text). Routing to Fast Path.`);
|
|
271
357
|
Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Smart Triage passed", type: "pdf", fast_path: true }, supabase);
|
|
272
358
|
}
|
|
359
|
+
else if (pdfResolution.shouldAttempt) {
|
|
360
|
+
// Reuse the already-loaded parse buffer; avoid a second readFile in fileToDataUrl.
|
|
361
|
+
const dataUrl = `data:application/pdf;base64,${buffer.toString("base64")}`;
|
|
362
|
+
extractionContent = this.buildVlmPayloadMarker("pdf", dataUrl);
|
|
363
|
+
isFastPath = true;
|
|
364
|
+
isMultimodalFastPath = true;
|
|
365
|
+
multimodalModality = "pdf";
|
|
366
|
+
logger.info(`Smart Triage: PDF ${filename} routed to multimodal Fast Path using native VLM (${llmModel}).`);
|
|
367
|
+
Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
|
|
368
|
+
action: "VLM Fast Path selected",
|
|
369
|
+
type: "pdf",
|
|
370
|
+
modality: "pdf",
|
|
371
|
+
model: llmModel,
|
|
372
|
+
}, supabase);
|
|
373
|
+
}
|
|
273
374
|
else {
|
|
274
|
-
logger.info(`Smart Triage: PDF ${filename}
|
|
275
|
-
Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
|
|
375
|
+
logger.info(`Smart Triage: PDF ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked PDF-unsupported.`);
|
|
376
|
+
Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
|
|
377
|
+
action: "VLM skipped (model marked unsupported)",
|
|
378
|
+
type: "pdf",
|
|
379
|
+
modality: "pdf",
|
|
380
|
+
model: llmModel,
|
|
381
|
+
provider: llmProvider,
|
|
382
|
+
}, supabase);
|
|
276
383
|
}
|
|
277
384
|
}
|
|
278
385
|
catch (err) {
|
|
@@ -296,117 +403,184 @@ export class IngestionService {
|
|
|
296
403
|
embedding_provider: processingSettingsRow.data?.embedding_provider ?? undefined,
|
|
297
404
|
embedding_model: processingSettingsRow.data?.embedding_model ?? undefined,
|
|
298
405
|
};
|
|
299
|
-
const
|
|
300
|
-
|
|
301
|
-
const
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
406
|
+
const resolvedProvider = llmSettings.llm_provider ?? llmProvider;
|
|
407
|
+
const resolvedModel = llmSettings.llm_model ?? llmModel;
|
|
408
|
+
const runFastPathAttempt = async (attemptContent, attemptType) => {
|
|
409
|
+
const doc = { filePath: filePath, text: attemptContent, ingestionId: ingestion.id, userId, supabase };
|
|
410
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
411
|
+
const baselineTrace = [];
|
|
412
|
+
// Fire and forget Semantic Embedding Storage
|
|
413
|
+
RAGService.chunkAndEmbed(ingestion.id, userId, doc.text, supabase, embedSettings).catch(err => {
|
|
414
|
+
logger.error(`RAG embedding failed for ${ingestion.id}`, err);
|
|
415
|
+
});
|
|
416
|
+
// 4. Stage 1: Baseline extraction (always runs, LLM call 1 of max 2)
|
|
417
|
+
baselineTrace.push({
|
|
418
|
+
timestamp: new Date().toISOString(),
|
|
419
|
+
step: "LLM request (baseline extraction)",
|
|
420
|
+
details: {
|
|
421
|
+
provider: resolvedProvider,
|
|
422
|
+
model: resolvedModel,
|
|
423
|
+
mode: isMultimodalFastPath
|
|
424
|
+
? `vision:${multimodalModality ?? "image"}${attemptType === "reencoded_image_retry" ? ":reencoded" : ""}`
|
|
425
|
+
: "text",
|
|
426
|
+
}
|
|
427
|
+
});
|
|
428
|
+
const baselineResult = await PolicyEngine.extractBaseline(doc, { context: baselineConfig?.context, fields: baselineConfig?.fields }, llmSettings);
|
|
429
|
+
const baselineEntities = baselineResult.entities;
|
|
430
|
+
const autoTags = baselineResult.tags;
|
|
431
|
+
baselineTrace.push({
|
|
432
|
+
timestamp: new Date().toISOString(),
|
|
433
|
+
step: "LLM response (baseline extraction)",
|
|
434
|
+
details: {
|
|
435
|
+
entities_count: Object.keys(baselineEntities).length,
|
|
436
|
+
uncertain_count: baselineResult.uncertain_fields.length,
|
|
437
|
+
tags_count: autoTags.length,
|
|
438
|
+
}
|
|
439
|
+
});
|
|
440
|
+
// Enrich the document with extracted entities so policy keyword/semantic
|
|
441
|
+
// conditions can match against semantic field values (e.g. document_type:
|
|
442
|
+
// "invoice") even when those exact words don't appear in the raw text.
|
|
443
|
+
const entityLines = Object.entries(baselineEntities)
|
|
444
|
+
.filter(([, v]) => v != null)
|
|
445
|
+
.map(([k, v]) => `${k}: ${Array.isArray(v) ? v.join(", ") : String(v)}`);
|
|
446
|
+
const enrichedDoc = entityLines.length > 0
|
|
447
|
+
? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
|
|
448
|
+
: doc;
|
|
449
|
+
// 5. Stage 2: Policy matching + policy-specific field extraction
|
|
450
|
+
let result;
|
|
451
|
+
if (userPolicies.length > 0) {
|
|
452
|
+
result = await PolicyEngine.processWithPolicies(enrichedDoc, userPolicies, llmSettings, baselineEntities);
|
|
314
453
|
}
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
const baselineEntities = baselineResult.entities;
|
|
318
|
-
const autoTags = baselineResult.tags;
|
|
319
|
-
baselineTrace.push({
|
|
320
|
-
timestamp: new Date().toISOString(),
|
|
321
|
-
step: "LLM response (baseline extraction)",
|
|
322
|
-
details: {
|
|
323
|
-
entities_count: Object.keys(baselineEntities).length,
|
|
324
|
-
uncertain_count: baselineResult.uncertain_fields.length,
|
|
325
|
-
tags_count: autoTags.length,
|
|
454
|
+
else {
|
|
455
|
+
result = await PolicyEngine.process(enrichedDoc, llmSettings, baselineEntities);
|
|
326
456
|
}
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
457
|
+
const policyName = userPolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name;
|
|
458
|
+
const finalStatus = result.status === "fallback" ? "no_match" : result.status;
|
|
459
|
+
// Merge: baseline entities are the foundation; policy-specific fields
|
|
460
|
+
// are overlaid on top so more precise extractions take precedence.
|
|
461
|
+
const mergedExtracted = { ...baselineEntities, ...result.extractedData };
|
|
462
|
+
let finalTrace = [...baselineTrace, ...(result.trace || [])];
|
|
463
|
+
const { data: updatedIngestion } = await supabase
|
|
464
|
+
.from("ingestions")
|
|
465
|
+
.update({
|
|
466
|
+
status: finalStatus,
|
|
467
|
+
policy_id: result.matchedPolicy,
|
|
468
|
+
policy_name: policyName,
|
|
469
|
+
extracted: mergedExtracted,
|
|
470
|
+
actions_taken: result.actionsExecuted,
|
|
471
|
+
trace: finalTrace,
|
|
472
|
+
tags: autoTags,
|
|
473
|
+
baseline_config_id: baselineConfig?.id ?? null,
|
|
474
|
+
})
|
|
475
|
+
.eq("id", ingestion.id)
|
|
476
|
+
.select()
|
|
477
|
+
.single();
|
|
478
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
479
|
+
const embeddingMeta = this.queueVlmSemanticEmbedding({
|
|
480
|
+
ingestionId: ingestion.id,
|
|
481
|
+
userId,
|
|
482
|
+
filename,
|
|
483
|
+
finalStatus,
|
|
484
|
+
policyName,
|
|
485
|
+
extracted: mergedExtracted,
|
|
486
|
+
tags: autoTags,
|
|
487
|
+
modality: multimodalModality,
|
|
488
|
+
supabase,
|
|
489
|
+
embedSettings,
|
|
490
|
+
});
|
|
491
|
+
finalTrace = [
|
|
492
|
+
...finalTrace,
|
|
493
|
+
{
|
|
494
|
+
timestamp: new Date().toISOString(),
|
|
495
|
+
step: "Queued synthetic VLM embedding",
|
|
496
|
+
details: embeddingMeta,
|
|
497
|
+
}
|
|
498
|
+
];
|
|
499
|
+
await supabase
|
|
500
|
+
.from("ingestions")
|
|
501
|
+
.update({ trace: finalTrace })
|
|
502
|
+
.eq("id", ingestion.id);
|
|
503
|
+
}
|
|
504
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
505
|
+
await ModelCapabilityService.learnVisionSuccess({
|
|
506
|
+
supabase,
|
|
507
|
+
userId,
|
|
508
|
+
provider: resolvedProvider,
|
|
509
|
+
model: resolvedModel,
|
|
510
|
+
modality: multimodalModality,
|
|
511
|
+
});
|
|
512
|
+
}
|
|
513
|
+
return updatedIngestion;
|
|
514
|
+
};
|
|
515
|
+
let terminalError = null;
|
|
516
|
+
try {
|
|
517
|
+
return await runFastPathAttempt(extractionContent, "primary");
|
|
341
518
|
}
|
|
342
|
-
|
|
343
|
-
|
|
519
|
+
catch (primaryErr) {
|
|
520
|
+
terminalError = primaryErr;
|
|
344
521
|
}
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
const mergedExtracted = { ...baselineEntities, ...result.extractedData };
|
|
350
|
-
let finalTrace = [...baselineTrace, ...(result.trace || [])];
|
|
351
|
-
const { data: updatedIngestion } = await supabase
|
|
352
|
-
.from("ingestions")
|
|
353
|
-
.update({
|
|
354
|
-
status: finalStatus,
|
|
355
|
-
policy_id: result.matchedPolicy,
|
|
356
|
-
policy_name: policyName,
|
|
357
|
-
extracted: mergedExtracted,
|
|
358
|
-
actions_taken: result.actionsExecuted,
|
|
359
|
-
trace: finalTrace,
|
|
360
|
-
tags: autoTags,
|
|
361
|
-
baseline_config_id: baselineConfig?.id ?? null,
|
|
362
|
-
})
|
|
363
|
-
.eq("id", ingestion.id)
|
|
364
|
-
.select()
|
|
365
|
-
.single();
|
|
366
|
-
if (isVlmFastPath) {
|
|
367
|
-
const embeddingMeta = this.queueVlmSemanticEmbedding({
|
|
368
|
-
ingestionId: ingestion.id,
|
|
369
|
-
userId,
|
|
522
|
+
if (isMultimodalFastPath && multimodalModality === "image") {
|
|
523
|
+
const retryMarker = await this.maybeBuildImageRetryMarker({
|
|
524
|
+
error: terminalError,
|
|
525
|
+
filePath,
|
|
370
526
|
filename,
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
tags: autoTags,
|
|
375
|
-
supabase,
|
|
376
|
-
embedSettings,
|
|
527
|
+
provider: resolvedProvider,
|
|
528
|
+
model: resolvedModel,
|
|
529
|
+
phase: "ingest",
|
|
377
530
|
});
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
531
|
+
if (retryMarker) {
|
|
532
|
+
this.bumpImageReencodeRetryMetric("attempted", {
|
|
533
|
+
phase: "ingest",
|
|
534
|
+
provider: resolvedProvider,
|
|
535
|
+
model: resolvedModel,
|
|
536
|
+
filename,
|
|
537
|
+
});
|
|
538
|
+
Actuator.logEvent(ingestion.id, userId, "info", "Processing", {
|
|
539
|
+
action: "Retrying VLM with re-encoded image payload",
|
|
540
|
+
provider: resolvedProvider,
|
|
541
|
+
model: resolvedModel,
|
|
542
|
+
}, supabase);
|
|
543
|
+
try {
|
|
544
|
+
const retryResult = await runFastPathAttempt(retryMarker, "reencoded_image_retry");
|
|
545
|
+
this.bumpImageReencodeRetryMetric("succeeded", {
|
|
546
|
+
phase: "ingest",
|
|
547
|
+
provider: resolvedProvider,
|
|
548
|
+
model: resolvedModel,
|
|
549
|
+
filename,
|
|
550
|
+
});
|
|
551
|
+
Actuator.logEvent(ingestion.id, userId, "analysis", "Processing", {
|
|
552
|
+
action: "VLM re-encoded image retry succeeded",
|
|
553
|
+
provider: resolvedProvider,
|
|
554
|
+
model: resolvedModel,
|
|
555
|
+
}, supabase);
|
|
556
|
+
return retryResult;
|
|
384
557
|
}
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
558
|
+
catch (retryErr) {
|
|
559
|
+
this.bumpImageReencodeRetryMetric("failed", {
|
|
560
|
+
phase: "ingest",
|
|
561
|
+
provider: resolvedProvider,
|
|
562
|
+
model: resolvedModel,
|
|
563
|
+
filename,
|
|
564
|
+
});
|
|
565
|
+
Actuator.logEvent(ingestion.id, userId, "error", "Processing", {
|
|
566
|
+
action: "VLM re-encoded image retry failed",
|
|
567
|
+
provider: resolvedProvider,
|
|
568
|
+
model: resolvedModel,
|
|
569
|
+
error: this.errorToMessage(retryErr),
|
|
570
|
+
}, supabase);
|
|
571
|
+
terminalError = retryErr;
|
|
572
|
+
}
|
|
573
|
+
}
|
|
398
574
|
}
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
catch (err) {
|
|
402
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
403
|
-
if (isVlmFastPath) {
|
|
575
|
+
const msg = this.errorToMessage(terminalError);
|
|
576
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
404
577
|
const learnedState = await ModelCapabilityService.learnVisionFailure({
|
|
405
578
|
supabase,
|
|
406
579
|
userId,
|
|
407
|
-
provider:
|
|
408
|
-
model:
|
|
409
|
-
error:
|
|
580
|
+
provider: resolvedProvider,
|
|
581
|
+
model: resolvedModel,
|
|
582
|
+
error: terminalError,
|
|
583
|
+
modality: multimodalModality,
|
|
410
584
|
});
|
|
411
585
|
logger.warn(`VLM extraction failed for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
|
|
412
586
|
Actuator.logEvent(ingestion.id, userId, "error", "Processing", {
|
|
@@ -428,6 +602,17 @@ export class IngestionService {
|
|
|
428
602
|
return updatedIngestion;
|
|
429
603
|
}
|
|
430
604
|
}
|
|
605
|
+
catch (err) {
|
|
606
|
+
const msg = this.errorToMessage(err);
|
|
607
|
+
Actuator.logEvent(ingestion.id, userId, "error", "Processing", { error: msg }, supabase);
|
|
608
|
+
const { data: updatedIngestion } = await supabase
|
|
609
|
+
.from("ingestions")
|
|
610
|
+
.update({ status: "error", error_message: msg })
|
|
611
|
+
.eq("id", ingestion.id)
|
|
612
|
+
.select()
|
|
613
|
+
.single();
|
|
614
|
+
return updatedIngestion;
|
|
615
|
+
}
|
|
431
616
|
}
|
|
432
617
|
// 4. Heavy Path (Delegate to RealTimeX)
|
|
433
618
|
const { error: rtxErr } = await supabase
|
|
@@ -477,31 +662,31 @@ export class IngestionService {
|
|
|
477
662
|
if (!filePath)
|
|
478
663
|
throw new Error("No storage path found for this ingestion");
|
|
479
664
|
let isFastPath = false;
|
|
480
|
-
let
|
|
665
|
+
let isMultimodalFastPath = false;
|
|
666
|
+
let multimodalModality = null;
|
|
481
667
|
let extractionContent = "";
|
|
482
668
|
const ext = filename.toLowerCase().split('.').pop() || '';
|
|
483
|
-
const fastExts = ['txt', 'md', 'csv', 'json'];
|
|
484
|
-
const imageExts = ['png', 'jpg', 'jpeg', 'webp'];
|
|
485
669
|
const { data: triageSettingsRow } = await supabase
|
|
486
670
|
.from("user_settings")
|
|
487
671
|
.select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
|
|
488
672
|
.eq("user_id", userId)
|
|
489
673
|
.maybeSingle();
|
|
490
|
-
const
|
|
491
|
-
const
|
|
492
|
-
const
|
|
493
|
-
|
|
674
|
+
const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
|
|
675
|
+
const pdfResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "pdf");
|
|
676
|
+
const llmModel = imageResolution.model;
|
|
677
|
+
const llmProvider = imageResolution.provider;
|
|
678
|
+
if (this.FAST_EXTS.includes(ext)) {
|
|
494
679
|
isFastPath = true;
|
|
495
680
|
extractionContent = await fs.readFile(filePath, "utf-8");
|
|
496
681
|
}
|
|
497
|
-
else if (
|
|
682
|
+
else if (this.IMAGE_EXTS.includes(ext) && imageResolution.shouldAttempt) {
|
|
498
683
|
try {
|
|
499
|
-
const buffer = await fs.readFile(filePath);
|
|
500
|
-
const base64 = buffer.toString('base64');
|
|
501
684
|
const mimeTypeActual = `image/${ext === 'jpg' ? 'jpeg' : ext}`;
|
|
502
|
-
|
|
685
|
+
const dataUrl = await this.fileToDataUrl(filePath, mimeTypeActual);
|
|
686
|
+
extractionContent = this.buildVlmPayloadMarker("image", dataUrl);
|
|
503
687
|
isFastPath = true;
|
|
504
|
-
|
|
688
|
+
isMultimodalFastPath = true;
|
|
689
|
+
multimodalModality = "image";
|
|
505
690
|
logger.info(`Smart Triage: Re-run image ${filename} routed to Fast Path using native VLM (${llmModel}).`);
|
|
506
691
|
Actuator.logEvent(ingestionId, userId, "info", "Triage", { action: "VLM Fast Path selected", type: ext, model: llmModel }, supabase);
|
|
507
692
|
}
|
|
@@ -509,7 +694,7 @@ export class IngestionService {
|
|
|
509
694
|
logger.warn(`Failed to read VLM image ${filename} during rerun. Routing to Heavy Path.`, { err });
|
|
510
695
|
}
|
|
511
696
|
}
|
|
512
|
-
else if (
|
|
697
|
+
else if (this.IMAGE_EXTS.includes(ext)) {
|
|
513
698
|
logger.info(`Smart Triage: Re-run image ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked vision-unsupported.`);
|
|
514
699
|
Actuator.logEvent(ingestionId, userId, "info", "Triage", {
|
|
515
700
|
action: "VLM skipped (model marked unsupported)",
|
|
@@ -527,10 +712,34 @@ export class IngestionService {
|
|
|
527
712
|
isFastPath = true;
|
|
528
713
|
extractionContent = pdfData.text;
|
|
529
714
|
}
|
|
530
|
-
|
|
715
|
+
else if (pdfResolution.shouldAttempt) {
|
|
716
|
+
// Reuse the already-loaded parse buffer; avoid a second readFile in fileToDataUrl.
|
|
717
|
+
const dataUrl = `data:application/pdf;base64,${buffer.toString("base64")}`;
|
|
718
|
+
extractionContent = this.buildVlmPayloadMarker("pdf", dataUrl);
|
|
719
|
+
isFastPath = true;
|
|
720
|
+
isMultimodalFastPath = true;
|
|
721
|
+
multimodalModality = "pdf";
|
|
722
|
+
logger.info(`Smart Triage: Re-run PDF ${filename} routed to multimodal Fast Path using native VLM (${llmModel}).`);
|
|
723
|
+
Actuator.logEvent(ingestionId, userId, "info", "Triage", {
|
|
724
|
+
action: "VLM Fast Path selected",
|
|
725
|
+
type: "pdf",
|
|
726
|
+
modality: "pdf",
|
|
727
|
+
model: llmModel,
|
|
728
|
+
}, supabase);
|
|
729
|
+
}
|
|
730
|
+
else {
|
|
731
|
+
logger.info(`Smart Triage: Re-run PDF ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked PDF-unsupported.`);
|
|
732
|
+
Actuator.logEvent(ingestionId, userId, "info", "Triage", {
|
|
733
|
+
action: "VLM skipped (model marked unsupported)",
|
|
734
|
+
type: "pdf",
|
|
735
|
+
modality: "pdf",
|
|
736
|
+
model: llmModel,
|
|
737
|
+
provider: llmProvider
|
|
738
|
+
}, supabase);
|
|
739
|
+
}
|
|
531
740
|
}
|
|
532
741
|
catch (err) {
|
|
533
|
-
|
|
742
|
+
logger.warn(`Failed to parse PDF ${filename} during rerun. Routing to Heavy Path.`, { err });
|
|
534
743
|
}
|
|
535
744
|
}
|
|
536
745
|
if (isFastPath) {
|
|
@@ -547,44 +756,47 @@ export class IngestionService {
|
|
|
547
756
|
embedding_provider: processingSettingsRow.data?.embedding_provider ?? undefined,
|
|
548
757
|
embedding_model: processingSettingsRow.data?.embedding_model ?? undefined,
|
|
549
758
|
};
|
|
550
|
-
const
|
|
551
|
-
|
|
552
|
-
const
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
759
|
+
const resolvedProvider = llmSettings.llm_provider ?? llmProvider;
|
|
760
|
+
const resolvedModel = llmSettings.llm_model ?? llmModel;
|
|
761
|
+
const runFastPathAttempt = async (attemptContent, attemptType) => {
|
|
762
|
+
const doc = { filePath, text: attemptContent, ingestionId, userId, supabase };
|
|
763
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
764
|
+
const baselineTrace = [];
|
|
765
|
+
// Fire and forget Semantic Embedding Storage for re-runs
|
|
766
|
+
RAGService.chunkAndEmbed(ingestionId, userId, doc.text, supabase, embedSettings).catch(err => {
|
|
767
|
+
logger.error(`RAG embedding failed during rerun for ${ingestionId}`, err);
|
|
768
|
+
});
|
|
769
|
+
baselineTrace.push({
|
|
770
|
+
timestamp: new Date().toISOString(),
|
|
771
|
+
step: "LLM request (baseline extraction)",
|
|
772
|
+
details: {
|
|
773
|
+
provider: resolvedProvider,
|
|
774
|
+
model: resolvedModel,
|
|
775
|
+
mode: isMultimodalFastPath
|
|
776
|
+
? `vision:${multimodalModality ?? "image"}${attemptType === "reencoded_image_retry" ? ":reencoded" : ""}`
|
|
777
|
+
: "text",
|
|
778
|
+
}
|
|
779
|
+
});
|
|
780
|
+
const baselineResult = await PolicyEngine.extractBaseline(doc, { context: baselineConfig?.context, fields: baselineConfig?.fields }, llmSettings);
|
|
781
|
+
const baselineEntities = baselineResult.entities;
|
|
782
|
+
const autoTags = baselineResult.tags;
|
|
783
|
+
baselineTrace.push({
|
|
784
|
+
timestamp: new Date().toISOString(),
|
|
785
|
+
step: "LLM response (baseline extraction)",
|
|
786
|
+
details: {
|
|
787
|
+
entities_count: Object.keys(baselineEntities).length,
|
|
788
|
+
uncertain_count: baselineResult.uncertain_fields.length,
|
|
789
|
+
tags_count: autoTags.length,
|
|
790
|
+
}
|
|
791
|
+
});
|
|
792
|
+
const entityLines = Object.entries(baselineEntities)
|
|
793
|
+
.filter(([, v]) => v != null)
|
|
794
|
+
.map(([k, v]) => `${k}: ${Array.isArray(v) ? v.join(", ") : String(v)}`);
|
|
795
|
+
const enrichedDoc = entityLines.length > 0
|
|
796
|
+
? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
|
|
797
|
+
: doc;
|
|
798
|
+
let finalStatus = "no_match";
|
|
799
|
+
let result;
|
|
588
800
|
const forcedPolicyId = opts.forcedPolicyId?.trim();
|
|
589
801
|
const activePolicies = forcedPolicyId
|
|
590
802
|
? userPolicies.filter((policy) => policy.metadata.id === forcedPolicyId)
|
|
@@ -601,7 +813,7 @@ export class IngestionService {
|
|
|
601
813
|
else {
|
|
602
814
|
result = await PolicyEngine.process(enrichedDoc, llmSettings, baselineEntities);
|
|
603
815
|
}
|
|
604
|
-
policyName = result.matchedPolicy ? activePolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name : undefined;
|
|
816
|
+
const policyName = result.matchedPolicy ? activePolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name : undefined;
|
|
605
817
|
finalStatus = result.status === "fallback" ? "no_match" : result.status;
|
|
606
818
|
const mergedExtracted = { ...baselineEntities, ...result.extractedData };
|
|
607
819
|
// Preserve any human-added tags; merge with freshly generated auto-tags.
|
|
@@ -626,7 +838,7 @@ export class IngestionService {
|
|
|
626
838
|
baseline_config_id: baselineConfig?.id ?? null,
|
|
627
839
|
})
|
|
628
840
|
.eq("id", ingestionId);
|
|
629
|
-
if (
|
|
841
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
630
842
|
const embeddingMeta = this.queueVlmSemanticEmbedding({
|
|
631
843
|
ingestionId,
|
|
632
844
|
userId,
|
|
@@ -635,6 +847,7 @@ export class IngestionService {
|
|
|
635
847
|
policyName,
|
|
636
848
|
extracted: mergedExtracted,
|
|
637
849
|
tags: mergedTags,
|
|
850
|
+
modality: multimodalModality,
|
|
638
851
|
supabase,
|
|
639
852
|
embedSettings,
|
|
640
853
|
});
|
|
@@ -651,38 +864,98 @@ export class IngestionService {
|
|
|
651
864
|
.update({ trace: rerunTrace })
|
|
652
865
|
.eq("id", ingestionId);
|
|
653
866
|
}
|
|
654
|
-
if (
|
|
867
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
655
868
|
await ModelCapabilityService.learnVisionSuccess({
|
|
656
869
|
supabase,
|
|
657
870
|
userId,
|
|
658
|
-
provider:
|
|
659
|
-
model:
|
|
871
|
+
provider: resolvedProvider,
|
|
872
|
+
model: resolvedModel,
|
|
873
|
+
modality: multimodalModality,
|
|
660
874
|
});
|
|
661
875
|
}
|
|
662
876
|
return finalStatus === "matched";
|
|
877
|
+
};
|
|
878
|
+
let terminalError = null;
|
|
879
|
+
try {
|
|
880
|
+
return await runFastPathAttempt(extractionContent, "primary");
|
|
663
881
|
}
|
|
664
|
-
catch (
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
882
|
+
catch (primaryErr) {
|
|
883
|
+
terminalError = primaryErr;
|
|
884
|
+
}
|
|
885
|
+
if (isMultimodalFastPath && multimodalModality === "image") {
|
|
886
|
+
const retryMarker = await this.maybeBuildImageRetryMarker({
|
|
887
|
+
error: terminalError,
|
|
888
|
+
filePath,
|
|
889
|
+
filename,
|
|
890
|
+
provider: resolvedProvider,
|
|
891
|
+
model: resolvedModel,
|
|
892
|
+
phase: "rerun",
|
|
893
|
+
});
|
|
894
|
+
if (retryMarker) {
|
|
895
|
+
this.bumpImageReencodeRetryMetric("attempted", {
|
|
896
|
+
phase: "rerun",
|
|
897
|
+
provider: resolvedProvider,
|
|
898
|
+
model: resolvedModel,
|
|
899
|
+
filename,
|
|
673
900
|
});
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
learned_state: learnedState,
|
|
901
|
+
Actuator.logEvent(ingestionId, userId, "info", "Processing", {
|
|
902
|
+
action: "Retrying VLM with re-encoded image payload",
|
|
903
|
+
provider: resolvedProvider,
|
|
904
|
+
model: resolvedModel,
|
|
679
905
|
}, supabase);
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
906
|
+
try {
|
|
907
|
+
const retryResult = await runFastPathAttempt(retryMarker, "reencoded_image_retry");
|
|
908
|
+
this.bumpImageReencodeRetryMetric("succeeded", {
|
|
909
|
+
phase: "rerun",
|
|
910
|
+
provider: resolvedProvider,
|
|
911
|
+
model: resolvedModel,
|
|
912
|
+
filename,
|
|
913
|
+
});
|
|
914
|
+
Actuator.logEvent(ingestionId, userId, "analysis", "Processing", {
|
|
915
|
+
action: "VLM re-encoded image retry succeeded",
|
|
916
|
+
provider: resolvedProvider,
|
|
917
|
+
model: resolvedModel,
|
|
918
|
+
}, supabase);
|
|
919
|
+
return retryResult;
|
|
920
|
+
}
|
|
921
|
+
catch (retryErr) {
|
|
922
|
+
this.bumpImageReencodeRetryMetric("failed", {
|
|
923
|
+
phase: "rerun",
|
|
924
|
+
provider: resolvedProvider,
|
|
925
|
+
model: resolvedModel,
|
|
926
|
+
filename,
|
|
927
|
+
});
|
|
928
|
+
Actuator.logEvent(ingestionId, userId, "error", "Processing", {
|
|
929
|
+
action: "VLM re-encoded image retry failed",
|
|
930
|
+
provider: resolvedProvider,
|
|
931
|
+
model: resolvedModel,
|
|
932
|
+
error: this.errorToMessage(retryErr),
|
|
933
|
+
}, supabase);
|
|
934
|
+
terminalError = retryErr;
|
|
935
|
+
}
|
|
684
936
|
}
|
|
685
937
|
}
|
|
938
|
+
const msg = this.errorToMessage(terminalError);
|
|
939
|
+
if (isMultimodalFastPath && multimodalModality) {
|
|
940
|
+
const learnedState = await ModelCapabilityService.learnVisionFailure({
|
|
941
|
+
supabase,
|
|
942
|
+
userId,
|
|
943
|
+
provider: resolvedProvider,
|
|
944
|
+
model: resolvedModel,
|
|
945
|
+
error: terminalError,
|
|
946
|
+
modality: multimodalModality,
|
|
947
|
+
});
|
|
948
|
+
logger.warn(`VLM extraction failed during rerun for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
|
|
949
|
+
Actuator.logEvent(ingestionId, userId, "error", "Processing", {
|
|
950
|
+
action: "VLM Failed, Fallback to Heavy",
|
|
951
|
+
error: msg,
|
|
952
|
+
learned_state: learnedState,
|
|
953
|
+
}, supabase);
|
|
954
|
+
isFastPath = false; // Trigger heavy path fallthrough
|
|
955
|
+
}
|
|
956
|
+
else {
|
|
957
|
+
throw terminalError instanceof Error ? terminalError : new Error(msg); // Re-throw to caller
|
|
958
|
+
}
|
|
686
959
|
}
|
|
687
960
|
// Re-delegate to rtx_activities
|
|
688
961
|
await supabase
|