@realtimex/folio 0.1.12 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,9 @@
1
1
  import fs from "fs/promises";
2
+ import { execFile } from "child_process";
3
+ import os from "os";
4
+ import path from "path";
2
5
  import { PDFParse } from "pdf-parse";
6
+ import { promisify } from "util";
3
7
  import { createLogger } from "../utils/logger.js";
4
8
  import { PolicyLoader } from "./PolicyLoader.js";
5
9
  import { PolicyEngine } from "./PolicyEngine.js";
@@ -11,6 +15,7 @@ import { RAGService } from "./RAGService.js";
11
15
  import { SDKService } from "./SDKService.js";
12
16
  import { ModelCapabilityService } from "./ModelCapabilityService.js";
13
17
  const logger = createLogger("IngestionService");
18
+ const execFileAsync = promisify(execFile);
14
19
  /**
15
20
  * Multi-signal classifier that decides whether pdf-parse extracted enough
16
21
  * real text to skip GPU OCR and go straight to the local LLM (Fast Path).
@@ -53,6 +58,15 @@ function isPdfTextExtractable(pdfData) {
53
58
  export class IngestionService {
54
59
  static FAST_EXTS = ["txt", "md", "csv", "json"];
55
60
  static IMAGE_EXTS = ["png", "jpg", "jpeg", "webp"];
61
+ static IMAGE_REENCODE_TIMEOUT_MS = 15000;
62
+ static IMAGE_REENCODE_RETRY_ENABLED = (process.env.FOLIO_VLM_IMAGE_REENCODE_RETRY_ENABLED ?? "true").toLowerCase() !== "false";
63
+ static IMAGE_REENCODE_RETRY_METRICS = {
64
+ attempted: 0,
65
+ succeeded: 0,
66
+ failed: 0,
67
+ skipped_disabled: 0,
68
+ skipped_unavailable: 0,
69
+ };
56
70
  static NON_IDEMPOTENT_ACTION_TYPES = new Set([
57
71
  "append_to_google_sheet",
58
72
  "webhook",
@@ -172,6 +186,73 @@ export class IngestionService {
172
186
  const base64 = buffer.toString("base64");
173
187
  return `data:${mimeType};base64,${base64}`;
174
188
  }
189
+ static resolveIngestionLlmSettings(settingsRow) {
190
+ return {
191
+ llm_provider: settingsRow?.ingestion_llm_provider ?? settingsRow?.llm_provider ?? undefined,
192
+ llm_model: settingsRow?.ingestion_llm_model ?? settingsRow?.llm_model ?? undefined,
193
+ };
194
+ }
195
+ static errorToMessage(error) {
196
+ if (error instanceof Error)
197
+ return error.message;
198
+ if (typeof error === "string")
199
+ return error;
200
+ if (error && typeof error === "object") {
201
+ const candidate = error;
202
+ if (typeof candidate.message === "string")
203
+ return candidate.message;
204
+ }
205
+ return String(error ?? "");
206
+ }
207
+ static isInvalidModelError(error) {
208
+ const message = this.errorToMessage(error).toLowerCase();
209
+ return message.includes("invalid model");
210
+ }
211
+ static async reencodeImageToPngDataUrl(filePath) {
212
+ const tempOutputPath = path.join(os.tmpdir(), `folio-vlm-reencode-${Date.now()}-${Math.random().toString(16).slice(2)}.png`);
213
+ try {
214
+ await execFileAsync("sips", ["-s", "format", "png", filePath, "--out", tempOutputPath], {
215
+ timeout: this.IMAGE_REENCODE_TIMEOUT_MS,
216
+ maxBuffer: 1024 * 1024,
217
+ });
218
+ const pngBuffer = await fs.readFile(tempOutputPath);
219
+ return `data:image/png;base64,${pngBuffer.toString("base64")}`;
220
+ }
221
+ catch {
222
+ return null;
223
+ }
224
+ finally {
225
+ await fs.unlink(tempOutputPath).catch(() => undefined);
226
+ }
227
+ }
228
+ static async maybeBuildImageRetryMarker(opts) {
229
+ if (!this.isInvalidModelError(opts.error))
230
+ return null;
231
+ if (!this.IMAGE_REENCODE_RETRY_ENABLED) {
232
+ this.bumpImageReencodeRetryMetric("skipped_disabled", opts);
233
+ logger.info(`VLM ${opts.phase} retry skipped for ${opts.filename}: re-encode retry disabled (${opts.provider}/${opts.model}).`);
234
+ return null;
235
+ }
236
+ const retryDataUrl = await this.reencodeImageToPngDataUrl(opts.filePath);
237
+ if (!retryDataUrl) {
238
+ this.bumpImageReencodeRetryMetric("skipped_unavailable", opts);
239
+ logger.warn(`VLM ${opts.phase} retry skipped for ${opts.filename}: image re-encode unavailable (${opts.provider}/${opts.model}).`);
240
+ return null;
241
+ }
242
+ logger.warn(`VLM ${opts.phase} failed for ${opts.filename} with invalid model. Retrying once with re-encoded image payload (${opts.provider}/${opts.model}).`);
243
+ return this.buildVlmPayloadMarker("image", retryDataUrl);
244
+ }
245
+ static bumpImageReencodeRetryMetric(outcome, meta) {
246
+ this.IMAGE_REENCODE_RETRY_METRICS[outcome] += 1;
247
+ logger.info("VLM image re-encode retry metric", {
248
+ outcome,
249
+ phase: meta.phase,
250
+ provider: meta.provider,
251
+ model: meta.model,
252
+ filename: meta.filename,
253
+ counters: { ...this.IMAGE_REENCODE_RETRY_METRICS },
254
+ });
255
+ }
175
256
  /**
176
257
  * Ingest a document using Hybrid Routing Architecture.
177
258
  */
@@ -236,7 +317,7 @@ export class IngestionService {
236
317
  // Pre-fetch settings to decide whether we should attempt VLM.
237
318
  const { data: triageSettingsRow } = await supabase
238
319
  .from("user_settings")
239
- .select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
320
+ .select("llm_provider, llm_model, ingestion_llm_provider, ingestion_llm_model, embedding_provider, embedding_model, vision_model_capabilities")
240
321
  .eq("user_id", userId)
241
322
  .maybeSingle();
242
323
  const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
@@ -317,130 +398,191 @@ export class IngestionService {
317
398
  // 3. Fast Path — fetch all dependencies in parallel
318
399
  const [userPolicies, processingSettingsRow, baselineConfig] = await Promise.all([
319
400
  PolicyLoader.load(false, supabase),
320
- supabase.from("user_settings").select("llm_provider, llm_model, embedding_provider, embedding_model").eq("user_id", userId).maybeSingle(),
401
+ supabase.from("user_settings").select("llm_provider, llm_model, ingestion_llm_provider, ingestion_llm_model, embedding_provider, embedding_model").eq("user_id", userId).maybeSingle(),
321
402
  BaselineConfigService.getActive(supabase, userId),
322
403
  ]);
323
- const llmSettings = {
324
- llm_provider: processingSettingsRow.data?.llm_provider ?? undefined,
325
- llm_model: processingSettingsRow.data?.llm_model ?? undefined,
326
- };
404
+ const llmSettings = this.resolveIngestionLlmSettings(processingSettingsRow.data);
327
405
  const embedSettings = {
328
406
  embedding_provider: processingSettingsRow.data?.embedding_provider ?? undefined,
329
407
  embedding_model: processingSettingsRow.data?.embedding_model ?? undefined,
330
408
  };
331
- const doc = { filePath: filePath, text: extractionContent, ingestionId: ingestion.id, userId, supabase };
332
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
333
- const baselineTrace = [];
334
- // Fire and forget Semantic Embedding Storage
335
- RAGService.chunkAndEmbed(ingestion.id, userId, doc.text, supabase, embedSettings).catch(err => {
336
- logger.error(`RAG embedding failed for ${ingestion.id}`, err);
337
- });
338
- // 4. Stage 1: Baseline extraction (always runs, LLM call 1 of max 2)
339
- baselineTrace.push({
340
- timestamp: new Date().toISOString(),
341
- step: "LLM request (baseline extraction)",
342
- details: {
343
- provider: llmSettings.llm_provider ?? llmProvider,
344
- model: llmSettings.llm_model ?? llmModel,
345
- mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
409
+ const resolvedProvider = llmSettings.llm_provider ?? llmProvider;
410
+ const resolvedModel = llmSettings.llm_model ?? llmModel;
411
+ const runFastPathAttempt = async (attemptContent, attemptType) => {
412
+ const doc = { filePath: filePath, text: attemptContent, ingestionId: ingestion.id, userId, supabase };
413
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
414
+ const baselineTrace = [];
415
+ // Fire and forget Semantic Embedding Storage
416
+ RAGService.chunkAndEmbed(ingestion.id, userId, doc.text, supabase, embedSettings).catch(err => {
417
+ logger.error(`RAG embedding failed for ${ingestion.id}`, err);
418
+ });
419
+ // 4. Stage 1: Baseline extraction (always runs, LLM call 1 of max 2)
420
+ baselineTrace.push({
421
+ timestamp: new Date().toISOString(),
422
+ step: "LLM request (baseline extraction)",
423
+ details: {
424
+ provider: resolvedProvider,
425
+ model: resolvedModel,
426
+ mode: isMultimodalFastPath
427
+ ? `vision:${multimodalModality ?? "image"}${attemptType === "reencoded_image_retry" ? ":reencoded" : ""}`
428
+ : "text",
429
+ }
430
+ });
431
+ const baselineResult = await PolicyEngine.extractBaseline(doc, { context: baselineConfig?.context, fields: baselineConfig?.fields }, llmSettings);
432
+ const baselineEntities = baselineResult.entities;
433
+ const autoTags = baselineResult.tags;
434
+ baselineTrace.push({
435
+ timestamp: new Date().toISOString(),
436
+ step: "LLM response (baseline extraction)",
437
+ details: {
438
+ entities_count: Object.keys(baselineEntities).length,
439
+ uncertain_count: baselineResult.uncertain_fields.length,
440
+ tags_count: autoTags.length,
441
+ }
442
+ });
443
+ // Enrich the document with extracted entities so policy keyword/semantic
444
+ // conditions can match against semantic field values (e.g. document_type:
445
+ // "invoice") even when those exact words don't appear in the raw text.
446
+ const entityLines = Object.entries(baselineEntities)
447
+ .filter(([, v]) => v != null)
448
+ .map(([k, v]) => `${k}: ${Array.isArray(v) ? v.join(", ") : String(v)}`);
449
+ const enrichedDoc = entityLines.length > 0
450
+ ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
451
+ : doc;
452
+ // 5. Stage 2: Policy matching + policy-specific field extraction
453
+ let result;
454
+ if (userPolicies.length > 0) {
455
+ result = await PolicyEngine.processWithPolicies(enrichedDoc, userPolicies, llmSettings, baselineEntities);
346
456
  }
347
- });
348
- const baselineResult = await PolicyEngine.extractBaseline(doc, { context: baselineConfig?.context, fields: baselineConfig?.fields }, llmSettings);
349
- const baselineEntities = baselineResult.entities;
350
- const autoTags = baselineResult.tags;
351
- baselineTrace.push({
352
- timestamp: new Date().toISOString(),
353
- step: "LLM response (baseline extraction)",
354
- details: {
355
- entities_count: Object.keys(baselineEntities).length,
356
- uncertain_count: baselineResult.uncertain_fields.length,
357
- tags_count: autoTags.length,
457
+ else {
458
+ result = await PolicyEngine.process(enrichedDoc, llmSettings, baselineEntities);
358
459
  }
359
- });
360
- // Enrich the document with extracted entities so policy keyword/semantic
361
- // conditions can match against semantic field values (e.g. document_type:
362
- // "invoice") even when those exact words don't appear in the raw text.
363
- const entityLines = Object.entries(baselineEntities)
364
- .filter(([, v]) => v != null)
365
- .map(([k, v]) => `${k}: ${Array.isArray(v) ? v.join(", ") : String(v)}`);
366
- const enrichedDoc = entityLines.length > 0
367
- ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
368
- : doc;
369
- // 5. Stage 2: Policy matching + policy-specific field extraction
370
- let result;
371
- if (userPolicies.length > 0) {
372
- result = await PolicyEngine.processWithPolicies(enrichedDoc, userPolicies, llmSettings, baselineEntities);
460
+ const policyName = userPolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name;
461
+ const finalStatus = result.status === "fallback" ? "no_match" : result.status;
462
+ // Merge: baseline entities are the foundation; policy-specific fields
463
+ // are overlaid on top so more precise extractions take precedence.
464
+ const mergedExtracted = { ...baselineEntities, ...result.extractedData };
465
+ let finalTrace = [...baselineTrace, ...(result.trace || [])];
466
+ const { data: updatedIngestion } = await supabase
467
+ .from("ingestions")
468
+ .update({
469
+ status: finalStatus,
470
+ policy_id: result.matchedPolicy,
471
+ policy_name: policyName,
472
+ extracted: mergedExtracted,
473
+ actions_taken: result.actionsExecuted,
474
+ trace: finalTrace,
475
+ tags: autoTags,
476
+ baseline_config_id: baselineConfig?.id ?? null,
477
+ })
478
+ .eq("id", ingestion.id)
479
+ .select()
480
+ .single();
481
+ if (isMultimodalFastPath && multimodalModality) {
482
+ const embeddingMeta = this.queueVlmSemanticEmbedding({
483
+ ingestionId: ingestion.id,
484
+ userId,
485
+ filename,
486
+ finalStatus,
487
+ policyName,
488
+ extracted: mergedExtracted,
489
+ tags: autoTags,
490
+ modality: multimodalModality,
491
+ supabase,
492
+ embedSettings,
493
+ });
494
+ finalTrace = [
495
+ ...finalTrace,
496
+ {
497
+ timestamp: new Date().toISOString(),
498
+ step: "Queued synthetic VLM embedding",
499
+ details: embeddingMeta,
500
+ }
501
+ ];
502
+ await supabase
503
+ .from("ingestions")
504
+ .update({ trace: finalTrace })
505
+ .eq("id", ingestion.id);
506
+ }
507
+ if (isMultimodalFastPath && multimodalModality) {
508
+ await ModelCapabilityService.learnVisionSuccess({
509
+ supabase,
510
+ userId,
511
+ provider: resolvedProvider,
512
+ model: resolvedModel,
513
+ modality: multimodalModality,
514
+ });
515
+ }
516
+ return updatedIngestion;
517
+ };
518
+ let terminalError = null;
519
+ try {
520
+ return await runFastPathAttempt(extractionContent, "primary");
373
521
  }
374
- else {
375
- result = await PolicyEngine.process(enrichedDoc, llmSettings, baselineEntities);
522
+ catch (primaryErr) {
523
+ terminalError = primaryErr;
376
524
  }
377
- const policyName = userPolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name;
378
- const finalStatus = result.status === "fallback" ? "no_match" : result.status;
379
- // Merge: baseline entities are the foundation; policy-specific fields
380
- // are overlaid on top so more precise extractions take precedence.
381
- const mergedExtracted = { ...baselineEntities, ...result.extractedData };
382
- let finalTrace = [...baselineTrace, ...(result.trace || [])];
383
- const { data: updatedIngestion } = await supabase
384
- .from("ingestions")
385
- .update({
386
- status: finalStatus,
387
- policy_id: result.matchedPolicy,
388
- policy_name: policyName,
389
- extracted: mergedExtracted,
390
- actions_taken: result.actionsExecuted,
391
- trace: finalTrace,
392
- tags: autoTags,
393
- baseline_config_id: baselineConfig?.id ?? null,
394
- })
395
- .eq("id", ingestion.id)
396
- .select()
397
- .single();
398
- if (isMultimodalFastPath && multimodalModality) {
399
- const embeddingMeta = this.queueVlmSemanticEmbedding({
400
- ingestionId: ingestion.id,
401
- userId,
525
+ if (isMultimodalFastPath && multimodalModality === "image") {
526
+ const retryMarker = await this.maybeBuildImageRetryMarker({
527
+ error: terminalError,
528
+ filePath,
402
529
  filename,
403
- finalStatus,
404
- policyName,
405
- extracted: mergedExtracted,
406
- tags: autoTags,
407
- modality: multimodalModality,
408
- supabase,
409
- embedSettings,
530
+ provider: resolvedProvider,
531
+ model: resolvedModel,
532
+ phase: "ingest",
410
533
  });
411
- finalTrace = [
412
- ...finalTrace,
413
- {
414
- timestamp: new Date().toISOString(),
415
- step: "Queued synthetic VLM embedding",
416
- details: embeddingMeta,
534
+ if (retryMarker) {
535
+ this.bumpImageReencodeRetryMetric("attempted", {
536
+ phase: "ingest",
537
+ provider: resolvedProvider,
538
+ model: resolvedModel,
539
+ filename,
540
+ });
541
+ Actuator.logEvent(ingestion.id, userId, "info", "Processing", {
542
+ action: "Retrying VLM with re-encoded image payload",
543
+ provider: resolvedProvider,
544
+ model: resolvedModel,
545
+ }, supabase);
546
+ try {
547
+ const retryResult = await runFastPathAttempt(retryMarker, "reencoded_image_retry");
548
+ this.bumpImageReencodeRetryMetric("succeeded", {
549
+ phase: "ingest",
550
+ provider: resolvedProvider,
551
+ model: resolvedModel,
552
+ filename,
553
+ });
554
+ Actuator.logEvent(ingestion.id, userId, "analysis", "Processing", {
555
+ action: "VLM re-encoded image retry succeeded",
556
+ provider: resolvedProvider,
557
+ model: resolvedModel,
558
+ }, supabase);
559
+ return retryResult;
417
560
  }
418
- ];
419
- await supabase
420
- .from("ingestions")
421
- .update({ trace: finalTrace })
422
- .eq("id", ingestion.id);
423
- }
424
- if (isMultimodalFastPath && multimodalModality) {
425
- await ModelCapabilityService.learnVisionSuccess({
426
- supabase,
427
- userId,
428
- provider: llmSettings.llm_provider ?? llmProvider,
429
- model: llmSettings.llm_model ?? llmModel,
430
- modality: multimodalModality,
431
- });
561
+ catch (retryErr) {
562
+ this.bumpImageReencodeRetryMetric("failed", {
563
+ phase: "ingest",
564
+ provider: resolvedProvider,
565
+ model: resolvedModel,
566
+ filename,
567
+ });
568
+ Actuator.logEvent(ingestion.id, userId, "error", "Processing", {
569
+ action: "VLM re-encoded image retry failed",
570
+ provider: resolvedProvider,
571
+ model: resolvedModel,
572
+ error: this.errorToMessage(retryErr),
573
+ }, supabase);
574
+ terminalError = retryErr;
575
+ }
576
+ }
432
577
  }
433
- return updatedIngestion;
434
- }
435
- catch (err) {
436
- const msg = err instanceof Error ? err.message : String(err);
578
+ const msg = this.errorToMessage(terminalError);
437
579
  if (isMultimodalFastPath && multimodalModality) {
438
580
  const learnedState = await ModelCapabilityService.learnVisionFailure({
439
581
  supabase,
440
582
  userId,
441
- provider: llmProvider,
442
- model: llmModel,
443
- error: err,
583
+ provider: resolvedProvider,
584
+ model: resolvedModel,
585
+ error: terminalError,
444
586
  modality: multimodalModality,
445
587
  });
446
588
  logger.warn(`VLM extraction failed for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
@@ -463,6 +605,17 @@ export class IngestionService {
463
605
  return updatedIngestion;
464
606
  }
465
607
  }
608
+ catch (err) {
609
+ const msg = this.errorToMessage(err);
610
+ Actuator.logEvent(ingestion.id, userId, "error", "Processing", { error: msg }, supabase);
611
+ const { data: updatedIngestion } = await supabase
612
+ .from("ingestions")
613
+ .update({ status: "error", error_message: msg })
614
+ .eq("id", ingestion.id)
615
+ .select()
616
+ .single();
617
+ return updatedIngestion;
618
+ }
466
619
  }
467
620
  // 4. Heavy Path (Delegate to RealTimeX)
468
621
  const { error: rtxErr } = await supabase
@@ -518,7 +671,7 @@ export class IngestionService {
518
671
  const ext = filename.toLowerCase().split('.').pop() || '';
519
672
  const { data: triageSettingsRow } = await supabase
520
673
  .from("user_settings")
521
- .select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
674
+ .select("llm_provider, llm_model, ingestion_llm_provider, ingestion_llm_model, embedding_provider, embedding_model, vision_model_capabilities")
522
675
  .eq("user_id", userId)
523
676
  .maybeSingle();
524
677
  const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
@@ -595,55 +748,55 @@ export class IngestionService {
595
748
  if (isFastPath) {
596
749
  const [userPolicies, processingSettingsRow, baselineConfig] = await Promise.all([
597
750
  PolicyLoader.load(false, supabase),
598
- supabase.from("user_settings").select("llm_provider, llm_model, embedding_provider, embedding_model").eq("user_id", userId).maybeSingle(),
751
+ supabase.from("user_settings").select("llm_provider, llm_model, ingestion_llm_provider, ingestion_llm_model, embedding_provider, embedding_model").eq("user_id", userId).maybeSingle(),
599
752
  BaselineConfigService.getActive(supabase, userId),
600
753
  ]);
601
- const llmSettings = {
602
- llm_provider: processingSettingsRow.data?.llm_provider ?? undefined,
603
- llm_model: processingSettingsRow.data?.llm_model ?? undefined,
604
- };
754
+ const llmSettings = this.resolveIngestionLlmSettings(processingSettingsRow.data);
605
755
  const embedSettings = {
606
756
  embedding_provider: processingSettingsRow.data?.embedding_provider ?? undefined,
607
757
  embedding_model: processingSettingsRow.data?.embedding_model ?? undefined,
608
758
  };
609
- const doc = { filePath, text: extractionContent, ingestionId, userId, supabase };
610
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
611
- const baselineTrace = [];
612
- // Fire and forget Semantic Embedding Storage for re-runs
613
- RAGService.chunkAndEmbed(ingestionId, userId, doc.text, supabase, embedSettings).catch(err => {
614
- logger.error(`RAG embedding failed during rerun for ${ingestionId}`, err);
615
- });
616
- baselineTrace.push({
617
- timestamp: new Date().toISOString(),
618
- step: "LLM request (baseline extraction)",
619
- details: {
620
- provider: llmSettings.llm_provider ?? llmProvider,
621
- model: llmSettings.llm_model ?? llmModel,
622
- mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
623
- }
624
- });
625
- const baselineResult = await PolicyEngine.extractBaseline(doc, { context: baselineConfig?.context, fields: baselineConfig?.fields }, llmSettings);
626
- const baselineEntities = baselineResult.entities;
627
- const autoTags = baselineResult.tags;
628
- baselineTrace.push({
629
- timestamp: new Date().toISOString(),
630
- step: "LLM response (baseline extraction)",
631
- details: {
632
- entities_count: Object.keys(baselineEntities).length,
633
- uncertain_count: baselineResult.uncertain_fields.length,
634
- tags_count: autoTags.length,
635
- }
636
- });
637
- const entityLines = Object.entries(baselineEntities)
638
- .filter(([, v]) => v != null)
639
- .map(([k, v]) => `${k}: ${Array.isArray(v) ? v.join(", ") : String(v)}`);
640
- const enrichedDoc = entityLines.length > 0
641
- ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
642
- : doc;
643
- let finalStatus = "no_match";
644
- let result;
645
- let policyName;
646
- try {
759
+ const resolvedProvider = llmSettings.llm_provider ?? llmProvider;
760
+ const resolvedModel = llmSettings.llm_model ?? llmModel;
761
+ const runFastPathAttempt = async (attemptContent, attemptType) => {
762
+ const doc = { filePath, text: attemptContent, ingestionId, userId, supabase };
763
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
764
+ const baselineTrace = [];
765
+ // Fire and forget Semantic Embedding Storage for re-runs
766
+ RAGService.chunkAndEmbed(ingestionId, userId, doc.text, supabase, embedSettings).catch(err => {
767
+ logger.error(`RAG embedding failed during rerun for ${ingestionId}`, err);
768
+ });
769
+ baselineTrace.push({
770
+ timestamp: new Date().toISOString(),
771
+ step: "LLM request (baseline extraction)",
772
+ details: {
773
+ provider: resolvedProvider,
774
+ model: resolvedModel,
775
+ mode: isMultimodalFastPath
776
+ ? `vision:${multimodalModality ?? "image"}${attemptType === "reencoded_image_retry" ? ":reencoded" : ""}`
777
+ : "text",
778
+ }
779
+ });
780
+ const baselineResult = await PolicyEngine.extractBaseline(doc, { context: baselineConfig?.context, fields: baselineConfig?.fields }, llmSettings);
781
+ const baselineEntities = baselineResult.entities;
782
+ const autoTags = baselineResult.tags;
783
+ baselineTrace.push({
784
+ timestamp: new Date().toISOString(),
785
+ step: "LLM response (baseline extraction)",
786
+ details: {
787
+ entities_count: Object.keys(baselineEntities).length,
788
+ uncertain_count: baselineResult.uncertain_fields.length,
789
+ tags_count: autoTags.length,
790
+ }
791
+ });
792
+ const entityLines = Object.entries(baselineEntities)
793
+ .filter(([, v]) => v != null)
794
+ .map(([k, v]) => `${k}: ${Array.isArray(v) ? v.join(", ") : String(v)}`);
795
+ const enrichedDoc = entityLines.length > 0
796
+ ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
797
+ : doc;
798
+ let finalStatus = "no_match";
799
+ let result;
647
800
  const forcedPolicyId = opts.forcedPolicyId?.trim();
648
801
  const activePolicies = forcedPolicyId
649
802
  ? userPolicies.filter((policy) => policy.metadata.id === forcedPolicyId)
@@ -660,7 +813,7 @@ export class IngestionService {
660
813
  else {
661
814
  result = await PolicyEngine.process(enrichedDoc, llmSettings, baselineEntities);
662
815
  }
663
- policyName = result.matchedPolicy ? activePolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name : undefined;
816
+ const policyName = result.matchedPolicy ? activePolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name : undefined;
664
817
  finalStatus = result.status === "fallback" ? "no_match" : result.status;
665
818
  const mergedExtracted = { ...baselineEntities, ...result.extractedData };
666
819
  // Preserve any human-added tags; merge with freshly generated auto-tags.
@@ -715,36 +868,94 @@ export class IngestionService {
715
868
  await ModelCapabilityService.learnVisionSuccess({
716
869
  supabase,
717
870
  userId,
718
- provider: llmSettings.llm_provider ?? llmProvider,
719
- model: llmSettings.llm_model ?? llmModel,
871
+ provider: resolvedProvider,
872
+ model: resolvedModel,
720
873
  modality: multimodalModality,
721
874
  });
722
875
  }
723
876
  return finalStatus === "matched";
877
+ };
878
+ let terminalError = null;
879
+ try {
880
+ return await runFastPathAttempt(extractionContent, "primary");
724
881
  }
725
- catch (err) {
726
- const msg = err instanceof Error ? err.message : String(err);
727
- if (isMultimodalFastPath && multimodalModality) {
728
- const learnedState = await ModelCapabilityService.learnVisionFailure({
729
- supabase,
730
- userId,
731
- provider: llmProvider,
732
- model: llmModel,
733
- error: err,
734
- modality: multimodalModality,
882
+ catch (primaryErr) {
883
+ terminalError = primaryErr;
884
+ }
885
+ if (isMultimodalFastPath && multimodalModality === "image") {
886
+ const retryMarker = await this.maybeBuildImageRetryMarker({
887
+ error: terminalError,
888
+ filePath,
889
+ filename,
890
+ provider: resolvedProvider,
891
+ model: resolvedModel,
892
+ phase: "rerun",
893
+ });
894
+ if (retryMarker) {
895
+ this.bumpImageReencodeRetryMetric("attempted", {
896
+ phase: "rerun",
897
+ provider: resolvedProvider,
898
+ model: resolvedModel,
899
+ filename,
735
900
  });
736
- logger.warn(`VLM extraction failed during rerun for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
737
- Actuator.logEvent(ingestionId, userId, "error", "Processing", {
738
- action: "VLM Failed, Fallback to Heavy",
739
- error: msg,
740
- learned_state: learnedState,
901
+ Actuator.logEvent(ingestionId, userId, "info", "Processing", {
902
+ action: "Retrying VLM with re-encoded image payload",
903
+ provider: resolvedProvider,
904
+ model: resolvedModel,
741
905
  }, supabase);
742
- isFastPath = false; // Trigger heavy path fallthrough
743
- }
744
- else {
745
- throw err; // Re-throw to caller
906
+ try {
907
+ const retryResult = await runFastPathAttempt(retryMarker, "reencoded_image_retry");
908
+ this.bumpImageReencodeRetryMetric("succeeded", {
909
+ phase: "rerun",
910
+ provider: resolvedProvider,
911
+ model: resolvedModel,
912
+ filename,
913
+ });
914
+ Actuator.logEvent(ingestionId, userId, "analysis", "Processing", {
915
+ action: "VLM re-encoded image retry succeeded",
916
+ provider: resolvedProvider,
917
+ model: resolvedModel,
918
+ }, supabase);
919
+ return retryResult;
920
+ }
921
+ catch (retryErr) {
922
+ this.bumpImageReencodeRetryMetric("failed", {
923
+ phase: "rerun",
924
+ provider: resolvedProvider,
925
+ model: resolvedModel,
926
+ filename,
927
+ });
928
+ Actuator.logEvent(ingestionId, userId, "error", "Processing", {
929
+ action: "VLM re-encoded image retry failed",
930
+ provider: resolvedProvider,
931
+ model: resolvedModel,
932
+ error: this.errorToMessage(retryErr),
933
+ }, supabase);
934
+ terminalError = retryErr;
935
+ }
746
936
  }
747
937
  }
938
+ const msg = this.errorToMessage(terminalError);
939
+ if (isMultimodalFastPath && multimodalModality) {
940
+ const learnedState = await ModelCapabilityService.learnVisionFailure({
941
+ supabase,
942
+ userId,
943
+ provider: resolvedProvider,
944
+ model: resolvedModel,
945
+ error: terminalError,
946
+ modality: multimodalModality,
947
+ });
948
+ logger.warn(`VLM extraction failed during rerun for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
949
+ Actuator.logEvent(ingestionId, userId, "error", "Processing", {
950
+ action: "VLM Failed, Fallback to Heavy",
951
+ error: msg,
952
+ learned_state: learnedState,
953
+ }, supabase);
954
+ isFastPath = false; // Trigger heavy path fallthrough
955
+ }
956
+ else {
957
+ throw terminalError instanceof Error ? terminalError : new Error(msg); // Re-throw to caller
958
+ }
748
959
  }
749
960
  // Re-delegate to rtx_activities
750
961
  await supabase