@realtimex/folio 0.1.12 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,10 @@
1
1
  import type { SupabaseClient } from "@supabase/supabase-js";
2
2
  import fs from "fs/promises";
3
+ import { execFile } from "child_process";
4
+ import os from "os";
5
+ import path from "path";
3
6
  import { PDFParse } from "pdf-parse";
7
+ import { promisify } from "util";
4
8
  import { createLogger } from "../utils/logger.js";
5
9
  import { PolicyLoader } from "./PolicyLoader.js";
6
10
  import type { FolioPolicy } from "./PolicyLoader.js";
@@ -14,6 +18,7 @@ import { SDKService } from "./SDKService.js";
14
18
  import { ModelCapabilityService, type VisionCapabilityModality } from "./ModelCapabilityService.js";
15
19
 
16
20
  const logger = createLogger("IngestionService");
21
+ const execFileAsync = promisify(execFile);
17
22
 
18
23
  /**
19
24
  * Multi-signal classifier that decides whether pdf-parse extracted enough
@@ -91,6 +96,15 @@ export interface Ingestion {
91
96
  export class IngestionService {
92
97
  private static readonly FAST_EXTS = ["txt", "md", "csv", "json"] as const;
93
98
  private static readonly IMAGE_EXTS = ["png", "jpg", "jpeg", "webp"] as const;
99
/** Timeout, in milliseconds, handed to the `sips` child process that re-encodes an image. */
private static readonly IMAGE_REENCODE_TIMEOUT_MS = 15_000;

/**
 * Switch for the "re-encode and retry once" behaviour. It is on unless the
 * FOLIO_VLM_IMAGE_REENCODE_RETRY_ENABLED environment variable is exactly
 * "false" (compared case-insensitively); an unset variable leaves it on.
 */
private static readonly IMAGE_REENCODE_RETRY_ENABLED =
  process.env.FOLIO_VLM_IMAGE_REENCODE_RETRY_ENABLED?.toLowerCase() !== "false";

/**
 * Per-process counters of re-encode retry outcomes. They are mutated by
 * bumpImageReencodeRetryMetric and copied into every metric log line.
 */
private static readonly IMAGE_REENCODE_RETRY_METRICS = {
  attempted: 0,
  succeeded: 0,
  failed: 0,
  skipped_disabled: 0,
  skipped_unavailable: 0,
};
94
108
 
95
109
  private static readonly NON_IDEMPOTENT_ACTION_TYPES = new Set([
96
110
  "append_to_google_sheet",
@@ -248,6 +262,102 @@ export class IngestionService {
248
262
  return `data:${mimeType};base64,${base64}`;
249
263
  }
250
264
 
265
/**
 * Choose the LLM provider and model to use for ingestion.
 *
 * Each field is resolved on its own: the ingestion-specific column is used
 * when it is neither null nor undefined, otherwise the general column is
 * used, and the field is `undefined` when neither is set. Because the two
 * fields are resolved independently, the provider and model can come from
 * different columns.
 *
 * @param settingsRow - The user settings row, or null/undefined if none was loaded.
 * @returns The resolved `llm_provider` and `llm_model`, each possibly undefined.
 */
static resolveIngestionLlmSettings(settingsRow: {
  llm_provider?: string | null;
  llm_model?: string | null;
  ingestion_llm_provider?: string | null;
  ingestion_llm_model?: string | null;
} | null | undefined): { llm_provider?: string; llm_model?: string } {
  const ingestionProvider = settingsRow?.ingestion_llm_provider;
  const generalProvider = settingsRow?.llm_provider;
  const ingestionModel = settingsRow?.ingestion_llm_model;
  const generalModel = settingsRow?.llm_model;

  const llm_provider = ingestionProvider ?? generalProvider ?? undefined;
  const llm_model = ingestionModel ?? generalModel ?? undefined;
  return { llm_provider, llm_model };
}
276
+
277
/**
 * Turn any thrown value into a human-readable message.
 *
 * Resolution order:
 *  1. `Error` instances yield their `message`.
 *  2. Strings are returned unchanged.
 *  3. `null` / `undefined` yield the empty string.
 *  4. Objects with a string `message` property yield that property.
 *  5. Other objects yield their own string form; when that is only the
 *     generic "[object Object]", a JSON rendering is used instead so nested
 *     payloads (for example `{ error: { message: ... } }`) remain visible
 *     to callers and to substring checks on the message.
 *
 * @param error - Whatever was caught or returned as an error.
 * @returns A message string; never throws.
 */
private static errorToMessage(error: unknown): string {
  if (error instanceof Error) return error.message;
  if (typeof error === "string") return error;
  if (error == null) return "";

  if (typeof error === "object") {
    const candidate = error as Record<string, unknown>;
    if (typeof candidate.message === "string") return candidate.message;
  }

  let text: string;
  try {
    text = String(error);
  } catch {
    // Objects without a usable toString (e.g. null-prototype objects) cannot be stringified.
    text = "[object Object]";
  }
  if (text !== "[object Object]") return text;

  try {
    const serialized: unknown = JSON.stringify(error);
    if (typeof serialized === "string" && serialized !== "{}") return serialized;
  } catch {
    // Circular references or BigInt values: keep the generic text below.
  }
  return text;
}
286
+
287
/**
 * Report whether an error's message mentions "invalid model".
 *
 * The message is obtained via errorToMessage and compared case-insensitively.
 *
 * @param error - The caught value to inspect.
 * @returns true when the lower-cased message contains "invalid model".
 */
private static isInvalidModelError(error: unknown): boolean {
  const needle = "invalid model";
  const haystack = this.errorToMessage(error).toLowerCase();
  return haystack.indexOf(needle) !== -1;
}
291
+
292
/**
 * Re-encode an image file as PNG with the `sips` command-line tool and return
 * it as a base64 data URL.
 *
 * The converted file is written into a freshly created private temporary
 * directory (via `fs.mkdtemp`) rather than a guessable filename directly in
 * the shared tmp dir, and that directory is always removed afterwards.
 *
 * Any failure (tool missing, timeout, unreadable output, ...) is logged with
 * its reason and reported as `null`; this method does not throw.
 * NOTE(review): `sips` is presumably only present on macOS hosts, so other
 * platforms are expected to take the `null` path — confirm deployment targets.
 *
 * @param filePath - Path of the source image to convert.
 * @returns A `data:image/png;base64,...` URL, or null when conversion failed.
 */
private static async reencodeImageToPngDataUrl(filePath: string): Promise<string | null> {
  let workDir: string | null = null;
  try {
    workDir = await fs.mkdtemp(path.join(os.tmpdir(), "folio-vlm-reencode-"));
    const tempOutputPath = path.join(workDir, "reencoded.png");
    await execFileAsync("sips", ["-s", "format", "png", filePath, "--out", tempOutputPath], {
      timeout: this.IMAGE_REENCODE_TIMEOUT_MS,
      maxBuffer: 1024 * 1024,
    });
    const pngBuffer = await fs.readFile(tempOutputPath);
    return `data:image/png;base64,${pngBuffer.toString("base64")}`;
  } catch (err) {
    // Best-effort: the caller treats null as "retry unavailable", but keep the reason discoverable.
    logger.info(`Image re-encode via sips failed for ${filePath}: ${this.errorToMessage(err)}`);
    return null;
  } finally {
    if (workDir !== null) {
      // Removes the converted file together with its private directory; cleanup errors are ignored.
      await fs.rm(workDir, { recursive: true, force: true }).catch(() => undefined);
    }
  }
}
310
+
311
/**
 * Decide whether a failed VLM call should be retried with a re-encoded image
 * and, if so, build the payload marker for that retry.
 *
 * Returns null (no retry) when:
 *  - the error is not an "invalid model" error,
 *  - the retry feature is disabled (counted as `skipped_disabled`), or
 *  - re-encoding the image yields nothing (counted as `skipped_unavailable`).
 *
 * @param opts.error - The error raised by the first attempt.
 * @param opts.filePath - Path of the image to re-encode.
 * @param opts.filename - Display name used in log lines.
 * @param opts.provider - LLM provider used for the attempt (logging/metrics only).
 * @param opts.model - LLM model used for the attempt (logging/metrics only).
 * @param opts.phase - Whether this happens during "ingest" or "rerun".
 * @returns The marker built by buildVlmPayloadMarker for the PNG data URL, or null.
 */
private static async maybeBuildImageRetryMarker(opts: {
  error: unknown;
  filePath: string;
  filename: string;
  provider: string;
  model: string;
  phase: "ingest" | "rerun";
}): Promise<string | null> {
  const { error, filePath, filename, provider, model, phase } = opts;
  const target = `${provider}/${model}`;

  if (!this.isInvalidModelError(error)) {
    return null;
  }

  if (!this.IMAGE_REENCODE_RETRY_ENABLED) {
    this.bumpImageReencodeRetryMetric("skipped_disabled", opts);
    logger.info(`VLM ${phase} retry skipped for ${filename}: re-encode retry disabled (${target}).`);
    return null;
  }

  const retryDataUrl = await this.reencodeImageToPngDataUrl(filePath);
  if (retryDataUrl) {
    logger.warn(
      `VLM ${phase} failed for ${filename} with invalid model. Retrying once with re-encoded image payload (${target}).`
    );
    return this.buildVlmPayloadMarker("image", retryDataUrl);
  }

  this.bumpImageReencodeRetryMetric("skipped_unavailable", opts);
  logger.warn(`VLM ${phase} retry skipped for ${filename}: image re-encode unavailable (${target}).`);
  return null;
}
340
+
341
/**
 * Increment one re-encode retry counter and log the updated totals.
 *
 * @param outcome - Which counter in IMAGE_REENCODE_RETRY_METRICS to increment.
 * @param meta - Context (phase, provider, model, filename) attached to the log entry.
 */
private static bumpImageReencodeRetryMetric(
  outcome: keyof typeof IngestionService.IMAGE_REENCODE_RETRY_METRICS,
  meta: {
    phase: "ingest" | "rerun";
    provider: string;
    model: string;
    filename: string;
  }
): void {
  const counters = this.IMAGE_REENCODE_RETRY_METRICS;
  counters[outcome] = counters[outcome] + 1;

  const { phase, provider, model, filename } = meta;
  logger.info("VLM image re-encode retry metric", {
    outcome,
    phase,
    provider,
    model,
    filename,
    // Snapshot copy so the logged totals do not change with later increments.
    counters: { ...counters },
  });
}
360
+
251
361
  /**
252
362
  * Ingest a document using Hybrid Routing Architecture.
253
363
  */
@@ -328,7 +438,7 @@ export class IngestionService {
328
438
  // Pre-fetch settings to decide whether we should attempt VLM.
329
439
  const { data: triageSettingsRow } = await supabase
330
440
  .from("user_settings")
331
- .select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
441
+ .select("llm_provider, llm_model, ingestion_llm_provider, ingestion_llm_model, embedding_provider, embedding_model, vision_model_capabilities")
332
442
  .eq("user_id", userId)
333
443
  .maybeSingle();
334
444
  const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
@@ -404,145 +514,210 @@ export class IngestionService {
404
514
  // 3. Fast Path — fetch all dependencies in parallel
405
515
  const [userPolicies, processingSettingsRow, baselineConfig] = await Promise.all([
406
516
  PolicyLoader.load(false, supabase),
407
- supabase.from("user_settings").select("llm_provider, llm_model, embedding_provider, embedding_model").eq("user_id", userId).maybeSingle(),
517
+ supabase.from("user_settings").select("llm_provider, llm_model, ingestion_llm_provider, ingestion_llm_model, embedding_provider, embedding_model").eq("user_id", userId).maybeSingle(),
408
518
  BaselineConfigService.getActive(supabase, userId),
409
519
  ]);
410
- const llmSettings = {
411
- llm_provider: processingSettingsRow.data?.llm_provider ?? undefined,
412
- llm_model: processingSettingsRow.data?.llm_model ?? undefined,
413
- };
520
+ const llmSettings = this.resolveIngestionLlmSettings(processingSettingsRow.data);
414
521
  const embedSettings = {
415
522
  embedding_provider: processingSettingsRow.data?.embedding_provider ?? undefined,
416
523
  embedding_model: processingSettingsRow.data?.embedding_model ?? undefined,
417
524
  };
418
- const doc = { filePath: filePath, text: extractionContent, ingestionId: ingestion.id, userId, supabase };
419
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
420
- const baselineTrace: Array<{ timestamp: string; step: string; details?: any }> = [];
525
+ const resolvedProvider = llmSettings.llm_provider ?? llmProvider;
526
+ const resolvedModel = llmSettings.llm_model ?? llmModel;
527
+
528
+ const runFastPathAttempt = async (
529
+ attemptContent: string,
530
+ attemptType: "primary" | "reencoded_image_retry"
531
+ ): Promise<Ingestion> => {
532
+ const doc = { filePath: filePath, text: attemptContent, ingestionId: ingestion.id, userId, supabase };
533
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
534
+ const baselineTrace: Array<{ timestamp: string; step: string; details?: any }> = [];
535
+
536
+ // Fire and forget Semantic Embedding Storage
537
+ RAGService.chunkAndEmbed(ingestion.id, userId, doc.text, supabase, embedSettings).catch(err => {
538
+ logger.error(`RAG embedding failed for ${ingestion.id}`, err);
539
+ });
421
540
 
422
- // Fire and forget Semantic Embedding Storage
423
- RAGService.chunkAndEmbed(ingestion.id, userId, doc.text, supabase, embedSettings).catch(err => {
424
- logger.error(`RAG embedding failed for ${ingestion.id}`, err);
425
- });
541
+ // 4. Stage 1: Baseline extraction (always runs, LLM call 1 of max 2)
542
+ baselineTrace.push({
543
+ timestamp: new Date().toISOString(),
544
+ step: "LLM request (baseline extraction)",
545
+ details: {
546
+ provider: resolvedProvider,
547
+ model: resolvedModel,
548
+ mode: isMultimodalFastPath
549
+ ? `vision:${multimodalModality ?? "image"}${attemptType === "reencoded_image_retry" ? ":reencoded" : ""}`
550
+ : "text",
551
+ }
552
+ });
426
553
 
427
- // 4. Stage 1: Baseline extraction (always runs, LLM call 1 of max 2)
428
- baselineTrace.push({
429
- timestamp: new Date().toISOString(),
430
- step: "LLM request (baseline extraction)",
431
- details: {
432
- provider: llmSettings.llm_provider ?? llmProvider,
433
- model: llmSettings.llm_model ?? llmModel,
434
- mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
435
- }
436
- });
554
+ const baselineResult = await PolicyEngine.extractBaseline(
555
+ doc,
556
+ { context: baselineConfig?.context, fields: baselineConfig?.fields },
557
+ llmSettings
558
+ );
559
+ const baselineEntities = baselineResult.entities;
560
+ const autoTags = baselineResult.tags;
561
+ baselineTrace.push({
562
+ timestamp: new Date().toISOString(),
563
+ step: "LLM response (baseline extraction)",
564
+ details: {
565
+ entities_count: Object.keys(baselineEntities).length,
566
+ uncertain_count: baselineResult.uncertain_fields.length,
567
+ tags_count: autoTags.length,
568
+ }
569
+ });
437
570
 
438
- const baselineResult = await PolicyEngine.extractBaseline(
439
- doc,
440
- { context: baselineConfig?.context, fields: baselineConfig?.fields },
441
- llmSettings
442
- );
443
- const baselineEntities = baselineResult.entities;
444
- const autoTags = baselineResult.tags;
445
- baselineTrace.push({
446
- timestamp: new Date().toISOString(),
447
- step: "LLM response (baseline extraction)",
448
- details: {
449
- entities_count: Object.keys(baselineEntities).length,
450
- uncertain_count: baselineResult.uncertain_fields.length,
451
- tags_count: autoTags.length,
571
+ // Enrich the document with extracted entities so policy keyword/semantic
572
+ // conditions can match against semantic field values (e.g. document_type:
573
+ // "invoice") even when those exact words don't appear in the raw text.
574
+ const entityLines = Object.entries(baselineEntities)
575
+ .filter(([, v]) => v != null)
576
+ .map(([k, v]) => `${k}: ${Array.isArray(v) ? (v as unknown[]).join(", ") : String(v)}`);
577
+ const enrichedDoc = entityLines.length > 0
578
+ ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
579
+ : doc;
580
+
581
+ // 5. Stage 2: Policy matching + policy-specific field extraction
582
+ let result;
583
+ if (userPolicies.length > 0) {
584
+ result = await PolicyEngine.processWithPolicies(enrichedDoc, userPolicies, llmSettings, baselineEntities);
585
+ } else {
586
+ result = await PolicyEngine.process(enrichedDoc, llmSettings, baselineEntities);
452
587
  }
453
- });
454
588
 
455
- // Enrich the document with extracted entities so policy keyword/semantic
456
- // conditions can match against semantic field values (e.g. document_type:
457
- // "invoice") even when those exact words don't appear in the raw text.
458
- const entityLines = Object.entries(baselineEntities)
459
- .filter(([, v]) => v != null)
460
- .map(([k, v]) => `${k}: ${Array.isArray(v) ? (v as unknown[]).join(", ") : String(v)}`);
461
- const enrichedDoc = entityLines.length > 0
462
- ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
463
- : doc;
589
+ const policyName = userPolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name;
590
+ const finalStatus = result.status === "fallback" ? "no_match" : result.status;
464
591
 
465
- // 5. Stage 2: Policy matching + policy-specific field extraction
466
- let result;
467
- if (userPolicies.length > 0) {
468
- result = await PolicyEngine.processWithPolicies(enrichedDoc, userPolicies, llmSettings, baselineEntities);
469
- } else {
470
- result = await PolicyEngine.process(enrichedDoc, llmSettings, baselineEntities);
471
- }
592
+ // Merge: baseline entities are the foundation; policy-specific fields
593
+ // are overlaid on top so more precise extractions take precedence.
594
+ const mergedExtracted = { ...baselineEntities, ...result.extractedData };
595
+ let finalTrace = [...baselineTrace, ...(result.trace || [])];
472
596
 
473
- const policyName = userPolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name;
474
- const finalStatus = result.status === "fallback" ? "no_match" : result.status;
597
+ const { data: updatedIngestion } = await supabase
598
+ .from("ingestions")
599
+ .update({
600
+ status: finalStatus,
601
+ policy_id: result.matchedPolicy,
602
+ policy_name: policyName,
603
+ extracted: mergedExtracted,
604
+ actions_taken: result.actionsExecuted,
605
+ trace: finalTrace,
606
+ tags: autoTags,
607
+ baseline_config_id: baselineConfig?.id ?? null,
608
+ })
609
+ .eq("id", ingestion.id)
610
+ .select()
611
+ .single();
475
612
 
476
- // Merge: baseline entities are the foundation; policy-specific fields
477
- // are overlaid on top so more precise extractions take precedence.
478
- const mergedExtracted = { ...baselineEntities, ...result.extractedData };
479
- let finalTrace = [...baselineTrace, ...(result.trace || [])];
613
+ if (isMultimodalFastPath && multimodalModality) {
614
+ const embeddingMeta = this.queueVlmSemanticEmbedding({
615
+ ingestionId: ingestion.id,
616
+ userId,
617
+ filename,
618
+ finalStatus,
619
+ policyName,
620
+ extracted: mergedExtracted,
621
+ tags: autoTags,
622
+ modality: multimodalModality,
623
+ supabase,
624
+ embedSettings,
625
+ });
626
+ finalTrace = [
627
+ ...finalTrace,
628
+ {
629
+ timestamp: new Date().toISOString(),
630
+ step: "Queued synthetic VLM embedding",
631
+ details: embeddingMeta,
632
+ }
633
+ ];
634
+ await supabase
635
+ .from("ingestions")
636
+ .update({ trace: finalTrace })
637
+ .eq("id", ingestion.id);
638
+ }
480
639
 
481
- const { data: updatedIngestion } = await supabase
482
- .from("ingestions")
483
- .update({
484
- status: finalStatus,
485
- policy_id: result.matchedPolicy,
486
- policy_name: policyName,
487
- extracted: mergedExtracted,
488
- actions_taken: result.actionsExecuted,
489
- trace: finalTrace,
490
- tags: autoTags,
491
- baseline_config_id: baselineConfig?.id ?? null,
492
- })
493
- .eq("id", ingestion.id)
494
- .select()
495
- .single();
640
+ if (isMultimodalFastPath && multimodalModality) {
641
+ await ModelCapabilityService.learnVisionSuccess({
642
+ supabase,
643
+ userId,
644
+ provider: resolvedProvider,
645
+ model: resolvedModel,
646
+ modality: multimodalModality,
647
+ });
648
+ }
496
649
 
497
- if (isMultimodalFastPath && multimodalModality) {
498
- const embeddingMeta = this.queueVlmSemanticEmbedding({
499
- ingestionId: ingestion.id,
500
- userId,
501
- filename,
502
- finalStatus,
503
- policyName,
504
- extracted: mergedExtracted,
505
- tags: autoTags,
506
- modality: multimodalModality,
507
- supabase,
508
- embedSettings,
509
- });
510
- finalTrace = [
511
- ...finalTrace,
512
- {
513
- timestamp: new Date().toISOString(),
514
- step: "Queued synthetic VLM embedding",
515
- details: embeddingMeta,
516
- }
517
- ];
518
- await supabase
519
- .from("ingestions")
520
- .update({ trace: finalTrace })
521
- .eq("id", ingestion.id);
650
+ return updatedIngestion as Ingestion;
651
+ };
652
+
653
+ let terminalError: unknown = null;
654
+ try {
655
+ return await runFastPathAttempt(extractionContent, "primary");
656
+ } catch (primaryErr) {
657
+ terminalError = primaryErr;
522
658
  }
523
659
 
524
- if (isMultimodalFastPath && multimodalModality) {
525
- await ModelCapabilityService.learnVisionSuccess({
526
- supabase,
527
- userId,
528
- provider: llmSettings.llm_provider ?? llmProvider,
529
- model: llmSettings.llm_model ?? llmModel,
530
- modality: multimodalModality,
660
+ if (isMultimodalFastPath && multimodalModality === "image") {
661
+ const retryMarker = await this.maybeBuildImageRetryMarker({
662
+ error: terminalError,
663
+ filePath,
664
+ filename,
665
+ provider: resolvedProvider,
666
+ model: resolvedModel,
667
+ phase: "ingest",
531
668
  });
669
+ if (retryMarker) {
670
+ this.bumpImageReencodeRetryMetric("attempted", {
671
+ phase: "ingest",
672
+ provider: resolvedProvider,
673
+ model: resolvedModel,
674
+ filename,
675
+ });
676
+ Actuator.logEvent(ingestion.id, userId, "info", "Processing", {
677
+ action: "Retrying VLM with re-encoded image payload",
678
+ provider: resolvedProvider,
679
+ model: resolvedModel,
680
+ }, supabase);
681
+ try {
682
+ const retryResult = await runFastPathAttempt(retryMarker, "reencoded_image_retry");
683
+ this.bumpImageReencodeRetryMetric("succeeded", {
684
+ phase: "ingest",
685
+ provider: resolvedProvider,
686
+ model: resolvedModel,
687
+ filename,
688
+ });
689
+ Actuator.logEvent(ingestion.id, userId, "analysis", "Processing", {
690
+ action: "VLM re-encoded image retry succeeded",
691
+ provider: resolvedProvider,
692
+ model: resolvedModel,
693
+ }, supabase);
694
+ return retryResult;
695
+ } catch (retryErr) {
696
+ this.bumpImageReencodeRetryMetric("failed", {
697
+ phase: "ingest",
698
+ provider: resolvedProvider,
699
+ model: resolvedModel,
700
+ filename,
701
+ });
702
+ Actuator.logEvent(ingestion.id, userId, "error", "Processing", {
703
+ action: "VLM re-encoded image retry failed",
704
+ provider: resolvedProvider,
705
+ model: resolvedModel,
706
+ error: this.errorToMessage(retryErr),
707
+ }, supabase);
708
+ terminalError = retryErr;
709
+ }
710
+ }
532
711
  }
533
712
 
534
- return updatedIngestion as Ingestion;
535
-
536
- } catch (err) {
537
- const msg = err instanceof Error ? err.message : String(err);
538
-
713
+ const msg = this.errorToMessage(terminalError);
539
714
  if (isMultimodalFastPath && multimodalModality) {
540
715
  const learnedState = await ModelCapabilityService.learnVisionFailure({
541
716
  supabase,
542
717
  userId,
543
- provider: llmProvider,
544
- model: llmModel,
545
- error: err,
718
+ provider: resolvedProvider,
719
+ model: resolvedModel,
720
+ error: terminalError,
546
721
  modality: multimodalModality,
547
722
  });
548
723
  logger.warn(`VLM extraction failed for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
@@ -563,6 +738,16 @@ export class IngestionService {
563
738
  .single();
564
739
  return updatedIngestion as Ingestion;
565
740
  }
741
+ } catch (err) {
742
+ const msg = this.errorToMessage(err);
743
+ Actuator.logEvent(ingestion.id, userId, "error", "Processing", { error: msg }, supabase);
744
+ const { data: updatedIngestion } = await supabase
745
+ .from("ingestions")
746
+ .update({ status: "error", error_message: msg })
747
+ .eq("id", ingestion.id)
748
+ .select()
749
+ .single();
750
+ return updatedIngestion as Ingestion;
566
751
  }
567
752
  }
568
753
 
@@ -633,7 +818,7 @@ export class IngestionService {
633
818
 
634
819
  const { data: triageSettingsRow } = await supabase
635
820
  .from("user_settings")
636
- .select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
821
+ .select("llm_provider, llm_model, ingestion_llm_provider, ingestion_llm_model, embedding_provider, embedding_model, vision_model_capabilities")
637
822
  .eq("user_id", userId)
638
823
  .maybeSingle();
639
824
  const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
@@ -705,64 +890,68 @@ export class IngestionService {
705
890
  if (isFastPath) {
706
891
  const [userPolicies, processingSettingsRow, baselineConfig] = await Promise.all([
707
892
  PolicyLoader.load(false, supabase),
708
- supabase.from("user_settings").select("llm_provider, llm_model, embedding_provider, embedding_model").eq("user_id", userId).maybeSingle(),
893
+ supabase.from("user_settings").select("llm_provider, llm_model, ingestion_llm_provider, ingestion_llm_model, embedding_provider, embedding_model").eq("user_id", userId).maybeSingle(),
709
894
  BaselineConfigService.getActive(supabase, userId),
710
895
  ]);
711
- const llmSettings = {
712
- llm_provider: processingSettingsRow.data?.llm_provider ?? undefined,
713
- llm_model: processingSettingsRow.data?.llm_model ?? undefined,
714
- };
896
+ const llmSettings = this.resolveIngestionLlmSettings(processingSettingsRow.data);
715
897
  const embedSettings = {
716
898
  embedding_provider: processingSettingsRow.data?.embedding_provider ?? undefined,
717
899
  embedding_model: processingSettingsRow.data?.embedding_model ?? undefined,
718
900
  };
719
- const doc = { filePath, text: extractionContent, ingestionId, userId, supabase };
720
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
721
- const baselineTrace: Array<{ timestamp: string; step: string; details?: any }> = [];
901
+ const resolvedProvider = llmSettings.llm_provider ?? llmProvider;
902
+ const resolvedModel = llmSettings.llm_model ?? llmModel;
903
+
904
+ const runFastPathAttempt = async (
905
+ attemptContent: string,
906
+ attemptType: "primary" | "reencoded_image_retry"
907
+ ): Promise<boolean> => {
908
+ const doc = { filePath, text: attemptContent, ingestionId, userId, supabase };
909
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
910
+ const baselineTrace: Array<{ timestamp: string; step: string; details?: any }> = [];
722
911
 
723
- // Fire and forget Semantic Embedding Storage for re-runs
724
- RAGService.chunkAndEmbed(ingestionId, userId, doc.text, supabase, embedSettings).catch(err => {
725
- logger.error(`RAG embedding failed during rerun for ${ingestionId}`, err);
726
- });
912
+ // Fire and forget Semantic Embedding Storage for re-runs
913
+ RAGService.chunkAndEmbed(ingestionId, userId, doc.text, supabase, embedSettings).catch(err => {
914
+ logger.error(`RAG embedding failed during rerun for ${ingestionId}`, err);
915
+ });
727
916
 
728
- baselineTrace.push({
729
- timestamp: new Date().toISOString(),
730
- step: "LLM request (baseline extraction)",
917
+ baselineTrace.push({
918
+ timestamp: new Date().toISOString(),
919
+ step: "LLM request (baseline extraction)",
731
920
  details: {
732
- provider: llmSettings.llm_provider ?? llmProvider,
733
- model: llmSettings.llm_model ?? llmModel,
734
- mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
921
+ provider: resolvedProvider,
922
+ model: resolvedModel,
923
+ mode: isMultimodalFastPath
924
+ ? `vision:${multimodalModality ?? "image"}${attemptType === "reencoded_image_retry" ? ":reencoded" : ""}`
925
+ : "text",
735
926
  }
736
927
  });
737
928
 
738
- const baselineResult = await PolicyEngine.extractBaseline(
739
- doc,
740
- { context: baselineConfig?.context, fields: baselineConfig?.fields },
741
- llmSettings
742
- );
743
- const baselineEntities = baselineResult.entities;
744
- const autoTags = baselineResult.tags;
745
- baselineTrace.push({
746
- timestamp: new Date().toISOString(),
747
- step: "LLM response (baseline extraction)",
748
- details: {
749
- entities_count: Object.keys(baselineEntities).length,
750
- uncertain_count: baselineResult.uncertain_fields.length,
751
- tags_count: autoTags.length,
752
- }
753
- });
929
+ const baselineResult = await PolicyEngine.extractBaseline(
930
+ doc,
931
+ { context: baselineConfig?.context, fields: baselineConfig?.fields },
932
+ llmSettings
933
+ );
934
+ const baselineEntities = baselineResult.entities;
935
+ const autoTags = baselineResult.tags;
936
+ baselineTrace.push({
937
+ timestamp: new Date().toISOString(),
938
+ step: "LLM response (baseline extraction)",
939
+ details: {
940
+ entities_count: Object.keys(baselineEntities).length,
941
+ uncertain_count: baselineResult.uncertain_fields.length,
942
+ tags_count: autoTags.length,
943
+ }
944
+ });
754
945
 
755
- const entityLines = Object.entries(baselineEntities)
756
- .filter(([, v]) => v != null)
757
- .map(([k, v]) => `${k}: ${Array.isArray(v) ? (v as unknown[]).join(", ") : String(v)}`);
758
- const enrichedDoc = entityLines.length > 0
759
- ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
760
- : doc;
946
+ const entityLines = Object.entries(baselineEntities)
947
+ .filter(([, v]) => v != null)
948
+ .map(([k, v]) => `${k}: ${Array.isArray(v) ? (v as unknown[]).join(", ") : String(v)}`);
949
+ const enrichedDoc = entityLines.length > 0
950
+ ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
951
+ : doc;
761
952
 
762
- let finalStatus = "no_match";
763
- let result: import("./PolicyEngine.js").ProcessingResult;
764
- let policyName;
765
- try {
953
+ let finalStatus = "no_match";
954
+ let result: import("./PolicyEngine.js").ProcessingResult;
766
955
  const forcedPolicyId = opts.forcedPolicyId?.trim();
767
956
  const activePolicies = forcedPolicyId
768
957
  ? userPolicies.filter((policy) => policy.metadata.id === forcedPolicyId)
@@ -787,7 +976,7 @@ export class IngestionService {
787
976
  result = await PolicyEngine.process(enrichedDoc, llmSettings, baselineEntities);
788
977
  }
789
978
 
790
- policyName = result.matchedPolicy ? activePolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name : undefined;
979
+ const policyName = result.matchedPolicy ? activePolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name : undefined;
791
980
  finalStatus = result.status === "fallback" ? "no_match" : result.status;
792
981
  const mergedExtracted = { ...baselineEntities, ...result.extractedData };
793
982
 
@@ -846,35 +1035,95 @@ export class IngestionService {
846
1035
  await ModelCapabilityService.learnVisionSuccess({
847
1036
  supabase,
848
1037
  userId,
849
- provider: llmSettings.llm_provider ?? llmProvider,
850
- model: llmSettings.llm_model ?? llmModel,
1038
+ provider: resolvedProvider,
1039
+ model: resolvedModel,
851
1040
  modality: multimodalModality,
852
1041
  });
853
1042
  }
854
1043
 
855
1044
  return finalStatus === "matched";
856
- } catch (err: unknown) {
857
- const msg = err instanceof Error ? err.message : String(err);
858
- if (isMultimodalFastPath && multimodalModality) {
859
- const learnedState = await ModelCapabilityService.learnVisionFailure({
860
- supabase,
861
- userId,
862
- provider: llmProvider,
863
- model: llmModel,
864
- error: err,
865
- modality: multimodalModality,
1045
+ };
1046
+
1047
+ let terminalError: unknown = null;
1048
+ try {
1049
+ return await runFastPathAttempt(extractionContent, "primary");
1050
+ } catch (primaryErr) {
1051
+ terminalError = primaryErr;
1052
+ }
1053
+
1054
+ if (isMultimodalFastPath && multimodalModality === "image") {
1055
+ const retryMarker = await this.maybeBuildImageRetryMarker({
1056
+ error: terminalError,
1057
+ filePath,
1058
+ filename,
1059
+ provider: resolvedProvider,
1060
+ model: resolvedModel,
1061
+ phase: "rerun",
1062
+ });
1063
+ if (retryMarker) {
1064
+ this.bumpImageReencodeRetryMetric("attempted", {
1065
+ phase: "rerun",
1066
+ provider: resolvedProvider,
1067
+ model: resolvedModel,
1068
+ filename,
866
1069
  });
867
- logger.warn(`VLM extraction failed during rerun for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
868
- Actuator.logEvent(ingestionId, userId, "error", "Processing", {
869
- action: "VLM Failed, Fallback to Heavy",
870
- error: msg,
871
- learned_state: learnedState,
1070
+ Actuator.logEvent(ingestionId, userId, "info", "Processing", {
1071
+ action: "Retrying VLM with re-encoded image payload",
1072
+ provider: resolvedProvider,
1073
+ model: resolvedModel,
872
1074
  }, supabase);
873
- isFastPath = false; // Trigger heavy path fallthrough
874
- } else {
875
- throw err; // Re-throw to caller
1075
+ try {
1076
+ const retryResult = await runFastPathAttempt(retryMarker, "reencoded_image_retry");
1077
+ this.bumpImageReencodeRetryMetric("succeeded", {
1078
+ phase: "rerun",
1079
+ provider: resolvedProvider,
1080
+ model: resolvedModel,
1081
+ filename,
1082
+ });
1083
+ Actuator.logEvent(ingestionId, userId, "analysis", "Processing", {
1084
+ action: "VLM re-encoded image retry succeeded",
1085
+ provider: resolvedProvider,
1086
+ model: resolvedModel,
1087
+ }, supabase);
1088
+ return retryResult;
1089
+ } catch (retryErr) {
1090
+ this.bumpImageReencodeRetryMetric("failed", {
1091
+ phase: "rerun",
1092
+ provider: resolvedProvider,
1093
+ model: resolvedModel,
1094
+ filename,
1095
+ });
1096
+ Actuator.logEvent(ingestionId, userId, "error", "Processing", {
1097
+ action: "VLM re-encoded image retry failed",
1098
+ provider: resolvedProvider,
1099
+ model: resolvedModel,
1100
+ error: this.errorToMessage(retryErr),
1101
+ }, supabase);
1102
+ terminalError = retryErr;
1103
+ }
876
1104
  }
877
1105
  }
1106
+
1107
+ const msg = this.errorToMessage(terminalError);
1108
+ if (isMultimodalFastPath && multimodalModality) {
1109
+ const learnedState = await ModelCapabilityService.learnVisionFailure({
1110
+ supabase,
1111
+ userId,
1112
+ provider: resolvedProvider,
1113
+ model: resolvedModel,
1114
+ error: terminalError,
1115
+ modality: multimodalModality,
1116
+ });
1117
+ logger.warn(`VLM extraction failed during rerun for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
1118
+ Actuator.logEvent(ingestionId, userId, "error", "Processing", {
1119
+ action: "VLM Failed, Fallback to Heavy",
1120
+ error: msg,
1121
+ learned_state: learnedState,
1122
+ }, supabase);
1123
+ isFastPath = false; // Trigger heavy path fallthrough
1124
+ } else {
1125
+ throw terminalError instanceof Error ? terminalError : new Error(msg); // Re-throw to caller
1126
+ }
878
1127
  }
879
1128
 
880
1129
  // Re-delegate to rtx_activities