@realtimex/folio 0.1.12 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,10 @@
1
1
  import type { SupabaseClient } from "@supabase/supabase-js";
2
2
  import fs from "fs/promises";
3
+ import { execFile } from "child_process";
4
+ import os from "os";
5
+ import path from "path";
3
6
  import { PDFParse } from "pdf-parse";
7
+ import { promisify } from "util";
4
8
  import { createLogger } from "../utils/logger.js";
5
9
  import { PolicyLoader } from "./PolicyLoader.js";
6
10
  import type { FolioPolicy } from "./PolicyLoader.js";
@@ -14,6 +18,7 @@ import { SDKService } from "./SDKService.js";
14
18
  import { ModelCapabilityService, type VisionCapabilityModality } from "./ModelCapabilityService.js";
15
19
 
16
20
  const logger = createLogger("IngestionService");
21
+ const execFileAsync = promisify(execFile);
17
22
 
18
23
  /**
19
24
  * Multi-signal classifier that decides whether pdf-parse extracted enough
@@ -91,6 +96,15 @@ export interface Ingestion {
91
96
  export class IngestionService {
92
97
  private static readonly FAST_EXTS = ["txt", "md", "csv", "json"] as const;
93
98
  private static readonly IMAGE_EXTS = ["png", "jpg", "jpeg", "webp"] as const;
99
+ private static readonly IMAGE_REENCODE_TIMEOUT_MS = 15000;
100
+ private static readonly IMAGE_REENCODE_RETRY_ENABLED = (process.env.FOLIO_VLM_IMAGE_REENCODE_RETRY_ENABLED ?? "true").toLowerCase() !== "false";
101
+ private static readonly IMAGE_REENCODE_RETRY_METRICS = {
102
+ attempted: 0,
103
+ succeeded: 0,
104
+ failed: 0,
105
+ skipped_disabled: 0,
106
+ skipped_unavailable: 0,
107
+ };
94
108
 
95
109
  private static readonly NON_IDEMPOTENT_ACTION_TYPES = new Set([
96
110
  "append_to_google_sheet",
@@ -248,6 +262,90 @@ export class IngestionService {
248
262
  return `data:${mimeType};base64,${base64}`;
249
263
  }
250
264
 
265
+ private static errorToMessage(error: unknown): string {
266
+ if (error instanceof Error) return error.message;
267
+ if (typeof error === "string") return error;
268
+ if (error && typeof error === "object") {
269
+ const candidate = error as Record<string, unknown>;
270
+ if (typeof candidate.message === "string") return candidate.message;
271
+ }
272
+ return String(error ?? "");
273
+ }
274
+
275
+ private static isInvalidModelError(error: unknown): boolean {
276
+ const message = this.errorToMessage(error).toLowerCase();
277
+ return message.includes("invalid model");
278
+ }
279
+
280
+ private static async reencodeImageToPngDataUrl(filePath: string): Promise<string | null> {
281
+ const tempOutputPath = path.join(
282
+ os.tmpdir(),
283
+ `folio-vlm-reencode-${Date.now()}-${Math.random().toString(16).slice(2)}.png`
284
+ );
285
+ try {
286
+ await execFileAsync("sips", ["-s", "format", "png", filePath, "--out", tempOutputPath], {
287
+ timeout: this.IMAGE_REENCODE_TIMEOUT_MS,
288
+ maxBuffer: 1024 * 1024,
289
+ });
290
+ const pngBuffer = await fs.readFile(tempOutputPath);
291
+ return `data:image/png;base64,${pngBuffer.toString("base64")}`;
292
+ } catch {
293
+ return null;
294
+ } finally {
295
+ await fs.unlink(tempOutputPath).catch(() => undefined);
296
+ }
297
+ }
298
+
299
+ private static async maybeBuildImageRetryMarker(opts: {
300
+ error: unknown;
301
+ filePath: string;
302
+ filename: string;
303
+ provider: string;
304
+ model: string;
305
+ phase: "ingest" | "rerun";
306
+ }): Promise<string | null> {
307
+ if (!this.isInvalidModelError(opts.error)) return null;
308
+ if (!this.IMAGE_REENCODE_RETRY_ENABLED) {
309
+ this.bumpImageReencodeRetryMetric("skipped_disabled", opts);
310
+ logger.info(
311
+ `VLM ${opts.phase} retry skipped for ${opts.filename}: re-encode retry disabled (${opts.provider}/${opts.model}).`
312
+ );
313
+ return null;
314
+ }
315
+ const retryDataUrl = await this.reencodeImageToPngDataUrl(opts.filePath);
316
+ if (!retryDataUrl) {
317
+ this.bumpImageReencodeRetryMetric("skipped_unavailable", opts);
318
+ logger.warn(
319
+ `VLM ${opts.phase} retry skipped for ${opts.filename}: image re-encode unavailable (${opts.provider}/${opts.model}).`
320
+ );
321
+ return null;
322
+ }
323
+ logger.warn(
324
+ `VLM ${opts.phase} failed for ${opts.filename} with invalid model. Retrying once with re-encoded image payload (${opts.provider}/${opts.model}).`
325
+ );
326
+ return this.buildVlmPayloadMarker("image", retryDataUrl);
327
+ }
328
+
329
+ private static bumpImageReencodeRetryMetric(
330
+ outcome: keyof typeof IngestionService.IMAGE_REENCODE_RETRY_METRICS,
331
+ meta: {
332
+ phase: "ingest" | "rerun";
333
+ provider: string;
334
+ model: string;
335
+ filename: string;
336
+ }
337
+ ): void {
338
+ this.IMAGE_REENCODE_RETRY_METRICS[outcome] += 1;
339
+ logger.info("VLM image re-encode retry metric", {
340
+ outcome,
341
+ phase: meta.phase,
342
+ provider: meta.provider,
343
+ model: meta.model,
344
+ filename: meta.filename,
345
+ counters: { ...this.IMAGE_REENCODE_RETRY_METRICS },
346
+ });
347
+ }
348
+
251
349
  /**
252
350
  * Ingest a document using Hybrid Routing Architecture.
253
351
  */
@@ -415,134 +513,202 @@ export class IngestionService {
415
513
  embedding_provider: processingSettingsRow.data?.embedding_provider ?? undefined,
416
514
  embedding_model: processingSettingsRow.data?.embedding_model ?? undefined,
417
515
  };
418
- const doc = { filePath: filePath, text: extractionContent, ingestionId: ingestion.id, userId, supabase };
419
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
420
- const baselineTrace: Array<{ timestamp: string; step: string; details?: any }> = [];
516
+ const resolvedProvider = llmSettings.llm_provider ?? llmProvider;
517
+ const resolvedModel = llmSettings.llm_model ?? llmModel;
518
+
519
+ const runFastPathAttempt = async (
520
+ attemptContent: string,
521
+ attemptType: "primary" | "reencoded_image_retry"
522
+ ): Promise<Ingestion> => {
523
+ const doc = { filePath: filePath, text: attemptContent, ingestionId: ingestion.id, userId, supabase };
524
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
525
+ const baselineTrace: Array<{ timestamp: string; step: string; details?: any }> = [];
526
+
527
+ // Fire and forget Semantic Embedding Storage
528
+ RAGService.chunkAndEmbed(ingestion.id, userId, doc.text, supabase, embedSettings).catch(err => {
529
+ logger.error(`RAG embedding failed for ${ingestion.id}`, err);
530
+ });
421
531
 
422
- // Fire and forget Semantic Embedding Storage
423
- RAGService.chunkAndEmbed(ingestion.id, userId, doc.text, supabase, embedSettings).catch(err => {
424
- logger.error(`RAG embedding failed for ${ingestion.id}`, err);
425
- });
532
+ // 4. Stage 1: Baseline extraction (always runs, LLM call 1 of max 2)
533
+ baselineTrace.push({
534
+ timestamp: new Date().toISOString(),
535
+ step: "LLM request (baseline extraction)",
536
+ details: {
537
+ provider: resolvedProvider,
538
+ model: resolvedModel,
539
+ mode: isMultimodalFastPath
540
+ ? `vision:${multimodalModality ?? "image"}${attemptType === "reencoded_image_retry" ? ":reencoded" : ""}`
541
+ : "text",
542
+ }
543
+ });
426
544
 
427
- // 4. Stage 1: Baseline extraction (always runs, LLM call 1 of max 2)
428
- baselineTrace.push({
429
- timestamp: new Date().toISOString(),
430
- step: "LLM request (baseline extraction)",
431
- details: {
432
- provider: llmSettings.llm_provider ?? llmProvider,
433
- model: llmSettings.llm_model ?? llmModel,
434
- mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
435
- }
436
- });
545
+ const baselineResult = await PolicyEngine.extractBaseline(
546
+ doc,
547
+ { context: baselineConfig?.context, fields: baselineConfig?.fields },
548
+ llmSettings
549
+ );
550
+ const baselineEntities = baselineResult.entities;
551
+ const autoTags = baselineResult.tags;
552
+ baselineTrace.push({
553
+ timestamp: new Date().toISOString(),
554
+ step: "LLM response (baseline extraction)",
555
+ details: {
556
+ entities_count: Object.keys(baselineEntities).length,
557
+ uncertain_count: baselineResult.uncertain_fields.length,
558
+ tags_count: autoTags.length,
559
+ }
560
+ });
437
561
 
438
- const baselineResult = await PolicyEngine.extractBaseline(
439
- doc,
440
- { context: baselineConfig?.context, fields: baselineConfig?.fields },
441
- llmSettings
442
- );
443
- const baselineEntities = baselineResult.entities;
444
- const autoTags = baselineResult.tags;
445
- baselineTrace.push({
446
- timestamp: new Date().toISOString(),
447
- step: "LLM response (baseline extraction)",
448
- details: {
449
- entities_count: Object.keys(baselineEntities).length,
450
- uncertain_count: baselineResult.uncertain_fields.length,
451
- tags_count: autoTags.length,
562
+ // Enrich the document with extracted entities so policy keyword/semantic
563
+ // conditions can match against semantic field values (e.g. document_type:
564
+ // "invoice") even when those exact words don't appear in the raw text.
565
+ const entityLines = Object.entries(baselineEntities)
566
+ .filter(([, v]) => v != null)
567
+ .map(([k, v]) => `${k}: ${Array.isArray(v) ? (v as unknown[]).join(", ") : String(v)}`);
568
+ const enrichedDoc = entityLines.length > 0
569
+ ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
570
+ : doc;
571
+
572
+ // 5. Stage 2: Policy matching + policy-specific field extraction
573
+ let result;
574
+ if (userPolicies.length > 0) {
575
+ result = await PolicyEngine.processWithPolicies(enrichedDoc, userPolicies, llmSettings, baselineEntities);
576
+ } else {
577
+ result = await PolicyEngine.process(enrichedDoc, llmSettings, baselineEntities);
452
578
  }
453
- });
454
579
 
455
- // Enrich the document with extracted entities so policy keyword/semantic
456
- // conditions can match against semantic field values (e.g. document_type:
457
- // "invoice") even when those exact words don't appear in the raw text.
458
- const entityLines = Object.entries(baselineEntities)
459
- .filter(([, v]) => v != null)
460
- .map(([k, v]) => `${k}: ${Array.isArray(v) ? (v as unknown[]).join(", ") : String(v)}`);
461
- const enrichedDoc = entityLines.length > 0
462
- ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
463
- : doc;
580
+ const policyName = userPolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name;
581
+ const finalStatus = result.status === "fallback" ? "no_match" : result.status;
464
582
 
465
- // 5. Stage 2: Policy matching + policy-specific field extraction
466
- let result;
467
- if (userPolicies.length > 0) {
468
- result = await PolicyEngine.processWithPolicies(enrichedDoc, userPolicies, llmSettings, baselineEntities);
469
- } else {
470
- result = await PolicyEngine.process(enrichedDoc, llmSettings, baselineEntities);
471
- }
583
+ // Merge: baseline entities are the foundation; policy-specific fields
584
+ // are overlaid on top so more precise extractions take precedence.
585
+ const mergedExtracted = { ...baselineEntities, ...result.extractedData };
586
+ let finalTrace = [...baselineTrace, ...(result.trace || [])];
472
587
 
473
- const policyName = userPolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name;
474
- const finalStatus = result.status === "fallback" ? "no_match" : result.status;
588
+ const { data: updatedIngestion } = await supabase
589
+ .from("ingestions")
590
+ .update({
591
+ status: finalStatus,
592
+ policy_id: result.matchedPolicy,
593
+ policy_name: policyName,
594
+ extracted: mergedExtracted,
595
+ actions_taken: result.actionsExecuted,
596
+ trace: finalTrace,
597
+ tags: autoTags,
598
+ baseline_config_id: baselineConfig?.id ?? null,
599
+ })
600
+ .eq("id", ingestion.id)
601
+ .select()
602
+ .single();
475
603
 
476
- // Merge: baseline entities are the foundation; policy-specific fields
477
- // are overlaid on top so more precise extractions take precedence.
478
- const mergedExtracted = { ...baselineEntities, ...result.extractedData };
479
- let finalTrace = [...baselineTrace, ...(result.trace || [])];
604
+ if (isMultimodalFastPath && multimodalModality) {
605
+ const embeddingMeta = this.queueVlmSemanticEmbedding({
606
+ ingestionId: ingestion.id,
607
+ userId,
608
+ filename,
609
+ finalStatus,
610
+ policyName,
611
+ extracted: mergedExtracted,
612
+ tags: autoTags,
613
+ modality: multimodalModality,
614
+ supabase,
615
+ embedSettings,
616
+ });
617
+ finalTrace = [
618
+ ...finalTrace,
619
+ {
620
+ timestamp: new Date().toISOString(),
621
+ step: "Queued synthetic VLM embedding",
622
+ details: embeddingMeta,
623
+ }
624
+ ];
625
+ await supabase
626
+ .from("ingestions")
627
+ .update({ trace: finalTrace })
628
+ .eq("id", ingestion.id);
629
+ }
480
630
 
481
- const { data: updatedIngestion } = await supabase
482
- .from("ingestions")
483
- .update({
484
- status: finalStatus,
485
- policy_id: result.matchedPolicy,
486
- policy_name: policyName,
487
- extracted: mergedExtracted,
488
- actions_taken: result.actionsExecuted,
489
- trace: finalTrace,
490
- tags: autoTags,
491
- baseline_config_id: baselineConfig?.id ?? null,
492
- })
493
- .eq("id", ingestion.id)
494
- .select()
495
- .single();
631
+ if (isMultimodalFastPath && multimodalModality) {
632
+ await ModelCapabilityService.learnVisionSuccess({
633
+ supabase,
634
+ userId,
635
+ provider: resolvedProvider,
636
+ model: resolvedModel,
637
+ modality: multimodalModality,
638
+ });
639
+ }
496
640
 
497
- if (isMultimodalFastPath && multimodalModality) {
498
- const embeddingMeta = this.queueVlmSemanticEmbedding({
499
- ingestionId: ingestion.id,
500
- userId,
501
- filename,
502
- finalStatus,
503
- policyName,
504
- extracted: mergedExtracted,
505
- tags: autoTags,
506
- modality: multimodalModality,
507
- supabase,
508
- embedSettings,
509
- });
510
- finalTrace = [
511
- ...finalTrace,
512
- {
513
- timestamp: new Date().toISOString(),
514
- step: "Queued synthetic VLM embedding",
515
- details: embeddingMeta,
516
- }
517
- ];
518
- await supabase
519
- .from("ingestions")
520
- .update({ trace: finalTrace })
521
- .eq("id", ingestion.id);
641
+ return updatedIngestion as Ingestion;
642
+ };
643
+
644
+ let terminalError: unknown = null;
645
+ try {
646
+ return await runFastPathAttempt(extractionContent, "primary");
647
+ } catch (primaryErr) {
648
+ terminalError = primaryErr;
522
649
  }
523
650
 
524
- if (isMultimodalFastPath && multimodalModality) {
525
- await ModelCapabilityService.learnVisionSuccess({
526
- supabase,
527
- userId,
528
- provider: llmSettings.llm_provider ?? llmProvider,
529
- model: llmSettings.llm_model ?? llmModel,
530
- modality: multimodalModality,
651
+ if (isMultimodalFastPath && multimodalModality === "image") {
652
+ const retryMarker = await this.maybeBuildImageRetryMarker({
653
+ error: terminalError,
654
+ filePath,
655
+ filename,
656
+ provider: resolvedProvider,
657
+ model: resolvedModel,
658
+ phase: "ingest",
531
659
  });
660
+ if (retryMarker) {
661
+ this.bumpImageReencodeRetryMetric("attempted", {
662
+ phase: "ingest",
663
+ provider: resolvedProvider,
664
+ model: resolvedModel,
665
+ filename,
666
+ });
667
+ Actuator.logEvent(ingestion.id, userId, "info", "Processing", {
668
+ action: "Retrying VLM with re-encoded image payload",
669
+ provider: resolvedProvider,
670
+ model: resolvedModel,
671
+ }, supabase);
672
+ try {
673
+ const retryResult = await runFastPathAttempt(retryMarker, "reencoded_image_retry");
674
+ this.bumpImageReencodeRetryMetric("succeeded", {
675
+ phase: "ingest",
676
+ provider: resolvedProvider,
677
+ model: resolvedModel,
678
+ filename,
679
+ });
680
+ Actuator.logEvent(ingestion.id, userId, "analysis", "Processing", {
681
+ action: "VLM re-encoded image retry succeeded",
682
+ provider: resolvedProvider,
683
+ model: resolvedModel,
684
+ }, supabase);
685
+ return retryResult;
686
+ } catch (retryErr) {
687
+ this.bumpImageReencodeRetryMetric("failed", {
688
+ phase: "ingest",
689
+ provider: resolvedProvider,
690
+ model: resolvedModel,
691
+ filename,
692
+ });
693
+ Actuator.logEvent(ingestion.id, userId, "error", "Processing", {
694
+ action: "VLM re-encoded image retry failed",
695
+ provider: resolvedProvider,
696
+ model: resolvedModel,
697
+ error: this.errorToMessage(retryErr),
698
+ }, supabase);
699
+ terminalError = retryErr;
700
+ }
701
+ }
532
702
  }
533
703
 
534
- return updatedIngestion as Ingestion;
535
-
536
- } catch (err) {
537
- const msg = err instanceof Error ? err.message : String(err);
538
-
704
+ const msg = this.errorToMessage(terminalError);
539
705
  if (isMultimodalFastPath && multimodalModality) {
540
706
  const learnedState = await ModelCapabilityService.learnVisionFailure({
541
707
  supabase,
542
708
  userId,
543
- provider: llmProvider,
544
- model: llmModel,
545
- error: err,
709
+ provider: resolvedProvider,
710
+ model: resolvedModel,
711
+ error: terminalError,
546
712
  modality: multimodalModality,
547
713
  });
548
714
  logger.warn(`VLM extraction failed for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
@@ -563,6 +729,16 @@ export class IngestionService {
563
729
  .single();
564
730
  return updatedIngestion as Ingestion;
565
731
  }
732
+ } catch (err) {
733
+ const msg = this.errorToMessage(err);
734
+ Actuator.logEvent(ingestion.id, userId, "error", "Processing", { error: msg }, supabase);
735
+ const { data: updatedIngestion } = await supabase
736
+ .from("ingestions")
737
+ .update({ status: "error", error_message: msg })
738
+ .eq("id", ingestion.id)
739
+ .select()
740
+ .single();
741
+ return updatedIngestion as Ingestion;
566
742
  }
567
743
  }
568
744
 
@@ -716,53 +892,60 @@ export class IngestionService {
716
892
  embedding_provider: processingSettingsRow.data?.embedding_provider ?? undefined,
717
893
  embedding_model: processingSettingsRow.data?.embedding_model ?? undefined,
718
894
  };
719
- const doc = { filePath, text: extractionContent, ingestionId, userId, supabase };
720
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
721
- const baselineTrace: Array<{ timestamp: string; step: string; details?: any }> = [];
895
+ const resolvedProvider = llmSettings.llm_provider ?? llmProvider;
896
+ const resolvedModel = llmSettings.llm_model ?? llmModel;
897
+
898
+ const runFastPathAttempt = async (
899
+ attemptContent: string,
900
+ attemptType: "primary" | "reencoded_image_retry"
901
+ ): Promise<boolean> => {
902
+ const doc = { filePath, text: attemptContent, ingestionId, userId, supabase };
903
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
904
+ const baselineTrace: Array<{ timestamp: string; step: string; details?: any }> = [];
722
905
 
723
- // Fire and forget Semantic Embedding Storage for re-runs
724
- RAGService.chunkAndEmbed(ingestionId, userId, doc.text, supabase, embedSettings).catch(err => {
725
- logger.error(`RAG embedding failed during rerun for ${ingestionId}`, err);
726
- });
906
+ // Fire and forget Semantic Embedding Storage for re-runs
907
+ RAGService.chunkAndEmbed(ingestionId, userId, doc.text, supabase, embedSettings).catch(err => {
908
+ logger.error(`RAG embedding failed during rerun for ${ingestionId}`, err);
909
+ });
727
910
 
728
- baselineTrace.push({
729
- timestamp: new Date().toISOString(),
730
- step: "LLM request (baseline extraction)",
911
+ baselineTrace.push({
912
+ timestamp: new Date().toISOString(),
913
+ step: "LLM request (baseline extraction)",
731
914
  details: {
732
- provider: llmSettings.llm_provider ?? llmProvider,
733
- model: llmSettings.llm_model ?? llmModel,
734
- mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
915
+ provider: resolvedProvider,
916
+ model: resolvedModel,
917
+ mode: isMultimodalFastPath
918
+ ? `vision:${multimodalModality ?? "image"}${attemptType === "reencoded_image_retry" ? ":reencoded" : ""}`
919
+ : "text",
735
920
  }
736
921
  });
737
922
 
738
- const baselineResult = await PolicyEngine.extractBaseline(
739
- doc,
740
- { context: baselineConfig?.context, fields: baselineConfig?.fields },
741
- llmSettings
742
- );
743
- const baselineEntities = baselineResult.entities;
744
- const autoTags = baselineResult.tags;
745
- baselineTrace.push({
746
- timestamp: new Date().toISOString(),
747
- step: "LLM response (baseline extraction)",
748
- details: {
749
- entities_count: Object.keys(baselineEntities).length,
750
- uncertain_count: baselineResult.uncertain_fields.length,
751
- tags_count: autoTags.length,
752
- }
753
- });
923
+ const baselineResult = await PolicyEngine.extractBaseline(
924
+ doc,
925
+ { context: baselineConfig?.context, fields: baselineConfig?.fields },
926
+ llmSettings
927
+ );
928
+ const baselineEntities = baselineResult.entities;
929
+ const autoTags = baselineResult.tags;
930
+ baselineTrace.push({
931
+ timestamp: new Date().toISOString(),
932
+ step: "LLM response (baseline extraction)",
933
+ details: {
934
+ entities_count: Object.keys(baselineEntities).length,
935
+ uncertain_count: baselineResult.uncertain_fields.length,
936
+ tags_count: autoTags.length,
937
+ }
938
+ });
754
939
 
755
- const entityLines = Object.entries(baselineEntities)
756
- .filter(([, v]) => v != null)
757
- .map(([k, v]) => `${k}: ${Array.isArray(v) ? (v as unknown[]).join(", ") : String(v)}`);
758
- const enrichedDoc = entityLines.length > 0
759
- ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
760
- : doc;
940
+ const entityLines = Object.entries(baselineEntities)
941
+ .filter(([, v]) => v != null)
942
+ .map(([k, v]) => `${k}: ${Array.isArray(v) ? (v as unknown[]).join(", ") : String(v)}`);
943
+ const enrichedDoc = entityLines.length > 0
944
+ ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
945
+ : doc;
761
946
 
762
- let finalStatus = "no_match";
763
- let result: import("./PolicyEngine.js").ProcessingResult;
764
- let policyName;
765
- try {
947
+ let finalStatus = "no_match";
948
+ let result: import("./PolicyEngine.js").ProcessingResult;
766
949
  const forcedPolicyId = opts.forcedPolicyId?.trim();
767
950
  const activePolicies = forcedPolicyId
768
951
  ? userPolicies.filter((policy) => policy.metadata.id === forcedPolicyId)
@@ -787,7 +970,7 @@ export class IngestionService {
787
970
  result = await PolicyEngine.process(enrichedDoc, llmSettings, baselineEntities);
788
971
  }
789
972
 
790
- policyName = result.matchedPolicy ? activePolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name : undefined;
973
+ const policyName = result.matchedPolicy ? activePolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name : undefined;
791
974
  finalStatus = result.status === "fallback" ? "no_match" : result.status;
792
975
  const mergedExtracted = { ...baselineEntities, ...result.extractedData };
793
976
 
@@ -846,35 +1029,95 @@ export class IngestionService {
846
1029
  await ModelCapabilityService.learnVisionSuccess({
847
1030
  supabase,
848
1031
  userId,
849
- provider: llmSettings.llm_provider ?? llmProvider,
850
- model: llmSettings.llm_model ?? llmModel,
1032
+ provider: resolvedProvider,
1033
+ model: resolvedModel,
851
1034
  modality: multimodalModality,
852
1035
  });
853
1036
  }
854
1037
 
855
1038
  return finalStatus === "matched";
856
- } catch (err: unknown) {
857
- const msg = err instanceof Error ? err.message : String(err);
858
- if (isMultimodalFastPath && multimodalModality) {
859
- const learnedState = await ModelCapabilityService.learnVisionFailure({
860
- supabase,
861
- userId,
862
- provider: llmProvider,
863
- model: llmModel,
864
- error: err,
865
- modality: multimodalModality,
1039
+ };
1040
+
1041
+ let terminalError: unknown = null;
1042
+ try {
1043
+ return await runFastPathAttempt(extractionContent, "primary");
1044
+ } catch (primaryErr) {
1045
+ terminalError = primaryErr;
1046
+ }
1047
+
1048
+ if (isMultimodalFastPath && multimodalModality === "image") {
1049
+ const retryMarker = await this.maybeBuildImageRetryMarker({
1050
+ error: terminalError,
1051
+ filePath,
1052
+ filename,
1053
+ provider: resolvedProvider,
1054
+ model: resolvedModel,
1055
+ phase: "rerun",
1056
+ });
1057
+ if (retryMarker) {
1058
+ this.bumpImageReencodeRetryMetric("attempted", {
1059
+ phase: "rerun",
1060
+ provider: resolvedProvider,
1061
+ model: resolvedModel,
1062
+ filename,
866
1063
  });
867
- logger.warn(`VLM extraction failed during rerun for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
868
- Actuator.logEvent(ingestionId, userId, "error", "Processing", {
869
- action: "VLM Failed, Fallback to Heavy",
870
- error: msg,
871
- learned_state: learnedState,
1064
+ Actuator.logEvent(ingestionId, userId, "info", "Processing", {
1065
+ action: "Retrying VLM with re-encoded image payload",
1066
+ provider: resolvedProvider,
1067
+ model: resolvedModel,
872
1068
  }, supabase);
873
- isFastPath = false; // Trigger heavy path fallthrough
874
- } else {
875
- throw err; // Re-throw to caller
1069
+ try {
1070
+ const retryResult = await runFastPathAttempt(retryMarker, "reencoded_image_retry");
1071
+ this.bumpImageReencodeRetryMetric("succeeded", {
1072
+ phase: "rerun",
1073
+ provider: resolvedProvider,
1074
+ model: resolvedModel,
1075
+ filename,
1076
+ });
1077
+ Actuator.logEvent(ingestionId, userId, "analysis", "Processing", {
1078
+ action: "VLM re-encoded image retry succeeded",
1079
+ provider: resolvedProvider,
1080
+ model: resolvedModel,
1081
+ }, supabase);
1082
+ return retryResult;
1083
+ } catch (retryErr) {
1084
+ this.bumpImageReencodeRetryMetric("failed", {
1085
+ phase: "rerun",
1086
+ provider: resolvedProvider,
1087
+ model: resolvedModel,
1088
+ filename,
1089
+ });
1090
+ Actuator.logEvent(ingestionId, userId, "error", "Processing", {
1091
+ action: "VLM re-encoded image retry failed",
1092
+ provider: resolvedProvider,
1093
+ model: resolvedModel,
1094
+ error: this.errorToMessage(retryErr),
1095
+ }, supabase);
1096
+ terminalError = retryErr;
1097
+ }
876
1098
  }
877
1099
  }
1100
+
1101
+ const msg = this.errorToMessage(terminalError);
1102
+ if (isMultimodalFastPath && multimodalModality) {
1103
+ const learnedState = await ModelCapabilityService.learnVisionFailure({
1104
+ supabase,
1105
+ userId,
1106
+ provider: resolvedProvider,
1107
+ model: resolvedModel,
1108
+ error: terminalError,
1109
+ modality: multimodalModality,
1110
+ });
1111
+ logger.warn(`VLM extraction failed during rerun for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
1112
+ Actuator.logEvent(ingestionId, userId, "error", "Processing", {
1113
+ action: "VLM Failed, Fallback to Heavy",
1114
+ error: msg,
1115
+ learned_state: learnedState,
1116
+ }, supabase);
1117
+ isFastPath = false; // Trigger heavy path fallthrough
1118
+ } else {
1119
+ throw terminalError instanceof Error ? terminalError : new Error(msg); // Re-throw to caller
1120
+ }
878
1121
  }
879
1122
 
880
1123
  // Re-delegate to rtx_activities