npm - @realtimex/folio - Versions diffs - 0.1.11 → 0.1.13 - Mend

@realtimex/folio 0.1.11 → 0.1.13

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.

Files changed (12)

package/.env.example +1 -0
package/api/src/services/IngestionService.ts +513 -206
package/api/src/services/ModelCapabilityService.ts +213 -56
package/api/src/services/PolicyEngine.ts +48 -22
package/api/src/services/RAGService.ts +2 -2
package/dist/api/src/services/IngestionService.js +467 -194
package/dist/api/src/services/ModelCapabilityService.js +165 -54
package/dist/api/src/services/PolicyEngine.js +38 -22
package/dist/api/src/services/RAGService.js +2 -2
package/dist/assets/{index-nxHX9No5.js → index-CLpalZvv.js} +37 -37
package/dist/index.html +1 -1
package/package.json +1 -1

package/api/src/services/IngestionService.ts CHANGED Viewed

@@ -1,6 +1,10 @@
 import type { SupabaseClient } from "@supabase/supabase-js";
 import fs from "fs/promises";
+import { execFile } from "child_process";
+import os from "os";
+import path from "path";
 import { PDFParse } from "pdf-parse";
+import { promisify } from "util";
 import { createLogger } from "../utils/logger.js";
 import { PolicyLoader } from "./PolicyLoader.js";
 import type { FolioPolicy } from "./PolicyLoader.js";
@@ -11,9 +15,10 @@ import { Actuator } from "../utils/Actuator.js";
 import { extractLlmResponse, previewLlmText } from "../utils/llmResponse.js";
 import { RAGService } from "./RAGService.js";
 import { SDKService } from "./SDKService.js";
-import { ModelCapabilityService } from "./ModelCapabilityService.js";
+import { ModelCapabilityService, type VisionCapabilityModality } from "./ModelCapabilityService.js";
 const logger = createLogger("IngestionService");
+const execFileAsync = promisify(execFile);
 /**
  * Multi-signal classifier that decides whether pdf-parse extracted enough
@@ -89,6 +94,18 @@ export interface Ingestion {
 }
 export class IngestionService {
+    private static readonly FAST_EXTS = ["txt", "md", "csv", "json"] as const;
+    private static readonly IMAGE_EXTS = ["png", "jpg", "jpeg", "webp"] as const;
+    private static readonly IMAGE_REENCODE_TIMEOUT_MS = 15000;
+    private static readonly IMAGE_REENCODE_RETRY_ENABLED = (process.env.FOLIO_VLM_IMAGE_REENCODE_RETRY_ENABLED ?? "true").toLowerCase() !== "false";
+    private static readonly IMAGE_REENCODE_RETRY_METRICS = {
+        attempted: 0,
+        succeeded: 0,
+        failed: 0,
+        skipped_disabled: 0,
+        skipped_unavailable: 0,
+    };
     private static readonly NON_IDEMPOTENT_ACTION_TYPES = new Set([
         "append_to_google_sheet",
         "webhook",
@@ -129,11 +146,12 @@ export class IngestionService {
         policyName?: string;
         extracted: Record<string, unknown>;
         tags: string[];
+        modality: VisionCapabilityModality;
     }): string {
-        const { filename, finalStatus, policyName, extracted, tags } = opts;
+        const { filename, finalStatus, policyName, extracted, tags, modality } = opts;
         const lines: string[] = [
             `Document filename: ${filename}`,
-            "Document source: VLM image extraction",
+            `Document source: VLM ${modality} extraction`,
             `Processing status: ${finalStatus}`,
         ];
@@ -186,6 +204,7 @@ export class IngestionService {
         policyName?: string;
         extracted: Record<string, unknown>;
         tags: string[];
+        modality: VisionCapabilityModality;
         supabase: SupabaseClient;
         embedSettings: { embedding_provider?: string; embedding_model?: string };
     }): { synthetic_chars: number; extracted_fields: number; tags_count: number } {
@@ -195,6 +214,7 @@ export class IngestionService {
             policyName: opts.policyName,
             extracted: opts.extracted,
             tags: opts.tags,
+            modality: opts.modality,
         });
         const details = {
             synthetic_chars: syntheticText.length,
@@ -231,6 +251,101 @@ export class IngestionService {
         return details;
     }
+    private static buildVlmPayloadMarker(modality: VisionCapabilityModality, dataUrl: string): string {
+        const prefix = modality === "pdf" ? "VLM_PDF_DATA" : "VLM_IMAGE_DATA";
+        return `[${prefix}:${dataUrl}]`;
+    }
+    private static async fileToDataUrl(filePath: string, mimeType: string): Promise<string> {
+        const buffer = await fs.readFile(filePath);
+        const base64 = buffer.toString("base64");
+        return `data:${mimeType};base64,${base64}`;
+    }
+    private static errorToMessage(error: unknown): string {
+        if (error instanceof Error) return error.message;
+        if (typeof error === "string") return error;
+        if (error && typeof error === "object") {
+            const candidate = error as Record<string, unknown>;
+            if (typeof candidate.message === "string") return candidate.message;
+        }
+        return String(error ?? "");
+    }
+    private static isInvalidModelError(error: unknown): boolean {
+        const message = this.errorToMessage(error).toLowerCase();
+        return message.includes("invalid model");
+    }
+    private static async reencodeImageToPngDataUrl(filePath: string): Promise<string | null> {
+        const tempOutputPath = path.join(
+            os.tmpdir(),
+            `folio-vlm-reencode-${Date.now()}-${Math.random().toString(16).slice(2)}.png`
+        );
+        try {
+            await execFileAsync("sips", ["-s", "format", "png", filePath, "--out", tempOutputPath], {
+                timeout: this.IMAGE_REENCODE_TIMEOUT_MS,
+                maxBuffer: 1024 * 1024,
+            });
+            const pngBuffer = await fs.readFile(tempOutputPath);
+            return `data:image/png;base64,${pngBuffer.toString("base64")}`;
+        } catch {
+            return null;
+        } finally {
+            await fs.unlink(tempOutputPath).catch(() => undefined);
+        }
+    }
+    private static async maybeBuildImageRetryMarker(opts: {
+        error: unknown;
+        filePath: string;
+        filename: string;
+        provider: string;
+        model: string;
+        phase: "ingest" | "rerun";
+    }): Promise<string | null> {
+        if (!this.isInvalidModelError(opts.error)) return null;
+        if (!this.IMAGE_REENCODE_RETRY_ENABLED) {
+            this.bumpImageReencodeRetryMetric("skipped_disabled", opts);
+            logger.info(
+                `VLM ${opts.phase} retry skipped for ${opts.filename}: re-encode retry disabled (${opts.provider}/${opts.model}).`
+            );
+            return null;
+        }
+        const retryDataUrl = await this.reencodeImageToPngDataUrl(opts.filePath);
+        if (!retryDataUrl) {
+            this.bumpImageReencodeRetryMetric("skipped_unavailable", opts);
+            logger.warn(
+                `VLM ${opts.phase} retry skipped for ${opts.filename}: image re-encode unavailable (${opts.provider}/${opts.model}).`
+            );
+            return null;
+        }
+        logger.warn(
+            `VLM ${opts.phase} failed for ${opts.filename} with invalid model. Retrying once with re-encoded image payload (${opts.provider}/${opts.model}).`
+        );
+        return this.buildVlmPayloadMarker("image", retryDataUrl);
+    }
+    private static bumpImageReencodeRetryMetric(
+        outcome: keyof typeof IngestionService.IMAGE_REENCODE_RETRY_METRICS,
+        meta: {
+            phase: "ingest" | "rerun";
+            provider: string;
+            model: string;
+            filename: string;
+        }
+    ): void {
+        this.IMAGE_REENCODE_RETRY_METRICS[outcome] += 1;
+        logger.info("VLM image re-encode retry metric", {
+            outcome,
+            phase: meta.phase,
+            provider: meta.provider,
+            model: meta.model,
+            filename: meta.filename,
+            counters: { ...this.IMAGE_REENCODE_RETRY_METRICS },
+        });
+    }
     /**
      * Ingest a document using Hybrid Routing Architecture.
      */
@@ -303,11 +418,10 @@ export class IngestionService {
         // 2. Document Triage
         let isFastPath = false;
-        let isVlmFastPath = false;
+        let isMultimodalFastPath = false;
+        let multimodalModality: VisionCapabilityModality | null = null;
         let extractionContent = content;
         const ext = filename.toLowerCase().split('.').pop() || '';
-        const fastExts = ['txt', 'md', 'csv', 'json'];
-        const imageExts = ['png', 'jpg', 'jpeg', 'webp'];
         // Pre-fetch settings to decide whether we should attempt VLM.
         const { data: triageSettingsRow } = await supabase
@@ -315,27 +429,27 @@ export class IngestionService {
             .select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
             .eq("user_id", userId)
             .maybeSingle();
-        const visionResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow);
-        const llmModel = visionResolution.model;
-        const llmProvider = visionResolution.provider;
+        const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
+        const pdfResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "pdf");
+        const llmModel = imageResolution.model;
+        const llmProvider = imageResolution.provider;
-        if (fastExts.includes(ext)) {
+        if (this.FAST_EXTS.includes(ext as typeof this.FAST_EXTS[number])) {
             isFastPath = true;
-        } else if (imageExts.includes(ext) && visionResolution.shouldAttempt) {
+        } else if (this.IMAGE_EXTS.includes(ext as typeof this.IMAGE_EXTS[number]) && imageResolution.shouldAttempt) {
             try {
-                const buffer = await fs.readFile(filePath);
-                const base64 = buffer.toString('base64');
                 const mimeTypeActual = mimeType || `image/${ext === 'jpg' ? 'jpeg' : ext}`;
-                // Special marker for PolicyEngine
-                extractionContent = `[VLM_IMAGE_DATA:data:${mimeTypeActual};base64,${base64}]`;
+                const dataUrl = await this.fileToDataUrl(filePath, mimeTypeActual);
+                extractionContent = this.buildVlmPayloadMarker("image", dataUrl);
                 isFastPath = true;
-                isVlmFastPath = true;
+                isMultimodalFastPath = true;
+                multimodalModality = "image";
                 logger.info(`Smart Triage: Image ${filename} routed to Fast Path using native VLM (${llmModel}).`);
                 Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "VLM Fast Path selected", type: ext, model: llmModel }, supabase);
             } catch (err) {
                 logger.warn(`Failed to read VLM image ${filename}. Routing to Heavy Path.`, { err });
             }
-        } else if (imageExts.includes(ext)) {
+        } else if (this.IMAGE_EXTS.includes(ext as typeof this.IMAGE_EXTS[number])) {
             logger.info(`Smart Triage: Image ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked vision-unsupported.`);
             Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
                 action: "VLM skipped (model marked unsupported)",
@@ -353,9 +467,29 @@ export class IngestionService {
                     extractionContent = pdfData.text;
                     logger.info(`Smart Triage: PDF ${filename} passed text quality check (${pdfData.pages.filter(p => p.text.trim().length > 30).length}/${pdfData.total} pages with text). Routing to Fast Path.`);
                     Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Smart Triage passed", type: "pdf", fast_path: true }, supabase);
+                } else if (pdfResolution.shouldAttempt) {
+                    // Reuse the already-loaded parse buffer; avoid a second readFile in fileToDataUrl.
+                    const dataUrl = `data:application/pdf;base64,${buffer.toString("base64")}`;
+                    extractionContent = this.buildVlmPayloadMarker("pdf", dataUrl);
+                    isFastPath = true;
+                    isMultimodalFastPath = true;
+                    multimodalModality = "pdf";
+                    logger.info(`Smart Triage: PDF ${filename} routed to multimodal Fast Path using native VLM (${llmModel}).`);
+                    Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
+                        action: "VLM Fast Path selected",
+                        type: "pdf",
+                        modality: "pdf",
+                        model: llmModel,
+                    }, supabase);
                 } else {
-                    logger.info(`Smart Triage: PDF ${filename} failed text quality check. Routing to Heavy Path.`);
-                    Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Smart Triage failed", type: "pdf", fast_path: false }, supabase);
+                    logger.info(`Smart Triage: PDF ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked PDF-unsupported.`);
+                    Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
+                        action: "VLM skipped (model marked unsupported)",
+                        type: "pdf",
+                        modality: "pdf",
+                        model: llmModel,
+                        provider: llmProvider,
+                    }, supabase);
                 }
             } catch (err) {
                 logger.warn(`Failed to parse PDF ${filename}. Routing to Heavy Path.`, { err });
@@ -379,132 +513,203 @@ export class IngestionService {
                     embedding_provider: processingSettingsRow.data?.embedding_provider ?? undefined,
                     embedding_model: processingSettingsRow.data?.embedding_model ?? undefined,
                 };
-                const doc = { filePath: filePath, text: extractionContent, ingestionId: ingestion.id, userId, supabase };
-                // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                const baselineTrace: Array<{ timestamp: string; step: string; details?: any }> = [];
+                const resolvedProvider = llmSettings.llm_provider ?? llmProvider;
+                const resolvedModel = llmSettings.llm_model ?? llmModel;
+                const runFastPathAttempt = async (
+                    attemptContent: string,
+                    attemptType: "primary" | "reencoded_image_retry"
+                ): Promise<Ingestion> => {
+                    const doc = { filePath: filePath, text: attemptContent, ingestionId: ingestion.id, userId, supabase };
+                    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                    const baselineTrace: Array<{ timestamp: string; step: string; details?: any }> = [];
+                    // Fire and forget Semantic Embedding Storage
+                    RAGService.chunkAndEmbed(ingestion.id, userId, doc.text, supabase, embedSettings).catch(err => {
+                        logger.error(`RAG embedding failed for ${ingestion.id}`, err);
+                    });
-                // Fire and forget Semantic Embedding Storage
-                RAGService.chunkAndEmbed(ingestion.id, userId, doc.text, supabase, embedSettings).catch(err => {
-                    logger.error(`RAG embedding failed for ${ingestion.id}`, err);
-                });
+                    // 4. Stage 1: Baseline extraction (always runs, LLM call 1 of max 2)
+                    baselineTrace.push({
+                        timestamp: new Date().toISOString(),
+                        step: "LLM request (baseline extraction)",
+                        details: {
+                            provider: resolvedProvider,
+                            model: resolvedModel,
+                            mode: isMultimodalFastPath
+                                ? `vision:${multimodalModality ?? "image"}${attemptType === "reencoded_image_retry" ? ":reencoded" : ""}`
+                                : "text",
+                        }
+                    });
-                // 4. Stage 1: Baseline extraction (always runs, LLM call 1 of max 2)
-                baselineTrace.push({
-                    timestamp: new Date().toISOString(),
-                    step: "LLM request (baseline extraction)",
-                    details: {
-                        provider: llmSettings.llm_provider ?? llmProvider,
-                        model: llmSettings.llm_model ?? llmModel,
-                        mode: isVlmFastPath ? "vision" : "text",
-                    }
-                });
+                    const baselineResult = await PolicyEngine.extractBaseline(
+                        doc,
+                        { context: baselineConfig?.context, fields: baselineConfig?.fields },
+                        llmSettings
+                    );
+                    const baselineEntities = baselineResult.entities;
+                    const autoTags = baselineResult.tags;
+                    baselineTrace.push({
+                        timestamp: new Date().toISOString(),
+                        step: "LLM response (baseline extraction)",
+                        details: {
+                            entities_count: Object.keys(baselineEntities).length,
+                            uncertain_count: baselineResult.uncertain_fields.length,
+                            tags_count: autoTags.length,
+                        }
+                    });
-                const baselineResult = await PolicyEngine.extractBaseline(
-                    doc,
-                    { context: baselineConfig?.context, fields: baselineConfig?.fields },
-                    llmSettings
-                );
-                const baselineEntities = baselineResult.entities;
-                const autoTags = baselineResult.tags;
-                baselineTrace.push({
-                    timestamp: new Date().toISOString(),
-                    step: "LLM response (baseline extraction)",
-                    details: {
-                        entities_count: Object.keys(baselineEntities).length,
-                        uncertain_count: baselineResult.uncertain_fields.length,
-                        tags_count: autoTags.length,
+                    // Enrich the document with extracted entities so policy keyword/semantic
+                    // conditions can match against semantic field values (e.g. document_type:
+                    // "invoice") even when those exact words don't appear in the raw text.
+                    const entityLines = Object.entries(baselineEntities)
+                        .filter(([, v]) => v != null)
+                        .map(([k, v]) => `${k}: ${Array.isArray(v) ? (v as unknown[]).join(", ") : String(v)}`);
+                    const enrichedDoc = entityLines.length > 0
+                        ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
+                        : doc;
+                    // 5. Stage 2: Policy matching + policy-specific field extraction
+                    let result;
+                    if (userPolicies.length > 0) {
+                        result = await PolicyEngine.processWithPolicies(enrichedDoc, userPolicies, llmSettings, baselineEntities);
+                    } else {
+                        result = await PolicyEngine.process(enrichedDoc, llmSettings, baselineEntities);
                     }
-                });
-                // Enrich the document with extracted entities so policy keyword/semantic
-                // conditions can match against semantic field values (e.g. document_type:
-                // "invoice") even when those exact words don't appear in the raw text.
-                const entityLines = Object.entries(baselineEntities)
-                    .filter(([, v]) => v != null)
-                    .map(([k, v]) => `${k}: ${Array.isArray(v) ? (v as unknown[]).join(", ") : String(v)}`);
-                const enrichedDoc = entityLines.length > 0
-                    ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
-                    : doc;
+                    const policyName = userPolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name;
+                    const finalStatus = result.status === "fallback" ? "no_match" : result.status;
-                // 5. Stage 2: Policy matching + policy-specific field extraction
-                let result;
-                if (userPolicies.length > 0) {
-                    result = await PolicyEngine.processWithPolicies(enrichedDoc, userPolicies, llmSettings, baselineEntities);
-                } else {
-                    result = await PolicyEngine.process(enrichedDoc, llmSettings, baselineEntities);
-                }
+                    // Merge: baseline entities are the foundation; policy-specific fields
+                    // are overlaid on top so more precise extractions take precedence.
+                    const mergedExtracted = { ...baselineEntities, ...result.extractedData };
+                    let finalTrace = [...baselineTrace, ...(result.trace || [])];
-                const policyName = userPolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name;
-                const finalStatus = result.status === "fallback" ? "no_match" : result.status;
+                    const { data: updatedIngestion } = await supabase
+                        .from("ingestions")
+                        .update({
+                            status: finalStatus,
+                            policy_id: result.matchedPolicy,
+                            policy_name: policyName,
+                            extracted: mergedExtracted,
+                            actions_taken: result.actionsExecuted,
+                            trace: finalTrace,
+                            tags: autoTags,
+                            baseline_config_id: baselineConfig?.id ?? null,
+                        })
+                        .eq("id", ingestion.id)
+                        .select()
+                        .single();
-                // Merge: baseline entities are the foundation; policy-specific fields
-                // are overlaid on top so more precise extractions take precedence.
-                const mergedExtracted = { ...baselineEntities, ...result.extractedData };
-                let finalTrace = [...baselineTrace, ...(result.trace || [])];
+                    if (isMultimodalFastPath && multimodalModality) {
+                        const embeddingMeta = this.queueVlmSemanticEmbedding({
+                            ingestionId: ingestion.id,
+                            userId,
+                            filename,
+                            finalStatus,
+                            policyName,
+                            extracted: mergedExtracted,
+                            tags: autoTags,
+                            modality: multimodalModality,
+                            supabase,
+                            embedSettings,
+                        });
+                        finalTrace = [
+                            ...finalTrace,
+                            {
+                                timestamp: new Date().toISOString(),
+                                step: "Queued synthetic VLM embedding",
+                                details: embeddingMeta,
+                            }
+                        ];
+                        await supabase
+                            .from("ingestions")
+                            .update({ trace: finalTrace })
+                            .eq("id", ingestion.id);
+                    }
-                const { data: updatedIngestion } = await supabase
-                    .from("ingestions")
-                    .update({
-                        status: finalStatus,
-                        policy_id: result.matchedPolicy,
-                        policy_name: policyName,
-                        extracted: mergedExtracted,
-                        actions_taken: result.actionsExecuted,
-                        trace: finalTrace,
-                        tags: autoTags,
-                        baseline_config_id: baselineConfig?.id ?? null,
-                    })
-                    .eq("id", ingestion.id)
-                    .select()
-                    .single();
+                    if (isMultimodalFastPath && multimodalModality) {
+                        await ModelCapabilityService.learnVisionSuccess({
+                            supabase,
+                            userId,
+                            provider: resolvedProvider,
+                            model: resolvedModel,
+                            modality: multimodalModality,
+                        });
+                    }
-                if (isVlmFastPath) {
-                    const embeddingMeta = this.queueVlmSemanticEmbedding({
-                        ingestionId: ingestion.id,
-                        userId,
-                        filename,
-                        finalStatus,
-                        policyName,
-                        extracted: mergedExtracted,
-                        tags: autoTags,
-                        supabase,
-                        embedSettings,
-                    });
-                    finalTrace = [
-                        ...finalTrace,
-                        {
-                            timestamp: new Date().toISOString(),
-                            step: "Queued synthetic VLM embedding",
-                            details: embeddingMeta,
-                        }
-                    ];
-                    await supabase
-                        .from("ingestions")
-                        .update({ trace: finalTrace })
-                        .eq("id", ingestion.id);
+                    return updatedIngestion as Ingestion;
+                };
+                let terminalError: unknown = null;
+                try {
+                    return await runFastPathAttempt(extractionContent, "primary");
+                } catch (primaryErr) {
+                    terminalError = primaryErr;
                 }
-                if (isVlmFastPath) {
-                    await ModelCapabilityService.learnVisionSuccess({
-                        supabase,
-                        userId,
-                        provider: llmSettings.llm_provider ?? llmProvider,
-                        model: llmSettings.llm_model ?? llmModel,
+                if (isMultimodalFastPath && multimodalModality === "image") {
+                    const retryMarker = await this.maybeBuildImageRetryMarker({
+                        error: terminalError,
+                        filePath,
+                        filename,
+                        provider: resolvedProvider,
+                        model: resolvedModel,
+                        phase: "ingest",
                     });
+                    if (retryMarker) {
+                        this.bumpImageReencodeRetryMetric("attempted", {
+                            phase: "ingest",
+                            provider: resolvedProvider,
+                            model: resolvedModel,
+                            filename,
+                        });
+                        Actuator.logEvent(ingestion.id, userId, "info", "Processing", {
+                            action: "Retrying VLM with re-encoded image payload",
+                            provider: resolvedProvider,
+                            model: resolvedModel,
+                        }, supabase);
+                        try {
+                            const retryResult = await runFastPathAttempt(retryMarker, "reencoded_image_retry");
+                            this.bumpImageReencodeRetryMetric("succeeded", {
+                                phase: "ingest",
+                                provider: resolvedProvider,
+                                model: resolvedModel,
+                                filename,
+                            });
+                            Actuator.logEvent(ingestion.id, userId, "analysis", "Processing", {
+                                action: "VLM re-encoded image retry succeeded",
+                                provider: resolvedProvider,
+                                model: resolvedModel,
+                            }, supabase);
+                            return retryResult;
+                        } catch (retryErr) {
+                            this.bumpImageReencodeRetryMetric("failed", {
+                                phase: "ingest",
+                                provider: resolvedProvider,
+                                model: resolvedModel,
+                                filename,
+                            });
+                            Actuator.logEvent(ingestion.id, userId, "error", "Processing", {
+                                action: "VLM re-encoded image retry failed",
+                                provider: resolvedProvider,
+                                model: resolvedModel,
+                                error: this.errorToMessage(retryErr),
+                            }, supabase);
+                            terminalError = retryErr;
+                        }
+                    }
                 }
-                return updatedIngestion as Ingestion;
-            } catch (err) {
-                const msg = err instanceof Error ? err.message : String(err);
-                if (isVlmFastPath) {
+                const msg = this.errorToMessage(terminalError);
+                if (isMultimodalFastPath && multimodalModality) {
                     const learnedState = await ModelCapabilityService.learnVisionFailure({
                         supabase,
                         userId,
-                        provider: llmProvider,
-                        model: llmModel,
-                        error: err,
+                        provider: resolvedProvider,
+                        model: resolvedModel,
+                        error: terminalError,
+                        modality: multimodalModality,
                     });
                     logger.warn(`VLM extraction failed for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
                     Actuator.logEvent(ingestion.id, userId, "error", "Processing", {
@@ -524,6 +729,16 @@ export class IngestionService {
                         .single();
                     return updatedIngestion as Ingestion;
                 }
+            } catch (err) {
+                const msg = this.errorToMessage(err);
+                Actuator.logEvent(ingestion.id, userId, "error", "Processing", { error: msg }, supabase);
+                const { data: updatedIngestion } = await supabase
+                    .from("ingestions")
+                    .update({ status: "error", error_message: msg })
+                    .eq("id", ingestion.id)
+                    .select()
+                    .single();
+                return updatedIngestion as Ingestion;
             }
         }
@@ -587,38 +802,38 @@ export class IngestionService {
         if (!filePath) throw new Error("No storage path found for this ingestion");
         let isFastPath = false;
-        let isVlmFastPath = false;
+        let isMultimodalFastPath = false;
+        let multimodalModality: VisionCapabilityModality | null = null;
         let extractionContent = "";
         const ext = filename.toLowerCase().split('.').pop() || '';
-        const fastExts = ['txt', 'md', 'csv', 'json'];
-        const imageExts = ['png', 'jpg', 'jpeg', 'webp'];
         const { data: triageSettingsRow } = await supabase
             .from("user_settings")
             .select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
             .eq("user_id", userId)
             .maybeSingle();
-        const visionResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow);
-        const llmModel = visionResolution.model;
-        const llmProvider = visionResolution.provider;
+        const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
+        const pdfResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "pdf");
+        const llmModel = imageResolution.model;
+        const llmProvider = imageResolution.provider;
-        if (fastExts.includes(ext)) {
+        if (this.FAST_EXTS.includes(ext as typeof this.FAST_EXTS[number])) {
             isFastPath = true;
             extractionContent = await fs.readFile(filePath, "utf-8");
-        } else if (imageExts.includes(ext) && visionResolution.shouldAttempt) {
+        } else if (this.IMAGE_EXTS.includes(ext as typeof this.IMAGE_EXTS[number]) && imageResolution.shouldAttempt) {
             try {
-                const buffer = await fs.readFile(filePath);
-                const base64 = buffer.toString('base64');
                 const mimeTypeActual = `image/${ext === 'jpg' ? 'jpeg' : ext}`;
-                extractionContent = `[VLM_IMAGE_DATA:data:${mimeTypeActual};base64,${base64}]`;
+                const dataUrl = await this.fileToDataUrl(filePath, mimeTypeActual);
+                extractionContent = this.buildVlmPayloadMarker("image", dataUrl);
                 isFastPath = true;
-                isVlmFastPath = true;
+                isMultimodalFastPath = true;
+                multimodalModality = "image";
                 logger.info(`Smart Triage: Re-run image ${filename} routed to Fast Path using native VLM (${llmModel}).`);
                 Actuator.logEvent(ingestionId, userId, "info", "Triage", { action: "VLM Fast Path selected", type: ext, model: llmModel }, supabase);
             } catch (err) {
                 logger.warn(`Failed to read VLM image ${filename} during rerun. Routing to Heavy Path.`, { err });
             }
-        } else if (imageExts.includes(ext)) {
+        } else if (this.IMAGE_EXTS.includes(ext as typeof this.IMAGE_EXTS[number])) {
             logger.info(`Smart Triage: Re-run image ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked vision-unsupported.`);
             Actuator.logEvent(ingestionId, userId, "info", "Triage", {
                 action: "VLM skipped (model marked unsupported)",
@@ -634,10 +849,32 @@ export class IngestionService {
                 if (isPdfTextExtractable(pdfData)) {
                     isFastPath = true;
                     extractionContent = pdfData.text;
+                } else if (pdfResolution.shouldAttempt) {
+                    // Reuse the already-loaded parse buffer; avoid a second readFile in fileToDataUrl.
+                    const dataUrl = `data:application/pdf;base64,${buffer.toString("base64")}`;
+                    extractionContent = this.buildVlmPayloadMarker("pdf", dataUrl);
+                    isFastPath = true;
+                    isMultimodalFastPath = true;
+                    multimodalModality = "pdf";
+                    logger.info(`Smart Triage: Re-run PDF ${filename} routed to multimodal Fast Path using native VLM (${llmModel}).`);
+                    Actuator.logEvent(ingestionId, userId, "info", "Triage", {
+                        action: "VLM Fast Path selected",
+                        type: "pdf",
+                        modality: "pdf",
+                        model: llmModel,
+                    }, supabase);
+                } else {
+                    logger.info(`Smart Triage: Re-run PDF ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked PDF-unsupported.`);
+                    Actuator.logEvent(ingestionId, userId, "info", "Triage", {
+                        action: "VLM skipped (model marked unsupported)",
+                        type: "pdf",
+                        modality: "pdf",
+                        model: llmModel,
+                        provider: llmProvider
+                    }, supabase);
                 }
-                // eslint-disable-next-line @typescript-eslint/no-unused-vars
             } catch (err) {
-                // ignore
+                logger.warn(`Failed to parse PDF ${filename} during rerun. Routing to Heavy Path.`, { err });
             }
         }
@@ -655,53 +892,60 @@ export class IngestionService {
                 embedding_provider: processingSettingsRow.data?.embedding_provider ?? undefined,
                 embedding_model: processingSettingsRow.data?.embedding_model ?? undefined,
             };
-            const doc = { filePath, text: extractionContent, ingestionId, userId, supabase };
-            // eslint-disable-next-line @typescript-eslint/no-explicit-any
-            const baselineTrace: Array<{ timestamp: string; step: string; details?: any }> = [];
+            const resolvedProvider = llmSettings.llm_provider ?? llmProvider;
+            const resolvedModel = llmSettings.llm_model ?? llmModel;
+            const runFastPathAttempt = async (
+                attemptContent: string,
+                attemptType: "primary" | "reencoded_image_retry"
+            ): Promise<boolean> => {
+                const doc = { filePath, text: attemptContent, ingestionId, userId, supabase };
+                // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                const baselineTrace: Array<{ timestamp: string; step: string; details?: any }> = [];
-            // Fire and forget Semantic Embedding Storage for re-runs
-            RAGService.chunkAndEmbed(ingestionId, userId, doc.text, supabase, embedSettings).catch(err => {
-                logger.error(`RAG embedding failed during rerun for ${ingestionId}`, err);
-            });
+                // Fire and forget Semantic Embedding Storage for re-runs
+                RAGService.chunkAndEmbed(ingestionId, userId, doc.text, supabase, embedSettings).catch(err => {
+                    logger.error(`RAG embedding failed during rerun for ${ingestionId}`, err);
+                });
-            baselineTrace.push({
-                timestamp: new Date().toISOString(),
-                step: "LLM request (baseline extraction)",
-                details: {
-                    provider: llmSettings.llm_provider ?? llmProvider,
-                    model: llmSettings.llm_model ?? llmModel,
-                    mode: isVlmFastPath ? "vision" : "text",
-                }
-            });
+                baselineTrace.push({
+                    timestamp: new Date().toISOString(),
+                    step: "LLM request (baseline extraction)",
+                    details: {
+                        provider: resolvedProvider,
+                        model: resolvedModel,
+                        mode: isMultimodalFastPath
+                            ? `vision:${multimodalModality ?? "image"}${attemptType === "reencoded_image_retry" ? ":reencoded" : ""}`
+                            : "text",
+                    }
+                });
-            const baselineResult = await PolicyEngine.extractBaseline(
-                doc,
-                { context: baselineConfig?.context, fields: baselineConfig?.fields },
-                llmSettings
-            );
-            const baselineEntities = baselineResult.entities;
-            const autoTags = baselineResult.tags;
-            baselineTrace.push({
-                timestamp: new Date().toISOString(),
-                step: "LLM response (baseline extraction)",
-                details: {
-                    entities_count: Object.keys(baselineEntities).length,
-                    uncertain_count: baselineResult.uncertain_fields.length,
-                    tags_count: autoTags.length,
-                }
-            });
+                const baselineResult = await PolicyEngine.extractBaseline(
+                    doc,
+                    { context: baselineConfig?.context, fields: baselineConfig?.fields },
+                    llmSettings
+                );
+                const baselineEntities = baselineResult.entities;
+                const autoTags = baselineResult.tags;
+                baselineTrace.push({
+                    timestamp: new Date().toISOString(),
+                    step: "LLM response (baseline extraction)",
+                    details: {
+                        entities_count: Object.keys(baselineEntities).length,
+                        uncertain_count: baselineResult.uncertain_fields.length,
+                        tags_count: autoTags.length,
+                    }
+                });
-            const entityLines = Object.entries(baselineEntities)
-                .filter(([, v]) => v != null)
-                .map(([k, v]) => `${k}: ${Array.isArray(v) ? (v as unknown[]).join(", ") : String(v)}`);
-            const enrichedDoc = entityLines.length > 0
-                ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
-                : doc;
+                const entityLines = Object.entries(baselineEntities)
+                    .filter(([, v]) => v != null)
+                    .map(([k, v]) => `${k}: ${Array.isArray(v) ? (v as unknown[]).join(", ") : String(v)}`);
+                const enrichedDoc = entityLines.length > 0
+                    ? { ...doc, text: doc.text + "\n\n[Extracted fields]\n" + entityLines.join("\n") }
+                    : doc;
-            let finalStatus = "no_match";
-            let result: import("./PolicyEngine.js").ProcessingResult;
-            let policyName;
-            try {
+                let finalStatus = "no_match";
+                let result: import("./PolicyEngine.js").ProcessingResult;
                 const forcedPolicyId = opts.forcedPolicyId?.trim();
                 const activePolicies = forcedPolicyId
                     ? userPolicies.filter((policy) => policy.metadata.id === forcedPolicyId)
@@ -726,7 +970,7 @@ export class IngestionService {
                     result = await PolicyEngine.process(enrichedDoc, llmSettings, baselineEntities);
                 }
-                policyName = result.matchedPolicy ? activePolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name : undefined;
+                const policyName = result.matchedPolicy ? activePolicies.find((p) => p.metadata.id === result.matchedPolicy)?.metadata.name : undefined;
                 finalStatus = result.status === "fallback" ? "no_match" : result.status;
                 const mergedExtracted = { ...baselineEntities, ...result.extractedData };
@@ -754,7 +998,7 @@ export class IngestionService {
                     })
                     .eq("id", ingestionId);
-                if (isVlmFastPath) {
+                if (isMultimodalFastPath && multimodalModality) {
                     const embeddingMeta = this.queueVlmSemanticEmbedding({
                         ingestionId,
                         userId,
@@ -763,6 +1007,7 @@ export class IngestionService {
                         policyName,
                         extracted: mergedExtracted,
                         tags: mergedTags,
+                        modality: multimodalModality,
                         supabase,
                         embedSettings,
                     });
@@ -780,37 +1025,99 @@ export class IngestionService {
                         .eq("id", ingestionId);
                 }
-                if (isVlmFastPath) {
+                if (isMultimodalFastPath && multimodalModality) {
                     await ModelCapabilityService.learnVisionSuccess({
                         supabase,
                         userId,
-                        provider: llmSettings.llm_provider ?? llmProvider,
-                        model: llmSettings.llm_model ?? llmModel,
+                        provider: resolvedProvider,
+                        model: resolvedModel,
+                        modality: multimodalModality,
                     });
                 }
                 return finalStatus === "matched";
-            } catch (err: unknown) {
-                const msg = err instanceof Error ? err.message : String(err);
-                if (isVlmFastPath) {
-                    const learnedState = await ModelCapabilityService.learnVisionFailure({
-                        supabase,
-                        userId,
-                        provider: llmProvider,
-                        model: llmModel,
-                        error: err,
+            };
+            let terminalError: unknown = null;
+            try {
+                return await runFastPathAttempt(extractionContent, "primary");
+            } catch (primaryErr) {
+                terminalError = primaryErr;
+            }
+            if (isMultimodalFastPath && multimodalModality === "image") {
+                const retryMarker = await this.maybeBuildImageRetryMarker({
+                    error: terminalError,
+                    filePath,
+                    filename,
+                    provider: resolvedProvider,
+                    model: resolvedModel,
+                    phase: "rerun",
+                });
+                if (retryMarker) {
+                    this.bumpImageReencodeRetryMetric("attempted", {
+                        phase: "rerun",
+                        provider: resolvedProvider,
+                        model: resolvedModel,
+                        filename,
                     });
-                    logger.warn(`VLM extraction failed during rerun for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
-                    Actuator.logEvent(ingestionId, userId, "error", "Processing", {
-                        action: "VLM Failed, Fallback to Heavy",
-                        error: msg,
-                        learned_state: learnedState,
+                    Actuator.logEvent(ingestionId, userId, "info", "Processing", {
+                        action: "Retrying VLM with re-encoded image payload",
+                        provider: resolvedProvider,
+                        model: resolvedModel,
                     }, supabase);
-                    isFastPath = false; // Trigger heavy path fallthrough
-                } else {
-                    throw err; // Re-throw to caller
+                    try {
+                        const retryResult = await runFastPathAttempt(retryMarker, "reencoded_image_retry");
+                        this.bumpImageReencodeRetryMetric("succeeded", {
+                            phase: "rerun",
+                            provider: resolvedProvider,
+                            model: resolvedModel,
+                            filename,
+                        });
+                        Actuator.logEvent(ingestionId, userId, "analysis", "Processing", {
+                            action: "VLM re-encoded image retry succeeded",
+                            provider: resolvedProvider,
+                            model: resolvedModel,
+                        }, supabase);
+                        return retryResult;
+                    } catch (retryErr) {
+                        this.bumpImageReencodeRetryMetric("failed", {
+                            phase: "rerun",
+                            provider: resolvedProvider,
+                            model: resolvedModel,
+                            filename,
+                        });
+                        Actuator.logEvent(ingestionId, userId, "error", "Processing", {
+                            action: "VLM re-encoded image retry failed",
+                            provider: resolvedProvider,
+                            model: resolvedModel,
+                            error: this.errorToMessage(retryErr),
+                        }, supabase);
+                        terminalError = retryErr;
+                    }
                 }
             }
+            const msg = this.errorToMessage(terminalError);
+            if (isMultimodalFastPath && multimodalModality) {
+                const learnedState = await ModelCapabilityService.learnVisionFailure({
+                    supabase,
+                    userId,
+                    provider: resolvedProvider,
+                    model: resolvedModel,
+                    error: terminalError,
+                    modality: multimodalModality,
+                });
+                logger.warn(`VLM extraction failed during rerun for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
+                Actuator.logEvent(ingestionId, userId, "error", "Processing", {
+                    action: "VLM Failed, Fallback to Heavy",
+                    error: msg,
+                    learned_state: learnedState,
+                }, supabase);
+                isFastPath = false; // Trigger heavy path fallthrough
+            } else {
+                throw terminalError instanceof Error ? terminalError : new Error(msg); // Re-throw to caller
+            }
         }
         // Re-delegate to rtx_activities