npm - @realtimex/folio - Versions diffs - 0.1.10 → 0.1.12 - Mend

@realtimex/folio 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/api/src/services/IngestionService.ts +111 -47
package/api/src/services/ModelCapabilityService.ts +666 -88
package/api/src/services/PolicyEngine.ts +48 -22
package/api/src/services/RAGService.ts +2 -2
package/dist/api/src/services/IngestionService.js +103 -41
package/dist/api/src/services/ModelCapabilityService.js +521 -77
package/dist/api/src/services/PolicyEngine.js +38 -22
package/dist/api/src/services/RAGService.js +2 -2
package/dist/assets/{index-_NgwdVu8.js → index-tVGLBfz6.js} +37 -37
package/dist/index.html +1 -1
package/package.json +1 -1

package/api/src/services/PolicyEngine.ts CHANGED

@@ -49,21 +49,42 @@ type ProcessWithPoliciesOptions = {
     allowLearnedFallback?: boolean;
 };
+type VlmPayload = {
+    kind: "image" | "pdf";
+    dataUrl: string;
+    supplementalText: string;
+};
 /**
- * Helper to build LLM message content. If the text contains the VLM marker
- * generated by IngestionService, it casts the payload to an OpenAI-compatible
- * Vision array structure so the underlying SDK bridge can transmit the image.
+ * Helper to build LLM message content. If the text contains a VLM marker
+ * generated by IngestionService, it casts the payload to multimodal blocks.
  */
-function extractVlmPayload(text: string): { imageDataUrl: string; supplementalText: string } | null {
-    const marker = text.match(/\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/);
-    if (!marker) return null;
+function extractVlmPayload(text: string): VlmPayload | null {
+    const imageMarker = text.match(/\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/);
+    if (imageMarker) {
+        const markerText = imageMarker[0];
+        return {
+            kind: "image",
+            dataUrl: imageMarker[1],
+            supplementalText: text.replace(markerText, "").trim().slice(0, 4000),
+        };
+    }
-    const markerText = marker[0];
-    const supplementalText = text.replace(markerText, "").trim().slice(0, 4000);
-    return {
-        imageDataUrl: marker[1],
-        supplementalText,
-    };
+    const pdfMarker = text.match(/\[VLM_PDF_DATA:(data:[^;]+;base64,[^\]]+)\]/);
+    if (pdfMarker) {
+        const markerText = pdfMarker[0];
+        return {
+            kind: "pdf",
+            dataUrl: pdfMarker[1],
+            supplementalText: text.replace(markerText, "").trim().slice(0, 4000),
+        };
+    }
+    return null;
+}
+function hasVlmPayload(text: string): boolean {
+    return text.includes("[VLM_IMAGE_DATA:") || text.includes("[VLM_PDF_DATA:");
 }
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -73,10 +94,13 @@ function buildMessageContent(prompt: string, text: string, textFirst = false): a
         const textPrompt = vlmPayload.supplementalText
             ? `${prompt}\n\nSupplemental extracted fields:\n${vlmPayload.supplementalText}`
             : prompt;
-        return [
-            { type: "text", text: textPrompt },
-            { type: "image_url", image_url: { url: vlmPayload.imageDataUrl } }
-        ];
+        // `input_file` is not provider-agnostic (e.g. Anthropic-style block); providers
+        // that don't accept it will fail, and IngestionService will learn unsupported pdf modality.
+        const assetBlock = vlmPayload.kind === "pdf"
+            ? { type: "input_file", file_url: vlmPayload.dataUrl }
+            : { type: "image_url", image_url: { url: vlmPayload.dataUrl } };
+        return [{ type: "text", text: textPrompt }, assetBlock];
     }
     // Standard text payload
     return textFirst
@@ -444,7 +468,7 @@ async function evaluateCondition(condition: MatchCondition, doc: DocumentObject,
                     model,
                     condition_type: condition.type,
                     prompt_preview: prompt.slice(0, 180),
-                    vision_payload: doc.text.includes("[VLM_IMAGE_DATA:")
+                    vision_payload: hasVlmPayload(doc.text)
                 }
             });
             Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Policy Matching", {
@@ -453,7 +477,7 @@ async function evaluateCondition(condition: MatchCondition, doc: DocumentObject,
                 model,
                 condition_type: condition.type,
                 prompt_preview: prompt.slice(0, 180),
-                vision_payload: doc.text.includes("[VLM_IMAGE_DATA:")
+                vision_payload: hasVlmPayload(doc.text)
             }, doc.supabase);
             const result = await sdk.llm.chat(
                 [
@@ -563,7 +587,7 @@ Fields to extract:
 ${fieldDescriptions}`;
     try {
-        const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
+        const isVlmPayload = hasVlmPayload(doc.text);
         const mixedPrompt = isVlmPayload
             ? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
             : prompt;
@@ -722,7 +746,7 @@ Rules:
             known_fields_count: Object.keys(contractData).length,
         }, doc.supabase);
-        const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
+        const isVlmPayload = hasVlmPayload(doc.text);
         const mixedPrompt = isVlmPayload
             ? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
             : prompt;
@@ -1004,7 +1028,9 @@ export class PolicyEngine {
         const allowLearnedFallback = opts.allowLearnedFallback !== false && !forcedPolicyId;
         if (allowLearnedFallback && doc.supabase && policies.length > 0) {
             try {
-                const learningText = doc.text.replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "");
+                const learningText = doc.text
+                    .replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "")
+                    .replace(/\[VLM_PDF_DATA:[^\]]+\]/g, "");
                 const learned = await PolicyLearningService.resolveLearnedCandidate({
                     supabase: doc.supabase,
                     userId: doc.userId,
@@ -1118,7 +1144,7 @@ export class PolicyEngine {
             `No markdown, no explanation — only the JSON object.`;
         const userPrompt = `Extract the following fields from the document:\n${fieldList}`;
-        const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
+        const isVlmPayload = hasVlmPayload(doc.text);
         const mixedPrompt = isVlmPayload ? `${systemPrompt}\n\n${userPrompt}` : userPrompt;
         try {

package/api/src/services/RAGService.ts CHANGED

@@ -135,8 +135,8 @@ export class RAGService {
         supabase: SupabaseClient,
         settings?: EmbeddingSettings
     ): Promise<void> {
-        if (rawText.startsWith("[VLM_IMAGE_DATA:")) {
-            logger.info(`Skipping chunking and embedding for VLM base64 image data (Ingestion: ${ingestionId})`);
+        if (/^\[VLM_(IMAGE|PDF)_DATA:/.test(rawText)) {
+            logger.info(`Skipping chunking and embedding for VLM base64 multimodal data (Ingestion: ${ingestionId})`);
             return;
         }

package/dist/api/src/services/IngestionService.js CHANGED

@@ -51,6 +51,8 @@ function isPdfTextExtractable(pdfData) {
     return true;
 }
 export class IngestionService {
+    static FAST_EXTS = ["txt", "md", "csv", "json"];
+    static IMAGE_EXTS = ["png", "jpg", "jpeg", "webp"];
     static NON_IDEMPOTENT_ACTION_TYPES = new Set([
         "append_to_google_sheet",
         "webhook",
@@ -85,10 +87,10 @@ export class IngestionService {
         return String(value);
     }
     static buildVlmSemanticText(opts) {
-        const { filename, finalStatus, policyName, extracted, tags } = opts;
+        const { filename, finalStatus, policyName, extracted, tags, modality } = opts;
         const lines = [
             `Document filename: ${filename}`,
-            "Document source: VLM image extraction",
+            `Document source: VLM ${modality} extraction`,
             `Processing status: ${finalStatus}`,
         ];
         if (policyName) {
@@ -134,6 +136,7 @@ export class IngestionService {
             policyName: opts.policyName,
             extracted: opts.extracted,
             tags: opts.tags,
+            modality: opts.modality,
         });
         const details = {
             synthetic_chars: syntheticText.length,
@@ -160,6 +163,15 @@ export class IngestionService {
         });
         return details;
     }
+    static buildVlmPayloadMarker(modality, dataUrl) {
+        const prefix = modality === "pdf" ? "VLM_PDF_DATA" : "VLM_IMAGE_DATA";
+        return `[${prefix}:${dataUrl}]`;
+    }
+    static async fileToDataUrl(filePath, mimeType) {
+        const buffer = await fs.readFile(filePath);
+        const base64 = buffer.toString("base64");
+        return `data:${mimeType};base64,${base64}`;
+    }
     /**
      * Ingest a document using Hybrid Routing Architecture.
      */
@@ -217,32 +229,31 @@ export class IngestionService {
         Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Ingestion started", source, filename, fileSize, is_high_intent: true }, supabase);
         // 2. Document Triage
         let isFastPath = false;
-        let isVlmFastPath = false;
+        let isMultimodalFastPath = false;
+        let multimodalModality = null;
         let extractionContent = content;
         const ext = filename.toLowerCase().split('.').pop() || '';
-        const fastExts = ['txt', 'md', 'csv', 'json'];
-        const imageExts = ['png', 'jpg', 'jpeg', 'webp'];
         // Pre-fetch settings to decide whether we should attempt VLM.
         const { data: triageSettingsRow } = await supabase
             .from("user_settings")
             .select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
             .eq("user_id", userId)
             .maybeSingle();
-        const visionResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow);
-        const llmModel = visionResolution.model;
-        const llmProvider = visionResolution.provider;
-        if (fastExts.includes(ext)) {
+        const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
+        const pdfResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "pdf");
+        const llmModel = imageResolution.model;
+        const llmProvider = imageResolution.provider;
+        if (this.FAST_EXTS.includes(ext)) {
             isFastPath = true;
         }
-        else if (imageExts.includes(ext) && visionResolution.shouldAttempt) {
+        else if (this.IMAGE_EXTS.includes(ext) && imageResolution.shouldAttempt) {
             try {
-                const buffer = await fs.readFile(filePath);
-                const base64 = buffer.toString('base64');
                 const mimeTypeActual = mimeType || `image/${ext === 'jpg' ? 'jpeg' : ext}`;
-                // Special marker for PolicyEngine
-                extractionContent = `[VLM_IMAGE_DATA:data:${mimeTypeActual};base64,${base64}]`;
+                const dataUrl = await this.fileToDataUrl(filePath, mimeTypeActual);
+                extractionContent = this.buildVlmPayloadMarker("image", dataUrl);
                 isFastPath = true;
-                isVlmFastPath = true;
+                isMultimodalFastPath = true;
+                multimodalModality = "image";
                 logger.info(`Smart Triage: Image ${filename} routed to Fast Path using native VLM (${llmModel}).`);
                 Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "VLM Fast Path selected", type: ext, model: llmModel }, supabase);
             }
@@ -250,7 +261,7 @@ export class IngestionService {
                 logger.warn(`Failed to read VLM image ${filename}. Routing to Heavy Path.`, { err });
             }
         }
-        else if (imageExts.includes(ext)) {
+        else if (this.IMAGE_EXTS.includes(ext)) {
             logger.info(`Smart Triage: Image ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked vision-unsupported.`);
             Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
                 action: "VLM skipped (model marked unsupported)",
@@ -270,9 +281,30 @@ export class IngestionService {
                     logger.info(`Smart Triage: PDF ${filename} passed text quality check (${pdfData.pages.filter(p => p.text.trim().length > 30).length}/${pdfData.total} pages with text). Routing to Fast Path.`);
                     Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Smart Triage passed", type: "pdf", fast_path: true }, supabase);
                 }
+                else if (pdfResolution.shouldAttempt) {
+                    // Reuse the already-loaded parse buffer; avoid a second readFile in fileToDataUrl.
+                    const dataUrl = `data:application/pdf;base64,${buffer.toString("base64")}`;
+                    extractionContent = this.buildVlmPayloadMarker("pdf", dataUrl);
+                    isFastPath = true;
+                    isMultimodalFastPath = true;
+                    multimodalModality = "pdf";
+                    logger.info(`Smart Triage: PDF ${filename} routed to multimodal Fast Path using native VLM (${llmModel}).`);
+                    Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
+                        action: "VLM Fast Path selected",
+                        type: "pdf",
+                        modality: "pdf",
+                        model: llmModel,
+                    }, supabase);
+                }
                 else {
-                    logger.info(`Smart Triage: PDF ${filename} failed text quality check. Routing to Heavy Path.`);
-                    Actuator.logEvent(ingestion.id, userId, "info", "Triage", { action: "Smart Triage failed", type: "pdf", fast_path: false }, supabase);
+                    logger.info(`Smart Triage: PDF ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked PDF-unsupported.`);
+                    Actuator.logEvent(ingestion.id, userId, "info", "Triage", {
+                        action: "VLM skipped (model marked unsupported)",
+                        type: "pdf",
+                        modality: "pdf",
+                        model: llmModel,
+                        provider: llmProvider,
+                    }, supabase);
                 }
             }
             catch (err) {
@@ -310,7 +342,7 @@ export class IngestionService {
                     details: {
                         provider: llmSettings.llm_provider ?? llmProvider,
                         model: llmSettings.llm_model ?? llmModel,
-                        mode: isVlmFastPath ? "vision" : "text",
+                        mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
                     }
                 });
                 const baselineResult = await PolicyEngine.extractBaseline(doc, { context: baselineConfig?.context, fields: baselineConfig?.fields }, llmSettings);
@@ -363,7 +395,7 @@ export class IngestionService {
                     .eq("id", ingestion.id)
                     .select()
                     .single();
-                if (isVlmFastPath) {
+                if (isMultimodalFastPath && multimodalModality) {
                     const embeddingMeta = this.queueVlmSemanticEmbedding({
                         ingestionId: ingestion.id,
                         userId,
@@ -372,6 +404,7 @@ export class IngestionService {
                         policyName,
                         extracted: mergedExtracted,
                         tags: autoTags,
+                        modality: multimodalModality,
                         supabase,
                         embedSettings,
                     });
@@ -388,25 +421,27 @@ export class IngestionService {
                         .update({ trace: finalTrace })
                         .eq("id", ingestion.id);
                 }
-                if (isVlmFastPath) {
+                if (isMultimodalFastPath && multimodalModality) {
                     await ModelCapabilityService.learnVisionSuccess({
                         supabase,
                         userId,
                         provider: llmSettings.llm_provider ?? llmProvider,
                         model: llmSettings.llm_model ?? llmModel,
+                        modality: multimodalModality,
                     });
                 }
                 return updatedIngestion;
             }
             catch (err) {
                 const msg = err instanceof Error ? err.message : String(err);
-                if (isVlmFastPath) {
+                if (isMultimodalFastPath && multimodalModality) {
                     const learnedState = await ModelCapabilityService.learnVisionFailure({
                         supabase,
                         userId,
                         provider: llmProvider,
                         model: llmModel,
                         error: err,
+                        modality: multimodalModality,
                     });
                     logger.warn(`VLM extraction failed for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
                     Actuator.logEvent(ingestion.id, userId, "error", "Processing", {
@@ -477,31 +512,31 @@ export class IngestionService {
         if (!filePath)
             throw new Error("No storage path found for this ingestion");
         let isFastPath = false;
-        let isVlmFastPath = false;
+        let isMultimodalFastPath = false;
+        let multimodalModality = null;
         let extractionContent = "";
         const ext = filename.toLowerCase().split('.').pop() || '';
-        const fastExts = ['txt', 'md', 'csv', 'json'];
-        const imageExts = ['png', 'jpg', 'jpeg', 'webp'];
         const { data: triageSettingsRow } = await supabase
             .from("user_settings")
             .select("llm_provider, llm_model, embedding_provider, embedding_model, vision_model_capabilities")
             .eq("user_id", userId)
             .maybeSingle();
-        const visionResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow);
-        const llmModel = visionResolution.model;
-        const llmProvider = visionResolution.provider;
-        if (fastExts.includes(ext)) {
+        const imageResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "image");
+        const pdfResolution = ModelCapabilityService.resolveVisionSupport(triageSettingsRow, "pdf");
+        const llmModel = imageResolution.model;
+        const llmProvider = imageResolution.provider;
+        if (this.FAST_EXTS.includes(ext)) {
             isFastPath = true;
             extractionContent = await fs.readFile(filePath, "utf-8");
         }
-        else if (imageExts.includes(ext) && visionResolution.shouldAttempt) {
+        else if (this.IMAGE_EXTS.includes(ext) && imageResolution.shouldAttempt) {
             try {
-                const buffer = await fs.readFile(filePath);
-                const base64 = buffer.toString('base64');
                 const mimeTypeActual = `image/${ext === 'jpg' ? 'jpeg' : ext}`;
-                extractionContent = `[VLM_IMAGE_DATA:data:${mimeTypeActual};base64,${base64}]`;
+                const dataUrl = await this.fileToDataUrl(filePath, mimeTypeActual);
+                extractionContent = this.buildVlmPayloadMarker("image", dataUrl);
                 isFastPath = true;
-                isVlmFastPath = true;
+                isMultimodalFastPath = true;
+                multimodalModality = "image";
                 logger.info(`Smart Triage: Re-run image ${filename} routed to Fast Path using native VLM (${llmModel}).`);
                 Actuator.logEvent(ingestionId, userId, "info", "Triage", { action: "VLM Fast Path selected", type: ext, model: llmModel }, supabase);
             }
@@ -509,7 +544,7 @@ export class IngestionService {
                 logger.warn(`Failed to read VLM image ${filename} during rerun. Routing to Heavy Path.`, { err });
             }
         }
-        else if (imageExts.includes(ext)) {
+        else if (this.IMAGE_EXTS.includes(ext)) {
             logger.info(`Smart Triage: Re-run image ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked vision-unsupported.`);
             Actuator.logEvent(ingestionId, userId, "info", "Triage", {
                 action: "VLM skipped (model marked unsupported)",
@@ -527,10 +562,34 @@ export class IngestionService {
                     isFastPath = true;
                     extractionContent = pdfData.text;
                 }
-                // eslint-disable-next-line @typescript-eslint/no-unused-vars
+                else if (pdfResolution.shouldAttempt) {
+                    // Reuse the already-loaded parse buffer; avoid a second readFile in fileToDataUrl.
+                    const dataUrl = `data:application/pdf;base64,${buffer.toString("base64")}`;
+                    extractionContent = this.buildVlmPayloadMarker("pdf", dataUrl);
+                    isFastPath = true;
+                    isMultimodalFastPath = true;
+                    multimodalModality = "pdf";
+                    logger.info(`Smart Triage: Re-run PDF ${filename} routed to multimodal Fast Path using native VLM (${llmModel}).`);
+                    Actuator.logEvent(ingestionId, userId, "info", "Triage", {
+                        action: "VLM Fast Path selected",
+                        type: "pdf",
+                        modality: "pdf",
+                        model: llmModel,
+                    }, supabase);
+                }
+                else {
+                    logger.info(`Smart Triage: Re-run PDF ${filename} kept on Heavy Path because ${llmProvider}/${llmModel} is marked PDF-unsupported.`);
+                    Actuator.logEvent(ingestionId, userId, "info", "Triage", {
+                        action: "VLM skipped (model marked unsupported)",
+                        type: "pdf",
+                        modality: "pdf",
+                        model: llmModel,
+                        provider: llmProvider
+                    }, supabase);
+                }
             }
             catch (err) {
-                // ignore
+                logger.warn(`Failed to parse PDF ${filename} during rerun. Routing to Heavy Path.`, { err });
             }
         }
         if (isFastPath) {
@@ -560,7 +619,7 @@ export class IngestionService {
                 details: {
                     provider: llmSettings.llm_provider ?? llmProvider,
                     model: llmSettings.llm_model ?? llmModel,
-                    mode: isVlmFastPath ? "vision" : "text",
+                    mode: isMultimodalFastPath ? `vision:${multimodalModality ?? "image"}` : "text",
                 }
             });
             const baselineResult = await PolicyEngine.extractBaseline(doc, { context: baselineConfig?.context, fields: baselineConfig?.fields }, llmSettings);
@@ -626,7 +685,7 @@ export class IngestionService {
                     baseline_config_id: baselineConfig?.id ?? null,
                 })
                     .eq("id", ingestionId);
-                if (isVlmFastPath) {
+                if (isMultimodalFastPath && multimodalModality) {
                     const embeddingMeta = this.queueVlmSemanticEmbedding({
                         ingestionId,
                         userId,
@@ -635,6 +694,7 @@ export class IngestionService {
                         policyName,
                         extracted: mergedExtracted,
                         tags: mergedTags,
+                        modality: multimodalModality,
                         supabase,
                         embedSettings,
                     });
@@ -651,25 +711,27 @@ export class IngestionService {
                         .update({ trace: rerunTrace })
                         .eq("id", ingestionId);
                 }
-                if (isVlmFastPath) {
+                if (isMultimodalFastPath && multimodalModality) {
                     await ModelCapabilityService.learnVisionSuccess({
                         supabase,
                         userId,
                         provider: llmSettings.llm_provider ?? llmProvider,
                         model: llmSettings.llm_model ?? llmModel,
+                        modality: multimodalModality,
                     });
                 }
                 return finalStatus === "matched";
             }
             catch (err) {
                 const msg = err instanceof Error ? err.message : String(err);
-                if (isVlmFastPath) {
+                if (isMultimodalFastPath && multimodalModality) {
                     const learnedState = await ModelCapabilityService.learnVisionFailure({
                         supabase,
                         userId,
                         provider: llmProvider,
                         model: llmModel,
                         error: err,
+                        modality: multimodalModality,
                     });
                     logger.warn(`VLM extraction failed during rerun for ${filename}. Falling back to Heavy Path. Error: ${msg}`);
                     Actuator.logEvent(ingestionId, userId, "error", "Processing", {