npm - @realtimex/folio - Versions diffs - 0.1.10 → 0.1.12 - Mend

@realtimex/folio 0.1.10 → 0.1.12

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (11) hide show

package/api/src/services/IngestionService.ts +111 -47
package/api/src/services/ModelCapabilityService.ts +666 -88
package/api/src/services/PolicyEngine.ts +48 -22
package/api/src/services/RAGService.ts +2 -2
package/dist/api/src/services/IngestionService.js +103 -41
package/dist/api/src/services/ModelCapabilityService.js +521 -77
package/dist/api/src/services/PolicyEngine.js +38 -22
package/dist/api/src/services/RAGService.js +2 -2
package/dist/assets/{index-_NgwdVu8.js → index-tVGLBfz6.js} +37 -37
package/dist/index.html +1 -1
package/package.json +1 -1

package/dist/api/src/services/PolicyEngine.js CHANGED Viewed

@@ -8,20 +8,32 @@ import { extractLlmResponse, normalizeLlmContent, previewLlmText } from "../util
 import { DEFAULT_BASELINE_FIELDS } from "./BaselineConfigService.js";
 const logger = createLogger("PolicyEngine");
 /**
- * Helper to build LLM message content. If the text contains the VLM marker
- * generated by IngestionService, it casts the payload to an OpenAI-compatible
- * Vision array structure so the underlying SDK bridge can transmit the image.
+ * Helper to build LLM message content. If the text contains a VLM marker
+ * generated by IngestionService, it casts the payload to multimodal blocks.
  */
 function extractVlmPayload(text) {
-    const marker = text.match(/\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/);
-    if (!marker)
-        return null;
-    const markerText = marker[0];
-    const supplementalText = text.replace(markerText, "").trim().slice(0, 4000);
-    return {
-        imageDataUrl: marker[1],
-        supplementalText,
-    };
+    const imageMarker = text.match(/\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/);
+    if (imageMarker) {
+        const markerText = imageMarker[0];
+        return {
+            kind: "image",
+            dataUrl: imageMarker[1],
+            supplementalText: text.replace(markerText, "").trim().slice(0, 4000),
+        };
+    }
+    const pdfMarker = text.match(/\[VLM_PDF_DATA:(data:[^;]+;base64,[^\]]+)\]/);
+    if (pdfMarker) {
+        const markerText = pdfMarker[0];
+        return {
+            kind: "pdf",
+            dataUrl: pdfMarker[1],
+            supplementalText: text.replace(markerText, "").trim().slice(0, 4000),
+        };
+    }
+    return null;
+}
+function hasVlmPayload(text) {
+    return text.includes("[VLM_IMAGE_DATA:") || text.includes("[VLM_PDF_DATA:");
 }
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
 function buildMessageContent(prompt, text, textFirst = false) {
@@ -30,10 +42,12 @@ function buildMessageContent(prompt, text, textFirst = false) {
         const textPrompt = vlmPayload.supplementalText
             ? `${prompt}\n\nSupplemental extracted fields:\n${vlmPayload.supplementalText}`
             : prompt;
-        return [
-            { type: "text", text: textPrompt },
-            { type: "image_url", image_url: { url: vlmPayload.imageDataUrl } }
-        ];
+        // `input_file` is not provider-agnostic (e.g. Anthropic-style block); providers
+        // that don't accept it will fail, and IngestionService will learn unsupported pdf modality.
+        const assetBlock = vlmPayload.kind === "pdf"
+            ? { type: "input_file", file_url: vlmPayload.dataUrl }
+            : { type: "image_url", image_url: { url: vlmPayload.dataUrl } };
+        return [{ type: "text", text: textPrompt }, assetBlock];
     }
     // Standard text payload
     return textFirst
@@ -340,7 +354,7 @@ async function evaluateCondition(condition, doc, trace, settings = {}) {
                     model,
                     condition_type: condition.type,
                     prompt_preview: prompt.slice(0, 180),
-                    vision_payload: doc.text.includes("[VLM_IMAGE_DATA:")
+                    vision_payload: hasVlmPayload(doc.text)
                 }
             });
             Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Policy Matching", {
@@ -349,7 +363,7 @@ async function evaluateCondition(condition, doc, trace, settings = {}) {
                 model,
                 condition_type: condition.type,
                 prompt_preview: prompt.slice(0, 180),
-                vision_payload: doc.text.includes("[VLM_IMAGE_DATA:")
+                vision_payload: hasVlmPayload(doc.text)
             }, doc.supabase);
             const result = await sdk.llm.chat([
                 {
@@ -443,7 +457,7 @@ async function extractData(fields, doc, trace, settings = {}) {
 Fields to extract:
 ${fieldDescriptions}`;
     try {
-        const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
+        const isVlmPayload = hasVlmPayload(doc.text);
         const mixedPrompt = isVlmPayload
             ? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
             : prompt;
@@ -593,7 +607,7 @@ Rules:
             model,
             known_fields_count: Object.keys(contractData).length,
         }, doc.supabase);
-        const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
+        const isVlmPayload = hasVlmPayload(doc.text);
         const mixedPrompt = isVlmPayload
             ? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
             : prompt;
@@ -821,7 +835,9 @@ export class PolicyEngine {
         const allowLearnedFallback = opts.allowLearnedFallback !== false && !forcedPolicyId;
         if (allowLearnedFallback && doc.supabase && policies.length > 0) {
             try {
-                const learningText = doc.text.replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "");
+                const learningText = doc.text
+                    .replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "")
+                    .replace(/\[VLM_PDF_DATA:[^\]]+\]/g, "");
                 const learned = await PolicyLearningService.resolveLearnedCandidate({
                     supabase: doc.supabase,
                     userId: doc.userId,
@@ -923,7 +939,7 @@ export class PolicyEngine {
             `Include the calendar year if clearly present. Prefer hyphenated multi-word tags.\n` +
             `No markdown, no explanation — only the JSON object.`;
         const userPrompt = `Extract the following fields from the document:\n${fieldList}`;
-        const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
+        const isVlmPayload = hasVlmPayload(doc.text);
         const mixedPrompt = isVlmPayload ? `${systemPrompt}\n\n${userPrompt}` : userPrompt;
         try {
             Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Baseline Extraction", {

package/dist/api/src/services/RAGService.js CHANGED Viewed

@@ -90,8 +90,8 @@ export class RAGService {
      * Process an ingested document's raw text: chunk it, embed it, and store in DB.
      */
     static async chunkAndEmbed(ingestionId, userId, rawText, supabase, settings) {
-        if (rawText.startsWith("[VLM_IMAGE_DATA:")) {
-            logger.info(`Skipping chunking and embedding for VLM base64 image data (Ingestion: ${ingestionId})`);
+        if (/^\[VLM_(IMAGE|PDF)_DATA:/.test(rawText)) {
+            logger.info(`Skipping chunking and embedding for VLM base64 multimodal data (Ingestion: ${ingestionId})`);
             return;
         }
         const chunks = this.chunkText(rawText);