npm - @realtimex/folio - Versions diffs - 0.1.11 → 0.1.13 - Mend

@realtimex/folio 0.1.11 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/.env.example +1 -0
package/api/src/services/IngestionService.ts +513 -206
package/api/src/services/ModelCapabilityService.ts +213 -56
package/api/src/services/PolicyEngine.ts +48 -22
package/api/src/services/RAGService.ts +2 -2
package/dist/api/src/services/IngestionService.js +467 -194
package/dist/api/src/services/ModelCapabilityService.js +165 -54
package/dist/api/src/services/PolicyEngine.js +38 -22
package/dist/api/src/services/RAGService.js +2 -2
package/dist/assets/{index-nxHX9No5.js → index-CLpalZvv.js} +37 -37
package/dist/index.html +1 -1
package/package.json +1 -1

package/dist/api/src/services/ModelCapabilityService.js CHANGED Viewed

@@ -8,20 +8,21 @@ export class ModelCapabilityService {
     static UNSUPPORTED_CONFIRMATION_WINDOW_MS = 24 * 60 * 60 * 1000;
     static UNSUPPORTED_CONFIRMATION_FAILURES = 2;
     static UNSUPPORTED_SCORE_THRESHOLD = 3;
-    static resolveVisionSupport(settingsRow) {
+    static resolveVisionSupport(settingsRow, modality = "image") {
         const provider = (settingsRow?.llm_provider || SDKService.DEFAULT_LLM_PROVIDER).trim();
         const model = (settingsRow?.llm_model || SDKService.DEFAULT_LLM_MODEL).trim();
-        const state = this.getVisionState(settingsRow?.vision_model_capabilities, provider, model);
+        const state = this.getVisionState(settingsRow?.vision_model_capabilities, provider, model, modality);
         return {
             provider,
             model,
+            modality,
             state,
             shouldAttempt: state !== "unsupported",
         };
     }
-    static getVisionState(rawMap, provider, model) {
+    static getVisionState(rawMap, provider, model, modality = "image") {
         const map = this.normalizeCapabilityMap(rawMap);
-        const entry = map[this.capabilityKey(provider, model)];
+        const entry = map[this.capabilityKey(provider, model, modality)];
         if (!entry || this.isExpired(entry))
             return "unknown";
         if (entry.state === "pending_unsupported")
@@ -31,18 +32,21 @@ export class ModelCapabilityService {
     static async learnVisionSuccess(opts) {
         await this.writeCapability({
             ...opts,
+            modality: opts.modality ?? "image",
             state: "supported",
             reason: "vision_request_succeeded",
             ttlMs: this.SUPPORTED_TTL_MS,
         });
     }
     static async learnVisionFailure(opts) {
+        const modality = opts.modality ?? "image";
         const classification = this.classifyVisionFailure({
             error: opts.error,
             provider: opts.provider,
+            modality,
         });
         if (!classification.isCapabilityError) {
-            logger.info(`Vision failure for ${opts.provider}/${opts.model} treated as non-capability; leaving capability unknown`, {
+            logger.info(`Vision failure for ${opts.provider}/${opts.model} (${modality}) treated as non-capability; leaving capability unknown`, {
                 reason: classification.reason,
                 score: classification.score,
                 evidence: classification.evidence,
@@ -53,7 +57,7 @@ export class ModelCapabilityService {
         if (!map) {
             return "unknown";
         }
-        const key = this.capabilityKey(opts.provider, opts.model);
+        const key = this.capabilityKey(opts.provider, opts.model, modality);
         const now = new Date();
         const failureCount = this.nextFailureCount(map[key], now.getTime());
         if (failureCount < this.UNSUPPORTED_CONFIRMATION_FAILURES) {
@@ -62,6 +66,7 @@ export class ModelCapabilityService {
                 userId: opts.userId,
                 provider: opts.provider,
                 model: opts.model,
+                modality,
                 state: "pending_unsupported",
                 reason: "capability_signal_pending_confirmation",
                 ttlMs: this.PENDING_UNSUPPORTED_TTL_MS,
@@ -77,6 +82,7 @@ export class ModelCapabilityService {
             userId: opts.userId,
             provider: opts.provider,
             model: opts.model,
+            modality,
             state: "unsupported",
             reason: classification.reason,
             ttlMs: this.UNSUPPORTED_TTL_MS,
@@ -113,13 +119,23 @@ export class ModelCapabilityService {
         return true;
     }
     static async writeCapability(opts) {
-        const { supabase, userId, provider, model, state, reason, ttlMs, preloadedMap, failureCount, lastFailureAt, evidence, } = opts;
+        const { supabase, userId, provider, model, modality, state, reason, ttlMs, preloadedMap, failureCount, lastFailureAt, evidence, } = opts;
         const map = preloadedMap ?? (await this.readCapabilityMap(supabase, userId));
         if (!map) {
             return;
         }
         const now = new Date();
-        const key = this.capabilityKey(provider, model);
+        const key = this.capabilityKey(provider, model, modality);
+        const existingEntry = map[key];
+        if (this.isManualOverrideActive(existingEntry) && reason !== "manual_override") {
+            logger.info(`Skipping auto capability update for ${provider}/${model} (${modality}) because manual override is active`, {
+                requestedState: state,
+                requestedReason: reason,
+                currentState: existingEntry?.state,
+                currentReason: existingEntry?.reason,
+            });
+            return;
+        }
         const nextEntry = {
             state,
             learned_at: now.toISOString(),
@@ -140,7 +156,7 @@ export class ModelCapabilityService {
         if (!persisted) {
             return;
         }
-        logger.info(`Updated model capability for ${provider}/${model}: ${state}`, {
+        logger.info(`Updated model capability for ${provider}/${model} (${modality}): ${state}`, {
             reason,
             ttlMs,
             failureCount,
@@ -191,15 +207,28 @@ export class ModelCapabilityService {
         }
         return normalized;
     }
-    static capabilityKey(provider, model) {
+    static capabilityBaseKey(provider, model) {
         return `${provider.toLowerCase().trim()}:${model.toLowerCase().trim()}`;
     }
+    static capabilityKey(provider, model, modality = "image") {
+        const base = this.capabilityBaseKey(provider, model);
+        if (modality === "image")
+            return base;
+        return `${base}:${modality}`;
+    }
     static isExpired(entry) {
         if (!entry.expires_at)
             return false;
         const expiryTs = Date.parse(entry.expires_at);
         return Number.isFinite(expiryTs) && expiryTs <= Date.now();
     }
+    static isManualOverrideActive(entry) {
+        if (!entry)
+            return false;
+        if (entry.reason !== "manual_override")
+            return false;
+        return !this.isExpired(entry);
+    }
     static nextFailureCount(entry, nowTs) {
         if (!entry || entry.state !== "pending_unsupported" || this.isExpired(entry)) {
             return 1;
@@ -230,7 +259,7 @@ export class ModelCapabilityService {
                 evidence: transientEvidence,
             };
         }
-        const documentEvidence = this.matchDocumentSpecific(signal);
+        const documentEvidence = this.matchDocumentSpecific(signal, opts.modality);
         if (documentEvidence.length > 0) {
             return {
                 isCapabilityError: false,
@@ -239,7 +268,7 @@ export class ModelCapabilityService {
                 evidence: documentEvidence,
             };
         }
-        const capability = this.scoreCapabilitySignal(signal, opts.provider);
+        const capability = this.scoreCapabilitySignal(signal, opts.provider, opts.modality);
         if (capability.score >= this.UNSUPPORTED_SCORE_THRESHOLD) {
             return {
                 isCapabilityError: true,
@@ -373,8 +402,8 @@ export class ModelCapabilityService {
             ...messageMatches.map((match) => `msg:${match}`),
         ];
     }
-    static matchDocumentSpecific(signal) {
-        const codeMatches = this.matchCodes(signal.codes, [
+    static matchDocumentSpecific(signal, modality) {
+        const imageCodeHints = [
             "image_too_large",
             "invalid_base64",
             "invalid_image",
@@ -382,8 +411,8 @@ export class ModelCapabilityService {
             "malformed_image",
             "invalid_image_url",
             "image_decode_failed",
-        ]);
-        const messageMatches = this.matchMessage(signal.message, [
+        ];
+        const imageMessageHints = [
             "image too large",
             "invalid base64",
             "malformed image",
@@ -391,7 +420,30 @@ export class ModelCapabilityService {
             "unable to decode image",
             "failed to decode image",
             "invalid image url",
-        ]);
+        ];
+        const pdfCodeHints = [
+            "invalid_pdf",
+            "malformed_pdf",
+            "corrupt_pdf",
+            "encrypted_pdf",
+            "password_protected_pdf",
+            "pdf_parse_error",
+            "file_too_large",
+        ];
+        const pdfMessageHints = [
+            "invalid pdf",
+            "malformed pdf",
+            "corrupt pdf",
+            "encrypted pdf",
+            "password protected pdf",
+            "failed to parse pdf",
+            "unable to parse pdf",
+            "pdf is corrupted",
+            "pdf too large",
+            "file too large",
+        ];
+        const codeMatches = this.matchCodes(signal.codes, modality === "pdf" ? pdfCodeHints : imageCodeHints);
+        const messageMatches = this.matchMessage(signal.message, modality === "pdf" ? pdfMessageHints : imageMessageHints);
         const statusMatches = Array.from(signal.statusCodes).filter((status) => {
             if (status === 413)
                 return true;
@@ -406,54 +458,87 @@ export class ModelCapabilityService {
             ...messageMatches.map((match) => `msg:${match}`),
         ];
     }
-    static scoreCapabilitySignal(signal, provider) {
+    static scoreCapabilitySignal(signal, provider, modality) {
         const evidence = [];
         let score = 0;
-        const explicitCapabilityCodes = this.matchCodes(signal.codes, [
-            "vision_not_supported",
-            "unsupported_vision",
-            "model_not_vision_capable",
-            "image_not_supported",
-            "unsupported_message_content",
-            "unsupported_content_type_for_model",
-            "unsupported_image_input",
-            "invalid_model_for_vision",
-        ]);
+        const explicitCapabilityCodes = this.matchCodes(signal.codes, modality === "pdf"
+            ? [
+                "pdf_not_supported",
+                "unsupported_pdf_input",
+                "unsupported_document_input",
+                "unsupported_file_input",
+                "input_file_not_supported",
+                "unsupported_file_type",
+                "model_not_document_capable",
+            ]
+            : [
+                "vision_not_supported",
+                "unsupported_vision",
+                "model_not_vision_capable",
+                "image_not_supported",
+                "unsupported_message_content",
+                "unsupported_content_type_for_model",
+                "unsupported_image_input",
+                "invalid_model_for_vision",
+            ]);
         if (explicitCapabilityCodes.length > 0) {
             score += 3;
             evidence.push(...explicitCapabilityCodes.map((match) => `code:${match}`));
         }
-        const highPrecisionMessageMatches = this.matchMessage(signal.message, [
-            "does not support images",
-            "does not support image inputs",
-            "model does not support image",
-            "this model cannot process images",
-            "text-only model",
-            "images are not supported for this model",
-            "vision is not supported for this model",
-            "vision is not supported",
-            "vision not supported",
-            "image_url is only supported by certain models",
-        ]);
+        const highPrecisionMessageMatches = this.matchMessage(signal.message, modality === "pdf"
+            ? [
+                "this model does not support pdf",
+                "model does not support pdf",
+                "pdf is not supported for this model",
+                "file input is not supported for this model",
+                "input_file is not supported",
+                "unsupported file type: application/pdf",
+                "application/pdf is not supported for this model",
+            ]
+            : [
+                "does not support images",
+                "does not support image inputs",
+                "model does not support image",
+                "this model cannot process images",
+                "text-only model",
+                "images are not supported for this model",
+                "vision is not supported for this model",
+                "vision is not supported",
+                "vision not supported",
+                "image_url is only supported by certain models",
+            ]);
         if (highPrecisionMessageMatches.length > 0) {
             score += 3;
             evidence.push(...highPrecisionMessageMatches.map((match) => `msg:${match}`));
         }
-        const providerSpecificMatches = this.matchMessage(signal.message, this.providerCapabilityHints(provider));
+        const providerSpecificMatches = this.matchMessage(signal.message, this.providerCapabilityHints(provider, modality));
         if (providerSpecificMatches.length > 0) {
-            score += 2;
+            score += 3;
             evidence.push(...providerSpecificMatches.map((match) => `provider:${match}`));
         }
-        const weakCapabilityHints = this.matchMessage(signal.message, [
-            "vision",
-            "unsupported content type",
-            "unsupported message content",
-            "invalid content type",
-            "unrecognized content type",
-            "image_url",
-            "multimodal",
-            "multi-modal",
-        ]);
+        const weakCapabilityHints = this.matchMessage(signal.message, modality === "pdf"
+            ? [
+                "pdf input",
+                "pdf support",
+                "pdf not supported",
+                "application/pdf",
+                "input_file",
+                "file input",
+                "document input",
+                "unsupported file type",
+                "unsupported content type",
+                "invalid content type",
+            ]
+            : [
+                "vision",
+                "unsupported content type",
+                "unsupported message content",
+                "invalid content type",
+                "unrecognized content type",
+                "image_url",
+                "multimodal",
+                "multi-modal",
+            ]);
         const hasClientValidationStatus = Array.from(signal.statusCodes).some((status) => [400, 415, 422].includes(status));
         if (weakCapabilityHints.length > 0 && hasClientValidationStatus) {
             score += 1;
@@ -468,8 +553,35 @@ export class ModelCapabilityService {
             evidence: Array.from(new Set(evidence)).slice(0, 8),
         };
     }
-    static providerCapabilityHints(provider) {
+    static providerCapabilityHints(provider, modality) {
         const normalized = provider.toLowerCase().trim();
+        if (modality === "pdf") {
+            if (normalized.includes("openai")) {
+                return [
+                    "input_file is not supported",
+                    "unsupported file type: application/pdf",
+                    "application/pdf is not supported for this model",
+                ];
+            }
+            if (normalized.includes("anthropic")) {
+                return [
+                    "pdf is not supported for this model",
+                    "file input is not supported for this model",
+                ];
+            }
+            if (normalized.includes("google") || normalized.includes("gemini")) {
+                return [
+                    "unsupported document input",
+                    "pdf input is not supported",
+                ];
+            }
+            if (normalized.includes("realtimex")) {
+                return [
+                    "unsupported file input",
+                ];
+            }
+            return [];
+        }
         if (normalized.includes("openai")) {
             return [
                 "image_url is only supported by certain models",
@@ -490,7 +602,6 @@ export class ModelCapabilityService {
         }
         if (normalized.includes("realtimex")) {
             return [
-                "invalid model",
                 "text-only model",
             ];
         }

package/dist/api/src/services/PolicyEngine.js CHANGED Viewed

@@ -8,20 +8,32 @@ import { extractLlmResponse, normalizeLlmContent, previewLlmText } from "../util
 import { DEFAULT_BASELINE_FIELDS } from "./BaselineConfigService.js";
 const logger = createLogger("PolicyEngine");
 /**
- * Helper to build LLM message content. If the text contains the VLM marker
- * generated by IngestionService, it casts the payload to an OpenAI-compatible
- * Vision array structure so the underlying SDK bridge can transmit the image.
+ * Helper to build LLM message content. If the text contains a VLM marker
+ * generated by IngestionService, it casts the payload to multimodal blocks.
  */
 function extractVlmPayload(text) {
-    const marker = text.match(/\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/);
-    if (!marker)
-        return null;
-    const markerText = marker[0];
-    const supplementalText = text.replace(markerText, "").trim().slice(0, 4000);
-    return {
-        imageDataUrl: marker[1],
-        supplementalText,
-    };
+    const imageMarker = text.match(/\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/);
+    if (imageMarker) {
+        const markerText = imageMarker[0];
+        return {
+            kind: "image",
+            dataUrl: imageMarker[1],
+            supplementalText: text.replace(markerText, "").trim().slice(0, 4000),
+        };
+    }
+    const pdfMarker = text.match(/\[VLM_PDF_DATA:(data:[^;]+;base64,[^\]]+)\]/);
+    if (pdfMarker) {
+        const markerText = pdfMarker[0];
+        return {
+            kind: "pdf",
+            dataUrl: pdfMarker[1],
+            supplementalText: text.replace(markerText, "").trim().slice(0, 4000),
+        };
+    }
+    return null;
+}
+function hasVlmPayload(text) {
+    return text.includes("[VLM_IMAGE_DATA:") || text.includes("[VLM_PDF_DATA:");
 }
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
 function buildMessageContent(prompt, text, textFirst = false) {
@@ -30,10 +42,12 @@ function buildMessageContent(prompt, text, textFirst = false) {
         const textPrompt = vlmPayload.supplementalText
             ? `${prompt}\n\nSupplemental extracted fields:\n${vlmPayload.supplementalText}`
             : prompt;
-        return [
-            { type: "text", text: textPrompt },
-            { type: "image_url", image_url: { url: vlmPayload.imageDataUrl } }
-        ];
+        // `input_file` is not provider-agnostic (it is an OpenAI Responses-style block; Anthropic, for example, expects a `document` block); providers
+        // that don't accept it will fail, and IngestionService will learn unsupported pdf modality.
+        const assetBlock = vlmPayload.kind === "pdf"
+            ? { type: "input_file", file_url: vlmPayload.dataUrl }
+            : { type: "image_url", image_url: { url: vlmPayload.dataUrl } };
+        return [{ type: "text", text: textPrompt }, assetBlock];
     }
     // Standard text payload
     return textFirst
@@ -340,7 +354,7 @@ async function evaluateCondition(condition, doc, trace, settings = {}) {
                     model,
                     condition_type: condition.type,
                     prompt_preview: prompt.slice(0, 180),
-                    vision_payload: doc.text.includes("[VLM_IMAGE_DATA:")
+                    vision_payload: hasVlmPayload(doc.text)
                 }
             });
             Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Policy Matching", {
@@ -349,7 +363,7 @@ async function evaluateCondition(condition, doc, trace, settings = {}) {
                 model,
                 condition_type: condition.type,
                 prompt_preview: prompt.slice(0, 180),
-                vision_payload: doc.text.includes("[VLM_IMAGE_DATA:")
+                vision_payload: hasVlmPayload(doc.text)
             }, doc.supabase);
             const result = await sdk.llm.chat([
                 {
@@ -443,7 +457,7 @@ async function extractData(fields, doc, trace, settings = {}) {
 Fields to extract:
 ${fieldDescriptions}`;
     try {
-        const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
+        const isVlmPayload = hasVlmPayload(doc.text);
         const mixedPrompt = isVlmPayload
             ? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
             : prompt;
@@ -593,7 +607,7 @@ Rules:
             model,
             known_fields_count: Object.keys(contractData).length,
         }, doc.supabase);
-        const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
+        const isVlmPayload = hasVlmPayload(doc.text);
         const mixedPrompt = isVlmPayload
             ? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
             : prompt;
@@ -821,7 +835,9 @@ export class PolicyEngine {
         const allowLearnedFallback = opts.allowLearnedFallback !== false && !forcedPolicyId;
         if (allowLearnedFallback && doc.supabase && policies.length > 0) {
             try {
-                const learningText = doc.text.replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "");
+                const learningText = doc.text
+                    .replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "")
+                    .replace(/\[VLM_PDF_DATA:[^\]]+\]/g, "");
                 const learned = await PolicyLearningService.resolveLearnedCandidate({
                     supabase: doc.supabase,
                     userId: doc.userId,
@@ -923,7 +939,7 @@ export class PolicyEngine {
             `Include the calendar year if clearly present. Prefer hyphenated multi-word tags.\n` +
             `No markdown, no explanation — only the JSON object.`;
         const userPrompt = `Extract the following fields from the document:\n${fieldList}`;
-        const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
+        const isVlmPayload = hasVlmPayload(doc.text);
         const mixedPrompt = isVlmPayload ? `${systemPrompt}\n\n${userPrompt}` : userPrompt;
         try {
             Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Baseline Extraction", {

package/dist/api/src/services/RAGService.js CHANGED Viewed

@@ -90,8 +90,8 @@ export class RAGService {
      * Process an ingested document's raw text: chunk it, embed it, and store in DB.
      */
     static async chunkAndEmbed(ingestionId, userId, rawText, supabase, settings) {
-        if (rawText.startsWith("[VLM_IMAGE_DATA:")) {
-            logger.info(`Skipping chunking and embedding for VLM base64 image data (Ingestion: ${ingestionId})`);
+        if (/^\[VLM_(IMAGE|PDF)_DATA:/.test(rawText)) {
+            logger.info(`Skipping chunking and embedding for VLM base64 multimodal data (Ingestion: ${ingestionId})`);
             return;
         }
         const chunks = this.chunkText(rawText);