@realtimex/folio 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,20 +8,32 @@ import { extractLlmResponse, normalizeLlmContent, previewLlmText } from "../util
8
8
  import { DEFAULT_BASELINE_FIELDS } from "./BaselineConfigService.js";
9
9
  const logger = createLogger("PolicyEngine");
10
10
  /**
11
- * Helper to build LLM message content. If the text contains the VLM marker
12
- * generated by IngestionService, it casts the payload to an OpenAI-compatible
13
- * Vision array structure so the underlying SDK bridge can transmit the image.
11
+ * Helper to build LLM message content. If the text contains a VLM marker
12
+ * generated by IngestionService, it casts the payload to multimodal blocks.
14
13
  */
15
14
  function extractVlmPayload(text) {
16
- const marker = text.match(/\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/);
17
- if (!marker)
18
- return null;
19
- const markerText = marker[0];
20
- const supplementalText = text.replace(markerText, "").trim().slice(0, 4000);
21
- return {
22
- imageDataUrl: marker[1],
23
- supplementalText,
24
- };
15
+ const imageMarker = text.match(/\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/);
16
+ if (imageMarker) {
17
+ const markerText = imageMarker[0];
18
+ return {
19
+ kind: "image",
20
+ dataUrl: imageMarker[1],
21
+ supplementalText: text.replace(markerText, "").trim().slice(0, 4000),
22
+ };
23
+ }
24
+ const pdfMarker = text.match(/\[VLM_PDF_DATA:(data:[^;]+;base64,[^\]]+)\]/);
25
+ if (pdfMarker) {
26
+ const markerText = pdfMarker[0];
27
+ return {
28
+ kind: "pdf",
29
+ dataUrl: pdfMarker[1],
30
+ supplementalText: text.replace(markerText, "").trim().slice(0, 4000),
31
+ };
32
+ }
33
+ return null;
34
+ }
35
/**
 * Report whether `text` contains the opening of an image or PDF VLM marker.
 *
 * This is a plain substring probe: it does not check that the marker is well
 * formed or closed.
 *
 * @param {string} text - Document text to inspect.
 * @returns {boolean} `true` when either marker prefix occurs anywhere in `text`.
 */
function hasVlmPayload(text) {
  const markerPrefixes = ["[VLM_IMAGE_DATA:", "[VLM_PDF_DATA:"];
  return markerPrefixes.some((prefix) => text.includes(prefix));
}
26
38
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
27
39
  function buildMessageContent(prompt, text, textFirst = false) {
@@ -30,10 +42,12 @@ function buildMessageContent(prompt, text, textFirst = false) {
30
42
  const textPrompt = vlmPayload.supplementalText
31
43
  ? `${prompt}\n\nSupplemental extracted fields:\n${vlmPayload.supplementalText}`
32
44
  : prompt;
33
- return [
34
- { type: "text", text: textPrompt },
35
- { type: "image_url", image_url: { url: vlmPayload.imageDataUrl } }
36
- ];
45
+ // `input_file` is not provider-agnostic (e.g. Anthropic-style block); providers
46
+ // that don't accept it will fail, and IngestionService will learn unsupported pdf modality.
47
+ const assetBlock = vlmPayload.kind === "pdf"
48
+ ? { type: "input_file", file_url: vlmPayload.dataUrl }
49
+ : { type: "image_url", image_url: { url: vlmPayload.dataUrl } };
50
+ return [{ type: "text", text: textPrompt }, assetBlock];
37
51
  }
38
52
  // Standard text payload
39
53
  return textFirst
@@ -340,7 +354,7 @@ async function evaluateCondition(condition, doc, trace, settings = {}) {
340
354
  model,
341
355
  condition_type: condition.type,
342
356
  prompt_preview: prompt.slice(0, 180),
343
- vision_payload: doc.text.includes("[VLM_IMAGE_DATA:")
357
+ vision_payload: hasVlmPayload(doc.text)
344
358
  }
345
359
  });
346
360
  Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Policy Matching", {
@@ -349,7 +363,7 @@ async function evaluateCondition(condition, doc, trace, settings = {}) {
349
363
  model,
350
364
  condition_type: condition.type,
351
365
  prompt_preview: prompt.slice(0, 180),
352
- vision_payload: doc.text.includes("[VLM_IMAGE_DATA:")
366
+ vision_payload: hasVlmPayload(doc.text)
353
367
  }, doc.supabase);
354
368
  const result = await sdk.llm.chat([
355
369
  {
@@ -443,7 +457,7 @@ async function extractData(fields, doc, trace, settings = {}) {
443
457
  Fields to extract:
444
458
  ${fieldDescriptions}`;
445
459
  try {
446
- const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
460
+ const isVlmPayload = hasVlmPayload(doc.text);
447
461
  const mixedPrompt = isVlmPayload
448
462
  ? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
449
463
  : prompt;
@@ -593,7 +607,7 @@ Rules:
593
607
  model,
594
608
  known_fields_count: Object.keys(contractData).length,
595
609
  }, doc.supabase);
596
- const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
610
+ const isVlmPayload = hasVlmPayload(doc.text);
597
611
  const mixedPrompt = isVlmPayload
598
612
  ? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
599
613
  : prompt;
@@ -821,7 +835,9 @@ export class PolicyEngine {
821
835
  const allowLearnedFallback = opts.allowLearnedFallback !== false && !forcedPolicyId;
822
836
  if (allowLearnedFallback && doc.supabase && policies.length > 0) {
823
837
  try {
824
- const learningText = doc.text.replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "");
838
+ const learningText = doc.text
839
+ .replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "")
840
+ .replace(/\[VLM_PDF_DATA:[^\]]+\]/g, "");
825
841
  const learned = await PolicyLearningService.resolveLearnedCandidate({
826
842
  supabase: doc.supabase,
827
843
  userId: doc.userId,
@@ -923,7 +939,7 @@ export class PolicyEngine {
923
939
  `Include the calendar year if clearly present. Prefer hyphenated multi-word tags.\n` +
924
940
  `No markdown, no explanation — only the JSON object.`;
925
941
  const userPrompt = `Extract the following fields from the document:\n${fieldList}`;
926
- const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
942
+ const isVlmPayload = hasVlmPayload(doc.text);
927
943
  const mixedPrompt = isVlmPayload ? `${systemPrompt}\n\n${userPrompt}` : userPrompt;
928
944
  try {
929
945
  Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Baseline Extraction", {
@@ -90,8 +90,8 @@ export class RAGService {
90
90
  * Process an ingested document's raw text: chunk it, embed it, and store in DB.
91
91
  */
92
92
  static async chunkAndEmbed(ingestionId, userId, rawText, supabase, settings) {
93
- if (rawText.startsWith("[VLM_IMAGE_DATA:")) {
94
- logger.info(`Skipping chunking and embedding for VLM base64 image data (Ingestion: ${ingestionId})`);
93
+ if (/^\[VLM_(IMAGE|PDF)_DATA:/.test(rawText)) {
94
+ logger.info(`Skipping chunking and embedding for VLM base64 multimodal data (Ingestion: ${ingestionId})`);
95
95
  return;
96
96
  }
97
97
  const chunks = this.chunkText(rawText);