@realtimex/folio 0.1.11 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ import { SDKService } from "./SDKService.js";
5
5
  const logger = createLogger("ModelCapabilityService");
6
6
 
7
7
  export type VisionCapabilityState = "supported" | "unsupported" | "unknown";
8
+ export type VisionCapabilityModality = "image" | "pdf";
8
9
  type StoredVisionCapabilityState = "supported" | "unsupported" | "pending_unsupported";
9
10
 
10
11
  interface StoredVisionCapability {
@@ -28,6 +29,7 @@ interface SettingsLike {
28
29
  export interface VisionResolution {
29
30
  provider: string;
30
31
  model: string;
32
+ modality: VisionCapabilityModality;
31
33
  state: VisionCapabilityState;
32
34
  shouldAttempt: boolean;
33
35
  }
@@ -53,21 +55,30 @@ export class ModelCapabilityService {
53
55
  private static readonly UNSUPPORTED_CONFIRMATION_FAILURES = 2;
54
56
  private static readonly UNSUPPORTED_SCORE_THRESHOLD = 3;
55
57
 
56
- static resolveVisionSupport(settingsRow: SettingsLike | null | undefined): VisionResolution {
58
+ static resolveVisionSupport(
59
+ settingsRow: SettingsLike | null | undefined,
60
+ modality: VisionCapabilityModality = "image"
61
+ ): VisionResolution {
57
62
  const provider = (settingsRow?.llm_provider || SDKService.DEFAULT_LLM_PROVIDER).trim();
58
63
  const model = (settingsRow?.llm_model || SDKService.DEFAULT_LLM_MODEL).trim();
59
- const state = this.getVisionState(settingsRow?.vision_model_capabilities, provider, model);
64
+ const state = this.getVisionState(settingsRow?.vision_model_capabilities, provider, model, modality);
60
65
  return {
61
66
  provider,
62
67
  model,
68
+ modality,
63
69
  state,
64
70
  shouldAttempt: state !== "unsupported",
65
71
  };
66
72
  }
67
73
 
68
- static getVisionState(rawMap: unknown, provider: string, model: string): VisionCapabilityState {
74
+ static getVisionState(
75
+ rawMap: unknown,
76
+ provider: string,
77
+ model: string,
78
+ modality: VisionCapabilityModality = "image"
79
+ ): VisionCapabilityState {
69
80
  const map = this.normalizeCapabilityMap(rawMap);
70
- const entry = map[this.capabilityKey(provider, model)];
81
+ const entry = map[this.capabilityKey(provider, model, modality)];
71
82
  if (!entry || this.isExpired(entry)) return "unknown";
72
83
  if (entry.state === "pending_unsupported") return "unknown";
73
84
  return entry.state;
@@ -78,9 +89,11 @@ export class ModelCapabilityService {
78
89
  userId: string;
79
90
  provider: string;
80
91
  model: string;
92
+ modality?: VisionCapabilityModality;
81
93
  }): Promise<void> {
82
94
  await this.writeCapability({
83
95
  ...opts,
96
+ modality: opts.modality ?? "image",
84
97
  state: "supported",
85
98
  reason: "vision_request_succeeded",
86
99
  ttlMs: this.SUPPORTED_TTL_MS,
@@ -93,18 +106,24 @@ export class ModelCapabilityService {
93
106
  provider: string;
94
107
  model: string;
95
108
  error: unknown;
109
+ modality?: VisionCapabilityModality;
96
110
  }): Promise<VisionCapabilityState> {
111
+ const modality = opts.modality ?? "image";
97
112
  const classification = this.classifyVisionFailure({
98
113
  error: opts.error,
99
114
  provider: opts.provider,
115
+ modality,
100
116
  });
101
117
 
102
118
  if (!classification.isCapabilityError) {
103
- logger.info(`Vision failure for ${opts.provider}/${opts.model} treated as non-capability; leaving capability unknown`, {
119
+ logger.info(
120
+ `Vision failure for ${opts.provider}/${opts.model} (${modality}) treated as non-capability; leaving capability unknown`,
121
+ {
104
122
  reason: classification.reason,
105
123
  score: classification.score,
106
124
  evidence: classification.evidence,
107
- });
125
+ }
126
+ );
108
127
  return "unknown";
109
128
  }
110
129
 
@@ -113,7 +132,7 @@ export class ModelCapabilityService {
113
132
  return "unknown";
114
133
  }
115
134
 
116
- const key = this.capabilityKey(opts.provider, opts.model);
135
+ const key = this.capabilityKey(opts.provider, opts.model, modality);
117
136
  const now = new Date();
118
137
  const failureCount = this.nextFailureCount(map[key], now.getTime());
119
138
 
@@ -123,6 +142,7 @@ export class ModelCapabilityService {
123
142
  userId: opts.userId,
124
143
  provider: opts.provider,
125
144
  model: opts.model,
145
+ modality,
126
146
  state: "pending_unsupported",
127
147
  reason: "capability_signal_pending_confirmation",
128
148
  ttlMs: this.PENDING_UNSUPPORTED_TTL_MS,
@@ -139,6 +159,7 @@ export class ModelCapabilityService {
139
159
  userId: opts.userId,
140
160
  provider: opts.provider,
141
161
  model: opts.model,
162
+ modality,
142
163
  state: "unsupported",
143
164
  reason: classification.reason,
144
165
  ttlMs: this.UNSUPPORTED_TTL_MS,
@@ -190,6 +211,7 @@ export class ModelCapabilityService {
190
211
  userId: string;
191
212
  provider: string;
192
213
  model: string;
214
+ modality: VisionCapabilityModality;
193
215
  state: StoredVisionCapabilityState;
194
216
  reason: string;
195
217
  ttlMs: number;
@@ -203,6 +225,7 @@ export class ModelCapabilityService {
203
225
  userId,
204
226
  provider,
205
227
  model,
228
+ modality,
206
229
  state,
207
230
  reason,
208
231
  ttlMs,
@@ -218,7 +241,20 @@ export class ModelCapabilityService {
218
241
  }
219
242
 
220
243
  const now = new Date();
221
- const key = this.capabilityKey(provider, model);
244
+ const key = this.capabilityKey(provider, model, modality);
245
+ const existingEntry = map[key];
246
+ if (this.isManualOverrideActive(existingEntry) && reason !== "manual_override") {
247
+ logger.info(
248
+ `Skipping auto capability update for ${provider}/${model} (${modality}) because manual override is active`,
249
+ {
250
+ requestedState: state,
251
+ requestedReason: reason,
252
+ currentState: existingEntry?.state,
253
+ currentReason: existingEntry?.reason,
254
+ }
255
+ );
256
+ return;
257
+ }
222
258
 
223
259
  const nextEntry: StoredVisionCapability = {
224
260
  state,
@@ -246,7 +282,7 @@ export class ModelCapabilityService {
246
282
  return;
247
283
  }
248
284
 
249
- logger.info(`Updated model capability for ${provider}/${model}: ${state}`, {
285
+ logger.info(`Updated model capability for ${provider}/${model} (${modality}): ${state}`, {
250
286
  reason,
251
287
  ttlMs,
252
288
  failureCount,
@@ -308,16 +344,28 @@ export class ModelCapabilityService {
308
344
  return normalized;
309
345
  }
310
346
 
311
- private static capabilityKey(provider: string, model: string): string {
347
+ private static capabilityBaseKey(provider: string, model: string): string {
312
348
  return `${provider.toLowerCase().trim()}:${model.toLowerCase().trim()}`;
313
349
  }
314
350
 
351
+ private static capabilityKey(provider: string, model: string, modality: VisionCapabilityModality = "image"): string {
352
+ const base = this.capabilityBaseKey(provider, model);
353
+ if (modality === "image") return base;
354
+ return `${base}:${modality}`;
355
+ }
356
+
315
357
  private static isExpired(entry: StoredVisionCapability): boolean {
316
358
  if (!entry.expires_at) return false;
317
359
  const expiryTs = Date.parse(entry.expires_at);
318
360
  return Number.isFinite(expiryTs) && expiryTs <= Date.now();
319
361
  }
320
362
 
363
+ private static isManualOverrideActive(entry: StoredVisionCapability | undefined): boolean {
364
+ if (!entry) return false;
365
+ if (entry.reason !== "manual_override") return false;
366
+ return !this.isExpired(entry);
367
+ }
368
+
321
369
  private static nextFailureCount(entry: StoredVisionCapability | undefined, nowTs: number): number {
322
370
  if (!entry || entry.state !== "pending_unsupported" || this.isExpired(entry)) {
323
371
  return 1;
@@ -339,7 +387,11 @@ export class ModelCapabilityService {
339
387
  return currentCount + 1;
340
388
  }
341
389
 
342
- private static classifyVisionFailure(opts: { error: unknown; provider: string }): VisionFailureClassification {
390
+ private static classifyVisionFailure(opts: {
391
+ error: unknown;
392
+ provider: string;
393
+ modality: VisionCapabilityModality;
394
+ }): VisionFailureClassification {
343
395
  const signal = this.extractVisionFailureSignal(opts.error);
344
396
  if (!signal.message && signal.codes.size === 0 && signal.statusCodes.size === 0) {
345
397
  return { isCapabilityError: false, reason: "empty_error", score: 0, evidence: [] };
@@ -355,7 +407,7 @@ export class ModelCapabilityService {
355
407
  };
356
408
  }
357
409
 
358
- const documentEvidence = this.matchDocumentSpecific(signal);
410
+ const documentEvidence = this.matchDocumentSpecific(signal, opts.modality);
359
411
  if (documentEvidence.length > 0) {
360
412
  return {
361
413
  isCapabilityError: false,
@@ -365,7 +417,7 @@ export class ModelCapabilityService {
365
417
  };
366
418
  }
367
419
 
368
- const capability = this.scoreCapabilitySignal(signal, opts.provider);
420
+ const capability = this.scoreCapabilitySignal(signal, opts.provider, opts.modality);
369
421
  if (capability.score >= this.UNSUPPORTED_SCORE_THRESHOLD) {
370
422
  return {
371
423
  isCapabilityError: true,
@@ -516,8 +568,8 @@ export class ModelCapabilityService {
516
568
  ];
517
569
  }
518
570
 
519
- private static matchDocumentSpecific(signal: VisionFailureSignal): string[] {
520
- const codeMatches = this.matchCodes(signal.codes, [
571
+ private static matchDocumentSpecific(signal: VisionFailureSignal, modality: VisionCapabilityModality): string[] {
572
+ const imageCodeHints = [
521
573
  "image_too_large",
522
574
  "invalid_base64",
523
575
  "invalid_image",
@@ -525,9 +577,8 @@ export class ModelCapabilityService {
525
577
  "malformed_image",
526
578
  "invalid_image_url",
527
579
  "image_decode_failed",
528
- ]);
529
-
530
- const messageMatches = this.matchMessage(signal.message, [
580
+ ];
581
+ const imageMessageHints = [
531
582
  "image too large",
532
583
  "invalid base64",
533
584
  "malformed image",
@@ -535,7 +586,37 @@ export class ModelCapabilityService {
535
586
  "unable to decode image",
536
587
  "failed to decode image",
537
588
  "invalid image url",
538
- ]);
589
+ ];
590
+ const pdfCodeHints = [
591
+ "invalid_pdf",
592
+ "malformed_pdf",
593
+ "corrupt_pdf",
594
+ "encrypted_pdf",
595
+ "password_protected_pdf",
596
+ "pdf_parse_error",
597
+ "file_too_large",
598
+ ];
599
+ const pdfMessageHints = [
600
+ "invalid pdf",
601
+ "malformed pdf",
602
+ "corrupt pdf",
603
+ "encrypted pdf",
604
+ "password protected pdf",
605
+ "failed to parse pdf",
606
+ "unable to parse pdf",
607
+ "pdf is corrupted",
608
+ "pdf too large",
609
+ "file too large",
610
+ ];
611
+
612
+ const codeMatches = this.matchCodes(
613
+ signal.codes,
614
+ modality === "pdf" ? pdfCodeHints : imageCodeHints
615
+ );
616
+ const messageMatches = this.matchMessage(
617
+ signal.message,
618
+ modality === "pdf" ? pdfMessageHints : imageMessageHints
619
+ );
539
620
 
540
621
  const statusMatches = Array.from(signal.statusCodes).filter((status) => {
541
622
  if (status === 413) return true;
@@ -552,60 +633,109 @@ export class ModelCapabilityService {
552
633
  ];
553
634
  }
554
635
 
555
- private static scoreCapabilitySignal(signal: VisionFailureSignal, provider: string): { score: number; evidence: string[] } {
636
+ private static scoreCapabilitySignal(
637
+ signal: VisionFailureSignal,
638
+ provider: string,
639
+ modality: VisionCapabilityModality
640
+ ): { score: number; evidence: string[] } {
556
641
  const evidence: string[] = [];
557
642
  let score = 0;
558
643
 
559
- const explicitCapabilityCodes = this.matchCodes(signal.codes, [
560
- "vision_not_supported",
561
- "unsupported_vision",
562
- "model_not_vision_capable",
563
- "image_not_supported",
564
- "unsupported_message_content",
565
- "unsupported_content_type_for_model",
566
- "unsupported_image_input",
567
- "invalid_model_for_vision",
568
- ]);
644
+ const explicitCapabilityCodes = this.matchCodes(
645
+ signal.codes,
646
+ modality === "pdf"
647
+ ? [
648
+ "pdf_not_supported",
649
+ "unsupported_pdf_input",
650
+ "unsupported_document_input",
651
+ "unsupported_file_input",
652
+ "input_file_not_supported",
653
+ "unsupported_file_type",
654
+ "model_not_document_capable",
655
+ ]
656
+ : [
657
+ "vision_not_supported",
658
+ "unsupported_vision",
659
+ "model_not_vision_capable",
660
+ "image_not_supported",
661
+ "unsupported_message_content",
662
+ "unsupported_content_type_for_model",
663
+ "unsupported_image_input",
664
+ "invalid_model_for_vision",
665
+ ]
666
+ );
569
667
 
570
668
  if (explicitCapabilityCodes.length > 0) {
571
669
  score += 3;
572
670
  evidence.push(...explicitCapabilityCodes.map((match) => `code:${match}`));
573
671
  }
574
672
 
575
- const highPrecisionMessageMatches = this.matchMessage(signal.message, [
576
- "does not support images",
577
- "does not support image inputs",
578
- "model does not support image",
579
- "this model cannot process images",
580
- "text-only model",
581
- "images are not supported for this model",
582
- "vision is not supported for this model",
583
- "vision is not supported",
584
- "vision not supported",
585
- "image_url is only supported by certain models",
586
- ]);
673
+ const highPrecisionMessageMatches = this.matchMessage(
674
+ signal.message,
675
+ modality === "pdf"
676
+ ? [
677
+ "this model does not support pdf",
678
+ "model does not support pdf",
679
+ "pdf is not supported for this model",
680
+ "file input is not supported for this model",
681
+ "input_file is not supported",
682
+ "unsupported file type: application/pdf",
683
+ "application/pdf is not supported for this model",
684
+ ]
685
+ : [
686
+ "does not support images",
687
+ "does not support image inputs",
688
+ "model does not support image",
689
+ "this model cannot process images",
690
+ "text-only model",
691
+ "images are not supported for this model",
692
+ "vision is not supported for this model",
693
+ "vision is not supported",
694
+ "vision not supported",
695
+ "image_url is only supported by certain models",
696
+ ]
697
+ );
587
698
 
588
699
  if (highPrecisionMessageMatches.length > 0) {
589
700
  score += 3;
590
701
  evidence.push(...highPrecisionMessageMatches.map((match) => `msg:${match}`));
591
702
  }
592
703
 
593
- const providerSpecificMatches = this.matchMessage(signal.message, this.providerCapabilityHints(provider));
704
+ const providerSpecificMatches = this.matchMessage(
705
+ signal.message,
706
+ this.providerCapabilityHints(provider, modality)
707
+ );
594
708
  if (providerSpecificMatches.length > 0) {
595
- score += 2;
709
+ score += 3;
596
710
  evidence.push(...providerSpecificMatches.map((match) => `provider:${match}`));
597
711
  }
598
712
 
599
- const weakCapabilityHints = this.matchMessage(signal.message, [
600
- "vision",
601
- "unsupported content type",
602
- "unsupported message content",
603
- "invalid content type",
604
- "unrecognized content type",
605
- "image_url",
606
- "multimodal",
607
- "multi-modal",
608
- ]);
713
+ const weakCapabilityHints = this.matchMessage(
714
+ signal.message,
715
+ modality === "pdf"
716
+ ? [
717
+ "pdf input",
718
+ "pdf support",
719
+ "pdf not supported",
720
+ "application/pdf",
721
+ "input_file",
722
+ "file input",
723
+ "document input",
724
+ "unsupported file type",
725
+ "unsupported content type",
726
+ "invalid content type",
727
+ ]
728
+ : [
729
+ "vision",
730
+ "unsupported content type",
731
+ "unsupported message content",
732
+ "invalid content type",
733
+ "unrecognized content type",
734
+ "image_url",
735
+ "multimodal",
736
+ "multi-modal",
737
+ ]
738
+ );
609
739
 
610
740
  const hasClientValidationStatus = Array.from(signal.statusCodes).some((status) => [400, 415, 422].includes(status));
611
741
  if (weakCapabilityHints.length > 0 && hasClientValidationStatus) {
@@ -624,9 +754,37 @@ export class ModelCapabilityService {
624
754
  };
625
755
  }
626
756
 
627
- private static providerCapabilityHints(provider: string): string[] {
757
+ private static providerCapabilityHints(provider: string, modality: VisionCapabilityModality): string[] {
628
758
  const normalized = provider.toLowerCase().trim();
629
759
 
760
+ if (modality === "pdf") {
761
+ if (normalized.includes("openai")) {
762
+ return [
763
+ "input_file is not supported",
764
+ "unsupported file type: application/pdf",
765
+ "application/pdf is not supported for this model",
766
+ ];
767
+ }
768
+ if (normalized.includes("anthropic")) {
769
+ return [
770
+ "pdf is not supported for this model",
771
+ "file input is not supported for this model",
772
+ ];
773
+ }
774
+ if (normalized.includes("google") || normalized.includes("gemini")) {
775
+ return [
776
+ "unsupported document input",
777
+ "pdf input is not supported",
778
+ ];
779
+ }
780
+ if (normalized.includes("realtimex")) {
781
+ return [
782
+ "unsupported file input",
783
+ ];
784
+ }
785
+ return [];
786
+ }
787
+
630
788
  if (normalized.includes("openai")) {
631
789
  return [
632
790
  "image_url is only supported by certain models",
@@ -650,7 +808,6 @@ export class ModelCapabilityService {
650
808
 
651
809
  if (normalized.includes("realtimex")) {
652
810
  return [
653
- "invalid model",
654
811
  "text-only model",
655
812
  ];
656
813
  }
@@ -49,21 +49,42 @@ type ProcessWithPoliciesOptions = {
49
49
  allowLearnedFallback?: boolean;
50
50
  };
51
51
 
52
+ type VlmPayload = {
53
+ kind: "image" | "pdf";
54
+ dataUrl: string;
55
+ supplementalText: string;
56
+ };
57
+
52
58
  /**
53
- * Helper to build LLM message content. If the text contains the VLM marker
54
- * generated by IngestionService, it casts the payload to an OpenAI-compatible
55
- * Vision array structure so the underlying SDK bridge can transmit the image.
59
+ * Helper to build LLM message content. If the text contains a VLM marker
60
+ * generated by IngestionService, it casts the payload to multimodal blocks.
56
61
  */
57
- function extractVlmPayload(text: string): { imageDataUrl: string; supplementalText: string } | null {
58
- const marker = text.match(/\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/);
59
- if (!marker) return null;
62
+ function extractVlmPayload(text: string): VlmPayload | null {
63
+ const imageMarker = text.match(/\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/);
64
+ if (imageMarker) {
65
+ const markerText = imageMarker[0];
66
+ return {
67
+ kind: "image",
68
+ dataUrl: imageMarker[1],
69
+ supplementalText: text.replace(markerText, "").trim().slice(0, 4000),
70
+ };
71
+ }
60
72
 
61
- const markerText = marker[0];
62
- const supplementalText = text.replace(markerText, "").trim().slice(0, 4000);
63
- return {
64
- imageDataUrl: marker[1],
65
- supplementalText,
66
- };
73
+ const pdfMarker = text.match(/\[VLM_PDF_DATA:(data:[^;]+;base64,[^\]]+)\]/);
74
+ if (pdfMarker) {
75
+ const markerText = pdfMarker[0];
76
+ return {
77
+ kind: "pdf",
78
+ dataUrl: pdfMarker[1],
79
+ supplementalText: text.replace(markerText, "").trim().slice(0, 4000),
80
+ };
81
+ }
82
+
83
+ return null;
84
+ }
85
+
86
+ function hasVlmPayload(text: string): boolean {
87
+ return text.includes("[VLM_IMAGE_DATA:") || text.includes("[VLM_PDF_DATA:");
67
88
  }
68
89
 
69
90
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -73,10 +94,13 @@ function buildMessageContent(prompt: string, text: string, textFirst = false): a
73
94
  const textPrompt = vlmPayload.supplementalText
74
95
  ? `${prompt}\n\nSupplemental extracted fields:\n${vlmPayload.supplementalText}`
75
96
  : prompt;
76
- return [
77
- { type: "text", text: textPrompt },
78
- { type: "image_url", image_url: { url: vlmPayload.imageDataUrl } }
79
- ];
97
+ // `input_file` is not provider-agnostic (e.g. Anthropic-style block); providers
98
+ // that don't accept it will fail, and IngestionService will learn unsupported pdf modality.
99
+ const assetBlock = vlmPayload.kind === "pdf"
100
+ ? { type: "input_file", file_url: vlmPayload.dataUrl }
101
+ : { type: "image_url", image_url: { url: vlmPayload.dataUrl } };
102
+
103
+ return [{ type: "text", text: textPrompt }, assetBlock];
80
104
  }
81
105
  // Standard text payload
82
106
  return textFirst
@@ -444,7 +468,7 @@ async function evaluateCondition(condition: MatchCondition, doc: DocumentObject,
444
468
  model,
445
469
  condition_type: condition.type,
446
470
  prompt_preview: prompt.slice(0, 180),
447
- vision_payload: doc.text.includes("[VLM_IMAGE_DATA:")
471
+ vision_payload: hasVlmPayload(doc.text)
448
472
  }
449
473
  });
450
474
  Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Policy Matching", {
@@ -453,7 +477,7 @@ async function evaluateCondition(condition: MatchCondition, doc: DocumentObject,
453
477
  model,
454
478
  condition_type: condition.type,
455
479
  prompt_preview: prompt.slice(0, 180),
456
- vision_payload: doc.text.includes("[VLM_IMAGE_DATA:")
480
+ vision_payload: hasVlmPayload(doc.text)
457
481
  }, doc.supabase);
458
482
  const result = await sdk.llm.chat(
459
483
  [
@@ -563,7 +587,7 @@ Fields to extract:
563
587
  ${fieldDescriptions}`;
564
588
 
565
589
  try {
566
- const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
590
+ const isVlmPayload = hasVlmPayload(doc.text);
567
591
  const mixedPrompt = isVlmPayload
568
592
  ? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
569
593
  : prompt;
@@ -722,7 +746,7 @@ Rules:
722
746
  known_fields_count: Object.keys(contractData).length,
723
747
  }, doc.supabase);
724
748
 
725
- const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
749
+ const isVlmPayload = hasVlmPayload(doc.text);
726
750
  const mixedPrompt = isVlmPayload
727
751
  ? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
728
752
  : prompt;
@@ -1004,7 +1028,9 @@ export class PolicyEngine {
1004
1028
  const allowLearnedFallback = opts.allowLearnedFallback !== false && !forcedPolicyId;
1005
1029
  if (allowLearnedFallback && doc.supabase && policies.length > 0) {
1006
1030
  try {
1007
- const learningText = doc.text.replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "");
1031
+ const learningText = doc.text
1032
+ .replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "")
1033
+ .replace(/\[VLM_PDF_DATA:[^\]]+\]/g, "");
1008
1034
  const learned = await PolicyLearningService.resolveLearnedCandidate({
1009
1035
  supabase: doc.supabase,
1010
1036
  userId: doc.userId,
@@ -1118,7 +1144,7 @@ export class PolicyEngine {
1118
1144
  `No markdown, no explanation — only the JSON object.`;
1119
1145
 
1120
1146
  const userPrompt = `Extract the following fields from the document:\n${fieldList}`;
1121
- const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
1147
+ const isVlmPayload = hasVlmPayload(doc.text);
1122
1148
  const mixedPrompt = isVlmPayload ? `${systemPrompt}\n\n${userPrompt}` : userPrompt;
1123
1149
 
1124
1150
  try {
@@ -135,8 +135,8 @@ export class RAGService {
135
135
  supabase: SupabaseClient,
136
136
  settings?: EmbeddingSettings
137
137
  ): Promise<void> {
138
- if (rawText.startsWith("[VLM_IMAGE_DATA:")) {
139
- logger.info(`Skipping chunking and embedding for VLM base64 image data (Ingestion: ${ingestionId})`);
138
+ if (/^\[VLM_(IMAGE|PDF)_DATA:/.test(rawText)) {
139
+ logger.info(`Skipping chunking and embedding for VLM base64 multimodal data (Ingestion: ${ingestionId})`);
140
140
  return;
141
141
  }
142
142