@realtimex/folio 0.1.11 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +1 -0
- package/api/src/services/IngestionService.ts +513 -206
- package/api/src/services/ModelCapabilityService.ts +213 -56
- package/api/src/services/PolicyEngine.ts +48 -22
- package/api/src/services/RAGService.ts +2 -2
- package/dist/api/src/services/IngestionService.js +467 -194
- package/dist/api/src/services/ModelCapabilityService.js +165 -54
- package/dist/api/src/services/PolicyEngine.js +38 -22
- package/dist/api/src/services/RAGService.js +2 -2
- package/dist/assets/{index-nxHX9No5.js → index-CLpalZvv.js} +37 -37
- package/dist/index.html +1 -1
- package/package.json +1 -1
|
@@ -5,6 +5,7 @@ import { SDKService } from "./SDKService.js";
|
|
|
5
5
|
const logger = createLogger("ModelCapabilityService");
|
|
6
6
|
|
|
7
7
|
/** Capability verdict reported to callers for a provider/model/modality combination. */
export type VisionCapabilityState =
  | "supported"
  | "unsupported"
  | "unknown";

/** Kind of multimodal input a capability entry is tracked for. */
export type VisionCapabilityModality =
  | "image"
  | "pdf";

/**
 * State recorded on a stored capability entry. `pending_unsupported` is an
 * internal intermediate value; `getVisionState` reports it to callers as "unknown".
 */
type StoredVisionCapabilityState =
  | "supported"
  | "unsupported"
  | "pending_unsupported";
|
|
9
10
|
|
|
10
11
|
interface StoredVisionCapability {
|
|
@@ -28,6 +29,7 @@ interface SettingsLike {
|
|
|
28
29
|
/**
 * Result of resolving whether a multimodal request should be attempted for the
 * configured provider/model.
 */
export interface VisionResolution {
  /** Provider name after falling back to the SDK default and trimming. */
  provider: string;
  /** Model name after falling back to the SDK default and trimming. */
  model: string;
  /** Input kind the resolution applies to. */
  modality: VisionCapabilityModality;
  /** Capability state looked up for provider/model/modality. */
  state: VisionCapabilityState;
  /** False only when `state` is "unsupported"; "unknown" still permits an attempt. */
  shouldAttempt: boolean;
}
|
|
@@ -53,21 +55,30 @@ export class ModelCapabilityService {
|
|
|
53
55
|
private static readonly UNSUPPORTED_CONFIRMATION_FAILURES = 2;
|
|
54
56
|
private static readonly UNSUPPORTED_SCORE_THRESHOLD = 3;
|
|
55
57
|
|
|
56
|
-
static resolveVisionSupport(
|
|
58
|
+
/**
 * Resolves the effective provider/model from a settings row and looks up the
 * capability state recorded for the requested modality.
 *
 * @param settingsRow User settings; `null`/`undefined` falls back to the SDK defaults.
 * @param modality Input kind to resolve for. Defaults to "image".
 * @returns The resolved provider, model, modality, state, and whether a
 *   multimodal request should be attempted (anything except "unsupported").
 *
 * NOTE(review): the fallback uses `||` before `trim()`, so a whitespace-only
 * setting is truthy and resolves to an empty string rather than the default.
 */
static resolveVisionSupport(
  settingsRow: SettingsLike | null | undefined,
  modality: VisionCapabilityModality = "image"
): VisionResolution {
  const configuredProvider = settingsRow?.llm_provider || SDKService.DEFAULT_LLM_PROVIDER;
  const configuredModel = settingsRow?.llm_model || SDKService.DEFAULT_LLM_MODEL;

  const provider = configuredProvider.trim();
  const model = configuredModel.trim();

  const state = this.getVisionState(settingsRow?.vision_model_capabilities, provider, model, modality);
  const shouldAttempt = state !== "unsupported";

  return { provider, model, modality, state, shouldAttempt };
}
|
|
67
73
|
|
|
68
|
-
static getVisionState(
|
|
74
|
+
static getVisionState(
|
|
75
|
+
rawMap: unknown,
|
|
76
|
+
provider: string,
|
|
77
|
+
model: string,
|
|
78
|
+
modality: VisionCapabilityModality = "image"
|
|
79
|
+
): VisionCapabilityState {
|
|
69
80
|
const map = this.normalizeCapabilityMap(rawMap);
|
|
70
|
-
const entry = map[this.capabilityKey(provider, model)];
|
|
81
|
+
const entry = map[this.capabilityKey(provider, model, modality)];
|
|
71
82
|
if (!entry || this.isExpired(entry)) return "unknown";
|
|
72
83
|
if (entry.state === "pending_unsupported") return "unknown";
|
|
73
84
|
return entry.state;
|
|
@@ -78,9 +89,11 @@ export class ModelCapabilityService {
|
|
|
78
89
|
userId: string;
|
|
79
90
|
provider: string;
|
|
80
91
|
model: string;
|
|
92
|
+
modality?: VisionCapabilityModality;
|
|
81
93
|
}): Promise<void> {
|
|
82
94
|
await this.writeCapability({
|
|
83
95
|
...opts,
|
|
96
|
+
modality: opts.modality ?? "image",
|
|
84
97
|
state: "supported",
|
|
85
98
|
reason: "vision_request_succeeded",
|
|
86
99
|
ttlMs: this.SUPPORTED_TTL_MS,
|
|
@@ -93,18 +106,24 @@ export class ModelCapabilityService {
|
|
|
93
106
|
provider: string;
|
|
94
107
|
model: string;
|
|
95
108
|
error: unknown;
|
|
109
|
+
modality?: VisionCapabilityModality;
|
|
96
110
|
}): Promise<VisionCapabilityState> {
|
|
111
|
+
const modality = opts.modality ?? "image";
|
|
97
112
|
const classification = this.classifyVisionFailure({
|
|
98
113
|
error: opts.error,
|
|
99
114
|
provider: opts.provider,
|
|
115
|
+
modality,
|
|
100
116
|
});
|
|
101
117
|
|
|
102
118
|
if (!classification.isCapabilityError) {
|
|
103
|
-
logger.info(
|
|
119
|
+
logger.info(
|
|
120
|
+
`Vision failure for ${opts.provider}/${opts.model} (${modality}) treated as non-capability; leaving capability unknown`,
|
|
121
|
+
{
|
|
104
122
|
reason: classification.reason,
|
|
105
123
|
score: classification.score,
|
|
106
124
|
evidence: classification.evidence,
|
|
107
|
-
|
|
125
|
+
}
|
|
126
|
+
);
|
|
108
127
|
return "unknown";
|
|
109
128
|
}
|
|
110
129
|
|
|
@@ -113,7 +132,7 @@ export class ModelCapabilityService {
|
|
|
113
132
|
return "unknown";
|
|
114
133
|
}
|
|
115
134
|
|
|
116
|
-
const key = this.capabilityKey(opts.provider, opts.model);
|
|
135
|
+
const key = this.capabilityKey(opts.provider, opts.model, modality);
|
|
117
136
|
const now = new Date();
|
|
118
137
|
const failureCount = this.nextFailureCount(map[key], now.getTime());
|
|
119
138
|
|
|
@@ -123,6 +142,7 @@ export class ModelCapabilityService {
|
|
|
123
142
|
userId: opts.userId,
|
|
124
143
|
provider: opts.provider,
|
|
125
144
|
model: opts.model,
|
|
145
|
+
modality,
|
|
126
146
|
state: "pending_unsupported",
|
|
127
147
|
reason: "capability_signal_pending_confirmation",
|
|
128
148
|
ttlMs: this.PENDING_UNSUPPORTED_TTL_MS,
|
|
@@ -139,6 +159,7 @@ export class ModelCapabilityService {
|
|
|
139
159
|
userId: opts.userId,
|
|
140
160
|
provider: opts.provider,
|
|
141
161
|
model: opts.model,
|
|
162
|
+
modality,
|
|
142
163
|
state: "unsupported",
|
|
143
164
|
reason: classification.reason,
|
|
144
165
|
ttlMs: this.UNSUPPORTED_TTL_MS,
|
|
@@ -190,6 +211,7 @@ export class ModelCapabilityService {
|
|
|
190
211
|
userId: string;
|
|
191
212
|
provider: string;
|
|
192
213
|
model: string;
|
|
214
|
+
modality: VisionCapabilityModality;
|
|
193
215
|
state: StoredVisionCapabilityState;
|
|
194
216
|
reason: string;
|
|
195
217
|
ttlMs: number;
|
|
@@ -203,6 +225,7 @@ export class ModelCapabilityService {
|
|
|
203
225
|
userId,
|
|
204
226
|
provider,
|
|
205
227
|
model,
|
|
228
|
+
modality,
|
|
206
229
|
state,
|
|
207
230
|
reason,
|
|
208
231
|
ttlMs,
|
|
@@ -218,7 +241,20 @@ export class ModelCapabilityService {
|
|
|
218
241
|
}
|
|
219
242
|
|
|
220
243
|
const now = new Date();
|
|
221
|
-
const key = this.capabilityKey(provider, model);
|
|
244
|
+
const key = this.capabilityKey(provider, model, modality);
|
|
245
|
+
const existingEntry = map[key];
|
|
246
|
+
if (this.isManualOverrideActive(existingEntry) && reason !== "manual_override") {
|
|
247
|
+
logger.info(
|
|
248
|
+
`Skipping auto capability update for ${provider}/${model} (${modality}) because manual override is active`,
|
|
249
|
+
{
|
|
250
|
+
requestedState: state,
|
|
251
|
+
requestedReason: reason,
|
|
252
|
+
currentState: existingEntry?.state,
|
|
253
|
+
currentReason: existingEntry?.reason,
|
|
254
|
+
}
|
|
255
|
+
);
|
|
256
|
+
return;
|
|
257
|
+
}
|
|
222
258
|
|
|
223
259
|
const nextEntry: StoredVisionCapability = {
|
|
224
260
|
state,
|
|
@@ -246,7 +282,7 @@ export class ModelCapabilityService {
|
|
|
246
282
|
return;
|
|
247
283
|
}
|
|
248
284
|
|
|
249
|
-
logger.info(`Updated model capability for ${provider}/${model}: ${state}`, {
|
|
285
|
+
logger.info(`Updated model capability for ${provider}/${model} (${modality}): ${state}`, {
|
|
250
286
|
reason,
|
|
251
287
|
ttlMs,
|
|
252
288
|
failureCount,
|
|
@@ -308,16 +344,28 @@ export class ModelCapabilityService {
|
|
|
308
344
|
return normalized;
|
|
309
345
|
}
|
|
310
346
|
|
|
311
|
-
private static
|
|
347
|
+
/**
 * Builds the modality-independent map key: lower-cased, trimmed provider and
 * model joined by a colon.
 */
private static capabilityBaseKey(provider: string, model: string): string {
  const [providerPart, modelPart] = [provider, model].map((part) => part.toLowerCase().trim());
  return `${providerPart}:${modelPart}`;
}
|
|
314
350
|
|
|
351
|
+
/**
 * Builds the capability map key for a provider/model/modality.
 *
 * Image entries use the bare `provider:model` key; every other modality gets a
 * `:<modality>` suffix.
 *
 * NOTE(review): model names may themselves contain ":" — a model whose name
 * ends in ":pdf" looked up as "image" yields the same key as the pdf entry of
 * its base model. Changing the format would orphan stored entries, so this is
 * only flagged here.
 */
private static capabilityKey(provider: string, model: string, modality: VisionCapabilityModality = "image"): string {
  const baseKey = this.capabilityBaseKey(provider, model);
  return modality === "image" ? baseKey : `${baseKey}:${modality}`;
}
|
|
356
|
+
|
|
315
357
|
/**
 * Reports whether a stored entry's `expires_at` timestamp has passed.
 *
 * Entries without `expires_at`, or with a value `Date.parse` cannot read,
 * are treated as not expired.
 */
private static isExpired(entry: StoredVisionCapability): boolean {
  const { expires_at: expiresAt } = entry;
  if (!expiresAt) return false;

  const expiresAtMs = Date.parse(expiresAt);
  if (!Number.isFinite(expiresAtMs)) return false;

  return expiresAtMs <= Date.now();
}
|
|
320
362
|
|
|
363
|
+
/**
 * True when the entry exists, was written with reason "manual_override",
 * and has not expired.
 */
private static isManualOverrideActive(entry: StoredVisionCapability | undefined): boolean {
  return !!entry && entry.reason === "manual_override" && !this.isExpired(entry);
}
|
|
368
|
+
|
|
321
369
|
private static nextFailureCount(entry: StoredVisionCapability | undefined, nowTs: number): number {
|
|
322
370
|
if (!entry || entry.state !== "pending_unsupported" || this.isExpired(entry)) {
|
|
323
371
|
return 1;
|
|
@@ -339,7 +387,11 @@ export class ModelCapabilityService {
|
|
|
339
387
|
return currentCount + 1;
|
|
340
388
|
}
|
|
341
389
|
|
|
342
|
-
private static classifyVisionFailure(opts: {
|
|
390
|
+
private static classifyVisionFailure(opts: {
|
|
391
|
+
error: unknown;
|
|
392
|
+
provider: string;
|
|
393
|
+
modality: VisionCapabilityModality;
|
|
394
|
+
}): VisionFailureClassification {
|
|
343
395
|
const signal = this.extractVisionFailureSignal(opts.error);
|
|
344
396
|
if (!signal.message && signal.codes.size === 0 && signal.statusCodes.size === 0) {
|
|
345
397
|
return { isCapabilityError: false, reason: "empty_error", score: 0, evidence: [] };
|
|
@@ -355,7 +407,7 @@ export class ModelCapabilityService {
|
|
|
355
407
|
};
|
|
356
408
|
}
|
|
357
409
|
|
|
358
|
-
const documentEvidence = this.matchDocumentSpecific(signal);
|
|
410
|
+
const documentEvidence = this.matchDocumentSpecific(signal, opts.modality);
|
|
359
411
|
if (documentEvidence.length > 0) {
|
|
360
412
|
return {
|
|
361
413
|
isCapabilityError: false,
|
|
@@ -365,7 +417,7 @@ export class ModelCapabilityService {
|
|
|
365
417
|
};
|
|
366
418
|
}
|
|
367
419
|
|
|
368
|
-
const capability = this.scoreCapabilitySignal(signal, opts.provider);
|
|
420
|
+
const capability = this.scoreCapabilitySignal(signal, opts.provider, opts.modality);
|
|
369
421
|
if (capability.score >= this.UNSUPPORTED_SCORE_THRESHOLD) {
|
|
370
422
|
return {
|
|
371
423
|
isCapabilityError: true,
|
|
@@ -516,8 +568,8 @@ export class ModelCapabilityService {
|
|
|
516
568
|
];
|
|
517
569
|
}
|
|
518
570
|
|
|
519
|
-
private static matchDocumentSpecific(signal: VisionFailureSignal): string[] {
|
|
520
|
-
const
|
|
571
|
+
private static matchDocumentSpecific(signal: VisionFailureSignal, modality: VisionCapabilityModality): string[] {
|
|
572
|
+
const imageCodeHints = [
|
|
521
573
|
"image_too_large",
|
|
522
574
|
"invalid_base64",
|
|
523
575
|
"invalid_image",
|
|
@@ -525,9 +577,8 @@ export class ModelCapabilityService {
|
|
|
525
577
|
"malformed_image",
|
|
526
578
|
"invalid_image_url",
|
|
527
579
|
"image_decode_failed",
|
|
528
|
-
]
|
|
529
|
-
|
|
530
|
-
const messageMatches = this.matchMessage(signal.message, [
|
|
580
|
+
];
|
|
581
|
+
const imageMessageHints = [
|
|
531
582
|
"image too large",
|
|
532
583
|
"invalid base64",
|
|
533
584
|
"malformed image",
|
|
@@ -535,7 +586,37 @@ export class ModelCapabilityService {
|
|
|
535
586
|
"unable to decode image",
|
|
536
587
|
"failed to decode image",
|
|
537
588
|
"invalid image url",
|
|
538
|
-
]
|
|
589
|
+
];
|
|
590
|
+
const pdfCodeHints = [
|
|
591
|
+
"invalid_pdf",
|
|
592
|
+
"malformed_pdf",
|
|
593
|
+
"corrupt_pdf",
|
|
594
|
+
"encrypted_pdf",
|
|
595
|
+
"password_protected_pdf",
|
|
596
|
+
"pdf_parse_error",
|
|
597
|
+
"file_too_large",
|
|
598
|
+
];
|
|
599
|
+
const pdfMessageHints = [
|
|
600
|
+
"invalid pdf",
|
|
601
|
+
"malformed pdf",
|
|
602
|
+
"corrupt pdf",
|
|
603
|
+
"encrypted pdf",
|
|
604
|
+
"password protected pdf",
|
|
605
|
+
"failed to parse pdf",
|
|
606
|
+
"unable to parse pdf",
|
|
607
|
+
"pdf is corrupted",
|
|
608
|
+
"pdf too large",
|
|
609
|
+
"file too large",
|
|
610
|
+
];
|
|
611
|
+
|
|
612
|
+
const codeMatches = this.matchCodes(
|
|
613
|
+
signal.codes,
|
|
614
|
+
modality === "pdf" ? pdfCodeHints : imageCodeHints
|
|
615
|
+
);
|
|
616
|
+
const messageMatches = this.matchMessage(
|
|
617
|
+
signal.message,
|
|
618
|
+
modality === "pdf" ? pdfMessageHints : imageMessageHints
|
|
619
|
+
);
|
|
539
620
|
|
|
540
621
|
const statusMatches = Array.from(signal.statusCodes).filter((status) => {
|
|
541
622
|
if (status === 413) return true;
|
|
@@ -552,60 +633,109 @@ export class ModelCapabilityService {
|
|
|
552
633
|
];
|
|
553
634
|
}
|
|
554
635
|
|
|
555
|
-
private static scoreCapabilitySignal(
|
|
636
|
+
private static scoreCapabilitySignal(
|
|
637
|
+
signal: VisionFailureSignal,
|
|
638
|
+
provider: string,
|
|
639
|
+
modality: VisionCapabilityModality
|
|
640
|
+
): { score: number; evidence: string[] } {
|
|
556
641
|
const evidence: string[] = [];
|
|
557
642
|
let score = 0;
|
|
558
643
|
|
|
559
|
-
const explicitCapabilityCodes = this.matchCodes(
|
|
560
|
-
|
|
561
|
-
"
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
644
|
+
const explicitCapabilityCodes = this.matchCodes(
|
|
645
|
+
signal.codes,
|
|
646
|
+
modality === "pdf"
|
|
647
|
+
? [
|
|
648
|
+
"pdf_not_supported",
|
|
649
|
+
"unsupported_pdf_input",
|
|
650
|
+
"unsupported_document_input",
|
|
651
|
+
"unsupported_file_input",
|
|
652
|
+
"input_file_not_supported",
|
|
653
|
+
"unsupported_file_type",
|
|
654
|
+
"model_not_document_capable",
|
|
655
|
+
]
|
|
656
|
+
: [
|
|
657
|
+
"vision_not_supported",
|
|
658
|
+
"unsupported_vision",
|
|
659
|
+
"model_not_vision_capable",
|
|
660
|
+
"image_not_supported",
|
|
661
|
+
"unsupported_message_content",
|
|
662
|
+
"unsupported_content_type_for_model",
|
|
663
|
+
"unsupported_image_input",
|
|
664
|
+
"invalid_model_for_vision",
|
|
665
|
+
]
|
|
666
|
+
);
|
|
569
667
|
|
|
570
668
|
if (explicitCapabilityCodes.length > 0) {
|
|
571
669
|
score += 3;
|
|
572
670
|
evidence.push(...explicitCapabilityCodes.map((match) => `code:${match}`));
|
|
573
671
|
}
|
|
574
672
|
|
|
575
|
-
const highPrecisionMessageMatches = this.matchMessage(
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
673
|
+
const highPrecisionMessageMatches = this.matchMessage(
|
|
674
|
+
signal.message,
|
|
675
|
+
modality === "pdf"
|
|
676
|
+
? [
|
|
677
|
+
"this model does not support pdf",
|
|
678
|
+
"model does not support pdf",
|
|
679
|
+
"pdf is not supported for this model",
|
|
680
|
+
"file input is not supported for this model",
|
|
681
|
+
"input_file is not supported",
|
|
682
|
+
"unsupported file type: application/pdf",
|
|
683
|
+
"application/pdf is not supported for this model",
|
|
684
|
+
]
|
|
685
|
+
: [
|
|
686
|
+
"does not support images",
|
|
687
|
+
"does not support image inputs",
|
|
688
|
+
"model does not support image",
|
|
689
|
+
"this model cannot process images",
|
|
690
|
+
"text-only model",
|
|
691
|
+
"images are not supported for this model",
|
|
692
|
+
"vision is not supported for this model",
|
|
693
|
+
"vision is not supported",
|
|
694
|
+
"vision not supported",
|
|
695
|
+
"image_url is only supported by certain models",
|
|
696
|
+
]
|
|
697
|
+
);
|
|
587
698
|
|
|
588
699
|
if (highPrecisionMessageMatches.length > 0) {
|
|
589
700
|
score += 3;
|
|
590
701
|
evidence.push(...highPrecisionMessageMatches.map((match) => `msg:${match}`));
|
|
591
702
|
}
|
|
592
703
|
|
|
593
|
-
const providerSpecificMatches = this.matchMessage(
|
|
704
|
+
const providerSpecificMatches = this.matchMessage(
|
|
705
|
+
signal.message,
|
|
706
|
+
this.providerCapabilityHints(provider, modality)
|
|
707
|
+
);
|
|
594
708
|
if (providerSpecificMatches.length > 0) {
|
|
595
|
-
score +=
|
|
709
|
+
score += 3;
|
|
596
710
|
evidence.push(...providerSpecificMatches.map((match) => `provider:${match}`));
|
|
597
711
|
}
|
|
598
712
|
|
|
599
|
-
const weakCapabilityHints = this.matchMessage(
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
713
|
+
const weakCapabilityHints = this.matchMessage(
|
|
714
|
+
signal.message,
|
|
715
|
+
modality === "pdf"
|
|
716
|
+
? [
|
|
717
|
+
"pdf input",
|
|
718
|
+
"pdf support",
|
|
719
|
+
"pdf not supported",
|
|
720
|
+
"application/pdf",
|
|
721
|
+
"input_file",
|
|
722
|
+
"file input",
|
|
723
|
+
"document input",
|
|
724
|
+
"unsupported file type",
|
|
725
|
+
"unsupported content type",
|
|
726
|
+
"invalid content type",
|
|
727
|
+
]
|
|
728
|
+
: [
|
|
729
|
+
"vision",
|
|
730
|
+
"unsupported content type",
|
|
731
|
+
"unsupported message content",
|
|
732
|
+
"invalid content type",
|
|
733
|
+
"unrecognized content type",
|
|
734
|
+
"image_url",
|
|
735
|
+
"multimodal",
|
|
736
|
+
"multi-modal",
|
|
737
|
+
]
|
|
738
|
+
);
|
|
609
739
|
|
|
610
740
|
const hasClientValidationStatus = Array.from(signal.statusCodes).some((status) => [400, 415, 422].includes(status));
|
|
611
741
|
if (weakCapabilityHints.length > 0 && hasClientValidationStatus) {
|
|
@@ -624,9 +754,37 @@ export class ModelCapabilityService {
|
|
|
624
754
|
};
|
|
625
755
|
}
|
|
626
756
|
|
|
627
|
-
private static providerCapabilityHints(provider: string): string[] {
|
|
757
|
+
private static providerCapabilityHints(provider: string, modality: VisionCapabilityModality): string[] {
|
|
628
758
|
const normalized = provider.toLowerCase().trim();
|
|
629
759
|
|
|
760
|
+
if (modality === "pdf") {
|
|
761
|
+
if (normalized.includes("openai")) {
|
|
762
|
+
return [
|
|
763
|
+
"input_file is not supported",
|
|
764
|
+
"unsupported file type: application/pdf",
|
|
765
|
+
"application/pdf is not supported for this model",
|
|
766
|
+
];
|
|
767
|
+
}
|
|
768
|
+
if (normalized.includes("anthropic")) {
|
|
769
|
+
return [
|
|
770
|
+
"pdf is not supported for this model",
|
|
771
|
+
"file input is not supported for this model",
|
|
772
|
+
];
|
|
773
|
+
}
|
|
774
|
+
if (normalized.includes("google") || normalized.includes("gemini")) {
|
|
775
|
+
return [
|
|
776
|
+
"unsupported document input",
|
|
777
|
+
"pdf input is not supported",
|
|
778
|
+
];
|
|
779
|
+
}
|
|
780
|
+
if (normalized.includes("realtimex")) {
|
|
781
|
+
return [
|
|
782
|
+
"unsupported file input",
|
|
783
|
+
];
|
|
784
|
+
}
|
|
785
|
+
return [];
|
|
786
|
+
}
|
|
787
|
+
|
|
630
788
|
if (normalized.includes("openai")) {
|
|
631
789
|
return [
|
|
632
790
|
"image_url is only supported by certain models",
|
|
@@ -650,7 +808,6 @@ export class ModelCapabilityService {
|
|
|
650
808
|
|
|
651
809
|
if (normalized.includes("realtimex")) {
|
|
652
810
|
return [
|
|
653
|
-
"invalid model",
|
|
654
811
|
"text-only model",
|
|
655
812
|
];
|
|
656
813
|
}
|
|
@@ -49,21 +49,42 @@ type ProcessWithPoliciesOptions = {
|
|
|
49
49
|
allowLearnedFallback?: boolean;
|
|
50
50
|
};
|
|
51
51
|
|
|
52
|
+
/** Multimodal asset pulled out of a document's text, plus the text that surrounded it. */
type VlmPayload = {
  /** Which marker the asset came from. */
  kind: "image" | "pdf";
  /** The `data:<mime>;base64,<payload>` URL captured from the marker. */
  dataUrl: string;
  /** Remaining document text with VLM markers removed, trimmed and capped at 4000 characters. */
  supplementalText: string;
};

/**
 * Extracts the first VLM marker (`[VLM_IMAGE_DATA:<data-url>]` or
 * `[VLM_PDF_DATA:<data-url>]`) from `text`.
 *
 * An image marker takes precedence over a PDF marker regardless of position.
 * The matched marker is removed from the supplemental text; any other VLM
 * markers left behind (a second kind, or a duplicate) are stripped as well so
 * their base64 payload never leaks into the text sent alongside the asset.
 *
 * @param text Document text that may embed a VLM marker.
 * @returns The extracted payload, or `null` when no well-formed marker is present.
 */
function extractVlmPayload(text: string): VlmPayload | null {
  // Order matters: image is checked before pdf.
  const markers: ReadonlyArray<{ kind: VlmPayload["kind"]; pattern: RegExp }> = [
    { kind: "image", pattern: /\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/ },
    { kind: "pdf", pattern: /\[VLM_PDF_DATA:(data:[^;]+;base64,[^\]]+)\]/ },
  ];

  for (const { kind, pattern } of markers) {
    const match = pattern.exec(text);
    const dataUrl = match?.[1];
    if (!match || !dataUrl) continue;

    const supplementalText = text
      // Remove exactly the marker that was matched (string pattern: first occurrence only).
      .replace(match[0], "")
      // Then drop any other VLM markers still embedded in the text.
      .replace(/\[VLM_(?:IMAGE|PDF)_DATA:[^\]]+\]/g, "")
      .trim()
      .slice(0, 4000);

    return { kind, dataUrl, supplementalText };
  }

  return null;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Cheap check for whether `text` contains the opening of an image or PDF VLM
 * marker anywhere. Only the marker prefix is tested; the marker is not validated.
 */
function hasVlmPayload(text: string): boolean {
  const markerPrefixes = ["[VLM_IMAGE_DATA:", "[VLM_PDF_DATA:"];
  return markerPrefixes.some((prefix) => text.includes(prefix));
}
|
|
68
89
|
|
|
69
90
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
@@ -73,10 +94,13 @@ function buildMessageContent(prompt: string, text: string, textFirst = false): a
|
|
|
73
94
|
const textPrompt = vlmPayload.supplementalText
|
|
74
95
|
? `${prompt}\n\nSupplemental extracted fields:\n${vlmPayload.supplementalText}`
|
|
75
96
|
: prompt;
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
97
|
+
// `input_file` is not provider-agnostic (it mirrors the OpenAI Responses API content part; Anthropic expects a `document` block); providers
|
|
98
|
+
// that don't accept it will fail, and IngestionService will learn unsupported pdf modality.
|
|
99
|
+
const assetBlock = vlmPayload.kind === "pdf"
|
|
100
|
+
? { type: "input_file", file_url: vlmPayload.dataUrl }
|
|
101
|
+
: { type: "image_url", image_url: { url: vlmPayload.dataUrl } };
|
|
102
|
+
|
|
103
|
+
return [{ type: "text", text: textPrompt }, assetBlock];
|
|
80
104
|
}
|
|
81
105
|
// Standard text payload
|
|
82
106
|
return textFirst
|
|
@@ -444,7 +468,7 @@ async function evaluateCondition(condition: MatchCondition, doc: DocumentObject,
|
|
|
444
468
|
model,
|
|
445
469
|
condition_type: condition.type,
|
|
446
470
|
prompt_preview: prompt.slice(0, 180),
|
|
447
|
-
vision_payload: doc.text
|
|
471
|
+
vision_payload: hasVlmPayload(doc.text)
|
|
448
472
|
}
|
|
449
473
|
});
|
|
450
474
|
Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Policy Matching", {
|
|
@@ -453,7 +477,7 @@ async function evaluateCondition(condition: MatchCondition, doc: DocumentObject,
|
|
|
453
477
|
model,
|
|
454
478
|
condition_type: condition.type,
|
|
455
479
|
prompt_preview: prompt.slice(0, 180),
|
|
456
|
-
vision_payload: doc.text
|
|
480
|
+
vision_payload: hasVlmPayload(doc.text)
|
|
457
481
|
}, doc.supabase);
|
|
458
482
|
const result = await sdk.llm.chat(
|
|
459
483
|
[
|
|
@@ -563,7 +587,7 @@ Fields to extract:
|
|
|
563
587
|
${fieldDescriptions}`;
|
|
564
588
|
|
|
565
589
|
try {
|
|
566
|
-
const isVlmPayload = doc.text
|
|
590
|
+
const isVlmPayload = hasVlmPayload(doc.text);
|
|
567
591
|
const mixedPrompt = isVlmPayload
|
|
568
592
|
? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
|
|
569
593
|
: prompt;
|
|
@@ -722,7 +746,7 @@ Rules:
|
|
|
722
746
|
known_fields_count: Object.keys(contractData).length,
|
|
723
747
|
}, doc.supabase);
|
|
724
748
|
|
|
725
|
-
const isVlmPayload = doc.text
|
|
749
|
+
const isVlmPayload = hasVlmPayload(doc.text);
|
|
726
750
|
const mixedPrompt = isVlmPayload
|
|
727
751
|
? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
|
|
728
752
|
: prompt;
|
|
@@ -1004,7 +1028,9 @@ export class PolicyEngine {
|
|
|
1004
1028
|
const allowLearnedFallback = opts.allowLearnedFallback !== false && !forcedPolicyId;
|
|
1005
1029
|
if (allowLearnedFallback && doc.supabase && policies.length > 0) {
|
|
1006
1030
|
try {
|
|
1007
|
-
const learningText = doc.text
|
|
1031
|
+
const learningText = doc.text
|
|
1032
|
+
.replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "")
|
|
1033
|
+
.replace(/\[VLM_PDF_DATA:[^\]]+\]/g, "");
|
|
1008
1034
|
const learned = await PolicyLearningService.resolveLearnedCandidate({
|
|
1009
1035
|
supabase: doc.supabase,
|
|
1010
1036
|
userId: doc.userId,
|
|
@@ -1118,7 +1144,7 @@ export class PolicyEngine {
|
|
|
1118
1144
|
`No markdown, no explanation — only the JSON object.`;
|
|
1119
1145
|
|
|
1120
1146
|
const userPrompt = `Extract the following fields from the document:\n${fieldList}`;
|
|
1121
|
-
const isVlmPayload = doc.text
|
|
1147
|
+
const isVlmPayload = hasVlmPayload(doc.text);
|
|
1122
1148
|
const mixedPrompt = isVlmPayload ? `${systemPrompt}\n\n${userPrompt}` : userPrompt;
|
|
1123
1149
|
|
|
1124
1150
|
try {
|
|
@@ -135,8 +135,8 @@ export class RAGService {
|
|
|
135
135
|
supabase: SupabaseClient,
|
|
136
136
|
settings?: EmbeddingSettings
|
|
137
137
|
): Promise<void> {
|
|
138
|
-
if (
|
|
139
|
-
logger.info(`Skipping chunking and embedding for VLM base64
|
|
138
|
+
if (/^\[VLM_(IMAGE|PDF)_DATA:/.test(rawText)) {
|
|
139
|
+
logger.info(`Skipping chunking and embedding for VLM base64 multimodal data (Ingestion: ${ingestionId})`);
|
|
140
140
|
return;
|
|
141
141
|
}
|
|
142
142
|
|