@claritylabs/cl-sdk 0.17.1 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -6
- package/dist/index.d.mts +3543 -282
- package/dist/index.d.ts +3543 -282
- package/dist/index.js +2898 -659
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +2836 -659
- package/dist/index.mjs.map +1 -1
- package/dist/storage-sqlite.d.mts +430 -1
- package/dist/storage-sqlite.d.ts +430 -1
- package/dist/storage-sqlite.js +346 -8
- package/dist/storage-sqlite.js.map +1 -1
- package/dist/storage-sqlite.mjs +343 -8
- package/dist/storage-sqlite.mjs.map +1 -1
- package/package.json +2 -2
package/dist/index.mjs
CHANGED
|
@@ -203,6 +203,72 @@ function createPipelineContext(opts) {
|
|
|
203
203
|
};
|
|
204
204
|
}
|
|
205
205
|
|
|
206
|
+
// src/core/model-budget.ts
|
|
207
|
+
/**
 * Coerce a loosely-typed value into a positive whole number.
 *
 * @param {unknown} value - Candidate value (typically an optional config field).
 * @returns {number | undefined} The value rounded down when it is a finite
 *   number whose floor is at least 1; otherwise `undefined`.
 */
function positiveInteger(value) {
  if (typeof value !== "number" || !Number.isFinite(value)) return void 0;
  const floored = Math.floor(value);
  // Test positivity AFTER flooring: a fraction such as 0.5 is > 0 but floors
  // to 0, which must not be reported as a positive integer.
  return floored > 0 ? floored : void 0;
}
|
|
210
|
+
/**
 * Resolve the output-token budget for one model task.
 *
 * The starting budget is the first available of: the constraint's preferred
 * output tokens, the model's per-task capability, the long-list capability
 * (only for "extraction_long_list"), the model's default capability, and
 * finally the caller's hint (4096 when no usable hint is given). The budget is
 * then raised to the constraint's minimum and capped by the model maximum and
 * by the hard maximum (constraint maximum, else provider maximum). Every cap
 * that actually lowers the budget adds a warning.
 *
 * @param {object} params - Task kind, model capabilities, constraint and size hints.
 * @returns {object} The resolved budget, the limits used, the estimated input
 *   tokens, a truncation-risk label ("low" | "medium" | "high") and warnings.
 */
function resolveModelBudget(params) {
  const { taskKind, modelCapabilities: caps, constraint: limits } = params;
  const isLongList = taskKind === "extraction_long_list";
  const warnings = [];

  const hintTokens = positiveInteger(params.hintTokens) ?? 4096;
  const minOutputTokens = positiveInteger(limits?.minOutputTokens);
  const modelMaxOutputTokens = positiveInteger(caps?.maxOutputTokens);
  const hardMaxOutputTokens = positiveInteger(limits?.maxOutputTokens) ?? positiveInteger(params.providerMaxOutputTokens);
  const estimatedInputTokens = estimateTokens(params.inputContextBytes);
  const schemaTokens = estimateTokens(params.schemaSizeBytes) ?? 0;
  const expectedListLength = positiveInteger(params.expectedListLength) ?? 0;

  // Preference order for the starting budget; the first defined entry wins.
  const preferences = [
    positiveInteger(limits?.outputTokens),
    positiveInteger(caps?.taskOutputTokens?.[taskKind]),
    isLongList ? positiveInteger(caps?.longListOutputTokens) : void 0,
    positiveInteger(caps?.defaultOutputTokens)
  ];
  let maxTokens = preferences.find((candidate) => candidate !== void 0) ?? hintTokens;

  if (minOutputTokens) {
    maxTokens = Math.max(maxTokens, minOutputTokens);
  }

  // Clamp to a ceiling (when one is configured), warning only if it bites.
  const applyCeiling = (ceiling, message) => {
    if (!ceiling) return;
    if (maxTokens > ceiling) warnings.push(message);
    maxTokens = Math.min(maxTokens, ceiling);
  };
  applyCeiling(modelMaxOutputTokens, `Resolved ${taskKind} budget was capped by model max output tokens.`);
  applyCeiling(hardMaxOutputTokens, `Resolved ${taskKind} budget was capped by an explicit hard max output token constraint.`);

  const expectedOutputFloor = expectedOutputTokensFloor(taskKind, schemaTokens, expectedListLength, hintTokens);
  let outputTruncationRisk = "low";
  if (maxTokens < expectedOutputFloor * 0.65) {
    outputTruncationRisk = "high";
  } else if (maxTokens < expectedOutputFloor) {
    outputTruncationRisk = "medium";
  }
  if (outputTruncationRisk !== "low") {
    warnings.push(`Resolved ${taskKind} budget may be under-sized for the expected output shape.`);
  }

  // Warn once the estimated input exceeds 90% of the configured input limit.
  const maxInputTokens = positiveInteger(caps?.maxInputTokens);
  const nearInputLimit = estimatedInputTokens && maxInputTokens && estimatedInputTokens > maxInputTokens * 0.9;
  if (nearInputLimit) {
    warnings.push(`Estimated ${taskKind} input context is close to or above the configured model input limit.`);
  }

  return {
    taskKind,
    maxTokens,
    hintTokens,
    modelMaxOutputTokens,
    hardMaxOutputTokens,
    estimatedInputTokens,
    outputTruncationRisk,
    warnings
  };
}
|
|
261
|
+
/**
 * Estimate a token count from a byte size at four bytes per token, rounded up.
 *
 * @param {unknown} bytes - Byte size; anything `positiveInteger` rejects yields `undefined`.
 * @returns {number | undefined} Estimated token count, or `undefined` when no usable size is given.
 */
function estimateTokens(bytes) {
  const byteCount = positiveInteger(bytes);
  return byteCount ? Math.ceil(byteCount / 4) : void 0;
}
|
|
266
|
+
/**
 * Lower bound on the output tokens a task is expected to need.
 *
 * Takes the largest of: 1.5x the schema tokens (rounded up), a per-item
 * allowance for the expected list (90 tokens per item for
 * "extraction_long_list", 45 otherwise), and 75% of the hint (rounded down).
 *
 * @param {string} taskKind
 * @param {number} schemaTokens
 * @param {number} expectedListLength
 * @param {number} hintTokens
 * @returns {number}
 */
function expectedOutputTokensFloor(taskKind, schemaTokens, expectedListLength, hintTokens) {
  const tokensPerItem = taskKind === "extraction_long_list" ? 90 : 45;
  let listFloor = 0;
  if (expectedListLength > 0) {
    listFloor = expectedListLength * tokensPerItem;
  }
  const schemaFloor = Math.ceil(schemaTokens * 1.5);
  const hintFloor = Math.floor(hintTokens * 0.75);
  return Math.max(schemaFloor, listFloor, hintFloor);
}
|
|
271
|
+
|
|
206
272
|
// src/schemas/enums.ts
|
|
207
273
|
import { z as z2 } from "zod";
|
|
208
274
|
var PolicyTypeSchema = z2.enum([
|
|
@@ -554,7 +620,10 @@ var CoverageSchema = z4.object({
|
|
|
554
620
|
formNumber: z4.string().optional(),
|
|
555
621
|
pageNumber: z4.number().optional(),
|
|
556
622
|
sectionRef: z4.string().optional(),
|
|
557
|
-
originalContent: z4.string().optional()
|
|
623
|
+
originalContent: z4.string().optional(),
|
|
624
|
+
recordId: z4.string().optional(),
|
|
625
|
+
sourceSpanIds: z4.array(z4.string()).optional(),
|
|
626
|
+
sourceTextHash: z4.string().optional()
|
|
558
627
|
});
|
|
559
628
|
var EnrichedCoverageSchema = z4.object({
|
|
560
629
|
name: z4.string(),
|
|
@@ -578,7 +647,10 @@ var EnrichedCoverageSchema = z4.object({
|
|
|
578
647
|
premium: z4.string().optional(),
|
|
579
648
|
pageNumber: z4.number().optional(),
|
|
580
649
|
sectionRef: z4.string().optional(),
|
|
581
|
-
originalContent: z4.string().optional()
|
|
650
|
+
originalContent: z4.string().optional(),
|
|
651
|
+
recordId: z4.string().optional(),
|
|
652
|
+
sourceSpanIds: z4.array(z4.string()).optional(),
|
|
653
|
+
sourceTextHash: z4.string().optional()
|
|
582
654
|
});
|
|
583
655
|
|
|
584
656
|
// src/schemas/endorsement.ts
|
|
@@ -602,7 +674,10 @@ var EndorsementSchema = z5.object({
|
|
|
602
674
|
premiumImpact: z5.string().optional(),
|
|
603
675
|
content: z5.string(),
|
|
604
676
|
pageStart: z5.number(),
|
|
605
|
-
pageEnd: z5.number().optional()
|
|
677
|
+
pageEnd: z5.number().optional(),
|
|
678
|
+
recordId: z5.string().optional(),
|
|
679
|
+
sourceSpanIds: z5.array(z5.string()).optional(),
|
|
680
|
+
sourceTextHash: z5.string().optional()
|
|
606
681
|
});
|
|
607
682
|
|
|
608
683
|
// src/schemas/exclusion.ts
|
|
@@ -617,7 +692,10 @@ var ExclusionSchema = z6.object({
|
|
|
617
692
|
buybackEndorsement: z6.string().optional(),
|
|
618
693
|
appliesTo: z6.array(z6.string()).optional(),
|
|
619
694
|
content: z6.string(),
|
|
620
|
-
pageNumber: z6.number().optional()
|
|
695
|
+
pageNumber: z6.number().optional(),
|
|
696
|
+
recordId: z6.string().optional(),
|
|
697
|
+
sourceSpanIds: z6.array(z6.string()).optional(),
|
|
698
|
+
sourceTextHash: z6.string().optional()
|
|
621
699
|
});
|
|
622
700
|
|
|
623
701
|
// src/schemas/condition.ts
|
|
@@ -631,7 +709,10 @@ var PolicyConditionSchema = z7.object({
|
|
|
631
709
|
conditionType: ConditionTypeSchema,
|
|
632
710
|
content: z7.string(),
|
|
633
711
|
keyValues: z7.array(ConditionKeyValueSchema).optional(),
|
|
634
|
-
pageNumber: z7.number().optional()
|
|
712
|
+
pageNumber: z7.number().optional(),
|
|
713
|
+
recordId: z7.string().optional(),
|
|
714
|
+
sourceSpanIds: z7.array(z7.string()).optional(),
|
|
715
|
+
sourceTextHash: z7.string().optional()
|
|
635
716
|
});
|
|
636
717
|
|
|
637
718
|
// src/schemas/parties.ts
|
|
@@ -1214,7 +1295,10 @@ var SectionSchema = z16.object({
|
|
|
1214
1295
|
type: z16.string(),
|
|
1215
1296
|
coverageType: z16.string().optional(),
|
|
1216
1297
|
content: z16.string(),
|
|
1217
|
-
subsections: z16.array(SubsectionSchema).optional()
|
|
1298
|
+
subsections: z16.array(SubsectionSchema).optional(),
|
|
1299
|
+
recordId: z16.string().optional(),
|
|
1300
|
+
sourceSpanIds: z16.array(z16.string()).optional(),
|
|
1301
|
+
sourceTextHash: z16.string().optional()
|
|
1218
1302
|
});
|
|
1219
1303
|
var SubjectivitySchema = z16.object({
|
|
1220
1304
|
description: z16.string(),
|
|
@@ -1240,7 +1324,10 @@ var DefinitionSchema = z16.object({
|
|
|
1240
1324
|
formNumber: z16.string().optional(),
|
|
1241
1325
|
formTitle: z16.string().optional(),
|
|
1242
1326
|
sectionRef: z16.string().optional(),
|
|
1243
|
-
originalContent: z16.string().optional()
|
|
1327
|
+
originalContent: z16.string().optional(),
|
|
1328
|
+
recordId: z16.string().optional(),
|
|
1329
|
+
sourceSpanIds: z16.array(z16.string()).optional(),
|
|
1330
|
+
sourceTextHash: z16.string().optional()
|
|
1244
1331
|
});
|
|
1245
1332
|
var CoveredReasonSchema = z16.object({
|
|
1246
1333
|
coverageName: z16.string(),
|
|
@@ -1254,7 +1341,10 @@ var CoveredReasonSchema = z16.object({
|
|
|
1254
1341
|
formNumber: z16.string().optional(),
|
|
1255
1342
|
formTitle: z16.string().optional(),
|
|
1256
1343
|
sectionRef: z16.string().optional(),
|
|
1257
|
-
originalContent: z16.string().optional()
|
|
1344
|
+
originalContent: z16.string().optional(),
|
|
1345
|
+
recordId: z16.string().optional(),
|
|
1346
|
+
sourceSpanIds: z16.array(z16.string()).optional(),
|
|
1347
|
+
sourceTextHash: z16.string().optional()
|
|
1258
1348
|
});
|
|
1259
1349
|
var BaseDocumentFields = {
|
|
1260
1350
|
id: z16.string(),
|
|
@@ -1393,6 +1483,294 @@ var PLATFORM_CONFIGS = {
|
|
|
1393
1483
|
}
|
|
1394
1484
|
};
|
|
1395
1485
|
|
|
1486
|
+
// src/schemas/pce.ts
|
|
1487
|
+
import { z as z19 } from "zod";
|
|
1488
|
+
|
|
1489
|
+
// src/case/index.ts
|
|
1490
|
+
import { z as z18 } from "zod";
|
|
1491
|
+
// A piece of evidence text that citations refer to by `id`. `text` is the
// only content field; the rest are optional locators.
var CaseEvidenceSourceSchema = z18.object({
  id: z18.string(),
  label: z18.string().optional(),
  documentId: z18.string().optional(),
  page: z18.number().optional(),
  fieldPath: z18.string().optional(),
  text: z18.string().describe("Source text available for span validation and citation"),
  metadata: z18.record(z18.string()).optional()
});
// A quote attributed to an evidence source (`sourceId` matches a source `id`).
var CaseCitationSchema = z18.object({
  sourceId: z18.string(),
  quote: z18.string(),
  page: z18.number().optional(),
  fieldPath: z18.string().optional()
});
// Severity levels attached to validation issues.
var ValidationIssueSeveritySchema = z18.enum(["info", "warning", "blocking"]);
// One validation finding; item/field/source references are optional.
var CaseValidationIssueSchema = z18.object({
  code: z18.string(),
  severity: ValidationIssueSeveritySchema,
  message: z18.string(),
  itemId: z18.string().optional(),
  fieldPath: z18.string().optional(),
  sourceId: z18.string().optional()
});
// A question raised for missing information; `answer` is absent until one is supplied.
var MissingInfoQuestionSchema = z18.object({
  id: z18.string(),
  itemId: z18.string().optional(),
  fieldPath: z18.string().optional(),
  question: z18.string(),
  reason: z18.string(),
  answer: z18.string().optional()
});
// Kinds of artifact a submission packet can contain.
var CasePacketArtifactKindSchema = z18.enum([
  "underwriter_summary",
  "carrier_email",
  "missing_info_request",
  "json_packet",
  "validation_report"
]);
// A single packet artifact; `citations` defaults to an empty array.
var CasePacketArtifactSchema = z18.object({
  id: z18.string(),
  kind: CasePacketArtifactKindSchema,
  title: z18.string(),
  content: z18.string(),
  citations: z18.array(CaseCitationSchema).default([])
});
// A packet for one case: its artifacts, validation issues and open questions.
var CaseSubmissionPacketSchema = z18.object({
  id: z18.string(),
  caseId: z18.string(),
  artifacts: z18.array(CasePacketArtifactSchema),
  validationIssues: z18.array(CaseValidationIssueSchema),
  missingInfoQuestions: z18.array(MissingInfoQuestionSchema),
  createdAt: z18.number()
});
// Named actions available in a case workflow.
var CaseActionSchema = z18.enum([
  "inspect_attachments",
  "retrieve_policy_evidence",
  "retrieve_prior_applications",
  "normalize_requested_change",
  "extract_application_fields",
  "fill_from_org_context",
  "fill_from_source_spans",
  "ask_missing_info_questions",
  "run_validation",
  "generate_packet",
  "answer_field_or_case_question"
]);
// Execution strategies a case can be run under.
var AgenticExecutionModeSchema = z18.enum(["deterministic_tree", "market_eval", "hybrid"]);
// Per-dimension proposal score; every component is constrained to [0, 1].
var CaseProposalScoreSchema = z18.object({
  grounding: z18.number().min(0).max(1),
  completeness: z18.number().min(0).max(1),
  consistency: z18.number().min(0).max(1),
  determinism: z18.number().min(0).max(1),
  risk: z18.number().min(0).max(1),
  cost: z18.number().min(0).max(1)
});
// A candidate proposal. Array fields default to empty and the risk/cost
// estimates default to 0.5; `score` is optional.
var CaseProposalSchema = z18.object({
  id: z18.string(),
  sourceSpanIds: z18.array(z18.string()).default([]),
  confidence: z18.number().min(0).max(1),
  missingInfo: z18.array(z18.string()).default([]),
  validationIssues: z18.array(CaseValidationIssueSchema).default([]),
  estimatedRisk: z18.number().min(0).max(1).default(0.5),
  estimatedCost: z18.number().min(0).max(1).default(0.5),
  score: CaseProposalScoreSchema.optional()
});
|
|
1577
|
+
/**
 * Build a deterministic identifier of the form `<prefix>-<12 hex chars>`.
 *
 * @param {string} prefix - Leading label for the id.
 * @param {unknown} parts - Value serialized with `stableStringify` and hashed.
 * @returns {string}
 */
function stableCaseId(prefix, parts) {
  const digest = stableHash(stableStringify(parts));
  const shortDigest = digest.slice(0, 12);
  return `${prefix}-${shortDigest}`;
}
|
|
1580
|
+
/**
 * Serialize a value to a string with object keys in sorted order, so that
 * equal data yields equal text regardless of key insertion order.
 *
 * Always returns a string. A top-level value that JSON cannot represent
 * (`undefined`, a function, a symbol) becomes the text "undefined" instead of
 * the non-string `undefined`, which would otherwise crash a consumer that
 * reads `.length` on the result. Nested output is unchanged, so identifiers
 * derived from existing inputs stay the same.
 *
 * @param {unknown} value - Value to serialize.
 * @returns {string}
 */
function stableStringify(value) {
  const serialize = (node) => {
    if (Array.isArray(node)) {
      return `[${node.map((entry) => serialize(entry)).join(",")}]`;
    }
    if (node && typeof node === "object") {
      const fields = Object.keys(node).sort().map((key) => `${JSON.stringify(key)}:${serialize(node[key])}`);
      return `{${fields.join(",")}}`;
    }
    // May be `undefined` for values JSON cannot represent.
    return JSON.stringify(node);
  };
  return serialize(value) ?? "undefined";
}
|
|
1590
|
+
/**
 * Hash a string into 16 lowercase hex characters.
 *
 * Two independent 32-bit accumulators are updated per UTF-16 code unit: the
 * first XORs in the code unit, the second XORs in the code unit plus its
 * index; each is then multiplied by its own constant with `Math.imul`. The
 * result is the two words, zero-padded to 8 hex digits each, concatenated.
 *
 * @param {string} input
 * @returns {string}
 */
function stableHash(input) {
  let first = 2166136261;
  let second = 2654435769;
  let position = 0;
  while (position < input.length) {
    const code = input.charCodeAt(position);
    first = Math.imul(first ^ code, 16777619);
    second = Math.imul(second ^ (code + position), 2246822507);
    position += 1;
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned before formatting.
  const toHexWord = (word) => (word >>> 0).toString(16).padStart(8, "0");
  return `${toHexWord(first)}${toHexWord(second)}`;
}
|
|
1602
|
+
/**
 * Prepare text for loose comparison: collapse each whitespace run to a single
 * space, trim both ends, and lowercase.
 *
 * @param {string} value
 * @returns {string}
 */
function normalizeForMatch(value) {
  const collapsed = value.replace(/\s+/g, " ");
  return collapsed.trim().toLowerCase();
}
|
|
1605
|
+
/**
 * Check whether a quote appears in an evidence source's text, comparing both
 * sides after `normalizeForMatch`.
 *
 * @param {{ text: string } | null | undefined} source - Evidence source; a missing source never matches.
 * @param {string} quote - Quote to look for; a blank quote never matches.
 * @returns {boolean}
 */
function evidenceContainsQuote(source, quote) {
  if (!source) return false;
  if (quote.trim() === "") return false;
  const haystack = normalizeForMatch(source.text);
  const needle = normalizeForMatch(quote);
  return haystack.includes(needle);
}
|
|
1609
|
+
/**
 * Validate that a quoted value is backed by a citation into a known source.
 *
 * Returns an empty array when there is no quote to check or the check passes;
 * otherwise a single-element array holding one issue:
 *   - "missing_citation": a quote was given without a citation;
 *   - "unknown_source":   the citation's `sourceId` is not among `params.sources`;
 *   - "quote_not_found":  the cited quote or the quoted value is absent from the source text.
 * Issue severity is `params.severity`, defaulting to "blocking".
 *
 * @param {object} params - `quote`, `citation`, `sources`, `fieldPath`, `itemId`, optional `severity`.
 * @returns {object[]}
 */
function validateQuotedEvidence(params) {
  const quote = params.quote?.trim();
  if (!quote) return [];

  const { citation, fieldPath, itemId } = params;
  const severity = params.severity ?? "blocking";
  // Every failure has the same shape; only the source-related ones carry `sourceId`.
  const fail = (code, message, extra = {}) => [{ code, severity, message, itemId, fieldPath, ...extra }];

  if (!citation) {
    return fail("missing_citation", `Quoted value for ${fieldPath} is missing a citation.`);
  }

  const { sourceId } = citation;
  const source = params.sources.find((candidate) => candidate.id === sourceId);
  if (!source) {
    return fail("unknown_source", `Citation source ${sourceId} was not provided for ${fieldPath}.`, { sourceId });
  }

  // A blank citation quote falls back to the quoted value itself.
  const citedQuote = citation.quote.trim() || quote;
  const supported = evidenceContainsQuote(source, citedQuote) && evidenceContainsQuote(source, quote);
  if (!supported) {
    return fail("quote_not_found", `Quoted value for ${fieldPath} was not found in source ${source.id}.`, { sourceId: source.id });
  }
  return [];
}
var validateEvidence = validateQuotedEvidence;
|
|
1647
|
+
/**
 * Merge reply answers into a list of missing-info questions.
 *
 * A reply matches a question when its non-empty `questionId` equals the
 * question's `id`, or its non-empty `fieldPath` equals the question's
 * `fieldPath`; the first matching reply is used. A matched reply whose
 * `answer` is missing, not a string, or blank leaves the question untouched
 * (a missing `answer` previously threw a TypeError).
 *
 * @param {object[]} questions - Questions; never mutated.
 * @param {object[]} answers - Replies with `questionId` and/or `fieldPath`, plus `answer`.
 * @returns {{ questions: object[], answeredCount: number }} The merged list and
 *   the number of questions whose stored answer actually changed.
 */
function mergeQuestionAnswers(questions, answers) {
  let answeredCount = 0;
  const merged = questions.map((question) => {
    const reply = answers.find(
      (candidate) => candidate.questionId && candidate.questionId === question.id || candidate.fieldPath && candidate.fieldPath === question.fieldPath
    );
    const text = reply?.answer;
    // Ignore replies without usable answer text rather than crashing on `.trim()`.
    if (typeof text !== "string" || text.trim() === "") return question;
    if (question.answer !== text) answeredCount += 1;
    return { ...question, answer: text };
  });
  return { questions: merged, answeredCount };
}
var processReply = mergeQuestionAnswers;
|
|
1660
|
+
/**
 * Compose the next outbound message from the questions that are still open.
 *
 * @param {object[]} questions - Questions; one is open when its `answer` is missing or blank.
 * @returns {string} The open questions' text, one per line, or a fixed
 *   notice when nothing is open.
 */
function generateNextMessage(questions) {
  const prompts = questions
    .filter((entry) => !entry.answer?.trim())
    .map((entry) => entry.question);
  if (prompts.length > 0) {
    return prompts.join("\n");
  }
  return "No missing information questions are open.";
}
|
|
1665
|
+
/**
 * Score a proposal on six dimensions, unless it already carries a score.
 *
 * - grounding:    1 when at least one source span id is attached, else 0
 * - completeness: 1 when no missing info is listed, else 0.4
 * - consistency:  0 when any validation issue is "blocking", else 1
 * - determinism:  1 when the id is non-blank, else 0
 * - risk / cost:  1 minus the respective estimate
 *
 * @param {object} proposal
 * @returns {object} `proposal.score` when present, otherwise the computed score.
 */
function scoreCaseProposal(proposal) {
  const { score: presetScore } = proposal;
  if (presetScore) return presetScore;

  const { validationIssues, sourceSpanIds, missingInfo, id, estimatedRisk, estimatedCost } = proposal;
  const blocked = validationIssues.some(({ severity }) => severity === "blocking");
  const hasSpans = sourceSpanIds.length > 0;
  const hasGaps = missingInfo.length !== 0;
  const hasId = id.trim().length > 0;
  return {
    grounding: hasSpans ? 1 : 0,
    completeness: hasGaps ? 0.4 : 1,
    consistency: blocked ? 0 : 1,
    determinism: hasId ? 1 : 0,
    risk: 1 - estimatedRisk,
    cost: 1 - estimatedCost
  };
}
|
|
1678
|
+
/**
 * Pick the best proposal from a list.
 *
 * Proposals carrying a blocking "missing_citation", "unknown_source" or
 * "quote_not_found" issue are dropped. The rest are ranked by total weighted
 * score, highest first, with ties broken by `id` via `localeCompare`.
 *
 * @param {object[]} proposals
 * @returns {object | undefined} The top-ranked proposal, or `undefined` when none qualify.
 */
function evaluateCaseProposals(proposals) {
  const evidenceFailureCodes = ["missing_citation", "unknown_source", "quote_not_found"];
  const isEvidenceBlocker = (issue) => issue.severity === "blocking" && evidenceFailureCodes.includes(issue.code);

  const ranked = proposals
    .filter((proposal) => !proposal.validationIssues.some((issue) => isEvidenceBlocker(issue)))
    .map((proposal) => ({ proposal, score: scoreCaseProposal(proposal) }))
    .sort((left, right) => {
      const leftTotal = totalProposalScore(left.score);
      const rightTotal = totalProposalScore(right.score);
      return rightTotal !== leftTotal ? rightTotal - leftTotal : left.proposal.id.localeCompare(right.proposal.id);
    });

  const [winner] = ranked;
  return winner?.proposal;
}
|
|
1688
|
+
/**
 * Collapse a six-part proposal score into one number.
 *
 * Weights: grounding x3, completeness x2, consistency x3; determinism, risk
 * and cost x1 each. Terms are added left to right in that order.
 *
 * @param {object} score
 * @returns {number}
 */
function totalProposalScore(score) {
  const weightedTerms = [
    score.grounding * 3,
    score.completeness * 2,
    score.consistency * 3,
    score.determinism,
    score.risk,
    score.cost
  ];
  return weightedTerms.reduce((sum, term) => sum + term);
}
|
|
1691
|
+
|
|
1692
|
+
// src/schemas/pce.ts
|
|
1693
|
+
// The operation a policy change performs.
var PolicyChangeActionSchema = z19.enum(["add", "remove", "update", "replace", "clarify"]);
// Categories of policy change.
var PolicyChangeKindSchema = z19.enum([
  "named_insured_change",
  "additional_insured_change",
  "coverage_change",
  "limit_change",
  "deductible_change",
  "location_change",
  "vehicle_change",
  "certificate_endorsement_request",
  "cancellation",
  "nonrenewal",
  "renewal_submission_update",
  "general_endorsement"
]);
// Coarse confidence label for a change item.
var PolicyChangeConfidenceSchema = z19.enum(["high", "medium", "low"]);
// Workflow status of a change item.
var PolicyChangeStatusSchema = z19.enum(["draft", "needs_info", "ready", "blocked"]);
// One requested change to a policy. Defaults applied on parse: kind
// "general_endorsement", affectedPolicyId "unknown", confidence "medium",
// status "ready", and empty arrays for sourceIds / sourceSpanIds / citations.
var PolicyChangeItemSchema = z19.object({
  id: z19.string(),
  kind: PolicyChangeKindSchema.default("general_endorsement"),
  action: PolicyChangeActionSchema,
  affectedPolicyId: z19.string().default("unknown"),
  fieldPath: z19.string().describe("Stable policy field path or business field name"),
  label: z19.string(),
  beforeValue: z19.string().optional().describe("Existing policy value, when cited from policy evidence"),
  afterValue: z19.string().optional().describe("Requested new value"),
  requestedValue: z19.string().optional().describe("Alias for afterValue used by policy-change workflows"),
  effectiveDate: z19.string().optional(),
  reason: z19.string().optional(),
  sourceIds: z19.array(z19.string()).default([]),
  sourceSpanIds: z19.array(z19.string()).default([]),
  userSourceSpanIds: z19.array(z19.string()).optional(),
  citations: z19.array(CaseCitationSchema).default([]),
  confidence: PolicyChangeConfidenceSchema.default("medium"),
  confidenceScore: z19.number().min(0).max(1).optional(),
  status: PolicyChangeStatusSchema.default("ready")
});
// Normalization output. Items and questions reuse the schemas above, but with
// `id` (and, for items, `status`) made optional and without the item status default.
var PceNormalizationResultSchema = z19.object({
  summary: z19.string(),
  items: z19.array(PolicyChangeItemSchema.omit({ id: true, status: true }).extend({
    id: z19.string().optional(),
    status: PolicyChangeStatusSchema.optional()
  })),
  missingInfoQuestions: z19.array(MissingInfoQuestionSchema.omit({ id: true }).extend({
    id: z19.string().optional()
  })).default([])
});
// Assessed impact of one change item (`itemId`); both "likely required"
// flags default to true.
var PolicyChangeImpactSchema = z19.object({
  itemId: z19.string(),
  beforeValue: z19.string().optional(),
  requestedValue: z19.string().optional(),
  likelyEndorsementRequired: z19.boolean().default(true),
  carrierApprovalLikelyRequired: z19.boolean().default(true),
  affectedCoverageForms: z19.array(z19.string()).default([]),
  sourceSpanIds: z19.array(z19.string()).default([])
});
// Full state of a policy-change case; executionMode defaults to "deterministic_tree".
var PceCaseStateSchema = z19.object({
  id: z19.string(),
  requestText: z19.string(),
  summary: z19.string(),
  executionMode: AgenticExecutionModeSchema.default("deterministic_tree"),
  items: z19.array(PolicyChangeItemSchema),
  impacts: z19.array(PolicyChangeImpactSchema),
  evidenceSources: z19.array(CaseEvidenceSourceSchema),
  validationIssues: z19.array(CaseValidationIssueSchema),
  missingInfoQuestions: z19.array(MissingInfoQuestionSchema),
  createdAt: z19.number(),
  updatedAt: z19.number()
});
// An incoming change request: free text plus optional settings.
var PolicyChangeRequestSchema = z19.object({
  id: z19.string(),
  text: z19.string(),
  executionMode: AgenticExecutionModeSchema.optional(),
  userSourceSpanIds: z19.array(z19.string()).optional(),
  createdAt: z19.number().optional()
});
// Case submission packet extended with the policy-change case state.
var PceSubmissionPacketSchema = CaseSubmissionPacketSchema.extend({
  pceCase: PceCaseStateSchema,
  artifacts: z19.array(CasePacketArtifactSchema)
});
|
|
1773
|
+
|
|
1396
1774
|
// src/schemas/context-keys.ts
|
|
1397
1775
|
var CONTEXT_KEY_MAP = [
|
|
1398
1776
|
{ extractedField: "insuredName", category: "company_info", contextKey: "company_name", description: "Primary named insured" },
|
|
@@ -1445,6 +1823,400 @@ var CONTEXT_KEY_MAP = [
|
|
|
1445
1823
|
{ extractedField: "declarations.breed", category: "pet_info", contextKey: "pet_breed", description: "Pet breed" }
|
|
1446
1824
|
];
|
|
1447
1825
|
|
|
1826
|
+
// src/source/schemas.ts
|
|
1827
|
+
import { z as z20 } from "zod";
|
|
1828
|
+
// Content format of a source span.
var SourceSpanKindSchema = z20.enum([
  "pdf_text",
  "pdf_image",
  "html",
  "markdown",
  "plain_text",
  "structured_field"
]);
// Kind of document a span was taken from.
var SourceKindSchema = z20.enum([
  "policy_pdf",
  "application_pdf",
  "email",
  "attachment",
  "manual_note"
]);
// A rectangle on a page; `page` must be a positive integer.
var SourceSpanBBoxSchema = z20.object({
  page: z20.number().int().positive(),
  x: z20.number(),
  y: z20.number(),
  width: z20.number(),
  height: z20.number()
});
// Optional locators for a span. Page and line numbers must be positive
// integers; character offsets must be non-negative integers.
var SourceSpanLocationSchema = z20.object({
  page: z20.number().int().positive().optional(),
  startPage: z20.number().int().positive().optional(),
  endPage: z20.number().int().positive().optional(),
  charStart: z20.number().int().nonnegative().optional(),
  charEnd: z20.number().int().nonnegative().optional(),
  lineStart: z20.number().int().positive().optional(),
  lineEnd: z20.number().int().positive().optional(),
  fieldPath: z20.string().optional()
});
// A span of source text. `id`, `documentId` and `hash` must be non-empty;
// `kind` and `text` are required; everything else is optional.
var SourceSpanSchema = z20.object({
  id: z20.string().min(1),
  documentId: z20.string().min(1),
  sourceKind: SourceKindSchema.optional(),
  chunkId: z20.string().optional(),
  kind: SourceSpanKindSchema,
  text: z20.string(),
  hash: z20.string().min(1),
  textHash: z20.string().optional(),
  pageStart: z20.number().int().positive().optional(),
  pageEnd: z20.number().int().positive().optional(),
  sectionId: z20.string().optional(),
  formNumber: z20.string().optional(),
  bbox: z20.array(SourceSpanBBoxSchema).optional(),
  location: SourceSpanLocationSchema.optional(),
  metadata: z20.record(z20.string()).optional()
});
// A reference to a span by id, with optional quote, hash and location.
var SourceSpanRefSchema = z20.object({
  sourceSpanId: z20.string().min(1),
  documentId: z20.string().min(1).optional(),
  chunkId: z20.string().optional(),
  quote: z20.string().optional(),
  hash: z20.string().optional(),
  location: SourceSpanLocationSchema.optional()
});
// A chunk of text that lists the ids of its spans; `metadata` defaults to {}.
var SourceChunkSchema = z20.object({
  id: z20.string().min(1),
  documentId: z20.string().min(1),
  sourceSpanIds: z20.array(z20.string().min(1)),
  text: z20.string(),
  textHash: z20.string().min(1),
  pageStart: z20.number().int().positive().optional(),
  pageEnd: z20.number().int().positive().optional(),
  metadata: z20.record(z20.string()).default({})
});
|
|
1895
|
+
|
|
1896
|
+
// src/source/ids.ts
|
|
1897
|
+
/**
 * Collapse every whitespace run to a single space and trim both ends.
 *
 * @param {string} text
 * @returns {string}
 */
function normalizeText(text) {
  const collapsed = text.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
1900
|
+
/**
 * Serialize a value to a string with object keys in sorted order.
 *
 * Object properties whose value is `undefined` are omitted. A bare value that
 * JSON cannot represent (including `undefined` itself) becomes the text
 * "undefined".
 *
 * @param {unknown} value
 * @returns {string}
 */
function stableStringify2(value) {
  if (Array.isArray(value)) {
    const items = value.map((item) => stableStringify2(item));
    return `[${items.join(",")}]`;
  }
  if (typeof value === "object" && value !== null) {
    const fields = [];
    for (const key of Object.keys(value).sort()) {
      if (value[key] === void 0) continue;
      fields.push(`${JSON.stringify(key)}:${stableStringify2(value[key])}`);
    }
    return `{${fields.join(",")}}`;
  }
  // Primitives, plus `undefined`/functions/symbols for which JSON.stringify yields undefined.
  return JSON.stringify(value) ?? "undefined";
}
|
|
1913
|
+
/**
 * Hash any value into 16 lowercase hex characters.
 *
 * The value is first serialized with `stableStringify2`. Two 32-bit
 * accumulators are then updated per UTF-16 code unit of that text — one mixing
 * in the code unit, the other the code unit plus its index — and the result
 * is both words as zero-padded 8-digit hex, concatenated.
 *
 * @param {unknown} value
 * @returns {string}
 */
function stableHash2(value) {
  const serialized = stableStringify2(value);
  let first = 2166136261;
  let second = 73244475;
  let position = 0;
  while (position < serialized.length) {
    const code = serialized.charCodeAt(position);
    first = Math.imul(first ^ code, 16777619);
    second = Math.imul(second ^ (code + position), 668265261);
    position += 1;
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned before formatting.
  const toHexWord = (word) => (word >>> 0).toString(16).padStart(8, "0");
  return `${toHexWord(first)}${toHexWord(second)}`;
}
|
|
1926
|
+
/**
 * Hash span text after whitespace normalization, so texts that differ only in
 * whitespace layout produce the same hash.
 *
 * @param {string} text
 * @returns {string}
 */
function sourceSpanTextHash(text) {
  const canonical = normalizeText(text);
  return stableHash2(canonical);
}
|
|
1929
|
+
/**
 * Build a deterministic span id of the form
 * `documentId[:chunkId][:fieldPath]:<16 hex chars>`.
 *
 * The hex suffix hashes the document id, chunk id, field path, location and
 * whitespace-normalized text. Empty parts are left out, and any character
 * outside `[a-zA-Z0-9_.:-]` in a part is replaced with "_".
 *
 * @param {object} input - `documentId`, plus optional `chunkId`, `fieldPath`, `location`, `text`.
 * @returns {string}
 */
function buildSourceSpanId(input) {
  const { documentId, chunkId, fieldPath, location, text } = input;
  const digest = stableHash2({
    documentId,
    chunkId,
    fieldPath,
    location,
    text: text ? normalizeText(text) : void 0
  }).slice(0, 16);

  const segments = [];
  for (const part of [documentId, chunkId, fieldPath, digest]) {
    if (part) {
      segments.push(part.replace(/[^a-zA-Z0-9_.:-]/g, "_"));
    }
  }
  return segments.join(":");
}
|
|
1939
|
+
|
|
1940
|
+
// src/source/retrieval.ts
|
|
1941
|
+
/**
 * Build a "|"-joined key for ordering evidence with equal relevance.
 *
 * Parts, in order: source, sourceSpanId, chunkId, documentId, turnId,
 * attachmentId (each "" when null or undefined), then the evidence text.
 *
 * @param {object} evidence
 * @returns {string}
 */
function evidenceTieBreakId(evidence) {
  const optionalKeys = ["source", "sourceSpanId", "chunkId", "documentId", "turnId", "attachmentId"];
  const parts = optionalKeys.map((key) => evidence[key] ?? "");
  parts.push(evidence.text);
  return parts.join("|");
}
|
|
1952
|
+
/**
 * Sort comparator for evidence: higher relevance first, and equal relevance
 * ordered by the tie-break key via `localeCompare`.
 *
 * @param {object} a
 * @param {object} b
 * @returns {number}
 */
function compareSourceEvidence(a, b) {
  const relevanceGap = b.relevance - a.relevance;
  return relevanceGap !== 0
    ? relevanceGap
    : evidenceTieBreakId(a).localeCompare(evidenceTieBreakId(b));
}
|
|
1957
|
+
function orderSourceEvidence(evidence) {
|
|
1958
|
+
return [...evidence].sort(compareSourceEvidence);
|
|
1959
|
+
}
|
|
1960
|
+
|
|
1961
|
+
// src/source/extraction.ts
|
|
1962
|
+
/**
 * Collapse every whitespace run to a single space and trim both ends.
 *
 * @param {string} value
 * @returns {string}
 */
function normalizeWhitespace(value) {
  const singleSpaced = value.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
|
|
1965
|
+
/**
 * Make a string safe to embed in an id: every character outside
 * `[a-zA-Z0-9_.:-]` is replaced with "_".
 *
 * @param {string} value
 * @returns {string}
 */
function sanitizeIdPart(value) {
  const disallowed = /[^a-zA-Z0-9_.:-]/g;
  return value.replace(disallowed, "_");
}
|
|
1968
|
+
/**
 * Build a source span from raw input and validate it with `SourceSpanSchema`.
 *
 * The text is whitespace-normalized and hashed. The id has the form
 * `<sanitized documentId>:span:<pageStart or "na">:<localIndex>:<first 12 hash chars>`.
 * `kind` is "pdf_text" when `sourceKind` ends in "_pdf", otherwise
 * "plain_text". `location.page` is set only when `pageStart` and `pageEnd`
 * are equal.
 *
 * @param {object} input - `documentId`, `sourceKind`, `text`, plus optional page/section/form/metadata fields.
 * @param {number} [localIndex=0] - Position used to disambiguate ids.
 * @returns {object} The parsed span.
 */
function buildSourceSpan(input, localIndex = 0) {
  const { documentId, sourceKind, pageStart, pageEnd, sectionId, formNumber, metadata } = input;
  const text = normalizeWhitespace(input.text);
  const textHash = sourceSpanTextHash(text);
  const idParts = [sanitizeIdPart(documentId), "span", pageStart ?? "na", localIndex, textHash.slice(0, 12)];
  const kind = sourceKind.endsWith("_pdf") ? "pdf_text" : "plain_text";
  const isSinglePage = pageStart === pageEnd;

  return SourceSpanSchema.parse({
    id: idParts.join(":"),
    documentId,
    sourceKind,
    kind,
    text,
    hash: textHash,
    textHash,
    pageStart,
    pageEnd,
    sectionId,
    formNumber,
    location: {
      page: isSinglePage ? pageStart : void 0,
      startPage: pageStart,
      endPage: pageEnd,
      fieldPath: sectionId
    },
    metadata
  });
}
|
|
2000
|
+
/**
 * Creates one source span per page that has non-whitespace text.
 *
 * Pages whose normalized text is empty are dropped first; the index passed to
 * `buildSourceSpan` is the page's position among the remaining pages.
 * `sourceKind` defaults to "policy_pdf" when the page does not supply one.
 *
 * @param {object[]} pages
 * @returns {object[]} Spans in page order.
 */
function buildPageSourceSpans(pages) {
  const spans = [];
  for (const page of pages) {
    if (normalizeWhitespace(page.text).length === 0) continue;
    const spanInput = {
      documentId: page.documentId,
      sourceKind: page.sourceKind ?? "policy_pdf",
      text: page.text,
      pageStart: page.pageNumber,
      pageEnd: page.pageNumber,
      sectionId: page.sectionId,
      formNumber: page.formNumber,
      metadata: page.metadata
    };
    spans.push(buildSourceSpan(spanInput, spans.length));
  }
  return spans;
}
|
|
2017
|
+
/**
 * Splits each page into heading-delimited sections and builds one source span
 * per section.
 *
 * @param {object[]} pages Pages with `documentId`, `text`, `pageNumber` and
 *   optional `sourceKind` / `metadata`.
 * @param {object} [options]
 * @param {RegExp} [options.headingPattern] Pattern that marks a heading line;
 *   defaults to a case-insensitive list of insurance section keywords.
 * @param {number} [options.minSectionChars=120] Minimum section length passed
 *   to `splitPageIntoSections`.
 * @returns {object[]} Spans for all pages; each span's index is its position
 *   in the returned array.
 */
function buildSectionSourceSpans(pages, options = {}) {
  const headingPattern = options.headingPattern ?? /^(?:SECTION|COVERAGE|EXCLUSION|EXCLUSIONS|CONDITION|CONDITIONS|ENDORSEMENT|ENDORSEMENTS|DEFINITION|DEFINITIONS|DECLARATIONS?|SCHEDULE|FORM)\b[\s:.-]*(.*)$/i;
  const minSectionChars = options.minSectionChars ?? 120;
  const collected = [];

  for (const page of pages) {
    const pageSections = splitPageIntoSections(page.text, headingPattern, minSectionChars);
    for (const { title, text } of pageSections) {
      const spanInput = {
        documentId: page.documentId,
        sourceKind: page.sourceKind ?? "policy_pdf",
        text,
        pageStart: page.pageNumber,
        pageEnd: page.pageNumber,
        sectionId: title,
        formNumber: inferFormNumber(text),
        // Page metadata is carried over and tagged as a section candidate.
        metadata: {
          ...(page.metadata ?? {}),
          sourceUnit: "section_candidate"
        }
      };
      collected.push(buildSourceSpan(spanInput, collected.length));
    }
  }

  return collected;
}
|
|
2044
|
+
/**
 * Splits a text input into fixed-size source spans, optionally overlapping.
 *
 * The text is whitespace-normalized, then cut into windows of at most
 * `maxChars` characters. Each new window starts `overlapChars` characters
 * before the end of the previous one.
 *
 * Option values are sanitized so the loop always makes forward progress:
 * a `maxChars` that is not a number >= 1 falls back to the default (4000),
 * and an `overlapChars` that is not a positive number is treated as 0.
 * Without this, `maxChars <= 0` would never advance the cursor and
 * NaN / negative overlaps would skip or silently drop text.
 *
 * @param {object} input Span fields forwarded to `buildSourceSpan`; `text`
 *   is replaced by each window's text.
 * @param {object} [options]
 * @param {number} [options.maxChars=4000] Maximum characters per span.
 * @param {number} [options.overlapChars=0] Characters shared between
 *   consecutive spans; capped at `maxChars - 1`.
 * @returns {object[]} Spans in text order; empty when the text is blank.
 */
function buildTextSourceSpans(input, options = {}) {
  const DEFAULT_MAX_CHARS = 4e3;
  const requestedMax = options.maxChars ?? DEFAULT_MAX_CHARS;
  const maxChars = typeof requestedMax === "number" && requestedMax >= 1 ? Math.floor(requestedMax) : DEFAULT_MAX_CHARS;
  const requestedOverlap = options.overlapChars ?? 0;
  // Overlap must stay strictly below the window size or the cursor stalls.
  const overlapChars = typeof requestedOverlap === "number" && requestedOverlap > 0 ? Math.min(Math.floor(requestedOverlap), maxChars - 1) : 0;

  const text = normalizeWhitespace(input.text);
  if (!text) return [];

  const spans = [];
  let cursor = 0;
  while (cursor < text.length) {
    const end = Math.min(text.length, cursor + maxChars);
    const unitText = text.slice(cursor, end);
    spans.push(buildSourceSpan({ ...input, text: unitText }, spans.length));
    if (end === text.length) break;
    cursor = end - overlapChars;
  }
  return spans;
}
|
|
2060
|
+
/**
 * Groups consecutive source spans into chunks of at most `maxChars`
 * characters (counting a two-character separator between spans).
 *
 * A span is always added to the current chunk when that chunk is empty, so a
 * single span longer than `maxChars` still forms its own chunk. Each chunk is
 * validated with `SourceChunkSchema.parse`; its `documentId` is taken from
 * the first span in the chunk.
 *
 * @param {object[]} spans Source spans in the order they should be chunked.
 * @param {object} [options]
 * @param {number} [options.maxChars=6000] Soft character limit per chunk.
 * @returns {object[]} Parsed chunks in order.
 */
function chunkSourceSpans(spans, options = {}) {
  const maxChars = options.maxChars ?? 6e3;
  const SEPARATOR = "\n\n";
  const chunks = [];
  let pending = [];
  let pendingChars = 0;

  // Turns the pending spans into one chunk and resets the accumulator.
  const emitPending = () => {
    if (pending.length === 0) return;
    const [head] = pending;
    const text = pending.map((span) => span.text).join(SEPARATOR);
    const textHash = sourceSpanTextHash(text);
    const pageStart = firstNumber(pending.map((span) => span.pageStart));
    const pageEnd = lastNumber(pending.map((span) => span.pageEnd ?? span.pageStart));
    const digest = stableHash2({
      sourceSpanIds: pending.map((span) => span.id),
      textHash
    }).slice(0, 12);
    chunks.push(SourceChunkSchema.parse({
      id: `${sanitizeIdPart(head.documentId)}:source_chunk:${chunks.length}:${digest}`,
      documentId: head.documentId,
      sourceSpanIds: pending.map((span) => span.id),
      text,
      textHash,
      pageStart,
      pageEnd,
      metadata: mergeMetadata(pending)
    }));
    pending = [];
    pendingChars = 0;
  };

  for (const span of spans) {
    const overflows = pending.length > 0 && pendingChars + SEPARATOR.length + span.text.length > maxChars;
    if (overflows) emitPending();
    // The separator only counts when the span joins a non-empty chunk.
    pendingChars += span.text.length + (pending.length > 0 ? SEPARATOR.length : 0);
    pending.push(span);
  }
  emitPending();

  return chunks;
}
|
|
2099
|
+
/**
 * Splits page text into sections, where a section starts at each line that
 * matches `headingPattern` and runs until the next matching line.
 *
 * Lines that appear before the first heading are not part of any section.
 * A section's title is its (whitespace-normalized) heading line, truncated to
 * 120 characters. Sections whose normalized text is shorter than
 * `minSectionChars` are dropped.
 *
 * @param {string} text Raw page text.
 * @param {RegExp} headingPattern Pattern tested against each trimmed line.
 * @param {number} minSectionChars Minimum normalized section length to keep.
 * @returns {{title: string, text: string}[]}
 */
function splitPageIntoSections(text, headingPattern, minSectionChars) {
  const sections = [];
  let current;
  for (const rawLine of text.split(/\r?\n/)) {
    const line = rawLine.trim();
    // `String#match` is used (not `RegExp#test`) so a caller-supplied pattern
    // with the `g` flag does not carry `lastIndex` state between lines.
    if (line.match(headingPattern)) {
      if (current) sections.push(current);
      current = {
        title: normalizeWhitespace(line).slice(0, 120),
        lines: [line]
      };
      continue;
    }
    current?.lines.push(rawLine);
  }
  if (current) sections.push(current);
  return sections.map((section) => ({
    title: section.title,
    text: normalizeWhitespace(section.lines.join("\n"))
  })).filter((section) => section.text.length >= minSectionChars);
}
|
|
2123
|
+
/**
 * Finds the first form-number-like token in `text`: 2-8 uppercase letters,
 * whitespace, 2-5 digits, and optionally whitespace plus 2-4 more digits
 * (for example "CG 21 47").
 *
 * @param {string} text
 * @returns {string|undefined} The matched text, or `undefined` if none.
 */
function inferFormNumber(text) {
  const found = /\b[A-Z]{2,8}\s+\d{2,5}(?:\s+\d{2,4})?\b/.exec(text);
  if (found === null) {
    return void 0;
  }
  return found[0];
}
|
|
2126
|
+
/**
 * Returns the first element of `values` whose type is "number".
 *
 * @param {Array<*>} values
 * @returns {number|undefined} `undefined` when no element is a number.
 */
function firstNumber(values) {
  const isNumber = (candidate) => typeof candidate === "number";
  const position = values.findIndex(isNumber);
  return position === -1 ? void 0 : values[position];
}
|
|
2129
|
+
/**
 * Returns the last element of `values` whose type is "number".
 * The input is not modified.
 *
 * @param {Iterable<*>} values
 * @returns {number|undefined} `undefined` when no element is a number.
 */
function lastNumber(values) {
  const items = Array.from(values);
  for (let position = items.length - 1; position >= 0; position -= 1) {
    if (typeof items[position] === "number") {
      return items[position];
    }
  }
  return void 0;
}
|
|
2132
|
+
/**
 * Merges the metadata of several spans into one flat object.
 *
 * For each metadata key, distinct values are joined with "," in first-seen
 * order. A key that only ever has one distinct value keeps that value
 * unchanged (including its original type). Nullish values are ignored, and a
 * value already recorded for a key is not appended again, so spans that share
 * metadata do not produce "x,x,x".
 *
 * After each span's metadata is merged, the span's own truthy `formNumber`,
 * `sectionId` and `sourceKind` overwrite the corresponding keys.
 *
 * @param {object[]} spans
 * @returns {object} Merged metadata.
 */
function mergeMetadata(spans) {
  const metadata = {};
  // Per key: the string forms of the values already merged into `metadata`.
  const seen = new Map();
  const assign = (key, value) => {
    metadata[key] = value;
    seen.set(key, new Set([String(value)]));
  };
  for (const span of spans) {
    for (const [key, value] of Object.entries(span.metadata ?? {})) {
      if (value === void 0 || value === null) continue;
      const known = seen.get(key);
      if (!known) {
        assign(key, value);
        continue;
      }
      const text = String(value);
      if (known.has(text)) continue;
      known.add(text);
      metadata[key] = `${metadata[key]},${text}`;
    }
    if (span.formNumber) assign("formNumber", span.formNumber);
    if (span.sectionId) assign("sectionId", span.sectionId);
    if (span.sourceKind) assign("sourceKind", span.sourceKind);
  }
  return metadata;
}
|
|
2144
|
+
|
|
2145
|
+
// src/source/store.ts
|
|
2146
|
+
/**
 * In-memory source store. Spans and chunks are kept in two Maps keyed by
 * their `id`; adding an item with an existing id replaces the previous one.
 * All methods are async.
 */
var MemorySourceStore = class {
  constructor() {
    this.spans = /* @__PURE__ */ new Map();
    this.chunks = /* @__PURE__ */ new Map();
  }

  /** Stores each span under its `id`. */
  async addSourceSpans(spans) {
    for (const span of spans) this.spans.set(span.id, span);
  }

  /** Stores each chunk under its `id`. */
  async addSourceChunks(chunks) {
    for (const chunk of chunks) this.chunks.set(chunk.id, chunk);
  }

  /** Returns the span with the given id, or `null` when it is not stored. */
  async getSourceSpan(id) {
    const span = this.spans.get(id);
    return span ?? null;
  }

  /** Returns the document's spans, sorted by id via `localeCompare`. */
  async getSourceSpansByDocument(documentId) {
    const owned = [];
    for (const span of this.spans.values()) {
      if (span.documentId === documentId) owned.push(span);
    }
    return owned.sort((left, right) => left.id.localeCompare(right.id));
  }

  /** Returns the document's chunks, sorted by id via `localeCompare`. */
  async getSourceChunksByDocument(documentId) {
    const owned = [];
    for (const chunk of this.chunks.values()) {
      if (chunk.documentId === documentId) owned.push(chunk);
    }
    return owned.sort((left, right) => left.id.localeCompare(right.id));
  }

  /** Removes every span and chunk that belongs to `documentId`. */
  async deleteDocumentSource(documentId) {
    for (const store of [this.spans, this.chunks]) {
      for (const [id, item] of store.entries()) {
        if (item.documentId === documentId) store.delete(id);
      }
    }
  }

  /**
   * Lexical search over stored spans.
   *
   * Spans are restricted by `query.documentIds`, `query.chunkIds` (a span
   * without a `chunkId` never passes a non-empty chunk filter) and
   * `query.filters`, scored with `lexicalRelevance` against the tokenized
   * `query.question`, and only spans with relevance > 0 are kept.
   *
   * @returns {Promise<{span: object, relevance: number}[]>} At most
   *   `query.limit` (default 10) results in `orderSourceEvidence` order.
   */
  async searchSourceSpans(query) {
    const terms = tokenize(query.question);
    const documentFilter = new Set(query.documentIds ?? []);
    const chunkFilter = new Set(query.chunkIds ?? []);
    const limit = query.limit ?? 10;

    const candidates = [];
    for (const span of this.spans.values()) {
      if (documentFilter.size > 0 && !documentFilter.has(span.documentId)) continue;
      if (chunkFilter.size > 0 && !(span.chunkId && chunkFilter.has(span.chunkId))) continue;
      if (!matchesFilters(span, query.filters)) continue;
      const relevance = lexicalRelevance(span.text, terms);
      if (!(relevance > 0)) continue;
      // Extra fields are only needed by the evidence ordering step.
      candidates.push({
        span,
        relevance,
        sourceSpanId: span.id,
        documentId: span.documentId,
        chunkId: span.chunkId,
        text: span.text
      });
    }

    return orderSourceEvidence(candidates)
      .slice(0, limit)
      .map(({ span, relevance }) => ({ span, relevance }));
  }
};
|
|
2196
|
+
/**
 * Splits a query string into unique, lowercase search terms.
 *
 * Characters `$ . , % -` are allowed inside terms so that values such as
 * "$1,000,000", "2.5%" and "co-insurance" stay intact. Punctuation left at
 * the edges of a term by sentence structure ("deductible," / "limit.") is
 * stripped, because terms are later matched as substrings of span text and
 * a trailing comma or period would prevent the match. Leading punctuation
 * directly followed by a digit (".5", "-5") is kept.
 *
 * @param {string} value
 * @returns {string[]} Distinct terms of length >= 2, in first-seen order.
 */
function tokenize(value) {
  const terms = new Set();
  for (const raw of value.toLowerCase().split(/[^a-z0-9$.,%-]+/)) {
    const term = raw.replace(/^[.,-]+(?!\d)|[.,-]+$/g, "");
    if (term.length >= 2) terms.add(term);
  }
  return Array.from(terms);
}
|
|
2201
|
+
/**
 * Scores `text` against a list of search terms as the fraction of terms that
 * occur (case-insensitively on the text side) as substrings of `text`.
 *
 * @param {string} text
 * @param {string[]} terms Terms to look for; compared against lowercased text.
 * @returns {number} 0 when there are no terms or no hits, otherwise a value
 *   in (0, 1].
 */
function lexicalRelevance(text, terms) {
  if (terms.length === 0) return 0;
  const haystack = text.toLowerCase();
  let hits = 0;
  for (const term of terms) {
    if (haystack.includes(term)) hits += 1;
  }
  return hits === 0 ? 0 : Math.min(1, hits / terms.length);
}
|
|
2208
|
+
/**
 * Checks whether a span satisfies every key/value pair in `filters`.
 *
 * A pair is satisfied when `span.metadata[key]` strictly equals the value,
 * or, for the keys "sourceKind", "formNumber" and "sectionId", when the
 * span's own property of that name strictly equals the value.
 *
 * @param {object} span
 * @param {object} [filters] No filters (falsy) matches everything.
 * @returns {boolean}
 */
function matchesFilters(span, filters) {
  if (!filters) return true;
  const directFields = ["sourceKind", "formNumber", "sectionId"];
  return Object.entries(filters).every(([key, expected]) => {
    if (span.metadata?.[key] === expected) return true;
    return directFields.includes(key) && span[key] === expected;
  });
}
|
|
2219
|
+
|
|
1448
2220
|
// src/extraction/pdf.ts
|
|
1449
2221
|
import {
|
|
1450
2222
|
PDFDocument,
|
|
@@ -1670,6 +2442,35 @@ async function overlayTextOnPdf(pdfBytes, overlays) {
|
|
|
1670
2442
|
}
|
|
1671
2443
|
|
|
1672
2444
|
// src/extraction/extractor.ts
|
|
2445
|
+
/**
 * Selects the source spans from `providerOptions.sourceSpans` whose page
 * range overlaps `[startPage, endPage]`.
 *
 * A span's start page is `pageStart`, else `location.startPage`, else
 * `location.page`; its end page is `pageEnd`, else `location.endPage`, else
 * the start page. Spans without a truthy start or end page are excluded.
 *
 * @param {object} [providerOptions]
 * @param {number} startPage
 * @param {number} endPage
 * @returns {object[]} Matching spans; empty when `sourceSpans` is not an array.
 */
function sourceSpansForPageRange(providerOptions, startPage, endPage) {
  const candidates = providerOptions?.sourceSpans;
  if (!Array.isArray(candidates)) return [];

  const overlapsRange = (span) => {
    const { location } = span;
    const first = span.pageStart ?? location?.startPage ?? location?.page;
    const last = span.pageEnd ?? location?.endPage ?? first;
    if (!first || !last) return false;
    return last >= startPage && first <= endPage;
  };

  return candidates.filter(overlapsRange);
}
|
|
2455
|
+
/**
 * Renders source spans as a prompt section listing each span under a
 * `[sourceSpan:<id> ...]` header.
 *
 * Spans are added in order until adding the next one would push the running
 * length past `maxChars`; the first span is always included. The running
 * length counts header and text only, not the blank lines between entries.
 *
 * @param {object[]} spans
 * @param {number} [maxChars=12000]
 * @returns {string} Empty string when there are no spans.
 */
function buildSourceContext(spans, maxChars = 12e3) {
  if (spans.length === 0) return "";

  const entries = [];
  let usedChars = 0;
  for (const span of spans) {
    const { id, pageStart, pageEnd, sectionId, formNumber, text } = span;
    let pageLabel = "";
    if (pageStart) {
      const rangeSuffix = pageEnd && pageEnd !== pageStart ? `-${pageEnd}` : "";
      pageLabel = ` page:${pageStart}${rangeSuffix}`;
    }
    const sectionLabel = sectionId ? ` section:${sectionId}` : "";
    const formLabel = formNumber ? ` form:${formNumber}` : "";
    const entry = `[sourceSpan:${id}${pageLabel}${sectionLabel}${formLabel}]
${text}`;
    if (entries.length > 0 && usedChars + entry.length > maxChars) break;
    entries.push(entry);
    usedChars += entry.length;
  }

  const body = entries.join("\n\n");
  return `

SOURCE SPANS FOR THESE PAGES:
${body}

Use sourceSpan IDs when grounding extracted contractual values.`;
}
|
|
1673
2474
|
async function runExtractor(params) {
|
|
1674
2475
|
const {
|
|
1675
2476
|
name,
|
|
@@ -1681,7 +2482,8 @@ async function runExtractor(params) {
|
|
|
1681
2482
|
generateObject,
|
|
1682
2483
|
convertPdfToImages,
|
|
1683
2484
|
maxTokens = 4096,
|
|
1684
|
-
providerOptions
|
|
2485
|
+
providerOptions,
|
|
2486
|
+
pageRangeCache
|
|
1685
2487
|
} = params;
|
|
1686
2488
|
const extractorProviderOptions = { ...providerOptions };
|
|
1687
2489
|
let fullPrompt;
|
|
@@ -1693,12 +2495,21 @@ async function runExtractor(params) {
|
|
|
1693
2495
|
|
|
1694
2496
|
[Document pages ${startPage}-${endPage} are provided as images.]`;
|
|
1695
2497
|
} else {
|
|
1696
|
-
const
|
|
2498
|
+
const cacheKey = `${startPage}-${endPage}`;
|
|
2499
|
+
let pagesPdf = pageRangeCache?.get(cacheKey);
|
|
2500
|
+
if (!pagesPdf) {
|
|
2501
|
+
pagesPdf = await extractPageRange(pdfBase64, startPage, endPage);
|
|
2502
|
+
pageRangeCache?.set(cacheKey, pagesPdf);
|
|
2503
|
+
}
|
|
1697
2504
|
extractorProviderOptions.pdfBase64 = pagesPdf;
|
|
1698
2505
|
fullPrompt = `${prompt}
|
|
1699
2506
|
|
|
1700
2507
|
[Document pages ${startPage}-${endPage} are provided as a PDF file.]`;
|
|
1701
2508
|
}
|
|
2509
|
+
const sourceContext = buildSourceContext(sourceSpansForPageRange(providerOptions, startPage, endPage));
|
|
2510
|
+
if (sourceContext) {
|
|
2511
|
+
fullPrompt += sourceContext;
|
|
2512
|
+
}
|
|
1702
2513
|
const strictSchema = toStrictSchema(schema);
|
|
1703
2514
|
const result = await withRetry(
|
|
1704
2515
|
() => generateObject({
|
|
@@ -2297,6 +3108,45 @@ function promoteExtractedFields(doc) {
|
|
|
2297
3108
|
promotePremium(doc);
|
|
2298
3109
|
}
|
|
2299
3110
|
|
|
3111
|
+
// src/extraction/alignment.ts
|
|
3112
|
+
/**
 * Normalizes a value into a lowercase, underscore-separated key fragment.
 *
 * The value is stringified, trimmed and lowercased; "&" becomes "and"; every
 * run of characters outside [a-z0-9] becomes a single "_"; leading and
 * trailing underscores are removed.
 *
 * @param {*} value
 * @returns {string} The fragment, or "na" for null/undefined or when nothing
 *   remains after normalization.
 */
function normalizeKeyPart(value) {
  if (value === void 0 || value === null) {
    return "na";
  }
  const lowered = String(value).trim().toLowerCase();
  const withAnd = lowered.replace(/&/g, "and");
  const underscored = withAnd.replace(/[^a-z0-9]+/g, "_");
  const stripped = underscored.replace(/^_+|_+$/g, "");
  return stripped === "" ? "na" : stripped;
}
|
|
3117
|
+
/**
 * Computes a 32-bit hash of `value` and returns it as 8 lowercase hex digits.
 *
 * Starting from 2166136261, each UTF-16 code unit is XOR-ed into the hash,
 * which is then multiplied by 16777619 with 32-bit wraparound (`Math.imul`).
 *
 * @param {string} value
 * @returns {string} Zero-padded 8-character hex string.
 */
function hashText(value) {
  const OFFSET_BASIS = 2166136261;
  const PRIME = 16777619;
  let state = OFFSET_BASIS;
  for (let position = 0, total = value.length; position < total; position += 1) {
    state = Math.imul(state ^ value.charCodeAt(position), PRIME);
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned, so the hex
  // form is never longer than 8 characters.
  const unsigned = state >>> 0;
  return unsigned.toString(16).padStart(8, "0");
}
|
|
3125
|
+
/**
 * Builds the evidence portion of a record key from the record's provenance
 * fields: joined `sourceSpanIds`, `sourceTextHash`, `formNumber`, page
 * (`pageNumber`, else `pageStart`), `sectionRef`, and content
 * (`originalContent`, else `content`).
 *
 * Fields that are nullish or blank after trimming are skipped; the rest are
 * passed through `normalizeKeyPart` and joined with "|".
 *
 * @param {object} record
 * @returns {string} Empty string when no field is present.
 */
function evidencePart(record) {
  const spanIds = Array.isArray(record.sourceSpanIds) ? record.sourceSpanIds.join(",") : "";
  const fields = [
    spanIds,
    record.sourceTextHash,
    record.formNumber,
    record.pageNumber ?? record.pageStart,
    record.sectionRef,
    record.originalContent ?? record.content
  ];
  const normalized = [];
  for (const field of fields) {
    if (field === void 0 || field === null) continue;
    if (String(field).trim().length === 0) continue;
    normalized.push(normalizeKeyPart(field));
  }
  return normalized.join("|");
}
|
|
3136
|
+
/**
 * Derives a record id of the form
 * `<recordKind>:<normalized documentId>:<label>:<hash>`.
 *
 * The label is the ":"-joined normalized `labelParts`; the hash is
 * `hashText` over the raw document id, record kind, label and the record's
 * evidence string (from `evidencePart`), joined with "|".
 *
 * @param {string} documentId
 * @param {string} recordKind
 * @param {object} record
 * @param {Array<*>} labelParts
 * @returns {string}
 */
function buildExtractionRecordId(documentId, recordKind, record, labelParts) {
  const label = labelParts.map((part) => normalizeKeyPart(part)).join(":");
  const hashInput = [documentId, recordKind, label, evidencePart(record)].join("|");
  const idSegments = [recordKind, normalizeKeyPart(documentId), label, hashText(hashInput)];
  return idSegments.join(":");
}
|
|
3142
|
+
/**
 * Ensures every record has a `recordId` and returns the records sorted by it.
 *
 * A record keeps its own `recordId` when that is a non-blank string;
 * otherwise one is generated with `buildExtractionRecordId` using the label
 * parts returned by `labelParts(record)`. Records are shallow-copied.
 *
 * @param {string} documentId
 * @param {string} recordKind
 * @param {object[]|undefined|null} records
 * @param {(record: object) => Array<*>} labelParts
 * @returns {object[]} Empty array when `records` is missing or empty.
 */
function alignExtractionRecords(documentId, recordKind, records, labelParts) {
  if (!records?.length) return [];

  const aligned = [];
  for (const record of records) {
    const existing = record.recordId;
    const hasUsableId = typeof existing === "string" && existing.trim().length > 0;
    const recordId = hasUsableId
      ? existing
      : buildExtractionRecordId(documentId, recordKind, record, labelParts(record));
    aligned.push({ ...record, recordId });
  }

  aligned.sort((left, right) => String(left.recordId).localeCompare(String(right.recordId)));
  return aligned;
}
|
|
3149
|
+
|
|
2300
3150
|
// src/extraction/assembler.ts
|
|
2301
3151
|
function assembleDocument(documentId, documentType, memory) {
|
|
2302
3152
|
const carrier = getCarrierInfo(memory);
|
|
@@ -2313,11 +3163,53 @@ function assembleDocument(documentId, documentType, memory) {
|
|
|
2313
3163
|
const classify = readMemoryRecord(memory, "classify");
|
|
2314
3164
|
const lossPayees = readRecordArray(insured, "lossPayees");
|
|
2315
3165
|
const mortgageHolders = readRecordArray(insured, "mortgageHolders");
|
|
3166
|
+
const coverageRecords = alignExtractionRecords(
|
|
3167
|
+
documentId,
|
|
3168
|
+
"coverage",
|
|
3169
|
+
getCoverageLimitCoverages(memory),
|
|
3170
|
+
(coverage) => [coverage.name, coverage.formNumber, coverage.pageNumber, coverage.limit, coverage.deductible]
|
|
3171
|
+
);
|
|
3172
|
+
const endorsementRecords = alignExtractionRecords(
|
|
3173
|
+
documentId,
|
|
3174
|
+
"endorsement",
|
|
3175
|
+
readRecordValue(endorsements, "endorsements"),
|
|
3176
|
+
(endorsement) => [endorsement.formNumber, endorsement.title, endorsement.pageStart]
|
|
3177
|
+
);
|
|
3178
|
+
const exclusionRecords = alignExtractionRecords(
|
|
3179
|
+
documentId,
|
|
3180
|
+
"exclusion",
|
|
3181
|
+
readRecordValue(exclusions, "exclusions"),
|
|
3182
|
+
(exclusion) => [exclusion.name, exclusion.formNumber, exclusion.pageNumber]
|
|
3183
|
+
);
|
|
3184
|
+
const conditionRecords = alignExtractionRecords(
|
|
3185
|
+
documentId,
|
|
3186
|
+
"condition",
|
|
3187
|
+
readRecordValue(conditions, "conditions"),
|
|
3188
|
+
(condition) => [condition.name, condition.conditionType, condition.pageNumber]
|
|
3189
|
+
);
|
|
3190
|
+
const sectionRecords = alignExtractionRecords(
|
|
3191
|
+
documentId,
|
|
3192
|
+
"section",
|
|
3193
|
+
getSections(memory),
|
|
3194
|
+
(section) => [section.title, section.type, section.pageStart, section.pageEnd]
|
|
3195
|
+
);
|
|
3196
|
+
const definitionRecords = alignExtractionRecords(
|
|
3197
|
+
documentId,
|
|
3198
|
+
"definition",
|
|
3199
|
+
getDefinitions(memory),
|
|
3200
|
+
(definition) => [definition.term, definition.formNumber, definition.pageNumber]
|
|
3201
|
+
);
|
|
3202
|
+
const coveredReasonRecords = alignExtractionRecords(
|
|
3203
|
+
documentId,
|
|
3204
|
+
"covered_reason",
|
|
3205
|
+
getCoveredReasons(memory),
|
|
3206
|
+
(reason2) => [reason2.coverageName, reason2.reasonNumber, reason2.title, reason2.pageNumber]
|
|
3207
|
+
);
|
|
2316
3208
|
const base = {
|
|
2317
3209
|
id: documentId,
|
|
2318
3210
|
carrier: readRecordValue(carrier, "carrierName") ?? "Unknown",
|
|
2319
3211
|
insuredName: readRecordValue(insured, "insuredName") ?? "Unknown",
|
|
2320
|
-
coverages:
|
|
3212
|
+
coverages: coverageRecords,
|
|
2321
3213
|
policyTypes: readRecordValue(classify, "policyTypes"),
|
|
2322
3214
|
...sanitizeNulls(carrier ?? {}),
|
|
2323
3215
|
...sanitizeNulls(insured ?? {}),
|
|
@@ -2333,13 +3225,13 @@ function assembleDocument(documentId, documentType, memory) {
|
|
|
2333
3225
|
...sanitizeNulls(premium ?? {}),
|
|
2334
3226
|
...sanitizeNulls(supplementary ?? {}),
|
|
2335
3227
|
supplementaryFacts: readRecordValue(supplementary, "auxiliaryFacts"),
|
|
2336
|
-
endorsements:
|
|
2337
|
-
exclusions:
|
|
2338
|
-
conditions:
|
|
2339
|
-
sections:
|
|
3228
|
+
endorsements: endorsementRecords.length > 0 ? endorsementRecords : void 0,
|
|
3229
|
+
exclusions: exclusionRecords.length > 0 ? exclusionRecords : void 0,
|
|
3230
|
+
conditions: conditionRecords.length > 0 ? conditionRecords : void 0,
|
|
3231
|
+
sections: sectionRecords.length > 0 ? sectionRecords : void 0,
|
|
2340
3232
|
formInventory: readRecordValue(formInventory, "forms"),
|
|
2341
|
-
definitions:
|
|
2342
|
-
coveredReasons:
|
|
3233
|
+
definitions: definitionRecords.length > 0 ? definitionRecords : void 0,
|
|
3234
|
+
coveredReasons: coveredReasonRecords.length > 0 ? coveredReasonRecords : void 0,
|
|
2343
3235
|
declarations: declarations ? sanitizeNulls(declarations) : void 0,
|
|
2344
3236
|
...sanitizeNulls(lossHistory ?? {})
|
|
2345
3237
|
};
|
|
@@ -2601,7 +3493,7 @@ async function formatDocumentContent(doc, generateText, options) {
|
|
|
2601
3493
|
const result = await withRetry(
|
|
2602
3494
|
() => generateText({
|
|
2603
3495
|
prompt,
|
|
2604
|
-
maxTokens: 16384,
|
|
3496
|
+
maxTokens: options?.maxTokens ?? 16384,
|
|
2605
3497
|
providerOptions: options?.providerOptions
|
|
2606
3498
|
})
|
|
2607
3499
|
);
|
|
@@ -3596,12 +4488,12 @@ function dedupeByKey(items, keyFn) {
|
|
|
3596
4488
|
}
|
|
3597
4489
|
return merged;
|
|
3598
4490
|
}
|
|
3599
|
-
function
|
|
4491
|
+
/**
 * Normalizes a value into a compact comparison key: stringified, lowercased,
 * "&" replaced by "and", and every character outside [a-z0-9] removed.
 *
 * @param {*} value
 * @returns {string} Empty string for null/undefined.
 */
function normalizeKeyPart2(value) {
  if (value === void 0 || value === null) {
    return "";
  }
  const lowered = String(value).toLowerCase();
  const withAnd = lowered.replace(/&/g, "and");
  return withAnd.replace(/[^a-z0-9]+/g, "");
}
|
|
3603
4495
|
function keyFromParts(...parts) {
|
|
3604
|
-
return parts.map(
|
|
4496
|
+
return parts.map(normalizeKeyPart2).join("|");
|
|
3605
4497
|
}
|
|
3606
4498
|
function mergeUniqueObjects(existing, incoming, keyFn) {
|
|
3607
4499
|
return dedupeByKey([...existing, ...incoming], keyFn);
|
|
@@ -4492,11 +5384,11 @@ function getTemplate(policyType) {
|
|
|
4492
5384
|
}
|
|
4493
5385
|
|
|
4494
5386
|
// src/prompts/coordinator/classify.ts
|
|
4495
|
-
import { z as
|
|
4496
|
-
var ClassifyResultSchema =
|
|
4497
|
-
documentType:
|
|
4498
|
-
policyTypes:
|
|
4499
|
-
confidence:
|
|
5387
|
+
import { z as z21 } from "zod";
|
|
5388
|
+
var ClassifyResultSchema = z21.object({
|
|
5389
|
+
documentType: z21.enum(["policy", "quote"]).describe("Whether this is a bound policy or a proposed quote"),
|
|
5390
|
+
policyTypes: z21.array(PolicyTypeSchema).min(1).describe("Lines of business covered \u2014 at least one required"),
|
|
5391
|
+
confidence: z21.number().describe("Confidence score from 0.0 to 1.0")
|
|
4500
5392
|
});
|
|
4501
5393
|
function buildClassifyPrompt() {
|
|
4502
5394
|
return `You are classifying an insurance document. Examine the document and determine:
|
|
@@ -4586,14 +5478,14 @@ Return JSON only:
|
|
|
4586
5478
|
}
|
|
4587
5479
|
|
|
4588
5480
|
// src/prompts/coordinator/form-inventory.ts
|
|
4589
|
-
import { z as
|
|
5481
|
+
import { z as z22 } from "zod";
|
|
4590
5482
|
var FormInventoryEntrySchema = FormReferenceSchema.extend({
|
|
4591
5483
|
formNumber: FormReferenceSchema.shape.formNumber.describe("Form number or identifier, e.g. PR5070CF"),
|
|
4592
5484
|
pageStart: FormReferenceSchema.shape.pageStart.describe("Original document page where the form begins"),
|
|
4593
5485
|
pageEnd: FormReferenceSchema.shape.pageEnd.describe("Original document page where the form ends")
|
|
4594
5486
|
});
|
|
4595
|
-
var FormInventorySchema =
|
|
4596
|
-
forms:
|
|
5487
|
+
var FormInventorySchema = z22.object({
|
|
5488
|
+
forms: z22.array(FormInventoryEntrySchema)
|
|
4597
5489
|
});
|
|
4598
5490
|
function buildFormInventoryPrompt(templateHints) {
|
|
4599
5491
|
return `You are building a form inventory for an insurance document.
|
|
@@ -4622,8 +5514,8 @@ Respond with JSON only.`;
|
|
|
4622
5514
|
}
|
|
4623
5515
|
|
|
4624
5516
|
// src/prompts/coordinator/page-map.ts
|
|
4625
|
-
import { z as
|
|
4626
|
-
var PageExtractorSchema =
|
|
5517
|
+
import { z as z23 } from "zod";
|
|
5518
|
+
var PageExtractorSchema = z23.enum([
|
|
4627
5519
|
"carrier_info",
|
|
4628
5520
|
"named_insured",
|
|
4629
5521
|
"coverage_limits",
|
|
@@ -4638,10 +5530,10 @@ var PageExtractorSchema = z20.enum([
|
|
|
4638
5530
|
"sections",
|
|
4639
5531
|
"supplementary"
|
|
4640
5532
|
]);
|
|
4641
|
-
var PageAssignmentSchema =
|
|
4642
|
-
localPageNumber:
|
|
4643
|
-
extractorNames:
|
|
4644
|
-
pageRole:
|
|
5533
|
+
var PageAssignmentSchema = z23.object({
|
|
5534
|
+
localPageNumber: z23.number().int().positive().describe("1-based page number within this supplied PDF chunk"),
|
|
5535
|
+
extractorNames: z23.array(PageExtractorSchema).describe("Focused extractors that should inspect this page"),
|
|
5536
|
+
pageRole: z23.enum([
|
|
4645
5537
|
"declarations_schedule",
|
|
4646
5538
|
"endorsement_schedule",
|
|
4647
5539
|
"policy_form",
|
|
@@ -4650,12 +5542,12 @@ var PageAssignmentSchema = z20.object({
|
|
|
4650
5542
|
"supplementary",
|
|
4651
5543
|
"other"
|
|
4652
5544
|
]).optional().describe("Primary role of the page"),
|
|
4653
|
-
hasScheduleValues:
|
|
4654
|
-
confidence:
|
|
4655
|
-
notes:
|
|
5545
|
+
hasScheduleValues: z23.boolean().optional().describe("True only when the page contains insured-specific declaration or schedule values, tables, or rows to extract"),
|
|
5546
|
+
confidence: z23.number().min(0).max(1).optional().describe("Confidence in the page assignment"),
|
|
5547
|
+
notes: z23.string().optional().describe("Short explanation of what appears on the page")
|
|
4656
5548
|
});
|
|
4657
|
-
var PageMapChunkSchema =
|
|
4658
|
-
pages:
|
|
5549
|
+
var PageMapChunkSchema = z23.object({
|
|
5550
|
+
pages: z23.array(PageAssignmentSchema)
|
|
4659
5551
|
});
|
|
4660
5552
|
function buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint) {
|
|
4661
5553
|
const inventoryBlock = formInventoryHint ? `
|
|
@@ -4731,16 +5623,16 @@ function formatFormInventoryForPageMap(forms) {
|
|
|
4731
5623
|
}
|
|
4732
5624
|
|
|
4733
5625
|
// src/prompts/coordinator/review.ts
|
|
4734
|
-
import { z as
|
|
4735
|
-
var ReviewResultSchema =
|
|
4736
|
-
complete:
|
|
4737
|
-
missingFields:
|
|
4738
|
-
qualityIssues:
|
|
4739
|
-
additionalTasks:
|
|
4740
|
-
extractorName:
|
|
4741
|
-
startPage:
|
|
4742
|
-
endPage:
|
|
4743
|
-
description:
|
|
5626
|
+
import { z as z24 } from "zod";
|
|
5627
|
+
var ReviewResultSchema = z24.object({
|
|
5628
|
+
complete: z24.boolean(),
|
|
5629
|
+
missingFields: z24.array(z24.string()),
|
|
5630
|
+
qualityIssues: z24.array(z24.string()).optional(),
|
|
5631
|
+
additionalTasks: z24.array(z24.object({
|
|
5632
|
+
extractorName: z24.string(),
|
|
5633
|
+
startPage: z24.number(),
|
|
5634
|
+
endPage: z24.number(),
|
|
5635
|
+
description: z24.string()
|
|
4744
5636
|
}))
|
|
4745
5637
|
});
|
|
4746
5638
|
function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog) {
|
|
@@ -4796,9 +5688,9 @@ Respond with JSON only.`;
|
|
|
4796
5688
|
}
|
|
4797
5689
|
|
|
4798
5690
|
// src/prompts/coordinator/summarize.ts
|
|
4799
|
-
import { z as
|
|
4800
|
-
var SummaryResultSchema =
|
|
4801
|
-
summary:
|
|
5691
|
+
import { z as z25 } from "zod";
|
|
5692
|
+
var SummaryResultSchema = z25.object({
|
|
5693
|
+
summary: z25.string().describe("A 1-3 sentence overview of this insurance document")
|
|
4802
5694
|
});
|
|
4803
5695
|
function buildSummaryPrompt(doc) {
|
|
4804
5696
|
const snapshot = {
|
|
@@ -4841,23 +5733,23 @@ Return JSON only with a "summary" field.`;
|
|
|
4841
5733
|
}
|
|
4842
5734
|
|
|
4843
5735
|
// src/prompts/extractors/carrier-info.ts
|
|
4844
|
-
import { z as
|
|
4845
|
-
var CarrierInfoSchema =
|
|
4846
|
-
carrierName:
|
|
4847
|
-
carrierLegalName:
|
|
4848
|
-
naicNumber:
|
|
4849
|
-
amBestRating:
|
|
4850
|
-
admittedStatus:
|
|
4851
|
-
mga:
|
|
4852
|
-
underwriter:
|
|
4853
|
-
brokerAgency:
|
|
4854
|
-
brokerContactName:
|
|
4855
|
-
brokerLicenseNumber:
|
|
4856
|
-
policyNumber:
|
|
4857
|
-
effectiveDate:
|
|
4858
|
-
expirationDate:
|
|
4859
|
-
quoteNumber:
|
|
4860
|
-
proposedEffectiveDate:
|
|
5736
|
+
import { z as z26 } from "zod";
|
|
5737
|
+
var CarrierInfoSchema = z26.object({
|
|
5738
|
+
carrierName: z26.string().describe("Primary insurance company name for display"),
|
|
5739
|
+
carrierLegalName: z26.string().optional().describe("Legal entity name of insurer"),
|
|
5740
|
+
naicNumber: z26.string().optional().describe("NAIC company code"),
|
|
5741
|
+
amBestRating: z26.string().optional().describe("AM Best rating, e.g. 'A+ XV'"),
|
|
5742
|
+
admittedStatus: z26.enum(["admitted", "non_admitted", "surplus_lines"]).optional().describe("Admitted status of the carrier"),
|
|
5743
|
+
mga: z26.string().optional().describe("Managing General Agent or Program Administrator name"),
|
|
5744
|
+
underwriter: z26.string().optional().describe("Named individual underwriter"),
|
|
5745
|
+
brokerAgency: z26.string().optional().describe("Broker or producer agency name"),
|
|
5746
|
+
brokerContactName: z26.string().optional().describe("Broker or producer contact person name"),
|
|
5747
|
+
brokerLicenseNumber: z26.string().optional().describe("Broker or producer license number"),
|
|
5748
|
+
policyNumber: z26.string().optional().describe("Policy or quote reference number"),
|
|
5749
|
+
effectiveDate: z26.string().optional().describe("Policy effective date (MM/DD/YYYY)"),
|
|
5750
|
+
expirationDate: z26.string().optional().describe("Policy expiration date (MM/DD/YYYY)"),
|
|
5751
|
+
quoteNumber: z26.string().optional().describe("Quote or proposal reference number"),
|
|
5752
|
+
proposedEffectiveDate: z26.string().optional().describe("Proposed effective date for quotes (MM/DD/YYYY)")
|
|
4861
5753
|
});
|
|
4862
5754
|
function buildCarrierInfoPrompt() {
|
|
4863
5755
|
return `You are an expert insurance document analyst. Extract carrier and policy identification information from this document.
|
|
@@ -4880,18 +5772,18 @@ Return JSON only.`;
|
|
|
4880
5772
|
}
|
|
4881
5773
|
|
|
4882
5774
|
// src/prompts/extractors/named-insured.ts
|
|
4883
|
-
import { z as
|
|
4884
|
-
var AddressSchema2 =
|
|
4885
|
-
street1:
|
|
4886
|
-
city:
|
|
4887
|
-
state:
|
|
4888
|
-
zip:
|
|
4889
|
-
});
|
|
4890
|
-
var NamedInsuredSchema2 =
|
|
4891
|
-
insuredName:
|
|
4892
|
-
insuredDba:
|
|
5775
|
+
import { z as z27 } from "zod";
|
|
5776
|
+
var AddressSchema2 = z27.object({
|
|
5777
|
+
street1: z27.string(),
|
|
5778
|
+
city: z27.string(),
|
|
5779
|
+
state: z27.string(),
|
|
5780
|
+
zip: z27.string()
|
|
5781
|
+
});
|
|
5782
|
+
var NamedInsuredSchema2 = z27.object({
|
|
5783
|
+
insuredName: z27.string().describe("Name of primary named insured"),
|
|
5784
|
+
insuredDba: z27.string().optional().describe("Doing-business-as name"),
|
|
4893
5785
|
insuredAddress: AddressSchema2.optional().describe("Primary insured mailing address"),
|
|
4894
|
-
insuredEntityType:
|
|
5786
|
+
insuredEntityType: z27.enum([
|
|
4895
5787
|
"corporation",
|
|
4896
5788
|
"llc",
|
|
4897
5789
|
"partnership",
|
|
@@ -4904,25 +5796,25 @@ var NamedInsuredSchema2 = z24.object({
|
|
|
4904
5796
|
"married_couple",
|
|
4905
5797
|
"other"
|
|
4906
5798
|
]).optional().describe("Legal entity type of the insured"),
|
|
4907
|
-
insuredFein:
|
|
4908
|
-
insuredSicCode:
|
|
4909
|
-
insuredNaicsCode:
|
|
4910
|
-
additionalNamedInsureds:
|
|
4911
|
-
|
|
4912
|
-
name:
|
|
4913
|
-
relationship:
|
|
5799
|
+
insuredFein: z27.string().optional().describe("Federal Employer Identification Number"),
|
|
5800
|
+
insuredSicCode: z27.string().optional().describe("SIC code"),
|
|
5801
|
+
insuredNaicsCode: z27.string().optional().describe("NAICS code"),
|
|
5802
|
+
additionalNamedInsureds: z27.array(
|
|
5803
|
+
z27.object({
|
|
5804
|
+
name: z27.string(),
|
|
5805
|
+
relationship: z27.string().optional().describe("e.g. subsidiary, affiliate"),
|
|
4914
5806
|
address: AddressSchema2.optional()
|
|
4915
5807
|
})
|
|
4916
5808
|
).optional().describe("Additional named insureds listed on the policy"),
|
|
4917
|
-
lossPayees:
|
|
4918
|
-
|
|
4919
|
-
name:
|
|
5809
|
+
lossPayees: z27.array(
|
|
5810
|
+
z27.object({
|
|
5811
|
+
name: z27.string(),
|
|
4920
5812
|
address: AddressSchema2.optional()
|
|
4921
5813
|
})
|
|
4922
5814
|
).optional().describe("Loss payees listed on the policy"),
|
|
4923
|
-
mortgageHolders:
|
|
4924
|
-
|
|
4925
|
-
name:
|
|
5815
|
+
mortgageHolders: z27.array(
|
|
5816
|
+
z27.object({
|
|
5817
|
+
name: z27.string(),
|
|
4926
5818
|
address: AddressSchema2.optional()
|
|
4927
5819
|
})
|
|
4928
5820
|
).optional().describe("Mortgage holders / lienholders listed on the policy")
|
|
@@ -4945,14 +5837,14 @@ Return JSON only.`;
|
|
|
4945
5837
|
}
|
|
4946
5838
|
|
|
4947
5839
|
// src/prompts/extractors/coverage-limits.ts
|
|
4948
|
-
import { z as
|
|
5840
|
+
import { z as z28 } from "zod";
|
|
4949
5841
|
var ExtractorCoverageSchema = CoverageSchema.extend({
|
|
4950
|
-
coverageCode:
|
|
5842
|
+
coverageCode: z28.string().optional().describe("Coverage code or class code")
|
|
4951
5843
|
});
|
|
4952
|
-
var CoverageLimitsSchema =
|
|
4953
|
-
coverages:
|
|
4954
|
-
coverageForm:
|
|
4955
|
-
retroactiveDate:
|
|
5844
|
+
var CoverageLimitsSchema = z28.object({
|
|
5845
|
+
coverages: z28.array(ExtractorCoverageSchema).describe("All coverages with their limits"),
|
|
5846
|
+
coverageForm: z28.enum(["occurrence", "claims_made", "accident"]).optional().describe("Primary coverage trigger type"),
|
|
5847
|
+
retroactiveDate: z28.string().optional().describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
|
|
4956
5848
|
});
|
|
4957
5849
|
function buildCoverageLimitsPrompt() {
|
|
4958
5850
|
return `You are an expert insurance document analyst. Extract all coverage limits and deductibles from this document.
|
|
@@ -4991,14 +5883,14 @@ Return JSON only.`;
|
|
|
4991
5883
|
}
|
|
4992
5884
|
|
|
4993
5885
|
// src/prompts/extractors/endorsements.ts
|
|
4994
|
-
import { z as
|
|
4995
|
-
var EndorsementsSchema =
|
|
4996
|
-
endorsements:
|
|
4997
|
-
|
|
4998
|
-
formNumber:
|
|
4999
|
-
editionDate:
|
|
5000
|
-
title:
|
|
5001
|
-
endorsementType:
|
|
5886
|
+
import { z as z29 } from "zod";
|
|
5887
|
+
var EndorsementsSchema = z29.object({
|
|
5888
|
+
endorsements: z29.array(
|
|
5889
|
+
z29.object({
|
|
5890
|
+
formNumber: z29.string().describe("Form number, e.g. 'CG 21 47'"),
|
|
5891
|
+
editionDate: z29.string().optional().describe("Edition date, e.g. '12 07'"),
|
|
5892
|
+
title: z29.string().describe("Endorsement title"),
|
|
5893
|
+
endorsementType: z29.enum([
|
|
5002
5894
|
"additional_insured",
|
|
5003
5895
|
"waiver_of_subrogation",
|
|
5004
5896
|
"primary_noncontributory",
|
|
@@ -5018,12 +5910,12 @@ var EndorsementsSchema = z26.object({
|
|
|
5018
5910
|
"territorial_extension",
|
|
5019
5911
|
"other"
|
|
5020
5912
|
]).describe("Endorsement type classification"),
|
|
5021
|
-
effectiveDate:
|
|
5022
|
-
affectedCoverageParts:
|
|
5023
|
-
namedParties:
|
|
5024
|
-
|
|
5025
|
-
name:
|
|
5026
|
-
role:
|
|
5913
|
+
effectiveDate: z29.string().optional().describe("Endorsement effective date"),
|
|
5914
|
+
affectedCoverageParts: z29.array(z29.string()).optional().describe("Coverage parts affected by this endorsement"),
|
|
5915
|
+
namedParties: z29.array(
|
|
5916
|
+
z29.object({
|
|
5917
|
+
name: z29.string().describe("Party name"),
|
|
5918
|
+
role: z29.enum([
|
|
5027
5919
|
"additional_insured",
|
|
5028
5920
|
"loss_payee",
|
|
5029
5921
|
"mortgage_holder",
|
|
@@ -5032,15 +5924,15 @@ var EndorsementsSchema = z26.object({
|
|
|
5032
5924
|
"designated_person",
|
|
5033
5925
|
"other"
|
|
5034
5926
|
]).describe("Party role"),
|
|
5035
|
-
relationship:
|
|
5036
|
-
scope:
|
|
5927
|
+
relationship: z29.string().optional().describe("Relationship to insured"),
|
|
5928
|
+
scope: z29.string().optional().describe("Scope of coverage for this party")
|
|
5037
5929
|
})
|
|
5038
5930
|
).optional().describe("Named parties (additional insureds, loss payees, etc.)"),
|
|
5039
|
-
keyTerms:
|
|
5040
|
-
premiumImpact:
|
|
5041
|
-
content:
|
|
5042
|
-
pageStart:
|
|
5043
|
-
pageEnd:
|
|
5931
|
+
keyTerms: z29.array(z29.string()).optional().describe("Key terms or notable provisions in the endorsement"),
|
|
5932
|
+
premiumImpact: z29.string().optional().describe("Additional premium or credit"),
|
|
5933
|
+
content: z29.string().describe("Full verbatim text of the endorsement"),
|
|
5934
|
+
pageStart: z29.number().describe("Starting page number of this endorsement"),
|
|
5935
|
+
pageEnd: z29.number().optional().describe("Ending page number of this endorsement")
|
|
5044
5936
|
})
|
|
5045
5937
|
).describe("All endorsements found in the document")
|
|
5046
5938
|
});
|
|
@@ -5071,20 +5963,20 @@ Return JSON only.`;
|
|
|
5071
5963
|
}
|
|
5072
5964
|
|
|
5073
5965
|
// src/prompts/extractors/exclusions.ts
|
|
5074
|
-
import { z as
|
|
5075
|
-
var ExclusionsSchema =
|
|
5076
|
-
exclusions:
|
|
5077
|
-
|
|
5078
|
-
name:
|
|
5079
|
-
formNumber:
|
|
5080
|
-
excludedPerils:
|
|
5081
|
-
isAbsolute:
|
|
5082
|
-
exceptions:
|
|
5083
|
-
buybackAvailable:
|
|
5084
|
-
buybackEndorsement:
|
|
5085
|
-
appliesTo:
|
|
5086
|
-
content:
|
|
5087
|
-
pageNumber:
|
|
5966
|
+
import { z as z30 } from "zod";
|
|
5967
|
+
var ExclusionsSchema = z30.object({
|
|
5968
|
+
exclusions: z30.array(
|
|
5969
|
+
z30.object({
|
|
5970
|
+
name: z30.string().describe("Exclusion title or short description"),
|
|
5971
|
+
formNumber: z30.string().optional().describe("Form number if part of a named endorsement"),
|
|
5972
|
+
excludedPerils: z30.array(z30.string()).optional().describe("Specific perils excluded"),
|
|
5973
|
+
isAbsolute: z30.boolean().optional().describe("Whether the exclusion is absolute (no exceptions)"),
|
|
5974
|
+
exceptions: z30.array(z30.string()).optional().describe("Exceptions to the exclusion, if any"),
|
|
5975
|
+
buybackAvailable: z30.boolean().optional().describe("Whether coverage can be bought back via endorsement"),
|
|
5976
|
+
buybackEndorsement: z30.string().optional().describe("Form number of the buyback endorsement if available"),
|
|
5977
|
+
appliesTo: z30.array(z30.string()).optional().describe("Coverage types this exclusion applies to"),
|
|
5978
|
+
content: z30.string().describe("Full verbatim exclusion text"),
|
|
5979
|
+
pageNumber: z30.number().optional().describe("Page number where exclusion appears")
|
|
5088
5980
|
})
|
|
5089
5981
|
).describe("All exclusions found in the document")
|
|
5090
5982
|
});
|
|
@@ -5120,12 +6012,12 @@ Return JSON only.`;
|
|
|
5120
6012
|
}
|
|
5121
6013
|
|
|
5122
6014
|
// src/prompts/extractors/conditions.ts
|
|
5123
|
-
import { z as
|
|
5124
|
-
var ConditionsSchema =
|
|
5125
|
-
conditions:
|
|
5126
|
-
|
|
5127
|
-
name:
|
|
5128
|
-
conditionType:
|
|
6015
|
+
import { z as z31 } from "zod";
|
|
6016
|
+
var ConditionsSchema = z31.object({
|
|
6017
|
+
conditions: z31.array(
|
|
6018
|
+
z31.object({
|
|
6019
|
+
name: z31.string().describe("Condition title"),
|
|
6020
|
+
conditionType: z31.enum([
|
|
5129
6021
|
"duties_after_loss",
|
|
5130
6022
|
"notice_requirements",
|
|
5131
6023
|
"other_insurance",
|
|
@@ -5144,14 +6036,14 @@ var ConditionsSchema = z28.object({
|
|
|
5144
6036
|
"separation_of_insureds",
|
|
5145
6037
|
"other"
|
|
5146
6038
|
]).describe("Condition category"),
|
|
5147
|
-
content:
|
|
5148
|
-
keyValues:
|
|
5149
|
-
|
|
5150
|
-
key:
|
|
5151
|
-
value:
|
|
6039
|
+
content: z31.string().describe("Full verbatim condition text"),
|
|
6040
|
+
keyValues: z31.array(
|
|
6041
|
+
z31.object({
|
|
6042
|
+
key: z31.string().describe("Key name (e.g. 'noticePeriod', 'suitDeadline')"),
|
|
6043
|
+
value: z31.string().describe("Value (e.g. '30 days', '2 years')")
|
|
5152
6044
|
})
|
|
5153
6045
|
).optional().describe("Key values extracted from the condition (notice periods, deadlines, etc.)"),
|
|
5154
|
-
pageNumber:
|
|
6046
|
+
pageNumber: z31.number().optional().describe("Page number where condition appears")
|
|
5155
6047
|
})
|
|
5156
6048
|
).describe("All policy conditions found in the document")
|
|
5157
6049
|
});
|
|
@@ -5189,28 +6081,28 @@ Return JSON only.`;
|
|
|
5189
6081
|
}
|
|
5190
6082
|
|
|
5191
6083
|
// src/prompts/extractors/premium-breakdown.ts
|
|
5192
|
-
import { z as
|
|
5193
|
-
var PremiumBreakdownSchema =
|
|
5194
|
-
premium:
|
|
5195
|
-
totalCost:
|
|
5196
|
-
premiumBreakdown:
|
|
5197
|
-
|
|
5198
|
-
line:
|
|
5199
|
-
amount:
|
|
6084
|
+
import { z as z32 } from "zod";
|
|
6085
|
+
var PremiumBreakdownSchema = z32.object({
|
|
6086
|
+
premium: z32.string().optional().describe("Total premium amount, e.g. '$5,000'"),
|
|
6087
|
+
totalCost: z32.string().optional().describe("Total cost including taxes and fees, e.g. '$5,250'"),
|
|
6088
|
+
premiumBreakdown: z32.array(
|
|
6089
|
+
z32.object({
|
|
6090
|
+
line: z32.string().describe("Coverage line name"),
|
|
6091
|
+
amount: z32.string().describe("Premium amount for this line")
|
|
5200
6092
|
})
|
|
5201
6093
|
).optional().describe("Per-coverage-line premium breakdown"),
|
|
5202
|
-
taxesAndFees:
|
|
5203
|
-
|
|
5204
|
-
name:
|
|
5205
|
-
amount:
|
|
5206
|
-
type:
|
|
6094
|
+
taxesAndFees: z32.array(
|
|
6095
|
+
z32.object({
|
|
6096
|
+
name: z32.string().describe("Fee or tax name"),
|
|
6097
|
+
amount: z32.string().describe("Dollar amount"),
|
|
6098
|
+
type: z32.enum(["tax", "fee", "surcharge", "assessment"]).optional().describe("Fee category")
|
|
5207
6099
|
})
|
|
5208
6100
|
).optional().describe("Taxes, fees, surcharges, and assessments"),
|
|
5209
|
-
minimumPremium:
|
|
5210
|
-
depositPremium:
|
|
5211
|
-
paymentPlan:
|
|
5212
|
-
auditType:
|
|
5213
|
-
ratingBasis:
|
|
6101
|
+
minimumPremium: z32.string().optional().describe("Minimum premium if stated"),
|
|
6102
|
+
depositPremium: z32.string().optional().describe("Deposit premium if stated"),
|
|
6103
|
+
paymentPlan: z32.string().optional().describe("Payment plan description"),
|
|
6104
|
+
auditType: z32.enum(["annual", "semi_annual", "quarterly", "monthly", "final", "self"]).optional().describe("Premium audit type"),
|
|
6105
|
+
ratingBasis: z32.string().optional().describe("Rating basis, e.g. payroll, revenue, area, units")
|
|
5214
6106
|
});
|
|
5215
6107
|
function buildPremiumBreakdownPrompt() {
|
|
5216
6108
|
return `You are an expert insurance document analyst. Extract all premium and cost information from this document.
|
|
@@ -5230,14 +6122,14 @@ Return JSON only.`;
|
|
|
5230
6122
|
}
|
|
5231
6123
|
|
|
5232
6124
|
// src/prompts/extractors/declarations.ts
|
|
5233
|
-
import { z as
|
|
5234
|
-
var DeclarationsFieldSchema =
|
|
5235
|
-
field:
|
|
5236
|
-
value:
|
|
5237
|
-
section:
|
|
6125
|
+
import { z as z33 } from "zod";
|
|
6126
|
+
var DeclarationsFieldSchema = z33.object({
|
|
6127
|
+
field: z33.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
|
|
6128
|
+
value: z33.string().describe("Extracted value exactly as it appears in the document"),
|
|
6129
|
+
section: z33.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
|
|
5238
6130
|
});
|
|
5239
|
-
var DeclarationsExtractSchema =
|
|
5240
|
-
fields:
|
|
6131
|
+
var DeclarationsExtractSchema = z33.object({
|
|
6132
|
+
fields: z33.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
|
|
5241
6133
|
});
|
|
5242
6134
|
function buildDeclarationsPrompt() {
|
|
5243
6135
|
return `You are an expert insurance document analyst. Extract all declarations page data from this document into a flexible key-value structure.
|
|
@@ -5277,21 +6169,21 @@ Preserve original values exactly as they appear. Return JSON only.`;
|
|
|
5277
6169
|
}
|
|
5278
6170
|
|
|
5279
6171
|
// src/prompts/extractors/loss-history.ts
|
|
5280
|
-
import { z as
|
|
5281
|
-
var LossHistorySchema =
|
|
5282
|
-
lossSummary:
|
|
5283
|
-
individualClaims:
|
|
5284
|
-
|
|
5285
|
-
date:
|
|
5286
|
-
type:
|
|
5287
|
-
description:
|
|
5288
|
-
amountPaid:
|
|
5289
|
-
amountReserved:
|
|
5290
|
-
status:
|
|
5291
|
-
claimNumber:
|
|
6172
|
+
import { z as z34 } from "zod";
|
|
6173
|
+
var LossHistorySchema = z34.object({
|
|
6174
|
+
lossSummary: z34.string().optional().describe("Summary of loss history, e.g. '3 claims in past 5 years totaling $125,000'"),
|
|
6175
|
+
individualClaims: z34.array(
|
|
6176
|
+
z34.object({
|
|
6177
|
+
date: z34.string().optional().describe("Date of loss or claim"),
|
|
6178
|
+
type: z34.string().optional().describe("Type of claim, e.g. 'property damage', 'bodily injury'"),
|
|
6179
|
+
description: z34.string().optional().describe("Brief description of the claim"),
|
|
6180
|
+
amountPaid: z34.string().optional().describe("Amount paid"),
|
|
6181
|
+
amountReserved: z34.string().optional().describe("Amount reserved"),
|
|
6182
|
+
status: z34.enum(["open", "closed", "reopened"]).optional().describe("Claim status"),
|
|
6183
|
+
claimNumber: z34.string().optional().describe("Claim reference number")
|
|
5292
6184
|
})
|
|
5293
6185
|
).optional().describe("Individual claim records"),
|
|
5294
|
-
experienceMod:
|
|
6186
|
+
experienceMod: z34.string().optional().describe("Experience modification factor for workers comp, e.g. '0.85'")
|
|
5295
6187
|
});
|
|
5296
6188
|
function buildLossHistoryPrompt() {
|
|
5297
6189
|
return `You are an expert insurance document analyst. Extract all loss history and claims information from this document.
|
|
@@ -5308,18 +6200,18 @@ Return JSON only.`;
|
|
|
5308
6200
|
}
|
|
5309
6201
|
|
|
5310
6202
|
// src/prompts/extractors/sections.ts
|
|
5311
|
-
import { z as
|
|
5312
|
-
var SubsectionSchema2 =
|
|
5313
|
-
title:
|
|
5314
|
-
sectionNumber:
|
|
5315
|
-
pageNumber:
|
|
5316
|
-
content:
|
|
5317
|
-
});
|
|
5318
|
-
var SectionsSchema =
|
|
5319
|
-
sections:
|
|
5320
|
-
|
|
5321
|
-
title:
|
|
5322
|
-
type:
|
|
6203
|
+
import { z as z35 } from "zod";
|
|
6204
|
+
var SubsectionSchema2 = z35.object({
|
|
6205
|
+
title: z35.string().describe("Subsection title"),
|
|
6206
|
+
sectionNumber: z35.string().optional().describe("Subsection number"),
|
|
6207
|
+
pageNumber: z35.number().optional().describe("Page number"),
|
|
6208
|
+
content: z35.string().describe("Full verbatim text")
|
|
6209
|
+
});
|
|
6210
|
+
var SectionsSchema = z35.object({
|
|
6211
|
+
sections: z35.array(
|
|
6212
|
+
z35.object({
|
|
6213
|
+
title: z35.string().describe("Section title"),
|
|
6214
|
+
type: z35.enum([
|
|
5323
6215
|
"declarations",
|
|
5324
6216
|
"insuring_agreement",
|
|
5325
6217
|
"policy_form",
|
|
@@ -5334,10 +6226,10 @@ var SectionsSchema = z32.object({
|
|
|
5334
6226
|
"regulatory",
|
|
5335
6227
|
"other"
|
|
5336
6228
|
]).describe("Section type classification"),
|
|
5337
|
-
content:
|
|
5338
|
-
pageStart:
|
|
5339
|
-
pageEnd:
|
|
5340
|
-
subsections:
|
|
6229
|
+
content: z35.string().describe("Full verbatim text of the section"),
|
|
6230
|
+
pageStart: z35.number().describe("Starting page number"),
|
|
6231
|
+
pageEnd: z35.number().optional().describe("Ending page number"),
|
|
6232
|
+
subsections: z35.array(SubsectionSchema2).optional().describe("Subsections within this section")
|
|
5341
6233
|
})
|
|
5342
6234
|
).describe("All document sections")
|
|
5343
6235
|
});
|
|
@@ -5368,27 +6260,27 @@ Return JSON only.`;
|
|
|
5368
6260
|
}
|
|
5369
6261
|
|
|
5370
6262
|
// src/prompts/extractors/supplementary.ts
|
|
5371
|
-
import { z as
|
|
5372
|
-
var ContactSchema2 =
|
|
5373
|
-
name:
|
|
5374
|
-
phone:
|
|
5375
|
-
email:
|
|
5376
|
-
address:
|
|
5377
|
-
type:
|
|
5378
|
-
});
|
|
5379
|
-
var AuxiliaryFactSchema2 =
|
|
5380
|
-
key:
|
|
5381
|
-
value:
|
|
5382
|
-
subject:
|
|
5383
|
-
context:
|
|
5384
|
-
});
|
|
5385
|
-
var SupplementarySchema =
|
|
5386
|
-
regulatoryContacts:
|
|
5387
|
-
claimsContacts:
|
|
5388
|
-
thirdPartyAdministrators:
|
|
5389
|
-
cancellationNoticeDays:
|
|
5390
|
-
nonrenewalNoticeDays:
|
|
5391
|
-
auxiliaryFacts:
|
|
6263
|
+
import { z as z36 } from "zod";
|
|
6264
|
+
var ContactSchema2 = z36.object({
|
|
6265
|
+
name: z36.string().optional().describe("Organization or person name"),
|
|
6266
|
+
phone: z36.string().optional().describe("Phone number"),
|
|
6267
|
+
email: z36.string().optional().describe("Email address"),
|
|
6268
|
+
address: z36.string().optional().describe("Mailing address"),
|
|
6269
|
+
type: z36.string().optional().describe("Contact type, e.g. 'State Department of Insurance'")
|
|
6270
|
+
});
|
|
6271
|
+
var AuxiliaryFactSchema2 = z36.object({
|
|
6272
|
+
key: z36.string().describe("Normalized machine-readable fact key, e.g. 'policyholder_age' or 'insured_name'"),
|
|
6273
|
+
value: z36.string().describe("Concrete extracted fact value"),
|
|
6274
|
+
subject: z36.string().optional().describe("Person, entity, vehicle, property, or schedule item this fact belongs to"),
|
|
6275
|
+
context: z36.string().optional().describe("Short disambiguating context, such as 'Driver Schedule' or 'Named Insured'")
|
|
6276
|
+
});
|
|
6277
|
+
var SupplementarySchema = z36.object({
|
|
6278
|
+
regulatoryContacts: z36.array(ContactSchema2).optional().describe("Regulatory body contacts (state department of insurance, ombudsman)"),
|
|
6279
|
+
claimsContacts: z36.array(ContactSchema2).optional().describe("Claims reporting contacts and instructions"),
|
|
6280
|
+
thirdPartyAdministrators: z36.array(ContactSchema2).optional().describe("Third-party administrators for claims handling"),
|
|
6281
|
+
cancellationNoticeDays: z36.number().optional().describe("Required notice period for cancellation in days"),
|
|
6282
|
+
nonrenewalNoticeDays: z36.number().optional().describe("Required notice period for nonrenewal in days"),
|
|
6283
|
+
auxiliaryFacts: z36.array(AuxiliaryFactSchema2).optional().describe("Additional retrieval-only facts that do not fit the strict primary schema")
|
|
5392
6284
|
});
|
|
5393
6285
|
function buildSupplementaryPrompt(alreadyExtractedSummary) {
|
|
5394
6286
|
const exclusionBlock = alreadyExtractedSummary ? `
|
|
@@ -5426,17 +6318,17 @@ Return JSON only.`;
|
|
|
5426
6318
|
}
|
|
5427
6319
|
|
|
5428
6320
|
// src/prompts/extractors/definitions.ts
|
|
5429
|
-
import { z as
|
|
5430
|
-
var DefinitionsSchema =
|
|
5431
|
-
definitions:
|
|
5432
|
-
|
|
5433
|
-
term:
|
|
5434
|
-
definition:
|
|
5435
|
-
pageNumber:
|
|
5436
|
-
formNumber:
|
|
5437
|
-
formTitle:
|
|
5438
|
-
sectionRef:
|
|
5439
|
-
originalContent:
|
|
6321
|
+
import { z as z37 } from "zod";
|
|
6322
|
+
var DefinitionsSchema = z37.object({
|
|
6323
|
+
definitions: z37.array(
|
|
6324
|
+
z37.object({
|
|
6325
|
+
term: z37.string().describe("Defined term exactly as shown in the document"),
|
|
6326
|
+
definition: z37.string().describe("Full verbatim definition text, preserving original wording"),
|
|
6327
|
+
pageNumber: z37.number().optional().describe("Original document page number"),
|
|
6328
|
+
formNumber: z37.string().optional().describe("Form number where this definition appears"),
|
|
6329
|
+
formTitle: z37.string().optional().describe("Form title where this definition appears"),
|
|
6330
|
+
sectionRef: z37.string().optional().describe("Definition section heading or subsection reference"),
|
|
6331
|
+
originalContent: z37.string().optional().describe("Short verbatim source snippet containing the term and definition")
|
|
5440
6332
|
})
|
|
5441
6333
|
).describe("All substantive insurance definitions found in the document")
|
|
5442
6334
|
});
|
|
@@ -5470,22 +6362,22 @@ Return JSON only.`;
|
|
|
5470
6362
|
}
|
|
5471
6363
|
|
|
5472
6364
|
// src/prompts/extractors/covered-reasons.ts
|
|
5473
|
-
import { z as
|
|
5474
|
-
var CoveredReasonsSchema =
|
|
5475
|
-
coveredReasons:
|
|
5476
|
-
|
|
5477
|
-
coverageName:
|
|
5478
|
-
reasonNumber:
|
|
5479
|
-
title:
|
|
5480
|
-
content:
|
|
5481
|
-
conditions:
|
|
5482
|
-
exceptions:
|
|
5483
|
-
appliesTo:
|
|
5484
|
-
pageNumber:
|
|
5485
|
-
formNumber:
|
|
5486
|
-
formTitle:
|
|
5487
|
-
sectionRef:
|
|
5488
|
-
originalContent:
|
|
6365
|
+
import { z as z38 } from "zod";
|
|
6366
|
+
var CoveredReasonsSchema = z38.object({
|
|
6367
|
+
coveredReasons: z38.array(
|
|
6368
|
+
z38.object({
|
|
6369
|
+
coverageName: z38.string().describe("Coverage, coverage part, or form this covered reason belongs to"),
|
|
6370
|
+
reasonNumber: z38.string().optional().describe("Source number or letter for the covered reason, if shown"),
|
|
6371
|
+
title: z38.string().optional().describe("Covered reason title, peril, cause of loss, trigger, or short name"),
|
|
6372
|
+
content: z38.string().describe("Full verbatim covered-reason or insuring-agreement text"),
|
|
6373
|
+
conditions: z38.array(z38.string()).optional().describe("Conditions, timing rules, documentation requirements, or prerequisites attached to this covered reason"),
|
|
6374
|
+
exceptions: z38.array(z38.string()).optional().describe("Exceptions or limitations attached to this covered reason"),
|
|
6375
|
+
appliesTo: z38.array(z38.string()).optional().describe("Covered property, persons, autos, locations, operations, or coverage parts this reason applies to"),
|
|
6376
|
+
pageNumber: z38.number().optional().describe("Original document page number"),
|
|
6377
|
+
formNumber: z38.string().optional().describe("Form number where this covered reason appears"),
|
|
6378
|
+
formTitle: z38.string().optional().describe("Form title where this covered reason appears"),
|
|
6379
|
+
sectionRef: z38.string().optional().describe("Section heading where this covered reason appears"),
|
|
6380
|
+
originalContent: z38.string().optional().describe("Short verbatim source snippet used for this covered reason")
|
|
5489
6381
|
})
|
|
5490
6382
|
).describe("Covered causes, perils, triggers, or reasons that affirmatively grant coverage")
|
|
5491
6383
|
});
|
|
@@ -5618,21 +6510,21 @@ function formatExtractorCatalogForPrompt() {
|
|
|
5618
6510
|
}
|
|
5619
6511
|
|
|
5620
6512
|
// src/extraction/resolve-referential.ts
|
|
5621
|
-
import { z as
|
|
6513
|
+
import { z as z40 } from "zod";
|
|
5622
6514
|
|
|
5623
6515
|
// src/prompts/extractors/referential-lookup.ts
|
|
5624
|
-
import { z as
|
|
5625
|
-
var ReferentialLookupSchema =
|
|
5626
|
-
resolvedCoverages:
|
|
5627
|
-
|
|
5628
|
-
coverageName:
|
|
5629
|
-
resolvedLimit:
|
|
6516
|
+
import { z as z39 } from "zod";
|
|
6517
|
+
var ReferentialLookupSchema = z39.object({
|
|
6518
|
+
resolvedCoverages: z39.array(
|
|
6519
|
+
z39.object({
|
|
6520
|
+
coverageName: z39.string().describe("The coverage name that was referenced"),
|
|
6521
|
+
resolvedLimit: z39.string().optional().describe("The concrete limit value found, if any"),
|
|
5630
6522
|
resolvedLimitValueType: CoverageValueTypeSchema.optional(),
|
|
5631
|
-
resolvedDeductible:
|
|
6523
|
+
resolvedDeductible: z39.string().optional().describe("The concrete deductible value found, if any"),
|
|
5632
6524
|
resolvedDeductibleValueType: CoverageValueTypeSchema.optional(),
|
|
5633
|
-
pageNumber:
|
|
5634
|
-
originalContent:
|
|
5635
|
-
confidence:
|
|
6525
|
+
pageNumber: z39.number().optional().describe("Page where the resolved value was found"),
|
|
6526
|
+
originalContent: z39.string().optional().describe("Verbatim source text for the resolved value"),
|
|
6527
|
+
confidence: z39.enum(["high", "medium", "low"]).describe("Confidence in the resolution")
|
|
5636
6528
|
})
|
|
5637
6529
|
)
|
|
5638
6530
|
});
|
|
@@ -5680,11 +6572,11 @@ function looksCoveredReasonSection(section) {
|
|
|
5680
6572
|
}
|
|
5681
6573
|
|
|
5682
6574
|
// src/extraction/referential-workflow.ts
|
|
5683
|
-
function
|
|
6575
|
+
function normalizeText2(value) {
|
|
5684
6576
|
return typeof value === "string" ? value.trim().toLowerCase() : "";
|
|
5685
6577
|
}
|
|
5686
6578
|
function containsTarget(value, target) {
|
|
5687
|
-
const normalizedValue =
|
|
6579
|
+
const normalizedValue = normalizeText2(value);
|
|
5688
6580
|
return Boolean(normalizedValue && target && normalizedValue.includes(target));
|
|
5689
6581
|
}
|
|
5690
6582
|
function pageRangeFrom(startPage, endPage) {
|
|
@@ -5727,8 +6619,8 @@ function findLocalReferentialPages(params) {
|
|
|
5727
6619
|
}
|
|
5728
6620
|
function findDeclarationsSchedulePages(parsedTarget, formInventory) {
|
|
5729
6621
|
for (const form of formInventory) {
|
|
5730
|
-
const formType =
|
|
5731
|
-
const title =
|
|
6622
|
+
const formType = normalizeText2(form.formType);
|
|
6623
|
+
const title = normalizeText2(form.title);
|
|
5732
6624
|
const matchesDeclarations = formType === "declarations" || /declarations?|dec\b|decs\b/.test(title);
|
|
5733
6625
|
const matchesSchedule = /schedule|scheduled|coverage/.test(title) || formType === "coverage";
|
|
5734
6626
|
const shouldUse = parsedTarget.kind === "declarations" ? matchesDeclarations : parsedTarget.kind === "schedule" || parsedTarget.kind === "item" || parsedTarget.kind === "premises" ? matchesSchedule || matchesDeclarations : parsedTarget.kind === "policy" ? matchesDeclarations || matchesSchedule : false;
|
|
@@ -5741,8 +6633,8 @@ function findDeclarationsSchedulePages(parsedTarget, formInventory) {
|
|
|
5741
6633
|
}
|
|
5742
6634
|
function findSectionPages(parsedTarget, sections) {
|
|
5743
6635
|
for (const section of sections) {
|
|
5744
|
-
const title =
|
|
5745
|
-
const type =
|
|
6636
|
+
const title = normalizeText2(section.title);
|
|
6637
|
+
const type = normalizeText2(section.type);
|
|
5746
6638
|
const matchesKind = parsedTarget.kind === "declarations" && (type === "declarations" || /declarations?/.test(title)) || parsedTarget.kind === "schedule" && (type === "schedule" || /schedule|scheduled/.test(title)) || parsedTarget.kind === "premises" && /premises?|location|building/.test(title) || parsedTarget.kind === "item" && /\bitem\b|schedule|scheduled/.test(title) || parsedTarget.kind === "section" && containsTarget(title, parsedTarget.normalized);
|
|
5747
6639
|
if (matchesKind) {
|
|
5748
6640
|
const range = pageRangeFrom(section.pageStart, section.pageEnd);
|
|
@@ -5797,9 +6689,9 @@ function parseReferenceTarget(text) {
|
|
|
5797
6689
|
if (/if applicable/i.test(normalized)) return void 0;
|
|
5798
6690
|
return void 0;
|
|
5799
6691
|
}
|
|
5800
|
-
var PageLocationSchema =
|
|
5801
|
-
startPage:
|
|
5802
|
-
endPage:
|
|
6692
|
+
var PageLocationSchema = z40.object({
|
|
6693
|
+
startPage: z40.number(),
|
|
6694
|
+
endPage: z40.number()
|
|
5803
6695
|
});
|
|
5804
6696
|
async function findReferencedPages(params) {
|
|
5805
6697
|
const {
|
|
@@ -5811,6 +6703,8 @@ async function findReferencedPages(params) {
|
|
|
5811
6703
|
generateObject,
|
|
5812
6704
|
providerOptions,
|
|
5813
6705
|
trackUsage,
|
|
6706
|
+
modelCapabilities,
|
|
6707
|
+
modelBudgetConstraints,
|
|
5814
6708
|
log
|
|
5815
6709
|
} = params;
|
|
5816
6710
|
const localPageRange = findLocalReferentialPages({
|
|
@@ -5837,6 +6731,12 @@ async function findReferencedPages(params) {
|
|
|
5837
6731
|
return void 0;
|
|
5838
6732
|
}
|
|
5839
6733
|
try {
|
|
6734
|
+
const budget = resolveModelBudget({
|
|
6735
|
+
taskKind: "extraction_referential_lookup",
|
|
6736
|
+
hintTokens: 256,
|
|
6737
|
+
modelCapabilities,
|
|
6738
|
+
constraint: modelBudgetConstraints?.extraction_referential_lookup
|
|
6739
|
+
});
|
|
5840
6740
|
const result = await safeGenerateObject(
|
|
5841
6741
|
generateObject,
|
|
5842
6742
|
{
|
|
@@ -5850,7 +6750,7 @@ If you cannot find the section, return startPage: 0 and endPage: 0.
|
|
|
5850
6750
|
|
|
5851
6751
|
Return JSON only.`,
|
|
5852
6752
|
schema: PageLocationSchema,
|
|
5853
|
-
maxTokens:
|
|
6753
|
+
maxTokens: budget.maxTokens,
|
|
5854
6754
|
providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
|
|
5855
6755
|
},
|
|
5856
6756
|
{
|
|
@@ -5885,6 +6785,8 @@ async function resolveReferentialCoverages(params) {
|
|
|
5885
6785
|
convertPdfToImages,
|
|
5886
6786
|
concurrency = 2,
|
|
5887
6787
|
providerOptions,
|
|
6788
|
+
modelCapabilities,
|
|
6789
|
+
modelBudgetConstraints,
|
|
5888
6790
|
log,
|
|
5889
6791
|
onProgress
|
|
5890
6792
|
} = params;
|
|
@@ -5947,6 +6849,8 @@ async function resolveReferentialCoverages(params) {
|
|
|
5947
6849
|
generateObject,
|
|
5948
6850
|
providerOptions,
|
|
5949
6851
|
trackUsage,
|
|
6852
|
+
modelCapabilities,
|
|
6853
|
+
modelBudgetConstraints,
|
|
5950
6854
|
log
|
|
5951
6855
|
});
|
|
5952
6856
|
if (!pageRange) {
|
|
@@ -5973,6 +6877,12 @@ async function resolveReferentialCoverages(params) {
|
|
|
5973
6877
|
sectionRef: coverage.sectionRef ? String(coverage.sectionRef) : void 0
|
|
5974
6878
|
}));
|
|
5975
6879
|
try {
|
|
6880
|
+
const budget = resolveModelBudget({
|
|
6881
|
+
taskKind: "extraction_referential_lookup",
|
|
6882
|
+
hintTokens: 4096,
|
|
6883
|
+
modelCapabilities,
|
|
6884
|
+
constraint: modelBudgetConstraints?.extraction_referential_lookup
|
|
6885
|
+
});
|
|
5976
6886
|
const result = await runExtractor({
|
|
5977
6887
|
name: "referential_lookup",
|
|
5978
6888
|
prompt: buildReferentialLookupPrompt(promptCoverages),
|
|
@@ -5982,7 +6892,7 @@ async function resolveReferentialCoverages(params) {
|
|
|
5982
6892
|
endPage: pageRange.endPage,
|
|
5983
6893
|
generateObject,
|
|
5984
6894
|
convertPdfToImages,
|
|
5985
|
-
maxTokens:
|
|
6895
|
+
maxTokens: budget.maxTokens,
|
|
5986
6896
|
providerOptions
|
|
5987
6897
|
});
|
|
5988
6898
|
trackUsage(result.usage);
|
|
@@ -6072,7 +6982,9 @@ async function runFocusedExtractorWithFallback(params) {
|
|
|
6072
6982
|
generateObject,
|
|
6073
6983
|
convertPdfToImages,
|
|
6074
6984
|
providerOptions,
|
|
6985
|
+
pageRangeCache,
|
|
6075
6986
|
trackUsage,
|
|
6987
|
+
resolveBudget,
|
|
6076
6988
|
log
|
|
6077
6989
|
} = params;
|
|
6078
6990
|
const ext = getExtractor(task.extractorName);
|
|
@@ -6081,6 +6993,9 @@ async function runFocusedExtractorWithFallback(params) {
|
|
|
6081
6993
|
return null;
|
|
6082
6994
|
}
|
|
6083
6995
|
try {
|
|
6996
|
+
const hintTokens = ext.maxTokens ?? 4096;
|
|
6997
|
+
const taskKind = hintTokens >= 8192 ? "extraction_long_list" : "extraction_focused";
|
|
6998
|
+
const budget = resolveBudget(taskKind, hintTokens);
|
|
6084
6999
|
const result = await runExtractor({
|
|
6085
7000
|
name: task.extractorName,
|
|
6086
7001
|
prompt: ext.buildPrompt(),
|
|
@@ -6090,10 +7005,15 @@ async function runFocusedExtractorWithFallback(params) {
|
|
|
6090
7005
|
endPage: task.endPage,
|
|
6091
7006
|
generateObject,
|
|
6092
7007
|
convertPdfToImages,
|
|
6093
|
-
maxTokens:
|
|
6094
|
-
providerOptions
|
|
7008
|
+
maxTokens: budget.maxTokens,
|
|
7009
|
+
providerOptions,
|
|
7010
|
+
pageRangeCache
|
|
7011
|
+
});
|
|
7012
|
+
trackUsage(result.usage, {
|
|
7013
|
+
taskKind,
|
|
7014
|
+
label: task.extractorName,
|
|
7015
|
+
maxTokens: budget.maxTokens
|
|
6095
7016
|
});
|
|
6096
|
-
trackUsage(result.usage);
|
|
6097
7017
|
if (!ext.fallback?.isEmpty(result.data)) {
|
|
6098
7018
|
return result;
|
|
6099
7019
|
}
|
|
@@ -6112,6 +7032,9 @@ async function runFocusedExtractorWithFallback(params) {
|
|
|
6112
7032
|
`Extractor ${task.extractorName} produced no usable records; trying ${ext.fallback.extractorName} fallback for pages ${task.startPage}-${task.endPage}`
|
|
6113
7033
|
);
|
|
6114
7034
|
try {
|
|
7035
|
+
const hintTokens = fallbackExt.maxTokens ?? 4096;
|
|
7036
|
+
const taskKind = hintTokens >= 8192 ? "extraction_long_list" : "extraction_focused";
|
|
7037
|
+
const budget = resolveBudget(taskKind, hintTokens);
|
|
6115
7038
|
const fallbackResult = await runExtractor({
|
|
6116
7039
|
name: ext.fallback.extractorName,
|
|
6117
7040
|
prompt: fallbackExt.buildPrompt(),
|
|
@@ -6121,10 +7044,15 @@ async function runFocusedExtractorWithFallback(params) {
|
|
|
6121
7044
|
endPage: task.endPage,
|
|
6122
7045
|
generateObject,
|
|
6123
7046
|
convertPdfToImages,
|
|
6124
|
-
maxTokens:
|
|
6125
|
-
providerOptions
|
|
7047
|
+
maxTokens: budget.maxTokens,
|
|
7048
|
+
providerOptions,
|
|
7049
|
+
pageRangeCache
|
|
7050
|
+
});
|
|
7051
|
+
trackUsage(fallbackResult.usage, {
|
|
7052
|
+
taskKind,
|
|
7053
|
+
label: ext.fallback.extractorName,
|
|
7054
|
+
maxTokens: budget.maxTokens
|
|
6126
7055
|
});
|
|
6127
|
-
trackUsage(fallbackResult.usage);
|
|
6128
7056
|
const focusedData = ext.fallback.deriveFocusedResult(fallbackResult.data);
|
|
6129
7057
|
return focusedData ? [
|
|
6130
7058
|
fallbackResult,
|
|
@@ -6201,6 +7129,15 @@ function buildExtractionReviewReport(params) {
|
|
|
6201
7129
|
const coveredReasons = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons : sections.filter(looksCoveredReasonSection);
|
|
6202
7130
|
const mappedDefinitions = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("definitions"));
|
|
6203
7131
|
const mappedCoveredReasons = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("covered_reasons"));
|
|
7132
|
+
if (params.sourceSpansAvailable) {
|
|
7133
|
+
addMissingSourceGroundingIssues(deterministicIssues, "coverage_limits", "coverages", coverages, "name");
|
|
7134
|
+
addMissingSourceGroundingIssues(deterministicIssues, "endorsements", "endorsements", endorsements, "title");
|
|
7135
|
+
addMissingSourceGroundingIssues(deterministicIssues, "exclusions", "exclusions", exclusions, "name");
|
|
7136
|
+
addMissingSourceGroundingIssues(deterministicIssues, "conditions", "conditions", conditions, "name");
|
|
7137
|
+
addMissingSourceGroundingIssues(deterministicIssues, "sections", "sections", sections, "title");
|
|
7138
|
+
addMissingSourceGroundingIssues(deterministicIssues, "definitions", "definitions", definitions, "term");
|
|
7139
|
+
addMissingSourceGroundingIssues(deterministicIssues, "covered_reasons", "coveredReasons", coveredReasons, "name");
|
|
7140
|
+
}
|
|
6204
7141
|
if (mappedDefinitions && definitions.length === 0) {
|
|
6205
7142
|
deterministicIssues.push({
|
|
6206
7143
|
code: "definitions_mapped_but_empty",
|
|
@@ -6514,6 +7451,24 @@ function buildExtractionReviewReport(params) {
|
|
|
6514
7451
|
qualityGateStatus
|
|
6515
7452
|
};
|
|
6516
7453
|
}
|
|
7454
|
+
function addMissingSourceGroundingIssues(issues, extractorName, arrayName, records, labelKey) {
|
|
7455
|
+
for (const record of records) {
|
|
7456
|
+
if (!recordHasContent(record)) continue;
|
|
7457
|
+
if (Array.isArray(record.sourceSpanIds) && record.sourceSpanIds.length > 0) continue;
|
|
7458
|
+
issues.push({
|
|
7459
|
+
code: "record_missing_source_span",
|
|
7460
|
+
severity: "blocking",
|
|
7461
|
+
message: `${extractorName}.${arrayName} record "${String(record[labelKey] ?? record.name ?? record.title ?? "unknown")}" is missing source span grounding.`,
|
|
7462
|
+
extractorName,
|
|
7463
|
+
pageNumber: typeof record.pageNumber === "number" ? record.pageNumber : typeof record.pageStart === "number" ? record.pageStart : void 0,
|
|
7464
|
+
formNumber: typeof record.formNumber === "string" ? record.formNumber : void 0,
|
|
7465
|
+
itemName: typeof record[labelKey] === "string" ? record[labelKey] : void 0
|
|
7466
|
+
});
|
|
7467
|
+
}
|
|
7468
|
+
}
|
|
7469
|
+
function recordHasContent(record) {
|
|
7470
|
+
return ["name", "title", "term", "field", "coverageName", "content", "originalContent", "value", "limit", "deductible", "premium"].some((key) => typeof record[key] === "string" && record[key].trim().length > 0);
|
|
7471
|
+
}
|
|
6517
7472
|
function toReviewRoundRecord(round, review) {
|
|
6518
7473
|
return {
|
|
6519
7474
|
round,
|
|
@@ -6652,6 +7607,104 @@ function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory)
|
|
|
6652
7607
|
};
|
|
6653
7608
|
}
|
|
6654
7609
|
|
|
7610
|
+
// src/extraction/source-grounding.ts
|
|
7611
|
+
var ARRAY_PATHS = [
|
|
7612
|
+
{ memoryKey: "coverage_limits", arrayKeys: ["coverages"] },
|
|
7613
|
+
{ memoryKey: "endorsements", arrayKeys: ["endorsements"] },
|
|
7614
|
+
{ memoryKey: "exclusions", arrayKeys: ["exclusions"] },
|
|
7615
|
+
{ memoryKey: "conditions", arrayKeys: ["conditions"] },
|
|
7616
|
+
{ memoryKey: "sections", arrayKeys: ["sections"] },
|
|
7617
|
+
{ memoryKey: "definitions", arrayKeys: ["definitions"] },
|
|
7618
|
+
{ memoryKey: "covered_reasons", arrayKeys: ["coveredReasons", "covered_reasons"] },
|
|
7619
|
+
{ memoryKey: "declarations", arrayKeys: ["fields"] }
|
|
7620
|
+
];
|
|
7621
|
+
function normalize(value) {
|
|
7622
|
+
return value.replace(/\s+/g, " ").trim().toLowerCase();
|
|
7623
|
+
}
|
|
7624
|
+
function textValue(record, ...keys) {
|
|
7625
|
+
for (const key of keys) {
|
|
7626
|
+
const value = record[key];
|
|
7627
|
+
if (typeof value === "string" && value.trim()) return value.trim();
|
|
7628
|
+
}
|
|
7629
|
+
return void 0;
|
|
7630
|
+
}
|
|
7631
|
+
function numberValue(record, ...keys) {
|
|
7632
|
+
for (const key of keys) {
|
|
7633
|
+
const value = record[key];
|
|
7634
|
+
if (typeof value === "number" && Number.isFinite(value)) return value;
|
|
7635
|
+
}
|
|
7636
|
+
return void 0;
|
|
7637
|
+
}
|
|
7638
|
+
function pageOverlaps(recordStart, recordEnd, span) {
|
|
7639
|
+
if (!recordStart && !recordEnd) return false;
|
|
7640
|
+
const start = recordStart ?? recordEnd;
|
|
7641
|
+
const end = recordEnd ?? recordStart;
|
|
7642
|
+
const spanStart = span.pageStart ?? span.location?.page ?? span.location?.startPage;
|
|
7643
|
+
const spanEnd = span.pageEnd ?? span.location?.page ?? span.location?.endPage ?? spanStart;
|
|
7644
|
+
if (!spanStart) return false;
|
|
7645
|
+
return start <= (spanEnd ?? spanStart) && end >= spanStart;
|
|
7646
|
+
}
|
|
7647
|
+
function formMatches(record, span) {
|
|
7648
|
+
const formNumber = textValue(record, "formNumber");
|
|
7649
|
+
if (!formNumber || !span.formNumber) return false;
|
|
7650
|
+
return normalize(formNumber) === normalize(span.formNumber);
|
|
7651
|
+
}
|
|
7652
|
+
function textMatches(record, span) {
|
|
7653
|
+
const spanText = normalize(span.text);
|
|
7654
|
+
const candidates = [
|
|
7655
|
+
textValue(record, "originalContent", "content", "definition", "value"),
|
|
7656
|
+
textValue(record, "name", "title", "term", "field", "coverageName"),
|
|
7657
|
+
textValue(record, "limit", "deductible", "premium")
|
|
7658
|
+
].filter((value) => !!value && value.length >= 3);
|
|
7659
|
+
return candidates.some((candidate) => spanText.includes(normalize(candidate)));
|
|
7660
|
+
}
|
|
7661
|
+
function sourceHashFor(spans) {
|
|
7662
|
+
return spans.map((span) => span.textHash ?? span.hash).filter(Boolean).join(":") || void 0;
|
|
7663
|
+
}
|
|
7664
|
+
function findSourceSpansForRecord(record, sourceSpans) {
|
|
7665
|
+
if (sourceSpans.length === 0) return [];
|
|
7666
|
+
const pageStart = numberValue(record, "pageNumber", "pageStart");
|
|
7667
|
+
const pageEnd = numberValue(record, "pageNumber", "pageEnd");
|
|
7668
|
+
const scored = sourceSpans.map((span) => {
|
|
7669
|
+
let score = 0;
|
|
7670
|
+
if (pageOverlaps(pageStart, pageEnd, span)) score += 4;
|
|
7671
|
+
if (formMatches(record, span)) score += 3;
|
|
7672
|
+
if (textMatches(record, span)) score += 2;
|
|
7673
|
+
return { span, score };
|
|
7674
|
+
}).filter((item) => item.score >= 2).sort((left, right) => {
|
|
7675
|
+
if (right.score !== left.score) return right.score - left.score;
|
|
7676
|
+
return left.span.id.localeCompare(right.span.id);
|
|
7677
|
+
});
|
|
7678
|
+
return scored.slice(0, 3).map((item) => item.span);
|
|
7679
|
+
}
|
|
7680
|
+
function groundRecord(record, sourceSpans) {
|
|
7681
|
+
if (Array.isArray(record.sourceSpanIds) && record.sourceSpanIds.length > 0 && record.sourceTextHash) {
|
|
7682
|
+
return record;
|
|
7683
|
+
}
|
|
7684
|
+
const matches = findSourceSpansForRecord(record, sourceSpans);
|
|
7685
|
+
if (matches.length === 0) return record;
|
|
7686
|
+
return {
|
|
7687
|
+
...record,
|
|
7688
|
+
sourceSpanIds: Array.isArray(record.sourceSpanIds) && record.sourceSpanIds.length > 0 ? record.sourceSpanIds : matches.map((span) => span.id),
|
|
7689
|
+
sourceTextHash: typeof record.sourceTextHash === "string" && record.sourceTextHash.trim() ? record.sourceTextHash : sourceHashFor(matches)
|
|
7690
|
+
};
|
|
7691
|
+
}
|
|
7692
|
+
function groundExtractionMemoryWithSourceSpans(memory, sourceSpans) {
|
|
7693
|
+
if (sourceSpans.length === 0) return;
|
|
7694
|
+
for (const { memoryKey, arrayKeys } of ARRAY_PATHS) {
|
|
7695
|
+
const payload = memory.get(memoryKey);
|
|
7696
|
+
if (!payload || typeof payload !== "object" || Array.isArray(payload)) continue;
|
|
7697
|
+
const record = payload;
|
|
7698
|
+
for (const arrayKey of arrayKeys) {
|
|
7699
|
+
const items = record[arrayKey];
|
|
7700
|
+
if (!Array.isArray(items)) continue;
|
|
7701
|
+
record[arrayKey] = items.map(
|
|
7702
|
+
(item) => item && typeof item === "object" && !Array.isArray(item) ? groundRecord(item, sourceSpans) : item
|
|
7703
|
+
);
|
|
7704
|
+
}
|
|
7705
|
+
}
|
|
7706
|
+
}
|
|
7707
|
+
|
|
6655
7708
|
// src/extraction/coordinator.ts
|
|
6656
7709
|
function createExtractor(config) {
|
|
6657
7710
|
const {
|
|
@@ -6664,7 +7717,10 @@ function createExtractor(config) {
|
|
|
6664
7717
|
onProgress,
|
|
6665
7718
|
log,
|
|
6666
7719
|
providerOptions,
|
|
7720
|
+
sourceStore,
|
|
6667
7721
|
qualityGate = "warn",
|
|
7722
|
+
modelCapabilities,
|
|
7723
|
+
modelBudgetConstraints,
|
|
6668
7724
|
onCheckpointSave
|
|
6669
7725
|
} = config;
|
|
6670
7726
|
const limit = pLimit(concurrency);
|
|
@@ -6673,7 +7729,20 @@ function createExtractor(config) {
|
|
|
6673
7729
|
let modelCalls = 0;
|
|
6674
7730
|
let callsWithUsage = 0;
|
|
6675
7731
|
let callsMissingUsage = 0;
|
|
6676
|
-
|
|
7732
|
+
let performanceReport = {
|
|
7733
|
+
modelCalls: [],
|
|
7734
|
+
totalModelCallDurationMs: 0
|
|
7735
|
+
};
|
|
7736
|
+
let activeProviderOptions = providerOptions;
|
|
7737
|
+
function resolveBudget(taskKind, hintTokens) {
|
|
7738
|
+
return resolveModelBudget({
|
|
7739
|
+
taskKind,
|
|
7740
|
+
hintTokens,
|
|
7741
|
+
modelCapabilities,
|
|
7742
|
+
constraint: modelBudgetConstraints?.[taskKind]
|
|
7743
|
+
});
|
|
7744
|
+
}
|
|
7745
|
+
function trackUsage(usage, report) {
|
|
6677
7746
|
modelCalls += 1;
|
|
6678
7747
|
if (usage) {
|
|
6679
7748
|
callsWithUsage += 1;
|
|
@@ -6683,6 +7752,16 @@ function createExtractor(config) {
|
|
|
6683
7752
|
} else {
|
|
6684
7753
|
callsMissingUsage += 1;
|
|
6685
7754
|
}
|
|
7755
|
+
if (report) {
|
|
7756
|
+
performanceReport.modelCalls.push({
|
|
7757
|
+
...report,
|
|
7758
|
+
usage,
|
|
7759
|
+
usageReported: !!usage
|
|
7760
|
+
});
|
|
7761
|
+
if (report.durationMs) {
|
|
7762
|
+
performanceReport.totalModelCallDurationMs += report.durationMs;
|
|
7763
|
+
}
|
|
7764
|
+
}
|
|
6686
7765
|
}
|
|
6687
7766
|
function mergeMemoryResult(name, data, memory) {
|
|
6688
7767
|
const existing = memory.get(name);
|
|
@@ -6765,9 +7844,10 @@ function createExtractor(config) {
|
|
|
6765
7844
|
}
|
|
6766
7845
|
return lines.length > 0 ? lines.join("\n") : "";
|
|
6767
7846
|
}
|
|
6768
|
-
async function runFocusedExtractorTask(task, pdfInput, memory) {
|
|
7847
|
+
async function runFocusedExtractorTask(task, pdfInput, memory, pageRangeCache) {
|
|
6769
7848
|
if (task.extractorName === "supplementary") {
|
|
6770
7849
|
const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
|
|
7850
|
+
const budget = resolveBudget("extraction_focused", 4096);
|
|
6771
7851
|
const result = await runExtractor({
|
|
6772
7852
|
name: "supplementary",
|
|
6773
7853
|
prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
|
|
@@ -6777,10 +7857,15 @@ function createExtractor(config) {
|
|
|
6777
7857
|
endPage: task.endPage,
|
|
6778
7858
|
generateObject,
|
|
6779
7859
|
convertPdfToImages,
|
|
6780
|
-
maxTokens:
|
|
6781
|
-
providerOptions
|
|
7860
|
+
maxTokens: budget.maxTokens,
|
|
7861
|
+
providerOptions: activeProviderOptions,
|
|
7862
|
+
pageRangeCache
|
|
7863
|
+
});
|
|
7864
|
+
trackUsage(result.usage, {
|
|
7865
|
+
taskKind: "extraction_focused",
|
|
7866
|
+
label: "supplementary",
|
|
7867
|
+
maxTokens: budget.maxTokens
|
|
6782
7868
|
});
|
|
6783
|
-
trackUsage(result.usage);
|
|
6784
7869
|
return result;
|
|
6785
7870
|
}
|
|
6786
7871
|
return runFocusedExtractorWithFallback({
|
|
@@ -6788,8 +7873,10 @@ function createExtractor(config) {
|
|
|
6788
7873
|
pdfInput,
|
|
6789
7874
|
generateObject,
|
|
6790
7875
|
convertPdfToImages,
|
|
6791
|
-
providerOptions,
|
|
7876
|
+
providerOptions: activeProviderOptions,
|
|
7877
|
+
pageRangeCache,
|
|
6792
7878
|
trackUsage,
|
|
7879
|
+
resolveBudget,
|
|
6793
7880
|
log
|
|
6794
7881
|
});
|
|
6795
7882
|
}
|
|
@@ -6810,6 +7897,19 @@ function createExtractor(config) {
|
|
|
6810
7897
|
modelCalls = 0;
|
|
6811
7898
|
callsWithUsage = 0;
|
|
6812
7899
|
callsMissingUsage = 0;
|
|
7900
|
+
performanceReport = {
|
|
7901
|
+
modelCalls: [],
|
|
7902
|
+
totalModelCallDurationMs: 0
|
|
7903
|
+
};
|
|
7904
|
+
const sourceSpans = options?.sourceSpans ?? [];
|
|
7905
|
+
const sourceChunks = sourceSpans.length ? chunkSourceSpans(sourceSpans) : [];
|
|
7906
|
+
activeProviderOptions = sourceSpans.length ? { ...providerOptions, sourceSpans, sourceChunks } : providerOptions;
|
|
7907
|
+
if (sourceStore && sourceSpans.length > 0) {
|
|
7908
|
+
await sourceStore.addSourceSpans(sourceSpans);
|
|
7909
|
+
if (sourceChunks.length > 0) {
|
|
7910
|
+
await sourceStore.addSourceChunks(sourceChunks);
|
|
7911
|
+
}
|
|
7912
|
+
}
|
|
6813
7913
|
const pipelineCtx = createPipelineContext({
|
|
6814
7914
|
id,
|
|
6815
7915
|
onSave: onCheckpointSave,
|
|
@@ -6823,12 +7923,21 @@ function createExtractor(config) {
|
|
|
6823
7923
|
}
|
|
6824
7924
|
}
|
|
6825
7925
|
let pdfBase64Cache;
|
|
7926
|
+
const pageRangePdfCache = /* @__PURE__ */ new Map();
|
|
6826
7927
|
async function getPdfBase64ForExtraction() {
|
|
6827
7928
|
if (pdfBase64Cache === void 0) {
|
|
6828
7929
|
pdfBase64Cache = await pdfInputToBase64(pdfInput);
|
|
6829
7930
|
}
|
|
6830
7931
|
return pdfBase64Cache;
|
|
6831
7932
|
}
|
|
7933
|
+
async function getPageRangePdf(startPage, endPage) {
|
|
7934
|
+
const cacheKey = `${startPage}-${endPage}`;
|
|
7935
|
+
const cached = pageRangePdfCache.get(cacheKey);
|
|
7936
|
+
if (cached) return cached;
|
|
7937
|
+
const pagesPdf = await extractPageRange(await getPdfBase64ForExtraction(), startPage, endPage);
|
|
7938
|
+
pageRangePdfCache.set(cacheKey, pagesPdf);
|
|
7939
|
+
return pagesPdf;
|
|
7940
|
+
}
|
|
6832
7941
|
let classifyResult;
|
|
6833
7942
|
if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
|
|
6834
7943
|
classifyResult = resumed.classifyResult;
|
|
@@ -6836,13 +7945,14 @@ function createExtractor(config) {
|
|
|
6836
7945
|
} else {
|
|
6837
7946
|
onProgress?.("Classifying document...");
|
|
6838
7947
|
const pageCount2 = await getPdfPageCount(pdfInput);
|
|
7948
|
+
const budget = resolveBudget("extraction_classify", 512);
|
|
6839
7949
|
const classifyResponse = await safeGenerateObject(
|
|
6840
7950
|
generateObject,
|
|
6841
7951
|
{
|
|
6842
7952
|
prompt: buildClassifyPrompt(),
|
|
6843
7953
|
schema: ClassifyResultSchema,
|
|
6844
|
-
maxTokens:
|
|
6845
|
-
providerOptions: await buildPdfProviderOptions(pdfInput,
|
|
7954
|
+
maxTokens: budget.maxTokens,
|
|
7955
|
+
providerOptions: await buildPdfProviderOptions(pdfInput, activeProviderOptions)
|
|
6846
7956
|
},
|
|
6847
7957
|
{
|
|
6848
7958
|
fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
|
|
@@ -6851,7 +7961,11 @@ function createExtractor(config) {
|
|
|
6851
7961
|
onError: (err, attempt) => log?.(`Classify attempt ${attempt + 1} failed: ${err instanceof Error ? err.message : String(err)}`)
|
|
6852
7962
|
}
|
|
6853
7963
|
);
|
|
6854
|
-
trackUsage(classifyResponse.usage
|
|
7964
|
+
trackUsage(classifyResponse.usage, {
|
|
7965
|
+
taskKind: "extraction_classify",
|
|
7966
|
+
label: "classify",
|
|
7967
|
+
maxTokens: budget.maxTokens
|
|
7968
|
+
});
|
|
6855
7969
|
classifyResult = classifyResponse.object;
|
|
6856
7970
|
if (classifyResult.confidence === 0) {
|
|
6857
7971
|
await log?.(`WARNING: classify returned fallback (policyTypes: ["other"]). This usually means the generateObject callback failed \u2014 check that the document content is accessible to the model.`);
|
|
@@ -6864,7 +7978,8 @@ function createExtractor(config) {
|
|
|
6864
7978
|
memory: Object.fromEntries(memory)
|
|
6865
7979
|
});
|
|
6866
7980
|
}
|
|
6867
|
-
const
|
|
7981
|
+
const documentType = classifyResult.documentType;
|
|
7982
|
+
const policyTypes = classifyResult.policyTypes ?? [];
|
|
6868
7983
|
const primaryType = policyTypes[0] ?? "other";
|
|
6869
7984
|
const template = getTemplate(primaryType);
|
|
6870
7985
|
const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfInput);
|
|
@@ -6876,13 +7991,14 @@ function createExtractor(config) {
|
|
|
6876
7991
|
onProgress?.("Resuming from checkpoint (form inventory complete)...");
|
|
6877
7992
|
} else {
|
|
6878
7993
|
onProgress?.(`Building form inventory for ${primaryType} ${documentType}...`);
|
|
7994
|
+
const budget = resolveBudget("extraction_form_inventory", 2048);
|
|
6879
7995
|
const formInventoryResponse = await safeGenerateObject(
|
|
6880
7996
|
generateObject,
|
|
6881
7997
|
{
|
|
6882
7998
|
prompt: buildFormInventoryPrompt(templateHints),
|
|
6883
7999
|
schema: FormInventorySchema,
|
|
6884
|
-
maxTokens:
|
|
6885
|
-
providerOptions: await buildPdfProviderOptions(pdfInput,
|
|
8000
|
+
maxTokens: budget.maxTokens,
|
|
8001
|
+
providerOptions: await buildPdfProviderOptions(pdfInput, activeProviderOptions)
|
|
6886
8002
|
},
|
|
6887
8003
|
{
|
|
6888
8004
|
fallback: { forms: [] },
|
|
@@ -6890,7 +8006,11 @@ function createExtractor(config) {
|
|
|
6890
8006
|
onError: (err, attempt) => log?.(`Form inventory attempt ${attempt + 1} failed: ${err instanceof Error ? err.message : String(err)}`)
|
|
6891
8007
|
}
|
|
6892
8008
|
);
|
|
6893
|
-
trackUsage(formInventoryResponse.usage
|
|
8009
|
+
trackUsage(formInventoryResponse.usage, {
|
|
8010
|
+
taskKind: "extraction_form_inventory",
|
|
8011
|
+
label: "form_inventory",
|
|
8012
|
+
maxTokens: budget.maxTokens
|
|
8013
|
+
});
|
|
6894
8014
|
formInventory = formInventoryResponse.object;
|
|
6895
8015
|
memory.set("form_inventory", formInventory);
|
|
6896
8016
|
await pipelineCtx.save("form_inventory", {
|
|
@@ -6909,39 +8029,54 @@ function createExtractor(config) {
|
|
|
6909
8029
|
onProgress?.(`Mapping document pages for ${primaryType} ${documentType}...`);
|
|
6910
8030
|
const chunkSize = 8;
|
|
6911
8031
|
const collectedAssignments = [];
|
|
6912
|
-
const formInventoryHint = formInventory?.forms
|
|
6913
|
-
const
|
|
6914
|
-
|
|
6915
|
-
|
|
6916
|
-
|
|
6917
|
-
|
|
6918
|
-
generateObject,
|
|
6919
|
-
{
|
|
6920
|
-
prompt: buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
|
|
6921
|
-
schema: PageMapChunkSchema,
|
|
6922
|
-
maxTokens: 2048,
|
|
6923
|
-
providerOptions: { ...providerOptions, pdfBase64: pagesPdf }
|
|
6924
|
-
},
|
|
6925
|
-
{
|
|
6926
|
-
fallback: {
|
|
6927
|
-
pages: Array.from({ length: endPage - startPage + 1 }, (_, index) => ({
|
|
6928
|
-
localPageNumber: index + 1,
|
|
6929
|
-
extractorNames: index === 0 && startPage === 1 ? ["carrier_info", "named_insured", "declarations", "coverage_limits"] : ["sections"],
|
|
6930
|
-
confidence: 0,
|
|
6931
|
-
notes: "Fallback page assignment"
|
|
6932
|
-
}))
|
|
6933
|
-
},
|
|
6934
|
-
log,
|
|
6935
|
-
onError: (err, attempt) => log?.(`Page map attempt ${attempt + 1} failed for pages ${startPage}-${endPage}: ${err}`)
|
|
6936
|
-
}
|
|
6937
|
-
);
|
|
6938
|
-
trackUsage(mapResponse.usage);
|
|
6939
|
-
for (const assignment of mapResponse.object.pages) {
|
|
6940
|
-
collectedAssignments.push({
|
|
6941
|
-
...assignment,
|
|
6942
|
-
localPageNumber: startPage + assignment.localPageNumber - 1
|
|
6943
|
-
});
|
|
8032
|
+
const formInventoryHint = formInventory?.forms?.length ? formatFormInventoryForPageMap(formInventory.forms) : void 0;
|
|
8033
|
+
const pageMapChunks = Array.from(
|
|
8034
|
+
{ length: Math.ceil(pageCount / chunkSize) },
|
|
8035
|
+
(_, index) => {
|
|
8036
|
+
const startPage = index * chunkSize + 1;
|
|
8037
|
+
return { startPage, endPage: Math.min(pageCount, startPage + chunkSize - 1) };
|
|
6944
8038
|
}
|
|
8039
|
+
);
|
|
8040
|
+
const pageMapResults = await Promise.all(
|
|
8041
|
+
pageMapChunks.map(
|
|
8042
|
+
({ startPage, endPage }) => limit(async () => {
|
|
8043
|
+
const pagesPdf = await getPageRangePdf(startPage, endPage);
|
|
8044
|
+
const budget = resolveBudget("extraction_page_map", 2048);
|
|
8045
|
+
const mapResponse = await safeGenerateObject(
|
|
8046
|
+
generateObject,
|
|
8047
|
+
{
|
|
8048
|
+
prompt: buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
|
|
8049
|
+
schema: PageMapChunkSchema,
|
|
8050
|
+
maxTokens: budget.maxTokens,
|
|
8051
|
+
providerOptions: { ...activeProviderOptions, pdfBase64: pagesPdf }
|
|
8052
|
+
},
|
|
8053
|
+
{
|
|
8054
|
+
fallback: {
|
|
8055
|
+
pages: Array.from({ length: endPage - startPage + 1 }, (_, index) => ({
|
|
8056
|
+
localPageNumber: index + 1,
|
|
8057
|
+
extractorNames: index === 0 && startPage === 1 ? ["carrier_info", "named_insured", "declarations", "coverage_limits"] : ["sections"],
|
|
8058
|
+
confidence: 0,
|
|
8059
|
+
notes: "Fallback page assignment"
|
|
8060
|
+
}))
|
|
8061
|
+
},
|
|
8062
|
+
log,
|
|
8063
|
+
onError: (err, attempt) => log?.(`Page map attempt ${attempt + 1} failed for pages ${startPage}-${endPage}: ${err}`)
|
|
8064
|
+
}
|
|
8065
|
+
);
|
|
8066
|
+
trackUsage(mapResponse.usage, {
|
|
8067
|
+
taskKind: "extraction_page_map",
|
|
8068
|
+
label: `page_map:${startPage}-${endPage}`,
|
|
8069
|
+
maxTokens: budget.maxTokens
|
|
8070
|
+
});
|
|
8071
|
+
return mapResponse.object.pages.map((assignment) => ({
|
|
8072
|
+
...assignment,
|
|
8073
|
+
localPageNumber: startPage + assignment.localPageNumber - 1
|
|
8074
|
+
}));
|
|
8075
|
+
})
|
|
8076
|
+
)
|
|
8077
|
+
);
|
|
8078
|
+
for (const assignments of pageMapResults) {
|
|
8079
|
+
collectedAssignments.push(...assignments);
|
|
6945
8080
|
}
|
|
6946
8081
|
pageAssignments = collectedAssignments.length > 0 ? collectedAssignments : Array.from({ length: pageCount }, (_, index) => ({
|
|
6947
8082
|
localPageNumber: index + 1,
|
|
@@ -6979,11 +8114,12 @@ function createExtractor(config) {
|
|
|
6979
8114
|
if (!pipelineCtx.isPhaseComplete("extract")) {
|
|
6980
8115
|
const tasks = plan.tasks;
|
|
6981
8116
|
onProgress?.(`Dispatching ${tasks.length} extractors...`);
|
|
8117
|
+
const extractionPdfInput = await getPdfBase64ForExtraction();
|
|
6982
8118
|
const extractorResults = await Promise.all(
|
|
6983
8119
|
tasks.map(
|
|
6984
8120
|
(task) => limit(async () => {
|
|
6985
8121
|
onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
|
|
6986
|
-
return runFocusedExtractorTask(task,
|
|
8122
|
+
return runFocusedExtractorTask(task, extractionPdfInput, memory, pageRangePdfCache);
|
|
6987
8123
|
})
|
|
6988
8124
|
)
|
|
6989
8125
|
);
|
|
@@ -6997,6 +8133,7 @@ function createExtractor(config) {
|
|
|
6997
8133
|
onProgress?.("Extracting supplementary retrieval facts...");
|
|
6998
8134
|
try {
|
|
6999
8135
|
const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
|
|
8136
|
+
const budget = resolveBudget("extraction_focused", 4096);
|
|
7000
8137
|
const supplementaryResult = await runExtractor({
|
|
7001
8138
|
name: "supplementary",
|
|
7002
8139
|
prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
|
|
@@ -7006,10 +8143,15 @@ function createExtractor(config) {
|
|
|
7006
8143
|
endPage: pageCount,
|
|
7007
8144
|
generateObject,
|
|
7008
8145
|
convertPdfToImages,
|
|
7009
|
-
maxTokens:
|
|
7010
|
-
providerOptions
|
|
8146
|
+
maxTokens: budget.maxTokens,
|
|
8147
|
+
providerOptions: activeProviderOptions,
|
|
8148
|
+
pageRangeCache: pageRangePdfCache
|
|
8149
|
+
});
|
|
8150
|
+
trackUsage(supplementaryResult.usage, {
|
|
8151
|
+
taskKind: "extraction_focused",
|
|
8152
|
+
label: "supplementary",
|
|
8153
|
+
maxTokens: budget.maxTokens
|
|
7011
8154
|
});
|
|
7012
|
-
trackUsage(supplementaryResult.usage);
|
|
7013
8155
|
mergeMemoryResult(supplementaryResult.name, supplementaryResult.data, memory);
|
|
7014
8156
|
} catch (error) {
|
|
7015
8157
|
await log?.(`Supplementary extractor failed: ${error}`);
|
|
@@ -7035,11 +8177,16 @@ function createExtractor(config) {
|
|
|
7035
8177
|
generateObject,
|
|
7036
8178
|
convertPdfToImages,
|
|
7037
8179
|
concurrency,
|
|
7038
|
-
providerOptions,
|
|
8180
|
+
providerOptions: activeProviderOptions,
|
|
8181
|
+
modelCapabilities,
|
|
8182
|
+
modelBudgetConstraints,
|
|
7039
8183
|
log,
|
|
7040
8184
|
onProgress
|
|
7041
8185
|
});
|
|
7042
|
-
trackUsage(resolution.usage
|
|
8186
|
+
trackUsage(resolution.usage, {
|
|
8187
|
+
taskKind: "extraction_referential_lookup",
|
|
8188
|
+
label: "referential_resolution"
|
|
8189
|
+
});
|
|
7043
8190
|
if (resolution.attempts > 0) {
|
|
7044
8191
|
await log?.(`Referential resolution: ${resolution.resolved}/${resolution.attempts} resolved, ${resolution.unresolved} unresolved`);
|
|
7045
8192
|
}
|
|
@@ -7064,13 +8211,14 @@ function createExtractor(config) {
|
|
|
7064
8211
|
const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
|
|
7065
8212
|
const extractionSummary = summarizeExtraction(memory);
|
|
7066
8213
|
const pageMapSummary = formatPageMapSummary(pageAssignments);
|
|
8214
|
+
const budget = resolveBudget("extraction_review", 1536);
|
|
7067
8215
|
const reviewResponse = await safeGenerateObject(
|
|
7068
8216
|
generateObject,
|
|
7069
8217
|
{
|
|
7070
8218
|
prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog),
|
|
7071
8219
|
schema: ReviewResultSchema,
|
|
7072
|
-
maxTokens:
|
|
7073
|
-
providerOptions: await buildPdfProviderOptions(pdfInput,
|
|
8220
|
+
maxTokens: budget.maxTokens,
|
|
8221
|
+
providerOptions: await buildPdfProviderOptions(pdfInput, activeProviderOptions)
|
|
7074
8222
|
},
|
|
7075
8223
|
{
|
|
7076
8224
|
fallback: { complete: true, missingFields: [], qualityIssues: [], additionalTasks: [] },
|
|
@@ -7078,7 +8226,11 @@ function createExtractor(config) {
|
|
|
7078
8226
|
onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
|
|
7079
8227
|
}
|
|
7080
8228
|
);
|
|
7081
|
-
trackUsage(reviewResponse.usage
|
|
8229
|
+
trackUsage(reviewResponse.usage, {
|
|
8230
|
+
taskKind: "extraction_review",
|
|
8231
|
+
label: `review:${round + 1}`,
|
|
8232
|
+
maxTokens: budget.maxTokens
|
|
8233
|
+
});
|
|
7082
8234
|
reviewRounds.push(toReviewRoundRecord(round + 1, reviewResponse.object));
|
|
7083
8235
|
if (reviewResponse.object.qualityIssues?.length) {
|
|
7084
8236
|
await log?.(`Review round ${round + 1} quality issues: ${reviewResponse.object.qualityIssues.join("; ")}`);
|
|
@@ -7088,10 +8240,11 @@ function createExtractor(config) {
|
|
|
7088
8240
|
break;
|
|
7089
8241
|
}
|
|
7090
8242
|
onProgress?.(`Review round ${round + 1}: dispatching ${reviewResponse.object.additionalTasks.length} follow-up extractors...`);
|
|
8243
|
+
const extractionPdfInput = await getPdfBase64ForExtraction();
|
|
7091
8244
|
const followUpResults = await Promise.all(
|
|
7092
8245
|
reviewResponse.object.additionalTasks.map(
|
|
7093
8246
|
(task) => limit(async () => {
|
|
7094
|
-
return runFocusedExtractorTask(task,
|
|
8247
|
+
return runFocusedExtractorTask(task, extractionPdfInput, memory, pageRangePdfCache);
|
|
7095
8248
|
})
|
|
7096
8249
|
)
|
|
7097
8250
|
);
|
|
@@ -7101,10 +8254,12 @@ function createExtractor(config) {
|
|
|
7101
8254
|
}
|
|
7102
8255
|
}
|
|
7103
8256
|
}
|
|
8257
|
+
groundExtractionMemoryWithSourceSpans(memory, sourceSpans);
|
|
7104
8258
|
reviewReport = buildExtractionReviewReport({
|
|
7105
8259
|
memory,
|
|
7106
8260
|
pageAssignments,
|
|
7107
|
-
reviewRounds
|
|
8261
|
+
reviewRounds,
|
|
8262
|
+
sourceSpansAvailable: sourceSpans.length > 0
|
|
7108
8263
|
});
|
|
7109
8264
|
if (reviewReport.issues.length > 0) {
|
|
7110
8265
|
await log?.(
|
|
@@ -7125,10 +8280,12 @@ function createExtractor(config) {
|
|
|
7125
8280
|
memory: Object.fromEntries(memory)
|
|
7126
8281
|
});
|
|
7127
8282
|
}
|
|
8283
|
+
groundExtractionMemoryWithSourceSpans(memory, sourceSpans);
|
|
7128
8284
|
reviewReport ?? (reviewReport = buildExtractionReviewReport({
|
|
7129
8285
|
memory,
|
|
7130
8286
|
pageAssignments,
|
|
7131
|
-
reviewRounds
|
|
8287
|
+
reviewRounds,
|
|
8288
|
+
sourceSpansAvailable: sourceSpans.length > 0
|
|
7132
8289
|
}));
|
|
7133
8290
|
onProgress?.("Assembling document...");
|
|
7134
8291
|
const document = assembleDocument(id, documentType, memory);
|
|
@@ -7146,13 +8303,14 @@ function createExtractor(config) {
|
|
|
7146
8303
|
if (!document.summary) {
|
|
7147
8304
|
onProgress?.("Generating document summary...");
|
|
7148
8305
|
try {
|
|
8306
|
+
const budget = resolveBudget("extraction_summary", 512);
|
|
7149
8307
|
const summaryResponse = await safeGenerateObject(
|
|
7150
8308
|
generateObject,
|
|
7151
8309
|
{
|
|
7152
8310
|
prompt: buildSummaryPrompt(document),
|
|
7153
8311
|
schema: SummaryResultSchema,
|
|
7154
|
-
maxTokens:
|
|
7155
|
-
providerOptions
|
|
8312
|
+
maxTokens: budget.maxTokens,
|
|
8313
|
+
providerOptions: activeProviderOptions
|
|
7156
8314
|
},
|
|
7157
8315
|
{
|
|
7158
8316
|
fallback: { summary: "" },
|
|
@@ -7160,7 +8318,11 @@ function createExtractor(config) {
|
|
|
7160
8318
|
onError: (err, attempt) => log?.(`Summary attempt ${attempt + 1} failed: ${err instanceof Error ? err.message : String(err)}`)
|
|
7161
8319
|
}
|
|
7162
8320
|
);
|
|
7163
|
-
trackUsage(summaryResponse.usage
|
|
8321
|
+
trackUsage(summaryResponse.usage, {
|
|
8322
|
+
taskKind: "extraction_summary",
|
|
8323
|
+
label: "summary",
|
|
8324
|
+
maxTokens: budget.maxTokens
|
|
8325
|
+
});
|
|
7164
8326
|
if (summaryResponse.object.summary) {
|
|
7165
8327
|
document.summary = summaryResponse.object.summary;
|
|
7166
8328
|
}
|
|
@@ -7169,12 +8331,18 @@ function createExtractor(config) {
|
|
|
7169
8331
|
}
|
|
7170
8332
|
}
|
|
7171
8333
|
onProgress?.("Formatting extracted content...");
|
|
8334
|
+
const formatBudget = resolveBudget("extraction_format", 16384);
|
|
7172
8335
|
const formatResult = await formatDocumentContent(document, generateText, {
|
|
7173
|
-
providerOptions,
|
|
8336
|
+
providerOptions: activeProviderOptions,
|
|
8337
|
+
maxTokens: formatBudget.maxTokens,
|
|
7174
8338
|
onProgress,
|
|
7175
8339
|
log
|
|
7176
8340
|
});
|
|
7177
|
-
trackUsage(formatResult.usage
|
|
8341
|
+
trackUsage(formatResult.usage, {
|
|
8342
|
+
taskKind: "extraction_format",
|
|
8343
|
+
label: "format",
|
|
8344
|
+
maxTokens: formatBudget.maxTokens
|
|
8345
|
+
});
|
|
7178
8346
|
const chunks = chunkDocument(formatResult.document);
|
|
7179
8347
|
const finalCheckpoint = pipelineCtx.getCheckpoint();
|
|
7180
8348
|
if (callsMissingUsage > 0) {
|
|
@@ -7184,12 +8352,15 @@ function createExtractor(config) {
|
|
|
7184
8352
|
return {
|
|
7185
8353
|
document: formatResult.document,
|
|
7186
8354
|
chunks,
|
|
8355
|
+
sourceSpans,
|
|
8356
|
+
sourceChunks,
|
|
7187
8357
|
tokenUsage: totalUsage,
|
|
7188
8358
|
usageReporting: {
|
|
7189
8359
|
modelCalls,
|
|
7190
8360
|
callsWithUsage,
|
|
7191
8361
|
callsMissingUsage
|
|
7192
8362
|
},
|
|
8363
|
+
performanceReport,
|
|
7193
8364
|
checkpoint: finalCheckpoint,
|
|
7194
8365
|
reviewReport
|
|
7195
8366
|
};
|
|
@@ -7411,8 +8582,8 @@ Respond with JSON only:
|
|
|
7411
8582
|
}`;
|
|
7412
8583
|
|
|
7413
8584
|
// src/schemas/application.ts
|
|
7414
|
-
import { z as
|
|
7415
|
-
var FieldTypeSchema =
|
|
8585
|
+
import { z as z41 } from "zod";
|
|
8586
|
+
var FieldTypeSchema = z41.enum([
|
|
7416
8587
|
"text",
|
|
7417
8588
|
"numeric",
|
|
7418
8589
|
"currency",
|
|
@@ -7421,144 +8592,153 @@ var FieldTypeSchema = z38.enum([
|
|
|
7421
8592
|
"table",
|
|
7422
8593
|
"declaration"
|
|
7423
8594
|
]);
|
|
7424
|
-
var ApplicationFieldSchema =
|
|
7425
|
-
id:
|
|
7426
|
-
label:
|
|
7427
|
-
section:
|
|
8595
|
+
var ApplicationFieldSchema = z41.object({
|
|
8596
|
+
id: z41.string(),
|
|
8597
|
+
label: z41.string(),
|
|
8598
|
+
section: z41.string(),
|
|
7428
8599
|
fieldType: FieldTypeSchema,
|
|
7429
|
-
required:
|
|
7430
|
-
options:
|
|
7431
|
-
columns:
|
|
7432
|
-
requiresExplanationIfYes:
|
|
7433
|
-
condition:
|
|
7434
|
-
dependsOn:
|
|
7435
|
-
whenValue:
|
|
8600
|
+
required: z41.boolean(),
|
|
8601
|
+
options: z41.array(z41.string()).optional(),
|
|
8602
|
+
columns: z41.array(z41.string()).optional(),
|
|
8603
|
+
requiresExplanationIfYes: z41.boolean().optional(),
|
|
8604
|
+
condition: z41.object({
|
|
8605
|
+
dependsOn: z41.string(),
|
|
8606
|
+
whenValue: z41.string()
|
|
7436
8607
|
}).optional(),
|
|
7437
|
-
value:
|
|
7438
|
-
source:
|
|
7439
|
-
confidence:
|
|
7440
|
-
|
|
7441
|
-
|
|
7442
|
-
|
|
7443
|
-
|
|
7444
|
-
|
|
7445
|
-
|
|
7446
|
-
|
|
7447
|
-
|
|
7448
|
-
|
|
7449
|
-
|
|
7450
|
-
|
|
7451
|
-
|
|
7452
|
-
|
|
7453
|
-
|
|
7454
|
-
});
|
|
7455
|
-
var
|
|
7456
|
-
|
|
7457
|
-
|
|
7458
|
-
|
|
7459
|
-
|
|
7460
|
-
});
|
|
7461
|
-
var
|
|
7462
|
-
|
|
7463
|
-
|
|
7464
|
-
|
|
7465
|
-
|
|
7466
|
-
});
|
|
7467
|
-
var
|
|
7468
|
-
|
|
7469
|
-
|
|
7470
|
-
|
|
7471
|
-
|
|
7472
|
-
|
|
7473
|
-
|
|
7474
|
-
|
|
7475
|
-
|
|
7476
|
-
|
|
7477
|
-
|
|
7478
|
-
|
|
7479
|
-
|
|
7480
|
-
|
|
7481
|
-
|
|
7482
|
-
|
|
7483
|
-
|
|
7484
|
-
|
|
7485
|
-
|
|
7486
|
-
|
|
7487
|
-
|
|
7488
|
-
|
|
7489
|
-
|
|
7490
|
-
|
|
7491
|
-
|
|
7492
|
-
|
|
7493
|
-
|
|
7494
|
-
|
|
7495
|
-
|
|
7496
|
-
|
|
7497
|
-
|
|
7498
|
-
|
|
7499
|
-
|
|
7500
|
-
|
|
7501
|
-
|
|
7502
|
-
|
|
7503
|
-
|
|
7504
|
-
|
|
7505
|
-
|
|
7506
|
-
|
|
7507
|
-
|
|
7508
|
-
|
|
7509
|
-
var
|
|
7510
|
-
|
|
8608
|
+
value: z41.string().optional(),
|
|
8609
|
+
source: z41.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
|
|
8610
|
+
confidence: z41.enum(["confirmed", "high", "medium", "low"]).optional(),
|
|
8611
|
+
sourceSpanIds: z41.array(z41.string()).optional().describe("Stable source spans that support the field value or field anchor"),
|
|
8612
|
+
userSourceSpanIds: z41.array(z41.string()).optional().describe("Message or attachment spans that support user-provided values"),
|
|
8613
|
+
pageNumber: z41.number().int().positive().optional().describe("Application page where the field label or anchor appears"),
|
|
8614
|
+
fieldAnchorId: z41.string().optional().describe("Stable field anchor ID derived from page, section, label, and form metadata"),
|
|
8615
|
+
acroFormName: z41.string().optional().describe("Native PDF AcroForm field name when available"),
|
|
8616
|
+
validationStatus: z41.enum(["valid", "needs_review", "unsupported", "missing"]).optional()
|
|
8617
|
+
});
|
|
8618
|
+
var ApplicationClassifyResultSchema = z41.object({
|
|
8619
|
+
isApplication: z41.boolean(),
|
|
8620
|
+
confidence: z41.number().min(0).max(1),
|
|
8621
|
+
applicationType: z41.string().nullable()
|
|
8622
|
+
});
|
|
8623
|
+
var FieldExtractionResultSchema = z41.object({
|
|
8624
|
+
fields: z41.array(ApplicationFieldSchema)
|
|
8625
|
+
});
|
|
8626
|
+
var AutoFillMatchSchema = z41.object({
|
|
8627
|
+
fieldId: z41.string(),
|
|
8628
|
+
value: z41.string(),
|
|
8629
|
+
confidence: z41.enum(["confirmed"]),
|
|
8630
|
+
contextKey: z41.string()
|
|
8631
|
+
});
|
|
8632
|
+
var AutoFillResultSchema = z41.object({
|
|
8633
|
+
matches: z41.array(AutoFillMatchSchema)
|
|
8634
|
+
});
|
|
8635
|
+
var QuestionBatchResultSchema = z41.object({
|
|
8636
|
+
batches: z41.array(z41.array(z41.string()).describe("Array of field IDs in this batch"))
|
|
8637
|
+
});
|
|
8638
|
+
var LookupRequestSchema = z41.object({
|
|
8639
|
+
type: z41.string().describe("Type of lookup: 'records', 'website', 'policy'"),
|
|
8640
|
+
description: z41.string(),
|
|
8641
|
+
url: z41.string().optional(),
|
|
8642
|
+
targetFieldIds: z41.array(z41.string())
|
|
8643
|
+
});
|
|
8644
|
+
var ReplyIntentSchema = z41.object({
|
|
8645
|
+
primaryIntent: z41.enum(["answers_only", "question", "lookup_request", "mixed"]),
|
|
8646
|
+
hasAnswers: z41.boolean(),
|
|
8647
|
+
questionText: z41.string().optional(),
|
|
8648
|
+
questionFieldIds: z41.array(z41.string()).optional(),
|
|
8649
|
+
lookupRequests: z41.array(LookupRequestSchema).optional()
|
|
8650
|
+
});
|
|
8651
|
+
var ParsedAnswerSchema = z41.object({
|
|
8652
|
+
fieldId: z41.string(),
|
|
8653
|
+
value: z41.string(),
|
|
8654
|
+
explanation: z41.string().optional()
|
|
8655
|
+
});
|
|
8656
|
+
var AnswerParsingResultSchema = z41.object({
|
|
8657
|
+
answers: z41.array(ParsedAnswerSchema),
|
|
8658
|
+
unanswered: z41.array(z41.string()).describe("Field IDs that were not answered")
|
|
8659
|
+
});
|
|
8660
|
+
var LookupFillSchema = z41.object({
|
|
8661
|
+
fieldId: z41.string(),
|
|
8662
|
+
value: z41.string(),
|
|
8663
|
+
source: z41.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'"),
|
|
8664
|
+
sourceSpanIds: z41.array(z41.string()).optional()
|
|
8665
|
+
});
|
|
8666
|
+
var LookupFillResultSchema = z41.object({
|
|
8667
|
+
fills: z41.array(LookupFillSchema),
|
|
8668
|
+
unfillable: z41.array(z41.string()),
|
|
8669
|
+
explanation: z41.string().optional()
|
|
8670
|
+
});
|
|
8671
|
+
var FlatPdfPlacementSchema = z41.object({
|
|
8672
|
+
fieldId: z41.string(),
|
|
8673
|
+
page: z41.number(),
|
|
8674
|
+
x: z41.number().describe("Percentage from left edge (0-100)"),
|
|
8675
|
+
y: z41.number().describe("Percentage from top edge (0-100)"),
|
|
8676
|
+
text: z41.string(),
|
|
8677
|
+
fontSize: z41.number().optional(),
|
|
8678
|
+
isCheckmark: z41.boolean().optional()
|
|
8679
|
+
});
|
|
8680
|
+
var AcroFormMappingSchema = z41.object({
|
|
8681
|
+
fieldId: z41.string(),
|
|
8682
|
+
acroFormName: z41.string(),
|
|
8683
|
+
value: z41.string()
|
|
8684
|
+
});
|
|
8685
|
+
var QualityGateStatusSchema = z41.enum(["passed", "warning", "failed"]);
|
|
8686
|
+
var QualitySeveritySchema = z41.enum(["info", "warning", "blocking"]);
|
|
8687
|
+
var ApplicationQualityIssueSchema = z41.object({
|
|
8688
|
+
code: z41.string(),
|
|
7511
8689
|
severity: QualitySeveritySchema,
|
|
7512
|
-
message:
|
|
7513
|
-
fieldId:
|
|
8690
|
+
message: z41.string(),
|
|
8691
|
+
fieldId: z41.string().optional()
|
|
7514
8692
|
});
|
|
7515
|
-
var ApplicationQualityRoundSchema =
|
|
7516
|
-
round:
|
|
7517
|
-
kind:
|
|
8693
|
+
var ApplicationQualityRoundSchema = z41.object({
|
|
8694
|
+
round: z41.number(),
|
|
8695
|
+
kind: z41.string(),
|
|
7518
8696
|
status: QualityGateStatusSchema,
|
|
7519
|
-
summary:
|
|
8697
|
+
summary: z41.string().optional()
|
|
7520
8698
|
});
|
|
7521
|
-
var ApplicationQualityArtifactSchema =
|
|
7522
|
-
kind:
|
|
7523
|
-
label:
|
|
7524
|
-
itemCount:
|
|
8699
|
+
var ApplicationQualityArtifactSchema = z41.object({
|
|
8700
|
+
kind: z41.string(),
|
|
8701
|
+
label: z41.string().optional(),
|
|
8702
|
+
itemCount: z41.number().optional()
|
|
7525
8703
|
});
|
|
7526
|
-
var ApplicationEmailReviewSchema =
|
|
7527
|
-
issues:
|
|
8704
|
+
var ApplicationEmailReviewSchema = z41.object({
|
|
8705
|
+
issues: z41.array(ApplicationQualityIssueSchema),
|
|
7528
8706
|
qualityGateStatus: QualityGateStatusSchema
|
|
7529
8707
|
});
|
|
7530
|
-
var ApplicationQualityReportSchema =
|
|
7531
|
-
issues:
|
|
7532
|
-
rounds:
|
|
7533
|
-
artifacts:
|
|
8708
|
+
var ApplicationQualityReportSchema = z41.object({
|
|
8709
|
+
issues: z41.array(ApplicationQualityIssueSchema),
|
|
8710
|
+
rounds: z41.array(ApplicationQualityRoundSchema).optional(),
|
|
8711
|
+
artifacts: z41.array(ApplicationQualityArtifactSchema).optional(),
|
|
7534
8712
|
emailReview: ApplicationEmailReviewSchema.optional(),
|
|
7535
8713
|
qualityGateStatus: QualityGateStatusSchema
|
|
7536
8714
|
});
|
|
7537
|
-
var ApplicationStateSchema =
|
|
7538
|
-
id:
|
|
7539
|
-
pdfBase64:
|
|
7540
|
-
title:
|
|
7541
|
-
applicationType:
|
|
7542
|
-
fields:
|
|
7543
|
-
batches:
|
|
7544
|
-
currentBatchIndex:
|
|
8715
|
+
var ApplicationStateSchema = z41.object({
|
|
8716
|
+
id: z41.string(),
|
|
8717
|
+
pdfBase64: z41.string().optional().describe("Original PDF, omitted after extraction"),
|
|
8718
|
+
title: z41.string().optional(),
|
|
8719
|
+
applicationType: z41.string().nullable().optional(),
|
|
8720
|
+
fields: z41.array(ApplicationFieldSchema),
|
|
8721
|
+
batches: z41.array(z41.array(z41.string())).optional(),
|
|
8722
|
+
currentBatchIndex: z41.number().default(0),
|
|
7545
8723
|
qualityReport: ApplicationQualityReportSchema.optional(),
|
|
7546
|
-
status:
|
|
7547
|
-
createdAt:
|
|
7548
|
-
updatedAt:
|
|
8724
|
+
status: z41.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
|
|
8725
|
+
createdAt: z41.number(),
|
|
8726
|
+
updatedAt: z41.number()
|
|
7549
8727
|
});
|
|
7550
8728
|
|
|
7551
8729
|
// src/application/agents/classifier.ts
|
|
7552
|
-
async function classifyApplication(pdfContent, generateObject, providerOptions) {
|
|
8730
|
+
async function classifyApplication(pdfContent, generateObject, providerOptions, maxTokens = 512) {
|
|
7553
8731
|
const { object, usage } = await withRetry(
|
|
7554
8732
|
() => generateObject({
|
|
7555
8733
|
prompt: `${APPLICATION_CLASSIFY_PROMPT}
|
|
7556
8734
|
|
|
7557
|
-
Analyze the
|
|
7558
|
-
${pdfContent}`,
|
|
8735
|
+
Analyze the attached insurance document. If text source units are provided in provider options, use them as supporting context. Do not infer from base64 text.`,
|
|
7559
8736
|
schema: ApplicationClassifyResultSchema,
|
|
7560
|
-
maxTokens
|
|
7561
|
-
providerOptions
|
|
8737
|
+
maxTokens,
|
|
8738
|
+
providerOptions: {
|
|
8739
|
+
...providerOptions,
|
|
8740
|
+
pdfBase64: providerOptions?.pdfBase64 ?? pdfContent
|
|
8741
|
+
}
|
|
7562
8742
|
})
|
|
7563
8743
|
);
|
|
7564
8744
|
return { result: object, usage };
|
|
@@ -7571,13 +8751,18 @@ function buildFieldExtractionPrompt() {
|
|
|
7571
8751
|
Field types: "text", "numeric", "currency", "date", "yes_no", "table", "declaration"
|
|
7572
8752
|
|
|
7573
8753
|
Required keys per field:
|
|
7574
|
-
- "id": short snake_case ID
|
|
8754
|
+
- "id": short provisional snake_case ID. The SDK will replace this with a stable deterministic ID.
|
|
7575
8755
|
- "label": field label \u2014 a clear, natural question that a human would understand
|
|
7576
8756
|
- "section": section heading
|
|
7577
8757
|
- "fieldType": one of the types above
|
|
7578
8758
|
- "required": boolean
|
|
7579
8759
|
|
|
7580
8760
|
Optional keys (only include when applicable):
|
|
8761
|
+
- "sourceSpanIds": stable source span IDs if the caller provided source units for this application
|
|
8762
|
+
- "pageNumber": PDF page number where the field label/anchor appears
|
|
8763
|
+
- "fieldAnchorId": stable caller-provided field anchor ID, when available
|
|
8764
|
+
- "acroFormName": native PDF form field name, when visible or provided
|
|
8765
|
+
- "validationStatus": "missing" for extracted blank fields, "needs_review" for prefilled fields that need source validation
|
|
7581
8766
|
- "options": array of strings \u2014 for fields with checkboxes/radio buttons/multiple choices (e.g. business type, state selections). Use "text" fieldType with options.
|
|
7582
8767
|
- "columns": array of {"name","type"} \u2014 tables only
|
|
7583
8768
|
- "requiresExplanationIfYes": boolean \u2014 declarations only
|
|
@@ -7593,25 +8778,73 @@ Example:
|
|
|
7593
8778
|
{"id":"prior_claims","text":"Any claims in past 5 years?","section":"Declarations","fieldType":"declaration","required":true,"requiresExplanationIfYes":true}
|
|
7594
8779
|
]
|
|
7595
8780
|
|
|
7596
|
-
Extract ALL fields. Respond with ONLY the JSON array, no other text.`;
|
|
7597
|
-
}
|
|
8781
|
+
Extract ALL fields. Prefer page numbers and source span IDs over model-generated guesses whenever source units are supplied. Respond with ONLY the JSON array, no other text.`;
|
|
8782
|
+
}
|
|
8783
|
+
|
|
8784
|
+
// src/application/field-ids.ts
|
|
8785
|
+
function normalizePart(value) {
|
|
8786
|
+
const normalized = (value ?? "").trim().toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_+|_+$/g, "");
|
|
8787
|
+
return normalized || "unknown";
|
|
8788
|
+
}
|
|
8789
|
+
function hashText2(value) {
|
|
8790
|
+
let hash = 2166136261;
|
|
8791
|
+
for (let index = 0; index < value.length; index++) {
|
|
8792
|
+
hash ^= value.charCodeAt(index);
|
|
8793
|
+
hash = Math.imul(hash, 16777619);
|
|
8794
|
+
}
|
|
8795
|
+
return (hash >>> 0).toString(16).padStart(8, "0").slice(0, 8);
|
|
8796
|
+
}
|
|
8797
|
+
function buildApplicationFieldAnchorId(field) {
|
|
8798
|
+
const page = field.pageNumber ? `p${field.pageNumber}` : "pna";
|
|
8799
|
+
const section = normalizePart(field.section);
|
|
8800
|
+
const label = normalizePart(field.label);
|
|
8801
|
+
const acroFormName = normalizePart(field.acroFormName);
|
|
8802
|
+
const hash = hashText2(`${page}|${section}|${label}|${acroFormName}`);
|
|
8803
|
+
return `app_field_anchor:${page}:${section}:${label}:${hash}`;
|
|
8804
|
+
}
|
|
8805
|
+
function buildStableApplicationFieldId(field) {
|
|
8806
|
+
const page = field.pageNumber ? `p${field.pageNumber}` : "pna";
|
|
8807
|
+
const section = normalizePart(field.section);
|
|
8808
|
+
const label = normalizePart(field.label);
|
|
8809
|
+
const fieldType = normalizePart(field.fieldType);
|
|
8810
|
+
const anchor = field.fieldAnchorId ?? buildApplicationFieldAnchorId(field);
|
|
8811
|
+
const hash = hashText2(`${page}|${section}|${label}|${fieldType}|${field.acroFormName ?? ""}|${anchor}`);
|
|
8812
|
+
return `app_field:${page}:${section}:${label}:${hash}`;
|
|
8813
|
+
}
|
|
8814
|
+
function normalizeApplicationFields(fields) {
|
|
8815
|
+
const seen = /* @__PURE__ */ new Map();
|
|
8816
|
+
return fields.map((field) => {
|
|
8817
|
+
const fieldAnchorId = field.fieldAnchorId ?? buildApplicationFieldAnchorId(field);
|
|
8818
|
+
const baseId = buildStableApplicationFieldId({ ...field, fieldAnchorId });
|
|
8819
|
+
const count = seen.get(baseId) ?? 0;
|
|
8820
|
+
seen.set(baseId, count + 1);
|
|
8821
|
+
return {
|
|
8822
|
+
...field,
|
|
8823
|
+
id: count === 0 ? baseId : `${baseId}:${count + 1}`,
|
|
8824
|
+
fieldAnchorId,
|
|
8825
|
+
validationStatus: field.validationStatus ?? (field.value ? "needs_review" : "missing")
|
|
8826
|
+
};
|
|
8827
|
+
});
|
|
8828
|
+
}
|
|
7598
8829
|
|
|
7599
8830
|
// src/application/agents/field-extractor.ts
|
|
7600
|
-
async function extractFields(pdfContent, generateObject, providerOptions) {
|
|
8831
|
+
async function extractFields(pdfContent, generateObject, providerOptions, maxTokens = 8192) {
|
|
7601
8832
|
const prompt = `${buildFieldExtractionPrompt()}
|
|
7602
8833
|
|
|
7603
|
-
Extract fields from
|
|
7604
|
-
${pdfContent}`;
|
|
8834
|
+
Extract fields from the attached application PDF. Use provider-supplied source units/spans for page numbers and anchors when present. Do not treat raw base64 as readable document text.`;
|
|
7605
8835
|
const { object, usage } = await withRetry(
|
|
7606
8836
|
() => generateObject({
|
|
7607
8837
|
prompt,
|
|
7608
8838
|
schema: FieldExtractionResultSchema,
|
|
7609
|
-
maxTokens
|
|
7610
|
-
providerOptions
|
|
8839
|
+
maxTokens,
|
|
8840
|
+
providerOptions: {
|
|
8841
|
+
...providerOptions,
|
|
8842
|
+
pdfBase64: providerOptions?.pdfBase64 ?? pdfContent
|
|
8843
|
+
}
|
|
7611
8844
|
})
|
|
7612
8845
|
);
|
|
7613
8846
|
const result = object;
|
|
7614
|
-
return { fields: result.fields, usage };
|
|
8847
|
+
return { fields: normalizeApplicationFields(result.fields), usage };
|
|
7615
8848
|
}
|
|
7616
8849
|
|
|
7617
8850
|
// src/prompts/application/auto-fill.ts
|
|
@@ -7644,7 +8877,7 @@ Only include fields you can confidently fill. Do not guess or fabricate values.`
|
|
|
7644
8877
|
}
|
|
7645
8878
|
|
|
7646
8879
|
// src/application/agents/auto-filler.ts
|
|
7647
|
-
async function autoFillFromContext(fields, orgContext, generateObject, providerOptions) {
|
|
8880
|
+
async function autoFillFromContext(fields, orgContext, generateObject, providerOptions, maxTokens = 4096) {
|
|
7648
8881
|
const fieldSummaries = fields.map((f) => ({
|
|
7649
8882
|
id: f.id,
|
|
7650
8883
|
label: f.label,
|
|
@@ -7656,7 +8889,7 @@ async function autoFillFromContext(fields, orgContext, generateObject, providerO
|
|
|
7656
8889
|
() => generateObject({
|
|
7657
8890
|
prompt,
|
|
7658
8891
|
schema: AutoFillResultSchema,
|
|
7659
|
-
maxTokens
|
|
8892
|
+
maxTokens,
|
|
7660
8893
|
providerOptions
|
|
7661
8894
|
})
|
|
7662
8895
|
);
|
|
@@ -7711,7 +8944,7 @@ Respond with JSON only:
|
|
|
7711
8944
|
}
|
|
7712
8945
|
|
|
7713
8946
|
// src/application/agents/batcher.ts
|
|
7714
|
-
async function batchQuestions(unfilledFields, generateObject, providerOptions) {
|
|
8947
|
+
async function batchQuestions(unfilledFields, generateObject, providerOptions, maxTokens = 2048) {
|
|
7715
8948
|
const fieldSummaries = unfilledFields.map((f) => ({
|
|
7716
8949
|
id: f.id,
|
|
7717
8950
|
label: f.label,
|
|
@@ -7726,7 +8959,7 @@ async function batchQuestions(unfilledFields, generateObject, providerOptions) {
|
|
|
7726
8959
|
() => generateObject({
|
|
7727
8960
|
prompt,
|
|
7728
8961
|
schema: QuestionBatchResultSchema,
|
|
7729
|
-
maxTokens
|
|
8962
|
+
maxTokens,
|
|
7730
8963
|
providerOptions
|
|
7731
8964
|
})
|
|
7732
8965
|
);
|
|
@@ -7770,14 +9003,14 @@ Respond with JSON only:
|
|
|
7770
9003
|
}
|
|
7771
9004
|
|
|
7772
9005
|
// src/application/agents/reply-router.ts
|
|
7773
|
-
async function classifyReplyIntent(fields, replyText, generateObject, providerOptions) {
|
|
9006
|
+
async function classifyReplyIntent(fields, replyText, generateObject, providerOptions, maxTokens = 1024) {
|
|
7774
9007
|
const fieldSummaries = fields.map((f) => ({ id: f.id, label: f.label }));
|
|
7775
9008
|
const prompt = buildReplyIntentClassificationPrompt(fieldSummaries, replyText);
|
|
7776
9009
|
const { object, usage } = await withRetry(
|
|
7777
9010
|
() => generateObject({
|
|
7778
9011
|
prompt,
|
|
7779
9012
|
schema: ReplyIntentSchema,
|
|
7780
|
-
maxTokens
|
|
9013
|
+
maxTokens,
|
|
7781
9014
|
providerOptions
|
|
7782
9015
|
})
|
|
7783
9016
|
);
|
|
@@ -7824,7 +9057,7 @@ Only include answers you are confident about. If a response is ambiguous, includ
|
|
|
7824
9057
|
}
|
|
7825
9058
|
|
|
7826
9059
|
// src/application/agents/answer-parser.ts
|
|
7827
|
-
async function parseAnswers(fields, replyText, generateObject, providerOptions) {
|
|
9060
|
+
async function parseAnswers(fields, replyText, generateObject, providerOptions, maxTokens = 4096) {
|
|
7828
9061
|
const questions = fields.map((f) => ({
|
|
7829
9062
|
id: f.id,
|
|
7830
9063
|
label: f.label,
|
|
@@ -7836,7 +9069,7 @@ async function parseAnswers(fields, replyText, generateObject, providerOptions)
|
|
|
7836
9069
|
() => generateObject({
|
|
7837
9070
|
prompt,
|
|
7838
9071
|
schema: AnswerParsingResultSchema,
|
|
7839
|
-
maxTokens
|
|
9072
|
+
maxTokens,
|
|
7840
9073
|
providerOptions
|
|
7841
9074
|
})
|
|
7842
9075
|
);
|
|
@@ -7936,11 +9169,12 @@ IMPORTANT: The "source" field must be a specific, citable reference that will be
|
|
|
7936
9169
|
- "Business Context (company_info)"
|
|
7937
9170
|
- "User Profile"
|
|
7938
9171
|
Never use vague sources like "existing records" or "available data".
|
|
9172
|
+
If AVAILABLE DATA contains sourceSpanId values, include them in "sourceSpanIds" for every value filled from that source. Existing policy values such as policy numbers, dates, limits, deductibles, premiums, coverages, exclusions, conditions, endorsements, locations, vehicles, or named insureds must not be filled without sourceSpanIds unless the value is explicitly marked for review.
|
|
7939
9173
|
|
|
7940
9174
|
Respond with JSON only:
|
|
7941
9175
|
{
|
|
7942
9176
|
"fills": [
|
|
7943
|
-
{ "fieldId": "field_id", "value": "the value from data", "source": "Specific source with identifier (e.g. GL Policy #ABC123, stripe.com)" }
|
|
9177
|
+
{ "fieldId": "field_id", "value": "the value from data", "source": "Specific source with identifier (e.g. GL Policy #ABC123, stripe.com)", "sourceSpanIds": ["doc-1:span:1:0:abcd1234"] }
|
|
7944
9178
|
],
|
|
7945
9179
|
"unfillable": ["field_ids that couldn't be matched"],
|
|
7946
9180
|
"explanation": "Brief note about what was filled and what couldn't be found, citing sources"
|
|
@@ -7948,7 +9182,7 @@ Respond with JSON only:
|
|
|
7948
9182
|
}
|
|
7949
9183
|
|
|
7950
9184
|
// src/application/agents/lookup-filler.ts
|
|
7951
|
-
async function fillFromLookup(requests, targetFields, availableData, generateObject, providerOptions) {
|
|
9185
|
+
async function fillFromLookup(requests, targetFields, availableData, generateObject, providerOptions, maxTokens = 4096) {
|
|
7952
9186
|
const requestSummaries = requests.map((r) => ({
|
|
7953
9187
|
type: r.type,
|
|
7954
9188
|
description: r.description,
|
|
@@ -7964,7 +9198,7 @@ async function fillFromLookup(requests, targetFields, availableData, generateObj
|
|
|
7964
9198
|
() => generateObject({
|
|
7965
9199
|
prompt,
|
|
7966
9200
|
schema: LookupFillResultSchema,
|
|
7967
|
-
maxTokens
|
|
9201
|
+
maxTokens,
|
|
7968
9202
|
providerOptions
|
|
7969
9203
|
})
|
|
7970
9204
|
);
|
|
@@ -8025,7 +9259,7 @@ Output the email body text ONLY. No subject line, no JSON. Use markdown for numb
|
|
|
8025
9259
|
}
|
|
8026
9260
|
|
|
8027
9261
|
// src/application/agents/email-generator.ts
|
|
8028
|
-
async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, generateText, providerOptions) {
|
|
9262
|
+
async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, generateText, providerOptions, maxTokens = 2048) {
|
|
8029
9263
|
const fieldSummaries = batchFields.map((f) => ({
|
|
8030
9264
|
id: f.id,
|
|
8031
9265
|
label: f.label,
|
|
@@ -8046,7 +9280,7 @@ async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, g
|
|
|
8046
9280
|
const { text, usage } = await withRetry(
|
|
8047
9281
|
() => generateText({
|
|
8048
9282
|
prompt,
|
|
8049
|
-
maxTokens
|
|
9283
|
+
maxTokens,
|
|
8050
9284
|
providerOptions
|
|
8051
9285
|
})
|
|
8052
9286
|
);
|
|
@@ -8059,6 +9293,17 @@ function isVagueSource(source) {
|
|
|
8059
9293
|
const normalized = source.trim().toLowerCase();
|
|
8060
9294
|
return normalized === "unknown" || normalized.includes("existing records") || normalized.includes("available data") || normalized === "context" || normalized === "user provided";
|
|
8061
9295
|
}
|
|
9296
|
+
function isSourceGroundedPolicyValue(field) {
|
|
9297
|
+
if (!field.value) return false;
|
|
9298
|
+
const source = field.source?.toLowerCase() ?? "";
|
|
9299
|
+
if (field.sourceSpanIds?.length) return false;
|
|
9300
|
+
if (field.userSourceSpanIds?.length) return false;
|
|
9301
|
+
const label = `${field.section} ${field.label}`.toLowerCase();
|
|
9302
|
+
const highValueLabel = /\b(policy|effective|expiration|date|limit|deductible|premium|coverage|exclusion|condition|endorsement|location|vehicle|named insured|revenue|payroll|loss|claim|prior)\b/.test(label);
|
|
9303
|
+
const highValueType = field.fieldType === "currency" || field.fieldType === "date" || field.fieldType === "numeric" || field.fieldType === "declaration";
|
|
9304
|
+
const fromPolicyLikeSource = /\b(policy|quote|document|lookup|carrier|endorsement)\b/.test(source);
|
|
9305
|
+
return fromPolicyLikeSource && (highValueLabel || highValueType);
|
|
9306
|
+
}
|
|
8062
9307
|
function buildApplicationQualityReport(state) {
|
|
8063
9308
|
const issues = [];
|
|
8064
9309
|
const seenIds = /* @__PURE__ */ new Set();
|
|
@@ -8104,6 +9349,14 @@ function buildApplicationQualityReport(state) {
|
|
|
8104
9349
|
fieldId: field.id
|
|
8105
9350
|
});
|
|
8106
9351
|
}
|
|
9352
|
+
if (isSourceGroundedPolicyValue(field)) {
|
|
9353
|
+
issues.push({
|
|
9354
|
+
code: "policy_value_missing_source_span",
|
|
9355
|
+
severity: "blocking",
|
|
9356
|
+
message: `Filled policy-derived field "${field.label}" is missing source span evidence.`,
|
|
9357
|
+
fieldId: field.id
|
|
9358
|
+
});
|
|
9359
|
+
}
|
|
8107
9360
|
}
|
|
8108
9361
|
return {
|
|
8109
9362
|
issues,
|
|
@@ -8213,7 +9466,9 @@ function createApplicationPipeline(config) {
|
|
|
8213
9466
|
onProgress,
|
|
8214
9467
|
log,
|
|
8215
9468
|
providerOptions,
|
|
8216
|
-
qualityGate = "warn"
|
|
9469
|
+
qualityGate = "warn",
|
|
9470
|
+
modelCapabilities,
|
|
9471
|
+
modelBudgetConstraints
|
|
8217
9472
|
} = config;
|
|
8218
9473
|
const limit = pLimit(concurrency);
|
|
8219
9474
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -8224,9 +9479,18 @@ function createApplicationPipeline(config) {
|
|
|
8224
9479
|
onTokenUsage?.(usage);
|
|
8225
9480
|
}
|
|
8226
9481
|
}
|
|
9482
|
+
function resolveBudget(taskKind, hintTokens) {
|
|
9483
|
+
return resolveModelBudget({
|
|
9484
|
+
taskKind,
|
|
9485
|
+
hintTokens,
|
|
9486
|
+
modelCapabilities,
|
|
9487
|
+
constraint: modelBudgetConstraints?.[taskKind]
|
|
9488
|
+
});
|
|
9489
|
+
}
|
|
8227
9490
|
async function processApplication(input) {
|
|
8228
9491
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
8229
9492
|
const { pdfBase64, context } = input;
|
|
9493
|
+
const applicationProviderOptions = input.sourceSpans?.length ? { ...providerOptions, sourceSpans: input.sourceSpans } : providerOptions;
|
|
8230
9494
|
const id = input.applicationId ?? `app-${Date.now()}`;
|
|
8231
9495
|
const now = Date.now();
|
|
8232
9496
|
let state = {
|
|
@@ -8247,9 +9511,10 @@ function createApplicationPipeline(config) {
|
|
|
8247
9511
|
let classifyResult;
|
|
8248
9512
|
try {
|
|
8249
9513
|
const { result, usage: classifyUsage } = await classifyApplication(
|
|
8250
|
-
pdfBase64
|
|
9514
|
+
pdfBase64,
|
|
8251
9515
|
generateObject,
|
|
8252
|
-
|
|
9516
|
+
applicationProviderOptions,
|
|
9517
|
+
resolveBudget("application_classify", 512).maxTokens
|
|
8253
9518
|
);
|
|
8254
9519
|
trackUsage(classifyUsage);
|
|
8255
9520
|
classifyResult = result;
|
|
@@ -8274,7 +9539,8 @@ function createApplicationPipeline(config) {
|
|
|
8274
9539
|
const { fields: extractedFields, usage: extractUsage } = await extractFields(
|
|
8275
9540
|
pdfBase64,
|
|
8276
9541
|
generateObject,
|
|
8277
|
-
|
|
9542
|
+
applicationProviderOptions,
|
|
9543
|
+
resolveBudget("application_extract_fields", 8192).maxTokens
|
|
8278
9544
|
);
|
|
8279
9545
|
trackUsage(extractUsage);
|
|
8280
9546
|
fields = extractedFields;
|
|
@@ -8312,6 +9578,7 @@ function createApplicationPipeline(config) {
|
|
|
8312
9578
|
field.value = pa.value;
|
|
8313
9579
|
field.source = `backfill: ${pa.source}`;
|
|
8314
9580
|
field.confidence = "high";
|
|
9581
|
+
field.validationStatus = "needs_review";
|
|
8315
9582
|
}
|
|
8316
9583
|
}
|
|
8317
9584
|
} catch (e) {
|
|
@@ -8336,7 +9603,8 @@ function createApplicationPipeline(config) {
|
|
|
8336
9603
|
unfilledFields2,
|
|
8337
9604
|
orgContext,
|
|
8338
9605
|
generateObject,
|
|
8339
|
-
providerOptions
|
|
9606
|
+
providerOptions,
|
|
9607
|
+
resolveBudget("application_auto_fill", 4096).maxTokens
|
|
8340
9608
|
);
|
|
8341
9609
|
trackUsage(afUsage);
|
|
8342
9610
|
for (const match of autoFillResult.matches) {
|
|
@@ -8345,6 +9613,7 @@ function createApplicationPipeline(config) {
|
|
|
8345
9613
|
field.value = match.value;
|
|
8346
9614
|
field.source = `auto-fill: ${match.contextKey}`;
|
|
8347
9615
|
field.confidence = match.confidence;
|
|
9616
|
+
field.validationStatus = "valid";
|
|
8348
9617
|
}
|
|
8349
9618
|
}
|
|
8350
9619
|
} catch (e) {
|
|
@@ -8387,7 +9656,8 @@ function createApplicationPipeline(config) {
|
|
|
8387
9656
|
const { result: batchResult, usage: batchUsage } = await batchQuestions(
|
|
8388
9657
|
unfilledFields,
|
|
8389
9658
|
generateObject,
|
|
8390
|
-
providerOptions
|
|
9659
|
+
providerOptions,
|
|
9660
|
+
resolveBudget("application_batch", 2048).maxTokens
|
|
8391
9661
|
);
|
|
8392
9662
|
trackUsage(batchUsage);
|
|
8393
9663
|
state.batches = batchResult.batches;
|
|
@@ -8410,9 +9680,15 @@ function createApplicationPipeline(config) {
|
|
|
8410
9680
|
onProgress?.(`Application processed: ${filledCount}/${state.fields.length} fields filled, ${state.batches?.length ?? 0} batches to collect.`);
|
|
8411
9681
|
return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
|
|
8412
9682
|
}
|
|
8413
|
-
async function
|
|
9683
|
+
async function processReply2(input) {
|
|
8414
9684
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
8415
9685
|
const { applicationId, replyText, context } = input;
|
|
9686
|
+
const replySourceSpanIds = input.replySourceSpanIds?.length ? input.replySourceSpanIds : buildTextSourceSpans({
|
|
9687
|
+
documentId: `${applicationId}:reply:${sourceSpanTextHash(replyText).slice(0, 12)}`,
|
|
9688
|
+
sourceKind: "email",
|
|
9689
|
+
text: replyText,
|
|
9690
|
+
metadata: { applicationId }
|
|
9691
|
+
}).map((span) => span.id);
|
|
8416
9692
|
let state = null;
|
|
8417
9693
|
if (applicationStore) {
|
|
8418
9694
|
state = await applicationStore.get(applicationId);
|
|
@@ -8431,7 +9707,8 @@ function createApplicationPipeline(config) {
|
|
|
8431
9707
|
currentBatchFields,
|
|
8432
9708
|
replyText,
|
|
8433
9709
|
generateObject,
|
|
8434
|
-
providerOptions
|
|
9710
|
+
providerOptions,
|
|
9711
|
+
resolveBudget("application_classify", 1024).maxTokens
|
|
8435
9712
|
);
|
|
8436
9713
|
trackUsage(intentUsage);
|
|
8437
9714
|
intent = classifiedIntent;
|
|
@@ -8459,7 +9736,8 @@ function createApplicationPipeline(config) {
|
|
|
8459
9736
|
currentBatchFields,
|
|
8460
9737
|
replyText,
|
|
8461
9738
|
generateObject,
|
|
8462
|
-
providerOptions
|
|
9739
|
+
providerOptions,
|
|
9740
|
+
resolveBudget("application_parse_answers", 4096).maxTokens
|
|
8463
9741
|
);
|
|
8464
9742
|
trackUsage(parseUsage);
|
|
8465
9743
|
for (const answer of parseResult.answers) {
|
|
@@ -8468,6 +9746,8 @@ function createApplicationPipeline(config) {
|
|
|
8468
9746
|
field.value = answer.value;
|
|
8469
9747
|
field.source = "user";
|
|
8470
9748
|
field.confidence = "confirmed";
|
|
9749
|
+
field.userSourceSpanIds = replySourceSpanIds;
|
|
9750
|
+
field.validationStatus = "valid";
|
|
8471
9751
|
fieldsFilled++;
|
|
8472
9752
|
}
|
|
8473
9753
|
}
|
|
@@ -8499,7 +9779,8 @@ function createApplicationPipeline(config) {
|
|
|
8499
9779
|
targetFields,
|
|
8500
9780
|
availableData,
|
|
8501
9781
|
generateObject,
|
|
8502
|
-
providerOptions
|
|
9782
|
+
providerOptions,
|
|
9783
|
+
resolveBudget("application_lookup", 4096).maxTokens
|
|
8503
9784
|
);
|
|
8504
9785
|
trackUsage(lookupUsage);
|
|
8505
9786
|
for (const fill of lookupResult.fills) {
|
|
@@ -8508,6 +9789,10 @@ function createApplicationPipeline(config) {
|
|
|
8508
9789
|
field.value = fill.value;
|
|
8509
9790
|
field.source = `lookup: ${fill.source}`;
|
|
8510
9791
|
field.confidence = "high";
|
|
9792
|
+
field.validationStatus = fill.sourceSpanIds?.length ? "valid" : "needs_review";
|
|
9793
|
+
if (fill.sourceSpanIds?.length) {
|
|
9794
|
+
field.sourceSpanIds = fill.sourceSpanIds;
|
|
9795
|
+
}
|
|
8511
9796
|
fieldsFilled++;
|
|
8512
9797
|
}
|
|
8513
9798
|
}
|
|
@@ -8522,7 +9807,7 @@ function createApplicationPipeline(config) {
|
|
|
8522
9807
|
prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
|
|
8523
9808
|
|
|
8524
9809
|
Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
|
|
8525
|
-
maxTokens: 512,
|
|
9810
|
+
maxTokens: resolveBudget("application_email", 512).maxTokens,
|
|
8526
9811
|
providerOptions
|
|
8527
9812
|
});
|
|
8528
9813
|
trackUsage(usage);
|
|
@@ -8570,7 +9855,8 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
|
|
|
8570
9855
|
companyName: context?.companyName
|
|
8571
9856
|
},
|
|
8572
9857
|
generateText,
|
|
8573
|
-
providerOptions
|
|
9858
|
+
providerOptions,
|
|
9859
|
+
resolveBudget("application_email", 2048).maxTokens
|
|
8574
9860
|
);
|
|
8575
9861
|
trackUsage(emailUsage);
|
|
8576
9862
|
const emailReview = reviewBatchEmail(emailText, nextBatchFields);
|
|
@@ -8628,7 +9914,8 @@ ${emailText}`;
|
|
|
8628
9914
|
previousBatchSummary: opts?.previousBatchSummary
|
|
8629
9915
|
},
|
|
8630
9916
|
generateText,
|
|
8631
|
-
providerOptions
|
|
9917
|
+
providerOptions,
|
|
9918
|
+
resolveBudget("application_email", 2048).maxTokens
|
|
8632
9919
|
);
|
|
8633
9920
|
trackUsage(usage);
|
|
8634
9921
|
const emailReview = reviewBatchEmail(text, batchFields);
|
|
@@ -8652,7 +9939,7 @@ Application: ${state.title ?? "Insurance Application"}
|
|
|
8652
9939
|
|
|
8653
9940
|
Fields:
|
|
8654
9941
|
${fieldSummary}`,
|
|
8655
|
-
maxTokens: 4096,
|
|
9942
|
+
maxTokens: resolveBudget("application_email", 4096).maxTokens,
|
|
8656
9943
|
providerOptions
|
|
8657
9944
|
});
|
|
8658
9945
|
trackUsage(usage);
|
|
@@ -8660,7 +9947,7 @@ ${fieldSummary}`,
|
|
|
8660
9947
|
}
|
|
8661
9948
|
return {
|
|
8662
9949
|
processApplication,
|
|
8663
|
-
processReply,
|
|
9950
|
+
processReply: processReply2,
|
|
8664
9951
|
generateCurrentBatchEmail,
|
|
8665
9952
|
getConfirmationSummary
|
|
8666
9953
|
};
|
|
@@ -8777,91 +10064,104 @@ Respond with the final answer, deduplicated citations array, overall confidence
|
|
|
8777
10064
|
}
|
|
8778
10065
|
|
|
8779
10066
|
// src/schemas/query.ts
|
|
8780
|
-
import { z as
|
|
8781
|
-
var QueryIntentSchema =
|
|
10067
|
+
import { z as z42 } from "zod";
|
|
10068
|
+
var QueryIntentSchema = z42.enum([
|
|
8782
10069
|
"policy_question",
|
|
8783
10070
|
"coverage_comparison",
|
|
8784
10071
|
"document_search",
|
|
8785
10072
|
"claims_inquiry",
|
|
8786
10073
|
"general_knowledge"
|
|
8787
10074
|
]);
|
|
8788
|
-
var QueryAttachmentKindSchema =
|
|
8789
|
-
var QueryAttachmentSchema =
|
|
8790
|
-
id:
|
|
10075
|
+
var QueryAttachmentKindSchema = z42.enum(["image", "pdf", "text"]);
|
|
10076
|
+
var QueryAttachmentSchema = z42.object({
|
|
10077
|
+
id: z42.string().optional().describe("Optional stable attachment ID from the caller"),
|
|
8791
10078
|
kind: QueryAttachmentKindSchema,
|
|
8792
|
-
name:
|
|
8793
|
-
mimeType:
|
|
8794
|
-
base64:
|
|
8795
|
-
text:
|
|
8796
|
-
description:
|
|
8797
|
-
});
|
|
8798
|
-
var
|
|
8799
|
-
|
|
10079
|
+
name: z42.string().optional().describe("Original filename or user-facing label"),
|
|
10080
|
+
mimeType: z42.string().optional().describe("MIME type such as image/jpeg or application/pdf"),
|
|
10081
|
+
base64: z42.string().optional().describe("Base64-encoded file content for image/pdf attachments"),
|
|
10082
|
+
text: z42.string().optional().describe("Plain-text attachment content when available"),
|
|
10083
|
+
description: z42.string().optional().describe("Caller-provided description of the attachment")
|
|
10084
|
+
});
|
|
10085
|
+
var QueryRetrievalModeSchema = z42.enum([
|
|
10086
|
+
"graph_only",
|
|
10087
|
+
"source_rag",
|
|
10088
|
+
"long_context",
|
|
10089
|
+
"hybrid"
|
|
10090
|
+
]);
|
|
10091
|
+
var SubQuestionSchema = z42.object({
|
|
10092
|
+
question: z42.string().describe("Atomic sub-question to retrieve and answer independently"),
|
|
8800
10093
|
intent: QueryIntentSchema,
|
|
8801
|
-
chunkTypes:
|
|
8802
|
-
documentFilters:
|
|
8803
|
-
type:
|
|
8804
|
-
carrier:
|
|
8805
|
-
insuredName:
|
|
8806
|
-
policyNumber:
|
|
8807
|
-
quoteNumber:
|
|
8808
|
-
policyTypes:
|
|
10094
|
+
chunkTypes: z42.array(z42.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
|
|
10095
|
+
documentFilters: z42.object({
|
|
10096
|
+
type: z42.enum(["policy", "quote"]).optional(),
|
|
10097
|
+
carrier: z42.string().optional(),
|
|
10098
|
+
insuredName: z42.string().optional(),
|
|
10099
|
+
policyNumber: z42.string().optional(),
|
|
10100
|
+
quoteNumber: z42.string().optional(),
|
|
10101
|
+
policyTypes: z42.array(PolicyTypeSchema).optional().describe("Filter by policy type (e.g. homeowners_ho3, renters_ho4, pet) to avoid mixing up similar policies")
|
|
8809
10102
|
}).optional().describe("Structured filters to narrow document lookup")
|
|
8810
10103
|
});
|
|
8811
|
-
var QueryClassifyResultSchema =
|
|
10104
|
+
var QueryClassifyResultSchema = z42.object({
|
|
8812
10105
|
intent: QueryIntentSchema,
|
|
8813
|
-
subQuestions:
|
|
8814
|
-
requiresDocumentLookup:
|
|
8815
|
-
requiresChunkSearch:
|
|
8816
|
-
requiresConversationHistory:
|
|
8817
|
-
|
|
8818
|
-
|
|
8819
|
-
|
|
8820
|
-
|
|
8821
|
-
|
|
8822
|
-
|
|
8823
|
-
|
|
8824
|
-
|
|
8825
|
-
|
|
8826
|
-
|
|
8827
|
-
|
|
8828
|
-
|
|
8829
|
-
|
|
8830
|
-
|
|
8831
|
-
|
|
8832
|
-
|
|
8833
|
-
|
|
8834
|
-
|
|
8835
|
-
|
|
8836
|
-
|
|
8837
|
-
});
|
|
8838
|
-
var
|
|
8839
|
-
|
|
8840
|
-
|
|
8841
|
-
|
|
8842
|
-
|
|
8843
|
-
|
|
8844
|
-
|
|
8845
|
-
|
|
8846
|
-
|
|
8847
|
-
|
|
8848
|
-
|
|
8849
|
-
|
|
8850
|
-
|
|
8851
|
-
|
|
8852
|
-
|
|
8853
|
-
});
|
|
8854
|
-
var
|
|
8855
|
-
|
|
8856
|
-
|
|
8857
|
-
|
|
8858
|
-
|
|
8859
|
-
|
|
8860
|
-
|
|
8861
|
-
|
|
10106
|
+
subQuestions: z42.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
|
|
10107
|
+
requiresDocumentLookup: z42.boolean().describe("Whether structured document lookup is needed"),
|
|
10108
|
+
requiresChunkSearch: z42.boolean().describe("Whether semantic chunk search is needed"),
|
|
10109
|
+
requiresConversationHistory: z42.boolean().describe("Whether conversation history is relevant"),
|
|
10110
|
+
retrievalMode: QueryRetrievalModeSchema.optional().describe("Preferred retrieval strategy for the query when source-span retrieval is available")
|
|
10111
|
+
});
|
|
10112
|
+
var EvidenceItemSchema = z42.object({
|
|
10113
|
+
source: z42.enum(["chunk", "document", "conversation", "attachment", "source_span"]),
|
|
10114
|
+
chunkId: z42.string().optional(),
|
|
10115
|
+
sourceSpanId: z42.string().optional(),
|
|
10116
|
+
documentId: z42.string().optional(),
|
|
10117
|
+
turnId: z42.string().optional(),
|
|
10118
|
+
attachmentId: z42.string().optional(),
|
|
10119
|
+
text: z42.string().describe("Text excerpt from the source"),
|
|
10120
|
+
relevance: z42.number().min(0).max(1),
|
|
10121
|
+
retrievalMode: QueryRetrievalModeSchema.optional(),
|
|
10122
|
+
sourceLocation: SourceSpanLocationSchema.optional(),
|
|
10123
|
+
metadata: z42.array(z42.object({ key: z42.string(), value: z42.string() })).optional()
|
|
10124
|
+
});
|
|
10125
|
+
var AttachmentInterpretationSchema = z42.object({
|
|
10126
|
+
summary: z42.string().describe("Concise summary of what the attachment shows or contains"),
|
|
10127
|
+
extractedFacts: z42.array(z42.string()).describe("Specific observable or document facts grounded in the attachment"),
|
|
10128
|
+
recommendedFocus: z42.array(z42.string()).describe("Important details to incorporate when answering follow-up questions"),
|
|
10129
|
+
confidence: z42.number().min(0).max(1)
|
|
10130
|
+
});
|
|
10131
|
+
var RetrievalResultSchema = z42.object({
|
|
10132
|
+
subQuestion: z42.string(),
|
|
10133
|
+
evidence: z42.array(EvidenceItemSchema)
|
|
10134
|
+
});
|
|
10135
|
+
var CitationSchema = z42.object({
|
|
10136
|
+
index: z42.number().describe("Citation number [1], [2], etc."),
|
|
10137
|
+
chunkId: z42.string().optional().describe("Source chunk ID, e.g. doc-123:coverage:2"),
|
|
10138
|
+
sourceSpanId: z42.string().optional().describe("Precise source span ID when available"),
|
|
10139
|
+
documentId: z42.string(),
|
|
10140
|
+
documentType: z42.enum(["policy", "quote"]).optional(),
|
|
10141
|
+
field: z42.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
|
|
10142
|
+
quote: z42.string().describe("Exact text from source that supports the claim"),
|
|
10143
|
+
relevance: z42.number().min(0).max(1),
|
|
10144
|
+
retrievalMode: QueryRetrievalModeSchema.optional(),
|
|
10145
|
+
sourceLocation: SourceSpanLocationSchema.optional()
|
|
10146
|
+
});
|
|
10147
|
+
var SubAnswerSchema = z42.object({
|
|
10148
|
+
subQuestion: z42.string(),
|
|
10149
|
+
answer: z42.string(),
|
|
10150
|
+
citations: z42.array(CitationSchema),
|
|
10151
|
+
confidence: z42.number().min(0).max(1),
|
|
10152
|
+
needsMoreContext: z42.boolean().describe("True if evidence was insufficient to answer fully")
|
|
10153
|
+
});
|
|
10154
|
+
var VerifyResultSchema = z42.object({
|
|
10155
|
+
approved: z42.boolean().describe("Whether all sub-answers are adequately grounded"),
|
|
10156
|
+
issues: z42.array(z42.string()).describe("Specific grounding or consistency issues found"),
|
|
10157
|
+
retrySubQuestions: z42.array(z42.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
|
|
10158
|
+
});
|
|
10159
|
+
var QueryResultSchema = z42.object({
|
|
10160
|
+
answer: z42.string(),
|
|
10161
|
+
citations: z42.array(CitationSchema),
|
|
8862
10162
|
intent: QueryIntentSchema,
|
|
8863
|
-
confidence:
|
|
8864
|
-
followUp:
|
|
10163
|
+
confidence: z42.number().min(0).max(1),
|
|
10164
|
+
followUp: z42.string().optional().describe("Suggested follow-up question if applicable")
|
|
8865
10165
|
});
|
|
8866
10166
|
|
|
8867
10167
|
// src/query/retriever.ts
|
|
@@ -8869,23 +10169,69 @@ function recordToKVArray(record) {
|
|
|
8869
10169
|
return Object.entries(record).map(([key, value]) => ({ key, value }));
|
|
8870
10170
|
}
|
|
8871
10171
|
async function retrieve(subQuestion, conversationId, config) {
|
|
8872
|
-
const { documentStore, memoryStore, retrievalLimit, log } = config;
|
|
10172
|
+
const { documentStore, memoryStore, sourceRetriever, retrievalLimit, retrievalMode, log } = config;
|
|
8873
10173
|
const evidence = [];
|
|
8874
10174
|
const tasks = [];
|
|
8875
|
-
|
|
8876
|
-
(
|
|
8877
|
-
|
|
8878
|
-
|
|
8879
|
-
|
|
8880
|
-
|
|
8881
|
-
|
|
8882
|
-
|
|
8883
|
-
|
|
8884
|
-
|
|
8885
|
-
|
|
8886
|
-
|
|
8887
|
-
|
|
8888
|
-
|
|
10175
|
+
if (retrievalMode === "source_rag" || retrievalMode === "hybrid" || retrievalMode === "long_context") {
|
|
10176
|
+
tasks.push(
|
|
10177
|
+
(async () => {
|
|
10178
|
+
try {
|
|
10179
|
+
const sourceResults = await sourceRetriever?.searchSourceSpans({
|
|
10180
|
+
question: subQuestion.question,
|
|
10181
|
+
limit: retrievalLimit,
|
|
10182
|
+
mode: retrievalMode
|
|
10183
|
+
}) ?? [];
|
|
10184
|
+
for (const result of sourceResults) {
|
|
10185
|
+
evidence.push({
|
|
10186
|
+
source: "source_span",
|
|
10187
|
+
sourceSpanId: result.span.id,
|
|
10188
|
+
chunkId: result.span.chunkId,
|
|
10189
|
+
documentId: result.span.documentId,
|
|
10190
|
+
text: result.span.text,
|
|
10191
|
+
relevance: result.relevance,
|
|
10192
|
+
retrievalMode,
|
|
10193
|
+
sourceLocation: result.span.location,
|
|
10194
|
+
metadata: result.span.metadata ? recordToKVArray(result.span.metadata) : void 0
|
|
10195
|
+
});
|
|
10196
|
+
}
|
|
10197
|
+
} catch (e) {
|
|
10198
|
+
await log?.(`Source span search failed for "${subQuestion.question}": ${e}`);
|
|
10199
|
+
}
|
|
10200
|
+
})()
|
|
10201
|
+
);
|
|
10202
|
+
}
|
|
10203
|
+
if (retrievalMode === "graph_only" || retrievalMode === "hybrid" || !sourceRetriever) {
|
|
10204
|
+
tasks.push(
|
|
10205
|
+
(async () => {
|
|
10206
|
+
try {
|
|
10207
|
+
const filter = {};
|
|
10208
|
+
if (subQuestion.chunkTypes?.length) {
|
|
10209
|
+
const chunkResults = await Promise.all(
|
|
10210
|
+
subQuestion.chunkTypes.map(
|
|
10211
|
+
(type) => memoryStore.search(subQuestion.question, {
|
|
10212
|
+
limit: Math.ceil(retrievalLimit / subQuestion.chunkTypes.length),
|
|
10213
|
+
filter: { ...filter, type }
|
|
10214
|
+
})
|
|
10215
|
+
)
|
|
10216
|
+
);
|
|
10217
|
+
for (const chunks of chunkResults) {
|
|
10218
|
+
for (const chunk of chunks) {
|
|
10219
|
+
evidence.push({
|
|
10220
|
+
source: "chunk",
|
|
10221
|
+
chunkId: chunk.id,
|
|
10222
|
+
documentId: chunk.documentId,
|
|
10223
|
+
text: chunk.text,
|
|
10224
|
+
relevance: 0.8,
|
|
10225
|
+
// Default — store doesn't expose scores directly
|
|
10226
|
+
retrievalMode,
|
|
10227
|
+
metadata: recordToKVArray(chunk.metadata)
|
|
10228
|
+
});
|
|
10229
|
+
}
|
|
10230
|
+
}
|
|
10231
|
+
} else {
|
|
10232
|
+
const chunks = await memoryStore.search(subQuestion.question, {
|
|
10233
|
+
limit: retrievalLimit
|
|
10234
|
+
});
|
|
8889
10235
|
for (const chunk of chunks) {
|
|
8890
10236
|
evidence.push({
|
|
8891
10237
|
source: "chunk",
|
|
@@ -8893,32 +10239,18 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
8893
10239
|
documentId: chunk.documentId,
|
|
8894
10240
|
text: chunk.text,
|
|
8895
10241
|
relevance: 0.8,
|
|
8896
|
-
|
|
10242
|
+
retrievalMode,
|
|
8897
10243
|
metadata: recordToKVArray(chunk.metadata)
|
|
8898
10244
|
});
|
|
8899
10245
|
}
|
|
8900
10246
|
}
|
|
8901
|
-
}
|
|
8902
|
-
|
|
8903
|
-
limit: retrievalLimit
|
|
8904
|
-
});
|
|
8905
|
-
for (const chunk of chunks) {
|
|
8906
|
-
evidence.push({
|
|
8907
|
-
source: "chunk",
|
|
8908
|
-
chunkId: chunk.id,
|
|
8909
|
-
documentId: chunk.documentId,
|
|
8910
|
-
text: chunk.text,
|
|
8911
|
-
relevance: 0.8,
|
|
8912
|
-
metadata: recordToKVArray(chunk.metadata)
|
|
8913
|
-
});
|
|
8914
|
-
}
|
|
10247
|
+
} catch (e) {
|
|
10248
|
+
await log?.(`Chunk search failed for "${subQuestion.question}": ${e}`);
|
|
8915
10249
|
}
|
|
8916
|
-
}
|
|
8917
|
-
|
|
8918
|
-
|
|
8919
|
-
|
|
8920
|
-
);
|
|
8921
|
-
if (subQuestion.documentFilters) {
|
|
10250
|
+
})()
|
|
10251
|
+
);
|
|
10252
|
+
}
|
|
10253
|
+
if (subQuestion.documentFilters && (retrievalMode === "graph_only" || retrievalMode === "hybrid" || retrievalMode === "long_context")) {
|
|
8922
10254
|
tasks.push(
|
|
8923
10255
|
(async () => {
|
|
8924
10256
|
try {
|
|
@@ -8937,6 +10269,7 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
8937
10269
|
text: summary,
|
|
8938
10270
|
relevance: 0.9,
|
|
8939
10271
|
// Direct lookup is high relevance
|
|
10272
|
+
retrievalMode,
|
|
8940
10273
|
metadata: [
|
|
8941
10274
|
{ key: "type", value: doc.type },
|
|
8942
10275
|
{ key: "carrier", value: doc.carrier ?? "" },
|
|
@@ -8963,8 +10296,9 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
8963
10296
|
source: "conversation",
|
|
8964
10297
|
turnId: turn.id,
|
|
8965
10298
|
text: `[${turn.role}]: ${turn.content}`,
|
|
8966
|
-
relevance: 0.6
|
|
10299
|
+
relevance: 0.6,
|
|
8967
10300
|
// Conversation context is lower relevance than documents
|
|
10301
|
+
retrievalMode
|
|
8968
10302
|
});
|
|
8969
10303
|
}
|
|
8970
10304
|
} catch (e) {
|
|
@@ -8974,10 +10308,10 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
8974
10308
|
);
|
|
8975
10309
|
}
|
|
8976
10310
|
await Promise.all(tasks);
|
|
8977
|
-
|
|
10311
|
+
const orderedEvidence = orderSourceEvidence(evidence);
|
|
8978
10312
|
return {
|
|
8979
10313
|
subQuestion: subQuestion.question,
|
|
8980
|
-
evidence:
|
|
10314
|
+
evidence: orderedEvidence.slice(0, retrievalLimit)
|
|
8981
10315
|
};
|
|
8982
10316
|
}
|
|
8983
10317
|
function buildDocumentSummary(doc) {
|
|
@@ -9063,16 +10397,22 @@ Answer the sub-question based on the evidence above. For every factual claim, in
|
|
|
9063
10397
|
async function reason(subQuestion, intent, evidence, config) {
|
|
9064
10398
|
const { generateObject, providerOptions } = config;
|
|
9065
10399
|
const evidenceText = evidence.map((e, i) => {
|
|
9066
|
-
const sourceLabel = e.source === "chunk" ? `[chunk:${e.chunkId}]` : e.source === "document" ? `[doc:${e.documentId}]` : `[turn:${e.turnId}]`;
|
|
10400
|
+
const sourceLabel = e.source === "source_span" ? `[source-span:${e.sourceSpanId}]` : e.source === "chunk" ? `[chunk:${e.chunkId}]` : e.source === "document" ? `[doc:${e.documentId}]` : e.source === "attachment" ? `[attachment:${e.attachmentId}]` : `[turn:${e.turnId}]`;
|
|
9067
10401
|
return `Evidence ${i + 1} ${sourceLabel} (relevance: ${e.relevance.toFixed(2)}):
|
|
9068
10402
|
${e.text}`;
|
|
9069
10403
|
}).join("\n\n");
|
|
9070
10404
|
const prompt = buildReasonPrompt(subQuestion, intent, evidenceText);
|
|
10405
|
+
const budget = resolveModelBudget({
|
|
10406
|
+
taskKind: "query_reason",
|
|
10407
|
+
hintTokens: 4096,
|
|
10408
|
+
modelCapabilities: config.modelCapabilities,
|
|
10409
|
+
constraint: config.modelBudgetConstraints?.query_reason
|
|
10410
|
+
});
|
|
9071
10411
|
const { object, usage } = await withRetry(
|
|
9072
10412
|
() => generateObject({
|
|
9073
10413
|
prompt,
|
|
9074
10414
|
schema: SubAnswerSchema,
|
|
9075
|
-
maxTokens:
|
|
10415
|
+
maxTokens: budget.maxTokens,
|
|
9076
10416
|
providerOptions
|
|
9077
10417
|
})
|
|
9078
10418
|
);
|
|
@@ -9112,49 +10452,41 @@ RESPOND WITH:
|
|
|
9112
10452
|
- retrySubQuestions: sub-questions that need re-retrieval or re-reasoning (only if not approved)`;
|
|
9113
10453
|
}
|
|
9114
10454
|
|
|
9115
|
-
// src/query/verifier.ts
|
|
9116
|
-
async function verify(originalQuestion, subAnswers, allEvidence, config) {
|
|
9117
|
-
const { generateObject, providerOptions } = config;
|
|
9118
|
-
const subAnswersJson = JSON.stringify(
|
|
9119
|
-
subAnswers.map((sa) => ({
|
|
9120
|
-
subQuestion: sa.subQuestion,
|
|
9121
|
-
answer: sa.answer,
|
|
9122
|
-
citations: sa.citations,
|
|
9123
|
-
confidence: sa.confidence,
|
|
9124
|
-
needsMoreContext: sa.needsMoreContext
|
|
9125
|
-
})),
|
|
9126
|
-
null,
|
|
9127
|
-
2
|
|
9128
|
-
);
|
|
9129
|
-
const evidenceJson = JSON.stringify(
|
|
9130
|
-
allEvidence.map((e) => ({
|
|
9131
|
-
source: e.source,
|
|
9132
|
-
id: e.chunkId ?? e.documentId ?? e.turnId,
|
|
9133
|
-
text: e.text.slice(0, 500),
|
|
9134
|
-
// Truncate for context efficiency
|
|
9135
|
-
relevance: e.relevance
|
|
9136
|
-
})),
|
|
9137
|
-
null,
|
|
9138
|
-
2
|
|
9139
|
-
);
|
|
9140
|
-
const prompt = buildVerifyPrompt(originalQuestion, subAnswersJson, evidenceJson);
|
|
9141
|
-
const { object, usage } = await withRetry(
|
|
9142
|
-
() => generateObject({
|
|
9143
|
-
prompt,
|
|
9144
|
-
schema: VerifyResultSchema,
|
|
9145
|
-
maxTokens: 2048,
|
|
9146
|
-
providerOptions
|
|
9147
|
-
})
|
|
9148
|
-
);
|
|
9149
|
-
return { result: object, usage };
|
|
9150
|
-
}
|
|
9151
|
-
|
|
9152
10455
|
// src/query/quality.ts
|
|
9153
10456
|
/**
 * Returns the identifier used to key an evidence item.
 *
 * Candidates are tried from most to least specific: source span, chunk,
 * document, conversation turn, and finally attachment. Only `null` and
 * `undefined` are treated as "missing"; an empty string is returned as-is.
 *
 * @param {object} evidence - Evidence item carrying any of the id fields.
 * @returns {string|null|undefined} The first non-nullish id, otherwise
 *   whatever `attachmentId` holds.
 */
function sourceIdForEvidence(evidence) {
  const { sourceSpanId, chunkId, documentId, turnId, attachmentId } = evidence;
  for (const candidate of [sourceSpanId, chunkId, documentId, turnId]) {
    if (candidate !== null && candidate !== undefined) {
      return candidate;
    }
  }
  return attachmentId;
}
|
|
9156
10459
|
/**
 * Returns the identifier a citation points at.
 *
 * Preference order is source span, then chunk, then document. Unlike a
 * nullish check, any falsy value (including an empty string) is skipped in
 * favour of the next candidate.
 *
 * @param {object} citation - Citation with optional `sourceSpanId` /
 *   `chunkId` and a `documentId`.
 * @returns {string|undefined} The first truthy id, otherwise `documentId`.
 */
function citationSourceId(citation) {
  if (citation.sourceSpanId) {
    return citation.sourceSpanId;
  }
  if (citation.chunkId) {
    return citation.chunkId;
  }
  return citation.documentId;
}
|
|
10462
|
+
/**
 * Reports whether a list of evidence items contains at least one item that
 * comes from a chunk or a source span.
 *
 * @param {Array<{source: string}>} evidence - Evidence items to inspect.
 * @returns {boolean} `true` when any item has source `"chunk"` or
 *   `"source_span"`, otherwise `false` (including for an empty list).
 */
function hasGroundingEvidence(evidence) {
  const groundingSources = ["chunk", "source_span"];
  const isGrounding = (item) => groundingSources.includes(item.source);
  return evidence.some(isGrounding);
}
|
|
10465
|
+
/**
 * Heuristically detects text that makes a numeric, date, or contractual
 * claim, i.e. the kind of statement that must be backed by a citation.
 *
 * The text is flagged when it contains any of:
 *  - a currency amount (`$`, `€` or `£` followed by a digit),
 *  - a number followed by `%`, "percent", or a day/month/year unit,
 *  - a numeric date (`1/2/2024`, `01-02-24`, `2024-01-15`),
 *  - a written date ("January 5, 2024"),
 *  - a contractual keyword (shall, must, deductible, exclusion, ...).
 *
 * @param {string} text - Answer or citation quote to inspect.
 * @returns {boolean} `true` when at least one pattern matches.
 */
function containsQuotedNumericDateOrContractualClaim(text) {
  const normalized = text.toLowerCase();
  const currencyPattern = /[$€£]\s?\d/;
  // `%` is a non-word character, so a trailing `\b` after it only matches
  // when a letter or digit follows directly; that made "20%" and "20% of"
  // invisible. The boundary is therefore applied to the unit words only.
  // `\d+` (instead of `\d{1,3}`) also accepts un-grouped numbers such as
  // "1000 days", and the `i` flag accepts "30 Days" / "10 Percent".
  const quantityPattern = /\b\d+(?:,\d{3})*(?:\.\d+)?\s?(?:%|(?:percent|days?|months?|years?)\b)/i;
  const numericDatePattern = /\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{4}-\d{2}-\d{2}\b/;
  const writtenDatePattern = /\b(?:january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{1,2},?\s+\d{4}\b/i;
  // Matched against the lower-cased text, so the pattern itself stays lower-case.
  const contractualPattern = /\b(?:shall|must|required|subject to|excluded|exclusion|condition|endorsement|deductible|limit|premium|retention)\b/;
  return (
    currencyPattern.test(text) ||
    quantityPattern.test(text) ||
    numericDatePattern.test(text) ||
    writtenDatePattern.test(text) ||
    contractualPattern.test(normalized)
  );
}
|
|
10469
|
+
/**
 * Runs rule-based grounding checks over sub-answers, independent of any
 * model judgement.
 *
 * Two kinds of issue are reported:
 *  1. A sub-answer that does not ask for more context, has no citations,
 *     and whose answer text contains a numeric, date, or contractual claim.
 *  2. A citation whose quote contains such a claim but whose cited source id
 *     does not resolve to any chunk or source-span evidence.
 *
 * @param {Array<object>} subAnswers - Sub-answers with `subQuestion`,
 *   `answer`, `citations` and `needsMoreContext`.
 * @param {Array<object>} evidence - All evidence items gathered for the query.
 * @returns {string[]} Human-readable issue messages (empty when none found).
 */
function deterministicQueryGroundingIssues(subAnswers, evidence) {
  // Group evidence by the id returned from sourceIdForEvidence; items
  // without a usable id cannot be cited and are left out.
  const evidenceBySource = /* @__PURE__ */ new Map();
  for (const item of evidence) {
    const key = sourceIdForEvidence(item);
    if (!key) continue;
    const bucket = evidenceBySource.get(key);
    if (bucket) {
      bucket.push(item);
    } else {
      evidenceBySource.set(key, [item]);
    }
  }

  const issues = [];
  for (const subAnswer of subAnswers) {
    const isUncitedClaim =
      !subAnswer.needsMoreContext &&
      subAnswer.citations.length === 0 &&
      containsQuotedNumericDateOrContractualClaim(subAnswer.answer);
    if (isUncitedClaim) {
      issues.push(`Sub-answer "${subAnswer.subQuestion}" contains a numeric, date, or contractual claim without citations.`);
    }

    for (const citation of subAnswer.citations) {
      const sourceId = citationSourceId(citation);
      const supportedEvidence = sourceId ? evidenceBySource.get(sourceId) ?? [] : [];
      if (!containsQuotedNumericDateOrContractualClaim(citation.quote)) continue;
      if (hasGroundingEvidence(supportedEvidence)) continue;
      issues.push(`Citation [${citation.index}] in "${subAnswer.subQuestion}" supports a numeric, date, or contractual claim without chunk or source-span evidence.`);
    }
  }
  return issues;
}
|
|
9159
10491
|
function buildQueryReviewReport(params) {
|
|
9160
10492
|
const { subAnswers, evidence, finalResult, verifyRounds } = params;
|
|
@@ -9207,6 +10539,16 @@ function buildQueryReviewReport(params) {
|
|
|
9207
10539
|
sourceId
|
|
9208
10540
|
});
|
|
9209
10541
|
}
|
|
10542
|
+
if (containsQuotedNumericDateOrContractualClaim(citation.quote) && !hasGroundingEvidence(supportedEvidence)) {
|
|
10543
|
+
issues.push({
|
|
10544
|
+
code: "citation_claim_lacks_chunk_or_source_span",
|
|
10545
|
+
severity: "blocking",
|
|
10546
|
+
message: `Citation [${citation.index}] in "${subAnswer.subQuestion}" supports a numeric, date, or contractual claim without chunk or source-span evidence.`,
|
|
10547
|
+
subQuestion: subAnswer.subQuestion,
|
|
10548
|
+
citationIndex: citation.index,
|
|
10549
|
+
sourceId
|
|
10550
|
+
});
|
|
10551
|
+
}
|
|
9210
10552
|
}
|
|
9211
10553
|
}
|
|
9212
10554
|
if (finalResult) {
|
|
@@ -9218,10 +10560,10 @@ function buildQueryReviewReport(params) {
|
|
|
9218
10560
|
});
|
|
9219
10561
|
}
|
|
9220
10562
|
const knownCitationIds = new Set(
|
|
9221
|
-
subAnswers.flatMap((sa) => sa.citations.map((citation) => `${citation.index}|${citation.chunkId}|${citation.documentId}`))
|
|
10563
|
+
subAnswers.flatMap((sa) => sa.citations.map((citation) => `${citation.index}|${citation.sourceSpanId ?? ""}|${citation.chunkId ?? ""}|${citation.documentId}`))
|
|
9222
10564
|
);
|
|
9223
10565
|
for (const citation of finalResult.citations) {
|
|
9224
|
-
const key = `${citation.index}|${citation.chunkId}|${citation.documentId}`;
|
|
10566
|
+
const key = `${citation.index}|${citation.sourceSpanId ?? ""}|${citation.chunkId ?? ""}|${citation.documentId}`;
|
|
9225
10567
|
if (!knownCitationIds.has(key)) {
|
|
9226
10568
|
issues.push({
|
|
9227
10569
|
code: "final_answer_unknown_citation",
|
|
@@ -9255,6 +10597,67 @@ function buildQueryReviewReport(params) {
|
|
|
9255
10597
|
};
|
|
9256
10598
|
}
|
|
9257
10599
|
|
|
10600
|
+
// src/query/verifier.ts
|
|
10601
|
+
/**
 * Verifies that sub-answers are grounded in the retrieved evidence.
 *
 * A model-based check runs first (structured output validated against
 * `VerifyResultSchema`). The deterministic grounding rules are then applied;
 * if they report anything, the result is forced to `approved: false`, the
 * rule messages are merged into `issues`, and every sub-question named in a
 * rule message is added to `retrySubQuestions`.
 *
 * @param {string} originalQuestion - The user's original question.
 * @param {Array<object>} subAnswers - Sub-answers produced by the reasoner.
 * @param {Array<object>} allEvidence - Every evidence item available.
 * @param {object} config - Provides `generateObject`, `providerOptions`,
 *   `modelCapabilities` and `modelBudgetConstraints`.
 * @returns {Promise<{result: object, usage: object}>} Verification result
 *   plus the usage reported by the model call.
 */
async function verify(originalQuestion, subAnswers, allEvidence, config) {
  const { generateObject, providerOptions } = config;

  const serialize = (value) => JSON.stringify(value, null, 2);
  const subAnswersJson = serialize(
    subAnswers.map(({ subQuestion, answer, citations, confidence, needsMoreContext }) => ({
      subQuestion,
      answer,
      citations,
      confidence,
      needsMoreContext
    }))
  );
  const evidenceJson = serialize(
    allEvidence.map((item) => ({
      source: item.source,
      id: item.sourceSpanId ?? item.chunkId ?? item.documentId ?? item.turnId ?? item.attachmentId,
      chunkId: item.chunkId,
      sourceSpanId: item.sourceSpanId,
      // Truncate for context efficiency
      text: item.text.slice(0, 500),
      relevance: item.relevance
    }))
  );

  const prompt = buildVerifyPrompt(originalQuestion, subAnswersJson, evidenceJson);
  const { maxTokens } = resolveModelBudget({
    taskKind: "query_verify",
    hintTokens: 2048,
    modelCapabilities: config.modelCapabilities,
    constraint: config.modelBudgetConstraints?.query_verify
  });
  const { object: result, usage } = await withRetry(
    () => generateObject({
      prompt,
      schema: VerifyResultSchema,
      maxTokens,
      providerOptions
    })
  );

  const deterministicIssues = deterministicQueryGroundingIssues(subAnswers, allEvidence);
  if (deterministicIssues.length === 0) {
    return { result, usage };
  }

  // Rule messages quote the sub-question they refer to, which is how the
  // affected sub-questions are recovered for retry.
  const flaggedSubQuestions = subAnswers
    .filter((answer) => deterministicIssues.some((issue) => issue.includes(`"${answer.subQuestion}"`)))
    .map((answer) => answer.subQuestion);

  return {
    result: {
      ...result,
      approved: false,
      issues: [.../* @__PURE__ */ new Set([...result.issues, ...deterministicIssues])],
      retrySubQuestions: [.../* @__PURE__ */ new Set([...result.retrySubQuestions ?? [], ...flaggedSubQuestions])]
    },
    usage
  };
}
|
|
10660
|
+
|
|
9258
10661
|
// src/prompts/query/interpret-attachment.ts
|
|
9259
10662
|
function buildInterpretAttachmentPrompt(question, attachment) {
|
|
9260
10663
|
const attachmentLabel = attachment.name ?? attachment.id ?? "attachment";
|
|
@@ -9334,7 +10737,7 @@ ${attachment.text}` : null
|
|
|
9334
10737
|
return lines.filter(Boolean).join("\n");
|
|
9335
10738
|
}
|
|
9336
10739
|
async function interpretAttachments(params) {
|
|
9337
|
-
const { attachments = [], question, generateObject, providerOptions, log, onUsage } = params;
|
|
10740
|
+
const { attachments = [], question, generateObject, providerOptions, modelCapabilities, modelBudgetConstraints, log, onUsage } = params;
|
|
9338
10741
|
if (attachments.length === 0) {
|
|
9339
10742
|
return { evidence: [] };
|
|
9340
10743
|
}
|
|
@@ -9363,12 +10766,18 @@ async function interpretAttachments(params) {
|
|
|
9363
10766
|
continue;
|
|
9364
10767
|
}
|
|
9365
10768
|
const prompt = buildInterpretAttachmentPrompt(question, attachment);
|
|
10769
|
+
const budget = resolveModelBudget({
|
|
10770
|
+
taskKind: "query_attachment",
|
|
10771
|
+
hintTokens: 2048,
|
|
10772
|
+
modelCapabilities,
|
|
10773
|
+
constraint: modelBudgetConstraints?.query_attachment
|
|
10774
|
+
});
|
|
9366
10775
|
const { object, usage } = await safeGenerateObject(
|
|
9367
10776
|
generateObject,
|
|
9368
10777
|
{
|
|
9369
10778
|
prompt,
|
|
9370
10779
|
schema: AttachmentInterpretationSchema,
|
|
9371
|
-
maxTokens:
|
|
10780
|
+
maxTokens: budget.maxTokens,
|
|
9372
10781
|
providerOptions: buildAttachmentProviderOptions(attachment, providerOptions)
|
|
9373
10782
|
},
|
|
9374
10783
|
{
|
|
@@ -9405,10 +10814,19 @@ ${item.text}`).join("\n\n");
|
|
|
9405
10814
|
/**
 * Decides whether a classified query needs a retrieval step.
 *
 * Retrieval is needed when the classification asks for a structured document
 * lookup or for a semantic chunk search. The flag value itself is returned,
 * not a coerced boolean.
 *
 * @param {object} classification - Result of query classification.
 * @returns {*} `requiresDocumentLookup` when truthy, otherwise
 *   `requiresChunkSearch`.
 */
function shouldRetrieveForClassification(classification) {
  const { requiresDocumentLookup } = classification;
  if (requiresDocumentLookup) {
    return requiresDocumentLookup;
  }
  return classification.requiresChunkSearch;
}
|
|
10817
|
+
/**
 * Picks the retrieval mode for a query.
 *
 * Precedence is per-call input, then agent configuration, then the mode
 * suggested by classification; the first value that is not `null` or
 * `undefined` is selected. If the selected value is falsy (nothing was
 * supplied, or an empty string was), the default applies: `"hybrid"` when
 * source retrieval is supported, otherwise `"graph_only"`.
 *
 * @param {object} params
 * @param {string} [params.inputMode] - Mode requested on the query input.
 * @param {string} [params.configMode] - Mode configured on the agent.
 * @param {string} [params.classificationMode] - Mode proposed by classification.
 * @param {boolean} [params.supportsSourceRetrieval] - Whether a source
 *   retriever is available.
 * @returns {string} The effective retrieval mode.
 */
function resolveQueryRetrievalMode(params) {
  const { inputMode, configMode, classificationMode, supportsSourceRetrieval } = params;
  const requestedMode = [inputMode, configMode, classificationMode].find((mode) => mode != null);
  if (!requestedMode) {
    return supportsSourceRetrieval ? "hybrid" : "graph_only";
  }
  return requestedMode;
}
|
|
9408
10822
|
function buildInitialQueryWorkflowPlan(params) {
|
|
9409
10823
|
const { classification, attachmentEvidence } = params;
|
|
9410
10824
|
const actions = [];
|
|
9411
10825
|
const shouldRetrieve = shouldRetrieveForClassification(classification);
|
|
10826
|
+
const retrievalMode = params.retrievalMode ?? resolveQueryRetrievalMode({
|
|
10827
|
+
classificationMode: classification.retrievalMode,
|
|
10828
|
+
supportsSourceRetrieval: !!params.supportsSourceRetrieval
|
|
10829
|
+
});
|
|
9412
10830
|
if (shouldRetrieve) {
|
|
9413
10831
|
actions.push({
|
|
9414
10832
|
type: "retrieve",
|
|
@@ -9431,7 +10849,7 @@ function buildInitialQueryWorkflowPlan(params) {
|
|
|
9431
10849
|
reason: "compose final response"
|
|
9432
10850
|
}
|
|
9433
10851
|
);
|
|
9434
|
-
return { actions, shouldRetrieve };
|
|
10852
|
+
return { actions, shouldRetrieve, retrievalMode };
|
|
9435
10853
|
}
|
|
9436
10854
|
function getWorkflowAction(plan, type) {
|
|
9437
10855
|
return plan.actions.find((action) => action.type === type);
|
|
@@ -9444,14 +10862,18 @@ function createQueryAgent(config) {
|
|
|
9444
10862
|
generateObject,
|
|
9445
10863
|
documentStore,
|
|
9446
10864
|
memoryStore,
|
|
10865
|
+
sourceRetriever,
|
|
9447
10866
|
concurrency = 3,
|
|
9448
10867
|
maxVerifyRounds = 1,
|
|
9449
10868
|
retrievalLimit = 10,
|
|
10869
|
+
retrievalMode: configRetrievalMode,
|
|
9450
10870
|
onTokenUsage,
|
|
9451
10871
|
onProgress,
|
|
9452
10872
|
log,
|
|
9453
10873
|
providerOptions,
|
|
9454
|
-
qualityGate = "warn"
|
|
10874
|
+
qualityGate = "warn",
|
|
10875
|
+
modelCapabilities,
|
|
10876
|
+
modelBudgetConstraints
|
|
9455
10877
|
} = config;
|
|
9456
10878
|
const limit = pLimit(concurrency);
|
|
9457
10879
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -9462,6 +10884,14 @@ function createQueryAgent(config) {
|
|
|
9462
10884
|
onTokenUsage?.(usage);
|
|
9463
10885
|
}
|
|
9464
10886
|
}
|
|
10887
|
+
/**
 * Resolves the model budget for one task kind, using the enclosing scope's
 * `modelCapabilities` and the per-task entry of `modelBudgetConstraints`.
 *
 * @param {string} taskKind - Task identifier, also used as the key into
 *   `modelBudgetConstraints`.
 * @param {number} hintTokens - Token hint forwarded to `resolveModelBudget`.
 * @returns {object} Whatever `resolveModelBudget` returns for this request.
 */
function resolveBudget(taskKind, hintTokens) {
  const constraint = modelBudgetConstraints?.[taskKind];
  const request = { taskKind, hintTokens, modelCapabilities, constraint };
  return resolveModelBudget(request);
}
|
|
9465
10895
|
async function query(input) {
|
|
9466
10896
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
9467
10897
|
const { question, conversationId, context, attachments } = input;
|
|
@@ -9474,6 +10904,8 @@ function createQueryAgent(config) {
|
|
|
9474
10904
|
question,
|
|
9475
10905
|
generateObject,
|
|
9476
10906
|
providerOptions,
|
|
10907
|
+
modelCapabilities,
|
|
10908
|
+
modelBudgetConstraints,
|
|
9477
10909
|
log,
|
|
9478
10910
|
onUsage: trackUsage
|
|
9479
10911
|
});
|
|
@@ -9481,13 +10913,26 @@ function createQueryAgent(config) {
|
|
|
9481
10913
|
onProgress?.("Classifying query...");
|
|
9482
10914
|
const classification = await classify(question, conversationId, attachmentContext);
|
|
9483
10915
|
await pipelineCtx.save("classify", { classification, attachmentEvidence });
|
|
10916
|
+
const effectiveRetrievalMode = resolveQueryRetrievalMode({
|
|
10917
|
+
inputMode: input.retrievalMode,
|
|
10918
|
+
configMode: configRetrievalMode,
|
|
10919
|
+
classificationMode: classification.retrievalMode,
|
|
10920
|
+
supportsSourceRetrieval: !!sourceRetriever
|
|
10921
|
+
});
|
|
9484
10922
|
const retrieverConfig = {
|
|
9485
10923
|
documentStore,
|
|
9486
10924
|
memoryStore,
|
|
10925
|
+
sourceRetriever,
|
|
9487
10926
|
retrievalLimit,
|
|
10927
|
+
retrievalMode: effectiveRetrievalMode,
|
|
9488
10928
|
log
|
|
9489
10929
|
};
|
|
9490
|
-
const workflowPlan = buildInitialQueryWorkflowPlan({
|
|
10930
|
+
const workflowPlan = buildInitialQueryWorkflowPlan({
|
|
10931
|
+
classification,
|
|
10932
|
+
attachmentEvidence,
|
|
10933
|
+
retrievalMode: effectiveRetrievalMode,
|
|
10934
|
+
supportsSourceRetrieval: !!sourceRetriever
|
|
10935
|
+
});
|
|
9491
10936
|
const retrieveAction = getWorkflowAction(workflowPlan, "retrieve");
|
|
9492
10937
|
const reasonAction = getWorkflowAction(workflowPlan, "reason");
|
|
9493
10938
|
await pipelineCtx.save("workflow", { classification, attachmentEvidence, workflowPlan });
|
|
@@ -9502,7 +10947,7 @@ function createQueryAgent(config) {
|
|
|
9502
10947
|
const allEvidence = [...attachmentEvidence, ...retrievalResults.flatMap((r) => r.evidence)];
|
|
9503
10948
|
await pipelineCtx.save("retrieve", { classification, attachmentEvidence, evidence: allEvidence });
|
|
9504
10949
|
onProgress?.("Reasoning over evidence...");
|
|
9505
|
-
const reasonerConfig = { generateObject, providerOptions };
|
|
10950
|
+
const reasonerConfig = { generateObject, providerOptions, modelCapabilities, modelBudgetConstraints };
|
|
9506
10951
|
const subQuestionsToReason = reasonAction?.subQuestions ?? classification.subQuestions;
|
|
9507
10952
|
const reasonResults = await Promise.allSettled(
|
|
9508
10953
|
subQuestionsToReason.map(
|
|
@@ -9537,7 +10982,7 @@ function createQueryAgent(config) {
|
|
|
9537
10982
|
}
|
|
9538
10983
|
await pipelineCtx.save("reason", { classification, attachmentEvidence, evidence: allEvidence, subAnswers });
|
|
9539
10984
|
onProgress?.("Verifying answer grounding...");
|
|
9540
|
-
const verifierConfig = { generateObject, providerOptions };
|
|
10985
|
+
const verifierConfig = { generateObject, providerOptions, modelCapabilities, modelBudgetConstraints };
|
|
9541
10986
|
const verifyRounds = [];
|
|
9542
10987
|
for (let round = 0; round < maxVerifyRounds; round++) {
|
|
9543
10988
|
const { result: verifyResult, usage } = await safeVerify(
|
|
@@ -9663,12 +11108,13 @@ function createQueryAgent(config) {
|
|
|
9663
11108
|
}
|
|
9664
11109
|
}
|
|
9665
11110
|
const prompt = buildQueryClassifyPrompt(question, conversationContext, attachmentContext);
|
|
11111
|
+
const budget = resolveBudget("query_classify", 2048);
|
|
9666
11112
|
const { object, usage } = await safeGenerateObject(
|
|
9667
11113
|
generateObject,
|
|
9668
11114
|
{
|
|
9669
11115
|
prompt,
|
|
9670
11116
|
schema: QueryClassifyResultSchema,
|
|
9671
|
-
maxTokens:
|
|
11117
|
+
maxTokens: budget.maxTokens,
|
|
9672
11118
|
providerOptions
|
|
9673
11119
|
},
|
|
9674
11120
|
{
|
|
@@ -9682,7 +11128,8 @@ function createQueryAgent(config) {
|
|
|
9682
11128
|
],
|
|
9683
11129
|
requiresDocumentLookup: true,
|
|
9684
11130
|
requiresChunkSearch: true,
|
|
9685
|
-
requiresConversationHistory: !!conversationId
|
|
11131
|
+
requiresConversationHistory: !!conversationId,
|
|
11132
|
+
retrievalMode: sourceRetriever ? "hybrid" : "graph_only"
|
|
9686
11133
|
},
|
|
9687
11134
|
log,
|
|
9688
11135
|
onError: (err, attempt) => log?.(`Query classify attempt ${attempt + 1} failed: ${err}`)
|
|
@@ -9712,12 +11159,13 @@ function createQueryAgent(config) {
|
|
|
9712
11159
|
2
|
|
9713
11160
|
);
|
|
9714
11161
|
const prompt = buildRespondPrompt(originalQuestion, subAnswersJson, platform);
|
|
11162
|
+
const budget = resolveBudget("query_respond", 4096);
|
|
9715
11163
|
const { object, usage } = await safeGenerateObject(
|
|
9716
11164
|
generateObject,
|
|
9717
11165
|
{
|
|
9718
11166
|
prompt,
|
|
9719
11167
|
schema: QueryResultSchema,
|
|
9720
|
-
maxTokens:
|
|
11168
|
+
maxTokens: budget.maxTokens,
|
|
9721
11169
|
providerOptions
|
|
9722
11170
|
},
|
|
9723
11171
|
{
|
|
@@ -9740,6 +11188,673 @@ ${sa.answer}`).join("\n\n"),
|
|
|
9740
11188
|
return { query };
|
|
9741
11189
|
}
|
|
9742
11190
|
|
|
11191
|
+
// src/pce/index.ts
|
|
11192
|
+
import { z as z43 } from "zod";
|
|
11193
|
+
|
|
11194
|
+
// src/prompts/pce/index.ts
|
|
11195
|
+
/**
 * Builds the prompt that asks the model to normalize a policy change request
 * into atomic change items.
 *
 * @param {{ requestText: string, evidenceSources: Array<{ id: string, label?: string, text: string }> }} input
 * @returns {string} Instruction lines, the request text, and one bullet per
 *   evidence source (text truncated to 1200 characters), joined by newlines.
 */
function buildPceNormalizePrompt(input) {
  const evidenceLines = [];
  for (const source of input.evidenceSources) {
    const labelSuffix = source.label ? ` (${source.label})` : "";
    evidenceLines.push(`- ${source.id}${labelSuffix}: ${source.text.slice(0, 1200)}`);
  }
  // An empty evidence list is spelled out so the model sees an explicit marker.
  const evidenceBlock = evidenceLines.join("\n") || "(none provided)";
  const sections = [
    "Normalize this policy change endorsement request into atomic change items.",
    "Use beforeValue only when the existing value is explicitly quoted in the provided evidence.",
    "Every beforeValue must include a citation with sourceId and exact quote.",
    "Ask missing-info questions for required details that are absent.",
    "",
    `Request:\n${input.requestText}`,
    "",
    `Evidence:\n${evidenceBlock}`
  ];
  return sections.join("\n");
}
|
|
11212
|
+
/**
 * Builds the prompt that asks the model to map a free-text reply onto the
 * still-open missing-info questions.
 *
 * @param {{ replyText: string, openQuestions: Array<{ id: string, fieldPath?: string, question: string }> }} input
 * @returns {string} Instructions, the reply, and one bullet per open question.
 */
function buildPceReplyPrompt(input) {
  const describeQuestion = (entry) => {
    const pathSuffix = entry.fieldPath ? ` (${entry.fieldPath})` : "";
    return `- ${entry.id}${pathSuffix}: ${entry.question}`;
  };
  const questionList = input.openQuestions.map((entry) => describeQuestion(entry)).join("\n");
  const sections = [
    "Map this reply to the open missing-info questions.",
    "Return concise answers only for questions that are directly answered.",
    "",
    `Reply:\n${input.replyText}`,
    "",
    `Open questions:\n${questionList}`
  ];
  return sections.join("\n");
}
|
|
11224
|
+
|
|
11225
|
+
// src/pce/index.ts
|
|
11226
|
+
// Schema for the structured model output requested when parsing a reply:
// an `answers` array whose entries carry the answer text plus an optional
// question id and/or field path.
var ReplyAnswersSchema = z43.object({
answers: z43.array(z43.object({
questionId: z43.string().optional(),
fieldPath: z43.string().optional(),
answer: z43.string()
}))
});
|
|
11233
|
+
/**
 * Creates a policy-change-endorsement (PCE) agent.
 *
 * The agent keeps an in-memory map of case states keyed by case id and exposes
 * three operations:
 *  - processChangeRequest: normalize a request into change items (heuristically,
 *    or through `config.generateObject` when provided), validate them, and
 *    store the resulting case state.
 *  - processReply: merge answers from a reply into the case's open questions
 *    and recompute items, validation issues, impacts and execution mode.
 *  - generateSubmissionPacket: build the submission packet for a stored case id
 *    or for a state passed in directly.
 *
 * Token usage is reset at the start of each process* call and returned with
 * the result; each usage record is also forwarded to `config.onTokenUsage`.
 *
 * @param {object} [config] Optional settings: now, generateObject,
 *   providerOptions, modelCapabilities, modelBudgetConstraints, executionMode,
 *   sourceRetriever, retrievalLimit, log, onTokenUsage.
 * @returns {{ processChangeRequest: Function, processReply: Function, generateSubmissionPacket: Function }}
 */
function createPceAgent(config = {}) {
  const now = config.now ?? Date.now;
  const cases = /* @__PURE__ */ new Map();
  let tokenUsage = { inputTokens: 0, outputTokens: 0 };

  // Accumulates one model call's usage and notifies the optional listener.
  function trackUsage(usage) {
    if (usage) {
      tokenUsage.inputTokens += usage.inputTokens;
      tokenUsage.outputTokens += usage.outputTokens;
      config.onTokenUsage?.(usage);
    }
  }

  // Resolves the output-token budget for a task from the agent configuration.
  function resolveBudget(taskKind, hintTokens) {
    const constraint = config.modelBudgetConstraints?.[taskKind];
    return resolveModelBudget({
      taskKind,
      hintTokens,
      modelCapabilities: config.modelCapabilities,
      constraint
    });
  }

  // Derives validation issues, impacts and the execution mode for a set of items.
  function analyzeItems({ requestedMode, requestText, items, evidenceSources, missingInfoQuestions }) {
    const validationIssues = validatePceItems(items, evidenceSources);
    const impacts = buildPolicyChangeImpacts(items, evidenceSources);
    const executionMode = selectPceExecutionMode({
      requestedMode,
      requestText,
      items,
      impacts,
      evidenceSources,
      validationIssues,
      missingInfoQuestions
    });
    return { validationIssues, impacts, executionMode };
  }

  async function processChangeRequest(input) {
    tokenUsage = { inputTokens: 0, outputTokens: 0 };
    const evidenceSources = await collectPceEvidenceSources(input, config);
    // The heuristic result doubles as the fallback handed to the model call.
    const fallback = heuristicNormalize(input.requestText, evidenceSources);
    let normalized = fallback;
    if (config.generateObject) {
      const budget = resolveBudget("pce_impact_analysis", 2500);
      const request = {
        prompt: buildPceNormalizePrompt({ requestText: input.requestText, evidenceSources }),
        schema: PceNormalizationResultSchema,
        maxTokens: budget.maxTokens,
        providerOptions: config.providerOptions
      };
      const generated = await safeGenerateObject(
        config.generateObject,
        request,
        { fallback, maxRetries: 1, log: config.log }
      );
      normalized = PceNormalizationResultSchema.parse(generated.object);
      trackUsage(generated.usage);
    }

    const createdAt = now();
    const items = normalized.items.map((item) => finalizeItem(item, input.requestText));
    const missingInfoQuestions = normalized.missingInfoQuestions.map((question) => {
      // Link a question to an item by field path when the model gave no item id.
      const itemId = question.itemId ?? items.find((item) => item.fieldPath === question.fieldPath)?.id;
      const id = question.id ?? stableCaseId("question", [itemId, question.fieldPath, question.question]);
      return { ...question, itemId, id };
    });
    const { validationIssues, impacts, executionMode } = analyzeItems({
      requestedMode: input.executionMode ?? config.executionMode,
      requestText: input.requestText,
      items,
      evidenceSources,
      missingInfoQuestions
    });

    const state = {
      id: input.caseId ?? stableCaseId("pce", [input.requestText, evidenceSources.map((source) => source.id)]),
      requestText: input.requestText,
      summary: normalized.summary || summarizeItems(items),
      executionMode,
      items,
      impacts,
      evidenceSources,
      validationIssues,
      missingInfoQuestions,
      createdAt,
      updatedAt: createdAt
    };
    cases.set(state.id, state);
    return { state, tokenUsage };
  }

  async function processReply2(input) {
    tokenUsage = { inputTokens: 0, outputTokens: 0 };
    const previous = input.state;
    let answers = heuristicParseAnswers(input.replyText, previous.missingInfoQuestions);
    const hasOpenQuestion = () => previous.missingInfoQuestions.some((question) => !question.answer);
    if (config.generateObject && hasOpenQuestion()) {
      const budget = resolveBudget("pce_reply_parse", 1e3);
      const openQuestions = previous.missingInfoQuestions
        .filter((question) => !question.answer)
        .map(({ id, question, fieldPath }) => ({ id, question, fieldPath }));
      const request = {
        prompt: buildPceReplyPrompt({ replyText: input.replyText, openQuestions }),
        schema: ReplyAnswersSchema,
        maxTokens: budget.maxTokens,
        providerOptions: config.providerOptions
      };
      const generated = await safeGenerateObject(
        config.generateObject,
        request,
        { fallback: { answers }, maxRetries: 1, log: config.log }
      );
      // Model answers replace the heuristic ones rather than being merged with them.
      answers = ReplyAnswersSchema.parse(generated.object).answers;
      trackUsage(generated.usage);
    }

    const merged = mergeQuestionAnswers(previous.missingInfoQuestions, answers);
    const items = applyMissingInfoAnswers(previous.items, merged.questions);
    const { validationIssues, impacts, executionMode } = analyzeItems({
      requestedMode: config.executionMode,
      requestText: previous.requestText,
      items,
      evidenceSources: previous.evidenceSources,
      missingInfoQuestions: merged.questions
    });

    const state = {
      ...previous,
      executionMode,
      items,
      impacts,
      validationIssues,
      missingInfoQuestions: merged.questions,
      updatedAt: now()
    };
    cases.set(state.id, state);
    return { state, answersMerged: merged.answeredCount, tokenUsage };
  }

  function generateSubmissionPacket(input) {
    let state;
    if (typeof input === "string") {
      state = cases.get(input);
    } else {
      state = input.state;
    }
    if (!state) {
      throw new Error(`Policy change case ${String(input)} not found`);
    }
    return buildPceSubmissionPacket(state, now());
  }

  return { processChangeRequest, processReply: processReply2, generateSubmissionPacket };
}
|
|
11366
|
+
/**
 * Applies answered missing-info questions to the change items they refer to.
 *
 * A question targets an item when its itemId equals the item's id, or when it
 * has no itemId and its fieldPath equals the item's fieldPath. Only questions
 * with a non-blank answer count; when several match, the last one wins.
 * Existing afterValue/requestedValue are kept; the answer only fills gaps.
 *
 * @param {object[]} items
 * @param {object[]} questions
 * @returns {object[]} New array; items without a matching answer are returned as-is.
 */
function applyMissingInfoAnswers(items, questions) {
  const answeredQuestions = questions.filter((question) => question.answer?.trim());
  return items.map((item) => {
    let latest;
    for (const question of answeredQuestions) {
      const linkedById = question.itemId === item.id;
      const linkedByPath = !question.itemId && question.fieldPath === item.fieldPath;
      if (linkedById || linkedByPath) latest = question;
    }
    if (latest === undefined) return item;

    const answer = latest.answer.trim();
    const status = item.status === "needs_info" ? "ready" : item.status;
    return {
      ...item,
      afterValue: item.afterValue ?? answer,
      requestedValue: item.requestedValue ?? answer,
      status,
      userSourceSpanIds: item.userSourceSpanIds ?? []
    };
  });
}
|
|
11382
|
+
/**
 * Gathers the evidence sources for a change request.
 *
 * Caller-provided sources are always included. When `config.sourceRetriever`
 * is set, it is queried in "hybrid" mode with the request text (limit
 * `config.retrievalLimit`, default 8) and the resulting spans are converted to
 * evidence sources and merged with the provided ones via
 * dedupeEvidenceSources. A retrieval failure is logged and the provided
 * sources are returned unchanged.
 *
 * @param {{ requestText: string, evidenceSources?: object[] }} input
 * @param {object} [config]
 * @returns {Promise<object[]>}
 */
async function collectPceEvidenceSources(input, config) {
  const provided = input.evidenceSources ?? [];
  if (!config?.sourceRetriever) return provided;

  // Converts one retrieval hit into the evidence-source shape used by PCE.
  const toEvidenceSource = ({ span, relevance }) => ({
    id: span.id,
    label: span.formNumber ?? span.sectionId ?? span.sourceKind,
    documentId: span.documentId,
    page: span.pageStart ?? span.location?.page,
    fieldPath: span.sectionId ?? span.location?.fieldPath,
    text: span.text,
    metadata: {
      ...span.metadata,
      relevance: String(relevance),
      sourceKind: span.sourceKind ?? span.kind
    }
  });

  try {
    const hits = await config.sourceRetriever.searchSourceSpans({
      question: input.requestText,
      limit: config.retrievalLimit ?? 8,
      mode: "hybrid"
    });
    const retrieved = hits.map((hit) => toEvidenceSource(hit));
    return dedupeEvidenceSources([...provided, ...retrieved]);
  } catch (error) {
    // Best effort: retrieval problems must not block processing the request.
    await config.log?.(`PCE source evidence retrieval failed: ${error}`);
    return provided;
  }
}
|
|
11410
|
+
// Derives a stable "pci" id for a change item from its policy id, kind, field
// path, target value (afterValue, else requestedValue, else "") and the
// "|"-joined source span ids.
function stablePolicyChangeItemId(item) {
  const targetValue = item.afterValue ?? item.requestedValue ?? "";
  const spanKey = item.sourceSpanIds?.join("|") ?? "";
  const parts = [item.affectedPolicyId, item.kind, item.fieldPath, targetValue, spanKey];
  return stableCaseId("pci", parts);
}
|
|
11419
|
+
/**
 * Runs the PCE validation rules over every change item and returns the
 * flattened list of issues (de-duplicated per item).
 *
 * Rules, in order: quoted-evidence check of beforeValue, beforeValue without
 * any source link, missing requested value, coverage change without source
 * link, effective-date check, endorsement conflict, cancellation/nonrenewal
 * without date or source spans, and certificate request without holder or
 * requirement details.
 *
 * @param {object[]} items
 * @param {object[]} sources
 * @returns {object[]} Validation issues.
 */
function validatePceItems(items, sources) {
  return items.flatMap((item) => {
    // Every rule-specific issue below is blocking and anchored to the item.
    const blockingIssue = (code, message) => ({
      code,
      severity: "blocking",
      message,
      itemId: item.id,
      fieldPath: item.fieldPath
    });
    const hasNoSourceLinks = () => item.sourceSpanIds.length === 0 && item.sourceIds.length === 0;

    const issues = [
      ...validateQuotedEvidence({
        itemId: item.id,
        fieldPath: `${item.fieldPath}.beforeValue`,
        quote: item.beforeValue,
        citation: firstCitationForValue(item.citations, item.beforeValue),
        sources
      })
    ];

    if (item.beforeValue?.trim() && hasNoSourceLinks()) {
      issues.push(blockingIssue(
        "existing_value_missing_source_span",
        `Existing value for ${item.fieldPath} is missing source span evidence.`
      ));
    }

    const lacksTargetValue = () =>
      !item.afterValue?.trim() && !item.requestedValue?.trim() && item.action !== "remove";
    if (item.status === "needs_info" || lacksTargetValue()) {
      issues.push(blockingIssue(
        "required_value_missing",
        `Requested value for ${item.fieldPath} is missing.`
      ));
    }

    if (item.kind === "coverage_change" && item.action !== "add" && hasNoSourceLinks()) {
      issues.push(blockingIssue(
        "coverage_source_missing",
        `Coverage change for ${item.fieldPath} is not linked to existing coverage evidence.`
      ));
    }

    const effectiveDateIssue = validateEffectiveDate(item, sources);
    if (effectiveDateIssue) issues.push(effectiveDateIssue);

    const endorsementConflict = findEndorsementConflict(item, sources);
    if (endorsementConflict) issues.push(endorsementConflict);

    const isTermination = item.kind === "cancellation" || item.kind === "nonrenewal";
    if (isTermination && (!item.effectiveDate || item.sourceSpanIds.length === 0)) {
      issues.push(blockingIssue(
        "notice_rule_ambiguous",
        `${item.kind} request needs an effective date and source-backed notice/timing terms.`
      ));
    }

    if (item.kind === "certificate_endorsement_request" && !hasCertificateRequirementDetails(item)) {
      issues.push(blockingIssue(
        "certificate_details_missing",
        "Certificate-driven endorsement request is missing holder or requirement details."
      ));
    }

    return dedupeValidationIssues(issues);
  });
}
|
|
11482
|
+
/**
 * Builds one impact record per change item.
 *
 * A source counts as cited when its id appears in the item's sourceSpanIds or
 * sourceIds. Affected coverage forms are the cited sources' form numbers
 * (falling back to their labels), de-duplicated and sorted.
 *
 * @param {object[]} items
 * @param {object[]} sources
 * @returns {object[]}
 */
function buildPolicyChangeImpacts(items, sources) {
  return items.map((item) => {
    const isCited = (source) =>
      item.sourceSpanIds.includes(source.id) || item.sourceIds.includes(source.id);

    const forms = new Set();
    for (const source of sources) {
      if (!isCited(source)) continue;
      const form = source.metadata?.formNumber ?? source.label;
      if (form) forms.add(form);
    }
    const linkedIds = new Set([...item.sourceSpanIds, ...item.sourceIds]);

    return {
      itemId: item.id,
      beforeValue: item.beforeValue,
      requestedValue: item.requestedValue ?? item.afterValue,
      likelyEndorsementRequired: item.kind !== "renewal_submission_update",
      carrierApprovalLikelyRequired: item.kind !== "certificate_endorsement_request",
      affectedCoverageForms: [...forms].sort(),
      sourceSpanIds: [...linkedIds].sort()
    };
  });
}
|
|
11498
|
+
/**
 * Chooses the execution mode for a case.
 *
 * An explicit requested mode other than "auto" always wins. Otherwise the case
 * is "hybrid" when there is a blocking validation issue, conflicting evidence,
 * an ambiguous cancellation/nonrenewal, or an unclear certificate request;
 * "market_eval" for a multi-form financial change; else "deterministic_tree".
 *
 * @param {object} params
 * @returns {string}
 */
function selectPceExecutionMode(params) {
  const { requestedMode } = params;
  if (requestedMode && requestedMode !== "auto") return requestedMode;

  // Checks run in this order and stop at the first hit.
  const needsHybrid =
    params.validationIssues.some((issue) => issue.severity === "blocking") ||
    hasConflictingEvidence(params.evidenceSources) ||
    hasAmbiguousCancellationOrNonrenewal(params.requestText, params.items) ||
    hasUnclearCertificateRequest(params.items, params.missingInfoQuestions ?? []);
  if (needsHybrid) return "hybrid";

  return hasMultiFormFinancialChange(params.items, params.impacts)
    ? "market_eval"
    : "deterministic_tree";
}
|
|
11519
|
+
/**
 * Fills in defaults on a normalized change item so downstream code can rely
 * on every field being present.
 *
 * afterValue and requestedValue fall back to each other. The default status
 * is computed from the resolved target value: an item that only carries
 * `requestedValue` is "ready", not "needs_info" (previously the raw
 * `item.afterValue` was inspected, which flagged such items as missing a
 * value even though one was supplied). An explicit `item.status` is kept.
 *
 * @param {object} item Normalized item (from the model or the heuristic).
 * @param {string} requestText Original request text, used to infer the kind
 *   and the default confidence.
 * @returns {object} The item with kind, affectedPolicyId, afterValue,
 *   requestedValue, sourceSpanIds, userSourceSpanIds, id, label, sourceIds,
 *   citations, confidence, confidenceScore and status populated.
 */
function finalizeItem(item, requestText) {
  const afterValue = item.afterValue ?? item.requestedValue;
  const requestedValue = item.requestedValue ?? afterValue;
  // "remove" actions legitimately have no target value.
  const status = item.status ?? (!afterValue && item.action !== "remove" ? "needs_info" : "ready");
  const citations = item.citations ?? [];
  const sourceSpanIds = item.sourceSpanIds?.length ? item.sourceSpanIds : inferSourceIds(citations);
  const kind = item.kind ?? inferChangeKind(item.fieldPath, requestText);
  const affectedPolicyId = item.affectedPolicyId ?? "unknown";
  const hasRequestText = requestText.length > 0;

  return {
    ...item,
    kind,
    affectedPolicyId,
    afterValue,
    requestedValue,
    sourceSpanIds,
    userSourceSpanIds: item.userSourceSpanIds ?? [],
    // The generated id is derived from the resolved fields, not the raw ones.
    id: item.id ?? stablePolicyChangeItemId({
      ...item,
      kind,
      affectedPolicyId,
      afterValue,
      requestedValue,
      sourceSpanIds
    }),
    label: item.label || item.fieldPath,
    sourceIds: item.sourceIds ?? sourceSpanIds,
    citations,
    confidence: item.confidence ?? (hasRequestText ? "medium" : "low"),
    confidenceScore: item.confidenceScore ?? (hasRequestText ? 0.6 : 0.3),
    status
  };
}
|
|
11548
|
+
// Picks the citation backing `value`: the first one whose trimmed quote equals
// the trimmed value, otherwise the first citation. Returns undefined when
// there is no value to back.
function firstCitationForValue(citations, value) {
  if (!value) return undefined;
  for (const citation of citations) {
    if (citation.quote.trim() === value.trim()) return citation;
  }
  return citations[0];
}
|
|
11552
|
+
// Returns the distinct source ids referenced by the citations, sorted.
function inferSourceIds(citations) {
  const ids = new Set();
  for (const citation of citations) {
    ids.add(citation.sourceId);
  }
  return [...ids].sort();
}
|
|
11555
|
+
// Collapses evidence sources that share an id (the last occurrence wins) and
// returns the survivors ordered by id using localeCompare.
function dedupeEvidenceSources(sources) {
  const latestById = new Map(Array.from(sources, (source) => [source.id, source]));
  const unique = Array.from(latestById.values());
  unique.sort((a, b) => a.id.localeCompare(b.id));
  return unique;
}
|
|
11562
|
+
/**
 * Reports whether two evidence sources that describe the same thing (same
 * normalized conflict key) carry different sets of comparable values.
 *
 * Sources without a key or without any comparable value are ignored.
 *
 * @param {object[]} sources
 * @returns {boolean}
 */
function hasConflictingEvidence(sources) {
  const signaturesByKey = /* @__PURE__ */ new Map();
  for (const source of sources) {
    const key = normalizeEvidenceConflictKey(source);
    if (!key) continue;
    const values = extractComparableEvidenceValues(source.text);
    if (values.length === 0) continue;

    let signatures = signaturesByKey.get(key);
    if (signatures === undefined) {
      signatures = /* @__PURE__ */ new Set();
      signaturesByKey.set(key, signatures);
    }
    // The signature is order-independent: values are sorted before joining.
    signatures.add(values.sort().join("|"));
    if (signatures.size > 1) return true;
  }
  return false;
}
|
|
11576
|
+
// Builds the grouping key used for conflict detection: "<fieldPath>:<form
// number or 'default'>" when a field path is known (on the source or in its
// metadata), otherwise the source label. The key is whitespace-collapsed,
// trimmed and lower-cased; undefined when neither is available.
function normalizeEvidenceConflictKey(source) {
  const fieldPath = source.fieldPath ?? source.metadata?.fieldPath;
  let rawKey;
  if (fieldPath) {
    const formNumber = source.metadata?.formNumber;
    rawKey = `${fieldPath}:${formNumber ?? "default"}`;
  } else {
    rawKey = source.label;
  }
  if (rawKey == null) return undefined;
  return rawKey.replace(/\s+/g, " ").trim().toLowerCase();
}
|
|
11582
|
+
// Extracts the values used to compare evidence texts: numeric amounts (with
// "$", "," and "%" stripped) followed by numeric dates such as 1/2/2024,
// de-duplicated in order of first appearance.
function extractComparableEvidenceValues(text) {
  const amounts = Array.from(
    text.matchAll(/\$?\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\b%?/g),
    (match) => match[0].replace(/[$,%\s]/g, "")
  );
  const dates = Array.from(
    text.matchAll(/\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b/g),
    (match) => match[0]
  );
  const distinct = new Set([...amounts, ...dates]);
  return Array.from(distinct).filter((value) => value.length > 0);
}
|
|
11592
|
+
// True when the request contains a cancellation or nonrenewal item AND the
// request text uses hedging/conditional wording (if, unless, maybe, or, ...).
function hasAmbiguousCancellationOrNonrenewal(requestText, items) {
  const terminationKinds = ["cancellation", "nonrenewal"];
  const touchesTermination = items.some((item) => terminationKinds.includes(item.kind));
  return touchesTermination &&
    /\b(if|unless|maybe|possibly|unsure|unclear|or|pending|conditional)\b/i.test(requestText);
}
|
|
11597
|
+
// True when any certificate endorsement item is not ready to act on: it needs
// info, has no target value, has low confidence, has no source spans, or still
// has a missing-info question pointing at it (by item id or field path).
function hasUnclearCertificateRequest(items, missingInfoQuestions) {
  for (const item of items) {
    if (item.kind !== "certificate_endorsement_request") continue;
    if (item.status === "needs_info") return true;
    if (!item.afterValue?.trim()) return true;
    if (item.confidence === "low") return true;
    if (item.sourceSpanIds.length === 0) return true;
    const hasQuestion = missingInfoQuestions.some(
      (question) => question.itemId === item.id || question.fieldPath === item.fieldPath
    );
    if (hasQuestion) return true;
  }
  return false;
}
|
|
11602
|
+
// True when a limit or deductible change has an impact that spans more than
// one coverage form or more than one source span.
function hasMultiFormFinancialChange(items, impacts) {
  const financialKinds = new Set(["limit_change", "deductible_change"]);
  const financialItemIds = new Set();
  for (const item of items) {
    if (financialKinds.has(item.kind)) financialItemIds.add(item.id);
  }
  return impacts.some((impact) => {
    if (!financialItemIds.has(impact.itemId)) return false;
    return impact.affectedCoverageForms.length > 1 || impact.sourceSpanIds.length > 1;
  });
}
|
|
11608
|
+
/**
 * Checks an item's requested effective date.
 *
 * @param {object} item Change item; nothing is checked without an effectiveDate.
 * @param {object[]} sources Evidence sources searched for the policy period.
 * @returns {object|undefined} A warning when the date cannot be parsed, a
 *   blocking issue when it falls outside the policy period found in the
 *   sources, otherwise undefined (including when no period can be found).
 */
function validateEffectiveDate(item, sources) {
  const { effectiveDate } = item;
  if (!effectiveDate) return undefined;

  const requestedDate = parseDateValue(effectiveDate);
  if (!requestedDate) {
    return {
      code: "effective_date_unparseable",
      severity: "warning",
      message: `Requested effective date ${effectiveDate} could not be parsed.`,
      itemId: item.id,
      fieldPath: "effectiveDate"
    };
  }

  const period = findPolicyPeriod(sources);
  if (period && (requestedDate < period.start || requestedDate > period.end)) {
    return {
      code: "effective_date_outside_policy_period",
      severity: "blocking",
      message: `Requested effective date ${effectiveDate} is outside the cited policy period.`,
      itemId: item.id,
      fieldPath: "effectiveDate",
      sourceId: period.sourceId
    };
  }
  return undefined;
}
|
|
11634
|
+
/**
 * Finds the first evidence source that states a policy period.
 *
 * For each source, metadata dates are tried first (policyEffectiveDate or
 * policyStartDate, and policyExpirationDate or policyEndDate); failing that,
 * the text is scanned for "policy period"/"effective" followed by two numeric
 * dates separated by "to", "-" or "through".
 *
 * @param {object[]} sources
 * @returns {{ start: number, end: number, sourceId: string }|undefined}
 */
function findPolicyPeriod(sources) {
  const parseIfPresent = (raw) => (raw ? parseDateValue(raw) : undefined);
  const periodPattern = /\b(?:policy\s+period|effective)\b[^.\n]*?(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})\s*(?:to|-|through)\s*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})/i;

  for (const source of sources) {
    const meta = source.metadata;
    const start = parseIfPresent(meta?.policyEffectiveDate ?? meta?.policyStartDate);
    const end = parseIfPresent(meta?.policyExpirationDate ?? meta?.policyEndDate);
    if (start && end) {
      return { start, end, sourceId: source.id };
    }

    const textPeriod = source.text.match(periodPattern);
    const textStart = parseIfPresent(textPeriod?.[1]);
    const textEnd = parseIfPresent(textPeriod?.[2]);
    if (textStart && textEnd) {
      return { start: textStart, end: textEnd, sourceId: source.id };
    }
  }
  return undefined;
}
|
|
11648
|
+
/**
 * Parses a numeric US-style date (M/D/YY, M/D/YYYY, or with "-" separators)
 * into a UTC timestamp in milliseconds.
 *
 * Two-digit years are read as 20YY. Surrounding whitespace is ignored.
 * Calendar-impossible dates such as 02/31/2024 are rejected; Date.UTC would
 * otherwise silently roll them over into the following month.
 *
 * @param {unknown} value Candidate date string; non-strings are rejected.
 * @returns {number|undefined} Milliseconds since the epoch at 00:00 UTC, or
 *   undefined when the value is not a valid date in this format.
 */
function parseDateValue(value) {
  if (typeof value !== "string") return undefined;
  const numeric = value.trim().match(/^(\d{1,2})[/-](\d{1,2})[/-](\d{2}|\d{4})$/);
  if (!numeric) return undefined;

  const month = Number(numeric[1]);
  const day = Number(numeric[2]);
  const rawYear = Number(numeric[3]);
  const year = rawYear < 100 ? 2e3 + rawYear : rawYear;
  if (month < 1 || month > 12 || day < 1 || day > 31) return undefined;

  const timestamp = Date.UTC(year, month - 1, day);
  // Round-trip check: a rolled-over date no longer matches its own parts.
  const roundTrip = new Date(timestamp);
  const isRealDate =
    roundTrip.getUTCFullYear() === year &&
    roundTrip.getUTCMonth() === month - 1 &&
    roundTrip.getUTCDate() === day;
  return isRealDate ? timestamp : undefined;
}
|
|
11658
|
+
/**
 * Looks for an endorsement among the item's linked sources whose wording is
 * restrictive (excludes, exclusion, prohibits, not covered, no coverage, must
 * not) and may therefore conflict with the requested change.
 *
 * @param {object} item Change item with sourceSpanIds and sourceIds.
 * @param {object[]} sources
 * @returns {object|undefined} A blocking "endorsement_conflict" issue for the
 *   first such source, or undefined.
 */
function findEndorsementConflict(item, sources) {
  const endorsementPattern = /\bendorsement\b/i;
  const restrictionPattern = /\b(excludes|exclusion|prohibits|not\s+covered|no\s+coverage|must\s+not)\b/i;

  for (const source of sources) {
    const isLinked = item.sourceSpanIds.includes(source.id) || item.sourceIds.includes(source.id);
    if (!isLinked) continue;
    // "endorsement" may appear in the label or the text; the restriction must be in the text.
    const mentionsEndorsement = endorsementPattern.test(`${source.label ?? ""} ${source.text}`);
    if (mentionsEndorsement && restrictionPattern.test(source.text)) {
      return {
        code: "endorsement_conflict",
        severity: "blocking",
        message: `Existing endorsement source ${source.id} may conflict with the requested change.`,
        itemId: item.id,
        fieldPath: item.fieldPath,
        sourceId: source.id
      };
    }
  }
  return undefined;
}
|
|
11673
|
+
// True when the item's label, values and reason together name both a holder
// (holder, additional insured, loss payee, lender, landlord) and a
// requirement (primary, non-contributory, waiver, subrogation, notice, ...).
function hasCertificateRequirementDetails(item) {
  const parts = [
    `${item.label}`,
    `${item.afterValue ?? ""}`,
    `${item.requestedValue ?? ""}`,
    `${item.reason ?? ""}`
  ];
  const haystack = parts.join(" ").toLowerCase();
  if (!/\b(holder|certificate holder|additional insured|loss payee|lender|landlord)\b/.test(haystack)) {
    return false;
  }
  return /\b(primary|non[- ]?contributory|waiver|subrogation|notice|endorsement|requirement|wording)\b/.test(haystack);
}
|
|
11679
|
+
// Drops repeated validation issues, keeping the first occurrence. Two issues
// are the same when code, itemId, fieldPath and sourceId all match.
function dedupeValidationIssues(issues) {
  const seenKeys = /* @__PURE__ */ new Set();
  const unique = [];
  for (const issue of issues) {
    const key = `${issue.code}:${issue.itemId ?? ""}:${issue.fieldPath ?? ""}:${issue.sourceId ?? ""}`;
    if (seenKeys.has(key)) continue;
    seenKeys.add(key);
    unique.push(issue);
  }
  return unique;
}
|
|
11688
|
+
/**
 * Model-free normalization of a change request into a single change item.
 * Used when no model is configured and as the fallback for the model call.
 *
 * - action: "remove" when the text contains "remove"/"delete"; "add" when it
 *   contains the word add/adds/added/adding; otherwise "update". "add" is
 *   matched as a whole word so that "address" or "additional insured" in an
 *   update request no longer turns the action into "add".
 * - beforeValue: the first double-quoted phrase of the request that occurs
 *   (case-insensitively) in some evidence source; that source is cited.
 * - afterValue/requestedValue: inferred from "to ..." wording; when nothing
 *   can be inferred, a missing-info question is emitted instead.
 *
 * @param {string} requestText
 * @param {object[]} evidenceSources
 * @returns {{ summary: string, items: object[], missingInfoQuestions: object[] }}
 */
function heuristicNormalize(requestText, evidenceSources) {
  const lower = requestText.toLowerCase();
  let action = "update";
  if (lower.includes("remove") || lower.includes("delete")) {
    action = "remove";
  } else if (/\badd(?:s|ed|ing)?\b/.test(lower)) {
    action = "add";
  }

  const effectiveDate = requestText.match(/\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b/)?.[0];
  // The first sentence/line of the request doubles as label and summary.
  const label = requestText.split(/[.;\n]/)[0]?.trim() || "Policy change";

  const sourceContains = (source, phrase) => source.text.toLowerCase().includes(phrase.toLowerCase());
  const quoted = Array.from(requestText.matchAll(/"([^"]+)"/g), (match) => match[1]);
  const beforeValue = quoted.find(
    (phrase) => evidenceSources.some((source) => sourceContains(source, phrase))
  );
  const citationSource = beforeValue
    ? evidenceSources.find((source) => sourceContains(source, beforeValue))
    : void 0;

  // Inferred once and reused for the item and the missing-info question.
  const fieldPath = inferFieldPath(requestText);
  const afterValue = inferAfterValue(requestText, beforeValue);

  const item = {
    action,
    kind: inferChangeKind(fieldPath, requestText),
    affectedPolicyId: evidenceSources.find((source) => source.documentId)?.documentId ?? "unknown",
    fieldPath,
    label,
    beforeValue,
    afterValue,
    requestedValue: afterValue,
    effectiveDate,
    reason: void 0,
    sourceIds: citationSource ? [citationSource.id] : [],
    sourceSpanIds: citationSource ? [citationSource.id] : [],
    citations: beforeValue && citationSource ? [{
      sourceId: citationSource.id,
      quote: beforeValue,
      page: citationSource.page,
      fieldPath: citationSource.fieldPath
    }] : [],
    confidence: "low",
    confidenceScore: 0.45
  };

  const missingInfoQuestions = afterValue ? [] : [{
    fieldPath,
    question: "What new value should the carrier endorse for this change?",
    reason: "The request did not include a clear target value."
  }];

  return { summary: label, items: [item], missingInfoQuestions };
}
|
|
11730
|
+
/**
 * Infers the change kind from keywords in the field path and request text.
 *
 * Checks run in a fixed priority order and the first hit wins, so e.g.
 * "additional insured" is recognized before the generic "limit" or
 * "certificate" keywords. Nonrenewal is recognized as "nonrenewal",
 * "non-renewal" and "non renewal"; the hyphenated/spaced forms used to fall
 * through to the "renewal" keyword and were misreported as a renewal
 * submission update.
 *
 * @param {string} fieldPath
 * @param {string} requestText
 * @returns {string} One of the change-kind identifiers; "general_endorsement"
 *   when no keyword matches.
 */
function inferChangeKind(fieldPath, requestText) {
  const lower = `${fieldPath} ${requestText}`.toLowerCase();
  const mentions = (...keywords) => keywords.some((keyword) => lower.includes(keyword));

  if (mentions("additional insured")) return "additional_insured_change";
  if (mentions("named insured")) return "named_insured_change";
  if (mentions("limit")) return "limit_change";
  if (mentions("deductible")) return "deductible_change";
  if (mentions("location", "address")) return "location_change";
  if (mentions("vehicle", "auto")) return "vehicle_change";
  if (mentions("certificate", "holder")) return "certificate_endorsement_request";
  if (mentions("cancel")) return "cancellation";
  // Must run before the plain "renewal" check below.
  if (/non[-\s]?renew/.test(lower)) return "nonrenewal";
  if (mentions("renewal", "submission")) return "renewal_submission_update";
  if (mentions("coverage")) return "coverage_change";
  return "general_endorsement";
}
|
|
11745
|
+
/**
 * Guesses which field path a free-text change request refers to.
 *
 * Keywords are checked case-insensitively in the order listed; the first one
 * found in the request decides the path.
 *
 * @param {string} requestText - Free-text change request.
 * @returns {string} The matched field path, or "policy.change" when no keyword is present.
 */
function inferFieldPath(requestText) {
  const text = requestText.toLowerCase();
  // Ordered: earlier entries take precedence when several keywords appear.
  const keywordPaths = [
    ["address", "insured.address"],
    ["vehicle", "auto.vehicles"],
    ["driver", "auto.drivers"],
    ["limit", "coverage.limit"],
    ["deductible", "coverage.deductible"]
  ];
  for (const [keyword, path] of keywordPaths) {
    if (text.includes(keyword)) return path;
  }
  return "policy.change";
}
|
|
11754
|
+
/**
 * Heuristically extracts the requested new ("after") value from a free-text
 * change request.
 *
 * Two patterns are tried, most specific first:
 *   1. "from <old> to <new>"
 *   2. "to <new>"
 * The explicit "from ... to ..." form is tried first because a plain "to" also
 * matches infinitives ("I want to change ..."), which would otherwise capture
 * the whole clause instead of the value.
 *
 * A value ends at ";", a newline, or a "." that is followed by whitespace or
 * the end of the text, so decimals such as "$1.5 million" are kept intact.
 * One leading and one trailing double quote are stripped, and a candidate equal
 * to `beforeValue` is rejected (it would not describe a change).
 *
 * @param {string} requestText - Free-text change request.
 * @param {string} [beforeValue] - Current value, used to discard a no-op match.
 * @returns {string|undefined} The inferred new value, or undefined when none is found.
 */
function inferAfterValue(requestText, beforeValue) {
  const fromToPattern = /\bfrom\s+.+?\s+to\s+((?:[^.;\n]|\.(?=\S))+)/i;
  const toPattern = /\bto\s+((?:[^.;\n]|\.(?=\S))+)/i;

  for (const pattern of [fromToPattern, toPattern]) {
    const candidate = requestText.match(pattern)?.[1]?.trim().replace(/^"|"$/g, "");
    // Compare after quote stripping so `to "X"` is recognised as equal to X.
    if (candidate && candidate !== beforeValue) return candidate;
  }
  return undefined;
}
|
|
11760
|
+
/**
 * Fallback answer parser: when exactly one question is still unanswered, the
 * whole (trimmed) reply is taken as the answer to that question.
 *
 * A question whose answer is missing or whitespace-only counts as unanswered,
 * matching how buildPceQualityReport counts missing information.
 *
 * @param {string} replyText - Raw reply text.
 * @param {Array<{id: string, answer?: string}>} questions - Questions with their current answers.
 * @returns {Array<{questionId: string, answer: string}>} A single parsed answer, or an empty
 *   array when the reply is blank or the target question is ambiguous (zero or several unanswered).
 */
function heuristicParseAnswers(replyText, questions) {
  // Tolerate a missing/non-string reply instead of throwing on `.trim()`.
  const answer = typeof replyText === "string" ? replyText.trim() : "";
  if (!answer) return [];

  const hasAnswer = (question) => typeof question.answer === "string" ? question.answer.trim() !== "" : Boolean(question.answer);
  const unanswered = questions.filter((question) => !hasAnswer(question));
  if (unanswered.length !== 1) return [];

  return [{ questionId: unanswered[0].id, answer }];
}
|
|
11765
|
+
/**
 * Builds a one-line summary of change items.
 *
 * @param {Array<{action: string, label: string}>} items - Items to summarise.
 * @returns {string} "<action> <label>" for each item, joined with "; " (empty string for no items).
 */
function summarizeItems(items) {
  const describe = ({ action, label }) => `${action} ${label}`;
  const phrases = items.map(describe);
  return phrases.join("; ");
}
|
|
11768
|
+
/**
 * Assembles the submission packet for a PCE case from its current state.
 *
 * The packet contains five text artifacts, in this order: underwriter summary,
 * carrier email, missing-information request, JSON packet and validation
 * report. Citations from all items are de-duplicated once and the same array
 * is attached to the summary, email and JSON artifacts.
 *
 * @param {object} state - Case state; this function reads `id`, `updatedAt`, `summary`,
 *   `items`, `impacts`, `evidenceSources`, `missingInfoQuestions` and `validationIssues`.
 * @param {*} createdAt - Creation timestamp, copied onto the packet unchanged.
 * @returns {object} Packet with `id`, `caseId`, `pceCase`, `artifacts`, `validationIssues`,
 *   `missingInfoQuestions` and `createdAt`.
 */
function buildPceSubmissionPacket(state, createdAt) {
  const citations = uniqueCitations(state.items.flatMap((item) => item.citations));
  const readyItems = state.items.filter((item) => item.status === "ready");
  // A whitespace-only answer is not an answer: buildPceQualityReport counts such
  // questions as missing, so they must also be listed as open here.
  const hasAnswer = (question) => typeof question.answer === "string" ? question.answer.trim() !== "" : Boolean(question.answer);
  const openQuestions = state.missingInfoQuestions.filter((question) => !hasAnswer(question));
  const artifactId = (kind) => stableCaseId("artifact", [state.id, kind]);
  const likelihood = (flag) => flag ? "likely" : "not expected";

  const artifacts = [
    {
      id: artifactId("underwriter_summary"),
      kind: "underwriter_summary",
      title: "Underwriter summary",
      content: [
        state.summary,
        "",
        ...state.items.map((item) => `- ${item.action.toUpperCase()} ${item.label}: ${item.beforeValue ?? "(not cited)"} -> ${item.afterValue ?? "(pending)"}`),
        "",
        "Impact analysis:",
        ...state.impacts.map((impact) => `- ${impact.itemId}: endorsement=${likelihood(impact.likelyEndorsementRequired)}, carrierApproval=${likelihood(impact.carrierApprovalLikelyRequired)}`)
      ].join("\n"),
      citations
    },
    {
      id: artifactId("carrier_email"),
      kind: "carrier_email",
      title: "Carrier email",
      content: [
        "Please process the following policy change endorsement request:",
        "",
        // Only items marked "ready" are sent; fall back to the action when no new value is known.
        ...readyItems.map((item) => `- ${item.label}: ${item.afterValue ?? item.action}`)
      ].join("\n"),
      citations
    },
    {
      id: artifactId("missing_info_request"),
      kind: "missing_info_request",
      title: "Missing information request",
      content: openQuestions.length ? openQuestions.map((question) => `- ${question.question}`).join("\n") : "No missing information questions are open.",
      citations: []
    },
    {
      id: artifactId("json_packet"),
      kind: "json_packet",
      title: "JSON packet",
      content: JSON.stringify({ caseId: state.id, items: state.items, impacts: state.impacts, evidenceSourceIds: state.evidenceSources.map((source) => source.id) }, null, 2),
      citations
    },
    {
      id: artifactId("validation_report"),
      kind: "validation_report",
      title: "Validation report",
      content: state.validationIssues.length ? state.validationIssues.map((issue) => `- [${issue.severity}] ${issue.code}: ${issue.message}`).join("\n") : "No validation issues.",
      citations: []
    }
  ];

  return {
    id: stableCaseId("packet", [state.id, state.updatedAt, state.items.map((item) => item.id)]),
    caseId: state.id,
    pceCase: state,
    artifacts,
    validationIssues: state.validationIssues,
    missingInfoQuestions: state.missingInfoQuestions,
    createdAt
  };
}
|
|
11830
|
+
/**
 * Removes duplicate citations, keeping the first occurrence of each and
 * preserving input order.
 *
 * Two citations are duplicates when their `sourceId`, `quote`, `page` and
 * `fieldPath` all match after string conversion (a missing `page`/`fieldPath`
 * is treated as the empty string).
 *
 * @param {Array<{sourceId: string, quote: string, page?: (number|string), fieldPath?: string}>} citations
 * @returns {Array<object>} A new array holding the original citation objects, de-duplicated.
 */
function uniqueCitations(citations) {
  const seen = /* @__PURE__ */ new Set();
  return citations.filter(({ sourceId, quote, page, fieldPath }) => {
    // Serialise the parts as a JSON array rather than joining with ":" —
    // quotes and ids may themselves contain ":", which made distinct
    // citations (e.g. "a" + "b:c" vs "a:b" + "c") share one key.
    const key = JSON.stringify([String(sourceId), String(quote), String(page ?? ""), String(fieldPath ?? "")]);
    if (seen.has(key)) return false;
    seen.add(key);
    return true;
  });
}
|
|
11839
|
+
|
|
11840
|
+
// src/pce/quality.ts
|
|
11841
|
+
/**
 * Summarises the quality of a PCE case state as counts plus an overall gate status.
 *
 * Counts:
 *   - blockingIssues / warningIssues: validation issues by severity.
 *   - missingInfoCount: questions whose answer is absent or whitespace-only.
 *   - ungroundedExistingValueCount: items with a non-blank `beforeValue` but no source spans.
 *
 * Gate status is "failed" when there is any blocking issue or ungrounded
 * existing value, otherwise "warning" when there is any warning issue or
 * missing answer, otherwise "passed".
 *
 * @param {object} state - Case state; reads `validationIssues`, `missingInfoQuestions` and `items`.
 * @returns {{qualityGateStatus: string, blockingIssues: number, warningIssues: number,
 *   missingInfoCount: number, ungroundedExistingValueCount: number}}
 */
function buildPceQualityReport(state) {
  let blockingIssues = 0;
  let warningIssues = 0;
  for (const issue of state.validationIssues) {
    if (issue.severity === "blocking") {
      blockingIssues += 1;
    } else if (issue.severity === "warning") {
      warningIssues += 1;
    }
  }

  let missingInfoCount = 0;
  for (const question of state.missingInfoQuestions) {
    if (!question.answer?.trim()) missingInfoCount += 1;
  }

  let ungroundedExistingValueCount = 0;
  for (const item of state.items) {
    // Only items that claim an existing value need supporting source spans.
    if (!item.beforeValue?.trim()) continue;
    if (item.sourceSpanIds.length === 0) ungroundedExistingValueCount += 1;
  }

  let qualityGateStatus = "passed";
  if (blockingIssues > 0 || ungroundedExistingValueCount > 0) {
    qualityGateStatus = "failed";
  } else if (warningIssues > 0 || missingInfoCount > 0) {
    qualityGateStatus = "warning";
  }

  return {
    qualityGateStatus,
    blockingIssues,
    warningIssues,
    missingInfoCount,
    ungroundedExistingValueCount
  };
}
|
|
11857
|
+
|
|
9743
11858
|
// src/prompts/intent.ts
|
|
9744
11859
|
function buildClassifyMessagePrompt(platform) {
|
|
9745
11860
|
const platformFields = {
|
|
@@ -9868,6 +11983,7 @@ export {
|
|
|
9868
11983
|
AcroFormMappingSchema,
|
|
9869
11984
|
AddressSchema,
|
|
9870
11985
|
AdmittedStatusSchema,
|
|
11986
|
+
AgenticExecutionModeSchema,
|
|
9871
11987
|
AnswerParsingResultSchema,
|
|
9872
11988
|
ApplicationClassifyResultSchema,
|
|
9873
11989
|
ApplicationEmailReviewSchema,
|
|
@@ -9894,6 +12010,15 @@ export {
|
|
|
9894
12010
|
COVERAGE_COMPARISON_TOOL,
|
|
9895
12011
|
COVERAGE_FORMS,
|
|
9896
12012
|
COVERAGE_TRIGGERS,
|
|
12013
|
+
CaseActionSchema,
|
|
12014
|
+
CaseCitationSchema,
|
|
12015
|
+
CaseEvidenceSourceSchema,
|
|
12016
|
+
CasePacketArtifactKindSchema,
|
|
12017
|
+
CasePacketArtifactSchema,
|
|
12018
|
+
CaseProposalSchema,
|
|
12019
|
+
CaseProposalScoreSchema,
|
|
12020
|
+
CaseSubmissionPacketSchema,
|
|
12021
|
+
CaseValidationIssueSchema,
|
|
9897
12022
|
ChunkTypeSchema,
|
|
9898
12023
|
CitationSchema,
|
|
9899
12024
|
ClaimRecordSchema,
|
|
@@ -9975,6 +12100,8 @@ export {
|
|
|
9975
12100
|
LookupRequestSchema,
|
|
9976
12101
|
LossSettlementSchema,
|
|
9977
12102
|
LossSummarySchema,
|
|
12103
|
+
MemorySourceStore,
|
|
12104
|
+
MissingInfoQuestionSchema,
|
|
9978
12105
|
NamedInsuredSchema,
|
|
9979
12106
|
PERSONAL_AUTO_USAGES,
|
|
9980
12107
|
PET_SPECIES,
|
|
@@ -9985,6 +12112,9 @@ export {
|
|
|
9985
12112
|
ParsedAnswerSchema,
|
|
9986
12113
|
PaymentInstallmentSchema,
|
|
9987
12114
|
PaymentPlanSchema,
|
|
12115
|
+
PceCaseStateSchema,
|
|
12116
|
+
PceNormalizationResultSchema,
|
|
12117
|
+
PceSubmissionPacketSchema,
|
|
9988
12118
|
PersonalArticlesDeclarationsSchema,
|
|
9989
12119
|
PersonalAutoDeclarationsSchema,
|
|
9990
12120
|
PersonalAutoUsageSchema,
|
|
@@ -9993,6 +12123,13 @@ export {
|
|
|
9993
12123
|
PetDeclarationsSchema,
|
|
9994
12124
|
PetSpeciesSchema,
|
|
9995
12125
|
PlatformSchema,
|
|
12126
|
+
PolicyChangeActionSchema,
|
|
12127
|
+
PolicyChangeConfidenceSchema,
|
|
12128
|
+
PolicyChangeImpactSchema,
|
|
12129
|
+
PolicyChangeItemSchema,
|
|
12130
|
+
PolicyChangeKindSchema,
|
|
12131
|
+
PolicyChangeRequestSchema,
|
|
12132
|
+
PolicyChangeStatusSchema,
|
|
9996
12133
|
PolicyConditionSchema,
|
|
9997
12134
|
PolicyDocumentSchema,
|
|
9998
12135
|
PolicySectionTypeSchema,
|
|
@@ -10007,6 +12144,7 @@ export {
|
|
|
10007
12144
|
QueryClassifyResultSchema,
|
|
10008
12145
|
QueryIntentSchema,
|
|
10009
12146
|
QueryResultSchema,
|
|
12147
|
+
QueryRetrievalModeSchema,
|
|
10010
12148
|
QuestionBatchResultSchema,
|
|
10011
12149
|
QuoteDocumentSchema,
|
|
10012
12150
|
QuoteSectionTypeSchema,
|
|
@@ -10025,6 +12163,13 @@ export {
|
|
|
10025
12163
|
ScheduledItemCategorySchema,
|
|
10026
12164
|
SectionSchema,
|
|
10027
12165
|
SharedLimitSchema,
|
|
12166
|
+
SourceChunkSchema,
|
|
12167
|
+
SourceKindSchema,
|
|
12168
|
+
SourceSpanBBoxSchema,
|
|
12169
|
+
SourceSpanKindSchema,
|
|
12170
|
+
SourceSpanLocationSchema,
|
|
12171
|
+
SourceSpanRefSchema,
|
|
12172
|
+
SourceSpanSchema,
|
|
10028
12173
|
SubAnswerSchema,
|
|
10029
12174
|
SubQuestionSchema,
|
|
10030
12175
|
SubjectivityCategorySchema,
|
|
@@ -10040,6 +12185,7 @@ export {
|
|
|
10040
12185
|
UnderwritingConditionSchema,
|
|
10041
12186
|
VALUATION_METHODS,
|
|
10042
12187
|
VEHICLE_COVERAGE_TYPES,
|
|
12188
|
+
ValidationIssueSeveritySchema,
|
|
10043
12189
|
ValuationMethodSchema,
|
|
10044
12190
|
VehicleCoverageSchema,
|
|
10045
12191
|
VehicleCoverageTypeSchema,
|
|
@@ -10064,6 +12210,11 @@ export {
|
|
|
10064
12210
|
buildIntentPrompt,
|
|
10065
12211
|
buildInterpretAttachmentPrompt,
|
|
10066
12212
|
buildLookupFillPrompt,
|
|
12213
|
+
buildPageSourceSpans,
|
|
12214
|
+
buildPceNormalizePrompt,
|
|
12215
|
+
buildPceQualityReport,
|
|
12216
|
+
buildPceReplyPrompt,
|
|
12217
|
+
buildPceSubmissionPacket,
|
|
10067
12218
|
buildPdfProviderOptions,
|
|
10068
12219
|
buildQueryClassifyPrompt,
|
|
10069
12220
|
buildQuestionBatchPrompt,
|
|
@@ -10072,28 +12223,54 @@ export {
|
|
|
10072
12223
|
buildReplyIntentClassificationPrompt,
|
|
10073
12224
|
buildRespondPrompt,
|
|
10074
12225
|
buildSafetyPrompt,
|
|
12226
|
+
buildSectionSourceSpans,
|
|
12227
|
+
buildSourceSpan,
|
|
12228
|
+
buildSourceSpanId,
|
|
12229
|
+
buildTextSourceSpans,
|
|
10075
12230
|
buildVerifyPrompt,
|
|
10076
12231
|
chunkDocument,
|
|
12232
|
+
chunkSourceSpans,
|
|
12233
|
+
collectPceEvidenceSources,
|
|
12234
|
+
compareSourceEvidence,
|
|
10077
12235
|
createApplicationPipeline,
|
|
10078
12236
|
createExtractor,
|
|
12237
|
+
createPceAgent,
|
|
10079
12238
|
createPipelineContext,
|
|
10080
12239
|
createQueryAgent,
|
|
12240
|
+
evaluateCaseProposals,
|
|
12241
|
+
evidenceContainsQuote,
|
|
10081
12242
|
extractPageRange,
|
|
10082
12243
|
fillAcroForm,
|
|
12244
|
+
generateNextMessage,
|
|
10083
12245
|
getAcroFormFields,
|
|
10084
12246
|
getExtractor,
|
|
10085
12247
|
getFileIdentifier,
|
|
10086
12248
|
getPdfPageCount,
|
|
10087
12249
|
getTemplate,
|
|
10088
12250
|
isFileReference,
|
|
12251
|
+
mergeQuestionAnswers,
|
|
12252
|
+
normalizeForMatch,
|
|
12253
|
+
orderSourceEvidence,
|
|
10089
12254
|
overlayTextOnPdf,
|
|
10090
12255
|
pLimit,
|
|
10091
12256
|
pdfInputToBase64,
|
|
10092
12257
|
pdfInputToBytes,
|
|
12258
|
+
processReply,
|
|
12259
|
+
resolveModelBudget,
|
|
10093
12260
|
safeGenerateObject,
|
|
10094
12261
|
sanitizeNulls,
|
|
12262
|
+
scoreCaseProposal,
|
|
12263
|
+
selectPceExecutionMode,
|
|
12264
|
+
sourceSpanTextHash,
|
|
12265
|
+
stableCaseId,
|
|
12266
|
+
stableHash2 as stableHash,
|
|
12267
|
+
stablePolicyChangeItemId,
|
|
12268
|
+
stableStringify,
|
|
10095
12269
|
stripFences,
|
|
10096
12270
|
toStrictSchema,
|
|
12271
|
+
validateEvidence,
|
|
12272
|
+
validatePceItems,
|
|
12273
|
+
validateQuotedEvidence,
|
|
10097
12274
|
withRetry
|
|
10098
12275
|
};
|
|
10099
12276
|
//# sourceMappingURL=index.mjs.map
|