@claritylabs/cl-sdk 0.17.1 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -203,6 +203,72 @@ function createPipelineContext(opts) {
203
203
  };
204
204
  }
205
205
 
206
+ // src/core/model-budget.ts
207
/**
 * Coerces a loosely-typed value to a positive whole number.
 *
 * @param value - Candidate value of any type.
 * @returns The value rounded down to an integer when it is a finite number
 *   whose floor is at least 1; otherwise `undefined`.
 */
function positiveInteger(value) {
  if (typeof value !== "number" || !Number.isFinite(value)) return void 0;
  // Floor before testing positivity: fractions in (0, 1) would otherwise
  // collapse to 0, which is not nullish and so defeats `??` fallbacks.
  const floored = Math.floor(value);
  return floored > 0 ? floored : void 0;
}
210
/**
 * Resolves the output-token budget (`maxTokens`) for one model task and
 * collects warnings about caps and sizing risks.
 *
 * The starting budget is the first usable value among
 * `constraint.outputTokens`, the per-task entry of
 * `modelCapabilities.taskOutputTokens`, the long-list capability (only for
 * `"extraction_long_list"`), the model default, and `hintTokens` (4096 when
 * no usable hint is supplied). It is then raised to
 * `constraint.minOutputTokens` and capped by the model maximum and by the hard
 * maximum (`constraint.maxOutputTokens`, else `providerMaxOutputTokens`).
 *
 * @param params - Task kind plus optional `modelCapabilities`, `constraint`,
 *   `hintTokens`, `providerMaxOutputTokens`, `inputContextBytes`,
 *   `schemaSizeBytes`, and `expectedListLength`.
 * @returns The resolved budget, the applied limits, the estimated input
 *   tokens, a truncation risk of `"low"`, `"medium"` or `"high"`, and the
 *   collected warnings.
 */
function resolveModelBudget(params) {
  const { taskKind } = params;
  const capabilities = params.modelCapabilities;
  const limits = params.constraint;
  const warnings = [];

  // Sizing hints derived from the request.
  const hintTokens = positiveInteger(params.hintTokens) ?? 4096;
  const estimatedInputTokens = estimateTokens(params.inputContextBytes);
  const schemaTokens = estimateTokens(params.schemaSizeBytes) ?? 0;
  const expectedListLength = positiveInteger(params.expectedListLength) ?? 0;

  // Upper bounds; the explicit constraint wins over the provider maximum.
  const modelMaxOutputTokens = positiveInteger(capabilities?.maxOutputTokens);
  const hardMaxOutputTokens =
    positiveInteger(limits?.maxOutputTokens) ?? positiveInteger(params.providerMaxOutputTokens);

  const longListTokens =
    taskKind === "extraction_long_list" ? positiveInteger(capabilities?.longListOutputTokens) : void 0;
  let maxTokens =
    positiveInteger(limits?.outputTokens) ??
    positiveInteger(capabilities?.taskOutputTokens?.[taskKind]) ??
    longListTokens ??
    positiveInteger(capabilities?.defaultOutputTokens) ??
    hintTokens;

  const minOutputTokens = positiveInteger(limits?.minOutputTokens);
  if (minOutputTokens) {
    maxTokens = Math.max(maxTokens, minOutputTokens);
  }

  // Clamp to a limit when one is configured, warning only if it actually bites.
  const capAt = (limit, warning) => {
    if (!limit) return;
    if (maxTokens > limit) warnings.push(warning);
    maxTokens = Math.min(maxTokens, limit);
  };
  capAt(modelMaxOutputTokens, `Resolved ${taskKind} budget was capped by model max output tokens.`);
  capAt(hardMaxOutputTokens, `Resolved ${taskKind} budget was capped by an explicit hard max output token constraint.`);

  const expectedOutputFloor = expectedOutputTokensFloor(taskKind, schemaTokens, expectedListLength, hintTokens);
  let outputTruncationRisk = "low";
  if (maxTokens < expectedOutputFloor * 0.65) {
    outputTruncationRisk = "high";
  } else if (maxTokens < expectedOutputFloor) {
    outputTruncationRisk = "medium";
  }
  if (outputTruncationRisk !== "low") {
    warnings.push(`Resolved ${taskKind} budget may be under-sized for the expected output shape.`);
  }

  // Warn once the estimated input exceeds 90% of the configured input limit.
  const maxInputTokens = positiveInteger(capabilities?.maxInputTokens);
  if (estimatedInputTokens && maxInputTokens && estimatedInputTokens > maxInputTokens * 0.9) {
    warnings.push(`Estimated ${taskKind} input context is close to or above the configured model input limit.`);
  }

  return {
    taskKind,
    maxTokens,
    hintTokens,
    modelMaxOutputTokens,
    hardMaxOutputTokens,
    estimatedInputTokens,
    outputTruncationRisk,
    warnings
  };
}
261
/**
 * Estimates a token count from a byte size at four bytes per token.
 *
 * @param bytes - Size in bytes; anything that is not a finite number greater
 *   than zero is treated as unknown.
 * @returns The rounded-up estimate, or `undefined` when the size is unknown or
 *   floors to zero.
 */
function estimateTokens(bytes) {
  if (typeof bytes !== "number" || !Number.isFinite(bytes) || bytes <= 0) {
    return void 0;
  }
  const wholeBytes = Math.floor(bytes);
  if (wholeBytes === 0) return void 0;
  return Math.ceil(wholeBytes / 4);
}
266
/**
 * Computes the smallest output budget expected to fit a task's response.
 *
 * The floor is the largest of three values: 1.5x the schema tokens (rounded
 * up), the per-item list allowance (90 tokens per item for
 * `"extraction_long_list"`, 45 otherwise), and 75% of the hint (rounded down).
 *
 * @param taskKind - Task identifier; only `"extraction_long_list"` changes the
 *   per-item allowance.
 * @param schemaTokens - Estimated tokens taken by the output schema.
 * @param expectedListLength - Expected number of list items; values of zero or
 *   less contribute nothing.
 * @param hintTokens - Caller-provided output size hint.
 * @returns The expected output floor in tokens.
 */
function expectedOutputTokensFloor(taskKind, schemaTokens, expectedListLength, hintTokens) {
  const tokensPerItem = taskKind === "extraction_long_list" ? 90 : 45;
  let listFloor = 0;
  if (expectedListLength > 0) {
    listFloor = expectedListLength * tokensPerItem;
  }
  const schemaFloor = Math.ceil(schemaTokens * 1.5);
  const hintFloor = Math.floor(hintTokens * 0.75);
  return Math.max(schemaFloor, listFloor, hintFloor);
}
271
+
206
272
  // src/schemas/enums.ts
207
273
  import { z as z2 } from "zod";
208
274
  var PolicyTypeSchema = z2.enum([
@@ -554,7 +620,10 @@ var CoverageSchema = z4.object({
554
620
  formNumber: z4.string().optional(),
555
621
  pageNumber: z4.number().optional(),
556
622
  sectionRef: z4.string().optional(),
557
- originalContent: z4.string().optional()
623
+ originalContent: z4.string().optional(),
624
+ recordId: z4.string().optional(),
625
+ sourceSpanIds: z4.array(z4.string()).optional(),
626
+ sourceTextHash: z4.string().optional()
558
627
  });
559
628
  var EnrichedCoverageSchema = z4.object({
560
629
  name: z4.string(),
@@ -578,7 +647,10 @@ var EnrichedCoverageSchema = z4.object({
578
647
  premium: z4.string().optional(),
579
648
  pageNumber: z4.number().optional(),
580
649
  sectionRef: z4.string().optional(),
581
- originalContent: z4.string().optional()
650
+ originalContent: z4.string().optional(),
651
+ recordId: z4.string().optional(),
652
+ sourceSpanIds: z4.array(z4.string()).optional(),
653
+ sourceTextHash: z4.string().optional()
582
654
  });
583
655
 
584
656
  // src/schemas/endorsement.ts
@@ -602,7 +674,10 @@ var EndorsementSchema = z5.object({
602
674
  premiumImpact: z5.string().optional(),
603
675
  content: z5.string(),
604
676
  pageStart: z5.number(),
605
- pageEnd: z5.number().optional()
677
+ pageEnd: z5.number().optional(),
678
+ recordId: z5.string().optional(),
679
+ sourceSpanIds: z5.array(z5.string()).optional(),
680
+ sourceTextHash: z5.string().optional()
606
681
  });
607
682
 
608
683
  // src/schemas/exclusion.ts
@@ -617,7 +692,10 @@ var ExclusionSchema = z6.object({
617
692
  buybackEndorsement: z6.string().optional(),
618
693
  appliesTo: z6.array(z6.string()).optional(),
619
694
  content: z6.string(),
620
- pageNumber: z6.number().optional()
695
+ pageNumber: z6.number().optional(),
696
+ recordId: z6.string().optional(),
697
+ sourceSpanIds: z6.array(z6.string()).optional(),
698
+ sourceTextHash: z6.string().optional()
621
699
  });
622
700
 
623
701
  // src/schemas/condition.ts
@@ -631,7 +709,10 @@ var PolicyConditionSchema = z7.object({
631
709
  conditionType: ConditionTypeSchema,
632
710
  content: z7.string(),
633
711
  keyValues: z7.array(ConditionKeyValueSchema).optional(),
634
- pageNumber: z7.number().optional()
712
+ pageNumber: z7.number().optional(),
713
+ recordId: z7.string().optional(),
714
+ sourceSpanIds: z7.array(z7.string()).optional(),
715
+ sourceTextHash: z7.string().optional()
635
716
  });
636
717
 
637
718
  // src/schemas/parties.ts
@@ -1214,7 +1295,10 @@ var SectionSchema = z16.object({
1214
1295
  type: z16.string(),
1215
1296
  coverageType: z16.string().optional(),
1216
1297
  content: z16.string(),
1217
- subsections: z16.array(SubsectionSchema).optional()
1298
+ subsections: z16.array(SubsectionSchema).optional(),
1299
+ recordId: z16.string().optional(),
1300
+ sourceSpanIds: z16.array(z16.string()).optional(),
1301
+ sourceTextHash: z16.string().optional()
1218
1302
  });
1219
1303
  var SubjectivitySchema = z16.object({
1220
1304
  description: z16.string(),
@@ -1240,7 +1324,10 @@ var DefinitionSchema = z16.object({
1240
1324
  formNumber: z16.string().optional(),
1241
1325
  formTitle: z16.string().optional(),
1242
1326
  sectionRef: z16.string().optional(),
1243
- originalContent: z16.string().optional()
1327
+ originalContent: z16.string().optional(),
1328
+ recordId: z16.string().optional(),
1329
+ sourceSpanIds: z16.array(z16.string()).optional(),
1330
+ sourceTextHash: z16.string().optional()
1244
1331
  });
1245
1332
  var CoveredReasonSchema = z16.object({
1246
1333
  coverageName: z16.string(),
@@ -1254,7 +1341,10 @@ var CoveredReasonSchema = z16.object({
1254
1341
  formNumber: z16.string().optional(),
1255
1342
  formTitle: z16.string().optional(),
1256
1343
  sectionRef: z16.string().optional(),
1257
- originalContent: z16.string().optional()
1344
+ originalContent: z16.string().optional(),
1345
+ recordId: z16.string().optional(),
1346
+ sourceSpanIds: z16.array(z16.string()).optional(),
1347
+ sourceTextHash: z16.string().optional()
1258
1348
  });
1259
1349
  var BaseDocumentFields = {
1260
1350
  id: z16.string(),
@@ -1393,6 +1483,294 @@ var PLATFORM_CONFIGS = {
1393
1483
  }
1394
1484
  };
1395
1485
 
1486
+ // src/schemas/pce.ts
1487
+ import { z as z19 } from "zod";
1488
+
1489
+ // src/case/index.ts
1490
+ import { z as z18 } from "zod";
1491
+ var CaseEvidenceSourceSchema = z18.object({
1492
+ id: z18.string(),
1493
+ label: z18.string().optional(),
1494
+ documentId: z18.string().optional(),
1495
+ page: z18.number().optional(),
1496
+ fieldPath: z18.string().optional(),
1497
+ text: z18.string().describe("Source text available for span validation and citation"),
1498
+ metadata: z18.record(z18.string()).optional()
1499
+ });
1500
+ var CaseCitationSchema = z18.object({
1501
+ sourceId: z18.string(),
1502
+ quote: z18.string(),
1503
+ page: z18.number().optional(),
1504
+ fieldPath: z18.string().optional()
1505
+ });
1506
+ var ValidationIssueSeveritySchema = z18.enum(["info", "warning", "blocking"]);
1507
+ var CaseValidationIssueSchema = z18.object({
1508
+ code: z18.string(),
1509
+ severity: ValidationIssueSeveritySchema,
1510
+ message: z18.string(),
1511
+ itemId: z18.string().optional(),
1512
+ fieldPath: z18.string().optional(),
1513
+ sourceId: z18.string().optional()
1514
+ });
1515
+ var MissingInfoQuestionSchema = z18.object({
1516
+ id: z18.string(),
1517
+ itemId: z18.string().optional(),
1518
+ fieldPath: z18.string().optional(),
1519
+ question: z18.string(),
1520
+ reason: z18.string(),
1521
+ answer: z18.string().optional()
1522
+ });
1523
+ var CasePacketArtifactKindSchema = z18.enum([
1524
+ "underwriter_summary",
1525
+ "carrier_email",
1526
+ "missing_info_request",
1527
+ "json_packet",
1528
+ "validation_report"
1529
+ ]);
1530
+ var CasePacketArtifactSchema = z18.object({
1531
+ id: z18.string(),
1532
+ kind: CasePacketArtifactKindSchema,
1533
+ title: z18.string(),
1534
+ content: z18.string(),
1535
+ citations: z18.array(CaseCitationSchema).default([])
1536
+ });
1537
+ var CaseSubmissionPacketSchema = z18.object({
1538
+ id: z18.string(),
1539
+ caseId: z18.string(),
1540
+ artifacts: z18.array(CasePacketArtifactSchema),
1541
+ validationIssues: z18.array(CaseValidationIssueSchema),
1542
+ missingInfoQuestions: z18.array(MissingInfoQuestionSchema),
1543
+ createdAt: z18.number()
1544
+ });
1545
+ var CaseActionSchema = z18.enum([
1546
+ "inspect_attachments",
1547
+ "retrieve_policy_evidence",
1548
+ "retrieve_prior_applications",
1549
+ "normalize_requested_change",
1550
+ "extract_application_fields",
1551
+ "fill_from_org_context",
1552
+ "fill_from_source_spans",
1553
+ "ask_missing_info_questions",
1554
+ "run_validation",
1555
+ "generate_packet",
1556
+ "answer_field_or_case_question"
1557
+ ]);
1558
+ var AgenticExecutionModeSchema = z18.enum(["deterministic_tree", "market_eval", "hybrid"]);
1559
+ var CaseProposalScoreSchema = z18.object({
1560
+ grounding: z18.number().min(0).max(1),
1561
+ completeness: z18.number().min(0).max(1),
1562
+ consistency: z18.number().min(0).max(1),
1563
+ determinism: z18.number().min(0).max(1),
1564
+ risk: z18.number().min(0).max(1),
1565
+ cost: z18.number().min(0).max(1)
1566
+ });
1567
+ var CaseProposalSchema = z18.object({
1568
+ id: z18.string(),
1569
+ sourceSpanIds: z18.array(z18.string()).default([]),
1570
+ confidence: z18.number().min(0).max(1),
1571
+ missingInfo: z18.array(z18.string()).default([]),
1572
+ validationIssues: z18.array(CaseValidationIssueSchema).default([]),
1573
+ estimatedRisk: z18.number().min(0).max(1).default(0.5),
1574
+ estimatedCost: z18.number().min(0).max(1).default(0.5),
1575
+ score: CaseProposalScoreSchema.optional()
1576
+ });
1577
/**
 * Derives a deterministic identifier of the form `<prefix>-<12 hex chars>`.
 *
 * @param prefix - Leading label for the identifier.
 * @param parts - Value that is serialized with sorted object keys and hashed.
 * @returns The prefix joined to the first 12 characters of the hash.
 */
function stableCaseId(prefix, parts) {
  const serialized = stableStringify(parts);
  const shortDigest = stableHash(serialized).slice(0, 12);
  return `${prefix}-${shortDigest}`;
}
1580
/**
 * Serializes a value so that object key order does not affect the output.
 *
 * Arrays keep their element order and objects are emitted with keys sorted.
 * Everything else is handed to `JSON.stringify`, so a value it cannot encode
 * (such as a bare `undefined`) comes back as whatever `JSON.stringify`
 * returns for it.
 *
 * @param value - Value to serialize.
 * @returns The canonical serialization.
 */
function stableStringify(value) {
  if (Array.isArray(value)) {
    const items = value.map((entry) => stableStringify(entry));
    return `[${items.join(",")}]`;
  }
  if (!value || typeof value !== "object") {
    return JSON.stringify(value);
  }
  const pairs = [];
  for (const key of Object.keys(value).sort()) {
    pairs.push(`${JSON.stringify(key)}:${stableStringify(value[key])}`);
  }
  return `{${pairs.join(",")}}`;
}
1590
/**
 * Hashes a string into 16 lowercase hex characters.
 *
 * Two independent 32-bit accumulators are updated per UTF-16 code unit (the
 * second also mixes in the character position), and each is rendered as eight
 * zero-padded hex digits.
 *
 * @param input - String to hash.
 * @returns The concatenated hex digests of both accumulators.
 */
function stableHash(input) {
  const toHex = (word) => (word >>> 0).toString(16).padStart(8, "0");
  let first = 2166136261;
  let second = 2654435769;
  let position = 0;
  while (position < input.length) {
    const code = input.charCodeAt(position);
    first = Math.imul(first ^ code, 16777619);
    second = Math.imul(second ^ (code + position), 2246822507);
    position += 1;
  }
  return `${toHex(first)}${toHex(second)}`;
}
1602
/**
 * Prepares text for case- and whitespace-insensitive comparison.
 *
 * @param value - Text to normalize.
 * @returns The text with whitespace runs collapsed to single spaces, outer
 *   whitespace removed, and all characters lowercased.
 */
function normalizeForMatch(value) {
  const singleSpaced = value.replace(/\s+/g, " ");
  return singleSpaced.trim().toLowerCase();
}
1605
/**
 * Reports whether a quote appears in an evidence source's text, ignoring case
 * and differences in whitespace.
 *
 * @param source - Evidence source exposing a `text` property; may be missing.
 * @param quote - Quoted text to look for.
 * @returns `false` when the source is missing or the quote is blank, otherwise
 *   whether the normalized source text includes the normalized quote.
 */
function evidenceContainsQuote(source, quote) {
  const squash = (text) => text.replace(/\s+/g, " ").trim().toLowerCase();
  if (!source) return false;
  if (quote.trim() === "") return false;
  return squash(source.text).includes(squash(quote));
}
1609
/**
 * Checks that a quoted value is backed by a citation whose source contains it.
 *
 * @param params - `quote`, `fieldPath`, `sources`, and optionally `citation`,
 *   `itemId`, and `severity` (issues default to `"blocking"`).
 * @returns An empty array when the quote is blank or verified; otherwise a
 *   single issue coded `missing_citation`, `unknown_source`, or
 *   `quote_not_found`.
 */
function validateQuotedEvidence(params) {
  const quote = params.quote?.trim();
  if (!quote) return [];

  const severity = params.severity ?? "blocking";
  const where = { itemId: params.itemId, fieldPath: params.fieldPath };

  const citation = params.citation;
  if (!citation) {
    return [{
      code: "missing_citation",
      severity,
      message: `Quoted value for ${params.fieldPath} is missing a citation.`,
      ...where
    }];
  }

  const source = params.sources.find((candidate) => candidate.id === citation.sourceId);
  if (!source) {
    return [{
      code: "unknown_source",
      severity,
      message: `Citation source ${citation.sourceId} was not provided for ${params.fieldPath}.`,
      ...where,
      sourceId: citation.sourceId
    }];
  }

  // Both the citation's own quote and the quoted value must occur in the
  // source; a blank citation quote falls back to the quoted value.
  const squash = (text) => text.replace(/\s+/g, " ").trim().toLowerCase();
  const sourceHas = (needle) => needle.trim() !== "" && squash(source.text).includes(squash(needle));
  const citedQuote = citation.quote.trim() || quote;
  if (!(sourceHas(citedQuote) && sourceHas(quote))) {
    return [{
      code: "quote_not_found",
      severity,
      message: `Quoted value for ${params.fieldPath} was not found in source ${source.id}.`,
      ...where,
      sourceId: source.id
    }];
  }
  return [];
}
// Alias retained so callers may use either name.
var validateEvidence = validateQuotedEvidence;
1647
/**
 * Applies replies to open missing-information questions.
 *
 * A reply matches a question by `questionId`, or failing that by `fieldPath`.
 * An exact `questionId` match always wins, so an earlier reply that merely
 * shares the field path cannot shadow it. Replies whose `answer` is missing,
 * not a string, or blank leave the question untouched.
 *
 * @param questions - Questions to update; the input array is not mutated.
 * @param answers - Replies carrying `answer` plus `questionId` and/or
 *   `fieldPath`.
 * @returns The merged questions and the number of questions whose answer
 *   changed.
 */
function mergeQuestionAnswers(questions, answers) {
  let answeredCount = 0;
  const merged = questions.map((question) => {
    const answer =
      answers.find((candidate) => candidate.questionId && candidate.questionId === question.id) ??
      answers.find((candidate) => candidate.fieldPath && candidate.fieldPath === question.fieldPath);
    // Guard the type first: a reply without a string answer must not throw.
    if (typeof answer?.answer !== "string" || !answer.answer.trim()) return question;
    answeredCount += question.answer === answer.answer ? 0 : 1;
    return { ...question, answer: answer.answer };
  });
  return { questions: merged, answeredCount };
}
// Alias retained so callers may use either name.
var processReply = mergeQuestionAnswers;
1660
/**
 * Builds the next prompt listing every question that still lacks an answer.
 *
 * @param questions - Questions with a `question` text and optional `answer`.
 * @returns The open question texts, one per line, or a fixed notice when no
 *   question is open.
 */
function generateNextMessage(questions) {
  const prompts = [];
  for (const entry of questions) {
    // A blank or missing answer means the question is still open.
    if (entry.answer?.trim()) continue;
    prompts.push(entry.question);
  }
  if (prompts.length === 0) {
    return "No missing information questions are open.";
  }
  return prompts.join("\n");
}
1665
/**
 * Scores a proposal on six axes, each in the range 0..1.
 *
 * A score already attached to the proposal is returned unchanged. Otherwise
 * the axes are derived from the proposal: grounding (any source span),
 * completeness (1 with no missing info, else 0.4), consistency (0 when any
 * blocking issue exists), determinism (non-blank id), and the inverted risk
 * and cost estimates.
 *
 * @param proposal - Proposal with `id`, `sourceSpanIds`, `missingInfo`,
 *   `validationIssues`, `estimatedRisk`, `estimatedCost`, and optional `score`.
 * @returns The score for the proposal.
 */
function scoreCaseProposal(proposal) {
  if (proposal.score) return proposal.score;

  let blocked = false;
  for (const issue of proposal.validationIssues) {
    if (issue.severity === "blocking") {
      blocked = true;
      break;
    }
  }
  const grounded = proposal.sourceSpanIds.length > 0;
  const complete = proposal.missingInfo.length === 0;
  const identified = proposal.id.trim().length > 0;

  return {
    grounding: grounded ? 1 : 0,
    completeness: complete ? 1 : 0.4,
    consistency: blocked ? 0 : 1,
    determinism: identified ? 1 : 0,
    risk: 1 - proposal.estimatedRisk,
    cost: 1 - proposal.estimatedCost
  };
}
1678
/**
 * Picks the best proposal among those without a blocking grounding failure.
 *
 * Proposals carrying a blocking `missing_citation`, `unknown_source`, or
 * `quote_not_found` issue are discarded. The rest are ranked by weighted total
 * score (highest first), with ties broken by ascending `id`.
 *
 * @param proposals - Candidate proposals.
 * @returns The top-ranked proposal, or `undefined` when none qualifies.
 */
function evaluateCaseProposals(proposals) {
  const isGroundingFailure = (issue) => {
    if (issue.severity !== "blocking") return false;
    return issue.code === "missing_citation" || issue.code === "unknown_source" || issue.code === "quote_not_found";
  };

  const eligible = proposals.filter((proposal) => !proposal.validationIssues.some(isGroundingFailure));
  const ranked = eligible.map((proposal) => ({ proposal, score: scoreCaseProposal(proposal) }));
  ranked.sort((left, right) => {
    const leftTotal = totalProposalScore(left.score);
    const rightTotal = totalProposalScore(right.score);
    if (rightTotal !== leftTotal) return rightTotal - leftTotal;
    return left.proposal.id.localeCompare(right.proposal.id);
  });

  const winner = ranked[0];
  return winner?.proposal;
}
1688
/**
 * Collapses a six-axis proposal score into one weighted total.
 *
 * Grounding and consistency are weighted 3x, completeness 2x, and
 * determinism, risk and cost 1x each.
 *
 * @param score - Score with `grounding`, `completeness`, `consistency`,
 *   `determinism`, `risk`, and `cost`.
 * @returns The weighted sum.
 */
function totalProposalScore(score) {
  const weightedGrounding = score.grounding * 3;
  const weightedCompleteness = score.completeness * 2;
  const weightedConsistency = score.consistency * 3;
  return weightedGrounding + weightedCompleteness + weightedConsistency + score.determinism + score.risk + score.cost;
}
1691
+
1692
+ // src/schemas/pce.ts
1693
+ var PolicyChangeActionSchema = z19.enum(["add", "remove", "update", "replace", "clarify"]);
1694
+ var PolicyChangeKindSchema = z19.enum([
1695
+ "named_insured_change",
1696
+ "additional_insured_change",
1697
+ "coverage_change",
1698
+ "limit_change",
1699
+ "deductible_change",
1700
+ "location_change",
1701
+ "vehicle_change",
1702
+ "certificate_endorsement_request",
1703
+ "cancellation",
1704
+ "nonrenewal",
1705
+ "renewal_submission_update",
1706
+ "general_endorsement"
1707
+ ]);
1708
+ var PolicyChangeConfidenceSchema = z19.enum(["high", "medium", "low"]);
1709
+ var PolicyChangeStatusSchema = z19.enum(["draft", "needs_info", "ready", "blocked"]);
1710
+ var PolicyChangeItemSchema = z19.object({
1711
+ id: z19.string(),
1712
+ kind: PolicyChangeKindSchema.default("general_endorsement"),
1713
+ action: PolicyChangeActionSchema,
1714
+ affectedPolicyId: z19.string().default("unknown"),
1715
+ fieldPath: z19.string().describe("Stable policy field path or business field name"),
1716
+ label: z19.string(),
1717
+ beforeValue: z19.string().optional().describe("Existing policy value, when cited from policy evidence"),
1718
+ afterValue: z19.string().optional().describe("Requested new value"),
1719
+ requestedValue: z19.string().optional().describe("Alias for afterValue used by policy-change workflows"),
1720
+ effectiveDate: z19.string().optional(),
1721
+ reason: z19.string().optional(),
1722
+ sourceIds: z19.array(z19.string()).default([]),
1723
+ sourceSpanIds: z19.array(z19.string()).default([]),
1724
+ userSourceSpanIds: z19.array(z19.string()).optional(),
1725
+ citations: z19.array(CaseCitationSchema).default([]),
1726
+ confidence: PolicyChangeConfidenceSchema.default("medium"),
1727
+ confidenceScore: z19.number().min(0).max(1).optional(),
1728
+ status: PolicyChangeStatusSchema.default("ready")
1729
+ });
1730
+ var PceNormalizationResultSchema = z19.object({
1731
+ summary: z19.string(),
1732
+ items: z19.array(PolicyChangeItemSchema.omit({ id: true, status: true }).extend({
1733
+ id: z19.string().optional(),
1734
+ status: PolicyChangeStatusSchema.optional()
1735
+ })),
1736
+ missingInfoQuestions: z19.array(MissingInfoQuestionSchema.omit({ id: true }).extend({
1737
+ id: z19.string().optional()
1738
+ })).default([])
1739
+ });
1740
+ var PolicyChangeImpactSchema = z19.object({
1741
+ itemId: z19.string(),
1742
+ beforeValue: z19.string().optional(),
1743
+ requestedValue: z19.string().optional(),
1744
+ likelyEndorsementRequired: z19.boolean().default(true),
1745
+ carrierApprovalLikelyRequired: z19.boolean().default(true),
1746
+ affectedCoverageForms: z19.array(z19.string()).default([]),
1747
+ sourceSpanIds: z19.array(z19.string()).default([])
1748
+ });
1749
+ var PceCaseStateSchema = z19.object({
1750
+ id: z19.string(),
1751
+ requestText: z19.string(),
1752
+ summary: z19.string(),
1753
+ executionMode: AgenticExecutionModeSchema.default("deterministic_tree"),
1754
+ items: z19.array(PolicyChangeItemSchema),
1755
+ impacts: z19.array(PolicyChangeImpactSchema),
1756
+ evidenceSources: z19.array(CaseEvidenceSourceSchema),
1757
+ validationIssues: z19.array(CaseValidationIssueSchema),
1758
+ missingInfoQuestions: z19.array(MissingInfoQuestionSchema),
1759
+ createdAt: z19.number(),
1760
+ updatedAt: z19.number()
1761
+ });
1762
+ var PolicyChangeRequestSchema = z19.object({
1763
+ id: z19.string(),
1764
+ text: z19.string(),
1765
+ executionMode: AgenticExecutionModeSchema.optional(),
1766
+ userSourceSpanIds: z19.array(z19.string()).optional(),
1767
+ createdAt: z19.number().optional()
1768
+ });
1769
+ var PceSubmissionPacketSchema = CaseSubmissionPacketSchema.extend({
1770
+ pceCase: PceCaseStateSchema,
1771
+ artifacts: z19.array(CasePacketArtifactSchema)
1772
+ });
1773
+
1396
1774
  // src/schemas/context-keys.ts
1397
1775
  var CONTEXT_KEY_MAP = [
1398
1776
  { extractedField: "insuredName", category: "company_info", contextKey: "company_name", description: "Primary named insured" },
@@ -1445,6 +1823,400 @@ var CONTEXT_KEY_MAP = [
1445
1823
  { extractedField: "declarations.breed", category: "pet_info", contextKey: "pet_breed", description: "Pet breed" }
1446
1824
  ];
1447
1825
 
1826
+ // src/source/schemas.ts
1827
+ import { z as z20 } from "zod";
1828
+ var SourceSpanKindSchema = z20.enum([
1829
+ "pdf_text",
1830
+ "pdf_image",
1831
+ "html",
1832
+ "markdown",
1833
+ "plain_text",
1834
+ "structured_field"
1835
+ ]);
1836
+ var SourceKindSchema = z20.enum([
1837
+ "policy_pdf",
1838
+ "application_pdf",
1839
+ "email",
1840
+ "attachment",
1841
+ "manual_note"
1842
+ ]);
1843
+ var SourceSpanBBoxSchema = z20.object({
1844
+ page: z20.number().int().positive(),
1845
+ x: z20.number(),
1846
+ y: z20.number(),
1847
+ width: z20.number(),
1848
+ height: z20.number()
1849
+ });
1850
+ var SourceSpanLocationSchema = z20.object({
1851
+ page: z20.number().int().positive().optional(),
1852
+ startPage: z20.number().int().positive().optional(),
1853
+ endPage: z20.number().int().positive().optional(),
1854
+ charStart: z20.number().int().nonnegative().optional(),
1855
+ charEnd: z20.number().int().nonnegative().optional(),
1856
+ lineStart: z20.number().int().positive().optional(),
1857
+ lineEnd: z20.number().int().positive().optional(),
1858
+ fieldPath: z20.string().optional()
1859
+ });
1860
+ var SourceSpanSchema = z20.object({
1861
+ id: z20.string().min(1),
1862
+ documentId: z20.string().min(1),
1863
+ sourceKind: SourceKindSchema.optional(),
1864
+ chunkId: z20.string().optional(),
1865
+ kind: SourceSpanKindSchema,
1866
+ text: z20.string(),
1867
+ hash: z20.string().min(1),
1868
+ textHash: z20.string().optional(),
1869
+ pageStart: z20.number().int().positive().optional(),
1870
+ pageEnd: z20.number().int().positive().optional(),
1871
+ sectionId: z20.string().optional(),
1872
+ formNumber: z20.string().optional(),
1873
+ bbox: z20.array(SourceSpanBBoxSchema).optional(),
1874
+ location: SourceSpanLocationSchema.optional(),
1875
+ metadata: z20.record(z20.string()).optional()
1876
+ });
1877
+ var SourceSpanRefSchema = z20.object({
1878
+ sourceSpanId: z20.string().min(1),
1879
+ documentId: z20.string().min(1).optional(),
1880
+ chunkId: z20.string().optional(),
1881
+ quote: z20.string().optional(),
1882
+ hash: z20.string().optional(),
1883
+ location: SourceSpanLocationSchema.optional()
1884
+ });
1885
+ var SourceChunkSchema = z20.object({
1886
+ id: z20.string().min(1),
1887
+ documentId: z20.string().min(1),
1888
+ sourceSpanIds: z20.array(z20.string().min(1)),
1889
+ text: z20.string(),
1890
+ textHash: z20.string().min(1),
1891
+ pageStart: z20.number().int().positive().optional(),
1892
+ pageEnd: z20.number().int().positive().optional(),
1893
+ metadata: z20.record(z20.string()).default({})
1894
+ });
1895
+
1896
+ // src/source/ids.ts
1897
/**
 * Canonicalizes whitespace in a piece of text.
 *
 * @param text - Text to normalize.
 * @returns The text with each whitespace run replaced by one space and
 *   leading/trailing whitespace removed.
 */
function normalizeText(text) {
  const singleSpaced = text.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
1900
/**
 * Serializes a value deterministically, independent of object key order.
 *
 * `undefined` (and anything `JSON.stringify` cannot encode) becomes the
 * literal text `undefined`. Arrays keep their order. Objects are emitted with
 * sorted keys, and properties whose value is `undefined` are left out.
 *
 * @param value - Value to serialize.
 * @returns The canonical string form.
 */
function stableStringify2(value) {
  if (value === void 0) return "undefined";

  const isObjectLike = value !== null && typeof value === "object";
  if (!isObjectLike) {
    return JSON.stringify(value) ?? "undefined";
  }

  if (Array.isArray(value)) {
    const items = value.map((item) => stableStringify2(item));
    return `[${items.join(",")}]`;
  }

  const pairs = [];
  for (const key of Object.keys(value).sort()) {
    const entry = value[key];
    if (entry === void 0) continue;
    pairs.push(`${JSON.stringify(key)}:${stableStringify2(entry)}`);
  }
  return `{${pairs.join(",")}}`;
}
1913
/**
 * Hashes any value into 16 lowercase hex characters.
 *
 * The value is first serialized deterministically, then two 32-bit
 * accumulators are updated per UTF-16 code unit (the second also mixes in the
 * character position) and rendered as eight zero-padded hex digits each.
 *
 * @param value - Value to hash.
 * @returns The concatenated hex digests of both accumulators.
 */
function stableHash2(value) {
  const serialized = stableStringify2(value);
  const toHex = (word) => (word >>> 0).toString(16).padStart(8, "0");
  let first = 2166136261;
  let second = 73244475;
  for (let position = 0; position < serialized.length; position += 1) {
    const code = serialized.charCodeAt(position);
    first = Math.imul(first ^ code, 16777619);
    second = Math.imul(second ^ (code + position), 668265261);
  }
  return `${toHex(first)}${toHex(second)}`;
}
1926
/**
 * Hashes span text after whitespace normalization, so that texts differing
 * only in whitespace share a hash.
 *
 * @param text - Raw span text.
 * @returns The hash of the normalized text.
 */
function sourceSpanTextHash(text) {
  const canonical = normalizeText(text);
  return stableHash2(canonical);
}
1929
/**
 * Builds a deterministic, colon-separated source span identifier.
 *
 * The id is made of the document id, chunk id and field path (each only when
 * truthy) followed by a 16-character hash of those fields plus the location
 * and normalized text. Characters outside `a-zA-Z0-9_.:-` become `_`.
 *
 * @param input - Object with `documentId` and optional `chunkId`,
 *   `fieldPath`, `location`, and `text`.
 * @returns The identifier.
 */
function buildSourceSpanId(input) {
  const { documentId, chunkId, fieldPath, location } = input;
  const normalizedText = input.text ? normalizeText(input.text) : void 0;
  const digest = stableHash2({
    documentId,
    chunkId,
    fieldPath,
    location,
    text: normalizedText
  }).slice(0, 16);

  const segments = [];
  for (const part of [documentId, chunkId, fieldPath, digest]) {
    if (!part) continue;
    segments.push(part.replace(/[^a-zA-Z0-9_.:-]/g, "_"));
  }
  return segments.join(":");
}
1939
+
1940
+ // src/source/retrieval.ts
1941
/**
 * Builds a pipe-separated key used to order evidence with equal relevance.
 *
 * @param evidence - Evidence record; missing fields contribute an empty
 *   segment.
 * @returns `source|sourceSpanId|chunkId|documentId|turnId|attachmentId|text`.
 */
function evidenceTieBreakId(evidence) {
  const fields = ["source", "sourceSpanId", "chunkId", "documentId", "turnId", "attachmentId", "text"];
  const segments = fields.map((field) => evidence[field] ?? "");
  return segments.join("|");
}
1952
/**
 * Sort comparator for evidence: higher relevance first, then ascending
 * tie-break key.
 *
 * @param a - First evidence record.
 * @param b - Second evidence record.
 * @returns A negative, zero, or positive number in the `Array.prototype.sort`
 *   convention.
 */
function compareSourceEvidence(a, b) {
  const byRelevance = b.relevance - a.relevance;
  if (byRelevance === 0) {
    return evidenceTieBreakId(a).localeCompare(evidenceTieBreakId(b));
  }
  return byRelevance;
}
1957
/**
 * Returns the evidence sorted by relevance, leaving the input untouched.
 *
 * @param evidence - Evidence records to order.
 * @returns A new array sorted with `compareSourceEvidence`.
 */
function orderSourceEvidence(evidence) {
  const ordered = [...evidence];
  ordered.sort(compareSourceEvidence);
  return ordered;
}
1960
+
1961
+ // src/source/extraction.ts
1962
/**
 * Collapses every run of whitespace to one space and trims both ends.
 *
 * @param value - Text to clean up.
 * @returns The whitespace-normalized text.
 */
function normalizeWhitespace(value) {
  const collapsed = value.replace(/\s+/g, " ");
  return collapsed.trim();
}
1965
/**
 * Makes a string safe to embed in a colon-separated identifier.
 *
 * @param value - Raw identifier segment.
 * @returns The segment with every character outside `a-zA-Z0-9_.:-` replaced
 *   by `_`.
 */
function sanitizeIdPart(value) {
  const unsafeChars = /[^a-zA-Z0-9_.:-]/g;
  return value.replace(unsafeChars, "_");
}
1968
/**
 * Builds a schema-validated source span from raw text and document
 * coordinates.
 *
 * The text is whitespace-normalized and hashed. The id has the form
 * `<documentId>:span:<pageStart|na>:<localIndex>:<first 12 hash chars>`.
 *
 * @param input - `documentId`, `text`, and optionally `sourceKind`,
 *   `pageStart`, `pageEnd`, `sectionId`, `formNumber`, and `metadata`.
 * @param localIndex - Position of the span within its batch; part of the id.
 * @returns The span as parsed by `SourceSpanSchema`.
 * @throws When the assembled span does not satisfy `SourceSpanSchema`.
 */
function buildSourceSpan(input, localIndex = 0) {
  const text = normalizeWhitespace(input.text);
  const textHash = sourceSpanTextHash(text);
  const pagePart = input.pageStart ?? "na";
  const id = [
    sanitizeIdPart(input.documentId),
    "span",
    pagePart,
    localIndex,
    textHash.slice(0, 12)
  ].join(":");
  // `sourceKind` is optional in SourceSpanSchema, so a missing kind must fall
  // back to plain text instead of throwing on `.endsWith`.
  const isPdfSource = input.sourceKind?.endsWith("_pdf") ?? false;
  return SourceSpanSchema.parse({
    id,
    documentId: input.documentId,
    sourceKind: input.sourceKind,
    kind: isPdfSource ? "pdf_text" : "plain_text",
    text,
    hash: textHash,
    textHash,
    pageStart: input.pageStart,
    pageEnd: input.pageEnd,
    sectionId: input.sectionId,
    formNumber: input.formNumber,
    location: {
      // A single `page` is only recorded when the span starts and ends on the
      // same page.
      page: input.pageStart === input.pageEnd ? input.pageStart : void 0,
      startPage: input.pageStart,
      endPage: input.pageEnd,
      fieldPath: input.sectionId
    },
    metadata: input.metadata
  });
}
2000
/**
 * Creates one source span per page that has non-whitespace text.
 *
 * @param pages - Pages with `documentId`, `text`, `pageNumber`, and optional
 *   `sourceKind` (defaults to `"policy_pdf"`), `sectionId`, `formNumber`, and
 *   `metadata`.
 * @returns The spans, indexed by their position among the non-empty pages.
 */
function buildPageSourceSpans(pages) {
  const pagesWithText = pages.filter((page) => normalizeWhitespace(page.text).length > 0);
  const spans = [];
  for (const page of pagesWithText) {
    const spanInput = {
      documentId: page.documentId,
      sourceKind: page.sourceKind ?? "policy_pdf",
      text: page.text,
      pageStart: page.pageNumber,
      pageEnd: page.pageNumber,
      sectionId: page.sectionId,
      formNumber: page.formNumber,
      metadata: page.metadata
    };
    spans.push(buildSourceSpan(spanInput, spans.length));
  }
  return spans;
}
2017
/**
 * Splits each page at heading lines and creates one span per resulting
 * section candidate.
 *
 * @param pages - Pages with `documentId`, `text`, `pageNumber`, and optional
 *   `sourceKind` (defaults to `"policy_pdf"`) and `metadata`.
 * @param options - Optional `headingPattern` (regex tested against each
 *   trimmed line) and `minSectionChars` (default 120; shorter sections are
 *   dropped).
 * @returns Spans tagged with `sourceUnit: "section_candidate"`, indexed by
 *   their position in the returned list.
 */
function buildSectionSourceSpans(pages, options = {}) {
  const defaultHeadingPattern = /^(?:SECTION|COVERAGE|EXCLUSION|EXCLUSIONS|CONDITION|CONDITIONS|ENDORSEMENT|ENDORSEMENTS|DEFINITION|DEFINITIONS|DECLARATIONS?|SCHEDULE|FORM)\b[\s:.-]*(.*)$/i;
  const headingPattern = options.headingPattern ?? defaultHeadingPattern;
  const minSectionChars = options.minSectionChars ?? 120;

  const collected = [];
  for (const page of pages) {
    const candidates = splitPageIntoSections(page.text, headingPattern, minSectionChars);
    for (const candidate of candidates) {
      const spanInput = {
        documentId: page.documentId,
        sourceKind: page.sourceKind ?? "policy_pdf",
        text: candidate.text,
        pageStart: page.pageNumber,
        pageEnd: page.pageNumber,
        sectionId: candidate.title,
        formNumber: inferFormNumber(candidate.text),
        metadata: {
          ...page.metadata ?? {},
          sourceUnit: "section_candidate"
        }
      };
      // The running count keeps the index unique across pages.
      collected.push(buildSourceSpan(spanInput, collected.length));
    }
  }
  return collected;
}
2044
/**
 * Splits free text into consecutive, optionally overlapping source spans.
 *
 * @param input - Span input (`documentId`, `text`, and the optional fields
 *   accepted by `buildSourceSpan`); `text` is whitespace-normalized before
 *   windowing.
 * @param options - Optional `maxChars` (window size, default 4000) and
 *   `overlapChars` (characters repeated between neighbouring windows,
 *   default 0). A `maxChars` below 1 or not a number falls back to the
 *   default; `overlapChars` is clamped to `0..maxChars - 1`.
 * @returns The spans in reading order; empty when the text is blank.
 */
function buildTextSourceSpans(input, options = {}) {
  const requestedMax = options.maxChars ?? 4e3;
  // A window shorter than one character can never advance the cursor, which
  // previously made the loop below spin forever.
  const maxChars = requestedMax >= 1 ? Math.floor(requestedMax) : 4e3;
  // Clamp the overlap into [0, maxChars - 1]: a negative overlap would skip
  // text, and an overlap of a full window would stall progress.
  const requestedOverlap = Math.floor(options.overlapChars ?? 0) || 0;
  const overlapChars = Math.min(Math.max(0, requestedOverlap), maxChars - 1);
  const text = normalizeWhitespace(input.text);
  if (!text) return [];
  const spans = [];
  let cursor = 0;
  while (cursor < text.length) {
    const end = Math.min(text.length, cursor + maxChars);
    const unitText = text.slice(cursor, end);
    spans.push(buildSourceSpan({ ...input, text: unitText }, spans.length));
    if (end === text.length) break;
    cursor = end - overlapChars;
  }
  return spans;
}
2060
/**
 * Groups consecutive source spans into chunks of bounded text length.
 *
 * Span texts are joined with a blank line. A new chunk starts when adding the
 * next span would exceed `maxChars`, or when the next span belongs to a
 * different document. Each chunk records a single `documentId`, so mixing
 * documents would misattribute the trailing spans. A single span longer than
 * `maxChars` still forms its own chunk.
 *
 * @param spans - Spans in the order they should be chunked.
 * @param options - Optional `maxChars` (default 6000).
 * @returns Chunks parsed by `SourceChunkSchema`, in input order.
 * @throws When an assembled chunk does not satisfy `SourceChunkSchema`.
 */
function chunkSourceSpans(spans, options = {}) {
  const maxChars = options.maxChars ?? 6e3;
  const chunks = [];
  let current = [];
  let currentLength = 0;
  // Emits the pending spans as one chunk and resets the accumulator.
  const flush = () => {
    if (current.length === 0) return;
    const text = current.map((span) => span.text).join("\n\n");
    const textHash = sourceSpanTextHash(text);
    const pageStart = firstNumber(current.map((span) => span.pageStart));
    const pageEnd = lastNumber(current.map((span) => span.pageEnd ?? span.pageStart));
    const chunk = {
      id: `${sanitizeIdPart(current[0].documentId)}:source_chunk:${chunks.length}:${stableHash2({
        sourceSpanIds: current.map((span) => span.id),
        textHash
      }).slice(0, 12)}`,
      documentId: current[0].documentId,
      sourceSpanIds: current.map((span) => span.id),
      text,
      textHash,
      pageStart,
      pageEnd,
      metadata: mergeMetadata(current)
    };
    chunks.push(SourceChunkSchema.parse(chunk));
    current = [];
    currentLength = 0;
  };
  for (const span of spans) {
    if (current.length > 0) {
      // The +2 accounts for the "\n\n" separator between joined span texts.
      const exceedsBudget = currentLength + span.text.length + 2 > maxChars;
      const changesDocument = current[0].documentId !== span.documentId;
      if (exceedsBudget || changesDocument) {
        flush();
      }
    }
    current.push(span);
    currentLength += span.text.length + (current.length > 1 ? 2 : 0);
  }
  flush();
  return chunks;
}
2099
/**
 * Splits page text into sections that each begin at a heading line.
 *
 * A line is a heading when its trimmed form matches `headingPattern`. Lines
 * before the first heading are discarded. Each section's title is the
 * whitespace-normalized heading line cut to 120 characters, and its text is
 * the normalized heading plus the lines that follow it.
 *
 * @param text - Raw page text.
 * @param headingPattern - Regular expression identifying heading lines.
 * @param minSectionChars - Sections with shorter normalized text are dropped.
 * @returns The `{ title, text }` sections in document order.
 */
function splitPageIntoSections(text, headingPattern, minSectionChars) {
  const collapse = (value) => value.replace(/\s+/g, " ").trim();

  const drafts = [];
  let active;
  for (const rawLine of text.split(/\r?\n/)) {
    const trimmed = rawLine.trim();
    if (trimmed.match(headingPattern)) {
      active = { title: collapse(trimmed).slice(0, 120), lines: [trimmed] };
      drafts.push(active);
    } else if (active) {
      active.lines.push(rawLine);
    }
  }

  const sections = [];
  for (const draft of drafts) {
    const body = collapse(draft.lines.join("\n"));
    if (body.length >= minSectionChars) {
      sections.push({ title: draft.title, text: body });
    }
  }
  return sections;
}
2123
/**
 * Returns the first substring of `text` that looks like a form number:
 * 2-8 uppercase letters, whitespace, 2-5 digits, and optionally a second
 * whitespace-separated group of 2-4 digits (e.g. "CG 21 47").
 *
 * @param text Text to scan.
 * @returns The matched substring, or `undefined` when nothing matches.
 */
function inferFormNumber(text) {
  const found = text.match(/\b[A-Z]{2,8}\s+\d{2,5}(?:\s+\d{2,4})?\b/);
  if (found === null || found === undefined) {
    return undefined;
  }
  return found[0];
}
2126
/**
 * Returns the first entry of `values` whose type is `number`.
 *
 * @param values Array of arbitrary values.
 * @returns The first numeric entry, or `undefined` if there is none.
 */
function firstNumber(values) {
  const isNumeric = (candidate) => typeof candidate === "number";
  const position = values.findIndex(isNumeric);
  return position === -1 ? undefined : values[position];
}
2129
/**
 * Returns the last entry of `values` whose type is `number`.
 *
 * @param values Iterable of arbitrary values; it is copied, not mutated.
 * @returns The last numeric entry, or `undefined` if there is none.
 */
function lastNumber(values) {
  const snapshot = [...values];
  for (let index = snapshot.length - 1; index >= 0; index--) {
    const candidate = snapshot[index];
    if (typeof candidate === "number") {
      return candidate;
    }
  }
  return undefined;
}
2132
/**
 * Builds one metadata object out of the metadata of several spans.
 *
 * For each span, in order:
 *  - every entry of `span.metadata` is copied; when the key already holds a
 *    truthy value the new value is appended after a comma (repeated values
 *    are concatenated as-is, not de-duplicated);
 *  - truthy `formNumber`, `sectionId` and `sourceKind` properties of the span
 *    then overwrite the keys of the same name.
 *
 * @param spans Spans to merge.
 * @returns A new plain object; the input spans are not modified.
 */
function mergeMetadata(spans) {
  const promotedFields = ["formNumber", "sectionId", "sourceKind"];
  const merged = {};
  spans.forEach((span) => {
    const entries = Object.entries(span.metadata ?? {});
    entries.forEach(([key, value]) => {
      const existing = merged[key];
      merged[key] = existing ? `${existing},${value}` : value;
    });
    // Span-level fields win over anything accumulated under the same key.
    promotedFields.forEach((field) => {
      if (span[field]) {
        merged[field] = span[field];
      }
    });
  });
  return merged;
}
2144
+
2145
+ // src/source/store.ts
2146
/**
 * In-memory source store that keeps spans and chunks in two Maps keyed by id.
 * Every method is async; nothing is persisted outside the instance.
 */
var MemorySourceStore = class {
  constructor() {
    this.spans = /* @__PURE__ */ new Map();
    this.chunks = /* @__PURE__ */ new Map();
  }

  /** Stores each span under its `id`, replacing any span with the same id. */
  async addSourceSpans(spans) {
    spans.forEach((span) => {
      this.spans.set(span.id, span);
    });
  }

  /** Stores each chunk under its `id`, replacing any chunk with the same id. */
  async addSourceChunks(chunks) {
    chunks.forEach((chunk) => {
      this.chunks.set(chunk.id, chunk);
    });
  }

  /** Returns the span stored under `id`, or `null` when there is none. */
  async getSourceSpan(id) {
    const stored = this.spans.get(id);
    return stored ?? null;
  }

  /** Returns the spans of one document, sorted by id with `localeCompare`. */
  async getSourceSpansByDocument(documentId) {
    const owned = [];
    for (const span of this.spans.values()) {
      if (span.documentId === documentId) owned.push(span);
    }
    owned.sort((left, right) => left.id.localeCompare(right.id));
    return owned;
  }

  /** Returns the chunks of one document, sorted by id with `localeCompare`. */
  async getSourceChunksByDocument(documentId) {
    const owned = [];
    for (const chunk of this.chunks.values()) {
      if (chunk.documentId === documentId) owned.push(chunk);
    }
    owned.sort((left, right) => left.id.localeCompare(right.id));
    return owned;
  }

  /** Removes every span and chunk whose `documentId` equals the argument. */
  async deleteDocumentSource(documentId) {
    this.spans.forEach((span, id) => {
      if (span.documentId === documentId) this.spans.delete(id);
    });
    this.chunks.forEach((chunk, id) => {
      if (chunk.documentId === documentId) this.chunks.delete(id);
    });
  }

  /**
   * Lexical search over the stored spans.
   *
   * Spans are restricted by `query.documentIds`, `query.chunkIds` (an empty or
   * missing list means "no restriction"; a span without a `chunkId` never
   * passes a non-empty chunk restriction) and `query.filters`. The remaining
   * spans are scored with `lexicalRelevance` against the tokenized question,
   * zero-score spans are dropped, and the rest are ordered by
   * `orderSourceEvidence` and truncated to `query.limit` (default 10).
   *
   * @returns Array of `{ span, relevance }`.
   */
  async searchSourceSpans(query) {
    const terms = tokenize(query.question);
    const allowedDocuments = new Set(query.documentIds ?? []);
    const allowedChunks = new Set(query.chunkIds ?? []);
    const limit = query.limit ?? 10;

    const scored = [];
    for (const span of this.spans.values()) {
      if (allowedDocuments.size !== 0 && !allowedDocuments.has(span.documentId)) continue;
      if (allowedChunks.size !== 0 && !(span.chunkId && allowedChunks.has(span.chunkId))) continue;
      if (!matchesFilters(span, query.filters)) continue;
      const relevance = lexicalRelevance(span.text, terms);
      if (relevance > 0) scored.push({ span, relevance });
    }

    // orderSourceEvidence works on evidence-shaped records, so each result is
    // decorated with the span's identifying fields before ordering.
    const evidence = scored.map((result) => ({
      ...result,
      sourceSpanId: result.span.id,
      documentId: result.span.documentId,
      chunkId: result.span.chunkId,
      text: result.span.text
    }));
    const ordered = orderSourceEvidence(evidence);
    return ordered.map(({ span, relevance }) => ({ span, relevance })).slice(0, limit);
  }
};
2196
/**
 * Breaks a free-text question into unique, lower-cased search terms.
 *
 * The text is split on every run of characters other than `a-z`, `0-9`, `$`,
 * `.`, `,`, `%` and `-`, so amounts such as "$1,000,000.00" or "10%" survive
 * as single terms. Because `.` and `,` are kept inside terms, sentence
 * punctuation would otherwise stay attached to the preceding word
 * ("deductible," / "limit.") and such a term only matches text containing
 * that exact punctuation. Trailing `.`/`,` characters are therefore stripped.
 * Any text that contained the punctuated term also contains the stripped
 * term, so stripping never loses a substring match.
 *
 * @param value Question text.
 * @returns Unique terms of at least two characters, in first-seen order.
 */
function tokenize(value) {
  const terms = new Set();
  for (const piece of value.toLowerCase().split(/[^a-z0-9$.,%-]+/)) {
    const term = piece.replace(/[.,]+$/, "");
    if (term.length >= 2) {
      terms.add(term);
    }
  }
  return Array.from(terms);
}
2201
/**
 * Scores `text` against a list of search terms.
 *
 * The score is the fraction of `terms` that occur as a substring of the
 * lower-cased text, so it lies between 0 and 1.
 *
 * @param text Text to score.
 * @param terms Search terms, compared against the lower-cased text.
 * @returns 0 when `terms` is empty or nothing matches, otherwise hits / terms.length.
 */
function lexicalRelevance(text, terms) {
  const total = terms.length;
  if (total === 0) return 0;
  const haystack = text.toLowerCase();
  let hits = 0;
  for (const term of terms) {
    if (haystack.includes(term)) hits += 1;
  }
  // hits can never exceed total, so the ratio is already capped at 1.
  return hits === 0 ? 0 : hits / total;
}
2208
/**
 * Checks whether a span satisfies every entry of a filter object.
 *
 * A filter entry is satisfied when `span.metadata[key]` strictly equals the
 * value, or, for the keys `sourceKind`, `formNumber` and `sectionId`, when
 * the span property of the same name strictly equals the value.
 *
 * @param span Span to test.
 * @param filters Key/value pairs that must all match; falsy means "no filter".
 * @returns `true` when all entries match or there are no filters.
 */
function matchesFilters(span, filters) {
  if (!filters) return true;
  return Object.entries(filters).every(([key, expected]) => {
    if (span.metadata?.[key] === expected) return true;
    switch (key) {
      case "sourceKind":
        return span.sourceKind === expected;
      case "formNumber":
        return span.formNumber === expected;
      case "sectionId":
        return span.sectionId === expected;
      default:
        return false;
    }
  });
}
2219
+
1448
2220
  // src/extraction/pdf.ts
1449
2221
  import {
1450
2222
  PDFDocument,
@@ -1670,6 +2442,35 @@ async function overlayTextOnPdf(pdfBytes, overlays) {
1670
2442
  }
1671
2443
 
1672
2444
  // src/extraction/extractor.ts
2445
+ function sourceSpansForPageRange(providerOptions, startPage, endPage) {
2446
+ const sourceSpans = providerOptions?.sourceSpans;
2447
+ if (!Array.isArray(sourceSpans)) return [];
2448
+ return sourceSpans.filter((span) => {
2449
+ const spanStart = span.pageStart ?? span.location?.startPage ?? span.location?.page;
2450
+ const spanEnd = span.pageEnd ?? span.location?.endPage ?? spanStart;
2451
+ if (!spanStart || !spanEnd) return false;
2452
+ return spanEnd >= startPage && spanStart <= endPage;
2453
+ });
2454
+ }
2455
/**
 * Renders source spans as a prompt appendix.
 *
 * Each span becomes a bracketed header line (id, plus page or page range,
 * section and form when present) followed by the span text. Spans are added
 * in order until the next one would push the accumulated length past
 * `maxChars`; the first span is always included. Blank-line separators
 * between entries are not counted toward the budget.
 *
 * @param spans Spans to render.
 * @param maxChars Character budget for the rendered entries.
 * @returns "" for an empty list, otherwise the appendix text.
 */
function buildSourceContext(spans, maxChars = 12e3) {
  if (spans.length === 0) return "";
  const entries = [];
  let used = 0;
  for (const span of spans) {
    let header = `[sourceSpan:${span.id}`;
    if (span.pageStart) {
      header += ` page:${span.pageStart}`;
      if (span.pageEnd && span.pageEnd !== span.pageStart) {
        header += `-${span.pageEnd}`;
      }
    }
    if (span.sectionId) header += ` section:${span.sectionId}`;
    if (span.formNumber) header += ` form:${span.formNumber}`;
    header += "]";
    const entry = header + "\n" + span.text;
    if (entries.length > 0 && used + entry.length > maxChars) break;
    entries.push(entry);
    used += entry.length;
  }
  return (
    "\n\nSOURCE SPANS FOR THESE PAGES:\n" +
    entries.join("\n\n") +
    "\n\nUse sourceSpan IDs when grounding extracted contractual values."
  );
}
1673
2474
  async function runExtractor(params) {
1674
2475
  const {
1675
2476
  name,
@@ -1681,7 +2482,8 @@ async function runExtractor(params) {
1681
2482
  generateObject,
1682
2483
  convertPdfToImages,
1683
2484
  maxTokens = 4096,
1684
- providerOptions
2485
+ providerOptions,
2486
+ pageRangeCache
1685
2487
  } = params;
1686
2488
  const extractorProviderOptions = { ...providerOptions };
1687
2489
  let fullPrompt;
@@ -1693,12 +2495,21 @@ async function runExtractor(params) {
1693
2495
 
1694
2496
  [Document pages ${startPage}-${endPage} are provided as images.]`;
1695
2497
  } else {
1696
- const pagesPdf = await extractPageRange(pdfBase64, startPage, endPage);
2498
+ const cacheKey = `${startPage}-${endPage}`;
2499
+ let pagesPdf = pageRangeCache?.get(cacheKey);
2500
+ if (!pagesPdf) {
2501
+ pagesPdf = await extractPageRange(pdfBase64, startPage, endPage);
2502
+ pageRangeCache?.set(cacheKey, pagesPdf);
2503
+ }
1697
2504
  extractorProviderOptions.pdfBase64 = pagesPdf;
1698
2505
  fullPrompt = `${prompt}
1699
2506
 
1700
2507
  [Document pages ${startPage}-${endPage} are provided as a PDF file.]`;
1701
2508
  }
2509
+ const sourceContext = buildSourceContext(sourceSpansForPageRange(providerOptions, startPage, endPage));
2510
+ if (sourceContext) {
2511
+ fullPrompt += sourceContext;
2512
+ }
1702
2513
  const strictSchema = toStrictSchema(schema);
1703
2514
  const result = await withRetry(
1704
2515
  () => generateObject({
@@ -2297,6 +3108,45 @@ function promoteExtractedFields(doc) {
2297
3108
  promotePremium(doc);
2298
3109
  }
2299
3110
 
3111
+ // src/extraction/alignment.ts
3112
/**
 * Turns an arbitrary value into a lower-case, underscore-separated key part.
 *
 * The value is stringified, trimmed and lower-cased; "&" becomes "and"; every
 * run of characters outside `a-z0-9` collapses to one underscore; leading and
 * trailing underscores are removed.
 *
 * @param value Any value.
 * @returns The normalized part, or "na" for null/undefined or an empty result.
 */
function normalizeKeyPart(value) {
  if (value === null || value === undefined) {
    return "na";
  }
  const lowered = String(value).trim().toLowerCase();
  const spelledOut = lowered.replace(/&/g, "and");
  const underscored = spelledOut.replace(/[^a-z0-9]+/g, "_");
  const slug = underscored.replace(/^_+|_+$/g, "");
  return slug.length > 0 ? slug : "na";
}
3117
/**
 * Computes a 32-bit FNV-1a style hash over the UTF-16 code units of `value`.
 *
 * @param value String to hash.
 * @returns The hash as exactly eight lower-case hexadecimal digits.
 */
function hashText(value) {
  const OFFSET_BASIS = 0x811c9dc5;
  const PRIME = 0x01000193;
  let state = OFFSET_BASIS;
  let position = 0;
  while (position < value.length) {
    state ^= value.charCodeAt(position);
    // Math.imul keeps the multiplication within 32-bit integer arithmetic.
    state = Math.imul(state, PRIME);
    position += 1;
  }
  // An unsigned 32-bit value has at most eight hex digits, so padding suffices.
  return (state >>> 0).toString(16).padStart(8, "0");
}
3125
/**
 * Builds the evidence fragment used when hashing an extraction record.
 *
 * The candidate fields are, in order: the comma-joined `sourceSpanIds` (when
 * that property is an array), `sourceTextHash`, `formNumber`, `pageNumber`
 * (falling back to `pageStart`), `sectionRef`, and `originalContent` (falling
 * back to `content`). Fields that are null, undefined or blank once
 * stringified are skipped; the rest are passed through `normalizeKeyPart`
 * and joined with "|".
 *
 * @param record Extraction record.
 * @returns The joined evidence string; "" when no field carries a value.
 */
function evidencePart(record) {
  const spanIds = Array.isArray(record.sourceSpanIds) ? record.sourceSpanIds.join(",") : "";
  const candidates = [
    spanIds,
    record.sourceTextHash,
    record.formNumber,
    record.pageNumber ?? record.pageStart,
    record.sectionRef,
    record.originalContent ?? record.content
  ];
  const normalized = [];
  for (const candidate of candidates) {
    if (candidate === undefined || candidate === null) continue;
    if (String(candidate).trim().length === 0) continue;
    normalized.push(normalizeKeyPart(candidate));
  }
  return normalized.join("|");
}
3136
/**
 * Derives an id for an extraction record.
 *
 * The id has the form `<recordKind>:<normalized documentId>:<label>:<hash>`.
 * `label` is the ":"-joined normalized `labelParts`; `hash` is `hashText`
 * over the raw document id, the record kind, the label and the record's
 * evidence fragment, joined with "|".
 *
 * @param documentId Id of the owning document.
 * @param recordKind Kind prefix, e.g. "coverage".
 * @param record Record whose evidence fields feed the hash.
 * @param labelParts Values that make up the readable label.
 * @returns The composed record id.
 */
function buildExtractionRecordId(documentId, recordKind, record, labelParts) {
  const label = labelParts.map((part) => normalizeKeyPart(part)).join(":");
  const evidence = evidencePart(record);
  const fingerprintInput = documentId + "|" + recordKind + "|" + label + "|" + evidence;
  const fingerprint = hashText(fingerprintInput);
  const documentSlug = normalizeKeyPart(documentId);
  return `${recordKind}:${documentSlug}:${label}:${fingerprint}`;
}
3142
/**
 * Ensures every record has a `recordId` and returns the records sorted by it.
 *
 * A record keeps its own `recordId` when that is a non-blank string;
 * otherwise one is generated with `buildExtractionRecordId` from the label
 * parts that `labelParts(record)` returns. Records are shallow-copied; the
 * input array and its elements are not mutated.
 *
 * @param documentId Id of the owning document.
 * @param recordKind Kind prefix used for generated ids.
 * @param records Records to align; may be null, undefined or empty.
 * @param labelParts Function mapping a record to its label parts.
 * @returns New array sorted by `recordId` via `localeCompare`; `[]` for no input.
 */
function alignExtractionRecords(documentId, recordKind, records, labelParts) {
  if (!records?.length) {
    return [];
  }
  const hasUsableId = (record) =>
    typeof record.recordId === "string" && record.recordId.trim().length > 0;
  const aligned = records.map((record) => {
    if (hasUsableId(record)) {
      return { ...record, recordId: record.recordId };
    }
    const generated = buildExtractionRecordId(documentId, recordKind, record, labelParts(record));
    return { ...record, recordId: generated };
  });
  aligned.sort((left, right) => String(left.recordId).localeCompare(String(right.recordId)));
  return aligned;
}
3149
+
2300
3150
  // src/extraction/assembler.ts
2301
3151
  function assembleDocument(documentId, documentType, memory) {
2302
3152
  const carrier = getCarrierInfo(memory);
@@ -2313,11 +3163,53 @@ function assembleDocument(documentId, documentType, memory) {
2313
3163
  const classify = readMemoryRecord(memory, "classify");
2314
3164
  const lossPayees = readRecordArray(insured, "lossPayees");
2315
3165
  const mortgageHolders = readRecordArray(insured, "mortgageHolders");
3166
+ const coverageRecords = alignExtractionRecords(
3167
+ documentId,
3168
+ "coverage",
3169
+ getCoverageLimitCoverages(memory),
3170
+ (coverage) => [coverage.name, coverage.formNumber, coverage.pageNumber, coverage.limit, coverage.deductible]
3171
+ );
3172
+ const endorsementRecords = alignExtractionRecords(
3173
+ documentId,
3174
+ "endorsement",
3175
+ readRecordValue(endorsements, "endorsements"),
3176
+ (endorsement) => [endorsement.formNumber, endorsement.title, endorsement.pageStart]
3177
+ );
3178
+ const exclusionRecords = alignExtractionRecords(
3179
+ documentId,
3180
+ "exclusion",
3181
+ readRecordValue(exclusions, "exclusions"),
3182
+ (exclusion) => [exclusion.name, exclusion.formNumber, exclusion.pageNumber]
3183
+ );
3184
+ const conditionRecords = alignExtractionRecords(
3185
+ documentId,
3186
+ "condition",
3187
+ readRecordValue(conditions, "conditions"),
3188
+ (condition) => [condition.name, condition.conditionType, condition.pageNumber]
3189
+ );
3190
+ const sectionRecords = alignExtractionRecords(
3191
+ documentId,
3192
+ "section",
3193
+ getSections(memory),
3194
+ (section) => [section.title, section.type, section.pageStart, section.pageEnd]
3195
+ );
3196
+ const definitionRecords = alignExtractionRecords(
3197
+ documentId,
3198
+ "definition",
3199
+ getDefinitions(memory),
3200
+ (definition) => [definition.term, definition.formNumber, definition.pageNumber]
3201
+ );
3202
+ const coveredReasonRecords = alignExtractionRecords(
3203
+ documentId,
3204
+ "covered_reason",
3205
+ getCoveredReasons(memory),
3206
+ (reason2) => [reason2.coverageName, reason2.reasonNumber, reason2.title, reason2.pageNumber]
3207
+ );
2316
3208
  const base = {
2317
3209
  id: documentId,
2318
3210
  carrier: readRecordValue(carrier, "carrierName") ?? "Unknown",
2319
3211
  insuredName: readRecordValue(insured, "insuredName") ?? "Unknown",
2320
- coverages: getCoverageLimitCoverages(memory),
3212
+ coverages: coverageRecords,
2321
3213
  policyTypes: readRecordValue(classify, "policyTypes"),
2322
3214
  ...sanitizeNulls(carrier ?? {}),
2323
3215
  ...sanitizeNulls(insured ?? {}),
@@ -2333,13 +3225,13 @@ function assembleDocument(documentId, documentType, memory) {
2333
3225
  ...sanitizeNulls(premium ?? {}),
2334
3226
  ...sanitizeNulls(supplementary ?? {}),
2335
3227
  supplementaryFacts: readRecordValue(supplementary, "auxiliaryFacts"),
2336
- endorsements: readRecordValue(endorsements, "endorsements"),
2337
- exclusions: readRecordValue(exclusions, "exclusions"),
2338
- conditions: readRecordValue(conditions, "conditions"),
2339
- sections: getSections(memory),
3228
+ endorsements: endorsementRecords.length > 0 ? endorsementRecords : void 0,
3229
+ exclusions: exclusionRecords.length > 0 ? exclusionRecords : void 0,
3230
+ conditions: conditionRecords.length > 0 ? conditionRecords : void 0,
3231
+ sections: sectionRecords.length > 0 ? sectionRecords : void 0,
2340
3232
  formInventory: readRecordValue(formInventory, "forms"),
2341
- definitions: getDefinitions(memory),
2342
- coveredReasons: getCoveredReasons(memory),
3233
+ definitions: definitionRecords.length > 0 ? definitionRecords : void 0,
3234
+ coveredReasons: coveredReasonRecords.length > 0 ? coveredReasonRecords : void 0,
2343
3235
  declarations: declarations ? sanitizeNulls(declarations) : void 0,
2344
3236
  ...sanitizeNulls(lossHistory ?? {})
2345
3237
  };
@@ -2601,7 +3493,7 @@ async function formatDocumentContent(doc, generateText, options) {
2601
3493
  const result = await withRetry(
2602
3494
  () => generateText({
2603
3495
  prompt,
2604
- maxTokens: 16384,
3496
+ maxTokens: options?.maxTokens ?? 16384,
2605
3497
  providerOptions: options?.providerOptions
2606
3498
  })
2607
3499
  );
@@ -3596,12 +4488,12 @@ function dedupeByKey(items, keyFn) {
3596
4488
  }
3597
4489
  return merged;
3598
4490
  }
3599
- function normalizeKeyPart(value) {
4491
+ function normalizeKeyPart2(value) {
3600
4492
  if (value === void 0 || value === null) return "";
3601
4493
  return String(value).toLowerCase().replace(/&/g, "and").replace(/[^a-z0-9]+/g, "");
3602
4494
  }
3603
4495
  function keyFromParts(...parts) {
3604
- return parts.map(normalizeKeyPart).join("|");
4496
+ return parts.map(normalizeKeyPart2).join("|");
3605
4497
  }
3606
4498
  function mergeUniqueObjects(existing, incoming, keyFn) {
3607
4499
  return dedupeByKey([...existing, ...incoming], keyFn);
@@ -4492,11 +5384,11 @@ function getTemplate(policyType) {
4492
5384
  }
4493
5385
 
4494
5386
  // src/prompts/coordinator/classify.ts
4495
- import { z as z18 } from "zod";
4496
- var ClassifyResultSchema = z18.object({
4497
- documentType: z18.enum(["policy", "quote"]).describe("Whether this is a bound policy or a proposed quote"),
4498
- policyTypes: z18.array(PolicyTypeSchema).min(1).describe("Lines of business covered \u2014 at least one required"),
4499
- confidence: z18.number().describe("Confidence score from 0.0 to 1.0")
5387
+ import { z as z21 } from "zod";
5388
+ var ClassifyResultSchema = z21.object({
5389
+ documentType: z21.enum(["policy", "quote"]).describe("Whether this is a bound policy or a proposed quote"),
5390
+ policyTypes: z21.array(PolicyTypeSchema).min(1).describe("Lines of business covered \u2014 at least one required"),
5391
+ confidence: z21.number().describe("Confidence score from 0.0 to 1.0")
4500
5392
  });
4501
5393
  function buildClassifyPrompt() {
4502
5394
  return `You are classifying an insurance document. Examine the document and determine:
@@ -4586,14 +5478,14 @@ Return JSON only:
4586
5478
  }
4587
5479
 
4588
5480
  // src/prompts/coordinator/form-inventory.ts
4589
- import { z as z19 } from "zod";
5481
+ import { z as z22 } from "zod";
4590
5482
  var FormInventoryEntrySchema = FormReferenceSchema.extend({
4591
5483
  formNumber: FormReferenceSchema.shape.formNumber.describe("Form number or identifier, e.g. PR5070CF"),
4592
5484
  pageStart: FormReferenceSchema.shape.pageStart.describe("Original document page where the form begins"),
4593
5485
  pageEnd: FormReferenceSchema.shape.pageEnd.describe("Original document page where the form ends")
4594
5486
  });
4595
- var FormInventorySchema = z19.object({
4596
- forms: z19.array(FormInventoryEntrySchema)
5487
+ var FormInventorySchema = z22.object({
5488
+ forms: z22.array(FormInventoryEntrySchema)
4597
5489
  });
4598
5490
  function buildFormInventoryPrompt(templateHints) {
4599
5491
  return `You are building a form inventory for an insurance document.
@@ -4622,8 +5514,8 @@ Respond with JSON only.`;
4622
5514
  }
4623
5515
 
4624
5516
  // src/prompts/coordinator/page-map.ts
4625
- import { z as z20 } from "zod";
4626
- var PageExtractorSchema = z20.enum([
5517
+ import { z as z23 } from "zod";
5518
+ var PageExtractorSchema = z23.enum([
4627
5519
  "carrier_info",
4628
5520
  "named_insured",
4629
5521
  "coverage_limits",
@@ -4638,10 +5530,10 @@ var PageExtractorSchema = z20.enum([
4638
5530
  "sections",
4639
5531
  "supplementary"
4640
5532
  ]);
4641
- var PageAssignmentSchema = z20.object({
4642
- localPageNumber: z20.number().int().positive().describe("1-based page number within this supplied PDF chunk"),
4643
- extractorNames: z20.array(PageExtractorSchema).describe("Focused extractors that should inspect this page"),
4644
- pageRole: z20.enum([
5533
+ var PageAssignmentSchema = z23.object({
5534
+ localPageNumber: z23.number().int().positive().describe("1-based page number within this supplied PDF chunk"),
5535
+ extractorNames: z23.array(PageExtractorSchema).describe("Focused extractors that should inspect this page"),
5536
+ pageRole: z23.enum([
4645
5537
  "declarations_schedule",
4646
5538
  "endorsement_schedule",
4647
5539
  "policy_form",
@@ -4650,12 +5542,12 @@ var PageAssignmentSchema = z20.object({
4650
5542
  "supplementary",
4651
5543
  "other"
4652
5544
  ]).optional().describe("Primary role of the page"),
4653
- hasScheduleValues: z20.boolean().optional().describe("True only when the page contains insured-specific declaration or schedule values, tables, or rows to extract"),
4654
- confidence: z20.number().min(0).max(1).optional().describe("Confidence in the page assignment"),
4655
- notes: z20.string().optional().describe("Short explanation of what appears on the page")
5545
+ hasScheduleValues: z23.boolean().optional().describe("True only when the page contains insured-specific declaration or schedule values, tables, or rows to extract"),
5546
+ confidence: z23.number().min(0).max(1).optional().describe("Confidence in the page assignment"),
5547
+ notes: z23.string().optional().describe("Short explanation of what appears on the page")
4656
5548
  });
4657
- var PageMapChunkSchema = z20.object({
4658
- pages: z20.array(PageAssignmentSchema)
5549
+ var PageMapChunkSchema = z23.object({
5550
+ pages: z23.array(PageAssignmentSchema)
4659
5551
  });
4660
5552
  function buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint) {
4661
5553
  const inventoryBlock = formInventoryHint ? `
@@ -4731,16 +5623,16 @@ function formatFormInventoryForPageMap(forms) {
4731
5623
  }
4732
5624
 
4733
5625
  // src/prompts/coordinator/review.ts
4734
- import { z as z21 } from "zod";
4735
- var ReviewResultSchema = z21.object({
4736
- complete: z21.boolean(),
4737
- missingFields: z21.array(z21.string()),
4738
- qualityIssues: z21.array(z21.string()).optional(),
4739
- additionalTasks: z21.array(z21.object({
4740
- extractorName: z21.string(),
4741
- startPage: z21.number(),
4742
- endPage: z21.number(),
4743
- description: z21.string()
5626
+ import { z as z24 } from "zod";
5627
+ var ReviewResultSchema = z24.object({
5628
+ complete: z24.boolean(),
5629
+ missingFields: z24.array(z24.string()),
5630
+ qualityIssues: z24.array(z24.string()).optional(),
5631
+ additionalTasks: z24.array(z24.object({
5632
+ extractorName: z24.string(),
5633
+ startPage: z24.number(),
5634
+ endPage: z24.number(),
5635
+ description: z24.string()
4744
5636
  }))
4745
5637
  });
4746
5638
  function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog) {
@@ -4796,9 +5688,9 @@ Respond with JSON only.`;
4796
5688
  }
4797
5689
 
4798
5690
  // src/prompts/coordinator/summarize.ts
4799
- import { z as z22 } from "zod";
4800
- var SummaryResultSchema = z22.object({
4801
- summary: z22.string().describe("A 1-3 sentence overview of this insurance document")
5691
+ import { z as z25 } from "zod";
5692
+ var SummaryResultSchema = z25.object({
5693
+ summary: z25.string().describe("A 1-3 sentence overview of this insurance document")
4802
5694
  });
4803
5695
  function buildSummaryPrompt(doc) {
4804
5696
  const snapshot = {
@@ -4841,23 +5733,23 @@ Return JSON only with a "summary" field.`;
4841
5733
  }
4842
5734
 
4843
5735
  // src/prompts/extractors/carrier-info.ts
4844
- import { z as z23 } from "zod";
4845
- var CarrierInfoSchema = z23.object({
4846
- carrierName: z23.string().describe("Primary insurance company name for display"),
4847
- carrierLegalName: z23.string().optional().describe("Legal entity name of insurer"),
4848
- naicNumber: z23.string().optional().describe("NAIC company code"),
4849
- amBestRating: z23.string().optional().describe("AM Best rating, e.g. 'A+ XV'"),
4850
- admittedStatus: z23.enum(["admitted", "non_admitted", "surplus_lines"]).optional().describe("Admitted status of the carrier"),
4851
- mga: z23.string().optional().describe("Managing General Agent or Program Administrator name"),
4852
- underwriter: z23.string().optional().describe("Named individual underwriter"),
4853
- brokerAgency: z23.string().optional().describe("Broker or producer agency name"),
4854
- brokerContactName: z23.string().optional().describe("Broker or producer contact person name"),
4855
- brokerLicenseNumber: z23.string().optional().describe("Broker or producer license number"),
4856
- policyNumber: z23.string().optional().describe("Policy or quote reference number"),
4857
- effectiveDate: z23.string().optional().describe("Policy effective date (MM/DD/YYYY)"),
4858
- expirationDate: z23.string().optional().describe("Policy expiration date (MM/DD/YYYY)"),
4859
- quoteNumber: z23.string().optional().describe("Quote or proposal reference number"),
4860
- proposedEffectiveDate: z23.string().optional().describe("Proposed effective date for quotes (MM/DD/YYYY)")
5736
+ import { z as z26 } from "zod";
5737
+ var CarrierInfoSchema = z26.object({
5738
+ carrierName: z26.string().describe("Primary insurance company name for display"),
5739
+ carrierLegalName: z26.string().optional().describe("Legal entity name of insurer"),
5740
+ naicNumber: z26.string().optional().describe("NAIC company code"),
5741
+ amBestRating: z26.string().optional().describe("AM Best rating, e.g. 'A+ XV'"),
5742
+ admittedStatus: z26.enum(["admitted", "non_admitted", "surplus_lines"]).optional().describe("Admitted status of the carrier"),
5743
+ mga: z26.string().optional().describe("Managing General Agent or Program Administrator name"),
5744
+ underwriter: z26.string().optional().describe("Named individual underwriter"),
5745
+ brokerAgency: z26.string().optional().describe("Broker or producer agency name"),
5746
+ brokerContactName: z26.string().optional().describe("Broker or producer contact person name"),
5747
+ brokerLicenseNumber: z26.string().optional().describe("Broker or producer license number"),
5748
+ policyNumber: z26.string().optional().describe("Policy or quote reference number"),
5749
+ effectiveDate: z26.string().optional().describe("Policy effective date (MM/DD/YYYY)"),
5750
+ expirationDate: z26.string().optional().describe("Policy expiration date (MM/DD/YYYY)"),
5751
+ quoteNumber: z26.string().optional().describe("Quote or proposal reference number"),
5752
+ proposedEffectiveDate: z26.string().optional().describe("Proposed effective date for quotes (MM/DD/YYYY)")
4861
5753
  });
4862
5754
  function buildCarrierInfoPrompt() {
4863
5755
  return `You are an expert insurance document analyst. Extract carrier and policy identification information from this document.
@@ -4880,18 +5772,18 @@ Return JSON only.`;
4880
5772
  }
4881
5773
 
4882
5774
  // src/prompts/extractors/named-insured.ts
4883
- import { z as z24 } from "zod";
4884
- var AddressSchema2 = z24.object({
4885
- street1: z24.string(),
4886
- city: z24.string(),
4887
- state: z24.string(),
4888
- zip: z24.string()
4889
- });
4890
- var NamedInsuredSchema2 = z24.object({
4891
- insuredName: z24.string().describe("Name of primary named insured"),
4892
- insuredDba: z24.string().optional().describe("Doing-business-as name"),
5775
+ import { z as z27 } from "zod";
5776
+ var AddressSchema2 = z27.object({
5777
+ street1: z27.string(),
5778
+ city: z27.string(),
5779
+ state: z27.string(),
5780
+ zip: z27.string()
5781
+ });
5782
+ var NamedInsuredSchema2 = z27.object({
5783
+ insuredName: z27.string().describe("Name of primary named insured"),
5784
+ insuredDba: z27.string().optional().describe("Doing-business-as name"),
4893
5785
  insuredAddress: AddressSchema2.optional().describe("Primary insured mailing address"),
4894
- insuredEntityType: z24.enum([
5786
+ insuredEntityType: z27.enum([
4895
5787
  "corporation",
4896
5788
  "llc",
4897
5789
  "partnership",
@@ -4904,25 +5796,25 @@ var NamedInsuredSchema2 = z24.object({
4904
5796
  "married_couple",
4905
5797
  "other"
4906
5798
  ]).optional().describe("Legal entity type of the insured"),
4907
- insuredFein: z24.string().optional().describe("Federal Employer Identification Number"),
4908
- insuredSicCode: z24.string().optional().describe("SIC code"),
4909
- insuredNaicsCode: z24.string().optional().describe("NAICS code"),
4910
- additionalNamedInsureds: z24.array(
4911
- z24.object({
4912
- name: z24.string(),
4913
- relationship: z24.string().optional().describe("e.g. subsidiary, affiliate"),
5799
+ insuredFein: z27.string().optional().describe("Federal Employer Identification Number"),
5800
+ insuredSicCode: z27.string().optional().describe("SIC code"),
5801
+ insuredNaicsCode: z27.string().optional().describe("NAICS code"),
5802
+ additionalNamedInsureds: z27.array(
5803
+ z27.object({
5804
+ name: z27.string(),
5805
+ relationship: z27.string().optional().describe("e.g. subsidiary, affiliate"),
4914
5806
  address: AddressSchema2.optional()
4915
5807
  })
4916
5808
  ).optional().describe("Additional named insureds listed on the policy"),
4917
- lossPayees: z24.array(
4918
- z24.object({
4919
- name: z24.string(),
5809
+ lossPayees: z27.array(
5810
+ z27.object({
5811
+ name: z27.string(),
4920
5812
  address: AddressSchema2.optional()
4921
5813
  })
4922
5814
  ).optional().describe("Loss payees listed on the policy"),
4923
- mortgageHolders: z24.array(
4924
- z24.object({
4925
- name: z24.string(),
5815
+ mortgageHolders: z27.array(
5816
+ z27.object({
5817
+ name: z27.string(),
4926
5818
  address: AddressSchema2.optional()
4927
5819
  })
4928
5820
  ).optional().describe("Mortgage holders / lienholders listed on the policy")
@@ -4945,14 +5837,14 @@ Return JSON only.`;
4945
5837
  }
4946
5838
 
4947
5839
  // src/prompts/extractors/coverage-limits.ts
4948
- import { z as z25 } from "zod";
5840
+ import { z as z28 } from "zod";
4949
5841
  var ExtractorCoverageSchema = CoverageSchema.extend({
4950
- coverageCode: z25.string().optional().describe("Coverage code or class code")
5842
+ coverageCode: z28.string().optional().describe("Coverage code or class code")
4951
5843
  });
4952
- var CoverageLimitsSchema = z25.object({
4953
- coverages: z25.array(ExtractorCoverageSchema).describe("All coverages with their limits"),
4954
- coverageForm: z25.enum(["occurrence", "claims_made", "accident"]).optional().describe("Primary coverage trigger type"),
4955
- retroactiveDate: z25.string().optional().describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
5844
+ var CoverageLimitsSchema = z28.object({
5845
+ coverages: z28.array(ExtractorCoverageSchema).describe("All coverages with their limits"),
5846
+ coverageForm: z28.enum(["occurrence", "claims_made", "accident"]).optional().describe("Primary coverage trigger type"),
5847
+ retroactiveDate: z28.string().optional().describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
4956
5848
  });
4957
5849
  function buildCoverageLimitsPrompt() {
4958
5850
  return `You are an expert insurance document analyst. Extract all coverage limits and deductibles from this document.
@@ -4991,14 +5883,14 @@ Return JSON only.`;
4991
5883
  }
4992
5884
 
4993
5885
  // src/prompts/extractors/endorsements.ts
4994
- import { z as z26 } from "zod";
4995
- var EndorsementsSchema = z26.object({
4996
- endorsements: z26.array(
4997
- z26.object({
4998
- formNumber: z26.string().describe("Form number, e.g. 'CG 21 47'"),
4999
- editionDate: z26.string().optional().describe("Edition date, e.g. '12 07'"),
5000
- title: z26.string().describe("Endorsement title"),
5001
- endorsementType: z26.enum([
5886
+ import { z as z29 } from "zod";
5887
+ var EndorsementsSchema = z29.object({
5888
+ endorsements: z29.array(
5889
+ z29.object({
5890
+ formNumber: z29.string().describe("Form number, e.g. 'CG 21 47'"),
5891
+ editionDate: z29.string().optional().describe("Edition date, e.g. '12 07'"),
5892
+ title: z29.string().describe("Endorsement title"),
5893
+ endorsementType: z29.enum([
5002
5894
  "additional_insured",
5003
5895
  "waiver_of_subrogation",
5004
5896
  "primary_noncontributory",
@@ -5018,12 +5910,12 @@ var EndorsementsSchema = z26.object({
5018
5910
  "territorial_extension",
5019
5911
  "other"
5020
5912
  ]).describe("Endorsement type classification"),
5021
- effectiveDate: z26.string().optional().describe("Endorsement effective date"),
5022
- affectedCoverageParts: z26.array(z26.string()).optional().describe("Coverage parts affected by this endorsement"),
5023
- namedParties: z26.array(
5024
- z26.object({
5025
- name: z26.string().describe("Party name"),
5026
- role: z26.enum([
5913
+ effectiveDate: z29.string().optional().describe("Endorsement effective date"),
5914
+ affectedCoverageParts: z29.array(z29.string()).optional().describe("Coverage parts affected by this endorsement"),
5915
+ namedParties: z29.array(
5916
+ z29.object({
5917
+ name: z29.string().describe("Party name"),
5918
+ role: z29.enum([
5027
5919
  "additional_insured",
5028
5920
  "loss_payee",
5029
5921
  "mortgage_holder",
@@ -5032,15 +5924,15 @@ var EndorsementsSchema = z26.object({
5032
5924
  "designated_person",
5033
5925
  "other"
5034
5926
  ]).describe("Party role"),
5035
- relationship: z26.string().optional().describe("Relationship to insured"),
5036
- scope: z26.string().optional().describe("Scope of coverage for this party")
5927
+ relationship: z29.string().optional().describe("Relationship to insured"),
5928
+ scope: z29.string().optional().describe("Scope of coverage for this party")
5037
5929
  })
5038
5930
  ).optional().describe("Named parties (additional insureds, loss payees, etc.)"),
5039
- keyTerms: z26.array(z26.string()).optional().describe("Key terms or notable provisions in the endorsement"),
5040
- premiumImpact: z26.string().optional().describe("Additional premium or credit"),
5041
- content: z26.string().describe("Full verbatim text of the endorsement"),
5042
- pageStart: z26.number().describe("Starting page number of this endorsement"),
5043
- pageEnd: z26.number().optional().describe("Ending page number of this endorsement")
5931
+ keyTerms: z29.array(z29.string()).optional().describe("Key terms or notable provisions in the endorsement"),
5932
+ premiumImpact: z29.string().optional().describe("Additional premium or credit"),
5933
+ content: z29.string().describe("Full verbatim text of the endorsement"),
5934
+ pageStart: z29.number().describe("Starting page number of this endorsement"),
5935
+ pageEnd: z29.number().optional().describe("Ending page number of this endorsement")
5044
5936
  })
5045
5937
  ).describe("All endorsements found in the document")
5046
5938
  });
@@ -5071,20 +5963,20 @@ Return JSON only.`;
5071
5963
  }
5072
5964
 
5073
5965
  // src/prompts/extractors/exclusions.ts
5074
- import { z as z27 } from "zod";
5075
- var ExclusionsSchema = z27.object({
5076
- exclusions: z27.array(
5077
- z27.object({
5078
- name: z27.string().describe("Exclusion title or short description"),
5079
- formNumber: z27.string().optional().describe("Form number if part of a named endorsement"),
5080
- excludedPerils: z27.array(z27.string()).optional().describe("Specific perils excluded"),
5081
- isAbsolute: z27.boolean().optional().describe("Whether the exclusion is absolute (no exceptions)"),
5082
- exceptions: z27.array(z27.string()).optional().describe("Exceptions to the exclusion, if any"),
5083
- buybackAvailable: z27.boolean().optional().describe("Whether coverage can be bought back via endorsement"),
5084
- buybackEndorsement: z27.string().optional().describe("Form number of the buyback endorsement if available"),
5085
- appliesTo: z27.array(z27.string()).optional().describe("Coverage types this exclusion applies to"),
5086
- content: z27.string().describe("Full verbatim exclusion text"),
5087
- pageNumber: z27.number().optional().describe("Page number where exclusion appears")
5966
+ import { z as z30 } from "zod";
5967
+ var ExclusionsSchema = z30.object({
5968
+ exclusions: z30.array(
5969
+ z30.object({
5970
+ name: z30.string().describe("Exclusion title or short description"),
5971
+ formNumber: z30.string().optional().describe("Form number if part of a named endorsement"),
5972
+ excludedPerils: z30.array(z30.string()).optional().describe("Specific perils excluded"),
5973
+ isAbsolute: z30.boolean().optional().describe("Whether the exclusion is absolute (no exceptions)"),
5974
+ exceptions: z30.array(z30.string()).optional().describe("Exceptions to the exclusion, if any"),
5975
+ buybackAvailable: z30.boolean().optional().describe("Whether coverage can be bought back via endorsement"),
5976
+ buybackEndorsement: z30.string().optional().describe("Form number of the buyback endorsement if available"),
5977
+ appliesTo: z30.array(z30.string()).optional().describe("Coverage types this exclusion applies to"),
5978
+ content: z30.string().describe("Full verbatim exclusion text"),
5979
+ pageNumber: z30.number().optional().describe("Page number where exclusion appears")
5088
5980
  })
5089
5981
  ).describe("All exclusions found in the document")
5090
5982
  });
@@ -5120,12 +6012,12 @@ Return JSON only.`;
5120
6012
  }
5121
6013
 
5122
6014
  // src/prompts/extractors/conditions.ts
5123
- import { z as z28 } from "zod";
5124
- var ConditionsSchema = z28.object({
5125
- conditions: z28.array(
5126
- z28.object({
5127
- name: z28.string().describe("Condition title"),
5128
- conditionType: z28.enum([
6015
+ import { z as z31 } from "zod";
6016
+ var ConditionsSchema = z31.object({
6017
+ conditions: z31.array(
6018
+ z31.object({
6019
+ name: z31.string().describe("Condition title"),
6020
+ conditionType: z31.enum([
5129
6021
  "duties_after_loss",
5130
6022
  "notice_requirements",
5131
6023
  "other_insurance",
@@ -5144,14 +6036,14 @@ var ConditionsSchema = z28.object({
5144
6036
  "separation_of_insureds",
5145
6037
  "other"
5146
6038
  ]).describe("Condition category"),
5147
- content: z28.string().describe("Full verbatim condition text"),
5148
- keyValues: z28.array(
5149
- z28.object({
5150
- key: z28.string().describe("Key name (e.g. 'noticePeriod', 'suitDeadline')"),
5151
- value: z28.string().describe("Value (e.g. '30 days', '2 years')")
6039
+ content: z31.string().describe("Full verbatim condition text"),
6040
+ keyValues: z31.array(
6041
+ z31.object({
6042
+ key: z31.string().describe("Key name (e.g. 'noticePeriod', 'suitDeadline')"),
6043
+ value: z31.string().describe("Value (e.g. '30 days', '2 years')")
5152
6044
  })
5153
6045
  ).optional().describe("Key values extracted from the condition (notice periods, deadlines, etc.)"),
5154
- pageNumber: z28.number().optional().describe("Page number where condition appears")
6046
+ pageNumber: z31.number().optional().describe("Page number where condition appears")
5155
6047
  })
5156
6048
  ).describe("All policy conditions found in the document")
5157
6049
  });
@@ -5189,28 +6081,28 @@ Return JSON only.`;
5189
6081
  }
5190
6082
 
5191
6083
  // src/prompts/extractors/premium-breakdown.ts
5192
- import { z as z29 } from "zod";
5193
- var PremiumBreakdownSchema = z29.object({
5194
- premium: z29.string().optional().describe("Total premium amount, e.g. '$5,000'"),
5195
- totalCost: z29.string().optional().describe("Total cost including taxes and fees, e.g. '$5,250'"),
5196
- premiumBreakdown: z29.array(
5197
- z29.object({
5198
- line: z29.string().describe("Coverage line name"),
5199
- amount: z29.string().describe("Premium amount for this line")
6084
+ import { z as z32 } from "zod";
6085
+ var PremiumBreakdownSchema = z32.object({
6086
+ premium: z32.string().optional().describe("Total premium amount, e.g. '$5,000'"),
6087
+ totalCost: z32.string().optional().describe("Total cost including taxes and fees, e.g. '$5,250'"),
6088
+ premiumBreakdown: z32.array(
6089
+ z32.object({
6090
+ line: z32.string().describe("Coverage line name"),
6091
+ amount: z32.string().describe("Premium amount for this line")
5200
6092
  })
5201
6093
  ).optional().describe("Per-coverage-line premium breakdown"),
5202
- taxesAndFees: z29.array(
5203
- z29.object({
5204
- name: z29.string().describe("Fee or tax name"),
5205
- amount: z29.string().describe("Dollar amount"),
5206
- type: z29.enum(["tax", "fee", "surcharge", "assessment"]).optional().describe("Fee category")
6094
+ taxesAndFees: z32.array(
6095
+ z32.object({
6096
+ name: z32.string().describe("Fee or tax name"),
6097
+ amount: z32.string().describe("Dollar amount"),
6098
+ type: z32.enum(["tax", "fee", "surcharge", "assessment"]).optional().describe("Fee category")
5207
6099
  })
5208
6100
  ).optional().describe("Taxes, fees, surcharges, and assessments"),
5209
- minimumPremium: z29.string().optional().describe("Minimum premium if stated"),
5210
- depositPremium: z29.string().optional().describe("Deposit premium if stated"),
5211
- paymentPlan: z29.string().optional().describe("Payment plan description"),
5212
- auditType: z29.enum(["annual", "semi_annual", "quarterly", "monthly", "final", "self"]).optional().describe("Premium audit type"),
5213
- ratingBasis: z29.string().optional().describe("Rating basis, e.g. payroll, revenue, area, units")
6101
+ minimumPremium: z32.string().optional().describe("Minimum premium if stated"),
6102
+ depositPremium: z32.string().optional().describe("Deposit premium if stated"),
6103
+ paymentPlan: z32.string().optional().describe("Payment plan description"),
6104
+ auditType: z32.enum(["annual", "semi_annual", "quarterly", "monthly", "final", "self"]).optional().describe("Premium audit type"),
6105
+ ratingBasis: z32.string().optional().describe("Rating basis, e.g. payroll, revenue, area, units")
5214
6106
  });
5215
6107
  function buildPremiumBreakdownPrompt() {
5216
6108
  return `You are an expert insurance document analyst. Extract all premium and cost information from this document.
@@ -5230,14 +6122,14 @@ Return JSON only.`;
5230
6122
  }
5231
6123
 
5232
6124
  // src/prompts/extractors/declarations.ts
5233
- import { z as z30 } from "zod";
5234
- var DeclarationsFieldSchema = z30.object({
5235
- field: z30.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
5236
- value: z30.string().describe("Extracted value exactly as it appears in the document"),
5237
- section: z30.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
6125
+ import { z as z33 } from "zod";
6126
+ var DeclarationsFieldSchema = z33.object({
6127
+ field: z33.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
6128
+ value: z33.string().describe("Extracted value exactly as it appears in the document"),
6129
+ section: z33.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
5238
6130
  });
5239
- var DeclarationsExtractSchema = z30.object({
5240
- fields: z30.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
6131
+ var DeclarationsExtractSchema = z33.object({
6132
+ fields: z33.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
5241
6133
  });
5242
6134
  function buildDeclarationsPrompt() {
5243
6135
  return `You are an expert insurance document analyst. Extract all declarations page data from this document into a flexible key-value structure.
@@ -5277,21 +6169,21 @@ Preserve original values exactly as they appear. Return JSON only.`;
5277
6169
  }
5278
6170
 
5279
6171
  // src/prompts/extractors/loss-history.ts
5280
- import { z as z31 } from "zod";
5281
- var LossHistorySchema = z31.object({
5282
- lossSummary: z31.string().optional().describe("Summary of loss history, e.g. '3 claims in past 5 years totaling $125,000'"),
5283
- individualClaims: z31.array(
5284
- z31.object({
5285
- date: z31.string().optional().describe("Date of loss or claim"),
5286
- type: z31.string().optional().describe("Type of claim, e.g. 'property damage', 'bodily injury'"),
5287
- description: z31.string().optional().describe("Brief description of the claim"),
5288
- amountPaid: z31.string().optional().describe("Amount paid"),
5289
- amountReserved: z31.string().optional().describe("Amount reserved"),
5290
- status: z31.enum(["open", "closed", "reopened"]).optional().describe("Claim status"),
5291
- claimNumber: z31.string().optional().describe("Claim reference number")
6172
+ import { z as z34 } from "zod";
6173
+ var LossHistorySchema = z34.object({
6174
+ lossSummary: z34.string().optional().describe("Summary of loss history, e.g. '3 claims in past 5 years totaling $125,000'"),
6175
+ individualClaims: z34.array(
6176
+ z34.object({
6177
+ date: z34.string().optional().describe("Date of loss or claim"),
6178
+ type: z34.string().optional().describe("Type of claim, e.g. 'property damage', 'bodily injury'"),
6179
+ description: z34.string().optional().describe("Brief description of the claim"),
6180
+ amountPaid: z34.string().optional().describe("Amount paid"),
6181
+ amountReserved: z34.string().optional().describe("Amount reserved"),
6182
+ status: z34.enum(["open", "closed", "reopened"]).optional().describe("Claim status"),
6183
+ claimNumber: z34.string().optional().describe("Claim reference number")
5292
6184
  })
5293
6185
  ).optional().describe("Individual claim records"),
5294
- experienceMod: z31.string().optional().describe("Experience modification factor for workers comp, e.g. '0.85'")
6186
+ experienceMod: z34.string().optional().describe("Experience modification factor for workers comp, e.g. '0.85'")
5295
6187
  });
5296
6188
  function buildLossHistoryPrompt() {
5297
6189
  return `You are an expert insurance document analyst. Extract all loss history and claims information from this document.
@@ -5308,18 +6200,18 @@ Return JSON only.`;
5308
6200
  }
5309
6201
 
5310
6202
  // src/prompts/extractors/sections.ts
5311
- import { z as z32 } from "zod";
5312
- var SubsectionSchema2 = z32.object({
5313
- title: z32.string().describe("Subsection title"),
5314
- sectionNumber: z32.string().optional().describe("Subsection number"),
5315
- pageNumber: z32.number().optional().describe("Page number"),
5316
- content: z32.string().describe("Full verbatim text")
5317
- });
5318
- var SectionsSchema = z32.object({
5319
- sections: z32.array(
5320
- z32.object({
5321
- title: z32.string().describe("Section title"),
5322
- type: z32.enum([
6203
+ import { z as z35 } from "zod";
6204
+ var SubsectionSchema2 = z35.object({
6205
+ title: z35.string().describe("Subsection title"),
6206
+ sectionNumber: z35.string().optional().describe("Subsection number"),
6207
+ pageNumber: z35.number().optional().describe("Page number"),
6208
+ content: z35.string().describe("Full verbatim text")
6209
+ });
6210
+ var SectionsSchema = z35.object({
6211
+ sections: z35.array(
6212
+ z35.object({
6213
+ title: z35.string().describe("Section title"),
6214
+ type: z35.enum([
5323
6215
  "declarations",
5324
6216
  "insuring_agreement",
5325
6217
  "policy_form",
@@ -5334,10 +6226,10 @@ var SectionsSchema = z32.object({
5334
6226
  "regulatory",
5335
6227
  "other"
5336
6228
  ]).describe("Section type classification"),
5337
- content: z32.string().describe("Full verbatim text of the section"),
5338
- pageStart: z32.number().describe("Starting page number"),
5339
- pageEnd: z32.number().optional().describe("Ending page number"),
5340
- subsections: z32.array(SubsectionSchema2).optional().describe("Subsections within this section")
6229
+ content: z35.string().describe("Full verbatim text of the section"),
6230
+ pageStart: z35.number().describe("Starting page number"),
6231
+ pageEnd: z35.number().optional().describe("Ending page number"),
6232
+ subsections: z35.array(SubsectionSchema2).optional().describe("Subsections within this section")
5341
6233
  })
5342
6234
  ).describe("All document sections")
5343
6235
  });
@@ -5368,27 +6260,27 @@ Return JSON only.`;
5368
6260
  }
5369
6261
 
5370
6262
  // src/prompts/extractors/supplementary.ts
5371
- import { z as z33 } from "zod";
5372
- var ContactSchema2 = z33.object({
5373
- name: z33.string().optional().describe("Organization or person name"),
5374
- phone: z33.string().optional().describe("Phone number"),
5375
- email: z33.string().optional().describe("Email address"),
5376
- address: z33.string().optional().describe("Mailing address"),
5377
- type: z33.string().optional().describe("Contact type, e.g. 'State Department of Insurance'")
5378
- });
5379
- var AuxiliaryFactSchema2 = z33.object({
5380
- key: z33.string().describe("Normalized machine-readable fact key, e.g. 'policyholder_age' or 'insured_name'"),
5381
- value: z33.string().describe("Concrete extracted fact value"),
5382
- subject: z33.string().optional().describe("Person, entity, vehicle, property, or schedule item this fact belongs to"),
5383
- context: z33.string().optional().describe("Short disambiguating context, such as 'Driver Schedule' or 'Named Insured'")
5384
- });
5385
- var SupplementarySchema = z33.object({
5386
- regulatoryContacts: z33.array(ContactSchema2).optional().describe("Regulatory body contacts (state department of insurance, ombudsman)"),
5387
- claimsContacts: z33.array(ContactSchema2).optional().describe("Claims reporting contacts and instructions"),
5388
- thirdPartyAdministrators: z33.array(ContactSchema2).optional().describe("Third-party administrators for claims handling"),
5389
- cancellationNoticeDays: z33.number().optional().describe("Required notice period for cancellation in days"),
5390
- nonrenewalNoticeDays: z33.number().optional().describe("Required notice period for nonrenewal in days"),
5391
- auxiliaryFacts: z33.array(AuxiliaryFactSchema2).optional().describe("Additional retrieval-only facts that do not fit the strict primary schema")
6263
+ import { z as z36 } from "zod";
6264
+ var ContactSchema2 = z36.object({
6265
+ name: z36.string().optional().describe("Organization or person name"),
6266
+ phone: z36.string().optional().describe("Phone number"),
6267
+ email: z36.string().optional().describe("Email address"),
6268
+ address: z36.string().optional().describe("Mailing address"),
6269
+ type: z36.string().optional().describe("Contact type, e.g. 'State Department of Insurance'")
6270
+ });
6271
+ var AuxiliaryFactSchema2 = z36.object({
6272
+ key: z36.string().describe("Normalized machine-readable fact key, e.g. 'policyholder_age' or 'insured_name'"),
6273
+ value: z36.string().describe("Concrete extracted fact value"),
6274
+ subject: z36.string().optional().describe("Person, entity, vehicle, property, or schedule item this fact belongs to"),
6275
+ context: z36.string().optional().describe("Short disambiguating context, such as 'Driver Schedule' or 'Named Insured'")
6276
+ });
6277
+ var SupplementarySchema = z36.object({
6278
+ regulatoryContacts: z36.array(ContactSchema2).optional().describe("Regulatory body contacts (state department of insurance, ombudsman)"),
6279
+ claimsContacts: z36.array(ContactSchema2).optional().describe("Claims reporting contacts and instructions"),
6280
+ thirdPartyAdministrators: z36.array(ContactSchema2).optional().describe("Third-party administrators for claims handling"),
6281
+ cancellationNoticeDays: z36.number().optional().describe("Required notice period for cancellation in days"),
6282
+ nonrenewalNoticeDays: z36.number().optional().describe("Required notice period for nonrenewal in days"),
6283
+ auxiliaryFacts: z36.array(AuxiliaryFactSchema2).optional().describe("Additional retrieval-only facts that do not fit the strict primary schema")
5392
6284
  });
5393
6285
  function buildSupplementaryPrompt(alreadyExtractedSummary) {
5394
6286
  const exclusionBlock = alreadyExtractedSummary ? `
@@ -5426,17 +6318,17 @@ Return JSON only.`;
5426
6318
  }
5427
6319
 
5428
6320
  // src/prompts/extractors/definitions.ts
5429
- import { z as z34 } from "zod";
5430
- var DefinitionsSchema = z34.object({
5431
- definitions: z34.array(
5432
- z34.object({
5433
- term: z34.string().describe("Defined term exactly as shown in the document"),
5434
- definition: z34.string().describe("Full verbatim definition text, preserving original wording"),
5435
- pageNumber: z34.number().optional().describe("Original document page number"),
5436
- formNumber: z34.string().optional().describe("Form number where this definition appears"),
5437
- formTitle: z34.string().optional().describe("Form title where this definition appears"),
5438
- sectionRef: z34.string().optional().describe("Definition section heading or subsection reference"),
5439
- originalContent: z34.string().optional().describe("Short verbatim source snippet containing the term and definition")
6321
+ import { z as z37 } from "zod";
6322
+ var DefinitionsSchema = z37.object({
6323
+ definitions: z37.array(
6324
+ z37.object({
6325
+ term: z37.string().describe("Defined term exactly as shown in the document"),
6326
+ definition: z37.string().describe("Full verbatim definition text, preserving original wording"),
6327
+ pageNumber: z37.number().optional().describe("Original document page number"),
6328
+ formNumber: z37.string().optional().describe("Form number where this definition appears"),
6329
+ formTitle: z37.string().optional().describe("Form title where this definition appears"),
6330
+ sectionRef: z37.string().optional().describe("Definition section heading or subsection reference"),
6331
+ originalContent: z37.string().optional().describe("Short verbatim source snippet containing the term and definition")
5440
6332
  })
5441
6333
  ).describe("All substantive insurance definitions found in the document")
5442
6334
  });
@@ -5470,22 +6362,22 @@ Return JSON only.`;
5470
6362
  }
5471
6363
 
5472
6364
  // src/prompts/extractors/covered-reasons.ts
5473
- import { z as z35 } from "zod";
5474
- var CoveredReasonsSchema = z35.object({
5475
- coveredReasons: z35.array(
5476
- z35.object({
5477
- coverageName: z35.string().describe("Coverage, coverage part, or form this covered reason belongs to"),
5478
- reasonNumber: z35.string().optional().describe("Source number or letter for the covered reason, if shown"),
5479
- title: z35.string().optional().describe("Covered reason title, peril, cause of loss, trigger, or short name"),
5480
- content: z35.string().describe("Full verbatim covered-reason or insuring-agreement text"),
5481
- conditions: z35.array(z35.string()).optional().describe("Conditions, timing rules, documentation requirements, or prerequisites attached to this covered reason"),
5482
- exceptions: z35.array(z35.string()).optional().describe("Exceptions or limitations attached to this covered reason"),
5483
- appliesTo: z35.array(z35.string()).optional().describe("Covered property, persons, autos, locations, operations, or coverage parts this reason applies to"),
5484
- pageNumber: z35.number().optional().describe("Original document page number"),
5485
- formNumber: z35.string().optional().describe("Form number where this covered reason appears"),
5486
- formTitle: z35.string().optional().describe("Form title where this covered reason appears"),
5487
- sectionRef: z35.string().optional().describe("Section heading where this covered reason appears"),
5488
- originalContent: z35.string().optional().describe("Short verbatim source snippet used for this covered reason")
6365
+ import { z as z38 } from "zod";
6366
+ var CoveredReasonsSchema = z38.object({
6367
+ coveredReasons: z38.array(
6368
+ z38.object({
6369
+ coverageName: z38.string().describe("Coverage, coverage part, or form this covered reason belongs to"),
6370
+ reasonNumber: z38.string().optional().describe("Source number or letter for the covered reason, if shown"),
6371
+ title: z38.string().optional().describe("Covered reason title, peril, cause of loss, trigger, or short name"),
6372
+ content: z38.string().describe("Full verbatim covered-reason or insuring-agreement text"),
6373
+ conditions: z38.array(z38.string()).optional().describe("Conditions, timing rules, documentation requirements, or prerequisites attached to this covered reason"),
6374
+ exceptions: z38.array(z38.string()).optional().describe("Exceptions or limitations attached to this covered reason"),
6375
+ appliesTo: z38.array(z38.string()).optional().describe("Covered property, persons, autos, locations, operations, or coverage parts this reason applies to"),
6376
+ pageNumber: z38.number().optional().describe("Original document page number"),
6377
+ formNumber: z38.string().optional().describe("Form number where this covered reason appears"),
6378
+ formTitle: z38.string().optional().describe("Form title where this covered reason appears"),
6379
+ sectionRef: z38.string().optional().describe("Section heading where this covered reason appears"),
6380
+ originalContent: z38.string().optional().describe("Short verbatim source snippet used for this covered reason")
5489
6381
  })
5490
6382
  ).describe("Covered causes, perils, triggers, or reasons that affirmatively grant coverage")
5491
6383
  });
@@ -5618,21 +6510,21 @@ function formatExtractorCatalogForPrompt() {
5618
6510
  }
5619
6511
 
5620
6512
  // src/extraction/resolve-referential.ts
5621
- import { z as z37 } from "zod";
6513
+ import { z as z40 } from "zod";
5622
6514
 
5623
6515
  // src/prompts/extractors/referential-lookup.ts
5624
- import { z as z36 } from "zod";
5625
- var ReferentialLookupSchema = z36.object({
5626
- resolvedCoverages: z36.array(
5627
- z36.object({
5628
- coverageName: z36.string().describe("The coverage name that was referenced"),
5629
- resolvedLimit: z36.string().optional().describe("The concrete limit value found, if any"),
6516
+ import { z as z39 } from "zod";
6517
+ var ReferentialLookupSchema = z39.object({
6518
+ resolvedCoverages: z39.array(
6519
+ z39.object({
6520
+ coverageName: z39.string().describe("The coverage name that was referenced"),
6521
+ resolvedLimit: z39.string().optional().describe("The concrete limit value found, if any"),
5630
6522
  resolvedLimitValueType: CoverageValueTypeSchema.optional(),
5631
- resolvedDeductible: z36.string().optional().describe("The concrete deductible value found, if any"),
6523
+ resolvedDeductible: z39.string().optional().describe("The concrete deductible value found, if any"),
5632
6524
  resolvedDeductibleValueType: CoverageValueTypeSchema.optional(),
5633
- pageNumber: z36.number().optional().describe("Page where the resolved value was found"),
5634
- originalContent: z36.string().optional().describe("Verbatim source text for the resolved value"),
5635
- confidence: z36.enum(["high", "medium", "low"]).describe("Confidence in the resolution")
6525
+ pageNumber: z39.number().optional().describe("Page where the resolved value was found"),
6526
+ originalContent: z39.string().optional().describe("Verbatim source text for the resolved value"),
6527
+ confidence: z39.enum(["high", "medium", "low"]).describe("Confidence in the resolution")
5636
6528
  })
5637
6529
  )
5638
6530
  });
@@ -5680,11 +6572,11 @@ function looksCoveredReasonSection(section) {
5680
6572
  }
5681
6573
 
5682
6574
  // src/extraction/referential-workflow.ts
5683
- function normalizeText(value) {
6575
+ function normalizeText2(value) {
5684
6576
  return typeof value === "string" ? value.trim().toLowerCase() : "";
5685
6577
  }
5686
6578
  function containsTarget(value, target) {
5687
- const normalizedValue = normalizeText(value);
6579
+ const normalizedValue = normalizeText2(value);
5688
6580
  return Boolean(normalizedValue && target && normalizedValue.includes(target));
5689
6581
  }
5690
6582
  function pageRangeFrom(startPage, endPage) {
@@ -5727,8 +6619,8 @@ function findLocalReferentialPages(params) {
5727
6619
  }
5728
6620
  function findDeclarationsSchedulePages(parsedTarget, formInventory) {
5729
6621
  for (const form of formInventory) {
5730
- const formType = normalizeText(form.formType);
5731
- const title = normalizeText(form.title);
6622
+ const formType = normalizeText2(form.formType);
6623
+ const title = normalizeText2(form.title);
5732
6624
  const matchesDeclarations = formType === "declarations" || /declarations?|dec\b|decs\b/.test(title);
5733
6625
  const matchesSchedule = /schedule|scheduled|coverage/.test(title) || formType === "coverage";
5734
6626
  const shouldUse = parsedTarget.kind === "declarations" ? matchesDeclarations : parsedTarget.kind === "schedule" || parsedTarget.kind === "item" || parsedTarget.kind === "premises" ? matchesSchedule || matchesDeclarations : parsedTarget.kind === "policy" ? matchesDeclarations || matchesSchedule : false;
@@ -5741,8 +6633,8 @@ function findDeclarationsSchedulePages(parsedTarget, formInventory) {
5741
6633
  }
5742
6634
  function findSectionPages(parsedTarget, sections) {
5743
6635
  for (const section of sections) {
5744
- const title = normalizeText(section.title);
5745
- const type = normalizeText(section.type);
6636
+ const title = normalizeText2(section.title);
6637
+ const type = normalizeText2(section.type);
5746
6638
  const matchesKind = parsedTarget.kind === "declarations" && (type === "declarations" || /declarations?/.test(title)) || parsedTarget.kind === "schedule" && (type === "schedule" || /schedule|scheduled/.test(title)) || parsedTarget.kind === "premises" && /premises?|location|building/.test(title) || parsedTarget.kind === "item" && /\bitem\b|schedule|scheduled/.test(title) || parsedTarget.kind === "section" && containsTarget(title, parsedTarget.normalized);
5747
6639
  if (matchesKind) {
5748
6640
  const range = pageRangeFrom(section.pageStart, section.pageEnd);
@@ -5797,9 +6689,9 @@ function parseReferenceTarget(text) {
5797
6689
  if (/if applicable/i.test(normalized)) return void 0;
5798
6690
  return void 0;
5799
6691
  }
5800
- var PageLocationSchema = z37.object({
5801
- startPage: z37.number(),
5802
- endPage: z37.number()
6692
+ var PageLocationSchema = z40.object({
6693
+ startPage: z40.number(),
6694
+ endPage: z40.number()
5803
6695
  });
5804
6696
  async function findReferencedPages(params) {
5805
6697
  const {
@@ -5811,6 +6703,8 @@ async function findReferencedPages(params) {
5811
6703
  generateObject,
5812
6704
  providerOptions,
5813
6705
  trackUsage,
6706
+ modelCapabilities,
6707
+ modelBudgetConstraints,
5814
6708
  log
5815
6709
  } = params;
5816
6710
  const localPageRange = findLocalReferentialPages({
@@ -5837,6 +6731,12 @@ async function findReferencedPages(params) {
5837
6731
  return void 0;
5838
6732
  }
5839
6733
  try {
6734
+ const budget = resolveModelBudget({
6735
+ taskKind: "extraction_referential_lookup",
6736
+ hintTokens: 256,
6737
+ modelCapabilities,
6738
+ constraint: modelBudgetConstraints?.extraction_referential_lookup
6739
+ });
5840
6740
  const result = await safeGenerateObject(
5841
6741
  generateObject,
5842
6742
  {
@@ -5850,7 +6750,7 @@ If you cannot find the section, return startPage: 0 and endPage: 0.
5850
6750
 
5851
6751
  Return JSON only.`,
5852
6752
  schema: PageLocationSchema,
5853
- maxTokens: 256,
6753
+ maxTokens: budget.maxTokens,
5854
6754
  providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
5855
6755
  },
5856
6756
  {
@@ -5885,6 +6785,8 @@ async function resolveReferentialCoverages(params) {
5885
6785
  convertPdfToImages,
5886
6786
  concurrency = 2,
5887
6787
  providerOptions,
6788
+ modelCapabilities,
6789
+ modelBudgetConstraints,
5888
6790
  log,
5889
6791
  onProgress
5890
6792
  } = params;
@@ -5947,6 +6849,8 @@ async function resolveReferentialCoverages(params) {
5947
6849
  generateObject,
5948
6850
  providerOptions,
5949
6851
  trackUsage,
6852
+ modelCapabilities,
6853
+ modelBudgetConstraints,
5950
6854
  log
5951
6855
  });
5952
6856
  if (!pageRange) {
@@ -5973,6 +6877,12 @@ async function resolveReferentialCoverages(params) {
5973
6877
  sectionRef: coverage.sectionRef ? String(coverage.sectionRef) : void 0
5974
6878
  }));
5975
6879
  try {
6880
+ const budget = resolveModelBudget({
6881
+ taskKind: "extraction_referential_lookup",
6882
+ hintTokens: 4096,
6883
+ modelCapabilities,
6884
+ constraint: modelBudgetConstraints?.extraction_referential_lookup
6885
+ });
5976
6886
  const result = await runExtractor({
5977
6887
  name: "referential_lookup",
5978
6888
  prompt: buildReferentialLookupPrompt(promptCoverages),
@@ -5982,7 +6892,7 @@ async function resolveReferentialCoverages(params) {
5982
6892
  endPage: pageRange.endPage,
5983
6893
  generateObject,
5984
6894
  convertPdfToImages,
5985
- maxTokens: 4096,
6895
+ maxTokens: budget.maxTokens,
5986
6896
  providerOptions
5987
6897
  });
5988
6898
  trackUsage(result.usage);
@@ -6072,7 +6982,9 @@ async function runFocusedExtractorWithFallback(params) {
6072
6982
  generateObject,
6073
6983
  convertPdfToImages,
6074
6984
  providerOptions,
6985
+ pageRangeCache,
6075
6986
  trackUsage,
6987
+ resolveBudget,
6076
6988
  log
6077
6989
  } = params;
6078
6990
  const ext = getExtractor(task.extractorName);
@@ -6081,6 +6993,9 @@ async function runFocusedExtractorWithFallback(params) {
6081
6993
  return null;
6082
6994
  }
6083
6995
  try {
6996
+ const hintTokens = ext.maxTokens ?? 4096;
6997
+ const taskKind = hintTokens >= 8192 ? "extraction_long_list" : "extraction_focused";
6998
+ const budget = resolveBudget(taskKind, hintTokens);
6084
6999
  const result = await runExtractor({
6085
7000
  name: task.extractorName,
6086
7001
  prompt: ext.buildPrompt(),
@@ -6090,10 +7005,15 @@ async function runFocusedExtractorWithFallback(params) {
6090
7005
  endPage: task.endPage,
6091
7006
  generateObject,
6092
7007
  convertPdfToImages,
6093
- maxTokens: ext.maxTokens ?? 4096,
6094
- providerOptions
7008
+ maxTokens: budget.maxTokens,
7009
+ providerOptions,
7010
+ pageRangeCache
7011
+ });
7012
+ trackUsage(result.usage, {
7013
+ taskKind,
7014
+ label: task.extractorName,
7015
+ maxTokens: budget.maxTokens
6095
7016
  });
6096
- trackUsage(result.usage);
6097
7017
  if (!ext.fallback?.isEmpty(result.data)) {
6098
7018
  return result;
6099
7019
  }
@@ -6112,6 +7032,9 @@ async function runFocusedExtractorWithFallback(params) {
6112
7032
  `Extractor ${task.extractorName} produced no usable records; trying ${ext.fallback.extractorName} fallback for pages ${task.startPage}-${task.endPage}`
6113
7033
  );
6114
7034
  try {
7035
+ const hintTokens = fallbackExt.maxTokens ?? 4096;
7036
+ const taskKind = hintTokens >= 8192 ? "extraction_long_list" : "extraction_focused";
7037
+ const budget = resolveBudget(taskKind, hintTokens);
6115
7038
  const fallbackResult = await runExtractor({
6116
7039
  name: ext.fallback.extractorName,
6117
7040
  prompt: fallbackExt.buildPrompt(),
@@ -6121,10 +7044,15 @@ async function runFocusedExtractorWithFallback(params) {
6121
7044
  endPage: task.endPage,
6122
7045
  generateObject,
6123
7046
  convertPdfToImages,
6124
- maxTokens: fallbackExt.maxTokens ?? 4096,
6125
- providerOptions
7047
+ maxTokens: budget.maxTokens,
7048
+ providerOptions,
7049
+ pageRangeCache
7050
+ });
7051
+ trackUsage(fallbackResult.usage, {
7052
+ taskKind,
7053
+ label: ext.fallback.extractorName,
7054
+ maxTokens: budget.maxTokens
6126
7055
  });
6127
- trackUsage(fallbackResult.usage);
6128
7056
  const focusedData = ext.fallback.deriveFocusedResult(fallbackResult.data);
6129
7057
  return focusedData ? [
6130
7058
  fallbackResult,
@@ -6201,6 +7129,15 @@ function buildExtractionReviewReport(params) {
6201
7129
  const coveredReasons = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons : sections.filter(looksCoveredReasonSection);
6202
7130
  const mappedDefinitions = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("definitions"));
6203
7131
  const mappedCoveredReasons = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("covered_reasons"));
7132
+ if (params.sourceSpansAvailable) {
7133
+ addMissingSourceGroundingIssues(deterministicIssues, "coverage_limits", "coverages", coverages, "name");
7134
+ addMissingSourceGroundingIssues(deterministicIssues, "endorsements", "endorsements", endorsements, "title");
7135
+ addMissingSourceGroundingIssues(deterministicIssues, "exclusions", "exclusions", exclusions, "name");
7136
+ addMissingSourceGroundingIssues(deterministicIssues, "conditions", "conditions", conditions, "name");
7137
+ addMissingSourceGroundingIssues(deterministicIssues, "sections", "sections", sections, "title");
7138
+ addMissingSourceGroundingIssues(deterministicIssues, "definitions", "definitions", definitions, "term");
7139
+ addMissingSourceGroundingIssues(deterministicIssues, "covered_reasons", "coveredReasons", coveredReasons, "name");
7140
+ }
6204
7141
  if (mappedDefinitions && definitions.length === 0) {
6205
7142
  deterministicIssues.push({
6206
7143
  code: "definitions_mapped_but_empty",
@@ -6514,6 +7451,24 @@ function buildExtractionReviewReport(params) {
6514
7451
  qualityGateStatus
6515
7452
  };
6516
7453
  }
7454
/**
 * Appends a blocking `record_missing_source_span` issue for every record that
 * has textual content but no source span ids attached.
 *
 * @param issues Issue list; mutated in place (issues are pushed onto it).
 * @param extractorName Extractor name, used in the message and stored on the issue.
 * @param arrayName Name of the array the records came from; used in the message only.
 * @param records Extracted records to inspect.
 * @param labelKey Preferred property to read the record label from.
 */
function addMissingSourceGroundingIssues(issues, extractorName, arrayName, records, labelKey) {
  for (const record of records) {
    // Null, primitive or nested-array entries cannot carry grounding metadata.
    // Skip them instead of throwing on property access, matching the guard
    // used when grounding records in extraction memory.
    if (!record || typeof record !== "object" || Array.isArray(record)) continue;
    if (!recordHasContent(record)) continue;
    if (Array.isArray(record.sourceSpanIds) && record.sourceSpanIds.length > 0) continue;
    issues.push({
      code: "record_missing_source_span",
      severity: "blocking",
      message: `${extractorName}.${arrayName} record "${String(record[labelKey] ?? record.name ?? record.title ?? "unknown")}" is missing source span grounding.`,
      extractorName,
      // Prefer an explicit page number, then the start of a page range.
      pageNumber: typeof record.pageNumber === "number" ? record.pageNumber : typeof record.pageStart === "number" ? record.pageStart : void 0,
      formNumber: typeof record.formNumber === "string" ? record.formNumber : void 0,
      itemName: typeof record[labelKey] === "string" ? record[labelKey] : void 0
    });
  }
}

/**
 * Reports whether a record has at least one non-blank string among its
 * identifying or content fields. Non-string values (e.g. numeric limits) do
 * not count, and non-object input is treated as having no content.
 *
 * @param record Candidate record.
 * @returns `true` when any inspected field is a non-blank string.
 */
function recordHasContent(record) {
  if (!record || typeof record !== "object") return false;
  return ["name", "title", "term", "field", "coverageName", "content", "originalContent", "value", "limit", "deductible", "premium"].some((key) => typeof record[key] === "string" && record[key].trim().length > 0);
}
6517
7472
  function toReviewRoundRecord(round, review) {
6518
7473
  return {
6519
7474
  round,
@@ -6652,6 +7607,104 @@ function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory)
6652
7607
  };
6653
7608
  }
6654
7609
 
7610
+ // src/extraction/source-grounding.ts
7611
// Maps each extraction-memory key to the property names under which its
// payload may hold an array of records. Some payloads are looked up under
// more than one spelling (camelCase and snake_case).
var ARRAY_PATHS = [
  ["coverage_limits", ["coverages"]],
  ["endorsements", ["endorsements"]],
  ["exclusions", ["exclusions"]],
  ["conditions", ["conditions"]],
  ["sections", ["sections"]],
  ["definitions", ["definitions"]],
  ["covered_reasons", ["coveredReasons", "covered_reasons"]],
  ["declarations", ["fields"]]
].map(([memoryKey, arrayKeys]) => ({ memoryKey, arrayKeys }));
7621
/**
 * Canonicalizes text for comparison: collapses every whitespace run to a
 * single space, trims both ends and lower-cases the result.
 *
 * Non-string input (for example a span with no text) normalizes to the empty
 * string instead of throwing, so callers comparing against it simply get
 * "no match".
 *
 * @param value Text to normalize.
 * @returns The normalized string, or `""` when `value` is not a string.
 */
function normalize(value) {
  if (typeof value !== "string") return "";
  return value.replace(/\s+/g, " ").trim().toLowerCase();
}
7624
/**
 * Returns the trimmed value of the first listed key whose value is a
 * non-blank string.
 *
 * @param record Object to read from.
 * @param keys Property names, tried in order.
 * @returns The trimmed string, or `undefined` when no key qualifies.
 */
function textValue(record, ...keys) {
  const firstTextKey = keys.find((key) => typeof record[key] === "string" && record[key].trim());
  return firstTextKey === void 0 ? void 0 : record[firstTextKey].trim();
}
7631
/**
 * Returns the value of the first listed key that holds a finite number.
 *
 * @param record Object to read from.
 * @param keys Property names, tried in order.
 * @returns The finite number, or `undefined` when no key qualifies
 *          (NaN, infinities and non-numbers are ignored).
 */
function numberValue(record, ...keys) {
  const values = keys.map((key) => record[key]);
  return values.find((value) => typeof value === "number" && Number.isFinite(value));
}
7638
/**
 * Tests whether a record's page range intersects a span's page range.
 *
 * The span's first page is read from `pageStart`, then `location.page`, then
 * `location.startPage`; its last page from `pageEnd`, then `location.page`,
 * then `location.endPage`, defaulting to the first page. A missing bound on
 * the record side falls back to the other bound.
 *
 * @param recordStart First page of the record, if known.
 * @param recordEnd Last page of the record, if known.
 * @param span Source span to compare against.
 * @returns `true` when the two inclusive ranges share a page; `false` when
 *          either side has no usable (truthy) page information.
 */
function pageOverlaps(recordStart, recordEnd, span) {
  const recordHasPage = Boolean(recordStart) || Boolean(recordEnd);
  if (!recordHasPage) return false;

  const location = span.location;
  const firstSpanPage = span.pageStart ?? location?.page ?? location?.startPage;
  if (!firstSpanPage) return false;
  const lastSpanPage = span.pageEnd ?? location?.page ?? location?.endPage ?? firstSpanPage;

  const firstRecordPage = recordStart ?? recordEnd;
  const lastRecordPage = recordEnd ?? recordStart;
  return firstRecordPage <= lastSpanPage && lastRecordPage >= firstSpanPage;
}
7647
/**
 * Tests whether a record and a span name the same form number, comparing the
 * normalized (whitespace-collapsed, case-insensitive) values.
 *
 * @param record Extracted record; its `formNumber` must be a non-blank string.
 * @param span Source span; its `formNumber` must be truthy.
 * @returns `true` only when both sides carry a form number and they match.
 */
function formMatches(record, span) {
  const recordForm = textValue(record, "formNumber");
  const spanForm = span.formNumber;
  if (recordForm && spanForm) {
    return normalize(recordForm) === normalize(spanForm);
  }
  return false;
}
7652
/**
 * Tests whether the span's text contains one of the record's text values.
 *
 * One candidate is taken from each of three key groups (body text, label,
 * monetary fields), using the first non-blank string in the group.
 * Candidates shorter than three characters are ignored. Matching is a
 * substring check on normalized text.
 *
 * @param record Extracted record.
 * @param span Source span whose `text` is searched.
 * @returns `true` when any candidate occurs in the span text.
 */
function textMatches(record, span) {
  const haystack = normalize(span.text);
  const keyGroups = [
    ["originalContent", "content", "definition", "value"],
    ["name", "title", "term", "field", "coverageName"],
    ["limit", "deductible", "premium"]
  ];
  for (const group of keyGroups) {
    const candidate = textValue(record, ...group);
    if (!candidate || candidate.length < 3) continue;
    if (haystack.includes(normalize(candidate))) return true;
  }
  return false;
}
7661
/**
 * Builds a combined hash string for a list of spans by joining each span's
 * `textHash` (or `hash` when `textHash` is null/undefined) with ":".
 * Spans without a truthy hash are left out.
 *
 * @param spans Source spans, in the order their hashes should appear.
 * @returns The joined hash, or `undefined` when the result would be empty.
 */
function sourceHashFor(spans) {
  const hashes = [];
  for (const span of spans) {
    const hash = span.textHash ?? span.hash;
    if (hash) hashes.push(hash);
  }
  return hashes.join(":") || void 0;
}
7664
/**
 * Picks up to three source spans that most plausibly back a record.
 *
 * Each span is scored: +4 for a page overlap, +3 for a matching form number,
 * +2 for a text match. Spans scoring below 2 are dropped. The rest are
 * ordered by descending score, with ties broken by `id` via `localeCompare`.
 *
 * @param record Extracted record; pages are read from `pageNumber`, then
 *               `pageStart` / `pageEnd`.
 * @param sourceSpans Candidate spans.
 * @returns The best-scoring spans (at most three); empty when none qualify.
 */
function findSourceSpansForRecord(record, sourceSpans) {
  if (sourceSpans.length === 0) return [];

  const firstPage = numberValue(record, "pageNumber", "pageStart");
  const lastPage = numberValue(record, "pageNumber", "pageEnd");

  const ranked = [];
  for (const span of sourceSpans) {
    const pagePoints = pageOverlaps(firstPage, lastPage, span) ? 4 : 0;
    const formPoints = formMatches(record, span) ? 3 : 0;
    const textPoints = textMatches(record, span) ? 2 : 0;
    const score = pagePoints + formPoints + textPoints;
    if (score >= 2) ranked.push({ span, score });
  }

  // Highest score first; equal scores fall back to id order.
  ranked.sort((a, b) => b.score - a.score || a.span.id.localeCompare(b.span.id));
  return ranked.slice(0, 3).map(({ span }) => span);
}
7680
/**
 * Returns a record carrying source grounding (`sourceSpanIds` and
 * `sourceTextHash`), filling in whatever is missing. The input record is
 * never mutated; a shallow copy is returned when anything is added.
 *
 * - A record that already has span ids and a truthy hash is returned as is.
 * - A record that has span ids but no hash gets a hash computed from the
 *   spans it cites (looked up by id, in the record's id order), so the hash
 *   always describes the cited spans rather than unrelated heuristic matches.
 *   If none of the cited spans can be found or hashed, the record is returned
 *   unchanged.
 * - A record without span ids is matched heuristically; the matched span ids
 *   are attached and an existing non-blank string hash is kept, otherwise the
 *   hash of the matches is used.
 *
 * @param record Extracted record (plain object).
 * @param sourceSpans All available source spans.
 * @returns The original record or a grounded shallow copy.
 */
function groundRecord(record, sourceSpans) {
  const hasSpanIds = Array.isArray(record.sourceSpanIds) && record.sourceSpanIds.length > 0;
  if (hasSpanIds && record.sourceTextHash) {
    return record;
  }

  if (hasSpanIds) {
    // Only the hash is missing: derive it from the spans the record cites.
    const spansById = new Map(sourceSpans.map((span) => [span.id, span]));
    const citedSpans = record.sourceSpanIds.map((id) => spansById.get(id)).filter(Boolean);
    const citedHash = sourceHashFor(citedSpans);
    return citedHash ? { ...record, sourceTextHash: citedHash } : record;
  }

  const matches = findSourceSpansForRecord(record, sourceSpans);
  if (matches.length === 0) return record;

  const hasOwnHash = typeof record.sourceTextHash === "string" && record.sourceTextHash.trim();
  return {
    ...record,
    sourceSpanIds: matches.map((span) => span.id),
    sourceTextHash: hasOwnHash ? record.sourceTextHash : sourceHashFor(matches)
  };
}
7692
/**
 * Grounds every record array listed in ARRAY_PATHS against the given spans.
 *
 * For each memory key whose payload is a plain object, each configured array
 * property is replaced (on that payload object, in place) with a new array in
 * which plain-object items have been passed through `groundRecord`. Other
 * items are kept untouched. Does nothing when there are no spans.
 *
 * @param memory Extraction memory; only `get(key)` is used.
 * @param sourceSpans Available source spans.
 */
function groundExtractionMemoryWithSourceSpans(memory, sourceSpans) {
  if (sourceSpans.length === 0) return;

  const isPlainObject = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);

  ARRAY_PATHS.forEach(({ memoryKey, arrayKeys }) => {
    const payload = memory.get(memoryKey);
    if (!isPlainObject(payload)) return;

    arrayKeys.forEach((arrayKey) => {
      const items = payload[arrayKey];
      if (Array.isArray(items)) {
        payload[arrayKey] = items.map((item) => isPlainObject(item) ? groundRecord(item, sourceSpans) : item);
      }
    });
  });
}
7707
+
6655
7708
  // src/extraction/coordinator.ts
6656
7709
  function createExtractor(config) {
6657
7710
  const {
@@ -6664,7 +7717,10 @@ function createExtractor(config) {
6664
7717
  onProgress,
6665
7718
  log,
6666
7719
  providerOptions,
7720
+ sourceStore,
6667
7721
  qualityGate = "warn",
7722
+ modelCapabilities,
7723
+ modelBudgetConstraints,
6668
7724
  onCheckpointSave
6669
7725
  } = config;
6670
7726
  const limit = pLimit(concurrency);
@@ -6673,7 +7729,20 @@ function createExtractor(config) {
6673
7729
  let modelCalls = 0;
6674
7730
  let callsWithUsage = 0;
6675
7731
  let callsMissingUsage = 0;
6676
- function trackUsage(usage) {
7732
+ let performanceReport = {
7733
+ modelCalls: [],
7734
+ totalModelCallDurationMs: 0
7735
+ };
7736
+ let activeProviderOptions = providerOptions;
7737
/**
 * Resolves the model budget for one task kind by delegating to
 * `resolveModelBudget` with the extractor's configured model capabilities
 * and the per-task-kind constraint, if one is configured.
 *
 * @param taskKind Task kind used to select the constraint and passed through.
 * @param hintTokens Token hint passed through to `resolveModelBudget`.
 * @returns Whatever `resolveModelBudget` returns for these inputs.
 */
function resolveBudget(taskKind, hintTokens) {
  const constraint = modelBudgetConstraints?.[taskKind];
  return resolveModelBudget({ taskKind, hintTokens, modelCapabilities, constraint });
}
7745
+ function trackUsage(usage, report) {
6677
7746
  modelCalls += 1;
6678
7747
  if (usage) {
6679
7748
  callsWithUsage += 1;
@@ -6683,6 +7752,16 @@ function createExtractor(config) {
6683
7752
  } else {
6684
7753
  callsMissingUsage += 1;
6685
7754
  }
7755
+ if (report) {
7756
+ performanceReport.modelCalls.push({
7757
+ ...report,
7758
+ usage,
7759
+ usageReported: !!usage
7760
+ });
7761
+ if (report.durationMs) {
7762
+ performanceReport.totalModelCallDurationMs += report.durationMs;
7763
+ }
7764
+ }
6686
7765
  }
6687
7766
  function mergeMemoryResult(name, data, memory) {
6688
7767
  const existing = memory.get(name);
@@ -6765,9 +7844,10 @@ function createExtractor(config) {
6765
7844
  }
6766
7845
  return lines.length > 0 ? lines.join("\n") : "";
6767
7846
  }
6768
- async function runFocusedExtractorTask(task, pdfInput, memory) {
7847
+ async function runFocusedExtractorTask(task, pdfInput, memory, pageRangeCache) {
6769
7848
  if (task.extractorName === "supplementary") {
6770
7849
  const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
7850
+ const budget = resolveBudget("extraction_focused", 4096);
6771
7851
  const result = await runExtractor({
6772
7852
  name: "supplementary",
6773
7853
  prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
@@ -6777,10 +7857,15 @@ function createExtractor(config) {
6777
7857
  endPage: task.endPage,
6778
7858
  generateObject,
6779
7859
  convertPdfToImages,
6780
- maxTokens: 4096,
6781
- providerOptions
7860
+ maxTokens: budget.maxTokens,
7861
+ providerOptions: activeProviderOptions,
7862
+ pageRangeCache
7863
+ });
7864
+ trackUsage(result.usage, {
7865
+ taskKind: "extraction_focused",
7866
+ label: "supplementary",
7867
+ maxTokens: budget.maxTokens
6782
7868
  });
6783
- trackUsage(result.usage);
6784
7869
  return result;
6785
7870
  }
6786
7871
  return runFocusedExtractorWithFallback({
@@ -6788,8 +7873,10 @@ function createExtractor(config) {
6788
7873
  pdfInput,
6789
7874
  generateObject,
6790
7875
  convertPdfToImages,
6791
- providerOptions,
7876
+ providerOptions: activeProviderOptions,
7877
+ pageRangeCache,
6792
7878
  trackUsage,
7879
+ resolveBudget,
6793
7880
  log
6794
7881
  });
6795
7882
  }
@@ -6810,6 +7897,19 @@ function createExtractor(config) {
6810
7897
  modelCalls = 0;
6811
7898
  callsWithUsage = 0;
6812
7899
  callsMissingUsage = 0;
7900
+ performanceReport = {
7901
+ modelCalls: [],
7902
+ totalModelCallDurationMs: 0
7903
+ };
7904
+ const sourceSpans = options?.sourceSpans ?? [];
7905
+ const sourceChunks = sourceSpans.length ? chunkSourceSpans(sourceSpans) : [];
7906
+ activeProviderOptions = sourceSpans.length ? { ...providerOptions, sourceSpans, sourceChunks } : providerOptions;
7907
+ if (sourceStore && sourceSpans.length > 0) {
7908
+ await sourceStore.addSourceSpans(sourceSpans);
7909
+ if (sourceChunks.length > 0) {
7910
+ await sourceStore.addSourceChunks(sourceChunks);
7911
+ }
7912
+ }
6813
7913
  const pipelineCtx = createPipelineContext({
6814
7914
  id,
6815
7915
  onSave: onCheckpointSave,
@@ -6823,12 +7923,21 @@ function createExtractor(config) {
6823
7923
  }
6824
7924
  }
6825
7925
  let pdfBase64Cache;
7926
+ const pageRangePdfCache = /* @__PURE__ */ new Map();
6826
7927
  async function getPdfBase64ForExtraction() {
6827
7928
  if (pdfBase64Cache === void 0) {
6828
7929
  pdfBase64Cache = await pdfInputToBase64(pdfInput);
6829
7930
  }
6830
7931
  return pdfBase64Cache;
6831
7932
  }
7933
/**
 * Returns the PDF for a page range, reusing a previously extracted result
 * from `pageRangePdfCache` when a truthy entry exists under the key
 * "<startPage>-<endPage>". On a miss the range is extracted from the
 * document's base64 and stored in the cache before being returned.
 *
 * @param startPage First page passed to `extractPageRange`.
 * @param endPage Last page passed to `extractPageRange`.
 * @returns The cached or freshly extracted page-range PDF.
 */
async function getPageRangePdf(startPage, endPage) {
  const rangeKey = `${startPage}-${endPage}`;
  const hit = pageRangePdfCache.get(rangeKey);
  if (!hit) {
    const documentBase64 = await getPdfBase64ForExtraction();
    const extracted = await extractPageRange(documentBase64, startPage, endPage);
    pageRangePdfCache.set(rangeKey, extracted);
    return extracted;
  }
  return hit;
}
6832
7941
  let classifyResult;
6833
7942
  if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
6834
7943
  classifyResult = resumed.classifyResult;
@@ -6836,13 +7945,14 @@ function createExtractor(config) {
6836
7945
  } else {
6837
7946
  onProgress?.("Classifying document...");
6838
7947
  const pageCount2 = await getPdfPageCount(pdfInput);
7948
+ const budget = resolveBudget("extraction_classify", 512);
6839
7949
  const classifyResponse = await safeGenerateObject(
6840
7950
  generateObject,
6841
7951
  {
6842
7952
  prompt: buildClassifyPrompt(),
6843
7953
  schema: ClassifyResultSchema,
6844
- maxTokens: 512,
6845
- providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
7954
+ maxTokens: budget.maxTokens,
7955
+ providerOptions: await buildPdfProviderOptions(pdfInput, activeProviderOptions)
6846
7956
  },
6847
7957
  {
6848
7958
  fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
@@ -6851,7 +7961,11 @@ function createExtractor(config) {
6851
7961
  onError: (err, attempt) => log?.(`Classify attempt ${attempt + 1} failed: ${err instanceof Error ? err.message : String(err)}`)
6852
7962
  }
6853
7963
  );
6854
- trackUsage(classifyResponse.usage);
7964
+ trackUsage(classifyResponse.usage, {
7965
+ taskKind: "extraction_classify",
7966
+ label: "classify",
7967
+ maxTokens: budget.maxTokens
7968
+ });
6855
7969
  classifyResult = classifyResponse.object;
6856
7970
  if (classifyResult.confidence === 0) {
6857
7971
  await log?.(`WARNING: classify returned fallback (policyTypes: ["other"]). This usually means the generateObject callback failed \u2014 check that the document content is accessible to the model.`);
@@ -6864,7 +7978,8 @@ function createExtractor(config) {
6864
7978
  memory: Object.fromEntries(memory)
6865
7979
  });
6866
7980
  }
6867
- const { documentType, policyTypes } = classifyResult;
7981
+ const documentType = classifyResult.documentType;
7982
+ const policyTypes = classifyResult.policyTypes ?? [];
6868
7983
  const primaryType = policyTypes[0] ?? "other";
6869
7984
  const template = getTemplate(primaryType);
6870
7985
  const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfInput);
@@ -6876,13 +7991,14 @@ function createExtractor(config) {
6876
7991
  onProgress?.("Resuming from checkpoint (form inventory complete)...");
6877
7992
  } else {
6878
7993
  onProgress?.(`Building form inventory for ${primaryType} ${documentType}...`);
7994
+ const budget = resolveBudget("extraction_form_inventory", 2048);
6879
7995
  const formInventoryResponse = await safeGenerateObject(
6880
7996
  generateObject,
6881
7997
  {
6882
7998
  prompt: buildFormInventoryPrompt(templateHints),
6883
7999
  schema: FormInventorySchema,
6884
- maxTokens: 2048,
6885
- providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
8000
+ maxTokens: budget.maxTokens,
8001
+ providerOptions: await buildPdfProviderOptions(pdfInput, activeProviderOptions)
6886
8002
  },
6887
8003
  {
6888
8004
  fallback: { forms: [] },
@@ -6890,7 +8006,11 @@ function createExtractor(config) {
6890
8006
  onError: (err, attempt) => log?.(`Form inventory attempt ${attempt + 1} failed: ${err instanceof Error ? err.message : String(err)}`)
6891
8007
  }
6892
8008
  );
6893
- trackUsage(formInventoryResponse.usage);
8009
+ trackUsage(formInventoryResponse.usage, {
8010
+ taskKind: "extraction_form_inventory",
8011
+ label: "form_inventory",
8012
+ maxTokens: budget.maxTokens
8013
+ });
6894
8014
  formInventory = formInventoryResponse.object;
6895
8015
  memory.set("form_inventory", formInventory);
6896
8016
  await pipelineCtx.save("form_inventory", {
@@ -6909,39 +8029,54 @@ function createExtractor(config) {
6909
8029
  onProgress?.(`Mapping document pages for ${primaryType} ${documentType}...`);
6910
8030
  const chunkSize = 8;
6911
8031
  const collectedAssignments = [];
6912
- const formInventoryHint = formInventory?.forms.length ? formatFormInventoryForPageMap(formInventory.forms) : void 0;
6913
- const extractionBase64 = await getPdfBase64ForExtraction();
6914
- for (let startPage = 1; startPage <= pageCount; startPage += chunkSize) {
6915
- const endPage = Math.min(pageCount, startPage + chunkSize - 1);
6916
- const pagesPdf = await extractPageRange(extractionBase64, startPage, endPage);
6917
- const mapResponse = await safeGenerateObject(
6918
- generateObject,
6919
- {
6920
- prompt: buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
6921
- schema: PageMapChunkSchema,
6922
- maxTokens: 2048,
6923
- providerOptions: { ...providerOptions, pdfBase64: pagesPdf }
6924
- },
6925
- {
6926
- fallback: {
6927
- pages: Array.from({ length: endPage - startPage + 1 }, (_, index) => ({
6928
- localPageNumber: index + 1,
6929
- extractorNames: index === 0 && startPage === 1 ? ["carrier_info", "named_insured", "declarations", "coverage_limits"] : ["sections"],
6930
- confidence: 0,
6931
- notes: "Fallback page assignment"
6932
- }))
6933
- },
6934
- log,
6935
- onError: (err, attempt) => log?.(`Page map attempt ${attempt + 1} failed for pages ${startPage}-${endPage}: ${err}`)
6936
- }
6937
- );
6938
- trackUsage(mapResponse.usage);
6939
- for (const assignment of mapResponse.object.pages) {
6940
- collectedAssignments.push({
6941
- ...assignment,
6942
- localPageNumber: startPage + assignment.localPageNumber - 1
6943
- });
8032
+ const formInventoryHint = formInventory?.forms?.length ? formatFormInventoryForPageMap(formInventory.forms) : void 0;
8033
+ const pageMapChunks = Array.from(
8034
+ { length: Math.ceil(pageCount / chunkSize) },
8035
+ (_, index) => {
8036
+ const startPage = index * chunkSize + 1;
8037
+ return { startPage, endPage: Math.min(pageCount, startPage + chunkSize - 1) };
6944
8038
  }
8039
+ );
8040
+ const pageMapResults = await Promise.all(
8041
+ pageMapChunks.map(
8042
+ ({ startPage, endPage }) => limit(async () => {
8043
+ const pagesPdf = await getPageRangePdf(startPage, endPage);
8044
+ const budget = resolveBudget("extraction_page_map", 2048);
8045
+ const mapResponse = await safeGenerateObject(
8046
+ generateObject,
8047
+ {
8048
+ prompt: buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
8049
+ schema: PageMapChunkSchema,
8050
+ maxTokens: budget.maxTokens,
8051
+ providerOptions: { ...activeProviderOptions, pdfBase64: pagesPdf }
8052
+ },
8053
+ {
8054
+ fallback: {
8055
+ pages: Array.from({ length: endPage - startPage + 1 }, (_, index) => ({
8056
+ localPageNumber: index + 1,
8057
+ extractorNames: index === 0 && startPage === 1 ? ["carrier_info", "named_insured", "declarations", "coverage_limits"] : ["sections"],
8058
+ confidence: 0,
8059
+ notes: "Fallback page assignment"
8060
+ }))
8061
+ },
8062
+ log,
8063
+ onError: (err, attempt) => log?.(`Page map attempt ${attempt + 1} failed for pages ${startPage}-${endPage}: ${err}`)
8064
+ }
8065
+ );
8066
+ trackUsage(mapResponse.usage, {
8067
+ taskKind: "extraction_page_map",
8068
+ label: `page_map:${startPage}-${endPage}`,
8069
+ maxTokens: budget.maxTokens
8070
+ });
8071
+ return mapResponse.object.pages.map((assignment) => ({
8072
+ ...assignment,
8073
+ localPageNumber: startPage + assignment.localPageNumber - 1
8074
+ }));
8075
+ })
8076
+ )
8077
+ );
8078
+ for (const assignments of pageMapResults) {
8079
+ collectedAssignments.push(...assignments);
6945
8080
  }
6946
8081
  pageAssignments = collectedAssignments.length > 0 ? collectedAssignments : Array.from({ length: pageCount }, (_, index) => ({
6947
8082
  localPageNumber: index + 1,
@@ -6979,11 +8114,12 @@ function createExtractor(config) {
6979
8114
  if (!pipelineCtx.isPhaseComplete("extract")) {
6980
8115
  const tasks = plan.tasks;
6981
8116
  onProgress?.(`Dispatching ${tasks.length} extractors...`);
8117
+ const extractionPdfInput = await getPdfBase64ForExtraction();
6982
8118
  const extractorResults = await Promise.all(
6983
8119
  tasks.map(
6984
8120
  (task) => limit(async () => {
6985
8121
  onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
6986
- return runFocusedExtractorTask(task, pdfInput, memory);
8122
+ return runFocusedExtractorTask(task, extractionPdfInput, memory, pageRangePdfCache);
6987
8123
  })
6988
8124
  )
6989
8125
  );
@@ -6997,6 +8133,7 @@ function createExtractor(config) {
6997
8133
  onProgress?.("Extracting supplementary retrieval facts...");
6998
8134
  try {
6999
8135
  const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
8136
+ const budget = resolveBudget("extraction_focused", 4096);
7000
8137
  const supplementaryResult = await runExtractor({
7001
8138
  name: "supplementary",
7002
8139
  prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
@@ -7006,10 +8143,15 @@ function createExtractor(config) {
7006
8143
  endPage: pageCount,
7007
8144
  generateObject,
7008
8145
  convertPdfToImages,
7009
- maxTokens: 4096,
7010
- providerOptions
8146
+ maxTokens: budget.maxTokens,
8147
+ providerOptions: activeProviderOptions,
8148
+ pageRangeCache: pageRangePdfCache
8149
+ });
8150
+ trackUsage(supplementaryResult.usage, {
8151
+ taskKind: "extraction_focused",
8152
+ label: "supplementary",
8153
+ maxTokens: budget.maxTokens
7011
8154
  });
7012
- trackUsage(supplementaryResult.usage);
7013
8155
  mergeMemoryResult(supplementaryResult.name, supplementaryResult.data, memory);
7014
8156
  } catch (error) {
7015
8157
  await log?.(`Supplementary extractor failed: ${error}`);
@@ -7035,11 +8177,16 @@ function createExtractor(config) {
7035
8177
  generateObject,
7036
8178
  convertPdfToImages,
7037
8179
  concurrency,
7038
- providerOptions,
8180
+ providerOptions: activeProviderOptions,
8181
+ modelCapabilities,
8182
+ modelBudgetConstraints,
7039
8183
  log,
7040
8184
  onProgress
7041
8185
  });
7042
- trackUsage(resolution.usage);
8186
+ trackUsage(resolution.usage, {
8187
+ taskKind: "extraction_referential_lookup",
8188
+ label: "referential_resolution"
8189
+ });
7043
8190
  if (resolution.attempts > 0) {
7044
8191
  await log?.(`Referential resolution: ${resolution.resolved}/${resolution.attempts} resolved, ${resolution.unresolved} unresolved`);
7045
8192
  }
@@ -7064,13 +8211,14 @@ function createExtractor(config) {
7064
8211
  const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
7065
8212
  const extractionSummary = summarizeExtraction(memory);
7066
8213
  const pageMapSummary = formatPageMapSummary(pageAssignments);
8214
+ const budget = resolveBudget("extraction_review", 1536);
7067
8215
  const reviewResponse = await safeGenerateObject(
7068
8216
  generateObject,
7069
8217
  {
7070
8218
  prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog),
7071
8219
  schema: ReviewResultSchema,
7072
- maxTokens: 1536,
7073
- providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
8220
+ maxTokens: budget.maxTokens,
8221
+ providerOptions: await buildPdfProviderOptions(pdfInput, activeProviderOptions)
7074
8222
  },
7075
8223
  {
7076
8224
  fallback: { complete: true, missingFields: [], qualityIssues: [], additionalTasks: [] },
@@ -7078,7 +8226,11 @@ function createExtractor(config) {
7078
8226
  onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
7079
8227
  }
7080
8228
  );
7081
- trackUsage(reviewResponse.usage);
8229
+ trackUsage(reviewResponse.usage, {
8230
+ taskKind: "extraction_review",
8231
+ label: `review:${round + 1}`,
8232
+ maxTokens: budget.maxTokens
8233
+ });
7082
8234
  reviewRounds.push(toReviewRoundRecord(round + 1, reviewResponse.object));
7083
8235
  if (reviewResponse.object.qualityIssues?.length) {
7084
8236
  await log?.(`Review round ${round + 1} quality issues: ${reviewResponse.object.qualityIssues.join("; ")}`);
@@ -7088,10 +8240,11 @@ function createExtractor(config) {
7088
8240
  break;
7089
8241
  }
7090
8242
  onProgress?.(`Review round ${round + 1}: dispatching ${reviewResponse.object.additionalTasks.length} follow-up extractors...`);
8243
+ const extractionPdfInput = await getPdfBase64ForExtraction();
7091
8244
  const followUpResults = await Promise.all(
7092
8245
  reviewResponse.object.additionalTasks.map(
7093
8246
  (task) => limit(async () => {
7094
- return runFocusedExtractorTask(task, pdfInput, memory);
8247
+ return runFocusedExtractorTask(task, extractionPdfInput, memory, pageRangePdfCache);
7095
8248
  })
7096
8249
  )
7097
8250
  );
@@ -7101,10 +8254,12 @@ function createExtractor(config) {
7101
8254
  }
7102
8255
  }
7103
8256
  }
8257
+ groundExtractionMemoryWithSourceSpans(memory, sourceSpans);
7104
8258
  reviewReport = buildExtractionReviewReport({
7105
8259
  memory,
7106
8260
  pageAssignments,
7107
- reviewRounds
8261
+ reviewRounds,
8262
+ sourceSpansAvailable: sourceSpans.length > 0
7108
8263
  });
7109
8264
  if (reviewReport.issues.length > 0) {
7110
8265
  await log?.(
@@ -7125,10 +8280,12 @@ function createExtractor(config) {
7125
8280
  memory: Object.fromEntries(memory)
7126
8281
  });
7127
8282
  }
8283
+ groundExtractionMemoryWithSourceSpans(memory, sourceSpans);
7128
8284
  reviewReport ?? (reviewReport = buildExtractionReviewReport({
7129
8285
  memory,
7130
8286
  pageAssignments,
7131
- reviewRounds
8287
+ reviewRounds,
8288
+ sourceSpansAvailable: sourceSpans.length > 0
7132
8289
  }));
7133
8290
  onProgress?.("Assembling document...");
7134
8291
  const document = assembleDocument(id, documentType, memory);
@@ -7146,13 +8303,14 @@ function createExtractor(config) {
7146
8303
  if (!document.summary) {
7147
8304
  onProgress?.("Generating document summary...");
7148
8305
  try {
8306
+ const budget = resolveBudget("extraction_summary", 512);
7149
8307
  const summaryResponse = await safeGenerateObject(
7150
8308
  generateObject,
7151
8309
  {
7152
8310
  prompt: buildSummaryPrompt(document),
7153
8311
  schema: SummaryResultSchema,
7154
- maxTokens: 512,
7155
- providerOptions
8312
+ maxTokens: budget.maxTokens,
8313
+ providerOptions: activeProviderOptions
7156
8314
  },
7157
8315
  {
7158
8316
  fallback: { summary: "" },
@@ -7160,7 +8318,11 @@ function createExtractor(config) {
7160
8318
  onError: (err, attempt) => log?.(`Summary attempt ${attempt + 1} failed: ${err instanceof Error ? err.message : String(err)}`)
7161
8319
  }
7162
8320
  );
7163
- trackUsage(summaryResponse.usage);
8321
+ trackUsage(summaryResponse.usage, {
8322
+ taskKind: "extraction_summary",
8323
+ label: "summary",
8324
+ maxTokens: budget.maxTokens
8325
+ });
7164
8326
  if (summaryResponse.object.summary) {
7165
8327
  document.summary = summaryResponse.object.summary;
7166
8328
  }
@@ -7169,12 +8331,18 @@ function createExtractor(config) {
7169
8331
  }
7170
8332
  }
7171
8333
  onProgress?.("Formatting extracted content...");
8334
+ const formatBudget = resolveBudget("extraction_format", 16384);
7172
8335
  const formatResult = await formatDocumentContent(document, generateText, {
7173
- providerOptions,
8336
+ providerOptions: activeProviderOptions,
8337
+ maxTokens: formatBudget.maxTokens,
7174
8338
  onProgress,
7175
8339
  log
7176
8340
  });
7177
- trackUsage(formatResult.usage);
8341
+ trackUsage(formatResult.usage, {
8342
+ taskKind: "extraction_format",
8343
+ label: "format",
8344
+ maxTokens: formatBudget.maxTokens
8345
+ });
7178
8346
  const chunks = chunkDocument(formatResult.document);
7179
8347
  const finalCheckpoint = pipelineCtx.getCheckpoint();
7180
8348
  if (callsMissingUsage > 0) {
@@ -7184,12 +8352,15 @@ function createExtractor(config) {
7184
8352
  return {
7185
8353
  document: formatResult.document,
7186
8354
  chunks,
8355
+ sourceSpans,
8356
+ sourceChunks,
7187
8357
  tokenUsage: totalUsage,
7188
8358
  usageReporting: {
7189
8359
  modelCalls,
7190
8360
  callsWithUsage,
7191
8361
  callsMissingUsage
7192
8362
  },
8363
+ performanceReport,
7193
8364
  checkpoint: finalCheckpoint,
7194
8365
  reviewReport
7195
8366
  };
@@ -7411,8 +8582,8 @@ Respond with JSON only:
7411
8582
  }`;
7412
8583
 
7413
8584
  // src/schemas/application.ts
7414
- import { z as z38 } from "zod";
7415
- var FieldTypeSchema = z38.enum([
8585
+ import { z as z41 } from "zod";
8586
+ var FieldTypeSchema = z41.enum([
7416
8587
  "text",
7417
8588
  "numeric",
7418
8589
  "currency",
@@ -7421,144 +8592,153 @@ var FieldTypeSchema = z38.enum([
7421
8592
  "table",
7422
8593
  "declaration"
7423
8594
  ]);
7424
- var ApplicationFieldSchema = z38.object({
7425
- id: z38.string(),
7426
- label: z38.string(),
7427
- section: z38.string(),
8595
+ var ApplicationFieldSchema = z41.object({
8596
+ id: z41.string(),
8597
+ label: z41.string(),
8598
+ section: z41.string(),
7428
8599
  fieldType: FieldTypeSchema,
7429
- required: z38.boolean(),
7430
- options: z38.array(z38.string()).optional(),
7431
- columns: z38.array(z38.string()).optional(),
7432
- requiresExplanationIfYes: z38.boolean().optional(),
7433
- condition: z38.object({
7434
- dependsOn: z38.string(),
7435
- whenValue: z38.string()
8600
+ required: z41.boolean(),
8601
+ options: z41.array(z41.string()).optional(),
8602
+ columns: z41.array(z41.string()).optional(),
8603
+ requiresExplanationIfYes: z41.boolean().optional(),
8604
+ condition: z41.object({
8605
+ dependsOn: z41.string(),
8606
+ whenValue: z41.string()
7436
8607
  }).optional(),
7437
- value: z38.string().optional(),
7438
- source: z38.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
7439
- confidence: z38.enum(["confirmed", "high", "medium", "low"]).optional()
7440
- });
7441
- var ApplicationClassifyResultSchema = z38.object({
7442
- isApplication: z38.boolean(),
7443
- confidence: z38.number().min(0).max(1),
7444
- applicationType: z38.string().nullable()
7445
- });
7446
- var FieldExtractionResultSchema = z38.object({
7447
- fields: z38.array(ApplicationFieldSchema)
7448
- });
7449
- var AutoFillMatchSchema = z38.object({
7450
- fieldId: z38.string(),
7451
- value: z38.string(),
7452
- confidence: z38.enum(["confirmed"]),
7453
- contextKey: z38.string()
7454
- });
7455
- var AutoFillResultSchema = z38.object({
7456
- matches: z38.array(AutoFillMatchSchema)
7457
- });
7458
- var QuestionBatchResultSchema = z38.object({
7459
- batches: z38.array(z38.array(z38.string()).describe("Array of field IDs in this batch"))
7460
- });
7461
- var LookupRequestSchema = z38.object({
7462
- type: z38.string().describe("Type of lookup: 'records', 'website', 'policy'"),
7463
- description: z38.string(),
7464
- url: z38.string().optional(),
7465
- targetFieldIds: z38.array(z38.string())
7466
- });
7467
- var ReplyIntentSchema = z38.object({
7468
- primaryIntent: z38.enum(["answers_only", "question", "lookup_request", "mixed"]),
7469
- hasAnswers: z38.boolean(),
7470
- questionText: z38.string().optional(),
7471
- questionFieldIds: z38.array(z38.string()).optional(),
7472
- lookupRequests: z38.array(LookupRequestSchema).optional()
7473
- });
7474
- var ParsedAnswerSchema = z38.object({
7475
- fieldId: z38.string(),
7476
- value: z38.string(),
7477
- explanation: z38.string().optional()
7478
- });
7479
- var AnswerParsingResultSchema = z38.object({
7480
- answers: z38.array(ParsedAnswerSchema),
7481
- unanswered: z38.array(z38.string()).describe("Field IDs that were not answered")
7482
- });
7483
- var LookupFillSchema = z38.object({
7484
- fieldId: z38.string(),
7485
- value: z38.string(),
7486
- source: z38.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
7487
- });
7488
- var LookupFillResultSchema = z38.object({
7489
- fills: z38.array(LookupFillSchema),
7490
- unfillable: z38.array(z38.string()),
7491
- explanation: z38.string().optional()
7492
- });
7493
- var FlatPdfPlacementSchema = z38.object({
7494
- fieldId: z38.string(),
7495
- page: z38.number(),
7496
- x: z38.number().describe("Percentage from left edge (0-100)"),
7497
- y: z38.number().describe("Percentage from top edge (0-100)"),
7498
- text: z38.string(),
7499
- fontSize: z38.number().optional(),
7500
- isCheckmark: z38.boolean().optional()
7501
- });
7502
- var AcroFormMappingSchema = z38.object({
7503
- fieldId: z38.string(),
7504
- acroFormName: z38.string(),
7505
- value: z38.string()
7506
- });
7507
- var QualityGateStatusSchema = z38.enum(["passed", "warning", "failed"]);
7508
- var QualitySeveritySchema = z38.enum(["info", "warning", "blocking"]);
7509
- var ApplicationQualityIssueSchema = z38.object({
7510
- code: z38.string(),
8608
+ value: z41.string().optional(),
8609
+ source: z41.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
8610
+ confidence: z41.enum(["confirmed", "high", "medium", "low"]).optional(),
8611
+ sourceSpanIds: z41.array(z41.string()).optional().describe("Stable source spans that support the field value or field anchor"),
8612
+ userSourceSpanIds: z41.array(z41.string()).optional().describe("Message or attachment spans that support user-provided values"),
8613
+ pageNumber: z41.number().int().positive().optional().describe("Application page where the field label or anchor appears"),
8614
+ fieldAnchorId: z41.string().optional().describe("Stable field anchor ID derived from page, section, label, and form metadata"),
8615
+ acroFormName: z41.string().optional().describe("Native PDF AcroForm field name when available"),
8616
+ validationStatus: z41.enum(["valid", "needs_review", "unsupported", "missing"]).optional()
8617
+ });
8618
+ var ApplicationClassifyResultSchema = z41.object({
8619
+ isApplication: z41.boolean(),
8620
+ confidence: z41.number().min(0).max(1),
8621
+ applicationType: z41.string().nullable()
8622
+ });
8623
+ var FieldExtractionResultSchema = z41.object({
8624
+ fields: z41.array(ApplicationFieldSchema)
8625
+ });
8626
+ var AutoFillMatchSchema = z41.object({
8627
+ fieldId: z41.string(),
8628
+ value: z41.string(),
8629
+ confidence: z41.enum(["confirmed"]),
8630
+ contextKey: z41.string()
8631
+ });
8632
+ var AutoFillResultSchema = z41.object({
8633
+ matches: z41.array(AutoFillMatchSchema)
8634
+ });
8635
+ var QuestionBatchResultSchema = z41.object({
8636
+ batches: z41.array(z41.array(z41.string()).describe("Array of field IDs in this batch"))
8637
+ });
8638
+ var LookupRequestSchema = z41.object({
8639
+ type: z41.string().describe("Type of lookup: 'records', 'website', 'policy'"),
8640
+ description: z41.string(),
8641
+ url: z41.string().optional(),
8642
+ targetFieldIds: z41.array(z41.string())
8643
+ });
8644
+ var ReplyIntentSchema = z41.object({
8645
+ primaryIntent: z41.enum(["answers_only", "question", "lookup_request", "mixed"]),
8646
+ hasAnswers: z41.boolean(),
8647
+ questionText: z41.string().optional(),
8648
+ questionFieldIds: z41.array(z41.string()).optional(),
8649
+ lookupRequests: z41.array(LookupRequestSchema).optional()
8650
+ });
8651
+ var ParsedAnswerSchema = z41.object({
8652
+ fieldId: z41.string(),
8653
+ value: z41.string(),
8654
+ explanation: z41.string().optional()
8655
+ });
8656
+ var AnswerParsingResultSchema = z41.object({
8657
+ answers: z41.array(ParsedAnswerSchema),
8658
+ unanswered: z41.array(z41.string()).describe("Field IDs that were not answered")
8659
+ });
8660
+ var LookupFillSchema = z41.object({
8661
+ fieldId: z41.string(),
8662
+ value: z41.string(),
8663
+ source: z41.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'"),
8664
+ sourceSpanIds: z41.array(z41.string()).optional()
8665
+ });
8666
+ var LookupFillResultSchema = z41.object({
8667
+ fills: z41.array(LookupFillSchema),
8668
+ unfillable: z41.array(z41.string()),
8669
+ explanation: z41.string().optional()
8670
+ });
8671
+ var FlatPdfPlacementSchema = z41.object({
8672
+ fieldId: z41.string(),
8673
+ page: z41.number(),
8674
+ x: z41.number().describe("Percentage from left edge (0-100)"),
8675
+ y: z41.number().describe("Percentage from top edge (0-100)"),
8676
+ text: z41.string(),
8677
+ fontSize: z41.number().optional(),
8678
+ isCheckmark: z41.boolean().optional()
8679
+ });
8680
+ var AcroFormMappingSchema = z41.object({
8681
+ fieldId: z41.string(),
8682
+ acroFormName: z41.string(),
8683
+ value: z41.string()
8684
+ });
8685
+ var QualityGateStatusSchema = z41.enum(["passed", "warning", "failed"]);
8686
+ var QualitySeveritySchema = z41.enum(["info", "warning", "blocking"]);
8687
+ var ApplicationQualityIssueSchema = z41.object({
8688
+ code: z41.string(),
7511
8689
  severity: QualitySeveritySchema,
7512
- message: z38.string(),
7513
- fieldId: z38.string().optional()
8690
+ message: z41.string(),
8691
+ fieldId: z41.string().optional()
7514
8692
  });
7515
- var ApplicationQualityRoundSchema = z38.object({
7516
- round: z38.number(),
7517
- kind: z38.string(),
8693
+ var ApplicationQualityRoundSchema = z41.object({
8694
+ round: z41.number(),
8695
+ kind: z41.string(),
7518
8696
  status: QualityGateStatusSchema,
7519
- summary: z38.string().optional()
8697
+ summary: z41.string().optional()
7520
8698
  });
7521
- var ApplicationQualityArtifactSchema = z38.object({
7522
- kind: z38.string(),
7523
- label: z38.string().optional(),
7524
- itemCount: z38.number().optional()
8699
+ var ApplicationQualityArtifactSchema = z41.object({
8700
+ kind: z41.string(),
8701
+ label: z41.string().optional(),
8702
+ itemCount: z41.number().optional()
7525
8703
  });
7526
- var ApplicationEmailReviewSchema = z38.object({
7527
- issues: z38.array(ApplicationQualityIssueSchema),
8704
+ var ApplicationEmailReviewSchema = z41.object({
8705
+ issues: z41.array(ApplicationQualityIssueSchema),
7528
8706
  qualityGateStatus: QualityGateStatusSchema
7529
8707
  });
7530
- var ApplicationQualityReportSchema = z38.object({
7531
- issues: z38.array(ApplicationQualityIssueSchema),
7532
- rounds: z38.array(ApplicationQualityRoundSchema).optional(),
7533
- artifacts: z38.array(ApplicationQualityArtifactSchema).optional(),
8708
+ var ApplicationQualityReportSchema = z41.object({
8709
+ issues: z41.array(ApplicationQualityIssueSchema),
8710
+ rounds: z41.array(ApplicationQualityRoundSchema).optional(),
8711
+ artifacts: z41.array(ApplicationQualityArtifactSchema).optional(),
7534
8712
  emailReview: ApplicationEmailReviewSchema.optional(),
7535
8713
  qualityGateStatus: QualityGateStatusSchema
7536
8714
  });
7537
- var ApplicationStateSchema = z38.object({
7538
- id: z38.string(),
7539
- pdfBase64: z38.string().optional().describe("Original PDF, omitted after extraction"),
7540
- title: z38.string().optional(),
7541
- applicationType: z38.string().nullable().optional(),
7542
- fields: z38.array(ApplicationFieldSchema),
7543
- batches: z38.array(z38.array(z38.string())).optional(),
7544
- currentBatchIndex: z38.number().default(0),
8715
+ var ApplicationStateSchema = z41.object({
8716
+ id: z41.string(),
8717
+ pdfBase64: z41.string().optional().describe("Original PDF, omitted after extraction"),
8718
+ title: z41.string().optional(),
8719
+ applicationType: z41.string().nullable().optional(),
8720
+ fields: z41.array(ApplicationFieldSchema),
8721
+ batches: z41.array(z41.array(z41.string())).optional(),
8722
+ currentBatchIndex: z41.number().default(0),
7545
8723
  qualityReport: ApplicationQualityReportSchema.optional(),
7546
- status: z38.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
7547
- createdAt: z38.number(),
7548
- updatedAt: z38.number()
8724
+ status: z41.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
8725
+ createdAt: z41.number(),
8726
+ updatedAt: z41.number()
7549
8727
  });
7550
8728
 
7551
8729
  // src/application/agents/classifier.ts
7552
- async function classifyApplication(pdfContent, generateObject, providerOptions) {
8730
+ async function classifyApplication(pdfContent, generateObject, providerOptions, maxTokens = 512) {
7553
8731
  const { object, usage } = await withRetry(
7554
8732
  () => generateObject({
7555
8733
  prompt: `${APPLICATION_CLASSIFY_PROMPT}
7556
8734
 
7557
- Analyze the following document content:
7558
- ${pdfContent}`,
8735
+ Analyze the attached insurance document. If text source units are provided in provider options, use them as supporting context. Do not infer from base64 text.`,
7559
8736
  schema: ApplicationClassifyResultSchema,
7560
- maxTokens: 512,
7561
- providerOptions
8737
+ maxTokens,
8738
+ providerOptions: {
8739
+ ...providerOptions,
8740
+ pdfBase64: providerOptions?.pdfBase64 ?? pdfContent
8741
+ }
7562
8742
  })
7563
8743
  );
7564
8744
  return { result: object, usage };
@@ -7571,13 +8751,18 @@ function buildFieldExtractionPrompt() {
7571
8751
  Field types: "text", "numeric", "currency", "date", "yes_no", "table", "declaration"
7572
8752
 
7573
8753
  Required keys per field:
7574
- - "id": short snake_case ID
8754
+ - "id": short provisional snake_case ID. The SDK will replace this with a stable deterministic ID.
7575
8755
  - "label": field label \u2014 a clear, natural question that a human would understand
7576
8756
  - "section": section heading
7577
8757
  - "fieldType": one of the types above
7578
8758
  - "required": boolean
7579
8759
 
7580
8760
  Optional keys (only include when applicable):
8761
+ - "sourceSpanIds": stable source span IDs if the caller provided source units for this application
8762
+ - "pageNumber": PDF page number where the field label/anchor appears
8763
+ - "fieldAnchorId": stable caller-provided field anchor ID, when available
8764
+ - "acroFormName": native PDF form field name, when visible or provided
8765
+ - "validationStatus": "missing" for extracted blank fields, "needs_review" for prefilled fields that need source validation
7581
8766
  - "options": array of strings \u2014 for fields with checkboxes/radio buttons/multiple choices (e.g. business type, state selections). Use "text" fieldType with options.
7582
8767
  - "columns": array of {"name","type"} \u2014 tables only
7583
8768
  - "requiresExplanationIfYes": boolean \u2014 declarations only
@@ -7593,25 +8778,73 @@ Example:
7593
8778
  {"id":"prior_claims","text":"Any claims in past 5 years?","section":"Declarations","fieldType":"declaration","required":true,"requiresExplanationIfYes":true}
7594
8779
  ]
7595
8780
 
7596
- Extract ALL fields. Respond with ONLY the JSON array, no other text.`;
7597
- }
8781
+ Extract ALL fields. Prefer page numbers and source span IDs over model-generated guesses whenever source units are supplied. Respond with ONLY the JSON array, no other text.`;
8782
+ }
8783
+
8784
+ // src/application/field-ids.ts
/**
 * Converts a free-form value (section heading, label, field type, form name)
 * into a lowercase slug: runs of characters outside [a-z0-9] become a single
 * underscore, and leading/trailing underscores are stripped.
 *
 * @param value Text to normalize; null/undefined are treated as empty.
 * @returns The slug, or "unknown" when nothing usable remains.
 */
function normalizePart(value) {
  const lowered = (value ?? "").trim().toLowerCase();
  const slug = lowered
    .replace(/[^a-z0-9]+/g, "_")
    .replace(/^_+|_+$/g, "");
  if (slug) {
    return slug;
  }
  // Empty input or input made only of separators has no usable slug.
  return "unknown";
}
/**
 * Computes a 32-bit FNV-1a style hash over the UTF-16 code units of `value`
 * and renders it as exactly eight lowercase hex digits.
 *
 * @param value String to hash.
 * @returns Zero-padded 8-character hexadecimal digest.
 */
function hashText2(value) {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;
  let state = FNV_OFFSET_BASIS;
  let position = 0;
  while (position < value.length) {
    state ^= value.charCodeAt(position);
    // Math.imul keeps the multiplication in 32-bit integer space.
    state = Math.imul(state, FNV_PRIME);
    position += 1;
  }
  // `>>> 0` reinterprets the value as unsigned, so the hex form is at most 8 digits.
  const unsigned = state >>> 0;
  return unsigned.toString(16).padStart(8, "0");
}
/**
 * Builds the anchor ID for an application field from its page number,
 * section, label, and AcroForm name.
 *
 * The readable part of the ID carries page, section, and label slugs; the
 * AcroForm name only contributes through the trailing hash.
 *
 * @param field Field with optional `pageNumber`, `section`, `label`, `acroFormName`.
 * @returns ID of the form `app_field_anchor:<page>:<section>:<label>:<hash>`.
 */
function buildApplicationFieldAnchorId(field) {
  // "pna" marks a field whose page number is missing (or zero).
  const page = field.pageNumber ? `p${field.pageNumber}` : "pna";
  const section = normalizePart(field.section);
  const label = normalizePart(field.label);
  const formName = normalizePart(field.acroFormName);
  const digest = hashText2([page, section, label, formName].join("|"));
  return ["app_field_anchor", page, section, label, digest].join(":");
}
/**
 * Builds the deterministic field ID for an application field.
 *
 * The hash covers the page, section, label, field type, raw AcroForm name,
 * and the field's anchor ID (the supplied `fieldAnchorId`, or one derived
 * from the field when absent).
 *
 * @param field Field with `section`, `label`, `fieldType` and optional
 *   `pageNumber`, `acroFormName`, `fieldAnchorId`.
 * @returns ID of the form `app_field:<page>:<section>:<label>:<hash>`.
 */
function buildStableApplicationFieldId(field) {
  // "pna" marks a field whose page number is missing (or zero).
  const page = field.pageNumber ? `p${field.pageNumber}` : "pna";
  const section = normalizePart(field.section);
  const label = normalizePart(field.label);
  const fieldType = normalizePart(field.fieldType);
  const anchor = field.fieldAnchorId ?? buildApplicationFieldAnchorId(field);
  const rawFormName = field.acroFormName ?? "";
  const digest = hashText2(`${page}|${section}|${label}|${fieldType}|${rawFormName}|${anchor}`);
  return ["app_field", page, section, label, digest].join(":");
}
/**
 * Replaces each field's ID with a deterministic one and fills in derived
 * metadata.
 *
 * For every field this:
 *  - keeps a provided `fieldAnchorId` or derives one from the field;
 *  - assigns the stable ID; a repeated ID gets a `:<n>` suffix, where n is
 *    its 1-based occurrence number (the first occurrence is unsuffixed);
 *  - defaults `validationStatus` to "needs_review" when the field already
 *    has a value and to "missing" otherwise.
 *
 * @param fields Extracted application fields.
 * @returns New array of field objects; the inputs are not mutated.
 */
function normalizeApplicationFields(fields) {
  const occurrences = new Map();
  const normalizeOne = (field) => {
    const fieldAnchorId = field.fieldAnchorId ?? buildApplicationFieldAnchorId(field);
    const baseId = buildStableApplicationFieldId({ ...field, fieldAnchorId });
    const previousCount = occurrences.get(baseId) ?? 0;
    occurrences.set(baseId, previousCount + 1);
    let id = baseId;
    if (previousCount !== 0) {
      id = `${baseId}:${previousCount + 1}`;
    }
    let validationStatus = field.validationStatus;
    if (validationStatus === null || validationStatus === undefined) {
      validationStatus = field.value ? "needs_review" : "missing";
    }
    return {
      ...field,
      id,
      fieldAnchorId,
      validationStatus
    };
  };
  return fields.map((field) => normalizeOne(field));
}
7598
8829
 
7599
8830
  // src/application/agents/field-extractor.ts
7600
- async function extractFields(pdfContent, generateObject, providerOptions) {
8831
+ async function extractFields(pdfContent, generateObject, providerOptions, maxTokens = 8192) {
7601
8832
  const prompt = `${buildFieldExtractionPrompt()}
7602
8833
 
7603
- Extract fields from this application:
7604
- ${pdfContent}`;
8834
+ Extract fields from the attached application PDF. Use provider-supplied source units/spans for page numbers and anchors when present. Do not treat raw base64 as readable document text.`;
7605
8835
  const { object, usage } = await withRetry(
7606
8836
  () => generateObject({
7607
8837
  prompt,
7608
8838
  schema: FieldExtractionResultSchema,
7609
- maxTokens: 8192,
7610
- providerOptions
8839
+ maxTokens,
8840
+ providerOptions: {
8841
+ ...providerOptions,
8842
+ pdfBase64: providerOptions?.pdfBase64 ?? pdfContent
8843
+ }
7611
8844
  })
7612
8845
  );
7613
8846
  const result = object;
7614
- return { fields: result.fields, usage };
8847
+ return { fields: normalizeApplicationFields(result.fields), usage };
7615
8848
  }
7616
8849
 
7617
8850
  // src/prompts/application/auto-fill.ts
@@ -7644,7 +8877,7 @@ Only include fields you can confidently fill. Do not guess or fabricate values.`
7644
8877
  }
7645
8878
 
7646
8879
  // src/application/agents/auto-filler.ts
7647
- async function autoFillFromContext(fields, orgContext, generateObject, providerOptions) {
8880
+ async function autoFillFromContext(fields, orgContext, generateObject, providerOptions, maxTokens = 4096) {
7648
8881
  const fieldSummaries = fields.map((f) => ({
7649
8882
  id: f.id,
7650
8883
  label: f.label,
@@ -7656,7 +8889,7 @@ async function autoFillFromContext(fields, orgContext, generateObject, providerO
7656
8889
  () => generateObject({
7657
8890
  prompt,
7658
8891
  schema: AutoFillResultSchema,
7659
- maxTokens: 4096,
8892
+ maxTokens,
7660
8893
  providerOptions
7661
8894
  })
7662
8895
  );
@@ -7711,7 +8944,7 @@ Respond with JSON only:
7711
8944
  }
7712
8945
 
7713
8946
  // src/application/agents/batcher.ts
7714
- async function batchQuestions(unfilledFields, generateObject, providerOptions) {
8947
+ async function batchQuestions(unfilledFields, generateObject, providerOptions, maxTokens = 2048) {
7715
8948
  const fieldSummaries = unfilledFields.map((f) => ({
7716
8949
  id: f.id,
7717
8950
  label: f.label,
@@ -7726,7 +8959,7 @@ async function batchQuestions(unfilledFields, generateObject, providerOptions) {
7726
8959
  () => generateObject({
7727
8960
  prompt,
7728
8961
  schema: QuestionBatchResultSchema,
7729
- maxTokens: 2048,
8962
+ maxTokens,
7730
8963
  providerOptions
7731
8964
  })
7732
8965
  );
@@ -7770,14 +9003,14 @@ Respond with JSON only:
7770
9003
  }
7771
9004
 
7772
9005
  // src/application/agents/reply-router.ts
7773
- async function classifyReplyIntent(fields, replyText, generateObject, providerOptions) {
9006
+ async function classifyReplyIntent(fields, replyText, generateObject, providerOptions, maxTokens = 1024) {
7774
9007
  const fieldSummaries = fields.map((f) => ({ id: f.id, label: f.label }));
7775
9008
  const prompt = buildReplyIntentClassificationPrompt(fieldSummaries, replyText);
7776
9009
  const { object, usage } = await withRetry(
7777
9010
  () => generateObject({
7778
9011
  prompt,
7779
9012
  schema: ReplyIntentSchema,
7780
- maxTokens: 1024,
9013
+ maxTokens,
7781
9014
  providerOptions
7782
9015
  })
7783
9016
  );
@@ -7824,7 +9057,7 @@ Only include answers you are confident about. If a response is ambiguous, includ
7824
9057
  }
7825
9058
 
7826
9059
  // src/application/agents/answer-parser.ts
7827
- async function parseAnswers(fields, replyText, generateObject, providerOptions) {
9060
+ async function parseAnswers(fields, replyText, generateObject, providerOptions, maxTokens = 4096) {
7828
9061
  const questions = fields.map((f) => ({
7829
9062
  id: f.id,
7830
9063
  label: f.label,
@@ -7836,7 +9069,7 @@ async function parseAnswers(fields, replyText, generateObject, providerOptions)
7836
9069
  () => generateObject({
7837
9070
  prompt,
7838
9071
  schema: AnswerParsingResultSchema,
7839
- maxTokens: 4096,
9072
+ maxTokens,
7840
9073
  providerOptions
7841
9074
  })
7842
9075
  );
@@ -7936,11 +9169,12 @@ IMPORTANT: The "source" field must be a specific, citable reference that will be
7936
9169
  - "Business Context (company_info)"
7937
9170
  - "User Profile"
7938
9171
  Never use vague sources like "existing records" or "available data".
9172
+ If AVAILABLE DATA contains sourceSpanId values, include them in "sourceSpanIds" for every value filled from that source. Existing policy values such as policy numbers, dates, limits, deductibles, premiums, coverages, exclusions, conditions, endorsements, locations, vehicles, or named insureds must not be filled without sourceSpanIds unless the value is explicitly marked for review.
7939
9173
 
7940
9174
  Respond with JSON only:
7941
9175
  {
7942
9176
  "fills": [
7943
- { "fieldId": "field_id", "value": "the value from data", "source": "Specific source with identifier (e.g. GL Policy #ABC123, stripe.com)" }
9177
+ { "fieldId": "field_id", "value": "the value from data", "source": "Specific source with identifier (e.g. GL Policy #ABC123, stripe.com)", "sourceSpanIds": ["doc-1:span:1:0:abcd1234"] }
7944
9178
  ],
7945
9179
  "unfillable": ["field_ids that couldn't be matched"],
7946
9180
  "explanation": "Brief note about what was filled and what couldn't be found, citing sources"
@@ -7948,7 +9182,7 @@ Respond with JSON only:
7948
9182
  }
7949
9183
 
7950
9184
  // src/application/agents/lookup-filler.ts
7951
- async function fillFromLookup(requests, targetFields, availableData, generateObject, providerOptions) {
9185
+ async function fillFromLookup(requests, targetFields, availableData, generateObject, providerOptions, maxTokens = 4096) {
7952
9186
  const requestSummaries = requests.map((r) => ({
7953
9187
  type: r.type,
7954
9188
  description: r.description,
@@ -7964,7 +9198,7 @@ async function fillFromLookup(requests, targetFields, availableData, generateObj
7964
9198
  () => generateObject({
7965
9199
  prompt,
7966
9200
  schema: LookupFillResultSchema,
7967
- maxTokens: 4096,
9201
+ maxTokens,
7968
9202
  providerOptions
7969
9203
  })
7970
9204
  );
@@ -8025,7 +9259,7 @@ Output the email body text ONLY. No subject line, no JSON. Use markdown for numb
8025
9259
  }
8026
9260
 
8027
9261
  // src/application/agents/email-generator.ts
8028
- async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, generateText, providerOptions) {
9262
+ async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, generateText, providerOptions, maxTokens = 2048) {
8029
9263
  const fieldSummaries = batchFields.map((f) => ({
8030
9264
  id: f.id,
8031
9265
  label: f.label,
@@ -8046,7 +9280,7 @@ async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, g
8046
9280
  const { text, usage } = await withRetry(
8047
9281
  () => generateText({
8048
9282
  prompt,
8049
- maxTokens: 2048,
9283
+ maxTokens,
8050
9284
  providerOptions
8051
9285
  })
8052
9286
  );
@@ -8059,6 +9293,17 @@ function isVagueSource(source) {
8059
9293
  const normalized = source.trim().toLowerCase();
8060
9294
  return normalized === "unknown" || normalized.includes("existing records") || normalized.includes("available data") || normalized === "context" || normalized === "user provided";
8061
9295
  }
/**
 * Reports whether a filled field looks policy-derived but has NO supporting
 * source-span evidence.
 *
 * NOTE: despite the name, a `true` result means the value is *not* grounded;
 * the quality report uses it to raise `policy_value_missing_source_span`.
 *
 * A field is flagged when all of the following hold:
 *  - it has a value;
 *  - it carries neither `sourceSpanIds` nor `userSourceSpanIds`;
 *  - its `source` mentions a policy-like origin (policy, quote, document,
 *    lookup, carrier, endorsement);
 *  - its section/label mentions a high-value concept, or its type is
 *    currency, date, numeric, or declaration.
 *
 * Keyword matching accepts plural forms ("deductibles", "vehicles",
 * "policies", "losses"), so plural labels are not silently exempted.
 *
 * @param field Application field to inspect.
 * @returns true when the field should be flagged as missing source spans.
 */
function isSourceGroundedPolicyValue(field) {
  if (!field.value) return false;
  // Any span evidence (document or user message) means the value is grounded.
  if (field.sourceSpanIds?.length || field.userSourceSpanIds?.length) return false;
  const source = field.source?.toLowerCase() ?? "";
  const label = `${field.section} ${field.label}`.toLowerCase();
  const highValueLabel = /\b(?:polic(?:y|ies)|effective|expiration|dates?|limits?|deductibles?|premiums?|coverages?|exclusions?|conditions?|endorsements?|locations?|vehicles?|named insureds?|revenues?|payrolls?|loss(?:es)?|claims?|prior)\b/.test(label);
  const highValueType = field.fieldType === "currency" || field.fieldType === "date" || field.fieldType === "numeric" || field.fieldType === "declaration";
  const fromPolicyLikeSource = /\b(?:polic(?:y|ies)|quotes?|documents?|lookup|carriers?|endorsements?)\b/.test(source);
  return fromPolicyLikeSource && (highValueLabel || highValueType);
}
8062
9307
  function buildApplicationQualityReport(state) {
8063
9308
  const issues = [];
8064
9309
  const seenIds = /* @__PURE__ */ new Set();
@@ -8104,6 +9349,14 @@ function buildApplicationQualityReport(state) {
8104
9349
  fieldId: field.id
8105
9350
  });
8106
9351
  }
9352
+ if (isSourceGroundedPolicyValue(field)) {
9353
+ issues.push({
9354
+ code: "policy_value_missing_source_span",
9355
+ severity: "blocking",
9356
+ message: `Filled policy-derived field "${field.label}" is missing source span evidence.`,
9357
+ fieldId: field.id
9358
+ });
9359
+ }
8107
9360
  }
8108
9361
  return {
8109
9362
  issues,
@@ -8213,7 +9466,9 @@ function createApplicationPipeline(config) {
8213
9466
  onProgress,
8214
9467
  log,
8215
9468
  providerOptions,
8216
- qualityGate = "warn"
9469
+ qualityGate = "warn",
9470
+ modelCapabilities,
9471
+ modelBudgetConstraints
8217
9472
  } = config;
8218
9473
  const limit = pLimit(concurrency);
8219
9474
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -8224,9 +9479,18 @@ function createApplicationPipeline(config) {
8224
9479
  onTokenUsage?.(usage);
8225
9480
  }
8226
9481
  }
/**
 * Resolves the model budget for one application task by forwarding the task
 * kind and token hint to `resolveModelBudget`, together with the pipeline's
 * `modelCapabilities` and the per-task entry of `modelBudgetConstraints`
 * (if one is configured).
 *
 * @param taskKind Task identifier used to look up the constraint.
 * @param hintTokens Token hint passed through to the resolver.
 * @returns Whatever `resolveModelBudget` returns; callers read `.maxTokens`.
 */
function resolveBudget(taskKind, hintTokens) {
  const constraint = modelBudgetConstraints?.[taskKind];
  return resolveModelBudget({ taskKind, hintTokens, modelCapabilities, constraint });
}
8227
9490
  async function processApplication(input) {
8228
9491
  totalUsage = { inputTokens: 0, outputTokens: 0 };
8229
9492
  const { pdfBase64, context } = input;
9493
+ const applicationProviderOptions = input.sourceSpans?.length ? { ...providerOptions, sourceSpans: input.sourceSpans } : providerOptions;
8230
9494
  const id = input.applicationId ?? `app-${Date.now()}`;
8231
9495
  const now = Date.now();
8232
9496
  let state = {
@@ -8247,9 +9511,10 @@ function createApplicationPipeline(config) {
8247
9511
  let classifyResult;
8248
9512
  try {
8249
9513
  const { result, usage: classifyUsage } = await classifyApplication(
8250
- pdfBase64.slice(0, 2e3),
9514
+ pdfBase64,
8251
9515
  generateObject,
8252
- providerOptions
9516
+ applicationProviderOptions,
9517
+ resolveBudget("application_classify", 512).maxTokens
8253
9518
  );
8254
9519
  trackUsage(classifyUsage);
8255
9520
  classifyResult = result;
@@ -8274,7 +9539,8 @@ function createApplicationPipeline(config) {
8274
9539
  const { fields: extractedFields, usage: extractUsage } = await extractFields(
8275
9540
  pdfBase64,
8276
9541
  generateObject,
8277
- providerOptions
9542
+ applicationProviderOptions,
9543
+ resolveBudget("application_extract_fields", 8192).maxTokens
8278
9544
  );
8279
9545
  trackUsage(extractUsage);
8280
9546
  fields = extractedFields;
@@ -8312,6 +9578,7 @@ function createApplicationPipeline(config) {
8312
9578
  field.value = pa.value;
8313
9579
  field.source = `backfill: ${pa.source}`;
8314
9580
  field.confidence = "high";
9581
+ field.validationStatus = "needs_review";
8315
9582
  }
8316
9583
  }
8317
9584
  } catch (e) {
@@ -8336,7 +9603,8 @@ function createApplicationPipeline(config) {
8336
9603
  unfilledFields2,
8337
9604
  orgContext,
8338
9605
  generateObject,
8339
- providerOptions
9606
+ providerOptions,
9607
+ resolveBudget("application_auto_fill", 4096).maxTokens
8340
9608
  );
8341
9609
  trackUsage(afUsage);
8342
9610
  for (const match of autoFillResult.matches) {
@@ -8345,6 +9613,7 @@ function createApplicationPipeline(config) {
8345
9613
  field.value = match.value;
8346
9614
  field.source = `auto-fill: ${match.contextKey}`;
8347
9615
  field.confidence = match.confidence;
9616
+ field.validationStatus = "valid";
8348
9617
  }
8349
9618
  }
8350
9619
  } catch (e) {
@@ -8387,7 +9656,8 @@ function createApplicationPipeline(config) {
8387
9656
  const { result: batchResult, usage: batchUsage } = await batchQuestions(
8388
9657
  unfilledFields,
8389
9658
  generateObject,
8390
- providerOptions
9659
+ providerOptions,
9660
+ resolveBudget("application_batch", 2048).maxTokens
8391
9661
  );
8392
9662
  trackUsage(batchUsage);
8393
9663
  state.batches = batchResult.batches;
@@ -8410,9 +9680,15 @@ function createApplicationPipeline(config) {
8410
9680
  onProgress?.(`Application processed: ${filledCount}/${state.fields.length} fields filled, ${state.batches?.length ?? 0} batches to collect.`);
8411
9681
  return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
8412
9682
  }
8413
- async function processReply(input) {
9683
+ async function processReply2(input) {
8414
9684
  totalUsage = { inputTokens: 0, outputTokens: 0 };
8415
9685
  const { applicationId, replyText, context } = input;
9686
+ const replySourceSpanIds = input.replySourceSpanIds?.length ? input.replySourceSpanIds : buildTextSourceSpans({
9687
+ documentId: `${applicationId}:reply:${sourceSpanTextHash(replyText).slice(0, 12)}`,
9688
+ sourceKind: "email",
9689
+ text: replyText,
9690
+ metadata: { applicationId }
9691
+ }).map((span) => span.id);
8416
9692
  let state = null;
8417
9693
  if (applicationStore) {
8418
9694
  state = await applicationStore.get(applicationId);
@@ -8431,7 +9707,8 @@ function createApplicationPipeline(config) {
8431
9707
  currentBatchFields,
8432
9708
  replyText,
8433
9709
  generateObject,
8434
- providerOptions
9710
+ providerOptions,
9711
+ resolveBudget("application_classify", 1024).maxTokens
8435
9712
  );
8436
9713
  trackUsage(intentUsage);
8437
9714
  intent = classifiedIntent;
@@ -8459,7 +9736,8 @@ function createApplicationPipeline(config) {
8459
9736
  currentBatchFields,
8460
9737
  replyText,
8461
9738
  generateObject,
8462
- providerOptions
9739
+ providerOptions,
9740
+ resolveBudget("application_parse_answers", 4096).maxTokens
8463
9741
  );
8464
9742
  trackUsage(parseUsage);
8465
9743
  for (const answer of parseResult.answers) {
@@ -8468,6 +9746,8 @@ function createApplicationPipeline(config) {
8468
9746
  field.value = answer.value;
8469
9747
  field.source = "user";
8470
9748
  field.confidence = "confirmed";
9749
+ field.userSourceSpanIds = replySourceSpanIds;
9750
+ field.validationStatus = "valid";
8471
9751
  fieldsFilled++;
8472
9752
  }
8473
9753
  }
@@ -8499,7 +9779,8 @@ function createApplicationPipeline(config) {
8499
9779
  targetFields,
8500
9780
  availableData,
8501
9781
  generateObject,
8502
- providerOptions
9782
+ providerOptions,
9783
+ resolveBudget("application_lookup", 4096).maxTokens
8503
9784
  );
8504
9785
  trackUsage(lookupUsage);
8505
9786
  for (const fill of lookupResult.fills) {
@@ -8508,6 +9789,10 @@ function createApplicationPipeline(config) {
8508
9789
  field.value = fill.value;
8509
9790
  field.source = `lookup: ${fill.source}`;
8510
9791
  field.confidence = "high";
9792
+ field.validationStatus = fill.sourceSpanIds?.length ? "valid" : "needs_review";
9793
+ if (fill.sourceSpanIds?.length) {
9794
+ field.sourceSpanIds = fill.sourceSpanIds;
9795
+ }
8511
9796
  fieldsFilled++;
8512
9797
  }
8513
9798
  }
@@ -8522,7 +9807,7 @@ function createApplicationPipeline(config) {
8522
9807
  prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
8523
9808
 
8524
9809
  Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
8525
- maxTokens: 512,
9810
+ maxTokens: resolveBudget("application_email", 512).maxTokens,
8526
9811
  providerOptions
8527
9812
  });
8528
9813
  trackUsage(usage);
@@ -8570,7 +9855,8 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
8570
9855
  companyName: context?.companyName
8571
9856
  },
8572
9857
  generateText,
8573
- providerOptions
9858
+ providerOptions,
9859
+ resolveBudget("application_email", 2048).maxTokens
8574
9860
  );
8575
9861
  trackUsage(emailUsage);
8576
9862
  const emailReview = reviewBatchEmail(emailText, nextBatchFields);
@@ -8628,7 +9914,8 @@ ${emailText}`;
8628
9914
  previousBatchSummary: opts?.previousBatchSummary
8629
9915
  },
8630
9916
  generateText,
8631
- providerOptions
9917
+ providerOptions,
9918
+ resolveBudget("application_email", 2048).maxTokens
8632
9919
  );
8633
9920
  trackUsage(usage);
8634
9921
  const emailReview = reviewBatchEmail(text, batchFields);
@@ -8652,7 +9939,7 @@ Application: ${state.title ?? "Insurance Application"}
8652
9939
 
8653
9940
  Fields:
8654
9941
  ${fieldSummary}`,
8655
- maxTokens: 4096,
9942
+ maxTokens: resolveBudget("application_email", 4096).maxTokens,
8656
9943
  providerOptions
8657
9944
  });
8658
9945
  trackUsage(usage);
@@ -8660,7 +9947,7 @@ ${fieldSummary}`,
8660
9947
  }
8661
9948
  return {
8662
9949
  processApplication,
8663
- processReply,
9950
+ processReply: processReply2,
8664
9951
  generateCurrentBatchEmail,
8665
9952
  getConfirmationSummary
8666
9953
  };
@@ -8777,91 +10064,104 @@ Respond with the final answer, deduplicated citations array, overall confidence
8777
10064
  }
8778
10065
 
8779
10066
  // src/schemas/query.ts
8780
- import { z as z39 } from "zod";
8781
- var QueryIntentSchema = z39.enum([
10067
+ import { z as z42 } from "zod";
10068
+ var QueryIntentSchema = z42.enum([
8782
10069
  "policy_question",
8783
10070
  "coverage_comparison",
8784
10071
  "document_search",
8785
10072
  "claims_inquiry",
8786
10073
  "general_knowledge"
8787
10074
  ]);
8788
- var QueryAttachmentKindSchema = z39.enum(["image", "pdf", "text"]);
8789
- var QueryAttachmentSchema = z39.object({
8790
- id: z39.string().optional().describe("Optional stable attachment ID from the caller"),
10075
+ var QueryAttachmentKindSchema = z42.enum(["image", "pdf", "text"]);
10076
+ var QueryAttachmentSchema = z42.object({
10077
+ id: z42.string().optional().describe("Optional stable attachment ID from the caller"),
8791
10078
  kind: QueryAttachmentKindSchema,
8792
- name: z39.string().optional().describe("Original filename or user-facing label"),
8793
- mimeType: z39.string().optional().describe("MIME type such as image/jpeg or application/pdf"),
8794
- base64: z39.string().optional().describe("Base64-encoded file content for image/pdf attachments"),
8795
- text: z39.string().optional().describe("Plain-text attachment content when available"),
8796
- description: z39.string().optional().describe("Caller-provided description of the attachment")
8797
- });
8798
- var SubQuestionSchema = z39.object({
8799
- question: z39.string().describe("Atomic sub-question to retrieve and answer independently"),
10079
+ name: z42.string().optional().describe("Original filename or user-facing label"),
10080
+ mimeType: z42.string().optional().describe("MIME type such as image/jpeg or application/pdf"),
10081
+ base64: z42.string().optional().describe("Base64-encoded file content for image/pdf attachments"),
10082
+ text: z42.string().optional().describe("Plain-text attachment content when available"),
10083
+ description: z42.string().optional().describe("Caller-provided description of the attachment")
10084
+ });
10085
+ var QueryRetrievalModeSchema = z42.enum([
10086
+ "graph_only",
10087
+ "source_rag",
10088
+ "long_context",
10089
+ "hybrid"
10090
+ ]);
10091
+ var SubQuestionSchema = z42.object({
10092
+ question: z42.string().describe("Atomic sub-question to retrieve and answer independently"),
8800
10093
  intent: QueryIntentSchema,
8801
- chunkTypes: z39.array(z39.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
8802
- documentFilters: z39.object({
8803
- type: z39.enum(["policy", "quote"]).optional(),
8804
- carrier: z39.string().optional(),
8805
- insuredName: z39.string().optional(),
8806
- policyNumber: z39.string().optional(),
8807
- quoteNumber: z39.string().optional(),
8808
- policyTypes: z39.array(PolicyTypeSchema).optional().describe("Filter by policy type (e.g. homeowners_ho3, renters_ho4, pet) to avoid mixing up similar policies")
10094
+ chunkTypes: z42.array(z42.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
10095
+ documentFilters: z42.object({
10096
+ type: z42.enum(["policy", "quote"]).optional(),
10097
+ carrier: z42.string().optional(),
10098
+ insuredName: z42.string().optional(),
10099
+ policyNumber: z42.string().optional(),
10100
+ quoteNumber: z42.string().optional(),
10101
+ policyTypes: z42.array(PolicyTypeSchema).optional().describe("Filter by policy type (e.g. homeowners_ho3, renters_ho4, pet) to avoid mixing up similar policies")
8809
10102
  }).optional().describe("Structured filters to narrow document lookup")
8810
10103
  });
8811
- var QueryClassifyResultSchema = z39.object({
10104
+ var QueryClassifyResultSchema = z42.object({
8812
10105
  intent: QueryIntentSchema,
8813
- subQuestions: z39.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
8814
- requiresDocumentLookup: z39.boolean().describe("Whether structured document lookup is needed"),
8815
- requiresChunkSearch: z39.boolean().describe("Whether semantic chunk search is needed"),
8816
- requiresConversationHistory: z39.boolean().describe("Whether conversation history is relevant")
8817
- });
8818
- var EvidenceItemSchema = z39.object({
8819
- source: z39.enum(["chunk", "document", "conversation", "attachment"]),
8820
- chunkId: z39.string().optional(),
8821
- documentId: z39.string().optional(),
8822
- turnId: z39.string().optional(),
8823
- attachmentId: z39.string().optional(),
8824
- text: z39.string().describe("Text excerpt from the source"),
8825
- relevance: z39.number().min(0).max(1),
8826
- metadata: z39.array(z39.object({ key: z39.string(), value: z39.string() })).optional()
8827
- });
8828
- var AttachmentInterpretationSchema = z39.object({
8829
- summary: z39.string().describe("Concise summary of what the attachment shows or contains"),
8830
- extractedFacts: z39.array(z39.string()).describe("Specific observable or document facts grounded in the attachment"),
8831
- recommendedFocus: z39.array(z39.string()).describe("Important details to incorporate when answering follow-up questions"),
8832
- confidence: z39.number().min(0).max(1)
8833
- });
8834
- var RetrievalResultSchema = z39.object({
8835
- subQuestion: z39.string(),
8836
- evidence: z39.array(EvidenceItemSchema)
8837
- });
8838
- var CitationSchema = z39.object({
8839
- index: z39.number().describe("Citation number [1], [2], etc."),
8840
- chunkId: z39.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
8841
- documentId: z39.string(),
8842
- documentType: z39.enum(["policy", "quote"]).optional(),
8843
- field: z39.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
8844
- quote: z39.string().describe("Exact text from source that supports the claim"),
8845
- relevance: z39.number().min(0).max(1)
8846
- });
8847
- var SubAnswerSchema = z39.object({
8848
- subQuestion: z39.string(),
8849
- answer: z39.string(),
8850
- citations: z39.array(CitationSchema),
8851
- confidence: z39.number().min(0).max(1),
8852
- needsMoreContext: z39.boolean().describe("True if evidence was insufficient to answer fully")
8853
- });
8854
- var VerifyResultSchema = z39.object({
8855
- approved: z39.boolean().describe("Whether all sub-answers are adequately grounded"),
8856
- issues: z39.array(z39.string()).describe("Specific grounding or consistency issues found"),
8857
- retrySubQuestions: z39.array(z39.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
8858
- });
8859
- var QueryResultSchema = z39.object({
8860
- answer: z39.string(),
8861
- citations: z39.array(CitationSchema),
10106
+ subQuestions: z42.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
10107
+ requiresDocumentLookup: z42.boolean().describe("Whether structured document lookup is needed"),
10108
+ requiresChunkSearch: z42.boolean().describe("Whether semantic chunk search is needed"),
10109
+ requiresConversationHistory: z42.boolean().describe("Whether conversation history is relevant"),
10110
+ retrievalMode: QueryRetrievalModeSchema.optional().describe("Preferred retrieval strategy for the query when source-span retrieval is available")
10111
+ });
10112
+ var EvidenceItemSchema = z42.object({
10113
+ source: z42.enum(["chunk", "document", "conversation", "attachment", "source_span"]),
10114
+ chunkId: z42.string().optional(),
10115
+ sourceSpanId: z42.string().optional(),
10116
+ documentId: z42.string().optional(),
10117
+ turnId: z42.string().optional(),
10118
+ attachmentId: z42.string().optional(),
10119
+ text: z42.string().describe("Text excerpt from the source"),
10120
+ relevance: z42.number().min(0).max(1),
10121
+ retrievalMode: QueryRetrievalModeSchema.optional(),
10122
+ sourceLocation: SourceSpanLocationSchema.optional(),
10123
+ metadata: z42.array(z42.object({ key: z42.string(), value: z42.string() })).optional()
10124
+ });
10125
+ var AttachmentInterpretationSchema = z42.object({
10126
+ summary: z42.string().describe("Concise summary of what the attachment shows or contains"),
10127
+ extractedFacts: z42.array(z42.string()).describe("Specific observable or document facts grounded in the attachment"),
10128
+ recommendedFocus: z42.array(z42.string()).describe("Important details to incorporate when answering follow-up questions"),
10129
+ confidence: z42.number().min(0).max(1)
10130
+ });
10131
+ var RetrievalResultSchema = z42.object({
10132
+ subQuestion: z42.string(),
10133
+ evidence: z42.array(EvidenceItemSchema)
10134
+ });
10135
+ var CitationSchema = z42.object({
10136
+ index: z42.number().describe("Citation number [1], [2], etc."),
10137
+ chunkId: z42.string().optional().describe("Source chunk ID, e.g. doc-123:coverage:2"),
10138
+ sourceSpanId: z42.string().optional().describe("Precise source span ID when available"),
10139
+ documentId: z42.string(),
10140
+ documentType: z42.enum(["policy", "quote"]).optional(),
10141
+ field: z42.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
10142
+ quote: z42.string().describe("Exact text from source that supports the claim"),
10143
+ relevance: z42.number().min(0).max(1),
10144
+ retrievalMode: QueryRetrievalModeSchema.optional(),
10145
+ sourceLocation: SourceSpanLocationSchema.optional()
10146
+ });
10147
+ var SubAnswerSchema = z42.object({
10148
+ subQuestion: z42.string(),
10149
+ answer: z42.string(),
10150
+ citations: z42.array(CitationSchema),
10151
+ confidence: z42.number().min(0).max(1),
10152
+ needsMoreContext: z42.boolean().describe("True if evidence was insufficient to answer fully")
10153
+ });
10154
+ var VerifyResultSchema = z42.object({
10155
+ approved: z42.boolean().describe("Whether all sub-answers are adequately grounded"),
10156
+ issues: z42.array(z42.string()).describe("Specific grounding or consistency issues found"),
10157
+ retrySubQuestions: z42.array(z42.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
10158
+ });
10159
+ var QueryResultSchema = z42.object({
10160
+ answer: z42.string(),
10161
+ citations: z42.array(CitationSchema),
8862
10162
  intent: QueryIntentSchema,
8863
- confidence: z39.number().min(0).max(1),
8864
- followUp: z39.string().optional().describe("Suggested follow-up question if applicable")
10163
+ confidence: z42.number().min(0).max(1),
10164
+ followUp: z42.string().optional().describe("Suggested follow-up question if applicable")
8865
10165
  });
8866
10166
 
8867
10167
  // src/query/retriever.ts
@@ -8869,23 +10169,69 @@ function recordToKVArray(record) {
8869
10169
  return Object.entries(record).map(([key, value]) => ({ key, value }));
8870
10170
  }
8871
10171
  async function retrieve(subQuestion, conversationId, config) {
8872
- const { documentStore, memoryStore, retrievalLimit, log } = config;
10172
+ const { documentStore, memoryStore, sourceRetriever, retrievalLimit, retrievalMode, log } = config;
8873
10173
  const evidence = [];
8874
10174
  const tasks = [];
8875
- tasks.push(
8876
- (async () => {
8877
- try {
8878
- const filter = {};
8879
- if (subQuestion.chunkTypes?.length) {
8880
- const chunkResults = await Promise.all(
8881
- subQuestion.chunkTypes.map(
8882
- (type) => memoryStore.search(subQuestion.question, {
8883
- limit: Math.ceil(retrievalLimit / subQuestion.chunkTypes.length),
8884
- filter: { ...filter, type }
8885
- })
8886
- )
8887
- );
8888
- for (const chunks of chunkResults) {
10175
+ if (retrievalMode === "source_rag" || retrievalMode === "hybrid" || retrievalMode === "long_context") {
10176
+ tasks.push(
10177
+ (async () => {
10178
+ try {
10179
+ const sourceResults = await sourceRetriever?.searchSourceSpans({
10180
+ question: subQuestion.question,
10181
+ limit: retrievalLimit,
10182
+ mode: retrievalMode
10183
+ }) ?? [];
10184
+ for (const result of sourceResults) {
10185
+ evidence.push({
10186
+ source: "source_span",
10187
+ sourceSpanId: result.span.id,
10188
+ chunkId: result.span.chunkId,
10189
+ documentId: result.span.documentId,
10190
+ text: result.span.text,
10191
+ relevance: result.relevance,
10192
+ retrievalMode,
10193
+ sourceLocation: result.span.location,
10194
+ metadata: result.span.metadata ? recordToKVArray(result.span.metadata) : void 0
10195
+ });
10196
+ }
10197
+ } catch (e) {
10198
+ await log?.(`Source span search failed for "${subQuestion.question}": ${e}`);
10199
+ }
10200
+ })()
10201
+ );
10202
+ }
10203
+ if (retrievalMode === "graph_only" || retrievalMode === "hybrid" || !sourceRetriever) {
10204
+ tasks.push(
10205
+ (async () => {
10206
+ try {
10207
+ const filter = {};
10208
+ if (subQuestion.chunkTypes?.length) {
10209
+ const chunkResults = await Promise.all(
10210
+ subQuestion.chunkTypes.map(
10211
+ (type) => memoryStore.search(subQuestion.question, {
10212
+ limit: Math.ceil(retrievalLimit / subQuestion.chunkTypes.length),
10213
+ filter: { ...filter, type }
10214
+ })
10215
+ )
10216
+ );
10217
+ for (const chunks of chunkResults) {
10218
+ for (const chunk of chunks) {
10219
+ evidence.push({
10220
+ source: "chunk",
10221
+ chunkId: chunk.id,
10222
+ documentId: chunk.documentId,
10223
+ text: chunk.text,
10224
+ relevance: 0.8,
10225
+ // Default — store doesn't expose scores directly
10226
+ retrievalMode,
10227
+ metadata: recordToKVArray(chunk.metadata)
10228
+ });
10229
+ }
10230
+ }
10231
+ } else {
10232
+ const chunks = await memoryStore.search(subQuestion.question, {
10233
+ limit: retrievalLimit
10234
+ });
8889
10235
  for (const chunk of chunks) {
8890
10236
  evidence.push({
8891
10237
  source: "chunk",
@@ -8893,32 +10239,18 @@ async function retrieve(subQuestion, conversationId, config) {
8893
10239
  documentId: chunk.documentId,
8894
10240
  text: chunk.text,
8895
10241
  relevance: 0.8,
8896
- // Default — store doesn't expose scores directly
10242
+ retrievalMode,
8897
10243
  metadata: recordToKVArray(chunk.metadata)
8898
10244
  });
8899
10245
  }
8900
10246
  }
8901
- } else {
8902
- const chunks = await memoryStore.search(subQuestion.question, {
8903
- limit: retrievalLimit
8904
- });
8905
- for (const chunk of chunks) {
8906
- evidence.push({
8907
- source: "chunk",
8908
- chunkId: chunk.id,
8909
- documentId: chunk.documentId,
8910
- text: chunk.text,
8911
- relevance: 0.8,
8912
- metadata: recordToKVArray(chunk.metadata)
8913
- });
8914
- }
10247
+ } catch (e) {
10248
+ await log?.(`Chunk search failed for "${subQuestion.question}": ${e}`);
8915
10249
  }
8916
- } catch (e) {
8917
- await log?.(`Chunk search failed for "${subQuestion.question}": ${e}`);
8918
- }
8919
- })()
8920
- );
8921
- if (subQuestion.documentFilters) {
10250
+ })()
10251
+ );
10252
+ }
10253
+ if (subQuestion.documentFilters && (retrievalMode === "graph_only" || retrievalMode === "hybrid" || retrievalMode === "long_context")) {
8922
10254
  tasks.push(
8923
10255
  (async () => {
8924
10256
  try {
@@ -8937,6 +10269,7 @@ async function retrieve(subQuestion, conversationId, config) {
8937
10269
  text: summary,
8938
10270
  relevance: 0.9,
8939
10271
  // Direct lookup is high relevance
10272
+ retrievalMode,
8940
10273
  metadata: [
8941
10274
  { key: "type", value: doc.type },
8942
10275
  { key: "carrier", value: doc.carrier ?? "" },
@@ -8963,8 +10296,9 @@ async function retrieve(subQuestion, conversationId, config) {
8963
10296
  source: "conversation",
8964
10297
  turnId: turn.id,
8965
10298
  text: `[${turn.role}]: ${turn.content}`,
8966
- relevance: 0.6
10299
+ relevance: 0.6,
8967
10300
  // Conversation context is lower relevance than documents
10301
+ retrievalMode
8968
10302
  });
8969
10303
  }
8970
10304
  } catch (e) {
@@ -8974,10 +10308,10 @@ async function retrieve(subQuestion, conversationId, config) {
8974
10308
  );
8975
10309
  }
8976
10310
  await Promise.all(tasks);
8977
- evidence.sort((a, b) => b.relevance - a.relevance);
10311
+ const orderedEvidence = orderSourceEvidence(evidence);
8978
10312
  return {
8979
10313
  subQuestion: subQuestion.question,
8980
- evidence: evidence.slice(0, retrievalLimit)
10314
+ evidence: orderedEvidence.slice(0, retrievalLimit)
8981
10315
  };
8982
10316
  }
8983
10317
  function buildDocumentSummary(doc) {
@@ -9063,16 +10397,22 @@ Answer the sub-question based on the evidence above. For every factual claim, in
9063
10397
  async function reason(subQuestion, intent, evidence, config) {
9064
10398
  const { generateObject, providerOptions } = config;
9065
10399
  const evidenceText = evidence.map((e, i) => {
9066
- const sourceLabel = e.source === "chunk" ? `[chunk:${e.chunkId}]` : e.source === "document" ? `[doc:${e.documentId}]` : `[turn:${e.turnId}]`;
10400
+ const sourceLabel = e.source === "source_span" ? `[source-span:${e.sourceSpanId}]` : e.source === "chunk" ? `[chunk:${e.chunkId}]` : e.source === "document" ? `[doc:${e.documentId}]` : e.source === "attachment" ? `[attachment:${e.attachmentId}]` : `[turn:${e.turnId}]`;
9067
10401
  return `Evidence ${i + 1} ${sourceLabel} (relevance: ${e.relevance.toFixed(2)}):
9068
10402
  ${e.text}`;
9069
10403
  }).join("\n\n");
9070
10404
  const prompt = buildReasonPrompt(subQuestion, intent, evidenceText);
10405
+ const budget = resolveModelBudget({
10406
+ taskKind: "query_reason",
10407
+ hintTokens: 4096,
10408
+ modelCapabilities: config.modelCapabilities,
10409
+ constraint: config.modelBudgetConstraints?.query_reason
10410
+ });
9071
10411
  const { object, usage } = await withRetry(
9072
10412
  () => generateObject({
9073
10413
  prompt,
9074
10414
  schema: SubAnswerSchema,
9075
- maxTokens: 4096,
10415
+ maxTokens: budget.maxTokens,
9076
10416
  providerOptions
9077
10417
  })
9078
10418
  );
@@ -9112,49 +10452,41 @@ RESPOND WITH:
9112
10452
  - retrySubQuestions: sub-questions that need re-retrieval or re-reasoning (only if not approved)`;
9113
10453
  }
9114
10454
 
9115
- // src/query/verifier.ts
9116
- async function verify(originalQuestion, subAnswers, allEvidence, config) {
9117
- const { generateObject, providerOptions } = config;
9118
- const subAnswersJson = JSON.stringify(
9119
- subAnswers.map((sa) => ({
9120
- subQuestion: sa.subQuestion,
9121
- answer: sa.answer,
9122
- citations: sa.citations,
9123
- confidence: sa.confidence,
9124
- needsMoreContext: sa.needsMoreContext
9125
- })),
9126
- null,
9127
- 2
9128
- );
9129
- const evidenceJson = JSON.stringify(
9130
- allEvidence.map((e) => ({
9131
- source: e.source,
9132
- id: e.chunkId ?? e.documentId ?? e.turnId,
9133
- text: e.text.slice(0, 500),
9134
- // Truncate for context efficiency
9135
- relevance: e.relevance
9136
- })),
9137
- null,
9138
- 2
9139
- );
9140
- const prompt = buildVerifyPrompt(originalQuestion, subAnswersJson, evidenceJson);
9141
- const { object, usage } = await withRetry(
9142
- () => generateObject({
9143
- prompt,
9144
- schema: VerifyResultSchema,
9145
- maxTokens: 2048,
9146
- providerOptions
9147
- })
9148
- );
9149
- return { result: object, usage };
9150
- }
9151
-
9152
10455
  // src/query/quality.ts
9153
10456
  function sourceIdForEvidence(evidence) {
9154
- return evidence.chunkId ?? evidence.documentId ?? evidence.turnId ?? evidence.attachmentId;
10457
+ return evidence.sourceSpanId ?? evidence.chunkId ?? evidence.documentId ?? evidence.turnId ?? evidence.attachmentId;
9155
10458
  }
9156
10459
  function citationSourceId(citation) {
9157
- return citation.chunkId || citation.documentId;
10460
+ return citation.sourceSpanId || citation.chunkId || citation.documentId;
10461
+ }
10462
+ function hasGroundingEvidence(evidence) {
10463
+ return evidence.some((item) => item.source === "chunk" || item.source === "source_span");
10464
+ }
10465
+ function containsQuotedNumericDateOrContractualClaim(text) {
10466
+ const normalized = text.toLowerCase();
10467
+ return /[$€£]\s?\d|\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\s?(?:%|percent|days?|months?|years?)\b/.test(text) || /\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{4}-\d{2}-\d{2}\b/.test(text) || /\b(?:january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{1,2},?\s+\d{4}\b/i.test(text) || /\b(?:shall|must|required|subject to|excluded|exclusion|condition|endorsement|deductible|limit|premium|retention)\b/.test(normalized);
10468
+ }
10469
+ function deterministicQueryGroundingIssues(subAnswers, evidence) {
10470
+ const issues = [];
10471
+ const evidenceBySource = /* @__PURE__ */ new Map();
10472
+ for (const item of evidence) {
10473
+ const sourceId = sourceIdForEvidence(item);
10474
+ if (!sourceId) continue;
10475
+ evidenceBySource.set(sourceId, [...evidenceBySource.get(sourceId) ?? [], item]);
10476
+ }
10477
+ for (const subAnswer of subAnswers) {
10478
+ if (!subAnswer.needsMoreContext && subAnswer.citations.length === 0 && containsQuotedNumericDateOrContractualClaim(subAnswer.answer)) {
10479
+ issues.push(`Sub-answer "${subAnswer.subQuestion}" contains a numeric, date, or contractual claim without citations.`);
10480
+ }
10481
+ for (const citation of subAnswer.citations) {
10482
+ const sourceId = citationSourceId(citation);
10483
+ const supportedEvidence = sourceId ? evidenceBySource.get(sourceId) ?? [] : [];
10484
+ if (containsQuotedNumericDateOrContractualClaim(citation.quote) && !hasGroundingEvidence(supportedEvidence)) {
10485
+ issues.push(`Citation [${citation.index}] in "${subAnswer.subQuestion}" supports a numeric, date, or contractual claim without chunk or source-span evidence.`);
10486
+ }
10487
+ }
10488
+ }
10489
+ return issues;
9158
10490
  }
9159
10491
  function buildQueryReviewReport(params) {
9160
10492
  const { subAnswers, evidence, finalResult, verifyRounds } = params;
@@ -9207,6 +10539,16 @@ function buildQueryReviewReport(params) {
9207
10539
  sourceId
9208
10540
  });
9209
10541
  }
10542
+ if (containsQuotedNumericDateOrContractualClaim(citation.quote) && !hasGroundingEvidence(supportedEvidence)) {
10543
+ issues.push({
10544
+ code: "citation_claim_lacks_chunk_or_source_span",
10545
+ severity: "blocking",
10546
+ message: `Citation [${citation.index}] in "${subAnswer.subQuestion}" supports a numeric, date, or contractual claim without chunk or source-span evidence.`,
10547
+ subQuestion: subAnswer.subQuestion,
10548
+ citationIndex: citation.index,
10549
+ sourceId
10550
+ });
10551
+ }
9210
10552
  }
9211
10553
  }
9212
10554
  if (finalResult) {
@@ -9218,10 +10560,10 @@ function buildQueryReviewReport(params) {
9218
10560
  });
9219
10561
  }
9220
10562
  const knownCitationIds = new Set(
9221
- subAnswers.flatMap((sa) => sa.citations.map((citation) => `${citation.index}|${citation.chunkId}|${citation.documentId}`))
10563
+ subAnswers.flatMap((sa) => sa.citations.map((citation) => `${citation.index}|${citation.sourceSpanId ?? ""}|${citation.chunkId ?? ""}|${citation.documentId}`))
9222
10564
  );
9223
10565
  for (const citation of finalResult.citations) {
9224
- const key = `${citation.index}|${citation.chunkId}|${citation.documentId}`;
10566
+ const key = `${citation.index}|${citation.sourceSpanId ?? ""}|${citation.chunkId ?? ""}|${citation.documentId}`;
9225
10567
  if (!knownCitationIds.has(key)) {
9226
10568
  issues.push({
9227
10569
  code: "final_answer_unknown_citation",
@@ -9255,6 +10597,67 @@ function buildQueryReviewReport(params) {
9255
10597
  };
9256
10598
  }
9257
10599
 
10600
+ // src/query/verifier.ts
10601
+ async function verify(originalQuestion, subAnswers, allEvidence, config) {
10602
+ const { generateObject, providerOptions } = config;
10603
+ const subAnswersJson = JSON.stringify(
10604
+ subAnswers.map((sa) => ({
10605
+ subQuestion: sa.subQuestion,
10606
+ answer: sa.answer,
10607
+ citations: sa.citations,
10608
+ confidence: sa.confidence,
10609
+ needsMoreContext: sa.needsMoreContext
10610
+ })),
10611
+ null,
10612
+ 2
10613
+ );
10614
+ const evidenceJson = JSON.stringify(
10615
+ allEvidence.map((e) => ({
10616
+ source: e.source,
10617
+ id: e.sourceSpanId ?? e.chunkId ?? e.documentId ?? e.turnId ?? e.attachmentId,
10618
+ chunkId: e.chunkId,
10619
+ sourceSpanId: e.sourceSpanId,
10620
+ text: e.text.slice(0, 500),
10621
+ // Truncate for context efficiency
10622
+ relevance: e.relevance
10623
+ })),
10624
+ null,
10625
+ 2
10626
+ );
10627
+ const prompt = buildVerifyPrompt(originalQuestion, subAnswersJson, evidenceJson);
10628
+ const budget = resolveModelBudget({
10629
+ taskKind: "query_verify",
10630
+ hintTokens: 2048,
10631
+ modelCapabilities: config.modelCapabilities,
10632
+ constraint: config.modelBudgetConstraints?.query_verify
10633
+ });
10634
+ const { object, usage } = await withRetry(
10635
+ () => generateObject({
10636
+ prompt,
10637
+ schema: VerifyResultSchema,
10638
+ maxTokens: budget.maxTokens,
10639
+ providerOptions
10640
+ })
10641
+ );
10642
+ const result = object;
10643
+ const deterministicIssues = deterministicQueryGroundingIssues(subAnswers, allEvidence);
10644
+ if (deterministicIssues.length > 0) {
10645
+ return {
10646
+ result: {
10647
+ ...result,
10648
+ approved: false,
10649
+ issues: Array.from(/* @__PURE__ */ new Set([...result.issues, ...deterministicIssues])),
10650
+ retrySubQuestions: Array.from(/* @__PURE__ */ new Set([
10651
+ ...result.retrySubQuestions ?? [],
10652
+ ...subAnswers.filter((answer) => deterministicIssues.some((issue) => issue.includes(`"${answer.subQuestion}"`))).map((answer) => answer.subQuestion)
10653
+ ]))
10654
+ },
10655
+ usage
10656
+ };
10657
+ }
10658
+ return { result, usage };
10659
+ }
10660
+
9258
10661
  // src/prompts/query/interpret-attachment.ts
9259
10662
  function buildInterpretAttachmentPrompt(question, attachment) {
9260
10663
  const attachmentLabel = attachment.name ?? attachment.id ?? "attachment";
@@ -9334,7 +10737,7 @@ ${attachment.text}` : null
9334
10737
  return lines.filter(Boolean).join("\n");
9335
10738
  }
9336
10739
  async function interpretAttachments(params) {
9337
- const { attachments = [], question, generateObject, providerOptions, log, onUsage } = params;
10740
+ const { attachments = [], question, generateObject, providerOptions, modelCapabilities, modelBudgetConstraints, log, onUsage } = params;
9338
10741
  if (attachments.length === 0) {
9339
10742
  return { evidence: [] };
9340
10743
  }
@@ -9363,12 +10766,18 @@ async function interpretAttachments(params) {
9363
10766
  continue;
9364
10767
  }
9365
10768
  const prompt = buildInterpretAttachmentPrompt(question, attachment);
10769
+ const budget = resolveModelBudget({
10770
+ taskKind: "query_attachment",
10771
+ hintTokens: 2048,
10772
+ modelCapabilities,
10773
+ constraint: modelBudgetConstraints?.query_attachment
10774
+ });
9366
10775
  const { object, usage } = await safeGenerateObject(
9367
10776
  generateObject,
9368
10777
  {
9369
10778
  prompt,
9370
10779
  schema: AttachmentInterpretationSchema,
9371
- maxTokens: 2048,
10780
+ maxTokens: budget.maxTokens,
9372
10781
  providerOptions: buildAttachmentProviderOptions(attachment, providerOptions)
9373
10782
  },
9374
10783
  {
@@ -9405,10 +10814,19 @@ ${item.text}`).join("\n\n");
9405
10814
  function shouldRetrieveForClassification(classification) {
9406
10815
  return classification.requiresDocumentLookup || classification.requiresChunkSearch;
9407
10816
  }
10817
+ function resolveQueryRetrievalMode(params) {
10818
+ const requestedMode = params.inputMode ?? params.configMode ?? params.classificationMode;
10819
+ if (requestedMode) return requestedMode;
10820
+ return params.supportsSourceRetrieval ? "hybrid" : "graph_only";
10821
+ }
9408
10822
  function buildInitialQueryWorkflowPlan(params) {
9409
10823
  const { classification, attachmentEvidence } = params;
9410
10824
  const actions = [];
9411
10825
  const shouldRetrieve = shouldRetrieveForClassification(classification);
10826
+ const retrievalMode = params.retrievalMode ?? resolveQueryRetrievalMode({
10827
+ classificationMode: classification.retrievalMode,
10828
+ supportsSourceRetrieval: !!params.supportsSourceRetrieval
10829
+ });
9412
10830
  if (shouldRetrieve) {
9413
10831
  actions.push({
9414
10832
  type: "retrieve",
@@ -9431,7 +10849,7 @@ function buildInitialQueryWorkflowPlan(params) {
9431
10849
  reason: "compose final response"
9432
10850
  }
9433
10851
  );
9434
- return { actions, shouldRetrieve };
10852
+ return { actions, shouldRetrieve, retrievalMode };
9435
10853
  }
9436
10854
  function getWorkflowAction(plan, type) {
9437
10855
  return plan.actions.find((action) => action.type === type);
@@ -9444,14 +10862,18 @@ function createQueryAgent(config) {
9444
10862
  generateObject,
9445
10863
  documentStore,
9446
10864
  memoryStore,
10865
+ sourceRetriever,
9447
10866
  concurrency = 3,
9448
10867
  maxVerifyRounds = 1,
9449
10868
  retrievalLimit = 10,
10869
+ retrievalMode: configRetrievalMode,
9450
10870
  onTokenUsage,
9451
10871
  onProgress,
9452
10872
  log,
9453
10873
  providerOptions,
9454
- qualityGate = "warn"
10874
+ qualityGate = "warn",
10875
+ modelCapabilities,
10876
+ modelBudgetConstraints
9455
10877
  } = config;
9456
10878
  const limit = pLimit(concurrency);
9457
10879
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -9462,6 +10884,14 @@ function createQueryAgent(config) {
9462
10884
  onTokenUsage?.(usage);
9463
10885
  }
9464
10886
  }
10887
+ function resolveBudget(taskKind, hintTokens) {
10888
+ return resolveModelBudget({
10889
+ taskKind,
10890
+ hintTokens,
10891
+ modelCapabilities,
10892
+ constraint: modelBudgetConstraints?.[taskKind]
10893
+ });
10894
+ }
9465
10895
  async function query(input) {
9466
10896
  totalUsage = { inputTokens: 0, outputTokens: 0 };
9467
10897
  const { question, conversationId, context, attachments } = input;
@@ -9474,6 +10904,8 @@ function createQueryAgent(config) {
9474
10904
  question,
9475
10905
  generateObject,
9476
10906
  providerOptions,
10907
+ modelCapabilities,
10908
+ modelBudgetConstraints,
9477
10909
  log,
9478
10910
  onUsage: trackUsage
9479
10911
  });
@@ -9481,13 +10913,26 @@ function createQueryAgent(config) {
9481
10913
  onProgress?.("Classifying query...");
9482
10914
  const classification = await classify(question, conversationId, attachmentContext);
9483
10915
  await pipelineCtx.save("classify", { classification, attachmentEvidence });
10916
+ const effectiveRetrievalMode = resolveQueryRetrievalMode({
10917
+ inputMode: input.retrievalMode,
10918
+ configMode: configRetrievalMode,
10919
+ classificationMode: classification.retrievalMode,
10920
+ supportsSourceRetrieval: !!sourceRetriever
10921
+ });
9484
10922
  const retrieverConfig = {
9485
10923
  documentStore,
9486
10924
  memoryStore,
10925
+ sourceRetriever,
9487
10926
  retrievalLimit,
10927
+ retrievalMode: effectiveRetrievalMode,
9488
10928
  log
9489
10929
  };
9490
- const workflowPlan = buildInitialQueryWorkflowPlan({ classification, attachmentEvidence });
10930
+ const workflowPlan = buildInitialQueryWorkflowPlan({
10931
+ classification,
10932
+ attachmentEvidence,
10933
+ retrievalMode: effectiveRetrievalMode,
10934
+ supportsSourceRetrieval: !!sourceRetriever
10935
+ });
9491
10936
  const retrieveAction = getWorkflowAction(workflowPlan, "retrieve");
9492
10937
  const reasonAction = getWorkflowAction(workflowPlan, "reason");
9493
10938
  await pipelineCtx.save("workflow", { classification, attachmentEvidence, workflowPlan });
@@ -9502,7 +10947,7 @@ function createQueryAgent(config) {
9502
10947
  const allEvidence = [...attachmentEvidence, ...retrievalResults.flatMap((r) => r.evidence)];
9503
10948
  await pipelineCtx.save("retrieve", { classification, attachmentEvidence, evidence: allEvidence });
9504
10949
  onProgress?.("Reasoning over evidence...");
9505
- const reasonerConfig = { generateObject, providerOptions };
10950
+ const reasonerConfig = { generateObject, providerOptions, modelCapabilities, modelBudgetConstraints };
9506
10951
  const subQuestionsToReason = reasonAction?.subQuestions ?? classification.subQuestions;
9507
10952
  const reasonResults = await Promise.allSettled(
9508
10953
  subQuestionsToReason.map(
@@ -9537,7 +10982,7 @@ function createQueryAgent(config) {
9537
10982
  }
9538
10983
  await pipelineCtx.save("reason", { classification, attachmentEvidence, evidence: allEvidence, subAnswers });
9539
10984
  onProgress?.("Verifying answer grounding...");
9540
- const verifierConfig = { generateObject, providerOptions };
10985
+ const verifierConfig = { generateObject, providerOptions, modelCapabilities, modelBudgetConstraints };
9541
10986
  const verifyRounds = [];
9542
10987
  for (let round = 0; round < maxVerifyRounds; round++) {
9543
10988
  const { result: verifyResult, usage } = await safeVerify(
@@ -9663,12 +11108,13 @@ function createQueryAgent(config) {
9663
11108
  }
9664
11109
  }
9665
11110
  const prompt = buildQueryClassifyPrompt(question, conversationContext, attachmentContext);
11111
+ const budget = resolveBudget("query_classify", 2048);
9666
11112
  const { object, usage } = await safeGenerateObject(
9667
11113
  generateObject,
9668
11114
  {
9669
11115
  prompt,
9670
11116
  schema: QueryClassifyResultSchema,
9671
- maxTokens: 2048,
11117
+ maxTokens: budget.maxTokens,
9672
11118
  providerOptions
9673
11119
  },
9674
11120
  {
@@ -9682,7 +11128,8 @@ function createQueryAgent(config) {
9682
11128
  ],
9683
11129
  requiresDocumentLookup: true,
9684
11130
  requiresChunkSearch: true,
9685
- requiresConversationHistory: !!conversationId
11131
+ requiresConversationHistory: !!conversationId,
11132
+ retrievalMode: sourceRetriever ? "hybrid" : "graph_only"
9686
11133
  },
9687
11134
  log,
9688
11135
  onError: (err, attempt) => log?.(`Query classify attempt ${attempt + 1} failed: ${err}`)
@@ -9712,12 +11159,13 @@ function createQueryAgent(config) {
9712
11159
  2
9713
11160
  );
9714
11161
  const prompt = buildRespondPrompt(originalQuestion, subAnswersJson, platform);
11162
+ const budget = resolveBudget("query_respond", 4096);
9715
11163
  const { object, usage } = await safeGenerateObject(
9716
11164
  generateObject,
9717
11165
  {
9718
11166
  prompt,
9719
11167
  schema: QueryResultSchema,
9720
- maxTokens: 4096,
11168
+ maxTokens: budget.maxTokens,
9721
11169
  providerOptions
9722
11170
  },
9723
11171
  {
@@ -9740,6 +11188,673 @@ ${sa.answer}`).join("\n\n"),
9740
11188
  return { query };
9741
11189
  }
9742
11190
 
11191
+ // src/pce/index.ts
11192
+ import { z as z43 } from "zod";
11193
+
11194
+ // src/prompts/pce/index.ts
11195
+ function buildPceNormalizePrompt(input) {
11196
+ const evidence = input.evidenceSources.map(
11197
+ (source) => `- ${source.id}${source.label ? ` (${source.label})` : ""}: ${source.text.slice(0, 1200)}`
11198
+ ).join("\n");
11199
+ return [
11200
+ "Normalize this policy change endorsement request into atomic change items.",
11201
+ "Use beforeValue only when the existing value is explicitly quoted in the provided evidence.",
11202
+ "Every beforeValue must include a citation with sourceId and exact quote.",
11203
+ "Ask missing-info questions for required details that are absent.",
11204
+ "",
11205
+ `Request:
11206
+ ${input.requestText}`,
11207
+ "",
11208
+ `Evidence:
11209
+ ${evidence || "(none provided)"}`
11210
+ ].join("\n");
11211
+ }
11212
+ function buildPceReplyPrompt(input) {
11213
+ return [
11214
+ "Map this reply to the open missing-info questions.",
11215
+ "Return concise answers only for questions that are directly answered.",
11216
+ "",
11217
+ `Reply:
11218
+ ${input.replyText}`,
11219
+ "",
11220
+ `Open questions:
11221
+ ${input.openQuestions.map((question) => `- ${question.id}${question.fieldPath ? ` (${question.fieldPath})` : ""}: ${question.question}`).join("\n")}`
11222
+ ].join("\n");
11223
+ }
11224
+
11225
+ // src/pce/index.ts
11226
+ var ReplyAnswersSchema = z43.object({
11227
+ answers: z43.array(z43.object({
11228
+ questionId: z43.string().optional(),
11229
+ fieldPath: z43.string().optional(),
11230
+ answer: z43.string()
11231
+ }))
11232
+ });
11233
+ function createPceAgent(config = {}) {
11234
+ const now = config.now ?? Date.now;
11235
+ let tokenUsage = { inputTokens: 0, outputTokens: 0 };
11236
+ const cases = /* @__PURE__ */ new Map();
11237
+ function trackUsage(usage) {
11238
+ if (!usage) return;
11239
+ tokenUsage.inputTokens += usage.inputTokens;
11240
+ tokenUsage.outputTokens += usage.outputTokens;
11241
+ config.onTokenUsage?.(usage);
11242
+ }
11243
+ function resolveBudget(taskKind, hintTokens) {
11244
+ return resolveModelBudget({
11245
+ taskKind,
11246
+ hintTokens,
11247
+ modelCapabilities: config.modelCapabilities,
11248
+ constraint: config.modelBudgetConstraints?.[taskKind]
11249
+ });
11250
+ }
11251
+ async function processChangeRequest(input) {
11252
+ tokenUsage = { inputTokens: 0, outputTokens: 0 };
11253
+ const evidenceSources = await collectPceEvidenceSources(input, config);
11254
+ const fallback = heuristicNormalize(input.requestText, evidenceSources);
11255
+ let normalized = fallback;
11256
+ if (config.generateObject) {
11257
+ const budget = resolveBudget("pce_impact_analysis", 2500);
11258
+ const result = await safeGenerateObject(
11259
+ config.generateObject,
11260
+ {
11261
+ prompt: buildPceNormalizePrompt({ requestText: input.requestText, evidenceSources }),
11262
+ schema: PceNormalizationResultSchema,
11263
+ maxTokens: budget.maxTokens,
11264
+ providerOptions: config.providerOptions
11265
+ },
11266
+ { fallback, maxRetries: 1, log: config.log }
11267
+ );
11268
+ normalized = PceNormalizationResultSchema.parse(result.object);
11269
+ trackUsage(result.usage);
11270
+ }
11271
+ const createdAt = now();
11272
+ const items = normalized.items.map((item) => finalizeItem(item, input.requestText));
11273
+ const missingInfoQuestions = normalized.missingInfoQuestions.map((question) => {
11274
+ const itemId = question.itemId ?? items.find((item) => item.fieldPath === question.fieldPath)?.id;
11275
+ return {
11276
+ ...question,
11277
+ itemId,
11278
+ id: question.id ?? stableCaseId("question", [itemId, question.fieldPath, question.question])
11279
+ };
11280
+ });
11281
+ const validationIssues = validatePceItems(items, evidenceSources);
11282
+ const impacts = buildPolicyChangeImpacts(items, evidenceSources);
11283
+ const executionMode = selectPceExecutionMode({
11284
+ requestedMode: input.executionMode ?? config.executionMode,
11285
+ requestText: input.requestText,
11286
+ items,
11287
+ impacts,
11288
+ evidenceSources,
11289
+ validationIssues,
11290
+ missingInfoQuestions
11291
+ });
11292
+ const state = {
11293
+ id: input.caseId ?? stableCaseId("pce", [input.requestText, evidenceSources.map((source) => source.id)]),
11294
+ requestText: input.requestText,
11295
+ summary: normalized.summary || summarizeItems(items),
11296
+ executionMode,
11297
+ items,
11298
+ impacts,
11299
+ evidenceSources,
11300
+ validationIssues,
11301
+ missingInfoQuestions,
11302
+ createdAt,
11303
+ updatedAt: createdAt
11304
+ };
11305
+ cases.set(state.id, state);
11306
+ return { state, tokenUsage };
11307
+ }
11308
+ async function processReply2(input) {
11309
+ tokenUsage = { inputTokens: 0, outputTokens: 0 };
11310
+ let answers = heuristicParseAnswers(
11311
+ input.replyText,
11312
+ input.state.missingInfoQuestions
11313
+ );
11314
+ if (config.generateObject && input.state.missingInfoQuestions.some((question) => !question.answer)) {
11315
+ const budget = resolveBudget("pce_reply_parse", 1e3);
11316
+ const result = await safeGenerateObject(
11317
+ config.generateObject,
11318
+ {
11319
+ prompt: buildPceReplyPrompt({
11320
+ replyText: input.replyText,
11321
+ openQuestions: input.state.missingInfoQuestions.filter((question) => !question.answer).map(({ id, question, fieldPath }) => ({ id, question, fieldPath }))
11322
+ }),
11323
+ schema: ReplyAnswersSchema,
11324
+ maxTokens: budget.maxTokens,
11325
+ providerOptions: config.providerOptions
11326
+ },
11327
+ { fallback: { answers }, maxRetries: 1, log: config.log }
11328
+ );
11329
+ answers = ReplyAnswersSchema.parse(result.object).answers;
11330
+ trackUsage(result.usage);
11331
+ }
11332
+ const merged = mergeQuestionAnswers(input.state.missingInfoQuestions, answers);
11333
+ const items = applyMissingInfoAnswers(input.state.items, merged.questions);
11334
+ const validationIssues = validatePceItems(items, input.state.evidenceSources);
11335
+ const impacts = buildPolicyChangeImpacts(items, input.state.evidenceSources);
11336
+ const executionMode = selectPceExecutionMode({
11337
+ requestedMode: config.executionMode,
11338
+ requestText: input.state.requestText,
11339
+ items,
11340
+ impacts,
11341
+ evidenceSources: input.state.evidenceSources,
11342
+ validationIssues,
11343
+ missingInfoQuestions: merged.questions
11344
+ });
11345
+ const state = {
11346
+ ...input.state,
11347
+ executionMode,
11348
+ items,
11349
+ impacts,
11350
+ validationIssues,
11351
+ missingInfoQuestions: merged.questions,
11352
+ updatedAt: now()
11353
+ };
11354
+ cases.set(state.id, state);
11355
+ return { state, answersMerged: merged.answeredCount, tokenUsage };
11356
+ }
11357
+ function generateSubmissionPacket(input) {
11358
+ const state = typeof input === "string" ? cases.get(input) : input.state;
11359
+ if (!state) {
11360
+ throw new Error(`Policy change case ${String(input)} not found`);
11361
+ }
11362
+ return buildPceSubmissionPacket(state, now());
11363
+ }
11364
+ return { processChangeRequest, processReply: processReply2, generateSubmissionPacket };
11365
+ }
11366
+ function applyMissingInfoAnswers(items, questions) {
11367
+ return items.map((item) => {
11368
+ const answers = questions.filter(
11369
+ (question) => question.answer?.trim() && (question.itemId === item.id || !question.itemId && question.fieldPath === item.fieldPath)
11370
+ );
11371
+ if (answers.length === 0) return item;
11372
+ const answer = answers[answers.length - 1].answer.trim();
11373
+ return {
11374
+ ...item,
11375
+ afterValue: item.afterValue ?? answer,
11376
+ requestedValue: item.requestedValue ?? answer,
11377
+ status: item.status === "needs_info" ? "ready" : item.status,
11378
+ userSourceSpanIds: item.userSourceSpanIds ?? []
11379
+ };
11380
+ });
11381
+ }
11382
+ async function collectPceEvidenceSources(input, config) {
11383
+ const provided = input.evidenceSources ?? [];
11384
+ if (!config?.sourceRetriever) return provided;
11385
+ try {
11386
+ const results = await config.sourceRetriever.searchSourceSpans({
11387
+ question: input.requestText,
11388
+ limit: config.retrievalLimit ?? 8,
11389
+ mode: "hybrid"
11390
+ });
11391
+ const retrieved = results.map((result) => ({
11392
+ id: result.span.id,
11393
+ label: result.span.formNumber ?? result.span.sectionId ?? result.span.sourceKind,
11394
+ documentId: result.span.documentId,
11395
+ page: result.span.pageStart ?? result.span.location?.page,
11396
+ fieldPath: result.span.sectionId ?? result.span.location?.fieldPath,
11397
+ text: result.span.text,
11398
+ metadata: {
11399
+ ...result.span.metadata,
11400
+ relevance: String(result.relevance),
11401
+ sourceKind: result.span.sourceKind ?? result.span.kind
11402
+ }
11403
+ }));
11404
+ return dedupeEvidenceSources([...provided, ...retrieved]);
11405
+ } catch (error) {
11406
+ await config.log?.(`PCE source evidence retrieval failed: ${error}`);
11407
+ return provided;
11408
+ }
11409
+ }
11410
+ function stablePolicyChangeItemId(item) {
11411
+ return stableCaseId("pci", [
11412
+ item.affectedPolicyId,
11413
+ item.kind,
11414
+ item.fieldPath,
11415
+ item.afterValue ?? item.requestedValue ?? "",
11416
+ item.sourceSpanIds?.join("|") ?? ""
11417
+ ]);
11418
+ }
11419
+ function validatePceItems(items, sources) {
11420
+ return items.flatMap((item) => {
11421
+ const issues = [];
11422
+ const citation = firstCitationForValue(item.citations, item.beforeValue);
11423
+ issues.push(...validateQuotedEvidence({
11424
+ itemId: item.id,
11425
+ fieldPath: `${item.fieldPath}.beforeValue`,
11426
+ quote: item.beforeValue,
11427
+ citation,
11428
+ sources
11429
+ }));
11430
+ if (item.beforeValue?.trim() && item.sourceSpanIds.length === 0 && item.sourceIds.length === 0) {
11431
+ issues.push({
11432
+ code: "existing_value_missing_source_span",
11433
+ severity: "blocking",
11434
+ message: `Existing value for ${item.fieldPath} is missing source span evidence.`,
11435
+ itemId: item.id,
11436
+ fieldPath: item.fieldPath
11437
+ });
11438
+ }
11439
+ if (item.status === "needs_info" || !item.afterValue?.trim() && !item.requestedValue?.trim() && item.action !== "remove") {
11440
+ issues.push({
11441
+ code: "required_value_missing",
11442
+ severity: "blocking",
11443
+ message: `Requested value for ${item.fieldPath} is missing.`,
11444
+ itemId: item.id,
11445
+ fieldPath: item.fieldPath
11446
+ });
11447
+ }
11448
+ if (item.kind === "coverage_change" && item.action !== "add" && item.sourceSpanIds.length === 0 && item.sourceIds.length === 0) {
11449
+ issues.push({
11450
+ code: "coverage_source_missing",
11451
+ severity: "blocking",
11452
+ message: `Coverage change for ${item.fieldPath} is not linked to existing coverage evidence.`,
11453
+ itemId: item.id,
11454
+ fieldPath: item.fieldPath
11455
+ });
11456
+ }
11457
+ const effectiveDateIssue = validateEffectiveDate(item, sources);
11458
+ if (effectiveDateIssue) issues.push(effectiveDateIssue);
11459
+ const endorsementConflict = findEndorsementConflict(item, sources);
11460
+ if (endorsementConflict) issues.push(endorsementConflict);
11461
+ if ((item.kind === "cancellation" || item.kind === "nonrenewal") && (!item.effectiveDate || item.sourceSpanIds.length === 0)) {
11462
+ issues.push({
11463
+ code: "notice_rule_ambiguous",
11464
+ severity: "blocking",
11465
+ message: `${item.kind} request needs an effective date and source-backed notice/timing terms.`,
11466
+ itemId: item.id,
11467
+ fieldPath: item.fieldPath
11468
+ });
11469
+ }
11470
+ if (item.kind === "certificate_endorsement_request" && !hasCertificateRequirementDetails(item)) {
11471
+ issues.push({
11472
+ code: "certificate_details_missing",
11473
+ severity: "blocking",
11474
+ message: "Certificate-driven endorsement request is missing holder or requirement details.",
11475
+ itemId: item.id,
11476
+ fieldPath: item.fieldPath
11477
+ });
11478
+ }
11479
+ return dedupeValidationIssues(issues);
11480
+ });
11481
+ }
11482
+ function buildPolicyChangeImpacts(items, sources) {
11483
+ return items.map((item) => {
11484
+ const citedSources = sources.filter((source) => item.sourceSpanIds.includes(source.id) || item.sourceIds.includes(source.id));
11485
+ return {
11486
+ itemId: item.id,
11487
+ beforeValue: item.beforeValue,
11488
+ requestedValue: item.requestedValue ?? item.afterValue,
11489
+ likelyEndorsementRequired: item.kind !== "renewal_submission_update",
11490
+ carrierApprovalLikelyRequired: item.kind !== "certificate_endorsement_request",
11491
+ affectedCoverageForms: Array.from(new Set(
11492
+ citedSources.map((source) => source.metadata?.formNumber ?? source.label).filter((value) => !!value)
11493
+ )).sort(),
11494
+ sourceSpanIds: Array.from(/* @__PURE__ */ new Set([...item.sourceSpanIds, ...item.sourceIds])).sort()
11495
+ };
11496
+ });
11497
+ }
11498
+ function selectPceExecutionMode(params) {
11499
+ if (params.requestedMode && params.requestedMode !== "auto") {
11500
+ return params.requestedMode;
11501
+ }
11502
+ if (params.validationIssues.some((issue) => issue.severity === "blocking")) {
11503
+ return "hybrid";
11504
+ }
11505
+ if (hasConflictingEvidence(params.evidenceSources)) {
11506
+ return "hybrid";
11507
+ }
11508
+ if (hasAmbiguousCancellationOrNonrenewal(params.requestText, params.items)) {
11509
+ return "hybrid";
11510
+ }
11511
+ if (hasUnclearCertificateRequest(params.items, params.missingInfoQuestions ?? [])) {
11512
+ return "hybrid";
11513
+ }
11514
+ if (hasMultiFormFinancialChange(params.items, params.impacts)) {
11515
+ return "market_eval";
11516
+ }
11517
+ return "deterministic_tree";
11518
+ }
11519
+ function finalizeItem(item, requestText) {
11520
+ const status = item.status ?? (!item.afterValue && item.action !== "remove" ? "needs_info" : "ready");
11521
+ const citations = item.citations ?? [];
11522
+ const sourceSpanIds = item.sourceSpanIds?.length ? item.sourceSpanIds : inferSourceIds(citations);
11523
+ const afterValue = item.afterValue ?? item.requestedValue;
11524
+ return {
11525
+ ...item,
11526
+ kind: item.kind ?? inferChangeKind(item.fieldPath, requestText),
11527
+ affectedPolicyId: item.affectedPolicyId ?? "unknown",
11528
+ afterValue,
11529
+ requestedValue: item.requestedValue ?? afterValue,
11530
+ sourceSpanIds,
11531
+ userSourceSpanIds: item.userSourceSpanIds ?? [],
11532
+ id: item.id ?? stablePolicyChangeItemId({
11533
+ ...item,
11534
+ kind: item.kind ?? inferChangeKind(item.fieldPath, requestText),
11535
+ affectedPolicyId: item.affectedPolicyId ?? "unknown",
11536
+ afterValue,
11537
+ requestedValue: item.requestedValue ?? afterValue,
11538
+ sourceSpanIds
11539
+ }),
11540
+ label: item.label || item.fieldPath,
11541
+ sourceIds: item.sourceIds ?? sourceSpanIds,
11542
+ citations,
11543
+ confidence: item.confidence ?? (requestText.length > 0 ? "medium" : "low"),
11544
+ confidenceScore: item.confidenceScore ?? (requestText.length > 0 ? 0.6 : 0.3),
11545
+ status
11546
+ };
11547
+ }
11548
+ function firstCitationForValue(citations, value) {
11549
+ if (!value) return void 0;
11550
+ return citations.find((citation) => citation.quote.trim() === value.trim()) ?? citations[0];
11551
+ }
11552
+ function inferSourceIds(citations) {
11553
+ return Array.from(new Set(citations.map((citation) => citation.sourceId))).sort();
11554
+ }
11555
+ function dedupeEvidenceSources(sources) {
11556
+ const byId = /* @__PURE__ */ new Map();
11557
+ for (const source of sources) {
11558
+ byId.set(source.id, source);
11559
+ }
11560
+ return [...byId.values()].sort((left, right) => left.id.localeCompare(right.id));
11561
+ }
11562
+ function hasConflictingEvidence(sources) {
11563
+ const signaturesByKey = /* @__PURE__ */ new Map();
11564
+ for (const source of sources) {
11565
+ const key = normalizeEvidenceConflictKey(source);
11566
+ if (!key) continue;
11567
+ const values = extractComparableEvidenceValues(source.text);
11568
+ if (values.length === 0) continue;
11569
+ const existing = signaturesByKey.get(key) ?? /* @__PURE__ */ new Set();
11570
+ existing.add(values.sort().join("|"));
11571
+ signaturesByKey.set(key, existing);
11572
+ if (existing.size > 1) return true;
11573
+ }
11574
+ return false;
11575
+ }
11576
+ function normalizeEvidenceConflictKey(source) {
11577
+ const fieldPath = source.fieldPath ?? source.metadata?.fieldPath;
11578
+ const formNumber = source.metadata?.formNumber;
11579
+ const key = fieldPath ? `${fieldPath}:${formNumber ?? "default"}` : source.label;
11580
+ return key?.replace(/\s+/g, " ").trim().toLowerCase();
11581
+ }
11582
+ function extractComparableEvidenceValues(text) {
11583
+ const values = /* @__PURE__ */ new Set();
11584
+ for (const match of text.matchAll(/\$?\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\b%?/g)) {
11585
+ values.add(match[0].replace(/[$,%\s]/g, ""));
11586
+ }
11587
+ for (const match of text.matchAll(/\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b/g)) {
11588
+ values.add(match[0]);
11589
+ }
11590
+ return [...values].filter((value) => value.length > 0);
11591
+ }
11592
+ function hasAmbiguousCancellationOrNonrenewal(requestText, items) {
11593
+ const hasCancellationAction = items.some((item) => item.kind === "cancellation" || item.kind === "nonrenewal");
11594
+ if (!hasCancellationAction) return false;
11595
+ return /\b(if|unless|maybe|possibly|unsure|unclear|or|pending|conditional)\b/i.test(requestText);
11596
+ }
11597
+ function hasUnclearCertificateRequest(items, missingInfoQuestions) {
11598
+ return items.some(
11599
+ (item) => item.kind === "certificate_endorsement_request" && (item.status === "needs_info" || !item.afterValue?.trim() || item.confidence === "low" || item.sourceSpanIds.length === 0 || missingInfoQuestions.some((question) => question.itemId === item.id || question.fieldPath === item.fieldPath))
11600
+ );
11601
+ }
11602
+ function hasMultiFormFinancialChange(items, impacts) {
11603
+ const financialItemIds = new Set(items.filter((item) => item.kind === "limit_change" || item.kind === "deductible_change").map((item) => item.id));
11604
+ return impacts.some(
11605
+ (impact) => financialItemIds.has(impact.itemId) && (impact.affectedCoverageForms.length > 1 || impact.sourceSpanIds.length > 1)
11606
+ );
11607
+ }
11608
+ function validateEffectiveDate(item, sources) {
11609
+ if (!item.effectiveDate) return void 0;
11610
+ const requestedDate = parseDateValue(item.effectiveDate);
11611
+ if (!requestedDate) {
11612
+ return {
11613
+ code: "effective_date_unparseable",
11614
+ severity: "warning",
11615
+ message: `Requested effective date ${item.effectiveDate} could not be parsed.`,
11616
+ itemId: item.id,
11617
+ fieldPath: "effectiveDate"
11618
+ };
11619
+ }
11620
+ const period = findPolicyPeriod(sources);
11621
+ if (!period) return void 0;
11622
+ if (requestedDate < period.start || requestedDate > period.end) {
11623
+ return {
11624
+ code: "effective_date_outside_policy_period",
11625
+ severity: "blocking",
11626
+ message: `Requested effective date ${item.effectiveDate} is outside the cited policy period.`,
11627
+ itemId: item.id,
11628
+ fieldPath: "effectiveDate",
11629
+ sourceId: period.sourceId
11630
+ };
11631
+ }
11632
+ return void 0;
11633
+ }
11634
+ function findPolicyPeriod(sources) {
11635
+ for (const source of sources) {
11636
+ const metadataStart = source.metadata?.policyEffectiveDate ?? source.metadata?.policyStartDate;
11637
+ const metadataEnd = source.metadata?.policyExpirationDate ?? source.metadata?.policyEndDate;
11638
+ const start = metadataStart ? parseDateValue(metadataStart) : void 0;
11639
+ const end = metadataEnd ? parseDateValue(metadataEnd) : void 0;
11640
+ if (start && end) return { start, end, sourceId: source.id };
11641
+ const textPeriod = source.text.match(/\b(?:policy\s+period|effective)\b[^.\n]*?(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})\s*(?:to|-|through)\s*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})/i);
11642
+ const textStart = textPeriod?.[1] ? parseDateValue(textPeriod[1]) : void 0;
11643
+ const textEnd = textPeriod?.[2] ? parseDateValue(textPeriod[2]) : void 0;
11644
+ if (textStart && textEnd) return { start: textStart, end: textEnd, sourceId: source.id };
11645
+ }
11646
+ return void 0;
11647
+ }
11648
+ function parseDateValue(value) {
11649
+ const numeric = value.match(/^(\d{1,2})[/-](\d{1,2})[/-](\d{2}|\d{4})$/);
11650
+ if (!numeric) return void 0;
11651
+ const month = Number(numeric[1]);
11652
+ const day = Number(numeric[2]);
11653
+ const rawYear = Number(numeric[3]);
11654
+ const year = rawYear < 100 ? 2e3 + rawYear : rawYear;
11655
+ if (month < 1 || month > 12 || day < 1 || day > 31) return void 0;
11656
+ return Date.UTC(year, month - 1, day);
11657
+ }
11658
+ function findEndorsementConflict(item, sources) {
11659
+ const linkedSources = sources.filter((source) => item.sourceSpanIds.includes(source.id) || item.sourceIds.includes(source.id));
11660
+ const conflictSource = linkedSources.find(
11661
+ (source) => /\bendorsement\b/i.test(`${source.label ?? ""} ${source.text}`) && /\b(excludes|exclusion|prohibits|not\s+covered|no\s+coverage|must\s+not)\b/i.test(source.text)
11662
+ );
11663
+ if (!conflictSource) return void 0;
11664
+ return {
11665
+ code: "endorsement_conflict",
11666
+ severity: "blocking",
11667
+ message: `Existing endorsement source ${conflictSource.id} may conflict with the requested change.`,
11668
+ itemId: item.id,
11669
+ fieldPath: item.fieldPath,
11670
+ sourceId: conflictSource.id
11671
+ };
11672
+ }
11673
+ function hasCertificateRequirementDetails(item) {
11674
+ const text = `${item.label} ${item.afterValue ?? ""} ${item.requestedValue ?? ""} ${item.reason ?? ""}`.toLowerCase();
11675
+ const hasHolder = /\b(holder|certificate holder|additional insured|loss payee|lender|landlord)\b/.test(text);
11676
+ const hasRequirement = /\b(primary|non[- ]?contributory|waiver|subrogation|notice|endorsement|requirement|wording)\b/.test(text);
11677
+ return hasHolder && hasRequirement;
11678
+ }
11679
+ function dedupeValidationIssues(issues) {
11680
+ const seen = /* @__PURE__ */ new Set();
11681
+ return issues.filter((issue) => {
11682
+ const key = `${issue.code}:${issue.itemId ?? ""}:${issue.fieldPath ?? ""}:${issue.sourceId ?? ""}`;
11683
+ if (seen.has(key)) return false;
11684
+ seen.add(key);
11685
+ return true;
11686
+ });
11687
+ }
11688
+ function heuristicNormalize(requestText, evidenceSources) {
11689
+ const lower = requestText.toLowerCase();
11690
+ const action = lower.includes("remove") || lower.includes("delete") ? "remove" : lower.includes("add") ? "add" : "update";
11691
+ const effectiveDate = requestText.match(/\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b/)?.[0];
11692
+ const label = requestText.split(/[.;\n]/)[0]?.trim() || "Policy change";
11693
+ const quoted = Array.from(requestText.matchAll(/"([^"]+)"/g)).map((match) => match[1]);
11694
+ const beforeValue = quoted.find(
11695
+ (quote) => evidenceSources.some((source) => source.text.toLowerCase().includes(quote.toLowerCase()))
11696
+ );
11697
+ const citationSource = beforeValue ? evidenceSources.find((source) => source.text.toLowerCase().includes(beforeValue.toLowerCase())) : void 0;
11698
+ const result = {
11699
+ summary: label,
11700
+ items: [{
11701
+ action,
11702
+ kind: inferChangeKind(inferFieldPath(requestText), requestText),
11703
+ affectedPolicyId: evidenceSources.find((source) => source.documentId)?.documentId ?? "unknown",
11704
+ fieldPath: inferFieldPath(requestText),
11705
+ label,
11706
+ beforeValue,
11707
+ afterValue: inferAfterValue(requestText, beforeValue),
11708
+ requestedValue: inferAfterValue(requestText, beforeValue),
11709
+ effectiveDate,
11710
+ reason: void 0,
11711
+ sourceIds: citationSource ? [citationSource.id] : [],
11712
+ sourceSpanIds: citationSource ? [citationSource.id] : [],
11713
+ citations: beforeValue && citationSource ? [{
11714
+ sourceId: citationSource.id,
11715
+ quote: beforeValue,
11716
+ page: citationSource.page,
11717
+ fieldPath: citationSource.fieldPath
11718
+ }] : [],
11719
+ confidence: "low",
11720
+ confidenceScore: 0.45
11721
+ }],
11722
+ missingInfoQuestions: inferAfterValue(requestText, beforeValue) ? [] : [{
11723
+ fieldPath: inferFieldPath(requestText),
11724
+ question: "What new value should the carrier endorse for this change?",
11725
+ reason: "The request did not include a clear target value."
11726
+ }]
11727
+ };
11728
+ return result;
11729
+ }
11730
+ function inferChangeKind(fieldPath, requestText) {
11731
+ const lower = `${fieldPath} ${requestText}`.toLowerCase();
11732
+ if (lower.includes("additional insured")) return "additional_insured_change";
11733
+ if (lower.includes("named insured")) return "named_insured_change";
11734
+ if (lower.includes("limit")) return "limit_change";
11735
+ if (lower.includes("deductible")) return "deductible_change";
11736
+ if (lower.includes("location") || lower.includes("address")) return "location_change";
11737
+ if (lower.includes("vehicle") || lower.includes("auto")) return "vehicle_change";
11738
+ if (lower.includes("certificate") || lower.includes("holder")) return "certificate_endorsement_request";
11739
+ if (lower.includes("cancel")) return "cancellation";
11740
+ if (lower.includes("nonrenew")) return "nonrenewal";
11741
+ if (lower.includes("renewal") || lower.includes("submission")) return "renewal_submission_update";
11742
+ if (lower.includes("coverage")) return "coverage_change";
11743
+ return "general_endorsement";
11744
+ }
11745
+ function inferFieldPath(requestText) {
11746
+ const lower = requestText.toLowerCase();
11747
+ if (lower.includes("address")) return "insured.address";
11748
+ if (lower.includes("vehicle")) return "auto.vehicles";
11749
+ if (lower.includes("driver")) return "auto.drivers";
11750
+ if (lower.includes("limit")) return "coverage.limit";
11751
+ if (lower.includes("deductible")) return "coverage.deductible";
11752
+ return "policy.change";
11753
+ }
11754
+ function inferAfterValue(requestText, beforeValue) {
11755
+ const toMatch = requestText.match(/\bto\s+([^.;\n]+)/i)?.[1]?.trim();
11756
+ if (toMatch && toMatch !== beforeValue) return toMatch.replace(/^"|"$/g, "");
11757
+ const fromToMatch = requestText.match(/\bfrom\s+(.+?)\s+to\s+([^.;\n]+)/i)?.[2]?.trim();
11758
+ return fromToMatch?.replace(/^"|"$/g, "");
11759
+ }
11760
+ function heuristicParseAnswers(replyText, questions) {
11761
+ const unanswered = questions.filter((question) => !question.answer);
11762
+ if (unanswered.length !== 1 || !replyText.trim()) return [];
11763
+ return [{ questionId: unanswered[0].id, answer: replyText.trim() }];
11764
+ }
11765
+ function summarizeItems(items) {
11766
+ return items.map((item) => `${item.action} ${item.label}`).join("; ");
11767
+ }
11768
+ function buildPceSubmissionPacket(state, createdAt) {
11769
+ const citations = uniqueCitations(state.items.flatMap((item) => item.citations));
11770
+ const readyItems = state.items.filter((item) => item.status === "ready");
11771
+ const openQuestions = state.missingInfoQuestions.filter((question) => !question.answer);
11772
+ const artifacts = [
11773
+ {
11774
+ id: stableCaseId("artifact", [state.id, "underwriter_summary"]),
11775
+ kind: "underwriter_summary",
11776
+ title: "Underwriter summary",
11777
+ content: [
11778
+ state.summary,
11779
+ "",
11780
+ ...state.items.map((item) => `- ${item.action.toUpperCase()} ${item.label}: ${item.beforeValue ?? "(not cited)"} -> ${item.afterValue ?? "(pending)"}`),
11781
+ "",
11782
+ "Impact analysis:",
11783
+ ...state.impacts.map((impact) => `- ${impact.itemId}: endorsement=${impact.likelyEndorsementRequired ? "likely" : "not expected"}, carrierApproval=${impact.carrierApprovalLikelyRequired ? "likely" : "not expected"}`)
11784
+ ].join("\n"),
11785
+ citations
11786
+ },
11787
+ {
11788
+ id: stableCaseId("artifact", [state.id, "carrier_email"]),
11789
+ kind: "carrier_email",
11790
+ title: "Carrier email",
11791
+ content: [
11792
+ "Please process the following policy change endorsement request:",
11793
+ "",
11794
+ ...readyItems.map((item) => `- ${item.label}: ${item.afterValue ?? item.action}`)
11795
+ ].join("\n"),
11796
+ citations
11797
+ },
11798
+ {
11799
+ id: stableCaseId("artifact", [state.id, "missing_info_request"]),
11800
+ kind: "missing_info_request",
11801
+ title: "Missing information request",
11802
+ content: openQuestions.length ? openQuestions.map((question) => `- ${question.question}`).join("\n") : "No missing information questions are open.",
11803
+ citations: []
11804
+ },
11805
+ {
11806
+ id: stableCaseId("artifact", [state.id, "json_packet"]),
11807
+ kind: "json_packet",
11808
+ title: "JSON packet",
11809
+ content: JSON.stringify({ caseId: state.id, items: state.items, impacts: state.impacts, evidenceSourceIds: state.evidenceSources.map((source) => source.id) }, null, 2),
11810
+ citations
11811
+ },
11812
+ {
11813
+ id: stableCaseId("artifact", [state.id, "validation_report"]),
11814
+ kind: "validation_report",
11815
+ title: "Validation report",
11816
+ content: state.validationIssues.length ? state.validationIssues.map((issue) => `- [${issue.severity}] ${issue.code}: ${issue.message}`).join("\n") : "No validation issues.",
11817
+ citations: []
11818
+ }
11819
+ ];
11820
+ return {
11821
+ id: stableCaseId("packet", [state.id, state.updatedAt, state.items.map((item) => item.id)]),
11822
+ caseId: state.id,
11823
+ pceCase: state,
11824
+ artifacts,
11825
+ validationIssues: state.validationIssues,
11826
+ missingInfoQuestions: state.missingInfoQuestions,
11827
+ createdAt
11828
+ };
11829
+ }
11830
+ function uniqueCitations(citations) {
11831
+ const seen = /* @__PURE__ */ new Set();
11832
+ return citations.filter((citation) => {
11833
+ const key = `${citation.sourceId}:${citation.quote}:${citation.page ?? ""}:${citation.fieldPath ?? ""}`;
11834
+ if (seen.has(key)) return false;
11835
+ seen.add(key);
11836
+ return true;
11837
+ });
11838
+ }
11839
+
11840
+ // src/pce/quality.ts
11841
+ function buildPceQualityReport(state) {
11842
+ const blockingIssues = state.validationIssues.filter((issue) => issue.severity === "blocking").length;
11843
+ const warningIssues = state.validationIssues.filter((issue) => issue.severity === "warning").length;
11844
+ const missingInfoCount = state.missingInfoQuestions.filter((question) => !question.answer?.trim()).length;
11845
+ const ungroundedExistingValueCount = state.items.filter(
11846
+ (item) => item.beforeValue?.trim() && item.sourceSpanIds.length === 0
11847
+ ).length;
11848
+ const qualityGateStatus = blockingIssues > 0 || ungroundedExistingValueCount > 0 ? "failed" : warningIssues > 0 || missingInfoCount > 0 ? "warning" : "passed";
11849
+ return {
11850
+ qualityGateStatus,
11851
+ blockingIssues,
11852
+ warningIssues,
11853
+ missingInfoCount,
11854
+ ungroundedExistingValueCount
11855
+ };
11856
+ }
11857
+
9743
11858
  // src/prompts/intent.ts
9744
11859
  function buildClassifyMessagePrompt(platform) {
9745
11860
  const platformFields = {
@@ -9868,6 +11983,7 @@ export {
9868
11983
  AcroFormMappingSchema,
9869
11984
  AddressSchema,
9870
11985
  AdmittedStatusSchema,
11986
+ AgenticExecutionModeSchema,
9871
11987
  AnswerParsingResultSchema,
9872
11988
  ApplicationClassifyResultSchema,
9873
11989
  ApplicationEmailReviewSchema,
@@ -9894,6 +12010,15 @@ export {
9894
12010
  COVERAGE_COMPARISON_TOOL,
9895
12011
  COVERAGE_FORMS,
9896
12012
  COVERAGE_TRIGGERS,
12013
+ CaseActionSchema,
12014
+ CaseCitationSchema,
12015
+ CaseEvidenceSourceSchema,
12016
+ CasePacketArtifactKindSchema,
12017
+ CasePacketArtifactSchema,
12018
+ CaseProposalSchema,
12019
+ CaseProposalScoreSchema,
12020
+ CaseSubmissionPacketSchema,
12021
+ CaseValidationIssueSchema,
9897
12022
  ChunkTypeSchema,
9898
12023
  CitationSchema,
9899
12024
  ClaimRecordSchema,
@@ -9975,6 +12100,8 @@ export {
9975
12100
  LookupRequestSchema,
9976
12101
  LossSettlementSchema,
9977
12102
  LossSummarySchema,
12103
+ MemorySourceStore,
12104
+ MissingInfoQuestionSchema,
9978
12105
  NamedInsuredSchema,
9979
12106
  PERSONAL_AUTO_USAGES,
9980
12107
  PET_SPECIES,
@@ -9985,6 +12112,9 @@ export {
9985
12112
  ParsedAnswerSchema,
9986
12113
  PaymentInstallmentSchema,
9987
12114
  PaymentPlanSchema,
12115
+ PceCaseStateSchema,
12116
+ PceNormalizationResultSchema,
12117
+ PceSubmissionPacketSchema,
9988
12118
  PersonalArticlesDeclarationsSchema,
9989
12119
  PersonalAutoDeclarationsSchema,
9990
12120
  PersonalAutoUsageSchema,
@@ -9993,6 +12123,13 @@ export {
9993
12123
  PetDeclarationsSchema,
9994
12124
  PetSpeciesSchema,
9995
12125
  PlatformSchema,
12126
+ PolicyChangeActionSchema,
12127
+ PolicyChangeConfidenceSchema,
12128
+ PolicyChangeImpactSchema,
12129
+ PolicyChangeItemSchema,
12130
+ PolicyChangeKindSchema,
12131
+ PolicyChangeRequestSchema,
12132
+ PolicyChangeStatusSchema,
9996
12133
  PolicyConditionSchema,
9997
12134
  PolicyDocumentSchema,
9998
12135
  PolicySectionTypeSchema,
@@ -10007,6 +12144,7 @@ export {
10007
12144
  QueryClassifyResultSchema,
10008
12145
  QueryIntentSchema,
10009
12146
  QueryResultSchema,
12147
+ QueryRetrievalModeSchema,
10010
12148
  QuestionBatchResultSchema,
10011
12149
  QuoteDocumentSchema,
10012
12150
  QuoteSectionTypeSchema,
@@ -10025,6 +12163,13 @@ export {
10025
12163
  ScheduledItemCategorySchema,
10026
12164
  SectionSchema,
10027
12165
  SharedLimitSchema,
12166
+ SourceChunkSchema,
12167
+ SourceKindSchema,
12168
+ SourceSpanBBoxSchema,
12169
+ SourceSpanKindSchema,
12170
+ SourceSpanLocationSchema,
12171
+ SourceSpanRefSchema,
12172
+ SourceSpanSchema,
10028
12173
  SubAnswerSchema,
10029
12174
  SubQuestionSchema,
10030
12175
  SubjectivityCategorySchema,
@@ -10040,6 +12185,7 @@ export {
10040
12185
  UnderwritingConditionSchema,
10041
12186
  VALUATION_METHODS,
10042
12187
  VEHICLE_COVERAGE_TYPES,
12188
+ ValidationIssueSeveritySchema,
10043
12189
  ValuationMethodSchema,
10044
12190
  VehicleCoverageSchema,
10045
12191
  VehicleCoverageTypeSchema,
@@ -10064,6 +12210,11 @@ export {
10064
12210
  buildIntentPrompt,
10065
12211
  buildInterpretAttachmentPrompt,
10066
12212
  buildLookupFillPrompt,
12213
+ buildPageSourceSpans,
12214
+ buildPceNormalizePrompt,
12215
+ buildPceQualityReport,
12216
+ buildPceReplyPrompt,
12217
+ buildPceSubmissionPacket,
10067
12218
  buildPdfProviderOptions,
10068
12219
  buildQueryClassifyPrompt,
10069
12220
  buildQuestionBatchPrompt,
@@ -10072,28 +12223,54 @@ export {
10072
12223
  buildReplyIntentClassificationPrompt,
10073
12224
  buildRespondPrompt,
10074
12225
  buildSafetyPrompt,
12226
+ buildSectionSourceSpans,
12227
+ buildSourceSpan,
12228
+ buildSourceSpanId,
12229
+ buildTextSourceSpans,
10075
12230
  buildVerifyPrompt,
10076
12231
  chunkDocument,
12232
+ chunkSourceSpans,
12233
+ collectPceEvidenceSources,
12234
+ compareSourceEvidence,
10077
12235
  createApplicationPipeline,
10078
12236
  createExtractor,
12237
+ createPceAgent,
10079
12238
  createPipelineContext,
10080
12239
  createQueryAgent,
12240
+ evaluateCaseProposals,
12241
+ evidenceContainsQuote,
10081
12242
  extractPageRange,
10082
12243
  fillAcroForm,
12244
+ generateNextMessage,
10083
12245
  getAcroFormFields,
10084
12246
  getExtractor,
10085
12247
  getFileIdentifier,
10086
12248
  getPdfPageCount,
10087
12249
  getTemplate,
10088
12250
  isFileReference,
12251
+ mergeQuestionAnswers,
12252
+ normalizeForMatch,
12253
+ orderSourceEvidence,
10089
12254
  overlayTextOnPdf,
10090
12255
  pLimit,
10091
12256
  pdfInputToBase64,
10092
12257
  pdfInputToBytes,
12258
+ processReply,
12259
+ resolveModelBudget,
10093
12260
  safeGenerateObject,
10094
12261
  sanitizeNulls,
12262
+ scoreCaseProposal,
12263
+ selectPceExecutionMode,
12264
+ sourceSpanTextHash,
12265
+ stableCaseId,
12266
+ stableHash2 as stableHash,
12267
+ stablePolicyChangeItemId,
12268
+ stableStringify,
10095
12269
  stripFences,
10096
12270
  toStrictSchema,
12271
+ validateEvidence,
12272
+ validatePceItems,
12273
+ validateQuotedEvidence,
10097
12274
  withRetry
10098
12275
  };
10099
12276
  //# sourceMappingURL=index.mjs.map