@claritylabs/cl-sdk 0.3.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +495 -107
- package/dist/index.d.mts +24371 -3960
- package/dist/index.d.ts +24371 -3960
- package/dist/index.js +1647 -127
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1616 -127
- package/dist/index.mjs.map +1 -1
- package/dist/storage-sqlite.d.mts +9261 -1260
- package/dist/storage-sqlite.d.ts +9261 -1260
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1471,6 +1471,206 @@ function assembleDocument(documentId, documentType, memory) {
|
|
|
1471
1471
|
};
|
|
1472
1472
|
}
|
|
1473
1473
|
|
|
1474
|
+
// src/prompts/coordinator/format.ts
|
|
1475
|
+
function buildFormatPrompt(entries) {
|
|
1476
|
+
const block = entries.map((e) => `===ENTRY ${e.id}===
|
|
1477
|
+
${e.text}`).join("\n\n");
|
|
1478
|
+
return `You are a markdown formatting specialist for insurance document content. You will receive numbered content entries extracted from insurance policies, quotes, and endorsements. Your job is to clean up the formatting so every entry renders correctly as standard markdown.
|
|
1479
|
+
|
|
1480
|
+
## Primary issues to fix
|
|
1481
|
+
|
|
1482
|
+
### 1. Pipe-delimited data missing table syntax
|
|
1483
|
+
The most common issue. Content uses pipe characters as column separators but is missing the separator row required for markdown table rendering.
|
|
1484
|
+
|
|
1485
|
+
Before (broken \u2014 won't render as a table):
|
|
1486
|
+
COVERAGE | FORM # | LIMIT | DEDUCTIBLE
|
|
1487
|
+
Employee Theft | | $10,000 | $1,000
|
|
1488
|
+
|
|
1489
|
+
After (valid markdown table):
|
|
1490
|
+
| COVERAGE | FORM # | LIMIT | DEDUCTIBLE |
|
|
1491
|
+
| --- | --- | --- | --- |
|
|
1492
|
+
| Employee Theft | | $10,000 | $1,000 |
|
|
1493
|
+
|
|
1494
|
+
Rules for pipe tables:
|
|
1495
|
+
- Add leading and trailing pipes to every row
|
|
1496
|
+
- Add the separator row (| --- | --- |) after the header row
|
|
1497
|
+
- Every row must have the same number of pipe-separated columns as the header
|
|
1498
|
+
- Empty cells are fine \u2014 just keep the pipes: | | $10,000 |
|
|
1499
|
+
|
|
1500
|
+
### 2. Sub-items indented within pipe tables
|
|
1501
|
+
Insurance schedules often have indented sub-items that belong to the previous coverage line. These break table column counts.
|
|
1502
|
+
|
|
1503
|
+
Before (broken):
|
|
1504
|
+
COVERAGE | LIMIT | DEDUCTIBLE
|
|
1505
|
+
Causes Of Loss - Equipment Breakdown | PR650END
|
|
1506
|
+
Described Premises Limit | | $350,804 |
|
|
1507
|
+
Diagnostic Equipment | | $100,000 |
|
|
1508
|
+
Deductible Type - Business Income: Waiting Period - Hours
|
|
1509
|
+
Waiting Period (Hours): 24
|
|
1510
|
+
|
|
1511
|
+
After: Pull sub-items out of the table. End the table before the sub-items, show them as an indented list, then start a new table if tabular data resumes:
|
|
1512
|
+
| COVERAGE | LIMIT | DEDUCTIBLE |
|
|
1513
|
+
| --- | --- | --- |
|
|
1514
|
+
| Causes Of Loss - Equipment Breakdown | PR650END | |
|
|
1515
|
+
|
|
1516
|
+
- Described Premises Limit: $350,804
|
|
1517
|
+
- Diagnostic Equipment: $100,000
|
|
1518
|
+
- Deductible Type - Business Income: Waiting Period - Hours
|
|
1519
|
+
- Waiting Period (Hours): 24
|
|
1520
|
+
|
|
1521
|
+
### 3. Space-aligned tables
|
|
1522
|
+
Declarations often align columns with spaces instead of pipes. These render as plain monospace text and lose structure.
|
|
1523
|
+
|
|
1524
|
+
Before:
|
|
1525
|
+
Coverage Limit of Liability Retention
|
|
1526
|
+
A. Network Security Liability $500,000 $10,000
|
|
1527
|
+
B. Privacy Liability $500,000 $10,000
|
|
1528
|
+
|
|
1529
|
+
After (convert to proper markdown table):
|
|
1530
|
+
| Coverage | Limit of Liability | Retention |
|
|
1531
|
+
| --- | --- | --- |
|
|
1532
|
+
| A. Network Security Liability | $500,000 | $10,000 |
|
|
1533
|
+
| B. Privacy Liability | $500,000 | $10,000 |
|
|
1534
|
+
|
|
1535
|
+
### 4. Mixed table/prose content
|
|
1536
|
+
A single entry often contains prose paragraphs followed by tabular data followed by more prose. Handle each segment independently \u2014 don't try to force everything into one table.
|
|
1537
|
+
|
|
1538
|
+
### 5. General markdown cleanup
|
|
1539
|
+
- **Line spacing**: Remove excessive blank lines (3+ consecutive newlines \u2192 2). Ensure one blank line before and after tables and headings.
|
|
1540
|
+
- **Trailing whitespace**: Remove trailing spaces on all lines.
|
|
1541
|
+
- **Broken lists**: Ensure list items use consistent markers (-, *, or 1.) with proper nesting indentation.
|
|
1542
|
+
- **Orphaned formatting**: Close any unclosed bold (**), italic (*), or code (\`) markers.
|
|
1543
|
+
- **Heading levels**: Ensure heading markers (##) have a space after the hashes.
|
|
1544
|
+
|
|
1545
|
+
## Rules
|
|
1546
|
+
- Do NOT change the meaning or substance of any content. Only fix formatting.
|
|
1547
|
+
- Do NOT add new information, headers, or commentary.
|
|
1548
|
+
- Do NOT wrap entries in code fences.
|
|
1549
|
+
- Preserve all dollar amounts, dates, policy numbers, form numbers, and technical terms exactly as they appear.
|
|
1550
|
+
- If an entry is already well-formatted, return it unchanged.
|
|
1551
|
+
- When in doubt about whether something is a table, prefer table formatting for structured data with multiple columns.
|
|
1552
|
+
|
|
1553
|
+
Return your output in this exact format \u2014 one block per entry, in the same order:
|
|
1554
|
+
|
|
1555
|
+
===ENTRY 0===
|
|
1556
|
+
(cleaned content for entry 0)
|
|
1557
|
+
|
|
1558
|
+
===ENTRY 1===
|
|
1559
|
+
(cleaned content for entry 1)
|
|
1560
|
+
|
|
1561
|
+
...and so on for each entry.
|
|
1562
|
+
|
|
1563
|
+
Here are the entries to format:
|
|
1564
|
+
|
|
1565
|
+
${block}`;
|
|
1566
|
+
}
|
|
1567
|
+
|
|
1568
|
+
// src/extraction/formatter.ts
|
|
1569
|
+
/**
 * Gather the free-text content fields of a document that are worth sending
 * to the formatter.
 *
 * Visits `doc.summary`, every `sections[i].content`, every
 * `sections[i].subsections[j].content`, and the `content` of each item in
 * `endorsements`, `exclusions` and `conditions`, in that order.
 *
 * @param {object} doc - Document to scan; it is not modified.
 * @returns {Array<{id: number, path: string, text: string}>} One entry per
 *   qualifying field. `id` is the entry's position in the returned array and
 *   `path` is the accessor string (e.g. `sections[0].content`) used later to
 *   write the cleaned text back.
 */
function collectContentFields(doc) {
  // Only strings strictly longer than this are collected.
  const MIN_CONTENT_LENGTH = 20;
  const entries = [];

  const add = (path, text) => {
    // Non-strings (missing fields, arrays, objects) are never formatted.
    if (typeof text === "string" && text.length > MIN_CONTENT_LENGTH) {
      entries.push({ id: entries.length, path, text });
    }
  };
  // Missing or non-array collections are treated as empty.
  const listOf = (value) => (Array.isArray(value) ? value : []);

  add("summary", doc.summary);

  listOf(doc.sections).forEach((section, i) => {
    // Optional chaining tolerates null/undefined elements instead of throwing.
    add(`sections[${i}].content`, section?.content);
    listOf(section?.subsections).forEach((subsection, j) => {
      add(`sections[${i}].subsections[${j}].content`, subsection?.content);
    });
  });

  for (const field of ["endorsements", "exclusions", "conditions"]) {
    listOf(doc[field]).forEach((item, i) => {
      add(`${field}[${i}].content`, item?.content);
    });
  }

  return entries;
}
|
|
1606
|
+
/**
 * Split a formatter response into its per-entry blocks.
 *
 * The response is expected to contain `===ENTRY <n>===` markers, each
 * followed by the cleaned text for entry `<n>`. Anything before the first
 * marker is ignored. If an id appears more than once, the last block wins.
 *
 * @param {string} response - Raw model output.
 * @returns {Map<number, string>} Entry id mapped to its trimmed content;
 *   empty when `response` is not a string or contains no markers.
 */
function parseFormatResponse(response) {
  const results = new Map();
  // A missing or non-text response yields no replacements rather than a TypeError.
  if (typeof response !== "string") {
    return results;
  }
  // Splitting on a pattern with a capture group interleaves the captured ids
  // with the text between markers: [preamble, id, body, id, body, ...].
  const parts = response.split(/===ENTRY (\d+)===/);
  for (let i = 1; i < parts.length; i += 2) {
    const entryId = Number.parseInt(parts[i], 10);
    const content = parts[i + 1]?.trim();
    if (!Number.isNaN(entryId) && content !== undefined) {
      results.set(entryId, content);
    }
  }
  return results;
}
|
|
1618
|
+
/**
 * Write cleaned text back into the document at each entry's path.
 *
 * `doc` is mutated in place. An entry is skipped when no cleaned text exists
 * for its id, when the cleaned text is empty (so original content is never
 * blanked out), when its path is malformed, or when any container along the
 * path is missing.
 *
 * @param {object} doc - Document to update.
 * @param {Array<{id: number, path: string}>} entries - Entries whose `path`
 *   is an accessor such as `summary`, `sections[0].content` or
 *   `sections[0].subsections[1].content`.
 * @param {Map<number, string>} formatted - Cleaned text keyed by entry id.
 * @returns {void}
 */
function applyFormattedContent(doc, entries, formatted) {
  // name, optional [index]es, then any number of .name[index] segments.
  const PATH_SHAPE = /^\w+(?:\[\d+\])*(?:\.\w+(?:\[\d+\])*)*$/;
  // Never traverse or assign through prototype-related keys.
  const UNSAFE_KEYS = new Set(["__proto__", "constructor", "prototype"]);
  const isContainer = (value) => value !== null && typeof value === "object";

  for (const entry of entries) {
    const cleaned = formatted.get(entry.id);
    if (!cleaned) continue;
    if (typeof entry.path !== "string" || !PATH_SHAPE.test(entry.path)) continue;

    // Dots and brackets are separators only, so every \w+ run is one key.
    // Numeric keys work as array indexes when used as property names.
    const keys = entry.path.match(/\w+/g);
    if (keys.some((key) => UNSAFE_KEYS.has(key))) continue;

    const lastKey = keys.pop();
    let target = doc;
    for (const key of keys) {
      if (!isContainer(target)) break;
      target = target[key];
    }
    // Assign only onto an existing object/array: a path ending in an index
    // replaces that element, never the whole array.
    if (isContainer(target)) {
      target[lastKey] = cleaned;
    }
  }
}
|
|
1643
|
+
// Upper bound on how many content entries go into a single formatting prompt.
var MAX_ENTRIES_PER_BATCH = 20;

/**
 * Run every collected content field of `doc` through the markdown formatter
 * and write the cleaned text back into the document.
 *
 * Entries are sent in batches of at most MAX_ENTRIES_PER_BATCH, one request
 * at a time. `doc` is mutated in place and also returned.
 *
 * @param {object} doc - Assembled document to clean up.
 * @param {Function} generateText - Called with `{ prompt, maxTokens,
 *   providerOptions }`; its result is read for `text` and optional `usage`.
 * @param {{providerOptions?: object, onProgress?: Function}} [options]
 * @returns {Promise<{document: object, usage: {inputTokens: number, outputTokens: number}}>}
 *   The same document plus token usage summed over all batches.
 */
async function formatDocumentContent(doc, generateText, options) {
  const entries = collectContentFields(doc);
  const totalUsage = { inputTokens: 0, outputTokens: 0 };
  if (entries.length === 0) {
    return { document: doc, usage: totalUsage };
  }
  options?.onProgress?.(`Formatting ${entries.length} content fields...`);

  for (let start = 0; start < entries.length; start += MAX_ENTRIES_PER_BATCH) {
    const batch = entries.slice(start, start + MAX_ENTRIES_PER_BATCH);
    const prompt = buildFormatPrompt(batch.map(({ id, text }) => ({ id, text })));
    const result = await withRetry(
      () => generateText({
        prompt,
        maxTokens: 16384,
        providerOptions: options?.providerOptions
      })
    );
    // Providers may omit usage or individual counters; treat those as zero so
    // the running totals never turn into NaN.
    totalUsage.inputTokens += result.usage?.inputTokens ?? 0;
    totalUsage.outputTokens += result.usage?.outputTokens ?? 0;
    // A response without text yields no replacements for this batch.
    const responseText = typeof result.text === "string" ? result.text : "";
    applyFormattedContent(doc, batch, parseFormatResponse(responseText));
  }
  return { document: doc, usage: totalUsage };
}
|
|
1673
|
+
|
|
1474
1674
|
// src/extraction/chunking.ts
|
|
1475
1675
|
function chunkDocument(doc) {
|
|
1476
1676
|
const chunks = [];
|
|
@@ -2966,8 +3166,14 @@ function createExtractor(config) {
|
|
|
2966
3166
|
}
|
|
2967
3167
|
onProgress?.("Assembling document...");
|
|
2968
3168
|
const document = assembleDocument(id, documentType, memory);
|
|
2969
|
-
|
|
2970
|
-
|
|
3169
|
+
onProgress?.("Formatting extracted content...");
|
|
3170
|
+
const formatResult = await formatDocumentContent(document, generateText, {
|
|
3171
|
+
providerOptions,
|
|
3172
|
+
onProgress
|
|
3173
|
+
});
|
|
3174
|
+
trackUsage(formatResult.usage);
|
|
3175
|
+
const chunks = chunkDocument(formatResult.document);
|
|
3176
|
+
return { document: formatResult.document, chunks, tokenUsage: totalUsage };
|
|
2971
3177
|
}
|
|
2972
3178
|
return { extract };
|
|
2973
3179
|
}
|
|
@@ -3185,6 +3391,129 @@ Respond with JSON only:
|
|
|
3185
3391
|
"applicationType": string | null // e.g. "General Liability", "Professional Liability", "Commercial Property", "Workers Compensation", "ACORD 125", etc.
|
|
3186
3392
|
}`;
|
|
3187
3393
|
|
|
3394
|
+
// src/schemas/application.ts
|
|
3395
|
+
import { z as z31 } from "zod";
|
|
3396
|
+
// Allowed values for an application field's `fieldType`.
var FieldTypeSchema = z31.enum([
  "text",
  "numeric",
  "currency",
  "date",
  "yes_no",
  "table",
  "declaration"
]);
// A single application form field: identity and labeling, its type, optional
// choice/table metadata, an optional dependency on another field, and the
// optional fill state (value, source, confidence).
var ApplicationFieldSchema = z31.object({
  id: z31.string(),
  label: z31.string(),
  section: z31.string(),
  fieldType: FieldTypeSchema,
  required: z31.boolean(),
  options: z31.array(z31.string()).optional(),
  columns: z31.array(z31.string()).optional(),
  requiresExplanationIfYes: z31.boolean().optional(),
  // NOTE(review): presumably `dependsOn` is another field's id and `whenValue`
  // the value that activates this field — confirm against callers.
  condition: z31.object({
    dependsOn: z31.string(),
    whenValue: z31.string()
  }).optional(),
  value: z31.string().optional(),
  source: z31.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
  confidence: z31.enum(["confirmed", "high", "medium", "low"]).optional()
});
// Structured output requested by classifyApplication; `confidence` is
// constrained to the range 0..1.
var ApplicationClassifyResultSchema = z31.object({
  isApplication: z31.boolean(),
  confidence: z31.number().min(0).max(1),
  applicationType: z31.string().nullable()
});
// Structured output requested by extractFields.
var FieldExtractionResultSchema = z31.object({
  fields: z31.array(ApplicationFieldSchema)
});
// One auto-filled value; `confidence` only admits "confirmed".
var AutoFillMatchSchema = z31.object({
  fieldId: z31.string(),
  value: z31.string(),
  confidence: z31.enum(["confirmed"]),
  contextKey: z31.string()
});
// Structured output requested by autoFillFromContext.
var AutoFillResultSchema = z31.object({
  matches: z31.array(AutoFillMatchSchema)
});
// Structured output requested by batchQuestions: a list of batches, each a
// list of field ids.
var QuestionBatchResultSchema = z31.object({
  batches: z31.array(z31.array(z31.string()).describe("Array of field IDs in this batch"))
});
// A request to look up values for the listed target fields.
var LookupRequestSchema = z31.object({
  type: z31.string().describe("Type of lookup: 'records', 'website', 'policy'"),
  description: z31.string(),
  url: z31.string().optional(),
  targetFieldIds: z31.array(z31.string())
});
// Structured output requested by classifyReplyIntent.
var ReplyIntentSchema = z31.object({
  primaryIntent: z31.enum(["answers_only", "question", "lookup_request", "mixed"]),
  hasAnswers: z31.boolean(),
  questionText: z31.string().optional(),
  questionFieldIds: z31.array(z31.string()).optional(),
  lookupRequests: z31.array(LookupRequestSchema).optional()
});
// One answer parsed from a reply, with an optional explanation.
var ParsedAnswerSchema = z31.object({
  fieldId: z31.string(),
  value: z31.string(),
  explanation: z31.string().optional()
});
// Structured output requested by parseAnswers.
var AnswerParsingResultSchema = z31.object({
  answers: z31.array(ParsedAnswerSchema),
  unanswered: z31.array(z31.string()).describe("Field IDs that were not answered")
});
// One value filled from a lookup, with a citable source.
var LookupFillSchema = z31.object({
  fieldId: z31.string(),
  value: z31.string(),
  source: z31.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
});
// Structured output requested by fillFromLookup.
var LookupFillResultSchema = z31.object({
  fills: z31.array(LookupFillSchema),
  unfillable: z31.array(z31.string()),
  explanation: z31.string().optional()
});
// Where to draw a field's text on a PDF page; x/y are percentages of the page
// per the field descriptions below (the schema itself does not bound them).
var FlatPdfPlacementSchema = z31.object({
  fieldId: z31.string(),
  page: z31.number(),
  x: z31.number().describe("Percentage from left edge (0-100)"),
  y: z31.number().describe("Percentage from top edge (0-100)"),
  text: z31.string(),
  fontSize: z31.number().optional(),
  isCheckmark: z31.boolean().optional()
});
// Pairs an application field with an AcroForm field name and the value to set.
var AcroFormMappingSchema = z31.object({
  fieldId: z31.string(),
  acroFormName: z31.string(),
  value: z31.string()
});
// Application state: its fields, optional question batches, the current batch
// index (defaults to 0), a workflow status, and created/updated timestamps.
var ApplicationStateSchema = z31.object({
  id: z31.string(),
  pdfBase64: z31.string().optional().describe("Original PDF, omitted after extraction"),
  title: z31.string().optional(),
  applicationType: z31.string().nullable().optional(),
  fields: z31.array(ApplicationFieldSchema),
  batches: z31.array(z31.array(z31.string())).optional(),
  currentBatchIndex: z31.number().default(0),
  status: z31.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
  // NOTE(review): numeric timestamps; unit (ms vs s) is not visible here — confirm.
  createdAt: z31.number(),
  updatedAt: z31.number()
});
|
|
3500
|
+
|
|
3501
|
+
// src/application/agents/classifier.ts
|
|
3502
|
+
/**
 * Ask the model whether a document is an insurance application.
 *
 * @param {string} pdfContent - Document content appended to the classify prompt.
 * @param {Function} generateObject - Structured-output generator; called with
 *   `{ prompt, schema, maxTokens, providerOptions }`.
 * @param {object} [providerOptions] - Passed through to `generateObject`.
 * @returns {Promise<{result: object, usage: object}>} The generated object and
 *   the usage reported alongside it.
 */
async function classifyApplication(pdfContent, generateObject, providerOptions) {
  const runClassification = () => {
    const prompt = `${APPLICATION_CLASSIFY_PROMPT}\n\nAnalyze the following document content:\n${pdfContent}`;
    return generateObject({
      prompt,
      schema: ApplicationClassifyResultSchema,
      maxTokens: 512,
      providerOptions
    });
  };
  const response = await withRetry(runClassification);
  return { result: response.object, usage: response.usage };
}
|
|
3516
|
+
|
|
3188
3517
|
// src/prompts/application/field-extraction.ts
|
|
3189
3518
|
function buildFieldExtractionPrompt() {
|
|
3190
3519
|
return `Extract all fillable fields from this insurance application PDF as a JSON array. Be concise \u2014 use short IDs and minimal keys.
|
|
@@ -3217,6 +3546,24 @@ Example:
|
|
|
3217
3546
|
Extract ALL fields. Respond with ONLY the JSON array, no other text.`;
|
|
3218
3547
|
}
|
|
3219
3548
|
|
|
3549
|
+
// src/application/agents/field-extractor.ts
|
|
3550
|
+
/**
 * Extract the fillable fields of an application via the model.
 *
 * @param {string} pdfContent - Application content appended to the extraction prompt.
 * @param {Function} generateObject - Structured-output generator; called with
 *   `{ prompt, schema, maxTokens, providerOptions }`.
 * @param {object} [providerOptions] - Passed through to `generateObject`.
 * @returns {Promise<{fields: Array<object>, usage: object}>} The `fields` of
 *   the generated object and the reported usage.
 */
async function extractFields(pdfContent, generateObject, providerOptions) {
  const instructions = buildFieldExtractionPrompt();
  const prompt = `${instructions}\n\nExtract fields from this application:\n${pdfContent}`;
  const requestExtraction = () => generateObject({
    prompt,
    schema: FieldExtractionResultSchema,
    maxTokens: 8192,
    providerOptions
  });
  const { object: extraction, usage } = await withRetry(requestExtraction);
  return { fields: extraction.fields, usage };
}
|
|
3566
|
+
|
|
3220
3567
|
// src/prompts/application/auto-fill.ts
|
|
3221
3568
|
function buildAutoFillPrompt(fields, orgContext) {
|
|
3222
3569
|
const fieldList = fields.map((f) => `- ${f.id}: "${f.label}" (${f.fieldType}, section: ${f.section})`).join("\n");
|
|
@@ -3246,6 +3593,39 @@ Respond with JSON only:
|
|
|
3246
3593
|
Only include fields you can confidently fill. Do not guess or fabricate values.`;
|
|
3247
3594
|
}
|
|
3248
3595
|
|
|
3596
|
+
// src/application/agents/auto-filler.ts
|
|
3597
|
+
/**
 * Ask the model to fill fields from the organisation context.
 *
 * Only each field's id, label, fieldType and section are put in the prompt.
 *
 * @param {Array<object>} fields - Application fields to try to fill.
 * @param {*} orgContext - Context forwarded to `buildAutoFillPrompt`.
 * @param {Function} generateObject - Structured-output generator.
 * @param {object} [providerOptions] - Passed through to `generateObject`.
 * @returns {Promise<{result: object, usage: object}>}
 */
async function autoFillFromContext(fields, orgContext, generateObject, providerOptions) {
  const summarize = ({ id, label, fieldType, section }) => ({ id, label, fieldType, section });
  const prompt = buildAutoFillPrompt(fields.map(summarize), orgContext);
  const requestAutoFill = () => generateObject({
    prompt,
    schema: AutoFillResultSchema,
    maxTokens: 4096,
    providerOptions
  });
  const response = await withRetry(requestAutoFill);
  return { result: response.object, usage: response.usage };
}
|
|
3615
|
+
/**
 * Search prior answers for every field that has no value yet.
 *
 * @param {Array<object>} fields - Application fields; those with a falsy
 *   `value` count as unfilled.
 * @param {{searchPriorAnswers: Function}} backfillProvider - Receives the
 *   unfilled fields (id, label, section, fieldType) and `{ limit }`, where
 *   the limit is twice the number of unfilled fields.
 * @returns {Promise<*>} `[]` when nothing is unfilled (the provider is not
 *   called); otherwise whatever the provider returns.
 */
async function backfillFromPriorAnswers(fields, backfillProvider) {
  const pending = fields
    .filter(({ value }) => !value)
    .map(({ id, label, section, fieldType }) => ({ id, label, section, fieldType }));
  if (pending.length === 0) {
    return [];
  }
  const searchOptions = { limit: pending.length * 2 };
  return backfillProvider.searchPriorAnswers(pending, searchOptions);
}
|
|
3628
|
+
|
|
3249
3629
|
// src/prompts/application/question-batch.ts
|
|
3250
3630
|
function buildQuestionBatchPrompt(unfilledFields) {
|
|
3251
3631
|
const fieldList = unfilledFields.map(
|
|
@@ -3280,120 +3660,27 @@ Respond with JSON only:
|
|
|
3280
3660
|
}`;
|
|
3281
3661
|
}
|
|
3282
3662
|
|
|
3283
|
-
// src/
|
|
3284
|
-
function
|
|
3285
|
-
const
|
|
3286
|
-
|
|
3287
|
-
|
|
3288
|
-
|
|
3289
|
-
|
|
3290
|
-
|
|
3291
|
-
|
|
3292
|
-
|
|
3293
|
-
|
|
3294
|
-
|
|
3295
|
-
|
|
3296
|
-
|
|
3297
|
-
|
|
3298
|
-
|
|
3299
|
-
|
|
3300
|
-
|
|
3301
|
-
|
|
3302
|
-
|
|
3303
|
-
|
|
3304
|
-
{
|
|
3305
|
-
"answers": [
|
|
3306
|
-
{
|
|
3307
|
-
"fieldId": "company_name",
|
|
3308
|
-
"value": "Acme Corp"
|
|
3309
|
-
},
|
|
3310
|
-
{
|
|
3311
|
-
"fieldId": "prior_claims_decl",
|
|
3312
|
-
"value": "yes",
|
|
3313
|
-
"explanation": "One claim in 2024 for water damage, $15,000 paid"
|
|
3314
|
-
}
|
|
3315
|
-
],
|
|
3316
|
-
"unanswered": ["field_id_that_was_not_answered"]
|
|
3317
|
-
}
|
|
3318
|
-
|
|
3319
|
-
Only include answers you are confident about. If a response is ambiguous, include the field in "unanswered".`;
|
|
3320
|
-
}
|
|
3321
|
-
|
|
3322
|
-
// src/prompts/application/confirmation.ts
|
|
3323
|
-
function buildConfirmationSummaryPrompt(fields, applicationTitle) {
|
|
3324
|
-
const fieldList = fields.map((f) => {
|
|
3325
|
-
const label = f.label ?? f.text ?? f.id;
|
|
3326
|
-
const value = f.value ?? "(not provided)";
|
|
3327
|
-
return `[${f.section}] ${label}: ${value}`;
|
|
3328
|
-
}).join("\n");
|
|
3329
|
-
return `Format the following insurance application answers into a clean, readable summary grouped by section. This will be sent as an email for the user to review and confirm.
|
|
3330
|
-
|
|
3331
|
-
APPLICATION: ${applicationTitle}
|
|
3332
|
-
|
|
3333
|
-
FIELD VALUES:
|
|
3334
|
-
${fieldList}
|
|
3335
|
-
|
|
3336
|
-
Format as a readable summary:
|
|
3337
|
-
- Group by section with section headers
|
|
3338
|
-
- Show each field as "Label: Value"
|
|
3339
|
-
- For declarations, show the question and the yes/no answer plus any explanation
|
|
3340
|
-
- Skip fields with no value unless they are required
|
|
3341
|
-
- End with a note asking the user to reply "Looks good" to confirm, or describe any changes needed
|
|
3342
|
-
|
|
3343
|
-
Respond with the formatted summary text only (no JSON wrapper). Use markdown formatting (bold headers, bullet points).`;
|
|
3344
|
-
}
|
|
3345
|
-
|
|
3346
|
-
// src/prompts/application/batch-email.ts
|
|
3347
|
-
function buildBatchEmailGenerationPrompt(batchFields, batchIndex, totalBatches, appTitle, totalFieldCount, filledFieldCount, previousBatchSummary, companyName) {
|
|
3348
|
-
const nonConditionalFields = batchFields.filter((f) => !f.condition);
|
|
3349
|
-
const conditionalFields = batchFields.filter((f) => f.condition);
|
|
3350
|
-
const fieldList = nonConditionalFields.map((f, i) => {
|
|
3351
|
-
let line = `${i + 1}. id="${f.id}" label="${f.label}" type=${f.fieldType}`;
|
|
3352
|
-
if (f.options) line += ` options=[${f.options.join(", ")}]`;
|
|
3353
|
-
return line;
|
|
3354
|
-
}).join("\n");
|
|
3355
|
-
const conditionalNote = conditionalFields.length > 0 ? `
|
|
3356
|
-
|
|
3357
|
-
CONDITIONAL FIELDS (DO NOT include in this email \u2014 they will be asked as follow-ups in a separate email after the parent is answered):
|
|
3358
|
-
${conditionalFields.map((f) => `- id="${f.id}" label="${f.label}" depends on ${f.condition.dependsOn} = "${f.condition.whenValue}"`).join("\n")}` : "";
|
|
3359
|
-
const company = companyName ?? "the company";
|
|
3360
|
-
const remainingFields = totalFieldCount - filledFieldCount;
|
|
3361
|
-
const estMinutes = Math.max(1, Math.round(remainingFields * 0.5));
|
|
3362
|
-
return `You are an internal risk management assistant helping your colleague fill out an insurance application for ${company}. You work FOR ${company} \u2014 you are NOT the insurer, broker, or any external party.
|
|
3363
|
-
|
|
3364
|
-
APPLICATION: ${appTitle ?? "Insurance Application"}
|
|
3365
|
-
COMPANY: ${company}
|
|
3366
|
-
PROGRESS: ${filledFieldCount} of ${totalFieldCount} fields done, ~${remainingFields} remaining (~${estMinutes} min of questions left)
|
|
3367
|
-
${previousBatchSummary ? `
|
|
3368
|
-
PREVIOUS ANSWERS RECEIVED:
|
|
3369
|
-
${previousBatchSummary}
|
|
3370
|
-
` : ""}
|
|
3371
|
-
FIELDS TO ASK ABOUT:
|
|
3372
|
-
${fieldList}${conditionalNote}
|
|
3373
|
-
|
|
3374
|
-
Rules:
|
|
3375
|
-
- ${previousBatchSummary ? 'Start by acknowledging previous answers or auto-filled data. If fields were auto-filled, list each field with its value AND cite the specific source (e.g. "from your GL Policy #ABC123", "from vercel.com", "from your business context"). If a web lookup was done, name the URL that was checked. Ask them to reply with corrections if anything is wrong.' : "Start with a one-line intro."}
|
|
3376
|
-
- Mention progress once using estimated time remaining. Don't mention section/batch numbers or field counts.
|
|
3377
|
-
- Use "${company}" by name when referring to the company. Also fine: "we" or "our". Never "our company" or "the company".
|
|
3378
|
-
- Ask questions plainly. No em-dashes for dramatic effect, no filler phrases like "need to nail down" or "let's dive into". Just ask.
|
|
3379
|
-
- For yes/no questions, ask naturally in one sentence. Don't list "Yes / No" as options. Mention what you'll need if the answer triggers a follow-up (e.g. "If not, I'll need a brief explanation.").
|
|
3380
|
-
- For fields with 2-3 options, mention them inline. 4+ options can be a short list.
|
|
3381
|
-
- Group related fields (address, coverage limits) into single compound questions.
|
|
3382
|
-
- Do NOT include conditional/follow-up fields. They will be sent separately.
|
|
3383
|
-
- Number each question.
|
|
3384
|
-
- Note expected format where relevant: dollar amounts for currency, MM/DD/YYYY for dates, column descriptions for tables.
|
|
3385
|
-
- End with a short closing.
|
|
3386
|
-
- Tone: professional, brief, matter-of-fact. Write like a busy coworker, not a chatbot. No flourishes, no em-dashes between clauses, no editorializing about the questions.
|
|
3387
|
-
|
|
3388
|
-
NEVER:
|
|
3389
|
-
- Sound like a salesperson or customer service agent
|
|
3390
|
-
- Use em-dashes for emphasis or dramatic pacing
|
|
3391
|
-
- Editorialize ("these two should wrap up this section", "just a couple more")
|
|
3392
|
-
- List "Yes / No / N/A" as bullet options
|
|
3393
|
-
- Include conditional follow-up questions
|
|
3394
|
-
- Mention section numbers, batch numbers, or field counts
|
|
3395
|
-
|
|
3396
|
-
Output the email body text ONLY. No subject line, no JSON. Use markdown for numbered lists.`;
|
|
3663
|
+
// src/application/agents/batcher.ts
|
|
3664
|
+
/**
 * Ask the model to group unfilled fields into question batches.
 *
 * Each field is reduced to a summary; `text` is set to the same string as
 * `label`.
 *
 * @param {Array<object>} unfilledFields - Fields still needing answers.
 * @param {Function} generateObject - Structured-output generator.
 * @param {object} [providerOptions] - Passed through to `generateObject`.
 * @returns {Promise<{result: object, usage: object}>}
 */
async function batchQuestions(unfilledFields, generateObject, providerOptions) {
  const toSummary = ({ id, label, fieldType, section, required, condition }) => ({
    id,
    label,
    text: label,
    fieldType,
    section,
    required,
    condition
  });
  const prompt = buildQuestionBatchPrompt(unfilledFields.map(toSummary));
  const requestBatches = () => generateObject({
    prompt,
    schema: QuestionBatchResultSchema,
    maxTokens: 2048,
    providerOptions
  });
  const response = await withRetry(requestBatches);
  return { result: response.object, usage: response.usage };
}
|
|
3398
3685
|
|
|
3399
3686
|
// src/prompts/application/reply-intent.ts
|
|
@@ -3432,23 +3719,78 @@ Respond with JSON only:
|
|
|
3432
3719
|
}`;
|
|
3433
3720
|
}
|
|
3434
3721
|
|
|
3435
|
-
// src/
|
|
3436
|
-
function
|
|
3437
|
-
|
|
3722
|
+
// src/application/agents/reply-router.ts
|
|
3723
|
+
/**
 * Classify the intent of a reply with respect to the given fields.
 *
 * Only each field's id and label are put in the prompt.
 *
 * @param {Array<object>} fields - Fields the reply may refer to.
 * @param {string} replyText - The reply text.
 * @param {Function} generateObject - Structured-output generator.
 * @param {object} [providerOptions] - Passed through to `generateObject`.
 * @returns {Promise<{intent: object, usage: object}>}
 */
async function classifyReplyIntent(fields, replyText, generateObject, providerOptions) {
  const idsAndLabels = fields.map(({ id, label }) => ({ id, label }));
  const prompt = buildReplyIntentClassificationPrompt(idsAndLabels, replyText);
  const requestIntent = () => generateObject({
    prompt,
    schema: ReplyIntentSchema,
    maxTokens: 1024,
    providerOptions
  });
  const response = await withRetry(requestIntent);
  return { intent: response.object, usage: response.usage };
}
|
|
3438
3736
|
|
|
3439
|
-
|
|
3737
|
+
// src/prompts/application/answer-parsing.ts
|
|
3738
|
+
function buildAnswerParsingPrompt(questions, emailBody) {
|
|
3739
|
+
const questionList = questions.map(
|
|
3740
|
+
(q, i) => `${i + 1}. ${q.id}: "${q.label ?? q.text}" (type: ${q.fieldType})`
|
|
3741
|
+
).join("\n");
|
|
3742
|
+
return `You are parsing a user's email reply to extract answers for specific insurance application questions.
|
|
3440
3743
|
|
|
3441
|
-
|
|
3744
|
+
QUESTIONS ASKED:
|
|
3745
|
+
${questionList}
|
|
3442
3746
|
|
|
3443
|
-
|
|
3444
|
-
${
|
|
3445
|
-
` : ""}
|
|
3747
|
+
USER'S EMAIL REPLY:
|
|
3748
|
+
${emailBody}
|
|
3446
3749
|
|
|
3447
|
-
|
|
3750
|
+
Extract answers for each question. Handle:
|
|
3751
|
+
- Direct numbered answers (1. answer, 2. answer)
|
|
3752
|
+
- Inline answers referencing the question
|
|
3753
|
+
- Table data provided as lists or comma-separated values
|
|
3754
|
+
- Yes/no answers with optional explanations
|
|
3755
|
+
- Partial responses (some questions answered, others skipped)
|
|
3448
3756
|
|
|
3449
|
-
|
|
3757
|
+
Respond with JSON only:
|
|
3758
|
+
{
|
|
3759
|
+
"answers": [
|
|
3760
|
+
{
|
|
3761
|
+
"fieldId": "company_name",
|
|
3762
|
+
"value": "Acme Corp"
|
|
3763
|
+
},
|
|
3764
|
+
{
|
|
3765
|
+
"fieldId": "prior_claims_decl",
|
|
3766
|
+
"value": "yes",
|
|
3767
|
+
"explanation": "One claim in 2024 for water damage, $15,000 paid"
|
|
3768
|
+
}
|
|
3769
|
+
],
|
|
3770
|
+
"unanswered": ["field_id_that_was_not_answered"]
|
|
3771
|
+
}
|
|
3450
3772
|
|
|
3451
|
-
|
|
3773
|
+
Only include answers you are confident about. If a response is ambiguous, include the field in "unanswered".`;
|
|
3774
|
+
}
|
|
3775
|
+
|
|
3776
|
+
// src/application/agents/answer-parser.ts
|
|
3777
|
+
/**
 * Extract answers for the given fields from a reply.
 *
 * Each field becomes a question summary; `text` is set to the same string as
 * `label`.
 *
 * @param {Array<object>} fields - Fields that were asked about.
 * @param {string} replyText - The reply text to parse.
 * @param {Function} generateObject - Structured-output generator.
 * @param {object} [providerOptions] - Passed through to `generateObject`.
 * @returns {Promise<{result: object, usage: object}>}
 */
async function parseAnswers(fields, replyText, generateObject, providerOptions) {
  const toQuestion = ({ id, label, fieldType }) => ({
    id,
    label,
    text: label,
    fieldType
  });
  const prompt = buildAnswerParsingPrompt(fields.map(toQuestion), replyText);
  const requestParsing = () => generateObject({
    prompt,
    schema: AnswerParsingResultSchema,
    maxTokens: 4096,
    providerOptions
  });
  const response = await withRetry(requestParsing);
  return { result: response.object, usage: response.usage };
}
|
|
3453
3795
|
|
|
3454
3796
|
// src/prompts/application/pdf-mapping.ts
|
|
@@ -3555,6 +3897,1122 @@ Respond with JSON only:
|
|
|
3555
3897
|
}`;
|
|
3556
3898
|
}
|
|
3557
3899
|
|
|
3900
|
+
// src/application/agents/lookup-filler.ts
|
|
3901
|
+
/**
 * Ask the model to fill target fields from looked-up data.
 *
 * Requests are reduced to type, description and targetFieldIds; fields are
 * reduced to id, label and fieldType before being put in the prompt.
 *
 * @param {Array<object>} requests - Lookup requests.
 * @param {Array<object>} targetFields - Fields the lookups should fill.
 * @param {*} availableData - Data forwarded to `buildLookupFillPrompt`.
 * @param {Function} generateObject - Structured-output generator.
 * @param {object} [providerOptions] - Passed through to `generateObject`.
 * @returns {Promise<{result: object, usage: object}>}
 */
async function fillFromLookup(requests, targetFields, availableData, generateObject, providerOptions) {
  const summarizeRequest = ({ type, description, targetFieldIds }) => ({
    type,
    description,
    targetFieldIds
  });
  const summarizeField = ({ id, label, fieldType }) => ({ id, label, fieldType });
  const prompt = buildLookupFillPrompt(
    requests.map(summarizeRequest),
    targetFields.map(summarizeField),
    availableData
  );
  const requestFill = () => generateObject({
    prompt,
    schema: LookupFillResultSchema,
    maxTokens: 4096,
    providerOptions
  });
  const response = await withRetry(requestFill);
  return { result: response.object, usage: response.usage };
}
|
|
3923
|
+
|
|
3924
|
+
// src/prompts/application/batch-email.ts
|
|
3925
|
+
/**
 * Builds the prompt that asks the model to write one batch email of application questions.
 *
 * Fields carrying a `condition` are listed separately so the model leaves them
 * out of this email; all other fields are numbered in order.
 *
 * @param {Array<object>} batchFields - Fields in this batch (`id`, `label`, `fieldType`, optional `options`, optional `condition`).
 * @param {number} batchIndex - Accepted for interface compatibility; not used in the prompt.
 * @param {number} totalBatches - Accepted for interface compatibility; not used in the prompt.
 * @param {string} [appTitle] - Application title; defaults to "Insurance Application".
 * @param {number} totalFieldCount - Total number of fields on the application.
 * @param {number} filledFieldCount - Number of fields already filled.
 * @param {string} [previousBatchSummary] - Summary of earlier answers; switches the opening rule when present.
 * @param {string} [companyName] - Company name; when missing, "the company" is used in the intro.
 * @returns {string} The complete prompt text.
 */
function buildBatchEmailGenerationPrompt(batchFields, batchIndex, totalBatches, appTitle, totalFieldCount, filledFieldCount, previousBatchSummary, companyName) {
  const nonConditionalFields = batchFields.filter((f) => !f.condition);
  const conditionalFields = batchFields.filter((f) => f.condition);
  const fieldList = nonConditionalFields.map((f, i) => {
    const base = `${i + 1}. id="${f.id}" label="${f.label}" type=${f.fieldType}`;
    // Skip the options suffix for a missing or empty list so the model never sees "options=[]".
    const hasOptions = Array.isArray(f.options) && f.options.length > 0;
    return hasOptions ? `${base} options=[${f.options.join(", ")}]` : base;
  }).join("\n");
  const conditionalNote = conditionalFields.length > 0 ? `

CONDITIONAL FIELDS (DO NOT include in this email \u2014 they will be asked as follow-ups in a separate email after the parent is answered):
${conditionalFields.map((f) => `- id="${f.id}" label="${f.label}" depends on ${f.condition.dependsOn} = "${f.condition.whenValue}"`).join("\n")}` : "";
  const company = companyName ?? "the company";
  // Without a real name the original rule told the model to both use and never use
  // "the company"; emit a rule that is consistent with the fallback instead.
  const companyRule = companyName != null
    ? `- Use "${company}" by name when referring to the company. Also fine: "we" or "our". Never "our company" or "the company".`
    : `- No company name was provided, so refer to the company as "we" or "our". Never "our company" or "the company".`;
  // Clamp so inconsistent counts never produce a negative "remaining" figure.
  const remainingFields = Math.max(0, totalFieldCount - filledFieldCount);
  const estMinutes = Math.max(1, Math.round(remainingFields * 0.5));
  return `You are an internal risk management assistant helping your colleague fill out an insurance application for ${company}. You work FOR ${company} \u2014 you are NOT the insurer, broker, or any external party.

APPLICATION: ${appTitle ?? "Insurance Application"}
COMPANY: ${company}
PROGRESS: ${filledFieldCount} of ${totalFieldCount} fields done, ~${remainingFields} remaining (~${estMinutes} min of questions left)
${previousBatchSummary ? `
PREVIOUS ANSWERS RECEIVED:
${previousBatchSummary}
` : ""}
FIELDS TO ASK ABOUT:
${fieldList}${conditionalNote}

Rules:
- ${previousBatchSummary ? 'Start by acknowledging previous answers or auto-filled data. If fields were auto-filled, list each field with its value AND cite the specific source (e.g. "from your GL Policy #ABC123", "from vercel.com", "from your business context"). If a web lookup was done, name the URL that was checked. Ask them to reply with corrections if anything is wrong.' : "Start with a one-line intro."}
- Mention progress once using estimated time remaining. Don't mention section/batch numbers or field counts.
${companyRule}
- Ask questions plainly. No em-dashes for dramatic effect, no filler phrases like "need to nail down" or "let's dive into". Just ask.
- For yes/no questions, ask naturally in one sentence. Don't list "Yes / No" as options. Mention what you'll need if the answer triggers a follow-up (e.g. "If not, I'll need a brief explanation.").
- For fields with 2-3 options, mention them inline. 4+ options can be a short list.
- Group related fields (address, coverage limits) into single compound questions.
- Do NOT include conditional/follow-up fields. They will be sent separately.
- Number each question.
- Note expected format where relevant: dollar amounts for currency, MM/DD/YYYY for dates, column descriptions for tables.
- End with a short closing.
- Tone: professional, brief, matter-of-fact. Write like a busy coworker, not a chatbot. No flourishes, no em-dashes between clauses, no editorializing about the questions.

NEVER:
- Sound like a salesperson or customer service agent
- Use em-dashes for emphasis or dramatic pacing
- Editorialize ("these two should wrap up this section", "just a couple more")
- List "Yes / No / N/A" as bullet options
- Include conditional follow-up questions
- Mention section numbers, batch numbers, or field counts

Output the email body text ONLY. No subject line, no JSON. Use markdown for numbered lists.`;
}
|
|
3976
|
+
|
|
3977
|
+
// src/application/agents/email-generator.ts
|
|
3978
|
+
/**
 * Generates the email body for one batch of application questions.
 *
 * Fields are trimmed to the properties the prompt builder reads, the prompt is
 * built by `buildBatchEmailGenerationPrompt`, and the model call runs through
 * `withRetry`.
 *
 * @param {Array<object>} batchFields - Fields in the batch being asked about.
 * @param {number} batchIndex - Index of this batch; forwarded to the prompt builder.
 * @param {number} totalBatches - Total batch count; forwarded to the prompt builder.
 * @param {{appTitle?: string, totalFieldCount: number, filledFieldCount: number, previousBatchSummary?: string, companyName?: string}} opts - Prompt context.
 * @param {Function} generateText - Text-generation model call.
 * @param {*} [providerOptions] - Passed through to `generateText` untouched.
 * @returns {Promise<{text: *, usage: *}>} The generated email text and reported usage.
 */
async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, generateText, providerOptions) {
  const summarizeField = (field) => ({
    id: field.id,
    label: field.label,
    fieldType: field.fieldType,
    options: field.options,
    condition: field.condition
  });
  const fieldSummaries = batchFields.map(summarizeField);
  const promptArgs = [
    fieldSummaries,
    batchIndex,
    totalBatches,
    opts.appTitle,
    opts.totalFieldCount,
    opts.filledFieldCount,
    opts.previousBatchSummary,
    opts.companyName
  ];
  const prompt = buildBatchEmailGenerationPrompt(...promptArgs);
  const callModel = () => generateText({ prompt, maxTokens: 2048, providerOptions });
  const response = await withRetry(callModel);
  return { text: response.text, usage: response.usage };
}
|
|
4005
|
+
|
|
4006
|
+
// src/application/coordinator.ts
|
|
4007
|
+
/**
 * Creates the application-intake pipeline and returns its four entry points.
 *
 * - `processApplication` classifies a PDF, extracts its fields, runs the
 *   configured auto-fill sources, and batches whatever is still unanswered.
 * - `processReply` applies a user's reply to the current batch (answers,
 *   lookup requests, questions) and advances to the next batch when complete.
 * - `generateCurrentBatchEmail` writes the email for the current batch.
 * - `getConfirmationSummary` writes a review summary of all filled fields.
 *
 * Token usage is accumulated per call, so overlapping calls on the same
 * pipeline each report only their own totals.
 *
 * @param {object} config - Pipeline dependencies and options.
 * @param {Function} config.generateText - Text-generation model call.
 * @param {Function} config.generateObject - Structured-output model call.
 * @param {object} [config.applicationStore] - Store with `get(id)` / `save(state)`.
 * @param {object} [config.documentStore] - Store with `query(filters)`.
 * @param {object} [config.memoryStore] - Store with `search(text, opts)`.
 * @param {*} [config.backfillProvider] - Source passed to `backfillFromPriorAnswers`.
 * @param {Array} [config.orgContext=[]] - Context entries used for auto-fill.
 * @param {number} [config.concurrency=4] - Limit handed to `pLimit`.
 * @param {Function} [config.onTokenUsage] - Called with each usage record as it is tracked.
 * @param {Function} [config.onProgress] - Called with human-readable progress messages.
 * @param {Function} [config.log] - Called (and awaited) with diagnostic messages.
 * @param {*} [config.providerOptions] - Passed through to every model call.
 * @returns {{processApplication: Function, processReply: Function, generateCurrentBatchEmail: Function, getConfirmationSummary: Function}}
 */
function createApplicationPipeline(config) {
  const {
    generateText,
    generateObject,
    applicationStore,
    documentStore,
    memoryStore,
    backfillProvider,
    orgContext = [],
    concurrency = 4,
    onTokenUsage,
    onProgress,
    log,
    providerOptions
  } = config;
  const limit = pLimit(concurrency);

  // One accumulator per public call. A single closure-level total that is reset
  // at the start of each call would be clobbered by any overlapping call.
  function createUsageTracker() {
    const total = { inputTokens: 0, outputTokens: 0 };
    return {
      total,
      track(usage) {
        if (!usage) return;
        // Missing counters count as zero instead of turning the total into NaN.
        total.inputTokens += usage.inputTokens ?? 0;
        total.outputTokens += usage.outputTokens ?? 0;
        onTokenUsage?.(usage);
      }
    };
  }

  async function processApplication(input) {
    const tracker = createUsageTracker();
    const { pdfBase64 } = input;
    const id = input.applicationId ?? `app-${Date.now()}`;
    const now = Date.now();
    const state = {
      id,
      // Don't persist the full PDF in state
      pdfBase64: void 0,
      title: void 0,
      applicationType: null,
      fields: [],
      batches: void 0,
      currentBatchIndex: 0,
      status: "classifying",
      createdAt: now,
      updatedAt: now
    };
    onProgress?.("Classifying document...");
    const { result: classifyResult, usage: classifyUsage } = await classifyApplication(
      // Send truncated content for classification
      pdfBase64.slice(0, 2e3),
      generateObject,
      providerOptions
    );
    tracker.track(classifyUsage);
    if (!classifyResult.isApplication) {
      state.status = "complete";
      state.updatedAt = Date.now();
      await applicationStore?.save(state);
      return { state, tokenUsage: tracker.total };
    }
    state.applicationType = classifyResult.applicationType;
    state.status = "extracting";
    state.updatedAt = Date.now();
    onProgress?.("Extracting form fields...");
    const { fields, usage: extractUsage } = await extractFields(
      pdfBase64,
      generateObject,
      providerOptions
    );
    tracker.track(extractUsage);
    state.fields = fields;
    state.title = classifyResult.applicationType ?? void 0;
    state.status = "auto_filling";
    state.updatedAt = Date.now();
    await applicationStore?.save(state);
    onProgress?.(`Auto-filling ${fields.length} fields...`);

    // Every auto-fill source is best-effort: a failure is logged and the
    // pipeline carries on with whatever the other sources produced.
    const fillTasks = [];
    if (backfillProvider) {
      fillTasks.push(
        (async () => {
          try {
            const priorAnswers = await backfillFromPriorAnswers(fields, backfillProvider);
            for (const pa of priorAnswers) {
              const field = state.fields.find((f) => f.id === pa.fieldId);
              if (field && !field.value && pa.relevance > 0.8) {
                field.value = pa.value;
                field.source = `backfill: ${pa.source}`;
                field.confidence = "high";
              }
            }
          } catch (e) {
            await log?.(`Backfill failed: ${e}`);
          }
        })()
      );
    }
    if (orgContext.length > 0) {
      fillTasks.push(
        limit(async () => {
          try {
            const unfilled = state.fields.filter((f) => !f.value);
            if (unfilled.length === 0) return;
            const { result: autoFillResult, usage: afUsage } = await autoFillFromContext(
              unfilled,
              orgContext,
              generateObject,
              providerOptions
            );
            tracker.track(afUsage);
            for (const match of autoFillResult.matches) {
              const field = state.fields.find((f) => f.id === match.fieldId);
              if (field && !field.value) {
                field.value = match.value;
                field.source = `auto-fill: ${match.contextKey}`;
                field.confidence = match.confidence;
              }
            }
          } catch (e) {
            // Matches the other fill sources: log and continue rather than
            // failing the whole application on an optional step.
            await log?.(`Auto-fill from context failed: ${e}`);
          }
        })
      );
    }
    if (documentStore && memoryStore) {
      fillTasks.push(
        (async () => {
          try {
            const candidates = state.fields.filter((f) => !f.value).slice(0, 10);
            // NOTE(review): the search results are not mapped onto fields yet;
            // the searches are still issued so store-side behavior is unchanged.
            await Promise.all(
              candidates.map((f) => limit(() => memoryStore.search(f.label, { limit: 3 })))
            );
          } catch (e) {
            await log?.(`Document backfill search failed: ${e}`);
          }
        })()
      );
    }
    await Promise.all(fillTasks);
    state.updatedAt = Date.now();
    await applicationStore?.save(state);

    const unfilledFields = state.fields.filter((f) => !f.value);
    if (unfilledFields.length > 0) {
      onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
      state.status = "batching";
      const { result: batchResult, usage: batchUsage } = await batchQuestions(
        unfilledFields,
        generateObject,
        providerOptions
      );
      tracker.track(batchUsage);
      state.batches = batchResult.batches;
      state.currentBatchIndex = 0;
      state.status = "collecting";
    } else {
      state.status = "confirming";
    }
    state.updatedAt = Date.now();
    await applicationStore?.save(state);
    const filledCount = state.fields.filter((f) => f.value).length;
    onProgress?.(`Application processed: ${filledCount}/${state.fields.length} fields filled, ${state.batches?.length ?? 0} batches to collect.`);
    return { state, tokenUsage: tracker.total };
  }

  async function processReply(input) {
    const tracker = createUsageTracker();
    const { applicationId, replyText, context } = input;
    const state = await applicationStore?.get(applicationId);
    if (!state) {
      throw new Error(`Application ${applicationId} not found`);
    }
    const currentBatchFieldIds = state.batches?.[state.currentBatchIndex] ?? [];
    const currentBatchFields = state.fields.filter(
      (f) => currentBatchFieldIds.includes(f.id)
    );
    onProgress?.("Classifying reply...");
    const { intent, usage: intentUsage } = await classifyReplyIntent(
      currentBatchFields,
      replyText,
      generateObject,
      providerOptions
    );
    tracker.track(intentUsage);
    let fieldsFilled = 0;
    let responseText;

    if (intent.hasAnswers) {
      onProgress?.("Parsing answers...");
      const { result: parseResult, usage: parseUsage } = await parseAnswers(
        currentBatchFields,
        replyText,
        generateObject,
        providerOptions
      );
      tracker.track(parseUsage);
      for (const answer of parseResult.answers) {
        const field = state.fields.find((f) => f.id === answer.fieldId);
        if (field) {
          field.value = answer.value;
          field.source = "user";
          field.confidence = "confirmed";
          fieldsFilled++;
        }
      }
    }

    if (intent.lookupRequests?.length) {
      onProgress?.("Processing lookup requests...");
      let availableData = "";
      if (documentStore) {
        try {
          const docs = await documentStore.query({});
          availableData = docs.map(
            (doc) => `Document ${doc.id}: ${doc.type} - ${doc.carrier ?? "unknown carrier"} - ${doc.insuredName ?? ""}`
          ).join("\n");
        } catch (e) {
          await log?.(`Document query for lookup failed: ${e}`);
        }
      }
      // With no document data there is nothing to look up against, so the
      // lookup requests are left unanswered.
      if (availableData) {
        const targetFields = state.fields.filter(
          (f) => intent.lookupRequests.some((lr) => lr.targetFieldIds.includes(f.id))
        );
        const { result: lookupResult, usage: lookupUsage } = await fillFromLookup(
          intent.lookupRequests,
          targetFields,
          availableData,
          generateObject,
          providerOptions
        );
        tracker.track(lookupUsage);
        for (const fill of lookupResult.fills) {
          const field = state.fields.find((f) => f.id === fill.fieldId);
          if (field) {
            field.value = fill.value;
            field.source = `lookup: ${fill.source}`;
            field.confidence = "high";
            fieldsFilled++;
          }
        }
      }
    }

    const asksQuestion = intent.primaryIntent === "question" || intent.primaryIntent === "mixed";
    if (asksQuestion && intent.questionText) {
      const { text, usage } = await generateText({
        prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"

Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
        maxTokens: 512,
        providerOptions
      });
      tracker.track(usage);
      responseText = text;
    }

    const currentBatchComplete = currentBatchFieldIds.every(
      (fid) => state.fields.find((f) => f.id === fid)?.value
    );
    if (currentBatchComplete && state.batches) {
      if (state.currentBatchIndex < state.batches.length - 1) {
        state.currentBatchIndex++;
        const nextBatchFieldIds = state.batches[state.currentBatchIndex];
        const nextBatchFields = state.fields.filter(
          (f) => nextBatchFieldIds.includes(f.id)
        );
        const filledCount = state.fields.filter((f) => f.value).length;
        const { text: emailText, usage: emailUsage } = await generateBatchEmail(
          nextBatchFields,
          state.currentBatchIndex,
          state.batches.length,
          {
            appTitle: state.title,
            totalFieldCount: state.fields.length,
            filledFieldCount: filledCount,
            companyName: context?.companyName
          },
          generateText,
          providerOptions
        );
        tracker.track(emailUsage);
        // An explanation for the user's question, if any, goes before the next batch email.
        responseText = responseText ? `${responseText}\n\n${emailText}` : emailText;
      } else {
        state.status = "confirming";
      }
    }
    state.updatedAt = Date.now();
    await applicationStore?.save(state);
    return {
      state,
      intent: intent.primaryIntent,
      fieldsFilled,
      responseText,
      tokenUsage: tracker.total
    };
  }

  async function generateCurrentBatchEmail(applicationId, opts) {
    const tracker = createUsageTracker();
    const state = await applicationStore?.get(applicationId);
    if (!state) throw new Error(`Application ${applicationId} not found`);
    if (!state.batches?.length) throw new Error("No batches available");
    const batchFieldIds = state.batches[state.currentBatchIndex];
    const batchFields = state.fields.filter((f) => batchFieldIds.includes(f.id));
    const filledCount = state.fields.filter((f) => f.value).length;
    const { text, usage } = await generateBatchEmail(
      batchFields,
      state.currentBatchIndex,
      state.batches.length,
      {
        appTitle: state.title,
        totalFieldCount: state.fields.length,
        filledFieldCount: filledCount,
        companyName: opts?.companyName,
        previousBatchSummary: opts?.previousBatchSummary
      },
      generateText,
      providerOptions
    );
    tracker.track(usage);
    return { text, tokenUsage: tracker.total };
  }

  async function getConfirmationSummary(applicationId) {
    const tracker = createUsageTracker();
    const state = await applicationStore?.get(applicationId);
    if (!state) throw new Error(`Application ${applicationId} not found`);
    const filledFields = state.fields.filter((f) => f.value);
    const fieldSummary = filledFields.map((f) => `${f.section} > ${f.label}: ${f.value} (source: ${f.source ?? "unknown"})`).join("\n");
    const { text, usage } = await generateText({
      prompt: `Format these filled insurance application fields as a clean confirmation summary for the user to review. Group by section, show each field as "Label: Value". End with a note asking them to confirm or request changes.

Application: ${state.title ?? "Insurance Application"}

Fields:
${fieldSummary}`,
      maxTokens: 4096,
      providerOptions
    });
    tracker.track(usage);
    return { text, tokenUsage: tracker.total };
  }

  return {
    processApplication,
    processReply,
    generateCurrentBatchEmail,
    getConfirmationSummary
  };
}
|
|
4358
|
+
|
|
4359
|
+
// src/prompts/application/confirmation.ts
|
|
4360
|
+
/**
 * Builds the prompt that asks the model to format application answers into a
 * confirmation summary the user can review.
 *
 * Each field is rendered as `[section] label: value`. The label falls back to
 * `text` and then `id`; a null/undefined value is shown as "(not provided)".
 *
 * @param {Array<object>} fields - Fields to summarise (`section`, `label`/`text`/`id`, `value`).
 * @param {string} [applicationTitle] - Application title; defaults to
 *   "Insurance Application" so a missing title never renders as "undefined".
 * @returns {string} The complete prompt text.
 */
function buildConfirmationSummaryPrompt(fields, applicationTitle) {
  const title = applicationTitle ?? "Insurance Application";
  const fieldList = fields.map((f) => {
    const label = f.label ?? f.text ?? f.id;
    const value = f.value ?? "(not provided)";
    return `[${f.section}] ${label}: ${value}`;
  }).join("\n");
  return `Format the following insurance application answers into a clean, readable summary grouped by section. This will be sent as an email for the user to review and confirm.

APPLICATION: ${title}

FIELD VALUES:
${fieldList}

Format as a readable summary:
- Group by section with section headers
- Show each field as "Label: Value"
- For declarations, show the question and the yes/no answer plus any explanation
- Skip fields with no value unless they are required
- End with a note asking the user to reply "Looks good" to confirm, or describe any changes needed

Respond with the formatted summary text only (no JSON wrapper). Use markdown formatting (bold headers, bullet points).`;
}
|
|
4382
|
+
|
|
4383
|
+
// src/prompts/application/field-explanation.ts
|
|
4384
|
+
/**
 * Builds the prompt that asks the model to explain one application field in
 * response to a colleague's question.
 *
 * @param {{label: *, fieldType: *, options?: Array}} field - Field being asked about;
 *   `options`, when truthy, is joined with ", " into the FIELD line.
 * @param {string} question - The question as asked.
 * @param {string} [policyContext] - Optional supporting policy/context text; adds a
 *   "RELEVANT POLICY/CONTEXT INFO" section when truthy.
 * @returns {string} The complete prompt text.
 */
function buildFieldExplanationPrompt(field, question, policyContext) {
  const optionsSuffix = field.options ? `, options: ${field.options.join(", ")}` : "";
  const contextSection = policyContext ? `RELEVANT POLICY/CONTEXT INFO:\n${policyContext}\n` : "";
  // Assembled line by line; empty entries are the blank lines of the prompt.
  const promptLines = [
    "You are an internal risk management assistant helping a colleague fill out an insurance application for your company. They asked a question about a field on the form.",
    "",
    `FIELD: "${field.label}" (type: ${field.fieldType}${optionsSuffix})`,
    "",
    `THEIR QUESTION: "${question}"`,
    "",
    contextSection,
    "",
    `Provide a short, helpful explanation (2-3 sentences) as a coworker would. If the field has options, briefly explain what each means if relevant. If there's policy context that helps, cite the specific source (e.g. "According to our GL Policy #ABC123 with Hartford, our current aggregate limit is $2M").`,
    "",
    `End with: "Just reply with the answer when you're ready and I'll fill it in."`,
    "",
    "Respond with the explanation text only \u2014 no JSON, no field ID, no extra formatting."
  ];
  return promptLines.join("\n");
}
|
|
4401
|
+
|
|
4402
|
+
// src/prompts/query/classify.ts
|
|
4403
|
+
/**
 * Builds the prompt that asks the model to classify a user question: primary
 * intent, atomic sub-questions, and which storage backends are needed.
 *
 * @param {string} question - The user's question, inserted verbatim.
 * @param {string} [conversationContext] - Optional prior-conversation text; adds a
 *   "CONVERSATION CONTEXT" section when truthy.
 * @returns {string} The complete prompt text.
 */
function buildQueryClassifyPrompt(question, conversationContext) {
  // Empty when there is no context, so the prompt keeps the same blank-line layout.
  let contextSection = "";
  if (conversationContext) {
    contextSection = `
CONVERSATION CONTEXT:
${conversationContext}`;
  }
  return `You are a query classifier for an insurance document intelligence system.

Analyze the user's question and produce a structured classification.

USER QUESTION:
${question}
${contextSection}

INSTRUCTIONS:

1. Determine the primary intent:
- "policy_question": questions about specific coverage, limits, deductibles, endorsements, conditions
- "coverage_comparison": comparing coverages across multiple documents or policies
- "document_search": looking for a specific document by carrier, policy number, insured name
- "claims_inquiry": questions about claims history, loss runs, experience modification
- "general_knowledge": insurance concepts not tied to a specific document

2. Decompose into atomic sub-questions:
- Each sub-question should be answerable from a single retrieval pass
- Simple questions produce exactly one sub-question (the question itself)
- Complex questions (comparisons, multi-policy, multi-field) decompose into 2-5 sub-questions
- Each sub-question should specify which chunk types are most relevant

3. Determine which storage backends are needed:
- requiresDocumentLookup: true if a specific document needs to be fetched by ID/number/carrier
- requiresChunkSearch: true if semantic search over document chunks is needed
- requiresConversationHistory: true if the question references prior conversation

CHUNK TYPES (for chunkTypes filter):
carrier_info, named_insured, coverage, endorsement, exclusion, condition, section, declaration, loss_history, premium, supplementary

Respond with the structured classification.`;
}
|
|
4439
|
+
|
|
4440
|
+
// src/prompts/query/respond.ts
|
|
4441
|
+
/**
 * Builds the prompt that asks the model to merge verified sub-answers into one
 * final response with inline citations.
 *
 * @param {string} originalQuestion - The user's original question.
 * @param {string} subAnswersJson - Sub-answers (with citations), already serialised to text.
 * @param {string} [platform] - "email" and "sms" select plain-text guidance; any
 *   other value selects the markdown-friendly default.
 * @returns {string} The complete prompt text.
 */
function buildRespondPrompt(originalQuestion, subAnswersJson, platform) {
  let formatGuidance;
  switch (platform) {
    case "email":
      formatGuidance = "Format as a professional email response. Use plain text, no markdown.";
      break;
    case "sms":
      formatGuidance = "Keep the response concise and conversational. No markdown.";
      break;
    default:
      formatGuidance = "Format as clear, well-structured text. Use markdown for lists and emphasis where helpful.";
  }
  return `You are composing a final answer to an insurance question. You have verified sub-answers with citations that you need to merge into a single, natural response.

ORIGINAL QUESTION:
${originalQuestion}

VERIFIED SUB-ANSWERS:
${subAnswersJson}

FORMATTING:
${formatGuidance}

INSTRUCTIONS:
1. Write a natural, direct answer to the original question.
2. Embed inline citation numbers [1], [2], etc. after each factual claim. These reference the citation objects from the sub-answers \u2014 preserve the original citation index numbers.
3. If any sub-answer had low confidence or noted missing context, mention what information was unavailable rather than omitting silently.
4. If the answer naturally leads to a follow-up question the user might want to ask, suggest it in the followUp field.
5. Merge overlapping citations \u2014 if two sub-answers cite the same chunk, use one citation number.
6. Keep the tone helpful and professional.

Respond with the final answer, deduplicated citations array, overall confidence (weighted average of sub-answer confidences), and an optional follow-up suggestion.`;
}
|
|
4464
|
+
|
|
4465
|
+
// src/schemas/query.ts
|
|
4466
|
+
import { z as z32 } from "zod";
|
|
4467
|
+
/** Closed set of intents a query (or sub-question) can be classified as. */
var QueryIntentSchema = z32.enum(["policy_question", "coverage_comparison", "document_search", "claims_inquiry", "general_knowledge"]);

/** One atomic sub-question plus optional hints for narrowing retrieval. */
var SubQuestionSchema = z32.object({
  question: z32
    .string()
    .describe("Atomic sub-question to retrieve and answer independently"),
  intent: QueryIntentSchema,
  chunkTypes: z32
    .array(z32.string())
    .optional()
    .describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
  documentFilters: z32
    .object({
      type: z32.enum(["policy", "quote"]).optional(),
      carrier: z32.string().optional(),
      insuredName: z32.string().optional(),
      policyNumber: z32.string().optional(),
      quoteNumber: z32.string().optional()
    })
    .optional()
    .describe("Structured filters to narrow document lookup")
});

/** Classifier output: overall intent, at least one sub-question, and backend flags. */
var QueryClassifyResultSchema = z32.object({
  intent: QueryIntentSchema,
  subQuestions: z32
    .array(SubQuestionSchema)
    .min(1)
    .describe("Decomposed atomic sub-questions"),
  requiresDocumentLookup: z32
    .boolean()
    .describe("Whether structured document lookup is needed"),
  requiresChunkSearch: z32
    .boolean()
    .describe("Whether semantic chunk search is needed"),
  requiresConversationHistory: z32
    .boolean()
    .describe("Whether conversation history is relevant")
});

/** A single piece of retrieved evidence; `relevance` is constrained to [0, 1]. */
var EvidenceItemSchema = z32.object({
  source: z32.enum(["chunk", "document", "conversation"]),
  chunkId: z32.string().optional(),
  documentId: z32.string().optional(),
  turnId: z32.string().optional(),
  text: z32.string().describe("Text excerpt from the source"),
  relevance: z32.number().min(0).max(1),
  metadata: z32.record(z32.string(), z32.string()).optional()
});

/** Evidence gathered for one sub-question. */
var RetrievalResultSchema = z32.object({
  subQuestion: z32.string(),
  evidence: z32.array(EvidenceItemSchema)
});

/** A numbered citation pointing back at the chunk/document that supports a claim. */
var CitationSchema = z32.object({
  index: z32.number().describe("Citation number [1], [2], etc."),
  chunkId: z32.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
  documentId: z32.string(),
  documentType: z32.enum(["policy", "quote"]).optional(),
  field: z32
    .string()
    .optional()
    .describe("Specific field path, e.g. coverages[0].deductible"),
  quote: z32.string().describe("Exact text from source that supports the claim"),
  relevance: z32.number().min(0).max(1)
});

/** Answer to one sub-question with its citations; `confidence` is constrained to [0, 1]. */
var SubAnswerSchema = z32.object({
  subQuestion: z32.string(),
  answer: z32.string(),
  citations: z32.array(CitationSchema),
  confidence: z32.number().min(0).max(1),
  needsMoreContext: z32
    .boolean()
    .describe("True if evidence was insufficient to answer fully")
});

/** Verifier output: approval flag, issues found, and sub-questions to retry. */
var VerifyResultSchema = z32.object({
  approved: z32
    .boolean()
    .describe("Whether all sub-answers are adequately grounded"),
  issues: z32
    .array(z32.string())
    .describe("Specific grounding or consistency issues found"),
  retrySubQuestions: z32
    .array(z32.string())
    .optional()
    .describe("Sub-questions that need additional retrieval or re-reasoning")
});

/** Final query result returned to the caller. */
var QueryResultSchema = z32.object({
  answer: z32.string(),
  citations: z32.array(CitationSchema),
  intent: QueryIntentSchema,
  confidence: z32.number().min(0).max(1),
  followUp: z32
    .string()
    .optional()
    .describe("Suggested follow-up question if applicable")
});
|
|
4534
|
+
|
|
4535
|
+
// src/query/retriever.ts
|
|
4536
|
+
/**
 * Collects evidence for a single sub-question from up to three sources that
 * are queried concurrently:
 *   1. chunk search through `memoryStore.search` (always),
 *   2. structured document lookup through `documentStore.query`
 *      (only when `subQuestion.documentFilters` is set),
 *   3. conversation history through `memoryStore.searchHistory`
 *      (only when `conversationId` is truthy).
 *
 * A failure in any one source is reported through `log` and does not abort
 * the other sources.
 *
 * @param {object} subQuestion - Needs `question`; may carry `chunkTypes`
 *   (array) and `documentFilters` (object).
 * @param {string|undefined} conversationId - Enables the history search.
 * @param {object} config - `{ documentStore, memoryStore, retrievalLimit, log }`.
 * @returns {Promise<{subQuestion: string, evidence: object[]}>} Evidence
 *   sorted by descending relevance and capped at `retrievalLimit` items.
 */
async function retrieve(subQuestion, conversationId, config) {
  const { documentStore, memoryStore, retrievalLimit, log } = config;
  const evidence = [];

  // Single place that turns a stored chunk into an evidence item.
  // Relevance is a fixed default because search results carry no score here.
  const toChunkEvidence = (chunk) => ({
    source: "chunk",
    chunkId: chunk.id,
    documentId: chunk.documentId,
    text: chunk.text,
    relevance: 0.8,
    metadata: chunk.metadata
  });

  const searchChunks = async () => {
    try {
      // De-duplicate requested types: a repeated type would run the same
      // search twice, emit duplicate evidence and shrink the per-type limit.
      const chunkTypes = subQuestion.chunkTypes?.length ? [...new Set(subQuestion.chunkTypes)] : [];
      if (chunkTypes.length === 0) {
        const chunks = await memoryStore.search(subQuestion.question, {
          limit: retrievalLimit
        });
        for (const chunk of chunks) {
          evidence.push(toChunkEvidence(chunk));
        }
        return;
      }
      // Split the budget evenly across the requested chunk types.
      const perTypeLimit = Math.ceil(retrievalLimit / chunkTypes.length);
      const resultsByType = await Promise.all(
        chunkTypes.map(
          (type) => memoryStore.search(subQuestion.question, {
            limit: perTypeLimit,
            filter: { type }
          })
        )
      );
      for (const chunks of resultsByType) {
        for (const chunk of chunks) {
          evidence.push(toChunkEvidence(chunk));
        }
      }
    } catch (e) {
      await log?.(`Chunk search failed for "${subQuestion.question}": ${e}`);
    }
  };

  const lookupDocuments = async () => {
    try {
      // Forward only the recognised filter keys that have a truthy value.
      const filters = {};
      for (const key of ["type", "carrier", "insuredName", "policyNumber", "quoteNumber"]) {
        const value = subQuestion.documentFilters?.[key];
        if (value) filters[key] = value;
      }
      const docs = await documentStore.query(filters);
      for (const doc of docs) {
        evidence.push({
          source: "document",
          documentId: doc.id,
          text: buildDocumentSummary(doc),
          // Direct lookup is high relevance
          relevance: 0.9,
          metadata: {
            type: doc.type,
            carrier: doc.carrier ?? "",
            insuredName: doc.insuredName ?? ""
          }
        });
      }
    } catch (e) {
      await log?.(`Document lookup failed: ${e}`);
    }
  };

  const searchConversation = async () => {
    try {
      const turns = await memoryStore.searchHistory(
        subQuestion.question,
        conversationId
      );
      // Only the first five turns returned by the store are used.
      for (const turn of turns.slice(0, 5)) {
        evidence.push({
          source: "conversation",
          turnId: turn.id,
          text: `[${turn.role}]: ${turn.content}`,
          // Conversation context is lower relevance than documents
          relevance: 0.6
        });
      }
    } catch (e) {
      await log?.(`Conversation history search failed: ${e}`);
    }
  };

  const tasks = [searchChunks()];
  if (subQuestion.documentFilters) tasks.push(lookupDocuments());
  if (conversationId) tasks.push(searchConversation());
  await Promise.all(tasks);

  evidence.sort((a, b) => b.relevance - a.relevance);
  return {
    subQuestion: subQuestion.question,
    evidence: evidence.slice(0, retrievalLimit)
  };
}
|
|
4648
|
+
/**
 * Renders a stored document record as a short multi-line text summary that
 * is used as evidence text for document lookups.
 *
 * Fields with falsy values are left out. Policy- and quote-specific fields
 * are only emitted for the matching `doc.type`. At most the first ten
 * coverages are listed, although the header line shows the full count.
 *
 * @param {object} doc - Document record; reads `type`, `carrier`,
 *   `insuredName`, `premium`, `coverages` and the policy/quote fields.
 * @returns {string} Summary lines joined with "\n".
 */
function buildDocumentSummary(doc) {
  const { type } = doc;
  const lines = [`Document type: ${type}`];
  // Append "<label>: <value>" only when the value is truthy.
  const addField = (label, value) => {
    if (value) lines.push(`${label}: ${value}`);
  };

  addField("Carrier", doc.carrier);
  addField("Insured", doc.insuredName);

  switch (type) {
    case "policy":
      addField("Policy #", doc.policyNumber);
      addField("Effective", doc.effectiveDate);
      addField("Expiration", doc.expirationDate);
      break;
    case "quote":
      addField("Quote #", doc.quoteNumber);
      addField("Proposed effective", doc.proposedEffectiveDate);
      break;
    default:
      break;
  }

  addField("Premium", doc.premium);

  const { coverages } = doc;
  if (coverages?.length) {
    lines.push(`Coverages (${coverages.length}):`);
    for (const coverage of coverages.slice(0, 10)) {
      const segments = [coverage.name];
      segments.push(coverage.limit ? `Limit: ${coverage.limit}` : null);
      segments.push(coverage.deductible ? `Ded: ${coverage.deductible}` : null);
      const rendered = segments.filter(Boolean).join(" | ");
      lines.push(` - ${rendered}`);
    }
  }
  return lines.join("\n");
}
|
|
4673
|
+
|
|
4674
|
+
// src/prompts/query/reason.ts
|
|
4675
|
+
/**
 * System-style instructions for the reasoning prompt, keyed by query intent.
 * Each value is an opening sentence, a blank line, a "RULES:" header and one
 * "- " bullet per rule, joined with newlines.
 */
var INTENT_INSTRUCTIONS = (() => {
  // Assemble one instruction block from its opening sentence and rule list.
  const compose = (opening, rules) => [opening, "", "RULES:", ...rules.map((rule) => `- ${rule}`)].join("\n");
  const groundedOnly = "Answer ONLY from the evidence provided.";

  return {
    policy_question: compose("You are answering a question about a specific insurance policy or quote.", [
      `${groundedOnly} Do not use general knowledge.`,
      "When citing limits, deductibles, or amounts, use the exact values from the source.",
      "If the evidence mentions an endorsement that modifies coverage, include that context.",
      "If the evidence is insufficient, say what is missing rather than guessing.",
      "Reference specific coverage names, form numbers, and endorsement titles when available."
    ]),
    coverage_comparison: compose("You are comparing coverages across insurance documents.", [
      groundedOnly,
      "Structure your comparison around specific coverage attributes: limits, deductibles, forms, triggers.",
      'Note differences clearly: "Policy A has X, while Policy B has Y."',
      "Flag where one document has coverage the other lacks entirely.",
      "If evidence for one side of the comparison is missing, state that explicitly."
    ]),
    document_search: compose("You are helping locate a specific insurance document.", [
      groundedOnly,
      "Identify the document by carrier, policy/quote number, insured name, and effective dates.",
      "If multiple documents match, list them with distinguishing details.",
      "If no documents match, say so clearly."
    ]),
    claims_inquiry: compose("You are answering a question about claims history or loss experience.", [
      groundedOnly,
      "Reference specific claim dates, amounts, descriptions, and statuses.",
      "Include experience modification factors if available.",
      "Be precise with dollar amounts and dates \u2014 do not approximate.",
      "If the evidence shows no claims, state that explicitly."
    ]),
    general_knowledge: compose("You are answering a general insurance question using available document context.", [
      "You may use general insurance knowledge to frame your answer.",
      "If the question can be answered from the evidence, prefer that over general knowledge.",
      "When mixing general knowledge with document-specific data, make the distinction clear.",
      "Still cite evidence when referencing specific documents."
    ])
  };
})();
|
|
4715
|
+
/**
 * Builds the prompt for answering one sub-question from retrieved evidence.
 *
 * @param {string} subQuestion - The sub-question text.
 * @param {string} intent - Key into `INTENT_INSTRUCTIONS`. An unrecognised
 *   value falls back to the `policy_question` instructions instead of
 *   leaking "undefined" (or an inherited Object member) into the prompt.
 * @param {string} evidence - Pre-formatted evidence text.
 * @returns {string} The complete prompt.
 */
function buildReasonPrompt(subQuestion, intent, evidence) {
  // Own-property check so keys like "constructor" are not treated as intents.
  const isKnownIntent = Object.prototype.hasOwnProperty.call(INTENT_INSTRUCTIONS, intent);
  // Fallback is the evidence-only rule set, the most conservative option.
  const instructions = isKnownIntent ? INTENT_INSTRUCTIONS[intent] : INTENT_INSTRUCTIONS.policy_question;
  return `${instructions}

SUB-QUESTION:
${subQuestion}

EVIDENCE:
${evidence}

Answer the sub-question based on the evidence above. For every factual claim, include a citation referencing the source evidence item by its chunkId or documentId. Rate your confidence from 0 to 1 based on how well the evidence supports your answer. Set needsMoreContext to true if the evidence was insufficient.`;
}
|
|
4726
|
+
|
|
4727
|
+
// src/query/reasoner.ts
|
|
4728
|
+
/**
 * Asks the model to answer one sub-question from its evidence list.
 *
 * Each evidence item is rendered as a numbered entry with a source tag
 * (`[chunk:…]`, `[doc:…]` or `[turn:…]`) and its relevance to two decimals;
 * entries are separated by a blank line. The call goes through `withRetry`.
 *
 * @param {string} subQuestion - Sub-question text.
 * @param {string} intent - Intent forwarded to `buildReasonPrompt`.
 * @param {object[]} evidence - Items with `source`, `text`, `relevance` and
 *   the id field that matches their source.
 * @param {object} config - `{ generateObject, providerOptions }`.
 * @returns {Promise<{subAnswer: object, usage: object}>}
 */
async function reason(subQuestion, intent, evidence, config) {
  const { generateObject, providerOptions } = config;

  // Any source other than "chunk" or "document" is tagged as a turn.
  const tagFor = (item) => {
    switch (item.source) {
      case "chunk":
        return `[chunk:${item.chunkId}]`;
      case "document":
        return `[doc:${item.documentId}]`;
      default:
        return `[turn:${item.turnId}]`;
    }
  };

  const entries = evidence.map((item, index) => {
    const header = `Evidence ${index + 1} ${tagFor(item)} (relevance: ${item.relevance.toFixed(2)}):`;
    return [header, item.text].join("\n");
  });
  const prompt = buildReasonPrompt(subQuestion, intent, entries.join("\n\n"));

  const response = await withRetry(
    () => generateObject({
      prompt,
      schema: SubAnswerSchema,
      maxTokens: 4096,
      providerOptions
    })
  );
  return { subAnswer: response.object, usage: response.usage };
}
|
|
4746
|
+
|
|
4747
|
+
// src/prompts/query/verify.ts
|
|
4748
|
+
/**
 * Builds the prompt for the verification pass over a set of sub-answers.
 *
 * The three arguments are interpolated verbatim under the
 * "ORIGINAL QUESTION", "SUB-ANSWERS" and "AVAILABLE EVIDENCE" headings,
 * followed by the fixed checklist and response instructions.
 *
 * @param {string} originalQuestion - The user's question.
 * @param {string} subAnswersJson - Serialized sub-answers.
 * @param {string} evidenceJson - Serialized evidence.
 * @returns {string} The complete prompt.
 */
function buildVerifyPrompt(originalQuestion, subAnswersJson, evidenceJson) {
  const header = [
    "You are a verification agent for an insurance document intelligence system. Your job is to check that answers are accurate, grounded, and complete.",
    "",
    "ORIGINAL QUESTION:",
    `${originalQuestion}`,
    "",
    "SUB-ANSWERS:",
    `${subAnswersJson}`,
    "",
    "AVAILABLE EVIDENCE:",
    `${evidenceJson}`
  ];
  const checklist = [
    "CHECK EACH SUB-ANSWER FOR:",
    "",
    "1. GROUNDING: Every factual claim must be supported by a citation that references actual evidence. Flag any claim that:",
    "- Has no citation",
    "- Cites a source that doesn't actually contain the claimed information",
    "- Extrapolates beyond what the evidence states",
    "",
    "2. CONSISTENCY: Sub-answers should not contradict each other. Flag any contradictions, noting which sub-answers conflict and what the discrepancy is.",
    "",
    "3. COMPLETENESS: Did each sub-question get an adequate answer? Flag any sub-question where:",
    "- The answer is vague or hedged when the evidence supports a specific answer",
    "- Important details from the evidence were omitted",
    "- The confidence rating seems miscalibrated (high confidence with weak evidence, or low confidence with strong evidence)"
  ];
  const responseFormat = [
    "RESPOND WITH:",
    "- approved: true only if ALL sub-answers pass all three checks",
    "- issues: list every specific issue found (empty array if approved)",
    "- retrySubQuestions: sub-questions that need re-retrieval or re-reasoning (only if not approved)"
  ];
  // Sections are separated by one blank line.
  return [header, checklist, responseFormat].map((section) => section.join("\n")).join("\n\n");
}
|
|
4779
|
+
|
|
4780
|
+
// src/query/verifier.ts
|
|
4781
|
+
/**
 * Runs the verification pass: serializes the sub-answers and the evidence,
 * builds the verify prompt and requests a `VerifyResultSchema` object from
 * the model through `withRetry`.
 *
 * @param {string} originalQuestion - The user's question.
 * @param {object[]} subAnswers - Sub-answers; only `subQuestion`, `answer`,
 *   `citations`, `confidence` and `needsMoreContext` are forwarded.
 * @param {object[]} allEvidence - Evidence items; each is reduced to
 *   `source`, an `id`, the first 500 characters of `text`, and `relevance`.
 * @param {object} config - `{ generateObject, providerOptions }`.
 * @returns {Promise<{result: object, usage: object}>}
 */
async function verify(originalQuestion, subAnswers, allEvidence, config) {
  const { generateObject, providerOptions } = config;
  const toPrettyJson = (value) => JSON.stringify(value, null, 2);

  const subAnswersJson = toPrettyJson(
    subAnswers.map((sa) => {
      const { subQuestion, answer, citations, confidence, needsMoreContext } = sa;
      return { subQuestion, answer, citations, confidence, needsMoreContext };
    })
  );

  const evidenceJson = toPrettyJson(
    allEvidence.map((item) => {
      // First id that is neither null nor undefined, in this order.
      const id = item.chunkId ?? item.documentId ?? item.turnId;
      // Truncate for context efficiency
      const text = item.text.slice(0, 500);
      return { source: item.source, id, text, relevance: item.relevance };
    })
  );

  const prompt = buildVerifyPrompt(originalQuestion, subAnswersJson, evidenceJson);
  const response = await withRetry(
    () => generateObject({
      prompt,
      schema: VerifyResultSchema,
      maxTokens: 2048,
      providerOptions
    })
  );
  return { result: response.object, usage: response.usage };
}
|
|
4816
|
+
|
|
4817
|
+
// src/query/coordinator.ts
|
|
4818
|
+
/**
 * Creates a query agent that answers a question in five stages:
 * classify -> retrieve -> reason -> verify (with optional retry) -> respond.
 *
 * @param {object} config
 * @param {Function} config.generateObject - Structured-output model call.
 * @param {object} config.documentStore - Passed through to `retrieve`.
 * @param {object} config.memoryStore - Used for `getHistory` / `addTurn` and
 *   passed through to `retrieve`.
 * @param {number} [config.concurrency=3] - Limit handed to `pLimit`.
 * @param {number} [config.maxVerifyRounds=1] - Maximum verification rounds.
 * @param {number} [config.retrievalLimit=10] - Evidence budget per
 *   sub-question; doubled for retried sub-questions.
 * @param {Function} [config.onTokenUsage] - Called with each usage record.
 * @param {Function} [config.onProgress] - Called with progress messages.
 * @param {Function} [config.log] - Optional async logger.
 * @param {object} [config.providerOptions] - Forwarded to the model calls.
 * @returns {{query: Function}} Object exposing `query(input)`.
 */
function createQueryAgent(config) {
  const {
    generateObject,
    documentStore,
    memoryStore,
    concurrency = 3,
    maxVerifyRounds = 1,
    retrievalLimit = 10,
    onTokenUsage,
    onProgress,
    log,
    providerOptions
  } = config;
  const limit = pLimit(concurrency);

  // One accumulator per query() call. A shared closure variable would be
  // reset and double-counted when two queries run concurrently on one agent.
  function createUsageTracker() {
    const total = { inputTokens: 0, outputTokens: 0 };
    const track = (usage) => {
      if (!usage) return;
      // Missing counters count as zero so the totals never become NaN.
      total.inputTokens += usage.inputTokens ?? 0;
      total.outputTokens += usage.outputTokens ?? 0;
      onTokenUsage?.(usage);
    };
    return { total, track };
  }

  /**
   * Answers one question.
   * @param {{question: string, conversationId?: string, context?: object}} input
   * @returns {Promise<object>} The respond-stage result plus `tokenUsage`,
   *   the token totals for this call only.
   */
  async function query(input) {
    const { total: totalUsage, track: trackUsage } = createUsageTracker();
    const { question, conversationId, context } = input;

    onProgress?.("Classifying query...");
    const classification = await classify(question, conversationId, trackUsage);
    const subQuestions = classification.subQuestions;

    onProgress?.(`Retrieving evidence for ${subQuestions.length} sub-question(s)...`);
    const retrieverConfig = {
      documentStore,
      memoryStore,
      retrievalLimit,
      log
    };
    const retrievalResults = await Promise.all(
      subQuestions.map(
        (sq) => limit(() => retrieve(sq, conversationId, retrieverConfig))
      )
    );
    const allEvidence = retrievalResults.flatMap((r) => r.evidence);

    onProgress?.("Reasoning over evidence...");
    const reasonerConfig = { generateObject, providerOptions };
    const answerSubQuestion = async (sq, evidence) => {
      const { subAnswer, usage } = await reason(sq.question, sq.intent, evidence, reasonerConfig);
      trackUsage(usage);
      return subAnswer;
    };
    // subAnswers[i] always belongs to subQuestions[i]; Promise.all keeps order.
    let subAnswers = await Promise.all(
      subQuestions.map(
        (sq, i) => limit(() => answerSubQuestion(sq, retrievalResults[i].evidence))
      )
    );

    onProgress?.("Verifying answer grounding...");
    const verifierConfig = { generateObject, providerOptions };
    for (let round = 0; round < maxVerifyRounds; round++) {
      const { result: verifyResult, usage } = await verify(
        question,
        subAnswers,
        allEvidence,
        verifierConfig
      );
      trackUsage(usage);
      if (verifyResult.approved) {
        onProgress?.("Verification passed.");
        break;
      }
      onProgress?.(`Verification found ${verifyResult.issues.length} issue(s), round ${round + 1}/${maxVerifyRounds}`);
      await log?.(`Verify issues: ${verifyResult.issues.join("; ")}`);

      // Positions of the sub-questions the verifier asked to retry.
      const flagged = new Set(verifyResult.retrySubQuestions ?? []);
      const retryIndices = subQuestions.flatMap(
        (sq, index) => flagged.has(sq.question) ? [index] : []
      );
      if (retryIndices.length === 0) {
        // Nothing can change, so another verification round over identical
        // answers would only spend tokens.
        break;
      }

      const retryRetrievals = await Promise.all(
        retryIndices.map(
          (index) => limit(
            () => retrieve(subQuestions[index], conversationId, {
              ...retrieverConfig,
              // Broader retrieval on retry
              retrievalLimit: retrievalLimit * 2
            })
          )
        )
      );
      for (const r of retryRetrievals) {
        allEvidence.push(...r.evidence);
      }
      const retrySubAnswers = await Promise.all(
        retryIndices.map(
          (index, j) => limit(() => answerSubQuestion(subQuestions[index], retryRetrievals[j].evidence))
        )
      );

      // Replace by position rather than by the `subQuestion` text the model
      // echoes back; a paraphrased echo would otherwise discard the retry.
      const refreshed = [...subAnswers];
      retryIndices.forEach((index, j) => {
        refreshed[index] = retrySubAnswers[j];
      });
      subAnswers = refreshed;
    }

    onProgress?.("Composing final answer...");
    const queryResult = await respond(
      question,
      subAnswers,
      classification,
      context?.platform,
      trackUsage
    );

    if (conversationId) {
      // Best effort: a failure to persist the turns is logged, not thrown.
      try {
        await memoryStore.addTurn({
          id: `turn-${Date.now()}-q`,
          conversationId,
          role: "user",
          content: question,
          timestamp: Date.now()
        });
        await memoryStore.addTurn({
          id: `turn-${Date.now()}-a`,
          conversationId,
          role: "assistant",
          content: queryResult.answer,
          timestamp: Date.now()
        });
      } catch (e) {
        await log?.(`Failed to store conversation turn: ${e}`);
      }
    }
    return { ...queryResult, tokenUsage: totalUsage };
  }

  // Classifies the question, using up to five history turns as context when
  // a conversation id is given.
  async function classify(question, conversationId, trackUsage) {
    let conversationContext;
    if (conversationId) {
      try {
        const history = await memoryStore.getHistory(conversationId, { limit: 5 });
        if (history.length > 0) {
          conversationContext = history.map((t) => `[${t.role}]: ${t.content}`).join("\n");
        }
      } catch (e) {
        // History is optional context: continue without it, but report it.
        await log?.(`Failed to load conversation history: ${e}`);
      }
    }
    const prompt = buildQueryClassifyPrompt(question, conversationContext);
    const { object, usage } = await withRetry(
      () => generateObject({
        prompt,
        schema: QueryClassifyResultSchema,
        maxTokens: 2048,
        providerOptions
      })
    );
    trackUsage(usage);
    return object;
  }

  // Composes the final answer from the sub-answers.
  async function respond(originalQuestion, subAnswers, classification, platform, trackUsage) {
    const subAnswersJson = JSON.stringify(
      subAnswers.map((sa) => ({
        subQuestion: sa.subQuestion,
        answer: sa.answer,
        citations: sa.citations,
        confidence: sa.confidence,
        needsMoreContext: sa.needsMoreContext
      })),
      null,
      2
    );
    const prompt = buildRespondPrompt(originalQuestion, subAnswersJson, platform);
    const { object, usage } = await withRetry(
      () => generateObject({
        prompt,
        schema: QueryResultSchema,
        maxTokens: 4096,
        providerOptions
      })
    );
    trackUsage(usage);
    // The classified intent overrides whatever intent the model returned.
    const result = object;
    result.intent = classification.intent;
    return result;
  }

  return { query };
}
|
|
5015
|
+
|
|
3558
5016
|
// src/prompts/intent.ts
|
|
3559
5017
|
function buildClassifyMessagePrompt(platform) {
|
|
3560
5018
|
const platformFields = {
|
|
@@ -3680,9 +5138,16 @@ export {
|
|
|
3680
5138
|
AGENT_TOOLS,
|
|
3681
5139
|
APPLICATION_CLASSIFY_PROMPT,
|
|
3682
5140
|
AUDIT_TYPES,
|
|
5141
|
+
AcroFormMappingSchema,
|
|
3683
5142
|
AddressSchema,
|
|
3684
5143
|
AdmittedStatusSchema,
|
|
5144
|
+
AnswerParsingResultSchema,
|
|
5145
|
+
ApplicationClassifyResultSchema,
|
|
5146
|
+
ApplicationFieldSchema,
|
|
5147
|
+
ApplicationStateSchema,
|
|
3685
5148
|
AuditTypeSchema,
|
|
5149
|
+
AutoFillMatchSchema,
|
|
5150
|
+
AutoFillResultSchema,
|
|
3686
5151
|
BOAT_TYPES,
|
|
3687
5152
|
BindingAuthoritySchema,
|
|
3688
5153
|
BoatTypeSchema,
|
|
@@ -3696,6 +5161,7 @@ export {
|
|
|
3696
5161
|
COVERAGE_FORMS,
|
|
3697
5162
|
COVERAGE_TRIGGERS,
|
|
3698
5163
|
ChunkTypeSchema,
|
|
5164
|
+
CitationSchema,
|
|
3699
5165
|
ClaimRecordSchema,
|
|
3700
5166
|
ClaimStatusSchema,
|
|
3701
5167
|
ClassificationCodeSchema,
|
|
@@ -3738,12 +5204,16 @@ export {
|
|
|
3738
5204
|
EnrichedSubjectivitySchema,
|
|
3739
5205
|
EnrichedUnderwritingConditionSchema,
|
|
3740
5206
|
EntityTypeSchema,
|
|
5207
|
+
EvidenceItemSchema,
|
|
3741
5208
|
ExclusionSchema,
|
|
3742
5209
|
ExperienceModSchema,
|
|
3743
5210
|
ExtendedReportingPeriodSchema,
|
|
3744
5211
|
FLOOD_ZONES,
|
|
3745
5212
|
FOUNDATION_TYPES,
|
|
3746
5213
|
FarmRanchDeclarationsSchema,
|
|
5214
|
+
FieldExtractionResultSchema,
|
|
5215
|
+
FieldTypeSchema,
|
|
5216
|
+
FlatPdfPlacementSchema,
|
|
3747
5217
|
FloodDeclarationsSchema,
|
|
3748
5218
|
FloodZoneSchema,
|
|
3749
5219
|
FormReferenceSchema,
|
|
@@ -3762,6 +5232,9 @@ export {
|
|
|
3762
5232
|
LimitScheduleSchema,
|
|
3763
5233
|
LimitTypeSchema,
|
|
3764
5234
|
LocationPremiumSchema,
|
|
5235
|
+
LookupFillResultSchema,
|
|
5236
|
+
LookupFillSchema,
|
|
5237
|
+
LookupRequestSchema,
|
|
3765
5238
|
LossSettlementSchema,
|
|
3766
5239
|
LossSummarySchema,
|
|
3767
5240
|
NamedInsuredSchema,
|
|
@@ -3771,6 +5244,7 @@ export {
|
|
|
3771
5244
|
POLICY_SECTION_TYPES,
|
|
3772
5245
|
POLICY_TERM_TYPES,
|
|
3773
5246
|
POLICY_TYPES,
|
|
5247
|
+
ParsedAnswerSchema,
|
|
3774
5248
|
PaymentInstallmentSchema,
|
|
3775
5249
|
PaymentPlanSchema,
|
|
3776
5250
|
PersonalArticlesDeclarationsSchema,
|
|
@@ -3790,6 +5264,10 @@ export {
|
|
|
3790
5264
|
ProducerInfoSchema,
|
|
3791
5265
|
ProfessionalLiabilityDeclarationsSchema,
|
|
3792
5266
|
QUOTE_SECTION_TYPES,
|
|
5267
|
+
QueryClassifyResultSchema,
|
|
5268
|
+
QueryIntentSchema,
|
|
5269
|
+
QueryResultSchema,
|
|
5270
|
+
QuestionBatchResultSchema,
|
|
3793
5271
|
QuoteDocumentSchema,
|
|
3794
5272
|
QuoteSectionTypeSchema,
|
|
3795
5273
|
RATING_BASIS_TYPES,
|
|
@@ -3799,12 +5277,16 @@ export {
|
|
|
3799
5277
|
RatingBasisSchema,
|
|
3800
5278
|
RatingBasisTypeSchema,
|
|
3801
5279
|
RecreationalVehicleDeclarationsSchema,
|
|
5280
|
+
ReplyIntentSchema,
|
|
5281
|
+
RetrievalResultSchema,
|
|
3802
5282
|
RoofTypeSchema,
|
|
3803
5283
|
SCHEDULED_ITEM_CATEGORIES,
|
|
3804
5284
|
SUBJECTIVITY_CATEGORIES,
|
|
3805
5285
|
ScheduledItemCategorySchema,
|
|
3806
5286
|
SectionSchema,
|
|
3807
5287
|
SharedLimitSchema,
|
|
5288
|
+
SubAnswerSchema,
|
|
5289
|
+
SubQuestionSchema,
|
|
3808
5290
|
SubjectivityCategorySchema,
|
|
3809
5291
|
SubjectivitySchema,
|
|
3810
5292
|
SublimitSchema,
|
|
@@ -3821,6 +5303,7 @@ export {
|
|
|
3821
5303
|
ValuationMethodSchema,
|
|
3822
5304
|
VehicleCoverageSchema,
|
|
3823
5305
|
VehicleCoverageTypeSchema,
|
|
5306
|
+
VerifyResultSchema,
|
|
3824
5307
|
WatercraftDeclarationsSchema,
|
|
3825
5308
|
WorkersCompDeclarationsSchema,
|
|
3826
5309
|
buildAcroFormMappingPrompt,
|
|
@@ -3840,12 +5323,18 @@ export {
|
|
|
3840
5323
|
buildIdentityPrompt,
|
|
3841
5324
|
buildIntentPrompt,
|
|
3842
5325
|
buildLookupFillPrompt,
|
|
5326
|
+
buildQueryClassifyPrompt,
|
|
3843
5327
|
buildQuestionBatchPrompt,
|
|
3844
5328
|
buildQuotesPoliciesPrompt,
|
|
5329
|
+
buildReasonPrompt,
|
|
3845
5330
|
buildReplyIntentClassificationPrompt,
|
|
5331
|
+
buildRespondPrompt,
|
|
3846
5332
|
buildSafetyPrompt,
|
|
5333
|
+
buildVerifyPrompt,
|
|
3847
5334
|
chunkDocument,
|
|
5335
|
+
createApplicationPipeline,
|
|
3848
5336
|
createExtractor,
|
|
5337
|
+
createQueryAgent,
|
|
3849
5338
|
extractPageRange,
|
|
3850
5339
|
fillAcroForm,
|
|
3851
5340
|
getAcroFormFields,
|