@claritylabs/cl-sdk 0.3.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1471,6 +1471,206 @@ function assembleDocument(documentId, documentType, memory) {
1471
1471
  };
1472
1472
  }
1473
1473
 
1474
+ // src/prompts/coordinator/format.ts
1475
/**
 * Builds the prompt that asks the model to clean up markdown formatting.
 *
 * Each entry is rendered as a "===ENTRY <id>===" marker line followed by the
 * entry's text; entries are joined with a blank line and appended after the
 * fixed instructions. The instructions ask for the reply in the same
 * marker-delimited layout, which is the layout parseFormatResponse splits on.
 *
 * @param entries Array of objects with an `id` and a `text` property.
 * @returns The complete prompt string.
 */
+ function buildFormatPrompt(entries) {
1476
+ const block = entries.map((e) => `===ENTRY ${e.id}===
1477
+ ${e.text}`).join("\n\n");
1478
+ return `You are a markdown formatting specialist for insurance document content. You will receive numbered content entries extracted from insurance policies, quotes, and endorsements. Your job is to clean up the formatting so every entry renders correctly as standard markdown.
1479
+
1480
+ ## Primary issues to fix
1481
+
1482
+ ### 1. Pipe-delimited data missing table syntax
1483
+ The most common issue. Content uses pipe characters as column separators but is missing the separator row required for markdown table rendering.
1484
+
1485
+ Before (broken \u2014 won't render as a table):
1486
+ COVERAGE | FORM # | LIMIT | DEDUCTIBLE
1487
+ Employee Theft | | $10,000 | $1,000
1488
+
1489
+ After (valid markdown table):
1490
+ | COVERAGE | FORM # | LIMIT | DEDUCTIBLE |
1491
+ | --- | --- | --- | --- |
1492
+ | Employee Theft | | $10,000 | $1,000 |
1493
+
1494
+ Rules for pipe tables:
1495
+ - Add leading and trailing pipes to every row
1496
+ - Add the separator row (| --- | --- |) after the header row
1497
+ - Every row must have the same number of pipe-separated columns as the header
1498
+ - Empty cells are fine \u2014 just keep the pipes: | | $10,000 |
1499
+
1500
+ ### 2. Sub-items indented within pipe tables
1501
+ Insurance schedules often have indented sub-items that belong to the previous coverage line. These break table column counts.
1502
+
1503
+ Before (broken):
1504
+ COVERAGE | LIMIT | DEDUCTIBLE
1505
+ Causes Of Loss - Equipment Breakdown | PR650END
1506
+ Described Premises Limit | | $350,804 |
1507
+ Diagnostic Equipment | | $100,000 |
1508
+ Deductible Type - Business Income: Waiting Period - Hours
1509
+ Waiting Period (Hours): 24
1510
+
1511
+ After: Pull sub-items out of the table. End the table before the sub-items, show them as an indented list, then start a new table if tabular data resumes:
1512
+ | COVERAGE | LIMIT | DEDUCTIBLE |
1513
+ | --- | --- | --- |
1514
+ | Causes Of Loss - Equipment Breakdown | PR650END | |
1515
+
1516
+ - Described Premises Limit: $350,804
1517
+ - Diagnostic Equipment: $100,000
1518
+ - Deductible Type - Business Income: Waiting Period - Hours
1519
+ - Waiting Period (Hours): 24
1520
+
1521
+ ### 3. Space-aligned tables
1522
+ Declarations often align columns with spaces instead of pipes. These render as plain monospace text and lose structure.
1523
+
1524
+ Before:
1525
+ Coverage Limit of Liability Retention
1526
+ A. Network Security Liability $500,000 $10,000
1527
+ B. Privacy Liability $500,000 $10,000
1528
+
1529
+ After (convert to proper markdown table):
1530
+ | Coverage | Limit of Liability | Retention |
1531
+ | --- | --- | --- |
1532
+ | A. Network Security Liability | $500,000 | $10,000 |
1533
+ | B. Privacy Liability | $500,000 | $10,000 |
1534
+
1535
+ ### 4. Mixed table/prose content
1536
+ A single entry often contains prose paragraphs followed by tabular data followed by more prose. Handle each segment independently \u2014 don't try to force everything into one table.
1537
+
1538
+ ### 5. General markdown cleanup
1539
+ - **Line spacing**: Remove excessive blank lines (3+ consecutive newlines \u2192 2). Ensure one blank line before and after tables and headings.
1540
+ - **Trailing whitespace**: Remove trailing spaces on all lines.
1541
+ - **Broken lists**: Ensure list items use consistent markers (-, *, or 1.) with proper nesting indentation.
1542
+ - **Orphaned formatting**: Close any unclosed bold (**), italic (*), or code (\`) markers.
1543
+ - **Heading levels**: Ensure heading markers (##) have a space after the hashes.
1544
+
1545
+ ## Rules
1546
+ - Do NOT change the meaning or substance of any content. Only fix formatting.
1547
+ - Do NOT add new information, headers, or commentary.
1548
+ - Do NOT wrap entries in code fences.
1549
+ - Preserve all dollar amounts, dates, policy numbers, form numbers, and technical terms exactly as they appear.
1550
+ - If an entry is already well-formatted, return it unchanged.
1551
+ - When in doubt about whether something is a table, prefer table formatting for structured data with multiple columns.
1552
+
1553
+ Return your output in this exact format \u2014 one block per entry, in the same order:
1554
+
1555
+ ===ENTRY 0===
1556
+ (cleaned content for entry 0)
1557
+
1558
+ ===ENTRY 1===
1559
+ (cleaned content for entry 1)
1560
+
1561
+ ...and so on for each entry.
1562
+
1563
+ Here are the entries to format:
1564
+
1565
+ ${block}`;
1566
+ }
1567
+
1568
+ // src/extraction/formatter.ts
1569
+ function collectContentFields(doc) {
1570
+ const entries = [];
1571
+ let id = 0;
1572
+ function add(path, text) {
1573
+ if (text && text.length > 20) {
1574
+ entries.push({ id: id++, path, text });
1575
+ }
1576
+ }
1577
+ add("summary", doc.summary);
1578
+ if (doc.sections) {
1579
+ for (let i = 0; i < doc.sections.length; i++) {
1580
+ const s = doc.sections[i];
1581
+ add(`sections[${i}].content`, s.content);
1582
+ if (s.subsections) {
1583
+ for (let j = 0; j < s.subsections.length; j++) {
1584
+ add(`sections[${i}].subsections[${j}].content`, s.subsections[j].content);
1585
+ }
1586
+ }
1587
+ }
1588
+ }
1589
+ if (doc.endorsements) {
1590
+ for (let i = 0; i < doc.endorsements.length; i++) {
1591
+ add(`endorsements[${i}].content`, doc.endorsements[i].content);
1592
+ }
1593
+ }
1594
+ if (doc.exclusions) {
1595
+ for (let i = 0; i < doc.exclusions.length; i++) {
1596
+ add(`exclusions[${i}].content`, doc.exclusions[i].content);
1597
+ }
1598
+ }
1599
+ if (doc.conditions) {
1600
+ for (let i = 0; i < doc.conditions.length; i++) {
1601
+ add(`conditions[${i}].content`, doc.conditions[i].content);
1602
+ }
1603
+ }
1604
+ return entries;
1605
+ }
1606
/**
 * Splits a model reply into per-entry content keyed by numeric entry id.
 *
 * The reply is expected to contain `===ENTRY <digits>===` markers. The text
 * between one marker and the next (or the end of the reply) is trimmed and
 * stored under the parsed id. Text before the first marker is discarded, and
 * when an id appears more than once the last occurrence wins.
 *
 * @param response Raw text returned by the model.
 * @returns Map from entry id (number) to trimmed content (possibly "").
 */
function parseFormatResponse(response) {
  const byId = /* @__PURE__ */ new Map();
  const markers = [...response.matchAll(/===ENTRY (\d+)===/g)];
  markers.forEach((marker, position) => {
    const entryId = parseInt(marker[1], 10);
    if (Number.isNaN(entryId)) return;
    const bodyStart = marker.index + marker[0].length;
    const following = markers[position + 1];
    const bodyEnd = following ? following.index : response.length;
    byId.set(entryId, response.slice(bodyStart, bodyEnd).trim());
  });
  return byId;
}
1618
+ function applyFormattedContent(doc, entries, formatted) {
1619
+ for (const entry of entries) {
1620
+ const cleaned = formatted.get(entry.id);
1621
+ if (!cleaned) continue;
1622
+ const segments = entry.path.match(/^(\w+)(?:\[(\d+)\])?(?:\.(\w+)(?:\[(\d+)\])?(?:\.(\w+))?)?$/);
1623
+ if (!segments) continue;
1624
+ const [, field, idx1, sub1, idx2, sub2] = segments;
1625
+ if (!sub1) {
1626
+ doc[field] = cleaned;
1627
+ } else if (!sub2) {
1628
+ const arr = doc[field];
1629
+ if (arr && arr[Number(idx1)]) {
1630
+ arr[Number(idx1)][sub1] = cleaned;
1631
+ }
1632
+ } else {
1633
+ const arr = doc[field];
1634
+ if (arr && arr[Number(idx1)]) {
1635
+ const nested = arr[Number(idx1)][sub1];
1636
+ if (nested && nested[Number(idx2)]) {
1637
+ nested[Number(idx2)][sub2] = cleaned;
1638
+ }
1639
+ }
1640
+ }
1641
+ }
1642
+ }
1643
// Upper bound on how many content entries are sent to the model in one prompt.
var MAX_ENTRIES_PER_BATCH = 20;

/**
 * Runs the markdown-cleanup pass over a document's text fields.
 *
 * Text fields are collected with collectContentFields, sent to the model in
 * batches of at most MAX_ENTRIES_PER_BATCH (one request per batch, awaited in
 * sequence), and each parsed reply is written back with
 * applyFormattedContent.
 *
 * @param doc Document to format. It is modified in place and is the same
 *   object returned as `document`.
 * @param generateText Callback invoked (through withRetry) with
 *   `{ prompt, maxTokens, providerOptions }`; its result is read for `text`
 *   and, when present, `usage`.
 * @param options Optional `{ providerOptions, onProgress }`. `onProgress`, if
 *   given, is called once with a status message before the first request.
 * @returns `{ document, usage }` where `usage` sums `inputTokens` and
 *   `outputTokens` over all batches (both 0 when nothing needed formatting).
 */
async function formatDocumentContent(doc, generateText, options) {
  const entries = collectContentFields(doc);
  const totalUsage = { inputTokens: 0, outputTokens: 0 };
  if (entries.length === 0) {
    return { document: doc, usage: totalUsage };
  }
  options?.onProgress?.(`Formatting ${entries.length} content fields...`);

  for (let start = 0; start < entries.length; start += MAX_ENTRIES_PER_BATCH) {
    const batch = entries.slice(start, start + MAX_ENTRIES_PER_BATCH);
    const prompt = buildFormatPrompt(batch.map((e) => ({ id: e.id, text: e.text })));
    const result = await withRetry(
      () => generateText({
        prompt,
        maxTokens: 16384,
        providerOptions: options?.providerOptions
      })
    );

    // A usage object with a missing count must not turn the totals into NaN.
    totalUsage.inputTokens += result.usage?.inputTokens ?? 0;
    totalUsage.outputTokens += result.usage?.outputTokens ?? 0;

    // A reply without text yields no replacements; the batch stays as it was.
    // NOTE(review): a reply cut off at maxTokens would still parse, and its
    // last entry could overwrite the original with truncated text. Confirm
    // whether generateText reports a finish reason that could guard this.
    const formatted = parseFormatResponse(result.text ?? "");
    applyFormattedContent(doc, batch, formatted);
  }

  return { document: doc, usage: totalUsage };
}
1673
+
1474
1674
  // src/extraction/chunking.ts
1475
1675
  function chunkDocument(doc) {
1476
1676
  const chunks = [];
@@ -2966,8 +3166,14 @@ function createExtractor(config) {
2966
3166
  }
2967
3167
  onProgress?.("Assembling document...");
2968
3168
  const document = assembleDocument(id, documentType, memory);
2969
- const chunks = chunkDocument(document);
2970
- return { document, chunks, tokenUsage: totalUsage };
3169
+ onProgress?.("Formatting extracted content...");
3170
+ const formatResult = await formatDocumentContent(document, generateText, {
3171
+ providerOptions,
3172
+ onProgress
3173
+ });
3174
+ trackUsage(formatResult.usage);
3175
+ const chunks = chunkDocument(formatResult.document);
3176
+ return { document: formatResult.document, chunks, tokenUsage: totalUsage };
2971
3177
  }
2972
3178
  return { extract };
2973
3179
  }
@@ -3185,6 +3391,129 @@ Respond with JSON only:
3185
3391
  "applicationType": string | null // e.g. "General Liability", "Professional Liability", "Commercial Property", "Workers Compensation", "ACORD 125", etc.
3186
3392
  }`;
3187
3393
 
3394
+ // src/schemas/application.ts
3395
+ import { z as z31 } from "zod";
3396
// Allowed values for an application field's `fieldType`.
+ var FieldTypeSchema = z31.enum([
3397
+ "text",
3398
+ "numeric",
3399
+ "currency",
3400
+ "date",
3401
+ "yes_no",
3402
+ "table",
3403
+ "declaration"
3404
+ ]);
3405
// One field of an application form. `id`, `label`, `section`, `fieldType`
// and `required` are mandatory; everything else is optional. `condition`
// names another field (`dependsOn`) and the value (`whenValue`) it is tied to.
+ var ApplicationFieldSchema = z31.object({
3406
+ id: z31.string(),
3407
+ label: z31.string(),
3408
+ section: z31.string(),
3409
+ fieldType: FieldTypeSchema,
3410
+ required: z31.boolean(),
3411
+ options: z31.array(z31.string()).optional(),
3412
+ columns: z31.array(z31.string()).optional(),
3413
+ requiresExplanationIfYes: z31.boolean().optional(),
3414
+ condition: z31.object({
3415
+ dependsOn: z31.string(),
3416
+ whenValue: z31.string()
3417
+ }).optional(),
3418
+ value: z31.string().optional(),
3419
+ source: z31.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
3420
+ confidence: z31.enum(["confirmed", "high", "medium", "low"]).optional()
3421
+ });
3422
// Structured output requested by classifyApplication; `confidence` is
// constrained to the range 0..1.
+ var ApplicationClassifyResultSchema = z31.object({
3423
+ isApplication: z31.boolean(),
3424
+ confidence: z31.number().min(0).max(1),
3425
+ applicationType: z31.string().nullable()
3426
+ });
3427
// Structured output requested by extractFields.
+ var FieldExtractionResultSchema = z31.object({
3428
+ fields: z31.array(ApplicationFieldSchema)
3429
+ });
3430
// A single auto-fill match; `confidence` only admits "confirmed".
+ var AutoFillMatchSchema = z31.object({
3431
+ fieldId: z31.string(),
3432
+ value: z31.string(),
3433
+ confidence: z31.enum(["confirmed"]),
3434
+ contextKey: z31.string()
3435
+ });
3436
// Structured output requested by autoFillFromContext.
+ var AutoFillResultSchema = z31.object({
3437
+ matches: z31.array(AutoFillMatchSchema)
3438
+ });
3439
// Structured output requested by batchQuestions: a list of batches, each a
// list of field IDs.
+ var QuestionBatchResultSchema = z31.object({
3440
+ batches: z31.array(z31.array(z31.string()).describe("Array of field IDs in this batch"))
3441
+ });
3442
// A lookup request; `type` is a free-form string (the description lists
// 'records', 'website', 'policy' but the schema does not enforce them).
+ var LookupRequestSchema = z31.object({
3443
+ type: z31.string().describe("Type of lookup: 'records', 'website', 'policy'"),
3444
+ description: z31.string(),
3445
+ url: z31.string().optional(),
3446
+ targetFieldIds: z31.array(z31.string())
3447
+ });
3448
// Structured output requested by classifyReplyIntent.
+ var ReplyIntentSchema = z31.object({
3449
+ primaryIntent: z31.enum(["answers_only", "question", "lookup_request", "mixed"]),
3450
+ hasAnswers: z31.boolean(),
3451
+ questionText: z31.string().optional(),
3452
+ questionFieldIds: z31.array(z31.string()).optional(),
3453
+ lookupRequests: z31.array(LookupRequestSchema).optional()
3454
+ });
3455
// One answer parsed from a reply, with an optional explanation.
+ var ParsedAnswerSchema = z31.object({
3456
+ fieldId: z31.string(),
3457
+ value: z31.string(),
3458
+ explanation: z31.string().optional()
3459
+ });
3460
// Structured output requested by parseAnswers.
+ var AnswerParsingResultSchema = z31.object({
3461
+ answers: z31.array(ParsedAnswerSchema),
3462
+ unanswered: z31.array(z31.string()).describe("Field IDs that were not answered")
3463
+ });
3464
// One field value obtained from a lookup, with the reference it came from.
+ var LookupFillSchema = z31.object({
3465
+ fieldId: z31.string(),
3466
+ value: z31.string(),
3467
+ source: z31.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
3468
+ });
3469
// Structured output requested by fillFromLookup.
+ var LookupFillResultSchema = z31.object({
3470
+ fills: z31.array(LookupFillSchema),
3471
+ unfillable: z31.array(z31.string()),
3472
+ explanation: z31.string().optional()
3473
+ });
3474
// Placement of one value on a flat PDF page. `x`/`y` are described as
// percentages (0-100) but the schema accepts any number.
+ var FlatPdfPlacementSchema = z31.object({
3475
+ fieldId: z31.string(),
3476
+ page: z31.number(),
3477
+ x: z31.number().describe("Percentage from left edge (0-100)"),
3478
+ y: z31.number().describe("Percentage from top edge (0-100)"),
3479
+ text: z31.string(),
3480
+ fontSize: z31.number().optional(),
3481
+ isCheckmark: z31.boolean().optional()
3482
+ });
3483
// Pairs an application field ID with an AcroForm field name and a value.
+ var AcroFormMappingSchema = z31.object({
3484
+ fieldId: z31.string(),
3485
+ acroFormName: z31.string(),
3486
+ value: z31.string()
3487
+ });
3488
// Persisted state of one application. `currentBatchIndex` defaults to 0 and
// `status` is one of the eight listed stage names.
+ var ApplicationStateSchema = z31.object({
3489
+ id: z31.string(),
3490
+ pdfBase64: z31.string().optional().describe("Original PDF, omitted after extraction"),
3491
+ title: z31.string().optional(),
3492
+ applicationType: z31.string().nullable().optional(),
3493
+ fields: z31.array(ApplicationFieldSchema),
3494
+ batches: z31.array(z31.array(z31.string())).optional(),
3495
+ currentBatchIndex: z31.number().default(0),
3496
+ status: z31.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
3497
+ createdAt: z31.number(),
3498
+ updatedAt: z31.number()
3499
+ });
3500
+
3501
+ // src/application/agents/classifier.ts
3502
/**
 * Asks the model whether the given document content is an insurance
 * application.
 *
 * @param pdfContent Document content; interpolated into the prompt after
 *   APPLICATION_CLASSIFY_PROMPT.
 * @param generateObject Structured-output callback; called through withRetry
 *   with `{ prompt, schema, maxTokens: 512, providerOptions }`.
 * @param providerOptions Passed through to `generateObject` unchanged.
 * @returns `{ result, usage }` where `result` is the object produced for
 *   ApplicationClassifyResultSchema and `usage` is whatever the callback
 *   reported.
 */
async function classifyApplication(pdfContent, generateObject, providerOptions) {
  const prompt = `${APPLICATION_CLASSIFY_PROMPT}

Analyze the following document content:
${pdfContent}`;
  const askModel = () => generateObject({
    prompt,
    schema: ApplicationClassifyResultSchema,
    maxTokens: 512,
    providerOptions
  });
  const { object: classification, usage } = await withRetry(askModel);
  return { result: classification, usage };
}
3516
+
3188
3517
  // src/prompts/application/field-extraction.ts
3189
3518
  function buildFieldExtractionPrompt() {
3190
3519
  return `Extract all fillable fields from this insurance application PDF as a JSON array. Be concise \u2014 use short IDs and minimal keys.
@@ -3217,6 +3546,24 @@ Example:
3217
3546
  Extract ALL fields. Respond with ONLY the JSON array, no other text.`;
3218
3547
  }
3219
3548
 
3549
+ // src/application/agents/field-extractor.ts
3550
/**
 * Asks the model to list the fillable fields of an application.
 *
 * @param pdfContent Application content; appended to the prompt produced by
 *   buildFieldExtractionPrompt.
 * @param generateObject Structured-output callback; called through withRetry
 *   with `{ prompt, schema, maxTokens: 8192, providerOptions }`.
 * @param providerOptions Passed through to `generateObject` unchanged.
 * @returns `{ fields, usage }` where `fields` is the `fields` array of the
 *   object produced for FieldExtractionResultSchema.
 */
async function extractFields(pdfContent, generateObject, providerOptions) {
  const instructions = buildFieldExtractionPrompt();
  const prompt = `${instructions}

Extract fields from this application:
${pdfContent}`;
  const askModel = () => generateObject({
    prompt,
    schema: FieldExtractionResultSchema,
    maxTokens: 8192,
    providerOptions
  });
  const { object: extraction, usage } = await withRetry(askModel);
  return { fields: extraction.fields, usage };
}
3566
+
3220
3567
  // src/prompts/application/auto-fill.ts
3221
3568
  function buildAutoFillPrompt(fields, orgContext) {
3222
3569
  const fieldList = fields.map((f) => `- ${f.id}: "${f.label}" (${f.fieldType}, section: ${f.section})`).join("\n");
@@ -3246,6 +3593,39 @@ Respond with JSON only:
3246
3593
  Only include fields you can confidently fill. Do not guess or fabricate values.`;
3247
3594
  }
3248
3595
 
3596
+ // src/application/agents/auto-filler.ts
3597
/**
 * Asks the model to fill application fields from organisation context.
 *
 * Only `id`, `label`, `fieldType` and `section` of each field are passed to
 * buildAutoFillPrompt.
 *
 * @param fields Application fields to consider.
 * @param orgContext Context handed to buildAutoFillPrompt as-is.
 * @param generateObject Structured-output callback; called through withRetry
 *   with `{ prompt, schema, maxTokens: 4096, providerOptions }`.
 * @param providerOptions Passed through to `generateObject` unchanged.
 * @returns `{ result, usage }` where `result` is the object produced for
 *   AutoFillResultSchema.
 */
async function autoFillFromContext(fields, orgContext, generateObject, providerOptions) {
  const summaries = fields.map((field) => {
    const { id, label, fieldType, section } = field;
    return { id, label, fieldType, section };
  });
  const prompt = buildAutoFillPrompt(summaries, orgContext);
  const askModel = () => generateObject({
    prompt,
    schema: AutoFillResultSchema,
    maxTokens: 4096,
    providerOptions
  });
  const { object: matches, usage } = await withRetry(askModel);
  return { result: matches, usage };
}
3615
+ async function backfillFromPriorAnswers(fields, backfillProvider) {
3616
+ const unfilled = fields.filter((f) => !f.value);
3617
+ if (unfilled.length === 0) return [];
3618
+ return backfillProvider.searchPriorAnswers(
3619
+ unfilled.map((f) => ({
3620
+ id: f.id,
3621
+ label: f.label,
3622
+ section: f.section,
3623
+ fieldType: f.fieldType
3624
+ })),
3625
+ { limit: unfilled.length * 2 }
3626
+ );
3627
+ }
3628
+
3249
3629
  // src/prompts/application/question-batch.ts
3250
3630
  function buildQuestionBatchPrompt(unfilledFields) {
3251
3631
  const fieldList = unfilledFields.map(
@@ -3280,120 +3660,27 @@ Respond with JSON only:
3280
3660
  }`;
3281
3661
  }
3282
3662
 
3283
- // src/prompts/application/answer-parsing.ts
3284
- function buildAnswerParsingPrompt(questions, emailBody) {
3285
- const questionList = questions.map(
3286
- (q, i) => `${i + 1}. ${q.id}: "${q.label ?? q.text}" (type: ${q.fieldType})`
3287
- ).join("\n");
3288
- return `You are parsing a user's email reply to extract answers for specific insurance application questions.
3289
-
3290
- QUESTIONS ASKED:
3291
- ${questionList}
3292
-
3293
- USER'S EMAIL REPLY:
3294
- ${emailBody}
3295
-
3296
- Extract answers for each question. Handle:
3297
- - Direct numbered answers (1. answer, 2. answer)
3298
- - Inline answers referencing the question
3299
- - Table data provided as lists or comma-separated values
3300
- - Yes/no answers with optional explanations
3301
- - Partial responses (some questions answered, others skipped)
3302
-
3303
- Respond with JSON only:
3304
- {
3305
- "answers": [
3306
- {
3307
- "fieldId": "company_name",
3308
- "value": "Acme Corp"
3309
- },
3310
- {
3311
- "fieldId": "prior_claims_decl",
3312
- "value": "yes",
3313
- "explanation": "One claim in 2024 for water damage, $15,000 paid"
3314
- }
3315
- ],
3316
- "unanswered": ["field_id_that_was_not_answered"]
3317
- }
3318
-
3319
- Only include answers you are confident about. If a response is ambiguous, include the field in "unanswered".`;
3320
- }
3321
-
3322
- // src/prompts/application/confirmation.ts
3323
- function buildConfirmationSummaryPrompt(fields, applicationTitle) {
3324
- const fieldList = fields.map((f) => {
3325
- const label = f.label ?? f.text ?? f.id;
3326
- const value = f.value ?? "(not provided)";
3327
- return `[${f.section}] ${label}: ${value}`;
3328
- }).join("\n");
3329
- return `Format the following insurance application answers into a clean, readable summary grouped by section. This will be sent as an email for the user to review and confirm.
3330
-
3331
- APPLICATION: ${applicationTitle}
3332
-
3333
- FIELD VALUES:
3334
- ${fieldList}
3335
-
3336
- Format as a readable summary:
3337
- - Group by section with section headers
3338
- - Show each field as "Label: Value"
3339
- - For declarations, show the question and the yes/no answer plus any explanation
3340
- - Skip fields with no value unless they are required
3341
- - End with a note asking the user to reply "Looks good" to confirm, or describe any changes needed
3342
-
3343
- Respond with the formatted summary text only (no JSON wrapper). Use markdown formatting (bold headers, bullet points).`;
3344
- }
3345
-
3346
- // src/prompts/application/batch-email.ts
3347
- function buildBatchEmailGenerationPrompt(batchFields, batchIndex, totalBatches, appTitle, totalFieldCount, filledFieldCount, previousBatchSummary, companyName) {
3348
- const nonConditionalFields = batchFields.filter((f) => !f.condition);
3349
- const conditionalFields = batchFields.filter((f) => f.condition);
3350
- const fieldList = nonConditionalFields.map((f, i) => {
3351
- let line = `${i + 1}. id="${f.id}" label="${f.label}" type=${f.fieldType}`;
3352
- if (f.options) line += ` options=[${f.options.join(", ")}]`;
3353
- return line;
3354
- }).join("\n");
3355
- const conditionalNote = conditionalFields.length > 0 ? `
3356
-
3357
- CONDITIONAL FIELDS (DO NOT include in this email \u2014 they will be asked as follow-ups in a separate email after the parent is answered):
3358
- ${conditionalFields.map((f) => `- id="${f.id}" label="${f.label}" depends on ${f.condition.dependsOn} = "${f.condition.whenValue}"`).join("\n")}` : "";
3359
- const company = companyName ?? "the company";
3360
- const remainingFields = totalFieldCount - filledFieldCount;
3361
- const estMinutes = Math.max(1, Math.round(remainingFields * 0.5));
3362
- return `You are an internal risk management assistant helping your colleague fill out an insurance application for ${company}. You work FOR ${company} \u2014 you are NOT the insurer, broker, or any external party.
3363
-
3364
- APPLICATION: ${appTitle ?? "Insurance Application"}
3365
- COMPANY: ${company}
3366
- PROGRESS: ${filledFieldCount} of ${totalFieldCount} fields done, ~${remainingFields} remaining (~${estMinutes} min of questions left)
3367
- ${previousBatchSummary ? `
3368
- PREVIOUS ANSWERS RECEIVED:
3369
- ${previousBatchSummary}
3370
- ` : ""}
3371
- FIELDS TO ASK ABOUT:
3372
- ${fieldList}${conditionalNote}
3373
-
3374
- Rules:
3375
- - ${previousBatchSummary ? 'Start by acknowledging previous answers or auto-filled data. If fields were auto-filled, list each field with its value AND cite the specific source (e.g. "from your GL Policy #ABC123", "from vercel.com", "from your business context"). If a web lookup was done, name the URL that was checked. Ask them to reply with corrections if anything is wrong.' : "Start with a one-line intro."}
3376
- - Mention progress once using estimated time remaining. Don't mention section/batch numbers or field counts.
3377
- - Use "${company}" by name when referring to the company. Also fine: "we" or "our". Never "our company" or "the company".
3378
- - Ask questions plainly. No em-dashes for dramatic effect, no filler phrases like "need to nail down" or "let's dive into". Just ask.
3379
- - For yes/no questions, ask naturally in one sentence. Don't list "Yes / No" as options. Mention what you'll need if the answer triggers a follow-up (e.g. "If not, I'll need a brief explanation.").
3380
- - For fields with 2-3 options, mention them inline. 4+ options can be a short list.
3381
- - Group related fields (address, coverage limits) into single compound questions.
3382
- - Do NOT include conditional/follow-up fields. They will be sent separately.
3383
- - Number each question.
3384
- - Note expected format where relevant: dollar amounts for currency, MM/DD/YYYY for dates, column descriptions for tables.
3385
- - End with a short closing.
3386
- - Tone: professional, brief, matter-of-fact. Write like a busy coworker, not a chatbot. No flourishes, no em-dashes between clauses, no editorializing about the questions.
3387
-
3388
- NEVER:
3389
- - Sound like a salesperson or customer service agent
3390
- - Use em-dashes for emphasis or dramatic pacing
3391
- - Editorialize ("these two should wrap up this section", "just a couple more")
3392
- - List "Yes / No / N/A" as bullet options
3393
- - Include conditional follow-up questions
3394
- - Mention section numbers, batch numbers, or field counts
3395
-
3396
- Output the email body text ONLY. No subject line, no JSON. Use markdown for numbered lists.`;
3663
+ // src/application/agents/batcher.ts
3664
/**
 * Asks the model to group unfilled fields into question batches.
 *
 * Each field is reduced to `id`, `label`, `text` (a copy of `label`),
 * `fieldType`, `section`, `required` and `condition` before being passed to
 * buildQuestionBatchPrompt.
 *
 * @param unfilledFields Fields that still need an answer.
 * @param generateObject Structured-output callback; called through withRetry
 *   with `{ prompt, schema, maxTokens: 2048, providerOptions }`.
 * @param providerOptions Passed through to `generateObject` unchanged.
 * @returns `{ result, usage }` where `result` is the object produced for
 *   QuestionBatchResultSchema.
 */
async function batchQuestions(unfilledFields, generateObject, providerOptions) {
  const summaries = unfilledFields.map((field) => {
    const summary = {
      id: field.id,
      label: field.label,
      text: field.label,
      fieldType: field.fieldType,
      section: field.section,
      required: field.required,
      condition: field.condition
    };
    return summary;
  });
  const prompt = buildQuestionBatchPrompt(summaries);
  const askModel = () => generateObject({
    prompt,
    schema: QuestionBatchResultSchema,
    maxTokens: 2048,
    providerOptions
  });
  const { object: batching, usage } = await withRetry(askModel);
  return { result: batching, usage };
}
3398
3685
 
3399
3686
  // src/prompts/application/reply-intent.ts
@@ -3432,23 +3719,78 @@ Respond with JSON only:
3432
3719
  }`;
3433
3720
  }
3434
3721
 
3435
- // src/prompts/application/field-explanation.ts
3436
- function buildFieldExplanationPrompt(field, question, policyContext) {
3437
- return `You are an internal risk management assistant helping a colleague fill out an insurance application for your company. They asked a question about a field on the form.
3722
+ // src/application/agents/reply-router.ts
3723
/**
 * Asks the model to classify what a reply is trying to do.
 *
 * Only `id` and `label` of each field are passed to
 * buildReplyIntentClassificationPrompt.
 *
 * @param fields Fields the reply may refer to.
 * @param replyText Text of the reply.
 * @param generateObject Structured-output callback; called through withRetry
 *   with `{ prompt, schema, maxTokens: 1024, providerOptions }`.
 * @param providerOptions Passed through to `generateObject` unchanged.
 * @returns `{ intent, usage }` where `intent` is the object produced for
 *   ReplyIntentSchema.
 */
async function classifyReplyIntent(fields, replyText, generateObject, providerOptions) {
  const summaries = fields.map((field) => {
    const { id, label } = field;
    return { id, label };
  });
  const prompt = buildReplyIntentClassificationPrompt(summaries, replyText);
  const askModel = () => generateObject({
    prompt,
    schema: ReplyIntentSchema,
    maxTokens: 1024,
    providerOptions
  });
  const { object: classified, usage } = await withRetry(askModel);
  return { intent: classified, usage };
}
3438
3736
 
3439
- FIELD: "${field.label}" (type: ${field.fieldType}${field.options ? `, options: ${field.options.join(", ")}` : ""})
3737
+ // src/prompts/application/answer-parsing.ts
3738
/**
 * Builds the prompt that asks the model to extract answers from an email
 * reply.
 *
 * Each question becomes one numbered line of the form
 * `<n>. <id>: "<label or text>" (type: <fieldType>)`, where `text` is used
 * only when `label` is null or undefined. The question list and the email
 * body are embedded in fixed instructions that request a JSON object with
 * `answers` and `unanswered`.
 *
 * @param questions Array of objects with `id`, `label` and/or `text`, and
 *   `fieldType`.
 * @param emailBody Text of the reply to parse.
 * @returns The complete prompt string.
 */
+ function buildAnswerParsingPrompt(questions, emailBody) {
3739
+ const questionList = questions.map(
3740
+ (q, i) => `${i + 1}. ${q.id}: "${q.label ?? q.text}" (type: ${q.fieldType})`
3741
+ ).join("\n");
3742
+ return `You are parsing a user's email reply to extract answers for specific insurance application questions.
3440
3743
 
3441
- THEIR QUESTION: "${question}"
3744
+ QUESTIONS ASKED:
3745
+ ${questionList}
3442
3746
 
3443
- ${policyContext ? `RELEVANT POLICY/CONTEXT INFO:
3444
- ${policyContext}
3445
- ` : ""}
3747
+ USER'S EMAIL REPLY:
3748
+ ${emailBody}
3446
3749
 
3447
- Provide a short, helpful explanation (2-3 sentences) as a coworker would. If the field has options, briefly explain what each means if relevant. If there's policy context that helps, cite the specific source (e.g. "According to our GL Policy #ABC123 with Hartford, our current aggregate limit is $2M").
3750
+ Extract answers for each question. Handle:
3751
+ - Direct numbered answers (1. answer, 2. answer)
3752
+ - Inline answers referencing the question
3753
+ - Table data provided as lists or comma-separated values
3754
+ - Yes/no answers with optional explanations
3755
+ - Partial responses (some questions answered, others skipped)
3448
3756
 
3449
- End with: "Just reply with the answer when you're ready and I'll fill it in."
3757
+ Respond with JSON only:
3758
+ {
3759
+ "answers": [
3760
+ {
3761
+ "fieldId": "company_name",
3762
+ "value": "Acme Corp"
3763
+ },
3764
+ {
3765
+ "fieldId": "prior_claims_decl",
3766
+ "value": "yes",
3767
+ "explanation": "One claim in 2024 for water damage, $15,000 paid"
3768
+ }
3769
+ ],
3770
+ "unanswered": ["field_id_that_was_not_answered"]
3771
+ }
3450
3772
 
3451
- Respond with the explanation text only \u2014 no JSON, no field ID, no extra formatting.`;
3773
+ Only include answers you are confident about. If a response is ambiguous, include the field in "unanswered".`;
3774
+ }
3775
+
3776
+ // src/application/agents/answer-parser.ts
3777
/**
 * Asks the model to extract field answers from a reply.
 *
 * Each field is reduced to `id`, `label`, `text` (a copy of `label`) and
 * `fieldType` before being passed to buildAnswerParsingPrompt.
 *
 * @param fields Fields that were asked about.
 * @param replyText Text of the reply.
 * @param generateObject Structured-output callback; called through withRetry
 *   with `{ prompt, schema, maxTokens: 4096, providerOptions }`.
 * @param providerOptions Passed through to `generateObject` unchanged.
 * @returns `{ result, usage }` where `result` is the object produced for
 *   AnswerParsingResultSchema.
 */
async function parseAnswers(fields, replyText, generateObject, providerOptions) {
  const asked = fields.map((field) => {
    const question = {
      id: field.id,
      label: field.label,
      text: field.label,
      fieldType: field.fieldType
    };
    return question;
  });
  const prompt = buildAnswerParsingPrompt(asked, replyText);
  const askModel = () => generateObject({
    prompt,
    schema: AnswerParsingResultSchema,
    maxTokens: 4096,
    providerOptions
  });
  const { object: parsed, usage } = await withRetry(askModel);
  return { result: parsed, usage };
}
3453
3795
 
3454
3796
  // src/prompts/application/pdf-mapping.ts
@@ -3555,6 +3897,1122 @@ Respond with JSON only:
3555
3897
  }`;
3556
3898
  }
3557
3899
 
3900
+ // src/application/agents/lookup-filler.ts
3901
+ async function fillFromLookup(requests, targetFields, availableData, generateObject, providerOptions) {
3902
+ const requestSummaries = requests.map((r) => ({
3903
+ type: r.type,
3904
+ description: r.description,
3905
+ targetFieldIds: r.targetFieldIds
3906
+ }));
3907
+ const fieldSummaries = targetFields.map((f) => ({
3908
+ id: f.id,
3909
+ label: f.label,
3910
+ fieldType: f.fieldType
3911
+ }));
3912
+ const prompt = buildLookupFillPrompt(requestSummaries, fieldSummaries, availableData);
3913
+ const { object, usage } = await withRetry(
3914
+ () => generateObject({
3915
+ prompt,
3916
+ schema: LookupFillResultSchema,
3917
+ maxTokens: 4096,
3918
+ providerOptions
3919
+ })
3920
+ );
3921
+ return { result: object, usage };
3922
+ }
3923
+
3924
+ // src/prompts/application/batch-email.ts
3925
+ function buildBatchEmailGenerationPrompt(batchFields, batchIndex, totalBatches, appTitle, totalFieldCount, filledFieldCount, previousBatchSummary, companyName) {
3926
+ const nonConditionalFields = batchFields.filter((f) => !f.condition);
3927
+ const conditionalFields = batchFields.filter((f) => f.condition);
3928
+ const fieldList = nonConditionalFields.map((f, i) => {
3929
+ let line = `${i + 1}. id="${f.id}" label="${f.label}" type=${f.fieldType}`;
3930
+ if (f.options) line += ` options=[${f.options.join(", ")}]`;
3931
+ return line;
3932
+ }).join("\n");
3933
+ const conditionalNote = conditionalFields.length > 0 ? `
3934
+
3935
+ CONDITIONAL FIELDS (DO NOT include in this email \u2014 they will be asked as follow-ups in a separate email after the parent is answered):
3936
+ ${conditionalFields.map((f) => `- id="${f.id}" label="${f.label}" depends on ${f.condition.dependsOn} = "${f.condition.whenValue}"`).join("\n")}` : "";
3937
+ const company = companyName ?? "the company";
3938
+ const remainingFields = totalFieldCount - filledFieldCount;
3939
+ const estMinutes = Math.max(1, Math.round(remainingFields * 0.5));
3940
+ return `You are an internal risk management assistant helping your colleague fill out an insurance application for ${company}. You work FOR ${company} \u2014 you are NOT the insurer, broker, or any external party.
3941
+
3942
+ APPLICATION: ${appTitle ?? "Insurance Application"}
3943
+ COMPANY: ${company}
3944
+ PROGRESS: ${filledFieldCount} of ${totalFieldCount} fields done, ~${remainingFields} remaining (~${estMinutes} min of questions left)
3945
+ ${previousBatchSummary ? `
3946
+ PREVIOUS ANSWERS RECEIVED:
3947
+ ${previousBatchSummary}
3948
+ ` : ""}
3949
+ FIELDS TO ASK ABOUT:
3950
+ ${fieldList}${conditionalNote}
3951
+
3952
+ Rules:
3953
+ - ${previousBatchSummary ? 'Start by acknowledging previous answers or auto-filled data. If fields were auto-filled, list each field with its value AND cite the specific source (e.g. "from your GL Policy #ABC123", "from vercel.com", "from your business context"). If a web lookup was done, name the URL that was checked. Ask them to reply with corrections if anything is wrong.' : "Start with a one-line intro."}
3954
+ - Mention progress once using estimated time remaining. Don't mention section/batch numbers or field counts.
3955
+ - Use "${company}" by name when referring to the company. Also fine: "we" or "our". Never "our company" or "the company".
3956
+ - Ask questions plainly. No em-dashes for dramatic effect, no filler phrases like "need to nail down" or "let's dive into". Just ask.
3957
+ - For yes/no questions, ask naturally in one sentence. Don't list "Yes / No" as options. Mention what you'll need if the answer triggers a follow-up (e.g. "If not, I'll need a brief explanation.").
3958
+ - For fields with 2-3 options, mention them inline. 4+ options can be a short list.
3959
+ - Group related fields (address, coverage limits) into single compound questions.
3960
+ - Do NOT include conditional/follow-up fields. They will be sent separately.
3961
+ - Number each question.
3962
+ - Note expected format where relevant: dollar amounts for currency, MM/DD/YYYY for dates, column descriptions for tables.
3963
+ - End with a short closing.
3964
+ - Tone: professional, brief, matter-of-fact. Write like a busy coworker, not a chatbot. No flourishes, no em-dashes between clauses, no editorializing about the questions.
3965
+
3966
+ NEVER:
3967
+ - Sound like a salesperson or customer service agent
3968
+ - Use em-dashes for emphasis or dramatic pacing
3969
+ - Editorialize ("these two should wrap up this section", "just a couple more")
3970
+ - List "Yes / No / N/A" as bullet options
3971
+ - Include conditional follow-up questions
3972
+ - Mention section numbers, batch numbers, or field counts
3973
+
3974
+ Output the email body text ONLY. No subject line, no JSON. Use markdown for numbered lists.`;
3975
+ }
3976
+
3977
+ // src/application/agents/email-generator.ts
3978
+ async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, generateText, providerOptions) {
3979
+ const fieldSummaries = batchFields.map((f) => ({
3980
+ id: f.id,
3981
+ label: f.label,
3982
+ fieldType: f.fieldType,
3983
+ options: f.options,
3984
+ condition: f.condition
3985
+ }));
3986
+ const prompt = buildBatchEmailGenerationPrompt(
3987
+ fieldSummaries,
3988
+ batchIndex,
3989
+ totalBatches,
3990
+ opts.appTitle,
3991
+ opts.totalFieldCount,
3992
+ opts.filledFieldCount,
3993
+ opts.previousBatchSummary,
3994
+ opts.companyName
3995
+ );
3996
+ const { text, usage } = await withRetry(
3997
+ () => generateText({
3998
+ prompt,
3999
+ maxTokens: 2048,
4000
+ providerOptions
4001
+ })
4002
+ );
4003
+ return { text, usage };
4004
+ }
4005
+
4006
+ // src/application/coordinator.ts
4007
+ function createApplicationPipeline(config) {
4008
+ const {
4009
+ generateText,
4010
+ generateObject,
4011
+ applicationStore,
4012
+ documentStore,
4013
+ memoryStore,
4014
+ backfillProvider,
4015
+ orgContext = [],
4016
+ concurrency = 4,
4017
+ onTokenUsage,
4018
+ onProgress,
4019
+ log,
4020
+ providerOptions
4021
+ } = config;
4022
+ const limit = pLimit(concurrency);
4023
+ let totalUsage = { inputTokens: 0, outputTokens: 0 };
4024
+ function trackUsage(usage) {
4025
+ if (usage) {
4026
+ totalUsage.inputTokens += usage.inputTokens;
4027
+ totalUsage.outputTokens += usage.outputTokens;
4028
+ onTokenUsage?.(usage);
4029
+ }
4030
+ }
4031
+ async function processApplication(input) {
4032
+ totalUsage = { inputTokens: 0, outputTokens: 0 };
4033
+ const { pdfBase64, context } = input;
4034
+ const id = input.applicationId ?? `app-${Date.now()}`;
4035
+ const now = Date.now();
4036
+ let state = {
4037
+ id,
4038
+ pdfBase64: void 0,
4039
+ // Don't persist the full PDF in state
4040
+ title: void 0,
4041
+ applicationType: null,
4042
+ fields: [],
4043
+ batches: void 0,
4044
+ currentBatchIndex: 0,
4045
+ status: "classifying",
4046
+ createdAt: now,
4047
+ updatedAt: now
4048
+ };
4049
+ onProgress?.("Classifying document...");
4050
+ const { result: classifyResult, usage: classifyUsage } = await classifyApplication(
4051
+ pdfBase64.slice(0, 2e3),
4052
+ // Send truncated content for classification
4053
+ generateObject,
4054
+ providerOptions
4055
+ );
4056
+ trackUsage(classifyUsage);
4057
+ if (!classifyResult.isApplication) {
4058
+ state.status = "complete";
4059
+ state.updatedAt = Date.now();
4060
+ await applicationStore?.save(state);
4061
+ return { state, tokenUsage: totalUsage };
4062
+ }
4063
+ state.applicationType = classifyResult.applicationType;
4064
+ state.status = "extracting";
4065
+ state.updatedAt = Date.now();
4066
+ onProgress?.("Extracting form fields...");
4067
+ const { fields, usage: extractUsage } = await extractFields(
4068
+ pdfBase64,
4069
+ generateObject,
4070
+ providerOptions
4071
+ );
4072
+ trackUsage(extractUsage);
4073
+ state.fields = fields;
4074
+ state.title = classifyResult.applicationType ?? void 0;
4075
+ state.status = "auto_filling";
4076
+ state.updatedAt = Date.now();
4077
+ await applicationStore?.save(state);
4078
+ onProgress?.(`Auto-filling ${fields.length} fields...`);
4079
+ const fillTasks = [];
4080
+ if (backfillProvider) {
4081
+ fillTasks.push(
4082
+ (async () => {
4083
+ try {
4084
+ const priorAnswers = await backfillFromPriorAnswers(fields, backfillProvider);
4085
+ for (const pa of priorAnswers) {
4086
+ const field = state.fields.find((f) => f.id === pa.fieldId);
4087
+ if (field && !field.value && pa.relevance > 0.8) {
4088
+ field.value = pa.value;
4089
+ field.source = `backfill: ${pa.source}`;
4090
+ field.confidence = "high";
4091
+ }
4092
+ }
4093
+ } catch (e) {
4094
+ await log?.(`Backfill failed: ${e}`);
4095
+ }
4096
+ })()
4097
+ );
4098
+ }
4099
+ if (orgContext.length > 0) {
4100
+ fillTasks.push(
4101
+ limit(async () => {
4102
+ const unfilledFields2 = state.fields.filter((f) => !f.value);
4103
+ if (unfilledFields2.length === 0) return;
4104
+ const { result: autoFillResult, usage: afUsage } = await autoFillFromContext(
4105
+ unfilledFields2,
4106
+ orgContext,
4107
+ generateObject,
4108
+ providerOptions
4109
+ );
4110
+ trackUsage(afUsage);
4111
+ for (const match of autoFillResult.matches) {
4112
+ const field = state.fields.find((f) => f.id === match.fieldId);
4113
+ if (field && !field.value) {
4114
+ field.value = match.value;
4115
+ field.source = `auto-fill: ${match.contextKey}`;
4116
+ field.confidence = match.confidence;
4117
+ }
4118
+ }
4119
+ })
4120
+ );
4121
+ }
4122
+ if (documentStore && memoryStore) {
4123
+ fillTasks.push(
4124
+ (async () => {
4125
+ try {
4126
+ const unfilledFields2 = state.fields.filter((f) => !f.value);
4127
+ const searchPromises = unfilledFields2.slice(0, 10).map(
4128
+ (f) => limit(async () => {
4129
+ const chunks = await memoryStore.search(f.label, { limit: 3 });
4130
+ for (const chunk of chunks) {
4131
+ if (!state.fields.find((sf) => sf.id === f.id)?.value) {
4132
+ }
4133
+ }
4134
+ })
4135
+ );
4136
+ await Promise.all(searchPromises);
4137
+ } catch (e) {
4138
+ await log?.(`Document backfill search failed: ${e}`);
4139
+ }
4140
+ })()
4141
+ );
4142
+ }
4143
+ await Promise.all(fillTasks);
4144
+ state.updatedAt = Date.now();
4145
+ await applicationStore?.save(state);
4146
+ const unfilledFields = state.fields.filter((f) => !f.value);
4147
+ if (unfilledFields.length > 0) {
4148
+ onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
4149
+ state.status = "batching";
4150
+ const { result: batchResult, usage: batchUsage } = await batchQuestions(
4151
+ unfilledFields,
4152
+ generateObject,
4153
+ providerOptions
4154
+ );
4155
+ trackUsage(batchUsage);
4156
+ state.batches = batchResult.batches;
4157
+ state.currentBatchIndex = 0;
4158
+ state.status = "collecting";
4159
+ } else {
4160
+ state.status = "confirming";
4161
+ }
4162
+ state.updatedAt = Date.now();
4163
+ await applicationStore?.save(state);
4164
+ const filledCount = state.fields.filter((f) => f.value).length;
4165
+ onProgress?.(`Application processed: ${filledCount}/${state.fields.length} fields filled, ${state.batches?.length ?? 0} batches to collect.`);
4166
+ return { state, tokenUsage: totalUsage };
4167
+ }
4168
+ async function processReply(input) {
4169
+ totalUsage = { inputTokens: 0, outputTokens: 0 };
4170
+ const { applicationId, replyText, context } = input;
4171
+ let state = null;
4172
+ if (applicationStore) {
4173
+ state = await applicationStore.get(applicationId);
4174
+ }
4175
+ if (!state) {
4176
+ throw new Error(`Application ${applicationId} not found`);
4177
+ }
4178
+ const currentBatchFieldIds = state.batches?.[state.currentBatchIndex] ?? [];
4179
+ const currentBatchFields = state.fields.filter(
4180
+ (f) => currentBatchFieldIds.includes(f.id)
4181
+ );
4182
+ onProgress?.("Classifying reply...");
4183
+ const { intent, usage: intentUsage } = await classifyReplyIntent(
4184
+ currentBatchFields,
4185
+ replyText,
4186
+ generateObject,
4187
+ providerOptions
4188
+ );
4189
+ trackUsage(intentUsage);
4190
+ let fieldsFilled = 0;
4191
+ let responseText;
4192
+ if (intent.hasAnswers) {
4193
+ onProgress?.("Parsing answers...");
4194
+ const { result: parseResult, usage: parseUsage } = await parseAnswers(
4195
+ currentBatchFields,
4196
+ replyText,
4197
+ generateObject,
4198
+ providerOptions
4199
+ );
4200
+ trackUsage(parseUsage);
4201
+ for (const answer of parseResult.answers) {
4202
+ const field = state.fields.find((f) => f.id === answer.fieldId);
4203
+ if (field) {
4204
+ field.value = answer.value;
4205
+ field.source = "user";
4206
+ field.confidence = "confirmed";
4207
+ fieldsFilled++;
4208
+ }
4209
+ }
4210
+ }
4211
+ if (intent.lookupRequests?.length) {
4212
+ onProgress?.("Processing lookup requests...");
4213
+ let availableData = "";
4214
+ if (documentStore) {
4215
+ try {
4216
+ const docs = await documentStore.query({});
4217
+ availableData = docs.map((d) => {
4218
+ const doc = d;
4219
+ return `Document ${doc.id}: ${doc.type} - ${doc.carrier ?? "unknown carrier"} - ${doc.insuredName ?? ""}`;
4220
+ }).join("\n");
4221
+ } catch (e) {
4222
+ await log?.(`Document query for lookup failed: ${e}`);
4223
+ }
4224
+ }
4225
+ if (availableData) {
4226
+ const targetFields = state.fields.filter(
4227
+ (f) => intent.lookupRequests.some((lr) => lr.targetFieldIds.includes(f.id))
4228
+ );
4229
+ const { result: lookupResult, usage: lookupUsage } = await fillFromLookup(
4230
+ intent.lookupRequests,
4231
+ targetFields,
4232
+ availableData,
4233
+ generateObject,
4234
+ providerOptions
4235
+ );
4236
+ trackUsage(lookupUsage);
4237
+ for (const fill of lookupResult.fills) {
4238
+ const field = state.fields.find((f) => f.id === fill.fieldId);
4239
+ if (field) {
4240
+ field.value = fill.value;
4241
+ field.source = `lookup: ${fill.source}`;
4242
+ field.confidence = "high";
4243
+ fieldsFilled++;
4244
+ }
4245
+ }
4246
+ }
4247
+ }
4248
+ if (intent.primaryIntent === "question" || intent.primaryIntent === "mixed") {
4249
+ if (intent.questionText) {
4250
+ const { text, usage } = await generateText({
4251
+ prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
4252
+
4253
+ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
4254
+ maxTokens: 512,
4255
+ providerOptions
4256
+ });
4257
+ trackUsage(usage);
4258
+ responseText = text;
4259
+ }
4260
+ }
4261
+ const currentBatchComplete = currentBatchFieldIds.every(
4262
+ (fid) => state.fields.find((f) => f.id === fid)?.value
4263
+ );
4264
+ if (currentBatchComplete && state.batches) {
4265
+ if (state.currentBatchIndex < state.batches.length - 1) {
4266
+ state.currentBatchIndex++;
4267
+ const nextBatchFieldIds = state.batches[state.currentBatchIndex];
4268
+ const nextBatchFields = state.fields.filter(
4269
+ (f) => nextBatchFieldIds.includes(f.id)
4270
+ );
4271
+ const filledCount = state.fields.filter((f) => f.value).length;
4272
+ const { text: emailText, usage: emailUsage } = await generateBatchEmail(
4273
+ nextBatchFields,
4274
+ state.currentBatchIndex,
4275
+ state.batches.length,
4276
+ {
4277
+ appTitle: state.title,
4278
+ totalFieldCount: state.fields.length,
4279
+ filledFieldCount: filledCount,
4280
+ companyName: context?.companyName
4281
+ },
4282
+ generateText,
4283
+ providerOptions
4284
+ );
4285
+ trackUsage(emailUsage);
4286
+ if (!responseText) {
4287
+ responseText = emailText;
4288
+ } else {
4289
+ responseText += `
4290
+
4291
+ ${emailText}`;
4292
+ }
4293
+ } else {
4294
+ state.status = "confirming";
4295
+ }
4296
+ }
4297
+ state.updatedAt = Date.now();
4298
+ await applicationStore?.save(state);
4299
+ return {
4300
+ state,
4301
+ intent: intent.primaryIntent,
4302
+ fieldsFilled,
4303
+ responseText,
4304
+ tokenUsage: totalUsage
4305
+ };
4306
+ }
4307
+ async function generateCurrentBatchEmail(applicationId, opts) {
4308
+ totalUsage = { inputTokens: 0, outputTokens: 0 };
4309
+ const state = await applicationStore?.get(applicationId);
4310
+ if (!state) throw new Error(`Application ${applicationId} not found`);
4311
+ if (!state.batches?.length) throw new Error("No batches available");
4312
+ const batchFieldIds = state.batches[state.currentBatchIndex];
4313
+ const batchFields = state.fields.filter((f) => batchFieldIds.includes(f.id));
4314
+ const filledCount = state.fields.filter((f) => f.value).length;
4315
+ const { text, usage } = await generateBatchEmail(
4316
+ batchFields,
4317
+ state.currentBatchIndex,
4318
+ state.batches.length,
4319
+ {
4320
+ appTitle: state.title,
4321
+ totalFieldCount: state.fields.length,
4322
+ filledFieldCount: filledCount,
4323
+ companyName: opts?.companyName,
4324
+ previousBatchSummary: opts?.previousBatchSummary
4325
+ },
4326
+ generateText,
4327
+ providerOptions
4328
+ );
4329
+ trackUsage(usage);
4330
+ return { text, tokenUsage: totalUsage };
4331
+ }
4332
+ async function getConfirmationSummary(applicationId) {
4333
+ totalUsage = { inputTokens: 0, outputTokens: 0 };
4334
+ const state = await applicationStore?.get(applicationId);
4335
+ if (!state) throw new Error(`Application ${applicationId} not found`);
4336
+ const filledFields = state.fields.filter((f) => f.value);
4337
+ const fieldSummary = filledFields.map((f) => `${f.section} > ${f.label}: ${f.value} (source: ${f.source ?? "unknown"})`).join("\n");
4338
+ const { text, usage } = await generateText({
4339
+ prompt: `Format these filled insurance application fields as a clean confirmation summary for the user to review. Group by section, show each field as "Label: Value". End with a note asking them to confirm or request changes.
4340
+
4341
+ Application: ${state.title ?? "Insurance Application"}
4342
+
4343
+ Fields:
4344
+ ${fieldSummary}`,
4345
+ maxTokens: 4096,
4346
+ providerOptions
4347
+ });
4348
+ trackUsage(usage);
4349
+ return { text, tokenUsage: totalUsage };
4350
+ }
4351
+ return {
4352
+ processApplication,
4353
+ processReply,
4354
+ generateCurrentBatchEmail,
4355
+ getConfirmationSummary
4356
+ };
4357
+ }
4358
+
4359
+ // src/prompts/application/confirmation.ts
4360
+ function buildConfirmationSummaryPrompt(fields, applicationTitle) {
4361
+ const fieldList = fields.map((f) => {
4362
+ const label = f.label ?? f.text ?? f.id;
4363
+ const value = f.value ?? "(not provided)";
4364
+ return `[${f.section}] ${label}: ${value}`;
4365
+ }).join("\n");
4366
+ return `Format the following insurance application answers into a clean, readable summary grouped by section. This will be sent as an email for the user to review and confirm.
4367
+
4368
+ APPLICATION: ${applicationTitle}
4369
+
4370
+ FIELD VALUES:
4371
+ ${fieldList}
4372
+
4373
+ Format as a readable summary:
4374
+ - Group by section with section headers
4375
+ - Show each field as "Label: Value"
4376
+ - For declarations, show the question and the yes/no answer plus any explanation
4377
+ - Skip fields with no value unless they are required
4378
+ - End with a note asking the user to reply "Looks good" to confirm, or describe any changes needed
4379
+
4380
+ Respond with the formatted summary text only (no JSON wrapper). Use markdown formatting (bold headers, bullet points).`;
4381
+ }
4382
+
4383
+ // src/prompts/application/field-explanation.ts
4384
+ function buildFieldExplanationPrompt(field, question, policyContext) {
4385
+ return `You are an internal risk management assistant helping a colleague fill out an insurance application for your company. They asked a question about a field on the form.
4386
+
4387
+ FIELD: "${field.label}" (type: ${field.fieldType}${field.options ? `, options: ${field.options.join(", ")}` : ""})
4388
+
4389
+ THEIR QUESTION: "${question}"
4390
+
4391
+ ${policyContext ? `RELEVANT POLICY/CONTEXT INFO:
4392
+ ${policyContext}
4393
+ ` : ""}
4394
+
4395
+ Provide a short, helpful explanation (2-3 sentences) as a coworker would. If the field has options, briefly explain what each means if relevant. If there's policy context that helps, cite the specific source (e.g. "According to our GL Policy #ABC123 with Hartford, our current aggregate limit is $2M").
4396
+
4397
+ End with: "Just reply with the answer when you're ready and I'll fill it in."
4398
+
4399
+ Respond with the explanation text only \u2014 no JSON, no field ID, no extra formatting.`;
4400
+ }
4401
+
4402
+ // src/prompts/query/classify.ts
4403
+ function buildQueryClassifyPrompt(question, conversationContext) {
4404
+ return `You are a query classifier for an insurance document intelligence system.
4405
+
4406
+ Analyze the user's question and produce a structured classification.
4407
+
4408
+ USER QUESTION:
4409
+ ${question}
4410
+ ${conversationContext ? `
4411
+ CONVERSATION CONTEXT:
4412
+ ${conversationContext}` : ""}
4413
+
4414
+ INSTRUCTIONS:
4415
+
4416
+ 1. Determine the primary intent:
4417
+ - "policy_question": questions about specific coverage, limits, deductibles, endorsements, conditions
4418
+ - "coverage_comparison": comparing coverages across multiple documents or policies
4419
+ - "document_search": looking for a specific document by carrier, policy number, insured name
4420
+ - "claims_inquiry": questions about claims history, loss runs, experience modification
4421
+ - "general_knowledge": insurance concepts not tied to a specific document
4422
+
4423
+ 2. Decompose into atomic sub-questions:
4424
+ - Each sub-question should be answerable from a single retrieval pass
4425
+ - Simple questions produce exactly one sub-question (the question itself)
4426
+ - Complex questions (comparisons, multi-policy, multi-field) decompose into 2-5 sub-questions
4427
+ - Each sub-question should specify which chunk types are most relevant
4428
+
4429
+ 3. Determine which storage backends are needed:
4430
+ - requiresDocumentLookup: true if a specific document needs to be fetched by ID/number/carrier
4431
+ - requiresChunkSearch: true if semantic search over document chunks is needed
4432
+ - requiresConversationHistory: true if the question references prior conversation
4433
+
4434
+ CHUNK TYPES (for chunkTypes filter):
4435
+ carrier_info, named_insured, coverage, endorsement, exclusion, condition, section, declaration, loss_history, premium, supplementary
4436
+
4437
+ Respond with the structured classification.`;
4438
+ }
4439
+
4440
+ // src/prompts/query/respond.ts
4441
+ function buildRespondPrompt(originalQuestion, subAnswersJson, platform) {
4442
+ const formatGuidance = platform === "email" ? "Format as a professional email response. Use plain text, no markdown." : platform === "sms" ? "Keep the response concise and conversational. No markdown." : "Format as clear, well-structured text. Use markdown for lists and emphasis where helpful.";
4443
+ return `You are composing a final answer to an insurance question. You have verified sub-answers with citations that you need to merge into a single, natural response.
4444
+
4445
+ ORIGINAL QUESTION:
4446
+ ${originalQuestion}
4447
+
4448
+ VERIFIED SUB-ANSWERS:
4449
+ ${subAnswersJson}
4450
+
4451
+ FORMATTING:
4452
+ ${formatGuidance}
4453
+
4454
+ INSTRUCTIONS:
4455
+ 1. Write a natural, direct answer to the original question.
4456
+ 2. Embed inline citation numbers [1], [2], etc. after each factual claim. These reference the citation objects from the sub-answers \u2014 preserve the original citation index numbers.
4457
+ 3. If any sub-answer had low confidence or noted missing context, mention what information was unavailable rather than omitting silently.
4458
+ 4. If the answer naturally leads to a follow-up question the user might want to ask, suggest it in the followUp field.
4459
+ 5. Merge overlapping citations \u2014 if two sub-answers cite the same chunk, use one citation number.
4460
+ 6. Keep the tone helpful and professional.
4461
+
4462
+ Respond with the final answer, deduplicated citations array, overall confidence (weighted average of sub-answer confidences), and an optional follow-up suggestion.`;
4463
+ }
4464
+
4465
+ // src/schemas/query.ts
4466
+ import { z as z32 } from "zod";
4467
+ var QueryIntentSchema = z32.enum([
4468
+ "policy_question",
4469
+ "coverage_comparison",
4470
+ "document_search",
4471
+ "claims_inquiry",
4472
+ "general_knowledge"
4473
+ ]);
4474
+ var SubQuestionSchema = z32.object({
4475
+ question: z32.string().describe("Atomic sub-question to retrieve and answer independently"),
4476
+ intent: QueryIntentSchema,
4477
+ chunkTypes: z32.array(z32.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
4478
+ documentFilters: z32.object({
4479
+ type: z32.enum(["policy", "quote"]).optional(),
4480
+ carrier: z32.string().optional(),
4481
+ insuredName: z32.string().optional(),
4482
+ policyNumber: z32.string().optional(),
4483
+ quoteNumber: z32.string().optional()
4484
+ }).optional().describe("Structured filters to narrow document lookup")
4485
+ });
4486
+ var QueryClassifyResultSchema = z32.object({
4487
+ intent: QueryIntentSchema,
4488
+ subQuestions: z32.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
4489
+ requiresDocumentLookup: z32.boolean().describe("Whether structured document lookup is needed"),
4490
+ requiresChunkSearch: z32.boolean().describe("Whether semantic chunk search is needed"),
4491
+ requiresConversationHistory: z32.boolean().describe("Whether conversation history is relevant")
4492
+ });
4493
+ var EvidenceItemSchema = z32.object({
4494
+ source: z32.enum(["chunk", "document", "conversation"]),
4495
+ chunkId: z32.string().optional(),
4496
+ documentId: z32.string().optional(),
4497
+ turnId: z32.string().optional(),
4498
+ text: z32.string().describe("Text excerpt from the source"),
4499
+ relevance: z32.number().min(0).max(1),
4500
+ metadata: z32.record(z32.string(), z32.string()).optional()
4501
+ });
4502
+ var RetrievalResultSchema = z32.object({
4503
+ subQuestion: z32.string(),
4504
+ evidence: z32.array(EvidenceItemSchema)
4505
+ });
4506
+ var CitationSchema = z32.object({
4507
+ index: z32.number().describe("Citation number [1], [2], etc."),
4508
+ chunkId: z32.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
4509
+ documentId: z32.string(),
4510
+ documentType: z32.enum(["policy", "quote"]).optional(),
4511
+ field: z32.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
4512
+ quote: z32.string().describe("Exact text from source that supports the claim"),
4513
+ relevance: z32.number().min(0).max(1)
4514
+ });
4515
+ var SubAnswerSchema = z32.object({
4516
+ subQuestion: z32.string(),
4517
+ answer: z32.string(),
4518
+ citations: z32.array(CitationSchema),
4519
+ confidence: z32.number().min(0).max(1),
4520
+ needsMoreContext: z32.boolean().describe("True if evidence was insufficient to answer fully")
4521
+ });
4522
+ var VerifyResultSchema = z32.object({
4523
+ approved: z32.boolean().describe("Whether all sub-answers are adequately grounded"),
4524
+ issues: z32.array(z32.string()).describe("Specific grounding or consistency issues found"),
4525
+ retrySubQuestions: z32.array(z32.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
4526
+ });
4527
+ var QueryResultSchema = z32.object({
4528
+ answer: z32.string(),
4529
+ citations: z32.array(CitationSchema),
4530
+ intent: QueryIntentSchema,
4531
+ confidence: z32.number().min(0).max(1),
4532
+ followUp: z32.string().optional().describe("Suggested follow-up question if applicable")
4533
+ });
4534
+
4535
+ // src/query/retriever.ts
4536
+ async function retrieve(subQuestion, conversationId, config) {
4537
+ const { documentStore, memoryStore, retrievalLimit, log } = config;
4538
+ const evidence = [];
4539
+ const tasks = [];
4540
+ tasks.push(
4541
+ (async () => {
4542
+ try {
4543
+ const filter = {};
4544
+ if (subQuestion.chunkTypes?.length) {
4545
+ const chunkResults = await Promise.all(
4546
+ subQuestion.chunkTypes.map(
4547
+ (type) => memoryStore.search(subQuestion.question, {
4548
+ limit: Math.ceil(retrievalLimit / subQuestion.chunkTypes.length),
4549
+ filter: { ...filter, type }
4550
+ })
4551
+ )
4552
+ );
4553
+ for (const chunks of chunkResults) {
4554
+ for (const chunk of chunks) {
4555
+ evidence.push({
4556
+ source: "chunk",
4557
+ chunkId: chunk.id,
4558
+ documentId: chunk.documentId,
4559
+ text: chunk.text,
4560
+ relevance: 0.8,
4561
+ // Default — store doesn't expose scores directly
4562
+ metadata: chunk.metadata
4563
+ });
4564
+ }
4565
+ }
4566
+ } else {
4567
+ const chunks = await memoryStore.search(subQuestion.question, {
4568
+ limit: retrievalLimit
4569
+ });
4570
+ for (const chunk of chunks) {
4571
+ evidence.push({
4572
+ source: "chunk",
4573
+ chunkId: chunk.id,
4574
+ documentId: chunk.documentId,
4575
+ text: chunk.text,
4576
+ relevance: 0.8,
4577
+ metadata: chunk.metadata
4578
+ });
4579
+ }
4580
+ }
4581
+ } catch (e) {
4582
+ await log?.(`Chunk search failed for "${subQuestion.question}": ${e}`);
4583
+ }
4584
+ })()
4585
+ );
4586
+ if (subQuestion.documentFilters) {
4587
+ tasks.push(
4588
+ (async () => {
4589
+ try {
4590
+ const filters = {};
4591
+ if (subQuestion.documentFilters?.type) filters.type = subQuestion.documentFilters.type;
4592
+ if (subQuestion.documentFilters?.carrier) filters.carrier = subQuestion.documentFilters.carrier;
4593
+ if (subQuestion.documentFilters?.insuredName) filters.insuredName = subQuestion.documentFilters.insuredName;
4594
+ if (subQuestion.documentFilters?.policyNumber) filters.policyNumber = subQuestion.documentFilters.policyNumber;
4595
+ if (subQuestion.documentFilters?.quoteNumber) filters.quoteNumber = subQuestion.documentFilters.quoteNumber;
4596
+ const docs = await documentStore.query(filters);
4597
+ for (const doc of docs) {
4598
+ const summary = buildDocumentSummary(doc);
4599
+ evidence.push({
4600
+ source: "document",
4601
+ documentId: doc.id,
4602
+ text: summary,
4603
+ relevance: 0.9,
4604
+ // Direct lookup is high relevance
4605
+ metadata: {
4606
+ type: doc.type,
4607
+ carrier: doc.carrier ?? "",
4608
+ insuredName: doc.insuredName ?? ""
4609
+ }
4610
+ });
4611
+ }
4612
+ } catch (e) {
4613
+ await log?.(`Document lookup failed: ${e}`);
4614
+ }
4615
+ })()
4616
+ );
4617
+ }
4618
+ if (conversationId) {
4619
+ tasks.push(
4620
+ (async () => {
4621
+ try {
4622
+ const turns = await memoryStore.searchHistory(
4623
+ subQuestion.question,
4624
+ conversationId
4625
+ );
4626
+ for (const turn of turns.slice(0, 5)) {
4627
+ evidence.push({
4628
+ source: "conversation",
4629
+ turnId: turn.id,
4630
+ text: `[${turn.role}]: ${turn.content}`,
4631
+ relevance: 0.6
4632
+ // Conversation context is lower relevance than documents
4633
+ });
4634
+ }
4635
+ } catch (e) {
4636
+ await log?.(`Conversation history search failed: ${e}`);
4637
+ }
4638
+ })()
4639
+ );
4640
+ }
4641
+ await Promise.all(tasks);
4642
+ evidence.sort((a, b) => b.relevance - a.relevance);
4643
+ return {
4644
+ subQuestion: subQuestion.question,
4645
+ evidence: evidence.slice(0, retrievalLimit)
4646
+ };
4647
+ }
4648
+ function buildDocumentSummary(doc) {
4649
+ const parts = [];
4650
+ const type = doc.type;
4651
+ parts.push(`Document type: ${type}`);
4652
+ if (doc.carrier) parts.push(`Carrier: ${doc.carrier}`);
4653
+ if (doc.insuredName) parts.push(`Insured: ${doc.insuredName}`);
4654
+ if (type === "policy") {
4655
+ if (doc.policyNumber) parts.push(`Policy #: ${doc.policyNumber}`);
4656
+ if (doc.effectiveDate) parts.push(`Effective: ${doc.effectiveDate}`);
4657
+ if (doc.expirationDate) parts.push(`Expiration: ${doc.expirationDate}`);
4658
+ } else if (type === "quote") {
4659
+ if (doc.quoteNumber) parts.push(`Quote #: ${doc.quoteNumber}`);
4660
+ if (doc.proposedEffectiveDate) parts.push(`Proposed effective: ${doc.proposedEffectiveDate}`);
4661
+ }
4662
+ if (doc.premium) parts.push(`Premium: ${doc.premium}`);
4663
+ const coverages = doc.coverages;
4664
+ if (coverages?.length) {
4665
+ parts.push(`Coverages (${coverages.length}):`);
4666
+ for (const cov of coverages.slice(0, 10)) {
4667
+ const line = [cov.name, cov.limit ? `Limit: ${cov.limit}` : null, cov.deductible ? `Ded: ${cov.deductible}` : null].filter(Boolean).join(" | ");
4668
+ parts.push(` - ${line}`);
4669
+ }
4670
+ }
4671
+ return parts.join("\n");
4672
+ }
4673
+
4674
+ // src/prompts/query/reason.ts
4675
// Per-intent system instructions prepended to every reasoning prompt.
// Keys are the intent identifiers accepted by buildReasonPrompt.
var INTENT_INSTRUCTIONS = {
  policy_question: `You are answering a question about a specific insurance policy or quote.

RULES:
- Answer ONLY from the evidence provided. Do not use general knowledge.
- When citing limits, deductibles, or amounts, use the exact values from the source.
- If the evidence mentions an endorsement that modifies coverage, include that context.
- If the evidence is insufficient, say what is missing rather than guessing.
- Reference specific coverage names, form numbers, and endorsement titles when available.`,
  coverage_comparison: `You are comparing coverages across insurance documents.

RULES:
- Answer ONLY from the evidence provided.
- Structure your comparison around specific coverage attributes: limits, deductibles, forms, triggers.
- Note differences clearly: "Policy A has X, while Policy B has Y."
- Flag where one document has coverage the other lacks entirely.
- If evidence for one side of the comparison is missing, state that explicitly.`,
  document_search: `You are helping locate a specific insurance document.

RULES:
- Answer ONLY from the evidence provided.
- Identify the document by carrier, policy/quote number, insured name, and effective dates.
- If multiple documents match, list them with distinguishing details.
- If no documents match, say so clearly.`,
  claims_inquiry: `You are answering a question about claims history or loss experience.

RULES:
- Answer ONLY from the evidence provided.
- Reference specific claim dates, amounts, descriptions, and statuses.
- Include experience modification factors if available.
- Be precise with dollar amounts and dates \u2014 do not approximate.
- If the evidence shows no claims, state that explicitly.`,
  general_knowledge: `You are answering a general insurance question using available document context.

RULES:
- You may use general insurance knowledge to frame your answer.
- If the question can be answered from the evidence, prefer that over general knowledge.
- When mixing general knowledge with document-specific data, make the distinction clear.
- Still cite evidence when referencing specific documents.`
};

/**
 * Build the reasoning prompt for a single sub-question.
 *
 * @param subQuestion Text of the sub-question to answer.
 * @param intent Key into INTENT_INSTRUCTIONS. Any value that is not an own key
 *   of that table falls back to the "general_knowledge" instructions instead of
 *   interpolating "undefined" (or an inherited Object.prototype member) into
 *   the prompt.
 * @param evidence Pre-formatted evidence text, inserted verbatim.
 * @returns The complete prompt string.
 */
function buildReasonPrompt(subQuestion, intent, evidence) {
  // Own-property check: a plain `in`/index lookup would accept "constructor", "toString", etc.
  const isKnownIntent = Object.prototype.hasOwnProperty.call(INTENT_INSTRUCTIONS, intent);
  const instructions = isKnownIntent ? INTENT_INSTRUCTIONS[intent] : INTENT_INSTRUCTIONS.general_knowledge;
  return `${instructions}

SUB-QUESTION:
${subQuestion}

EVIDENCE:
${evidence}

Answer the sub-question based on the evidence above. For every factual claim, include a citation referencing the source evidence item by its chunkId or documentId. Rate your confidence from 0 to 1 based on how well the evidence supports your answer. Set needsMoreContext to true if the evidence was insufficient.`;
}
4726
+
4727
+ // src/query/reasoner.ts
4728
/**
 * Answer one sub-question from its retrieved evidence via a structured model call.
 *
 * Each evidence item is rendered as a numbered block tagged with its source id
 * and relevance, the blocks are joined into the reasoning prompt, and the model
 * is asked for an object matching SubAnswerSchema (wrapped in withRetry).
 *
 * @param subQuestion Text of the sub-question.
 * @param intent Intent key forwarded to buildReasonPrompt.
 * @param evidence Evidence items (source, chunkId/documentId/turnId, relevance, text).
 * @param config Supplies `generateObject` and `providerOptions`.
 * @returns `{ subAnswer, usage }` taken from the model response.
 */
async function reason(subQuestion, intent, evidence, config) {
  const { generateObject, providerOptions } = config;

  // Bracketed tag identifying where an evidence item came from.
  const tagFor = (item) => {
    if (item.source === "chunk") return `[chunk:${item.chunkId}]`;
    if (item.source === "document") return `[doc:${item.documentId}]`;
    return `[turn:${item.turnId}]`;
  };

  const blocks = [];
  evidence.forEach((item, position) => {
    const tag = tagFor(item);
    const heading = `Evidence ${position + 1} ${tag} (relevance: ${item.relevance.toFixed(2)}):`;
    blocks.push(`${heading}\n${item.text}`);
  });

  const prompt = buildReasonPrompt(subQuestion, intent, blocks.join("\n\n"));
  const request = {
    prompt,
    schema: SubAnswerSchema,
    maxTokens: 4096,
    providerOptions
  };
  const { object, usage } = await withRetry(() => generateObject(request));
  return { subAnswer: object, usage };
}
4746
+
4747
+ // src/prompts/query/verify.ts
4748
/**
 * Build the prompt for the verification pass over a set of sub-answers.
 *
 * The prompt is a sequence of sections separated by blank lines: role
 * statement, the three inputs, the three checks, and the response contract.
 *
 * @param originalQuestion The user's original question, inserted verbatim.
 * @param subAnswersJson Serialized sub-answers, inserted verbatim.
 * @param evidenceJson Serialized evidence, inserted verbatim.
 * @returns The complete verification prompt.
 */
function buildVerifyPrompt(originalQuestion, subAnswersJson, evidenceJson) {
  const role = "You are a verification agent for an insurance document intelligence system. Your job is to check that answers are accurate, grounded, and complete.";

  // Template literals (not join) so non-string inputs stringify exactly as before.
  const questionSection = `ORIGINAL QUESTION:\n${originalQuestion}`;
  const answersSection = `SUB-ANSWERS:\n${subAnswersJson}`;
  const evidenceSection = `AVAILABLE EVIDENCE:\n${evidenceJson}`;

  const grounding = [
    "1. GROUNDING: Every factual claim must be supported by a citation that references actual evidence. Flag any claim that:",
    "- Has no citation",
    "- Cites a source that doesn't actually contain the claimed information",
    "- Extrapolates beyond what the evidence states"
  ].join("\n");

  const consistency = "2. CONSISTENCY: Sub-answers should not contradict each other. Flag any contradictions, noting which sub-answers conflict and what the discrepancy is.";

  const completeness = [
    "3. COMPLETENESS: Did each sub-question get an adequate answer? Flag any sub-question where:",
    "- The answer is vague or hedged when the evidence supports a specific answer",
    "- Important details from the evidence were omitted",
    "- The confidence rating seems miscalibrated (high confidence with weak evidence, or low confidence with strong evidence)"
  ].join("\n");

  const responseContract = [
    "RESPOND WITH:",
    "- approved: true only if ALL sub-answers pass all three checks",
    "- issues: list every specific issue found (empty array if approved)",
    "- retrySubQuestions: sub-questions that need re-retrieval or re-reasoning (only if not approved)"
  ].join("\n");

  return [
    role,
    questionSection,
    answersSection,
    evidenceSection,
    "CHECK EACH SUB-ANSWER FOR:",
    grounding,
    consistency,
    completeness,
    responseContract
  ].join("\n\n");
}
4779
+
4780
+ // src/query/verifier.ts
4781
/**
 * Run the verification pass: serialize the sub-answers and evidence, build the
 * verify prompt, and request an object matching VerifyResultSchema (with retry).
 *
 * @param originalQuestion The user's original question.
 * @param subAnswers Sub-answers to check.
 * @param allEvidence Evidence items available to the sub-answers.
 * @param config Supplies `generateObject` and `providerOptions`.
 * @returns `{ result, usage }` taken from the model response.
 */
async function verify(originalQuestion, subAnswers, allEvidence, config) {
  const { generateObject, providerOptions } = config;

  // Project each sub-answer onto the fields the verifier prompt shows.
  const toAnswerView = (entry) => ({
    subQuestion: entry.subQuestion,
    answer: entry.answer,
    citations: entry.citations,
    confidence: entry.confidence,
    needsMoreContext: entry.needsMoreContext
  });

  // Collapse the per-source id fields into one `id` and cap the text at 500
  // characters to keep the prompt small.
  const toEvidenceView = (item) => ({
    source: item.source,
    id: item.chunkId ?? item.documentId ?? item.turnId,
    text: item.text.slice(0, 500),
    relevance: item.relevance
  });

  const subAnswersJson = JSON.stringify(subAnswers.map((entry) => toAnswerView(entry)), null, 2);
  const evidenceJson = JSON.stringify(allEvidence.map((item) => toEvidenceView(item)), null, 2);
  const prompt = buildVerifyPrompt(originalQuestion, subAnswersJson, evidenceJson);

  const request = {
    prompt,
    schema: VerifyResultSchema,
    maxTokens: 2048,
    providerOptions
  };
  const { object, usage } = await withRetry(() => generateObject(request));
  return { result: object, usage };
}
4816
+
4817
+ // src/query/coordinator.ts
4818
/**
 * Create a query agent that answers a question in five stages:
 * classify -> retrieve -> reason -> verify (with optional retry) -> respond.
 *
 * @param config Agent configuration. Reads `generateObject`, `documentStore`,
 *   `memoryStore`, `providerOptions`, the optional callbacks `onTokenUsage`,
 *   `onProgress` and `log`, and the tunables `concurrency` (default 3),
 *   `maxVerifyRounds` (default 1) and `retrievalLimit` (default 10).
 * @returns An object exposing `query(input)`.
 */
function createQueryAgent(config) {
  const {
    generateObject,
    documentStore,
    memoryStore,
    concurrency = 3,
    maxVerifyRounds = 1,
    retrievalLimit = 10,
    onTokenUsage,
    onProgress,
    log,
    providerOptions
  } = config;
  // One limiter for the whole agent: bounds concurrent retrieval/reasoning calls.
  const limit = pLimit(concurrency);

  /**
   * Answer a question and, when a conversation id is given, record the exchange.
   *
   * @param input `{ question, conversationId?, context? }`.
   * @returns The composed result plus `tokenUsage` accumulated for this call only.
   */
  async function query(input) {
    // Usage is tracked per call. Keeping the accumulator in agent-level state
    // would let overlapping query() calls reset and pollute each other's totals.
    const totalUsage = { inputTokens: 0, outputTokens: 0 };
    const trackUsage = (usage) => {
      if (!usage) return;
      // Tolerate providers that omit a counter; `+= undefined` would yield NaN.
      totalUsage.inputTokens += usage.inputTokens ?? 0;
      totalUsage.outputTokens += usage.outputTokens ?? 0;
      onTokenUsage?.(usage);
    };

    const { question, conversationId, context } = input;
    onProgress?.("Classifying query...");
    const classification = await classify(question, conversationId, trackUsage);
    const subQuestions = classification.subQuestions;

    onProgress?.(`Retrieving evidence for ${subQuestions.length} sub-question(s)...`);
    const retrieverConfig = {
      documentStore,
      memoryStore,
      retrievalLimit,
      log
    };
    const retrievalResults = await Promise.all(
      subQuestions.map(
        (sq) => limit(() => retrieve(sq, conversationId, retrieverConfig))
      )
    );
    const allEvidence = retrievalResults.flatMap((r) => r.evidence);

    onProgress?.("Reasoning over evidence...");
    const reasonerConfig = { generateObject, providerOptions };
    // subAnswers[i] is the answer for subQuestions[i]; the retry merge below
    // relies on this positional alignment.
    let subAnswers = await Promise.all(
      subQuestions.map(
        (sq, i) => limit(async () => {
          const { subAnswer, usage } = await reason(
            sq.question,
            sq.intent,
            retrievalResults[i].evidence,
            reasonerConfig
          );
          trackUsage(usage);
          return subAnswer;
        })
      )
    );

    onProgress?.("Verifying answer grounding...");
    const verifierConfig = { generateObject, providerOptions };
    for (let round = 0; round < maxVerifyRounds; round++) {
      const { result: verifyResult, usage } = await verify(
        question,
        subAnswers,
        allEvidence,
        verifierConfig
      );
      trackUsage(usage);
      if (verifyResult.approved) {
        onProgress?.("Verification passed.");
        break;
      }
      onProgress?.(`Verification found ${verifyResult.issues.length} issue(s), round ${round + 1}/${maxVerifyRounds}`);
      await log?.(`Verify issues: ${verifyResult.issues.join("; ")}`);
      if (!verifyResult.retrySubQuestions?.length) continue;

      // Positions of the sub-questions the verifier asked to redo.
      const retryIndexes = [];
      subQuestions.forEach((sq, i) => {
        if (verifyResult.retrySubQuestions.includes(sq.question)) retryIndexes.push(i);
      });
      if (retryIndexes.length === 0) continue;

      const retryRetrievals = await Promise.all(
        retryIndexes.map(
          (i) => limit(
            () => retrieve(subQuestions[i], conversationId, {
              ...retrieverConfig,
              retrievalLimit: retrievalLimit * 2
              // Broader retrieval on retry
            })
          )
        )
      );
      for (const r of retryRetrievals) {
        allEvidence.push(...r.evidence);
      }
      const retrySubAnswers = await Promise.all(
        retryIndexes.map(
          (i, k) => limit(async () => {
            const sq = subQuestions[i];
            const { subAnswer, usage: u } = await reason(
              sq.question,
              sq.intent,
              retryRetrievals[k].evidence,
              reasonerConfig
            );
            trackUsage(u);
            return subAnswer;
          })
        )
      );
      // Merge by position rather than by comparing the model-echoed
      // `subQuestion` text: if the model rephrases the question in its answer,
      // a text match fails and the retried answer would be silently discarded.
      const merged = [...subAnswers];
      retryIndexes.forEach((i, k) => {
        merged[i] = retrySubAnswers[k] ?? merged[i];
      });
      subAnswers = merged;
    }

    onProgress?.("Composing final answer...");
    const queryResult = await respond(
      question,
      subAnswers,
      classification,
      context?.platform,
      trackUsage
    );
    if (conversationId) {
      // Best effort: a failure to persist history must not fail the query.
      try {
        await memoryStore.addTurn({
          id: `turn-${Date.now()}-q`,
          conversationId,
          role: "user",
          content: question,
          timestamp: Date.now()
        });
        await memoryStore.addTurn({
          id: `turn-${Date.now()}-a`,
          conversationId,
          role: "assistant",
          content: queryResult.answer,
          timestamp: Date.now()
        });
      } catch (e) {
        await log?.(`Failed to store conversation turn: ${e}`);
      }
    }
    return { ...queryResult, tokenUsage: totalUsage };
  }

  // Classify the question (intent + sub-questions), using up to 5 recent turns
  // of the conversation as context when a conversation id is supplied.
  async function classify(question, conversationId, trackUsage) {
    let conversationContext;
    if (conversationId) {
      try {
        const history = await memoryStore.getHistory(conversationId, { limit: 5 });
        if (history.length > 0) {
          conversationContext = history.map((t) => `[${t.role}]: ${t.content}`).join("\n");
        }
      } catch (e) {
        // History is optional context: classify without it, but leave a trace.
        await log?.(`Failed to load conversation history: ${e}`);
      }
    }
    const prompt = buildQueryClassifyPrompt(question, conversationContext);
    const { object, usage } = await withRetry(
      () => generateObject({
        prompt,
        schema: QueryClassifyResultSchema,
        maxTokens: 2048,
        providerOptions
      })
    );
    trackUsage(usage);
    return object;
  }

  // Compose the final answer from the sub-answers and stamp it with the
  // classified intent.
  async function respond(originalQuestion, subAnswers, classification, platform, trackUsage) {
    const subAnswersJson = JSON.stringify(
      subAnswers.map((sa) => ({
        subQuestion: sa.subQuestion,
        answer: sa.answer,
        citations: sa.citations,
        confidence: sa.confidence,
        needsMoreContext: sa.needsMoreContext
      })),
      null,
      2
    );
    const prompt = buildRespondPrompt(originalQuestion, subAnswersJson, platform);
    const { object, usage } = await withRetry(
      () => generateObject({
        prompt,
        schema: QueryResultSchema,
        maxTokens: 4096,
        providerOptions
      })
    );
    trackUsage(usage);
    const result = object;
    result.intent = classification.intent;
    return result;
  }

  return { query };
}
5015
+
3558
5016
  // src/prompts/intent.ts
3559
5017
  function buildClassifyMessagePrompt(platform) {
3560
5018
  const platformFields = {
@@ -3680,9 +5138,16 @@ export {
3680
5138
  AGENT_TOOLS,
3681
5139
  APPLICATION_CLASSIFY_PROMPT,
3682
5140
  AUDIT_TYPES,
5141
+ AcroFormMappingSchema,
3683
5142
  AddressSchema,
3684
5143
  AdmittedStatusSchema,
5144
+ AnswerParsingResultSchema,
5145
+ ApplicationClassifyResultSchema,
5146
+ ApplicationFieldSchema,
5147
+ ApplicationStateSchema,
3685
5148
  AuditTypeSchema,
5149
+ AutoFillMatchSchema,
5150
+ AutoFillResultSchema,
3686
5151
  BOAT_TYPES,
3687
5152
  BindingAuthoritySchema,
3688
5153
  BoatTypeSchema,
@@ -3696,6 +5161,7 @@ export {
3696
5161
  COVERAGE_FORMS,
3697
5162
  COVERAGE_TRIGGERS,
3698
5163
  ChunkTypeSchema,
5164
+ CitationSchema,
3699
5165
  ClaimRecordSchema,
3700
5166
  ClaimStatusSchema,
3701
5167
  ClassificationCodeSchema,
@@ -3738,12 +5204,16 @@ export {
3738
5204
  EnrichedSubjectivitySchema,
3739
5205
  EnrichedUnderwritingConditionSchema,
3740
5206
  EntityTypeSchema,
5207
+ EvidenceItemSchema,
3741
5208
  ExclusionSchema,
3742
5209
  ExperienceModSchema,
3743
5210
  ExtendedReportingPeriodSchema,
3744
5211
  FLOOD_ZONES,
3745
5212
  FOUNDATION_TYPES,
3746
5213
  FarmRanchDeclarationsSchema,
5214
+ FieldExtractionResultSchema,
5215
+ FieldTypeSchema,
5216
+ FlatPdfPlacementSchema,
3747
5217
  FloodDeclarationsSchema,
3748
5218
  FloodZoneSchema,
3749
5219
  FormReferenceSchema,
@@ -3762,6 +5232,9 @@ export {
3762
5232
  LimitScheduleSchema,
3763
5233
  LimitTypeSchema,
3764
5234
  LocationPremiumSchema,
5235
+ LookupFillResultSchema,
5236
+ LookupFillSchema,
5237
+ LookupRequestSchema,
3765
5238
  LossSettlementSchema,
3766
5239
  LossSummarySchema,
3767
5240
  NamedInsuredSchema,
@@ -3771,6 +5244,7 @@ export {
3771
5244
  POLICY_SECTION_TYPES,
3772
5245
  POLICY_TERM_TYPES,
3773
5246
  POLICY_TYPES,
5247
+ ParsedAnswerSchema,
3774
5248
  PaymentInstallmentSchema,
3775
5249
  PaymentPlanSchema,
3776
5250
  PersonalArticlesDeclarationsSchema,
@@ -3790,6 +5264,10 @@ export {
3790
5264
  ProducerInfoSchema,
3791
5265
  ProfessionalLiabilityDeclarationsSchema,
3792
5266
  QUOTE_SECTION_TYPES,
5267
+ QueryClassifyResultSchema,
5268
+ QueryIntentSchema,
5269
+ QueryResultSchema,
5270
+ QuestionBatchResultSchema,
3793
5271
  QuoteDocumentSchema,
3794
5272
  QuoteSectionTypeSchema,
3795
5273
  RATING_BASIS_TYPES,
@@ -3799,12 +5277,16 @@ export {
3799
5277
  RatingBasisSchema,
3800
5278
  RatingBasisTypeSchema,
3801
5279
  RecreationalVehicleDeclarationsSchema,
5280
+ ReplyIntentSchema,
5281
+ RetrievalResultSchema,
3802
5282
  RoofTypeSchema,
3803
5283
  SCHEDULED_ITEM_CATEGORIES,
3804
5284
  SUBJECTIVITY_CATEGORIES,
3805
5285
  ScheduledItemCategorySchema,
3806
5286
  SectionSchema,
3807
5287
  SharedLimitSchema,
5288
+ SubAnswerSchema,
5289
+ SubQuestionSchema,
3808
5290
  SubjectivityCategorySchema,
3809
5291
  SubjectivitySchema,
3810
5292
  SublimitSchema,
@@ -3821,6 +5303,7 @@ export {
3821
5303
  ValuationMethodSchema,
3822
5304
  VehicleCoverageSchema,
3823
5305
  VehicleCoverageTypeSchema,
5306
+ VerifyResultSchema,
3824
5307
  WatercraftDeclarationsSchema,
3825
5308
  WorkersCompDeclarationsSchema,
3826
5309
  buildAcroFormMappingPrompt,
@@ -3840,12 +5323,18 @@ export {
3840
5323
  buildIdentityPrompt,
3841
5324
  buildIntentPrompt,
3842
5325
  buildLookupFillPrompt,
5326
+ buildQueryClassifyPrompt,
3843
5327
  buildQuestionBatchPrompt,
3844
5328
  buildQuotesPoliciesPrompt,
5329
+ buildReasonPrompt,
3845
5330
  buildReplyIntentClassificationPrompt,
5331
+ buildRespondPrompt,
3846
5332
  buildSafetyPrompt,
5333
+ buildVerifyPrompt,
3847
5334
  chunkDocument,
5335
+ createApplicationPipeline,
3848
5336
  createExtractor,
5337
+ createQueryAgent,
3849
5338
  extractPageRange,
3850
5339
  fillAcroForm,
3851
5340
  getAcroFormFields,