@claritylabs/cl-sdk 0.12.0 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1220,6 +1220,12 @@ var PremiumLineSchema = z16.object({
1220
1220
  line: z16.string(),
1221
1221
  amount: z16.string()
1222
1222
  });
1223
/**
 * Schema for a single auxiliary fact attached to a document.
 *
 * `key` and `value` are required strings; `subject` and `context` are
 * optional strings.
 */
var AuxiliaryFactSchema = (() => {
  // Fresh schema instances per field, mirroring one `z16.string()` call each.
  const requiredText = () => z16.string();
  const optionalText = () => z16.string().optional();

  return z16.object({
    key: requiredText(),
    value: requiredText(),
    subject: optionalText(),
    context: optionalText()
  });
})();
1223
1229
  var BaseDocumentFields = {
1224
1230
  id: z16.string(),
1225
1231
  carrier: z16.string(),
@@ -1285,7 +1291,8 @@ var BaseDocumentFields = {
1285
1291
  individualClaims: z16.array(ClaimRecordSchema).optional(),
1286
1292
  experienceMod: ExperienceModSchema.optional(),
1287
1293
  cancellationNoticeDays: z16.number().optional(),
1288
- nonrenewalNoticeDays: z16.number().optional()
1294
+ nonrenewalNoticeDays: z16.number().optional(),
1295
+ supplementaryFacts: z16.array(AuxiliaryFactSchema).optional()
1289
1296
  };
1290
1297
  var PolicyDocumentSchema = z16.object({
1291
1298
  ...BaseDocumentFields,
@@ -1956,6 +1963,7 @@ function assembleDocument(documentId, documentType, memory) {
1956
1963
  ...sanitizeNulls(coverages ?? {}),
1957
1964
  ...sanitizeNulls(premium ?? {}),
1958
1965
  ...sanitizeNulls(supplementary ?? {}),
1966
+ supplementaryFacts: supplementary?.auxiliaryFacts,
1959
1967
  endorsements: endorsements?.endorsements,
1960
1968
  exclusions: exclusions?.exclusions,
1961
1969
  conditions: conditions?.conditions,
@@ -2308,6 +2316,43 @@ Total Cost: ${doc.totalCost}` : ""}`,
2308
2316
  metadata: stringMetadata({ premium: doc.premium, documentType: doc.type })
2309
2317
  });
2310
2318
  }
2319
+ const supplementaryLines = [
2320
+ ...doc.claimsContacts?.map((contact) => `Claims Contact: ${[
2321
+ contact.name,
2322
+ contact.phone,
2323
+ contact.email,
2324
+ contact.hours
2325
+ ].filter(Boolean).join(" | ")}`) ?? [],
2326
+ ...doc.regulatoryContacts?.map((contact) => `Regulatory Contact: ${[
2327
+ contact.name,
2328
+ contact.phone,
2329
+ contact.email
2330
+ ].filter(Boolean).join(" | ")}`) ?? [],
2331
+ ...doc.thirdPartyAdministrators?.map((contact) => `TPA: ${[
2332
+ contact.name,
2333
+ contact.phone,
2334
+ contact.email
2335
+ ].filter(Boolean).join(" | ")}`) ?? [],
2336
+ ...doc.supplementaryFacts?.map((fact) => [
2337
+ fact.subject ? `Subject: ${fact.subject}` : null,
2338
+ `${fact.key}: ${fact.value}`,
2339
+ fact.context ? `Context: ${fact.context}` : null
2340
+ ].filter(Boolean).join(" | ")) ?? [],
2341
+ doc.cancellationNoticeDays != null ? `Cancellation Notice Days: ${doc.cancellationNoticeDays}` : null,
2342
+ doc.nonrenewalNoticeDays != null ? `Nonrenewal Notice Days: ${doc.nonrenewalNoticeDays}` : null
2343
+ ].filter((line) => Boolean(line));
2344
+ if (supplementaryLines.length > 0) {
2345
+ chunks.push({
2346
+ id: `${docId}:supplementary:0`,
2347
+ documentId: docId,
2348
+ type: "supplementary",
2349
+ text: supplementaryLines.join("\n"),
2350
+ metadata: stringMetadata({
2351
+ documentType: doc.type,
2352
+ supplementaryFactCount: doc.supplementaryFacts?.length
2353
+ })
2354
+ });
2355
+ }
2311
2356
  return chunks;
2312
2357
  }
2313
2358
 
@@ -2391,6 +2436,32 @@ function mergeArrayPayload(existing, incoming, arrayKey, keyFn) {
2391
2436
  merged[arrayKey] = mergeUniqueObjects(existingItems, incomingItems, keyFn);
2392
2437
  return merged;
2393
2438
  }
2439
/**
 * Merges two "supplementary" extractor payloads.
 *
 * Scalar/top-level properties are combined by `mergeShallowPreferPresent`.
 * The three contact lists and the `auxiliaryFacts` list are then replaced by
 * the de-duplicated union computed by `mergeUniqueObjects`, using a
 * case-insensitive identity string built from a fixed set of properties.
 *
 * @param {object} existing - Payload already held for the extractor.
 * @param {object} incoming - Newly produced payload.
 * @returns {object} The merged payload; each of the four list keys is always set.
 */
function mergeSupplementary(existing, incoming) {
  const contactIdentityFields = ["name", "phone", "email", "address", "type"];
  const factIdentityFields = ["key", "value", "subject", "context"];
  const contactListNames = ["regulatoryContacts", "claimsContacts", "thirdPartyAdministrators"];

  // Anything that is not an array is treated as an empty list.
  const listOrEmpty = (candidate) => (Array.isArray(candidate) ? candidate : []);

  // Builds the identity function: lowercased values joined by "|", with
  // null/undefined properties contributing an empty segment.
  const identityOver = (fieldNames) => (item) =>
    fieldNames.map((fieldName) => String(item[fieldName] ?? "").toLowerCase()).join("|");

  const result = mergeShallowPreferPresent(existing, incoming);

  for (const listName of contactListNames) {
    result[listName] = mergeUniqueObjects(
      listOrEmpty(existing[listName]),
      listOrEmpty(incoming[listName]),
      identityOver(contactIdentityFields)
    );
  }

  result.auxiliaryFacts = mergeUniqueObjects(
    listOrEmpty(existing.auxiliaryFacts),
    listOrEmpty(incoming.auxiliaryFacts),
    identityOver(factIdentityFields)
  );

  return result;
}
2394
2465
  function mergeExtractorResult(extractorName, existing, incoming) {
2395
2466
  if (!existing) return incoming;
2396
2467
  if (!incoming) return existing;
@@ -2401,9 +2472,10 @@ function mergeExtractorResult(extractorName, existing, incoming) {
2401
2472
  case "carrier_info":
2402
2473
  case "named_insured":
2403
2474
  case "loss_history":
2404
- case "supplementary":
2405
2475
  case "premium_breakdown":
2406
2476
  return mergeShallowPreferPresent(current, next);
2477
+ case "supplementary":
2478
+ return mergeSupplementary(current, next);
2407
2479
  case "coverage_limits":
2408
2480
  return mergeCoverageLimits(current, next);
2409
2481
  case "declarations":
@@ -4000,16 +4072,29 @@ var ContactSchema2 = z33.object({
4000
4072
  address: z33.string().optional().describe("Mailing address"),
4001
4073
  type: z33.string().optional().describe("Contact type, e.g. 'State Department of Insurance'")
4002
4074
  });
4075
/**
 * Extractor-facing schema for one auxiliary fact. Same shape as a key/value
 * pair with optional `subject` and `context`, where every field carries a
 * description string.
 */
var AuxiliaryFactSchema2 = (() => {
  const describedText = (description) => z33.string().describe(description);
  const optionalDescribedText = (description) => z33.string().optional().describe(description);

  return z33.object({
    key: describedText("Normalized machine-readable fact key, e.g. 'policyholder_age' or 'insured_name'"),
    value: describedText("Concrete extracted fact value"),
    subject: optionalDescribedText("Person, entity, vehicle, property, or schedule item this fact belongs to"),
    context: optionalDescribedText("Short disambiguating context, such as 'Driver Schedule' or 'Named Insured'")
  });
})();
4003
4081
/**
 * Schema for the "supplementary" extractor output: three optional contact
 * lists, two optional notice periods (numbers), and an optional list of
 * auxiliary facts. All fields are optional and carry a description string.
 */
var SupplementarySchema = (() => {
  const contactList = (description) => z33.array(ContactSchema2).optional().describe(description);
  const dayCount = (description) => z33.number().optional().describe(description);

  return z33.object({
    regulatoryContacts: contactList("Regulatory body contacts (state department of insurance, ombudsman)"),
    claimsContacts: contactList("Claims reporting contacts and instructions"),
    thirdPartyAdministrators: contactList("Third-party administrators for claims handling"),
    cancellationNoticeDays: dayCount("Required notice period for cancellation in days"),
    nonrenewalNoticeDays: dayCount("Required notice period for nonrenewal in days"),
    auxiliaryFacts: z33.array(AuxiliaryFactSchema2).optional().describe("Additional retrieval-only facts that do not fit the strict primary schema")
  });
})();
4010
- function buildSupplementaryPrompt() {
4011
- return `You are an expert insurance document analyst. Extract supplementary and regulatory information from this document.
4089
+ function buildSupplementaryPrompt(alreadyExtractedSummary) {
4090
+ const exclusionBlock = alreadyExtractedSummary ? `
4012
4091
 
4092
+ IMPORTANT \u2014 The following facts have ALREADY been captured by prior extraction passes. Do NOT re-extract any of these. Your job is to find ADDITIONAL information that is missing from this list:
4093
+
4094
+ ${alreadyExtractedSummary}
4095
+ ` : "";
4096
+ return `You are an expert insurance document analyst. Extract supplementary, retrieval-only information from this document that is NOT already captured in the structured extraction results.
4097
+ ${exclusionBlock}
4013
4098
  Focus on:
4014
4099
  - Regulatory contacts: state department of insurance, regulatory bodies, ombudsman offices \u2014 with phone, email, address
4015
4100
  - Claims contacts: how to report claims, claims department contact info, hours of operation
@@ -4018,9 +4103,21 @@ Focus on:
4018
4103
  - Nonrenewal notice period in days
4019
4104
  - Complaint filing procedures and contacts
4020
4105
  - Governing law or jurisdiction provisions
4106
+ - Additional policy-specific facts that are useful for memory and retrieval even if they do not belong in the strict primary schema
4021
4107
 
4022
4108
  Look for regulatory notices, complaint contact sections, claims reporting instructions, and cancellation/nonrenewal provisions throughout the document.
4023
4109
 
4110
+ For auxiliaryFacts:
4111
+ - ONLY capture facts that are NOT already present in the structured extraction results above.
4112
+ - Do not duplicate information that has already been extracted \u2014 no policy numbers, insured names, addresses, coverage limits, deductibles, or any other field that appears in the already-extracted data.
4113
+ - Capture concrete, policy-specific facts as structured key/value pairs.
4114
+ - Prioritize facts that agents may need later but that are often omitted from strict schemas: policyholder names, insured person names, driver names, ages, dates of birth, marital status, garaging information, lienholders, household members, vehicle assignments, schedule row details, and other discrete identifiers \u2014 but ONLY if they are not already in the extracted data.
4115
+ - Use short normalized keys like "policyholder_name", "policyholder_age", "insured_name", "driver_age", "driver_date_of_birth", "garaging_zip", "vehicle_principal_driver".
4116
+ - Use subject when the fact belongs to a specific person, vehicle, property, or scheduled item.
4117
+ - Do not invent facts.
4118
+ - Do not include vague boilerplate or generic form language.
4119
+ - Do not repeat large narrative excerpts; keep facts atomic.
4120
+
4024
4121
  Return JSON only.`;
4025
4122
  }
4026
4123
 
@@ -4408,6 +4505,38 @@ function createExtractor(config) {
4408
4505
  sectionCount: Array.isArray(sectionResult?.sections) ? sectionResult.sections.length : 0
4409
4506
  }, null, 2);
4410
4507
  }
4508
/**
 * Builds a bullet-list summary of facts already present in extraction memory,
 * intended to be embedded in the supplementary prompt as a "do not re-extract"
 * list.
 *
 * Sources read from `memory`:
 * - "declarations": each item of its `fields` array that has a truthy `key`
 *   and `value` becomes `- key [subject]: value` (subject part only if set).
 * - "coverage_limits": each item of its `coverages` array becomes
 *   `- coverage: name, limit=…, deductible=…` using whichever parts are truthy.
 * - "named_insured" and "carrier_info": every non-empty string property
 *   becomes `- key: value`.
 *
 * Malformed data is skipped instead of throwing: null/undefined (or otherwise
 * non-object) array items and non-object records are ignored, so one bad entry
 * cannot abort the whole summary.
 *
 * @param {{ get(name: string): unknown }} memory - Store exposing `get(name)`
 *   (a `Map` satisfies this).
 * @returns {string} Lines joined with "\n", or "" when nothing qualifies.
 */
function buildAlreadyExtractedSummary(memory) {
  const isRecord = (candidate) => candidate !== null && typeof candidate === "object";
  const lines = [];

  const declarationResult = memory.get("declarations");
  if (Array.isArray(declarationResult?.fields)) {
    for (const field of declarationResult.fields) {
      // Guard against null/undefined items before touching their properties.
      if (!isRecord(field) || !field.key || !field.value) continue;
      const subject = field.subject ? ` [${field.subject}]` : "";
      lines.push(`- ${field.key}${subject}: ${field.value}`);
    }
  }

  const coverageResult = memory.get("coverage_limits");
  if (Array.isArray(coverageResult?.coverages)) {
    for (const cov of coverageResult.coverages) {
      if (!isRecord(cov)) continue;
      const parts = [
        cov.name,
        cov.limit && `limit=${cov.limit}`,
        cov.deductible && `deductible=${cov.deductible}`
      ].filter(Boolean);
      if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
    }
  }

  // Both flat records are summarized the same way; order matches the original
  // output (named insured first, then carrier info).
  for (const recordName of ["named_insured", "carrier_info"]) {
    const record = memory.get(recordName);
    // A bare string would otherwise be enumerated character by character.
    if (!isRecord(record)) continue;
    for (const [key, value] of Object.entries(record)) {
      if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
    }
  }

  // Joining an empty array already yields "".
  return lines.join("\n");
}
4411
4540
  function formatPageMapSummary(pageAssignments) {
4412
4541
  const extractorPages = /* @__PURE__ */ new Map();
4413
4542
  for (const assignment of pageAssignments) {
@@ -4747,6 +4876,28 @@ function createExtractor(config) {
4747
4876
  mergeMemoryResult(result.name, result.data, memory);
4748
4877
  }
4749
4878
  }
4879
+ {
4880
+ onProgress?.("Extracting supplementary retrieval facts...");
4881
+ try {
4882
+ const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
4883
+ const supplementaryResult = await runExtractor({
4884
+ name: "supplementary",
4885
+ prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
4886
+ schema: SupplementarySchema,
4887
+ pdfBase64,
4888
+ startPage: 1,
4889
+ endPage: pageCount,
4890
+ generateObject,
4891
+ convertPdfToImages,
4892
+ maxTokens: 4096,
4893
+ providerOptions
4894
+ });
4895
+ trackUsage(supplementaryResult.usage);
4896
+ mergeMemoryResult(supplementaryResult.name, supplementaryResult.data, memory);
4897
+ } catch (error) {
4898
+ await log?.(`Supplementary extractor failed: ${error}`);
4899
+ }
4900
+ }
4750
4901
  await pipelineCtx.save("extract", {
4751
4902
  id,
4752
4903
  pageCount,
@@ -7462,6 +7613,7 @@ export {
7462
7613
  AuditTypeSchema,
7463
7614
  AutoFillMatchSchema,
7464
7615
  AutoFillResultSchema,
7616
+ AuxiliaryFactSchema,
7465
7617
  BOAT_TYPES,
7466
7618
  BindingAuthoritySchema,
7467
7619
  BoatTypeSchema,