@claritylabs/cl-sdk 1.1.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of @claritylabs/cl-sdk might be problematic. Click here for more details.
- package/dist/index.d.mts +454 -55
- package/dist/index.d.ts +454 -55
- package/dist/index.js +448 -119
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +452 -127
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -28462,6 +28462,63 @@ var require_dist4 = __commonJS({
|
|
|
28462
28462
|
}
|
|
28463
28463
|
});
|
|
28464
28464
|
|
|
28465
|
+
// src/types/enums.ts
|
|
28466
|
+
/**
 * Ordered list of the policy-type identifiers known to the SDK.
 *
 * The extraction prompts elsewhere in this file spell out the same
 * identifiers as the example values for their "policyTypes" field.
 * "other" is kept as the last entry.
 */
var POLICY_TYPES = [
  "general_liability", "commercial_property", "commercial_auto", "non_owned_auto",
  "workers_comp", "umbrella", "excess_liability", "professional_liability",
  "cyber", "epli", "directors_officers", "fiduciary_liability",
  "crime_fidelity", "inland_marine", "builders_risk", "environmental",
  "ocean_marine", "surety", "product_liability", "bop",
  "management_liability_package", "property", "other"
];
|
|
28491
|
+
|
|
28492
|
+
// src/types/context-keys.ts
|
|
28493
|
+
/**
 * Table associating extracted-document field paths with context keys.
 *
 * Every entry is an object with four string properties, in this order:
 *   extractedField - path-like label of the extracted value
 *   category       - grouping label (company_info, operations, financial,
 *                    coverage, loss_history, premises, vehicles, employees)
 *   contextKey     - key the value is filed under
 *   description    - human-readable explanation
 *
 * The rows are written as positional tuples and expanded into objects so the
 * table reads as a grid; the resulting array is an ordinary array of objects.
 */
var CONTEXT_KEY_MAP = [
  ["insuredName", "company_info", "company_name", "Primary named insured"],
  ["insuredDba", "company_info", "dba_name", "Doing-business-as name"],
  ["insuredAddress", "company_info", "company_address", "Primary insured mailing address"],
  ["insuredEntityType", "company_info", "entity_type", "Legal entity type"],
  ["insuredFein", "company_info", "fein", "Federal Employer ID Number"],
  ["insuredSicCode", "company_info", "sic_code", "SIC classification code"],
  ["insuredNaicsCode", "company_info", "naics_code", "NAICS classification code"],
  ["classifications[].description", "operations", "description_of_operations", "Description of business operations"],
  ["classifications[].basisAmount(payroll)", "operations", "annual_payroll", "Annual payroll from classification schedule"],
  ["classifications[].basisAmount(revenue)", "operations", "annual_revenue", "Annual revenue from classification schedule"],
  ["totalPremium", "financial", "current_premium", "Total policy premium"],
  ["locations[].buildingValue", "financial", "total_property_values", "Sum of building values"],
  ["locations[].contentsValue", "financial", "total_contents_values", "Sum of contents values"],
  ["policyTypes", "coverage", "coverage_types", "Lines of business covered"],
  ["coverages[].limit", "coverage", "current_limits", "Current coverage limits"],
  ["coverages[].deductible", "coverage", "current_deductibles", "Current deductibles"],
  ["experienceMod.factor", "loss_history", "experience_mod", "Workers comp experience modification factor"],
  ["lossSummary.totalClaims", "loss_history", "total_claims", "Total claim count from loss runs"],
  ["locations[]", "premises", "premises_addresses", "All insured location addresses"],
  ["locations[].constructionType", "premises", "construction_type", "Building construction type"],
  ["locations[].yearBuilt", "premises", "year_built", "Year built for primary location"],
  ["locations[].sprinklered", "premises", "sprinkler_system", "Sprinkler system presence"],
  ["vehicles[]", "vehicles", "vehicle_schedule", "Complete vehicle schedule"],
  ["vehicles[].length", "vehicles", "vehicle_count", "Number of insured vehicles"],
  ["classifications[](WC)", "employees", "employee_count_by_class", "Employee count by WC classification"],
  ["classifications[].basisAmount(payroll,byState)", "employees", "annual_payroll_by_state", "Annual payroll by state"]
].map(([extractedField, category, contextKey, description]) => ({
  extractedField,
  category,
  contextKey,
  description
}));
|
|
28521
|
+
|
|
28465
28522
|
// src/types/platform.ts
|
|
28466
28523
|
var PLATFORM_CONFIGS = {
|
|
28467
28524
|
email: {
|
|
@@ -28537,7 +28594,7 @@ Respond with JSON only. The JSON must follow this exact structure:
|
|
|
28537
28594
|
"broker": "insurance broker name if identifiable, or null",
|
|
28538
28595
|
"policyNumber": "policy or quote reference number",
|
|
28539
28596
|
"documentType": "policy" or "quote",
|
|
28540
|
-
"policyTypes": ["general_liability", "
|
|
28597
|
+
"policyTypes": ["general_liability", "commercial_property", "commercial_auto", "non_owned_auto", "workers_comp", "umbrella", "excess_liability", "professional_liability", "cyber", "epli", "directors_officers", "fiduciary_liability", "crime_fidelity", "inland_marine", "builders_risk", "environmental", "ocean_marine", "surety", "product_liability", "bop", "management_liability_package", "property", "other"],
|
|
28541
28598
|
"policyYear": number,
|
|
28542
28599
|
"effectiveDate": "MM/DD/YYYY",
|
|
28543
28600
|
"expirationDate": "MM/DD/YYYY",
|
|
@@ -28623,26 +28680,42 @@ CLASSIFICATION SIGNALS:
|
|
|
28623
28680
|
- QUOTE signals: "quote", "proposal", "indication" wording, subjectivities, "subject to" conditions, quote expiration date, "proposed premium", "terms and conditions may vary"
|
|
28624
28681
|
|
|
28625
28682
|
If uncertain, lean toward "policy" for documents with declarations pages and binding language, "quote" for everything else.`;
|
|
28626
|
-
var METADATA_PROMPT = `You are an expert insurance document analyst. Extract
|
|
28683
|
+
var METADATA_PROMPT = `You are an expert insurance document analyst. Extract the high-level metadata AND structured declarations data from this insurance document. Do NOT extract full section content \u2014 that will be done in a separate pass.
|
|
28627
28684
|
|
|
28628
28685
|
Respond with JSON only:
|
|
28629
28686
|
|
|
28630
28687
|
{
|
|
28631
28688
|
"metadata": {
|
|
28632
28689
|
"carrier": "primary insurance company name",
|
|
28690
|
+
"carrierLegalName": "legal entity name of insurer, or null",
|
|
28691
|
+
"carrierNaicNumber": "NAIC company code, or null",
|
|
28692
|
+
"carrierAmBestRating": "AM Best rating (e.g. 'A+ XV'), or null",
|
|
28693
|
+
"carrierAdmittedStatus": "admitted" or "non_admitted" or "surplus_lines" or null,
|
|
28633
28694
|
"security": "insurer or underwriter entity providing coverage, or null",
|
|
28634
28695
|
"underwriter": "named individual underwriter, or null",
|
|
28635
28696
|
"mga": "MGA or Program Administrator, or null",
|
|
28636
|
-
"broker": "insurance broker, or null",
|
|
28697
|
+
"broker": "insurance broker agency name, or null",
|
|
28698
|
+
"brokerContactName": "individual producer name, or null",
|
|
28699
|
+
"brokerLicenseNumber": "producer license number, or null",
|
|
28637
28700
|
"policyNumber": "policy number",
|
|
28701
|
+
"priorPolicyNumber": "previous policy number if renewal, or null",
|
|
28638
28702
|
"documentType": "policy" or "quote",
|
|
28639
|
-
"policyTypes": ["general_liability",
|
|
28703
|
+
"policyTypes": ["general_liability", "commercial_property", "commercial_auto", "non_owned_auto", "workers_comp", "umbrella", "excess_liability", "professional_liability", "cyber", "epli", "directors_officers", "fiduciary_liability", "crime_fidelity", "inland_marine", "builders_risk", "environmental", "ocean_marine", "surety", "product_liability", "bop", "management_liability_package", "property", "other"],
|
|
28704
|
+
"coverageForm": "occurrence" or "claims_made" or "accident" or null,
|
|
28640
28705
|
"policyYear": number,
|
|
28641
28706
|
"effectiveDate": "MM/DD/YYYY",
|
|
28642
28707
|
"expirationDate": "MM/DD/YYYY",
|
|
28708
|
+
"effectiveTime": "e.g. 12:01 AM, or null",
|
|
28709
|
+
"retroactiveDate": "MM/DD/YYYY for claims-made policies, or null",
|
|
28643
28710
|
"isRenewal": boolean,
|
|
28711
|
+
"isPackage": boolean,
|
|
28712
|
+
"programName": "named program, or null",
|
|
28644
28713
|
"premium": "$X,XXX",
|
|
28645
|
-
"insuredName": "name of insured
|
|
28714
|
+
"insuredName": "name of primary named insured",
|
|
28715
|
+
"insuredDba": "doing-business-as name, or null",
|
|
28716
|
+
"insuredAddress": { "street1": "", "city": "", "state": "", "zip": "" } or null,
|
|
28717
|
+
"insuredEntityType": "corporation" or "llc" or "partnership" or "sole_proprietor" or "joint_venture" or "trust" or "nonprofit" or "municipality" or "other" or null,
|
|
28718
|
+
"insuredFein": "FEIN, or null",
|
|
28646
28719
|
"summary": "1-2 sentence summary"
|
|
28647
28720
|
},
|
|
28648
28721
|
"metadataSource": {
|
|
@@ -28651,34 +28724,94 @@ Respond with JSON only:
|
|
|
28651
28724
|
"premiumPage": number or null,
|
|
28652
28725
|
"effectiveDatePage": number or null
|
|
28653
28726
|
},
|
|
28727
|
+
"additionalNamedInsureds": [
|
|
28728
|
+
{ "name": "insured name", "relationship": "subsidiary, affiliate, etc., or null" }
|
|
28729
|
+
],
|
|
28654
28730
|
"coverages": [
|
|
28655
28731
|
{ "name": "coverage name", "limit": "$X,XXX,XXX", "deductible": "$X,XXX or null", "pageNumber": number, "sectionRef": "section ref or null" }
|
|
28656
28732
|
],
|
|
28733
|
+
"limits": {
|
|
28734
|
+
"perOccurrence": "$X,XXX,XXX or null",
|
|
28735
|
+
"generalAggregate": "$X,XXX,XXX or null",
|
|
28736
|
+
"productsCompletedOpsAggregate": "or null",
|
|
28737
|
+
"personalAdvertisingInjury": "or null",
|
|
28738
|
+
"fireDamage": "or null",
|
|
28739
|
+
"medicalExpense": "or null",
|
|
28740
|
+
"combinedSingleLimit": "or null",
|
|
28741
|
+
"bodilyInjuryPerPerson": "or null",
|
|
28742
|
+
"bodilyInjuryPerAccident": "or null",
|
|
28743
|
+
"propertyDamage": "or null",
|
|
28744
|
+
"eachOccurrenceUmbrella": "or null",
|
|
28745
|
+
"umbrellaAggregate": "or null",
|
|
28746
|
+
"umbrellaRetention": "or null",
|
|
28747
|
+
"statutory": boolean or null,
|
|
28748
|
+
"employersLiability": { "eachAccident": "", "diseasePolicyLimit": "", "diseaseEachEmployee": "" } or null,
|
|
28749
|
+
"defenseCostTreatment": "inside_limits" or "outside_limits" or "supplementary" or null
|
|
28750
|
+
},
|
|
28751
|
+
"deductibles": {
|
|
28752
|
+
"perClaim": "or null",
|
|
28753
|
+
"perOccurrence": "or null",
|
|
28754
|
+
"selfInsuredRetention": "or null",
|
|
28755
|
+
"waitingPeriod": "or null"
|
|
28756
|
+
},
|
|
28757
|
+
"locations": [
|
|
28758
|
+
{ "number": 1, "address": { "street1": "", "city": "", "state": "", "zip": "" }, "description": "or null", "buildingValue": "or null", "contentsValue": "or null" }
|
|
28759
|
+
],
|
|
28760
|
+
"vehicles": [
|
|
28761
|
+
{ "number": 1, "year": 2024, "make": "", "model": "", "vin": "", "vehicleType": "or null" }
|
|
28762
|
+
],
|
|
28763
|
+
"classifications": [
|
|
28764
|
+
{ "code": "12345", "description": "class description", "premiumBasis": "payroll or revenue or area", "basisAmount": "or null", "rate": "or null", "premium": "or null" }
|
|
28765
|
+
],
|
|
28766
|
+
"formInventory": [
|
|
28767
|
+
{ "formNumber": "CG 00 01", "editionDate": "04 13", "title": "or null", "formType": "coverage or endorsement or declarations or application or notice or other" }
|
|
28768
|
+
],
|
|
28769
|
+
"taxesAndFees": [
|
|
28770
|
+
{ "name": "fee name", "amount": "$X,XXX", "type": "tax or fee or surcharge or assessment or null" }
|
|
28771
|
+
],
|
|
28657
28772
|
"totalPages": number,
|
|
28658
28773
|
"tableOfContents": [
|
|
28659
28774
|
{ "title": "section title", "pageStart": number, "pageEnd": number }
|
|
28660
28775
|
]
|
|
28661
|
-
}
|
|
28662
|
-
|
|
28776
|
+
}
|
|
28777
|
+
|
|
28778
|
+
IMPORTANT:
|
|
28779
|
+
- policyTypes should include ALL coverage types found in the document
|
|
28780
|
+
- coverageForm is the primary trigger type: "occurrence" for occurrence-based, "claims_made" for claims-made, "accident" for auto/WC
|
|
28781
|
+
- isPackage is true if this is a Commercial Package Policy (CPP) with multiple coverage parts
|
|
28782
|
+
- Extract locations ONLY if a location/premises schedule is visible on the declarations
|
|
28783
|
+
- Extract vehicles ONLY if a vehicle schedule is visible
|
|
28784
|
+
- Extract classifications ONLY if a classification/rating schedule is visible
|
|
28785
|
+
- formInventory: list ALL form numbers found in any forms schedule or endorsement schedule
|
|
28786
|
+
- For limits, extract the standard limit fields that appear on the declarations page
|
|
28787
|
+
- For deductibles, extract from the declarations or deductible schedule`;
|
|
28788
|
+
var QUOTE_METADATA_PROMPT = `You are an expert insurance document analyst. Extract the high-level metadata AND structured data from this insurance QUOTE or PROPOSAL. Do NOT extract full section content \u2014 that will be done in a separate pass.
|
|
28663
28789
|
|
|
28664
28790
|
Respond with JSON only:
|
|
28665
28791
|
|
|
28666
28792
|
{
|
|
28667
28793
|
"metadata": {
|
|
28668
28794
|
"carrier": "primary insurance company name",
|
|
28669
|
-
"
|
|
28795
|
+
"carrierLegalName": "legal entity name, or null",
|
|
28796
|
+
"carrierNaicNumber": "NAIC code, or null",
|
|
28797
|
+
"carrierAdmittedStatus": "admitted or non_admitted or surplus_lines, or null",
|
|
28798
|
+
"security": "insurer or underwriter entity, or null",
|
|
28670
28799
|
"underwriter": "named individual underwriter, or null",
|
|
28671
28800
|
"mga": "MGA or Program Administrator, or null",
|
|
28672
28801
|
"broker": "insurance broker, or null",
|
|
28802
|
+
"brokerContactName": "individual producer, or null",
|
|
28673
28803
|
"quoteNumber": "quote or proposal reference number",
|
|
28674
|
-
"policyTypes": ["general_liability",
|
|
28804
|
+
"policyTypes": ["general_liability", "commercial_property", "commercial_auto", "non_owned_auto", "workers_comp", "umbrella", "excess_liability", "professional_liability", "cyber", "epli", "directors_officers", "fiduciary_liability", "crime_fidelity", "inland_marine", "builders_risk", "environmental", "ocean_marine", "surety", "product_liability", "bop", "management_liability_package", "property", "other"],
|
|
28805
|
+
"coverageForm": "occurrence or claims_made or accident, or null",
|
|
28675
28806
|
"quoteYear": number,
|
|
28676
28807
|
"proposedEffectiveDate": "MM/DD/YYYY or null",
|
|
28677
28808
|
"proposedExpirationDate": "MM/DD/YYYY or null",
|
|
28678
28809
|
"quoteExpirationDate": "MM/DD/YYYY \u2014 when this quote offer expires, or null",
|
|
28810
|
+
"retroactiveDate": "MM/DD/YYYY for claims-made, or null",
|
|
28679
28811
|
"isRenewal": boolean,
|
|
28680
28812
|
"premium": "$X,XXX \u2014 total proposed premium",
|
|
28681
28813
|
"insuredName": "name of insured party",
|
|
28814
|
+
"insuredAddress": { "street1": "", "city": "", "state": "", "zip": "" } or null,
|
|
28682
28815
|
"summary": "1-2 sentence summary of the quote"
|
|
28683
28816
|
},
|
|
28684
28817
|
"metadataSource": {
|
|
@@ -28688,16 +28821,31 @@ Respond with JSON only:
|
|
|
28688
28821
|
"effectiveDatePage": number or null
|
|
28689
28822
|
},
|
|
28690
28823
|
"coverages": [
|
|
28691
|
-
{ "name": "coverage name", "proposedLimit": "$X,XXX,XXX", "proposedDeductible": "$X,XXX or null", "pageNumber": number, "sectionRef": "
|
|
28824
|
+
{ "name": "coverage name", "proposedLimit": "$X,XXX,XXX", "proposedDeductible": "$X,XXX or null", "pageNumber": number, "sectionRef": "or null" }
|
|
28692
28825
|
],
|
|
28826
|
+
"limits": {
|
|
28827
|
+
"perOccurrence": "or null",
|
|
28828
|
+
"generalAggregate": "or null",
|
|
28829
|
+
"defenseCostTreatment": "inside_limits or outside_limits or supplementary, or null"
|
|
28830
|
+
},
|
|
28831
|
+
"deductibles": {
|
|
28832
|
+
"perClaim": "or null",
|
|
28833
|
+
"perOccurrence": "or null",
|
|
28834
|
+
"selfInsuredRetention": "or null",
|
|
28835
|
+
"waitingPeriod": "or null"
|
|
28836
|
+
},
|
|
28693
28837
|
"premiumBreakdown": [
|
|
28694
28838
|
{ "line": "coverage line name", "amount": "$X,XXX" }
|
|
28695
28839
|
],
|
|
28696
28840
|
"subjectivities": [
|
|
28697
|
-
{ "description": "subjectivity description", "category": "pre_binding
|
|
28841
|
+
{ "description": "subjectivity description", "category": "pre_binding or post_binding or information, or null", "dueDate": "or null", "pageNumber": number or null }
|
|
28698
28842
|
],
|
|
28699
28843
|
"underwritingConditions": [
|
|
28700
|
-
{ "description": "condition description", "pageNumber": number or null }
|
|
28844
|
+
{ "description": "condition description", "category": "or null", "pageNumber": number or null }
|
|
28845
|
+
],
|
|
28846
|
+
"warrantyRequirements": ["warranty text"],
|
|
28847
|
+
"taxesAndFees": [
|
|
28848
|
+
{ "name": "fee name", "amount": "$X,XXX", "type": "tax or fee or surcharge, or null" }
|
|
28701
28849
|
],
|
|
28702
28850
|
"totalPages": number,
|
|
28703
28851
|
"tableOfContents": [
|
|
@@ -28707,8 +28855,10 @@ Respond with JSON only:
|
|
|
28707
28855
|
|
|
28708
28856
|
IMPORTANT:
|
|
28709
28857
|
- quoteExpirationDate is when the quote offer itself expires (not the proposed policy period)
|
|
28710
|
-
- subjectivities are conditions that must be met before or after binding
|
|
28711
|
-
- premiumBreakdown should list each coverage line's individual premium if available
|
|
28858
|
+
- subjectivities are conditions that must be met before or after binding
|
|
28859
|
+
- premiumBreakdown should list each coverage line's individual premium if available
|
|
28860
|
+
- warrantyRequirements: extract any warranty provisions required for coverage
|
|
28861
|
+
- For limits and deductibles, extract the proposed structure from the quote`;
|
|
28712
28862
|
function buildSectionsPrompt(pageStart, pageEnd) {
|
|
28713
28863
|
return `You are an expert insurance document analyst. Extract ALL sections, clauses, endorsements, and schedules found on pages ${pageStart} through ${pageEnd} of this document. Preserve the original language verbatim.
|
|
28714
28864
|
|
|
@@ -28729,6 +28879,35 @@ Respond with JSON only:
|
|
|
28729
28879
|
]
|
|
28730
28880
|
}
|
|
28731
28881
|
],
|
|
28882
|
+
"endorsements": [
|
|
28883
|
+
{
|
|
28884
|
+
"formNumber": "e.g. CG 21 47",
|
|
28885
|
+
"editionDate": "e.g. 12 07, or null",
|
|
28886
|
+
"title": "endorsement title",
|
|
28887
|
+
"coverageType": "policyTypes value if coverage-specific, or null",
|
|
28888
|
+
"pageStart": number,
|
|
28889
|
+
"effectType": "broadening or restrictive or informational or null",
|
|
28890
|
+
"additionalPremium": "$X,XXX or null",
|
|
28891
|
+
"content": "full verbatim text of the endorsement"
|
|
28892
|
+
}
|
|
28893
|
+
],
|
|
28894
|
+
"exclusions": [
|
|
28895
|
+
{
|
|
28896
|
+
"title": "exclusion title or short description",
|
|
28897
|
+
"formNumber": "form number if part of a named endorsement, or null",
|
|
28898
|
+
"coverageType": "policyTypes value if coverage-specific, or null",
|
|
28899
|
+
"pageNumber": number,
|
|
28900
|
+
"content": "full verbatim exclusion text"
|
|
28901
|
+
}
|
|
28902
|
+
],
|
|
28903
|
+
"conditions": [
|
|
28904
|
+
{
|
|
28905
|
+
"title": "condition title",
|
|
28906
|
+
"coverageType": "policyTypes value if coverage-specific, or null",
|
|
28907
|
+
"pageNumber": number,
|
|
28908
|
+
"content": "full verbatim condition text"
|
|
28909
|
+
}
|
|
28910
|
+
],
|
|
28732
28911
|
"regulatoryContext": { "content": "verbatim text", "pageNumber": number } or null,
|
|
28733
28912
|
"complaintContact": { "content": "verbatim text", "pageNumber": number } or null,
|
|
28734
28913
|
"costsAndFees": { "content": "verbatim text", "pageNumber": number } or null,
|
|
@@ -28743,6 +28922,19 @@ SECTION TYPE GUIDANCE:
|
|
|
28743
28922
|
- "insuring_agreement" \u2014 the insuring agreement clause (only if standalone, not inside a policy_form)
|
|
28744
28923
|
- Other types for standalone sections only
|
|
28745
28924
|
|
|
28925
|
+
ENDORSEMENT GUIDANCE:
|
|
28926
|
+
- List every endorsement found in the page range in the "endorsements" array
|
|
28927
|
+
- effectType: "broadening" adds or expands coverage; "restrictive" limits or excludes coverage; "informational" changes administrative terms only
|
|
28928
|
+
- additionalPremium: extract if a premium charge or credit is shown on the endorsement
|
|
28929
|
+
|
|
28930
|
+
EXCLUSION GUIDANCE:
|
|
28931
|
+
- List named exclusions from exclusion schedules or endorsements in the "exclusions" array
|
|
28932
|
+
- Also capture exclusions embedded within insuring agreements or conditions as separate entries if clearly labeled
|
|
28933
|
+
- Preserve the full verbatim exclusion text
|
|
28934
|
+
|
|
28935
|
+
CONDITION GUIDANCE:
|
|
28936
|
+
- List policy conditions (duties after loss, cooperation clause, cancellation, etc.) in the "conditions" array
|
|
28937
|
+
|
|
28746
28938
|
IMPORTANT: Only extract content from pages ${pageStart}-${pageEnd}. Preserve original language exactly.`;
|
|
28747
28939
|
}
|
|
28748
28940
|
var buildPolicySectionsPrompt = buildSectionsPrompt;
|
|
@@ -28766,11 +28958,19 @@ Respond with JSON only:
|
|
|
28766
28958
|
]
|
|
28767
28959
|
}
|
|
28768
28960
|
],
|
|
28961
|
+
"exclusions": [
|
|
28962
|
+
{
|
|
28963
|
+
"title": "exclusion title or short description",
|
|
28964
|
+
"coverageType": "policyTypes value if coverage-specific, or null",
|
|
28965
|
+
"pageNumber": number,
|
|
28966
|
+
"content": "full verbatim exclusion text"
|
|
28967
|
+
}
|
|
28968
|
+
],
|
|
28769
28969
|
"subjectivities": [
|
|
28770
|
-
{ "description": "subjectivity text", "category": "pre_binding
|
|
28970
|
+
{ "description": "subjectivity text", "category": "pre_binding or post_binding or information, or null", "dueDate": "or null", "pageNumber": number or null }
|
|
28771
28971
|
],
|
|
28772
28972
|
"underwritingConditions": [
|
|
28773
|
-
{ "description": "condition text", "pageNumber": number or null }
|
|
28973
|
+
{ "description": "condition text", "category": "or null", "pageNumber": number or null }
|
|
28774
28974
|
]
|
|
28775
28975
|
}
|
|
28776
28976
|
|
|
@@ -28783,6 +28983,11 @@ SECTION TYPE GUIDANCE:
|
|
|
28783
28983
|
- "exclusion" \u2014 excluded coverages, limitations
|
|
28784
28984
|
- "other" \u2014 anything else
|
|
28785
28985
|
|
|
28986
|
+
EXCLUSION GUIDANCE:
|
|
28987
|
+
- List named exclusions from any exclusion schedule, endorsement, or coverage summary in the "exclusions" array
|
|
28988
|
+
- Preserve the full verbatim exclusion text
|
|
28989
|
+
- Set coverageType if the exclusion applies to a specific coverage line
|
|
28990
|
+
|
|
28786
28991
|
IMPORTANT: Only extract content from pages ${pageStart}-${pageEnd}. Preserve original language exactly.`;
|
|
28787
28992
|
}
|
|
28788
28993
|
function buildSupplementaryEnrichmentPrompt(fields) {
|
|
@@ -29779,8 +29984,134 @@ var AGENT_TOOLS = [
|
|
|
29779
29984
|
|
|
29780
29985
|
// src/extraction/pipeline.ts
|
|
29781
29986
|
import { generateText } from "ai";
|
|
29782
|
-
|
|
29783
|
-
|
|
29987
|
+
|
|
29988
|
+
// src/extraction/pdf.ts
|
|
29989
|
+
import {
|
|
29990
|
+
PDFDocument,
|
|
29991
|
+
PDFTextField,
|
|
29992
|
+
PDFCheckBox,
|
|
29993
|
+
PDFDropdown,
|
|
29994
|
+
PDFRadioGroup,
|
|
29995
|
+
StandardFonts,
|
|
29996
|
+
rgb
|
|
29997
|
+
} from "pdf-lib";
|
|
29998
|
+
/**
 * Build a base64-encoded PDF holding only the requested pages of the input.
 *
 * @param {string} pdfBase64 - Source PDF, base64 encoded.
 * @param {number} startPage - First page to keep, 1-based; values below 1 are clamped to page 1.
 * @param {number} endPage - Last page to keep, 1-based and inclusive; clamped to the page count.
 * @returns {Promise<string>} Base64 of the trimmed PDF. When the clamped range
 *   spans the whole document, the input string is returned unchanged.
 *
 * The source is loaded with `ignoreEncryption: true`. Decoding and encoding use
 * `Buffer` when that global exists and fall back to `atob`/`btoa` otherwise.
 * A range that ends before it starts selects no pages, so the saved document
 * has none.
 */
async function extractPageRange(pdfBase64, startPage, endPage) {
  let sourceBytes;
  if (typeof Buffer !== "undefined") {
    sourceBytes = Buffer.from(pdfBase64, "base64");
  } else {
    sourceBytes = Uint8Array.from(atob(pdfBase64), (ch) => ch.charCodeAt(0));
  }

  const source = await PDFDocument.load(sourceBytes, { ignoreEncryption: true });
  const pageTotal = source.getPageCount();

  // Convert the 1-based inclusive page numbers into clamped 0-based indices.
  const firstIndex = Math.max(startPage - 1, 0);
  const lastIndex = Math.min(endPage, pageTotal) - 1;

  // Nothing to trim: hand back the caller's string instead of re-serialising.
  const coversWholeDocument = firstIndex === 0 && lastIndex >= pageTotal - 1;
  if (coversWholeDocument) {
    return pdfBase64;
  }

  const subset = await PDFDocument.create();
  const wanted = Array.from(
    { length: lastIndex - firstIndex + 1 },
    (_, offset) => firstIndex + offset
  );
  const copied = await subset.copyPages(source, wanted);
  for (const page of copied) {
    subset.addPage(page);
  }

  const saved = await subset.save();
  if (typeof Buffer !== "undefined") {
    return Buffer.from(saved).toString("base64");
  }

  // No Buffer: build a binary string one byte at a time for btoa.
  let binary = "";
  for (const byte of new Uint8Array(saved)) {
    binary += String.fromCharCode(byte);
  }
  return btoa(binary);
}
|
|
30022
|
+
/**
 * Report how many pages a base64-encoded PDF contains.
 *
 * @param {string} pdfBase64 - PDF contents, base64 encoded.
 * @returns {Promise<number>} The value of `getPageCount()` on the loaded document.
 *
 * The document is loaded with `ignoreEncryption: true`. `Buffer` is used for
 * decoding when that global exists; otherwise `atob` is used.
 */
async function getPdfPageCount(pdfBase64) {
  let rawBytes;
  if (typeof Buffer !== "undefined") {
    rawBytes = Buffer.from(pdfBase64, "base64");
  } else {
    rawBytes = Uint8Array.from(atob(pdfBase64), (ch) => ch.charCodeAt(0));
  }
  const loaded = await PDFDocument.load(rawBytes, { ignoreEncryption: true });
  return loaded.getPageCount();
}
|
|
30027
|
+
/**
 * Summarise the form fields of a loaded PDF document.
 *
 * @param {object} pdfDoc - Document exposing `getForm()`, whose form exposes `getFields()`.
 * @returns {Array<{name: string, type: string, options?: *}>} One descriptor per
 *   field, in the order the form reports them. `type` is "text", "checkbox",
 *   "dropdown" or "radio"; dropdown and radio descriptors also carry the
 *   result of the field's `getOptions()`. Fields of any other class are
 *   reported as "text". A form without fields yields an empty array.
 */
function getAcroFormFields(pdfDoc) {
  // [field class, reported type, whether getOptions() is included], checked in order.
  const fieldKinds = [
    [PDFTextField, "text", false],
    [PDFCheckBox, "checkbox", false],
    [PDFDropdown, "dropdown", true],
    [PDFRadioGroup, "radio", true]
  ];

  return pdfDoc.getForm().getFields().map((field) => {
    const name = field.getName();
    const kind = fieldKinds.find(([FieldClass]) => field instanceof FieldClass);
    if (kind === undefined) {
      // Unrecognised field classes are described as plain text fields.
      return { name, type: "text" };
    }
    const [, type, withOptions] = kind;
    return withOptions ? { name, type, options: field.getOptions() } : { name, type };
  });
}
|
|
30048
|
+
/**
 * Fill the AcroForm fields of a PDF and return the flattened result.
 *
 * @param {Uint8Array|ArrayBuffer|string} pdfBytes - PDF contents, passed straight to `PDFDocument.load`.
 * @param {Iterable<{acroFormName: string, value: *}>} mappings - Field name / value pairs to apply.
 *   - text field: the value is written with `setText`
 *   - checkbox: checked when the lower-cased value is one of
 *     "yes", "true", "x", "checked", "on"; unchecked otherwise
 *   - dropdown / radio group: the value is passed to `select`
 *   - any other field class is left untouched
 * @returns {Promise<Uint8Array>} The saved document after `form.flatten()`.
 *
 * Filling is best-effort per field: a missing field name, an option the field
 * does not offer, or any other error raised while applying one mapping causes
 * that mapping to be skipped so the remaining ones are still applied.
 *
 * Number, boolean and bigint values are converted with `String()` first.
 * Without that, a checkbox value of `true` has no `toLowerCase` and ended up
 * in the catch below, leaving the field silently unfilled. Strings, null and
 * undefined are passed through unchanged.
 */
async function fillAcroForm(pdfBytes, mappings) {
  const CHECKED_WORDS = ["yes", "true", "x", "checked", "on"];
  const COERCED_TYPES = ["number", "boolean", "bigint"];

  const pdfDoc = await PDFDocument.load(pdfBytes, { ignoreEncryption: true });
  const form = pdfDoc.getForm();

  for (const { acroFormName, value } of mappings) {
    // pdf-lib's setters are presumed to expect strings; normalise primitive non-strings.
    const text = COERCED_TYPES.includes(typeof value) ? String(value) : value;
    try {
      const field = form.getField(acroFormName);
      if (field instanceof PDFTextField) {
        field.setText(text);
      } else if (field instanceof PDFCheckBox) {
        if (CHECKED_WORDS.includes(text.toLowerCase())) {
          field.check();
        } else {
          field.uncheck();
        }
      } else if (field instanceof PDFDropdown || field instanceof PDFRadioGroup) {
        field.select(text);
      }
    } catch {
      // Deliberate best-effort: skip this mapping and keep filling the rest.
    }
  }

  form.flatten();
  return await pdfDoc.save();
}
|
|
30080
|
+
/**
 * Draw text (or an "X" mark) onto pages of a PDF at percentage coordinates.
 *
 * @param {Uint8Array|ArrayBuffer|string} pdfBytes - PDF contents, passed straight to `PDFDocument.load`.
 * @param {Iterable<object>} overlays - Items to draw. Each item provides:
 *   - `page`: 0-based page index; items whose index is negative or not below
 *     the page count are skipped
 *   - `x`, `y`: position as a percentage of page width / height, with `y`
 *     measured from the top edge
 *   - `fontSize`: optional, defaults to 10
 *   - `isCheckmark`: when truthy an "X" is drawn instead of `text`
 *   - `text`: the string to draw otherwise
 * @returns {Promise<Uint8Array>} The saved document.
 *
 * All drawing uses the embedded standard Helvetica font in black. The document
 * is loaded with `ignoreEncryption: true`.
 */
async function overlayTextOnPdf(pdfBytes, overlays) {
  const pdfDoc = await PDFDocument.load(pdfBytes, { ignoreEncryption: true });
  const helvetica = await pdfDoc.embedFont(StandardFonts.Helvetica);
  const totalPages = pdfDoc.getPageCount();

  for (const overlay of overlays) {
    if (overlay.page < 0 || overlay.page >= totalPages) continue;

    const target = pdfDoc.getPage(overlay.page);
    const { width, height } = target.getSize();
    const size = overlay.fontSize ?? 10;

    // Checkmarks are rendered as a literal "X"; everything else uses the supplied text.
    const content = overlay.isCheckmark ? "X" : overlay.text;

    target.drawText(content, {
      x: overlay.x / 100 * width,
      // Flip the top-based percentage and drop by the font size.
      y: height - overlay.y / 100 * height - size,
      size,
      font: helvetica,
      color: rgb(0, 0, 0)
    });
  }

  return await pdfDoc.save();
}
|
|
30111
|
+
|
|
30112
|
+
// src/extraction/pipeline.ts
|
|
30113
|
+
// Model identifier strings for the extraction pipeline.
// NOTE(review): the Haiku id joins version and date with dots
// ("4.5.20251001"), unlike the Sonnet id, which has no date suffix; confirm
// this is the form the configured provider expects.
var SONNET_MODEL = "claude-sonnet-4.6";
var HAIKU_MODEL = "claude-haiku-4.5.20251001";

// Default provider options for the metadata pass: thinking enabled for the
// "anthropic" provider key, with a budget of 4096 tokens.
var DEFAULT_METADATA_PROVIDER_OPTIONS = {
  anthropic: {
    thinking: {
      type: "enabled",
      budgetTokens: 4096
    }
  }
};
|
|
@@ -29855,7 +30186,7 @@ function sanitizeNulls(obj) {
|
|
|
29855
30186
|
function applyExtracted(extracted) {
|
|
29856
30187
|
const meta = extracted.metadata ?? extracted;
|
|
29857
30188
|
const policyTypes = Array.isArray(meta.policyTypes) ? meta.policyTypes : meta.policyType ? [meta.policyType] : ["other"];
|
|
29858
|
-
|
|
30189
|
+
const fields = {
|
|
29859
30190
|
carrier: meta.carrier || meta.security || "Unknown",
|
|
29860
30191
|
security: meta.security ?? void 0,
|
|
29861
30192
|
underwriter: meta.underwriter ?? void 0,
|
|
@@ -29877,6 +30208,35 @@ function applyExtracted(extracted) {
|
|
|
29877
30208
|
extractionStatus: "complete",
|
|
29878
30209
|
extractionError: ""
|
|
29879
30210
|
};
|
|
30211
|
+
if (extracted.metadata?.carrierLegalName) fields.carrierLegalName = extracted.metadata.carrierLegalName;
|
|
30212
|
+
if (extracted.metadata?.carrierNaicNumber) fields.carrierNaicNumber = extracted.metadata.carrierNaicNumber;
|
|
30213
|
+
if (extracted.metadata?.carrierAmBestRating) fields.carrierAmBestRating = extracted.metadata.carrierAmBestRating;
|
|
30214
|
+
if (extracted.metadata?.carrierAdmittedStatus) fields.carrierAdmittedStatus = extracted.metadata.carrierAdmittedStatus;
|
|
30215
|
+
if (extracted.metadata?.mga) fields.mga = extracted.metadata.mga;
|
|
30216
|
+
if (extracted.metadata?.underwriter) fields.underwriter = extracted.metadata.underwriter;
|
|
30217
|
+
if (extracted.metadata?.brokerAgency ?? extracted.metadata?.broker) fields.brokerAgency = extracted.metadata.brokerAgency ?? extracted.metadata.broker;
|
|
30218
|
+
if (extracted.metadata?.brokerContactName) fields.brokerContactName = extracted.metadata.brokerContactName;
|
|
30219
|
+
if (extracted.metadata?.brokerLicenseNumber) fields.brokerLicenseNumber = extracted.metadata.brokerLicenseNumber;
|
|
30220
|
+
if (extracted.metadata?.priorPolicyNumber) fields.priorPolicyNumber = extracted.metadata.priorPolicyNumber;
|
|
30221
|
+
if (extracted.metadata?.programName) fields.programName = extracted.metadata.programName;
|
|
30222
|
+
if (extracted.metadata?.isRenewal != null) fields.isRenewal = extracted.metadata.isRenewal;
|
|
30223
|
+
if (extracted.metadata?.isPackage != null) fields.isPackage = extracted.metadata.isPackage;
|
|
30224
|
+
if (extracted.metadata?.coverageForm) fields.coverageForm = extracted.metadata.coverageForm;
|
|
30225
|
+
if (extracted.metadata?.retroactiveDate) fields.retroactiveDate = extracted.metadata.retroactiveDate;
|
|
30226
|
+
if (extracted.metadata?.effectiveTime) fields.effectiveTime = extracted.metadata.effectiveTime;
|
|
30227
|
+
if (extracted.metadata?.insuredDba) fields.insuredDba = extracted.metadata.insuredDba;
|
|
30228
|
+
if (extracted.metadata?.insuredAddress) fields.insuredAddress = extracted.metadata.insuredAddress;
|
|
30229
|
+
if (extracted.metadata?.insuredEntityType) fields.insuredEntityType = extracted.metadata.insuredEntityType;
|
|
30230
|
+
if (extracted.metadata?.insuredFein) fields.insuredFein = extracted.metadata.insuredFein;
|
|
30231
|
+
if (extracted.additionalNamedInsureds?.length) fields.additionalNamedInsureds = extracted.additionalNamedInsureds;
|
|
30232
|
+
if (extracted.limits) fields.limits = extracted.limits;
|
|
30233
|
+
if (extracted.deductibles) fields.deductibles = extracted.deductibles;
|
|
30234
|
+
if (extracted.locations?.length) fields.locations = extracted.locations;
|
|
30235
|
+
if (extracted.vehicles?.length) fields.vehicles = extracted.vehicles;
|
|
30236
|
+
if (extracted.classifications?.length) fields.classifications = extracted.classifications;
|
|
30237
|
+
if (extracted.formInventory?.length) fields.formInventory = extracted.formInventory;
|
|
30238
|
+
if (extracted.taxesAndFees?.length) fields.taxesAndFees = extracted.taxesAndFees;
|
|
30239
|
+
return fields;
|
|
29880
30240
|
}
|
|
29881
30241
|
function mergeChunkedSections(metadataResult, sectionChunks) {
|
|
29882
30242
|
const allSections = [];
|
|
@@ -29884,6 +30244,9 @@ function mergeChunkedSections(metadataResult, sectionChunks) {
|
|
|
29884
30244
|
let complaintContact = null;
|
|
29885
30245
|
let costsAndFees = null;
|
|
29886
30246
|
let claimsContact = null;
|
|
30247
|
+
const allEndorsements = [];
|
|
30248
|
+
const allExclusions = [];
|
|
30249
|
+
const allPolicyConditions = [];
|
|
29887
30250
|
for (const chunk of sectionChunks) {
|
|
29888
30251
|
if (chunk.sections) {
|
|
29889
30252
|
allSections.push(...chunk.sections);
|
|
@@ -29892,8 +30255,11 @@ function mergeChunkedSections(metadataResult, sectionChunks) {
|
|
|
29892
30255
|
if (chunk.complaintContact) complaintContact = chunk.complaintContact;
|
|
29893
30256
|
if (chunk.costsAndFees) costsAndFees = chunk.costsAndFees;
|
|
29894
30257
|
if (chunk.claimsContact) claimsContact = chunk.claimsContact;
|
|
30258
|
+
if (chunk.endorsements?.length) allEndorsements.push(...chunk.endorsements);
|
|
30259
|
+
if (chunk.exclusions?.length) allExclusions.push(...chunk.exclusions);
|
|
30260
|
+
if (chunk.conditions?.length) allPolicyConditions.push(...chunk.conditions);
|
|
29895
30261
|
}
|
|
29896
|
-
|
|
30262
|
+
const result = {
|
|
29897
30263
|
metadata: metadataResult.metadata,
|
|
29898
30264
|
metadataSource: metadataResult.metadataSource,
|
|
29899
30265
|
coverages: metadataResult.coverages,
|
|
@@ -29906,6 +30272,10 @@ function mergeChunkedSections(metadataResult, sectionChunks) {
|
|
|
29906
30272
|
},
|
|
29907
30273
|
totalPages: metadataResult.totalPages
|
|
29908
30274
|
};
|
|
30275
|
+
if (allEndorsements.length) result.document.endorsements = allEndorsements;
|
|
30276
|
+
if (allExclusions.length) result.document.exclusions = allExclusions;
|
|
30277
|
+
if (allPolicyConditions.length) result.document.conditions = allPolicyConditions;
|
|
30278
|
+
return result;
|
|
29909
30279
|
}
|
|
29910
30280
|
function getPageChunks(totalPages, chunkSize = 30) {
|
|
29911
30281
|
const chunks = [];
|
|
@@ -29915,8 +30285,10 @@ function getPageChunks(totalPages, chunkSize = 30) {
|
|
|
29915
30285
|
}
|
|
29916
30286
|
return chunks;
|
|
29917
30287
|
}
|
|
29918
|
-
async function callModel(model, pdfBase64, prompt, maxTokens, providerOptions, log, onTokenUsage) {
|
|
29919
|
-
await
|
|
30288
|
+
async function callModel(model, pdfBase64, prompt, maxTokens, providerOptions, log, onTokenUsage, pageRange) {
|
|
30289
|
+
const pdfToSend = pageRange ? await extractPageRange(pdfBase64, pageRange[0], pageRange[1]) : pdfBase64;
|
|
30290
|
+
const rangeLabel = pageRange ? ` [pages ${pageRange[0]}\u2013${pageRange[1]}]` : "";
|
|
30291
|
+
await log?.(`Calling model (max ${maxTokens} tokens)${rangeLabel}...`);
|
|
29920
30292
|
const start = Date.now();
|
|
29921
30293
|
const { text, usage } = await withRetry(
|
|
29922
30294
|
() => generateText({
|
|
@@ -29925,7 +30297,7 @@ async function callModel(model, pdfBase64, prompt, maxTokens, providerOptions, l
|
|
|
29925
30297
|
messages: [{
|
|
29926
30298
|
role: "user",
|
|
29927
30299
|
content: [
|
|
29928
|
-
{ type: "file", data:
|
|
30300
|
+
{ type: "file", data: pdfToSend, mediaType: "application/pdf" },
|
|
29929
30301
|
{ type: "text", text: prompt }
|
|
29930
30302
|
]
|
|
29931
30303
|
}],
|
|
@@ -30031,7 +30403,9 @@ async function classifyDocumentType(pdfBase64, options) {
|
|
|
30031
30403
|
MODEL_TOKEN_LIMITS.classification,
|
|
30032
30404
|
void 0,
|
|
30033
30405
|
log,
|
|
30034
|
-
onTokenUsage
|
|
30406
|
+
onTokenUsage,
|
|
30407
|
+
[1, 3]
|
|
30408
|
+
// Only need first 3 pages for classification
|
|
30035
30409
|
);
|
|
30036
30410
|
try {
|
|
30037
30411
|
const parsed = JSON.parse(stripFences(raw));
|
|
@@ -30048,7 +30422,7 @@ async function classifyDocumentType(pdfBase64, options) {
|
|
|
30048
30422
|
function applyExtractedQuote(extracted) {
|
|
30049
30423
|
const meta = extracted.metadata ?? extracted;
|
|
30050
30424
|
const policyTypes = Array.isArray(meta.policyTypes) ? meta.policyTypes : ["other"];
|
|
30051
|
-
|
|
30425
|
+
const fields = {
|
|
30052
30426
|
carrier: meta.carrier || meta.security || "Unknown",
|
|
30053
30427
|
security: meta.security ?? void 0,
|
|
30054
30428
|
underwriter: meta.underwriter ?? void 0,
|
|
@@ -30081,11 +30455,38 @@ function applyExtractedQuote(extracted) {
|
|
|
30081
30455
|
extractionStatus: "complete",
|
|
30082
30456
|
extractionError: ""
|
|
30083
30457
|
};
|
|
30458
|
+
if (meta.carrierLegalName) fields.carrierLegalName = meta.carrierLegalName;
|
|
30459
|
+
if (meta.carrierNaicNumber) fields.carrierNaicNumber = meta.carrierNaicNumber;
|
|
30460
|
+
if (meta.carrierAdmittedStatus) fields.carrierAdmittedStatus = meta.carrierAdmittedStatus;
|
|
30461
|
+
if (meta.coverageForm) fields.coverageForm = meta.coverageForm;
|
|
30462
|
+
if (meta.retroactiveDate) fields.retroactiveDate = meta.retroactiveDate;
|
|
30463
|
+
if (meta.insuredAddress) fields.insuredAddress = meta.insuredAddress;
|
|
30464
|
+
if (extracted.limits) fields.limits = extracted.limits;
|
|
30465
|
+
if (extracted.deductibles) fields.deductibles = extracted.deductibles;
|
|
30466
|
+
if (extracted.warrantyRequirements?.length) fields.warrantyRequirements = extracted.warrantyRequirements;
|
|
30467
|
+
if (extracted.taxesAndFees?.length) fields.taxesAndFees = extracted.taxesAndFees;
|
|
30468
|
+
if (extracted.subjectivities?.length) {
|
|
30469
|
+
fields.enrichedSubjectivities = extracted.subjectivities.map((s) => ({
|
|
30470
|
+
description: s.description,
|
|
30471
|
+
category: s.category ?? void 0,
|
|
30472
|
+
dueDate: s.dueDate ?? void 0,
|
|
30473
|
+
pageNumber: s.pageNumber ?? void 0
|
|
30474
|
+
}));
|
|
30475
|
+
}
|
|
30476
|
+
if (extracted.underwritingConditions?.length) {
|
|
30477
|
+
fields.enrichedUnderwritingConditions = extracted.underwritingConditions.map((c) => ({
|
|
30478
|
+
description: c.description,
|
|
30479
|
+
category: c.category ?? void 0,
|
|
30480
|
+
pageNumber: c.pageNumber ?? void 0
|
|
30481
|
+
}));
|
|
30482
|
+
}
|
|
30483
|
+
return fields;
|
|
30084
30484
|
}
|
|
30085
30485
|
function mergeChunkedQuoteSections(metadataResult, sectionChunks) {
|
|
30086
30486
|
const allSections = [];
|
|
30087
30487
|
const allSubjectivities = metadataResult.subjectivities || [];
|
|
30088
30488
|
const allConditions = metadataResult.underwritingConditions || [];
|
|
30489
|
+
const allExclusions = [];
|
|
30089
30490
|
for (const chunk of sectionChunks) {
|
|
30090
30491
|
if (chunk.sections) {
|
|
30091
30492
|
allSections.push(...chunk.sections);
|
|
@@ -30096,8 +30497,11 @@ function mergeChunkedQuoteSections(metadataResult, sectionChunks) {
|
|
|
30096
30497
|
if (chunk.underwritingConditions) {
|
|
30097
30498
|
allConditions.push(...chunk.underwritingConditions);
|
|
30098
30499
|
}
|
|
30500
|
+
if (chunk.exclusions?.length) {
|
|
30501
|
+
allExclusions.push(...chunk.exclusions);
|
|
30502
|
+
}
|
|
30099
30503
|
}
|
|
30100
|
-
|
|
30504
|
+
const result = {
|
|
30101
30505
|
metadata: metadataResult.metadata,
|
|
30102
30506
|
metadataSource: metadataResult.metadataSource,
|
|
30103
30507
|
coverages: metadataResult.coverages,
|
|
@@ -30109,6 +30513,8 @@ function mergeChunkedQuoteSections(metadataResult, sectionChunks) {
|
|
|
30109
30513
|
},
|
|
30110
30514
|
totalPages: metadataResult.totalPages
|
|
30111
30515
|
};
|
|
30516
|
+
if (allExclusions.length) result.document.exclusions = allExclusions;
|
|
30517
|
+
return result;
|
|
30112
30518
|
}
|
|
30113
30519
|
var CHUNK_SIZES = [15, 10, 5];
|
|
30114
30520
|
async function extractChunkWithRetry(models, pdfBase64, start, end, sizeIndex, promptBuilder, fallbackProviderOptions, log, onTokenUsage, concurrency = 2) {
|
|
@@ -30120,7 +30526,9 @@ async function extractChunkWithRetry(models, pdfBase64, start, end, sizeIndex, p
|
|
|
30120
30526
|
MODEL_TOKEN_LIMITS.sections,
|
|
30121
30527
|
void 0,
|
|
30122
30528
|
log,
|
|
30123
|
-
onTokenUsage
|
|
30529
|
+
onTokenUsage,
|
|
30530
|
+
[start, end]
|
|
30531
|
+
// Only send this chunk's pages
|
|
30124
30532
|
);
|
|
30125
30533
|
try {
|
|
30126
30534
|
return [JSON.parse(stripFences(chunkRaw))];
|
|
@@ -30162,7 +30570,9 @@ async function extractChunkWithRetry(models, pdfBase64, start, end, sizeIndex, p
|
|
|
30162
30570
|
MODEL_TOKEN_LIMITS.sectionsFallback,
|
|
30163
30571
|
fallbackProviderOptions,
|
|
30164
30572
|
log,
|
|
30165
|
-
onTokenUsage
|
|
30573
|
+
onTokenUsage,
|
|
30574
|
+
[start, end]
|
|
30575
|
+
// Only send this chunk's pages
|
|
30166
30576
|
);
|
|
30167
30577
|
try {
|
|
30168
30578
|
return [JSON.parse(stripFences(fallbackRaw))];
|
|
@@ -30205,7 +30615,9 @@ async function extractFromPdf(pdfBase64, options) {
|
|
|
30205
30615
|
onTokenUsage
|
|
30206
30616
|
} = options ?? {};
|
|
30207
30617
|
const resolved = resolveModels(models);
|
|
30618
|
+
const actualPageCount = await getPdfPageCount(pdfBase64);
|
|
30208
30619
|
await log?.("Pass 1: Extracting metadata...");
|
|
30620
|
+
const metadataPageRange = [1, Math.min(10, actualPageCount)];
|
|
30209
30621
|
const metadataRaw = await callModel(
|
|
30210
30622
|
resolved.metadata,
|
|
30211
30623
|
pdfBase64,
|
|
@@ -30213,7 +30625,8 @@ async function extractFromPdf(pdfBase64, options) {
|
|
|
30213
30625
|
MODEL_TOKEN_LIMITS.metadata,
|
|
30214
30626
|
metadataProviderOptions,
|
|
30215
30627
|
log,
|
|
30216
|
-
onTokenUsage
|
|
30628
|
+
onTokenUsage,
|
|
30629
|
+
metadataPageRange
|
|
30217
30630
|
);
|
|
30218
30631
|
let metadataResult;
|
|
30219
30632
|
try {
|
|
@@ -30224,7 +30637,7 @@ async function extractFromPdf(pdfBase64, options) {
|
|
|
30224
30637
|
throw new Error(`Metadata JSON parse failed: ${e.message}`);
|
|
30225
30638
|
}
|
|
30226
30639
|
await onMetadata?.(metadataRaw);
|
|
30227
|
-
const pageCount =
|
|
30640
|
+
const pageCount = actualPageCount;
|
|
30228
30641
|
await log?.(`Document: ${pageCount} page(s)`);
|
|
30229
30642
|
const sectionChunks = await extractSectionChunks(
|
|
30230
30643
|
resolved,
|
|
@@ -30292,7 +30705,9 @@ async function extractQuoteFromPdf(pdfBase64, options) {
|
|
|
30292
30705
|
onTokenUsage
|
|
30293
30706
|
} = options ?? {};
|
|
30294
30707
|
const resolved = resolveModels(models);
|
|
30708
|
+
const actualPageCount = await getPdfPageCount(pdfBase64);
|
|
30295
30709
|
await log?.("Pass 1: Extracting quote metadata...");
|
|
30710
|
+
const metadataPageRange = [1, Math.min(10, actualPageCount)];
|
|
30296
30711
|
const metadataRaw = await callModel(
|
|
30297
30712
|
resolved.metadata,
|
|
30298
30713
|
pdfBase64,
|
|
@@ -30300,7 +30715,8 @@ async function extractQuoteFromPdf(pdfBase64, options) {
|
|
|
30300
30715
|
MODEL_TOKEN_LIMITS.metadata,
|
|
30301
30716
|
metadataProviderOptions,
|
|
30302
30717
|
log,
|
|
30303
|
-
onTokenUsage
|
|
30718
|
+
onTokenUsage,
|
|
30719
|
+
metadataPageRange
|
|
30304
30720
|
);
|
|
30305
30721
|
let metadataResult;
|
|
30306
30722
|
try {
|
|
@@ -30311,7 +30727,7 @@ async function extractQuoteFromPdf(pdfBase64, options) {
|
|
|
30311
30727
|
throw new Error(`Quote metadata JSON parse failed: ${e.message}`);
|
|
30312
30728
|
}
|
|
30313
30729
|
await onMetadata?.(metadataRaw);
|
|
30314
|
-
const pageCount =
|
|
30730
|
+
const pageCount = actualPageCount;
|
|
30315
30731
|
await log?.(`Quote document: ${pageCount} page(s)`);
|
|
30316
30732
|
const sectionChunks = await extractSectionChunks(
|
|
30317
30733
|
resolved,
|
|
@@ -30328,107 +30744,13 @@ async function extractQuoteFromPdf(pdfBase64, options) {
|
|
|
30328
30744
|
const mergedRaw = JSON.stringify(merged);
|
|
30329
30745
|
return { rawText: mergedRaw, extracted: merged };
|
|
30330
30746
|
}
|
|
30331
|
-
|
|
30332
|
-
// src/extraction/pdf.ts
|
|
30333
|
-
import {
|
|
30334
|
-
PDFDocument,
|
|
30335
|
-
PDFTextField,
|
|
30336
|
-
PDFCheckBox,
|
|
30337
|
-
PDFDropdown,
|
|
30338
|
-
PDFRadioGroup,
|
|
30339
|
-
StandardFonts,
|
|
30340
|
-
rgb
|
|
30341
|
-
} from "pdf-lib";
|
|
30342
|
-
function getAcroFormFields(pdfDoc) {
|
|
30343
|
-
const form = pdfDoc.getForm();
|
|
30344
|
-
const fields = form.getFields();
|
|
30345
|
-
if (fields.length === 0) return [];
|
|
30346
|
-
return fields.map((field) => {
|
|
30347
|
-
const name = field.getName();
|
|
30348
|
-
if (field instanceof PDFTextField) {
|
|
30349
|
-
return { name, type: "text" };
|
|
30350
|
-
}
|
|
30351
|
-
if (field instanceof PDFCheckBox) {
|
|
30352
|
-
return { name, type: "checkbox" };
|
|
30353
|
-
}
|
|
30354
|
-
if (field instanceof PDFDropdown) {
|
|
30355
|
-
return { name, type: "dropdown", options: field.getOptions() };
|
|
30356
|
-
}
|
|
30357
|
-
if (field instanceof PDFRadioGroup) {
|
|
30358
|
-
return { name, type: "radio", options: field.getOptions() };
|
|
30359
|
-
}
|
|
30360
|
-
return { name, type: "text" };
|
|
30361
|
-
});
|
|
30362
|
-
}
|
|
30363
|
-
async function fillAcroForm(pdfBytes, mappings) {
|
|
30364
|
-
const pdfDoc = await PDFDocument.load(pdfBytes, { ignoreEncryption: true });
|
|
30365
|
-
const form = pdfDoc.getForm();
|
|
30366
|
-
for (const { acroFormName, value } of mappings) {
|
|
30367
|
-
try {
|
|
30368
|
-
const field = form.getField(acroFormName);
|
|
30369
|
-
if (field instanceof PDFTextField) {
|
|
30370
|
-
field.setText(value);
|
|
30371
|
-
} else if (field instanceof PDFCheckBox) {
|
|
30372
|
-
const lower = value.toLowerCase();
|
|
30373
|
-
if (["yes", "true", "x", "checked", "on"].includes(lower)) {
|
|
30374
|
-
field.check();
|
|
30375
|
-
} else {
|
|
30376
|
-
field.uncheck();
|
|
30377
|
-
}
|
|
30378
|
-
} else if (field instanceof PDFDropdown) {
|
|
30379
|
-
try {
|
|
30380
|
-
field.select(value);
|
|
30381
|
-
} catch {
|
|
30382
|
-
}
|
|
30383
|
-
} else if (field instanceof PDFRadioGroup) {
|
|
30384
|
-
try {
|
|
30385
|
-
field.select(value);
|
|
30386
|
-
} catch {
|
|
30387
|
-
}
|
|
30388
|
-
}
|
|
30389
|
-
} catch {
|
|
30390
|
-
}
|
|
30391
|
-
}
|
|
30392
|
-
form.flatten();
|
|
30393
|
-
return await pdfDoc.save();
|
|
30394
|
-
}
|
|
30395
|
-
async function overlayTextOnPdf(pdfBytes, overlays) {
|
|
30396
|
-
const pdfDoc = await PDFDocument.load(pdfBytes, { ignoreEncryption: true });
|
|
30397
|
-
const font = await pdfDoc.embedFont(StandardFonts.Helvetica);
|
|
30398
|
-
const pageCount = pdfDoc.getPageCount();
|
|
30399
|
-
for (const overlay of overlays) {
|
|
30400
|
-
if (overlay.page < 0 || overlay.page >= pageCount) continue;
|
|
30401
|
-
const page = pdfDoc.getPage(overlay.page);
|
|
30402
|
-
const { width, height } = page.getSize();
|
|
30403
|
-
const fontSize = overlay.fontSize ?? 10;
|
|
30404
|
-
const x = overlay.x / 100 * width;
|
|
30405
|
-
const y = height - overlay.y / 100 * height - fontSize;
|
|
30406
|
-
if (overlay.isCheckmark) {
|
|
30407
|
-
page.drawText("X", {
|
|
30408
|
-
x,
|
|
30409
|
-
y,
|
|
30410
|
-
size: fontSize,
|
|
30411
|
-
font,
|
|
30412
|
-
color: rgb(0, 0, 0)
|
|
30413
|
-
});
|
|
30414
|
-
} else {
|
|
30415
|
-
page.drawText(overlay.text, {
|
|
30416
|
-
x,
|
|
30417
|
-
y,
|
|
30418
|
-
size: fontSize,
|
|
30419
|
-
font,
|
|
30420
|
-
color: rgb(0, 0, 0)
|
|
30421
|
-
});
|
|
30422
|
-
}
|
|
30423
|
-
}
|
|
30424
|
-
return await pdfDoc.save();
|
|
30425
|
-
}
|
|
30426
30747
|
export {
|
|
30427
30748
|
AGENT_TOOLS,
|
|
30428
30749
|
APPLICATION_CLASSIFY_PROMPT,
|
|
30429
30750
|
CLASSIFY_DOCUMENT_PROMPT,
|
|
30430
30751
|
CLASSIFY_EMAIL_PROMPT,
|
|
30431
30752
|
COI_GENERATION_TOOL,
|
|
30753
|
+
CONTEXT_KEY_MAP,
|
|
30432
30754
|
COVERAGE_COMPARISON_TOOL,
|
|
30433
30755
|
DOCUMENT_LOOKUP_TOOL,
|
|
30434
30756
|
EXTRACTION_PROMPT,
|
|
@@ -30436,6 +30758,7 @@ export {
|
|
|
30436
30758
|
METADATA_PROMPT,
|
|
30437
30759
|
MODEL_TOKEN_LIMITS,
|
|
30438
30760
|
PLATFORM_CONFIGS,
|
|
30761
|
+
POLICY_TYPES,
|
|
30439
30762
|
QUOTE_METADATA_PROMPT,
|
|
30440
30763
|
SONNET_MODEL,
|
|
30441
30764
|
applyExtracted,
|
|
@@ -30474,11 +30797,13 @@ export {
|
|
|
30474
30797
|
createUniformModelConfig,
|
|
30475
30798
|
enrichSupplementaryFields,
|
|
30476
30799
|
extractFromPdf,
|
|
30800
|
+
extractPageRange,
|
|
30477
30801
|
extractQuoteFromPdf,
|
|
30478
30802
|
extractSectionsOnly,
|
|
30479
30803
|
fillAcroForm,
|
|
30480
30804
|
getAcroFormFields,
|
|
30481
30805
|
getPageChunks,
|
|
30806
|
+
getPdfPageCount,
|
|
30482
30807
|
mergeChunkedQuoteSections,
|
|
30483
30808
|
mergeChunkedSections,
|
|
30484
30809
|
overlayTextOnPdf,
|