@claritylabs/cl-sdk 0.10.2 → 0.10.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -540,6 +540,7 @@ var CoverageValueTypeSchema = z4.enum([
540
540
  var CoverageSchema = z4.object({
541
541
  name: z4.string(),
542
542
  limit: z4.string(),
543
+ limitType: LimitTypeSchema.optional(),
543
544
  limitValueType: CoverageValueTypeSchema.optional(),
544
545
  deductible: z4.string().optional(),
545
546
  deductibleValueType: CoverageValueTypeSchema.optional(),
@@ -1999,6 +2000,7 @@ function mergeCoverageLimits(existing, incoming) {
1999
2000
  const incomingCoverages = Array.isArray(incoming.coverages) ? incoming.coverages : [];
2000
2001
  const coverageKey = (coverage) => [
2001
2002
  String(coverage.name ?? "").toLowerCase(),
2003
+ String(coverage.limitType ?? "").toLowerCase(),
2002
2004
  String(coverage.limit ?? "").toLowerCase(),
2003
2005
  String(coverage.deductible ?? "").toLowerCase(),
2004
2006
  String(coverage.formNumber ?? "").toLowerCase()
@@ -2964,6 +2966,7 @@ Available extractors:
2964
2966
  - supplementary
2965
2967
 
2966
2968
  Rules:
2969
+ - Identify the broad section or form context first, then assign focused extractors within that context.
2967
2970
  - Use specific extractors for declarations, schedules, endorsements, exclusions, conditions, premium pages, and loss runs.
2968
2971
  - Use "sections" for pages that contain substantive policy text or mixed content that should still be preserved as raw sections.
2969
2972
  - Avoid assigning broad ranges mentally; decide page by page.
@@ -2974,6 +2977,8 @@ Rules:
2974
2977
  - Headings like "Limits of Insurance", "Deductible", "Coinsurance", "Loss Conditions", or "Definitions" inside a policy form usually indicate form language, not declarations or schedules.
2975
2978
  - Continuation pages near the end of a form should stay mapped to "sections" plus "conditions"/"exclusions" when applicable, even if they mention limits or deductibles.
2976
2979
  - When a form inventory entry identifies a page range as a specific form type (e.g., endorsement, coverage, application), use that classification to guide your extractor choice. Do not assign "coverage_limits" to pages the inventory identifies as endorsement or condition/exclusion forms unless the page contains actual schedule values.
2980
+ - Do not tag a page with "exclusions" or "conditions" if it only contains a table of contents, page-number reference, running header/footer, or a heading that points to another page without substantive wording.
2981
+ - If a page appears to be part of a larger exclusion, conditions, or endorsement section within the same form, keep the assignment consistent across nearby pages in that section rather than isolating a single page fragment.
2977
2982
  - Return every page in the supplied chunk exactly once.
2978
2983
 
2979
2984
  Return JSON:
@@ -3158,7 +3163,7 @@ Extract only insured-specific declaration, schedule, or endorsement entries that
3158
3163
 
3159
3164
  Focus on:
3160
3165
  - Every coverage listed on the declarations page or coverage schedule
3161
- - Per-occurrence, aggregate, and sub-limits for each coverage
3166
+ - Per-occurrence, individual/occurrence, aggregate, and sub-limits for each coverage
3162
3167
  - Deductible or self-insured retention for each coverage
3163
3168
  - Coverage form type: occurrence-based, claims-made, or accident
3164
3169
  - Retroactive date for claims-made policies
@@ -3170,6 +3175,7 @@ For EACH coverage, also extract:
3170
3175
  - pageNumber: the original page number where the coverage row/value appears
3171
3176
  - sectionRef: the declarations/schedule/endorsement section heading where it appears
3172
3177
  - originalContent: the verbatim row or short source snippet used for this coverage
3178
+ - limitType: when applicable, classify the limit as per_occurrence, per_claim, aggregate, per_person, per_accident, statutory, blanket, or scheduled
3173
3179
  - limitValueType: classify the limit as numeric, included, not_included, as_stated, waiting_period, referential, or other
3174
3180
  - deductibleValueType: classify the deductible/value term similarly when deductible is present
3175
3181
 
@@ -3181,6 +3187,7 @@ Critical rules:
3181
3187
  - If a waiting period or hour deductible is shown as part of a specific declarations/schedule row, it may be captured in deductible. Otherwise omit it.
3182
3188
  - Use limitValueType or deductibleValueType to preserve non-numeric terms precisely instead of forcing them into numeric semantics.
3183
3189
  - Preserve one row per real coverage entry. Do not merge adjacent schedule rows into malformed names.
3190
+ - Keep individual/per-occurrence limits separate from aggregate limits even when they have the same coverage name, limit amount, deductible, and form number. Use limitType to distinguish them.
3184
3191
 
3185
3192
  Return JSON only.`;
3186
3193
  }
@@ -4058,7 +4065,7 @@ function createExtractor(config) {
4058
4065
  ranges.push({ startPage: start, endPage: previous });
4059
4066
  return ranges;
4060
4067
  }
4061
- function buildPlanFromPageAssignments(pageAssignments, pageCount) {
4068
+ function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
4062
4069
  const extractorPages = /* @__PURE__ */ new Map();
4063
4070
  for (const assignment of pageAssignments) {
4064
4071
  const extractors = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
@@ -4075,8 +4082,30 @@ function createExtractor(config) {
4075
4082
  extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
4076
4083
  }
4077
4084
  }
4085
+ const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "exclusions", "endorsements"]);
4086
+ const contextualForms = (formInventory?.forms ?? []).filter(
4087
+ (form) => form.pageStart != null && (form.pageEnd ?? form.pageStart) != null
4088
+ );
4089
+ const expandPagesToFormRanges = (extractorName, pages) => {
4090
+ if (!contextualExtractors.has(extractorName)) return pages;
4091
+ const expanded = new Set(pages);
4092
+ for (const page of pages) {
4093
+ for (const form of contextualForms) {
4094
+ const pageStart = form.pageStart;
4095
+ const pageEnd = form.pageEnd ?? form.pageStart;
4096
+ const formType = form.formType;
4097
+ const supportsContextualExpansion = extractorName === "endorsements" ? formType === "endorsement" : formType === "coverage" || formType === "endorsement";
4098
+ if (!supportsContextualExpansion) continue;
4099
+ if (page < pageStart || page > pageEnd) continue;
4100
+ for (let current = pageStart; current <= pageEnd; current += 1) {
4101
+ expanded.add(current);
4102
+ }
4103
+ }
4104
+ }
4105
+ return [...expanded].sort((a, b) => a - b);
4106
+ };
4078
4107
  const tasks = [...extractorPages.entries()].flatMap(
4079
- ([extractorName, pages]) => groupContiguousPages(pages).map(({ startPage, endPage }) => ({
4108
+ ([extractorName, pages]) => groupContiguousPages(expandPagesToFormRanges(extractorName, pages)).map(({ startPage, endPage }) => ({
4080
4109
  extractorName,
4081
4110
  startPage,
4082
4111
  endPage,
@@ -4244,7 +4273,7 @@ function createExtractor(config) {
4244
4273
  onProgress?.("Resuming from checkpoint (plan complete)...");
4245
4274
  } else {
4246
4275
  onProgress?.(`Building extraction plan from page map for ${primaryType} ${documentType}...`);
4247
- plan = buildPlanFromPageAssignments(pageAssignments, pageCount);
4276
+ plan = buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory);
4248
4277
  await pipelineCtx.save("plan", {
4249
4278
  id,
4250
4279
  pageCount,