@claritylabs/cl-sdk 0.17.0 → 0.17.1

This diff shows the changes between two publicly released versions of this package, as they appear in the supported public registry they were published to. It is provided for informational purposes only.
package/README.md CHANGED
@@ -12,9 +12,9 @@ npm install @claritylabs/cl-sdk pdf-lib zod
12
12
 
13
13
  ## What It Does
14
14
 
15
- - **Document Extraction** — Agentic pipeline with 13 focused extractors that turns insurance PDFs into structured data with page-level provenance, quality gates, first-class definitions and covered reasons, and automatic declarations-to-schema promotion (limits, deductibles, locations, broker, loss payees, premium, taxes/fees, summary)
16
- - **Query Agent** — Citation-backed question answering over stored documents and inbound photos/PDFs/text with sub-question decomposition and grounding verification
17
- - **Application Processing** — Eight focused agents handle intake — field extraction, auto-fill from prior answers, topic-based question batching, and PDF mapping
15
+ - **Document Extraction** — Agentic pipeline with 13 focused extractors that turns insurance PDFs into structured data with page-level provenance, quality gates, first-class definitions and covered reasons, referential coverage resolution, cost-aware formatting, and automatic declarations-to-schema promotion (limits, deductibles, locations, broker, loss payees, premium, taxes/fees, summary)
16
+ - **Query Agent** — Citation-backed question answering over stored documents and inbound photos/PDFs/text with sub-question decomposition, bounded retrieval planning, attachment-only reasoning when retrieval is unnecessary, and grounding verification
17
+ - **Application Processing** — Focused agents handle intake with bounded workflow planning — field extraction, prior-answer backfill, context auto-fill, document lookup gating, topic-based question batching, reply parsing, and PDF mapping
18
18
  - **Agent System** — Composable prompt modules for building insurance-aware conversational agents across email, chat, SMS, Slack, and Discord
19
19
  - **Storage** — DocumentStore and MemoryStore interfaces with SQLite reference implementation
20
20
 
@@ -81,7 +81,7 @@ const result = await agent.query({
81
81
  });
82
82
  ```
83
83
 
84
- The query pipeline first interprets each attachment into structured evidence, then combines that with retrieved chunks, document lookups, and conversation history before answering.
84
+ The query workflow first interprets each attachment into structured evidence, then uses the query classifier to decide whether stored-document retrieval is needed. Simple or attachment-only questions can skip retrieval and reason over the available evidence directly; document-backed questions still retrieve chunks, reason over citations, and run grounding verification. Verification can request targeted retry retrieval for weak sub-answers.
85
85
 
86
86
  Important: your `generateObject` callback must actually forward multimodal payloads from `providerOptions` to the model request:
87
87
 
@@ -91,6 +91,18 @@ Important: your `generateObject` callback must actually forward multimodal paylo
91
91
 
92
92
  If your callback ignores those fields, the model will only see the text prompt.
93
93
 
94
+ ## Bounded Agentic Workflows
95
+
96
+ CL-SDK uses deterministic scaffolding with agentic decision points rather than fixed all-tools-all-the-time chains:
97
+
98
+ - Extraction page mapping and review choose focused follow-up extractors from the live extractor catalog. Definitions and covered reasons can fall back through section extraction when a focused run returns no usable records.
99
+ - Supplementary extraction runs only when page assignments, form inventory, existing extracted text, or review follow-up tasks indicate regulatory, claims, notice, cancellation, or contact facts are likely present.
100
+ - Referential coverage resolution tries cheap local section/form matches first, then uses bounded target-specific actions for declarations, schedules, sections, page-location lookup, or skip.
101
+ - Formatting skips the LLM cleanup pass for plain prose and only formats long or noisy content that looks likely to contain markdown, spacing, list, heading, or table artifacts.
102
+ - Application processing plans optional backfill, context auto-fill, document search, batching, reply parsing, lookup, explanations, and next-batch email generation based on current state.
103
+
104
+ These gates reduce unnecessary provider calls while preserving reliability for edge cases where additional focused extraction or retrieval is needed.
105
+
94
106
  ## Development
95
107
 
96
108
  ```bash
package/dist/index.d.mts CHANGED
@@ -164,6 +164,8 @@ interface PipelineContextOptions<TState> {
164
164
  onSave?: (checkpoint: PipelineCheckpoint<TState>) => Promise<void>;
165
165
  /** Resume from a previously saved checkpoint. */
166
166
  resumeFrom?: PipelineCheckpoint<TState>;
167
+ /** Ordered phase names. When provided, resuming from a phase marks prior phases complete too. */
168
+ phaseOrder?: string[];
167
169
  }
168
170
  /**
169
171
  * Create a pipeline context for checkpoint-based save/resume.
@@ -28882,14 +28884,14 @@ declare const PageAssignmentSchema: z.ZodObject<{
28882
28884
  notes: z.ZodOptional<z.ZodString>;
28883
28885
  }, "strip", z.ZodTypeAny, {
28884
28886
  localPageNumber: number;
28885
- extractorNames: ("declarations" | "supplementary" | "conditions" | "named_insured" | "sections" | "definitions" | "endorsements" | "exclusions" | "loss_history" | "carrier_info" | "coverage_limits" | "premium_breakdown" | "covered_reasons")[];
28887
+ extractorNames: ("declarations" | "supplementary" | "conditions" | "named_insured" | "sections" | "definitions" | "endorsements" | "exclusions" | "loss_history" | "carrier_info" | "coverage_limits" | "covered_reasons" | "premium_breakdown")[];
28886
28888
  confidence?: number | undefined;
28887
28889
  pageRole?: "other" | "policy_form" | "supplementary" | "declarations_schedule" | "endorsement_schedule" | "endorsement_form" | "condition_exclusion_form" | undefined;
28888
28890
  hasScheduleValues?: boolean | undefined;
28889
28891
  notes?: string | undefined;
28890
28892
  }, {
28891
28893
  localPageNumber: number;
28892
- extractorNames: ("declarations" | "supplementary" | "conditions" | "named_insured" | "sections" | "definitions" | "endorsements" | "exclusions" | "loss_history" | "carrier_info" | "coverage_limits" | "premium_breakdown" | "covered_reasons")[];
28894
+ extractorNames: ("declarations" | "supplementary" | "conditions" | "named_insured" | "sections" | "definitions" | "endorsements" | "exclusions" | "loss_history" | "carrier_info" | "coverage_limits" | "covered_reasons" | "premium_breakdown")[];
28893
28895
  confidence?: number | undefined;
28894
28896
  pageRole?: "other" | "policy_form" | "supplementary" | "declarations_schedule" | "endorsement_schedule" | "endorsement_form" | "condition_exclusion_form" | undefined;
28895
28897
  hasScheduleValues?: boolean | undefined;
@@ -30860,6 +30862,12 @@ interface ExtractorDef {
30860
30862
  buildPrompt: () => string;
30861
30863
  schema: ZodSchema;
30862
30864
  maxTokens?: number;
30865
+ fallback?: FocusedExtractorFallback;
30866
+ }
30867
+ interface FocusedExtractorFallback {
30868
+ extractorName: string;
30869
+ isEmpty: (data: unknown) => boolean;
30870
+ deriveFocusedResult: (fallbackData: unknown) => unknown | undefined;
30863
30871
  }
30864
30872
  declare function getExtractor(name: string): ExtractorDef | undefined;
30865
30873
 
package/dist/index.d.ts CHANGED
@@ -164,6 +164,8 @@ interface PipelineContextOptions<TState> {
164
164
  onSave?: (checkpoint: PipelineCheckpoint<TState>) => Promise<void>;
165
165
  /** Resume from a previously saved checkpoint. */
166
166
  resumeFrom?: PipelineCheckpoint<TState>;
167
+ /** Ordered phase names. When provided, resuming from a phase marks prior phases complete too. */
168
+ phaseOrder?: string[];
167
169
  }
168
170
  /**
169
171
  * Create a pipeline context for checkpoint-based save/resume.
@@ -28882,14 +28884,14 @@ declare const PageAssignmentSchema: z.ZodObject<{
28882
28884
  notes: z.ZodOptional<z.ZodString>;
28883
28885
  }, "strip", z.ZodTypeAny, {
28884
28886
  localPageNumber: number;
28885
- extractorNames: ("declarations" | "supplementary" | "conditions" | "named_insured" | "sections" | "definitions" | "endorsements" | "exclusions" | "loss_history" | "carrier_info" | "coverage_limits" | "premium_breakdown" | "covered_reasons")[];
28887
+ extractorNames: ("declarations" | "supplementary" | "conditions" | "named_insured" | "sections" | "definitions" | "endorsements" | "exclusions" | "loss_history" | "carrier_info" | "coverage_limits" | "covered_reasons" | "premium_breakdown")[];
28886
28888
  confidence?: number | undefined;
28887
28889
  pageRole?: "other" | "policy_form" | "supplementary" | "declarations_schedule" | "endorsement_schedule" | "endorsement_form" | "condition_exclusion_form" | undefined;
28888
28890
  hasScheduleValues?: boolean | undefined;
28889
28891
  notes?: string | undefined;
28890
28892
  }, {
28891
28893
  localPageNumber: number;
28892
- extractorNames: ("declarations" | "supplementary" | "conditions" | "named_insured" | "sections" | "definitions" | "endorsements" | "exclusions" | "loss_history" | "carrier_info" | "coverage_limits" | "premium_breakdown" | "covered_reasons")[];
28894
+ extractorNames: ("declarations" | "supplementary" | "conditions" | "named_insured" | "sections" | "definitions" | "endorsements" | "exclusions" | "loss_history" | "carrier_info" | "coverage_limits" | "covered_reasons" | "premium_breakdown")[];
28893
28895
  confidence?: number | undefined;
28894
28896
  pageRole?: "other" | "policy_form" | "supplementary" | "declarations_schedule" | "endorsement_schedule" | "endorsement_form" | "condition_exclusion_form" | undefined;
28895
28897
  hasScheduleValues?: boolean | undefined;
@@ -30860,6 +30862,12 @@ interface ExtractorDef {
30860
30862
  buildPrompt: () => string;
30861
30863
  schema: ZodSchema;
30862
30864
  maxTokens?: number;
30865
+ fallback?: FocusedExtractorFallback;
30866
+ }
30867
+ interface FocusedExtractorFallback {
30868
+ extractorName: string;
30869
+ isEmpty: (data: unknown) => boolean;
30870
+ deriveFocusedResult: (fallbackData: unknown) => unknown | undefined;
30863
30871
  }
30864
30872
  declare function getExtractor(name: string): ExtractorDef | undefined;
30865
30873