@claritylabs/cl-sdk 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +152 -218
- package/dist/index.d.mts +8376 -1209
- package/dist/index.d.ts +8376 -1209
- package/dist/index.js +3308 -2136
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +3459 -2404
- package/dist/index.mjs.map +1 -1
- package/dist/storage-sqlite.d.mts +2804 -0
- package/dist/storage-sqlite.d.ts +2804 -0
- package/dist/storage-sqlite.js +238 -0
- package/dist/storage-sqlite.js.map +1 -0
- package/dist/storage-sqlite.mjs +218 -0
- package/dist/storage-sqlite.mjs.map +1 -0
- package/package.json +16 -7
package/README.md
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
|
|
1
|
+
# CL-0 SDK
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[Clarity Labs](https://claritylabs.inc) helps insurers understand their clients as well as those clients know themselves. With a better understanding of their clients, insurers can automate servicing to reduce costs and identify coverage gaps to cross-sell products.
|
|
4
|
+
|
|
5
|
+
CL-0 SDK is the open infrastructure layer that makes this possible: a shared intelligence system that any product or agent can import to understand, reason about, and act on insurance documents and workflows.
|
|
4
6
|
|
|
5
7
|
## Installation
|
|
6
8
|
|
|
@@ -10,284 +12,216 @@ npm install @claritylabs/cl-sdk
|
|
|
10
12
|
|
|
11
13
|
### Peer Dependencies
|
|
12
14
|
|
|
13
|
-
CL-0 SDK requires the [Vercel AI SDK](https://sdk.vercel.ai) and pdf-lib:
|
|
14
|
-
|
|
15
15
|
```bash
|
|
16
|
-
npm install
|
|
16
|
+
npm install pdf-lib zod
|
|
17
17
|
```
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
Optional (for SQLite storage):
|
|
21
20
|
```bash
|
|
22
|
-
|
|
23
|
-
npm install @ai-sdk/anthropic
|
|
24
|
-
|
|
25
|
-
# OpenAI
|
|
26
|
-
npm install @ai-sdk/openai
|
|
27
|
-
|
|
28
|
-
# Google
|
|
29
|
-
npm install @ai-sdk/google
|
|
21
|
+
npm install better-sqlite3
|
|
30
22
|
```
|
|
31
23
|
|
|
32
24
|
## Quick Start
|
|
33
25
|
|
|
34
|
-
###
|
|
26
|
+
### Document Extraction
|
|
27
|
+
|
|
28
|
+
The v6 extraction pipeline uses a coordinator/worker pattern with provider-agnostic callbacks:
|
|
35
29
|
|
|
36
30
|
```typescript
|
|
37
|
-
import {
|
|
38
|
-
import {
|
|
31
|
+
import { createExtractor } from "@claritylabs/cl-sdk";
|
|
32
|
+
import { anthropic } from "@ai-sdk/anthropic"; // or any provider
|
|
33
|
+
import { generateText, generateObject } from "ai";
|
|
34
|
+
|
|
35
|
+
const extract = createExtractor({
|
|
36
|
+
generateText: async ({ prompt, system, maxTokens }) => {
|
|
37
|
+
const { text, usage } = await generateText({
|
|
38
|
+
model: anthropic("claude-sonnet-4-6"),
|
|
39
|
+
prompt,
|
|
40
|
+
system,
|
|
41
|
+
maxTokens,
|
|
42
|
+
});
|
|
43
|
+
return { text, usage };
|
|
44
|
+
},
|
|
45
|
+
generateObject: async ({ prompt, system, schema, maxTokens }) => {
|
|
46
|
+
const { object, usage } = await generateObject({
|
|
47
|
+
model: anthropic("claude-sonnet-4-6"),
|
|
48
|
+
prompt,
|
|
49
|
+
system,
|
|
50
|
+
schema,
|
|
51
|
+
maxTokens,
|
|
52
|
+
});
|
|
53
|
+
return { object, usage };
|
|
54
|
+
},
|
|
55
|
+
});
|
|
39
56
|
|
|
40
|
-
const anthropic = createAnthropic();
|
|
41
57
|
const pdfBase64 = "..."; // base64-encoded PDF
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
models: createUniformModelConfig(anthropic("claude-sonnet-4-6")),
|
|
45
|
-
});
|
|
46
|
-
const fields = applyExtracted(extracted);
|
|
58
|
+
const result = await extract.extract(pdfBase64);
|
|
59
|
+
console.log(result.document); // Structured InsuranceDocument
|
|
47
60
|
```
|
|
48
61
|
|
|
49
|
-
###
|
|
62
|
+
### With PDF to Image Conversion
|
|
50
63
|
|
|
51
|
-
|
|
64
|
+
For providers that don't support native PDF input, supply a `convertPdfToImages` callback:
|
|
52
65
|
|
|
53
66
|
```typescript
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
const openai = createOpenAI();
|
|
58
|
-
const { extracted } = await extractFromPdf(pdfBase64, {
|
|
59
|
-
models: createUniformModelConfig(openai("gpt-4o")),
|
|
60
|
-
});
|
|
61
|
-
```
|
|
62
|
-
|
|
63
|
-
If your model doesn't support native PDF input, set `pdfContentFormat: "image"` and provide a `convertPdfToImages` callback. The SDK does not bundle a converter — use whatever library works in your runtime (pdf2pic, mupdf, pdfjs-dist, etc.):
|
|
64
|
-
|
|
65
|
-
```typescript
|
|
66
|
-
const { extracted } = await extractFromPdf(pdfBase64, {
|
|
67
|
-
models: createUniformModelConfig(yourModel),
|
|
68
|
-
pdfContentFormat: "image",
|
|
67
|
+
const extract = createExtractor({
|
|
68
|
+
generateText: /* ... */,
|
|
69
|
+
generateObject: /* ... */,
|
|
69
70
|
convertPdfToImages: async (pdfBase64, startPage, endPage) => {
|
|
70
|
-
//
|
|
71
|
-
return
|
|
71
|
+
// Convert PDF pages to images using your preferred library
|
|
72
|
+
return [
|
|
73
|
+
{ imageBase64: "...", mimeType: "image/png" },
|
|
74
|
+
// ... one per page
|
|
75
|
+
];
|
|
72
76
|
},
|
|
73
77
|
});
|
|
74
78
|
```
|
|
75
79
|
|
|
76
|
-
###
|
|
77
|
-
|
|
78
|
-
Assign different models per pipeline role — use a fast model for classification/sections and a capable model for metadata/fallback:
|
|
80
|
+
### Storage (Optional)
|
|
79
81
|
|
|
80
82
|
```typescript
|
|
81
|
-
import {
|
|
82
|
-
import {
|
|
83
|
-
|
|
84
|
-
const anthropic = createAnthropic();
|
|
85
|
-
const models: ModelConfig = {
|
|
86
|
-
classification: anthropic("claude-haiku-4-5-20251001"), // fast, cheap
|
|
87
|
-
metadata: anthropic("claude-sonnet-4-6"), // capable
|
|
88
|
-
sections: anthropic("claude-haiku-4-5-20251001"), // fast, cheap
|
|
89
|
-
sectionsFallback: anthropic("claude-sonnet-4-6"), // capable (fallback)
|
|
90
|
-
enrichment: anthropic("claude-haiku-4-5-20251001"), // fast, cheap
|
|
91
|
-
};
|
|
92
|
-
|
|
93
|
-
const { extracted } = await extractFromPdf(pdfBase64, { models });
|
|
94
|
-
```
|
|
83
|
+
import { createExtractor } from "@claritylabs/cl-sdk";
|
|
84
|
+
import { SQLiteDocumentStore, SQLiteMemoryStore } from "@claritylabs/cl-sdk/storage/sqlite";
|
|
95
85
|
|
|
96
|
-
|
|
86
|
+
const documentStore = new SQLiteDocumentStore("./docs.db");
|
|
87
|
+
const memoryStore = new SQLiteMemoryStore("./memory.db");
|
|
97
88
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
const openai = createOpenAI();
|
|
105
|
-
|
|
106
|
-
const models: ModelConfig = {
|
|
107
|
-
classification: openai("gpt-4o-mini"),
|
|
108
|
-
metadata: anthropic("claude-sonnet-4-6"),
|
|
109
|
-
sections: openai("gpt-4o-mini"),
|
|
110
|
-
sectionsFallback: anthropic("claude-sonnet-4-6"),
|
|
111
|
-
enrichment: openai("gpt-4o-mini"),
|
|
112
|
-
};
|
|
113
|
-
|
|
114
|
-
const { extracted } = await extractFromPdf(pdfBase64, { models });
|
|
89
|
+
const extract = createExtractor({
|
|
90
|
+
generateText: /* ... */,
|
|
91
|
+
generateObject: /* ... */,
|
|
92
|
+
documentStore,
|
|
93
|
+
memoryStore,
|
|
94
|
+
});
|
|
115
95
|
```
|
|
116
96
|
|
|
117
|
-
##
|
|
118
|
-
|
|
119
|
-
### Document Extraction Pipeline
|
|
97
|
+
## Architecture
|
|
120
98
|
|
|
121
|
-
|
|
99
|
+
### Provider-Agnostic Design
|
|
122
100
|
|
|
123
|
-
-
|
|
124
|
-
- **Pass 1 — Metadata Extraction**: Extracts high-level metadata — carrier, policy/quote number, dates, premium, insured name, coverage table with limits and deductibles. Includes an early persistence callback (`onMetadata`) so metadata is saved immediately, surviving downstream failures.
|
|
125
|
-
- **Pass 2 — Section Extraction**: Splits the document into page chunks (starting at 15 pages) and extracts structured sections in parallel (concurrency-limited, default 2). All model calls automatically retry on rate-limit errors with exponential backoff. Adaptive fallback: if a chunk's output is truncated (JSON parse failure), it re-splits into smaller chunks (10, then 5 pages), and escalates to the fallback model. Results are merged across chunks.
|
|
126
|
-
- **Pass 3 — Enrichment**: A non-fatal pass that parses raw text into structured supplementary fields — regulatory context, complaint contacts, costs and fees, claims contacts.
|
|
101
|
+
CL-0 SDK has **zero framework dependencies**. You provide simple callback functions:
|
|
127
102
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
103
|
+
```typescript
|
|
104
|
+
type GenerateText = (params: {
|
|
105
|
+
prompt: string;
|
|
106
|
+
system?: string;
|
|
107
|
+
maxTokens: number;
|
|
108
|
+
}) => Promise<{ text: string; usage?: TokenUsage }>;
|
|
109
|
+
|
|
110
|
+
type GenerateObject<T> = (params: {
|
|
111
|
+
prompt: string;
|
|
112
|
+
system?: string;
|
|
113
|
+
schema: ZodSchema<T>;
|
|
114
|
+
maxTokens: number;
|
|
115
|
+
}) => Promise<{ object: T; usage?: TokenUsage }>;
|
|
116
|
+
```
|
|
140
117
|
|
|
141
|
-
|
|
118
|
+
Works with any provider: OpenAI, Anthropic, Google, Mistral, Bedrock, Azure, Ollama, etc.
|
|
142
119
|
|
|
143
|
-
|
|
120
|
+
### Extraction Pipeline
|
|
144
121
|
|
|
145
|
-
|
|
146
|
-
- **Communication intents** — direct (user-facing), mediated (forwarded), observed (CC'd)
|
|
147
|
-
- **Composable modules** — identity, safety, formatting, coverage gaps, COI routing, quotes-vs-policies, conversation memory, and intent-specific instructions
|
|
148
|
-
- **`buildAgentSystemPrompt(ctx)`** — composes all modules into a complete system prompt
|
|
149
|
-
- **Document context builder** — scores and ranks policies/quotes by relevance to a query
|
|
150
|
-
- **Tool definitions** — `tool_use`-compatible schemas for document lookup, COI generation, and coverage comparison
|
|
122
|
+
The `createExtractor` function returns an extraction engine that runs the following pipeline stages:
|
|
151
123
|
|
|
152
|
-
|
|
124
|
+
1. **Classify** — Determine document type (policy/quote) and line of business
|
|
125
|
+
2. **Plan** — Generate extraction plan using line-specific templates
|
|
126
|
+
3. **Extract** — Dispatch focused extractors in parallel (concurrency-limited, default 2)
|
|
127
|
+
4. **Review** — Check completeness against template requirements (up to 2 review rounds)
|
|
128
|
+
5. **Assemble** — Merge results into final `InsuranceDocument`
|
|
153
129
|
|
|
154
|
-
|
|
130
|
+
```typescript
|
|
131
|
+
const extract = createExtractor({
|
|
132
|
+
generateText,
|
|
133
|
+
generateObject,
|
|
134
|
+
concurrency: 2, // Parallel extractor limit
|
|
135
|
+
maxReviewRounds: 2, // Review loop iterations
|
|
136
|
+
onTokenUsage: (usage) => {
|
|
137
|
+
console.log(`${usage.inputTokens} in, ${usage.outputTokens} out`);
|
|
138
|
+
},
|
|
139
|
+
});
|
|
140
|
+
```
|
|
155
141
|
|
|
156
|
-
###
|
|
142
|
+
### Document Types
|
|
157
143
|
|
|
158
|
-
Comprehensive TypeScript
|
|
144
|
+
Comprehensive TypeScript types for the insurance domain:
|
|
159
145
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
146
|
+
```typescript
|
|
147
|
+
import type {
|
|
148
|
+
InsuranceDocument, // PolicyDocument | QuoteDocument
|
|
149
|
+
PolicyDocument,
|
|
150
|
+
QuoteDocument,
|
|
151
|
+
Coverage,
|
|
152
|
+
Endorsement,
|
|
153
|
+
Declaration, // 20+ line types
|
|
154
|
+
Platform,
|
|
155
|
+
AgentContext,
|
|
156
|
+
} from "@claritylabs/cl-sdk";
|
|
157
|
+
```
|
|
163
158
|
|
|
164
159
|
## API Reference
|
|
165
160
|
|
|
166
|
-
###
|
|
161
|
+
### Core Functions
|
|
167
162
|
|
|
168
163
|
| Function | Description |
|
|
169
164
|
|----------|-------------|
|
|
170
|
-
| `
|
|
171
|
-
| `
|
|
172
|
-
| `
|
|
173
|
-
| `extractSectionsOnly(pdf, metadata, options)` | Retry pass 2 using saved metadata |
|
|
174
|
-
| `applyExtracted(extracted)` | Map extraction JSON to persistence fields |
|
|
175
|
-
| `applyExtractedQuote(extracted)` | Map quote extraction JSON to persistence fields |
|
|
165
|
+
| `createExtractor(config)` | Create extraction engine with callbacks |
|
|
166
|
+
| `extract.extract(pdfBase64, documentId?)` | Run full extraction pipeline |
|
|
167
|
+
| `chunkDocument(text, maxChunkSize?)` | Chunk text for vector storage |
|
|
176
168
|
|
|
177
|
-
###
|
|
169
|
+
### Agent System
|
|
178
170
|
|
|
179
171
|
```typescript
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
interface ExtractSectionsOptions {
|
|
194
|
-
log?: LogFn;
|
|
195
|
-
promptBuilder?: PromptBuilder;
|
|
196
|
-
models: ModelConfig; // required — bring your own models
|
|
197
|
-
fallbackProviderOptions?: ProviderOptions;
|
|
198
|
-
concurrency?: number; // parallel chunk limit (default: 2)
|
|
199
|
-
tokenLimits?: TokenLimits; // override default maxTokens per role
|
|
200
|
-
onTokenUsage?: (usage: TokenUsage) => void;
|
|
201
|
-
pdfContentFormat?: "file" | "image"; // default: "file"
|
|
202
|
-
convertPdfToImages?: ConvertPdfToImagesFn; // required when pdfContentFormat is "image"
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
interface ClassifyOptions {
|
|
206
|
-
log?: LogFn;
|
|
207
|
-
models: ModelConfig; // required — bring your own models
|
|
208
|
-
tokenLimits?: TokenLimits; // override default maxTokens per role
|
|
209
|
-
onTokenUsage?: (usage: TokenUsage) => void;
|
|
210
|
-
pdfContentFormat?: "file" | "image"; // default: "file"
|
|
211
|
-
convertPdfToImages?: ConvertPdfToImagesFn; // required when pdfContentFormat is "image"
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
// Override default token limits per role (all fields optional)
|
|
215
|
-
interface TokenLimits {
|
|
216
|
-
classification?: number; // default: 512
|
|
217
|
-
metadata?: number; // default: 16384
|
|
218
|
-
sections?: number; // default: 8192
|
|
219
|
-
sectionsFallback?: number; // default: 16384
|
|
220
|
-
enrichment?: number; // default: 4096
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
interface TokenUsage {
|
|
224
|
-
inputTokens: number;
|
|
225
|
-
outputTokens: number;
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
type ConvertPdfToImagesFn = (
|
|
229
|
-
pdfBase64: string,
|
|
230
|
-
startPage: number,
|
|
231
|
-
endPage: number,
|
|
232
|
-
) => Promise<Array<{ imageBase64: string; mimeType: string }>>;
|
|
172
|
+
import {
|
|
173
|
+
buildAgentSystemPrompt,
|
|
174
|
+
buildIdentityPrompt,
|
|
175
|
+
buildSafetyPrompt,
|
|
176
|
+
buildCoverageGapPrompt,
|
|
177
|
+
} from "@claritylabs/cl-sdk";
|
|
178
|
+
|
|
179
|
+
const systemPrompt = buildAgentSystemPrompt({
|
|
180
|
+
platform: "email",
|
|
181
|
+
intent: "direct",
|
|
182
|
+
userName: "John",
|
|
183
|
+
companyName: "Acme Insurance",
|
|
184
|
+
});
|
|
233
185
|
```
|
|
234
186
|
|
|
235
|
-
###
|
|
236
|
-
|
|
237
|
-
| Format | Description | When to use |
|
|
238
|
-
|--------|-------------|-------------|
|
|
239
|
-
| `file` (default) | Send PDF as a native file. Most efficient — no conversion needed. | Most models (Anthropic, Google, OpenAI, Mistral, Bedrock, Azure) |
|
|
240
|
-
| `image` | Convert PDF pages to images via `convertPdfToImages` callback. | Models that don't support native PDF file input |
|
|
241
|
-
|
|
242
|
-
The SDK defaults to `"file"` — most modern models support native PDF input. If your model doesn't, set `pdfContentFormat: "image"` and provide a `convertPdfToImages` callback. The SDK does not bundle a converter — use whatever library works in your runtime (pdf2pic, mupdf, pdfjs-dist, etc.).
|
|
243
|
-
|
|
244
|
-
### Rate-Limit Resilience
|
|
245
|
-
|
|
246
|
-
All model calls automatically retry on rate-limit errors (HTTP 429 or "rate limit" in error message) with exponential backoff — up to 5 retries with delays of 2s, 4s, 8s, 16s, 32s (plus jitter). Non-rate-limit errors are re-thrown immediately.
|
|
247
|
-
|
|
248
|
-
### Parallel Chunk Extraction
|
|
249
|
-
|
|
250
|
-
Pass 2 section extraction processes page chunks in parallel with a configurable concurrency limit (default: 2). This balances throughput against rate limits. Sub-chunk retries on truncation are also parallelized.
|
|
187
|
+
### Tool Definitions
|
|
251
188
|
|
|
252
189
|
```typescript
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
concurrency: 3,
|
|
259
|
-
onTokenUsage: ({ inputTokens, outputTokens }) => {
|
|
260
|
-
totalInput += inputTokens;
|
|
261
|
-
totalOutput += outputTokens;
|
|
262
|
-
},
|
|
263
|
-
});
|
|
264
|
-
|
|
265
|
-
console.log(`Total: ${totalInput} input, ${totalOutput} output tokens`);
|
|
190
|
+
import {
|
|
191
|
+
AGENT_TOOLS,
|
|
192
|
+
DOCUMENT_LOOKUP_TOOL,
|
|
193
|
+
COI_GENERATION_TOOL,
|
|
194
|
+
} from "@claritylabs/cl-sdk";
|
|
266
195
|
```
|
|
267
196
|
|
|
268
|
-
###
|
|
197
|
+
### PDF Operations
|
|
269
198
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
199
|
+
```typescript
|
|
200
|
+
import {
|
|
201
|
+
getAcroFormFields,
|
|
202
|
+
fillAcroForm,
|
|
203
|
+
overlayTextOnPdf,
|
|
204
|
+
} from "@claritylabs/cl-sdk";
|
|
205
|
+
```
|
|
275
206
|
|
|
276
|
-
###
|
|
207
|
+
### Storage
|
|
277
208
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
| `fillAcroForm(pdfBytes, mappings)` | Fill and flatten form fields |
|
|
282
|
-
| `overlayTextOnPdf(pdfBytes, overlays)` | Position text on flat PDFs |
|
|
209
|
+
```typescript
|
|
210
|
+
import { SQLiteDocumentStore, SQLiteMemoryStore } from "@claritylabs/cl-sdk/storage/sqlite";
|
|
211
|
+
```
|
|
283
212
|
|
|
284
213
|
## Development
|
|
285
214
|
|
|
286
215
|
```bash
|
|
287
216
|
npm install
|
|
288
|
-
npm run build # Build ESM + CJS + types
|
|
217
|
+
npm run build # Build ESM + CJS + types
|
|
289
218
|
npm run dev # Watch mode
|
|
290
|
-
npm run typecheck # Type check
|
|
219
|
+
npm run typecheck # Type check
|
|
220
|
+
npm run test # Run tests (vitest)
|
|
291
221
|
```
|
|
292
222
|
|
|
293
|
-
|
|
223
|
+
Zero framework dependencies. Peer deps: `pdf-lib`, `zod`. Optional: `better-sqlite3`.
|
|
224
|
+
|
|
225
|
+
## License
|
|
226
|
+
|
|
227
|
+
Apache-2.0
|