@claritylabs/cl-sdk-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +109 -0
- package/docs-bundle.json +188 -0
- package/mcp-config.json +5 -0
- package/package.json +44 -0
- package/server.ts +461 -0
package/README.md
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# @claritylabs/cl-sdk-mcp
|
|
2
|
+
|
|
3
|
+
MCP server for the CL SDK. Exposes SDK functions and documentation search as [Model Context Protocol](https://modelcontextprotocol.io/) tools for AI coding assistants like Claude Code, Cursor, and Windsurf.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install -g @claritylabs/cl-sdk-mcp
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Setup
|
|
12
|
+
|
|
13
|
+
### Claude Code
|
|
14
|
+
|
|
15
|
+
Add to `.claude/mcp.json` (project or global `~/.claude/mcp.json`):
|
|
16
|
+
|
|
17
|
+
```json
|
|
18
|
+
{
|
|
19
|
+
"mcpServers": {
|
|
20
|
+
"cl-sdk": {
|
|
21
|
+
"command": "npx",
|
|
22
|
+
"args": ["@claritylabs/cl-sdk-mcp"]
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Cursor / Windsurf
|
|
29
|
+
|
|
30
|
+
Add to your MCP settings (`~/.cursor/mcp.json` or equivalent):
|
|
31
|
+
|
|
32
|
+
```json
|
|
33
|
+
{
|
|
34
|
+
"mcpServers": {
|
|
35
|
+
"cl-sdk": {
|
|
36
|
+
"command": "npx",
|
|
37
|
+
"args": ["@claritylabs/cl-sdk-mcp"]
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Tools
|
|
44
|
+
|
|
45
|
+
### Documentation (no API key needed)
|
|
46
|
+
|
|
47
|
+
| Tool | Description |
|
|
48
|
+
|------|-------------|
|
|
49
|
+
| `search_docs` | Full-text search across CL SDK docs; returns the top 5 matches |
|
|
50
|
+
| `read_doc_page` | Read a doc page by slug (e.g. `getting-started/quickstart`) |
|
|
51
|
+
| `list_doc_sections` | List all sections and pages |
|
|
52
|
+
|
|
53
|
+
### Prompt builders (no API key needed)
|
|
54
|
+
|
|
55
|
+
| Tool | Description |
|
|
56
|
+
|------|-------------|
|
|
57
|
+
| `build_agent_system_prompt` | Generate an insurance-aware agent system prompt |
|
|
58
|
+
| `build_field_extraction_prompt` | Application field extraction prompt |
|
|
59
|
+
| `build_auto_fill_prompt` | Auto-fill prompt for application fields |
|
|
60
|
+
| `build_question_batch_prompt` | Batched question prompt for unfilled fields |
|
|
61
|
+
| `apply_extracted` | Map raw policy extraction to structured fields |
|
|
62
|
+
| `apply_extracted_quote` | Map raw quote extraction to structured fields |
|
|
63
|
+
|
|
64
|
+
### Extraction (requires API key)
|
|
65
|
+
|
|
66
|
+
| Tool | Description |
|
|
67
|
+
|------|-------------|
|
|
68
|
+
| `classify_document` | Classify a PDF as policy or quote |
|
|
69
|
+
| `extract_policy` | Full multi-pass policy extraction from PDF |
|
|
70
|
+
| `extract_quote` | Full multi-pass quote extraction from PDF |
|
|
71
|
+
|
|
72
|
+
### PDF operations (no API key needed)
|
|
73
|
+
|
|
74
|
+
| Tool | Description |
|
|
75
|
+
|------|-------------|
|
|
76
|
+
| `get_acro_form_fields` | List fillable AcroForm fields in a PDF |
|
|
77
|
+
| `fill_acro_form` | Fill form fields and return flattened PDF |
|
|
78
|
+
| `overlay_text_on_pdf` | Overlay text at coordinates on a flat PDF |
|
|
79
|
+
|
|
80
|
+
## Configuration
|
|
81
|
+
|
|
82
|
+
Extraction tools need a model provider and API key. The simplest setup:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
export ANTHROPIC_API_KEY=sk-ant-...
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Defaults to Anthropic `claude-haiku-4-5-20251001`. Override with environment variables:
|
|
89
|
+
|
|
90
|
+
| Variable | Default |
|
|
91
|
+
|----------|---------|
|
|
92
|
+
| `CL_MCP_PROVIDER` | `anthropic` |
|
|
93
|
+
| `CL_MCP_MODEL` | `claude-haiku-4-5-20251001` |
|
|
94
|
+
|
|
95
|
+
Supports `anthropic`, `openai`, and `google` providers.
|
|
96
|
+
|
|
97
|
+
Alternatively, place an `mcp-config.json` next to the server:
|
|
98
|
+
|
|
99
|
+
```json
|
|
100
|
+
{
|
|
101
|
+
"provider": "anthropic",
|
|
102
|
+
"model": "claude-haiku-4-5-20251001",
|
|
103
|
+
"apiKey": "${ANTHROPIC_API_KEY}"
|
|
104
|
+
}
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Docs
|
|
108
|
+
|
|
109
|
+
Full documentation: [cl-sdk.claritylabs.inc/docs/mcp-server/overview](https://cl-sdk.claritylabs.inc/docs/mcp-server/overview)
|
package/docs-bundle.json
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
{
|
|
2
|
+
"generatedAt": "2026-03-23T21:43:25.371Z",
|
|
3
|
+
"sections": [
|
|
4
|
+
{
|
|
5
|
+
"title": "Agent System",
|
|
6
|
+
"slug": "agent",
|
|
7
|
+
"pages": [
|
|
8
|
+
"system-prompt",
|
|
9
|
+
"platforms",
|
|
10
|
+
"tools"
|
|
11
|
+
]
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"title": "API Reference",
|
|
15
|
+
"slug": "api-reference",
|
|
16
|
+
"pages": [
|
|
17
|
+
"extraction",
|
|
18
|
+
"agent",
|
|
19
|
+
"application",
|
|
20
|
+
"pdf",
|
|
21
|
+
"types"
|
|
22
|
+
]
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
"title": "Application Processing",
|
|
26
|
+
"slug": "application",
|
|
27
|
+
"pages": [
|
|
28
|
+
"overview",
|
|
29
|
+
"pdf-operations"
|
|
30
|
+
]
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"title": "Document Extraction",
|
|
34
|
+
"slug": "extraction",
|
|
35
|
+
"pages": [
|
|
36
|
+
"pipeline",
|
|
37
|
+
"classification",
|
|
38
|
+
"applying-results"
|
|
39
|
+
]
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"title": "Getting Started",
|
|
43
|
+
"slug": "getting-started",
|
|
44
|
+
"pages": [
|
|
45
|
+
"quickstart",
|
|
46
|
+
"architecture",
|
|
47
|
+
"models"
|
|
48
|
+
]
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
"title": "MCP Server",
|
|
52
|
+
"slug": "mcp-server",
|
|
53
|
+
"pages": [
|
|
54
|
+
"overview",
|
|
55
|
+
"configuration",
|
|
56
|
+
"tools"
|
|
57
|
+
]
|
|
58
|
+
}
|
|
59
|
+
],
|
|
60
|
+
"pages": [
|
|
61
|
+
{
|
|
62
|
+
"slug": "agent/platforms",
|
|
63
|
+
"title": "Platforms & Intents",
|
|
64
|
+
"description": "Multi-platform support and communication intent system",
|
|
65
|
+
"content": "CL SDK's agent system adapts to different communication platforms and interaction modes through the platform/intent model.\n\n## Platforms\n\nFive platforms are supported, each with different formatting capabilities:\n\n| Platform | Markdown | Links | Rich Formatting | Max Length | Sign-off |\n|----------|----------|-------|----------------|------------|----------|\n| `email` | No | Yes | No | — | Yes |\n| `chat` | Yes | Yes | Yes | — | No |\n| `sms` | No | No | No | 1,600 | No |\n| `slack` | Yes | Yes | Yes | — | No |\n| `discord` | Yes | Yes | Yes | 2,000 | No |\n\n### Platform configs\n\nAccess platform configurations directly:\n\n```typescript\n\nconst emailConfig = PLATFORM_CONFIGS.email;\n// { supportsMarkdown: false, supportsLinks: true, supportsRichFormatting: false, signOff: true }\n```\n\n### Custom platform config\n\nOverride the default config for a platform:\n\n```typescript\nconst ctx: AgentContext = {\n platform: \"chat\",\n intent: \"direct\",\n siteUrl: \"https://app.example.com\",\n platformConfig: {\n supportsMarkdown: true,\n supportsLinks: true,\n supportsRichFormatting: true,\n maxResponseLength: 4000, // Custom limit\n },\n};\n```\n\n## Communication intents\n\nIntents describe the relationship between the sender and the agent:\n\n### `direct`\n\nThe user is communicating directly with the agent (e.g., emailing the agent address, using web chat). The agent responds as itself with full capabilities.\n\n### `mediated`\n\nThe message was forwarded by someone (e.g., a broker forwarding an email from a client). The agent responds to the forwarder, not the original sender. Links to the app are omitted.\n\n### `observed`\n\nThe agent was CC'd on a conversation. 
It observes and may provide relevant information, but doesn't insert itself as a primary participant.\n\n## COI handling\n\nThe `coiHandling` field on `AgentContext` controls how certificate of insurance requests are routed:\n\n| Value | Behavior |\n|-------|----------|\n| `\"broker\"` | Route COI requests to the broker contact |\n| `\"user\"` | The user can generate COIs directly |\n| `\"member\"` | Route to a specific team member |\n| `\"ignore\"` | Don't handle COI requests |\n\n```typescript\nconst ctx: AgentContext = {\n platform: \"email\",\n intent: \"direct\",\n siteUrl: \"https://app.example.com\",\n coiHandling: \"broker\",\n brokerName: \"ABC Insurance Agency\",\n brokerContactName: \"John Smith\",\n brokerContactEmail: \"john@abcinsurance.com\",\n};\n```"
|
|
66
|
+
},
|
|
67
|
+
{
|
|
68
|
+
"slug": "agent/system-prompt",
|
|
69
|
+
"title": "Agent System Prompt",
|
|
70
|
+
"description": "Build composable, insurance-aware system prompts for conversational agents",
|
|
71
|
+
"content": "CL SDK provides a modular prompt system for building insurance-aware conversational agents. Prompts are composed from independent modules that adapt to the platform and communication intent.\n\n## Quick start\n\n```typescript\n\nconst ctx: AgentContext = {\n platform: \"email\",\n intent: \"direct\",\n siteUrl: \"https://app.example.com\",\n companyName: \"Acme Insurance\",\n userName: \"Jane Smith\",\n};\n\nconst systemPrompt = buildAgentSystemPrompt(ctx);\n// Use with any LLM: pass as the system message\n```\n\n## How it works\n\n`buildAgentSystemPrompt(ctx)` composes all modules in order, filtering out any that return `null`:\n\n```\nIdentity → Who the agent is, company context\nCompany context → Organization-specific information (if provided)\nIntent → Behavior rules for direct / mediated / observed\nFormatting → Platform-specific output rules\nSafety → Scope guardrails, anti-hallucination, prompt injection defense\nCoverage gaps → Gap detection guidance (null in some intents)\nCOI routing → Certificate of insurance handling\nQuotes/policies → How to differentiate document types\nMemory → Cross-conversation continuity guidance\n```\n\n## AgentContext\n\nThe context object drives all prompt composition:\n\n```typescript\ninterface AgentContext {\n platform: Platform; // \"email\" | \"chat\" | \"sms\" | \"slack\" | \"discord\"\n intent: CommunicationIntent; // \"direct\" | \"mediated\" | \"observed\"\n platformConfig?: PlatformConfig;\n companyName?: string;\n companyContext?: string; // Organization-specific context string\n siteUrl: string;\n userName?: string;\n coiHandling?: \"broker\" | \"user\" | \"member\" | \"ignore\";\n brokerName?: string;\n brokerContactName?: string;\n brokerContactEmail?: string;\n}\n```\n\n## Individual modules\n\nEach module is independently importable for custom prompt composition:\n\n```typescript\n buildIdentityPrompt,\n buildSafetyPrompt,\n buildFormattingPrompt,\n buildCoverageGapPrompt,\n buildCoiRoutingPrompt,\n 
buildQuotesPoliciesPrompt,\n buildConversationMemoryGuidance,\n buildIntentPrompt,\n} from \"@claritylabs/cl-sdk\";\n```\n\n### Custom composition\n\nBuild your own prompt from a subset of modules:\n\n```typescript\nconst customPrompt = [\n buildIdentityPrompt(ctx),\n buildSafetyPrompt(ctx),\n buildFormattingPrompt(ctx),\n // Skip coverage gaps, COI routing, etc.\n].filter(Boolean).join(\"\\n\\n\");\n```\n\n## Document context\n\nBuild ranked document context for agent responses:\n\n```typescript\n\nconst context = buildDocumentContext(documents, \"What's my GL limit?\");\n// Returns policies/quotes ranked by relevance to the query\n```\n\n## Conversation memory\n\nAdd conversation history context:\n\n```typescript\n\nconst memoryContext = buildConversationMemoryContext(previousConversations);\n```\n\n## Intent classification\n\nClassify incoming messages before routing to the agent:\n\n```typescript\n\nconst classificationPrompt = buildClassifyMessagePrompt(\"email\");\n// Returns a prompt that classifies whether a message is insurance-related\n// and suggests an intent\n```"
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
"slug": "agent/tools",
|
|
75
|
+
"title": "Tool Definitions",
|
|
76
|
+
"description": "Schema-only tool definitions for agent function calling",
|
|
77
|
+
"content": "CL SDK exports Claude `tool_use`-compatible tool schemas for common insurance agent operations. These are **schema-only** — CL SDK provides the definitions, and consumers implement the execution logic.\n\n## Available tools\n\n### Document Lookup\n\nSearch and retrieve insurance documents by ID, policy number, carrier, or free-text query.\n\n```typescript\n```\n\n**Input schema:**\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `id` | `string` | No | Exact document ID |\n| `query` | `string` | No | Free-text search query |\n| `documentType` | `\"policy\" \\| \"quote\"` | No | Filter by document type |\n\n### COI Generation\n\nRequest generation of a Certificate of Insurance for a specific policy.\n\n```typescript\n```\n\n**Input schema:**\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `policyId` | `string` | Yes | Policy ID to generate COI for |\n| `holderName` | `string` | Yes | Certificate holder name |\n| `holderAddress` | `string` | No | Certificate holder address |\n| `additionalInsured` | `boolean` | No | Add holder as additional insured |\n\n### Coverage Comparison\n\nCompare coverages across two or more insurance documents.\n\n```typescript\n```\n\n**Input schema:**\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `documentIds` | `string[]` | Yes | Document IDs to compare |\n| `coverageTypes` | `string[]` | No | Filter to specific coverage types |\n\n## Using all tools\n\nImport all tools as an array:\n\n```typescript\n\n// AGENT_TOOLS = [DOCUMENT_LOOKUP_TOOL, COI_GENERATION_TOOL, COVERAGE_COMPARISON_TOOL]\n```\n\n## Integration example\n\n```typescript\n\nconst systemPrompt = buildAgentSystemPrompt(ctx);\n\nconst { text, toolCalls } = await generateText({\n model: yourModel,\n system: systemPrompt,\n tools: Object.fromEntries(\n AGENT_TOOLS.map(tool => [tool.name, {\n description: 
tool.description,\n parameters: tool.input_schema,\n }])\n ),\n messages: conversationHistory,\n});\n\n// Handle tool calls with your own implementations\nfor (const call of toolCalls) {\n switch (call.toolName) {\n case \"document_lookup\":\n // Query your database\n break;\n case \"coi_generation\":\n // Generate COI PDF\n break;\n case \"coverage_comparison\":\n // Build comparison table\n break;\n }\n}\n```\n\n## ToolDefinition type\n\n```typescript\n\ninterface ToolDefinition {\n name: string;\n description: string;\n input_schema: {\n type: \"object\";\n properties: Record<string, unknown>;\n required?: string[];\n };\n}\n```"
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
"slug": "api-reference/agent",
|
|
81
|
+
"title": "Agent API",
|
|
82
|
+
"description": "Complete reference for agent prompt builders and context functions",
|
|
83
|
+
"content": "## System prompt\n\n### `buildAgentSystemPrompt(ctx)`\n\nBuild a complete agent system prompt from composable modules.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `ctx` | `AgentContext` | Agent context configuration |\n\n**Returns:** `string`\n\n## Individual modules\n\nEach module can be used independently for custom prompt composition.\n\n### `buildIdentityPrompt(ctx)`\n\nAgent identity and name. **Returns:** `string`\n\n### `buildIntentPrompt(ctx)`\n\nCommunication intent behavior rules. **Returns:** `string`\n\n### `buildFormattingPrompt(ctx)`\n\nPlatform-specific output formatting rules. **Returns:** `string`\n\n### `buildSafetyPrompt(ctx)`\n\nScope guardrails, anti-hallucination, prompt injection defense. **Returns:** `string`\n\n### `buildCoverageGapPrompt(ctx)`\n\nCoverage gap detection guidance. Returns `null` for some intents. **Returns:** `string | null`\n\n### `buildCoiRoutingPrompt(ctx)`\n\nCertificate of insurance request routing. **Returns:** `string`\n\n### `buildQuotesPoliciesPrompt()`\n\nDocument type differentiation rules. No context needed. **Returns:** `string`\n\n### `buildConversationMemoryGuidance()`\n\nCross-conversation memory continuity rules. No context needed. **Returns:** `string`\n\n## Context builders\n\n### `buildDocumentContext(docs, query)`\n\nBuild ranked document context for a query. Scores and ranks policies/quotes by relevance.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `docs` | `InsuranceDocument[]` | Available documents |\n| `query` | `string` | User's question |\n\n**Returns:** `string`\n\n---\n\n### `buildConversationMemoryContext(conversations)`\n\nBuild conversation history context for the agent.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `conversations` | `any[]` | Previous conversations |\n\n**Returns:** `string`\n\n---\n\n### `buildPolicyContext(policies)`\n\nBuild policy context string. 
**Deprecated** — use `buildDocumentContext` instead.\n\n## Intent classification\n\n### `buildClassifyMessagePrompt(platform)`\n\nBuild a prompt for classifying incoming messages as insurance-related.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `platform` | `Platform` | The platform the message came from |\n\n**Returns:** `string`\n\n## Legacy (deprecated)\n\n### `buildSystemPrompt(options)`\n\nLegacy system prompt builder. Delegates to `buildAgentSystemPrompt` internally.\n\n### `CLASSIFY_EMAIL_PROMPT`\n\nLegacy email classification prompt. Use `buildClassifyMessagePrompt(\"email\")` instead.\n\n## Extraction prompts\n\nThese prompts are used internally by the extraction pipeline but are exported for advanced use cases:\n\n| Export | Description |\n|--------|-------------|\n| `EXTRACTION_PROMPT` | Full extraction prompt |\n| `CLASSIFY_DOCUMENT_PROMPT` | Document type classification |\n| `METADATA_PROMPT` | Policy metadata extraction |\n| `QUOTE_METADATA_PROMPT` | Quote metadata extraction |\n| `buildSectionsPrompt(start, end)` | Policy section extraction for page range |\n| `buildPolicySectionsPrompt(start, end)` | Alias for `buildSectionsPrompt` |\n| `buildQuoteSectionsPrompt(start, end)` | Quote section extraction for page range |\n| `buildSupplementaryEnrichmentPrompt(fields)` | Supplementary field enrichment |"
|
|
84
|
+
},
|
|
85
|
+
{
|
|
86
|
+
"slug": "api-reference/application",
|
|
87
|
+
"title": "Application API",
|
|
88
|
+
"description": "Complete reference for application processing prompt builders",
|
|
89
|
+
"content": "All application processing functions are prompt builders — they return prompt strings for use with any LLM.\n\n## Detection\n\n### `APPLICATION_CLASSIFY_PROMPT`\n\nA static prompt string for classifying whether a PDF is an insurance application form.\n\n```typescript\n```\n\n## Field extraction\n\n### `buildFieldExtractionPrompt()`\n\nBuild a prompt to extract all fillable fields from an application PDF as structured data.\n\n**Returns:** `string`\n\n**Field types:** `text`, `numeric`, `currency`, `date`, `yes_no`, `table`, `declaration`\n\n## Auto-fill\n\n### `buildAutoFillPrompt(fields, businessContext)`\n\nBuild a prompt to match extracted fields against known business context for auto-filling.\n\n**Returns:** `string`\n\n## Question batching\n\n### `buildQuestionBatchPrompt(unfilledFields)`\n\nBuild a prompt to organize unfilled fields into topic-based batches for user interaction.\n\n**Returns:** `string`\n\n## Answer parsing\n\n### `buildAnswerParsingPrompt(questions, userReply)`\n\nBuild a prompt to parse free-text user replies into structured field values.\n\n**Returns:** `string`\n\n## Email generation\n\n### `buildBatchEmailGenerationPrompt(batch, context)`\n\nBuild a prompt to generate an email asking users for missing application information.\n\n**Returns:** `string`\n\n## Reply classification\n\n### `buildReplyIntentClassificationPrompt(userReply, currentBatch)`\n\nBuild a prompt to classify user reply intent (answering questions, asking for help, etc.).\n\n**Returns:** `string`\n\n## Field explanation\n\n### `buildFieldExplanationPrompt(field)`\n\nBuild a prompt to generate a human-readable explanation of a complex insurance form field.\n\n**Returns:** `string`\n\n## Confirmation\n\n### `buildConfirmationSummaryPrompt(filledFields)`\n\nBuild a prompt to generate a summary of all filled fields for user review before PDF generation.\n\n**Returns:** `string`\n\n## PDF mapping\n\n### `buildAcroFormMappingPrompt(filledFields, 
acroFormFields)`\n\nBuild a prompt to map filled field values to AcroForm field names in a fillable PDF.\n\n**Returns:** `string`\n\n### `buildFlatPdfMappingPrompt(filledFields, pageInfo)`\n\nBuild a prompt to map filled field values to percentage-based page coordinates for flat PDF overlay.\n\n**Returns:** `string`\n\n## Lookup fill\n\n### `buildLookupFillPrompt(fields, documents)`\n\nBuild a prompt to fill fields by looking up values from existing insurance documents.\n\n**Returns:** `string`"
|
|
90
|
+
},
|
|
91
|
+
{
|
|
92
|
+
"slug": "api-reference/extraction",
|
|
93
|
+
"title": "Extraction API",
|
|
94
|
+
"description": "Complete reference for document extraction functions",
|
|
95
|
+
"content": "## Pipeline functions\n\n### `classifyDocumentType(pdf, options?)`\n\nClassify a document as a policy or quote.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `pdf` | `string` | Base64-encoded PDF |\n| `options` | `ClassifyOptions` | Optional configuration |\n\n**Returns:** `Promise<{ documentType: \"policy\" | \"quote\"; confidence: number; signals: string[] }>`\n\n```typescript\nconst { documentType, confidence, signals } = await classifyDocumentType(pdfBase64);\n```\n\n---\n\n### `extractFromPdf(pdf, options?)`\n\nFull policy extraction pipeline (passes 1-3).\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `pdf` | `string` | Base64-encoded PDF |\n| `options` | `ExtractOptions` | Optional configuration |\n\n**Returns:** `Promise<{ rawText: string; extracted: any }>`\n\n```typescript\nconst { rawText, extracted } = await extractFromPdf(pdfBase64, {\n log: async (msg) => console.log(msg),\n onMetadata: async (raw) => await saveMetadata(raw),\n models: customModels,\n});\n```\n\n---\n\n### `extractQuoteFromPdf(pdf, options?)`\n\nFull quote extraction pipeline (passes 1-2).\n\n**Parameters:** Same as `extractFromPdf`.\n\n**Returns:** `Promise<{ rawText: string; extracted: any }>`\n\n---\n\n### `extractSectionsOnly(pdf, metadataRaw, options?)`\n\nRetry section extraction using saved metadata from a prior pass 1.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `pdf` | `string` | Base64-encoded PDF |\n| `metadataRaw` | `string` | JSON string from a previous pass 1 |\n| `options` | `ExtractSectionsOptions` | Optional configuration |\n\n**Returns:** `Promise<{ rawText: string; extracted: any }>`\n\n---\n\n### `enrichSupplementaryFields(document, models?, log?)`\n\nPass 3 enrichment. Parses raw text into structured supplementary fields. 
Non-fatal — returns the document unchanged on failure.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `document` | `any` | Document object with raw supplementary fields |\n| `models` | `ModelConfig` | Optional model config |\n| `log` | `LogFn` | Optional logger |\n\n**Returns:** `Promise<any>`\n\n## Mapping functions\n\n### `applyExtracted(extracted)`\n\nMap raw policy extraction JSON to persistence-ready fields.\n\n**Returns:** Object with `carrier`, `policyNumber`, `coverages`, `effectiveDate`, `expirationDate`, etc.\n\n---\n\n### `applyExtractedQuote(extracted)`\n\nMap raw quote extraction JSON to persistence-ready fields.\n\n**Returns:** Object with `quoteNumber`, `premiumBreakdown`, `subjectivities`, `proposedEffectiveDate`, etc.\n\n## Merge functions\n\n### `mergeChunkedSections(metadataResult, sectionChunks)`\n\nMerge section chunks from policy extraction. Combines sections and takes the last non-null supplementary field.\n\n---\n\n### `mergeChunkedQuoteSections(metadataResult, sectionChunks)`\n\nMerge section chunks from quote extraction. 
Also accumulates subjectivities and underwriting conditions.\n\n## Utility functions\n\n### `getPageChunks(totalPages, chunkSize?)`\n\nCalculate page ranges for chunked extraction.\n\n| Parameter | Type | Default | Description |\n|-----------|------|---------|-------------|\n| `totalPages` | `number` | — | Total page count |\n| `chunkSize` | `number` | `30` | Pages per chunk |\n\n**Returns:** `Array<[number, number]>`\n\n---\n\n### `stripFences(text)`\n\nRemove markdown code fences from AI response text.\n\n---\n\n### `sanitizeNulls<T>(obj)`\n\nRecursively convert `null` values to `undefined`.\n\n## Options types\n\n### `ExtractOptions`\n\n```typescript\ninterface ExtractOptions {\n log?: LogFn;\n onMetadata?: (raw: string) => Promise<void>;\n models?: ModelConfig;\n metadataProviderOptions?: ProviderOptions;\n fallbackProviderOptions?: ProviderOptions;\n}\n```\n\n### `ClassifyOptions`\n\n```typescript\ninterface ClassifyOptions {\n log?: LogFn;\n models?: ModelConfig;\n}\n```\n\n### `ExtractSectionsOptions`\n\n```typescript\ninterface ExtractSectionsOptions {\n log?: LogFn;\n promptBuilder?: PromptBuilder;\n models?: ModelConfig;\n fallbackProviderOptions?: ProviderOptions;\n}\n```\n\n### `LogFn`\n\n```typescript\ntype LogFn = (message: string) => Promise<void>;\n```\n\n## Constants\n\n| Constant | Value | Description |\n|----------|-------|-------------|\n| `SONNET_MODEL` | `\"claude-sonnet-4-6\"` | Default Sonnet model ID |\n| `HAIKU_MODEL` | `\"claude-haiku-4-5-20251001\"` | Default Haiku model ID |"
|
|
96
|
+
},
|
|
97
|
+
{
|
|
98
|
+
"slug": "api-reference/pdf",
|
|
99
|
+
"title": "PDF Operations API",
|
|
100
|
+
"description": "Complete reference for PDF form filling and text overlay functions",
|
|
101
|
+
"content": "## AcroForm\n\n### `getAcroFormFields(pdfDoc)`\n\nEnumerate all AcroForm fields from a PDF document.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `pdfDoc` | `PDFDocument` | A pdf-lib `PDFDocument` instance |\n\n**Returns:** `AcroFormFieldInfo[]` — empty array if no form fields exist.\n\n```typescript\ninterface AcroFormFieldInfo {\n name: string;\n type: \"text\" | \"checkbox\" | \"dropdown\" | \"radio\";\n options?: string[]; // For dropdown and radio fields\n}\n```\n\n---\n\n### `fillAcroForm(pdfBytes, mappings)`\n\nFill AcroForm fields by name and flatten the form.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `pdfBytes` | `Uint8Array` | Raw PDF bytes |\n| `mappings` | `FieldMapping[]` | Field name to value mappings |\n\n**Returns:** `Promise<Uint8Array>` — the filled and flattened PDF bytes.\n\n```typescript\ninterface FieldMapping {\n acroFormName: string;\n value: string;\n}\n```\n\n**Checkbox values:** `\"yes\"`, `\"true\"`, `\"x\"`, `\"checked\"`, `\"on\"` → checked. Anything else → unchecked.\n\nMissing fields and invalid dropdown/radio values are silently skipped.\n\n## Text overlay\n\n### `overlayTextOnPdf(pdfBytes, overlays)`\n\nOverlay text at specified coordinates on a flat PDF.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `pdfBytes` | `Uint8Array` | Raw PDF bytes |\n| `overlays` | `TextOverlay[]` | Text overlays to apply |\n\n**Returns:** `Promise<Uint8Array>` — the modified PDF bytes.\n\n```typescript\ninterface TextOverlay {\n page: number; // 0-indexed page number\n x: number; // percentage from left edge (0-100)\n y: number; // percentage from top edge (0-100)\n text: string;\n fontSize?: number; // default: 10\n isCheckmark?: boolean; // draws \"X\" instead of text\n}\n```\n\nUses Helvetica font. Overlays targeting non-existent pages are skipped."
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
"slug": "api-reference/types",
|
|
105
|
+
"title": "Type Reference",
|
|
106
|
+
"description": "Complete reference for all exported TypeScript types",
|
|
107
|
+
"content": "## Document types\n\n### `InsuranceDocument`\n\nDiscriminated union of `PolicyDocument | QuoteDocument`.\n\n```typescript\ntype InsuranceDocument = PolicyDocument | QuoteDocument;\n```\n\n### `BaseDocument`\n\nShared fields for all insurance documents.\n\n```typescript\ninterface BaseDocument {\n id: string;\n type: \"policy\" | \"quote\";\n carrier: string;\n security?: string;\n insuredName: string;\n premium?: string;\n summary?: string;\n policyTypes?: string[];\n coverages: Coverage[];\n sections?: Section[];\n}\n```\n\n### `PolicyDocument`\n\n```typescript\ninterface PolicyDocument extends BaseDocument {\n type: \"policy\";\n policyNumber: string;\n effectiveDate: string;\n expirationDate: string;\n}\n```\n\n### `QuoteDocument`\n\n```typescript\ninterface QuoteDocument extends BaseDocument {\n type: \"quote\";\n quoteNumber: string;\n proposedEffectiveDate?: string;\n proposedExpirationDate?: string;\n quoteExpirationDate?: string;\n subjectivities?: Subjectivity[];\n underwritingConditions?: UnderwritingCondition[];\n premiumBreakdown?: PremiumLine[];\n}\n```\n\n### `Coverage`\n\n```typescript\ninterface Coverage {\n name: string;\n limit: string;\n deductible?: string;\n pageNumber?: number;\n sectionRef?: string;\n}\n```\n\n### `Section`\n\n```typescript\ninterface Section {\n title: string;\n sectionNumber?: string;\n pageStart: number;\n pageEnd?: number;\n type: string;\n coverageType?: string;\n content: string;\n subsections?: Subsection[];\n}\n```\n\n### `Subsection`\n\n```typescript\ninterface Subsection {\n title: string;\n sectionNumber?: string;\n pageNumber?: number;\n content: string;\n}\n```\n\n### `Subjectivity`\n\n```typescript\ninterface Subjectivity {\n description: string;\n category?: string;\n}\n```\n\n### `UnderwritingCondition`\n\n```typescript\ninterface UnderwritingCondition {\n description: string;\n}\n```\n\n### `PremiumLine`\n\n```typescript\ninterface PremiumLine {\n line: string;\n amount: string;\n}\n```\n\n## 
Platform types\n\n### `Platform`\n\n```typescript\ntype Platform = \"email\" | \"chat\" | \"sms\" | \"slack\" | \"discord\";\n```\n\n### `CommunicationIntent`\n\n```typescript\ntype CommunicationIntent = \"direct\" | \"mediated\" | \"observed\";\n```\n\n### `PlatformConfig`\n\n```typescript\ninterface PlatformConfig {\n supportsMarkdown: boolean;\n supportsLinks: boolean;\n supportsRichFormatting: boolean;\n maxResponseLength?: number;\n signOff?: boolean;\n}\n```\n\n### `AgentContext`\n\n```typescript\ninterface AgentContext {\n platform: Platform;\n intent: CommunicationIntent;\n platformConfig?: PlatformConfig;\n companyName?: string;\n companyContext?: string;\n siteUrl: string;\n userName?: string;\n coiHandling?: \"broker\" | \"user\" | \"member\" | \"ignore\";\n brokerName?: string;\n brokerContactName?: string;\n brokerContactEmail?: string;\n}\n```\n\n### `PLATFORM_CONFIGS`\n\n```typescript\nconst PLATFORM_CONFIGS: Record<Platform, PlatformConfig>;\n```\n\nPre-configured platform settings. See [Platforms & Intents](/docs/agent/platforms).\n\n## Model types\n\n### `ModelConfig`\n\n```typescript\ninterface ModelConfig {\n classification: LanguageModel; // Pass 0: document type classification\n metadata: LanguageModel; // Pass 1: metadata extraction\n sections: LanguageModel; // Pass 2: chunked section extraction\n sectionsFallback: LanguageModel; // Pass 2 fallback when sections truncate\n enrichment: LanguageModel; // Pass 3: supplementary field enrichment\n}\n```\n\n### `createUniformModelConfig(model)`\n\nCreate a `ModelConfig` where every role uses the same model.\n\n```typescript\nfunction createUniformModelConfig(model: LanguageModel): ModelConfig;\n```\n\n### `createDefaultModelConfig()`\n\nCreate a `ModelConfig` using Anthropic defaults. 
Requires `@ai-sdk/anthropic` (lazy-imported).\n\n```typescript\nfunction createDefaultModelConfig(): ModelConfig;\n```\n\n### `MODEL_TOKEN_LIMITS`\n\n```typescript\nconst MODEL_TOKEN_LIMITS: {\n classification: 512;\n metadata: 4096;\n sections: 8192;\n sectionsFallback: 16384;\n enrichment: 4096;\n};\n```\n\n## Tool types\n\n### `ToolDefinition`\n\n```typescript\ninterface ToolDefinition {\n name: string;\n description: string;\n input_schema: {\n type: \"object\";\n properties: Record<string, unknown>;\n required?: string[];\n };\n}\n```\n\n## Extraction types\n\n### `LogFn`\n\n```typescript\ntype LogFn = (message: string) => Promise<void>;\n```\n\n### `PromptBuilder`\n\n```typescript\ntype PromptBuilder = (pageStart: number, pageEnd: number) => string;\n```\n\n### `ExtractOptions`\n\n```typescript\ninterface ExtractOptions {\n log?: LogFn;\n onMetadata?: (raw: string) => Promise<void>;\n models?: ModelConfig;\n metadataProviderOptions?: ProviderOptions;\n fallbackProviderOptions?: ProviderOptions;\n}\n```\n\n### `ClassifyOptions`\n\n```typescript\ninterface ClassifyOptions {\n log?: LogFn;\n models?: ModelConfig;\n}\n```\n\n### `ExtractSectionsOptions`\n\n```typescript\ninterface ExtractSectionsOptions {\n log?: LogFn;\n promptBuilder?: PromptBuilder;\n models?: ModelConfig;\n fallbackProviderOptions?: ProviderOptions;\n}\n```\n\n## PDF types\n\n### `AcroFormFieldInfo`\n\n```typescript\ninterface AcroFormFieldInfo {\n name: string;\n type: \"text\" | \"checkbox\" | \"dropdown\" | \"radio\";\n options?: string[];\n}\n```\n\n### `FieldMapping`\n\n```typescript\ninterface FieldMapping {\n acroFormName: string;\n value: string;\n}\n```\n\n### `TextOverlay`\n\n```typescript\ninterface TextOverlay {\n page: number;\n x: number;\n y: number;\n text: string;\n fontSize?: number;\n isCheckmark?: boolean;\n}\n```"
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
"slug": "application/overview",
|
|
111
|
+
"title": "Application Processing",
|
|
112
|
+
"description": "Prompt builders for insurance application detection, field extraction, and form filling",
|
|
113
|
+
"content": "CL SDK provides prompt builders for every stage of insurance application processing — from detecting application forms to filling completed PDFs.\n\n## Application detection\n\nClassify whether a PDF is an insurance application form:\n\n```typescript\n\n// Use as a prompt with any model + the PDF\nconst response = await generateText({\n model: yourModel,\n messages: [{\n role: \"user\",\n content: [\n { type: \"file\", data: pdfBase64, mediaType: \"application/pdf\" },\n { type: \"text\", text: APPLICATION_CLASSIFY_PROMPT },\n ],\n }],\n});\n```\n\n## Field extraction\n\nExtract all fillable fields from an application form:\n\n```typescript\n\nconst prompt = buildFieldExtractionPrompt();\n// Returns a prompt that extracts fields as structured data:\n// - text, numeric, currency, date, yes_no, table, declaration types\n```\n\n## Auto-fill\n\nMatch extracted fields against known business context to pre-populate answers:\n\n```typescript\n\nconst prompt = buildAutoFillPrompt(fields, businessContext);\n```\n\n## Question batching\n\nOrganize unfilled fields into topic-based batches for asking the user:\n\n```typescript\n\nconst prompt = buildQuestionBatchPrompt(unfilledFields);\n// Groups fields by topic (e.g., \"Company Information\", \"Loss History\")\n```\n\n## Answer parsing\n\nParse free-text replies back into structured field values:\n\n```typescript\n\nconst prompt = buildAnswerParsingPrompt(questions, userReply);\n```\n\n## Email generation\n\nGenerate batch emails asking users for missing information:\n\n```typescript\n\nconst prompt = buildBatchEmailGenerationPrompt(batch, context);\n```\n\n## Reply intent classification\n\nDetermine what a user's reply to an application question means:\n\n```typescript\n\nconst prompt = buildReplyIntentClassificationPrompt(userReply, currentBatch);\n```\n\n## Field explanation\n\nGenerate explanations for complex insurance form fields:\n\n```typescript\n\nconst prompt = 
buildFieldExplanationPrompt(field);\n```\n\n## Confirmation summary\n\nGenerate a summary of all filled fields for user confirmation before PDF generation:\n\n```typescript\n\nconst prompt = buildConfirmationSummaryPrompt(filledFields);\n```\n\n## PDF mapping\n\nMap filled field values back to PDF form coordinates for filling:\n\n<Tabs items={[\"AcroForm (fillable PDFs)\", \"Flat PDFs\"]}>\n <Tab value=\"AcroForm (fillable PDFs)\">\n ```typescript\n import { buildAcroFormMappingPrompt } from \"@claritylabs/cl-sdk\";\n\n const prompt = buildAcroFormMappingPrompt(filledFields, acroFormFields);\n // Maps answers to AcroForm field names\n ```\n </Tab>\n <Tab value=\"Flat PDFs\">\n ```typescript\n import { buildFlatPdfMappingPrompt } from \"@claritylabs/cl-sdk\";\n\n const prompt = buildFlatPdfMappingPrompt(filledFields, pageInfo);\n // Maps answers to percentage-based page coordinates\n ```\n </Tab>\n</Tabs>\n\n## Lookup fill\n\nFill fields by looking up values from existing documents:\n\n```typescript\n\nconst prompt = buildLookupFillPrompt(fields, documents);\n```"
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
"slug": "application/pdf-operations",
|
|
117
|
+
"title": "PDF Operations",
|
|
118
|
+
"description": "Fill AcroForm fields and overlay text on flat PDFs",
|
|
119
|
+
"content": "CL SDK provides two modes for writing data onto PDFs using `pdf-lib`: **AcroForm filling** for fillable PDFs and **text overlay** for flat/scanned PDFs.\n\n## AcroForm fields\n\n### Detect fields\n\nEnumerate all fillable form fields in a PDF:\n\n```typescript\n\nconst pdfDoc = await PDFDocument.load(pdfBytes);\nconst fields = getAcroFormFields(pdfDoc);\n\n// [\n// { name: \"insured_name\", type: \"text\" },\n// { name: \"agree_terms\", type: \"checkbox\" },\n// { name: \"state\", type: \"dropdown\", options: [\"CA\", \"NY\", \"TX\"] },\n// ]\n```\n\nReturns an empty array if the PDF has no AcroForm.\n\n**Field types:** `text`, `checkbox`, `dropdown`, `radio`\n\n### Fill and flatten\n\nFill AcroForm fields by name and flatten the form (makes fields non-editable):\n\n```typescript\n\nconst mappings: FieldMapping[] = [\n { acroFormName: \"insured_name\", value: \"Acme Corp\" },\n { acroFormName: \"effective_date\", value: \"01/01/2025\" },\n { acroFormName: \"agree_terms\", value: \"yes\" }, // checks the checkbox\n { acroFormName: \"state\", value: \"CA\" }, // selects dropdown option\n];\n\nconst filledPdfBytes = await fillAcroForm(pdfBytes, mappings);\n```\n\n**Checkbox values:** `\"yes\"`, `\"true\"`, `\"x\"`, `\"checked\"`, `\"on\"` check the box. 
Anything else unchecks it.\n\nFields not found in the PDF or values not in dropdown/radio options are silently skipped.\n\n## Text overlay\n\nFor flat or scanned PDFs without form fields, overlay text at specific coordinates:\n\n```typescript\n\nconst overlays: TextOverlay[] = [\n {\n page: 0, // 0-indexed page number\n x: 25.5, // percentage from left edge (0-100)\n y: 15.2, // percentage from top edge (0-100)\n text: \"Acme Corp\",\n fontSize: 10,\n },\n {\n page: 0,\n x: 60,\n y: 30,\n text: \"\",\n isCheckmark: true, // Draws an \"X\" instead of text\n fontSize: 12,\n },\n];\n\nconst overlaidPdfBytes = await overlayTextOnPdf(pdfBytes, overlays);\n```\n\n### Coordinate system\n\n- **`x`** — percentage from the left edge (0 = left, 100 = right)\n- **`y`** — percentage from the top edge (0 = top, 100 = bottom)\n- Coordinates are converted internally to pdf-lib's bottom-left origin system\n\n### TextOverlay type\n\n```typescript\ninterface TextOverlay {\n page: number; // 0-indexed page number\n x: number; // percentage from left (0-100)\n y: number; // percentage from top (0-100)\n text: string;\n fontSize?: number; // default: 10\n isCheckmark?: boolean;\n}\n```\n\nOverlays for pages that don't exist are silently skipped."
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
"slug": "changelog",
|
|
123
|
+
"title": "Changelog",
|
|
124
|
+
"description": "Release history for @claritylabs/cl-sdk",
|
|
125
|
+
"content": "All notable changes to `@claritylabs/cl-sdk` are documented here.\n\n## v1.0.0 — 2026-03-22\n\nFirst stable release under the `@claritylabs` scope on public npm.\n\n### Changed\n\n- Renamed from `@claritylabs-inc/cl-sdk` to `@claritylabs/cl-sdk`\n- Moved from GitHub Packages to public npm registry\n- MCP server published as `@claritylabs/cl-sdk-mcp` on npm\n- Consistent branding: \"CL SDK\" in prose, \"CL-1.0 SDK\" for versioned references\n\n## v0.2.5 — 2026-03-16\n\nCI pipeline fix for the docs repo dispatch workflow.\n\n### Changed\n\n- Updated `publish.yml` to use GitHub App tokens for cross-repo dispatch\n\n## v0.2.4 — 2026-03-16\n\nAgent name is now configurable instead of being hardcoded.\n\n### Changed\n\n- `AgentContext.agentName` is now optional — defaults to `\"CL Agent\"` if not set, replacing the previously hardcoded `\"Clarity Agent\"`\n\n## v0.2.3 — 2026-03-16\n\nDocument link guidance in agent prompts is now configurable.\n\n### Added\n\n- `AgentContext.linkGuidance` — optional custom link guidance string for agent prompts, replacing hardcoded policy/quote link examples\n\n## v0.2.2 — 2026-03-15\n\nCI fix for semantic-release permissions.\n\n### Changed\n\n- Added `issues` and `pull-requests` write permissions to the publish workflow for semantic-release\n\n## v0.2.1 — 2026-03-15\n\nCI fix for Node.js version compatibility.\n\n### Changed\n\n- Bumped Node.js version to 22 in CI for semantic-release v25 compatibility\n\n## v0.2.0 — 2026-03-15\n\nMajor refactor: provider-agnostic models, composable agent prompts, and a restructured type system.\n\n### Added\n\n- Provider-agnostic extraction pipeline via Vercel AI SDK `LanguageModel` instances (replaces hardcoded Anthropic)\n- Composable agent prompt modules (`buildAgentSystemPrompt`, `buildIdentityPrompt`, `buildIntentPrompt`, `buildFormattingPrompt`, `buildSafetyPrompt`, `buildCoverageGapPrompt`, `buildCoiRoutingPrompt`, `buildCustomerPoliciesPrompt`)\n- Platform and intent model 
(`AgentPlatform`, `AgentIntent`)\n- Agent tool definitions (`getAgentTools`)\n- Structured type system with `ExtractionResult`, `ClassificationResult`, `ApplicationResult`\n- Automated versioning via semantic-release\n\n### Changed\n\n- Models are now passed as `LanguageModel` instances instead of string identifiers\n- Agent prompt generation is now composable — individual modules can be used independently\n- Type exports reorganized under structured namespaces\n\n## v0.1.1 — 2026-03-14\n\nPatch release with no functional changes.\n\n## v0.1.0 — 2026-03-14\n\nInitial release of `@claritylabs/cl-sdk`.\n\n### Added\n\n- Document classification (`classifyDocumentType`)\n- PDF extraction pipeline (`extractFromPdf`, `extractQuoteFromPdf`)\n- Application result processing\n- PDF operations (merge, split, extract pages)\n- TypeScript type definitions for insurance domain models\n- CI publishing pipeline"
|
|
126
|
+
},
|
|
127
|
+
{
|
|
128
|
+
"slug": "extraction/applying-results",
|
|
129
|
+
"title": "Applying Results",
|
|
130
|
+
"description": "Map raw extraction output to structured, persistence-ready fields",
|
|
131
|
+
"content": "The extraction pipeline returns raw JSON from the AI model. The `applyExtracted` and `applyExtractedQuote` functions normalize this into clean, typed fields ready for persistence.\n\n## Policies\n\n```typescript\n\nconst { extracted } = await extractFromPdf(pdfBase64);\nconst fields = applyExtracted(extracted);\n```\n\n### Output fields\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `carrier` | `string` | Insurance carrier name |\n| `security` | `string?` | Security/surplus lines carrier |\n| `underwriter` | `string?` | Underwriter name |\n| `mga` | `string?` | Managing General Agent |\n| `broker` | `string?` | Broker name |\n| `policyNumber` | `string` | Policy number |\n| `policyTypes` | `string[]` | Types (e.g., `[\"General Liability\"]`) |\n| `documentType` | `\"policy\" \\| \"quote\"` | Document type |\n| `policyYear` | `number` | Policy year |\n| `effectiveDate` | `string` | Policy effective date |\n| `expirationDate` | `string` | Policy expiration date |\n| `isRenewal` | `boolean` | Whether this is a renewal |\n| `coverages` | `Coverage[]` | Coverage details with limits |\n| `premium` | `string?` | Total premium |\n| `insuredName` | `string` | Name of the insured |\n| `summary` | `string?` | AI-generated summary |\n| `metadataSource` | `object?` | Raw metadata from pass 1 |\n| `document` | `object?` | Sections and supplementary data from pass 2-3 |\n| `extractionStatus` | `\"complete\"` | Always `\"complete\"` |\n| `extractionError` | `\"\"` | Always empty string |\n\n## Quotes\n\n```typescript\n\nconst { extracted } = await extractQuoteFromPdf(pdfBase64);\nconst fields = applyExtractedQuote(extracted);\n```\n\n### Additional quote fields\n\n| Field | Type | Description |\n|-------|------|-------------|\n| `quoteNumber` | `string` | Quote number |\n| `quoteYear` | `number` | Quote year |\n| `proposedEffectiveDate` | `string?` | Proposed effective date |\n| `proposedExpirationDate` | `string?` | Proposed expiration date |\n| 
`quoteExpirationDate` | `string?` | When the quote expires |\n| `premiumBreakdown` | `PremiumLine[]?` | Line-by-line premium |\n| `subjectivities` | `Subjectivity[]?` | Binding conditions |\n| `underwritingConditions` | `UnderwritingCondition[]?` | Carrier requirements |\n\nQuote coverages use `proposedLimit` and `proposedDeductible` instead of `limit` and `deductible`.\n\n## Null handling\n\nBoth functions use `sanitizeNulls` internally to convert `null` values to `undefined`. This is required for frameworks like Convex that reject `null` for optional fields. All coverages, metadata sources, and document sections are sanitized recursively."
|
|
132
|
+
},
|
|
133
|
+
{
|
|
134
|
+
"slug": "extraction/classification",
|
|
135
|
+
"title": "Document Classification",
|
|
136
|
+
"description": "Classify insurance documents as policies or quotes",
|
|
137
|
+
"content": "Before running the full extraction pipeline, CL SDK can classify a document to determine whether it's a policy or a quote. This drives which extraction flow to use.\n\n## Basic usage\n\n```typescript\n\nconst { documentType, confidence, signals } = await classifyDocumentType(pdfBase64);\n\nconsole.log(documentType); // \"policy\" | \"quote\"\nconsole.log(confidence); // 0.95\nconsole.log(signals); // [\"declarations_page\", \"policy_number_present\", ...]\n```\n\n## Options\n\n```typescript\nconst result = await classifyDocumentType(pdfBase64, {\n log: async (msg) => console.log(msg),\n models: customModelConfig,\n});\n```\n\n| Option | Type | Description |\n|--------|------|-------------|\n| `log` | `LogFn` | Callback for progress logging |\n| `models` | `ModelConfig` | Custom model configuration. Uses `classification` role. |\n\n## How it works\n\nClassification is **Pass 0** in the pipeline. It uses the `classification` model (Claude Haiku by default) to analyze the PDF and return:\n\n- **`documentType`** — `\"policy\"` or `\"quote\"`. Defaults to `\"policy\"` on parse failure.\n- **`confidence`** — numeric confidence score (0-1). Defaults to 0.5 on parse failure.\n- **`signals`** — array of strings describing what signals the model detected.\n\n## Routing extraction\n\nA typical flow uses classification to choose the right extraction function:\n\n```typescript\n classifyDocumentType,\n extractFromPdf,\n extractQuoteFromPdf,\n applyExtracted,\n applyExtractedQuote,\n} from \"@claritylabs/cl-sdk\";\n\nconst { documentType } = await classifyDocumentType(pdfBase64);\n\nif (documentType === \"quote\") {\n const { extracted } = await extractQuoteFromPdf(pdfBase64);\n return applyExtractedQuote(extracted);\n} else {\n const { extracted } = await extractFromPdf(pdfBase64);\n return applyExtracted(extracted);\n}\n```"
|
|
138
|
+
},
|
|
139
|
+
{
|
|
140
|
+
"slug": "extraction/pipeline",
|
|
141
|
+
"title": "Extraction Pipeline",
|
|
142
|
+
"description": "Turn insurance PDFs into structured data with the multi-pass extraction pipeline",
|
|
143
|
+
"content": "CL SDK's extraction pipeline processes insurance documents in multiple passes, producing structured data with page-level provenance.\n\n## Policy extraction\n\n`extractFromPdf` runs the full pipeline (passes 1-3) for policy documents:\n\n```typescript\n\nconst { rawText, extracted } = await extractFromPdf(pdfBase64, {\n log: async (msg) => console.log(msg),\n onMetadata: async (raw) => {\n // Save metadata immediately — survives if pass 2 fails\n await db.saveMetadata(docId, raw);\n },\n});\n\nconst fields = applyExtracted(extracted);\n```\n\n### What gets extracted\n\n**Pass 1 — Metadata:**\n- Carrier, security, underwriter, MGA, broker\n- Policy number, effective/expiration dates, policy year\n- Premium, insured name, policy types\n- Coverage table (name, limit, deductible, page number)\n\n**Pass 2 — Sections:**\n- Structured sections with title, page range, type, content\n- Subsections with section numbers\n- Coverage type classification per section\n\n**Pass 3 — Enrichment:**\n- Regulatory context (structured)\n- Complaint contacts\n- Costs and fees\n- Claims contacts\n\n## Quote extraction\n\n`extractQuoteFromPdf` runs a quote-specific pipeline (passes 1-2):\n\n```typescript\n\nconst { extracted } = await extractQuoteFromPdf(pdfBase64);\nconst fields = applyExtractedQuote(extracted);\n```\n\nIn addition to standard metadata, quotes extract:\n- **Premium breakdown** — `[{ line: \"GL\", amount: \"$5,200\" }]`\n- **Subjectivities** — conditions for binding\n- **Underwriting conditions** — carrier requirements\n- **Proposed dates** — effective, expiration, quote expiration\n\n## Retrying sections\n\nIf metadata succeeded but sections failed, retry just pass 2 without re-extracting metadata:\n\n```typescript\n\nconst { extracted } = await extractSectionsOnly(pdfBase64, savedMetadataRaw, {\n log: async (msg) => console.log(msg),\n});\n```\n\n## Chunking strategy\n\nDocuments are split into page chunks for section extraction. 
CL SDK uses an adaptive strategy:\n\n1. Start with **15-page chunks**\n2. On JSON parse failure (output truncation), re-split to **10-page chunks**\n3. If still failing, re-split to **5-page chunks**\n4. If all sizes fail, escalate to the `sectionsFallback` model with higher token limits\n\n```typescript\n\nconst chunks = getPageChunks(45, 15);\n// [[1, 15], [16, 30], [31, 45]]\n```\n\n## Merging results\n\nAfter chunked extraction, results are merged:\n\n```typescript\n\n// Policies\nconst merged = mergeChunkedSections(metadataResult, sectionChunks);\n\n// Quotes (also merges subjectivities + underwriting conditions)\nconst quoteMerged = mergeChunkedQuoteSections(metadataResult, sectionChunks);\n```\n\n## Early persistence\n\nThe `onMetadata` callback fires after pass 1 completes, before sections extraction begins. This ensures metadata is persisted even if pass 2 fails:\n\n```typescript\nconst { extracted } = await extractFromPdf(pdfBase64, {\n onMetadata: async (raw) => {\n const parsed = JSON.parse(raw);\n await db.patch(docId, {\n carrier: parsed.metadata.carrier,\n extractionStatus: \"metadata_complete\",\n });\n },\n});\n```\n\n## Utility functions\n\n### `stripFences(text)`\n\nRemoves markdown code fences from AI responses before JSON parsing:\n\n```typescript\n\nstripFences('```json\\n{\"key\": \"value\"}\\n```');\n// '{\"key\": \"value\"}'\n```\n\n### `sanitizeNulls(obj)`\n\nRecursively converts `null` values to `undefined`. Required for frameworks like Convex that reject `null` for optional fields:\n\n```typescript\n\nsanitizeNulls({ a: null, b: [null, 1] });\n// { a: undefined, b: [undefined, 1] }\n```"
|
|
144
|
+
},
|
|
145
|
+
{
|
|
146
|
+
"slug": "getting-started/architecture",
|
|
147
|
+
"title": "Architecture",
|
|
148
|
+
"description": "How CL SDK's multi-pass extraction pipeline and composable systems work",
|
|
149
|
+
"content": "CL SDK is organized into four systems: **document extraction**, **application processing**, **agent prompts**, and **PDF operations**. Each is independent — import only what you need.\n\n## Document extraction pipeline\n\nThe core of CL SDK is a multi-pass pipeline that turns insurance PDFs into structured, queryable data.\n\n```mermaid\nflowchart LR\n PDF[PDF Document] --> P0[Pass 0: Classification]\n P0 --> P1[Pass 1: Metadata]\n P1 --> P2[Pass 2: Sections]\n P2 --> P3[Pass 3: Enrichment]\n P3 --> OUT[Structured Data]\n```\n\n### Pass 0 — Classification\n\nDetermines whether a document is a **policy** or a **quote**. Returns document type, confidence score, and supporting signals. Uses the `classification` model (fast/cheap).\n\n### Pass 1 — Metadata\n\nExtracts high-level metadata: carrier, policy/quote number, dates, premium, insured name, and a coverage table with limits and deductibles. Uses the `metadata` model (capable).\n\nSupports an `onMetadata` callback for early persistence — metadata is saved immediately so it survives downstream failures.\n\n### Pass 2 — Sections\n\nSplits the document into page chunks (starting at 15 pages) and extracts structured sections with page-level provenance. Uses the `sections` model.\n\n**Adaptive fallback**: if a chunk's output is truncated (JSON parse failure), CL SDK re-splits into smaller chunks (10 pages, then 5), and escalates to the `sectionsFallback` model with higher token limits. Results are merged across all chunks.\n\n### Pass 3 — Enrichment\n\nA non-fatal pass that parses raw text into structured supplementary fields: regulatory context, complaint contacts, costs and fees, claims contacts. Uses the `enrichment` model. 
Failures here don't fail the pipeline.\n\n## Quote-specific extraction\n\nQuotes run a variant pipeline (passes 1-2) that also extracts:\n\n- **Premium breakdowns** — line-by-line premium details\n- **Subjectivities** — conditions that must be met before binding\n- **Underwriting conditions** — carrier requirements\n\n## Application processing\n\nCL SDK provides prompt builders for the full application lifecycle:\n\n```mermaid\nflowchart LR\n D[Detection] --> F[Field Extraction]\n F --> A[Auto-Fill]\n A --> Q[Question Batching]\n Q --> P[Answer Parsing]\n P --> PDF[PDF Filling]\n```\n\n1. **Detection** — classify whether a PDF is an insurance application form\n2. **Field extraction** — read every field as structured data (text, numeric, currency, date, yes/no, table, declaration)\n3. **Auto-fill** — match fields against known business context\n4. **Question batching** — organize unfilled fields into topic-based batches\n5. **Answer parsing** — parse free-text replies into structured field values\n6. **PDF filling** — write answers back onto the PDF (AcroForm or text overlay)\n\n## Agent prompt system\n\nA composable system for building insurance-aware conversational agents:\n\n```\nbuildAgentSystemPrompt(ctx)\n ├── Identity — agent name, company context\n ├── Intent — direct / mediated / observed behavior\n ├── Formatting — platform-specific output rules\n ├── Safety — scope guardrails, anti-hallucination\n ├── Coverage gaps — gap detection guidance\n ├── COI routing — certificate of insurance handling\n ├── Quotes/policies — document type differentiation\n └── Memory — cross-conversation continuity\n```\n\nEach module is independently importable for custom composition. The system supports five platforms (email, chat, SMS, Slack, Discord) and three communication intents (direct, mediated, observed).\n\n## Design principles\n\n- **Provider-agnostic** — accepts any `LanguageModel` from the Vercel AI SDK. 
Default Anthropic models are lazy-loaded and optional.\n- **Pure TypeScript** — no framework dependencies. Works in Node.js, Deno, edge runtimes.\n- **Fail gracefully** — early persistence callbacks, non-fatal enrichment, adaptive chunk retry.\n- **Schema-only tools** — tool definitions provide schemas without implementations, so consumers control execution."
|
|
150
|
+
},
|
|
151
|
+
{
|
|
152
|
+
"slug": "getting-started/models",
|
|
153
|
+
"title": "Model Configuration",
|
|
154
|
+
"description": "Configure which AI models run each pipeline pass",
|
|
155
|
+
"content": "CL SDK's extraction pipeline uses different models for different passes. You can use the defaults, a single model for everything, or assign models per role.\n\n## Default configuration\n\nIf you don't pass a `models` option, CL SDK uses Anthropic defaults via `createDefaultModelConfig()`:\n\n| Role | Default Model | Purpose |\n|------|--------------|---------|\n| `classification` | Claude Haiku 4.5 | Fast document type detection |\n| `metadata` | Claude Sonnet 4.6 | Accurate metadata extraction |\n| `sections` | Claude Haiku 4.5 | Chunked section extraction |\n| `sectionsFallback` | Claude Sonnet 4.6 | Retry when sections truncate |\n| `enrichment` | Claude Haiku 4.5 | Supplementary field parsing |\n\n<Callout type=\"info\">\n Default models require `@ai-sdk/anthropic` as a peer dependency. It's lazy-imported — consumers using other providers never need it installed.\n</Callout>\n\n## Uniform model\n\nUse the same model for every pass:\n\n```typescript\n\nconst anthropic = createAnthropic();\nconst { extracted } = await extractFromPdf(pdfBase64, {\n models: createUniformModelConfig(anthropic(\"claude-sonnet-4-6\")),\n});\n```\n\n## Fine-grained configuration\n\nAssign a different model to each pipeline role:\n\n```typescript\n\nconst anthropic = createAnthropic();\nconst models: ModelConfig = {\n classification: anthropic(\"claude-haiku-4-5-20251001\"),\n metadata: anthropic(\"claude-sonnet-4-6\"),\n sections: anthropic(\"claude-haiku-4-5-20251001\"),\n sectionsFallback: anthropic(\"claude-sonnet-4-6\"),\n enrichment: anthropic(\"claude-haiku-4-5-20251001\"),\n};\n\nconst { extracted } = await extractFromPdf(pdfBase64, { models });\n```\n\n## Non-Anthropic providers\n\nCL SDK works with any provider that implements the Vercel AI SDK `LanguageModel` interface:\n\n<Tabs items={[\"OpenAI\", \"Google\"]}>\n <Tab value=\"OpenAI\">\n ```typescript\n import { createOpenAI } from \"@ai-sdk/openai\";\n import { extractFromPdf, createUniformModelConfig } from 
\"@claritylabs/cl-sdk\";\n\n const openai = createOpenAI();\n const { extracted } = await extractFromPdf(pdfBase64, {\n models: createUniformModelConfig(openai(\"gpt-4o\")),\n metadataProviderOptions: {}, // disable Anthropic-specific thinking\n });\n ```\n </Tab>\n <Tab value=\"Google\">\n ```typescript\n import { createGoogleGenerativeAI } from \"@ai-sdk/google\";\n import { extractFromPdf, createUniformModelConfig } from \"@claritylabs/cl-sdk\";\n\n const google = createGoogleGenerativeAI();\n const { extracted } = await extractFromPdf(pdfBase64, {\n models: createUniformModelConfig(google(\"gemini-2.0-flash\")),\n metadataProviderOptions: {},\n });\n ```\n </Tab>\n</Tabs>\n\n<Callout type=\"warn\">\n When using non-Anthropic providers, set `metadataProviderOptions: {}` and `fallbackProviderOptions: {}` to disable Anthropic-specific extended thinking, which is enabled by default.\n</Callout>\n\n## Token limits\n\nCL SDK sets per-role token limits based on the task, not the provider:\n\n| Role | Max Output Tokens |\n|------|------------------|\n| `classification` | 512 |\n| `metadata` | 4,096 |\n| `sections` | 8,192 |\n| `sectionsFallback` | 16,384 |\n| `enrichment` | 4,096 |\n\nThese are exported as `MODEL_TOKEN_LIMITS` for reference but are managed internally by the pipeline.\n\n## Provider options\n\nThe `metadataProviderOptions` and `fallbackProviderOptions` fields pass provider-specific configuration (like Anthropic extended thinking) through to the Vercel AI SDK:\n\n```typescript\nconst { extracted } = await extractFromPdf(pdfBase64, {\n metadataProviderOptions: {\n anthropic: { thinking: { type: \"enabled\", budgetTokens: 8192 } },\n },\n});\n```\n\nBy default, Anthropic thinking is enabled with a 4,096 token budget for metadata and fallback calls."
|
|
156
|
+
},
|
|
157
|
+
{
|
|
158
|
+
"slug": "getting-started/quickstart",
|
|
159
|
+
"title": "Quickstart",
|
|
160
|
+
"description": "Install CL SDK and run your first insurance document extraction",
|
|
161
|
+
"content": "## Installation\n\nInstall CL SDK and its peer dependencies:\n\n```bash\nnpm install @claritylabs/cl-sdk ai pdf-lib\n```\n\nThen install a model provider:\n\n```bash\n# Anthropic (default)\nnpm install @ai-sdk/anthropic\n\n# OpenAI\nnpm install @ai-sdk/openai\n\n# Google\nnpm install @ai-sdk/google\n```\n\n<Callout type=\"info\">\n CL SDK is published on [npm](https://www.npmjs.com/package/@claritylabs/cl-sdk) under the `@claritylabs` scope.\n</Callout>\n\n## Extract a policy\n\nThe simplest path — default Anthropic models, no configuration:\n\n```typescript\n\n// Load a PDF as base64\nconst pdfBase64 = readFileSync(\"./policy.pdf\").toString(\"base64\");\n\n// Step 1: Classify — is this a policy or a quote?\nconst { documentType, confidence } = await classifyDocumentType(pdfBase64);\nconsole.log(`Classified as ${documentType} (confidence: ${confidence})`);\n\n// Step 2: Extract — run the full multi-pass pipeline\nconst { extracted } = await extractFromPdf(pdfBase64);\n\n// Step 3: Apply — map raw extraction to structured fields\nconst fields = applyExtracted(extracted);\nconsole.log(fields.carrier); // \"Hartford\"\nconsole.log(fields.policyNumber); // \"GL-2024-001234\"\nconsole.log(fields.coverages); // [{ name: \"General Liability\", limit: \"$1,000,000\", ... }]\n```\n\n## Extract a quote\n\nQuotes have a separate pipeline that captures quote-specific fields like subjectivities and premium breakdowns:\n\n```typescript\n\nconst { extracted } = await extractQuoteFromPdf(pdfBase64);\nconst fields = applyExtractedQuote(extracted);\n\nconsole.log(fields.quoteNumber); // \"Q-2024-5678\"\nconsole.log(fields.premiumBreakdown); // [{ line: \"GL\", amount: \"$5,200\" }, ...]\nconsole.log(fields.subjectivities); // [{ description: \"Loss runs required\", ... 
}]\n```\n\n## Use a custom model\n\nBring any model from any provider:\n\n```typescript\n\nconst openai = createOpenAI();\nconst { extracted } = await extractFromPdf(pdfBase64, {\n models: createUniformModelConfig(openai(\"gpt-4o\")),\n metadataProviderOptions: {}, // disable Anthropic-specific thinking\n});\n```\n\n## Add logging\n\nEvery pipeline function accepts a `log` callback:\n\n```typescript\nconst { extracted } = await extractFromPdf(pdfBase64, {\n log: async (msg) => console.log(`[cl-sdk] ${msg}`),\n});\n```\n\nOutput:\n\n```\n[cl-sdk] Pass 1: Extracting metadata...\n[cl-sdk] Calling model (max 4096 tokens)...\n[cl-sdk] 12450 in / 2300 out tokens (3.2s)\n[cl-sdk] Document: 45 page(s)\n[cl-sdk] Pass 2: Extracting sections pages 1–15...\n...\n```\n\n## Next steps\n\n<Cards>\n <Card title=\"Architecture\" description=\"Learn how the multi-pass pipeline works.\" href=\"/docs/getting-started/architecture\" />\n <Card title=\"Model configuration\" description=\"Fine-tune which model runs each pass.\" href=\"/docs/getting-started/models\" />\n</Cards>"
|
|
162
|
+
},
|
|
163
|
+
{
|
|
164
|
+
"slug": "index",
|
|
165
|
+
"title": "CL SDK",
|
|
166
|
+
"description": "An SDK for AI agents working with insurance",
|
|
167
|
+
"content": "[Clarity Labs](https://claritylabs.inc) is an applied AI research lab building the infrastructure for AI to work safely with insurance.\n\nCL SDK is an SDK for AI agents working with insurance. Any product or agent can import it to understand, reason about, and act on insurance. Provider-agnostic — works with Anthropic, OpenAI, Google, or any model via the [Vercel AI SDK](https://sdk.vercel.ai).\n\n## Capabilities\n\n<Cards>\n <Card title=\"Document Extraction\" description=\"Multi-pass pipeline that turns insurance PDFs into structured data — coverages, limits, exclusions, sections — with page-level provenance.\" />\n <Card title=\"Agent System\" description=\"Composable prompt modules for building insurance-aware conversational agents across email, chat, SMS, Slack, and Discord.\" />\n <Card title=\"Application Processing\" description=\"Prompt builders for detecting application forms, extracting fields, auto-filling from context, batching questions, and filling PDFs.\" />\n</Cards>\n\n## Install\n\n```bash\nnpm install @claritylabs/cl-sdk ai pdf-lib\n```\n\n```typescript\n\nconst { documentType } = await classifyDocumentType(pdfBase64);\nconst { extracted } = await extractFromPdf(pdfBase64);\nconst fields = applyExtracted(extracted);\n```\n\n## Get started\n\n<Cards>\n <Card title=\"Quickstart\" description=\"Install CL SDK and run your first extraction in minutes.\" href=\"/docs/getting-started/quickstart\" />\n <Card title=\"Architecture\" description=\"Understand the multi-pass extraction pipeline and how components fit together.\" href=\"/docs/getting-started/architecture\" />\n <Card title=\"Model Configuration\" description=\"Configure models per pipeline pass — or bring any provider.\" href=\"/docs/getting-started/models\" />\n <Card title=\"API Reference\" description=\"Full reference for every exported function and type.\" href=\"/docs/api-reference/extraction\" />\n</Cards>"
|
|
168
|
+
},
|
|
169
|
+
{
|
|
170
|
+
"slug": "mcp-server/configuration",
|
|
171
|
+
"title": "Configuration",
|
|
172
|
+
"description": "Configure model providers and API keys for the CL SDK MCP server",
|
|
173
|
+
"content": "## Model configuration\n\nThe extraction tools (`classify_document`, `extract_policy`, `extract_quote`) need a configured model and API key. The server supports two configuration methods: environment variables (simplest) or a config file.\n\n```json\n{\n \"provider\": \"anthropic\",\n \"model\": \"claude-haiku-4-5-20251001\",\n \"apiKey\": \"${ANTHROPIC_API_KEY}\"\n}\n```\n\n### Fields\n\n| Field | Description | Default |\n|-------|-------------|---------|\n| `provider` | AI provider: `anthropic`, `openai`, or `google` | `anthropic` |\n| `model` | Model ID for the provider | `claude-haiku-4-5-20251001` |\n| `apiKey` | API key, supports `${ENV_VAR}` syntax | `${ANTHROPIC_API_KEY}` |\n\n### Environment variable expansion\n\nThe `apiKey` field supports `${ENV_VAR}` syntax. The server resolves these from `process.env` at startup. This lets you keep secrets in your shell environment rather than in the config file.\n\n## Provider examples\n\n<Tabs items={[\"Anthropic\", \"OpenAI\", \"Google\"]}>\n<Tab value=\"Anthropic\">\n```json\n{\n \"provider\": \"anthropic\",\n \"model\": \"claude-haiku-4-5-20251001\",\n \"apiKey\": \"${ANTHROPIC_API_KEY}\"\n}\n```\n\nSet the environment variable:\n\n```bash\nexport ANTHROPIC_API_KEY=sk-ant-...\n```\n</Tab>\n<Tab value=\"OpenAI\">\n```json\n{\n \"provider\": \"openai\",\n \"model\": \"gpt-4o-mini\",\n \"apiKey\": \"${OPENAI_API_KEY}\"\n}\n```\n\n```bash\nexport OPENAI_API_KEY=sk-...\n```\n</Tab>\n<Tab value=\"Google\">\n```json\n{\n \"provider\": \"google\",\n \"model\": \"gemini-2.0-flash\",\n \"apiKey\": \"${GOOGLE_API_KEY}\"\n}\n```\n\n```bash\nexport GOOGLE_API_KEY=AI...\n```\n</Tab>\n</Tabs>\n\n## Environment variables\n\nWithout a config file, the server reads directly from environment variables:\n\n| Variable | Description | Default |\n|----------|-------------|---------|\n| `CL_MCP_PROVIDER` | AI provider: `anthropic`, `openai`, or `google` | `anthropic` |\n| `CL_MCP_MODEL` | Model ID | 
`claude-haiku-4-5-20251001` |\n| `ANTHROPIC_API_KEY` | Anthropic API key (checked first) | — |\n| `OPENAI_API_KEY` | OpenAI API key (fallback) | — |\n| `GOOGLE_API_KEY` | Google API key (fallback) | — |\n\nThe simplest setup — just export your API key and the defaults work:\n\n```bash\nexport ANTHROPIC_API_KEY=sk-ant-...\n```\n\n<Callout type=\"warn\">\n Documentation tools and pure SDK tools (prompt builders, `apply_extracted`, etc.) work without any model configuration. Only the extraction and classification tools require a valid API key.\n</Callout>\n\n## Uniform model config\n\nThe server uses `createUniformModelConfig()` from the SDK, which assigns the same model to all extraction pipeline roles (classification, metadata, sections, enrichment). For production use with large documents, consider using the SDK directly with per-role model configuration — see [Model configuration](/docs/getting-started/models)."
|
|
174
|
+
},
|
|
175
|
+
{
|
|
176
|
+
"slug": "mcp-server/overview",
|
|
177
|
+
"title": "Overview",
|
|
178
|
+
"description": "Use CL SDK tools and documentation directly from AI coding assistants via MCP",
|
|
179
|
+
"content": "The CL SDK MCP server exposes SDK functions and documentation search as [Model Context Protocol](https://modelcontextprotocol.io/) tools. This lets AI coding assistants like Claude Code, Cursor, and Windsurf call CL SDK functions and search these docs without leaving your editor.\n\n## What it provides\n\n- **Documentation search** — full-text search across all doc pages, read individual pages, list the doc tree\n- **Prompt builders** — generate system prompts, extraction prompts, and application field prompts\n- **Extraction pipeline** — classify documents, extract policies and quotes from PDFs\n- **PDF operations** — list form fields, fill AcroForms, overlay text on flat PDFs\n\n## Quick start\n\nInstall the MCP server package:\n\n```bash\nnpm install -g @claritylabs/cl-sdk-mcp\n```\n\nThen add it to your MCP client. See [Client setup](#client-setup) below.\n\n<Callout type=\"info\">\n Published on [npm](https://www.npmjs.com/package/@claritylabs/cl-sdk-mcp) under the `@claritylabs` scope.\n</Callout>\n\n## Architecture\n\nThe server is a standalone package (`@claritylabs/cl-sdk-mcp`) that bundles the documentation content and depends on `@claritylabs/cl-sdk` for SDK functionality.\n\n```\n┌──────────────────┐ ┌──────────────────────────────────────┐\n│ │ stdio │ @claritylabs/cl-sdk-mcp │\n│ AI Assistant │◄────────►│ │\n│ │ │ ┌───────────────┐ ┌───────────────┐ │\n│ Claude Code │ │ │ Doc tools │ │ SDK tools │ │\n│ Cursor │ │ │ │ │ │ │\n│ Windsurf │ │ │ search_docs │ │ Prompt │ │\n│ ... │ │ │ read_doc_page │ │ builders │ │\n│ │ │ │ list_sections │ │ Extraction │ │\n└──────────────────┘ │ │ │ │ PDF ops │ │\n │ └───────┬───────┘ └───────┬───────┘ │\n │ │ │ │\n │ ▼ ▼ │\n │ ┌───────────────┐ ┌───────────────┐ │\n │ │ docs-bundle │ │ @claritylabs │ │\n │ │ .json │ │ /cl-sdk │ │\n │ └───────────────┘ └───────────────┘ │\n └──────────────────────────────────────┘\n```\n\n1. 
Documentation is bundled as `docs-bundle.json` at publish time — no filesystem access needed\n2. SDK functions are imported from `@claritylabs/cl-sdk` (installed as a dependency)\n3. Model config is read from `mcp-config.json` or environment variables\n4. Communicates via `StdioServerTransport`\n\n## Client setup\n\n### Claude Code\n\nAdd to your project's `.claude/mcp.json` or global `~/.claude/mcp.json`:\n\n```json\n{\n \"mcpServers\": {\n \"cl-sdk\": {\n \"command\": \"npx\",\n \"args\": [\"@claritylabs/cl-sdk-mcp\"]\n }\n }\n}\n```\n\nIf you're working in the `cl-sdk-docs` repo directly, the `.claude/mcp.json` in the repo auto-registers it.\n\n### Cursor / Windsurf\n\nAdd to your MCP settings (usually `~/.cursor/mcp.json` or equivalent):\n\n```json\n{\n \"mcpServers\": {\n \"cl-sdk\": {\n \"command\": \"npx\",\n \"args\": [\"@claritylabs/cl-sdk-mcp\"]\n }\n }\n}\n```\n\n### Other MCP clients\n\nAny client that supports stdio servers:\n\n```bash\nnpx @claritylabs/cl-sdk-mcp\n```\n\n<Cards>\n <Card title=\"Configuration\" description=\"Set up model providers and API keys.\" href=\"/docs/mcp-server/configuration\" />\n <Card title=\"Tools reference\" description=\"Full list of all 15 MCP tools.\" href=\"/docs/mcp-server/tools\" />\n</Cards>"
|
|
180
|
+
},
|
|
181
|
+
{
|
|
182
|
+
"slug": "mcp-server/tools",
|
|
183
|
+
"title": "Tools Reference",
|
|
184
|
+
"description": "Complete reference for all 15 MCP tools provided by the CL SDK server",
|
|
185
|
+
"content": "## Documentation tools\n\nThese tools search and read the CL SDK documentation. They require no model configuration.\n\n### `list_doc_sections`\n\nList all documentation sections and their pages.\n\n**Input**: none\n\n**Output**: JSON array of sections with `title`, `slug`, and `pages`.\n\n```json\n[\n { \"title\": \"Getting Started\", \"slug\": \"getting-started\", \"pages\": [\"quickstart\", \"architecture\", \"models\"] },\n { \"title\": \"Document Extraction\", \"slug\": \"extraction\", \"pages\": [\"pipeline\", \"classification\", \"applying-results\"] }\n]\n```\n\n### `search_docs`\n\nFull-text search across all MDX documentation pages. Returns top 5 matches ranked by relevance.\n\n**Input**:\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `query` | string | yes | Search query |\n| `section` | string | no | Limit to a section slug (e.g. `extraction`) |\n\n**Output**: Ranked results with title, slug, score, and a text excerpt around the match.\n\n### `read_doc_page`\n\nRead a specific documentation page by its slug.\n\n**Input**:\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `slug` | string | yes | Page slug (e.g. `getting-started/quickstart`) |\n\n**Output**: The page title and full MDX content.\n\n---\n\n## SDK prompt builders\n\nPure functions that return prompt strings. No model or API key needed.\n\n### `build_agent_system_prompt`\n\nGenerate an insurance-aware agent system prompt. 
Wraps `buildAgentSystemPrompt(ctx)`.\n\n**Input**:\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `platform` | `\"email\"` \\| `\"chat\"` \\| `\"sms\"` \\| `\"slack\"` \\| `\"discord\"` | yes | Communication platform |\n| `intent` | `\"direct\"` \\| `\"mediated\"` \\| `\"observed\"` | yes | Communication intent |\n| `siteUrl` | string | yes | Company website URL |\n| `companyName` | string | no | Company name |\n| `companyContext` | string | no | Company context details |\n| `userName` | string | no | Recipient name |\n| `coiHandling` | `\"broker\"` \\| `\"user\"` \\| `\"member\"` \\| `\"ignore\"` | no | COI routing strategy |\n| `brokerName` | string | no | Broker company name |\n| `brokerContactName` | string | no | Broker contact name |\n| `brokerContactEmail` | string | no | Broker contact email |\n\n### `build_field_extraction_prompt`\n\nGet the application field extraction prompt. Wraps `buildFieldExtractionPrompt()`.\n\n**Input**: none\n\n### `build_auto_fill_prompt`\n\nGenerate an auto-fill prompt for application fields. Wraps `buildAutoFillPrompt(fields, orgContext)`.\n\n**Input**:\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `fields` | `{ id, label, fieldType, section }[]` | yes | Application fields |\n| `orgContext` | `{ key, value, category }[]` | yes | Organization context |\n\n### `build_question_batch_prompt`\n\nGenerate a batched question prompt for unfilled fields. Wraps `buildQuestionBatchPrompt(unfilledFields)`.\n\n**Input**:\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `unfilledFields` | array | yes | Fields with `id`, `fieldType`, `section`, `required`, and optional `label`, `text`, `condition` |\n\n---\n\n## Data transformation tools\n\nMap raw extraction output to structured fields. No model needed.\n\n### `apply_extracted`\n\nMap raw policy extraction JSON to structured fields. 
Wraps `applyExtracted(extracted)`.\n\n**Input**:\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `extracted` | any | yes | Raw JSON from `extractFromPdf` |\n\n**Output**: Structured object with `carrier`, `policyNumber`, `coverages`, `premium`, `effectiveDate`, etc.\n\n### `apply_extracted_quote`\n\nMap raw quote extraction JSON to structured fields. Wraps `applyExtractedQuote(extracted)`.\n\n**Input**:\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `extracted` | any | yes | Raw JSON from `extractQuoteFromPdf` |\n\n**Output**: Structured object with `carrier`, `quoteNumber`, `premiumBreakdown`, `subjectivities`, etc.\n\n---\n\n## Extraction tools\n\nThese call the LLM-powered extraction pipeline. Requires a [configured model and API key](/docs/mcp-server/configuration).\n\n<Callout type=\"warn\">\n These tools make API calls to your configured provider. Each extraction may use multiple LLM calls (multi-pass pipeline). Monitor usage accordingly.\n</Callout>\n\n### `classify_document`\n\nClassify a PDF as a policy or quote. Wraps `classifyDocumentType(pdfBase64, options)`.\n\n**Input**:\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `pdfBase64` | string | yes | Base64-encoded PDF |\n\n**Output**: `{ documentType: \"policy\" | \"quote\", confidence: number, signals: string[] }`\n\n### `extract_policy`\n\nFull multi-pass policy extraction. Wraps `extractFromPdf(pdfBase64, options)`.\n\n**Input**:\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `pdfBase64` | string | yes | Base64-encoded PDF |\n\n**Output**: `{ rawText: string, extracted: any }` — pass `extracted` to `apply_extracted` for structured fields.\n\n### `extract_quote`\n\nFull multi-pass quote extraction. 
Wraps `extractQuoteFromPdf(pdfBase64, options)`.\n\n**Input**:\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `pdfBase64` | string | yes | Base64-encoded PDF |\n\n**Output**: `{ rawText: string, extracted: any }` — pass `extracted` to `apply_extracted_quote` for structured fields.\n\n---\n\n## PDF tools\n\nDirect PDF operations. No model needed.\n\n### `get_acro_form_fields`\n\nList all fillable AcroForm fields in a PDF. Wraps `getAcroFormFields(pdfDoc)`.\n\n**Input**:\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `pdfBase64` | string | yes | Base64-encoded PDF |\n\n**Output**: Array of `{ name, type, options? }` where type is `\"text\"`, `\"checkbox\"`, `\"dropdown\"`, or `\"radio\"`.\n\n### `fill_acro_form`\n\nFill AcroForm fields and return the flattened PDF. Wraps `fillAcroForm(pdfBytes, mappings)`.\n\n**Input**:\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `pdfBase64` | string | yes | Base64-encoded PDF |\n| `mappings` | `{ acroFormName, value }[]` | yes | Field name to value mappings |\n\n**Output**: Base64-encoded PDF with fields filled and flattened.\n\n### `overlay_text_on_pdf`\n\nOverlay text at specified coordinates on a flat PDF. Wraps `overlayTextOnPdf(pdfBytes, overlays)`.\n\n**Input**:\n\n| Parameter | Type | Required | Description |\n|-----------|------|----------|-------------|\n| `pdfBase64` | string | yes | Base64-encoded PDF |\n| `overlays` | array | yes | Each with `page` (0-indexed), `x`, `y`, `text`, optional `fontSize` and `isCheckmark` |\n\n**Output**: Base64-encoded PDF with text overlaid."
|
|
186
|
+
}
|
|
187
|
+
]
|
|
188
|
+
}
|
package/mcp-config.json
ADDED
package/package.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@claritylabs/cl-sdk-mcp",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "MCP server for CL SDK — search docs, build prompts, extract insurance PDFs",
|
|
5
|
+
"bin": {
|
|
6
|
+
"cl-sdk-mcp": "./server.ts"
|
|
7
|
+
},
|
|
8
|
+
"files": [
|
|
9
|
+
"server.ts",
|
|
10
|
+
"docs-bundle.json",
|
|
11
|
+
"mcp-config.json",
|
|
12
|
+
"README.md"
|
|
13
|
+
],
|
|
14
|
+
"scripts": {
|
|
15
|
+
"build-docs": "npx tsx build-docs.ts",
|
|
16
|
+
"prepublishOnly": "npx tsx build-docs.ts"
|
|
17
|
+
},
|
|
18
|
+
"dependencies": {
|
|
19
|
+
"@ai-sdk/anthropic": "^3.0.58",
|
|
20
|
+
"@ai-sdk/google": "^3.0.52",
|
|
21
|
+
"@ai-sdk/openai": "^3.0.47",
|
|
22
|
+
"@claritylabs/cl-sdk": "^0.3.0",
|
|
23
|
+
"@modelcontextprotocol/sdk": "^1.27.1",
|
|
24
|
+
"ai": "^6.0.116",
|
|
25
|
+
"pdf-lib": "^1.17.1",
|
|
26
|
+
"zod": "^4.3.6"
|
|
27
|
+
},
|
|
28
|
+
"devDependencies": {
|
|
29
|
+
"gray-matter": "^4.0.3",
|
|
30
|
+
"tsx": "^4.21.0",
|
|
31
|
+
"typescript": "^5.8.0"
|
|
32
|
+
},
|
|
33
|
+
"publishConfig": {
|
|
34
|
+
"registry": "https://registry.npmjs.org",
|
|
35
|
+
"access": "public"
|
|
36
|
+
},
|
|
37
|
+
"repository": {
|
|
38
|
+
"type": "git",
|
|
39
|
+
"url": "https://github.com/claritylabs-inc/cl-sdk-docs.git",
|
|
40
|
+
"homepage": "https://www.npmjs.com/package/@claritylabs/cl-sdk",
|
|
41
|
+
"directory": "mcp"
|
|
42
|
+
},
|
|
43
|
+
"license": "MIT"
|
|
44
|
+
}
|
package/server.ts
ADDED
|
@@ -0,0 +1,461 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
3
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
4
|
+
import { z } from "zod";
|
|
5
|
+
import * as fs from "fs";
|
|
6
|
+
import * as path from "path";
|
|
7
|
+
import { PDFDocument } from "pdf-lib";
|
|
8
|
+
import type { LanguageModel } from "ai";
|
|
9
|
+
|
|
10
|
+
// SDK imports
|
|
11
|
+
import {
|
|
12
|
+
buildAgentSystemPrompt,
|
|
13
|
+
buildFieldExtractionPrompt,
|
|
14
|
+
buildAutoFillPrompt,
|
|
15
|
+
buildQuestionBatchPrompt,
|
|
16
|
+
applyExtracted,
|
|
17
|
+
applyExtractedQuote,
|
|
18
|
+
classifyDocumentType,
|
|
19
|
+
extractFromPdf,
|
|
20
|
+
extractQuoteFromPdf,
|
|
21
|
+
getAcroFormFields,
|
|
22
|
+
fillAcroForm,
|
|
23
|
+
overlayTextOnPdf,
|
|
24
|
+
createUniformModelConfig,
|
|
25
|
+
} from "@claritylabs/cl-sdk";
|
|
26
|
+
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Docs bundle
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
// One documentation page as emitted by build-docs.ts into docs-bundle.json.
interface DocPage {
  slug: string; // path-style id relative to docs root, e.g. "getting-started/quickstart"
  title: string;
  description: string;
  content: string; // full MDX body of the page
}

// A navigation section grouping related pages.
interface DocSection {
  title: string;
  slug: string;
  pages: string[]; // slugs of the pages belonging to this section
}

// Shape of docs-bundle.json, generated at publish time by build-docs.ts.
interface DocsBundle {
  sections: DocSection[];
  pages: DocPage[];
}
|
|
48
|
+
|
|
49
|
+
function loadDocsBundle(): DocsBundle {
|
|
50
|
+
const bundlePath = path.resolve(__dirname, "docs-bundle.json");
|
|
51
|
+
if (!fs.existsSync(bundlePath)) {
|
|
52
|
+
console.error(
|
|
53
|
+
"Warning: docs-bundle.json not found. Run `npx tsx mcp/build-docs.ts` to generate it.\n" +
|
|
54
|
+
"Doc search/read tools will return empty results."
|
|
55
|
+
);
|
|
56
|
+
return { sections: [], pages: [] };
|
|
57
|
+
}
|
|
58
|
+
return JSON.parse(fs.readFileSync(bundlePath, "utf-8")) as DocsBundle;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
const docs = loadDocsBundle();
|
|
62
|
+
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// Config
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
// Model-provider configuration for the LLM-backed extraction tools.
// Sourced from mcp-config.json or environment variables (see loadConfig).
interface McpConfig {
  provider: string; // "anthropic" | "openai" | "google" (validated in createModel)
  model: string; // provider-specific model id
  apiKey: string; // may contain ${ENV_VAR} placeholders before expansion
}
|
|
72
|
+
|
|
73
|
+
function loadConfig(): McpConfig {
|
|
74
|
+
// Check for config file next to server
|
|
75
|
+
const configPath = path.resolve(__dirname, "mcp-config.json");
|
|
76
|
+
try {
|
|
77
|
+
const raw = fs.readFileSync(configPath, "utf-8");
|
|
78
|
+
const cfg = JSON.parse(raw) as McpConfig;
|
|
79
|
+
cfg.apiKey = cfg.apiKey.replace(/\$\{(\w+)\}/g, (_, name) => process.env[name] ?? "");
|
|
80
|
+
return cfg;
|
|
81
|
+
} catch {
|
|
82
|
+
// Fall back to env vars
|
|
83
|
+
return {
|
|
84
|
+
provider: process.env.CL_MCP_PROVIDER ?? "anthropic",
|
|
85
|
+
model: process.env.CL_MCP_MODEL ?? "claude-haiku-4-5-20251001",
|
|
86
|
+
apiKey: process.env.ANTHROPIC_API_KEY ?? process.env.OPENAI_API_KEY ?? process.env.GOOGLE_API_KEY ?? "",
|
|
87
|
+
};
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
async function createModel(cfg: McpConfig): Promise<LanguageModel> {
|
|
92
|
+
switch (cfg.provider) {
|
|
93
|
+
case "anthropic": {
|
|
94
|
+
const { createAnthropic } = await import("@ai-sdk/anthropic");
|
|
95
|
+
const provider = createAnthropic({ apiKey: cfg.apiKey });
|
|
96
|
+
return provider(cfg.model);
|
|
97
|
+
}
|
|
98
|
+
case "openai": {
|
|
99
|
+
const { createOpenAI } = await import("@ai-sdk/openai");
|
|
100
|
+
const provider = createOpenAI({ apiKey: cfg.apiKey });
|
|
101
|
+
return provider(cfg.model);
|
|
102
|
+
}
|
|
103
|
+
case "google": {
|
|
104
|
+
const { createGoogleGenerativeAI } = await import("@ai-sdk/google");
|
|
105
|
+
const provider = createGoogleGenerativeAI({ apiKey: cfg.apiKey });
|
|
106
|
+
return provider(cfg.model);
|
|
107
|
+
}
|
|
108
|
+
default:
|
|
109
|
+
throw new Error(`Unsupported provider: ${cfg.provider}`);
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// ---------------------------------------------------------------------------
|
|
114
|
+
// Doc search helpers
|
|
115
|
+
// ---------------------------------------------------------------------------
|
|
116
|
+
|
|
117
|
+
function searchDocs(
|
|
118
|
+
query: string,
|
|
119
|
+
section?: string
|
|
120
|
+
): { slug: string; title: string; excerpt: string; score: number }[] {
|
|
121
|
+
const q = query.toLowerCase();
|
|
122
|
+
const wordBoundary = new RegExp(`\\b${q.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\b`, "i");
|
|
123
|
+
|
|
124
|
+
const results: { slug: string; title: string; excerpt: string; score: number }[] = [];
|
|
125
|
+
|
|
126
|
+
for (const page of docs.pages) {
|
|
127
|
+
if (section && !page.slug.startsWith(section + "/") && page.slug !== section) continue;
|
|
128
|
+
|
|
129
|
+
const lower = page.content.toLowerCase();
|
|
130
|
+
if (!lower.includes(q)) continue;
|
|
131
|
+
|
|
132
|
+
let score = 0;
|
|
133
|
+
let idx = 0;
|
|
134
|
+
while ((idx = lower.indexOf(q, idx)) !== -1) {
|
|
135
|
+
score++;
|
|
136
|
+
idx += q.length;
|
|
137
|
+
}
|
|
138
|
+
const wbMatches = page.content.match(wordBoundary);
|
|
139
|
+
if (wbMatches) score += wbMatches.length * 2;
|
|
140
|
+
if (page.title.toLowerCase().includes(q)) score += 10;
|
|
141
|
+
|
|
142
|
+
const firstIdx = lower.indexOf(q);
|
|
143
|
+
const start = Math.max(0, firstIdx - 100);
|
|
144
|
+
const end = Math.min(page.content.length, firstIdx + q.length + 100);
|
|
145
|
+
const excerpt =
|
|
146
|
+
(start > 0 ? "..." : "") +
|
|
147
|
+
page.content.slice(start, end).trim() +
|
|
148
|
+
(end < page.content.length ? "..." : "");
|
|
149
|
+
|
|
150
|
+
results.push({ slug: page.slug, title: page.title, excerpt, score });
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
results.sort((a, b) => b.score - a.score);
|
|
154
|
+
return results.slice(0, 5);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
// ---------------------------------------------------------------------------
|
|
158
|
+
// Server
|
|
159
|
+
// ---------------------------------------------------------------------------
|
|
160
|
+
|
|
161
|
+
// The MCP server instance; all tool registrations below attach to it.
const server = new McpServer({
  name: "cl-sdk-docs",
  version: "1.0.0",
});

// --- Documentation tools ---
// Operate purely on the bundled docs; no model or API key required.

server.tool(
  "list_doc_sections",
  "List all documentation sections and their pages",
  {},
  async () => {
    return {
      content: [{ type: "text", text: JSON.stringify(docs.sections, null, 2) }],
    };
  }
);

server.tool(
  "search_docs",
  "Full-text search across SDK documentation pages. Returns top 5 matches with context.",
  {
    query: z.string().describe("Search query"),
    section: z.string().optional().describe("Limit to section slug (e.g. 'extraction', 'agent')"),
  },
  async ({ query, section }) => {
    const results = searchDocs(query, section);
    if (results.length === 0) {
      return { content: [{ type: "text", text: "No results found." }] };
    }
    // Render the ranked hits as markdown headings separated by rules.
    const text = results
      .map((r, i) => `### ${i + 1}. ${r.title} (${r.slug})\nScore: ${r.score}\n\n${r.excerpt}`)
      .join("\n\n---\n\n");
    return { content: [{ type: "text", text }] };
  }
);

server.tool(
  "read_doc_page",
  "Read a specific documentation page by slug (e.g. 'getting-started/quickstart')",
  {
    slug: z.string().describe("Page slug relative to docs root"),
  },
  async ({ slug }) => {
    const page = docs.pages.find((p) => p.slug === slug);
    if (!page) {
      // isError lets MCP clients surface this as a tool failure rather than text.
      return {
        content: [{ type: "text", text: `Page not found: ${slug}` }],
        isError: true,
      };
    }
    return {
      content: [{ type: "text", text: `# ${page.title}\n\n${page.content}` }],
    };
  }
);
|
|
217
|
+
|
|
218
|
+
// --- SDK pure function tools ---
// Prompt builders: pure functions returning prompt strings; no model needed.

server.tool(
  "build_agent_system_prompt",
  "Generate an insurance-aware agent system prompt from an AgentContext",
  {
    platform: z.enum(["email", "chat", "sms", "slack", "discord"]).describe("Communication platform"),
    intent: z.enum(["direct", "mediated", "observed"]).describe("Communication intent"),
    siteUrl: z.string().describe("Company website URL"),
    companyName: z.string().optional(),
    companyContext: z.string().optional(),
    userName: z.string().optional(),
    coiHandling: z.enum(["broker", "user", "member", "ignore"]).optional(),
    brokerName: z.string().optional(),
    brokerContactName: z.string().optional(),
    brokerContactEmail: z.string().optional(),
  },
  async (args) => {
    // The validated args object is passed straight through as the AgentContext.
    const prompt = buildAgentSystemPrompt(args);
    return { content: [{ type: "text", text: prompt }] };
  }
);

server.tool(
  "build_field_extraction_prompt",
  "Get the application field extraction prompt",
  {},
  async () => {
    // Static prompt — takes no arguments.
    const prompt = buildFieldExtractionPrompt();
    return { content: [{ type: "text", text: prompt }] };
  }
);

server.tool(
  "build_auto_fill_prompt",
  "Generate an auto-fill prompt for application fields given fields and org context",
  {
    fields: z
      .array(
        z.object({
          id: z.string(),
          label: z.string(),
          fieldType: z.string(),
          section: z.string(),
        })
      )
      .describe("Application fields to auto-fill"),
    orgContext: z
      .array(
        z.object({
          key: z.string(),
          value: z.string(),
          category: z.string(),
        })
      )
      .describe("Organization context key-value pairs"),
  },
  async ({ fields, orgContext }) => {
    const prompt = buildAutoFillPrompt(fields, orgContext);
    return { content: [{ type: "text", text: prompt }] };
  }
);

server.tool(
  "build_question_batch_prompt",
  "Generate a batched question prompt for unfilled application fields",
  {
    unfilledFields: z
      .array(
        z.object({
          id: z.string(),
          label: z.string().optional(),
          text: z.string().optional(),
          fieldType: z.string(),
          section: z.string(),
          required: z.boolean(),
          // Conditional fields: only asked when `dependsOn` equals `whenValue`
          // — presumably enforced by the SDK; confirm in buildQuestionBatchPrompt.
          condition: z
            .object({ dependsOn: z.string(), whenValue: z.string() })
            .optional(),
        })
      )
      .describe("Unfilled fields that need questions generated"),
  },
  async ({ unfilledFields }) => {
    const prompt = buildQuestionBatchPrompt(unfilledFields);
    return { content: [{ type: "text", text: prompt }] };
  }
);
|
|
306
|
+
|
|
307
|
+
// Data mappers: transform raw extraction JSON into structured fields.
// Pure functions — no model or API key needed.

server.tool(
  "apply_extracted",
  "Map raw policy extraction JSON to structured fields",
  {
    extracted: z.any().describe("Raw extraction JSON from extractFromPdf"),
  },
  async ({ extracted }) => {
    const result = applyExtracted(extracted);
    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
  }
);

server.tool(
  "apply_extracted_quote",
  "Map raw quote extraction JSON to structured fields",
  {
    extracted: z.any().describe("Raw extraction JSON from extractQuoteFromPdf"),
  },
  async ({ extracted }) => {
    const result = applyExtractedQuote(extracted);
    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
  }
);
|
|
330
|
+
|
|
331
|
+
// --- SDK LLM tools ---
|
|
332
|
+
|
|
333
|
+
let _model: LanguageModel | null = null;
|
|
334
|
+
async function getModel(): Promise<LanguageModel> {
|
|
335
|
+
if (!_model) {
|
|
336
|
+
const cfg = loadConfig();
|
|
337
|
+
_model = await createModel(cfg);
|
|
338
|
+
}
|
|
339
|
+
return _model;
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
server.tool(
  "classify_document",
  "Classify a PDF document as policy or quote. Requires a configured model and API key.",
  {
    pdfBase64: z.string().describe("Base64-encoded PDF document"),
  },
  async ({ pdfBase64 }) => {
    const model = await getModel();
    // Same model assigned to every pipeline role (uniform config).
    const models = createUniformModelConfig(model);
    const result = await classifyDocumentType(pdfBase64, { models });
    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
  }
);

server.tool(
  "extract_policy",
  "Full multi-pass policy extraction from a PDF. Requires a configured model and API key.",
  {
    pdfBase64: z.string().describe("Base64-encoded PDF document"),
  },
  async ({ pdfBase64 }) => {
    const model = await getModel();
    const models = createUniformModelConfig(model);
    // NOTE: may issue multiple LLM calls per document (multi-pass pipeline).
    const result = await extractFromPdf(pdfBase64, { models });
    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
  }
);

server.tool(
  "extract_quote",
  "Full multi-pass quote extraction from a PDF. Requires a configured model and API key.",
  {
    pdfBase64: z.string().describe("Base64-encoded PDF document"),
  },
  async ({ pdfBase64 }) => {
    const model = await getModel();
    const models = createUniformModelConfig(model);
    const result = await extractQuoteFromPdf(pdfBase64, { models });
    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
  }
);
|
|
383
|
+
|
|
384
|
+
// --- SDK PDF tools ---
// Direct PDF operations via pdf-lib; no model or API key needed.

server.tool(
  "get_acro_form_fields",
  "List all fillable AcroForm fields in a PDF",
  {
    pdfBase64: z.string().describe("Base64-encoded PDF document"),
  },
  async ({ pdfBase64 }) => {
    const bytes = Uint8Array.from(Buffer.from(pdfBase64, "base64"));
    const pdfDoc = await PDFDocument.load(bytes);
    const fields = getAcroFormFields(pdfDoc);
    return { content: [{ type: "text", text: JSON.stringify(fields, null, 2) }] };
  }
);

server.tool(
  "fill_acro_form",
  "Fill AcroForm fields in a PDF and return the flattened result as base64",
  {
    pdfBase64: z.string().describe("Base64-encoded PDF document"),
    mappings: z
      .array(
        z.object({
          acroFormName: z.string().describe("Form field name"),
          value: z.string().describe("Value to fill"),
        })
      )
      .describe("Field name to value mappings"),
  },
  async ({ pdfBase64, mappings }) => {
    const bytes = Uint8Array.from(Buffer.from(pdfBase64, "base64"));
    const result = await fillAcroForm(bytes, mappings);
    // Result is binary PDF bytes; re-encode as base64 text for transport.
    const b64 = Buffer.from(result).toString("base64");
    return { content: [{ type: "text", text: b64 }] };
  }
);

server.tool(
  "overlay_text_on_pdf",
  "Overlay text on a flat PDF at specified coordinates. Returns base64 PDF.",
  {
    pdfBase64: z.string().describe("Base64-encoded PDF document"),
    // NOTE(review): coordinates presumably use pdf-lib's bottom-left origin
    // — confirm against overlayTextOnPdf in the SDK.
    overlays: z
      .array(
        z.object({
          page: z.number().describe("Page number (0-indexed)"),
          x: z.number().describe("X coordinate"),
          y: z.number().describe("Y coordinate"),
          text: z.string().describe("Text to overlay"),
          fontSize: z.number().optional().describe("Font size (default 12)"),
          isCheckmark: z.boolean().optional().describe("Render as checkmark"),
        })
      )
      .describe("Text overlay specifications"),
  },
  async ({ pdfBase64, overlays }) => {
    const bytes = Uint8Array.from(Buffer.from(pdfBase64, "base64"));
    const result = await overlayTextOnPdf(bytes, overlays);
    const b64 = Buffer.from(result).toString("base64");
    return { content: [{ type: "text", text: b64 }] };
  }
);
|
|
447
|
+
|
|
448
|
+
// ---------------------------------------------------------------------------
|
|
449
|
+
// Start
|
|
450
|
+
// ---------------------------------------------------------------------------
|
|
451
|
+
|
|
452
|
+
async function main() {
|
|
453
|
+
const transport = new StdioServerTransport();
|
|
454
|
+
await server.connect(transport);
|
|
455
|
+
console.error("CL SDK MCP server running on stdio");
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
main().catch((err) => {
|
|
459
|
+
console.error("Fatal error:", err);
|
|
460
|
+
process.exit(1);
|
|
461
|
+
});
|