@claritylabs/cl-sdk 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -1
- package/dist/index.d.mts +9 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.js +308 -102
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +308 -102
- package/dist/index.mjs.map +1 -1
- package/dist/storage-sqlite.js +2 -1
- package/dist/storage-sqlite.js.map +1 -1
- package/dist/storage-sqlite.mjs +2 -1
- package/dist/storage-sqlite.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -36,6 +36,11 @@ const extractor = createExtractor({
|
|
|
36
36
|
const result = await yourProvider.generateStructured({ prompt, system, schema, maxTokens, providerOptions });
|
|
37
37
|
return { object: result.object, usage: result.usage };
|
|
38
38
|
},
|
|
39
|
+
concurrency: 3,
|
|
40
|
+
pageMapConcurrency: 3,
|
|
41
|
+
extractorConcurrency: 4,
|
|
42
|
+
formatConcurrency: 2,
|
|
43
|
+
reviewMode: "auto",
|
|
39
44
|
});
|
|
40
45
|
|
|
41
46
|
const result = await extractor.extract(pdfBase64);
|
|
@@ -122,7 +127,9 @@ CL-SDK uses deterministic scaffolding with agentic decision points rather than f
|
|
|
122
127
|
- Extraction page mapping and review choose focused follow-up extractors from the live extractor catalog. Definitions and covered reasons can fall back through section extraction when a focused run returns no usable records.
|
|
123
128
|
- Supplementary extraction runs only when page assignments, form inventory, existing extracted text, or review follow-up tasks indicate regulatory, claims, notice, cancellation, or contact facts are likely present.
|
|
124
129
|
- Referential coverage resolution tries cheap local section/form matches first, then uses bounded target-specific actions for declarations, schedules, sections, page-location lookup, or skip.
|
|
125
|
-
-
|
|
130
|
+
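- Page mapping, focused extractors, and formatting each have their own concurrency override (`pageMapConcurrency`, `extractorConcurrency`, `formatConcurrency`); referential lookup, and any stage without an override, uses the shared `concurrency` default. Page-scoped PDF and image ranges are cached so overlapping extractor tasks do not repeatedly slice or render the same pages.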
|
|
131
|
+
- Formatting skips the LLM cleanup pass for plain prose and formats long or noisy markdown/table/list content in parallel batches.
|
|
132
|
+
- `reviewMode: "auto"` skips the expensive LLM review pass when deterministic checks are clean and source spans are available. Use `"always"` for maximum review coverage or `"skip"` when the host owns quality review separately.
|
|
126
133
|
- Application processing plans optional backfill, context auto-fill, document search, batching, reply parsing, lookup, explanations, and next-batch email generation based on current state.
|
|
127
134
|
|
|
128
135
|
These gates reduce unnecessary provider calls while preserving reliability for edge cases where additional focused extraction or retrieval is needed.
|
package/dist/index.d.mts
CHANGED
|
@@ -31677,8 +31677,17 @@ interface ExtractorConfig {
|
|
|
31677
31677
|
generateText: GenerateText;
|
|
31678
31678
|
generateObject: GenerateObject;
|
|
31679
31679
|
convertPdfToImages?: ConvertPdfToImagesFn;
|
|
31680
|
+
/** Default concurrency for page mapping, extractors, referential lookup, and formatting. */
|
|
31680
31681
|
concurrency?: number;
|
|
31682
|
+
/** Optional override for page-map model calls. Defaults to `concurrency`. */
|
|
31683
|
+
pageMapConcurrency?: number;
|
|
31684
|
+
/** Optional override for focused extractor model calls. Defaults to `concurrency`. */
|
|
31685
|
+
extractorConcurrency?: number;
|
|
31686
|
+
/** Optional override for markdown formatting model calls. Defaults to `concurrency`. */
|
|
31687
|
+
formatConcurrency?: number;
|
|
31681
31688
|
maxReviewRounds?: number;
|
|
31689
|
+
/** Controls the expensive LLM review pass. `auto` skips it when deterministic checks are clean and source spans are available. */
|
|
31690
|
+
reviewMode?: "always" | "auto" | "skip";
|
|
31682
31691
|
onTokenUsage?: (usage: TokenUsage) => void;
|
|
31683
31692
|
onProgress?: (message: string) => void;
|
|
31684
31693
|
log?: LogFn;
|
package/dist/index.d.ts
CHANGED
|
@@ -31677,8 +31677,17 @@ interface ExtractorConfig {
|
|
|
31677
31677
|
generateText: GenerateText;
|
|
31678
31678
|
generateObject: GenerateObject;
|
|
31679
31679
|
convertPdfToImages?: ConvertPdfToImagesFn;
|
|
31680
|
+
/** Default concurrency for page mapping, extractors, referential lookup, and formatting. */
|
|
31680
31681
|
concurrency?: number;
|
|
31682
|
+
/** Optional override for page-map model calls. Defaults to `concurrency`. */
|
|
31683
|
+
pageMapConcurrency?: number;
|
|
31684
|
+
/** Optional override for focused extractor model calls. Defaults to `concurrency`. */
|
|
31685
|
+
extractorConcurrency?: number;
|
|
31686
|
+
/** Optional override for markdown formatting model calls. Defaults to `concurrency`. */
|
|
31687
|
+
formatConcurrency?: number;
|
|
31681
31688
|
maxReviewRounds?: number;
|
|
31689
|
+
/** Controls the expensive LLM review pass. `auto` skips it when deterministic checks are clean and source spans are available. */
|
|
31690
|
+
reviewMode?: "always" | "auto" | "skip";
|
|
31682
31691
|
onTokenUsage?: (usage: TokenUsage) => void;
|
|
31683
31692
|
onProgress?: (message: string) => void;
|
|
31684
31693
|
log?: LogFn;
|