@struktur/sdk 2.1.2 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/artifacts/fileToArtifact.d.ts +8 -0
- package/dist/artifacts/fileToArtifact.d.ts.map +1 -0
- package/dist/artifacts/input.d.ts +60 -0
- package/dist/artifacts/input.d.ts.map +1 -0
- package/{src/artifacts/providers.ts → dist/artifacts/providers.d.ts} +2 -4
- package/dist/artifacts/providers.d.ts.map +1 -0
- package/dist/artifacts/urlToArtifact.d.ts +3 -0
- package/dist/artifacts/urlToArtifact.d.ts.map +1 -0
- package/dist/auth/config.d.ts +34 -0
- package/dist/auth/config.d.ts.map +1 -0
- package/dist/auth/tokens.d.ts +18 -0
- package/dist/auth/tokens.d.ts.map +1 -0
- package/dist/chunking/ArtifactBatcher.d.ts +11 -0
- package/dist/chunking/ArtifactBatcher.d.ts.map +1 -0
- package/dist/chunking/ArtifactSplitter.d.ts +10 -0
- package/dist/chunking/ArtifactSplitter.d.ts.map +1 -0
- package/dist/debug/logger.d.ts +169 -0
- package/dist/debug/logger.d.ts.map +1 -0
- package/dist/extract.d.ts +3 -0
- package/dist/extract.d.ts.map +1 -0
- package/dist/fields.d.ts +75 -0
- package/dist/fields.d.ts.map +1 -0
- package/dist/index.d.ts +24 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +5603 -0
- package/dist/index.js.map +1 -0
- package/dist/llm/LLMClient.d.ts +40 -0
- package/dist/llm/LLMClient.d.ts.map +1 -0
- package/dist/llm/RetryingRunner.d.ts +37 -0
- package/dist/llm/RetryingRunner.d.ts.map +1 -0
- package/dist/llm/message.d.ts +12 -0
- package/dist/llm/message.d.ts.map +1 -0
- package/dist/llm/models.d.ts +13 -0
- package/dist/llm/models.d.ts.map +1 -0
- package/dist/llm/resolveModel.d.ts +3 -0
- package/dist/llm/resolveModel.d.ts.map +1 -0
- package/dist/merge/Deduplicator.d.ts +4 -0
- package/dist/merge/Deduplicator.d.ts.map +1 -0
- package/dist/merge/SmartDataMerger.d.ts +7 -0
- package/dist/merge/SmartDataMerger.d.ts.map +1 -0
- package/dist/parsers/collect.d.ts +7 -0
- package/dist/parsers/collect.d.ts.map +1 -0
- package/{src/parsers/index.ts → dist/parsers/index.d.ts} +1 -0
- package/dist/parsers/index.d.ts.map +1 -0
- package/dist/parsers/mime.d.ts +12 -0
- package/dist/parsers/mime.d.ts.map +1 -0
- package/dist/parsers/npm.d.ts +16 -0
- package/dist/parsers/npm.d.ts.map +1 -0
- package/dist/parsers/pdf.d.ts +36 -0
- package/dist/parsers/pdf.d.ts.map +1 -0
- package/dist/parsers/runner.d.ts +4 -0
- package/dist/parsers/runner.d.ts.map +1 -0
- package/dist/parsers/types.d.ts +27 -0
- package/dist/parsers/types.d.ts.map +1 -0
- package/dist/parsers.d.ts +1 -0
- package/dist/parsers.js +492 -0
- package/dist/parsers.js.map +1 -0
- package/dist/prompts/DeduplicationPrompt.d.ts +5 -0
- package/dist/prompts/DeduplicationPrompt.d.ts.map +1 -0
- package/dist/prompts/ExtractorPrompt.d.ts +6 -0
- package/dist/prompts/ExtractorPrompt.d.ts.map +1 -0
- package/dist/prompts/ParallelMergerPrompt.d.ts +5 -0
- package/dist/prompts/ParallelMergerPrompt.d.ts.map +1 -0
- package/dist/prompts/SequentialExtractorPrompt.d.ts +6 -0
- package/dist/prompts/SequentialExtractorPrompt.d.ts.map +1 -0
- package/dist/prompts/formatArtifacts.d.ts +3 -0
- package/dist/prompts/formatArtifacts.d.ts.map +1 -0
- package/dist/strategies/DoublePassAutoMergeStrategy.d.ts +23 -0
- package/dist/strategies/DoublePassAutoMergeStrategy.d.ts.map +1 -0
- package/dist/strategies/DoublePassStrategy.d.ts +22 -0
- package/dist/strategies/DoublePassStrategy.d.ts.map +1 -0
- package/dist/strategies/ParallelAutoMergeStrategy.d.ts +27 -0
- package/dist/strategies/ParallelAutoMergeStrategy.d.ts.map +1 -0
- package/dist/strategies/ParallelStrategy.d.ts +22 -0
- package/dist/strategies/ParallelStrategy.d.ts.map +1 -0
- package/dist/strategies/SequentialAutoMergeStrategy.d.ts +22 -0
- package/dist/strategies/SequentialAutoMergeStrategy.d.ts.map +1 -0
- package/dist/strategies/SequentialStrategy.d.ts +20 -0
- package/dist/strategies/SequentialStrategy.d.ts.map +1 -0
- package/dist/strategies/SimpleStrategy.d.ts +18 -0
- package/dist/strategies/SimpleStrategy.d.ts.map +1 -0
- package/dist/strategies/agent/AgentStrategy.d.ts +44 -0
- package/dist/strategies/agent/AgentStrategy.d.ts.map +1 -0
- package/dist/strategies/agent/AgentTools.d.ts +55 -0
- package/dist/strategies/agent/AgentTools.d.ts.map +1 -0
- package/dist/strategies/agent/ArtifactFilesystem.d.ts +51 -0
- package/dist/strategies/agent/ArtifactFilesystem.d.ts.map +1 -0
- package/dist/strategies/agent/index.d.ts +4 -0
- package/dist/strategies/agent/index.d.ts.map +1 -0
- package/dist/strategies/concurrency.d.ts +2 -0
- package/dist/strategies/concurrency.d.ts.map +1 -0
- package/{src/strategies/index.ts → dist/strategies/index.d.ts} +2 -0
- package/dist/strategies/index.d.ts.map +1 -0
- package/dist/strategies/utils.d.ts +39 -0
- package/dist/strategies/utils.d.ts.map +1 -0
- package/dist/strategies.d.ts +1 -0
- package/dist/strategies.js +3930 -0
- package/dist/strategies.js.map +1 -0
- package/dist/tokenization.d.ts +11 -0
- package/dist/tokenization.d.ts.map +1 -0
- package/dist/types.d.ts +178 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/validation/validator.d.ts +20 -0
- package/dist/validation/validator.d.ts.map +1 -0
- package/package.json +30 -14
- package/src/agent-cli-integration.test.ts +0 -47
- package/src/agent-export.test.ts +0 -17
- package/src/agent-tool-labels.test.ts +0 -50
- package/src/artifacts/AGENTS.md +0 -16
- package/src/artifacts/fileToArtifact.test.ts +0 -37
- package/src/artifacts/fileToArtifact.ts +0 -44
- package/src/artifacts/input.test.ts +0 -243
- package/src/artifacts/input.ts +0 -360
- package/src/artifacts/providers.test.ts +0 -19
- package/src/artifacts/urlToArtifact.test.ts +0 -23
- package/src/artifacts/urlToArtifact.ts +0 -19
- package/src/auth/AGENTS.md +0 -11
- package/src/auth/config.test.ts +0 -132
- package/src/auth/config.ts +0 -186
- package/src/auth/tokens.test.ts +0 -58
- package/src/auth/tokens.ts +0 -229
- package/src/chunking/AGENTS.md +0 -11
- package/src/chunking/ArtifactBatcher.test.ts +0 -22
- package/src/chunking/ArtifactBatcher.ts +0 -110
- package/src/chunking/ArtifactSplitter.test.ts +0 -38
- package/src/chunking/ArtifactSplitter.ts +0 -151
- package/src/debug/AGENTS.md +0 -79
- package/src/debug/logger.test.ts +0 -244
- package/src/debug/logger.ts +0 -211
- package/src/extract.test.ts +0 -22
- package/src/extract.ts +0 -150
- package/src/fields.test.ts +0 -681
- package/src/fields.ts +0 -246
- package/src/index.test.ts +0 -20
- package/src/index.ts +0 -110
- package/src/llm/AGENTS.md +0 -9
- package/src/llm/LLMClient.test.ts +0 -394
- package/src/llm/LLMClient.ts +0 -264
- package/src/llm/RetryingRunner.test.ts +0 -174
- package/src/llm/RetryingRunner.ts +0 -270
- package/src/llm/message.test.ts +0 -42
- package/src/llm/message.ts +0 -47
- package/src/llm/models.test.ts +0 -82
- package/src/llm/models.ts +0 -190
- package/src/llm/resolveModel.ts +0 -86
- package/src/merge/AGENTS.md +0 -6
- package/src/merge/Deduplicator.test.ts +0 -108
- package/src/merge/Deduplicator.ts +0 -45
- package/src/merge/SmartDataMerger.test.ts +0 -177
- package/src/merge/SmartDataMerger.ts +0 -56
- package/src/parsers/AGENTS.md +0 -58
- package/src/parsers/collect.test.ts +0 -56
- package/src/parsers/collect.ts +0 -31
- package/src/parsers/mime.test.ts +0 -91
- package/src/parsers/mime.ts +0 -137
- package/src/parsers/npm.ts +0 -26
- package/src/parsers/pdf.test.ts +0 -394
- package/src/parsers/pdf.ts +0 -194
- package/src/parsers/runner.test.ts +0 -95
- package/src/parsers/runner.ts +0 -177
- package/src/parsers/types.ts +0 -29
- package/src/prompts/AGENTS.md +0 -8
- package/src/prompts/DeduplicationPrompt.test.ts +0 -41
- package/src/prompts/DeduplicationPrompt.ts +0 -37
- package/src/prompts/ExtractorPrompt.test.ts +0 -21
- package/src/prompts/ExtractorPrompt.ts +0 -72
- package/src/prompts/ParallelMergerPrompt.test.ts +0 -8
- package/src/prompts/ParallelMergerPrompt.ts +0 -37
- package/src/prompts/SequentialExtractorPrompt.test.ts +0 -24
- package/src/prompts/SequentialExtractorPrompt.ts +0 -82
- package/src/prompts/formatArtifacts.test.ts +0 -39
- package/src/prompts/formatArtifacts.ts +0 -46
- package/src/strategies/AGENTS.md +0 -6
- package/src/strategies/DoublePassAutoMergeStrategy.test.ts +0 -53
- package/src/strategies/DoublePassAutoMergeStrategy.ts +0 -410
- package/src/strategies/DoublePassStrategy.test.ts +0 -48
- package/src/strategies/DoublePassStrategy.ts +0 -266
- package/src/strategies/ParallelAutoMergeStrategy.test.ts +0 -152
- package/src/strategies/ParallelAutoMergeStrategy.ts +0 -345
- package/src/strategies/ParallelStrategy.test.ts +0 -61
- package/src/strategies/ParallelStrategy.ts +0 -208
- package/src/strategies/SequentialAutoMergeStrategy.test.ts +0 -66
- package/src/strategies/SequentialAutoMergeStrategy.ts +0 -325
- package/src/strategies/SequentialStrategy.test.ts +0 -53
- package/src/strategies/SequentialStrategy.ts +0 -142
- package/src/strategies/SimpleStrategy.test.ts +0 -46
- package/src/strategies/SimpleStrategy.ts +0 -94
- package/src/strategies/concurrency.test.ts +0 -16
- package/src/strategies/concurrency.ts +0 -14
- package/src/strategies/index.test.ts +0 -20
- package/src/strategies/utils.test.ts +0 -76
- package/src/strategies/utils.ts +0 -95
- package/src/tokenization.test.ts +0 -119
- package/src/tokenization.ts +0 -71
- package/src/types.test.ts +0 -25
- package/src/types.ts +0 -174
- package/src/validation/AGENTS.md +0 -7
- package/src/validation/validator.test.ts +0 -204
- package/src/validation/validator.ts +0 -90
- package/tsconfig.json +0 -22
package/src/debug/logger.ts
DELETED
|
@@ -1,211 +0,0 @@
|
|
|
1
|
-
import type { Artifact, ArtifactContent, ExtractionEvents, Usage, StepInfo, ProgressInfo, RetryInfo, TokenUsageInfo } from "../types";
|
|
2
|
-
|
|
3
|
-
/** Shape of the logger object returned by `createDebugLogger`. */
export type DebugLogger = ReturnType<typeof createDebugLogger>;

/**
 * Creates a structured debug logger that writes one JSON object per line to
 * `process.stderr`.
 *
 * Each line contains an ISO-8601 `timestamp`, the event `type`, and the fields
 * of the payload handed to the logging method (in that key order). When
 * `enabled` is `false`, every method returns without writing anything.
 *
 * Serialization is best-effort: several payloads carry `unknown` values
 * (prompts, raw responses, validation errors) that may contain circular
 * references or BigInt values, which a plain `JSON.stringify` rejects by
 * throwing. A debug log call must not abort the work it is observing, so such
 * payloads are serialized through a tolerant fallback instead.
 *
 * @param enabled - Whether log entries are written at all.
 * @returns An object exposing one logging method per debug event.
 */
export const createDebugLogger = (enabled: boolean) => {
  /**
   * Serializes `entry` to JSON. The plain `JSON.stringify` call is tried first
   * so that ordinary payloads produce exactly the same output as before; only
   * when it throws is the tolerant replacer used.
   */
  const serialize = (entry: object): string => {
    try {
      return JSON.stringify(entry);
    } catch {
      const seen = new WeakSet<object>();
      return JSON.stringify(entry, (_key, value: unknown) => {
        if (typeof value === "bigint") return value.toString();
        if (typeof value === "object" && value !== null) {
          // Any object met a second time is replaced by a placeholder. This
          // breaks cycles; it also collapses repeated (non-cyclic) references,
          // which is acceptable for a fallback path.
          if (seen.has(value)) return "[Circular]";
          seen.add(value);
        }
        return value;
      });
    }
  };

  /** Writes a single log line for the event `type` with the given payload. */
  const log = (type: string, data: object): void => {
    if (!enabled) return;

    const timestamp = new Date().toISOString();
    let line: string;
    try {
      // Payload fields are spread last, matching the original key order and
      // override behavior.
      line = serialize({ timestamp, type, ...data });
    } catch (error) {
      // Last resort (for example a throwing `toJSON`): still record the event.
      const serializationError = error instanceof Error ? error.message : String(error);
      line = JSON.stringify({ timestamp, type, serializationError });
    }
    process.stderr.write(line + "\n");
  };

  /** Builds the logging method for one event type with a typed payload. */
  const event =
    <T extends object>(type: string) =>
    (data: T): void => {
      log(type, data);
    };

  return {
    // CLI initialization
    cliInit: event<{ args: Record<string, unknown> }>("cli_init"),
    schemaLoaded: event<{ source: string; schemaSize: number }>("schema_loaded"),
    artifactsLoaded: event<{
      count: number;
      artifacts: Array<{ id: string; type: string; contentCount: number; tokens?: number }>;
      totalTokens: number;
      totalImages: number;
    }>("artifacts_loaded"),
    modelResolved: event<{ modelSpec: string; resolvedModel: string }>("model_resolved"),
    strategyCreated: event<{ strategy: string; config: Record<string, unknown> }>(
      "strategy_created",
    ),

    // Chunking
    chunkingStart: event<{
      artifactId: string;
      totalTokens: number;
      maxTokens: number;
      maxImages?: number;
    }>("chunking_start"),
    chunkingSplit: event<{
      artifactId: string;
      originalContentCount: number;
      splitContentCount: number;
      splitReason: "text_too_long" | "content_limit";
      originalTokens: number;
      chunkSize: number;
    }>("chunking_split"),
    chunkingResult: event<{
      artifactId: string;
      chunksCreated: number;
      chunkSizes: number[];
    }>("chunking_result"),
    batchingStart: event<{
      totalArtifacts: number;
      maxTokens: number;
      maxImages?: number;
      modelMaxTokens?: number;
      effectiveMaxTokens: number;
    }>("batching_start"),
    batchCreated: event<{
      batchIndex: number;
      artifactCount: number;
      totalTokens: number;
      totalImages: number;
      artifactIds: string[];
    }>("batch_created"),
    batchingComplete: event<{
      totalBatches: number;
      batches: Array<{ index: number; artifactCount: number; tokens: number; images: number }>;
    }>("batching_complete"),

    // Strategy execution
    strategyRunStart: event<{ strategy: string; estimatedSteps: number; artifactCount: number }>(
      "strategy_run_start",
    ),
    step: event<StepInfo & { strategy: string }>("step"),
    progress: event<ProgressInfo & { strategy: string; context?: string }>("progress"),

    // LLM calls
    llmCallStart: event<{
      callId: string;
      model: string;
      schemaName?: string;
      systemLength: number;
      userLength: number;
      artifactCount: number;
    }>("llm_call_start"),
    llmCallComplete: event<{
      callId: string;
      success: boolean;
      inputTokens: number;
      outputTokens: number;
      totalTokens: number;
      durationMs?: number;
      error?: string;
    }>("llm_call_complete"),

    // Retry events
    retry: event<RetryInfo & { callId: string }>("retry"),

    // Validation
    validationStart: event<{
      callId: string;
      attempt: number;
      maxAttempts: number;
      strict: boolean;
    }>("validation_start"),
    validationSuccess: event<{ callId: string; attempt: number }>("validation_success"),
    validationFailed: event<{ callId: string; attempt: number; errors: unknown[] }>(
      "validation_failed",
    ),

    // Merging
    mergeStart: event<{ mergeId: string; inputCount: number; strategy: string }>("merge_start"),
    mergeComplete: event<{ mergeId: string; success: boolean; error?: string }>("merge_complete"),

    // Deduplication
    dedupeStart: event<{ dedupeId: string; itemCount: number }>("dedupe_start"),
    dedupeComplete: event<{ dedupeId: string; duplicatesFound: number; itemsRemoved: number }>(
      "dedupe_complete",
    ),

    // Token usage tracking
    tokenUsage: event<TokenUsageInfo & { context: string }>("token_usage"),

    // Results
    extractionComplete: event<{
      success: boolean;
      totalInputTokens: number;
      totalOutputTokens: number;
      totalTokens: number;
      error?: string;
    }>("extraction_complete"),

    // Prompt details (verbose)
    promptSystem: event<{ callId: string; system: string }>("prompt_system"),
    promptUser: event<{ callId: string; user: unknown }>("prompt_user"),

    // Raw response
    rawResponse: event<{ callId: string; response: unknown }>("raw_response"),

    // Smart merge details
    smartMergeField: event<{
      mergeId: string;
      field: string;
      operation: "merge_arrays" | "merge_objects" | "replace" | "concat";
      leftCount?: number;
      rightCount?: number;
      resultCount?: number;
    }>("smart_merge_field"),
  };
};
|
package/src/extract.test.ts
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import { test, expect } from "bun:test";
|
|
2
|
-
import { extract } from "./extract";
|
|
3
|
-
import type { ExtractionStrategy, ExtractionOptions } from "./types";
|
|
4
|
-
|
|
5
|
-
// Verifies that extract() hands the work to the supplied strategy exactly once
// and returns the strategy's data and usage unchanged.
test("extract delegates to strategy", async () => {
  let runCalls = 0;

  const strategy: ExtractionStrategy<{ ok: boolean }> = {
    name: "mock",
    run: async () => {
      runCalls += 1;
      return {
        data: { ok: true },
        usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
      };
    },
  };

  const options: ExtractionOptions<{ ok: boolean }> = {
    artifacts: [],
    schema: {},
    strategy,
  };

  const result = await extract(options);

  // extract() returns failures in `result.error` (with `data` set to null)
  // rather than throwing, so check the error first: a regression then fails
  // here with a readable message instead of a TypeError on `result.data.ok`.
  expect(result.error).toBeUndefined();
  expect(runCalls).toBe(1);
  expect(result.data.ok).toBe(true);
  expect(result.usage).toEqual({ inputTokens: 1, outputTokens: 1, totalTokens: 2 });
});
|
package/src/extract.ts
DELETED
|
@@ -1,150 +0,0 @@
|
|
|
1
|
-
import type { ExtractionOptions, ExtractionResult } from "./types";
|
|
2
|
-
import { buildSchemaFromFields } from "./fields";
|
|
3
|
-
|
|
4
|
-
// Zero-valued token usage reported in the result when an extraction fails.
const emptyUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
|
|
5
|
-
|
|
6
|
-
/**
 * Resolve and validate the schema from ExtractionOptions.
 * Exactly one of `schema` or `fields` must be provided.
 *
 * An explicit `null` is treated the same as an omitted value, so untyped
 * callers that pass `schema: null` get the "required" error instead of having
 * `null` handed on as the resolved schema.
 *
 * @param options - Extraction options carrying either `schema` or `fields`.
 * @returns The caller's `schema` unchanged, or the result of
 *   `buildSchemaFromFields` applied to `fields`.
 * @throws Error when both `schema` and `fields` are provided, or when neither is.
 */
const resolveSchema = <T>(options: ExtractionOptions<T>) => {
  const { schema, fields } = options;
  // `!= null` rejects both `undefined` and `null`.
  const hasSchema = schema != null;
  const hasFields = fields != null;

  if (hasSchema && hasFields) {
    throw new Error(
      "Provide either `schema` or `fields`, not both. They are mutually exclusive.",
    );
  }

  if (!hasSchema && !hasFields) {
    throw new Error(
      "A schema definition is required. Provide `schema` (a JSON Schema object) or `fields` (a shorthand fields string).",
    );
  }

  if (hasFields) {
    return buildSchemaFromFields(fields as string);
  }

  return schema as NonNullable<typeof schema>;
};
|
|
32
|
-
|
|
33
|
-
/**
 * Runs an extraction with the strategy given in `options` and returns its result.
 *
 * Flow: initialize telemetry (when provided) and open the root span, resolve
 * the schema via `resolveSchema`, emit a "start" step, run the strategy, emit
 * a "complete" step, then report the outcome to the debug logger and close the
 * telemetry span.
 *
 * Anything thrown after the root span is opened (schema validation, event
 * callbacks, the strategy itself) is not rethrown: it is returned as a result
 * with `data` set to `null`, zeroed usage, and `error` set. Values thrown that
 * are not `Error` instances are wrapped in an `Error` so `error.message` is
 * always available. Errors from `telemetry.initialize()` or
 * `telemetry.startSpan()` happen before the `try` block and still reject.
 *
 * @param options - Extraction options; must carry a strategy and exactly one
 *   of `schema` or `fields`.
 * @returns The strategy's result, or a failure result as described above.
 */
export const extract = async <T>(
  options: ExtractionOptions<T>,
): Promise<ExtractionResult<T>> => {
  const debug = options.debug;
  const telemetry = options.telemetry;

  // Initialize telemetry if provided
  if (telemetry) {
    await telemetry.initialize();
  }

  // Start root extraction span
  const rootSpan = telemetry?.startSpan({
    name: "struktur.extract",
    kind: "CHAIN",
    attributes: {
      "extraction.strategy": options.strategy?.name ?? "default",
      "extraction.artifacts.count": options.artifacts.length,
    },
  });

  try {
    // Validate mutual exclusion and resolve the concrete schema early so that
    // every strategy receives a fully-populated options object. A validation
    // error falls through to the shared failure path below.
    const schema = resolveSchema(options);
    const resolvedOptions: ExtractionOptions<T> = { ...options, schema };
    const strategy = resolvedOptions.strategy;

    const total = strategy.getEstimatedSteps?.(resolvedOptions.artifacts);

    debug?.strategyRunStart({
      strategy: strategy.name,
      estimatedSteps: total ?? 1,
      artifactCount: resolvedOptions.artifacts.length,
    });

    await resolvedOptions.events?.onStep?.({ step: 1, total, label: "start" });
    debug?.step({ step: 1, total, label: "start", strategy: strategy.name });

    const result = await strategy.run(resolvedOptions);

    await resolvedOptions.events?.onStep?.({
      step: total ?? 1,
      total,
      label: "complete",
    });
    debug?.step({
      step: total ?? 1,
      total,
      label: "complete",
      strategy: strategy.name,
    });

    debug?.extractionComplete({
      success: !result.error,
      totalInputTokens: result.usage.inputTokens,
      totalOutputTokens: result.usage.outputTokens,
      totalTokens: result.usage.totalTokens,
      error: result.error?.message,
    });

    // `rootSpan` is only undefined when `telemetry` is, in which case the
    // optional call below is skipped entirely.
    telemetry?.endSpan(rootSpan!, {
      status: result.error ? "error" : "ok",
      output: result.data,
      error: result.error,
    });
    await telemetry?.shutdown();

    return result;
  } catch (thrown) {
    // Normalize non-Error throwables so `message` exists and `error` is truthy.
    const error = thrown instanceof Error ? thrown : new Error(String(thrown));

    debug?.extractionComplete({
      success: false,
      totalInputTokens: 0,
      totalOutputTokens: 0,
      totalTokens: 0,
      error: error.message,
    });

    telemetry?.endSpan(rootSpan!, {
      status: "error",
      error,
    });
    await telemetry?.shutdown();

    return {
      data: null as unknown as T,
      // Fresh copy: callers may mutate the usage object they receive, and the
      // module-level constant must stay zeroed for later failures.
      usage: { ...emptyUsage },
      error,
    };
  }
};
|