@struktur/sdk 2.1.2 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/artifacts/fileToArtifact.d.ts +8 -0
- package/dist/artifacts/fileToArtifact.d.ts.map +1 -0
- package/dist/artifacts/input.d.ts +60 -0
- package/dist/artifacts/input.d.ts.map +1 -0
- package/{src/artifacts/providers.ts → dist/artifacts/providers.d.ts} +2 -4
- package/dist/artifacts/providers.d.ts.map +1 -0
- package/dist/artifacts/urlToArtifact.d.ts +3 -0
- package/dist/artifacts/urlToArtifact.d.ts.map +1 -0
- package/dist/auth/config.d.ts +34 -0
- package/dist/auth/config.d.ts.map +1 -0
- package/dist/auth/tokens.d.ts +18 -0
- package/dist/auth/tokens.d.ts.map +1 -0
- package/dist/chunking/ArtifactBatcher.d.ts +11 -0
- package/dist/chunking/ArtifactBatcher.d.ts.map +1 -0
- package/dist/chunking/ArtifactSplitter.d.ts +10 -0
- package/dist/chunking/ArtifactSplitter.d.ts.map +1 -0
- package/dist/debug/logger.d.ts +169 -0
- package/dist/debug/logger.d.ts.map +1 -0
- package/dist/extract.d.ts +3 -0
- package/dist/extract.d.ts.map +1 -0
- package/dist/fields.d.ts +75 -0
- package/dist/fields.d.ts.map +1 -0
- package/dist/index.d.ts +24 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +5603 -0
- package/dist/index.js.map +1 -0
- package/dist/llm/LLMClient.d.ts +40 -0
- package/dist/llm/LLMClient.d.ts.map +1 -0
- package/dist/llm/RetryingRunner.d.ts +37 -0
- package/dist/llm/RetryingRunner.d.ts.map +1 -0
- package/dist/llm/message.d.ts +12 -0
- package/dist/llm/message.d.ts.map +1 -0
- package/dist/llm/models.d.ts +13 -0
- package/dist/llm/models.d.ts.map +1 -0
- package/dist/llm/resolveModel.d.ts +3 -0
- package/dist/llm/resolveModel.d.ts.map +1 -0
- package/dist/merge/Deduplicator.d.ts +4 -0
- package/dist/merge/Deduplicator.d.ts.map +1 -0
- package/dist/merge/SmartDataMerger.d.ts +7 -0
- package/dist/merge/SmartDataMerger.d.ts.map +1 -0
- package/dist/parsers/collect.d.ts +7 -0
- package/dist/parsers/collect.d.ts.map +1 -0
- package/{src/parsers/index.ts → dist/parsers/index.d.ts} +1 -0
- package/dist/parsers/index.d.ts.map +1 -0
- package/dist/parsers/mime.d.ts +12 -0
- package/dist/parsers/mime.d.ts.map +1 -0
- package/dist/parsers/npm.d.ts +16 -0
- package/dist/parsers/npm.d.ts.map +1 -0
- package/dist/parsers/pdf.d.ts +36 -0
- package/dist/parsers/pdf.d.ts.map +1 -0
- package/dist/parsers/runner.d.ts +4 -0
- package/dist/parsers/runner.d.ts.map +1 -0
- package/dist/parsers/types.d.ts +27 -0
- package/dist/parsers/types.d.ts.map +1 -0
- package/dist/parsers.d.ts +1 -0
- package/dist/parsers.js +492 -0
- package/dist/parsers.js.map +1 -0
- package/dist/prompts/DeduplicationPrompt.d.ts +5 -0
- package/dist/prompts/DeduplicationPrompt.d.ts.map +1 -0
- package/dist/prompts/ExtractorPrompt.d.ts +6 -0
- package/dist/prompts/ExtractorPrompt.d.ts.map +1 -0
- package/dist/prompts/ParallelMergerPrompt.d.ts +5 -0
- package/dist/prompts/ParallelMergerPrompt.d.ts.map +1 -0
- package/dist/prompts/SequentialExtractorPrompt.d.ts +6 -0
- package/dist/prompts/SequentialExtractorPrompt.d.ts.map +1 -0
- package/dist/prompts/formatArtifacts.d.ts +3 -0
- package/dist/prompts/formatArtifacts.d.ts.map +1 -0
- package/dist/strategies/DoublePassAutoMergeStrategy.d.ts +23 -0
- package/dist/strategies/DoublePassAutoMergeStrategy.d.ts.map +1 -0
- package/dist/strategies/DoublePassStrategy.d.ts +22 -0
- package/dist/strategies/DoublePassStrategy.d.ts.map +1 -0
- package/dist/strategies/ParallelAutoMergeStrategy.d.ts +27 -0
- package/dist/strategies/ParallelAutoMergeStrategy.d.ts.map +1 -0
- package/dist/strategies/ParallelStrategy.d.ts +22 -0
- package/dist/strategies/ParallelStrategy.d.ts.map +1 -0
- package/dist/strategies/SequentialAutoMergeStrategy.d.ts +22 -0
- package/dist/strategies/SequentialAutoMergeStrategy.d.ts.map +1 -0
- package/dist/strategies/SequentialStrategy.d.ts +20 -0
- package/dist/strategies/SequentialStrategy.d.ts.map +1 -0
- package/dist/strategies/SimpleStrategy.d.ts +18 -0
- package/dist/strategies/SimpleStrategy.d.ts.map +1 -0
- package/dist/strategies/agent/AgentStrategy.d.ts +44 -0
- package/dist/strategies/agent/AgentStrategy.d.ts.map +1 -0
- package/dist/strategies/agent/AgentTools.d.ts +55 -0
- package/dist/strategies/agent/AgentTools.d.ts.map +1 -0
- package/dist/strategies/agent/ArtifactFilesystem.d.ts +51 -0
- package/dist/strategies/agent/ArtifactFilesystem.d.ts.map +1 -0
- package/dist/strategies/agent/index.d.ts +4 -0
- package/dist/strategies/agent/index.d.ts.map +1 -0
- package/dist/strategies/concurrency.d.ts +2 -0
- package/dist/strategies/concurrency.d.ts.map +1 -0
- package/{src/strategies/index.ts → dist/strategies/index.d.ts} +2 -0
- package/dist/strategies/index.d.ts.map +1 -0
- package/dist/strategies/utils.d.ts +39 -0
- package/dist/strategies/utils.d.ts.map +1 -0
- package/dist/strategies.d.ts +1 -0
- package/dist/strategies.js +3930 -0
- package/dist/strategies.js.map +1 -0
- package/dist/tokenization.d.ts +11 -0
- package/dist/tokenization.d.ts.map +1 -0
- package/dist/types.d.ts +178 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/validation/validator.d.ts +20 -0
- package/dist/validation/validator.d.ts.map +1 -0
- package/package.json +30 -14
- package/src/agent-cli-integration.test.ts +0 -47
- package/src/agent-export.test.ts +0 -17
- package/src/agent-tool-labels.test.ts +0 -50
- package/src/artifacts/AGENTS.md +0 -16
- package/src/artifacts/fileToArtifact.test.ts +0 -37
- package/src/artifacts/fileToArtifact.ts +0 -44
- package/src/artifacts/input.test.ts +0 -243
- package/src/artifacts/input.ts +0 -360
- package/src/artifacts/providers.test.ts +0 -19
- package/src/artifacts/urlToArtifact.test.ts +0 -23
- package/src/artifacts/urlToArtifact.ts +0 -19
- package/src/auth/AGENTS.md +0 -11
- package/src/auth/config.test.ts +0 -132
- package/src/auth/config.ts +0 -186
- package/src/auth/tokens.test.ts +0 -58
- package/src/auth/tokens.ts +0 -229
- package/src/chunking/AGENTS.md +0 -11
- package/src/chunking/ArtifactBatcher.test.ts +0 -22
- package/src/chunking/ArtifactBatcher.ts +0 -110
- package/src/chunking/ArtifactSplitter.test.ts +0 -38
- package/src/chunking/ArtifactSplitter.ts +0 -151
- package/src/debug/AGENTS.md +0 -79
- package/src/debug/logger.test.ts +0 -244
- package/src/debug/logger.ts +0 -211
- package/src/extract.test.ts +0 -22
- package/src/extract.ts +0 -150
- package/src/fields.test.ts +0 -681
- package/src/fields.ts +0 -246
- package/src/index.test.ts +0 -20
- package/src/index.ts +0 -110
- package/src/llm/AGENTS.md +0 -9
- package/src/llm/LLMClient.test.ts +0 -394
- package/src/llm/LLMClient.ts +0 -264
- package/src/llm/RetryingRunner.test.ts +0 -174
- package/src/llm/RetryingRunner.ts +0 -270
- package/src/llm/message.test.ts +0 -42
- package/src/llm/message.ts +0 -47
- package/src/llm/models.test.ts +0 -82
- package/src/llm/models.ts +0 -190
- package/src/llm/resolveModel.ts +0 -86
- package/src/merge/AGENTS.md +0 -6
- package/src/merge/Deduplicator.test.ts +0 -108
- package/src/merge/Deduplicator.ts +0 -45
- package/src/merge/SmartDataMerger.test.ts +0 -177
- package/src/merge/SmartDataMerger.ts +0 -56
- package/src/parsers/AGENTS.md +0 -58
- package/src/parsers/collect.test.ts +0 -56
- package/src/parsers/collect.ts +0 -31
- package/src/parsers/mime.test.ts +0 -91
- package/src/parsers/mime.ts +0 -137
- package/src/parsers/npm.ts +0 -26
- package/src/parsers/pdf.test.ts +0 -394
- package/src/parsers/pdf.ts +0 -194
- package/src/parsers/runner.test.ts +0 -95
- package/src/parsers/runner.ts +0 -177
- package/src/parsers/types.ts +0 -29
- package/src/prompts/AGENTS.md +0 -8
- package/src/prompts/DeduplicationPrompt.test.ts +0 -41
- package/src/prompts/DeduplicationPrompt.ts +0 -37
- package/src/prompts/ExtractorPrompt.test.ts +0 -21
- package/src/prompts/ExtractorPrompt.ts +0 -72
- package/src/prompts/ParallelMergerPrompt.test.ts +0 -8
- package/src/prompts/ParallelMergerPrompt.ts +0 -37
- package/src/prompts/SequentialExtractorPrompt.test.ts +0 -24
- package/src/prompts/SequentialExtractorPrompt.ts +0 -82
- package/src/prompts/formatArtifacts.test.ts +0 -39
- package/src/prompts/formatArtifacts.ts +0 -46
- package/src/strategies/AGENTS.md +0 -6
- package/src/strategies/DoublePassAutoMergeStrategy.test.ts +0 -53
- package/src/strategies/DoublePassAutoMergeStrategy.ts +0 -410
- package/src/strategies/DoublePassStrategy.test.ts +0 -48
- package/src/strategies/DoublePassStrategy.ts +0 -266
- package/src/strategies/ParallelAutoMergeStrategy.test.ts +0 -152
- package/src/strategies/ParallelAutoMergeStrategy.ts +0 -345
- package/src/strategies/ParallelStrategy.test.ts +0 -61
- package/src/strategies/ParallelStrategy.ts +0 -208
- package/src/strategies/SequentialAutoMergeStrategy.test.ts +0 -66
- package/src/strategies/SequentialAutoMergeStrategy.ts +0 -325
- package/src/strategies/SequentialStrategy.test.ts +0 -53
- package/src/strategies/SequentialStrategy.ts +0 -142
- package/src/strategies/SimpleStrategy.test.ts +0 -46
- package/src/strategies/SimpleStrategy.ts +0 -94
- package/src/strategies/concurrency.test.ts +0 -16
- package/src/strategies/concurrency.ts +0 -14
- package/src/strategies/index.test.ts +0 -20
- package/src/strategies/utils.test.ts +0 -76
- package/src/strategies/utils.ts +0 -95
- package/src/tokenization.test.ts +0 -119
- package/src/tokenization.ts +0 -71
- package/src/types.test.ts +0 -25
- package/src/types.ts +0 -174
- package/src/validation/AGENTS.md +0 -7
- package/src/validation/validator.test.ts +0 -204
- package/src/validation/validator.ts +0 -90
- package/tsconfig.json +0 -22
|
@@ -1,266 +0,0 @@
|
|
|
1
|
-
import type { ExtractionResult, ExtractionStrategy } from "../types";
|
|
2
|
-
import type { ExtractionOptions } from "../types";
|
|
3
|
-
import { buildExtractorPrompt } from "../prompts/ExtractorPrompt";
|
|
4
|
-
import { buildParallelMergerPrompt } from "../prompts/ParallelMergerPrompt";
|
|
5
|
-
import { buildSequentialPrompt } from "../prompts/SequentialExtractorPrompt";
|
|
6
|
-
import {
|
|
7
|
-
extractWithPrompt,
|
|
8
|
-
getBatches,
|
|
9
|
-
mergeUsage,
|
|
10
|
-
serializeSchema,
|
|
11
|
-
} from "./utils";
|
|
12
|
-
import { runConcurrently } from "./concurrency";
|
|
13
|
-
import { runWithRetries } from "../llm/RetryingRunner";
|
|
14
|
-
|
|
15
|
-
/**
 * Configuration accepted by `DoublePassStrategy`.
 */
export type DoublePassStrategyConfig = {
  /** Model handle used for the per-batch extraction calls (opaque to this module). */
  model: unknown;
  /** Model handle used for the call that merges the pass-1 batch results. */
  mergeModel: unknown;
  /** Forwarded to `getBatches` as `maxTokens` when splitting artifacts into batches. */
  chunkSize: number;
  /** Limit handed to `runConcurrently` for pass 1; the batch count is used when omitted. */
  concurrency?: number;
  /** Forwarded to `getBatches` as `maxImages`. */
  maxImages?: number;
  /** Extra instructions forwarded to the extractor and sequential prompt builders. */
  outputInstructions?: string;
  /** Optional replacement for `runWithRetries`, forwarded to `extractWithPrompt`. */
  execute?: typeof runWithRetries;
  /** Strictness flag forwarded to `extractWithPrompt`. */
  strict?: boolean;
};
|
|
25
|
-
|
|
26
|
-
/**
 * Extraction strategy that visits every batch of artifacts twice.
 *
 * Pass 1 runs one extraction call per batch through `runConcurrently`, then
 * merges the per-batch results with a single call to `config.mergeModel`.
 * Pass 2 walks the same batches sequentially, feeding the current data
 * (JSON-serialised) into each prompt and replacing it with the call's result.
 *
 * NOTE(review): telemetry spans are only ended on the success path; if any
 * call throws, the spans opened so far are left open.
 */
export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
  public name = "double-pass";
  private config: DoublePassStrategyConfig;

  constructor(config: DoublePassStrategyConfig) {
    this.config = config;
  }

  /**
   * Estimates how many progress steps `run` will report for `artifacts`.
   *
   * @param artifacts - Artifacts that would be passed to `run`.
   * @returns Twice the batch count plus 3. The constant presumably covers
   *   the merge call and surrounding bookkeeping steps — TODO confirm.
   */
  getEstimatedSteps(artifacts: ExtractionOptions<T>["artifacts"]): number {
    const batches = getBatches(artifacts, {
      maxTokens: this.config.chunkSize,
      maxImages: this.config.maxImages,
    });
    return batches.length * 2 + 3;
  }

  /**
   * Runs both passes and returns the final data with the merged usage.
   *
   * @param options - Extraction options (artifacts, schema, optional events,
   *   debug logger, telemetry and per-call `strict` override).
   * @returns The data produced by the last pass-2 call (or the pass-1 merge
   *   when there are no batches) and the usage of all calls combined via
   *   `mergeUsage`.
   */
  async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
    const { debug, telemetry } = options;
    const activeTelemetry = telemetry ?? undefined;

    // A per-call `strict` takes precedence over the configured default for
    // EVERY model call (pass 1, merge and pass 2), not just pass-1 batches.
    const strict = options.strict ?? this.config.strict;

    // Create strategy-level span
    const strategySpan = telemetry?.startSpan({
      name: "strategy.double-pass",
      kind: "CHAIN",
      attributes: {
        "strategy.name": this.name,
        "strategy.artifacts.count": options.artifacts.length,
        "strategy.chunk_size": this.config.chunkSize,
        "strategy.concurrency": this.config.concurrency,
      },
    });

    const batches = getBatches(
      options.artifacts,
      {
        maxTokens: this.config.chunkSize,
        maxImages: this.config.maxImages,
      },
      debug,
      activeTelemetry,
      strategySpan,
    );

    const schema = serializeSchema(options.schema);
    const totalSteps = this.getEstimatedSteps(options.artifacts);
    let step = 1;

    // Advances the shared counter and reports progress. The step number is
    // captured before awaiting so that the event and the debug entry always
    // agree, even when concurrent pass-1 tasks bump the counter meanwhile.
    const reportStep = async (label: string): Promise<void> => {
      step += 1;
      const current = step;
      await options.events?.onStep?.({
        step: current,
        total: totalSteps,
        label,
      });
      debug?.step({
        step: current,
        total: totalSteps,
        label,
        strategy: this.name,
      });
    };

    // Arguments shared by every extractWithPrompt call in this run.
    const sharedCallOptions = {
      schema: options.schema,
      events: options.events,
      execute: this.config.execute as never,
      strict,
      debug,
      telemetry: activeTelemetry,
    };

    // Create pass 1 span
    const pass1Span = telemetry?.startSpan({
      name: "struktur.pass_1",
      kind: "CHAIN",
      parentSpan: strategySpan,
      attributes: {
        "pass.number": 1,
        "pass.type": "parallel_extraction",
      },
    });

    const tasks = batches.map((batch, index) => async () => {
      const prompt = buildExtractorPrompt(
        batch,
        schema,
        this.config.outputInstructions,
      );
      const result = await extractWithPrompt<T>({
        ...sharedCallOptions,
        model: this.config.model,
        system: prompt.system,
        user: prompt.user,
        artifacts: batch,
        callId: `double_pass_1_batch_${index + 1}`,
        parentSpan: pass1Span,
      });
      await reportStep(`pass 1 batch ${index + 1}/${batches.length}`);
      return result;
    });

    const results = await runConcurrently(
      tasks,
      this.config.concurrency ?? batches.length,
    );

    debug?.mergeStart({
      mergeId: "double_pass_1_merge",
      inputCount: results.length,
      strategy: this.name,
    });

    // Create pass 1 merge span
    const pass1MergeSpan = telemetry?.startSpan({
      name: "struktur.pass_1_merge",
      kind: "CHAIN",
      parentSpan: pass1Span,
      attributes: {
        "merge.strategy": "parallel",
        "merge.input_count": results.length,
      },
    });

    const mergePrompt = buildParallelMergerPrompt(
      schema,
      results.map((r) => r.data),
    );
    // The merge call works purely from the pass-1 outputs, so no artifacts
    // are attached.
    const merged = await extractWithPrompt<T>({
      ...sharedCallOptions,
      model: this.config.mergeModel,
      system: mergePrompt.system,
      user: mergePrompt.user,
      artifacts: [],
      callId: "double_pass_1_merge",
      parentSpan: pass1MergeSpan,
    });

    await reportStep("pass 1 merge");
    debug?.mergeComplete({ mergeId: "double_pass_1_merge", success: true });

    // End pass 1 merge span
    if (pass1MergeSpan && telemetry) {
      telemetry.recordEvent(pass1MergeSpan, {
        type: "merge",
        strategy: "parallel",
        inputCount: results.length,
        outputCount: 1,
      });
      telemetry.endSpan(pass1MergeSpan, {
        status: "ok",
        output: merged.data,
      });
    }

    // End pass 1 span
    telemetry?.endSpan(pass1Span!, {
      status: "ok",
      output: merged.data,
    });

    // Create pass 2 span
    const pass2Span = telemetry?.startSpan({
      name: "struktur.pass_2",
      kind: "CHAIN",
      parentSpan: strategySpan,
      attributes: {
        "pass.number": 2,
        "pass.type": "sequential_refinement",
      },
    });

    let currentData = merged.data;
    const usages = [...results.map((r) => r.usage), merged.usage];

    // Pass 2 is deliberately sequential: each call receives the data
    // produced by the previous one.
    for (const [index, batch] of batches.entries()) {
      const prompt = buildSequentialPrompt(
        batch,
        schema,
        JSON.stringify(currentData),
        this.config.outputInstructions,
      );

      const result = await extractWithPrompt<T>({
        ...sharedCallOptions,
        model: this.config.model,
        system: prompt.system,
        user: prompt.user,
        artifacts: batch,
        callId: `double_pass_2_batch_${index + 1}`,
        parentSpan: pass2Span,
      });

      currentData = result.data;
      usages.push(result.usage);

      await reportStep(`pass 2 batch ${index + 1}/${batches.length}`);
    }

    // End pass 2 span
    telemetry?.endSpan(pass2Span!, {
      status: "ok",
      output: currentData,
    });

    // End strategy span
    telemetry?.endSpan(strategySpan!, {
      status: "ok",
      output: currentData,
    });

    return { data: currentData, usage: mergeUsage(usages) };
  }
}
|
|
263
|
-
|
|
264
|
-
/**
 * Convenience factory: builds a `DoublePassStrategy` from `config`.
 *
 * @param config - Strategy configuration, passed to the constructor unchanged.
 * @returns A new `DoublePassStrategy<T>` instance.
 */
export const doublePass = <T>(config: DoublePassStrategyConfig) =>
  new DoublePassStrategy<T>(config);
|
|
@@ -1,152 +0,0 @@
|
|
|
1
|
-
import { test, expect } from "bun:test";
|
|
2
|
-
import type { JSONSchemaType } from "ajv";
|
|
3
|
-
import { ParallelAutoMergeStrategy, __testing__ } from "./ParallelAutoMergeStrategy";
|
|
4
|
-
import type { Artifact, ExtractionOptions } from "../types";
|
|
5
|
-
|
|
6
|
-
/** Shape of the extraction result used throughout these tests. */
type Output = { items: Array<{ id: number }> };

/** JSON schema matching `Output`: an object holding an array of `{ id }` records. */
const schema: JSONSchemaType<Output> = {
  type: "object",
  required: ["items"],
  additionalProperties: false,
  properties: {
    items: {
      type: "array",
      items: {
        type: "object",
        required: ["id"],
        additionalProperties: false,
        properties: { id: { type: "number" } },
      },
    },
  },
};

/** Builds a text artifact with an empty raw buffer and a fixed 8-character body. */
const makeTextArtifact = (id: string): Artifact => ({
  id,
  type: "text",
  raw: async () => Buffer.from(""),
  contents: [{ text: "abcdefgh" }],
});

/** Two identical-content artifacts that differ only by id. */
const artifacts: Artifact[] = ["a1", "a2"].map(makeTextArtifact);
|
|
39
|
-
|
|
40
|
-
// End-to-end check: every stubbed extraction call reports the same duplicated
// item, and the strategy is expected to return a single item.
test("ParallelAutoMergeStrategy deduplicates arrays", async () => {
  // Builds a stub executor that resolves with fresh data and a fixed usage
  // record on each call; cast to `any` because the real executor signature
  // is not reproduced here.
  const stubExecutor = (makeData: () => unknown) =>
    (async () => ({
      data: makeData(),
      usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
    })) as any;

  const strategy = new ParallelAutoMergeStrategy<Output>({
    model: {},
    // presumably small enough to force more than one batch — TODO confirm
    chunkSize: 2,
    execute: stubExecutor(() => ({ items: [{ id: 1 }, { id: 1 }] })),
    dedupeExecute: stubExecutor(() => ({ keys: [] })),
  });

  const options: ExtractionOptions<Output> = { artifacts, schema, strategy };

  const { data } = await strategy.run(options);
  expect(data.items.length).toBe(1);
});
|
|
67
|
-
|
|
68
|
-
// dedupeArrays: duplicates are dropped from each array field while scalar
// fields are returned untouched.
test("dedupeArrays removes duplicates from all array fields", () => {
  const deduped = __testing__.dedupeArrays({
    items: [{ id: 1 }, { id: 1 }, { id: 2 }],
    names: ["a", "a", "b"],
    count: 5,
  });

  expect(deduped.items).toEqual([{ id: 1 }, { id: 2 }]);
  expect(deduped.names).toEqual(["a", "b"]);
  expect(deduped.count).toBe(5);
});

// dedupeArrays: an object without any array fields comes back equal to the input.
test("dedupeArrays handles non-array fields", () => {
  const deduped = __testing__.dedupeArrays({
    title: "test",
    count: 42,
  });

  expect(deduped).toEqual({ title: "test", count: 42 });
});
|
|
92
|
-
|
|
93
|
-
// removeByPath: paths have the form "<field>.<index>" and address one array element.
test("removeByPath removes item at path", () => {
  const input = { items: [{ id: 1 }, { id: 2 }, { id: 3 }] };

  const { items } = __testing__.removeByPath(input, "items.1");

  expect(items).toEqual([{ id: 1 }, { id: 3 }]);
});

test("removeByPath handles first item", () => {
  const input = { items: [{ id: 1 }, { id: 2 }] };

  const { items } = __testing__.removeByPath(input, "items.0");

  expect(items).toEqual([{ id: 2 }]);
});

test("removeByPath handles last item", () => {
  const input = { items: [{ id: 1 }, { id: 2 }] };

  const { items } = __testing__.removeByPath(input, "items.1");

  expect(items).toEqual([{ id: 1 }]);
});

// Empty path, missing index, non-numeric index and unknown field must all be no-ops.
test("removeByPath returns unchanged data for invalid path", () => {
  const input = { items: [{ id: 1 }] };

  for (const badPath of ["", "items", "items.abc", "missing.0"]) {
    expect(__testing__.removeByPath(input, badPath)).toEqual(input);
  }
});

test("removeByPath returns unchanged data for non-array field", () => {
  const input = { title: "test" };

  expect(__testing__.removeByPath(input, "title.0")).toEqual(input);
});

// The caller's object must still hold both items after a removal.
test("removeByPath does not mutate original data", () => {
  const input = { items: [{ id: 1 }, { id: 2 }] };

  __testing__.removeByPath(input, "items.0");

  expect(input.items).toEqual([{ id: 1 }, { id: 2 }]);
});
|