@struktur/sdk 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/dist/index.js +4111 -0
  2. package/dist/index.js.map +1 -0
  3. package/dist/parsers.js +492 -0
  4. package/dist/parsers.js.map +1 -0
  5. package/dist/strategies.js +2435 -0
  6. package/dist/strategies.js.map +1 -0
  7. package/package.json +25 -13
  8. package/src/agent-cli-integration.test.ts +0 -47
  9. package/src/agent-export.test.ts +0 -17
  10. package/src/agent-tool-labels.test.ts +0 -50
  11. package/src/artifacts/AGENTS.md +0 -16
  12. package/src/artifacts/fileToArtifact.test.ts +0 -37
  13. package/src/artifacts/fileToArtifact.ts +0 -44
  14. package/src/artifacts/input.test.ts +0 -243
  15. package/src/artifacts/input.ts +0 -360
  16. package/src/artifacts/providers.test.ts +0 -19
  17. package/src/artifacts/providers.ts +0 -7
  18. package/src/artifacts/urlToArtifact.test.ts +0 -23
  19. package/src/artifacts/urlToArtifact.ts +0 -19
  20. package/src/auth/AGENTS.md +0 -11
  21. package/src/auth/config.test.ts +0 -132
  22. package/src/auth/config.ts +0 -186
  23. package/src/auth/tokens.test.ts +0 -58
  24. package/src/auth/tokens.ts +0 -229
  25. package/src/chunking/AGENTS.md +0 -11
  26. package/src/chunking/ArtifactBatcher.test.ts +0 -22
  27. package/src/chunking/ArtifactBatcher.ts +0 -110
  28. package/src/chunking/ArtifactSplitter.test.ts +0 -38
  29. package/src/chunking/ArtifactSplitter.ts +0 -151
  30. package/src/debug/AGENTS.md +0 -79
  31. package/src/debug/logger.test.ts +0 -244
  32. package/src/debug/logger.ts +0 -211
  33. package/src/extract.test.ts +0 -22
  34. package/src/extract.ts +0 -150
  35. package/src/fields.test.ts +0 -681
  36. package/src/fields.ts +0 -246
  37. package/src/index.test.ts +0 -20
  38. package/src/index.ts +0 -110
  39. package/src/llm/AGENTS.md +0 -9
  40. package/src/llm/LLMClient.test.ts +0 -394
  41. package/src/llm/LLMClient.ts +0 -264
  42. package/src/llm/RetryingRunner.test.ts +0 -174
  43. package/src/llm/RetryingRunner.ts +0 -270
  44. package/src/llm/message.test.ts +0 -42
  45. package/src/llm/message.ts +0 -47
  46. package/src/llm/models.test.ts +0 -82
  47. package/src/llm/models.ts +0 -190
  48. package/src/llm/resolveModel.ts +0 -86
  49. package/src/merge/AGENTS.md +0 -6
  50. package/src/merge/Deduplicator.test.ts +0 -108
  51. package/src/merge/Deduplicator.ts +0 -45
  52. package/src/merge/SmartDataMerger.test.ts +0 -177
  53. package/src/merge/SmartDataMerger.ts +0 -56
  54. package/src/parsers/AGENTS.md +0 -58
  55. package/src/parsers/collect.test.ts +0 -56
  56. package/src/parsers/collect.ts +0 -31
  57. package/src/parsers/index.ts +0 -6
  58. package/src/parsers/mime.test.ts +0 -91
  59. package/src/parsers/mime.ts +0 -137
  60. package/src/parsers/npm.ts +0 -26
  61. package/src/parsers/pdf.test.ts +0 -394
  62. package/src/parsers/pdf.ts +0 -194
  63. package/src/parsers/runner.test.ts +0 -95
  64. package/src/parsers/runner.ts +0 -177
  65. package/src/parsers/types.ts +0 -29
  66. package/src/prompts/AGENTS.md +0 -8
  67. package/src/prompts/DeduplicationPrompt.test.ts +0 -41
  68. package/src/prompts/DeduplicationPrompt.ts +0 -37
  69. package/src/prompts/ExtractorPrompt.test.ts +0 -21
  70. package/src/prompts/ExtractorPrompt.ts +0 -72
  71. package/src/prompts/ParallelMergerPrompt.test.ts +0 -8
  72. package/src/prompts/ParallelMergerPrompt.ts +0 -37
  73. package/src/prompts/SequentialExtractorPrompt.test.ts +0 -24
  74. package/src/prompts/SequentialExtractorPrompt.ts +0 -82
  75. package/src/prompts/formatArtifacts.test.ts +0 -39
  76. package/src/prompts/formatArtifacts.ts +0 -46
  77. package/src/strategies/AGENTS.md +0 -6
  78. package/src/strategies/DoublePassAutoMergeStrategy.test.ts +0 -53
  79. package/src/strategies/DoublePassAutoMergeStrategy.ts +0 -410
  80. package/src/strategies/DoublePassStrategy.test.ts +0 -48
  81. package/src/strategies/DoublePassStrategy.ts +0 -266
  82. package/src/strategies/ParallelAutoMergeStrategy.test.ts +0 -152
  83. package/src/strategies/ParallelAutoMergeStrategy.ts +0 -345
  84. package/src/strategies/ParallelStrategy.test.ts +0 -61
  85. package/src/strategies/ParallelStrategy.ts +0 -208
  86. package/src/strategies/SequentialAutoMergeStrategy.test.ts +0 -66
  87. package/src/strategies/SequentialAutoMergeStrategy.ts +0 -325
  88. package/src/strategies/SequentialStrategy.test.ts +0 -53
  89. package/src/strategies/SequentialStrategy.ts +0 -142
  90. package/src/strategies/SimpleStrategy.test.ts +0 -46
  91. package/src/strategies/SimpleStrategy.ts +0 -94
  92. package/src/strategies/concurrency.test.ts +0 -16
  93. package/src/strategies/concurrency.ts +0 -14
  94. package/src/strategies/index.test.ts +0 -20
  95. package/src/strategies/index.ts +0 -7
  96. package/src/strategies/utils.test.ts +0 -76
  97. package/src/strategies/utils.ts +0 -95
  98. package/src/tokenization.test.ts +0 -119
  99. package/src/tokenization.ts +0 -71
  100. package/src/types.test.ts +0 -25
  101. package/src/types.ts +0 -174
  102. package/src/validation/AGENTS.md +0 -7
  103. package/src/validation/validator.test.ts +0 -204
  104. package/src/validation/validator.ts +0 -90
  105. package/tsconfig.json +0 -22
@@ -1,325 +0,0 @@
1
- import type { ExtractionResult, ExtractionStrategy } from "../types";
2
- import type { ExtractionOptions } from "../types";
3
- import { buildExtractorPrompt } from "../prompts/ExtractorPrompt";
4
- import { buildDeduplicationPrompt } from "../prompts/DeduplicationPrompt";
5
- import {
6
- extractWithPrompt,
7
- getBatches,
8
- mergeUsage,
9
- serializeSchema,
10
- } from "./utils";
11
- import { SmartDataMerger } from "../merge/SmartDataMerger";
12
- import {
13
- findExactDuplicatesWithHashing,
14
- deduplicateByIndices,
15
- } from "../merge/Deduplicator";
16
- import { runWithRetries } from "../llm/RetryingRunner";
17
-
18
/**
 * Configuration accepted by `SequentialAutoMergeStrategy`.
 */
export type SequentialAutoMergeStrategyConfig = {
  /** Model handed to `extractWithPrompt` for every batch; also the fallback model for the dedupe call. */
  model: unknown;
  /** Forwarded to `getBatches` as `maxTokens`. */
  chunkSize: number;
  /** Forwarded to `getBatches` as `maxImages`. */
  maxImages?: number;
  /** Forwarded to `buildExtractorPrompt` as its third argument. */
  outputInstructions?: string;
  /** Model for the dedupe call; `model` is used when this is null or undefined. */
  dedupeModel?: unknown;
  /** Forwarded as `execute` to `extractWithPrompt` for the per-batch calls. */
  execute?: typeof runWithRetries;
  /** Forwarded as `execute` to `runWithRetries` for the dedupe call. */
  dedupeExecute?: typeof runWithRetries;
  /**
   * Used for the per-batch calls only when `options.strict` is null or undefined;
   * passed unchanged to the dedupe call.
   */
  strict?: boolean;
};
28
-
29
/**
 * JSON schema passed as `schema` to the dedupe `runWithRetries` call:
 * an object with exactly one required property, `keys`, holding an array of strings.
 */
const dedupeSchema = {
  type: "object",
  properties: {
    keys: {
      type: "array",
      items: { type: "string" },
    },
  },
  required: ["keys"],
  additionalProperties: false,
} as const;
37
-
38
/**
 * Returns a shallow copy of `data` in which every top-level array value has been
 * replaced by `deduplicateByIndices(value, findExactDuplicatesWithHashing(value))`.
 * Non-array values are copied through untouched and `data` itself is not modified.
 */
const dedupeArrays = (data: Record<string, unknown>) => {
  const output: Record<string, unknown> = { ...data };
  Object.keys(output).forEach((field) => {
    const items = output[field];
    if (!Array.isArray(items)) {
      return;
    }
    const duplicateIndices = findExactDuplicatesWithHashing(items);
    output[field] = deduplicateByIndices(items, duplicateIndices);
  });
  return output;
};
48
-
49
/**
 * Removes one element from a top-level array field of `data`.
 *
 * @param data - Object whose top-level array field should lose one element.
 * @param path - `"<field>.<index>"`; only the first two dot-separated segments
 *   are read, any further segments are ignored.
 * @returns A shallow copy of `data` with the element at `index` removed from
 *   `data[field]`, or `data` itself (same reference) when the path does not
 *   designate an existing element of an array field. `data` is never mutated.
 */
const removeByPath = (data: Record<string, unknown>, path: string) => {
  const [root, indexStr] = path.split(".");
  // Number("") and Number("  ") are 0, so a missing or blank index segment must be
  // rejected explicitly or "items." would silently delete the first element.
  if (!root || indexStr === undefined || indexStr.trim() === "") {
    return data;
  }

  // Negative values would make splice-style removal count from the end, and
  // fractional values would be truncated; neither designates a real position.
  const index = Number(indexStr);
  if (!Number.isInteger(index) || index < 0) {
    return data;
  }

  const value = data[root];
  if (!Array.isArray(value) || index >= value.length) {
    return data;
  }

  const next = value.filter((_, position) => position !== index);
  return { ...data, [root]: next };
};
65
-
66
/**
 * Extraction strategy that makes one extraction call per artifact batch, in order,
 * folds every batch result into an accumulator with `SmartDataMerger`, runs
 * `dedupeArrays` over the accumulator, and finally asks a model for a list of
 * `"<field>.<index>"` paths that are removed from the result via `removeByPath`.
 */
export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
  public name = "sequential-auto-merge";
  private config: SequentialAutoMergeStrategyConfig;

  constructor(config: SequentialAutoMergeStrategyConfig) {
    this.config = config;
  }

  /**
   * @returns The number of batches `getBatches` produces for `artifacts`, plus three.
   */
  getEstimatedSteps(artifacts: ExtractionOptions<T>["artifacts"]): number {
    const batches = getBatches(artifacts, {
      maxTokens: this.config.chunkSize,
      maxImages: this.config.maxImages,
    });
    return batches.length + 3;
  }

  /**
   * Runs the batch → merge → exact dedupe → model dedupe pipeline.
   *
   * @param options - Extraction options; `artifacts`, `schema`, `events`, `strict`,
   *   `debug` and `telemetry` are read.
   * @returns The deduplicated merged data (cast to `T`) and the combined usage of
   *   all batch calls plus the dedupe call.
   */
  async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
    const debug = options.debug;
    const { telemetry } = options;

    // Create strategy-level span
    const strategySpan = telemetry?.startSpan({
      name: "strategy.sequential-auto-merge",
      kind: "CHAIN",
      attributes: {
        "strategy.name": this.name,
        "strategy.artifacts.count": options.artifacts.length,
        "strategy.chunk_size": this.config.chunkSize,
      },
    });

    const batches = getBatches(
      options.artifacts,
      {
        maxTokens: this.config.chunkSize,
        maxImages: this.config.maxImages,
      },
      debug,
      telemetry ?? undefined,
      strategySpan,
    );

    const schema = serializeSchema(options.schema);
    const merger = new SmartDataMerger(
      options.schema as Record<string, unknown>,
    );
    let merged = {} as Record<string, unknown>;
    const usages = [];
    const totalSteps = this.getEstimatedSteps(options.artifacts);
    let step = 1;

    debug?.mergeStart({
      mergeId: "sequential_auto_merge",
      inputCount: batches.length,
      strategy: this.name,
    });

    // Create smart merge span
    const mergeSpan = telemetry?.startSpan({
      name: "struktur.smart_merge",
      kind: "CHAIN",
      parentSpan: strategySpan,
      attributes: {
        "merge.strategy": "smart",
        "merge.input_count": batches.length,
      },
    });

    for (const [index, batch] of batches.entries()) {
      const prompt = buildExtractorPrompt(
        batch,
        schema,
        this.config.outputInstructions,
      );
      const result = await extractWithPrompt<T>({
        model: this.config.model,
        schema: options.schema,
        system: prompt.system,
        user: prompt.user,
        artifacts: batch,
        events: options.events,
        execute: this.config.execute as never,
        strict: options.strict ?? this.config.strict,
        debug,
        callId: `sequential_auto_batch_${index + 1}`,
        telemetry: telemetry ?? undefined,
        parentSpan: mergeSpan,
      });

      const batchData = result.data as Record<string, unknown>;
      merged = merger.merge(merged, batchData);
      usages.push(result.usage);

      // Log merge operation per field. `merged` already contains this batch, so
      // `leftCount` is the array length after merging and `rightCount` is the
      // length contributed by the batch.
      for (const key of Object.keys(batchData)) {
        const mergedValue = merged[key];
        const batchValue = batchData[key];
        const leftArray = Array.isArray(mergedValue)
          ? mergedValue.length
          : undefined;
        const rightArray = Array.isArray(batchValue)
          ? batchValue.length
          : undefined;

        debug?.smartMergeField({
          mergeId: "sequential_auto_merge",
          field: key,
          operation: "merge_arrays",
          leftCount: leftArray,
          rightCount: rightArray,
        });

        // Record merge event in telemetry
        if (mergeSpan && telemetry) {
          telemetry.recordEvent(mergeSpan, {
            type: "merge",
            strategy: "smart",
            inputCount: rightArray ?? 1,
            outputCount: leftArray ?? 1,
          });
        }
      }

      step += 1;
      await options.events?.onStep?.({
        step,
        total: totalSteps,
        label: `batch ${index + 1}/${batches.length}`,
      });
      debug?.step({
        step,
        total: totalSteps,
        label: `batch ${index + 1}/${batches.length}`,
        strategy: this.name,
      });
    }

    debug?.mergeComplete({ mergeId: "sequential_auto_merge", success: true });

    // End merge span
    if (mergeSpan && telemetry) {
      telemetry.endSpan(mergeSpan, {
        status: "ok",
        output: merged,
      });
    }

    // Open the exact dedupe span before doing the work so the span covers it.
    const exactDedupeSpan = telemetry?.startSpan({
      name: "struktur.exact_dedupe",
      kind: "CHAIN",
      parentSpan: strategySpan,
      attributes: {
        "dedupe.method": "exact_hashing",
      },
    });

    merged = dedupeArrays(merged);

    // End exact dedupe span
    if (exactDedupeSpan && telemetry) {
      telemetry.recordEvent(exactDedupeSpan, {
        type: "merge",
        strategy: "exact_hash_dedupe",
        inputCount: Object.keys(merged).length,
        outputCount: Object.keys(merged).length,
      });
      telemetry.endSpan(exactDedupeSpan, {
        status: "ok",
        output: merged,
      });
    }

    const dedupePrompt = buildDeduplicationPrompt(schema, merged);

    debug?.dedupeStart({
      dedupeId: "sequential_auto_dedupe",
      itemCount: Object.keys(merged).length,
    });

    // Create LLM dedupe span
    const llmDedupeSpan = telemetry?.startSpan({
      name: "struktur.llm_dedupe",
      kind: "CHAIN",
      parentSpan: strategySpan,
      attributes: {
        "dedupe.method": "llm",
      },
    });

    const dedupeResponse = await runWithRetries<{ keys: string[] }>({
      model: this.config.dedupeModel ?? this.config.model,
      schema: dedupeSchema,
      system: dedupePrompt.system,
      user: dedupePrompt.user,
      events: options.events,
      execute: this.config.dedupeExecute,
      strict: this.config.strict,
      debug,
      callId: "sequential_auto_dedupe",
      telemetry: telemetry ?? undefined,
      parentSpan: llmDedupeSpan,
    });

    step += 1;
    await options.events?.onStep?.({
      step,
      total: totalSteps,
      label: "dedupe",
    });
    debug?.step({
      step,
      total: totalSteps,
      label: "dedupe",
      strategy: this.name,
    });

    // NOTE(review): the returned paths presumably index into `merged` as it was
    // shown to the model — confirm against the deduplication prompt. Under that
    // assumption, removing them one by one in arbitrary order shifts the later
    // indices and drops the wrong elements, so repeated paths are collapsed and
    // removal proceeds from the highest index down.
    const pathIndex = (path: string): number => {
      const parsed = Number(path.split(".")[1]);
      return Number.isFinite(parsed) ? parsed : -1;
    };
    const removalPaths = [...new Set(dedupeResponse.data.keys)].sort(
      (a, b) => pathIndex(b) - pathIndex(a),
    );

    let deduped = merged;
    for (const path of removalPaths) {
      deduped = removeByPath(deduped, path);
    }

    debug?.dedupeComplete({
      dedupeId: "sequential_auto_dedupe",
      duplicatesFound: removalPaths.length,
      itemsRemoved: removalPaths.length,
    });

    // End LLM dedupe span
    if (llmDedupeSpan && telemetry) {
      telemetry.recordEvent(llmDedupeSpan, {
        type: "merge",
        strategy: "llm_dedupe",
        inputCount: Object.keys(merged).length,
        outputCount: Object.keys(deduped).length,
        deduped: removalPaths.length,
      });
      telemetry.endSpan(llmDedupeSpan, {
        status: "ok",
        output: deduped,
      });
    }

    // End strategy span (guarded like the other spans instead of asserting non-null)
    if (strategySpan && telemetry) {
      telemetry.endSpan(strategySpan, {
        status: "ok",
        output: deduped,
      });
    }

    return {
      data: deduped as T,
      usage: mergeUsage([...usages, dedupeResponse.usage]),
    };
  }
}
320
-
321
/**
 * Factory shorthand: builds a `SequentialAutoMergeStrategy<T>` from `config`.
 */
export const sequentialAutoMerge = <T>(
  config: SequentialAutoMergeStrategyConfig,
): SequentialAutoMergeStrategy<T> => new SequentialAutoMergeStrategy<T>(config);
@@ -1,53 +0,0 @@
1
- import { test, expect } from "bun:test";
2
- import type { JSONSchemaType } from "ajv";
3
- import { SequentialStrategy } from "./SequentialStrategy";
4
- import type { Artifact, ExtractionOptions } from "../types";
5
-
6
type Output = { title: string };

const schema: JSONSchemaType<Output> = {
  type: "object",
  properties: { title: { type: "string" } },
  required: ["title"],
  additionalProperties: false,
};

// Two identical text artifacts; the test below expects them to end up in two
// separate extraction calls when chunkSize is 2.
const artifacts: Artifact[] = [
  {
    id: "a1",
    type: "text",
    raw: async () => Buffer.from(""),
    contents: [{ text: "abcdefgh" }],
  },
  {
    id: "a2",
    type: "text",
    raw: async () => Buffer.from(""),
    contents: [{ text: "abcdefgh" }],
  },
];

test("SequentialStrategy processes batches in order", async () => {
  let calls = 0;
  const strategy = new SequentialStrategy<Output>({
    model: {},
    chunkSize: 2,
    // Stub runner: counts invocations and echoes the call number in the title.
    // Cast with `as never` (as the strategy sources do) rather than `as any`.
    execute: (async () => {
      calls += 1;
      return {
        data: { title: `step-${calls}` },
        usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
      };
    }) as never,
  });

  const options: ExtractionOptions<Output> = {
    artifacts,
    schema,
    strategy,
  };

  const result = await strategy.run(options);
  // The strategy returns the data of the last call.
  expect(result.data.title).toBe("step-2");
  expect(calls).toBe(2);
});
@@ -1,142 +0,0 @@
1
- import type { ExtractionResult, ExtractionStrategy } from "../types";
2
- import type { ExtractionOptions } from "../types";
3
- import { buildSequentialPrompt } from "../prompts/SequentialExtractorPrompt";
4
- import {
5
- extractWithPrompt,
6
- getBatches,
7
- mergeUsage,
8
- serializeSchema,
9
- } from "./utils";
10
- import { runWithRetries } from "../llm/RetryingRunner";
11
-
12
/**
 * Configuration accepted by `SequentialStrategy`.
 */
export type SequentialStrategyConfig = {
  /** Model handed to `extractWithPrompt` for every batch. */
  model: unknown;
  /** Forwarded to `getBatches` as `maxTokens`. */
  chunkSize: number;
  /** Forwarded to `getBatches` as `maxImages`. */
  maxImages?: number;
  /** Forwarded to `buildSequentialPrompt` as its fourth argument. */
  outputInstructions?: string;
  /** Forwarded as `execute` to `extractWithPrompt`. */
  execute?: typeof runWithRetries;
  /** Used only when `options.strict` is null or undefined. */
  strict?: boolean;
};
20
-
21
/**
 * Extraction strategy that makes one extraction call per artifact batch, in order.
 * The JSON of the previous call's result is handed to `buildSequentialPrompt` for
 * the next batch, and the result of the last call is returned.
 */
export class SequentialStrategy<T> implements ExtractionStrategy<T> {
  public name = "sequential";
  private config: SequentialStrategyConfig;

  constructor(config: SequentialStrategyConfig) {
    this.config = config;
  }

  /**
   * @returns The number of batches `getBatches` produces for `artifacts`, plus two.
   */
  getEstimatedSteps(artifacts: ExtractionOptions<T>["artifacts"]): number {
    const batches = getBatches(artifacts, {
      maxTokens: this.config.chunkSize,
      maxImages: this.config.maxImages,
    });
    return batches.length + 2;
  }

  /**
   * Runs the batches sequentially, threading each result into the next prompt.
   *
   * @param options - Extraction options; `artifacts`, `schema`, `events`, `strict`,
   *   `debug` and `telemetry` are read.
   * @returns The data of the final batch call and the combined usage of all calls.
   * @throws Error when no batch produced data (for example when there are no batches).
   */
  async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
    const debug = options.debug;
    const { telemetry } = options;

    // Create strategy-level span
    const strategySpan = telemetry?.startSpan({
      name: "strategy.sequential",
      kind: "CHAIN",
      attributes: {
        "strategy.name": this.name,
        "strategy.artifacts.count": options.artifacts.length,
        "strategy.chunk_size": this.config.chunkSize,
      },
    });

    const batches = getBatches(
      options.artifacts,
      {
        maxTokens: this.config.chunkSize,
        maxImages: this.config.maxImages,
      },
      debug,
      telemetry ?? undefined,
      strategySpan,
    );

    const schema = serializeSchema(options.schema);
    let currentData: T | undefined;
    const usages = [];
    const totalSteps = this.getEstimatedSteps(options.artifacts);
    let step = 1;

    // Emit start event
    const firstLabel =
      batches.length > 1 ? `batch 1/${batches.length}` : "extract";
    await options.events?.onStep?.({
      step,
      total: totalSteps,
      label: firstLabel,
    });
    debug?.step({
      step,
      total: totalSteps,
      label: firstLabel,
      strategy: this.name,
    });

    for (const [index, batch] of batches.entries()) {
      // Compare against null/undefined rather than truthiness so that a valid
      // falsy result (0, "", false) is still carried into the next prompt.
      const previousData =
        currentData == null ? "{}" : JSON.stringify(currentData);
      const prompt = buildSequentialPrompt(
        batch,
        schema,
        previousData,
        this.config.outputInstructions,
      );

      const result = await extractWithPrompt<T>({
        model: this.config.model,
        schema: options.schema,
        system: prompt.system,
        user: prompt.user,
        artifacts: batch,
        events: options.events,
        execute: this.config.execute as never,
        strict: options.strict ?? this.config.strict,
        debug,
        callId: `sequential_batch_${index + 1}`,
        telemetry: telemetry ?? undefined,
        parentSpan: strategySpan,
      });

      currentData = result.data;
      usages.push(result.usage);

      step += 1;
      // Only emit progress if there are more batches
      if (index < batches.length - 1) {
        const nextLabel = `batch ${index + 2}/${batches.length}`;
        await options.events?.onStep?.({
          step,
          total: totalSteps,
          label: nextLabel,
        });
        debug?.step({
          step,
          total: totalSteps,
          label: nextLabel,
          strategy: this.name,
        });
      }
    }

    // Null/undefined means no call produced data; falsy-but-present data is valid.
    if (currentData == null) {
      throw new Error("No data extracted from sequential strategy");
    }

    // End strategy span (guarded instead of asserting the span is non-null)
    if (strategySpan && telemetry) {
      telemetry.endSpan(strategySpan, {
        status: "ok",
        output: currentData,
      });
    }

    return { data: currentData, usage: mergeUsage(usages) };
  }
}
139
-
140
/**
 * Factory shorthand: builds a `SequentialStrategy<T>` from `config`.
 */
export const sequential = <T>(
  config: SequentialStrategyConfig,
): SequentialStrategy<T> => new SequentialStrategy<T>(config);
@@ -1,46 +0,0 @@
1
- import { test, expect } from "bun:test";
2
- import type { JSONSchemaType } from "ajv";
3
- import { SimpleStrategy } from "./SimpleStrategy";
4
- import type { Artifact, ExtractionOptions } from "../types";
5
-
6
type Output = { title: string };

const schema: JSONSchemaType<Output> = {
  type: "object",
  properties: { title: { type: "string" } },
  required: ["title"],
  additionalProperties: false,
};

const artifacts: Artifact[] = [
  {
    id: "a1",
    type: "text",
    raw: async () => Buffer.from(""),
    contents: [{ text: "hello" }],
  },
];

test("SimpleStrategy runs once", async () => {
  let calls = 0;
  const strategy = new SimpleStrategy<Output>({
    model: {},
    // Stub runner: counts invocations and returns a fixed payload.
    // Cast with `as never` (as the strategy sources do) rather than `as any`.
    execute: (async () => {
      calls += 1;
      return {
        data: { title: "ok" },
        usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
      };
    }) as never,
  });

  const options: ExtractionOptions<Output> = {
    artifacts,
    schema,
    strategy,
  };

  const result = await strategy.run(options);
  expect(result.data.title).toBe("ok");
  expect(calls).toBe(1);
});
@@ -1,94 +0,0 @@
1
- import type { ExtractionResult, ExtractionStrategy } from "../types";
2
- import type { ExtractionOptions } from "../types";
3
- import { buildExtractorPrompt } from "../prompts/ExtractorPrompt";
4
- import { extractWithPrompt, serializeSchema } from "./utils";
5
- import { runWithRetries } from "../llm/RetryingRunner";
6
-
7
/**
 * Configuration accepted by `SimpleStrategy`.
 */
export type SimpleStrategyConfig = {
  /** Model handed to `extractWithPrompt`. */
  model: unknown;
  /** Forwarded to `buildExtractorPrompt` as its third argument. */
  outputInstructions?: string;
  /** Forwarded as `execute` to `extractWithPrompt`. */
  execute?: typeof runWithRetries;
  /** Used only when `options.strict` is null or undefined. */
  strict?: boolean;
};
13
-
14
/**
 * Extraction strategy that builds a single prompt from all artifacts and makes
 * exactly one `extractWithPrompt` call.
 */
export class SimpleStrategy<T> implements ExtractionStrategy<T> {
  public name = "simple";
  private config: SimpleStrategyConfig;

  constructor(config: SimpleStrategyConfig) {
    this.config = config;
  }

  /** @returns Always 3. */
  getEstimatedSteps(): number {
    return 3;
  }

  /**
   * Runs the single extraction call.
   *
   * @param options - Extraction options; `artifacts`, `schema`, `events`, `strict`,
   *   `debug` and `telemetry` are read.
   * @returns The data and usage reported by the extraction call.
   */
  async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
    const debug = options.debug;
    const { telemetry } = options;
    const totalSteps = this.getEstimatedSteps();

    // Create strategy-level span
    const strategySpan = telemetry?.startSpan({
      name: "strategy.simple",
      kind: "CHAIN",
      attributes: {
        "strategy.name": this.name,
        "strategy.artifacts.count": options.artifacts.length,
      },
    });

    const schema = serializeSchema(options.schema);
    const { system, user } = buildExtractorPrompt(
      options.artifacts,
      schema,
      this.config.outputInstructions,
    );

    // Emit start event before extraction begins
    await options.events?.onStep?.({
      step: 1,
      total: totalSteps,
      label: "extract",
    });
    debug?.step({
      step: 1,
      total: totalSteps,
      label: "extract",
      strategy: this.name,
    });

    const result = await extractWithPrompt<T>({
      model: this.config.model,
      schema: options.schema,
      system,
      user,
      artifacts: options.artifacts,
      events: options.events,
      execute: this.config.execute as never,
      strict: options.strict ?? this.config.strict,
      debug,
      callId: "simple_extract",
      // Normalised the same way the sequential strategies forward telemetry.
      telemetry: telemetry ?? undefined,
      parentSpan: strategySpan,
    });

    // Completion is reported to the debug logger only; `events.onStep` is not called here.
    debug?.step({
      step: 2,
      total: totalSteps,
      label: "complete",
      strategy: this.name,
    });

    // End strategy span (guarded instead of asserting the span is non-null)
    if (strategySpan && telemetry) {
      telemetry.endSpan(strategySpan, {
        status: "ok",
        output: result.data,
      });
    }

    return { data: result.data, usage: result.usage };
  }
}
91
-
92
/**
 * Factory shorthand: builds a `SimpleStrategy<T>` from `config`.
 */
export const simple = <T>(config: SimpleStrategyConfig): SimpleStrategy<T> =>
  new SimpleStrategy<T>(config);
@@ -1,16 +0,0 @@
1
- import { test, expect } from "bun:test";
2
- import { runConcurrently } from "./concurrency";
3
-
4
test("runConcurrently runs tasks in batches", async () => {
  const started: number[] = [];
  // Track how many tasks are running at once so the concurrency limit is
  // actually verified; start order alone would also pass with no limit at all.
  let inFlight = 0;
  let peakInFlight = 0;
  const tasks = [1, 2, 3, 4, 5].map((value) => async () => {
    started.push(value);
    inFlight += 1;
    peakInFlight = Math.max(peakInFlight, inFlight);
    await new Promise((resolve) => setTimeout(resolve, 5));
    inFlight -= 1;
    return value;
  });

  const results = await runConcurrently(tasks, 2);

  expect(results).toEqual([1, 2, 3, 4, 5]);
  expect(started).toEqual([1, 2, 3, 4, 5]);
  expect(peakInFlight).toBeLessThanOrEqual(2);
});
@@ -1,14 +0,0 @@
1
/**
 * Runs `tasks` in consecutive groups of at most `concurrency` tasks. All tasks of
 * a group are started together and the next group starts only after every task of
 * the current group has resolved.
 *
 * @param tasks - Thunks that start the work when called.
 * @param concurrency - Maximum group size. Fractional values are rounded down
 *   (but never below 1).
 * @returns The resolved values in the same order as `tasks`.
 * @throws RangeError when there are tasks to run and `concurrency` is NaN, zero
 *   or negative — such values previously caused an endless loop or skipped every task.
 */
export const runConcurrently = async <T>(
  tasks: Array<() => Promise<T>>,
  concurrency: number,
): Promise<T[]> => {
  const results: T[] = [];
  if (tasks.length === 0) {
    return results;
  }

  if (Number.isNaN(concurrency) || concurrency <= 0) {
    throw new RangeError(
      `concurrency must be a positive number, received ${concurrency}`,
    );
  }
  const groupSize = Math.max(1, Math.floor(concurrency));

  for (let start = 0; start < tasks.length; start += groupSize) {
    const pending = tasks.slice(start, start + groupSize).map((task) => task());
    const settled = await Promise.all(pending);
    results.push(...settled);
  }

  return results;
};