@struktur/sdk 1.2.1 → 2.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -84,6 +84,20 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
84
84
 
85
85
  async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
86
86
  const debug = options.debug;
87
+ const { telemetry } = options;
88
+
89
+ // Create strategy-level span
90
+ const strategySpan = telemetry?.startSpan({
91
+ name: "strategy.double-pass-auto-merge",
92
+ kind: "CHAIN",
93
+ attributes: {
94
+ "strategy.name": this.name,
95
+ "strategy.artifacts.count": options.artifacts.length,
96
+ "strategy.chunk_size": this.config.chunkSize,
97
+ "strategy.concurrency": this.config.concurrency,
98
+ },
99
+ });
100
+
87
101
  const batches = getBatches(
88
102
  options.artifacts,
89
103
  {
@@ -91,11 +105,24 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
91
105
  maxImages: this.config.maxImages,
92
106
  },
93
107
  debug,
108
+ telemetry ?? undefined,
109
+ strategySpan,
94
110
  );
95
111
 
96
112
  const schema = serializeSchema(options.schema);
97
113
  const totalSteps = this.getEstimatedSteps(options.artifacts);
98
114
  let step = 1;
115
+
116
+ // Create pass 1 span
117
+ const pass1Span = telemetry?.startSpan({
118
+ name: "struktur.pass_1",
119
+ kind: "CHAIN",
120
+ parentSpan: strategySpan,
121
+ attributes: {
122
+ "pass.number": 1,
123
+ "pass.type": "parallel_extraction",
124
+ },
125
+ });
99
126
 
100
127
  const tasks = batches.map((batch, index) => async () => {
101
128
  const prompt = buildExtractorPrompt(
@@ -114,6 +141,8 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
114
141
  strict: options.strict ?? this.config.strict,
115
142
  debug,
116
143
  callId: `double_pass_auto_1_batch_${index + 1}`,
144
+ telemetry: telemetry ?? undefined,
145
+ parentSpan: pass1Span,
117
146
  });
118
147
  step += 1;
119
148
  await options.events?.onStep?.({
@@ -145,6 +174,17 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
145
174
  inputCount: results.length,
146
175
  strategy: this.name,
147
176
  });
177
+
178
+ // Create smart merge span
179
+ const mergeSpan = telemetry?.startSpan({
180
+ name: "struktur.smart_merge",
181
+ kind: "CHAIN",
182
+ parentSpan: pass1Span,
183
+ attributes: {
184
+ "merge.strategy": "smart",
185
+ "merge.input_count": results.length,
186
+ },
187
+ });
148
188
 
149
189
  for (let i = 0; i < results.length; i++) {
150
190
  const result = results[i]!;
@@ -168,12 +208,54 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
168
208
  leftCount: leftArray,
169
209
  rightCount: rightArray,
170
210
  });
211
+
212
+ // Record merge event in telemetry
213
+ if (mergeSpan && telemetry) {
214
+ telemetry.recordEvent(mergeSpan, {
215
+ type: "merge",
216
+ strategy: "smart",
217
+ inputCount: rightArray ?? 1,
218
+ outputCount: leftArray ?? 1,
219
+ });
220
+ }
171
221
  }
172
222
  }
173
223
 
174
224
  debug?.mergeComplete({ mergeId: "double_pass_auto_merge", success: true });
225
+
226
+ // End merge span
227
+ if (mergeSpan && telemetry) {
228
+ telemetry.endSpan(mergeSpan, {
229
+ status: "ok",
230
+ output: merged,
231
+ });
232
+ }
175
233
 
176
234
  merged = dedupeArrays(merged);
235
+
236
+ // Create exact dedupe span
237
+ const exactDedupeSpan = telemetry?.startSpan({
238
+ name: "struktur.exact_dedupe",
239
+ kind: "CHAIN",
240
+ parentSpan: pass1Span,
241
+ attributes: {
242
+ "dedupe.method": "exact_hashing",
243
+ },
244
+ });
245
+
246
+ // End exact dedupe span
247
+ if (exactDedupeSpan && telemetry) {
248
+ telemetry.recordEvent(exactDedupeSpan, {
249
+ type: "merge",
250
+ strategy: "exact_hash_dedupe",
251
+ inputCount: Object.keys(merged).length,
252
+ outputCount: Object.keys(merged).length,
253
+ });
254
+ telemetry.endSpan(exactDedupeSpan, {
255
+ status: "ok",
256
+ output: merged,
257
+ });
258
+ }
177
259
 
178
260
  const dedupePrompt = buildDeduplicationPrompt(schema, merged);
179
261
 
@@ -181,6 +263,16 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
181
263
  dedupeId: "double_pass_auto_dedupe",
182
264
  itemCount: Object.keys(merged).length,
183
265
  });
266
+
267
+ // Create LLM dedupe span
268
+ const llmDedupeSpan = telemetry?.startSpan({
269
+ name: "struktur.llm_dedupe",
270
+ kind: "CHAIN",
271
+ parentSpan: pass1Span,
272
+ attributes: {
273
+ "dedupe.method": "llm",
274
+ },
275
+ });
184
276
 
185
277
  const dedupeResponse = await runWithRetries<{ keys: string[] }>({
186
278
  model: this.config.dedupeModel ?? this.config.model,
@@ -192,6 +284,8 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
192
284
  strict: this.config.strict,
193
285
  debug,
194
286
  callId: "double_pass_auto_dedupe",
287
+ telemetry: telemetry ?? undefined,
288
+ parentSpan: llmDedupeSpan,
195
289
  });
196
290
 
197
291
  step += 1;
@@ -217,9 +311,41 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
217
311
  duplicatesFound: dedupeResponse.data.keys.length,
218
312
  itemsRemoved: dedupeResponse.data.keys.length,
219
313
  });
314
+
315
+ // End LLM dedupe span
316
+ if (llmDedupeSpan && telemetry) {
317
+ telemetry.recordEvent(llmDedupeSpan, {
318
+ type: "merge",
319
+ strategy: "llm_dedupe",
320
+ inputCount: Object.keys(merged).length,
321
+ outputCount: Object.keys(deduped).length,
322
+ deduped: dedupeResponse.data.keys.length,
323
+ });
324
+ telemetry.endSpan(llmDedupeSpan, {
325
+ status: "ok",
326
+ output: deduped,
327
+ });
328
+ }
329
+
330
+ // End pass 1 span
331
+ telemetry?.endSpan(pass1Span!, {
332
+ status: "ok",
333
+ output: deduped,
334
+ });
220
335
 
221
336
  let currentData = deduped as T;
222
337
  const usages = [...results.map((r) => r.usage), dedupeResponse.usage];
338
+
339
+ // Create pass 2 span
340
+ const pass2Span = telemetry?.startSpan({
341
+ name: "struktur.pass_2",
342
+ kind: "CHAIN",
343
+ parentSpan: strategySpan,
344
+ attributes: {
345
+ "pass.number": 2,
346
+ "pass.type": "sequential_refinement",
347
+ },
348
+ });
223
349
 
224
350
  for (const [index, batch] of batches.entries()) {
225
351
  const prompt = buildSequentialPrompt(
@@ -240,6 +366,8 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
240
366
  strict: this.config.strict,
241
367
  debug,
242
368
  callId: `double_pass_auto_2_batch_${index + 1}`,
369
+ telemetry: telemetry ?? undefined,
370
+ parentSpan: pass2Span,
243
371
  });
244
372
 
245
373
  currentData = result.data;
@@ -258,6 +386,18 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
258
386
  strategy: this.name,
259
387
  });
260
388
  }
389
+
390
+ // End pass 2 span
391
+ telemetry?.endSpan(pass2Span!, {
392
+ status: "ok",
393
+ output: currentData,
394
+ });
395
+
396
+ // End strategy span
397
+ telemetry?.endSpan(strategySpan!, {
398
+ status: "ok",
399
+ output: currentData,
400
+ });
261
401
 
262
402
  return { data: currentData, usage: mergeUsage(usages) };
263
403
  }
@@ -41,6 +41,20 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
41
41
 
42
42
  async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
43
43
  const debug = options.debug;
44
+ const { telemetry } = options;
45
+
46
+ // Create strategy-level span
47
+ const strategySpan = telemetry?.startSpan({
48
+ name: "strategy.double-pass",
49
+ kind: "CHAIN",
50
+ attributes: {
51
+ "strategy.name": this.name,
52
+ "strategy.artifacts.count": options.artifacts.length,
53
+ "strategy.chunk_size": this.config.chunkSize,
54
+ "strategy.concurrency": this.config.concurrency,
55
+ },
56
+ });
57
+
44
58
  const batches = getBatches(
45
59
  options.artifacts,
46
60
  {
@@ -48,11 +62,24 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
48
62
  maxImages: this.config.maxImages,
49
63
  },
50
64
  debug,
65
+ telemetry ?? undefined,
66
+ strategySpan,
51
67
  );
52
68
 
53
69
  const schema = serializeSchema(options.schema);
54
70
  const totalSteps = this.getEstimatedSteps(options.artifacts);
55
71
  let step = 1;
72
+
73
+ // Create pass 1 span
74
+ const pass1Span = telemetry?.startSpan({
75
+ name: "struktur.pass_1",
76
+ kind: "CHAIN",
77
+ parentSpan: strategySpan,
78
+ attributes: {
79
+ "pass.number": 1,
80
+ "pass.type": "parallel_extraction",
81
+ },
82
+ });
56
83
 
57
84
  const tasks = batches.map((batch, index) => async () => {
58
85
  const prompt = buildExtractorPrompt(
@@ -71,6 +98,8 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
71
98
  strict: options.strict ?? this.config.strict,
72
99
  debug,
73
100
  callId: `double_pass_1_batch_${index + 1}`,
101
+ telemetry: telemetry ?? undefined,
102
+ parentSpan: pass1Span,
74
103
  });
75
104
  step += 1;
76
105
  await options.events?.onStep?.({
@@ -97,6 +126,17 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
97
126
  inputCount: results.length,
98
127
  strategy: this.name,
99
128
  });
129
+
130
+ // Create pass 1 merge span
131
+ const pass1MergeSpan = telemetry?.startSpan({
132
+ name: "struktur.pass_1_merge",
133
+ kind: "CHAIN",
134
+ parentSpan: pass1Span,
135
+ attributes: {
136
+ "merge.strategy": "parallel",
137
+ "merge.input_count": results.length,
138
+ },
139
+ });
100
140
 
101
141
  const mergePrompt = buildParallelMergerPrompt(
102
142
  schema,
@@ -113,6 +153,8 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
113
153
  strict: this.config.strict,
114
154
  debug,
115
155
  callId: "double_pass_1_merge",
156
+ telemetry: telemetry ?? undefined,
157
+ parentSpan: pass1MergeSpan,
116
158
  });
117
159
 
118
160
  step += 1;
@@ -128,6 +170,37 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
128
170
  strategy: this.name,
129
171
  });
130
172
  debug?.mergeComplete({ mergeId: "double_pass_1_merge", success: true });
173
+
174
+ // End pass 1 merge span
175
+ if (pass1MergeSpan && telemetry) {
176
+ telemetry.recordEvent(pass1MergeSpan, {
177
+ type: "merge",
178
+ strategy: "parallel",
179
+ inputCount: results.length,
180
+ outputCount: 1,
181
+ });
182
+ telemetry.endSpan(pass1MergeSpan, {
183
+ status: "ok",
184
+ output: merged.data,
185
+ });
186
+ }
187
+
188
+ // End pass 1 span
189
+ telemetry?.endSpan(pass1Span!, {
190
+ status: "ok",
191
+ output: merged.data,
192
+ });
193
+
194
+ // Create pass 2 span
195
+ const pass2Span = telemetry?.startSpan({
196
+ name: "struktur.pass_2",
197
+ kind: "CHAIN",
198
+ parentSpan: strategySpan,
199
+ attributes: {
200
+ "pass.number": 2,
201
+ "pass.type": "sequential_refinement",
202
+ },
203
+ });
131
204
 
132
205
  let currentData = merged.data;
133
206
  const usages = [...results.map((r) => r.usage), merged.usage];
@@ -151,6 +224,8 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
151
224
  strict: this.config.strict,
152
225
  debug,
153
226
  callId: `double_pass_2_batch_${index + 1}`,
227
+ telemetry: telemetry ?? undefined,
228
+ parentSpan: pass2Span,
154
229
  });
155
230
 
156
231
  currentData = result.data;
@@ -169,6 +244,18 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
169
244
  strategy: this.name,
170
245
  });
171
246
  }
247
+
248
+ // End pass 2 span
249
+ telemetry?.endSpan(pass2Span!, {
250
+ status: "ok",
251
+ output: currentData,
252
+ });
253
+
254
+ // End strategy span
255
+ telemetry?.endSpan(strategySpan!, {
256
+ status: "ok",
257
+ output: currentData,
258
+ });
172
259
 
173
260
  return { data: currentData, usage: mergeUsage(usages) };
174
261
  }
@@ -83,6 +83,20 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
83
83
 
84
84
  async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
85
85
  const debug = options.debug;
86
+ const { telemetry } = options;
87
+
88
+ // Create strategy-level span
89
+ const strategySpan = telemetry?.startSpan({
90
+ name: "strategy.parallel-auto-merge",
91
+ kind: "CHAIN",
92
+ attributes: {
93
+ "strategy.name": this.name,
94
+ "strategy.artifacts.count": options.artifacts.length,
95
+ "strategy.chunk_size": this.config.chunkSize,
96
+ "strategy.concurrency": this.config.concurrency,
97
+ },
98
+ });
99
+
86
100
  const batches = getBatches(
87
101
  options.artifacts,
88
102
  {
@@ -90,6 +104,8 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
90
104
  maxImages: this.config.maxImages,
91
105
  },
92
106
  debug,
107
+ telemetry ?? undefined,
108
+ strategySpan,
93
109
  );
94
110
 
95
111
  const schema = serializeSchema(options.schema);
@@ -113,6 +129,8 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
113
129
  strict: options.strict ?? this.config.strict,
114
130
  debug,
115
131
  callId: `parallel_auto_batch_${index + 1}`,
132
+ telemetry: telemetry ?? undefined,
133
+ parentSpan: strategySpan,
116
134
  });
117
135
  step += 1;
118
136
  await options.events?.onStep?.({
@@ -144,6 +162,17 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
144
162
  inputCount: results.length,
145
163
  strategy: this.name,
146
164
  });
165
+
166
+ // Create smart merge span
167
+ const mergeSpan = telemetry?.startSpan({
168
+ name: "struktur.smart_merge",
169
+ kind: "CHAIN",
170
+ parentSpan: strategySpan,
171
+ attributes: {
172
+ "merge.strategy": "smart",
173
+ "merge.input_count": results.length,
174
+ },
175
+ });
147
176
 
148
177
  for (let i = 0; i < results.length; i++) {
149
178
  const result = results[i]!;
@@ -169,6 +198,16 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
169
198
  leftCount: leftArray,
170
199
  rightCount: rightArray,
171
200
  });
201
+
202
+ // Record merge event in telemetry
203
+ if (mergeSpan && telemetry) {
204
+ telemetry.recordEvent(mergeSpan, {
205
+ type: "merge",
206
+ strategy: "smart",
207
+ inputCount: rightArray ?? 1,
208
+ outputCount: leftArray ?? 1,
209
+ });
210
+ }
172
211
  }
173
212
  }
174
213
 
@@ -176,8 +215,40 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
176
215
  mergeId: "parallel_auto_smart_merge",
177
216
  success: true,
178
217
  });
218
+
219
+ // End merge span
220
+ if (mergeSpan && telemetry) {
221
+ telemetry.endSpan(mergeSpan, {
222
+ status: "ok",
223
+ output: merged,
224
+ });
225
+ }
179
226
 
180
227
  merged = dedupeArrays(merged);
228
+
229
+ // Create exact dedupe span
230
+ const exactDedupeSpan = telemetry?.startSpan({
231
+ name: "struktur.exact_dedupe",
232
+ kind: "CHAIN",
233
+ parentSpan: strategySpan,
234
+ attributes: {
235
+ "dedupe.method": "exact_hashing",
236
+ },
237
+ });
238
+
239
+ // End exact dedupe span
240
+ if (exactDedupeSpan && telemetry) {
241
+ telemetry.recordEvent(exactDedupeSpan, {
242
+ type: "merge",
243
+ strategy: "exact_hash_dedupe",
244
+ inputCount: Object.keys(merged).length,
245
+ outputCount: Object.keys(merged).length,
246
+ });
247
+ telemetry.endSpan(exactDedupeSpan, {
248
+ status: "ok",
249
+ output: merged,
250
+ });
251
+ }
181
252
 
182
253
  const dedupePrompt = buildDeduplicationPrompt(schema, merged);
183
254
 
@@ -185,6 +256,16 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
185
256
  dedupeId: "parallel_auto_dedupe",
186
257
  itemCount: Object.keys(merged).length,
187
258
  });
259
+
260
+ // Create LLM dedupe span
261
+ const llmDedupeSpan = telemetry?.startSpan({
262
+ name: "struktur.llm_dedupe",
263
+ kind: "CHAIN",
264
+ parentSpan: strategySpan,
265
+ attributes: {
266
+ "dedupe.method": "llm",
267
+ },
268
+ });
188
269
 
189
270
  const dedupeResponse = await runWithRetries<{ keys: string[] }>({
190
271
  model: this.config.dedupeModel ?? this.config.model,
@@ -196,6 +277,8 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
196
277
  strict: this.config.strict,
197
278
  debug,
198
279
  callId: "parallel_auto_dedupe",
280
+ telemetry: telemetry ?? undefined,
281
+ parentSpan: llmDedupeSpan,
199
282
  });
200
283
 
201
284
  step += 1;
@@ -221,6 +304,27 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
221
304
  duplicatesFound: dedupeResponse.data.keys.length,
222
305
  itemsRemoved: dedupeResponse.data.keys.length,
223
306
  });
307
+
308
+ // End LLM dedupe span
309
+ if (llmDedupeSpan && telemetry) {
310
+ telemetry.recordEvent(llmDedupeSpan, {
311
+ type: "merge",
312
+ strategy: "llm_dedupe",
313
+ inputCount: Object.keys(merged).length,
314
+ outputCount: Object.keys(deduped).length,
315
+ deduped: dedupeResponse.data.keys.length,
316
+ });
317
+ telemetry.endSpan(llmDedupeSpan, {
318
+ status: "ok",
319
+ output: deduped,
320
+ });
321
+ }
322
+
323
+ // End strategy span
324
+ telemetry?.endSpan(strategySpan!, {
325
+ status: "ok",
326
+ output: deduped,
327
+ });
224
328
 
225
329
  return {
226
330
  data: deduped as T,
@@ -40,6 +40,20 @@ export class ParallelStrategy<T> implements ExtractionStrategy<T> {
40
40
 
41
41
  async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
42
42
  const debug = options.debug;
43
+ const { telemetry } = options;
44
+
45
+ // Create strategy-level span
46
+ const strategySpan = telemetry?.startSpan({
47
+ name: "strategy.parallel",
48
+ kind: "CHAIN",
49
+ attributes: {
50
+ "strategy.name": this.name,
51
+ "strategy.artifacts.count": options.artifacts.length,
52
+ "strategy.chunk_size": this.config.chunkSize,
53
+ "strategy.concurrency": this.config.concurrency,
54
+ },
55
+ });
56
+
43
57
  const batches = getBatches(
44
58
  options.artifacts,
45
59
  {
@@ -47,6 +61,8 @@ export class ParallelStrategy<T> implements ExtractionStrategy<T> {
47
61
  maxImages: this.config.maxImages,
48
62
  },
49
63
  debug,
64
+ telemetry ?? undefined,
65
+ strategySpan,
50
66
  );
51
67
 
52
68
  const schema = serializeSchema(options.schema);
@@ -83,6 +99,8 @@ export class ParallelStrategy<T> implements ExtractionStrategy<T> {
83
99
  strict: options.strict ?? this.config.strict,
84
100
  debug,
85
101
  callId: `parallel_batch_${index + 1}`,
102
+ telemetry: telemetry ?? undefined,
103
+ parentSpan: strategySpan,
86
104
  });
87
105
  // Emit progress after batch completes (if there are more batches)
88
106
  const completedIndex = index + 1;
@@ -113,6 +131,17 @@ export class ParallelStrategy<T> implements ExtractionStrategy<T> {
113
131
  inputCount: results.length,
114
132
  strategy: this.name,
115
133
  });
134
+
135
+ // Create merge span
136
+ const mergeSpan = telemetry?.startSpan({
137
+ name: "struktur.merge",
138
+ kind: "CHAIN",
139
+ parentSpan: strategySpan,
140
+ attributes: {
141
+ "merge.strategy": "parallel",
142
+ "merge.input_count": results.length,
143
+ },
144
+ });
116
145
 
117
146
  const mergePrompt = buildParallelMergerPrompt(
118
147
  schema,
@@ -129,6 +158,8 @@ export class ParallelStrategy<T> implements ExtractionStrategy<T> {
129
158
  strict: this.config.strict,
130
159
  debug,
131
160
  callId: "parallel_merge",
161
+ telemetry: telemetry ?? undefined,
162
+ parentSpan: mergeSpan,
132
163
  });
133
164
 
134
165
  step += 1;
@@ -144,6 +175,26 @@ export class ParallelStrategy<T> implements ExtractionStrategy<T> {
144
175
  strategy: this.name,
145
176
  });
146
177
  debug?.mergeComplete({ mergeId: "parallel_merge", success: true });
178
+
179
+ // End merge span
180
+ if (mergeSpan && telemetry) {
181
+ telemetry.recordEvent(mergeSpan, {
182
+ type: "merge",
183
+ strategy: "parallel",
184
+ inputCount: results.length,
185
+ outputCount: 1,
186
+ });
187
+ telemetry.endSpan(mergeSpan, {
188
+ status: "ok",
189
+ output: merged.data,
190
+ });
191
+ }
192
+
193
+ // End strategy span
194
+ telemetry?.endSpan(strategySpan!, {
195
+ status: "ok",
196
+ output: merged.data,
197
+ });
147
198
 
148
199
  return {
149
200
  data: merged.data,