npm - @struktur/sdk - Versions diffs - 2.0.0 → 2.1.0 - Mend

@struktur/sdk 2.0.0 → 2.1.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

package/package.json +2 -2
package/src/auth/config.ts +57 -0
package/src/extract.ts +55 -19
package/src/index.ts +13 -0
package/src/llm/LLMClient.ts +88 -7
package/src/llm/RetryingRunner.ts +83 -1
package/src/strategies/DoublePassAutoMergeStrategy.ts +140 -0
package/src/strategies/DoublePassStrategy.ts +87 -0
package/src/strategies/ParallelAutoMergeStrategy.ts +104 -0
package/src/strategies/ParallelStrategy.ts +51 -0
package/src/strategies/SequentialAutoMergeStrategy.ts +103 -0
package/src/strategies/SequentialStrategy.ts +23 -0
package/src/strategies/SimpleStrategy.ts +20 -0
package/src/strategies/utils.ts +42 -3
package/src/types.ts +66 -9

package/src/strategies/DoublePassStrategy.ts CHANGED Viewed

@@ -41,6 +41,20 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
   async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
     const debug = options.debug;
+    const { telemetry } = options;
+    // Create strategy-level span
+    const strategySpan = telemetry?.startSpan({
+      name: "strategy.double-pass",
+      kind: "CHAIN",
+      attributes: {
+        "strategy.name": this.name,
+        "strategy.artifacts.count": options.artifacts.length,
+        "strategy.chunk_size": this.config.chunkSize,
+        "strategy.concurrency": this.config.concurrency,
+      },
+    });
     const batches = getBatches(
       options.artifacts,
       {
@@ -48,11 +62,24 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
         maxImages: this.config.maxImages,
       },
       debug,
+      telemetry ?? undefined,
+      strategySpan,
     );
     const schema = serializeSchema(options.schema);
     const totalSteps = this.getEstimatedSteps(options.artifacts);
     let step = 1;
+    // Create pass 1 span
+    const pass1Span = telemetry?.startSpan({
+      name: "struktur.pass_1",
+      kind: "CHAIN",
+      parentSpan: strategySpan,
+      attributes: {
+        "pass.number": 1,
+        "pass.type": "parallel_extraction",
+      },
+    });
     const tasks = batches.map((batch, index) => async () => {
       const prompt = buildExtractorPrompt(
@@ -71,6 +98,8 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
         strict: options.strict ?? this.config.strict,
         debug,
         callId: `double_pass_1_batch_${index + 1}`,
+        telemetry: telemetry ?? undefined,
+        parentSpan: pass1Span,
       });
       step += 1;
       await options.events?.onStep?.({
@@ -97,6 +126,17 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
       inputCount: results.length,
       strategy: this.name,
     });
+    // Create pass 1 merge span
+    const pass1MergeSpan = telemetry?.startSpan({
+      name: "struktur.pass_1_merge",
+      kind: "CHAIN",
+      parentSpan: pass1Span,
+      attributes: {
+        "merge.strategy": "parallel",
+        "merge.input_count": results.length,
+      },
+    });
     const mergePrompt = buildParallelMergerPrompt(
       schema,
@@ -113,6 +153,8 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
       strict: this.config.strict,
       debug,
       callId: "double_pass_1_merge",
+      telemetry: telemetry ?? undefined,
+      parentSpan: pass1MergeSpan,
     });
     step += 1;
@@ -128,6 +170,37 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
       strategy: this.name,
     });
     debug?.mergeComplete({ mergeId: "double_pass_1_merge", success: true });
+    // End pass 1 merge span
+    if (pass1MergeSpan && telemetry) {
+      telemetry.recordEvent(pass1MergeSpan, {
+        type: "merge",
+        strategy: "parallel",
+        inputCount: results.length,
+        outputCount: 1,
+      });
+      telemetry.endSpan(pass1MergeSpan, {
+        status: "ok",
+        output: merged.data,
+      });
+    }
+    // End pass 1 span
+    telemetry?.endSpan(pass1Span!, {
+      status: "ok",
+      output: merged.data,
+    });
+    // Create pass 2 span
+    const pass2Span = telemetry?.startSpan({
+      name: "struktur.pass_2",
+      kind: "CHAIN",
+      parentSpan: strategySpan,
+      attributes: {
+        "pass.number": 2,
+        "pass.type": "sequential_refinement",
+      },
+    });
     let currentData = merged.data;
     const usages = [...results.map((r) => r.usage), merged.usage];
@@ -151,6 +224,8 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
         strict: this.config.strict,
         debug,
         callId: `double_pass_2_batch_${index + 1}`,
+        telemetry: telemetry ?? undefined,
+        parentSpan: pass2Span,
       });
       currentData = result.data;
@@ -169,6 +244,18 @@ export class DoublePassStrategy<T> implements ExtractionStrategy<T> {
         strategy: this.name,
       });
     }
+    // End pass 2 span
+    telemetry?.endSpan(pass2Span!, {
+      status: "ok",
+      output: currentData,
+    });
+    // End strategy span
+    telemetry?.endSpan(strategySpan!, {
+      status: "ok",
+      output: currentData,
+    });
     return { data: currentData, usage: mergeUsage(usages) };
   }

package/src/strategies/ParallelAutoMergeStrategy.ts CHANGED Viewed

@@ -83,6 +83,20 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
   async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
     const debug = options.debug;
+    const { telemetry } = options;
+    // Create strategy-level span
+    const strategySpan = telemetry?.startSpan({
+      name: "strategy.parallel-auto-merge",
+      kind: "CHAIN",
+      attributes: {
+        "strategy.name": this.name,
+        "strategy.artifacts.count": options.artifacts.length,
+        "strategy.chunk_size": this.config.chunkSize,
+        "strategy.concurrency": this.config.concurrency,
+      },
+    });
     const batches = getBatches(
       options.artifacts,
       {
@@ -90,6 +104,8 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
         maxImages: this.config.maxImages,
       },
       debug,
+      telemetry ?? undefined,
+      strategySpan,
     );
     const schema = serializeSchema(options.schema);
@@ -113,6 +129,8 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
         strict: options.strict ?? this.config.strict,
         debug,
         callId: `parallel_auto_batch_${index + 1}`,
+        telemetry: telemetry ?? undefined,
+        parentSpan: strategySpan,
       });
       step += 1;
       await options.events?.onStep?.({
@@ -144,6 +162,17 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       inputCount: results.length,
       strategy: this.name,
     });
+    // Create smart merge span
+    const mergeSpan = telemetry?.startSpan({
+      name: "struktur.smart_merge",
+      kind: "CHAIN",
+      parentSpan: strategySpan,
+      attributes: {
+        "merge.strategy": "smart",
+        "merge.input_count": results.length,
+      },
+    });
     for (let i = 0; i < results.length; i++) {
       const result = results[i]!;
@@ -169,6 +198,16 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
           leftCount: leftArray,
           rightCount: rightArray,
         });
+        // Record merge event in telemetry
+        if (mergeSpan && telemetry) {
+          telemetry.recordEvent(mergeSpan, {
+            type: "merge",
+            strategy: "smart",
+            inputCount: rightArray ?? 1,
+            outputCount: leftArray ?? 1,
+          });
+        }
       }
     }
@@ -176,8 +215,40 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       mergeId: "parallel_auto_smart_merge",
       success: true,
     });
+    // End merge span
+    if (mergeSpan && telemetry) {
+      telemetry.endSpan(mergeSpan, {
+        status: "ok",
+        output: merged,
+      });
+    }
     merged = dedupeArrays(merged);
+    // Create exact dedupe span
+    const exactDedupeSpan = telemetry?.startSpan({
+      name: "struktur.exact_dedupe",
+      kind: "CHAIN",
+      parentSpan: strategySpan,
+      attributes: {
+        "dedupe.method": "exact_hashing",
+      },
+    });
+    // End exact dedupe span
+    if (exactDedupeSpan && telemetry) {
+      telemetry.recordEvent(exactDedupeSpan, {
+        type: "merge",
+        strategy: "exact_hash_dedupe",
+        inputCount: Object.keys(merged).length,
+        outputCount: Object.keys(merged).length,
+      });
+      telemetry.endSpan(exactDedupeSpan, {
+        status: "ok",
+        output: merged,
+      });
+    }
     const dedupePrompt = buildDeduplicationPrompt(schema, merged);
@@ -185,6 +256,16 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       dedupeId: "parallel_auto_dedupe",
       itemCount: Object.keys(merged).length,
     });
+    // Create LLM dedupe span
+    const llmDedupeSpan = telemetry?.startSpan({
+      name: "struktur.llm_dedupe",
+      kind: "CHAIN",
+      parentSpan: strategySpan,
+      attributes: {
+        "dedupe.method": "llm",
+      },
+    });
     const dedupeResponse = await runWithRetries<{ keys: string[] }>({
       model: this.config.dedupeModel ?? this.config.model,
@@ -196,6 +277,8 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       strict: this.config.strict,
       debug,
       callId: "parallel_auto_dedupe",
+      telemetry: telemetry ?? undefined,
+      parentSpan: llmDedupeSpan,
     });
     step += 1;
@@ -221,6 +304,27 @@ export class ParallelAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       duplicatesFound: dedupeResponse.data.keys.length,
       itemsRemoved: dedupeResponse.data.keys.length,
     });
+    // End LLM dedupe span
+    if (llmDedupeSpan && telemetry) {
+      telemetry.recordEvent(llmDedupeSpan, {
+        type: "merge",
+        strategy: "llm_dedupe",
+        inputCount: Object.keys(merged).length,
+        outputCount: Object.keys(deduped).length,
+        deduped: dedupeResponse.data.keys.length,
+      });
+      telemetry.endSpan(llmDedupeSpan, {
+        status: "ok",
+        output: deduped,
+      });
+    }
+    // End strategy span
+    telemetry?.endSpan(strategySpan!, {
+      status: "ok",
+      output: deduped,
+    });
     return {
       data: deduped as T,

package/src/strategies/ParallelStrategy.ts CHANGED Viewed

@@ -40,6 +40,20 @@ export class ParallelStrategy<T> implements ExtractionStrategy<T> {
   async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
     const debug = options.debug;
+    const { telemetry } = options;
+    // Create strategy-level span
+    const strategySpan = telemetry?.startSpan({
+      name: "strategy.parallel",
+      kind: "CHAIN",
+      attributes: {
+        "strategy.name": this.name,
+        "strategy.artifacts.count": options.artifacts.length,
+        "strategy.chunk_size": this.config.chunkSize,
+        "strategy.concurrency": this.config.concurrency,
+      },
+    });
     const batches = getBatches(
       options.artifacts,
       {
@@ -47,6 +61,8 @@ export class ParallelStrategy<T> implements ExtractionStrategy<T> {
         maxImages: this.config.maxImages,
       },
       debug,
+      telemetry ?? undefined,
+      strategySpan,
     );
     const schema = serializeSchema(options.schema);
@@ -83,6 +99,8 @@ export class ParallelStrategy<T> implements ExtractionStrategy<T> {
         strict: options.strict ?? this.config.strict,
         debug,
         callId: `parallel_batch_${index + 1}`,
+        telemetry: telemetry ?? undefined,
+        parentSpan: strategySpan,
       });
       // Emit progress after batch completes (if there are more batches)
       const completedIndex = index + 1;
@@ -113,6 +131,17 @@ export class ParallelStrategy<T> implements ExtractionStrategy<T> {
       inputCount: results.length,
       strategy: this.name,
     });
+    // Create merge span
+    const mergeSpan = telemetry?.startSpan({
+      name: "struktur.merge",
+      kind: "CHAIN",
+      parentSpan: strategySpan,
+      attributes: {
+        "merge.strategy": "parallel",
+        "merge.input_count": results.length,
+      },
+    });
     const mergePrompt = buildParallelMergerPrompt(
       schema,
@@ -129,6 +158,8 @@ export class ParallelStrategy<T> implements ExtractionStrategy<T> {
       strict: this.config.strict,
       debug,
       callId: "parallel_merge",
+      telemetry: telemetry ?? undefined,
+      parentSpan: mergeSpan,
     });
     step += 1;
@@ -144,6 +175,26 @@ export class ParallelStrategy<T> implements ExtractionStrategy<T> {
       strategy: this.name,
     });
     debug?.mergeComplete({ mergeId: "parallel_merge", success: true });
+    // End merge span
+    if (mergeSpan && telemetry) {
+      telemetry.recordEvent(mergeSpan, {
+        type: "merge",
+        strategy: "parallel",
+        inputCount: results.length,
+        outputCount: 1,
+      });
+      telemetry.endSpan(mergeSpan, {
+        status: "ok",
+        output: merged.data,
+      });
+    }
+    // End strategy span
+    telemetry?.endSpan(strategySpan!, {
+      status: "ok",
+      output: merged.data,
+    });
     return {
       data: merged.data,

package/src/strategies/SequentialAutoMergeStrategy.ts CHANGED Viewed

@@ -81,6 +81,19 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
   async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
     const debug = options.debug;
+    const { telemetry } = options;
+    // Create strategy-level span
+    const strategySpan = telemetry?.startSpan({
+      name: "strategy.sequential-auto-merge",
+      kind: "CHAIN",
+      attributes: {
+        "strategy.name": this.name,
+        "strategy.artifacts.count": options.artifacts.length,
+        "strategy.chunk_size": this.config.chunkSize,
+      },
+    });
     const batches = getBatches(
       options.artifacts,
       {
@@ -88,6 +101,8 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
         maxImages: this.config.maxImages,
       },
       debug,
+      telemetry ?? undefined,
+      strategySpan,
     );
     const schema = serializeSchema(options.schema);
@@ -104,6 +119,17 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       inputCount: batches.length,
       strategy: this.name,
     });
+    // Create smart merge span
+    const mergeSpan = telemetry?.startSpan({
+      name: "struktur.smart_merge",
+      kind: "CHAIN",
+      parentSpan: strategySpan,
+      attributes: {
+        "merge.strategy": "smart",
+        "merge.input_count": batches.length,
+      },
+    });
     for (const [index, batch] of batches.entries()) {
       const prompt = buildExtractorPrompt(
@@ -122,6 +148,8 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
         strict: options.strict ?? this.config.strict,
         debug,
         callId: `sequential_auto_batch_${index + 1}`,
+        telemetry: telemetry ?? undefined,
+        parentSpan: mergeSpan,
       });
       merged = merger.merge(merged, result.data as Record<string, unknown>);
@@ -145,6 +173,16 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
           leftCount: leftArray,
           rightCount: rightArray,
         });
+        // Record merge event in telemetry
+        if (mergeSpan && telemetry) {
+          telemetry.recordEvent(mergeSpan, {
+            type: "merge",
+            strategy: "smart",
+            inputCount: rightArray ?? 1,
+            outputCount: leftArray ?? 1,
+          });
+        }
       }
       step += 1;
@@ -162,8 +200,40 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
     }
     debug?.mergeComplete({ mergeId: "sequential_auto_merge", success: true });
+    // End merge span
+    if (mergeSpan && telemetry) {
+      telemetry.endSpan(mergeSpan, {
+        status: "ok",
+        output: merged,
+      });
+    }
     merged = dedupeArrays(merged);
+    // Create exact dedupe span
+    const exactDedupeSpan = telemetry?.startSpan({
+      name: "struktur.exact_dedupe",
+      kind: "CHAIN",
+      parentSpan: strategySpan,
+      attributes: {
+        "dedupe.method": "exact_hashing",
+      },
+    });
+    // End exact dedupe span
+    if (exactDedupeSpan && telemetry) {
+      telemetry.recordEvent(exactDedupeSpan, {
+        type: "merge",
+        strategy: "exact_hash_dedupe",
+        inputCount: Object.keys(merged).length,
+        outputCount: Object.keys(merged).length,
+      });
+      telemetry.endSpan(exactDedupeSpan, {
+        status: "ok",
+        output: merged,
+      });
+    }
     const dedupePrompt = buildDeduplicationPrompt(schema, merged);
@@ -171,6 +241,16 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       dedupeId: "sequential_auto_dedupe",
       itemCount: Object.keys(merged).length,
     });
+    // Create LLM dedupe span
+    const llmDedupeSpan = telemetry?.startSpan({
+      name: "struktur.llm_dedupe",
+      kind: "CHAIN",
+      parentSpan: strategySpan,
+      attributes: {
+        "dedupe.method": "llm",
+      },
+    });
     const dedupeResponse = await runWithRetries<{ keys: string[] }>({
       model: this.config.dedupeModel ?? this.config.model,
@@ -182,6 +262,8 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       strict: this.config.strict,
       debug,
       callId: "sequential_auto_dedupe",
+      telemetry: telemetry ?? undefined,
+      parentSpan: llmDedupeSpan,
     });
     step += 1;
@@ -207,6 +289,27 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       duplicatesFound: dedupeResponse.data.keys.length,
       itemsRemoved: dedupeResponse.data.keys.length,
     });
+    // End LLM dedupe span
+    if (llmDedupeSpan && telemetry) {
+      telemetry.recordEvent(llmDedupeSpan, {
+        type: "merge",
+        strategy: "llm_dedupe",
+        inputCount: Object.keys(merged).length,
+        outputCount: Object.keys(deduped).length,
+        deduped: dedupeResponse.data.keys.length,
+      });
+      telemetry.endSpan(llmDedupeSpan, {
+        status: "ok",
+        output: deduped,
+      });
+    }
+    // End strategy span
+    telemetry?.endSpan(strategySpan!, {
+      status: "ok",
+      output: deduped,
+    });
     return {
       data: deduped as T,

package/src/strategies/SequentialStrategy.ts CHANGED Viewed

@@ -36,6 +36,19 @@ export class SequentialStrategy<T> implements ExtractionStrategy<T> {
   async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
     const debug = options.debug;
+    const { telemetry } = options;
+    // Create strategy-level span
+    const strategySpan = telemetry?.startSpan({
+      name: "strategy.sequential",
+      kind: "CHAIN",
+      attributes: {
+        "strategy.name": this.name,
+        "strategy.artifacts.count": options.artifacts.length,
+        "strategy.chunk_size": this.config.chunkSize,
+      },
+    });
     const batches = getBatches(
       options.artifacts,
       {
@@ -43,6 +56,8 @@ export class SequentialStrategy<T> implements ExtractionStrategy<T> {
         maxImages: this.config.maxImages,
       },
       debug,
+      telemetry ?? undefined,
+      strategySpan,
     );
     const schema = serializeSchema(options.schema);
@@ -84,6 +99,8 @@ export class SequentialStrategy<T> implements ExtractionStrategy<T> {
         strict: options.strict ?? this.config.strict,
         debug,
         callId: `sequential_batch_${index + 1}`,
+        telemetry: telemetry ?? undefined,
+        parentSpan: strategySpan,
       });
       currentData = result.data;
@@ -110,6 +127,12 @@ export class SequentialStrategy<T> implements ExtractionStrategy<T> {
       throw new Error("No data extracted from sequential strategy");
     }
+    // End strategy span
+    telemetry?.endSpan(strategySpan!, {
+      status: "ok",
+      output: currentData,
+    });
     return { data: currentData, usage: mergeUsage(usages) };
   }
 }

package/src/strategies/SimpleStrategy.ts CHANGED Viewed

@@ -25,6 +25,18 @@ export class SimpleStrategy<T> implements ExtractionStrategy<T> {
   async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
     const debug = options.debug;
+    const { telemetry } = options;
+    // Create strategy-level span
+    const strategySpan = telemetry?.startSpan({
+      name: "strategy.simple",
+      kind: "CHAIN",
+      attributes: {
+        "strategy.name": this.name,
+        "strategy.artifacts.count": options.artifacts.length,
+      },
+    });
     const schema = serializeSchema(options.schema);
     const { system, user } = buildExtractorPrompt(
       options.artifacts,
@@ -56,6 +68,8 @@ export class SimpleStrategy<T> implements ExtractionStrategy<T> {
       strict: options.strict ?? this.config.strict,
       debug,
       callId: "simple_extract",
+      telemetry,
+      parentSpan: strategySpan,
     });
     debug?.step({
@@ -65,6 +79,12 @@ export class SimpleStrategy<T> implements ExtractionStrategy<T> {
       strategy: this.name,
     });
+    // End strategy span
+    telemetry?.endSpan(strategySpan!, {
+      status: "ok",
+      output: result.data,
+    });
     return { data: result.data, usage: result.usage };
   }
 }