npm - @struktur/sdk - Versions diffs - 2.0.0 → 2.1.0 - Mend

@struktur/sdk 2.0.0 → 2.1.0

This diff shows the differences between publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

package/package.json +2 -2
package/src/auth/config.ts +57 -0
package/src/extract.ts +55 -19
package/src/index.ts +13 -0
package/src/llm/LLMClient.ts +88 -7
package/src/llm/RetryingRunner.ts +83 -1
package/src/strategies/DoublePassAutoMergeStrategy.ts +140 -0
package/src/strategies/DoublePassStrategy.ts +87 -0
package/src/strategies/ParallelAutoMergeStrategy.ts +104 -0
package/src/strategies/ParallelStrategy.ts +51 -0
package/src/strategies/SequentialAutoMergeStrategy.ts +103 -0
package/src/strategies/SequentialStrategy.ts +23 -0
package/src/strategies/SimpleStrategy.ts +20 -0
package/src/strategies/utils.ts +42 -3
package/src/types.ts +66 -9

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@struktur/sdk",
-  "version": "2.0.0",
+  "version": "2.1.0",
   "license": "FSL-1.1-MIT",
   "type": "module",
   "main": "src/index.ts",
@@ -20,7 +20,7 @@
     "@ai-sdk/google": "^3.0.0",
     "@ai-sdk/openai": "^3.0.0",
     "@openrouter/ai-sdk-provider": "^2.0.0",
-    "@struktur/agent-strategy": "1.0.0",
+    "@struktur/agent-strategy": "2.1.0",
     "ai": "^6.0.97",
     "ajv": "^8.17.1",
     "ajv-formats": "^3.0.1",

package/src/auth/config.ts CHANGED Viewed

@@ -3,11 +3,24 @@ import os from "node:os";
 import { chmod, mkdir } from "node:fs/promises";
 import type { ParserDef, ParsersConfig } from "@struktur/sdk";
+type TelemetryConfig = {
+  enabled: boolean;
+  provider: string;
+  url?: string;
+  apiKey?: string;
+  projectName?: string;
+  publicKey?: string; // For Langfuse
+  secretKey?: string; // For Langfuse
+  baseUrl?: string; // For Langfuse
+  sampleRate?: number;
+};
 type ConfigStore = {
   version: 1;
   defaultModel?: string;
   aliases?: Record<string, string>;
   parsers?: ParsersConfig;
+  telemetry?: TelemetryConfig;
 };
 const CONFIG_DIR_ENV = "STRUKTUR_CONFIG_DIR";
@@ -127,3 +140,47 @@ export const deleteParser = async (mimeType: string): Promise<boolean> => {
   await writeConfigStore(store);
   return true;
 };
+// --- Telemetry config management ---
+export const getTelemetryConfig = async (): Promise<TelemetryConfig | undefined> => {
+  const store = await readConfigStore();
+  return store.telemetry;
+};
+export const setTelemetryConfig = async (config: TelemetryConfig): Promise<void> => {
+  const store = await readConfigStore();
+  store.telemetry = config;
+  await writeConfigStore(store);
+};
+export const enableTelemetry = async (
+  provider: string,
+  options: Omit<TelemetryConfig, "enabled" | "provider">
+): Promise<void> => {
+  const store = await readConfigStore();
+  store.telemetry = {
+    enabled: true,
+    provider,
+    ...options,
+  };
+  await writeConfigStore(store);
+};
+export const disableTelemetry = async (): Promise<void> => {
+  const store = await readConfigStore();
+  if (store.telemetry) {
+    store.telemetry.enabled = false;
+  }
+  await writeConfigStore(store);
+};
+export const deleteTelemetryConfig = async (): Promise<boolean> => {
+  const store = await readConfigStore();
+  if (!store.telemetry) {
+    return false;
+  }
+  delete store.telemetry;
+  await writeConfigStore(store);
+  return true;
+};

package/src/extract.ts CHANGED Viewed

@@ -34,29 +34,52 @@ export const extract = async <T>(
   options: ExtractionOptions<T>,
 ): Promise<ExtractionResult<T>> => {
   const debug = options.debug;
+  const telemetry = options.telemetry;
-  // Validate mutual exclusion and resolve the concrete schema early so that
-  // every strategy receives a fully-populated options object.
-  let resolvedOptions: ExtractionOptions<T>;
-  try {
-    const schema = resolveSchema(options);
-    resolvedOptions = { ...options, schema };
-  } catch (error) {
-    debug?.extractionComplete({
-      success: false,
-      totalInputTokens: 0,
-      totalOutputTokens: 0,
-      totalTokens: 0,
-      error: (error as Error).message,
-    });
-    return {
-      data: null as unknown as T,
-      usage: emptyUsage,
-      error: error as Error,
-    };
+  // Initialize telemetry if provided
+  if (telemetry) {
+    await telemetry.initialize();
   }
+  // Start root extraction span
+  const rootSpan = telemetry?.startSpan({
+    name: "struktur.extract",
+    kind: "CHAIN",
+    attributes: {
+      "extraction.strategy": options.strategy?.name ?? "default",
+      "extraction.artifacts.count": options.artifacts.length,
+    },
+  });
   try {
+    // Validate mutual exclusion and resolve the concrete schema early so that
+    // every strategy receives a fully-populated options object.
+    let resolvedOptions: ExtractionOptions<T>;
+    try {
+      const schema = resolveSchema(options);
+      resolvedOptions = { ...options, schema };
+    } catch (error) {
+      debug?.extractionComplete({
+        success: false,
+        totalInputTokens: 0,
+        totalOutputTokens: 0,
+        totalTokens: 0,
+        error: (error as Error).message,
+      });
+      telemetry?.endSpan(rootSpan!, {
+        status: "error",
+        error: error as Error,
+      });
+      await telemetry?.shutdown();
+      return {
+        data: null as unknown as T,
+        usage: emptyUsage,
+        error: error as Error,
+      };
+    }
     const total = resolvedOptions.strategy.getEstimatedSteps?.(resolvedOptions.artifacts);
     debug?.strategyRunStart({
@@ -95,6 +118,13 @@ export const extract = async <T>(
       error: result.error?.message,
     });
+    telemetry?.endSpan(rootSpan!, {
+      status: result.error ? "error" : "ok",
+      output: result.data,
+      error: result.error,
+    });
+    await telemetry?.shutdown();
     return result;
   } catch (error) {
     debug?.extractionComplete({
@@ -105,6 +135,12 @@ export const extract = async <T>(
       error: (error as Error).message,
     });
+    telemetry?.endSpan(rootSpan!, {
+      status: "error",
+      error: error as Error,
+    });
+    await telemetry?.shutdown();
     return {
       data: null as unknown as T,
       usage: emptyUsage,

package/src/index.ts CHANGED Viewed

@@ -10,6 +10,14 @@ export type {
   Usage,
   AnyJSONSchema,
   TypedJSONSchema,
+  // Agent event types
+  AgentEvents,
+  AgentToolStartInfo,
+  AgentToolEndInfo,
+  AgentMessageInfo,
+  AgentReasoningInfo,
+  // Telemetry
+  TelemetryAdapter,
 } from "./types";
 export { extract } from "./extract";
@@ -83,6 +91,11 @@ export {
   getParser,
   setParser,
   deleteParser,
+  getTelemetryConfig,
+  setTelemetryConfig,
+  enableTelemetry,
+  disableTelemetry,
+  deleteTelemetryConfig,
 } from "./auth/config";
 export {
   listStoredProviders,

package/src/llm/LLMClient.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { generateText, Output, jsonSchema, type ModelMessage } from "ai";
-import type { AnyJSONSchema, Usage } from "../types";
+import type { AnyJSONSchema, Usage, TelemetryAdapter } from "../types";
 import type { UserContent } from "./message";
 type GenerateTextParams = Parameters<typeof generateText>[0];
@@ -15,6 +15,14 @@ export type StructuredRequest<T> = {
   schemaName?: string;
   schemaDescription?: string;
   strict?: boolean;
+  /**
+   * Telemetry adapter for tracing LLM calls
+   */
+  telemetry?: TelemetryAdapter;
+  /**
+   * Parent span for creating hierarchical traces
+   */
+  parentSpan?: { id: string; traceId: string; name: string; kind: string; startTime: number; parentId?: string };
 };
 export type StructuredResponse<T> = {
@@ -36,6 +44,21 @@ const isZodSchema = (
 export const generateStructured = async <T>(
   request: StructuredRequest<T>,
 ): Promise<StructuredResponse<T>> => {
+  const { telemetry, parentSpan } = request;
+  // Start LLM span if telemetry is enabled
+  const llmSpan = telemetry?.startSpan({
+    name: "llm.generateStructured",
+    kind: "LLM",
+    parentSpan,
+    attributes: {
+      "llm.schema_name": request.schemaName ?? "extract",
+      "llm.strict": request.strict ?? false,
+    },
+  });
+  const startTime = Date.now();
   const schema = isZodSchema(request.schema)
     ? request.schema
     : jsonSchema(request.schema as AnyJSONSchema);
@@ -84,6 +107,13 @@ export const generateStructured = async <T>(
       ...(providerOptions ? { providerOptions } : {}),
     });
   } catch (error) {
+    // Determine model ID for error messages
+    const modelId =
+      typeof request.model === "object" && request.model !== null
+        ? (request.model as { modelId?: string }).modelId ??
+          JSON.stringify(request.model)
+        : String(request.model);
     if (
       error &&
       typeof error === "object" &&
@@ -101,12 +131,6 @@ export const generateStructured = async <T>(
         };
       };
-      const modelId =
-        typeof request.model === "object" && request.model !== null
-          ? (request.model as { modelId?: string }).modelId ??
-            JSON.stringify(request.model)
-          : String(request.model);
       const responseBody = apiError.responseBody;
       const errorData = apiError.data;
@@ -156,6 +180,30 @@ export const generateStructured = async <T>(
         );
       }
     }
+    // Record error in telemetry
+    if (llmSpan && telemetry) {
+      const latencyMs = Date.now() - startTime;
+      telemetry.recordEvent(llmSpan, {
+        type: "llm_call",
+        model: modelId,
+        provider: "unknown", // Will be determined by the model
+        input: {
+          messages: request.messages ?? [{ role: "user", content: typeof request.user === "string" ? request.user : "" }],
+          temperature: undefined,
+          maxTokens: undefined,
+          schema: request.schema,
+        },
+        error: error instanceof Error ? error : new Error(String(error)),
+        latencyMs,
+      });
+      telemetry.endSpan(llmSpan, {
+        status: "error",
+        error: error instanceof Error ? error : new Error(String(error)),
+        latencyMs,
+      });
+    }
     throw error;
   }
@@ -179,5 +227,38 @@ export const generateStructured = async <T>(
     totalTokens,
   };
+  // Record successful LLM call in telemetry
+  if (llmSpan && telemetry) {
+    const latencyMs = Date.now() - startTime;
+    telemetry.recordEvent(llmSpan, {
+      type: "llm_call",
+      model: typeof request.model === "object" && request.model !== null
+        ? (request.model as { modelId?: string }).modelId ?? "unknown"
+        : String(request.model),
+      provider: preferredProvider ?? "unknown",
+      input: {
+        messages: request.messages ?? [{ role: "user", content: typeof request.user === "string" ? request.user : "" }],
+        temperature: undefined,
+        maxTokens: undefined,
+        schema: request.schema,
+      },
+      output: {
+        content: JSON.stringify(result.output),
+        structured: true,
+        usage: {
+          input: inputTokens,
+          output: outputTokens,
+          total: totalTokens,
+        },
+      },
+      latencyMs,
+    });
+    telemetry.endSpan(llmSpan, {
+      status: "ok",
+      output: result.output,
+      latencyMs,
+    });
+  }
   return { data: result.output as T, usage };
 };

package/src/llm/RetryingRunner.ts CHANGED Viewed

@@ -5,7 +5,7 @@ import {
   validateAllowingMissingRequired,
 } from "../validation/validator";
 import type { ModelMessage } from "ai";
-import type { ExtractionEvents, Usage } from "../types";
+import type { ExtractionEvents, Usage, TelemetryAdapter } from "../types";
 import type { DebugLogger } from "../debug/logger";
 import { generateStructured } from "./LLMClient";
 import type { UserContent } from "./message";
@@ -22,9 +22,30 @@ export type RetryOptions<T> = {
   strict?: boolean;
   debug?: DebugLogger;
   callId?: string;
+  /**
+   * Telemetry adapter for tracing validation and retries
+   */
+  telemetry?: TelemetryAdapter;
+  /**
+   * Parent span for creating hierarchical traces
+   */
+  parentSpan?: { id: string; traceId: string; name: string; kind: string; startTime: number; parentId?: string };
 };
 export const runWithRetries = async <T>(options: RetryOptions<T>) => {
+  const { telemetry, parentSpan } = options;
+  // Start validation/retry span if telemetry is enabled
+  const retrySpan = telemetry?.startSpan({
+    name: "struktur.validation_retry",
+    kind: "CHAIN",
+    parentSpan,
+    attributes: {
+      "retry.max_attempts": options.maxAttempts ?? 3,
+      "retry.schema_name": options.schemaName ?? "extract",
+    },
+  });
   const ajv = createAjv();
   const maxAttempts = options.maxAttempts ?? 3;
   const messages: ModelMessage[] = [{ role: "user", content: options.user }];
@@ -76,6 +97,8 @@ export const runWithRetries = async <T>(options: RetryOptions<T>) => {
       user: options.user,
       messages,
       strict: options.strict,
+      telemetry,
+      parentSpan: retrySpan,
     });
     const durationMs = Date.now() - startTime;
@@ -105,6 +128,24 @@ export const runWithRetries = async <T>(options: RetryOptions<T>) => {
           durationMs,
         });
+        // Record successful validation
+        if (retrySpan && telemetry) {
+          telemetry.recordEvent(retrySpan, {
+            type: "validation",
+            attempt,
+            maxAttempts,
+            schema: options.schema,
+            input: result.data,
+            success: true,
+            latencyMs: durationMs,
+          });
+          telemetry.endSpan(retrySpan, {
+            status: "ok",
+            output: validated,
+            latencyMs: durationMs,
+          });
+        }
         return { data: validated, usage };
       } else {
         const validationResult = validateAllowingMissingRequired<T>(
@@ -125,6 +166,24 @@ export const runWithRetries = async <T>(options: RetryOptions<T>) => {
             durationMs,
           });
+          // Record successful validation
+          if (retrySpan && telemetry) {
+            telemetry.recordEvent(retrySpan, {
+              type: "validation",
+              attempt,
+              maxAttempts,
+              schema: options.schema,
+              input: result.data,
+              success: true,
+              latencyMs: durationMs,
+            });
+            telemetry.endSpan(retrySpan, {
+              status: "ok",
+              output: validationResult.data,
+              latencyMs: durationMs,
+            });
+          }
           return { data: validationResult.data, usage };
         }
@@ -143,6 +202,20 @@ export const runWithRetries = async <T>(options: RetryOptions<T>) => {
           errors: error.errors,
         });
+        // Record failed validation
+        if (retrySpan && telemetry) {
+          telemetry.recordEvent(retrySpan, {
+            type: "validation",
+            attempt,
+            maxAttempts,
+            schema: options.schema,
+            input: result.data,
+            success: false,
+            errors: error.errors,
+            latencyMs: durationMs,
+          });
+        }
         // Emit retry event before attempting retry
         const nextAttempt = attempt + 1;
         if (nextAttempt <= maxAttempts) {
@@ -180,6 +253,15 @@ export const runWithRetries = async <T>(options: RetryOptions<T>) => {
         error: (error as Error).message,
       });
+      // Record error in telemetry
+      if (retrySpan && telemetry) {
+        telemetry.endSpan(retrySpan, {
+          status: "error",
+          error: error as Error,
+          latencyMs: durationMs,
+        });
+      }
       break;
     }
   }

package/src/strategies/DoublePassAutoMergeStrategy.ts CHANGED Viewed

@@ -84,6 +84,20 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
   async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
     const debug = options.debug;
+    const { telemetry } = options;
+    // Create strategy-level span
+    const strategySpan = telemetry?.startSpan({
+      name: "strategy.double-pass-auto-merge",
+      kind: "CHAIN",
+      attributes: {
+        "strategy.name": this.name,
+        "strategy.artifacts.count": options.artifacts.length,
+        "strategy.chunk_size": this.config.chunkSize,
+        "strategy.concurrency": this.config.concurrency,
+      },
+    });
     const batches = getBatches(
       options.artifacts,
       {
@@ -91,11 +105,24 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
         maxImages: this.config.maxImages,
       },
       debug,
+      telemetry ?? undefined,
+      strategySpan,
     );
     const schema = serializeSchema(options.schema);
     const totalSteps = this.getEstimatedSteps(options.artifacts);
     let step = 1;
+    // Create pass 1 span
+    const pass1Span = telemetry?.startSpan({
+      name: "struktur.pass_1",
+      kind: "CHAIN",
+      parentSpan: strategySpan,
+      attributes: {
+        "pass.number": 1,
+        "pass.type": "parallel_extraction",
+      },
+    });
     const tasks = batches.map((batch, index) => async () => {
       const prompt = buildExtractorPrompt(
@@ -114,6 +141,8 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
         strict: options.strict ?? this.config.strict,
         debug,
         callId: `double_pass_auto_1_batch_${index + 1}`,
+        telemetry: telemetry ?? undefined,
+        parentSpan: pass1Span,
       });
       step += 1;
       await options.events?.onStep?.({
@@ -145,6 +174,17 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       inputCount: results.length,
       strategy: this.name,
     });
+    // Create smart merge span
+    const mergeSpan = telemetry?.startSpan({
+      name: "struktur.smart_merge",
+      kind: "CHAIN",
+      parentSpan: pass1Span,
+      attributes: {
+        "merge.strategy": "smart",
+        "merge.input_count": results.length,
+      },
+    });
     for (let i = 0; i < results.length; i++) {
       const result = results[i]!;
@@ -168,12 +208,54 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
           leftCount: leftArray,
           rightCount: rightArray,
         });
+        // Record merge event in telemetry
+        if (mergeSpan && telemetry) {
+          telemetry.recordEvent(mergeSpan, {
+            type: "merge",
+            strategy: "smart",
+            inputCount: rightArray ?? 1,
+            outputCount: leftArray ?? 1,
+          });
+        }
       }
     }
     debug?.mergeComplete({ mergeId: "double_pass_auto_merge", success: true });
+    // End merge span
+    if (mergeSpan && telemetry) {
+      telemetry.endSpan(mergeSpan, {
+        status: "ok",
+        output: merged,
+      });
+    }
     merged = dedupeArrays(merged);
+    // Create exact dedupe span
+    const exactDedupeSpan = telemetry?.startSpan({
+      name: "struktur.exact_dedupe",
+      kind: "CHAIN",
+      parentSpan: pass1Span,
+      attributes: {
+        "dedupe.method": "exact_hashing",
+      },
+    });
+    // End exact dedupe span
+    if (exactDedupeSpan && telemetry) {
+      telemetry.recordEvent(exactDedupeSpan, {
+        type: "merge",
+        strategy: "exact_hash_dedupe",
+        inputCount: Object.keys(merged).length,
+        outputCount: Object.keys(merged).length,
+      });
+      telemetry.endSpan(exactDedupeSpan, {
+        status: "ok",
+        output: merged,
+      });
+    }
     const dedupePrompt = buildDeduplicationPrompt(schema, merged);
@@ -181,6 +263,16 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       dedupeId: "double_pass_auto_dedupe",
       itemCount: Object.keys(merged).length,
     });
+    // Create LLM dedupe span
+    const llmDedupeSpan = telemetry?.startSpan({
+      name: "struktur.llm_dedupe",
+      kind: "CHAIN",
+      parentSpan: pass1Span,
+      attributes: {
+        "dedupe.method": "llm",
+      },
+    });
     const dedupeResponse = await runWithRetries<{ keys: string[] }>({
       model: this.config.dedupeModel ?? this.config.model,
@@ -192,6 +284,8 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       strict: this.config.strict,
       debug,
       callId: "double_pass_auto_dedupe",
+      telemetry: telemetry ?? undefined,
+      parentSpan: llmDedupeSpan,
     });
     step += 1;
@@ -217,9 +311,41 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       duplicatesFound: dedupeResponse.data.keys.length,
       itemsRemoved: dedupeResponse.data.keys.length,
     });
+    // End LLM dedupe span
+    if (llmDedupeSpan && telemetry) {
+      telemetry.recordEvent(llmDedupeSpan, {
+        type: "merge",
+        strategy: "llm_dedupe",
+        inputCount: Object.keys(merged).length,
+        outputCount: Object.keys(deduped).length,
+        deduped: dedupeResponse.data.keys.length,
+      });
+      telemetry.endSpan(llmDedupeSpan, {
+        status: "ok",
+        output: deduped,
+      });
+    }
+    // End pass 1 span
+    telemetry?.endSpan(pass1Span!, {
+      status: "ok",
+      output: deduped,
+    });
     let currentData = deduped as T;
     const usages = [...results.map((r) => r.usage), dedupeResponse.usage];
+    // Create pass 2 span
+    const pass2Span = telemetry?.startSpan({
+      name: "struktur.pass_2",
+      kind: "CHAIN",
+      parentSpan: strategySpan,
+      attributes: {
+        "pass.number": 2,
+        "pass.type": "sequential_refinement",
+      },
+    });
     for (const [index, batch] of batches.entries()) {
       const prompt = buildSequentialPrompt(
@@ -240,6 +366,8 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
         strict: this.config.strict,
         debug,
         callId: `double_pass_auto_2_batch_${index + 1}`,
+        telemetry: telemetry ?? undefined,
+        parentSpan: pass2Span,
       });
       currentData = result.data;
@@ -258,6 +386,18 @@ export class DoublePassAutoMergeStrategy<T> implements ExtractionStrategy<T> {
         strategy: this.name,
       });
     }
+    // End pass 2 span
+    telemetry?.endSpan(pass2Span!, {
+      status: "ok",
+      output: currentData,
+    });
+    // End strategy span
+    telemetry?.endSpan(strategySpan!, {
+      status: "ok",
+      output: currentData,
+    });
     return { data: currentData, usage: mergeUsage(usages) };
   }