npm - @struktur/sdk - Versions diffs - 1.2.1 → 2.1.0 - Mend

@struktur/sdk 1.2.1 → 2.1.0

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (26) hide show

package/LICENSE +110 -0
package/README.md +7 -3
package/package.json +3 -1
package/src/agent-cli-integration.test.ts +47 -0
package/src/agent-export.test.ts +17 -0
package/src/agent-tool-labels.test.ts +50 -0
package/src/artifacts/AGENTS.md +1 -1
package/src/auth/config.ts +57 -0
package/src/extract.ts +55 -19
package/src/index.ts +17 -0
package/src/llm/LLMClient.test.ts +198 -0
package/src/llm/LLMClient.ts +178 -20
package/src/llm/RetryingRunner.ts +83 -1
package/src/llm/resolveModel.ts +86 -0
package/src/strategies/DoublePassAutoMergeStrategy.ts +140 -0
package/src/strategies/DoublePassStrategy.ts +87 -0
package/src/strategies/ParallelAutoMergeStrategy.ts +104 -0
package/src/strategies/ParallelStrategy.ts +51 -0
package/src/strategies/SequentialAutoMergeStrategy.ts +103 -0
package/src/strategies/SequentialStrategy.ts +23 -0
package/src/strategies/SimpleStrategy.ts +20 -0
package/src/strategies/utils.ts +42 -3
package/src/types.ts +67 -9
package/src/validation/AGENTS.md +3 -2
package/src/validation/validator.test.ts +32 -0
package/src/validation/validator.ts +8 -0

package/src/strategies/SequentialAutoMergeStrategy.ts CHANGED Viewed

@@ -81,6 +81,19 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
   async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
     const debug = options.debug;
+    const { telemetry } = options;
+    // Create strategy-level span
+    const strategySpan = telemetry?.startSpan({
+      name: "strategy.sequential-auto-merge",
+      kind: "CHAIN",
+      attributes: {
+        "strategy.name": this.name,
+        "strategy.artifacts.count": options.artifacts.length,
+        "strategy.chunk_size": this.config.chunkSize,
+      },
+    });
     const batches = getBatches(
       options.artifacts,
       {
@@ -88,6 +101,8 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
         maxImages: this.config.maxImages,
       },
       debug,
+      telemetry ?? undefined,
+      strategySpan,
     );
     const schema = serializeSchema(options.schema);
@@ -104,6 +119,17 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       inputCount: batches.length,
       strategy: this.name,
     });
+    // Create smart merge span
+    const mergeSpan = telemetry?.startSpan({
+      name: "struktur.smart_merge",
+      kind: "CHAIN",
+      parentSpan: strategySpan,
+      attributes: {
+        "merge.strategy": "smart",
+        "merge.input_count": batches.length,
+      },
+    });
     for (const [index, batch] of batches.entries()) {
       const prompt = buildExtractorPrompt(
@@ -122,6 +148,8 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
         strict: options.strict ?? this.config.strict,
         debug,
         callId: `sequential_auto_batch_${index + 1}`,
+        telemetry: telemetry ?? undefined,
+        parentSpan: mergeSpan,
       });
       merged = merger.merge(merged, result.data as Record<string, unknown>);
@@ -145,6 +173,16 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
           leftCount: leftArray,
           rightCount: rightArray,
         });
+        // Record merge event in telemetry
+        if (mergeSpan && telemetry) {
+          telemetry.recordEvent(mergeSpan, {
+            type: "merge",
+            strategy: "smart",
+            inputCount: rightArray ?? 1,
+            outputCount: leftArray ?? 1,
+          });
+        }
       }
       step += 1;
@@ -162,8 +200,40 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
     }
     debug?.mergeComplete({ mergeId: "sequential_auto_merge", success: true });
+    // End merge span
+    if (mergeSpan && telemetry) {
+      telemetry.endSpan(mergeSpan, {
+        status: "ok",
+        output: merged,
+      });
+    }
     merged = dedupeArrays(merged);
+    // Create exact dedupe span
+    const exactDedupeSpan = telemetry?.startSpan({
+      name: "struktur.exact_dedupe",
+      kind: "CHAIN",
+      parentSpan: strategySpan,
+      attributes: {
+        "dedupe.method": "exact_hashing",
+      },
+    });
+    // End exact dedupe span
+    if (exactDedupeSpan && telemetry) {
+      telemetry.recordEvent(exactDedupeSpan, {
+        type: "merge",
+        strategy: "exact_hash_dedupe",
+        inputCount: Object.keys(merged).length,
+        outputCount: Object.keys(merged).length,
+      });
+      telemetry.endSpan(exactDedupeSpan, {
+        status: "ok",
+        output: merged,
+      });
+    }
     const dedupePrompt = buildDeduplicationPrompt(schema, merged);
@@ -171,6 +241,16 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       dedupeId: "sequential_auto_dedupe",
       itemCount: Object.keys(merged).length,
     });
+    // Create LLM dedupe span
+    const llmDedupeSpan = telemetry?.startSpan({
+      name: "struktur.llm_dedupe",
+      kind: "CHAIN",
+      parentSpan: strategySpan,
+      attributes: {
+        "dedupe.method": "llm",
+      },
+    });
     const dedupeResponse = await runWithRetries<{ keys: string[] }>({
       model: this.config.dedupeModel ?? this.config.model,
@@ -182,6 +262,8 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       strict: this.config.strict,
       debug,
       callId: "sequential_auto_dedupe",
+      telemetry: telemetry ?? undefined,
+      parentSpan: llmDedupeSpan,
     });
     step += 1;
@@ -207,6 +289,27 @@ export class SequentialAutoMergeStrategy<T> implements ExtractionStrategy<T> {
       duplicatesFound: dedupeResponse.data.keys.length,
       itemsRemoved: dedupeResponse.data.keys.length,
     });
+    // End LLM dedupe span
+    if (llmDedupeSpan && telemetry) {
+      telemetry.recordEvent(llmDedupeSpan, {
+        type: "merge",
+        strategy: "llm_dedupe",
+        inputCount: Object.keys(merged).length,
+        outputCount: Object.keys(deduped).length,
+        deduped: dedupeResponse.data.keys.length,
+      });
+      telemetry.endSpan(llmDedupeSpan, {
+        status: "ok",
+        output: deduped,
+      });
+    }
+    // End strategy span
+    telemetry?.endSpan(strategySpan!, {
+      status: "ok",
+      output: deduped,
+    });
     return {
       data: deduped as T,

package/src/strategies/SequentialStrategy.ts CHANGED Viewed

@@ -36,6 +36,19 @@ export class SequentialStrategy<T> implements ExtractionStrategy<T> {
   async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
     const debug = options.debug;
+    const { telemetry } = options;
+    // Create strategy-level span
+    const strategySpan = telemetry?.startSpan({
+      name: "strategy.sequential",
+      kind: "CHAIN",
+      attributes: {
+        "strategy.name": this.name,
+        "strategy.artifacts.count": options.artifacts.length,
+        "strategy.chunk_size": this.config.chunkSize,
+      },
+    });
     const batches = getBatches(
       options.artifacts,
       {
@@ -43,6 +56,8 @@ export class SequentialStrategy<T> implements ExtractionStrategy<T> {
         maxImages: this.config.maxImages,
       },
       debug,
+      telemetry ?? undefined,
+      strategySpan,
     );
     const schema = serializeSchema(options.schema);
@@ -84,6 +99,8 @@ export class SequentialStrategy<T> implements ExtractionStrategy<T> {
         strict: options.strict ?? this.config.strict,
         debug,
         callId: `sequential_batch_${index + 1}`,
+        telemetry: telemetry ?? undefined,
+        parentSpan: strategySpan,
       });
       currentData = result.data;
@@ -110,6 +127,12 @@ export class SequentialStrategy<T> implements ExtractionStrategy<T> {
       throw new Error("No data extracted from sequential strategy");
     }
+    // End strategy span
+    telemetry?.endSpan(strategySpan!, {
+      status: "ok",
+      output: currentData,
+    });
     return { data: currentData, usage: mergeUsage(usages) };
   }
 }

package/src/strategies/SimpleStrategy.ts CHANGED Viewed

@@ -25,6 +25,18 @@ export class SimpleStrategy<T> implements ExtractionStrategy<T> {
   async run(options: ExtractionOptions<T>): Promise<ExtractionResult<T>> {
     const debug = options.debug;
+    const { telemetry } = options;
+    // Create strategy-level span
+    const strategySpan = telemetry?.startSpan({
+      name: "strategy.simple",
+      kind: "CHAIN",
+      attributes: {
+        "strategy.name": this.name,
+        "strategy.artifacts.count": options.artifacts.length,
+      },
+    });
     const schema = serializeSchema(options.schema);
     const { system, user } = buildExtractorPrompt(
       options.artifacts,
@@ -56,6 +68,8 @@ export class SimpleStrategy<T> implements ExtractionStrategy<T> {
       strict: options.strict ?? this.config.strict,
       debug,
       callId: "simple_extract",
+      telemetry,
+      parentSpan: strategySpan,
     });
     debug?.step({
@@ -65,6 +79,12 @@ export class SimpleStrategy<T> implements ExtractionStrategy<T> {
       strategy: this.name,
     });
+    // End strategy span
+    telemetry?.endSpan(strategySpan!, {
+      status: "ok",
+      output: result.data,
+    });
     return { data: result.data, usage: result.usage };
   }
 }

package/src/strategies/utils.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { Artifact, ExtractionEvents, Usage } from "../types";
+import type { Artifact, ExtractionEvents, Usage, TelemetryAdapter } from "../types";
 import type { DebugLogger } from "../debug/logger";
 import { batchArtifacts, type BatchOptions } from "../chunking/ArtifactBatcher";
 import { buildUserContent } from "../llm/message";
@@ -22,9 +22,44 @@ export const mergeUsage = (usages: Usage[]) => {
 export const getBatches = (
   artifacts: Artifact[],
   options: BatchOptions,
-  debug?: DebugLogger
+  debug?: DebugLogger,
+  telemetry?: TelemetryAdapter,
+  parentSpan?: { id: string; traceId: string; name: string; kind: string; startTime: number; parentId?: string }
 ) => {
-  return batchArtifacts(artifacts, { ...options, debug });
+  // Create chunking span if telemetry is enabled
+  const chunkingSpan = telemetry?.startSpan({
+    name: "struktur.chunking",
+    kind: "RETRIEVER",
+    parentSpan,
+    attributes: {
+      "chunking.artifact_count": artifacts.length,
+      "chunking.max_tokens": options.maxTokens,
+      "chunking.max_images": options.maxImages,
+    },
+  });
+  const batches = batchArtifacts(artifacts, { ...options, debug });
+  // Record chunking results
+  if (chunkingSpan && telemetry) {
+    batches.forEach((batch, index) => {
+      telemetry.recordEvent(chunkingSpan, {
+        type: "chunk",
+        chunkIndex: index,
+        totalChunks: batches.length,
+        tokens: batch.reduce((sum, a) => sum + (a.tokens || 0), 0),
+        images: batch.reduce((sum, a) =>
+          sum + (a.contents?.flatMap((c) => c.media || []).length || 0), 0),
+      });
+    });
+    telemetry.endSpan(chunkingSpan, {
+      status: "ok",
+      output: { batchCount: batches.length },
+    });
+  }
+  return batches;
 };
 export const extractWithPrompt = async <T>(options: {
@@ -38,6 +73,8 @@ export const extractWithPrompt = async <T>(options: {
   strict?: boolean;
   debug?: DebugLogger;
   callId?: string;
+  telemetry?: TelemetryAdapter;
+  parentSpan?: { id: string; traceId: string; name: string; kind: string; startTime: number; parentId?: string };
 }) => {
   const userContent = buildUserContent(options.user, options.artifacts);
   const result = await runWithRetries<T>({
@@ -50,6 +87,8 @@ export const extractWithPrompt = async <T>(options: {
     strict: options.strict,
     debug: options.debug,
     callId: options.callId,
+    telemetry: options.telemetry,
+    parentSpan: options.parentSpan,
   });
   return result;

package/src/types.ts CHANGED Viewed

@@ -45,10 +45,34 @@ export type ExtractionResult<T> = {
   error?: Error;
 };
+/**
+ * Telemetry adapter interface for tracing extraction operations.
+ * This is a minimal interface that matches the full TelemetryAdapter from @struktur/telemetry.
+ * SDK users should import adapters from @struktur/telemetry package.
+ */
+export interface TelemetryAdapter {
+  readonly name: string;
+  readonly version: string;
+  initialize(): Promise<void>;
+  shutdown(): Promise<void>;
+  startSpan(context: {
+    name: string;
+    kind: "CHAIN" | "LLM" | "TOOL" | "AGENT" | "RETRIEVER" | "EMBEDDING" | "RERANKER";
+    parentSpan?: { id: string; traceId: string };
+    attributes?: Record<string, unknown>;
+    startTime?: number;
+  }): { id: string; traceId: string; name: string; kind: string; startTime: number; parentId?: string };
+  endSpan(span: { id: string }, result?: { status: "ok" | "error"; error?: Error; output?: unknown; latencyMs?: number }): void;
+  recordEvent(span: { id: string }, event: unknown): void;
+  setAttributes(span: { id: string }, attributes: Record<string, unknown>): void;
+  setContext(context: { sessionId?: string; userId?: string; metadata?: Record<string, unknown>; tags?: string[] }): void;
+}
 export type StepInfo = {
   step: number;
   total?: number;
   label?: string;
+  detail?: string;
 };
 export type ProgressInfo = {
@@ -67,19 +91,47 @@ export type TokenUsageInfo = Usage & {
 };
 export type RetryInfo = {
-  attempt: number;
-  maxAttempts: number;
-  reason?: string;
+	attempt: number;
+	maxAttempts: number;
+	reason?: string;
 };
-export type ExtractionEvents = {
-  onStep?: (info: StepInfo) => void | Promise<void>;
-  onMessage?: (info: MessageInfo) => void | Promise<void>;
-  onProgress?: (info: ProgressInfo) => void | Promise<void>;
-  onTokenUsage?: (info: TokenUsageInfo) => void | Promise<void>;
-  onRetry?: (info: RetryInfo) => void | Promise<void>;
+export type AgentToolStartInfo = {
+	toolName: string;
+	toolCallId: string;
+	args: Record<string, unknown>;
+};
+export type AgentToolEndInfo = {
+	toolCallId: string;
+	result?: Record<string, unknown>;
+	error?: string;
+};
+export type AgentMessageInfo = {
+	content: string;
+	role?: "assistant" | "user";
 };
+export type AgentReasoningInfo = {
+	thought: string;
+};
+export type AgentEvents = {
+	onAgentToolStart?: (info: AgentToolStartInfo) => void | Promise<void>;
+	onAgentToolEnd?: (info: AgentToolEndInfo) => void | Promise<void>;
+	onAgentMessage?: (info: AgentMessageInfo) => void | Promise<void>;
+	onAgentReasoning?: (info: AgentReasoningInfo) => void | Promise<void>;
+};
+export type ExtractionEvents = {
+	onStep?: (info: StepInfo) => void | Promise<void>;
+	onMessage?: (info: MessageInfo) => void | Promise<void>;
+	onProgress?: (info: ProgressInfo) => void | Promise<void>;
+	onTokenUsage?: (info: TokenUsageInfo) => void | Promise<void>;
+	onRetry?: (info: RetryInfo) => void | Promise<void>;
+} & AgentEvents;
 export type AnyJSONSchema = Record<string, unknown>;
 export type TypedJSONSchema<T> = JSONSchemaType<T>;
@@ -107,6 +159,12 @@ export type ExtractionOptions<T> = {
   events?: ExtractionEvents;
   debug?: DebugLogger;
   strict?: boolean;
+  /**
+   * Telemetry adapter for tracing extraction operations.
+   * Supports Phoenix (Arize), Langfuse, and other OpenTelemetry-compatible providers.
+   * Import from `@struktur/telemetry` package and pass the adapter here.
+   */
+  telemetry?: TelemetryAdapter | null;
 }
 export interface ExtractionStrategy<T> {

package/src/validation/AGENTS.md CHANGED Viewed

@@ -1,6 +1,7 @@
 Validation module
-- Purpose: Ajv schema validation and error shaping.
+- Purpose: Schema validation and error shaping.
 - Key files: `validator.ts`.
-- Design: `validateOrThrow` compiles schemas and throws `SchemaValidationError` on failure; `createAjv` registers `ajv-formats` for common schema formats.
+- Design: `validateOrThrow` compiles schemas and throws `SchemaValidationError` on failure; `createAjv` registers `ajv-formats` for common schema formats and adds a custom `artifact-id` format for referencing images in artifacts.
+- Custom formats: `artifact-id` validates strings matching the pattern `artifact:ID/images/imageNUM.EXT` (e.g., `artifact:123456/images/image1.jpg`).
 - Tests: `validator.test.ts`.

package/src/validation/validator.test.ts CHANGED Viewed

@@ -82,6 +82,38 @@ test("createAjv supports common formats", () => {
   }
 });
+test("createAjv supports artifact-id format", () => {
+  const ajv = createAjv();
+  const schema: JSONSchemaType<string> = { type: "string", format: "artifact-id" };
+  const validData = validateOrThrow<string>(ajv, schema, "artifact:123456/images/image1.jpg");
+  expect(validData).toBe("artifact:123456/images/image1.jpg");
+  const validData2 = validateOrThrow<string>(ajv, schema, "artifact:abc-xyz/images/image10.png");
+  expect(validData2).toBe("artifact:abc-xyz/images/image10.png");
+  try {
+    validateOrThrow<string>(ajv, schema, "not-an-artifact-id");
+    throw new Error("Expected validation error");
+  } catch (error) {
+    expect(error).toBeInstanceOf(SchemaValidationError);
+  }
+  try {
+    validateOrThrow<string>(ajv, schema, "artifact:123/images/image");
+    throw new Error("Expected validation error");
+  } catch (error) {
+    expect(error).toBeInstanceOf(SchemaValidationError);
+  }
+  try {
+    validateOrThrow<string>(ajv, schema, "https://example.com/image.jpg");
+    throw new Error("Expected validation error");
+  } catch (error) {
+    expect(error).toBeInstanceOf(SchemaValidationError);
+  }
+});
 test("isRequiredError identifies required constraint violations", () => {
   const requiredError = {
     keyword: "required",

package/src/validation/validator.ts CHANGED Viewed

@@ -19,6 +19,8 @@ export class SchemaValidationError extends Error {
   }
 }
+const ARTIFACT_ID_PATTERN = /^artifact:[^/]+\/images\/image\d+\.\w+$/;
 export const createAjv = () => {
   const ajv = new Ajv({
     allErrors: true,
@@ -26,6 +28,12 @@ export const createAjv = () => {
     allowUnionTypes: true,
   });
   addFormats(ajv);
+  ajv.addFormat("artifact-id", {
+    type: "string",
+    validate: (data: string) => ARTIFACT_ID_PATTERN.test(data),
+  });
   return ajv;
 };