npm - @struktur/sdk - Versions diffs - 2.1.1 → 2.2.0 - Mend

@struktur/sdk 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (105)

package/dist/index.js +4111 -0
package/dist/index.js.map +1 -0
package/dist/parsers.js +492 -0
package/dist/parsers.js.map +1 -0
package/dist/strategies.js +2435 -0
package/dist/strategies.js.map +1 -0
package/package.json +25 -13
package/src/agent-cli-integration.test.ts +0 -47
package/src/agent-export.test.ts +0 -17
package/src/agent-tool-labels.test.ts +0 -50
package/src/artifacts/AGENTS.md +0 -16
package/src/artifacts/fileToArtifact.test.ts +0 -37
package/src/artifacts/fileToArtifact.ts +0 -44
package/src/artifacts/input.test.ts +0 -243
package/src/artifacts/input.ts +0 -360
package/src/artifacts/providers.test.ts +0 -19
package/src/artifacts/providers.ts +0 -7
package/src/artifacts/urlToArtifact.test.ts +0 -23
package/src/artifacts/urlToArtifact.ts +0 -19
package/src/auth/AGENTS.md +0 -11
package/src/auth/config.test.ts +0 -132
package/src/auth/config.ts +0 -186
package/src/auth/tokens.test.ts +0 -58
package/src/auth/tokens.ts +0 -229
package/src/chunking/AGENTS.md +0 -11
package/src/chunking/ArtifactBatcher.test.ts +0 -22
package/src/chunking/ArtifactBatcher.ts +0 -110
package/src/chunking/ArtifactSplitter.test.ts +0 -38
package/src/chunking/ArtifactSplitter.ts +0 -151
package/src/debug/AGENTS.md +0 -79
package/src/debug/logger.test.ts +0 -244
package/src/debug/logger.ts +0 -211
package/src/extract.test.ts +0 -22
package/src/extract.ts +0 -150
package/src/fields.test.ts +0 -681
package/src/fields.ts +0 -246
package/src/index.test.ts +0 -20
package/src/index.ts +0 -110
package/src/llm/AGENTS.md +0 -9
package/src/llm/LLMClient.test.ts +0 -394
package/src/llm/LLMClient.ts +0 -264
package/src/llm/RetryingRunner.test.ts +0 -174
package/src/llm/RetryingRunner.ts +0 -270
package/src/llm/message.test.ts +0 -42
package/src/llm/message.ts +0 -47
package/src/llm/models.test.ts +0 -82
package/src/llm/models.ts +0 -190
package/src/llm/resolveModel.ts +0 -86
package/src/merge/AGENTS.md +0 -6
package/src/merge/Deduplicator.test.ts +0 -108
package/src/merge/Deduplicator.ts +0 -45
package/src/merge/SmartDataMerger.test.ts +0 -177
package/src/merge/SmartDataMerger.ts +0 -56
package/src/parsers/AGENTS.md +0 -58
package/src/parsers/collect.test.ts +0 -56
package/src/parsers/collect.ts +0 -31
package/src/parsers/index.ts +0 -6
package/src/parsers/mime.test.ts +0 -91
package/src/parsers/mime.ts +0 -137
package/src/parsers/npm.ts +0 -26
package/src/parsers/pdf.test.ts +0 -394
package/src/parsers/pdf.ts +0 -194
package/src/parsers/runner.test.ts +0 -95
package/src/parsers/runner.ts +0 -177
package/src/parsers/types.ts +0 -29
package/src/prompts/AGENTS.md +0 -8
package/src/prompts/DeduplicationPrompt.test.ts +0 -41
package/src/prompts/DeduplicationPrompt.ts +0 -37
package/src/prompts/ExtractorPrompt.test.ts +0 -21
package/src/prompts/ExtractorPrompt.ts +0 -72
package/src/prompts/ParallelMergerPrompt.test.ts +0 -8
package/src/prompts/ParallelMergerPrompt.ts +0 -37
package/src/prompts/SequentialExtractorPrompt.test.ts +0 -24
package/src/prompts/SequentialExtractorPrompt.ts +0 -82
package/src/prompts/formatArtifacts.test.ts +0 -39
package/src/prompts/formatArtifacts.ts +0 -46
package/src/strategies/AGENTS.md +0 -6
package/src/strategies/DoublePassAutoMergeStrategy.test.ts +0 -53
package/src/strategies/DoublePassAutoMergeStrategy.ts +0 -410
package/src/strategies/DoublePassStrategy.test.ts +0 -48
package/src/strategies/DoublePassStrategy.ts +0 -266
package/src/strategies/ParallelAutoMergeStrategy.test.ts +0 -152
package/src/strategies/ParallelAutoMergeStrategy.ts +0 -345
package/src/strategies/ParallelStrategy.test.ts +0 -61
package/src/strategies/ParallelStrategy.ts +0 -208
package/src/strategies/SequentialAutoMergeStrategy.test.ts +0 -66
package/src/strategies/SequentialAutoMergeStrategy.ts +0 -325
package/src/strategies/SequentialStrategy.test.ts +0 -53
package/src/strategies/SequentialStrategy.ts +0 -142
package/src/strategies/SimpleStrategy.test.ts +0 -46
package/src/strategies/SimpleStrategy.ts +0 -94
package/src/strategies/concurrency.test.ts +0 -16
package/src/strategies/concurrency.ts +0 -14
package/src/strategies/index.test.ts +0 -20
package/src/strategies/index.ts +0 -7
package/src/strategies/utils.test.ts +0 -76
package/src/strategies/utils.ts +0 -95
package/src/tokenization.test.ts +0 -119
package/src/tokenization.ts +0 -71
package/src/types.test.ts +0 -25
package/src/types.ts +0 -174
package/src/validation/AGENTS.md +0 -7
package/src/validation/validator.test.ts +0 -204
package/src/validation/validator.ts +0 -90
package/tsconfig.json +0 -22

package/src/debug/logger.ts DELETED Viewed

@@ -1,211 +0,0 @@
-import type { Artifact, ArtifactContent, ExtractionEvents, Usage, StepInfo, ProgressInfo, RetryInfo, TokenUsageInfo } from "../types";
-export type DebugLogger = ReturnType<typeof createDebugLogger>; // type of the object returned by createDebugLogger, derived from the factory itself
-export const createDebugLogger = (enabled: boolean) => { // builds an object of event methods that each emit one tagged JSON line; every method is a no-op when `enabled` is false
-  const log = (entry: Record<string, unknown>) => {
-    if (!enabled) return; // disabled logger: nothing is serialized or written
-    const timestamp = new Date().toISOString();
-    const logEntry = { timestamp, ...entry }; // `entry` is spread last, so a "timestamp" key in entry would replace this one
-    process.stderr.write(JSON.stringify(logEntry) + "\n"); // one JSON object per line, written to stderr
-  };
-  return { // each method prepends a fixed `type` tag; `...data` is spread after it, so a `type` key in data would replace the tag
-    // CLI initialization and setup events (args, schema, artifacts, model, strategy)
-    cliInit: (data: { args: Record<string, unknown> }) => {
-      log({ type: "cli_init", ...data });
-    },
-    schemaLoaded: (data: { source: string; schemaSize: number }) => {
-      log({ type: "schema_loaded", ...data });
-    },
-    artifactsLoaded: (data: {
-      count: number;
-      artifacts: Array<{ id: string; type: string; contentCount: number; tokens?: number }>;
-      totalTokens: number;
-      totalImages: number;
-    }) => {
-      log({ type: "artifacts_loaded", ...data });
-    },
-    modelResolved: (data: { modelSpec: string; resolvedModel: string }) => {
-      log({ type: "model_resolved", ...data });
-    },
-    strategyCreated: (data: { strategy: string; config: Record<string, unknown> }) => {
-      log({ type: "strategy_created", ...data });
-    },
-    // Chunking and batching events
-    chunkingStart: (data: {
-      artifactId: string;
-      totalTokens: number;
-      maxTokens: number;
-      maxImages?: number;
-    }) => {
-      log({ type: "chunking_start", ...data });
-    },
-    chunkingSplit: (data: {
-      artifactId: string;
-      originalContentCount: number;
-      splitContentCount: number;
-      splitReason: "text_too_long" | "content_limit";
-      originalTokens: number;
-      chunkSize: number;
-    }) => {
-      log({ type: "chunking_split", ...data });
-    },
-    chunkingResult: (data: {
-      artifactId: string;
-      chunksCreated: number;
-      chunkSizes: number[];
-    }) => {
-      log({ type: "chunking_result", ...data });
-    },
-    batchingStart: (data: {
-      totalArtifacts: number;
-      maxTokens: number;
-      maxImages?: number;
-      modelMaxTokens?: number;
-      effectiveMaxTokens: number;
-    }) => {
-      log({ type: "batching_start", ...data });
-    },
-    batchCreated: (data: {
-      batchIndex: number;
-      artifactCount: number;
-      totalTokens: number;
-      totalImages: number;
-      artifactIds: string[];
-    }) => {
-      log({ type: "batch_created", ...data });
-    },
-    batchingComplete: (data: {
-      totalBatches: number;
-      batches: Array<{ index: number; artifactCount: number; tokens: number; images: number }>;
-    }) => {
-      log({ type: "batching_complete", ...data });
-    },
-    // Strategy execution (run start, step and progress events)
-    strategyRunStart: (data: { strategy: string; estimatedSteps: number; artifactCount: number }) => {
-      log({ type: "strategy_run_start", ...data });
-    },
-    step: (data: StepInfo & { strategy: string }) => {
-      log({ type: "step", ...data });
-    },
-    progress: (data: ProgressInfo & { strategy: string; context?: string }) => {
-      log({ type: "progress", ...data });
-    },
-    // LLM calls (start/complete pairs share a callId)
-    llmCallStart: (data: {
-      callId: string;
-      model: string;
-      schemaName?: string;
-      systemLength: number;
-      userLength: number;
-      artifactCount: number;
-    }) => {
-      log({ type: "llm_call_start", ...data });
-    },
-    llmCallComplete: (data: {
-      callId: string;
-      success: boolean;
-      inputTokens: number;
-      outputTokens: number;
-      totalTokens: number;
-      durationMs?: number;
-      error?: string;
-    }) => {
-      log({ type: "llm_call_complete", ...data });
-    },
-    // Retry events
-    retry: (data: RetryInfo & { callId: string }) => {
-      log({ type: "retry", ...data });
-    },
-    // Validation (start, success and failure per attempt)
-    validationStart: (data: { callId: string; attempt: number; maxAttempts: number; strict: boolean }) => {
-      log({ type: "validation_start", ...data });
-    },
-    validationSuccess: (data: { callId: string; attempt: number }) => {
-      log({ type: "validation_success", ...data });
-    },
-    validationFailed: (data: { callId: string; attempt: number; errors: unknown[] }) => {
-      log({ type: "validation_failed", ...data });
-    },
-    // Merging
-    mergeStart: (data: { mergeId: string; inputCount: number; strategy: string }) => {
-      log({ type: "merge_start", ...data });
-    },
-    mergeComplete: (data: { mergeId: string; success: boolean; error?: string }) => {
-      log({ type: "merge_complete", ...data });
-    },
-    // Deduplication
-    dedupeStart: (data: { dedupeId: string; itemCount: number }) => {
-      log({ type: "dedupe_start", ...data });
-    },
-    dedupeComplete: (data: { dedupeId: string; duplicatesFound: number; itemsRemoved: number }) => {
-      log({ type: "dedupe_complete", ...data });
-    },
-    // Token usage tracking
-    tokenUsage: (data: TokenUsageInfo & { context: string }) => {
-      log({ type: "token_usage", ...data });
-    },
-    // Results (final outcome and token totals of an extraction)
-    extractionComplete: (data: {
-      success: boolean;
-      totalInputTokens: number;
-      totalOutputTokens: number;
-      totalTokens: number;
-      error?: string;
-    }) => {
-      log({ type: "extraction_complete", ...data });
-    },
-    // Prompt details (verbose): the system and user prompt payloads are logged as given
-    promptSystem: (data: { callId: string; system: string }) => {
-      log({ type: "prompt_system", ...data });
-    },
-    promptUser: (data: { callId: string; user: unknown }) => {
-      log({ type: "prompt_user", ...data });
-    },
-    // Raw response (passed straight to JSON.stringify by log)
-    rawResponse: (data: { callId: string; response: unknown }) => {
-      log({ type: "raw_response", ...data });
-    },
-    // Smart merge details (one event per merged field)
-    smartMergeField: (data: {
-      mergeId: string;
-      field: string;
-      operation: "merge_arrays" | "merge_objects" | "replace" | "concat";
-      leftCount?: number;
-      rightCount?: number;
-      resultCount?: number;
-    }) => {
-      log({ type: "smart_merge_field", ...data });
-    },
-  };
-};

package/src/extract.test.ts DELETED Viewed

@@ -1,22 +0,0 @@
-import { test, expect } from "bun:test";
-import { extract } from "./extract";
-import type { ExtractionStrategy, ExtractionOptions } from "./types";
-test("extract delegates to strategy", async () => { // asserts that the result of extract() carries the data returned by the stub strategy's run()
-  const strategy: ExtractionStrategy<{ ok: boolean }> = { // stub strategy: run() resolves to fixed data and usage
-    name: "mock",
-    run: async () => ({
-      data: { ok: true },
-      usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
-    }),
-  };
-  const options: ExtractionOptions<{ ok: boolean }> = {
-    artifacts: [],
-    schema: {}, // only `schema` is set; `fields` is left out
-    strategy,
-  };
-  const result = await extract(options);
-  expect(result.data.ok).toBe(true);
-});

package/src/extract.ts DELETED Viewed

@@ -1,150 +0,0 @@
-import type { ExtractionOptions, ExtractionResult } from "./types";
-import { buildSchemaFromFields } from "./fields";
-const emptyUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 }; // zeroed usage attached to the results extract() returns from its error paths
-/**
- * Resolve and validate the schema from ExtractionOptions: returns `options.schema` as-is, or the result of `buildSchemaFromFields(options.fields)`.
- * Exactly one of `schema` or `fields` must be provided (presence is tested with `!== undefined`); throws an Error when both or neither are set.
- */
-const resolveSchema = <T>(options: ExtractionOptions<T>) => {
-  const hasSchema = options.schema !== undefined;
-  const hasFields = options.fields !== undefined;
-  if (hasSchema && hasFields) {
-    throw new Error(
-      "Provide either `schema` or `fields`, not both. They are mutually exclusive.",
-    );
-  }
-  if (!hasSchema && !hasFields) {
-    throw new Error(
-      "A schema definition is required. Provide `schema` (a JSON Schema object) or `fields` (a shorthand fields string).",
-    );
-  }
-  if (hasFields) {
-    return buildSchemaFromFields(options.fields as string); // cast only: `fields` is not type-checked at runtime before being passed on
-  }
-  return options.schema as NonNullable<typeof options.schema>; // reached only when hasSchema is true, so the value is not undefined
-};
-export const extract = async <T>( // resolves the schema, runs options.strategy, and returns its result; errors thrown inside the try block come back in the result's `error` field
-  options: ExtractionOptions<T>,
-): Promise<ExtractionResult<T>> => {
-  const debug = options.debug;
-  const telemetry = options.telemetry;
-  // Initialize telemetry if provided
-  if (telemetry) {
-    await telemetry.initialize(); // NOTE(review): runs before the try block, so a rejection here propagates to the caller
-  }
-  // Start root extraction span (rootSpan stays undefined when no telemetry is provided)
-  const rootSpan = telemetry?.startSpan({
-    name: "struktur.extract",
-    kind: "CHAIN",
-    attributes: {
-      "extraction.strategy": options.strategy?.name ?? "default",
-      "extraction.artifacts.count": options.artifacts.length,
-    },
-  });
-  try {
-    // Validate mutual exclusion and resolve the concrete schema early so that
-    // every strategy receives a fully-populated options object.
-    let resolvedOptions: ExtractionOptions<T>;
-    try {
-      const schema = resolveSchema(options);
-      resolvedOptions = { ...options, schema }; // shallow copy; the caller's options object is not mutated
-    } catch (error) {
-      debug?.extractionComplete({
-        success: false,
-        totalInputTokens: 0,
-        totalOutputTokens: 0,
-        totalTokens: 0,
-        error: (error as Error).message,
-      });
-      telemetry?.endSpan(rootSpan!, { // arguments are only evaluated when telemetry is defined (optional call short-circuits)
-        status: "error",
-        error: error as Error,
-      });
-      await telemetry?.shutdown();
-      return {
-        data: null as unknown as T, // data is null at runtime on this path despite the T type
-        usage: emptyUsage,
-        error: error as Error,
-      };
-    }
-    const total = resolvedOptions.strategy.getEstimatedSteps?.(resolvedOptions.artifacts); // NOTE(review): `strategy` is dereferenced without `?.` here, unlike the span attributes above; undefined when the strategy has no getEstimatedSteps
-    debug?.strategyRunStart({
-      strategy: resolvedOptions.strategy.name,
-      estimatedSteps: total ?? 1, // falls back to 1 when no estimate is available
-      artifactCount: resolvedOptions.artifacts.length,
-    });
-    await resolvedOptions.events?.onStep?.({ step: 1, total, label: "start" });
-    debug?.step({
-      step: 1,
-      total,
-      label: "start",
-      strategy: resolvedOptions.strategy.name,
-    });
-    const result = await resolvedOptions.strategy.run(resolvedOptions);
-    await resolvedOptions.events?.onStep?.({
-      step: total ?? 1,
-      total,
-      label: "complete",
-    });
-    debug?.step({
-      step: total ?? 1,
-      total,
-      label: "complete",
-      strategy: resolvedOptions.strategy.name,
-    });
-    debug?.extractionComplete({
-      success: !result.error, // a result carrying an `error` is reported as a failure even though run() resolved
-      totalInputTokens: result.usage.inputTokens,
-      totalOutputTokens: result.usage.outputTokens,
-      totalTokens: result.usage.totalTokens,
-      error: result.error?.message,
-    });
-    telemetry?.endSpan(rootSpan!, {
-      status: result.error ? "error" : "ok",
-      output: result.data,
-      error: result.error,
-    });
-    await telemetry?.shutdown(); // shutdown is awaited before the result is returned
-    return result;
-  } catch (error) { // anything thrown after the span was started (strategy, event callbacks, debug hooks) lands here
-    debug?.extractionComplete({
-      success: false,
-      totalInputTokens: 0,
-      totalOutputTokens: 0,
-      totalTokens: 0,
-      error: (error as Error).message, // NOTE(review): unchecked cast; a thrown non-Error value may have no `message`
-    });
-    telemetry?.endSpan(rootSpan!, {
-      status: "error",
-      error: error as Error,
-    });
-    await telemetry?.shutdown();
-    return {
-      data: null as unknown as T, // data is null at runtime on this path despite the T type
-      usage: emptyUsage,
-      error: error as Error,
-    };
-  }
-};