@struktur/sdk 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/dist/index.js +4111 -0
  2. package/dist/index.js.map +1 -0
  3. package/dist/parsers.js +492 -0
  4. package/dist/parsers.js.map +1 -0
  5. package/dist/strategies.js +2435 -0
  6. package/dist/strategies.js.map +1 -0
  7. package/package.json +25 -13
  8. package/src/agent-cli-integration.test.ts +0 -47
  9. package/src/agent-export.test.ts +0 -17
  10. package/src/agent-tool-labels.test.ts +0 -50
  11. package/src/artifacts/AGENTS.md +0 -16
  12. package/src/artifacts/fileToArtifact.test.ts +0 -37
  13. package/src/artifacts/fileToArtifact.ts +0 -44
  14. package/src/artifacts/input.test.ts +0 -243
  15. package/src/artifacts/input.ts +0 -360
  16. package/src/artifacts/providers.test.ts +0 -19
  17. package/src/artifacts/providers.ts +0 -7
  18. package/src/artifacts/urlToArtifact.test.ts +0 -23
  19. package/src/artifacts/urlToArtifact.ts +0 -19
  20. package/src/auth/AGENTS.md +0 -11
  21. package/src/auth/config.test.ts +0 -132
  22. package/src/auth/config.ts +0 -186
  23. package/src/auth/tokens.test.ts +0 -58
  24. package/src/auth/tokens.ts +0 -229
  25. package/src/chunking/AGENTS.md +0 -11
  26. package/src/chunking/ArtifactBatcher.test.ts +0 -22
  27. package/src/chunking/ArtifactBatcher.ts +0 -110
  28. package/src/chunking/ArtifactSplitter.test.ts +0 -38
  29. package/src/chunking/ArtifactSplitter.ts +0 -151
  30. package/src/debug/AGENTS.md +0 -79
  31. package/src/debug/logger.test.ts +0 -244
  32. package/src/debug/logger.ts +0 -211
  33. package/src/extract.test.ts +0 -22
  34. package/src/extract.ts +0 -150
  35. package/src/fields.test.ts +0 -681
  36. package/src/fields.ts +0 -246
  37. package/src/index.test.ts +0 -20
  38. package/src/index.ts +0 -110
  39. package/src/llm/AGENTS.md +0 -9
  40. package/src/llm/LLMClient.test.ts +0 -394
  41. package/src/llm/LLMClient.ts +0 -264
  42. package/src/llm/RetryingRunner.test.ts +0 -174
  43. package/src/llm/RetryingRunner.ts +0 -270
  44. package/src/llm/message.test.ts +0 -42
  45. package/src/llm/message.ts +0 -47
  46. package/src/llm/models.test.ts +0 -82
  47. package/src/llm/models.ts +0 -190
  48. package/src/llm/resolveModel.ts +0 -86
  49. package/src/merge/AGENTS.md +0 -6
  50. package/src/merge/Deduplicator.test.ts +0 -108
  51. package/src/merge/Deduplicator.ts +0 -45
  52. package/src/merge/SmartDataMerger.test.ts +0 -177
  53. package/src/merge/SmartDataMerger.ts +0 -56
  54. package/src/parsers/AGENTS.md +0 -58
  55. package/src/parsers/collect.test.ts +0 -56
  56. package/src/parsers/collect.ts +0 -31
  57. package/src/parsers/index.ts +0 -6
  58. package/src/parsers/mime.test.ts +0 -91
  59. package/src/parsers/mime.ts +0 -137
  60. package/src/parsers/npm.ts +0 -26
  61. package/src/parsers/pdf.test.ts +0 -394
  62. package/src/parsers/pdf.ts +0 -194
  63. package/src/parsers/runner.test.ts +0 -95
  64. package/src/parsers/runner.ts +0 -177
  65. package/src/parsers/types.ts +0 -29
  66. package/src/prompts/AGENTS.md +0 -8
  67. package/src/prompts/DeduplicationPrompt.test.ts +0 -41
  68. package/src/prompts/DeduplicationPrompt.ts +0 -37
  69. package/src/prompts/ExtractorPrompt.test.ts +0 -21
  70. package/src/prompts/ExtractorPrompt.ts +0 -72
  71. package/src/prompts/ParallelMergerPrompt.test.ts +0 -8
  72. package/src/prompts/ParallelMergerPrompt.ts +0 -37
  73. package/src/prompts/SequentialExtractorPrompt.test.ts +0 -24
  74. package/src/prompts/SequentialExtractorPrompt.ts +0 -82
  75. package/src/prompts/formatArtifacts.test.ts +0 -39
  76. package/src/prompts/formatArtifacts.ts +0 -46
  77. package/src/strategies/AGENTS.md +0 -6
  78. package/src/strategies/DoublePassAutoMergeStrategy.test.ts +0 -53
  79. package/src/strategies/DoublePassAutoMergeStrategy.ts +0 -410
  80. package/src/strategies/DoublePassStrategy.test.ts +0 -48
  81. package/src/strategies/DoublePassStrategy.ts +0 -266
  82. package/src/strategies/ParallelAutoMergeStrategy.test.ts +0 -152
  83. package/src/strategies/ParallelAutoMergeStrategy.ts +0 -345
  84. package/src/strategies/ParallelStrategy.test.ts +0 -61
  85. package/src/strategies/ParallelStrategy.ts +0 -208
  86. package/src/strategies/SequentialAutoMergeStrategy.test.ts +0 -66
  87. package/src/strategies/SequentialAutoMergeStrategy.ts +0 -325
  88. package/src/strategies/SequentialStrategy.test.ts +0 -53
  89. package/src/strategies/SequentialStrategy.ts +0 -142
  90. package/src/strategies/SimpleStrategy.test.ts +0 -46
  91. package/src/strategies/SimpleStrategy.ts +0 -94
  92. package/src/strategies/concurrency.test.ts +0 -16
  93. package/src/strategies/concurrency.ts +0 -14
  94. package/src/strategies/index.test.ts +0 -20
  95. package/src/strategies/index.ts +0 -7
  96. package/src/strategies/utils.test.ts +0 -76
  97. package/src/strategies/utils.ts +0 -95
  98. package/src/tokenization.test.ts +0 -119
  99. package/src/tokenization.ts +0 -71
  100. package/src/types.test.ts +0 -25
  101. package/src/types.ts +0 -174
  102. package/src/validation/AGENTS.md +0 -7
  103. package/src/validation/validator.test.ts +0 -204
  104. package/src/validation/validator.ts +0 -90
  105. package/tsconfig.json +0 -22
@@ -1,110 +0,0 @@
1
- import type { Artifact } from "../types";
2
- import type { DebugLogger } from "../debug/logger";
3
- import {
4
- countArtifactTokens,
5
- countArtifactImages,
6
- type TokenCountOptions,
7
- } from "../tokenization";
8
- import { splitArtifact } from "./ArtifactSplitter";
9
-
10
/**
 * Options that control how artifacts are grouped into batches.
 */
export type BatchOptions = TokenCountOptions & {
  /** Token budget for a single batch. */
  maxTokens: number;
  /** Optional upper bound on the number of images in a single batch. */
  maxImages?: number;
  /**
   * Optional second token limit. When set to a non-zero value, the smaller of
   * this and `maxTokens` becomes the effective budget.
   */
  modelMaxTokens?: number;
  /** Optional logger that receives batching events. */
  debug?: DebugLogger;
};

/**
 * Splits each artifact with `splitArtifact` and packs the resulting pieces, in
 * order, into batches that respect the token budget and optional image limit.
 *
 * Limits are only enforced once the current batch already holds something, so
 * a piece that is over the limits by itself still ends up in a batch of its own.
 *
 * @param artifacts Artifacts to split and group; order is preserved.
 * @param options Token/image limits, token-counting options and optional logger.
 * @returns The batches, each a non-empty list of (split) artifacts. Empty when
 *          no pieces were produced.
 */
export const batchArtifacts = (
  artifacts: Artifact[],
  options: BatchOptions
): Artifact[][] => {
  const debug = options.debug;
  // A missing (or zero) modelMaxTokens leaves maxTokens as the only budget.
  const maxTokens = options.modelMaxTokens
    ? Math.min(options.maxTokens, options.modelMaxTokens)
    : options.maxTokens;

  debug?.batchingStart({
    totalArtifacts: artifacts.length,
    maxTokens: options.maxTokens,
    maxImages: options.maxImages,
    modelMaxTokens: options.modelMaxTokens,
    effectiveMaxTokens: maxTokens,
  });

  // Built once (it does not depend on the artifact) and typed against
  // splitArtifact's own parameter so mistakes are caught by the compiler.
  // Optional keys are only added when they carry a value.
  const splitOptions: Parameters<typeof splitArtifact>[1] = {
    maxTokens,
    ...(debug !== undefined ? { debug } : {}),
    ...(options.maxImages !== undefined ? { maxImages: options.maxImages } : {}),
    ...(options.textTokenRatio !== undefined
      ? { textTokenRatio: options.textTokenRatio }
      : {}),
    ...(options.defaultImageTokens !== undefined
      ? { defaultImageTokens: options.defaultImageTokens }
      : {}),
  };

  const batches: Artifact[][] = [];
  let currentBatch: Artifact[] = [];
  let currentTokens = 0;
  let currentImages = 0;

  // Logs the batch being built, stores it and starts a fresh, empty one.
  const flushBatch = (): void => {
    debug?.batchCreated({
      batchIndex: batches.length,
      artifactCount: currentBatch.length,
      totalTokens: currentTokens,
      totalImages: currentImages,
      artifactIds: currentBatch.map((a) => a.id),
    });
    batches.push(currentBatch);
    currentBatch = [];
    currentTokens = 0;
    currentImages = 0;
  };

  for (const artifact of artifacts) {
    for (const split of splitArtifact(artifact, splitOptions)) {
      const splitTokens = countArtifactTokens(split, options);
      const splitImages = countArtifactImages(split);

      const overTokens = currentTokens + splitTokens > maxTokens;
      const overImages =
        options.maxImages !== undefined &&
        currentImages + splitImages > options.maxImages;

      // Never flush an empty batch: an oversized piece goes in alone instead.
      if (currentBatch.length > 0 && (overTokens || overImages)) {
        flushBatch();
      }

      currentBatch.push(split);
      currentTokens += splitTokens;
      currentImages += splitImages;
    }
  }

  if (currentBatch.length > 0) {
    flushBatch();
  }

  // The summary is derived from the stored artifacts themselves (their
  // `tokens` field and media counts), not from the running counters above.
  debug?.batchingComplete({
    totalBatches: batches.length,
    batches: batches.map((batch, index) => ({
      index,
      artifactCount: batch.length,
      tokens: batch.reduce((sum, a) => sum + (a.tokens ?? 0), 0),
      images: batch.reduce(
        (sum, a) =>
          sum +
          a.contents.reduce((c, content) => c + (content.media?.length ?? 0), 0),
        0
      ),
    })),
  });

  return batches;
};
@@ -1,38 +0,0 @@
1
- import { test, expect } from "bun:test";
2
- import type { Artifact } from "../types";
3
- import { splitArtifact } from "./ArtifactSplitter";
4
-
5
/** Builds a one-content text artifact whose raw bytes are the given text. */
const baseArtifact = (text: string): Artifact => {
  const artifact: Artifact = {
    id: "artifact-1",
    type: "text",
    raw: async () => Buffer.from(text),
    contents: [{ text }],
  };
  return artifact;
};

test("splitArtifact splits large text into chunks", () => {
  // 20 characters with a 2-token budget: expect pieces of 8, 8 and 4 characters.
  const parts = splitArtifact(baseArtifact("abcdefghijklmnopqrst"), {
    maxTokens: 2,
  });
  const [first, second, third] = parts;

  expect(parts.length).toBe(3);
  expect(first?.contents[0]?.text).toBe("abcdefgh");
  expect(second?.contents[0]?.text).toBe("ijklmnop");
  expect(third?.contents[0]?.text).toBe("qrst");
});

test("splitArtifact keeps media on first text chunk", () => {
  const image = { type: "image", url: "https://example.com/x.png" };
  const withMedia: Artifact = {
    id: "artifact-2",
    type: "pdf",
    raw: async () => Buffer.from(""),
    contents: [{ text: "abcdefghijklmnopqrst", media: [image] }],
  };

  const [head, next] = splitArtifact(withMedia, { maxTokens: 2 });

  // Only the first piece carries the media; later pieces have none.
  expect(head?.contents[0]?.media?.length).toBe(1);
  expect(next?.contents[0]?.media).toBeUndefined();
});
@@ -1,151 +0,0 @@
1
- import type { Artifact, ArtifactContent } from "../types";
2
- import type { DebugLogger } from "../debug/logger";
3
- import {
4
- countContentTokens,
5
- countArtifactImages,
6
- countArtifactTokens,
7
- estimateTextTokens,
8
- type TokenCountOptions,
9
- } from "../tokenization";
10
-
11
/**
 * Options that control how a single artifact is split into parts.
 */
export type SplitOptions = TokenCountOptions & {
  /** Token budget for a single part. */
  maxTokens: number;
  /** Optional upper bound on the number of images in a single part. */
  maxImages?: number;
  /** Optional logger that receives chunking events. */
  debug?: DebugLogger;
};

/** True when the UTF-16 code unit is the first half of a surrogate pair. */
const isHighSurrogate = (code: number): boolean =>
  code >= 0xd800 && code <= 0xdbff;

/** True when the UTF-16 code unit is the second half of a surrogate pair. */
const isLowSurrogate = (code: number): boolean =>
  code >= 0xdc00 && code <= 0xdfff;

/**
 * Cuts one content entry into several when its text is estimated to exceed
 * `maxTokens`. Entries without text, or within budget, are returned unchanged
 * as a single-element array.
 *
 * Pieces are cut every `maxTokens * textTokenRatio` characters (ratio defaults
 * to 4). A cut is never placed between the two halves of a surrogate pair: the
 * boundary moves one code unit forward so the pair stays together, which means
 * such a piece can be one code unit longer than the nominal size. Text without
 * surrogate pairs is cut at exactly the same offsets as a plain fixed-size slice.
 *
 * Every piece keeps the original `page`; only the first piece keeps `media`.
 *
 * @param content Content entry to split.
 * @param maxTokens Token budget per piece.
 * @param options Token-counting options (only `textTokenRatio` is read here,
 *                the rest is forwarded to `estimateTextTokens`).
 * @param debug Optional logger; a `chunkingSplit` event is emitted only when
 *              both `debug` and `artifactId` are provided.
 * @param artifactId Id reported in the debug event.
 * @returns The resulting content pieces, in order.
 */
const splitTextIntoChunks = (
  content: ArtifactContent,
  maxTokens: number,
  options?: TokenCountOptions,
  debug?: DebugLogger,
  artifactId?: string
): ArtifactContent[] => {
  const fullText = content.text;
  if (!fullText) {
    return [content];
  }

  const totalTokens = estimateTextTokens(fullText, options);
  if (totalTokens <= maxTokens) {
    return [content];
  }

  const ratio = options?.textTokenRatio ?? 4;
  // At least one character per piece so the loop always advances.
  const chunkSize = Math.max(1, maxTokens * ratio);
  const chunks: ArtifactContent[] = [];

  let start = 0;
  for (let offset = 0; offset < fullText.length; offset += chunkSize) {
    // Same integer boundary String.prototype.slice would use for this offset.
    let end = Math.min(fullText.length, Math.trunc(offset + chunkSize));

    // Do not leave a lone surrogate on either side of the cut.
    if (
      end < fullText.length &&
      isHighSurrogate(fullText.charCodeAt(end - 1)) &&
      isLowSurrogate(fullText.charCodeAt(end))
    ) {
      end += 1;
    }

    // A previous boundary that was pushed forward may already cover this window.
    if (end <= start) {
      continue;
    }

    chunks.push({
      page: content.page,
      text: fullText.slice(start, end),
      media: offset === 0 ? content.media : undefined,
    });
    start = end;
  }

  if (debug && artifactId) {
    debug.chunkingSplit({
      artifactId,
      originalContentCount: 1,
      splitContentCount: chunks.length,
      splitReason: "text_too_long",
      originalTokens: totalTokens,
      chunkSize,
    });
  }

  return chunks;
};
60
-
61
/**
 * Splits an artifact into parts that respect the token budget and the optional
 * image limit.
 *
 * Each content entry is first cut by `splitTextIntoChunks`; the resulting
 * pieces are then packed, in order, into parts. Limits are only enforced once
 * the current part already holds something, so a piece that is over the limits
 * by itself still gets a part of its own.
 *
 * Every part is a copy of the artifact with id `<artifact.id>:part:<n>`
 * (1-based), its own `contents` and a `tokens` total. An artifact that yields
 * no pieces is returned as a single `:part:1` copy with its original contents.
 *
 * @param artifact Artifact to split.
 * @param options Token/image limits, token-counting options and optional logger.
 * @returns At least one part, in source order.
 */
export const splitArtifact = (
  artifact: Artifact,
  options: SplitOptions
): Artifact[] => {
  const { maxTokens, maxImages, debug } = options;

  const totalTokens = countArtifactTokens(artifact, options);
  debug?.chunkingStart({
    artifactId: artifact.id,
    totalTokens,
    maxTokens,
    maxImages,
  });

  const splitContents = artifact.contents.flatMap((content) =>
    splitTextIntoChunks(content, maxTokens, options, debug, artifact.id)
  );

  const chunks: Artifact[] = [];
  let currentContents: ArtifactContent[] = [];
  let currentTokens = 0;
  let currentImages = 0;

  // Stores the part being built and starts a fresh, empty one.
  const emitPart = (): void => {
    chunks.push({
      ...artifact,
      id: `${artifact.id}:part:${chunks.length + 1}`,
      contents: currentContents,
      tokens: currentTokens,
    });
    currentContents = [];
    currentTokens = 0;
    currentImages = 0;
  };

  for (const content of splitContents) {
    const contentTokens = countContentTokens(content, options);
    const contentImages = content.media?.length ?? 0;

    const overTokens = currentTokens + contentTokens > maxTokens;
    const overImages =
      maxImages !== undefined && currentImages + contentImages > maxImages;

    // Never close an empty part: an oversized piece goes in alone instead.
    if (currentContents.length > 0 && (overTokens || overImages)) {
      // The same reason is reported whether the token or the image limit was hit.
      debug?.chunkingSplit({
        artifactId: artifact.id,
        originalContentCount: splitContents.length,
        splitContentCount: chunks.length + 1,
        splitReason: "content_limit",
        originalTokens: totalTokens,
        chunkSize: maxTokens,
      });
      emitPart();
    }

    currentContents.push(content);
    currentTokens += contentTokens;
    currentImages += contentImages;
  }

  if (currentContents.length > 0) {
    emitPart();
  }

  // No pieces at all (e.g. no contents): return the artifact as one part,
  // reusing the token total computed at the start.
  if (chunks.length === 0) {
    chunks.push({
      ...artifact,
      id: `${artifact.id}:part:1`,
      tokens: totalTokens,
    });
  }

  debug?.chunkingResult({
    artifactId: artifact.id,
    chunksCreated: chunks.length,
    chunkSizes: chunks.map((c) => c.tokens ?? 0),
  });

  return chunks;
};
@@ -1,79 +0,0 @@
1
- # Debug Module
2
-
3
- ## Overview
4
-
5
- The debug module provides comprehensive JSON logging for the Struktur extraction pipeline. When the `--debug` flag is enabled via the CLI, every operation is logged to stderr as a single line of JSON.
6
-
7
- ## Key Files
8
-
9
- - `logger.ts`: Core debug logger with structured logging functions for every pipeline stage.
10
-
11
- ## Debug Log Types
12
-
13
- ### CLI Initialization
14
- - `cli_init`: CLI arguments and configuration
15
- - `schema_loaded`: Schema source and size
16
- - `artifacts_loaded`: Artifact count, types, tokens, images
17
- - `model_resolved`: Model specification resolution
18
- - `strategy_created`: Strategy selection with config
19
-
20
- ### Chunking
21
- - `chunking_start`: Per-artifact chunking begins
22
- - `chunking_split`: Text or content splits due to limits
23
- - `chunking_result`: Final chunks created with sizes
24
-
25
- ### Batching
26
- - `batching_start`: Batch creation parameters
27
- - `batch_created`: Individual batch details
28
- - `batching_complete`: Summary of all batches
29
-
30
- ### Strategy Execution
31
- - `strategy_run_start`: Strategy begins with estimated steps
32
- - `step`: Step progression through pipeline
33
- - `progress`: Progress updates within steps
34
-
35
- ### LLM Calls
36
- - `llm_call_start`: API call initiation with prompt sizes
37
- - `prompt_system`: Full system prompt (verbose)
38
- - `prompt_user`: Full user content (verbose)
39
- - `llm_call_complete`: Call completion with tokens/timing
40
- - `raw_response`: Raw LLM response data (verbose)
41
-
42
- ### Validation
43
- - `validation_start`: Validation attempt begins
44
- - `validation_success`: Validation passed
45
- - `validation_failed`: Validation errors
46
- - `retry`: Retry attempt triggered
47
-
48
- ### Merging
49
- - `merge_start`: Merge operation begins
50
- - `smart_merge_field`: Per-field merge operations
51
- - `merge_complete`: Merge success/failure
52
-
53
- ### Deduplication
54
- - `dedupe_start`: Deduplication begins
55
- - `dedupe_complete`: Duplicates found and removed
56
-
57
- ### Results
58
- - `token_usage`: Token consumption tracking
59
- - `extraction_complete`: Final extraction status
60
-
61
- ## Usage
62
-
63
- Enable via CLI:
64
- ```bash
65
- struktur extract --debug -t "text to extract" -s schema.json
66
- ```
67
-
68
- Debug logs are written to stderr as single-line JSON:
69
- ```json
70
- {"timestamp":"2026-02-24T20:00:00.000Z","type":"cli_init","args":{"strategy":"simple"}}
71
- ```
72
-
73
- ## Design Notes
74
-
75
- - All logs include ISO8601 timestamps
76
- - Logs are single-line JSON for easy parsing
77
- - Output goes to stderr so that it does not interfere with results written to stdout
78
- - The debug logger is passed through the entire pipeline via `ExtractionOptions.debug`
79
- - When debug is disabled (default), all logging calls are no-ops
@@ -1,244 +0,0 @@
1
- import { test, expect, beforeEach, afterEach } from "bun:test";
2
- import { createDebugLogger } from "./logger";
3
-
4
// Lines written to stderr (string chunks only) during the current test.
let capturedLines: string[];
const realStderrWrite = process.stderr.write;

beforeEach(() => {
  capturedLines = [];
  // Replace stderr.write with a recorder; non-string chunks are ignored.
  process.stderr.write = (chunk: unknown) => {
    if (typeof chunk === "string") {
      capturedLines.push(chunk);
    }
    return true;
  };
});

afterEach(() => {
  process.stderr.write = realStderrWrite;
});

/**
 * Creates an enabled debug logger, lets `emit` log through it, and returns the
 * first captured stderr line parsed as JSON.
 */
const emitAndParse = (
  emit: (logger: ReturnType<typeof createDebugLogger>) => void
) => {
  emit(createDebugLogger(true));
  return JSON.parse(capturedLines[0]!);
};

test("createDebugLogger with enabled=false is a no-op", () => {
  createDebugLogger(false).cliInit({ args: { test: true } });
  expect(capturedLines.length).toBe(0);
});

test("createDebugLogger with enabled=true logs to stderr", () => {
  createDebugLogger(true).cliInit({ args: { test: true } });
  expect(capturedLines.length).toBe(1);

  const entry = JSON.parse(capturedLines[0]!);
  expect(entry.type).toBe("cli_init");
  expect(entry.args).toEqual({ test: true });
  expect(entry.timestamp).toBeDefined();
});

test("cliInit logs correct type", () => {
  const entry = emitAndParse((log) =>
    log.cliInit({ args: { strategy: "simple" } })
  );
  expect(entry.type).toBe("cli_init");
});

test("schemaLoaded logs source and size", () => {
  const entry = emitAndParse((log) =>
    log.schemaLoaded({ source: "file.json", schemaSize: 100 })
  );
  expect(entry.type).toBe("schema_loaded");
  expect(entry.source).toBe("file.json");
  expect(entry.schemaSize).toBe(100);
});

test("artifactsLoaded logs artifact details", () => {
  const entry = emitAndParse((log) =>
    log.artifactsLoaded({
      count: 2,
      artifacts: [
        { id: "a1", type: "text", contentCount: 1, tokens: 10 },
        { id: "a2", type: "pdf", contentCount: 3 },
      ],
      totalTokens: 1010,
      totalImages: 2,
    })
  );
  expect(entry.type).toBe("artifacts_loaded");
  expect(entry.count).toBe(2);
  expect(entry.totalTokens).toBe(1010);
  expect(entry.totalImages).toBe(2);
});

test("chunkingStart logs chunking parameters", () => {
  const entry = emitAndParse((log) =>
    log.chunkingStart({
      artifactId: "a1",
      totalTokens: 100,
      maxTokens: 50,
      maxImages: 5,
    })
  );
  expect(entry.type).toBe("chunking_start");
  expect(entry.artifactId).toBe("a1");
  expect(entry.maxTokens).toBe(50);
});

test("llmCallStart logs call details", () => {
  const entry = emitAndParse((log) =>
    log.llmCallStart({
      callId: "call-1",
      model: "gpt-4",
      schemaName: "extract",
      systemLength: 100,
      userLength: 200,
      artifactCount: 3,
    })
  );
  expect(entry.type).toBe("llm_call_start");
  expect(entry.callId).toBe("call-1");
  expect(entry.artifactCount).toBe(3);
});

test("llmCallComplete logs success with duration", () => {
  const entry = emitAndParse((log) =>
    log.llmCallComplete({
      callId: "call-1",
      success: true,
      inputTokens: 100,
      outputTokens: 50,
      totalTokens: 150,
      durationMs: 1234,
    })
  );
  expect(entry.type).toBe("llm_call_complete");
  expect(entry.success).toBe(true);
  expect(entry.durationMs).toBe(1234);
});

test("llmCallComplete logs failure with error", () => {
  const entry = emitAndParse((log) =>
    log.llmCallComplete({
      callId: "call-1",
      success: false,
      inputTokens: 100,
      outputTokens: 0,
      totalTokens: 100,
      error: "API error",
    })
  );
  expect(entry.success).toBe(false);
  expect(entry.error).toBe("API error");
});

test("retry logs retry attempt", () => {
  const entry = emitAndParse((log) =>
    log.retry({
      callId: "call-1",
      attempt: 2,
      maxAttempts: 3,
      reason: "schema_validation_failed",
    })
  );
  expect(entry.type).toBe("retry");
  expect(entry.attempt).toBe(2);
  expect(entry.reason).toBe("schema_validation_failed");
});

test("validationStart logs validation attempt", () => {
  const entry = emitAndParse((log) =>
    log.validationStart({
      callId: "call-1",
      attempt: 1,
      maxAttempts: 3,
      strict: false,
    })
  );
  expect(entry.type).toBe("validation_start");
  expect(entry.strict).toBe(false);
});

test("validationSuccess logs successful validation", () => {
  const entry = emitAndParse((log) =>
    log.validationSuccess({ callId: "call-1", attempt: 1 })
  );
  expect(entry.type).toBe("validation_success");
});

test("validationFailed logs validation errors", () => {
  const entry = emitAndParse((log) =>
    log.validationFailed({
      callId: "call-1",
      attempt: 1,
      errors: [{ keyword: "required", message: "missing field" }],
    })
  );
  expect(entry.type).toBe("validation_failed");
  expect(entry.errors).toBeDefined();
});

test("mergeStart logs merge operation", () => {
  const entry = emitAndParse((log) =>
    log.mergeStart({
      mergeId: "merge-1",
      inputCount: 3,
      strategy: "parallel",
    })
  );
  expect(entry.type).toBe("merge_start");
  expect(entry.inputCount).toBe(3);
});

test("mergeComplete logs merge result", () => {
  const entry = emitAndParse((log) =>
    log.mergeComplete({ mergeId: "merge-1", success: true })
  );
  expect(entry.type).toBe("merge_complete");
  expect(entry.success).toBe(true);
});

test("dedupeStart logs deduplication start", () => {
  const entry = emitAndParse((log) =>
    log.dedupeStart({ dedupeId: "dedupe-1", itemCount: 10 })
  );
  expect(entry.type).toBe("dedupe_start");
  expect(entry.itemCount).toBe(10);
});

test("dedupeComplete logs deduplication result", () => {
  const entry = emitAndParse((log) =>
    log.dedupeComplete({
      dedupeId: "dedupe-1",
      duplicatesFound: 3,
      itemsRemoved: 3,
    })
  );
  expect(entry.type).toBe("dedupe_complete");
  expect(entry.duplicatesFound).toBe(3);
});

test("extractionComplete logs final result", () => {
  const entry = emitAndParse((log) =>
    log.extractionComplete({
      success: true,
      totalInputTokens: 100,
      totalOutputTokens: 50,
      totalTokens: 150,
    })
  );
  expect(entry.type).toBe("extraction_complete");
  expect(entry.success).toBe(true);
});

test("smartMergeField logs field merge operation", () => {
  const entry = emitAndParse((log) =>
    log.smartMergeField({
      mergeId: "merge-1",
      field: "items",
      operation: "merge_arrays",
      leftCount: 5,
      rightCount: 3,
      resultCount: 8,
    })
  );
  expect(entry.type).toBe("smart_merge_field");
  expect(entry.operation).toBe("merge_arrays");
});