@struktur/sdk 2.1.2 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. package/dist/artifacts/fileToArtifact.d.ts +8 -0
  2. package/dist/artifacts/fileToArtifact.d.ts.map +1 -0
  3. package/dist/artifacts/input.d.ts +60 -0
  4. package/dist/artifacts/input.d.ts.map +1 -0
  5. package/{src/artifacts/providers.ts → dist/artifacts/providers.d.ts} +2 -4
  6. package/dist/artifacts/providers.d.ts.map +1 -0
  7. package/dist/artifacts/urlToArtifact.d.ts +3 -0
  8. package/dist/artifacts/urlToArtifact.d.ts.map +1 -0
  9. package/dist/auth/config.d.ts +34 -0
  10. package/dist/auth/config.d.ts.map +1 -0
  11. package/dist/auth/tokens.d.ts +18 -0
  12. package/dist/auth/tokens.d.ts.map +1 -0
  13. package/dist/chunking/ArtifactBatcher.d.ts +11 -0
  14. package/dist/chunking/ArtifactBatcher.d.ts.map +1 -0
  15. package/dist/chunking/ArtifactSplitter.d.ts +10 -0
  16. package/dist/chunking/ArtifactSplitter.d.ts.map +1 -0
  17. package/dist/debug/logger.d.ts +169 -0
  18. package/dist/debug/logger.d.ts.map +1 -0
  19. package/dist/extract.d.ts +3 -0
  20. package/dist/extract.d.ts.map +1 -0
  21. package/dist/fields.d.ts +75 -0
  22. package/dist/fields.d.ts.map +1 -0
  23. package/dist/index.d.ts +24 -0
  24. package/dist/index.d.ts.map +1 -0
  25. package/dist/index.js +5603 -0
  26. package/dist/index.js.map +1 -0
  27. package/dist/llm/LLMClient.d.ts +40 -0
  28. package/dist/llm/LLMClient.d.ts.map +1 -0
  29. package/dist/llm/RetryingRunner.d.ts +37 -0
  30. package/dist/llm/RetryingRunner.d.ts.map +1 -0
  31. package/dist/llm/message.d.ts +12 -0
  32. package/dist/llm/message.d.ts.map +1 -0
  33. package/dist/llm/models.d.ts +13 -0
  34. package/dist/llm/models.d.ts.map +1 -0
  35. package/dist/llm/resolveModel.d.ts +3 -0
  36. package/dist/llm/resolveModel.d.ts.map +1 -0
  37. package/dist/merge/Deduplicator.d.ts +4 -0
  38. package/dist/merge/Deduplicator.d.ts.map +1 -0
  39. package/dist/merge/SmartDataMerger.d.ts +7 -0
  40. package/dist/merge/SmartDataMerger.d.ts.map +1 -0
  41. package/dist/parsers/collect.d.ts +7 -0
  42. package/dist/parsers/collect.d.ts.map +1 -0
  43. package/{src/parsers/index.ts → dist/parsers/index.d.ts} +1 -0
  44. package/dist/parsers/index.d.ts.map +1 -0
  45. package/dist/parsers/mime.d.ts +12 -0
  46. package/dist/parsers/mime.d.ts.map +1 -0
  47. package/dist/parsers/npm.d.ts +16 -0
  48. package/dist/parsers/npm.d.ts.map +1 -0
  49. package/dist/parsers/pdf.d.ts +36 -0
  50. package/dist/parsers/pdf.d.ts.map +1 -0
  51. package/dist/parsers/runner.d.ts +4 -0
  52. package/dist/parsers/runner.d.ts.map +1 -0
  53. package/dist/parsers/types.d.ts +27 -0
  54. package/dist/parsers/types.d.ts.map +1 -0
  55. package/dist/parsers.d.ts +1 -0
  56. package/dist/parsers.js +492 -0
  57. package/dist/parsers.js.map +1 -0
  58. package/dist/prompts/DeduplicationPrompt.d.ts +5 -0
  59. package/dist/prompts/DeduplicationPrompt.d.ts.map +1 -0
  60. package/dist/prompts/ExtractorPrompt.d.ts +6 -0
  61. package/dist/prompts/ExtractorPrompt.d.ts.map +1 -0
  62. package/dist/prompts/ParallelMergerPrompt.d.ts +5 -0
  63. package/dist/prompts/ParallelMergerPrompt.d.ts.map +1 -0
  64. package/dist/prompts/SequentialExtractorPrompt.d.ts +6 -0
  65. package/dist/prompts/SequentialExtractorPrompt.d.ts.map +1 -0
  66. package/dist/prompts/formatArtifacts.d.ts +3 -0
  67. package/dist/prompts/formatArtifacts.d.ts.map +1 -0
  68. package/dist/strategies/DoublePassAutoMergeStrategy.d.ts +23 -0
  69. package/dist/strategies/DoublePassAutoMergeStrategy.d.ts.map +1 -0
  70. package/dist/strategies/DoublePassStrategy.d.ts +22 -0
  71. package/dist/strategies/DoublePassStrategy.d.ts.map +1 -0
  72. package/dist/strategies/ParallelAutoMergeStrategy.d.ts +27 -0
  73. package/dist/strategies/ParallelAutoMergeStrategy.d.ts.map +1 -0
  74. package/dist/strategies/ParallelStrategy.d.ts +22 -0
  75. package/dist/strategies/ParallelStrategy.d.ts.map +1 -0
  76. package/dist/strategies/SequentialAutoMergeStrategy.d.ts +22 -0
  77. package/dist/strategies/SequentialAutoMergeStrategy.d.ts.map +1 -0
  78. package/dist/strategies/SequentialStrategy.d.ts +20 -0
  79. package/dist/strategies/SequentialStrategy.d.ts.map +1 -0
  80. package/dist/strategies/SimpleStrategy.d.ts +18 -0
  81. package/dist/strategies/SimpleStrategy.d.ts.map +1 -0
  82. package/dist/strategies/agent/AgentStrategy.d.ts +44 -0
  83. package/dist/strategies/agent/AgentStrategy.d.ts.map +1 -0
  84. package/dist/strategies/agent/AgentTools.d.ts +55 -0
  85. package/dist/strategies/agent/AgentTools.d.ts.map +1 -0
  86. package/dist/strategies/agent/ArtifactFilesystem.d.ts +51 -0
  87. package/dist/strategies/agent/ArtifactFilesystem.d.ts.map +1 -0
  88. package/dist/strategies/agent/index.d.ts +4 -0
  89. package/dist/strategies/agent/index.d.ts.map +1 -0
  90. package/dist/strategies/concurrency.d.ts +2 -0
  91. package/dist/strategies/concurrency.d.ts.map +1 -0
  92. package/{src/strategies/index.ts → dist/strategies/index.d.ts} +2 -0
  93. package/dist/strategies/index.d.ts.map +1 -0
  94. package/dist/strategies/utils.d.ts +39 -0
  95. package/dist/strategies/utils.d.ts.map +1 -0
  96. package/dist/strategies.d.ts +1 -0
  97. package/dist/strategies.js +3930 -0
  98. package/dist/strategies.js.map +1 -0
  99. package/dist/tokenization.d.ts +11 -0
  100. package/dist/tokenization.d.ts.map +1 -0
  101. package/dist/types.d.ts +178 -0
  102. package/dist/types.d.ts.map +1 -0
  103. package/dist/validation/validator.d.ts +20 -0
  104. package/dist/validation/validator.d.ts.map +1 -0
  105. package/package.json +30 -14
  106. package/src/agent-cli-integration.test.ts +0 -47
  107. package/src/agent-export.test.ts +0 -17
  108. package/src/agent-tool-labels.test.ts +0 -50
  109. package/src/artifacts/AGENTS.md +0 -16
  110. package/src/artifacts/fileToArtifact.test.ts +0 -37
  111. package/src/artifacts/fileToArtifact.ts +0 -44
  112. package/src/artifacts/input.test.ts +0 -243
  113. package/src/artifacts/input.ts +0 -360
  114. package/src/artifacts/providers.test.ts +0 -19
  115. package/src/artifacts/urlToArtifact.test.ts +0 -23
  116. package/src/artifacts/urlToArtifact.ts +0 -19
  117. package/src/auth/AGENTS.md +0 -11
  118. package/src/auth/config.test.ts +0 -132
  119. package/src/auth/config.ts +0 -186
  120. package/src/auth/tokens.test.ts +0 -58
  121. package/src/auth/tokens.ts +0 -229
  122. package/src/chunking/AGENTS.md +0 -11
  123. package/src/chunking/ArtifactBatcher.test.ts +0 -22
  124. package/src/chunking/ArtifactBatcher.ts +0 -110
  125. package/src/chunking/ArtifactSplitter.test.ts +0 -38
  126. package/src/chunking/ArtifactSplitter.ts +0 -151
  127. package/src/debug/AGENTS.md +0 -79
  128. package/src/debug/logger.test.ts +0 -244
  129. package/src/debug/logger.ts +0 -211
  130. package/src/extract.test.ts +0 -22
  131. package/src/extract.ts +0 -150
  132. package/src/fields.test.ts +0 -681
  133. package/src/fields.ts +0 -246
  134. package/src/index.test.ts +0 -20
  135. package/src/index.ts +0 -110
  136. package/src/llm/AGENTS.md +0 -9
  137. package/src/llm/LLMClient.test.ts +0 -394
  138. package/src/llm/LLMClient.ts +0 -264
  139. package/src/llm/RetryingRunner.test.ts +0 -174
  140. package/src/llm/RetryingRunner.ts +0 -270
  141. package/src/llm/message.test.ts +0 -42
  142. package/src/llm/message.ts +0 -47
  143. package/src/llm/models.test.ts +0 -82
  144. package/src/llm/models.ts +0 -190
  145. package/src/llm/resolveModel.ts +0 -86
  146. package/src/merge/AGENTS.md +0 -6
  147. package/src/merge/Deduplicator.test.ts +0 -108
  148. package/src/merge/Deduplicator.ts +0 -45
  149. package/src/merge/SmartDataMerger.test.ts +0 -177
  150. package/src/merge/SmartDataMerger.ts +0 -56
  151. package/src/parsers/AGENTS.md +0 -58
  152. package/src/parsers/collect.test.ts +0 -56
  153. package/src/parsers/collect.ts +0 -31
  154. package/src/parsers/mime.test.ts +0 -91
  155. package/src/parsers/mime.ts +0 -137
  156. package/src/parsers/npm.ts +0 -26
  157. package/src/parsers/pdf.test.ts +0 -394
  158. package/src/parsers/pdf.ts +0 -194
  159. package/src/parsers/runner.test.ts +0 -95
  160. package/src/parsers/runner.ts +0 -177
  161. package/src/parsers/types.ts +0 -29
  162. package/src/prompts/AGENTS.md +0 -8
  163. package/src/prompts/DeduplicationPrompt.test.ts +0 -41
  164. package/src/prompts/DeduplicationPrompt.ts +0 -37
  165. package/src/prompts/ExtractorPrompt.test.ts +0 -21
  166. package/src/prompts/ExtractorPrompt.ts +0 -72
  167. package/src/prompts/ParallelMergerPrompt.test.ts +0 -8
  168. package/src/prompts/ParallelMergerPrompt.ts +0 -37
  169. package/src/prompts/SequentialExtractorPrompt.test.ts +0 -24
  170. package/src/prompts/SequentialExtractorPrompt.ts +0 -82
  171. package/src/prompts/formatArtifacts.test.ts +0 -39
  172. package/src/prompts/formatArtifacts.ts +0 -46
  173. package/src/strategies/AGENTS.md +0 -6
  174. package/src/strategies/DoublePassAutoMergeStrategy.test.ts +0 -53
  175. package/src/strategies/DoublePassAutoMergeStrategy.ts +0 -410
  176. package/src/strategies/DoublePassStrategy.test.ts +0 -48
  177. package/src/strategies/DoublePassStrategy.ts +0 -266
  178. package/src/strategies/ParallelAutoMergeStrategy.test.ts +0 -152
  179. package/src/strategies/ParallelAutoMergeStrategy.ts +0 -345
  180. package/src/strategies/ParallelStrategy.test.ts +0 -61
  181. package/src/strategies/ParallelStrategy.ts +0 -208
  182. package/src/strategies/SequentialAutoMergeStrategy.test.ts +0 -66
  183. package/src/strategies/SequentialAutoMergeStrategy.ts +0 -325
  184. package/src/strategies/SequentialStrategy.test.ts +0 -53
  185. package/src/strategies/SequentialStrategy.ts +0 -142
  186. package/src/strategies/SimpleStrategy.test.ts +0 -46
  187. package/src/strategies/SimpleStrategy.ts +0 -94
  188. package/src/strategies/concurrency.test.ts +0 -16
  189. package/src/strategies/concurrency.ts +0 -14
  190. package/src/strategies/index.test.ts +0 -20
  191. package/src/strategies/utils.test.ts +0 -76
  192. package/src/strategies/utils.ts +0 -95
  193. package/src/tokenization.test.ts +0 -119
  194. package/src/tokenization.ts +0 -71
  195. package/src/types.test.ts +0 -25
  196. package/src/types.ts +0 -174
  197. package/src/validation/AGENTS.md +0 -7
  198. package/src/validation/validator.test.ts +0 -204
  199. package/src/validation/validator.ts +0 -90
  200. package/tsconfig.json +0 -22
@@ -1,174 +0,0 @@
1
- import { test, expect } from "bun:test";
2
- import type { JSONSchemaType } from "ajv";
3
- import { runWithRetries } from "./RetryingRunner";
4
-
5
- type Output = { title: string };
6
-
7
- const schema: JSONSchemaType<Output> = {
8
- type: "object",
9
- properties: { title: { type: "string" } },
10
- required: ["title"],
11
- additionalProperties: false,
12
- };
13
-
14
- test("runWithRetries emits onRetry event when retrying", async () => {
15
- let calls = 0;
16
- const retryEvents: Array<{ attempt: number; maxAttempts: number; reason?: string }> = [];
17
-
18
- const result = await runWithRetries<Output>({
19
- model: {},
20
- schema,
21
- system: "sys",
22
- user: "user",
23
- execute: async () => {
24
- calls += 1;
25
- if (calls === 1) {
26
- return {
27
- data: { title: 123 } as unknown as Output,
28
- usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
29
- };
30
- }
31
- return {
32
- data: { title: "ok" },
33
- usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
34
- };
35
- },
36
- events: {
37
- onRetry: (info) => {
38
- retryEvents.push(info);
39
- },
40
- },
41
- });
42
-
43
- expect(result.data.title).toBe("ok");
44
- expect(calls).toBe(2);
45
- expect(retryEvents).toHaveLength(1);
46
- expect(retryEvents[0]?.attempt).toBe(2);
47
- expect(retryEvents[0]?.maxAttempts).toBe(3);
48
- expect(retryEvents[0]?.reason).toBe("schema_validation_failed");
49
- });
50
-
51
- test("runWithRetries retries on validation error", async () => {
52
- let calls = 0;
53
- const result = await runWithRetries<Output>({
54
- model: {},
55
- schema,
56
- system: "sys",
57
- user: "user",
58
- execute: async () => {
59
- calls += 1;
60
- if (calls === 1) {
61
- return {
62
- data: { title: 123 } as unknown as Output,
63
- usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
64
- };
65
- }
66
- return {
67
- data: { title: "ok" },
68
- usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
69
- };
70
- },
71
- });
72
-
73
- expect(result.data.title).toBe("ok");
74
- expect(calls).toBe(2);
75
- });
76
-
77
- test("runWithRetries with strict=false retries on missing required fields until max attempts", async () => {
78
- let calls = 0;
79
-
80
- await expect(
81
- runWithRetries<Output>({
82
- model: {},
83
- schema,
84
- system: "sys",
85
- user: "user",
86
- strict: false,
87
- maxAttempts: 2,
88
- execute: async () => {
89
- calls += 1;
90
- return {
91
- data: {} as Output,
92
- usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
93
- };
94
- },
95
- })
96
- ).rejects.toThrow();
97
-
98
- expect(calls).toBe(2);
99
- });
100
-
101
- test("runWithRetries with strict=true validates required fields on every attempt", async () => {
102
- let calls = 0;
103
-
104
- await expect(
105
- runWithRetries<Output>({
106
- model: {},
107
- schema,
108
- system: "sys",
109
- user: "user",
110
- strict: true,
111
- maxAttempts: 2,
112
- execute: async () => {
113
- calls += 1;
114
- return {
115
- data: {} as Output,
116
- usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
117
- };
118
- },
119
- })
120
- ).rejects.toThrow();
121
-
122
- expect(calls).toBe(2);
123
- });
124
-
125
- test("runWithRetries with strict=false still validates type errors", async () => {
126
- let calls = 0;
127
- const result = await runWithRetries<Output>({
128
- model: {},
129
- schema,
130
- system: "sys",
131
- user: "user",
132
- strict: false,
133
- execute: async () => {
134
- calls += 1;
135
- if (calls === 1) {
136
- return {
137
- data: { title: 123 } as unknown as Output,
138
- usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
139
- };
140
- }
141
- return {
142
- data: { title: "ok" },
143
- usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
144
- };
145
- },
146
- });
147
-
148
- expect(result.data.title).toBe("ok");
149
- expect(calls).toBe(2);
150
- });
151
-
152
- test("runWithRetries enforces strict validation on final attempt even with strict=false", async () => {
153
- let calls = 0;
154
-
155
- await expect(
156
- runWithRetries<Output>({
157
- model: {},
158
- schema,
159
- system: "sys",
160
- user: "user",
161
- strict: false,
162
- maxAttempts: 2,
163
- execute: async () => {
164
- calls += 1;
165
- return {
166
- data: {} as Output,
167
- usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
168
- };
169
- },
170
- })
171
- ).rejects.toThrow();
172
-
173
- expect(calls).toBe(2);
174
- });
@@ -1,270 +0,0 @@
1
- import {
2
- createAjv,
3
- validateOrThrow,
4
- SchemaValidationError,
5
- validateAllowingMissingRequired,
6
- } from "../validation/validator";
7
- import type { ModelMessage } from "ai";
8
- import type { ExtractionEvents, Usage, TelemetryAdapter } from "../types";
9
- import type { DebugLogger } from "../debug/logger";
10
- import { generateStructured } from "./LLMClient";
11
- import type { UserContent } from "./message";
12
-
13
- export type RetryOptions<T> = {
14
- model: unknown;
15
- schema: unknown;
16
- system: string;
17
- user: UserContent;
18
- events?: ExtractionEvents;
19
- maxAttempts?: number;
20
- schemaName?: string;
21
- execute?: typeof generateStructured<T>;
22
- strict?: boolean;
23
- debug?: DebugLogger;
24
- callId?: string;
25
- /**
26
- * Telemetry adapter for tracing validation and retries
27
- */
28
- telemetry?: TelemetryAdapter;
29
- /**
30
- * Parent span for creating hierarchical traces
31
- */
32
- parentSpan?: { id: string; traceId: string; name: string; kind: string; startTime: number; parentId?: string };
33
- };
34
-
35
- export const runWithRetries = async <T>(options: RetryOptions<T>) => {
36
- const { telemetry, parentSpan } = options;
37
-
38
- // Start validation/retry span if telemetry is enabled
39
- const retrySpan = telemetry?.startSpan({
40
- name: "struktur.validation_retry",
41
- kind: "CHAIN",
42
- parentSpan,
43
- attributes: {
44
- "retry.max_attempts": options.maxAttempts ?? 3,
45
- "retry.schema_name": options.schemaName ?? "extract",
46
- },
47
- });
48
-
49
- const ajv = createAjv();
50
- const maxAttempts = options.maxAttempts ?? 3;
51
- const messages: ModelMessage[] = [{ role: "user", content: options.user }];
52
- const debug = options.debug;
53
- const callId =
54
- options.callId ??
55
- `call_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
56
-
57
- let usage: Usage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
58
- let lastError: Error | undefined;
59
-
60
- // Log LLM call start
61
- const systemLength = options.system.length;
62
- const userLength =
63
- typeof options.user === "string"
64
- ? options.user.length
65
- : JSON.stringify(options.user).length;
66
-
67
- debug?.llmCallStart({
68
- callId,
69
- model: JSON.stringify(options.model),
70
- schemaName: options.schemaName,
71
- systemLength,
72
- userLength,
73
- artifactCount: Array.isArray(options.user) ? options.user.length : 0,
74
- });
75
-
76
- debug?.promptSystem({ callId, system: options.system });
77
- debug?.promptUser({ callId, user: options.user });
78
-
79
- for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
80
- const executor = options.execute ?? generateStructured;
81
- const isFinalAttempt = attempt === maxAttempts;
82
- const useStrictValidation = options.strict === true || isFinalAttempt;
83
-
84
- debug?.validationStart({
85
- callId,
86
- attempt,
87
- maxAttempts,
88
- strict: useStrictValidation,
89
- });
90
-
91
- const startTime = Date.now();
92
- const result = await executor({
93
- model: options.model,
94
- schema: options.schema,
95
- schemaName: options.schemaName,
96
- system: options.system,
97
- user: options.user,
98
- messages,
99
- strict: options.strict,
100
- telemetry,
101
- parentSpan: retrySpan,
102
- });
103
- const durationMs = Date.now() - startTime;
104
-
105
- usage = {
106
- inputTokens: usage.inputTokens + result.usage.inputTokens,
107
- outputTokens: usage.outputTokens + result.usage.outputTokens,
108
- totalTokens: usage.totalTokens + result.usage.totalTokens,
109
- };
110
-
111
- debug?.rawResponse({ callId, response: result.data });
112
-
113
- try {
114
- if (useStrictValidation) {
115
- const validated = validateOrThrow<T>(
116
- ajv,
117
- options.schema as never,
118
- result.data,
119
- );
120
-
121
- debug?.validationSuccess({ callId, attempt });
122
- debug?.llmCallComplete({
123
- callId,
124
- success: true,
125
- inputTokens: usage.inputTokens,
126
- outputTokens: usage.outputTokens,
127
- totalTokens: usage.totalTokens,
128
- durationMs,
129
- });
130
-
131
- // Record successful validation
132
- if (retrySpan && telemetry) {
133
- telemetry.recordEvent(retrySpan, {
134
- type: "validation",
135
- attempt,
136
- maxAttempts,
137
- schema: options.schema,
138
- input: result.data,
139
- success: true,
140
- latencyMs: durationMs,
141
- });
142
- telemetry.endSpan(retrySpan, {
143
- status: "ok",
144
- output: validated,
145
- latencyMs: durationMs,
146
- });
147
- }
148
-
149
- return { data: validated, usage };
150
- } else {
151
- const validationResult = validateAllowingMissingRequired<T>(
152
- ajv,
153
- options.schema as never,
154
- result.data,
155
- isFinalAttempt,
156
- );
157
-
158
- if (validationResult.valid) {
159
- debug?.validationSuccess({ callId, attempt });
160
- debug?.llmCallComplete({
161
- callId,
162
- success: true,
163
- inputTokens: usage.inputTokens,
164
- outputTokens: usage.outputTokens,
165
- totalTokens: usage.totalTokens,
166
- durationMs,
167
- });
168
-
169
- // Record successful validation
170
- if (retrySpan && telemetry) {
171
- telemetry.recordEvent(retrySpan, {
172
- type: "validation",
173
- attempt,
174
- maxAttempts,
175
- schema: options.schema,
176
- input: result.data,
177
- success: true,
178
- latencyMs: durationMs,
179
- });
180
- telemetry.endSpan(retrySpan, {
181
- status: "ok",
182
- output: validationResult.data,
183
- latencyMs: durationMs,
184
- });
185
- }
186
-
187
- return { data: validationResult.data, usage };
188
- }
189
-
190
- throw new SchemaValidationError(
191
- "Schema validation failed",
192
- validationResult.errors,
193
- );
194
- }
195
- } catch (error) {
196
- lastError = error as Error;
197
-
198
- if (error instanceof SchemaValidationError) {
199
- debug?.validationFailed({
200
- callId,
201
- attempt,
202
- errors: error.errors,
203
- });
204
-
205
- // Record failed validation
206
- if (retrySpan && telemetry) {
207
- telemetry.recordEvent(retrySpan, {
208
- type: "validation",
209
- attempt,
210
- maxAttempts,
211
- schema: options.schema,
212
- input: result.data,
213
- success: false,
214
- errors: error.errors,
215
- latencyMs: durationMs,
216
- });
217
- }
218
-
219
- // Emit retry event before attempting retry
220
- const nextAttempt = attempt + 1;
221
- if (nextAttempt <= maxAttempts) {
222
- await options.events?.onRetry?.({
223
- attempt: nextAttempt,
224
- maxAttempts,
225
- reason: "schema_validation_failed",
226
- });
227
-
228
- debug?.retry({
229
- callId,
230
- attempt: nextAttempt,
231
- maxAttempts,
232
- reason: "schema_validation_failed",
233
- });
234
- }
235
-
236
- const errorPayload = JSON.stringify(error.errors, null, 2);
237
- const errorMessage = `<validation-errors>\n${errorPayload}\n</validation-errors>`;
238
- messages.push({ role: "user", content: errorMessage });
239
- await options.events?.onMessage?.({
240
- role: "user",
241
- content: errorMessage,
242
- });
243
- continue;
244
- }
245
-
246
- debug?.llmCallComplete({
247
- callId,
248
- success: false,
249
- inputTokens: usage.inputTokens,
250
- outputTokens: usage.outputTokens,
251
- totalTokens: usage.totalTokens,
252
- durationMs,
253
- error: (error as Error).message,
254
- });
255
-
256
- // Record error in telemetry
257
- if (retrySpan && telemetry) {
258
- telemetry.endSpan(retrySpan, {
259
- status: "error",
260
- error: error as Error,
261
- latencyMs: durationMs,
262
- });
263
- }
264
-
265
- break;
266
- }
267
- }
268
-
269
- throw lastError ?? new Error("Unknown extraction error");
270
- };
@@ -1,42 +0,0 @@
1
- import { test, expect } from "bun:test";
2
- import type { Artifact } from "../types";
3
- import { buildUserContent } from "./message";
4
-
5
- const makeArtifact = (contents: Artifact["contents"]): Artifact => ({
6
- id: "a1",
7
- type: "text",
8
- raw: async () => Buffer.from(""),
9
- contents,
10
- });
11
-
12
- test("buildUserContent returns text when no images", () => {
13
- const artifacts = [makeArtifact([{ text: "hello" }])];
14
- const content = buildUserContent("prompt", artifacts);
15
-
16
- expect(content).toBe("prompt");
17
- });
18
-
19
- test("buildUserContent appends images in order", () => {
20
- const artifacts: Artifact[] = [
21
- makeArtifact([
22
- { media: [{ type: "image", base64: "base" }] },
23
- { media: [{ type: "image", url: "https://example.com/img.png" }] },
24
- ]),
25
- {
26
- id: "a2",
27
- type: "image",
28
- raw: async () => Buffer.from(""),
29
- contents: [{ media: [{ type: "image", contents: Buffer.from([1]) }] }],
30
- },
31
- ];
32
-
33
- const content = buildUserContent("prompt", artifacts);
34
- expect(Array.isArray(content)).toBe(true);
35
-
36
- if (Array.isArray(content)) {
37
- expect(content[0]).toEqual({ type: "text", text: "prompt" });
38
- expect(content[1]).toEqual({ type: "image", image: "base" });
39
- expect(content[2]).toEqual({ type: "image", image: "https://example.com/img.png" });
40
- expect(content[3]).toEqual({ type: "image", image: Buffer.from([1]) });
41
- }
42
- });
@@ -1,47 +0,0 @@
1
- import type { Artifact } from "../types";
2
-
3
- export type ImagePart = {
4
- type: "image";
5
- image: string | Buffer;
6
- };
7
-
8
- export type TextPart = {
9
- type: "text";
10
- text: string;
11
- };
12
-
13
- export type UserContent = string | Array<TextPart | ImagePart>;
14
-
15
- const collectImages = (artifacts: Artifact[]): ImagePart[] => {
16
- const parts: ImagePart[] = [];
17
-
18
- for (const artifact of artifacts) {
19
- for (const content of artifact.contents) {
20
- if (!content.media?.length) {
21
- continue;
22
- }
23
-
24
- for (const media of content.media) {
25
- if (media.contents) {
26
- parts.push({ type: "image", image: media.contents });
27
- } else if (media.base64) {
28
- parts.push({ type: "image", image: media.base64 });
29
- } else if (media.url) {
30
- parts.push({ type: "image", image: media.url });
31
- }
32
- }
33
- }
34
- }
35
-
36
- return parts;
37
- };
38
-
39
- export const buildUserContent = (text: string, artifacts: Artifact[]): UserContent => {
40
- const images = collectImages(artifacts);
41
-
42
- if (images.length === 0) {
43
- return text;
44
- }
45
-
46
- return [{ type: "text", text }, ...images];
47
- };
@@ -1,82 +0,0 @@
1
- import { test, expect } from "bun:test";
2
- import { __testing__ } from "./models";
3
-
4
- test("parseOpenAiModels returns model ids", () => {
5
- const models = __testing__.parseOpenAiModels({
6
- object: "list",
7
- data: [{ id: "gpt-4o-mini" }, { id: "gpt-4o" }],
8
- });
9
-
10
- expect(models).toEqual(["gpt-4o-mini", "gpt-4o"]);
11
- });
12
-
13
- test("parseAnthropicModels returns model ids", () => {
14
- const models = __testing__.parseAnthropicModels({
15
- data: [{ id: "claude-3-5-sonnet-20241022" }],
16
- });
17
-
18
- expect(models).toEqual(["claude-3-5-sonnet-20241022"]);
19
- });
20
-
21
- test("parseGoogleModels strips models prefix", () => {
22
- const models = __testing__.parseGoogleModels({
23
- models: [{ name: "models/gemini-1.5-flash" }],
24
- });
25
-
26
- expect(models).toEqual(["gemini-1.5-flash"]);
27
- });
28
-
29
- test("parseOpenRouterModels returns model ids", () => {
30
- const models = __testing__.parseOpenRouterModels({
31
- data: [{ id: "openai/gpt-4o" }, { id: "anthropic/claude-3.5-sonnet" }],
32
- });
33
-
34
- expect(models).toEqual(["openai/gpt-4o", "anthropic/claude-3.5-sonnet"]);
35
- });
36
-
37
- test("parseOpenAiModels handles empty data", () => {
38
- const models = __testing__.parseOpenAiModels({});
39
- expect(models).toEqual([]);
40
- });
41
-
42
- test("parseOpenAiModels filters out undefined ids", () => {
43
- const models = __testing__.parseOpenAiModels({
44
- data: [{ id: "gpt-4" }, { notId: "bad" }],
45
- });
46
- expect(models).toEqual(["gpt-4"]);
47
- });
48
-
49
- test("parseGoogleModels handles empty models", () => {
50
- const models = __testing__.parseGoogleModels({});
51
- expect(models).toEqual([]);
52
- });
53
-
54
- test("pickCheapestModel prefers known cheap models", () => {
55
- const models = ["gpt-4o", "gpt-4o-mini"];
56
- expect(__testing__.pickCheapestModel("openai", models)).toBe("gpt-4o-mini");
57
- });
58
-
59
- test("pickCheapestModel returns first model if no preference matches", () => {
60
- const models = ["unknown-model-1", "unknown-model-2"];
61
- expect(__testing__.pickCheapestModel("openai", models)).toBe("unknown-model-1");
62
- });
63
-
64
- test("pickCheapestModel matches prefix for versioned models", () => {
65
- const models = ["gpt-4o-mini-2024-07-18", "gpt-4o-2024-05-13"];
66
- expect(__testing__.pickCheapestModel("openai", models)).toBe("gpt-4o-mini-2024-07-18");
67
- });
68
-
69
- test("pickCheapestModel handles anthropic preferences", () => {
70
- const models = ["claude-3-opus", "claude-3-5-haiku-20241022"];
71
- expect(__testing__.pickCheapestModel("anthropic", models)).toBe("claude-3-5-haiku-20241022");
72
- });
73
-
74
- test("pickCheapestModel handles google preferences", () => {
75
- const models = ["gemini-1.5-pro", "gemini-2.0-flash"];
76
- expect(__testing__.pickCheapestModel("google", models)).toBe("gemini-2.0-flash");
77
- });
78
-
79
- test("pickCheapestModel handles unknown provider", () => {
80
- const models = ["model-a", "model-b"];
81
- expect(__testing__.pickCheapestModel("unknown", models)).toBe("model-a");
82
- });