@struktur/sdk 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/dist/index.js +4111 -0
  2. package/dist/index.js.map +1 -0
  3. package/dist/parsers.js +492 -0
  4. package/dist/parsers.js.map +1 -0
  5. package/dist/strategies.js +2435 -0
  6. package/dist/strategies.js.map +1 -0
  7. package/package.json +25 -13
  8. package/src/agent-cli-integration.test.ts +0 -47
  9. package/src/agent-export.test.ts +0 -17
  10. package/src/agent-tool-labels.test.ts +0 -50
  11. package/src/artifacts/AGENTS.md +0 -16
  12. package/src/artifacts/fileToArtifact.test.ts +0 -37
  13. package/src/artifacts/fileToArtifact.ts +0 -44
  14. package/src/artifacts/input.test.ts +0 -243
  15. package/src/artifacts/input.ts +0 -360
  16. package/src/artifacts/providers.test.ts +0 -19
  17. package/src/artifacts/providers.ts +0 -7
  18. package/src/artifacts/urlToArtifact.test.ts +0 -23
  19. package/src/artifacts/urlToArtifact.ts +0 -19
  20. package/src/auth/AGENTS.md +0 -11
  21. package/src/auth/config.test.ts +0 -132
  22. package/src/auth/config.ts +0 -186
  23. package/src/auth/tokens.test.ts +0 -58
  24. package/src/auth/tokens.ts +0 -229
  25. package/src/chunking/AGENTS.md +0 -11
  26. package/src/chunking/ArtifactBatcher.test.ts +0 -22
  27. package/src/chunking/ArtifactBatcher.ts +0 -110
  28. package/src/chunking/ArtifactSplitter.test.ts +0 -38
  29. package/src/chunking/ArtifactSplitter.ts +0 -151
  30. package/src/debug/AGENTS.md +0 -79
  31. package/src/debug/logger.test.ts +0 -244
  32. package/src/debug/logger.ts +0 -211
  33. package/src/extract.test.ts +0 -22
  34. package/src/extract.ts +0 -150
  35. package/src/fields.test.ts +0 -681
  36. package/src/fields.ts +0 -246
  37. package/src/index.test.ts +0 -20
  38. package/src/index.ts +0 -110
  39. package/src/llm/AGENTS.md +0 -9
  40. package/src/llm/LLMClient.test.ts +0 -394
  41. package/src/llm/LLMClient.ts +0 -264
  42. package/src/llm/RetryingRunner.test.ts +0 -174
  43. package/src/llm/RetryingRunner.ts +0 -270
  44. package/src/llm/message.test.ts +0 -42
  45. package/src/llm/message.ts +0 -47
  46. package/src/llm/models.test.ts +0 -82
  47. package/src/llm/models.ts +0 -190
  48. package/src/llm/resolveModel.ts +0 -86
  49. package/src/merge/AGENTS.md +0 -6
  50. package/src/merge/Deduplicator.test.ts +0 -108
  51. package/src/merge/Deduplicator.ts +0 -45
  52. package/src/merge/SmartDataMerger.test.ts +0 -177
  53. package/src/merge/SmartDataMerger.ts +0 -56
  54. package/src/parsers/AGENTS.md +0 -58
  55. package/src/parsers/collect.test.ts +0 -56
  56. package/src/parsers/collect.ts +0 -31
  57. package/src/parsers/index.ts +0 -6
  58. package/src/parsers/mime.test.ts +0 -91
  59. package/src/parsers/mime.ts +0 -137
  60. package/src/parsers/npm.ts +0 -26
  61. package/src/parsers/pdf.test.ts +0 -394
  62. package/src/parsers/pdf.ts +0 -194
  63. package/src/parsers/runner.test.ts +0 -95
  64. package/src/parsers/runner.ts +0 -177
  65. package/src/parsers/types.ts +0 -29
  66. package/src/prompts/AGENTS.md +0 -8
  67. package/src/prompts/DeduplicationPrompt.test.ts +0 -41
  68. package/src/prompts/DeduplicationPrompt.ts +0 -37
  69. package/src/prompts/ExtractorPrompt.test.ts +0 -21
  70. package/src/prompts/ExtractorPrompt.ts +0 -72
  71. package/src/prompts/ParallelMergerPrompt.test.ts +0 -8
  72. package/src/prompts/ParallelMergerPrompt.ts +0 -37
  73. package/src/prompts/SequentialExtractorPrompt.test.ts +0 -24
  74. package/src/prompts/SequentialExtractorPrompt.ts +0 -82
  75. package/src/prompts/formatArtifacts.test.ts +0 -39
  76. package/src/prompts/formatArtifacts.ts +0 -46
  77. package/src/strategies/AGENTS.md +0 -6
  78. package/src/strategies/DoublePassAutoMergeStrategy.test.ts +0 -53
  79. package/src/strategies/DoublePassAutoMergeStrategy.ts +0 -410
  80. package/src/strategies/DoublePassStrategy.test.ts +0 -48
  81. package/src/strategies/DoublePassStrategy.ts +0 -266
  82. package/src/strategies/ParallelAutoMergeStrategy.test.ts +0 -152
  83. package/src/strategies/ParallelAutoMergeStrategy.ts +0 -345
  84. package/src/strategies/ParallelStrategy.test.ts +0 -61
  85. package/src/strategies/ParallelStrategy.ts +0 -208
  86. package/src/strategies/SequentialAutoMergeStrategy.test.ts +0 -66
  87. package/src/strategies/SequentialAutoMergeStrategy.ts +0 -325
  88. package/src/strategies/SequentialStrategy.test.ts +0 -53
  89. package/src/strategies/SequentialStrategy.ts +0 -142
  90. package/src/strategies/SimpleStrategy.test.ts +0 -46
  91. package/src/strategies/SimpleStrategy.ts +0 -94
  92. package/src/strategies/concurrency.test.ts +0 -16
  93. package/src/strategies/concurrency.ts +0 -14
  94. package/src/strategies/index.test.ts +0 -20
  95. package/src/strategies/index.ts +0 -7
  96. package/src/strategies/utils.test.ts +0 -76
  97. package/src/strategies/utils.ts +0 -95
  98. package/src/tokenization.test.ts +0 -119
  99. package/src/tokenization.ts +0 -71
  100. package/src/types.test.ts +0 -25
  101. package/src/types.ts +0 -174
  102. package/src/validation/AGENTS.md +0 -7
  103. package/src/validation/validator.test.ts +0 -204
  104. package/src/validation/validator.ts +0 -90
  105. package/tsconfig.json +0 -22
package/src/fields.ts DELETED
@@ -1,246 +0,0 @@
1
- /**
2
- * fields.ts — Shorthand schema builder.
3
- *
4
- * Parses a comma-separated fields string into a minimal JSON Schema object.
5
- * Supported type expressions:
6
- *
7
- * string (default) — e.g. title
8
- * number / float — e.g. price:number or price:float
9
- * boolean / bool — e.g. active:boolean or active:bool
10
- * integer — e.g. count:integer
11
- * int — e.g. count:int (integer + multipleOf:1 to disallow fractions)
12
- * enum — e.g. status:enum{draft|published|archived}
13
- * array of scalar — e.g. tags:array{string}
14
- * array (shorthand) — e.g. tags:array (defaults to array{string})
15
- *
16
- * Aliases:
17
- * bool → boolean
18
- * float → number
19
- * int → integer (with multipleOf: 1)
20
- *
21
- * Examples:
22
- * parseFieldsString("title, description")
23
- * parseFieldsString("title, price:number")
24
- * parseFieldsString("title , price: number , active:boolean")
25
- * parseFieldsString("name, status:enum{draft|published}")
26
- * parseFieldsString("name, tags:array{string}")
27
- * parseFieldsString("name, tags:array")
28
- * parseFieldsString("count:int, ratio:float, enabled:bool")
29
- */
30
-
31
- import type { AnyJSONSchema } from "./types";
32
-
33
- // ---------------------------------------------------------------------------
34
- // Types
35
- // ---------------------------------------------------------------------------
36
-
37
- export type ScalarFieldType = "string" | "number" | "boolean" | "integer" | "int";
38
-
39
- export type ParsedField =
40
- | { name: string; kind: "scalar"; type: ScalarFieldType }
41
- | { name: string; kind: "enum"; values: string[] }
42
- | { name: string; kind: "array"; items: ScalarFieldType };
43
-
44
- /** Legacy alias kept for backwards compatibility */
45
- export type FieldType = ScalarFieldType;
46
-
47
- // ---------------------------------------------------------------------------
48
- // Constants
49
- // ---------------------------------------------------------------------------
50
-
51
- const SCALAR_TYPES: ReadonlySet<string> = new Set([
52
- "string",
53
- "number",
54
- "boolean",
55
- "integer",
56
- "int",
57
- ]);
58
-
59
- /** Maps alias → canonical type accepted by this parser. */
60
- const SCALAR_ALIASES: Readonly<Record<string, ScalarFieldType>> = {
61
- bool: "boolean",
62
- float: "number",
63
- // Note: "int" stays as "int" (not aliased to "integer") so the schema
64
- // builder can emit the extra multipleOf:1 constraint.
65
- };
66
-
67
- // ---------------------------------------------------------------------------
68
- // Internal parser helpers
69
- // ---------------------------------------------------------------------------
70
-
71
- /**
72
- * Extract the content inside `prefix{...}` from a raw type string.
73
- * Returns `null` if the pattern doesn't match.
74
- */
75
- const extractBraces = (
76
- rawType: string,
77
- prefix: string,
78
- ): string | null => {
79
- if (!rawType.startsWith(prefix + "{") || !rawType.endsWith("}")) {
80
- return null;
81
- }
82
- return rawType.slice(prefix.length + 1, -1);
83
- };
84
-
85
- const parseScalarType = (raw: string, fieldName: string): ScalarFieldType => {
86
- // Resolve aliases first.
87
- const resolved: string = SCALAR_ALIASES[raw] ?? raw;
88
- if (!SCALAR_TYPES.has(resolved)) {
89
- const allNames = [...Object.keys(SCALAR_ALIASES), ...SCALAR_TYPES].sort();
90
- throw new Error(
91
- `Unknown type "${raw}" for field "${fieldName}". ` +
92
- `Scalar types: ${allNames.join(", ")}. ` +
93
- `Complex types: enum{a|b|c}, array{string}, or array (shorthand for array{string}).`,
94
- );
95
- }
96
- return resolved as ScalarFieldType;
97
- };
98
-
99
- /**
100
- * Parse a single `name` or `name:type` token into a ParsedField.
101
- * Trims whitespace from name and type expression.
102
- */
103
- const parseField = (token: string): ParsedField => {
104
- const colonIndex = token.indexOf(":");
105
-
106
- if (colonIndex === -1) {
107
- const name = token.trim();
108
- if (!name) throw new Error("Empty field name in fields string.");
109
- return { name, kind: "scalar", type: "string" };
110
- }
111
-
112
- const name = token.slice(0, colonIndex).trim();
113
- const rawType = token.slice(colonIndex + 1).trim();
114
-
115
- if (!name) {
116
- throw new Error(`Empty field name before colon in token: "${token}".`);
117
- }
118
- if (!rawType) {
119
- throw new Error(
120
- `Empty type after colon for field "${name}". ` +
121
- `Omit the colon or specify a type.`,
122
- );
123
- }
124
-
125
- // enum{a|b|c}
126
- const enumContent = extractBraces(rawType, "enum");
127
- if (enumContent !== null) {
128
- const values = enumContent.split("|").map((v) => v.trim()).filter(Boolean);
129
- if (values.length < 2) {
130
- throw new Error(
131
- `enum for field "${name}" must have at least two values separated by "|", got: "${enumContent}".`,
132
- );
133
- }
134
- return { name, kind: "enum", values };
135
- }
136
-
137
- // array{itemType}
138
- const arrayContent = extractBraces(rawType, "array");
139
- if (arrayContent !== null) {
140
- const itemType = arrayContent.trim();
141
- if (!itemType) {
142
- throw new Error(
143
- `array for field "${name}" requires an item type, e.g. array{string}.`,
144
- );
145
- }
146
- return { name, kind: "array", items: parseScalarType(itemType, name) };
147
- }
148
-
149
- // array (shorthand for array{string})
150
- if (rawType === "array") {
151
- return { name, kind: "array", items: "string" };
152
- }
153
-
154
- // plain scalar
155
- return { name, kind: "scalar", type: parseScalarType(rawType, name) };
156
- };
157
-
158
- // ---------------------------------------------------------------------------
159
- // Public API
160
- // ---------------------------------------------------------------------------
161
-
162
- /**
163
- * Parse a comma-separated fields string into an array of ParsedField entries.
164
- *
165
- * @example
166
- * parseFieldsString("title, price:number")
167
- * // => [{ name: "title", kind: "scalar", type: "string" }, { name: "price", kind: "scalar", type: "number" }]
168
- *
169
- * parseFieldsString("status:enum{draft|published}")
170
- * // => [{ name: "status", kind: "enum", values: ["draft", "published"] }]
171
- *
172
- * parseFieldsString("tags:array{string}")
173
- * // => [{ name: "tags", kind: "array", items: "string" }]
174
- */
175
- export const parseFieldsString = (fields: string): ParsedField[] => {
176
- if (!fields.trim()) {
177
- throw new Error("Fields string must not be empty.");
178
- }
179
-
180
- // Split on top-level commas only: a comma inside braces (e.g. enum{a|b,c})
181
- // stays part of its token instead of ending it. Enum values are separated
182
- // by "|" per the spec, so tracking brace depth is all the splitting we need.
183
- const tokens: string[] = [];
184
- let depth = 0;
185
- let current = "";
186
- for (const ch of fields) {
187
- if (ch === "{") { depth++; current += ch; }
188
- else if (ch === "}") { depth--; current += ch; }
189
- else if (ch === "," && depth === 0) { tokens.push(current); current = ""; }
190
- else { current += ch; }
191
- }
192
- if (current) tokens.push(current);
193
-
194
- if (depth !== 0) {
195
- throw new Error("Unmatched braces in fields string.");
196
- }
197
-
198
- return tokens.map((token) => parseField(token));
199
- };
200
-
201
- /**
202
- * Build a minimal JSON Schema `object` from a parsed fields array.
203
- * All fields are required; additionalProperties is false.
204
- */
205
- export const buildSchemaFromParsedFields = (
206
- fields: ParsedField[],
207
- ): AnyJSONSchema => {
208
- if (fields.length === 0) {
209
- throw new Error("Cannot build a schema from an empty fields list.");
210
- }
211
-
212
- const properties: Record<string, unknown> = {};
213
- const required: string[] = [];
214
-
215
- for (const field of fields) {
216
- if (field.kind === "scalar") {
217
- properties[field.name] = field.type === "int"
218
- ? { type: "integer", multipleOf: 1 }
219
- : { type: field.type };
220
- } else if (field.kind === "enum") {
221
- properties[field.name] = { type: "string", enum: field.values };
222
- } else {
223
- // array
224
- properties[field.name] = { type: "array", items: field.items === "int" ? { type: "integer", multipleOf: 1 } : { type: field.items } };
225
- }
226
- required.push(field.name);
227
- }
228
-
229
- return {
230
- type: "object",
231
- properties,
232
- required,
233
- additionalProperties: false,
234
- };
235
- };
236
-
237
- /**
238
- * Convenience: parse a fields string and immediately build a JSON Schema.
239
- *
240
- * @example
241
- * buildSchemaFromFields("title, price:number")
242
- * buildSchemaFromFields("status:enum{draft|published|archived}")
243
- * buildSchemaFromFields("tags:array{string}")
244
- */
245
- export const buildSchemaFromFields = (fields: string): AnyJSONSchema =>
246
- buildSchemaFromParsedFields(parseFieldsString(fields));
package/src/index.test.ts DELETED
@@ -1,20 +0,0 @@
1
- import { test, expect } from "bun:test";
2
- import * as api from "./index";
3
-
4
- test("index re-exports main API", () => {
5
- expect(typeof api.extract).toBe("function");
6
- expect(typeof api.urlToArtifact).toBe("function");
7
- expect(typeof api.fileToArtifact).toBe("function");
8
- expect(typeof api.defaultArtifactProviders).toBe("object");
9
- expect(typeof api.registerArtifactInputParser).toBe("function");
10
- expect(typeof api.clearArtifactInputParsers).toBe("function");
11
- expect(typeof api.validateSerializedArtifacts).toBe("function");
12
- expect(typeof api.parseSerializedArtifacts).toBe("function");
13
- expect(typeof api.hydrateSerializedArtifacts).toBe("function");
14
- expect(typeof api.parse).toBe("function");
15
- expect(typeof api.splitTextIntoContents).toBe("function");
16
-
17
- expect(typeof api.simple).toBe("function");
18
- expect(typeof api.parallel).toBe("function");
19
- expect(typeof api.sequential).toBe("function");
20
- });
package/src/index.ts DELETED
@@ -1,110 +0,0 @@
1
- export type {
2
- Artifact,
3
- ArtifactContent,
4
- ArtifactImage,
5
- ArtifactType,
6
- ExtractionEvents,
7
- ExtractionOptions,
8
- ExtractionResult,
9
- ExtractionStrategy,
10
- Usage,
11
- AnyJSONSchema,
12
- TypedJSONSchema,
13
- // Agent event types
14
- AgentEvents,
15
- AgentToolStartInfo,
16
- AgentToolEndInfo,
17
- AgentMessageInfo,
18
- AgentReasoningInfo,
19
- // Telemetry
20
- TelemetryAdapter,
21
- } from "./types";
22
-
23
- export { extract } from "./extract";
24
- export {
25
- parseFieldsString,
26
- buildSchemaFromParsedFields,
27
- buildSchemaFromFields,
28
- } from "./fields";
29
- export type { ParsedField, FieldType } from "./fields";
30
-
31
- export type {
32
- ArtifactInput,
33
- ArtifactInputParser,
34
- SerializedArtifact,
35
- SerializedArtifactContent,
36
- SerializedArtifactImage,
37
- SerializedArtifacts,
38
- } from "./artifacts/input";
39
-
40
- export { urlToArtifact } from "./artifacts/urlToArtifact";
41
- export { fileToArtifact } from "./artifacts/fileToArtifact";
42
- export type { ArtifactProvider, ArtifactProviders } from "./artifacts/providers";
43
- export { defaultArtifactProviders } from "./artifacts/providers";
44
- export {
45
- registerArtifactInputParser,
46
- clearArtifactInputParsers,
47
- validateSerializedArtifacts,
48
- parseSerializedArtifacts,
49
- hydrateSerializedArtifacts,
50
- parse,
51
- splitTextIntoContents,
52
- } from "./artifacts/input";
53
-
54
- export * from "./strategies";
55
-
56
- // Agent strategy (re-exported from @struktur/agent-strategy)
57
- export { AgentStrategy, agent, type AgentStrategyConfig } from "@struktur/agent-strategy";
58
-
59
- // Parsers public API
60
- export { collectStream } from "./parsers/collect";
61
- export type { ParserDef, ParsersConfig, InlineParserDef, NpmParserDef } from "./parsers/types";
62
- export { detectMimeType } from "./parsers/mime";
63
- export type { NpmParserEntry } from "./parsers/mime";
64
- export { runParser } from "./parsers/runner";
65
- export { parsePdf } from "./parsers/pdf";
66
-
67
- // Debug
68
- export { createDebugLogger } from "./debug/logger";
69
-
70
- // LLM models
71
- export {
72
- listAllProviderModels,
73
- listProviderModels,
74
- resolveCheapestModel,
75
- } from "./llm/models";
76
- export { resolveModel } from "./llm/resolveModel";
77
-
78
- // Validation
79
- export { SchemaValidationError } from "./validation/validator";
80
-
81
- // Auth (for CLI and SDK users who want to manage tokens)
82
- export {
83
- getDefaultModel,
84
- setDefaultModel,
85
- listAliases,
86
- getAlias,
87
- setAlias,
88
- deleteAlias,
89
- resolveAlias,
90
- listParsers,
91
- getParser,
92
- setParser,
93
- deleteParser,
94
- getTelemetryConfig,
95
- setTelemetryConfig,
96
- enableTelemetry,
97
- disableTelemetry,
98
- deleteTelemetryConfig,
99
- } from "./auth/config";
100
- export {
101
- listStoredProviders,
102
- setProviderToken,
103
- deleteProviderToken,
104
- resolveProviderToken,
105
- getProviderTokenOrThrow,
106
- resolveProviderEnvVar,
107
- maskToken,
108
- type TokenStorageType,
109
- type TokenEntry,
110
- } from "./auth/tokens";
package/src/llm/AGENTS.md DELETED
@@ -1,9 +0,0 @@
1
- # LLM module
2
-
3
- - Purpose: wrap Vercel AI SDK calls, build multimodal messages, run validation retries, and query provider model lists/defaults.
4
- - Key files: `LLMClient.ts`, `RetryingRunner.ts`, `message.ts`, `models.ts`.
5
- - Design: `generateStructured` centralizes AI SDK usage; retry loop feeds validation errors back to the model.
6
- - Retry events: `runWithRetries` emits `onRetry` events with `{ attempt, maxAttempts, reason }` so the CLI can show retry progress (e.g. "Extracting data (retry 2/3)...").
7
- - Supported providers: openai, anthropic, google, opencode (Zen), openrouter.
8
- - OpenRouter provider routing: When a model has an `__openrouter_provider` property attached (set via hashtag syntax in the model string), `generateStructured` passes it as `providerOptions.openrouter.provider.order` to route requests to the specified provider.
9
- - Tests: `LLMClient.test.ts`, `RetryingRunner.test.ts`, `message.test.ts`, `models.test.ts`.