@struktur/sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/README.md +79 -0
  2. package/package.json +33 -0
  3. package/src/artifacts/AGENTS.md +16 -0
  4. package/src/artifacts/fileToArtifact.test.ts +37 -0
  5. package/src/artifacts/fileToArtifact.ts +44 -0
  6. package/src/artifacts/input.test.ts +243 -0
  7. package/src/artifacts/input.ts +360 -0
  8. package/src/artifacts/providers.test.ts +19 -0
  9. package/src/artifacts/providers.ts +7 -0
  10. package/src/artifacts/urlToArtifact.test.ts +23 -0
  11. package/src/artifacts/urlToArtifact.ts +19 -0
  12. package/src/auth/AGENTS.md +11 -0
  13. package/src/auth/config.test.ts +132 -0
  14. package/src/auth/config.ts +129 -0
  15. package/src/auth/tokens.test.ts +58 -0
  16. package/src/auth/tokens.ts +229 -0
  17. package/src/chunking/AGENTS.md +11 -0
  18. package/src/chunking/ArtifactBatcher.test.ts +22 -0
  19. package/src/chunking/ArtifactBatcher.ts +110 -0
  20. package/src/chunking/ArtifactSplitter.test.ts +38 -0
  21. package/src/chunking/ArtifactSplitter.ts +151 -0
  22. package/src/debug/AGENTS.md +79 -0
  23. package/src/debug/logger.test.ts +244 -0
  24. package/src/debug/logger.ts +211 -0
  25. package/src/extract.test.ts +22 -0
  26. package/src/extract.ts +114 -0
  27. package/src/fields.test.ts +663 -0
  28. package/src/fields.ts +239 -0
  29. package/src/index.test.ts +20 -0
  30. package/src/index.ts +93 -0
  31. package/src/llm/AGENTS.md +9 -0
  32. package/src/llm/LLMClient.test.ts +196 -0
  33. package/src/llm/LLMClient.ts +106 -0
  34. package/src/llm/RetryingRunner.test.ts +174 -0
  35. package/src/llm/RetryingRunner.ts +188 -0
  36. package/src/llm/message.test.ts +42 -0
  37. package/src/llm/message.ts +47 -0
  38. package/src/llm/models.test.ts +82 -0
  39. package/src/llm/models.ts +190 -0
  40. package/src/merge/AGENTS.md +6 -0
  41. package/src/merge/Deduplicator.test.ts +108 -0
  42. package/src/merge/Deduplicator.ts +45 -0
  43. package/src/merge/SmartDataMerger.test.ts +177 -0
  44. package/src/merge/SmartDataMerger.ts +56 -0
  45. package/src/parsers/AGENTS.md +58 -0
  46. package/src/parsers/collect.test.ts +56 -0
  47. package/src/parsers/collect.ts +31 -0
  48. package/src/parsers/index.ts +6 -0
  49. package/src/parsers/mime.test.ts +91 -0
  50. package/src/parsers/mime.ts +137 -0
  51. package/src/parsers/npm.ts +26 -0
  52. package/src/parsers/pdf.test.ts +394 -0
  53. package/src/parsers/pdf.ts +194 -0
  54. package/src/parsers/runner.test.ts +95 -0
  55. package/src/parsers/runner.ts +177 -0
  56. package/src/parsers/types.ts +29 -0
  57. package/src/prompts/AGENTS.md +8 -0
  58. package/src/prompts/DeduplicationPrompt.test.ts +41 -0
  59. package/src/prompts/DeduplicationPrompt.ts +37 -0
  60. package/src/prompts/ExtractorPrompt.test.ts +21 -0
  61. package/src/prompts/ExtractorPrompt.ts +72 -0
  62. package/src/prompts/ParallelMergerPrompt.test.ts +8 -0
  63. package/src/prompts/ParallelMergerPrompt.ts +37 -0
  64. package/src/prompts/SequentialExtractorPrompt.test.ts +24 -0
  65. package/src/prompts/SequentialExtractorPrompt.ts +82 -0
  66. package/src/prompts/formatArtifacts.test.ts +39 -0
  67. package/src/prompts/formatArtifacts.ts +46 -0
  68. package/src/strategies/AGENTS.md +6 -0
  69. package/src/strategies/DoublePassAutoMergeStrategy.test.ts +53 -0
  70. package/src/strategies/DoublePassAutoMergeStrategy.ts +270 -0
  71. package/src/strategies/DoublePassStrategy.test.ts +48 -0
  72. package/src/strategies/DoublePassStrategy.ts +179 -0
  73. package/src/strategies/ParallelAutoMergeStrategy.test.ts +152 -0
  74. package/src/strategies/ParallelAutoMergeStrategy.ts +241 -0
  75. package/src/strategies/ParallelStrategy.test.ts +61 -0
  76. package/src/strategies/ParallelStrategy.ts +157 -0
  77. package/src/strategies/SequentialAutoMergeStrategy.test.ts +66 -0
  78. package/src/strategies/SequentialAutoMergeStrategy.ts +222 -0
  79. package/src/strategies/SequentialStrategy.test.ts +53 -0
  80. package/src/strategies/SequentialStrategy.ts +119 -0
  81. package/src/strategies/SimpleStrategy.test.ts +46 -0
  82. package/src/strategies/SimpleStrategy.ts +74 -0
  83. package/src/strategies/concurrency.test.ts +16 -0
  84. package/src/strategies/concurrency.ts +14 -0
  85. package/src/strategies/index.test.ts +20 -0
  86. package/src/strategies/index.ts +7 -0
  87. package/src/strategies/utils.test.ts +76 -0
  88. package/src/strategies/utils.ts +56 -0
  89. package/src/tokenization.test.ts +119 -0
  90. package/src/tokenization.ts +71 -0
  91. package/src/types.test.ts +25 -0
  92. package/src/types.ts +116 -0
  93. package/src/validation/AGENTS.md +6 -0
  94. package/src/validation/validator.test.ts +172 -0
  95. package/src/validation/validator.ts +82 -0
  96. package/tsconfig.json +22 -0
package/src/fields.ts ADDED
@@ -0,0 +1,239 @@
1
+ /**
2
+ * fields.ts — Shorthand schema builder.
3
+ *
4
+ * Parses a comma-separated fields string into a minimal JSON Schema object.
5
+ * Supported type expressions:
6
+ *
7
+ * string (default) title
8
+ * number / float price:number or price:float
9
+ * boolean / bool active:boolean or active:bool
10
+ * integer count:integer
11
+ * int count:int (integer + multipleOf:1 to disallow fractions)
12
+ * enum status:enum{draft|published|archived}
13
+ * array of scalar tags:array{string}
14
+ *
15
+ * Aliases:
16
+ * bool → boolean
17
+ * float → number
18
+ * int → integer (with multipleOf: 1)
19
+ *
20
+ * Examples:
21
+ * parseFieldsString("title, description")
22
+ * parseFieldsString("title, price:number")
23
+ * parseFieldsString("title , price: number , active:boolean")
24
+ * parseFieldsString("name, status:enum{draft|published}")
25
+ * parseFieldsString("name, tags:array{string}")
26
+ * parseFieldsString("count:int, ratio:float, enabled:bool")
27
+ */
28
+
29
+ import type { AnyJSONSchema } from "./types";
30
+
31
+ // ---------------------------------------------------------------------------
32
+ // Types
33
+ // ---------------------------------------------------------------------------
34
+
35
+ export type ScalarFieldType = "string" | "number" | "boolean" | "integer" | "int";
36
+
37
+ export type ParsedField =
38
+ | { name: string; kind: "scalar"; type: ScalarFieldType }
39
+ | { name: string; kind: "enum"; values: string[] }
40
+ | { name: string; kind: "array"; items: ScalarFieldType };
41
+
42
+ /** Legacy alias kept for backwards compatibility */
43
+ export type FieldType = ScalarFieldType;
44
+
45
+ // ---------------------------------------------------------------------------
46
+ // Constants
47
+ // ---------------------------------------------------------------------------
48
+
49
+ const SCALAR_TYPES: ReadonlySet<string> = new Set([
50
+ "string",
51
+ "number",
52
+ "boolean",
53
+ "integer",
54
+ "int",
55
+ ]);
56
+
57
+ /** Maps alias → canonical type accepted by this parser. */
58
+ const SCALAR_ALIASES: Readonly<Record<string, ScalarFieldType>> = {
59
+ bool: "boolean",
60
+ float: "number",
61
+ // Note: "int" stays as "int" (not aliased to "integer") so the schema
62
+ // builder can emit the extra multipleOf:1 constraint.
63
+ };
64
+
65
+ // ---------------------------------------------------------------------------
66
+ // Internal parser helpers
67
+ // ---------------------------------------------------------------------------
68
+
/**
 * Pull out the text between the braces of a `prefix{...}` type expression.
 *
 * @param rawType - The full type expression, e.g. `enum{a|b}`.
 * @param prefix - The keyword expected directly before the opening brace.
 * @returns The text between the opening brace and the final character, or
 *   `null` when `rawType` does not start with `prefix{` or does not end
 *   with `}`.
 */
const extractBraces = (
  rawType: string,
  prefix: string,
): string | null => {
  const opener = `${prefix}{`;
  const wellFormed = rawType.startsWith(opener) && rawType.endsWith("}");
  // Drop the opener from the front and the closing brace from the back.
  return wellFormed ? rawType.slice(opener.length, -1) : null;
};
82
+
/**
 * Resolve a scalar type name (or one of its aliases) for a field.
 *
 * @param raw - The type name exactly as written in the fields string.
 * @param fieldName - Name of the field, used only in the error message.
 * @returns The alias target when `raw` is an alias, otherwise `raw` itself.
 * @throws Error when the resolved name is not one of the known scalar types.
 */
const parseScalarType = (raw: string, fieldName: string): ScalarFieldType => {
  // An alias lookup wins; anything else is checked under its own name.
  const canonical: string = SCALAR_ALIASES[raw] ?? raw;
  if (SCALAR_TYPES.has(canonical)) {
    return canonical as ScalarFieldType;
  }

  // List every accepted spelling (aliases included) in sorted order.
  const accepted = [...Object.keys(SCALAR_ALIASES), ...SCALAR_TYPES]
    .sort()
    .join(", ");
  throw new Error(
    `Unknown type "${raw}" for field "${fieldName}". ` +
      `Scalar types: ${accepted}. ` +
      `Complex types: enum{a|b|c}, array{string}.`,
  );
};
96
+
/**
 * Turn one `name` or `name:type` token into a ParsedField.
 *
 * The token is split at its first colon; both halves are trimmed. A token
 * without a colon is a string scalar. With a colon, the type expression is
 * tried as `enum{...}`, then `array{...}`, then as a plain scalar type.
 *
 * @param token - A single entry of the fields string.
 * @returns The parsed field description.
 * @throws Error when the name or the type is empty, when an enum lists fewer
 *   than two values, when an array has no item type, or when a scalar type
 *   name is not recognised.
 */
const parseField = (token: string): ParsedField => {
  const separatorAt = token.indexOf(":");
  const hasType = separatorAt !== -1;
  const name = (hasType ? token.slice(0, separatorAt) : token).trim();

  // Bare name: defaults to a string scalar.
  if (!hasType) {
    if (name === "") {
      throw new Error("Empty field name in fields string.");
    }
    return { name, kind: "scalar", type: "string" };
  }

  if (name === "") {
    throw new Error(`Empty field name before colon in token: "${token}".`);
  }

  const typeExpr = token.slice(separatorAt + 1).trim();
  if (typeExpr === "") {
    throw new Error(
      `Empty type after colon for field "${name}". ` +
        `Omit the colon or specify a type.`,
    );
  }

  // enum{a|b|c}: values are trimmed and blank entries are dropped.
  const enumBody = extractBraces(typeExpr, "enum");
  if (enumBody !== null) {
    const values: string[] = [];
    for (const part of enumBody.split("|")) {
      const value = part.trim();
      if (value !== "") {
        values.push(value);
      }
    }
    if (values.length < 2) {
      throw new Error(
        `enum for field "${name}" must have at least two values separated by "|", got: "${enumBody}".`,
      );
    }
    return { name, kind: "enum", values };
  }

  // array{itemType}: the item type must itself be a scalar type.
  const arrayBody = extractBraces(typeExpr, "array");
  if (arrayBody !== null) {
    const itemType = arrayBody.trim();
    if (itemType === "") {
      throw new Error(
        `array for field "${name}" requires an item type, e.g. array{string}.`,
      );
    }
    return { name, kind: "array", items: parseScalarType(itemType, name) };
  }

  // Anything else must be a plain scalar type name.
  return { name, kind: "scalar", type: parseScalarType(typeExpr, name) };
};
150
+
151
+ // ---------------------------------------------------------------------------
152
+ // Public API
153
+ // ---------------------------------------------------------------------------
154
+
/**
 * Parse a comma-separated fields string into an array of ParsedField entries.
 *
 * Commas only separate fields at brace depth zero, so the content of a
 * `{...}` group always stays inside a single token. A trailing comma is
 * tolerated; an empty token anywhere else is rejected by the per-field parser.
 *
 * @param fields - The shorthand fields string, e.g. `"title, price:number"`.
 * @returns One ParsedField per token, in input order.
 * @throws Error when the string is blank, when braces are unbalanced (a `}`
 *   without a preceding `{`, or a `{` that is never closed), or when any
 *   single token is invalid.
 *
 * @example
 * parseFieldsString("title, price:number")
 * // => [{ name: "title", kind: "scalar", type: "string" }, { name: "price", kind: "scalar", type: "number" }]
 *
 * parseFieldsString("status:enum{draft|published}")
 * // => [{ name: "status", kind: "enum", values: ["draft", "published"] }]
 *
 * parseFieldsString("tags:array{string}")
 * // => [{ name: "tags", kind: "array", items: "string" }]
 */
export const parseFieldsString = (fields: string): ParsedField[] => {
  if (!fields.trim()) {
    throw new Error("Fields string must not be empty.");
  }

  // Split on top-level commas only, tracking brace depth as we go.
  const tokens: string[] = [];
  let depth = 0;
  let current = "";
  for (const ch of fields) {
    if (ch === "," && depth === 0) {
      tokens.push(current);
      current = "";
      continue;
    }
    if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      // A closing brace with nothing open can never be balanced later;
      // without this check "}{" would net out to zero and slip through.
      if (depth < 0) {
        throw new Error("Unmatched braces in fields string.");
      }
    }
    current += ch;
  }

  // Report an unclosed "{" before attempting to parse any token.
  if (depth !== 0) {
    throw new Error("Unmatched braces in fields string.");
  }
  if (current) tokens.push(current);

  return tokens.map((token) => parseField(token));
};
193
+
/**
 * Build a minimal JSON Schema `object` from a parsed fields array.
 *
 * Every field becomes a required property and `additionalProperties` is
 * `false`. Scalars map to `{ type }`, except `int`, which becomes
 * `{ type: "integer", multipleOf: 1 }`. Enums become string enums, and arrays
 * use the scalar mapping for their `items`.
 *
 * @param fields - Parsed field descriptions, in the desired property order.
 * @returns The JSON Schema object describing those fields.
 * @throws Error when `fields` is empty or when two fields share a name (the
 *   `required` list of a JSON Schema must not contain duplicates).
 */
export const buildSchemaFromParsedFields = (
  fields: ParsedField[],
): AnyJSONSchema => {
  if (fields.length === 0) {
    throw new Error("Cannot build a schema from an empty fields list.");
  }

  // Shared by plain scalars and array items so both treat "int" identically.
  const scalarSchema = (type: string): Record<string, unknown> =>
    type === "int" ? { type: "integer", multipleOf: 1 } : { type };

  const properties: Record<string, unknown> = {};
  const required: string[] = [];
  const seen = new Set<string>();

  for (const field of fields) {
    // A repeated name would overwrite the earlier property and duplicate the
    // entry in `required`, producing an invalid schema.
    if (seen.has(field.name)) {
      throw new Error(`Duplicate field name "${field.name}" in fields list.`);
    }
    seen.add(field.name);

    if (field.kind === "scalar") {
      properties[field.name] = scalarSchema(field.type);
    } else if (field.kind === "enum") {
      properties[field.name] = { type: "string", enum: field.values };
    } else {
      // array
      properties[field.name] = {
        type: "array",
        items: scalarSchema(field.items),
      };
    }
    required.push(field.name);
  }

  return {
    type: "object",
    properties,
    required,
    additionalProperties: false,
  };
};
229
+
/**
 * One-step helper: parse a shorthand fields string, then build the JSON
 * Schema for the parsed fields.
 *
 * @param fields - The shorthand fields string.
 * @returns The JSON Schema produced from the parsed fields.
 *
 * @example
 * buildSchemaFromFields("title, price:number")
 * buildSchemaFromFields("status:enum{draft|published|archived}")
 * buildSchemaFromFields("tags:array{string}")
 */
export const buildSchemaFromFields = (fields: string): AnyJSONSchema => {
  const parsed = parseFieldsString(fields);
  return buildSchemaFromParsedFields(parsed);
};
@@ -0,0 +1,20 @@
1
+ import { test, expect } from "bun:test";
2
+ import * as api from "./index";
3
+
4
+ test("index re-exports main API", () => {
5
+ expect(typeof api.extract).toBe("function");
6
+ expect(typeof api.urlToArtifact).toBe("function");
7
+ expect(typeof api.fileToArtifact).toBe("function");
8
+ expect(typeof api.defaultArtifactProviders).toBe("object");
9
+ expect(typeof api.registerArtifactInputParser).toBe("function");
10
+ expect(typeof api.clearArtifactInputParsers).toBe("function");
11
+ expect(typeof api.validateSerializedArtifacts).toBe("function");
12
+ expect(typeof api.parseSerializedArtifacts).toBe("function");
13
+ expect(typeof api.hydrateSerializedArtifacts).toBe("function");
14
+ expect(typeof api.parse).toBe("function");
15
+ expect(typeof api.splitTextIntoContents).toBe("function");
16
+
17
+ expect(typeof api.simple).toBe("function");
18
+ expect(typeof api.parallel).toBe("function");
19
+ expect(typeof api.sequential).toBe("function");
20
+ });
package/src/index.ts ADDED
@@ -0,0 +1,93 @@
1
+ export type {
2
+ Artifact,
3
+ ArtifactContent,
4
+ ArtifactImage,
5
+ ArtifactType,
6
+ ExtractionEvents,
7
+ ExtractionOptions,
8
+ ExtractionResult,
9
+ ExtractionStrategy,
10
+ Usage,
11
+ AnyJSONSchema,
12
+ TypedJSONSchema,
13
+ } from "./types";
14
+
15
+ export { extract } from "./extract";
16
+ export {
17
+ parseFieldsString,
18
+ buildSchemaFromParsedFields,
19
+ buildSchemaFromFields,
20
+ } from "./fields";
21
+ export type { ParsedField, FieldType } from "./fields";
22
+
23
+ export type {
24
+ ArtifactInput,
25
+ ArtifactInputParser,
26
+ SerializedArtifact,
27
+ SerializedArtifactContent,
28
+ SerializedArtifactImage,
29
+ SerializedArtifacts,
30
+ } from "./artifacts/input";
31
+
32
+ export { urlToArtifact } from "./artifacts/urlToArtifact";
33
+ export { fileToArtifact } from "./artifacts/fileToArtifact";
34
+ export type { ArtifactProvider, ArtifactProviders } from "./artifacts/providers";
35
+ export { defaultArtifactProviders } from "./artifacts/providers";
36
+ export {
37
+ registerArtifactInputParser,
38
+ clearArtifactInputParsers,
39
+ validateSerializedArtifacts,
40
+ parseSerializedArtifacts,
41
+ hydrateSerializedArtifacts,
42
+ parse,
43
+ splitTextIntoContents,
44
+ } from "./artifacts/input";
45
+
46
+ export * from "./strategies";
47
+
48
+ // Parsers public API
49
+ export { collectStream } from "./parsers/collect";
50
+ export type { ParserDef, ParsersConfig, InlineParserDef, NpmParserDef } from "./parsers/types";
51
+ export { detectMimeType } from "./parsers/mime";
52
+ export type { NpmParserEntry } from "./parsers/mime";
53
+ export { runParser } from "./parsers/runner";
54
+ export { parsePdf } from "./parsers/pdf";
55
+
56
+ // Debug
57
+ export { createDebugLogger } from "./debug/logger";
58
+
59
+ // LLM models
60
+ export {
61
+ listAllProviderModels,
62
+ listProviderModels,
63
+ resolveCheapestModel,
64
+ } from "./llm/models";
65
+
66
+ // Validation
67
+ export { SchemaValidationError } from "./validation/validator";
68
+
69
+ // Auth (for CLI and SDK users who want to manage tokens)
70
+ export {
71
+ getDefaultModel,
72
+ setDefaultModel,
73
+ listAliases,
74
+ getAlias,
75
+ setAlias,
76
+ deleteAlias,
77
+ resolveAlias,
78
+ listParsers,
79
+ getParser,
80
+ setParser,
81
+ deleteParser,
82
+ } from "./auth/config";
83
+ export {
84
+ listStoredProviders,
85
+ setProviderToken,
86
+ deleteProviderToken,
87
+ resolveProviderToken,
88
+ getProviderTokenOrThrow,
89
+ resolveProviderEnvVar,
90
+ maskToken,
91
+ type TokenStorageType,
92
+ type TokenEntry,
93
+ } from "./auth/tokens";
@@ -0,0 +1,9 @@
1
+ # LLM module
2
+
3
+ - Purpose: wrap Vercel AI SDK calls, build multimodal messages, run validation retries, and query provider model lists/defaults.
4
+ - Key files: `LLMClient.ts`, `RetryingRunner.ts`, `message.ts`, `models.ts`.
5
+ - Design: `generateStructured` centralizes AI SDK usage; retry loop feeds validation errors back to the model.
6
+ - Retry events: `runWithRetries` emits `onRetry` events with `{ attempt, maxAttempts, reason }` so the CLI can show retry progress (e.g. "Extracting data (retry 2/3)...").
7
+ - Supported providers: openai, anthropic, google, opencode (Zen), openrouter.
8
+ - OpenRouter provider routing: When a model has an `__openrouter_provider` property attached (set via hashtag syntax in the model string), `generateStructured` passes it as `providerOptions.openrouter.provider.order` to route requests to the specified provider.
9
+ - Tests: `LLMClient.test.ts`, `RetryingRunner.test.ts`, `message.test.ts`, `models.test.ts`.
@@ -0,0 +1,196 @@
1
+ import { test, expect, mock } from "bun:test";
2
+ import type { ModelMessage } from "ai";
3
+
4
+ type GenerateTextParams = {
5
+ model: unknown;
6
+ output: unknown;
7
+ system: string;
8
+ messages: ModelMessage[];
9
+ providerOptions?: unknown;
10
+ };
11
+
12
+ let generateTextImpl: (params: GenerateTextParams) => Promise<{
13
+ output: unknown;
14
+ usage?: Record<string, unknown>;
15
+ }>;
16
+
17
+ const calls: GenerateTextParams[] = [];
18
+
19
+ mock.module("ai", () => ({
20
+ generateText: (params: GenerateTextParams) => {
21
+ calls.push(params);
22
+ return generateTextImpl(params);
23
+ },
24
+ Output: {
25
+ object: (config: unknown) => config,
26
+ },
27
+ jsonSchema: (schema: unknown) => ({ wrapped: schema }),
28
+ }));
29
+
30
+ const { generateStructured } = await import("./LLMClient");
31
+
32
+ test("generateStructured maps prompt/completion token usage", async () => {
33
+ calls.length = 0;
34
+ generateTextImpl = async () => ({
35
+ output: { title: "ok" },
36
+ usage: { promptTokens: 2, completionTokens: 3, totalTokens: 9 },
37
+ });
38
+
39
+ const result = await generateStructured({
40
+ model: {},
41
+ schema: { type: "object" },
42
+ system: "sys",
43
+ user: "prompt",
44
+ });
45
+
46
+ expect(result.usage).toEqual({ inputTokens: 2, outputTokens: 3, totalTokens: 9 });
47
+ expect(calls[0]?.output).toEqual({ schema: { wrapped: { type: "object" } }, name: "extract" });
48
+ expect(calls[0]?.messages[0]).toEqual({ role: "user", content: "prompt" });
49
+ });
50
+
51
+ test("generateStructured uses explicit messages and totals usage", async () => {
52
+ calls.length = 0;
53
+ const messages: ModelMessage[] = [{ role: "user", content: "custom" }];
54
+ generateTextImpl = async (params) => ({
55
+ output: { title: "ok" },
56
+ usage: { inputTokens: 4, outputTokens: 6 },
57
+ });
58
+
59
+ const result = await generateStructured({
60
+ model: {},
61
+ schema: { type: "object" },
62
+ system: "sys",
63
+ user: "fallback",
64
+ messages,
65
+ });
66
+
67
+ expect(calls[0]?.messages).toBe(messages);
68
+ expect(result.usage).toEqual({ inputTokens: 4, outputTokens: 6, totalTokens: 10 });
69
+ });
70
+
71
+ test("generateStructured passes OpenRouter provider preference", async () => {
72
+ calls.length = 0;
73
+ generateTextImpl = async () => ({
74
+ output: { title: "ok" },
75
+ usage: { inputTokens: 1, outputTokens: 1 },
76
+ });
77
+
78
+ const model = { __openrouter_provider: "cerebras" };
79
+ await generateStructured({
80
+ model,
81
+ schema: { type: "object" },
82
+ system: "sys",
83
+ user: "prompt",
84
+ });
85
+
86
+ expect(calls[0]?.providerOptions).toEqual({
87
+ openrouter: {
88
+ provider: {
89
+ order: ["cerebras"],
90
+ },
91
+ },
92
+ });
93
+ });
94
+
95
+ test("generateStructured does not add openrouter providerOptions without preference", async () => {
96
+ calls.length = 0;
97
+ generateTextImpl = async () => ({
98
+ output: { title: "ok" },
99
+ usage: { inputTokens: 1, outputTokens: 1 },
100
+ });
101
+
102
+ await generateStructured({
103
+ model: {},
104
+ schema: { type: "object" },
105
+ system: "sys",
106
+ user: "prompt",
107
+ });
108
+
109
+ expect(calls[0]?.providerOptions).not.toHaveProperty("openrouter");
110
+ });
111
+
112
+ test("generateStructured uses inputTokens/outputTokens when promptTokens missing", async () => {
113
+ calls.length = 0;
114
+ generateTextImpl = async () => ({
115
+ output: { title: "ok" },
116
+ usage: { inputTokens: 5, outputTokens: 7 },
117
+ });
118
+
119
+ const result = await generateStructured({
120
+ model: {},
121
+ schema: { type: "object" },
122
+ system: "sys",
123
+ user: "prompt",
124
+ });
125
+
126
+ expect(result.usage).toEqual({ inputTokens: 5, outputTokens: 7, totalTokens: 12 });
127
+ });
128
+
129
+ test("generateStructured uses totalTokens from response when present", async () => {
130
+ calls.length = 0;
131
+ generateTextImpl = async () => ({
132
+ output: { title: "ok" },
133
+ usage: { inputTokens: 3, outputTokens: 4, totalTokens: 100 },
134
+ });
135
+
136
+ const result = await generateStructured({
137
+ model: {},
138
+ schema: { type: "object" },
139
+ system: "sys",
140
+ user: "prompt",
141
+ });
142
+
143
+ expect(result.usage.totalTokens).toBe(100);
144
+ });
145
+
146
+ test("generateStructured handles missing usage", async () => {
147
+ calls.length = 0;
148
+ generateTextImpl = async () => ({
149
+ output: { title: "ok" },
150
+ });
151
+
152
+ const result = await generateStructured({
153
+ model: {},
154
+ schema: { type: "object" },
155
+ system: "sys",
156
+ user: "prompt",
157
+ });
158
+
159
+ expect(result.usage).toEqual({ inputTokens: 0, outputTokens: 0, totalTokens: 0 });
160
+ });
161
+
162
+ test("generateStructured uses custom schema name", async () => {
163
+ calls.length = 0;
164
+ generateTextImpl = async () => ({
165
+ output: { title: "ok" },
166
+ usage: {},
167
+ });
168
+
169
+ await generateStructured({
170
+ model: {},
171
+ schema: { type: "object" },
172
+ schemaName: "custom_schema",
173
+ system: "sys",
174
+ user: "prompt",
175
+ });
176
+
177
+ expect(calls[0]?.output).toHaveProperty("name", "custom_schema");
178
+ });
179
+
180
+ test("generateStructured uses custom schema description", async () => {
181
+ calls.length = 0;
182
+ generateTextImpl = async () => ({
183
+ output: { title: "ok" },
184
+ usage: {},
185
+ });
186
+
187
+ await generateStructured({
188
+ model: {},
189
+ schema: { type: "object" },
190
+ schemaDescription: "Extract data",
191
+ system: "sys",
192
+ user: "prompt",
193
+ });
194
+
195
+ expect(calls[0]?.output).toHaveProperty("description", "Extract data");
196
+ });
@@ -0,0 +1,106 @@
1
+ import { generateText, Output, jsonSchema, type ModelMessage } from "ai";
2
+ import type { AnyJSONSchema, Usage } from "../types";
3
+ import type { UserContent } from "./message";
4
+
5
+ type GenerateTextParams = Parameters<typeof generateText>[0];
6
+ type ModelType = GenerateTextParams extends { model: infer M } ? M : unknown;
7
+ type MessageType = Array<ModelMessage>;
8
+
9
+ export type StructuredRequest<T> = {
10
+ model: ModelType | unknown;
11
+ system: string;
12
+ user: UserContent;
13
+ messages?: MessageType;
14
+ schema: unknown;
15
+ schemaName?: string;
16
+ schemaDescription?: string;
17
+ strict?: boolean;
18
+ };
19
+
20
+ export type StructuredResponse<T> = {
21
+ data: T;
22
+ usage: Usage;
23
+ };
24
+
/**
 * Type guard: does `schema` look like a Zod schema?
 *
 * The check is purely structural — any non-null object exposing a callable
 * `safeParse` member qualifies.
 *
 * @param schema - The value to inspect.
 * @returns `true` when `schema` is an object with a `safeParse` function.
 */
const isZodSchema = (
  schema: unknown,
): schema is { safeParse: (data: unknown) => unknown } => {
  if (typeof schema !== "object" || schema === null) {
    return false;
  }
  if (!("safeParse" in schema)) {
    return false;
  }
  const candidate = (schema as { safeParse?: unknown }).safeParse;
  return typeof candidate === "function";
};
35
+
/**
 * Read the first usable token count from a usage object.
 * Keys whose value is missing, `undefined`, `NaN` or otherwise not a finite
 * number are skipped so that callers can fall back to another source.
 */
const readTokenCount = (
  source: unknown,
  ...keys: string[]
): number | undefined => {
  if (typeof source !== "object" || source === null) {
    return undefined;
  }
  for (const key of keys) {
    const value = (source as Record<string, unknown>)[key];
    if (typeof value === "number" && Number.isFinite(value)) {
      return value;
    }
  }
  return undefined;
};

/**
 * Run a single structured-output generation through `generateText`.
 *
 * @param request - Model, system prompt, user content (or explicit
 *   `messages`, which take precedence over `user`), and the output schema.
 *   A schema exposing `safeParse` is passed through unchanged; anything else
 *   is wrapped with `jsonSchema`. `schemaName` defaults to `"extract"`.
 * @returns The generated output as `data`, plus normalised token `usage`.
 *   `inputTokens` is read from `promptTokens` or `inputTokens`,
 *   `outputTokens` from `completionTokens` or `outputTokens`, each defaulting
 *   to 0. `totalTokens` is the reported total when it is a finite number,
 *   otherwise the sum of input and output tokens.
 */
export const generateStructured = async <T>(
  request: StructuredRequest<T>,
): Promise<StructuredResponse<T>> => {
  const schema = isZodSchema(request.schema)
    ? request.schema
    : jsonSchema(request.schema as AnyJSONSchema);

  // Check for OpenRouter provider preference attached to the model
  const preferredProvider = (
    request.model as { __openrouter_provider?: string }
  )?.__openrouter_provider;

  if (preferredProvider && process.env.DEBUG) {
    console.error(
      `[DEBUG] Routing to OpenRouter provider: ${preferredProvider}`,
    );
  }

  // Exactly one set of provider options is sent: the OpenRouter routing
  // order when a preferred provider is attached to the model, otherwise the
  // OpenAI strict-schema flag. (The original literal listed `providerOptions`
  // twice, with the later spread replacing the earlier value; this keeps the
  // same outcome but makes the either/or explicit.)
  const providerOptions = preferredProvider
    ? {
        openrouter: {
          provider: {
            order: [preferredProvider],
          },
        },
      }
    : {
        openai: {
          strictJsonSchema: request.strict ?? false,
        },
      };

  const result = await generateText({
    model: request.model as ModelType,
    output: Output.object({
      schema: schema as GenerateTextParams extends { schema: infer S }
        ? S
        : never,
      name: request.schemaName ?? "extract",
      description: request.schemaDescription,
    }),
    providerOptions,
    system: request.system,
    messages: (request.messages ?? [
      { role: "user", content: request.user },
    ]) as MessageType,
  });

  // Accept both the prompt/completion and the input/output naming. A key that
  // is present but not a finite number falls through to the next candidate
  // instead of leaking `undefined`/`NaN` into the returned usage.
  const inputTokens =
    readTokenCount(result.usage, "promptTokens", "inputTokens") ?? 0;
  const outputTokens =
    readTokenCount(result.usage, "completionTokens", "outputTokens") ?? 0;
  const totalTokens =
    readTokenCount(result.usage, "totalTokens") ??
    (inputTokens + outputTokens);

  const usage: Usage = {
    inputTokens,
    outputTokens,
    totalTokens,
  };

  return { data: result.output as T, usage };
};