@struktur/sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/README.md +79 -0
  2. package/package.json +33 -0
  3. package/src/artifacts/AGENTS.md +16 -0
  4. package/src/artifacts/fileToArtifact.test.ts +37 -0
  5. package/src/artifacts/fileToArtifact.ts +44 -0
  6. package/src/artifacts/input.test.ts +243 -0
  7. package/src/artifacts/input.ts +360 -0
  8. package/src/artifacts/providers.test.ts +19 -0
  9. package/src/artifacts/providers.ts +7 -0
  10. package/src/artifacts/urlToArtifact.test.ts +23 -0
  11. package/src/artifacts/urlToArtifact.ts +19 -0
  12. package/src/auth/AGENTS.md +11 -0
  13. package/src/auth/config.test.ts +132 -0
  14. package/src/auth/config.ts +129 -0
  15. package/src/auth/tokens.test.ts +58 -0
  16. package/src/auth/tokens.ts +229 -0
  17. package/src/chunking/AGENTS.md +11 -0
  18. package/src/chunking/ArtifactBatcher.test.ts +22 -0
  19. package/src/chunking/ArtifactBatcher.ts +110 -0
  20. package/src/chunking/ArtifactSplitter.test.ts +38 -0
  21. package/src/chunking/ArtifactSplitter.ts +151 -0
  22. package/src/debug/AGENTS.md +79 -0
  23. package/src/debug/logger.test.ts +244 -0
  24. package/src/debug/logger.ts +211 -0
  25. package/src/extract.test.ts +22 -0
  26. package/src/extract.ts +114 -0
  27. package/src/fields.test.ts +663 -0
  28. package/src/fields.ts +239 -0
  29. package/src/index.test.ts +20 -0
  30. package/src/index.ts +93 -0
  31. package/src/llm/AGENTS.md +9 -0
  32. package/src/llm/LLMClient.test.ts +196 -0
  33. package/src/llm/LLMClient.ts +106 -0
  34. package/src/llm/RetryingRunner.test.ts +174 -0
  35. package/src/llm/RetryingRunner.ts +188 -0
  36. package/src/llm/message.test.ts +42 -0
  37. package/src/llm/message.ts +47 -0
  38. package/src/llm/models.test.ts +82 -0
  39. package/src/llm/models.ts +190 -0
  40. package/src/merge/AGENTS.md +6 -0
  41. package/src/merge/Deduplicator.test.ts +108 -0
  42. package/src/merge/Deduplicator.ts +45 -0
  43. package/src/merge/SmartDataMerger.test.ts +177 -0
  44. package/src/merge/SmartDataMerger.ts +56 -0
  45. package/src/parsers/AGENTS.md +58 -0
  46. package/src/parsers/collect.test.ts +56 -0
  47. package/src/parsers/collect.ts +31 -0
  48. package/src/parsers/index.ts +6 -0
  49. package/src/parsers/mime.test.ts +91 -0
  50. package/src/parsers/mime.ts +137 -0
  51. package/src/parsers/npm.ts +26 -0
  52. package/src/parsers/pdf.test.ts +394 -0
  53. package/src/parsers/pdf.ts +194 -0
  54. package/src/parsers/runner.test.ts +95 -0
  55. package/src/parsers/runner.ts +177 -0
  56. package/src/parsers/types.ts +29 -0
  57. package/src/prompts/AGENTS.md +8 -0
  58. package/src/prompts/DeduplicationPrompt.test.ts +41 -0
  59. package/src/prompts/DeduplicationPrompt.ts +37 -0
  60. package/src/prompts/ExtractorPrompt.test.ts +21 -0
  61. package/src/prompts/ExtractorPrompt.ts +72 -0
  62. package/src/prompts/ParallelMergerPrompt.test.ts +8 -0
  63. package/src/prompts/ParallelMergerPrompt.ts +37 -0
  64. package/src/prompts/SequentialExtractorPrompt.test.ts +24 -0
  65. package/src/prompts/SequentialExtractorPrompt.ts +82 -0
  66. package/src/prompts/formatArtifacts.test.ts +39 -0
  67. package/src/prompts/formatArtifacts.ts +46 -0
  68. package/src/strategies/AGENTS.md +6 -0
  69. package/src/strategies/DoublePassAutoMergeStrategy.test.ts +53 -0
  70. package/src/strategies/DoublePassAutoMergeStrategy.ts +270 -0
  71. package/src/strategies/DoublePassStrategy.test.ts +48 -0
  72. package/src/strategies/DoublePassStrategy.ts +179 -0
  73. package/src/strategies/ParallelAutoMergeStrategy.test.ts +152 -0
  74. package/src/strategies/ParallelAutoMergeStrategy.ts +241 -0
  75. package/src/strategies/ParallelStrategy.test.ts +61 -0
  76. package/src/strategies/ParallelStrategy.ts +157 -0
  77. package/src/strategies/SequentialAutoMergeStrategy.test.ts +66 -0
  78. package/src/strategies/SequentialAutoMergeStrategy.ts +222 -0
  79. package/src/strategies/SequentialStrategy.test.ts +53 -0
  80. package/src/strategies/SequentialStrategy.ts +119 -0
  81. package/src/strategies/SimpleStrategy.test.ts +46 -0
  82. package/src/strategies/SimpleStrategy.ts +74 -0
  83. package/src/strategies/concurrency.test.ts +16 -0
  84. package/src/strategies/concurrency.ts +14 -0
  85. package/src/strategies/index.test.ts +20 -0
  86. package/src/strategies/index.ts +7 -0
  87. package/src/strategies/utils.test.ts +76 -0
  88. package/src/strategies/utils.ts +56 -0
  89. package/src/tokenization.test.ts +119 -0
  90. package/src/tokenization.ts +71 -0
  91. package/src/types.test.ts +25 -0
  92. package/src/types.ts +116 -0
  93. package/src/validation/AGENTS.md +6 -0
  94. package/src/validation/validator.test.ts +172 -0
  95. package/src/validation/validator.ts +82 -0
  96. package/tsconfig.json +22 -0
@@ -0,0 +1,360 @@
1
+ import type { Artifact, ArtifactContent, ArtifactImage, ArtifactType } from "../types";
2
+ import { createAjv, validateOrThrow } from "../validation/validator";
3
+ import { defaultArtifactProviders, type ArtifactProviders } from "./providers";
4
+ import type { ParsersConfig } from "../parsers/types";
5
+ import { runParser } from "../parsers/runner";
6
+ import type { ParsePdfOptions } from "../parsers/pdf";
7
+
8
/** JSON-safe form of `ArtifactImage`: the `contents` field is removed and may not be supplied. */
export type SerializedArtifactImage = Omit<ArtifactImage, "contents"> & { contents?: never };

/** JSON-safe form of `ArtifactContent` whose `media` entries are serialized images. */
export type SerializedArtifactContent = Omit<ArtifactContent, "media"> & {
  media?: SerializedArtifactImage[];
};

/** JSON-safe form of `Artifact`: no `raw` accessor, and `contents` holds serialized content entries. */
export type SerializedArtifact = Omit<Artifact, "contents" | "raw"> & {
  raw?: never;
  contents: SerializedArtifactContent[];
};

/** Either a single serialized artifact or a list of them. */
export type SerializedArtifacts = SerializedArtifact[] | SerializedArtifact;
22
+
23
/** Discriminated union (tagged by `kind`) of everything that can be turned into artifacts. */
export type ArtifactInput =
  | { kind: "artifact-json"; data: unknown }
  | { kind: "text"; text: string; id?: string }
  | { kind: "file"; path: string; mimeType?: string; id?: string }
  | { kind: "buffer"; buffer: Buffer; mimeType: string; id?: string };

/** Optional settings handed to an input parser's `parse` function. */
type ArtifactInputParseOptions = {
  providers?: ArtifactProviders;
  parsers?: ParsersConfig;
  includeImages?: boolean;
  screenshots?: boolean;
  screenshotScale?: number;
  screenshotWidth?: number;
};

/**
 * A pluggable input parser: `canParse` decides whether it handles an input and
 * `parse` converts that input into artifacts.
 */
export type ArtifactInputParser = {
  name: string;
  canParse: (input: ArtifactInput) => boolean;
  parse: (input: ArtifactInput, options?: ArtifactInputParseOptions) => Promise<Artifact[]>;
};
44
+
45
/**
 * JSON Schema for one serialized image: `type` must be "image", unknown keys are
 * rejected, and at least one of `url` / `base64` has to be present.
 */
const serializedArtifactImageSchema = {
  type: "object",
  additionalProperties: false,
  required: ["type"],
  anyOf: [{ required: ["url"] }, { required: ["base64"] }],
  properties: {
    type: { const: "image" },
    url: { type: "string", minLength: 1 },
    base64: { type: "string", minLength: 1 },
    text: { type: "string" },
    x: { type: "number" },
    y: { type: "number" },
    width: { type: "number" },
    height: { type: "number" },
    imageType: { enum: ["embedded", "screenshot"] },
  },
};

/** JSON Schema for one content entry: it must carry `text`, `media`, or both. */
const serializedArtifactContentSchema = {
  type: "object",
  additionalProperties: false,
  anyOf: [{ required: ["text"] }, { required: ["media"] }],
  properties: {
    page: { type: "number" },
    text: { type: "string" },
    media: { type: "array", items: serializedArtifactImageSchema },
  },
};

/** JSON Schema for one serialized artifact; `id`, `type` and `contents` are mandatory. */
const serializedArtifactSchema = {
  type: "object",
  additionalProperties: false,
  required: ["id", "type", "contents"],
  properties: {
    id: { type: "string", minLength: 1 },
    type: { enum: ["text", "image", "pdf", "file"] as ArtifactType[] },
    contents: { type: "array", items: serializedArtifactContentSchema },
    metadata: { type: "object", additionalProperties: true },
    tokens: { type: "number" },
  },
};

/** Top-level schema: accepts a single serialized artifact or an array of them. */
const serializedArtifactsSchema = {
  anyOf: [serializedArtifactSchema, { type: "array", items: serializedArtifactSchema }],
};
93
+
94
/** Module-level list of user-registered input parsers, kept in registration order. */
const inputParsers: ArtifactInputParser[] = [];

/**
 * Appends `parser` to the end of the module-level registry.
 *
 * @param parser - Parser to register.
 */
export const registerArtifactInputParser = (parser: ArtifactInputParser) => {
  inputParsers[inputParsers.length] = parser;
};

/** Empties the module-level registry in place (the array identity is preserved). */
export const clearArtifactInputParsers = () => {
  inputParsers.splice(0, inputParsers.length);
};
103
+
104
/**
 * Checks `data` against the serialized-artifacts schema and normalizes the
 * result to an array.
 *
 * @param data - Untrusted value, e.g. the output of `JSON.parse`.
 * @returns The validated value itself when it is an array, otherwise a
 *   one-element array wrapping it.
 * @remarks Validation is delegated to `validateOrThrow`; presumably it throws
 *   when `data` does not match the schema (its implementation is not shown here).
 */
export const validateSerializedArtifacts = (data: unknown): SerializedArtifact[] => {
  const validated = validateOrThrow<SerializedArtifacts>(
    createAjv(),
    serializedArtifactsSchema,
    data
  );
  if (Array.isArray(validated)) {
    return validated;
  }
  return [validated];
};
113
+
114
/**
 * Turns serialized artifacts into full artifacts by attaching a `raw` accessor.
 *
 * @param items - Serialized artifacts (already validated by the caller).
 * @returns One artifact per item, in order. Each `raw()` resolves to a Buffer
 *   holding the JSON text of that item's `contents` (or `[]` when absent).
 */
export const hydrateSerializedArtifacts = (items: SerializedArtifact[]): Artifact[] => {
  const hydrated: Artifact[] = [];
  for (const item of items) {
    // The original bytes are not available, so `raw` is synthesized lazily from the contents.
    const raw = async () => Buffer.from(JSON.stringify(item.contents ?? []));
    hydrated.push({ ...item, raw });
  }
  return hydrated;
};
120
+
121
/**
 * Parses a JSON string and validates it as serialized artifacts.
 *
 * @param text - JSON text.
 * @returns The validated artifacts, always as an array.
 * @throws SyntaxError from `JSON.parse` when `text` is not valid JSON; any
 *   error raised by `validateSerializedArtifacts` propagates unchanged.
 */
export const parseSerializedArtifacts = (text: string): SerializedArtifact[] =>
  validateSerializedArtifacts(JSON.parse(text) as unknown);
125
+
126
/**
 * Splits text into one content entry per paragraph.
 *
 * Paragraphs are separated by a blank line (a newline, optional whitespace,
 * then another newline). Each paragraph is trimmed and empty ones are dropped.
 *
 * @param text - Text to split.
 * @returns One `{ text }` entry per non-empty paragraph; when no paragraph
 *   survives, a single entry holding the original, untrimmed `text`.
 */
export const splitTextIntoContents = (text: string): ArtifactContent[] => {
  const contents: ArtifactContent[] = [];
  for (const chunk of text.split(/\n\s*\n/g)) {
    const paragraph = chunk.trim();
    if (paragraph.length > 0) {
      contents.push({ text: paragraph });
    }
  }
  // Empty or whitespace-only input still yields exactly one entry.
  return contents.length > 0 ? contents : [{ text }];
};
138
+
139
/**
 * Detects the MIME type Bun associates with a file path.
 *
 * `Bun.file(path).type` may include parameters (for example
 * `text/plain;charset=utf-8`). Callers compare the result against bare types
 * such as `"application/json"` and use it as a lookup key for parsers and
 * providers, so only the part before the first `;` is returned.
 *
 * @param path - Path of the file to inspect.
 * @returns The bare MIME type, or `"application/octet-stream"` when none is reported.
 */
const detectMimeType = async (path: string) => {
  const reported = Bun.file(path).type ?? "";
  // Drop any `;charset=...` style parameters and surrounding whitespace.
  const essence = (reported.split(";", 1)[0] ?? "").trim();
  return essence.length > 0 ? essence : "application/octet-stream";
};
144
+
145
/**
 * Wraps a buffer as a `text` artifact.
 *
 * @param buffer - Bytes to decode with `Buffer#toString()` defaults.
 * @param id - Artifact id; when omitted an `artifact-<uuid>` id is generated.
 * @returns An artifact whose `raw()` resolves to `buffer` itself and whose
 *   contents come from `splitTextIntoContents`.
 */
const bufferToTextArtifact = (buffer: Buffer, id?: string): Artifact => {
  const artifactId = id ?? `artifact-${crypto.randomUUID()}`;
  const contents = splitTextIntoContents(buffer.toString());
  return {
    id: artifactId,
    type: "text",
    raw: async () => buffer,
    contents,
  };
};
154
+
155
/**
 * Wraps a buffer as an `image` artifact.
 *
 * @param buffer - Image bytes; stored as-is, no decoding is performed here.
 * @param id - Artifact id; when omitted an `artifact-<uuid>` id is generated.
 * @returns An artifact with a single content entry holding one image whose
 *   `contents` is `buffer`; `raw()` resolves to the same buffer.
 */
const bufferToImageArtifact = (buffer: Buffer, id?: string): Artifact => {
  const image: ArtifactImage = { type: "image", contents: buffer };
  return {
    id: id ?? `artifact-${crypto.randomUUID()}`,
    type: "image",
    raw: async () => buffer,
    contents: [{ media: [image] }],
  };
};
167
+
168
/**
 * Converts a buffer of a known MIME type into artifacts.
 *
 * Resolution order (first match wins):
 *  1. a parser configured for the MIME type in `parsers`;
 *  2. a provider registered for the MIME type (`providers`, else the defaults);
 *  3. `application/json` interpreted as serialized artifacts;
 *  4. built-in PDF parsing;
 *  5. built-in `text/*` handling;
 *  6. built-in `image/*` handling.
 *
 * @param buffer - Input bytes.
 * @param mimeType - MIME type used for every lookup and comparison.
 * @param id - Artifact id; only the built-in text and image branches use it.
 * @param providers - Provider registry overriding `defaultArtifactProviders`.
 * @param parsers - Per-MIME parser configuration.
 * @param includeImages - Forwarded to the PDF parser.
 * @param screenshots - Forwarded to the PDF parser.
 * @param screenshotScale - Forwarded to the PDF parser.
 * @param screenshotWidth - Forwarded to the PDF parser.
 * @returns The artifacts produced by the selected branch.
 * @throws Error when JSON input is not in serialized-artifact form, or when no
 *   branch supports `mimeType`.
 */
const parseBufferInput = async (
  buffer: Buffer,
  mimeType: string,
  id?: string,
  providers?: ArtifactProviders,
  parsers?: ParsersConfig,
  includeImages?: boolean,
  screenshots?: boolean,
  screenshotScale?: number,
  screenshotWidth?: number,
): Promise<Artifact[]> => {
  // 1. A parser configured for this MIME type takes precedence over everything else.
  const configuredParser = parsers?.[mimeType];
  if (configuredParser) {
    return runParser(configuredParser, { kind: "buffer", buffer }, mimeType);
  }

  // 2. User-registered provider functions, falling back to the default registry.
  const provider = (providers ?? defaultArtifactProviders)[mimeType];
  if (provider) {
    const produced = await provider(buffer);
    return [produced];
  }

  // 3. JSON is accepted only when it is already in serialized-artifact form.
  if (mimeType === "application/json") {
    try {
      const decoded = JSON.parse(buffer.toString()) as unknown;
      return hydrateSerializedArtifacts(validateSerializedArtifacts(decoded));
    } catch {
      // Parse and validation failures are both reported with the same actionable message.
      throw new Error(
        "Input is JSON but not in SerializedArtifact format. To parse arbitrary JSON files, configure a parser: struktur config parsers add --mime application/json ..."
      );
    }
  }

  // 4. Built-in PDF support; the parser module is loaded only when needed.
  if (mimeType === "application/pdf") {
    const pdfModule = await import("../parsers/pdf");
    const pdfOptions: ParsePdfOptions = {
      includeImages,
      screenshots,
      screenshotScale,
      screenshotWidth,
    };
    const pdfArtifact = await pdfModule.parsePdf(buffer, pdfOptions);
    return [pdfArtifact];
  }

  // 5. Built-in text/* support.
  if (mimeType.startsWith("text/")) {
    return [bufferToTextArtifact(buffer, id)];
  }

  // 6. Built-in image/* support.
  if (mimeType.startsWith("image/")) {
    return [bufferToImageArtifact(buffer, id)];
  }

  throw new Error(`Unsupported MIME type: ${mimeType}`);
};
233
+
234
/**
 * Built-in parser for `artifact-json` inputs: validates `input.data` as
 * serialized artifacts and hydrates them. Any other input kind yields `[]`.
 */
const artifactJsonParser: ArtifactInputParser = {
  name: "artifact-json",
  canParse: ({ kind }) => kind === "artifact-json",
  parse: async (input) =>
    input.kind === "artifact-json"
      ? hydrateSerializedArtifacts(validateSerializedArtifacts(input.data))
      : [],
};
245
+
246
/**
 * Built-in parser for `text` inputs: encodes the string into a Buffer and
 * wraps it as a single text artifact. Any other input kind yields `[]`.
 */
const textParser: ArtifactInputParser = {
  name: "text",
  canParse: ({ kind }) => kind === "text",
  parse: async (input) =>
    input.kind === "text" ? [bufferToTextArtifact(Buffer.from(input.text), input.id)] : [],
};
257
+
258
/**
 * Built-in parser for `file` inputs.
 *
 * The MIME type is `input.mimeType` when given, otherwise it is detected from
 * the path. JSON files are first tried as serialized artifacts; when that
 * fails, a parser configured for the MIME type is used if one exists,
 * otherwise an error is thrown. Every other MIME type is read into a Buffer
 * and delegated to `parseBufferInput`. Any other input kind yields `[]`.
 */
const fileParser: ArtifactInputParser = {
  name: "file",
  canParse: ({ kind }) => kind === "file",
  parse: async (input, options) => {
    if (input.kind !== "file") {
      return [];
    }
    const filePath = input.path;
    const mimeType = input.mimeType ?? (await detectMimeType(filePath));

    if (mimeType !== "application/json") {
      const bytes = Buffer.from(await Bun.file(filePath).arrayBuffer());
      return parseBufferInput(
        bytes,
        mimeType,
        input.id,
        options?.providers,
        options?.parsers,
        options?.includeImages,
        options?.screenshots,
        options?.screenshotScale,
        options?.screenshotWidth,
      );
    }

    // JSON auto-detection: try the serialized-artifact format before anything else.
    const text = await Bun.file(filePath).text();
    try {
      const decoded = JSON.parse(text) as unknown;
      return hydrateSerializedArtifacts(validateSerializedArtifacts(decoded));
    } catch {
      // Not valid artifact JSON: fall back to a configured parser, else fail.
      const parserDef = options?.parsers?.[mimeType];
      if (parserDef) {
        return runParser(parserDef, { kind: "file", path: filePath }, mimeType);
      }
      throw new Error(
        `File "${filePath}" is JSON but not in SerializedArtifact format. To parse arbitrary JSON files, configure a parser: struktur config parsers add --mime application/json ...`
      );
    }
  },
};
303
+
304
/**
 * Built-in parser for `buffer` inputs: forwards the buffer, its MIME type and
 * the caller's options to `parseBufferInput`. Any other input kind yields `[]`.
 */
const bufferParser: ArtifactInputParser = {
  name: "buffer",
  canParse: ({ kind }) => kind === "buffer",
  parse: async (input, options) => {
    if (input.kind !== "buffer") {
      return [];
    }
    const { buffer, mimeType, id } = input;
    return parseBufferInput(
      buffer,
      mimeType,
      id,
      options?.providers,
      options?.parsers,
      options?.includeImages,
      options?.screenshots,
      options?.screenshotScale,
      options?.screenshotWidth,
    );
  },
};
324
+
325
/**
 * Converts an input into artifacts using the first parser that accepts it.
 *
 * @param input - The input to convert.
 * @param options - Optional settings. `parsers` replaces the whole candidate
 *   list; without it, registered parsers are tried first, followed by the
 *   built-in artifact-json, text, file and buffer parsers. `parserConfig` is
 *   passed to the chosen parser as its `parsers` option; the remaining fields
 *   are forwarded unchanged.
 * @returns The artifacts produced by the selected parser.
 * @throws Error when no candidate's `canParse` accepts the input.
 */
export const parse = async (
  input: ArtifactInput,
  options?: {
    parsers?: ArtifactInputParser[];
    providers?: ArtifactProviders;
    parserConfig?: ParsersConfig;
    includeImages?: boolean;
    screenshots?: boolean;
    screenshotScale?: number;
    screenshotWidth?: number;
  }
): Promise<Artifact[]> => {
  const candidates = options?.parsers ?? [
    ...inputParsers,
    artifactJsonParser,
    textParser,
    fileParser,
    bufferParser,
  ];

  for (const candidate of candidates) {
    if (!candidate.canParse(input)) {
      continue;
    }
    return candidate.parse(input, {
      providers: options?.providers,
      parsers: options?.parserConfig,
      includeImages: options?.includeImages,
      screenshots: options?.screenshots,
      screenshotScale: options?.screenshotScale,
      screenshotWidth: options?.screenshotWidth,
    });
  }

  throw new Error(`No artifact input parser available for ${input.kind}`);
};
@@ -0,0 +1,19 @@
1
+ import { test, expect } from "bun:test";
2
+ import { defaultArtifactProviders, type ArtifactProviders } from "./providers";
3
+
4
// The default registry must exist and be a plain object-typed value.
test("defaultArtifactProviders is an object", () => {
  const registry = defaultArtifactProviders;
  expect(registry).toBeDefined();
  expect(typeof registry).toBe("object");
});

// Mostly a compile-time check: a MIME-keyed provider map satisfies ArtifactProviders.
test("ArtifactProviders type accepts MIME type keys", () => {
  const pdfProvider: ArtifactProviders[string] = async (buffer) => ({
    id: "pdf-1",
    type: "pdf",
    raw: async () => buffer,
    contents: [{ page: 1, text: "test" }],
  });
  const providers: ArtifactProviders = { "application/pdf": pdfProvider };
  expect(providers["application/pdf"]).toBeDefined();
});
@@ -0,0 +1,7 @@
1
+ import type { Artifact } from "../types";
2
+
3
/** Function that converts a buffer into a single artifact. */
export type ArtifactProvider = (buffer: Buffer) => Promise<Artifact>;

/** String-keyed map of provider functions. */
export type ArtifactProviders = { [key: string]: ArtifactProvider };

/** Default provider registry; it starts out empty. */
export const defaultArtifactProviders: ArtifactProviders = {};
@@ -0,0 +1,23 @@
1
+ import { test, expect } from "bun:test";
2
+ import { urlToArtifact } from "./urlToArtifact";
3
+
4
// Verifies that urlToArtifact fetches JSON and synthesizes `raw` from the contents.
test("urlToArtifact fetches and builds artifact", async () => {
  const originalFetch = globalThis.fetch;
  // Stub the global fetch so the test never touches the network.
  globalThis.fetch = (async () =>
    new Response(
      JSON.stringify({
        id: "a1",
        type: "pdf",
        contents: [{ text: "hello" }],
      }),
      { status: 200 }
    )) as unknown as typeof fetch;

  try {
    const artifact = await urlToArtifact("https://example.com/artifact");
    const raw = await artifact.raw();

    expect(artifact.id).toBe("a1");
    expect(raw.toString()).toContain("hello");
  } finally {
    // Restore even when an assertion fails so the stub cannot leak into other tests.
    globalThis.fetch = originalFetch;
  }
});
@@ -0,0 +1,19 @@
1
+ import type { Artifact } from "../types";
2
+
3
/**
 * Fetches a JSON-encoded artifact from `url`.
 *
 * @param url - Location of the artifact JSON.
 * @returns The decoded artifact with a callable `raw`. When the payload does
 *   not carry a function-valued `raw` (JSON never can), `raw()` resolves to a
 *   Buffer holding the JSON text of `contents` (or `[]` when absent).
 * @throws Error when the response status is not OK or the body is not a JSON
 *   object; JSON decoding errors from `response.json()` propagate unchanged.
 */
export const urlToArtifact = async (url: string): Promise<Artifact> => {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch artifact: ${response.status} ${response.statusText}`);
  }

  const data = (await response.json()) as Omit<Artifact, "raw"> & { raw?: unknown };
  if (data === null || typeof data !== "object") {
    throw new Error("Failed to fetch artifact: response body is not a JSON object");
  }

  // A JSON payload may contain a `raw` key holding a string or object; only a real
  // function may be used as the accessor, otherwise `artifact.raw()` would throw.
  const raw: Artifact["raw"] =
    typeof data.raw === "function"
      ? (data.raw as Artifact["raw"])
      : async () => Buffer.from(JSON.stringify(data.contents ?? []));

  return { ...data, raw };
};
@@ -0,0 +1,11 @@
1
+ Auth module
2
+
3
+ - Purpose: persist and resolve provider API tokens and CLI defaults.
4
+ - Key files: `tokens.ts`, `config.ts`.
5
+ - Design: prefers macOS Keychain when available; otherwise uses `~/.config/struktur/tokens.json` with strict permissions.
6
+ - Config store (`config.ts`): stores `defaultModel` (string), `aliases` (Record<string, string>), and `parsers` (ParsersConfig) in `~/.config/struktur/config.json`.
7
+ - Alias API: `listAliases`, `getAlias`, `setAlias`, `deleteAlias`, `resolveAlias` (resolves alias → model spec, passthrough if not an alias).
8
+ - Parsers config API: `listParsers`, `getParser`, `setParser`, `deleteParser`.
9
+ - `setParser` validates that `command-file` type parsers contain `FILE_PATH` in the command string.
10
+ - Environment variables: OPENAI_API_KEY, ANTHROPIC_API_KEY, GOOGLE_GENERATIVE_AI_API_KEY, OPENCODE_API_KEY, OPENROUTER_API_KEY.
11
+ - Tests: `tokens.test.ts`, `config.test.ts`.
@@ -0,0 +1,132 @@
1
+ import { test, expect } from "bun:test";
2
+ import path from "node:path";
3
+ import os from "node:os";
4
+ import { rm } from "node:fs/promises";
5
+ import {
6
+ listParsers,
7
+ getParser,
8
+ setParser,
9
+ deleteParser,
10
+ } from "./config";
11
+
12
// Environment variable used to point the config module at a throwaway directory.
const CONFIG_DIR_ENV = "STRUKTUR_CONFIG_DIR";

/** Builds a unique path under the OS temp directory; the directory itself is not created. */
const makeTempDir = () => {
  const suffix = Math.random().toString(16).slice(2);
  return path.join(os.tmpdir(), `struktur-test-${suffix}`);
};

/**
 * Runs `run` with the config directory redirected to a fresh temp path.
 * Afterwards the previous value of the environment variable is restored
 * (rather than unconditionally deleted) and the temp directory is removed,
 * even when `run` throws.
 */
const withTempConfigDir = async (run: () => Promise<void>): Promise<void> => {
  const tempDir = makeTempDir();
  const previous = process.env[CONFIG_DIR_ENV];
  process.env[CONFIG_DIR_ENV] = tempDir;
  try {
    await run();
  } finally {
    if (previous === undefined) {
      delete process.env[CONFIG_DIR_ENV];
    } else {
      process.env[CONFIG_DIR_ENV] = previous;
    }
    await rm(tempDir, { recursive: true, force: true });
  }
};

test("listParsers returns empty object when no parsers configured", () =>
  withTempConfigDir(async () => {
    const parsers = await listParsers();
    expect(parsers).toEqual({});
  }));

test("setParser stores an npm parser", () =>
  withTempConfigDir(async () => {
    await setParser("application/pdf", { type: "npm", package: "my-pdf-parser" });
    const parser = await getParser("application/pdf");
    expect(parser).toEqual({ type: "npm", package: "my-pdf-parser" });
  }));

test("setParser stores a command-file parser", () =>
  withTempConfigDir(async () => {
    await setParser("application/pdf", {
      type: "command-file",
      command: "my-cmd FILE_PATH output",
    });
    const parser = await getParser("application/pdf");
    expect(parser).toEqual({ type: "command-file", command: "my-cmd FILE_PATH output" });
  }));

test("setParser rejects command-file without FILE_PATH placeholder", () =>
  withTempConfigDir(async () => {
    await expect(
      setParser("application/pdf", { type: "command-file", command: "my-cmd --input" })
    ).rejects.toThrow("FILE_PATH");
  }));

test("setParser stores a command-stdin parser", () =>
  withTempConfigDir(async () => {
    await setParser("text/csv", { type: "command-stdin", command: "csv-to-json" });
    const parser = await getParser("text/csv");
    expect(parser).toEqual({ type: "command-stdin", command: "csv-to-json" });
  }));

test("listParsers returns all stored parsers", () =>
  withTempConfigDir(async () => {
    await setParser("application/pdf", { type: "npm", package: "pdf-parser" });
    await setParser("text/csv", { type: "command-stdin", command: "csv-parse" });
    const parsers = await listParsers();
    expect(parsers["application/pdf"]).toEqual({ type: "npm", package: "pdf-parser" });
    expect(parsers["text/csv"]).toEqual({ type: "command-stdin", command: "csv-parse" });
  }));

test("deleteParser removes stored parser and returns true", () =>
  withTempConfigDir(async () => {
    await setParser("application/pdf", { type: "npm", package: "pdf-parser" });
    const deleted = await deleteParser("application/pdf");
    expect(deleted).toBe(true);
    const parser = await getParser("application/pdf");
    expect(parser).toBeUndefined();
  }));

test("deleteParser returns false when parser does not exist", () =>
  withTempConfigDir(async () => {
    const deleted = await deleteParser("application/pdf");
    expect(deleted).toBe(false);
  }));
@@ -0,0 +1,129 @@
1
+ import path from "node:path";
2
+ import os from "node:os";
3
+ import { chmod, mkdir } from "node:fs/promises";
4
+ import type { ParserDef, ParsersConfig } from "@struktur/sdk";
5
+
6
/** On-disk shape of `config.json`; `version` is always 1. */
type ConfigStore = {
  version: 1;
  defaultModel?: string;
  aliases?: Record<string, string>;
  parsers?: ParsersConfig;
};

/** Name of the environment variable that overrides the config directory. */
const CONFIG_DIR_ENV = "STRUKTUR_CONFIG_DIR";

/**
 * Resolves the directory that holds the config file.
 *
 * @returns The value of `STRUKTUR_CONFIG_DIR` when it is set to a non-blank
 *   string, otherwise `<home>/.config/struktur`. A blank override is ignored
 *   because joining an empty directory with `config.json` would silently
 *   resolve relative to the current working directory.
 */
const resolveConfigDir = () => {
  const override = process.env[CONFIG_DIR_ENV];
  if (override !== undefined && override.trim().length > 0) {
    return override;
  }
  return path.join(os.homedir(), ".config", "struktur");
};

/** Full path of `config.json` inside the resolved config directory. */
const resolveConfigPath = () => path.join(resolveConfigDir(), "config.json");

/** Creates a fresh store containing only the version marker. */
const emptyStore = (): ConfigStore => ({ version: 1 });
22
+
23
/**
 * Loads the config store from disk.
 *
 * @returns The parsed store, or a fresh empty store when the file does not
 *   exist or its content is falsy or not `version: 1`.
 * @throws SyntaxError from `JSON.parse` when the file is not valid JSON.
 */
const readConfigStore = async (): Promise<ConfigStore> => {
  const configFile = Bun.file(resolveConfigPath());
  if (!(await configFile.exists())) {
    return emptyStore();
  }
  const parsed = JSON.parse(await configFile.text()) as ConfigStore;
  // Anything that is not a version-1 store is treated as if no config existed.
  return parsed?.version === 1 ? parsed : emptyStore();
};
36
+
37
/**
 * Persists the store as pretty-printed JSON (2-space indent).
 *
 * The config directory is created if needed; afterwards the directory is set
 * to mode 0o700 and the file to mode 0o600.
 *
 * @param store - Store to write.
 */
const writeConfigStore = async (store: ConfigStore) => {
  const dir = resolveConfigDir();
  const file = resolveConfigPath();
  const serialized = JSON.stringify(store, null, 2);

  await mkdir(dir, { recursive: true, mode: 0o700 });
  await Bun.write(file, serialized);
  // Re-apply permissions explicitly: `mkdir` leaves an existing directory's mode untouched.
  await chmod(dir, 0o700);
  await chmod(file, 0o600);
};
45
+
46
/** Returns the stored default model, or `undefined` when none is set. */
export const getDefaultModel = async () => (await readConfigStore()).defaultModel;
50
+
51
/**
 * Stores `model` as the default model and persists the config.
 *
 * @param model - Model spec to remember.
 * @returns The same `model` string.
 */
export const setDefaultModel = async (model: string) => {
  const current = await readConfigStore();
  Object.assign(current, { defaultModel: model });
  await writeConfigStore(current);
  return model;
};
57
+
58
+ // --- Alias management ---
59
+
60
/** Returns the stored alias map, or an empty object when no aliases are stored. */
export const listAliases = async (): Promise<Record<string, string>> => {
  const { aliases } = await readConfigStore();
  return aliases ?? {};
};
64
+
65
/**
 * Looks up one alias.
 *
 * @param alias - Alias name.
 * @returns The value stored under `alias`, or `undefined` when there is none.
 */
export const getAlias = async (alias: string): Promise<string | undefined> => {
  const { aliases } = await readConfigStore();
  return aliases?.[alias];
};
69
+
70
/**
 * Creates or overwrites an alias and persists the config.
 *
 * @param alias - Alias name.
 * @param model - Model spec the alias maps to.
 * @returns The same `model` string.
 */
export const setAlias = async (alias: string, model: string): Promise<string> => {
  const current = await readConfigStore();
  // Create the alias map on first use, then write into it.
  const aliases = current.aliases ?? (current.aliases = {});
  aliases[alias] = model;
  await writeConfigStore(current);
  return model;
};
77
+
78
/**
 * Removes an alias and persists the config.
 *
 * Existence is decided by an own-property check, not truthiness, so an alias
 * mapped to an empty string can still be deleted and inherited object keys
 * (e.g. `"toString"`) are never reported as deleted.
 *
 * @param alias - Alias name.
 * @returns `true` when the alias existed and was removed, `false` otherwise
 *   (nothing is written in that case).
 */
export const deleteAlias = async (alias: string): Promise<boolean> => {
  const store = await readConfigStore();
  const aliases = store.aliases;
  if (!aliases || !Object.prototype.hasOwnProperty.call(aliases, alias)) {
    return false;
  }
  delete aliases[alias];
  await writeConfigStore(store);
  return true;
};
87
+
88
/**
 * Resolves a model spec through the stored aliases.
 *
 * @param modelSpec - Either an alias name or an actual model spec.
 * @returns The aliased model string when `modelSpec` is a stored alias,
 *   otherwise `modelSpec` unchanged.
 */
export const resolveAlias = async (modelSpec: string): Promise<string> => {
  const knownAliases = await listAliases();
  const target = knownAliases[modelSpec];
  return target ?? modelSpec;
};
96
+
97
+ // --- Parser config management ---
98
+
99
/** Returns the stored parser configuration, or an empty object when none is stored. */
export const listParsers = async (): Promise<ParsersConfig> => {
  const { parsers } = await readConfigStore();
  return parsers ?? {};
};
103
+
104
/**
 * Looks up the parser definition stored for a MIME type.
 *
 * @param mimeType - MIME type used as the lookup key.
 * @returns The stored definition, or `undefined` when there is none.
 */
export const getParser = async (mimeType: string): Promise<ParserDef | undefined> => {
  const { parsers } = await readConfigStore();
  return parsers?.[mimeType];
};
108
+
109
/**
 * Stores (or replaces) the parser definition for a MIME type and persists the config.
 *
 * @param mimeType - MIME type used as the key.
 * @param def - Parser definition to store.
 * @throws Error when `def` is a `command-file` parser whose command lacks the
 *   `FILE_PATH` placeholder; nothing is read or written in that case.
 */
export const setParser = async (mimeType: string, def: ParserDef): Promise<void> => {
  if (def.type === "command-file") {
    if (!def.command.includes("FILE_PATH")) {
      throw new Error(
        `command-file parser must contain FILE_PATH placeholder in the command string. Got: "${def.command}"`
      );
    }
  }

  const current = await readConfigStore();
  // Create the parser map on first use, then write into it.
  const parsers = current.parsers ?? (current.parsers = {});
  parsers[mimeType] = def;
  await writeConfigStore(current);
};
120
+
121
/**
 * Removes the parser definition stored for a MIME type and persists the config.
 *
 * Existence is decided by an own-property check so that inherited object keys
 * (e.g. `"constructor"`) are never reported as deleted.
 *
 * @param mimeType - MIME type whose parser should be removed.
 * @returns `true` when a definition existed and was removed, `false` otherwise
 *   (nothing is written in that case).
 */
export const deleteParser = async (mimeType: string): Promise<boolean> => {
  const store = await readConfigStore();
  const parsers = store.parsers;
  if (!parsers || !Object.prototype.hasOwnProperty.call(parsers, mimeType)) {
    return false;
  }
  delete parsers[mimeType];
  await writeConfigStore(store);
  return true;
};