@mcptoolshop/toolshopstudio 1.1.0-toolshop

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/.dockerignore +13 -0
  2. package/.github/workflows/ci.yml +53 -0
  3. package/CHANGELOG.md +44 -0
  4. package/Dockerfile +48 -0
  5. package/LICENSE +21 -0
  6. package/README.md +110 -0
  7. package/assets/logo.png +0 -0
  8. package/dist/build-flags.d.ts +15 -0
  9. package/dist/build-flags.js +95 -0
  10. package/dist/crud.d.ts +8 -0
  11. package/dist/crud.js +76 -0
  12. package/dist/engine.test.d.ts +1 -0
  13. package/dist/engine.test.js +150 -0
  14. package/dist/exec.d.ts +14 -0
  15. package/dist/exec.js +87 -0
  16. package/dist/full.test.d.ts +1 -0
  17. package/dist/full.test.js +118 -0
  18. package/dist/generate-thumbnail.d.ts +21 -0
  19. package/dist/generate-thumbnail.js +42 -0
  20. package/dist/index.d.ts +12 -0
  21. package/dist/index.js +12 -0
  22. package/dist/pandoc/build-args.d.ts +8 -0
  23. package/dist/pandoc/build-args.js +31 -0
  24. package/dist/pandoc/convert.d.ts +38 -0
  25. package/dist/pandoc/convert.js +172 -0
  26. package/dist/pandoc/crud.d.ts +10 -0
  27. package/dist/pandoc/crud.js +80 -0
  28. package/dist/pandoc/engine.test.d.ts +1 -0
  29. package/dist/pandoc/engine.test.js +161 -0
  30. package/dist/pandoc/exec.d.ts +9 -0
  31. package/dist/pandoc/exec.js +46 -0
  32. package/dist/pandoc/full.test.d.ts +1 -0
  33. package/dist/pandoc/full.test.js +146 -0
  34. package/dist/pandoc/index.d.ts +10 -0
  35. package/dist/pandoc/index.js +10 -0
  36. package/dist/pandoc/output-polish.d.ts +21 -0
  37. package/dist/pandoc/output-polish.js +43 -0
  38. package/dist/pandoc/pipeline.test.d.ts +1 -0
  39. package/dist/pandoc/pipeline.test.js +112 -0
  40. package/dist/pandoc/preflight.d.ts +39 -0
  41. package/dist/pandoc/preflight.js +153 -0
  42. package/dist/pandoc/preset-spec.d.ts +25 -0
  43. package/dist/pandoc/preset-spec.js +74 -0
  44. package/dist/pandoc/progress.d.ts +21 -0
  45. package/dist/pandoc/progress.js +59 -0
  46. package/dist/pandoc/schemas.d.ts +137 -0
  47. package/dist/pandoc/schemas.js +44 -0
  48. package/dist/pandoc/types.d.ts +30 -0
  49. package/dist/pandoc/types.js +1 -0
  50. package/dist/pipeline.test.d.ts +1 -0
  51. package/dist/pipeline.test.js +127 -0
  52. package/dist/preflight.d.ts +32 -0
  53. package/dist/preflight.js +121 -0
  54. package/dist/preset-spec.d.ts +17 -0
  55. package/dist/preset-spec.js +117 -0
  56. package/dist/progress-parser.d.ts +33 -0
  57. package/dist/progress-parser.js +75 -0
  58. package/dist/schemas.d.ts +851 -0
  59. package/dist/schemas.js +93 -0
  60. package/dist/thumbnail.d.ts +35 -0
  61. package/dist/thumbnail.js +92 -0
  62. package/dist/transcode.d.ts +31 -0
  63. package/dist/transcode.js +183 -0
  64. package/dist/types.d.ts +33 -0
  65. package/dist/types.js +1 -0
  66. package/package.json +28 -0
  67. package/scripts/release.mjs +62 -0
  68. package/smoke.mjs +222 -0
  69. package/src/__snapshots__/engine.test.ts.snap +148 -0
  70. package/src/build-flags.ts +124 -0
  71. package/src/crud.ts +89 -0
  72. package/src/engine.test.ts +174 -0
  73. package/src/exec.ts +105 -0
  74. package/src/full.test.ts +152 -0
  75. package/src/generate-thumbnail.ts +83 -0
  76. package/src/index.ts +12 -0
  77. package/src/pandoc/build-args.ts +40 -0
  78. package/src/pandoc/convert.ts +282 -0
  79. package/src/pandoc/crud.ts +95 -0
  80. package/src/pandoc/engine.test.ts +224 -0
  81. package/src/pandoc/exec.ts +55 -0
  82. package/src/pandoc/full.test.ts +211 -0
  83. package/src/pandoc/index.ts +10 -0
  84. package/src/pandoc/output-polish.ts +60 -0
  85. package/src/pandoc/pipeline.test.ts +170 -0
  86. package/src/pandoc/preflight.ts +209 -0
  87. package/src/pandoc/preset-spec.ts +97 -0
  88. package/src/pandoc/progress.ts +71 -0
  89. package/src/pandoc/schemas.ts +54 -0
  90. package/src/pandoc/types.ts +40 -0
  91. package/src/pipeline.test.ts +167 -0
  92. package/src/preflight.ts +181 -0
  93. package/src/preset-spec.ts +136 -0
  94. package/src/progress-parser.ts +90 -0
  95. package/src/schemas.ts +107 -0
  96. package/src/thumbnail.ts +134 -0
  97. package/src/transcode.ts +272 -0
  98. package/src/types.ts +43 -0
  99. package/tsconfig.json +15 -0
@@ -0,0 +1,211 @@
1
+ import { describe, it, expect, vi } from "vitest";
2
+ import {
3
+ convertDocument,
4
+ type ConvertDocumentContext,
5
+ } from "./convert.js";
6
+ import { createPandocCRUD } from "./crud.js";
7
+ import { ensureCorrectExtension } from "./output-polish.js";
8
+ import { PANDOC_PRESET_SPECS } from "./preset-spec.js";
9
+ import type { PandocDocumentAsset } from "./schemas.js";
10
+ import type { PandocNotification } from "./types.js";
11
+ import type { PandocInputCheck, PandocAssertionResult } from "./preflight.js";
12
+ import type { PandocPresetSpec } from "./preset-spec.js";
13
+
14
+ // ── Fixtures ─────────────────────────────────────────────────────
15
+
16
/** Preflight result for a small, valid markdown input. */
const GOOD_INPUT_CHECK: PandocInputCheck = {
  ok: true,
  warnings: [],
  sizeBytes: 5_000,
  detectedFormat: "markdown",
};

/** Output assertion that passes with no warnings. */
const GOOD_ASSERTION: PandocAssertionResult = {
  ok: true,
  warnings: [],
};

/** Output assertion that fails because the output file is empty. */
const BAD_ASSERTION: PandocAssertionResult = {
  ok: false,
  warnings: ["Output file is empty — pandoc produced no content."],
};

/**
 * Build a ConvertDocumentContext whose collaborators are all stubbed,
 * with created assets forwarded to the supplied CRUD store.
 *
 * @param crud - store that `createAsset` writes into
 * @param overrides - context fields that replace the stubbed defaults
 * @returns the context plus the array that collects every notification
 */
function makeCtx(
  crud: ReturnType<typeof createPandocCRUD>,
  overrides?: Partial<ConvertDocumentContext>,
): {
  ctx: ConvertDocumentContext;
  notifications: PandocNotification[];
} {
  const notifications: PandocNotification[] = [];
  const controller = new AbortController();

  const defaults: ConvertDocumentContext = {
    signal: controller.signal,
    userId: "user1",
    notify: (notification) => notifications.push(notification),
    createAsset: async (asset) => {
      await crud.create(asset);
    },
    // Fake pandoc run: reports two progress ticks and produces nothing.
    runPandoc: vi.fn(async (_args, _signal, onProgress) => {
      onProgress(50);
      onProgress(99);
    }),
    checkInput: vi.fn(async () => GOOD_INPUT_CHECK),
    assertOutput: vi.fn(async () => GOOD_ASSERTION),
    statFile: vi.fn(async () => ({ size: 9_200 })),
  };

  // Overrides are spread last so they win over the defaults.
  return { ctx: { ...defaults, ...overrides }, notifications };
}
60
+
61
+ // ── Tests ────────────────────────────────────────────────────────
62
+
63
// End-to-end checks of convertDocument wired to a real in-memory CRUD store.
describe("pandoc full integration", () => {
  it("blog-post happy path: polish + CRUD create + correct metadata", async () => {
    const store = createPandocCRUD();
    const { ctx, notifications } = makeCtx(store);
    const expectedOutputPath = "/data/sandbox/user1/readme.html";

    const asset = await convertDocument(
      {
        inputPath: "/data/sandbox/user1/readme.md",
        outputPath: expectedOutputPath,
        preset: "blog-post",
      },
      ctx,
    );

    // Shape of the returned asset
    expect(asset.id).toBeTruthy();
    expect(asset.preset).toBe("blog-post");
    expect(asset.outputPath).toBe(expectedOutputPath);
    expect(asset.inputMetadata.format).toBe("markdown");
    expect(asset.inputMetadata.sizeBytes).toBe(5_000);
    expect(asset.outputMetadata.format).toBe("html5");
    expect(asset.outputMetadata.sizeBytes).toBe(9_200);
    expect(asset.expiresAt).toBeTruthy();
    const expiresAtMs = new Date(asset.expiresAt).getTime();
    expect(expiresAtMs).toBeGreaterThan(Date.now());

    // The asset was persisted through the CRUD store
    const persisted = await store.read(asset.id);
    expect(persisted).not.toBeNull();
    expect(persisted?.id).toBe(asset.id);

    const everything = await store.list();
    expect(everything).toHaveLength(1);

    // Filtering by a preset nobody used yields nothing
    const academicOnly = await store.list({ preset: "academic-pdf" });
    expect(academicOnly).toHaveLength(0);

    // Exactly one ready notification, carrying the stat'ed size
    const readyNotifs = notifications.filter((n) => n.type === "pandoc:ready");
    expect(readyNotifs).toHaveLength(1);
    const [ready] = readyNotifs;
    expect(ready.type === "pandoc:ready" && ready.sizeBytes).toBe(9_200);
  });

  it("newsletter fallback → correct preset in asset + warning emitted", async () => {
    // The first output assertion fails, every later one passes.
    let assertCalls = 0;
    const store = createPandocCRUD();
    const { ctx, notifications } = makeCtx(store, {
      assertOutput: vi.fn(async () => {
        assertCalls += 1;
        if (assertCalls === 1) {
          return BAD_ASSERTION;
        }
        return GOOD_ASSERTION;
      }),
    });

    const asset = await convertDocument(
      {
        inputPath: "/data/sandbox/user1/news.md",
        outputPath: "/data/sandbox/user1/news.html",
        preset: "newsletter",
      },
      ctx,
    );

    // The returned asset reports the fallback preset
    expect(asset.preset).toBe("blog-post");
    expect(asset.outputPath).toBe("/data/sandbox/user1/news.html");
    expect(asset.outputMetadata.format).toBe("html5");

    // The stored asset reports the fallback preset too
    const persisted = await store.read(asset.id);
    expect(persisted?.preset).toBe("blog-post");

    // At least one warning mentions the fallback
    const warningNotifs = notifications.filter(
      (n) => n.type === "pandoc:warning",
    );
    expect(warningNotifs.length).toBeGreaterThanOrEqual(1);
    const mentionsFallback = warningNotifs.some(
      (n) =>
        n.type === "pandoc:warning" &&
        n.warnings.some((text) => text.includes("falling back")),
    );
    expect(mentionsFallback).toBe(true);
  });

  it("dual preset smoke: blog-post + academic-pdf in same CRUD store", async () => {
    const store = createPandocCRUD();

    // First conversion: blog-post
    const blogCtx = makeCtx(store).ctx;
    const blogAsset = await convertDocument(
      {
        inputPath: "/data/sandbox/user1/post.md",
        outputPath: "/data/sandbox/user1/post.html",
        preset: "blog-post",
      },
      blogCtx,
    );

    // Second conversion: academic PDF, same store, fresh context
    const pdfCtx = makeCtx(store).ctx;
    const pdfAsset = await convertDocument(
      {
        inputPath: "/data/sandbox/user1/thesis.md",
        outputPath: "/data/sandbox/user1/thesis.pdf",
        preset: "academic-pdf",
      },
      pdfCtx,
    );

    // Both assets landed in the store
    const everything = await store.list();
    expect(everything).toHaveLength(2);

    // Each preset filter returns exactly its own asset
    const blogs = await store.list({ preset: "blog-post" });
    expect(blogs).toHaveLength(1);
    expect(blogs[0].id).toBe(blogAsset.id);

    const pdfs = await store.list({ preset: "academic-pdf" });
    expect(pdfs).toHaveLength(1);
    expect(pdfs[0].id).toBe(pdfAsset.id);

    // The two presets report different output formats
    expect(blogAsset.outputMetadata.format).toBe("html5");
    expect(pdfAsset.outputMetadata.format).toBe("pdf");
  });
});
193
+
194
+ // ── Output polish unit tests ─────────────────────────────────────
195
+
196
// Unit checks for the pure extension helper, one preset per case.
describe("ensureCorrectExtension", () => {
  it("returns unchanged if extension matches", () => {
    const polished = ensureCorrectExtension(
      "out.html",
      PANDOC_PRESET_SPECS["blog-post"],
    );
    expect(polished).toBe("out.html");
  });

  it("appends extension if missing", () => {
    const polished = ensureCorrectExtension(
      "thesis",
      PANDOC_PRESET_SPECS["academic-pdf"],
    );
    expect(polished).toBe("thesis.pdf");
  });

  it("appends correct extension if different", () => {
    // The existing ".txt" is kept; the preset extension is added after it.
    const polished = ensureCorrectExtension(
      "book.txt",
      PANDOC_PRESET_SPECS["ebook"],
    );
    expect(polished).toBe("book.txt.epub");
  });
});
@@ -0,0 +1,10 @@
1
+ export * from "./schemas.js";
2
+ export * from "./preset-spec.js";
3
+ export * from "./preflight.js";
4
+ export * from "./types.js";
5
+ export * from "./build-args.js";
6
+ export * from "./progress.js";
7
+ export * from "./exec.js";
8
+ export * from "./convert.js";
9
+ export * from "./crud.js";
10
+ export * from "./output-polish.js";
@@ -0,0 +1,60 @@
1
+ import { extname } from "node:path";
2
+ import type { PandocPresetSpec } from "./preset-spec.js";
3
+ import type { PandocOutputMetadata } from "./schemas.js";
4
+
5
+ // ── Output polish — pure post-processing helpers ─────────────────
6
+
7
/**
 * Ensure the output path has the correct extension for the preset.
 *
 * - If the path already ends with the expected extension (compared
 *   case-insensitively), it is returned unchanged.
 * - If the path ends with a bare dot ("report."), that dot is reused so the
 *   result is "report.pdf" rather than "report..pdf".
 * - Otherwise (no extension, or a different one) the expected extension is
 *   appended; an existing extension is never replaced.
 *
 * Pure function — no I/O.
 *
 * @param outputPath - requested output path
 * @param spec - preset spec; only `outputExt` (without leading dot) is read
 * @returns the path ending in `.${spec.outputExt}`
 */
export function ensureCorrectExtension(
  outputPath: string,
  spec: PandocPresetSpec,
): string {
  const expectedExt = `.${spec.outputExt}`;
  const currentExt = extname(outputPath).toLowerCase();

  if (currentExt === expectedExt) {
    return outputPath;
  }

  // extname() reports a trailing dot as the extension "."; drop it so the
  // appended extension does not start with a doubled dot.
  if (currentExt === ".") {
    return `${outputPath.slice(0, -1)}${expectedExt}`;
  }

  // Missing or different extension — append (don't replace, user may have
  // reasons for the name they chose).
  return `${outputPath}${expectedExt}`;
}
33
+
34
/**
 * Assemble the output metadata record for a finished conversion.
 *
 * Pure function — the caller supplies `sizeBytes` (e.g. from a stat call).
 *
 * @param spec - preset spec; its `to` value becomes the reported format
 * @param sizeBytes - size of the produced file in bytes
 * @returns metadata with `format` and `sizeBytes`; for the "pdf" format a
 *   `pages` key is also present, currently always `undefined`
 */
export function buildOutputMetadata(
  spec: PandocPresetSpec,
  sizeBytes: number,
): PandocOutputMetadata {
  const metadata: PandocOutputMetadata = {
    format: spec.to,
    sizeBytes,
  };

  // A page count only makes sense for PDF and would need pdfinfo or a real
  // PDF parser, so the key is present but left undefined until Phase 5+.
  if (spec.to === "pdf") {
    metadata.pages = undefined;
  }

  return metadata;
}
51
+
52
/**
 * Work out when an asset expires.
 *
 * @param ttlMs - lifetime in milliseconds, counted from the current time;
 *   defaults to 86_400_000 (24 hours)
 * @returns the expiry instant as an ISO-8601 string
 */
export function computeExpiresAt(
  ttlMs: number = 86_400_000,
): string {
  const expiryEpochMs = Date.now() + ttlMs;
  const expiry = new Date(expiryEpochMs);
  return expiry.toISOString();
}
@@ -0,0 +1,170 @@
1
+ import { describe, it, expect, vi } from "vitest";
2
+ import {
3
+ convertDocument,
4
+ type ConvertDocumentContext,
5
+ } from "./convert.js";
6
+ import type { PandocDocumentAsset } from "./schemas.js";
7
+ import type { PandocNotification } from "./types.js";
8
+ import type { PandocInputCheck, PandocAssertionResult } from "./preflight.js";
9
+ import type { PandocPresetSpec } from "./preset-spec.js";
10
+
11
+ // ── Fixtures ─────────────────────────────────────────────────────
12
+
13
/** Preflight result for a small, valid markdown input. */
const GOOD_INPUT_CHECK: PandocInputCheck = {
  ok: true,
  warnings: [],
  sizeBytes: 5_000,
  detectedFormat: "markdown",
};

/** Output assertion that passes with no warnings. */
const GOOD_ASSERTION: PandocAssertionResult = {
  ok: true,
  warnings: [],
};

/** Output assertion that fails because the output file is empty. */
const BAD_ASSERTION: PandocAssertionResult = {
  ok: false,
  warnings: ["Output file is empty — pandoc produced no content."],
};

/**
 * Build a fully stubbed ConvertDocumentContext for pipeline tests.
 *
 * @param overrides - context fields that replace the stubbed defaults
 * @returns the context, the array collecting every notification, and the
 *   array collecting every asset handed to the default `createAsset`
 */
function makeCtx(
  overrides?: Partial<ConvertDocumentContext>,
): {
  ctx: ConvertDocumentContext;
  notifications: PandocNotification[];
  assets: PandocDocumentAsset[];
} {
  const notifications: PandocNotification[] = [];
  const assets: PandocDocumentAsset[] = [];
  const controller = new AbortController();

  const defaults: ConvertDocumentContext = {
    signal: controller.signal,
    userId: "user1",
    notify: (notification) => notifications.push(notification),
    createAsset: vi.fn(async (asset) => {
      assets.push(asset);
    }),
    // Fake pandoc run: reports two progress ticks and produces nothing.
    runPandoc: vi.fn(async (_args, _signal, onProgress) => {
      onProgress(50);
      onProgress(99);
    }),
    checkInput: vi.fn(async () => GOOD_INPUT_CHECK),
    assertOutput: vi.fn(async () => GOOD_ASSERTION),
    statFile: vi.fn(async () => ({ size: 12_000 })),
  };

  // Overrides are spread last so they win over the defaults.
  return { ctx: { ...defaults, ...overrides }, notifications, assets };
}

/** Baseline blog-post request; maxOutputBytes of 0 leaves the size limit off. */
const VALID_REQ = {
  inputPath: "/data/sandbox/user1/doc.md",
  outputPath: "/data/sandbox/user1/doc.html",
  preset: "blog-post" as const,
  timeoutSeconds: 60,
  maxOutputBytes: 0,
};
68
+
69
+ // ── Tests ────────────────────────────────────────────────────────
70
+
71
// Pipeline-level checks of convertDocument with every collaborator stubbed.
describe("convertDocument pipeline", () => {
  it("blog-post happy path: produces asset with correct shape", async () => {
    const { ctx, notifications, assets } = makeCtx();

    const asset = await convertDocument(VALID_REQ, ctx);

    expect(asset.id).toBeTruthy();
    expect(asset.inputPath).toBe(VALID_REQ.inputPath);
    expect(asset.outputPath).toBe(VALID_REQ.outputPath);
    expect(asset.preset).toBe("blog-post");
    expect(asset.inputMetadata.format).toBe("markdown");
    expect(asset.inputMetadata.sizeBytes).toBe(5_000);
    expect(asset.outputMetadata.format).toBe("html5");
    expect(asset.outputMetadata.sizeBytes).toBe(12_000);
    expect(assets).toHaveLength(1);

    // Progress notifications were emitted, followed by exactly one ready
    const progressNotifs = notifications.filter(
      (n) => n.type === "pandoc:progress",
    );
    const readyNotifs = notifications.filter(
      (n) => n.type === "pandoc:ready",
    );
    expect(progressNotifs.length).toBeGreaterThan(0);
    expect(readyNotifs).toHaveLength(1);

    // At least one progress notification reports 100%
    const completed = progressNotifs.filter(
      (n) => n.type === "pandoc:progress" && n.percent === 100,
    );
    expect(completed.length).toBeGreaterThanOrEqual(1);

    // The ready notification carries the preset and the stat'ed size
    const [ready] = readyNotifs;
    expect(ready.type === "pandoc:ready" && ready.preset).toBe("blog-post");
    expect(ready.type === "pandoc:ready" && ready.sizeBytes).toBe(12_000);
  });

  it("newsletter → fallback to blog-post on assertion fail + warning emitted", async () => {
    // Call 1 (newsletter) fails the output assertion;
    // call 2 onwards (blog-post fallback) passes.
    let assertCalls = 0;
    const { ctx, notifications } = makeCtx({
      assertOutput: vi.fn(async () => {
        assertCalls += 1;
        if (assertCalls === 1) {
          return BAD_ASSERTION;
        }
        return GOOD_ASSERTION;
      }),
    });

    const newsletterReq = { ...VALID_REQ, preset: "newsletter" as const };
    const asset = await convertDocument(newsletterReq, ctx);

    expect(asset).toBeDefined();
    // The asset reports the fallback preset
    expect(asset.preset).toBe("blog-post");

    // At least one warning mentions the fallback
    const warningNotifs = notifications.filter(
      (n) => n.type === "pandoc:warning",
    );
    expect(warningNotifs.length).toBeGreaterThanOrEqual(1);
    const mentionsFallback = warningNotifs.some(
      (n) =>
        n.type === "pandoc:warning" &&
        n.warnings.some((text) => text.includes("falling back")),
    );
    expect(mentionsFallback).toBe(true);
  });

  it("AbortSignal cancels immediately", async () => {
    // The signal is already aborted before the pipeline starts.
    const controller = new AbortController();
    controller.abort();

    const { ctx } = makeCtx({ signal: controller.signal });

    await expect(convertDocument(VALID_REQ, ctx)).rejects.toThrow("Aborted");
  });

  it("maxOutputBytes preflight rejects when estimate exceeds limit", async () => {
    const { ctx } = makeCtx();

    // blog-post → html5 → factor 1.8 → 5000 * 1.8 = 9000,
    // so a 100-byte limit is far below the estimate.
    const tinyLimitReq = { ...VALID_REQ, maxOutputBytes: 100 };

    await expect(convertDocument(tinyLimitReq, ctx)).rejects.toThrow(
      "exceeds maxOutputBytes",
    );
  });
});
@@ -0,0 +1,209 @@
1
+ import { stat } from "node:fs/promises";
2
+ import { extname } from "node:path";
3
+ import type { PandocPreset } from "./schemas.js";
4
+ import type { PandocPresetSpec } from "./preset-spec.js";
5
+
6
+ // Re-export validateSandboxPath from root for convenience
7
+ export { validateSandboxPath } from "../preflight.js";
8
+
9
+ // ── Input validation ──────────────────────────────────────────────
10
+
11
/** Max input file size: 50 MB (documents shouldn't be bigger) */
const MAX_INPUT_BYTES = 50 * 1024 * 1024;

/** Allowed input extensions per format (lowercase, with leading dot) */
const ALLOWED_EXTENSIONS: Record<string, string[]> = {
  markdown: [".md", ".markdown", ".mkd", ".txt"],
  docx: [".docx"],
  latex: [".tex", ".latex"],
  html: [".html", ".htm"],
  rst: [".rst"],
  org: [".org"],
};

/** Result of the input preflight check. */
export interface PandocInputCheck {
  /** false when the file cannot be stat'ed or exceeds the size limit */
  ok: boolean;
  /** Human-readable problems; may be non-empty even when `ok` is true */
  warnings: string[];
  /** Size reported by stat, or 0 when the file could not be stat'ed */
  sizeBytes: number;
  /** Format key from ALLOWED_EXTENSIONS, "markdown" as fallback, or "unknown" on failure */
  detectedFormat: string;
}

/**
 * Validate a Pandoc input file: exists, not too large, recognized extension.
 *
 * The extension is taken from the final path segment only, so a dot in a
 * directory name or a file without any extension is never mistaken for one.
 *
 * @param filePath - path of the input document
 * @returns `ok: false` when the file cannot be stat'ed or is larger than
 *   MAX_INPUT_BYTES; otherwise `ok: true` with the detected format and any
 *   warnings (empty file, unrecognized extension). Never rejects on a stat
 *   failure — every such failure is reported as "not found".
 */
export async function checkPandocInput(
  filePath: string,
): Promise<PandocInputCheck> {
  const warnings: string[] = [];

  // Check file exists and get size
  let sizeBytes: number;
  try {
    sizeBytes = (await stat(filePath)).size;
  } catch {
    return {
      ok: false,
      warnings: [`Input file not found: "${filePath}".`],
      sizeBytes: 0,
      detectedFormat: "unknown",
    };
  }

  if (sizeBytes > MAX_INPUT_BYTES) {
    return {
      ok: false,
      warnings: [
        `Input file is ${(sizeBytes / 1024 / 1024).toFixed(1)} MB, ` +
          `exceeds maximum ${MAX_INPUT_BYTES / 1024 / 1024} MB.`,
      ],
      sizeBytes,
      detectedFormat: "unknown",
    };
  }

  // An empty file is suspicious but not fatal.
  if (sizeBytes === 0) {
    warnings.push("Input file is empty.");
  }

  // Detect format from extension. extname() returns "" when the last path
  // segment has no extension, unlike slicing from lastIndexOf("."), which
  // yields the final character (index -1) or text spanning a dotted directory.
  const ext = extname(filePath).toLowerCase();
  const matched = Object.entries(ALLOWED_EXTENSIONS).find(([, exts]) =>
    exts.includes(ext),
  );
  const detectedFormat = matched?.[0] ?? "markdown"; // default

  if (!matched) {
    const supported = Object.values(ALLOWED_EXTENSIONS).flat().join(", ");
    warnings.push(
      `Unrecognized extension "${ext}"; assuming markdown. ` +
        `Supported: ${supported}.`,
    );
  }

  return { ok: true, warnings, sizeBytes, detectedFormat };
}
89
+
90
+ // ── Output size estimation ────────────────────────────────────────
91
+
92
/** Rough output-size multipliers, keyed by pandoc output format. */
const EXPANSION_FACTORS: Record<string, number> = {
  html5: 1.8, // HTML + embedded resources
  pdf: 0.8, // PDFs are often smaller than source
  epub: 1.2, // EPUB with compression
  revealjs: 2.5, // Slides embed reveal.js
  html: 1.5, // Plain HTML
};

/**
 * Estimate how large the converted file will be.
 *
 * @param inputSizeBytes - size of the input document in bytes
 * @param preset - preset whose output format selects the multiplier
 * @returns input size times the format's multiplier (1.5 when no multiplier
 *   is known for the format), rounded up to a whole byte
 */
export function estimatePandocOutputBytes(
  inputSizeBytes: number,
  preset: PandocPreset,
): number {
  // Output format each preset produces, used only for the factor lookup.
  const presetToFormat: Record<PandocPreset, string> = {
    "blog-post": "html5",
    "academic-pdf": "pdf",
    "ebook": "epub",
    "slides": "revealjs",
    "newsletter": "html",
  };

  const outputFormat = presetToFormat[preset];
  const multiplier = EXPANSION_FACTORS[outputFormat] ?? 1.5;
  const estimate = inputSizeBytes * multiplier;
  return Math.ceil(estimate);
}
120
+
121
+ // ── Output assertions ────────────────────────────────────────────
122
+
123
/** Outcome of checking a produced output file against its preset spec. */
export interface PandocAssertionResult {
  ok: boolean;
  warnings: string[];
}

/**
 * Check a pandoc output file against the preset spec.
 *
 * Checks, in order:
 *   1. the file can be stat'ed        — failure → not ok
 *   2. the file is non-empty          — failure → not ok
 *   3. the extension matches the spec — mismatch only adds a warning
 *   4. the size is within the limit   — failure → not ok
 *
 * A failing check returns only that check's message; an extension warning
 * collected earlier is not carried into a size-limit failure.
 *
 * @param spec - preset spec; only `outputExt` (without leading dot) is read
 * @param outputPath - path of the file pandoc was asked to write
 * @param maxOutputBytes - size limit in bytes; 0 or less means unlimited
 */
export async function assertPandocOutput(
  spec: PandocPresetSpec,
  outputPath: string,
  maxOutputBytes: number,
): Promise<PandocAssertionResult> {
  const reject = (message: string): PandocAssertionResult => ({
    ok: false,
    warnings: [message],
  });

  // 1. File must be stat-able; any stat failure is reported as "not found".
  let outputSize: number;
  try {
    outputSize = (await stat(outputPath)).size;
  } catch {
    return reject(`Output file not found: "${outputPath}".`);
  }

  // 2. File must have content.
  if (outputSize === 0) {
    return reject("Output file is empty — pandoc produced no content.");
  }

  // 3. Extension mismatch is tolerated; a path without extension is skipped.
  const notes: string[] = [];
  const foundExt = extname(outputPath).slice(1).toLowerCase();
  if (foundExt !== "" && foundExt !== spec.outputExt) {
    notes.push(
      `Output extension ".${foundExt}" does not match expected ".${spec.outputExt}".`,
    );
  }

  // 4. Size limit (0 = unlimited).
  const limitEnabled = maxOutputBytes > 0;
  if (limitEnabled && outputSize > maxOutputBytes) {
    const actualMb = (outputSize / 1024 / 1024).toFixed(1);
    const limitMb = (maxOutputBytes / 1024 / 1024).toFixed(1);
    return reject(`Output is ${actualMb} MB, exceeds maximum ${limitMb} MB.`);
  }

  return { ok: true, warnings: notes };
}
185
+
186
+ // ── Input format validation (pure) ───────────────────────────────
187
+
188
/**
 * Compare the detected input format with the preset's expected --from
 * format. The result is always `ok: true`; a mismatch only adds a warning.
 *
 * @param detectedFormat - format detected for the input file
 * @param presetFrom - the preset's expected --from format
 * @returns `{ ok: true }` when the preset reads markdown or the formats are
 *   equal, otherwise `{ ok: true, warning }` describing the mismatch
 */
export function checkFormatCompatibility(
  detectedFormat: string,
  presetFrom: string,
): { ok: boolean; warning?: string } {
  // Pandoc is flexible — markdown presets accept most text formats,
  // so they are never flagged.
  const compatible =
    presetFrom === "markdown" || detectedFormat === presetFrom;
  if (compatible) {
    return { ok: true };
  }

  const warning =
    `Detected format "${detectedFormat}" differs from preset's ` +
    `expected "${presetFrom}"; pandoc will attempt conversion anyway.`;
  return { ok: true, warning };
}