@mcptoolshop/toolshopstudio 1.1.0-toolshop
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +13 -0
- package/.github/workflows/ci.yml +53 -0
- package/CHANGELOG.md +44 -0
- package/Dockerfile +48 -0
- package/LICENSE +21 -0
- package/README.md +110 -0
- package/assets/logo.png +0 -0
- package/dist/build-flags.d.ts +15 -0
- package/dist/build-flags.js +95 -0
- package/dist/crud.d.ts +8 -0
- package/dist/crud.js +76 -0
- package/dist/engine.test.d.ts +1 -0
- package/dist/engine.test.js +150 -0
- package/dist/exec.d.ts +14 -0
- package/dist/exec.js +87 -0
- package/dist/full.test.d.ts +1 -0
- package/dist/full.test.js +118 -0
- package/dist/generate-thumbnail.d.ts +21 -0
- package/dist/generate-thumbnail.js +42 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.js +12 -0
- package/dist/pandoc/build-args.d.ts +8 -0
- package/dist/pandoc/build-args.js +31 -0
- package/dist/pandoc/convert.d.ts +38 -0
- package/dist/pandoc/convert.js +172 -0
- package/dist/pandoc/crud.d.ts +10 -0
- package/dist/pandoc/crud.js +80 -0
- package/dist/pandoc/engine.test.d.ts +1 -0
- package/dist/pandoc/engine.test.js +161 -0
- package/dist/pandoc/exec.d.ts +9 -0
- package/dist/pandoc/exec.js +46 -0
- package/dist/pandoc/full.test.d.ts +1 -0
- package/dist/pandoc/full.test.js +146 -0
- package/dist/pandoc/index.d.ts +10 -0
- package/dist/pandoc/index.js +10 -0
- package/dist/pandoc/output-polish.d.ts +21 -0
- package/dist/pandoc/output-polish.js +43 -0
- package/dist/pandoc/pipeline.test.d.ts +1 -0
- package/dist/pandoc/pipeline.test.js +112 -0
- package/dist/pandoc/preflight.d.ts +39 -0
- package/dist/pandoc/preflight.js +153 -0
- package/dist/pandoc/preset-spec.d.ts +25 -0
- package/dist/pandoc/preset-spec.js +74 -0
- package/dist/pandoc/progress.d.ts +21 -0
- package/dist/pandoc/progress.js +59 -0
- package/dist/pandoc/schemas.d.ts +137 -0
- package/dist/pandoc/schemas.js +44 -0
- package/dist/pandoc/types.d.ts +30 -0
- package/dist/pandoc/types.js +1 -0
- package/dist/pipeline.test.d.ts +1 -0
- package/dist/pipeline.test.js +127 -0
- package/dist/preflight.d.ts +32 -0
- package/dist/preflight.js +121 -0
- package/dist/preset-spec.d.ts +17 -0
- package/dist/preset-spec.js +117 -0
- package/dist/progress-parser.d.ts +33 -0
- package/dist/progress-parser.js +75 -0
- package/dist/schemas.d.ts +851 -0
- package/dist/schemas.js +93 -0
- package/dist/thumbnail.d.ts +35 -0
- package/dist/thumbnail.js +92 -0
- package/dist/transcode.d.ts +31 -0
- package/dist/transcode.js +183 -0
- package/dist/types.d.ts +33 -0
- package/dist/types.js +1 -0
- package/package.json +28 -0
- package/scripts/release.mjs +62 -0
- package/smoke.mjs +222 -0
- package/src/__snapshots__/engine.test.ts.snap +148 -0
- package/src/build-flags.ts +124 -0
- package/src/crud.ts +89 -0
- package/src/engine.test.ts +174 -0
- package/src/exec.ts +105 -0
- package/src/full.test.ts +152 -0
- package/src/generate-thumbnail.ts +83 -0
- package/src/index.ts +12 -0
- package/src/pandoc/build-args.ts +40 -0
- package/src/pandoc/convert.ts +282 -0
- package/src/pandoc/crud.ts +95 -0
- package/src/pandoc/engine.test.ts +224 -0
- package/src/pandoc/exec.ts +55 -0
- package/src/pandoc/full.test.ts +211 -0
- package/src/pandoc/index.ts +10 -0
- package/src/pandoc/output-polish.ts +60 -0
- package/src/pandoc/pipeline.test.ts +170 -0
- package/src/pandoc/preflight.ts +209 -0
- package/src/pandoc/preset-spec.ts +97 -0
- package/src/pandoc/progress.ts +71 -0
- package/src/pandoc/schemas.ts +54 -0
- package/src/pandoc/types.ts +40 -0
- package/src/pipeline.test.ts +167 -0
- package/src/preflight.ts +181 -0
- package/src/preset-spec.ts +136 -0
- package/src/progress-parser.ts +90 -0
- package/src/schemas.ts +107 -0
- package/src/thumbnail.ts +134 -0
- package/src/transcode.ts +272 -0
- package/src/types.ts +43 -0
- package/tsconfig.json +15 -0
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import { describe, it, expect, vi } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
convertDocument,
|
|
4
|
+
type ConvertDocumentContext,
|
|
5
|
+
} from "./convert.js";
|
|
6
|
+
import { createPandocCRUD } from "./crud.js";
|
|
7
|
+
import { ensureCorrectExtension } from "./output-polish.js";
|
|
8
|
+
import { PANDOC_PRESET_SPECS } from "./preset-spec.js";
|
|
9
|
+
import type { PandocDocumentAsset } from "./schemas.js";
|
|
10
|
+
import type { PandocNotification } from "./types.js";
|
|
11
|
+
import type { PandocInputCheck, PandocAssertionResult } from "./preflight.js";
|
|
12
|
+
import type { PandocPresetSpec } from "./preset-spec.js";
|
|
13
|
+
|
|
14
|
+
// ── Fixtures ─────────────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
/** Input preflight result returned by the stubbed `checkInput`. */
const GOOD_INPUT_CHECK: PandocInputCheck = {
  ok: true,
  warnings: [],
  sizeBytes: 5_000,
  detectedFormat: "markdown",
};

/** Output assertion result that passes with no warnings. */
const GOOD_ASSERTION: PandocAssertionResult = { ok: true, warnings: [] };

/** Output assertion result that fails with an "empty output" warning. */
const BAD_ASSERTION: PandocAssertionResult = {
  ok: false,
  warnings: ["Output file is empty — pandoc produced no content."],
};

/**
 * Build a ConvertDocumentContext made of stubs, wired to the given CRUD store.
 *
 * @param crud - Store that `createAsset` writes every produced asset into.
 * @param overrides - Context fields that replace the stubbed defaults.
 * @returns The context and the array that collects every notification
 *   passed to `notify`.
 */
function makeCtx(
  crud: ReturnType<typeof createPandocCRUD>,
  overrides?: Partial<ConvertDocumentContext>,
): {
  ctx: ConvertDocumentContext;
  notifications: PandocNotification[];
} {
  const notifications: PandocNotification[] = [];
  const { signal } = new AbortController();

  const defaults: ConvertDocumentContext = {
    signal,
    userId: "user1",
    notify: (n) => notifications.push(n),
    createAsset: async (a) => {
      await crud.create(a);
    },
    // The stub reports two progress values (50, then 99) and resolves.
    runPandoc: vi.fn(async (_args, _signal, onProgress) => {
      for (const percent of [50, 99]) {
        onProgress(percent);
      }
    }),
    checkInput: vi.fn(async () => GOOD_INPUT_CHECK),
    assertOutput: vi.fn(async () => GOOD_ASSERTION),
    statFile: vi.fn(async () => ({ size: 9_200 })),
  };

  return { ctx: { ...defaults, ...overrides }, notifications };
}
|
|
60
|
+
|
|
61
|
+
// ── Tests ────────────────────────────────────────────────────────
|
|
62
|
+
|
|
63
|
+
// End-to-end checks of convertDocument against the CRUD store from
// createPandocCRUD, using the stubbed context built by makeCtx.
describe("pandoc full integration", () => {
  it("blog-post happy path: polish + CRUD create + correct metadata", async () => {
    const store = createPandocCRUD();
    const { ctx, notifications } = makeCtx(store);

    const result = await convertDocument(
      {
        inputPath: "/data/sandbox/user1/readme.md",
        outputPath: "/data/sandbox/user1/readme.html",
        preset: "blog-post",
      },
      ctx,
    );

    // Returned asset: identity, preset and paths
    expect(result.id).toBeTruthy();
    expect(result.preset).toBe("blog-post");
    expect(result.outputPath).toBe("/data/sandbox/user1/readme.html");

    // Returned asset: metadata mirrors the stubbed checkInput / statFile values
    const { inputMetadata, outputMetadata } = result;
    expect(inputMetadata.format).toBe("markdown");
    expect(inputMetadata.sizeBytes).toBe(5_000);
    expect(outputMetadata.format).toBe("html5");
    expect(outputMetadata.sizeBytes).toBe(9_200);

    // Expiry is set and lies in the future
    expect(result.expiresAt).toBeTruthy();
    expect(new Date(result.expiresAt).getTime()).toBeGreaterThan(Date.now());

    // The asset can be read back from the store by id
    const persisted = await store.read(result.id);
    expect(persisted).not.toBeNull();
    expect(persisted?.id).toBe(result.id);

    const everything = await store.list();
    expect(everything).toHaveLength(1);

    // Filtering by a preset that was never used yields nothing
    const academicOnly = await store.list({ preset: "academic-pdf" });
    expect(academicOnly).toHaveLength(0);

    // Exactly one "ready" notification, carrying the output size
    const readyEvents = notifications.filter((n) => n.type === "pandoc:ready");
    expect(readyEvents).toHaveLength(1);
    const [firstReady] = readyEvents;
    expect(
      firstReady.type === "pandoc:ready" && firstReady.sizeBytes,
    ).toBe(9_200);
  });

  it("newsletter fallback → correct preset in asset + warning emitted", async () => {
    // The first assertOutput call fails; every later call passes.
    const scriptedOutcomes: PandocAssertionResult[] = [BAD_ASSERTION];
    const store = createPandocCRUD();
    const { ctx, notifications } = makeCtx(store, {
      assertOutput: vi.fn(async () => scriptedOutcomes.shift() ?? GOOD_ASSERTION),
    });

    const result = await convertDocument(
      {
        inputPath: "/data/sandbox/user1/news.md",
        outputPath: "/data/sandbox/user1/news.html",
        preset: "newsletter",
      },
      ctx,
    );

    // The returned asset carries the fallback preset, not the requested one
    expect(result.preset).toBe("blog-post");
    expect(result.outputPath).toBe("/data/sandbox/user1/news.html");
    expect(result.outputMetadata.format).toBe("html5");

    // The stored asset carries the fallback preset as well
    const persisted = await store.read(result.id);
    expect(persisted?.preset).toBe("blog-post");

    // At least one warning notification mentions the fallback
    const mentionsFallback = (n: PandocNotification): boolean =>
      n.type === "pandoc:warning" &&
      n.warnings.some((w) => w.includes("falling back"));

    const warningEvents = notifications.filter(
      (n) => n.type === "pandoc:warning",
    );
    expect(warningEvents.length).toBeGreaterThanOrEqual(1);
    expect(warningEvents.some(mentionsFallback)).toBe(true);
  });

  it("dual preset smoke: blog-post + academic-pdf in same CRUD store", async () => {
    const store = createPandocCRUD();

    // First conversion: blog-post
    const blogRun = makeCtx(store);
    const blogAsset = await convertDocument(
      {
        inputPath: "/data/sandbox/user1/post.md",
        outputPath: "/data/sandbox/user1/post.html",
        preset: "blog-post",
      },
      blogRun.ctx,
    );

    // Second conversion: academic-pdf, fresh context but the same store
    const pdfRun = makeCtx(store);
    const pdfAsset = await convertDocument(
      {
        inputPath: "/data/sandbox/user1/thesis.md",
        outputPath: "/data/sandbox/user1/thesis.pdf",
        preset: "academic-pdf",
      },
      pdfRun.ctx,
    );

    // Both assets landed in the shared store
    const everything = await store.list();
    expect(everything).toHaveLength(2);

    // Each preset filter returns only its own asset
    const blogOnly = await store.list({ preset: "blog-post" });
    expect(blogOnly).toHaveLength(1);
    expect(blogOnly[0].id).toBe(blogAsset.id);

    const pdfOnly = await store.list({ preset: "academic-pdf" });
    expect(pdfOnly).toHaveLength(1);
    expect(pdfOnly[0].id).toBe(pdfAsset.id);

    // Output format follows the preset
    expect(blogAsset.outputMetadata.format).toBe("html5");
    expect(pdfAsset.outputMetadata.format).toBe("pdf");
  });
});
|
|
193
|
+
|
|
194
|
+
// ── Output polish unit tests ─────────────────────────────────────
|
|
195
|
+
|
|
196
|
+
// Unit checks for ensureCorrectExtension against the shipped preset specs.
describe("ensureCorrectExtension", () => {
  it("returns unchanged if extension matches", () => {
    const result = ensureCorrectExtension(
      "out.html",
      PANDOC_PRESET_SPECS["blog-post"],
    );
    expect(result).toBe("out.html");
  });

  it("appends extension if missing", () => {
    const result = ensureCorrectExtension(
      "thesis",
      PANDOC_PRESET_SPECS["academic-pdf"],
    );
    expect(result).toBe("thesis.pdf");
  });

  it("appends correct extension if different", () => {
    // The existing ".txt" is kept; the preset's extension is added after it.
    const result = ensureCorrectExtension(
      "book.txt",
      PANDOC_PRESET_SPECS["ebook"],
    );
    expect(result).toBe("book.txt.epub");
  });
});
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export * from "./schemas.js";
|
|
2
|
+
export * from "./preset-spec.js";
|
|
3
|
+
export * from "./preflight.js";
|
|
4
|
+
export * from "./types.js";
|
|
5
|
+
export * from "./build-args.js";
|
|
6
|
+
export * from "./progress.js";
|
|
7
|
+
export * from "./exec.js";
|
|
8
|
+
export * from "./convert.js";
|
|
9
|
+
export * from "./crud.js";
|
|
10
|
+
export * from "./output-polish.js";
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { extname } from "node:path";
|
|
2
|
+
import type { PandocPresetSpec } from "./preset-spec.js";
|
|
3
|
+
import type { PandocOutputMetadata } from "./schemas.js";
|
|
4
|
+
|
|
5
|
+
// ── Output polish — pure post-processing helpers ─────────────────
|
|
6
|
+
|
|
7
|
+
/**
 * Ensure the output path has the correct extension for the preset.
 *
 * - If the path already ends with the expected extension (compared
 *   case-insensitively), it is returned unchanged.
 * - Otherwise the expected extension is appended. An existing, different
 *   extension is kept rather than replaced ("book.txt" → "book.txt.epub"),
 *   since the user may have reasons for it.
 * - A path that ends in bare dots ("report.") has those dots dropped first,
 *   so the result is "report.pdf" rather than "report..pdf".
 *
 * Pure function — no I/O.
 *
 * @param outputPath - Requested output path.
 * @param spec - Preset spec; only `outputExt` (without leading dot) is read.
 * @returns A path guaranteed to end with `.${spec.outputExt}`, unless the
 *   input already ended with it in a different letter case.
 */
export function ensureCorrectExtension(
  outputPath: string,
  spec: PandocPresetSpec,
): string {
  const expectedExt = `.${spec.outputExt}`;
  const currentExt = extname(outputPath).toLowerCase();

  if (currentExt === expectedExt) {
    return outputPath;
  }

  // extname() reports "." for a name with a trailing dot; strip the dangling
  // dot(s) so appending does not produce a doubled separator.
  const base = currentExt === "." ? outputPath.replace(/\.+$/, "") : outputPath;

  // Missing or different extension — append (don't replace).
  return `${base}${expectedExt}`;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Assemble the PandocOutputMetadata record for a finished conversion.
 *
 * Pure function — the caller supplies `sizeBytes` (e.g. from a stat call).
 *
 * @param spec - Preset spec; its `to` value becomes the metadata `format`.
 * @param sizeBytes - Size of the produced output file in bytes.
 * @returns Metadata with `format` and `sizeBytes`; for the "pdf" format an
 *   explicit `pages: undefined` entry is present as a placeholder.
 */
export function buildOutputMetadata(
  spec: PandocPresetSpec,
  sizeBytes: number,
): PandocOutputMetadata {
  const metadata: PandocOutputMetadata = { format: spec.to, sizeBytes };

  if (spec.to === "pdf") {
    // A page count only makes sense for PDF, and computing it would need
    // pdfinfo or a real PDF parser. Left undefined until Phase 5+.
    metadata.pages = undefined;
  }

  return metadata;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Compute the asset expiration timestamp.
 *
 * @param ttlMs - Time to live in milliseconds, counted from the current
 *   `Date.now()`. Defaults to 24 hours.
 * @returns The expiry instant as an ISO-8601 string.
 */
export function computeExpiresAt(
  ttlMs: number = 24 * 60 * 60 * 1_000,
): string {
  const expiryEpochMs = Date.now() + ttlMs;
  const expiry = new Date(expiryEpochMs);
  return expiry.toISOString();
}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import { describe, it, expect, vi } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
convertDocument,
|
|
4
|
+
type ConvertDocumentContext,
|
|
5
|
+
} from "./convert.js";
|
|
6
|
+
import type { PandocDocumentAsset } from "./schemas.js";
|
|
7
|
+
import type { PandocNotification } from "./types.js";
|
|
8
|
+
import type { PandocInputCheck, PandocAssertionResult } from "./preflight.js";
|
|
9
|
+
import type { PandocPresetSpec } from "./preset-spec.js";
|
|
10
|
+
|
|
11
|
+
// ── Fixtures ─────────────────────────────────────────────────────
|
|
12
|
+
|
|
13
|
+
/** Input preflight result returned by the stubbed `checkInput`. */
const GOOD_INPUT_CHECK: PandocInputCheck = {
  ok: true,
  warnings: [],
  sizeBytes: 5_000,
  detectedFormat: "markdown",
};

/** Output assertion result that passes with no warnings. */
const GOOD_ASSERTION: PandocAssertionResult = { ok: true, warnings: [] };

/** Output assertion result that fails with an "empty output" warning. */
const BAD_ASSERTION: PandocAssertionResult = {
  ok: false,
  warnings: ["Output file is empty — pandoc produced no content."],
};

/**
 * Build a ConvertDocumentContext made entirely of stubs.
 *
 * @param overrides - Context fields that replace the stubbed defaults.
 * @returns The context, the array collecting every notification passed to
 *   `notify`, and the array collecting every asset passed to `createAsset`.
 */
function makeCtx(
  overrides?: Partial<ConvertDocumentContext>,
): {
  ctx: ConvertDocumentContext;
  notifications: PandocNotification[];
  assets: PandocDocumentAsset[];
} {
  const notifications: PandocNotification[] = [];
  const assets: PandocDocumentAsset[] = [];
  const { signal } = new AbortController();

  const defaults: ConvertDocumentContext = {
    signal,
    userId: "user1",
    notify: (n) => notifications.push(n),
    createAsset: vi.fn(async (a) => {
      assets.push(a);
    }),
    // The stub reports two progress values (50, then 99) and resolves.
    runPandoc: vi.fn(async (_args, _signal, onProgress) => {
      for (const percent of [50, 99]) {
        onProgress(percent);
      }
    }),
    checkInput: vi.fn(async () => GOOD_INPUT_CHECK),
    assertOutput: vi.fn(async () => GOOD_ASSERTION),
    statFile: vi.fn(async () => ({ size: 12_000 })),
  };

  return { ctx: { ...defaults, ...overrides }, notifications, assets };
}

/** Baseline request shared by the tests; individual tests spread and override it. */
const VALID_REQ = {
  inputPath: "/data/sandbox/user1/doc.md",
  outputPath: "/data/sandbox/user1/doc.html",
  preset: "blog-post" as const,
  timeoutSeconds: 60,
  maxOutputBytes: 0,
};
|
|
68
|
+
|
|
69
|
+
// ── Tests ────────────────────────────────────────────────────────
|
|
70
|
+
|
|
71
|
+
// Pipeline-level checks of convertDocument using the stubbed context from makeCtx.
describe("convertDocument pipeline", () => {
  it("blog-post happy path: produces asset with correct shape", async () => {
    const { ctx, notifications, assets } = makeCtx();

    const result = await convertDocument(VALID_REQ, ctx);

    // Returned asset mirrors the request and the stubbed checkInput / statFile values
    expect(result.id).toBeTruthy();
    expect(result.inputPath).toBe(VALID_REQ.inputPath);
    expect(result.outputPath).toBe(VALID_REQ.outputPath);
    expect(result.preset).toBe("blog-post");

    const { inputMetadata, outputMetadata } = result;
    expect(inputMetadata.format).toBe("markdown");
    expect(inputMetadata.sizeBytes).toBe(5_000);
    expect(outputMetadata.format).toBe("html5");
    expect(outputMetadata.sizeBytes).toBe(12_000);

    // createAsset was handed exactly one asset
    expect(assets).toHaveLength(1);

    // Progress notifications were emitted, followed by a single "ready"
    const progressEvents = notifications.filter(
      (n) => n.type === "pandoc:progress",
    );
    const readyEvents = notifications.filter(
      (n) => n.type === "pandoc:ready",
    );
    expect(progressEvents.length).toBeGreaterThan(0);
    expect(readyEvents).toHaveLength(1);

    // At least one progress notification reports 100%
    const completed = progressEvents.filter(
      (n) => n.type === "pandoc:progress" && n.percent === 100,
    );
    expect(completed.length).toBeGreaterThanOrEqual(1);

    // The "ready" notification carries the preset and output size
    const [ready] = readyEvents;
    expect(ready.type === "pandoc:ready" && ready.preset).toBe("blog-post");
    expect(ready.type === "pandoc:ready" && ready.sizeBytes).toBe(12_000);
  });

  it("newsletter → fallback to blog-post on assertion fail + warning emitted", async () => {
    // The first assertOutput call (newsletter) fails; later calls
    // (blog-post fallback) pass.
    const scriptedOutcomes: PandocAssertionResult[] = [BAD_ASSERTION];
    const { ctx, notifications } = makeCtx({
      assertOutput: vi.fn(async () => scriptedOutcomes.shift() ?? GOOD_ASSERTION),
    });

    const result = await convertDocument(
      { ...VALID_REQ, preset: "newsletter" },
      ctx,
    );

    expect(result).toBeDefined();
    // The returned asset carries the fallback preset
    expect(result.preset).toBe("blog-post");

    // At least one warning notification mentions the fallback
    const mentionsFallback = (n: PandocNotification): boolean =>
      n.type === "pandoc:warning" &&
      n.warnings.some((w) => w.includes("falling back"));

    const warningEvents = notifications.filter(
      (n) => n.type === "pandoc:warning",
    );
    expect(warningEvents.length).toBeGreaterThanOrEqual(1);
    expect(warningEvents.some(mentionsFallback)).toBe(true);
  });

  it("AbortSignal cancels immediately", async () => {
    // Abort before the conversion even starts.
    const controller = new AbortController();
    controller.abort();

    const { ctx } = makeCtx({ signal: controller.signal });
    const pending = convertDocument(VALID_REQ, ctx);

    await expect(pending).rejects.toThrow("Aborted");
  });

  it("maxOutputBytes preflight rejects when estimate exceeds limit", async () => {
    const { ctx } = makeCtx();

    // blog-post → html5 → factor 1.8 → 5000 * 1.8 = 9000,
    // so a limit of 100 bytes is far below the estimate.
    const tightlyCapped = { ...VALID_REQ, maxOutputBytes: 100 };

    await expect(
      convertDocument(tightlyCapped, ctx),
    ).rejects.toThrow("exceeds maxOutputBytes");
  });
});
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
import { stat } from "node:fs/promises";
|
|
2
|
+
import { extname } from "node:path";
|
|
3
|
+
import type { PandocPreset } from "./schemas.js";
|
|
4
|
+
import type { PandocPresetSpec } from "./preset-spec.js";
|
|
5
|
+
|
|
6
|
+
// Re-export validateSandboxPath from root for convenience
|
|
7
|
+
export { validateSandboxPath } from "../preflight.js";
|
|
8
|
+
|
|
9
|
+
// ── Input validation ──────────────────────────────────────────────
|
|
10
|
+
|
|
11
|
+
/** Max input file size: 50 MB (documents shouldn't be bigger) */
const MAX_INPUT_BYTES = 50 * 1024 * 1024;

/** Allowed input extensions per format (lower-case, with leading dot) */
const ALLOWED_EXTENSIONS: Record<string, string[]> = {
  markdown: [".md", ".markdown", ".mkd", ".txt"],
  docx: [".docx"],
  latex: [".tex", ".latex"],
  html: [".html", ".htm"],
  rst: [".rst"],
  org: [".org"],
};

/** Result of validating a Pandoc input file. */
export interface PandocInputCheck {
  /** False when the file could not be stat'ed or exceeds the size limit. */
  ok: boolean;
  /** Human-readable problems; may be non-empty even when `ok` is true. */
  warnings: string[];
  /** File size in bytes; 0 when the file could not be stat'ed. */
  sizeBytes: number;
  /** Format derived from the extension; "unknown" on hard failures. */
  detectedFormat: string;
}

/**
 * Validate a Pandoc input file: exists, not too large, recognized extension.
 *
 * Hard failures (`ok: false`): the file cannot be stat'ed, or it is larger
 * than MAX_INPUT_BYTES. Soft findings (`ok: true` plus a warning): the file
 * is empty, or its extension is not in ALLOWED_EXTENSIONS, in which case the
 * format defaults to "markdown".
 *
 * @param filePath - Path of the input document.
 * @returns The check result; stat failures are reported in the result
 *   rather than thrown.
 */
export async function checkPandocInput(
  filePath: string,
): Promise<PandocInputCheck> {
  const warnings: string[] = [];

  // Check file exists and get size. Any stat failure is reported as
  // "not found"; the underlying error is intentionally not surfaced.
  let sizeBytes: number;
  try {
    const s = await stat(filePath);
    sizeBytes = s.size;
  } catch {
    return {
      ok: false,
      warnings: [`Input file not found: "${filePath}".`],
      sizeBytes: 0,
      detectedFormat: "unknown",
    };
  }

  if (sizeBytes > MAX_INPUT_BYTES) {
    return {
      ok: false,
      warnings: [
        `Input file is ${(sizeBytes / 1024 / 1024).toFixed(1)} MB, ` +
          `exceeds maximum ${MAX_INPUT_BYTES / 1024 / 1024} MB.`,
      ],
      sizeBytes,
      detectedFormat: "unknown",
    };
  }

  if (sizeBytes === 0) {
    warnings.push("Input file is empty.");
  }

  // Detect format from extension. extname() only inspects the final path
  // segment, so an extension-less file yields "" instead of its last
  // character, and a dot in a parent directory name is never mistaken for
  // the file's extension.
  const ext = extname(filePath).toLowerCase();
  const match = Object.entries(ALLOWED_EXTENSIONS).find(([, exts]) =>
    exts.includes(ext),
  );
  const detectedFormat = match?.[0] ?? "markdown"; // default

  if (match === undefined) {
    warnings.push(
      `Unrecognized extension "${ext}"; assuming markdown. ` +
        `Supported: ${Object.values(ALLOWED_EXTENSIONS).flat().join(", ")}.`,
    );
  }

  return { ok: true, warnings, sizeBytes, detectedFormat };
}
|
|
89
|
+
|
|
90
|
+
// ── Output size estimation ────────────────────────────────────────
|
|
91
|
+
|
|
92
|
+
/** Rough output-to-input size ratios, keyed by output format */
const EXPANSION_FACTORS: Record<string, number> = {
  html5: 1.8, // HTML + embedded resources
  pdf: 0.8, // PDFs are often smaller than source
  epub: 1.2, // EPUB with compression
  revealjs: 2.5, // Slides embed reveal.js
  html: 1.5, // Plain HTML
};

/**
 * Estimate output file size in bytes based on input size and preset.
 *
 * @param inputSizeBytes - Size of the input document in bytes.
 * @param preset - Preset that selects the output format, and through it the
 *   expansion factor.
 * @returns Input size times the format's factor, rounded up to a whole
 *   byte; a factor of 1.5 is used when no factor is registered.
 */
export function estimatePandocOutputBytes(
  inputSizeBytes: number,
  preset: PandocPreset,
): number {
  // Output format each preset produces, used as the key into EXPANSION_FACTORS.
  const outputFormatByPreset: Record<PandocPreset, string> = {
    "blog-post": "html5",
    "academic-pdf": "pdf",
    "ebook": "epub",
    "slides": "revealjs",
    "newsletter": "html",
  };

  const outputFormat = outputFormatByPreset[preset];
  const multiplier = EXPANSION_FACTORS[outputFormat] ?? 1.5;
  const estimate = inputSizeBytes * multiplier;

  return Math.ceil(estimate);
}
|
|
120
|
+
|
|
121
|
+
// ── Output assertions ────────────────────────────────────────────
|
|
122
|
+
|
|
123
|
+
/** Outcome of checking a produced output file against its preset spec. */
export interface PandocAssertionResult {
  ok: boolean;
  warnings: string[];
}

/**
 * Assert that the pandoc output file matches the preset spec.
 *
 * Failing checks (`ok: false`, single warning):
 *   - the output file cannot be stat'ed
 *   - the output file is empty
 *   - the output is larger than maxOutputBytes (0 = unlimited)
 *
 * Non-failing check (`ok: true`, warning attached):
 *   - the output path has an extension that differs from the spec's
 *
 * @param spec - Preset spec; only `outputExt` is read.
 * @param outputPath - Path of the file pandoc was asked to write.
 * @param maxOutputBytes - Upper bound on output size; 0 disables the check.
 */
export async function assertPandocOutput(
  spec: PandocPresetSpec,
  outputPath: string,
  maxOutputBytes: number,
): Promise<PandocAssertionResult> {
  const rejected = (message: string): PandocAssertionResult => ({
    ok: false,
    warnings: [message],
  });
  const inMegabytes = (bytes: number): string =>
    (bytes / 1024 / 1024).toFixed(1);

  // Existence and size in one stat call; any stat failure counts as missing.
  let outputSize: number;
  try {
    outputSize = (await stat(outputPath)).size;
  } catch {
    return rejected(`Output file not found: "${outputPath}".`);
  }

  if (outputSize === 0) {
    return rejected("Output file is empty — pandoc produced no content.");
  }

  // Size ceiling (0 = unlimited)
  const sizeLimited = maxOutputBytes > 0;
  if (sizeLimited && outputSize > maxOutputBytes) {
    return rejected(
      `Output is ${inMegabytes(outputSize)} MB, ` +
        `exceeds maximum ${inMegabytes(maxOutputBytes)} MB.`,
    );
  }

  // Extension mismatch is only a warning; a path without any extension
  // is not flagged.
  const warnings: string[] = [];
  const actualExt = extname(outputPath).slice(1).toLowerCase();
  if (actualExt !== "" && actualExt !== spec.outputExt) {
    warnings.push(
      `Output extension ".${actualExt}" does not match expected ".${spec.outputExt}".`,
    );
  }

  return { ok: true, warnings };
}
|
|
185
|
+
|
|
186
|
+
// ── Input format validation (pure) ───────────────────────────────
|
|
187
|
+
|
|
188
|
+
/**
 * Compare the detected input format with the preset's expected --from
 * format.
 *
 * The result is always `ok: true`; a mismatch only adds a `warning`.
 * Presets that read markdown never warn.
 *
 * @param detectedFormat - Format detected for the input file.
 * @param presetFrom - The preset's expected --from format.
 * @returns `{ ok: true }`, plus a `warning` when the two formats differ and
 *   the preset does not read markdown.
 */
export function checkFormatCompatibility(
  detectedFormat: string,
  presetFrom: string,
): { ok: boolean; warning?: string } {
  // Pandoc is flexible — markdown presets accept most text formats,
  // so only a non-markdown preset with a differing input is worth a warning.
  const needsWarning =
    presetFrom !== "markdown" && detectedFormat !== presetFrom;

  if (!needsWarning) {
    return { ok: true };
  }

  const warning =
    `Detected format "${detectedFormat}" differs from preset's ` +
    `expected "${presetFrom}"; pandoc will attempt conversion anyway.`;

  return { ok: true, warning };
}
|