@abreen/tada 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +290 -0
- package/bin/tada.js +361 -0
- package/config/authors.json +1 -0
- package/config/nav.json +28 -0
- package/content/index.md +19 -0
- package/content/lectures/01/Pair.java.md +296 -0
- package/content/lectures/01/Rectangle.java +80 -0
- package/content/lectures/01/demo.py +9 -0
- package/content/lectures/01/index.md +39 -0
- package/content/lectures/01/lecture1.pdf +0 -0
- package/content/lectures/index.md +25 -0
- package/content/markdown.md +379 -0
- package/content/problem_sets/index.md +6 -0
- package/fonts/google-sans-code/GoogleSansCodeVariable-Italic.ttf +0 -0
- package/fonts/google-sans-code/GoogleSansCodeVariable.ttf +0 -0
- package/fonts/google-sans-code/LICENSE.txt +93 -0
- package/fonts/inter/InterVariable-Italic.ttf +0 -0
- package/fonts/inter/InterVariable.ttf +0 -0
- package/fonts/inter/LICENSE.txt +92 -0
- package/package.json +70 -0
- package/public/avatars/alex.jpg +0 -0
- package/public/test.txt +1 -0
- package/src/_mixins.scss +4 -0
- package/src/anchor/README.md +6 -0
- package/src/anchor/index.ts +34 -0
- package/src/anchor/style.scss +48 -0
- package/src/code/README.md +5 -0
- package/src/code/index.ts +113 -0
- package/src/code/style.scss +101 -0
- package/src/code.scss +54 -0
- package/src/header/README.md +8 -0
- package/src/header/index.ts +43 -0
- package/src/header/style.scss +228 -0
- package/src/index.ts +73 -0
- package/src/layout.scss +144 -0
- package/src/literate/style.scss +60 -0
- package/src/print/README.md +4 -0
- package/src/print/index.ts +32 -0
- package/src/print/style.scss +82 -0
- package/src/question/README.md +3 -0
- package/src/question/index.ts +25 -0
- package/src/question/style.scss +116 -0
- package/src/search/README.md +6 -0
- package/src/search/index.ts +574 -0
- package/src/search/style.scss +217 -0
- package/src/style.scss +815 -0
- package/src/timezone/index.test.ts +100 -0
- package/src/timezone/index.ts +298 -0
- package/src/timezone/style.scss +16 -0
- package/src/timezone/timezones.json +58 -0
- package/src/toc/README.md +3 -0
- package/src/toc/index.ts +322 -0
- package/src/toc/style.scss +203 -0
- package/src/top/README.md +4 -0
- package/src/top/index.ts +75 -0
- package/src/util.ts +122 -0
- package/templates/_author.html +27 -0
- package/templates/_bottom.html +3 -0
- package/templates/_download.html +1 -0
- package/templates/_heading.html +19 -0
- package/templates/_nav.html +18 -0
- package/templates/_theme.scss +97 -0
- package/templates/_top.html +87 -0
- package/templates/authors.schema.json +13 -0
- package/templates/code.html +31 -0
- package/templates/default.html +13 -0
- package/templates/literate.html +16 -0
- package/templates/nav.schema.json +27 -0
- package/tsconfig.json +15 -0
- package/types/dev.ts +3 -0
- package/types/sass.d.ts +1 -0
- package/types/site-variables.d.ts +16 -0
- package/webpack/apply-base-path-plugin.js +78 -0
- package/webpack/build-state.js +97 -0
- package/webpack/code.test.js +162 -0
- package/webpack/colors.js +15 -0
- package/webpack/config.base.js +147 -0
- package/webpack/config.dev.js +23 -0
- package/webpack/config.prod.js +32 -0
- package/webpack/content-watch-plugin.js +153 -0
- package/webpack/deflist-id-plugin.js +62 -0
- package/webpack/external-links-plugin.js +37 -0
- package/webpack/features.js +5 -0
- package/webpack/flair.json +1 -0
- package/webpack/generate-content-assets-plugin.js +308 -0
- package/webpack/generate-favicon-plugin.js +198 -0
- package/webpack/generate-fonts-plugin.js +69 -0
- package/webpack/generate-manifest-plugin.js +116 -0
- package/webpack/globals.js +74 -0
- package/webpack/heading-subtitle-plugin.js +80 -0
- package/webpack/json-schema.js +19 -0
- package/webpack/log.js +143 -0
- package/webpack/markdown-plugins.test.js +203 -0
- package/webpack/pagefind-plugin.js +379 -0
- package/webpack/pagefind-plugin.test.js +131 -0
- package/webpack/pdf-text.js +163 -0
- package/webpack/print-flair-plugin.js +22 -0
- package/webpack/reachability.js +273 -0
- package/webpack/reachability.test.js +80 -0
- package/webpack/serve.js +104 -0
- package/webpack/site-variables.js +53 -0
- package/webpack/site.schema.json +67 -0
- package/webpack/templates.js +128 -0
- package/webpack/text-to-id.js +8 -0
- package/webpack/toc-plugin.js +167 -0
- package/webpack/util.js +49 -0
- package/webpack/utils/code.js +439 -0
- package/webpack/utils/content-files.js +147 -0
- package/webpack/utils/define-plugin.js +20 -0
- package/webpack/utils/file-types.js +26 -0
- package/webpack/utils/front-matter.js +57 -0
- package/webpack/utils/jdi-runner/LiterateRunner.class +0 -0
- package/webpack/utils/jdi-runner/LiterateRunner.java +241 -0
- package/webpack/utils/literate-java.js +153 -0
- package/webpack/utils/markdown.js +244 -0
- package/webpack/utils/parse-hsl.js +8 -0
- package/webpack/utils/paths.js +58 -0
- package/webpack/utils/render.js +466 -0
- package/webpack/utils/shiki-highlighter.js +26 -0
- package/webpack/validate-internal-links-plugin.js +155 -0
- package/webpack/watch-reachability-state.js +273 -0
- package/webpack/watch-reachability-state.test.js +198 -0
- package/webpack/watch-reload-client.js +54 -0
- package/webpack/watch.js +166 -0
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const { makeLogger } = require('./log');
|
|
4
|
+
const { collectReachableSiteAssets } = require('./reachability');
|
|
5
|
+
const ContentWatchPlugin = require('./content-watch-plugin');
|
|
6
|
+
const {
|
|
7
|
+
getBuildContentFiles,
|
|
8
|
+
getContentDir,
|
|
9
|
+
normalizeOutputPath,
|
|
10
|
+
} = require('./util');
|
|
11
|
+
const { assertMutoolAvailable, extractPdfPages } = require('./pdf-text');
|
|
12
|
+
|
|
13
|
+
const log = makeLogger(__filename);
|
|
14
|
+
const PAGEFIND_VERBOSE = process.env.TADA_LOG_LEVEL === 'debug';
|
|
15
|
+
const PAGEFIND_OUTPUT_SUBDIR = 'pagefind';
|
|
16
|
+
|
|
17
|
+
// Memoized promise for the dynamically imported `pagefind` module.
// `null` until the first load is requested, and again after a failed load.
let pagefindModulePromise = null;

/**
 * Lazily loads the `pagefind` package with a dynamic `import()` and shares the
 * resulting promise between callers.
 *
 * A failed load is not memoized: once the import rejects, the cache is cleared
 * so a later call (for example the next watch-mode rerun) retries the import
 * instead of receiving the same rejected promise forever.
 *
 * @returns {Promise<object>} Promise for the `pagefind` module namespace. It
 *   rejects when the import fails; callers are expected to handle that.
 */
function getPagefind() {
  if (!pagefindModulePromise) {
    const pending = import('pagefind');
    pagefindModulePromise = pending;
    pending.catch(() => {
      // Only clear the cache if it still holds this attempt.
      if (pagefindModulePromise === pending) {
        pagefindModulePromise = null;
      }
    });
  }
  return pagefindModulePromise;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Collapses a Pagefind error list into one human-readable message.
 *
 * @param {string} step - Label of the operation that produced the errors.
 * @param {Array<string>|null|undefined} errors - Error list from Pagefind.
 * @returns {string|null} `"<step> failed: a | b"`, or `null` when the list is
 *   missing or empty.
 */
function formatPagefindErrors(step, errors) {
  const hasErrors = Boolean(errors?.length);
  return hasErrors ? `${step} failed: ${errors.join(' | ')}` : null;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Reads one emitted asset from the output file system as a string.
 *
 * @param {object} outputFileSystem - Object exposing `readFileSync(path, encoding)`.
 * @param {string} distPath - Output directory the asset was emitted into.
 * @param {string} sourcePath - Asset path relative to `distPath`.
 * @returns {string} The asset content, coerced with `String()`.
 */
function readAssetContent(outputFileSystem, distPath, sourcePath) {
  const raw = outputFileSystem.readFileSync(
    path.join(distPath, sourcePath),
    'utf-8',
  );
  return String(raw);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Adds one HTML document to a Pagefind index and fails loudly on errors.
 *
 * @param {object} index - Pagefind index exposing `addHTMLFile()`.
 * @param {{sourcePath: string, content: string}} htmlFile - File descriptor
 *   passed straight through to `index.addHTMLFile()`.
 * @returns {Promise<void>}
 * @throws {Error} When Pagefind reports one or more errors for this file.
 */
async function addHtmlFile(index, htmlFile) {
  const result = await index.addHTMLFile(htmlFile);
  const failure = formatPagefindErrors(
    `index.addHTMLFile(${htmlFile.sourcePath})`,
    result.errors,
  );
  if (failure) {
    throw new Error(failure);
  }
}
|
|
48
|
+
|
|
49
|
+
/**
 * Adds one custom record (used here for PDF content) to a Pagefind index.
 *
 * @param {object} index - Pagefind index exposing `addCustomRecord()`.
 * @param {object} record - Record passed straight to `index.addCustomRecord()`.
 * @param {string} sourcePath - Label used only in the error message.
 * @returns {Promise<void>}
 * @throws {Error} When Pagefind reports one or more errors for this record.
 */
async function addPdfRecord(index, record, sourcePath) {
  const result = await index.addCustomRecord(record);
  const failure = formatPagefindErrors(
    `index.addCustomRecord(${sourcePath})`,
    result.errors,
  );
  if (failure) {
    throw new Error(failure);
  }
}
|
|
59
|
+
|
|
60
|
+
/**
 * Maps each PDF's site output path to its source file in the content directory.
 *
 * The content files come from `getBuildContentFiles()`; only entries whose
 * extension is `.pdf` (case-insensitive) are kept.
 *
 * @param {object} [siteVariables] - Site config; the keys of `codeLanguages`
 *   are forwarded to `getBuildContentFiles()`.
 * @returns {Map<string, string>} Normalized output path -> source file path.
 */
function getPdfSourceByOutputPath(siteVariables) {
  const contentDir = getContentDir();
  const languageNames = Object.keys(siteVariables?.codeLanguages || {});
  const sourceByOutputPath = new Map();

  for (const filePath of getBuildContentFiles(contentDir, languageNames)) {
    if (path.extname(filePath).toLowerCase() !== '.pdf') {
      continue;
    }
    const relPath = path.relative(contentDir, filePath);
    sourceByOutputPath.set(normalizeOutputPath(`/${relPath}`), filePath);
  }

  return sourceByOutputPath;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Determines which HTML pages and PDFs should be indexed.
 *
 * With no HTML assets there is nothing to crawl, so both lists are empty.
 * Otherwise the work is delegated to `collectReachableSiteAssets()`, starting
 * from `index.html`.
 *
 * @param {Map<string, string>} htmlAssetsByPath - HTML content keyed by asset path.
 * @param {object} [siteVariables] - Site config; `basePath` is read (falls back to `/`).
 * @param {Map<string, string>} pdfSourceByOutputPath - Known PDFs keyed by output path.
 * @returns {{reachableHtmlPaths: string[], reachablePdfPaths: string[]}}
 */
function collectIndexTargets(
  htmlAssetsByPath,
  siteVariables,
  pdfSourceByOutputPath,
) {
  const hasHtmlAssets = htmlAssetsByPath.size !== 0;
  if (!hasHtmlAssets) {
    return { reachableHtmlPaths: [], reachablePdfPaths: [] };
  }

  const knownPdfPaths = new Set(pdfSourceByOutputPath.keys());
  const basePath = siteVariables?.basePath || '/';

  return collectReachableSiteAssets({
    htmlAssetsByPath,
    knownPdfPaths,
    rootPath: 'index.html',
    basePath,
  });
}
|
|
94
|
+
|
|
95
|
+
/**
 * Builds a Pagefind index for the given HTML pages and PDFs and writes it to
 * `<distPath>/pagefind`.
 *
 * HTML pages are added first, then PDFs. Each PDF with extracted text yields
 * one record per page (URL suffixed with `#page=N`); a PDF without extracted
 * text yields a single record containing only its file name. PDFs are skipped
 * entirely, with a warning, when the mutool check fails. The in-memory index
 * is always deleted afterwards, whether or not building succeeded.
 *
 * @param {object} options
 * @param {string} options.distPath - Build output directory.
 * @param {Map<string, string>} options.htmlAssetsByPath - HTML content by asset path.
 * @param {string[]} options.reachableHtmlPaths - Asset paths of pages to index.
 * @param {string[]} options.reachablePdfPaths - Output paths of PDFs to index.
 * @param {Map<string, string>} options.pdfSourceByOutputPath - PDF output path -> source file.
 * @param {Function} [options.loadPagefind] - Async loader for the pagefind module.
 * @param {Function} [options.checkMutool] - Async check that rejects when mutool is unusable.
 * @param {Function} [options.extractPages] - Async extractor returning `{ pages, hasExtractedText }`.
 * @returns {Promise<void>}
 * @throws {Error} When Pagefind reports errors or does not return an index.
 */
async function buildIndex({
  distPath,
  htmlAssetsByPath,
  reachableHtmlPaths,
  reachablePdfPaths,
  pdfSourceByOutputPath,
  loadPagefind = getPagefind,
  checkMutool = assertMutoolAvailable,
  extractPages = extractPdfPages,
}) {
  const pagefind = await loadPagefind();
  const created = await pagefind.createIndex({
    keepIndexUrl: true,
    verbose: PAGEFIND_VERBOSE,
  });
  const createError = formatPagefindErrors(
    'pagefind.createIndex()',
    created.errors,
  );
  if (createError) {
    throw new Error(createError);
  }
  const { index } = created;
  if (!index) {
    throw new Error('pagefind.createIndex() did not return an index');
  }

  try {
    for (const sourcePath of reachableHtmlPaths) {
      const content = htmlAssetsByPath.get(sourcePath);
      await addHtmlFile(index, { sourcePath, content });
    }

    // Probe for mutool only when at least one PDF would need it.
    let pdfPathsToIndex = reachablePdfPaths;
    if (reachablePdfPaths.length > 0) {
      try {
        await checkMutool();
      } catch {
        pdfPathsToIndex = [];
        log.warn`mutool was not found; search results will not include PDFs`;
      }
    }

    for (const pdfPath of pdfPathsToIndex) {
      const sourceFilePath = pdfSourceByOutputPath.get(pdfPath);
      if (!sourceFilePath) {
        continue;
      }

      const extracted = await extractPages(sourceFilePath);
      const title = path.posix.basename(pdfPath);

      if (extracted.hasExtractedText) {
        for (const { pageNumber, content } of extracted.pages) {
          const pageUrl = `${pdfPath}#page=${pageNumber}`;
          await addPdfRecord(
            index,
            {
              url: pageUrl,
              content,
              language: 'en',
              meta: { title, page: String(pageNumber) },
            },
            pageUrl,
          );
        }
      } else {
        // No searchable text: index the file name so the PDF is still findable.
        await addPdfRecord(
          index,
          { url: pdfPath, content: title, language: 'en', meta: { title } },
          pdfPath,
        );
      }
    }

    const written = await index.writeFiles({
      outputPath: path.join(distPath, PAGEFIND_OUTPUT_SUBDIR),
    });
    const writeError = formatPagefindErrors('index.writeFiles()', written.errors);
    if (writeError) {
      throw new Error(writeError);
    }
  } finally {
    // Best-effort release of the in-memory index; failures here are ignored.
    await index.deleteIndex().catch(() => null);
  }
}
|
|
182
|
+
|
|
183
|
+
/**
 * Webpack plugin that builds a Pagefind search index from emitted HTML assets
 * and from PDFs found in the content directory.
 *
 * - Non-watch builds: the index is built inside the `afterEmit` hook and the
 *   hook's callback is only invoked once indexing has finished or failed.
 * - Watch builds: `afterEmit` only snapshots the emitted HTML; the index is
 *   built in the background after the `done` hook. Overlapping requests are
 *   coalesced into a single queued rerun.
 */
class PagefindPlugin {
  /**
   * @param {object} [siteVariables] - Site configuration; defaults to `{}`.
   */
  constructor(siteVariables) {
    this.siteVariables = siteVariables || {};
    // True while a background (watch-mode) index build is running.
    this.watchRunInProgress = false;
    // True when another background build was requested during a running one.
    this.watchRunQueued = false;
    // Output directory recorded by the most recent watch-mode emit.
    this.lastDistPath = null;
    // HTML content snapshot recorded by the most recent watch-mode emit.
    this.htmlCacheByAssetPath = new Map();
  }

  /**
   * Reads every emitted `.html` asset from the output file system.
   *
   * @param {object} compilation - Webpack compilation (uses `getAssets()`).
   * @param {string} distPath - Output directory.
   * @param {object} outputFileSystem - File system exposing `readFileSync`.
   * @returns {Map<string, string>} HTML content keyed by forward-slash asset path.
   */
  getHtmlAssetsByPath(compilation, distPath, outputFileSystem) {
    return new Map(
      compilation
        .getAssets()
        .filter(asset => asset.name.endsWith('.html'))
        .map(asset => {
          // Asset names may contain backslashes; keys always use '/'.
          const assetPath = asset.name.replace(/\\/g, '/');
          return [
            assetPath,
            readAssetContent(outputFileSystem, distPath, assetPath),
          ];
        }),
    );
  }

  /**
   * Computes the reachable HTML pages and PDFs for the given HTML snapshot.
   *
   * @param {Map<string, string>} htmlAssetsByPath - HTML content by asset path.
   * @returns {{reachableHtmlPaths: string[], reachablePdfPaths: string[]}}
   */
  getIndexTargets(htmlAssetsByPath) {
    return collectIndexTargets(
      htmlAssetsByPath,
      this.siteVariables,
      getPdfSourceByOutputPath(this.siteVariables),
    );
  }

  /**
   * Starts a background index build from the last watch-mode snapshot.
   *
   * If a build is already running, a single rerun is queued instead. Failures
   * are logged as warnings and never thrown, because this method runs from a
   * `setImmediate` callback where a throw would be an uncaught exception.
   *
   * @returns {void}
   */
  runWatchIndex() {
    if (this.watchRunInProgress) {
      this.watchRunQueued = true;
      log.info`Indexing is still running in the background; queueing a rerun`;
      return;
    }

    this.watchRunInProgress = true;
    this.watchRunQueued = false;
    const distPath = this.lastDistPath;
    // Copy the cache so a later emit cannot change this run's snapshot.
    const htmlAssetsByPath = new Map(this.htmlCacheByAssetPath);
    const start = Date.now();

    log.debug`Preparing search index background snapshot`;

    let pdfSourceByOutputPath;
    let reachableHtmlPaths;
    let reachablePdfPaths;
    try {
      // The content scan is inside the try block: if it throws, the
      // in-progress flag must still be cleared or indexing stays stuck.
      pdfSourceByOutputPath = getPdfSourceByOutputPath(this.siteVariables);
      ({ reachableHtmlPaths, reachablePdfPaths } = collectIndexTargets(
        htmlAssetsByPath,
        this.siteVariables,
        pdfSourceByOutputPath,
      ));
    } catch (err) {
      this.watchRunInProgress = false;
      log.warn`Pagefind failed: ${err.message}`;
      if (this.watchRunQueued) {
        this.runWatchIndex();
      }
      return;
    }

    const snapshotReadyAt = Date.now();
    log.debug`Building search index in background`;
    buildIndex({
      distPath,
      htmlAssetsByPath,
      reachableHtmlPaths,
      reachablePdfPaths,
      pdfSourceByOutputPath,
    })
      .then(() => {
        const finishedAt = Date.now();
        log.info`Background search index ready in ${finishedAt - snapshotReadyAt}ms (${finishedAt - start}ms total)`;
      })
      .catch(err => {
        const failedAt = Date.now();
        log.warn`Search index failed after ${failedAt - snapshotReadyAt}ms of indexing (${failedAt - start}ms total): ${err.message}`;
      })
      .finally(() => {
        this.watchRunInProgress = false;
        if (this.watchRunQueued) {
          log.info`Starting queued Pagefind background rerun`;
          this.runWatchIndex();
        }
      });
  }

  /**
   * Registers the plugin's `afterEmit` and `done` hooks on the compiler.
   *
   * @param {object} compiler - Webpack compiler.
   * @returns {void}
   */
  apply(compiler) {
    compiler.hooks.afterEmit.tapAsync(
      'PagefindPlugin',
      (compilation, callback) => {
        const distPath =
          compiler.options?.output?.path ||
          compiler.outputPath ||
          compilation.compiler.outputPath;
        const outputFileSystem =
          compiler.outputFileSystem ||
          compilation.compiler.outputFileSystem ||
          fs;
        const isWatch = !!compiler.watching;

        // Nothing useful to index when the compilation already failed.
        if (compilation.errors.length > 0) {
          callback();
          return;
        }

        let htmlAssetsByPath;
        try {
          htmlAssetsByPath = this.getHtmlAssetsByPath(
            compilation,
            distPath,
            outputFileSystem,
          );
        } catch (err) {
          compilation.errors.push(err);
          callback(err);
          return;
        }

        if (isWatch) {
          // Only record the snapshot; indexing happens after the `done` hook.
          this.lastDistPath = distPath;
          this.htmlCacheByAssetPath = htmlAssetsByPath;
          callback();
          return;
        }

        const start = Date.now();
        let pdfSourceByOutputPath;
        let reachableHtmlPaths;
        let reachablePdfPaths;

        log.info`Finding reachable pages for search index`;
        try {
          // The content scan is inside the try block so a failure is reported
          // through `callback(err)` instead of escaping the hook.
          pdfSourceByOutputPath = getPdfSourceByOutputPath(this.siteVariables);
          ({ reachableHtmlPaths, reachablePdfPaths } = collectIndexTargets(
            htmlAssetsByPath,
            this.siteVariables,
            pdfSourceByOutputPath,
          ));
        } catch (err) {
          compilation.errors.push(err);
          callback(err);
          return;
        }

        const snapshotReadyAt = Date.now();
        log.info`Building search index for ${reachableHtmlPaths.length} page(s) and ${reachablePdfPaths.length} PDF(s) after ${snapshotReadyAt - start}ms of snapshot prep`;
        buildIndex({
          distPath,
          htmlAssetsByPath,
          reachableHtmlPaths,
          reachablePdfPaths,
          pdfSourceByOutputPath,
        })
          .then(async () => {
            try {
              const pagefind = await getPagefind();
              await pagefind.close();
            } catch (_err) {
              // Best-effort cleanup for non-watch builds.
            }
            const finishedAt = Date.now();
            log.info`Search index built in ${finishedAt - snapshotReadyAt}ms (${finishedAt - start}ms total)`;
            callback();
          })
          .catch(err => {
            const failedAt = Date.now();
            log.error`Search indexing failed after ${failedAt - snapshotReadyAt}ms of indexing (${failedAt - start}ms total): ${err.message}`;
            compilation.errors.push(err);
            callback(err);
          });
      },
    );

    compiler.hooks.done.tap('PagefindPluginWatchRun', stats => {
      if (
        !compiler.watching ||
        stats.hasErrors() ||
        ContentWatchPlugin.needsRestart()
      ) {
        return;
      }

      // Deferred so the `done` hook itself returns before indexing starts.
      setImmediate(() => this.runWatchIndex());
    });
  }
}
|
|
376
|
+
|
|
377
|
+
module.exports = PagefindPlugin;
|
|
378
|
+
module.exports.buildIndex = buildIndex;
|
|
379
|
+
module.exports.collectIndexTargets = collectIndexTargets;
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
const path = require('path');
|
|
2
|
+
const { describe, expect, test } = require('bun:test');
|
|
3
|
+
const { buildIndex, collectIndexTargets } = require('./pagefind-plugin');
|
|
4
|
+
|
|
5
|
+
/**
 * Creates a stand-in for the pagefind module loader used by `buildIndex`.
 *
 * Every index operation succeeds (empty `errors`) and records what it was
 * given on `calls`: HTML files in `calls.htmlFiles`, custom records in
 * `calls.customRecords` (both only when the array exists), the output path in
 * `calls.outputPath`, and the number of `deleteIndex()` calls in `calls.deleted`.
 *
 * @param {object} calls - Mutable recorder object owned by the test.
 * @returns {Function} Async loader resolving to `{ createIndex }`.
 */
function createFakePagefind(calls) {
  const fakeIndex = {
    async addHTMLFile(file) {
      calls.htmlFiles?.push(file);
      const recorded = { url: file.sourcePath, meta: {} };
      return { errors: [], file: recorded };
    },
    async addCustomRecord(record) {
      calls.customRecords?.push(record);
      const recorded = { url: record.url, meta: record.meta };
      return { errors: [], file: recorded };
    },
    async writeFiles({ outputPath }) {
      calls.outputPath = outputPath;
      return { errors: [], outputPath };
    },
    async deleteIndex() {
      const previous = calls.deleted || 0;
      calls.deleted = previous + 1;
    },
  };

  return async () => ({
    async createIndex() {
      return { index: fakeIndex, errors: [] };
    },
  });
}
|
|
28
|
+
|
|
29
|
+
// Unit tests for the helpers exported by ./pagefind-plugin. Pagefind, mutool
// and PDF extraction are replaced with in-memory fakes.
describe('PagefindPlugin', () => {
  test('collectIndexTargets only includes linked PDFs from reachable HTML pages', () => {
    const htmlAssetsByPath = new Map([
      [
        'index.html',
        '<a href="/about/">About</a><a href="/docs/guide.pdf">Guide</a>',
      ],
      ['about/index.html', '<p>About</p>'],
      // Not linked from any page, so it must not be indexed.
      ['orphan/index.html', '<p>Orphan</p>'],
    ]);
    const pdfSourceByOutputPath = new Map([
      ['/docs/guide.pdf', '/tmp/docs/guide.pdf'],
      // Known to the build but never linked, so it must not be indexed.
      ['/docs/orphan.pdf', '/tmp/docs/orphan.pdf'],
    ]);

    const result = collectIndexTargets(
      htmlAssetsByPath,
      { basePath: '/' },
      pdfSourceByOutputPath,
    );

    expect(result).toEqual({
      reachableHtmlPaths: ['about/index.html', 'index.html'],
      reachablePdfPaths: ['/docs/guide.pdf'],
    });
  });

  test('buildIndex adds HTML files and per-page PDF custom records', async () => {
    const calls = { htmlFiles: [], customRecords: [], outputPath: null };

    await buildIndex({
      distPath: '/tmp/dist',
      htmlAssetsByPath: new Map([
        ['index.html', '<html><body>Home</body></html>'],
        ['about/index.html', '<html><body>About</body></html>'],
      ]),
      reachableHtmlPaths: ['index.html', 'about/index.html'],
      reachablePdfPaths: ['/docs/guide.pdf'],
      pdfSourceByOutputPath: new Map([
        ['/docs/guide.pdf', '/tmp/docs/guide.pdf'],
      ]),
      loadPagefind: createFakePagefind(calls),
      checkMutool: async () => {},
      extractPages: async filePath => ({
        pages: [
          { pageNumber: 2, content: `EXTRACTED:${path.basename(filePath)}:2` },
          { pageNumber: 5, content: `EXTRACTED:${path.basename(filePath)}:5` },
        ],
        hasExtractedText: true,
      }),
    });

    expect(calls.htmlFiles).toEqual([
      { sourcePath: 'index.html', content: '<html><body>Home</body></html>' },
      {
        sourcePath: 'about/index.html',
        content: '<html><body>About</body></html>',
      },
    ]);
    expect(calls.customRecords).toEqual([
      {
        url: '/docs/guide.pdf#page=2',
        content: 'EXTRACTED:guide.pdf:2',
        language: 'en',
        meta: { title: 'guide.pdf', page: '2' },
      },
      {
        url: '/docs/guide.pdf#page=5',
        content: 'EXTRACTED:guide.pdf:5',
        language: 'en',
        meta: { title: 'guide.pdf', page: '5' },
      },
    ]);
    // buildIndex joins the output path with the platform `path.join`, so the
    // expectation is built the same way rather than hard-coding '/' separators.
    expect(calls.outputPath).toBe(path.join('/tmp/dist', 'pagefind'));
    expect(calls.deleted).toBe(1);
  });

  test('buildIndex falls back to a single PDF record when text extraction is empty', async () => {
    const calls = { customRecords: [] };

    await buildIndex({
      distPath: '/tmp/dist',
      htmlAssetsByPath: new Map(),
      reachableHtmlPaths: [],
      reachablePdfPaths: ['/docs/guide.pdf'],
      pdfSourceByOutputPath: new Map([
        ['/docs/guide.pdf', '/tmp/docs/guide.pdf'],
      ]),
      loadPagefind: createFakePagefind(calls),
      checkMutool: async () => {},
      extractPages: async () => ({ pages: [], hasExtractedText: false }),
    });

    // Only the file name is indexed, under the PDF's own URL (no #page suffix).
    expect(calls.customRecords).toEqual([
      {
        url: '/docs/guide.pdf',
        content: 'guide.pdf',
        language: 'en',
        meta: { title: 'guide.pdf' },
      },
    ]);
  });
});
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const os = require('os');
|
|
3
|
+
const path = require('path');
|
|
4
|
+
const { spawn } = require('child_process');
|
|
5
|
+
|
|
6
|
+
let mutoolAvailabilityPromise = null;
|
|
7
|
+
|
|
8
|
+
/**
 * Renders a mutool invocation for error messages, JSON-quoting each word.
 *
 * @param {string[]} args - Arguments that were passed to mutool.
 * @returns {string} For example `"mutool" "-v"`.
 */
function formatMutoolCommand(args) {
  const quotedWords = [];
  for (const word of ['mutool', ...args]) {
    quotedWords.push(JSON.stringify(word));
  }
  return quotedWords.join(' ');
}
|
|
11
|
+
|
|
12
|
+
/**
 * Runs `mutool` with the given arguments and collects its output.
 *
 * @param {string[]} args - Command-line arguments for mutool.
 * @returns {Promise<{stdout: string, stderr: string}>} Resolves when the
 *   process closes with exit code 0.
 * @throws {Error} (as a rejection) When the process cannot be spawned — with a
 *   dedicated message for `ENOENT` — or closes with a non-zero code.
 */
function runMutool(args) {
  return new Promise((resolve, reject) => {
    const child = spawn('mutool', args, { stdio: ['ignore', 'pipe', 'pipe'] });

    const captured = { stdout: '', stderr: '' };
    let settled = false;

    for (const streamName of ['stdout', 'stderr']) {
      child[streamName].setEncoding('utf8');
      child[streamName].on('data', chunk => {
        captured[streamName] += chunk;
      });
    }

    // Wraps a handler so only the first of 'error' / 'close' settles the promise.
    const settleOnce = handler => eventValue => {
      if (settled) {
        return;
      }
      settled = true;
      handler(eventValue);
    };

    child.on(
      'error',
      settleOnce(err => {
        const notFound = err && err.code === 'ENOENT';
        reject(
          notFound
            ? new Error(
                'mutool is required for PDF text extraction but was not found on PATH',
              )
            : err,
        );
      }),
    );

    child.on(
      'close',
      settleOnce(code => {
        if (code === 0) {
          resolve({ stdout: captured.stdout, stderr: captured.stderr });
          return;
        }

        // Prefer stderr for the failure detail, falling back to stdout.
        const output = captured.stderr.trim() || captured.stdout.trim();
        const suffix = output.length > 0 ? `: ${output}` : '';
        reject(
          new Error(
            `${formatMutoolCommand(args)} failed (code ${code})${suffix}`,
          ),
        );
      }),
    );
  });
}
|
|
69
|
+
|
|
70
|
+
/**
 * Checks once that `mutool -v` runs successfully and reuses that outcome.
 *
 * The promise from the first call is cached in `mutoolAvailabilityPromise` and
 * returned to every later caller.
 *
 * @returns {Promise<undefined>} Rejects when the `mutool -v` run fails.
 */
function assertMutoolAvailable() {
  mutoolAvailabilityPromise =
    mutoolAvailabilityPromise || runMutool(['-v']).then(() => undefined);

  return mutoolAvailabilityPromise;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Lists the `page-<number>.<ext>` files in a directory, ordered by page number.
 *
 * Entries whose number part does not parse as an integer are ignored.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} ext - File extension without the leading dot.
 * @returns {Promise<Array<{fileName: string, filePath: string, pageNum: number}>>}
 */
async function listNumberedPageFiles(dir, ext) {
  const prefix = 'page-';
  const suffix = `.${ext}`;
  const numbered = [];

  for (const name of await fs.promises.readdir(dir)) {
    if (!name.startsWith(prefix) || !name.endsWith(suffix)) {
      continue;
    }

    const pageNum = Number.parseInt(
      name.slice(prefix.length, -suffix.length),
      10,
    );
    if (Number.isInteger(pageNum)) {
      numbered.push({ fileName: name, filePath: path.join(dir, name), pageNum });
    }
  }

  return numbered.sort((left, right) => left.pageNum - right.pageNum);
}
|
|
96
|
+
|
|
97
|
+
/**
 * Collapses every run of whitespace to a single space and strips both ends.
 *
 * @param {string} text - Raw extracted text.
 * @returns {string} Single-spaced text; empty when the input is only whitespace.
 */
function normalizeExtractedText(text) {
  // Splitting on whitespace runs leaves empty strings at the edges; drop them.
  const words = text.split(/\s+/).filter(word => word.length > 0);
  return words.join(' ');
}
|
|
100
|
+
|
|
101
|
+
/**
 * Converts per-page raw text into page records, skipping pages that are empty
 * after normalization.
 *
 * Page numbers are 1-based positions in `pageTexts`, so a skipped page leaves a
 * gap in the numbering rather than shifting later pages.
 *
 * @param {string[]} pageTexts - Raw text of each page, in page order.
 * @returns {{pages: Array<{pageNumber: number, content: string}>, hasExtractedText: boolean}}
 */
function buildPdfPageRecords(pageTexts) {
  const pages = [];

  pageTexts.forEach((text, position) => {
    const content = normalizeExtractedText(text);
    if (content) {
      pages.push({ pageNumber: position + 1, content });
    }
  });

  return { pages, hasExtractedText: pages.length > 0 };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Extracts the text of every page of a PDF with `mutool draw`.
 *
 * mutool writes one `page-NNNN.txt` file per page into a fresh temporary
 * directory; the files are read back in page order and turned into page
 * records. The temporary directory is removed afterwards in all cases.
 *
 * @param {string} pdfPath - Path of the PDF file to read.
 * @returns {Promise<{pages: Array<{pageNumber: number, content: string}>, hasExtractedText: boolean}>}
 * @throws {Error} (as a rejection) When mutool is unavailable or extraction fails.
 */
async function extractPdfPages(pdfPath) {
  await assertMutoolAvailable();

  const tempDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'pdf-text-'));
  const outputPattern = path.join(tempDir, 'page-%04d.txt');
  let processingError = null;

  try {
    await runMutool(['draw', '-q', '-F', 'text', '-o', outputPattern, pdfPath]);

    const pageFiles = await listNumberedPageFiles(tempDir, 'txt');
    const pendingReads = pageFiles.map(({ filePath }) =>
      fs.promises.readFile(filePath, 'utf8'),
    );
    const records = buildPdfPageRecords(await Promise.all(pendingReads));

    if (!records.hasExtractedText) {
      console.warn(
        `mutool did not extract searchable text for ${pdfPath}; indexing filename only`,
      );
    }

    return records;
  } catch (err) {
    // Remembered so a cleanup failure does not add noise on top of this error.
    processingError = err;
    throw err;
  } finally {
    await fs.promises
      .rm(tempDir, { recursive: true, force: true })
      .catch(cleanupErr => {
        if (!processingError) {
          console.warn(
            `Failed to clean up temporary PDF extraction directory ${tempDir}: ${cleanupErr.message}`,
          );
        }
      });
  }
}
|
|
158
|
+
|
|
159
|
+
module.exports = {
|
|
160
|
+
assertMutoolAvailable,
|
|
161
|
+
buildPdfPageRecords,
|
|
162
|
+
extractPdfPages,
|
|
163
|
+
};
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
const { getFlair } = require('./log');
|
|
2
|
+
const { getDistDir } = require('./util');
|
|
3
|
+
|
|
4
|
+
module.exports = {
|
|
5
|
+
apply: compiler => {
|
|
6
|
+
compiler.hooks.afterEmit.tap('AfterEmitPlugin', compilation => {
|
|
7
|
+
const isWatch =
|
|
8
|
+
!!compiler.watching ||
|
|
9
|
+
!!compiler.watchMode ||
|
|
10
|
+
!!compilation?.compiler?.watchMode;
|
|
11
|
+
if (isWatch) {
|
|
12
|
+
return;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
const distDir = getDistDir();
|
|
16
|
+
|
|
17
|
+
console.log(getFlair());
|
|
18
|
+
console.log(`The build output is available at ${distDir}`);
|
|
19
|
+
console.log('Now use `tada serve` to start a local web server');
|
|
20
|
+
});
|
|
21
|
+
},
|
|
22
|
+
};
|