@abreen/tada 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -33
- package/bin/tada.ts +356 -0
- package/bin/validators.test.ts +204 -0
- package/bin/validators.ts +83 -0
- package/{webpack/apply-base-path-plugin.js → build/apply-base-path-plugin.ts} +16 -7
- package/build/bundle.ts +117 -0
- package/{webpack/code.test.js → build/code.test.ts} +6 -7
- package/build/colors.ts +25 -0
- package/build/content-watch.ts +107 -0
- package/build/copy.ts +118 -0
- package/{webpack/deflist-id-plugin.js → build/deflist-id-plugin.ts} +7 -6
- package/{webpack/external-links-plugin.js → build/external-links-plugin.ts} +14 -5
- package/build/features.ts +11 -0
- package/build/generate-content-assets.ts +315 -0
- package/build/generate-favicon.ts +165 -0
- package/build/generate-fonts.ts +31 -0
- package/{webpack/generate-manifest-plugin.js → build/generate-manifest.ts} +29 -36
- package/build/globals.test.ts +101 -0
- package/{webpack/globals.js → build/globals.ts} +28 -13
- package/{webpack/heading-subtitle-plugin.js → build/heading-subtitle-plugin.ts} +4 -2
- package/build/json-schema.test.ts +57 -0
- package/build/json-schema.ts +33 -0
- package/build/log.test.ts +111 -0
- package/build/log.ts +167 -0
- package/{webpack/markdown-plugins.test.js → build/markdown-plugins.test.ts} +94 -9
- package/{webpack/pagefind-plugin.test.js → build/pagefind.test.ts} +74 -13
- package/build/pagefind.ts +339 -0
- package/{webpack/pdf-text.js → build/pdf-text.ts} +47 -27
- package/build/pipeline.ts +93 -0
- package/{webpack/reachability.test.js → build/reachability.test.ts} +3 -3
- package/{webpack/reachability.js → build/reachability.ts} +77 -34
- package/build/serve.ts +112 -0
- package/{webpack/site-variables.js → build/site-variables.ts} +22 -15
- package/{webpack → build}/site.schema.json +3 -10
- package/{webpack/templates.js → build/templates.ts} +35 -33
- package/{webpack/text-to-id.js → build/text-to-id.ts} +2 -2
- package/build/toc-plugin.test.ts +105 -0
- package/{webpack/toc-plugin.js → build/toc-plugin.ts} +32 -13
- package/build/types.ts +172 -0
- package/build/util.ts +26 -0
- package/{webpack/utils/code.js → build/utils/code.ts} +119 -60
- package/{webpack/utils/content-files.js → build/utils/content-files.ts} +40 -35
- package/build/utils/derive-theme.test.ts +111 -0
- package/build/utils/derive-theme.ts +85 -0
- package/build/utils/file-types.test.ts +61 -0
- package/build/utils/file-types.ts +13 -0
- package/build/utils/front-matter.test.ts +80 -0
- package/{webpack/utils/front-matter.js → build/utils/front-matter.ts} +22 -9
- package/{webpack → build}/utils/jdi-runner/LiterateRunner.java +1 -1
- package/{webpack/utils/literate-java.js → build/utils/literate-java.ts} +63 -34
- package/{webpack/utils/markdown.js → build/utils/markdown.ts} +94 -49
- package/build/utils/paths.test.ts +91 -0
- package/{webpack/utils/paths.js → build/utils/paths.ts} +14 -22
- package/{webpack/utils/render.js → build/utils/render.ts} +188 -123
- package/build/utils/shiki-highlighter.ts +29 -0
- package/build/validate-internal-links-plugin.test.ts +106 -0
- package/{webpack/validate-internal-links-plugin.js → build/validate-internal-links-plugin.ts} +47 -20
- package/{webpack/watch-reachability-state.test.js → build/watch-reachability-state.test.ts} +8 -8
- package/{webpack/watch-reachability-state.js → build/watch-reachability-state.ts} +63 -24
- package/{webpack/watch-reload-client.js → build/watch-reload-client.ts} +3 -1
- package/build/watch.ts +573 -0
- package/content/index.md +9 -3
- package/content/markdown.md +2 -1
- package/content/problem_sets/index.html +14 -0
- package/fonts/google-sans-code/woff2/GoogleSansCodeVariable-Italic.woff2 +0 -0
- package/fonts/google-sans-code/woff2/GoogleSansCodeVariable.woff2 +0 -0
- package/fonts/inter/woff2/InterVariable-Italic.woff2 +0 -0
- package/fonts/inter/woff2/InterVariable.woff2 +0 -0
- package/package.json +28 -19
- package/src/_alerts.scss +92 -0
- package/src/_base.scss +106 -0
- package/src/{layout.scss → _layout.scss} +0 -2
- package/src/anchor/style.scss +1 -9
- package/src/code/index.ts +3 -3
- package/src/code.scss +1 -1
- package/src/critical.scss +5 -0
- package/src/header/_base.scss +129 -0
- package/src/header/style.scss +3 -131
- package/src/index.ts +1 -2
- package/src/question/style.scss +1 -1
- package/src/search/index.ts +36 -15
- package/src/search/style.scss +9 -15
- package/src/style.scss +6 -269
- package/src/toc/style.scss +5 -39
- package/src/util.ts +8 -5
- package/templates/_theme.scss +38 -14
- package/tsconfig.json +10 -6
- package/types/file-system-access.d.ts +5 -0
- package/types/markdown-it-plugins.d.ts +11 -0
- package/types/untyped-modules.d.ts +40 -0
- package/bin/tada.js +0 -361
- package/content/problem_sets/index.md +0 -6
- package/webpack/build-state.js +0 -97
- package/webpack/colors.js +0 -15
- package/webpack/config.base.js +0 -151
- package/webpack/config.dev.js +0 -23
- package/webpack/config.prod.js +0 -32
- package/webpack/content-watch-plugin.js +0 -153
- package/webpack/features.js +0 -5
- package/webpack/generate-content-assets-plugin.js +0 -308
- package/webpack/generate-favicon-plugin.js +0 -198
- package/webpack/generate-fonts-plugin.js +0 -69
- package/webpack/json-schema.js +0 -19
- package/webpack/log.js +0 -143
- package/webpack/pagefind-plugin.js +0 -379
- package/webpack/print-flair-plugin.js +0 -22
- package/webpack/serve.js +0 -104
- package/webpack/util.js +0 -49
- package/webpack/utils/define-plugin.js +0 -20
- package/webpack/utils/file-types.js +0 -26
- package/webpack/utils/parse-hsl.js +0 -8
- package/webpack/utils/shiki-highlighter.js +0 -26
- package/webpack/watch.js +0 -166
- /package/{webpack → build}/flair.json +0 -0
- /package/{webpack → build}/utils/jdi-runner/LiterateRunner.class +0 -0
- /package/fonts/google-sans-code/{GoogleSansCodeVariable-Italic.ttf → ttf/GoogleSansCodeVariable-Italic.ttf} +0 -0
- /package/fonts/google-sans-code/{GoogleSansCodeVariable.ttf → ttf/GoogleSansCodeVariable.ttf} +0 -0
- /package/fonts/inter/{InterVariable-Italic.ttf → ttf/InterVariable-Italic.ttf} +0 -0
- /package/fonts/inter/{InterVariable.ttf → ttf/InterVariable.ttf} +0 -0
- /package/types/{dev.ts → dev.d.ts} +0 -0
|
@@ -1,18 +1,36 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import { describe, expect, test } from 'bun:test';
|
|
3
|
+
import { buildIndex, collectIndexTargets } from './pagefind.js';
|
|
4
|
+
import type { SiteVariables } from './types.js';
|
|
4
5
|
|
|
5
|
-
|
|
6
|
+
/**
 * Mutable record of what the fake Pagefind index was asked to do.
 * Every member is optional so a test can opt in to only the calls it
 * wants to observe.
 */
interface FakePagefindCalls {
  /** Receives every file handed to the fake `addHTMLFile`. */
  htmlFiles?: Array<{ sourcePath: string; content: string }>;
  /** Receives every record handed to the fake `addCustomRecord`. */
  customRecords?: Array<{
    url: string;
    content: string;
    language: string;
    meta: Record<string, string>;
  }>;
  /** Set to the `outputPath` given to the fake `writeFiles`. */
  outputPath?: string | null;
  /** NOTE(review): presumably counts `deleteIndex` calls; the fake's implementation is not visible here. */
  deleted?: number;
}
|
|
17
|
+
|
|
18
|
+
function createFakePagefind(calls: FakePagefindCalls) {
|
|
6
19
|
const fakeIndex = {
|
|
7
|
-
addHTMLFile: async file => {
|
|
20
|
+
addHTMLFile: async (file: { sourcePath: string; content: string }) => {
|
|
8
21
|
calls.htmlFiles?.push(file);
|
|
9
22
|
return { errors: [], file: { url: file.sourcePath, meta: {} } };
|
|
10
23
|
},
|
|
11
|
-
addCustomRecord: async record
|
|
24
|
+
addCustomRecord: async (record: {
|
|
25
|
+
url: string;
|
|
26
|
+
content: string;
|
|
27
|
+
language: string;
|
|
28
|
+
meta: Record<string, string>;
|
|
29
|
+
}) => {
|
|
12
30
|
calls.customRecords?.push(record);
|
|
13
31
|
return { errors: [], file: { url: record.url, meta: record.meta } };
|
|
14
32
|
},
|
|
15
|
-
writeFiles: async ({ outputPath }) => {
|
|
33
|
+
writeFiles: async ({ outputPath }: { outputPath: string }) => {
|
|
16
34
|
calls.outputPath = outputPath;
|
|
17
35
|
return { errors: [], outputPath };
|
|
18
36
|
},
|
|
@@ -21,9 +39,10 @@ function createFakePagefind(calls) {
|
|
|
21
39
|
},
|
|
22
40
|
};
|
|
23
41
|
|
|
24
|
-
return
|
|
25
|
-
|
|
26
|
-
|
|
42
|
+
return (() =>
|
|
43
|
+
Promise.resolve({
|
|
44
|
+
createIndex: async () => ({ index: fakeIndex, errors: [] }),
|
|
45
|
+
})) as unknown as () => Promise<typeof import('pagefind')>;
|
|
27
46
|
}
|
|
28
47
|
|
|
29
48
|
describe('PagefindPlugin', () => {
|
|
@@ -43,7 +62,7 @@ describe('PagefindPlugin', () => {
|
|
|
43
62
|
|
|
44
63
|
const result = collectIndexTargets(
|
|
45
64
|
htmlAssetsByPath,
|
|
46
|
-
{ basePath: '/' },
|
|
65
|
+
{ base: '', basePath: '/' } as SiteVariables,
|
|
47
66
|
pdfSourceByOutputPath,
|
|
48
67
|
);
|
|
49
68
|
|
|
@@ -54,7 +73,11 @@ describe('PagefindPlugin', () => {
|
|
|
54
73
|
});
|
|
55
74
|
|
|
56
75
|
test('buildIndex adds HTML files and per-page PDF custom records', async () => {
|
|
57
|
-
const calls = {
|
|
76
|
+
const calls: FakePagefindCalls = {
|
|
77
|
+
htmlFiles: [],
|
|
78
|
+
customRecords: [],
|
|
79
|
+
outputPath: null,
|
|
80
|
+
};
|
|
58
81
|
|
|
59
82
|
await buildIndex({
|
|
60
83
|
distPath: '/tmp/dist',
|
|
@@ -103,8 +126,46 @@ describe('PagefindPlugin', () => {
|
|
|
103
126
|
expect(calls.deleted).toBe(1);
|
|
104
127
|
});
|
|
105
128
|
|
|
129
|
+
test('buildIndex prepends filename to page 1 content for searchability', async () => {
  // Two pages of extracted text: only the first record should carry the
  // PDF's file name in front of its content.
  const pdfOutputPath = '/docs/lecture1.pdf';
  const extraction = {
    pages: [
      { pageNumber: 1, content: 'Welcome to the course' },
      { pageNumber: 2, content: 'Chapter one' },
    ],
    hasExtractedText: true,
  };
  const calls: FakePagefindCalls = { htmlFiles: [], customRecords: [] };

  await buildIndex({
    distPath: '/tmp/dist',
    htmlAssetsByPath: new Map(),
    reachableHtmlPaths: [],
    reachablePdfPaths: [pdfOutputPath],
    pdfSourceByOutputPath: new Map([[pdfOutputPath, '/tmp/docs/lecture1.pdf']]),
    loadPagefind: createFakePagefind(calls),
    checkMutool: async () => {},
    extractPages: async () => extraction,
  });

  const firstPageRecord = {
    url: '/docs/lecture1.pdf#page=1',
    content: 'lecture1.pdf Welcome to the course',
    language: 'en',
    meta: { title: 'lecture1.pdf', page: '1' },
  };
  const secondPageRecord = {
    url: '/docs/lecture1.pdf#page=2',
    content: 'Chapter one',
    language: 'en',
    meta: { title: 'lecture1.pdf', page: '2' },
  };
  expect(calls.customRecords).toEqual([firstPageRecord, secondPageRecord]);
});
|
|
166
|
+
|
|
106
167
|
test('buildIndex falls back to a single PDF record when text extraction is empty', async () => {
|
|
107
|
-
const calls = { customRecords: [] };
|
|
168
|
+
const calls: FakePagefindCalls = { customRecords: [] };
|
|
108
169
|
|
|
109
170
|
await buildIndex({
|
|
110
171
|
distPath: '/tmp/dist',
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import { makeLogger } from './log.js';
|
|
3
|
+
import { collectReachableSiteAssets } from './reachability.js';
|
|
4
|
+
import {
|
|
5
|
+
getContentDir,
|
|
6
|
+
getFilesByExtensions,
|
|
7
|
+
normalizeOutputPath,
|
|
8
|
+
} from './util.js';
|
|
9
|
+
import { assertMutoolAvailable, extractPdfPages } from './pdf-text.js';
|
|
10
|
+
import type { SiteVariables } from './types.js';
|
|
11
|
+
|
|
12
|
+
/** Logger created from this file's name. */
const log = makeLogger(__filename);

/** Pagefind is asked for verbose output only when TADA_LOG_LEVEL is "debug". */
const PAGEFIND_VERBOSE = process.env.TADA_LOG_LEVEL === 'debug';

/** Directory, relative to the dist path, that receives the written index. */
const PAGEFIND_OUTPUT_SUBDIR = 'pagefind';

/** Shape of the dynamically imported `pagefind` package. */
type PagefindModule = typeof import('pagefind');

/** The `index` member of whatever `createIndex()` resolves to (may be absent on failure). */
type PagefindCreateIndexResult = Awaited<ReturnType<PagefindModule['createIndex']>>;
type PagefindIndex = PagefindCreateIndexResult['index'];
|
|
20
|
+
|
|
21
|
+
/** Cached promise for the `pagefind` module; stays null until the first request. */
let pagefindModulePromise: Promise<PagefindModule> | null = null;

/**
 * Returns the `pagefind` module, starting the dynamic import on the first
 * call and handing back that same promise on every later call.
 */
function getPagefind(): Promise<PagefindModule> {
  pagefindModulePromise ??= import('pagefind');
  return pagefindModulePromise;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Turns the `errors` array reported by a Pagefind call into one message.
 *
 * @param step - Label of the call that produced the errors.
 * @param errors - Error strings reported by Pagefind, if any.
 * @returns `"<step> failed: a | b"` when there is at least one error,
 *   otherwise `null`.
 */
function formatPagefindErrors(
  step: string,
  errors: string[] | undefined,
): string | null {
  const messages = errors ?? [];
  return messages.length > 0
    ? `${step} failed: ${messages.join(' | ')}`
    : null;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Adds one HTML document to the index.
 *
 * @param index - Pagefind index to add to.
 * @param htmlFile - Source path and HTML content of the document.
 * @throws Error when Pagefind reports any error for this file; the message
 *   names the file's source path.
 */
async function addHtmlFile(
  index: NonNullable<PagefindIndex>,
  htmlFile: { sourcePath: string; content: string },
): Promise<void> {
  const outcome = await index.addHTMLFile(htmlFile);
  const failure = formatPagefindErrors(
    `index.addHTMLFile(${htmlFile.sourcePath})`,
    outcome.errors,
  );
  if (failure !== null) {
    throw new Error(failure);
  }
}
|
|
53
|
+
|
|
54
|
+
/**
 * Adds one custom record (used here for PDF content) to the index.
 *
 * @param index - Pagefind index to add to.
 * @param record - URL, searchable content, language and metadata to store.
 * @param sourcePath - Label used only in the error message to identify the
 *   record.
 * @throws Error when Pagefind reports any error for this record.
 */
async function addPdfRecord(
  index: NonNullable<PagefindIndex>,
  record: {
    url: string;
    content: string;
    language: string;
    meta: Record<string, string>;
  },
  sourcePath: string,
): Promise<void> {
  const outcome = await index.addCustomRecord(record);
  const failure = formatPagefindErrors(
    `index.addCustomRecord(${sourcePath})`,
    outcome.errors,
  );
  if (failure !== null) {
    throw new Error(failure);
  }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Scans the content directory for PDF files.
 *
 * @returns A map whose keys are the normalized output paths (the file's
 *   path relative to the content directory, prefixed with `/`) and whose
 *   values are the file paths reported by the scan.
 */
function getPdfSourceByOutputPath(): Map<string, string> {
  const contentDir = getContentDir();
  const pdfFiles: string[] = getFilesByExtensions(contentDir, ['pdf']);

  const sourceByOutputPath = new Map<string, string>();
  for (const filePath of pdfFiles) {
    const relativePath = path.relative(contentDir, filePath);
    sourceByOutputPath.set(normalizeOutputPath(`/${relativePath}`), filePath);
  }
  return sourceByOutputPath;
}
|
|
85
|
+
|
|
86
|
+
/** Output paths of the HTML pages and PDFs that should be indexed. */
interface IndexTargets {
  reachableHtmlPaths: string[];
  reachablePdfPaths: string[];
}

/**
 * Works out which HTML pages and PDFs are reachable from `index.html`.
 *
 * @param htmlAssetsByPath - Generated HTML keyed by asset path.
 * @param siteVariables - Site configuration; only `basePath` is read, and
 *   `/` is used when it is missing or empty.
 * @param pdfSourceByOutputPath - Known PDFs keyed by output path.
 * @returns Empty lists when there are no HTML assets at all; otherwise the
 *   result of the reachability walk.
 */
function collectIndexTargets(
  htmlAssetsByPath: Map<string, string>,
  siteVariables: SiteVariables,
  pdfSourceByOutputPath: Map<string, string>,
): IndexTargets {
  const nothingToIndex = htmlAssetsByPath.size === 0;
  if (nothingToIndex) {
    return { reachableHtmlPaths: [], reachablePdfPaths: [] };
  }

  const knownPdfPaths = new Set(pdfSourceByOutputPath.keys());
  const basePath = siteVariables?.basePath || '/';

  return collectReachableSiteAssets({
    htmlAssetsByPath,
    knownPdfPaths,
    rootPath: 'index.html',
    basePath,
  });
}
|
|
107
|
+
|
|
108
|
+
/**
 * Inputs for `buildIndex`. `loadPagefind`, `checkMutool` and `extractPages`
 * are injection points that default to the real implementations.
 */
interface BuildIndexOptions {
  distPath: string;
  htmlAssetsByPath: Map<string, string>;
  reachableHtmlPaths: string[];
  reachablePdfPaths: string[];
  pdfSourceByOutputPath: Map<string, string>;
  loadPagefind?: () => Promise<PagefindModule>;
  checkMutool?: () => Promise<void>;
  extractPages?: typeof extractPdfPages;
}

/**
 * Builds the search index and writes it to `<distPath>/pagefind`.
 *
 * Each reachable HTML page is added as an HTML file. Each reachable PDF
 * that has a known source file is added as custom records: one record per
 * page with extracted text (URL `<pdf>#page=N`), or a single record holding
 * only the file name when no text could be extracted. The PDF's file name
 * is prepended to the content of its first emitted page record so the file
 * name itself is searchable, even when page 1 yielded no text.
 *
 * If the mutool check fails, a warning is logged and PDFs are left out of
 * the index. The in-memory index is always deleted afterwards, and errors
 * from that deletion are ignored.
 *
 * @throws Error when Pagefind reports errors while creating the index,
 *   adding content or writing files, or when a reachable HTML path has no
 *   content in `htmlAssetsByPath`.
 */
async function buildIndex({
  distPath,
  htmlAssetsByPath,
  reachableHtmlPaths,
  reachablePdfPaths,
  pdfSourceByOutputPath,
  loadPagefind = getPagefind,
  checkMutool = assertMutoolAvailable,
  extractPages = extractPdfPages,
}: BuildIndexOptions): Promise<void> {
  const pagefind = await loadPagefind();
  const { index, errors: createErrors } = await pagefind.createIndex({
    keepIndexUrl: true,
    verbose: PAGEFIND_VERBOSE,
  });
  const createError = formatPagefindErrors(
    'pagefind.createIndex()',
    createErrors,
  );
  if (createError) {
    throw new Error(createError);
  }
  if (!index) {
    throw new Error('pagefind.createIndex() did not return an index');
  }

  try {
    for (const sourcePath of reachableHtmlPaths) {
      const content = htmlAssetsByPath.get(sourcePath);
      // An empty string is valid HTML content; only a missing entry is an error.
      if (content === undefined) {
        throw new Error(
          `No HTML content is available for reachable page ${sourcePath}`,
        );
      }
      await addHtmlFile(index, { sourcePath, content });
    }

    // Only probe for mutool when there is at least one PDF to index.
    let mutoolAvailable = true;
    if (reachablePdfPaths.length > 0) {
      try {
        await checkMutool();
      } catch {
        mutoolAvailable = false;
        log.warn`mutool was not found; search results will not include PDFs`;
      }
    }

    for (const pdfPath of mutoolAvailable ? reachablePdfPaths : []) {
      const sourceFilePath = pdfSourceByOutputPath.get(pdfPath);
      if (!sourceFilePath) {
        continue;
      }

      const { pages, hasExtractedText } = await extractPages(sourceFilePath);
      const title = path.posix.basename(pdfPath);

      if (!hasExtractedText) {
        // No text at all: index the file name so the PDF can still be found.
        await addPdfRecord(
          index,
          { url: pdfPath, content: title, language: 'en', meta: { title } },
          pdfPath,
        );
        continue;
      }

      // Pages without text are absent from `pages`, so key the file-name
      // prefix off the first emitted record rather than off page number 1.
      let isFirstRecord = true;
      for (const page of pages) {
        const content = isFirstRecord
          ? `${title} ${page.content}`
          : page.content;
        isFirstRecord = false;

        const url = `${pdfPath}#page=${page.pageNumber}`;
        await addPdfRecord(
          index,
          {
            url,
            content,
            language: 'en',
            meta: { title, page: String(page.pageNumber) },
          },
          url,
        );
      }
    }

    const { errors: writeErrors } = await index.writeFiles({
      outputPath: path.join(distPath, PAGEFIND_OUTPUT_SUBDIR),
    });
    const writeError = formatPagefindErrors('index.writeFiles()', writeErrors);
    if (writeError) {
      throw new Error(writeError);
    }
  } finally {
    // Best-effort release of the in-memory index; failures here are ignored.
    await index.deleteIndex().catch(() => null);
  }
}
|
|
208
|
+
|
|
209
|
+
/** Inputs for `runPagefind`. */
interface RunPagefindOptions {
  siteVariables: SiteVariables;
  distPath: string;
  htmlAssetsByPath: Map<string, string>;
}

/**
 * Builds the search index once: finds the reachable pages and PDFs, logs a
 * summary, builds and writes the index, and then closes the Pagefind
 * module.
 *
 * Closing is attempted whether or not the build succeeded, and errors from
 * closing are ignored. Errors from the build itself propagate to the
 * caller.
 */
export async function runPagefind({
  siteVariables,
  distPath,
  htmlAssetsByPath,
}: RunPagefindOptions): Promise<void> {
  const pdfSourceByOutputPath = getPdfSourceByOutputPath();
  const start = Date.now();

  log.debug`Finding reachable pages for search index`;
  const { reachableHtmlPaths, reachablePdfPaths } = collectIndexTargets(
    htmlAssetsByPath,
    siteVariables,
    pdfSourceByOutputPath,
  );

  const snapshotReadyAt = Date.now();

  let noun = reachableHtmlPaths.length === 1 ? 'page' : 'pages';
  let message = `Building search index for ${reachableHtmlPaths.length} ${noun}`;
  if (reachablePdfPaths.length > 0) {
    noun = reachablePdfPaths.length === 1 ? 'PDF' : 'PDFs';
    message += ` and ${reachablePdfPaths.length} ${noun}`;
  }
  log.info`${message}`;

  try {
    await buildIndex({
      distPath,
      htmlAssetsByPath,
      reachableHtmlPaths,
      reachablePdfPaths,
      pdfSourceByOutputPath,
    });
  } finally {
    // Close on the failure path too, so a failed build does not leave
    // Pagefind open.
    try {
      const pagefind = await getPagefind();
      await pagefind.close();
    } catch {
      // Best-effort cleanup
    }
  }

  const finishedAt = Date.now();
  log.debug`Search index built in ${finishedAt - snapshotReadyAt}ms (${finishedAt - start}ms total)`;
}
|
|
258
|
+
|
|
259
|
+
/**
 * Runs search indexing in the background for watch mode.
 *
 * At most one indexing run is in flight at a time. Calls to `run()` made
 * while a run is in progress are collapsed into a single rerun that starts
 * when the current run finishes. Failures are logged as warnings and never
 * thrown to the caller of `run()`.
 */
export class WatchPagefindRunner {
  private siteVariables: SiteVariables;
  private watchRunInProgress: boolean;
  private watchRunQueued: boolean;
  private distPath: string | null;
  private htmlCacheByAssetPath: Map<string, string>;

  constructor(siteVariables: SiteVariables) {
    this.siteVariables = siteVariables || {};
    this.watchRunInProgress = false;
    this.watchRunQueued = false;
    this.distPath = null;
    this.htmlCacheByAssetPath = new Map();
  }

  /** Stores the dist path and HTML assets that the next `run()` will index. */
  update(distPath: string, htmlAssetsByPath: Map<string, string>): void {
    this.distPath = distPath;
    this.htmlCacheByAssetPath = htmlAssetsByPath;
  }

  /**
   * Starts a background indexing run, or queues a rerun when one is
   * already in progress. Returns immediately; the index is built
   * asynchronously.
   */
  run(): void {
    if (this.watchRunInProgress) {
      this.watchRunQueued = true;
      log.debug`Indexing is still running in the background; queueing a rerun`;
      return;
    }

    this.watchRunInProgress = true;
    this.watchRunQueued = false;
    // NOTE(review): assumes update() has been called before run() — confirm with callers.
    const distPath = this.distPath!;
    // Copy the map so later update() calls cannot affect this run.
    const htmlAssetsByPath = new Map(this.htmlCacheByAssetPath);
    const start = Date.now();

    log.debug`Preparing search index background snapshot`;

    let pdfSourceByOutputPath: Map<string, string>;
    let reachableHtmlPaths: string[];
    let reachablePdfPaths: string[];
    try {
      // The PDF scan reads the filesystem and may throw. It must run inside
      // this try so a failure resets watchRunInProgress instead of leaving
      // the runner stuck with every later run() only queueing.
      pdfSourceByOutputPath = getPdfSourceByOutputPath();
      ({ reachableHtmlPaths, reachablePdfPaths } = collectIndexTargets(
        htmlAssetsByPath,
        this.siteVariables,
        pdfSourceByOutputPath,
      ));
    } catch (err) {
      this.watchRunInProgress = false;
      log.warn`Pagefind failed: ${(err as Error).message}`;
      if (this.watchRunQueued) {
        this.run();
      }
      return;
    }

    const snapshotReadyAt = Date.now();
    log.debug`Building search index in background`;
    buildIndex({
      distPath,
      htmlAssetsByPath,
      reachableHtmlPaths,
      reachablePdfPaths,
      pdfSourceByOutputPath,
    })
      .then(() => {
        const finishedAt = Date.now();
        log.debug`Search index ready after ${finishedAt - snapshotReadyAt}ms (${finishedAt - start}ms total)`;
      })
      .catch(err => {
        const failedAt = Date.now();
        log.warn`Search index failed after ${failedAt - snapshotReadyAt}ms of indexing (${failedAt - start}ms total): ${err.message}`;
      })
      .finally(() => {
        this.watchRunInProgress = false;
        if (this.watchRunQueued) {
          log.debug`Starting queued Pagefind background rerun`;
          this.run();
        }
      });
  }
}
|
|
338
|
+
|
|
339
|
+
export { buildIndex, collectIndexTargets };
|
|
@@ -1,15 +1,30 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import os from 'os';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { spawn } from 'child_process';
|
|
5
|
+
|
|
6
|
+
/** Text captured from the standard streams of one `mutool` invocation. */
interface MutoolOutput {
  stdout: string;
  stderr: string;
}

/** Text of a single PDF page. */
export interface PdfPage {
  /** Page number; `buildPdfPageRecords` assigns these starting at 1. */
  pageNumber: number;
  /** Page text; `buildPdfPageRecords` stores it whitespace-normalized. */
  content: string;
}

/** Result of extracting text from a PDF. */
export interface PdfExtractResult {
  pages: PdfPage[];
  /** `buildPdfPageRecords` sets this to true when `pages` is non-empty. */
  hasExtractedText: boolean;
}
|
|
5
20
|
|
|
6
|
-
let mutoolAvailabilityPromise = null;
|
|
21
|
+
let mutoolAvailabilityPromise: Promise<void> | null = null;
|
|
7
22
|
|
|
8
|
-
function formatMutoolCommand(args) {
|
|
23
|
+
/**
 * Renders a `mutool` invocation as a single string with the program name
 * and every argument JSON-quoted and separated by spaces.
 */
function formatMutoolCommand(args: string[]): string {
  const quotedParts: string[] = [JSON.stringify('mutool')];
  for (const arg of args) {
    quotedParts.push(JSON.stringify(arg));
  }
  return quotedParts.join(' ');
}
|
|
11
26
|
|
|
12
|
-
function runMutool(args) {
|
|
27
|
+
function runMutool(args: string[]): Promise<MutoolOutput> {
|
|
13
28
|
return new Promise((resolve, reject) => {
|
|
14
29
|
const child = spawn('mutool', args, { stdio: ['ignore', 'pipe', 'pipe'] });
|
|
15
30
|
|
|
@@ -18,16 +33,16 @@ function runMutool(args) {
|
|
|
18
33
|
let stderr = '';
|
|
19
34
|
|
|
20
35
|
child.stdout.setEncoding('utf8');
|
|
21
|
-
child.stdout.on('data', chunk => {
|
|
36
|
+
child.stdout.on('data', (chunk: string) => {
|
|
22
37
|
stdout += chunk;
|
|
23
38
|
});
|
|
24
39
|
|
|
25
40
|
child.stderr.setEncoding('utf8');
|
|
26
|
-
child.stderr.on('data', chunk => {
|
|
41
|
+
child.stderr.on('data', (chunk: string) => {
|
|
27
42
|
stderr += chunk;
|
|
28
43
|
});
|
|
29
44
|
|
|
30
|
-
child.on('error', err => {
|
|
45
|
+
child.on('error', (err: NodeJS.ErrnoException) => {
|
|
31
46
|
if (settled) {
|
|
32
47
|
return;
|
|
33
48
|
}
|
|
@@ -45,7 +60,7 @@ function runMutool(args) {
|
|
|
45
60
|
reject(err);
|
|
46
61
|
});
|
|
47
62
|
|
|
48
|
-
child.on('close', code => {
|
|
63
|
+
child.on('close', (code: number | null) => {
|
|
49
64
|
if (settled) {
|
|
50
65
|
return;
|
|
51
66
|
}
|
|
@@ -67,7 +82,7 @@ function runMutool(args) {
|
|
|
67
82
|
});
|
|
68
83
|
}
|
|
69
84
|
|
|
70
|
-
function assertMutoolAvailable() {
|
|
85
|
+
export function assertMutoolAvailable(): Promise<void> {
|
|
71
86
|
if (!mutoolAvailabilityPromise) {
|
|
72
87
|
mutoolAvailabilityPromise = runMutool(['-v']).then(() => undefined);
|
|
73
88
|
}
|
|
@@ -75,7 +90,16 @@ function assertMutoolAvailable() {
|
|
|
75
90
|
return mutoolAvailabilityPromise;
|
|
76
91
|
}
|
|
77
92
|
|
|
78
|
-
|
|
93
|
+
/** One numbered page file found by `listNumberedPageFiles`. */
interface NumberedPageFile {
  /** Name of the directory entry. */
  fileName: string;
  /** The listed directory joined with `fileName`. */
  filePath: string;
  /** Page number for the file; results are sorted ascending by this value. */
  pageNum: number;
}
|
|
98
|
+
|
|
99
|
+
async function listNumberedPageFiles(
|
|
100
|
+
dir: string,
|
|
101
|
+
ext: string,
|
|
102
|
+
): Promise<NumberedPageFile[]> {
|
|
79
103
|
const suffix = `.${ext}`;
|
|
80
104
|
const entries = await fs.promises.readdir(dir);
|
|
81
105
|
|
|
@@ -90,16 +114,16 @@ async function listNumberedPageFiles(dir, ext) {
|
|
|
90
114
|
|
|
91
115
|
return { fileName: name, filePath: path.join(dir, name), pageNum };
|
|
92
116
|
})
|
|
93
|
-
.filter(entry => entry !== null)
|
|
117
|
+
.filter((entry): entry is NumberedPageFile => entry !== null)
|
|
94
118
|
.sort((a, b) => a.pageNum - b.pageNum);
|
|
95
119
|
}
|
|
96
120
|
|
|
97
|
-
function normalizeExtractedText(text) {
|
|
121
|
+
/**
 * Collapses every run of whitespace in `text` to a single space and removes
 * leading and trailing whitespace.
 */
function normalizeExtractedText(text: string): string {
  const words = text.split(/\s+/).filter(word => word.length > 0);
  return words.join(' ');
}
|
|
100
124
|
|
|
101
|
-
function buildPdfPageRecords(pageTexts) {
|
|
102
|
-
const pages = pageTexts
|
|
125
|
+
export function buildPdfPageRecords(pageTexts: string[]): PdfExtractResult {
|
|
126
|
+
const pages: PdfPage[] = pageTexts
|
|
103
127
|
.map((text, i) => {
|
|
104
128
|
const normalized = normalizeExtractedText(text);
|
|
105
129
|
if (!normalized) {
|
|
@@ -108,19 +132,21 @@ function buildPdfPageRecords(pageTexts) {
|
|
|
108
132
|
|
|
109
133
|
return { pageNumber: i + 1, content: normalized };
|
|
110
134
|
})
|
|
111
|
-
.filter(
|
|
135
|
+
.filter((entry): entry is PdfPage => entry !== null);
|
|
112
136
|
|
|
113
137
|
return { pages, hasExtractedText: pages.length > 0 };
|
|
114
138
|
}
|
|
115
139
|
|
|
116
|
-
async function extractPdfPages(
|
|
140
|
+
export async function extractPdfPages(
|
|
141
|
+
pdfPath: string,
|
|
142
|
+
): Promise<PdfExtractResult> {
|
|
117
143
|
await assertMutoolAvailable();
|
|
118
144
|
|
|
119
145
|
const tempDir = await fs.promises.mkdtemp(
|
|
120
146
|
path.join(os.tmpdir(), 'pdf-text-'),
|
|
121
147
|
);
|
|
122
148
|
const textPattern = path.join(tempDir, 'page-%04d.txt');
|
|
123
|
-
let processingError = null;
|
|
149
|
+
let processingError: unknown = null;
|
|
124
150
|
|
|
125
151
|
try {
|
|
126
152
|
await runMutool(['draw', '-q', '-F', 'text', '-o', textPattern, pdfPath]);
|
|
@@ -149,15 +175,9 @@ async function extractPdfPages(pdfPath) {
|
|
|
149
175
|
} catch (cleanupErr) {
|
|
150
176
|
if (!processingError) {
|
|
151
177
|
console.warn(
|
|
152
|
-
`Failed to clean up temporary PDF extraction directory ${tempDir}: ${cleanupErr.message}`,
|
|
178
|
+
`Failed to clean up temporary PDF extraction directory ${tempDir}: ${(cleanupErr as Error).message}`,
|
|
153
179
|
);
|
|
154
180
|
}
|
|
155
181
|
}
|
|
156
182
|
}
|
|
157
183
|
}
|
|
158
|
-
|
|
159
|
-
module.exports = {
|
|
160
|
-
assertMutoolAvailable,
|
|
161
|
-
buildPdfPageRecords,
|
|
162
|
-
extractPdfPages,
|
|
163
|
-
};
|