@abreen/tada 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. package/README.md +29 -33
  2. package/bin/tada.ts +356 -0
  3. package/bin/validators.test.ts +204 -0
  4. package/bin/validators.ts +83 -0
  5. package/{webpack/apply-base-path-plugin.js → build/apply-base-path-plugin.ts} +16 -7
  6. package/build/bundle.ts +117 -0
  7. package/{webpack/code.test.js → build/code.test.ts} +6 -7
  8. package/build/colors.ts +25 -0
  9. package/build/content-watch.ts +107 -0
  10. package/build/copy.ts +118 -0
  11. package/{webpack/deflist-id-plugin.js → build/deflist-id-plugin.ts} +7 -6
  12. package/{webpack/external-links-plugin.js → build/external-links-plugin.ts} +14 -5
  13. package/build/features.ts +11 -0
  14. package/build/generate-content-assets.ts +315 -0
  15. package/build/generate-favicon.ts +165 -0
  16. package/build/generate-fonts.ts +31 -0
  17. package/{webpack/generate-manifest-plugin.js → build/generate-manifest.ts} +29 -36
  18. package/build/globals.test.ts +101 -0
  19. package/{webpack/globals.js → build/globals.ts} +28 -13
  20. package/{webpack/heading-subtitle-plugin.js → build/heading-subtitle-plugin.ts} +4 -2
  21. package/build/json-schema.test.ts +57 -0
  22. package/build/json-schema.ts +33 -0
  23. package/build/log.test.ts +111 -0
  24. package/build/log.ts +167 -0
  25. package/{webpack/markdown-plugins.test.js → build/markdown-plugins.test.ts} +94 -9
  26. package/{webpack/pagefind-plugin.test.js → build/pagefind.test.ts} +74 -13
  27. package/build/pagefind.ts +339 -0
  28. package/{webpack/pdf-text.js → build/pdf-text.ts} +47 -27
  29. package/build/pipeline.ts +93 -0
  30. package/{webpack/reachability.test.js → build/reachability.test.ts} +3 -3
  31. package/{webpack/reachability.js → build/reachability.ts} +77 -34
  32. package/build/serve.ts +112 -0
  33. package/{webpack/site-variables.js → build/site-variables.ts} +22 -15
  34. package/{webpack → build}/site.schema.json +3 -10
  35. package/{webpack/templates.js → build/templates.ts} +35 -33
  36. package/{webpack/text-to-id.js → build/text-to-id.ts} +2 -2
  37. package/build/toc-plugin.test.ts +105 -0
  38. package/{webpack/toc-plugin.js → build/toc-plugin.ts} +32 -13
  39. package/build/types.ts +172 -0
  40. package/build/util.ts +26 -0
  41. package/{webpack/utils/code.js → build/utils/code.ts} +119 -60
  42. package/{webpack/utils/content-files.js → build/utils/content-files.ts} +40 -35
  43. package/build/utils/derive-theme.test.ts +111 -0
  44. package/build/utils/derive-theme.ts +85 -0
  45. package/build/utils/file-types.test.ts +61 -0
  46. package/build/utils/file-types.ts +13 -0
  47. package/build/utils/front-matter.test.ts +80 -0
  48. package/{webpack/utils/front-matter.js → build/utils/front-matter.ts} +22 -9
  49. package/{webpack → build}/utils/jdi-runner/LiterateRunner.java +1 -1
  50. package/{webpack/utils/literate-java.js → build/utils/literate-java.ts} +63 -34
  51. package/{webpack/utils/markdown.js → build/utils/markdown.ts} +94 -49
  52. package/build/utils/paths.test.ts +91 -0
  53. package/{webpack/utils/paths.js → build/utils/paths.ts} +14 -22
  54. package/{webpack/utils/render.js → build/utils/render.ts} +188 -123
  55. package/build/utils/shiki-highlighter.ts +29 -0
  56. package/build/validate-internal-links-plugin.test.ts +106 -0
  57. package/{webpack/validate-internal-links-plugin.js → build/validate-internal-links-plugin.ts} +47 -20
  58. package/{webpack/watch-reachability-state.test.js → build/watch-reachability-state.test.ts} +8 -8
  59. package/{webpack/watch-reachability-state.js → build/watch-reachability-state.ts} +63 -24
  60. package/{webpack/watch-reload-client.js → build/watch-reload-client.ts} +3 -1
  61. package/build/watch.ts +573 -0
  62. package/content/index.md +9 -3
  63. package/content/markdown.md +2 -1
  64. package/content/problem_sets/index.html +14 -0
  65. package/fonts/google-sans-code/woff2/GoogleSansCodeVariable-Italic.woff2 +0 -0
  66. package/fonts/google-sans-code/woff2/GoogleSansCodeVariable.woff2 +0 -0
  67. package/fonts/inter/woff2/InterVariable-Italic.woff2 +0 -0
  68. package/fonts/inter/woff2/InterVariable.woff2 +0 -0
  69. package/package.json +28 -19
  70. package/src/_alerts.scss +92 -0
  71. package/src/_base.scss +106 -0
  72. package/src/{layout.scss → _layout.scss} +0 -2
  73. package/src/anchor/style.scss +1 -9
  74. package/src/code/index.ts +3 -3
  75. package/src/code.scss +1 -1
  76. package/src/critical.scss +5 -0
  77. package/src/header/_base.scss +129 -0
  78. package/src/header/style.scss +3 -131
  79. package/src/index.ts +1 -2
  80. package/src/question/style.scss +1 -1
  81. package/src/search/index.ts +36 -15
  82. package/src/search/style.scss +9 -15
  83. package/src/style.scss +6 -269
  84. package/src/toc/style.scss +5 -39
  85. package/src/util.ts +8 -5
  86. package/templates/_theme.scss +38 -14
  87. package/tsconfig.json +10 -6
  88. package/types/file-system-access.d.ts +5 -0
  89. package/types/markdown-it-plugins.d.ts +11 -0
  90. package/types/untyped-modules.d.ts +40 -0
  91. package/bin/tada.js +0 -361
  92. package/content/problem_sets/index.md +0 -6
  93. package/webpack/build-state.js +0 -97
  94. package/webpack/colors.js +0 -15
  95. package/webpack/config.base.js +0 -151
  96. package/webpack/config.dev.js +0 -23
  97. package/webpack/config.prod.js +0 -32
  98. package/webpack/content-watch-plugin.js +0 -153
  99. package/webpack/features.js +0 -5
  100. package/webpack/generate-content-assets-plugin.js +0 -308
  101. package/webpack/generate-favicon-plugin.js +0 -198
  102. package/webpack/generate-fonts-plugin.js +0 -69
  103. package/webpack/json-schema.js +0 -19
  104. package/webpack/log.js +0 -143
  105. package/webpack/pagefind-plugin.js +0 -379
  106. package/webpack/print-flair-plugin.js +0 -22
  107. package/webpack/serve.js +0 -104
  108. package/webpack/util.js +0 -49
  109. package/webpack/utils/define-plugin.js +0 -20
  110. package/webpack/utils/file-types.js +0 -26
  111. package/webpack/utils/parse-hsl.js +0 -8
  112. package/webpack/utils/shiki-highlighter.js +0 -26
  113. package/webpack/watch.js +0 -166
  114. /package/{webpack → build}/flair.json +0 -0
  115. /package/{webpack → build}/utils/jdi-runner/LiterateRunner.class +0 -0
  116. /package/fonts/google-sans-code/{GoogleSansCodeVariable-Italic.ttf → ttf/GoogleSansCodeVariable-Italic.ttf} +0 -0
  117. /package/fonts/google-sans-code/{GoogleSansCodeVariable.ttf → ttf/GoogleSansCodeVariable.ttf} +0 -0
  118. /package/fonts/inter/{InterVariable-Italic.ttf → ttf/InterVariable-Italic.ttf} +0 -0
  119. /package/fonts/inter/{InterVariable.ttf → ttf/InterVariable.ttf} +0 -0
  120. /package/types/{dev.ts → dev.d.ts} +0 -0
@@ -1,18 +1,36 @@
1
- const path = require('path');
2
- const { describe, expect, test } = require('bun:test');
3
- const { buildIndex, collectIndexTargets } = require('./pagefind-plugin');
1
+ import path from 'path';
2
+ import { describe, expect, test } from 'bun:test';
3
+ import { buildIndex, collectIndexTargets } from './pagefind.js';
4
+ import type { SiteVariables } from './types.js';
4
5
 
5
- function createFakePagefind(calls) {
6
/** An HTML file as passed to the fake index's `addHTMLFile`. */
interface FakeHtmlFile {
  sourcePath: string;
  content: string;
}

/** A custom record as passed to the fake index's `addCustomRecord`. */
interface FakeCustomRecord {
  url: string;
  content: string;
  language: string;
  meta: Record<string, string>;
}

/**
 * Recorder that the fake Pagefind index writes into so tests can inspect
 * which calls were made. Every field is optional: a test only supplies the
 * collections it wants populated.
 */
interface FakePagefindCalls {
  /** Files received by `addHTMLFile`. */
  htmlFiles?: Array<FakeHtmlFile>;
  /** Records received by `addCustomRecord`. */
  customRecords?: Array<FakeCustomRecord>;
  /** Output path received by `writeFiles`. */
  outputPath?: string | null;
  /** Asserted by the tests; presumably counts index deletions (writer not visible here). */
  deleted?: number;
}
17
+
18
+ function createFakePagefind(calls: FakePagefindCalls) {
6
19
  const fakeIndex = {
7
- addHTMLFile: async file => {
20
+ addHTMLFile: async (file: { sourcePath: string; content: string }) => {
8
21
  calls.htmlFiles?.push(file);
9
22
  return { errors: [], file: { url: file.sourcePath, meta: {} } };
10
23
  },
11
- addCustomRecord: async record => {
24
+ addCustomRecord: async (record: {
25
+ url: string;
26
+ content: string;
27
+ language: string;
28
+ meta: Record<string, string>;
29
+ }) => {
12
30
  calls.customRecords?.push(record);
13
31
  return { errors: [], file: { url: record.url, meta: record.meta } };
14
32
  },
15
- writeFiles: async ({ outputPath }) => {
33
+ writeFiles: async ({ outputPath }: { outputPath: string }) => {
16
34
  calls.outputPath = outputPath;
17
35
  return { errors: [], outputPath };
18
36
  },
@@ -21,9 +39,10 @@ function createFakePagefind(calls) {
21
39
  },
22
40
  };
23
41
 
24
- return async () => ({
25
- createIndex: async () => ({ index: fakeIndex, errors: [] }),
26
- });
42
+ return (() =>
43
+ Promise.resolve({
44
+ createIndex: async () => ({ index: fakeIndex, errors: [] }),
45
+ })) as unknown as () => Promise<typeof import('pagefind')>;
27
46
  }
28
47
 
29
48
  describe('PagefindPlugin', () => {
@@ -43,7 +62,7 @@ describe('PagefindPlugin', () => {
43
62
 
44
63
  const result = collectIndexTargets(
45
64
  htmlAssetsByPath,
46
- { basePath: '/' },
65
+ { base: '', basePath: '/' } as SiteVariables,
47
66
  pdfSourceByOutputPath,
48
67
  );
49
68
 
@@ -54,7 +73,11 @@ describe('PagefindPlugin', () => {
54
73
  });
55
74
 
56
75
  test('buildIndex adds HTML files and per-page PDF custom records', async () => {
57
- const calls = { htmlFiles: [], customRecords: [], outputPath: null };
76
+ const calls: FakePagefindCalls = {
77
+ htmlFiles: [],
78
+ customRecords: [],
79
+ outputPath: null,
80
+ };
58
81
 
59
82
  await buildIndex({
60
83
  distPath: '/tmp/dist',
@@ -103,8 +126,46 @@ describe('PagefindPlugin', () => {
103
126
  expect(calls.deleted).toBe(1);
104
127
  });
105
128
 
129
test('buildIndex prepends filename to page 1 content for searchability', async () => {
  const calls: FakePagefindCalls = { htmlFiles: [], customRecords: [] };

  // Stubbed extraction result: two pages, both with text.
  const extracted = {
    pages: [
      { pageNumber: 1, content: 'Welcome to the course' },
      { pageNumber: 2, content: 'Chapter one' },
    ],
    hasExtractedText: true,
  };
  const pdfSources = new Map([['/docs/lecture1.pdf', '/tmp/docs/lecture1.pdf']]);

  // No HTML is indexed here; only the PDF custom records are under test.
  await buildIndex({
    distPath: '/tmp/dist',
    htmlAssetsByPath: new Map(),
    reachableHtmlPaths: [],
    reachablePdfPaths: ['/docs/lecture1.pdf'],
    pdfSourceByOutputPath: pdfSources,
    loadPagefind: createFakePagefind(calls),
    checkMutool: async () => {},
    extractPages: async () => extracted,
  });

  // Only the first page's content carries the file name prefix.
  const firstPageRecord = {
    url: '/docs/lecture1.pdf#page=1',
    content: 'lecture1.pdf Welcome to the course',
    language: 'en',
    meta: { title: 'lecture1.pdf', page: '1' },
  };
  const secondPageRecord = {
    url: '/docs/lecture1.pdf#page=2',
    content: 'Chapter one',
    language: 'en',
    meta: { title: 'lecture1.pdf', page: '2' },
  };

  expect(calls.customRecords).toEqual([firstPageRecord, secondPageRecord]);
});
166
+
106
167
  test('buildIndex falls back to a single PDF record when text extraction is empty', async () => {
107
- const calls = { customRecords: [] };
168
+ const calls: FakePagefindCalls = { customRecords: [] };
108
169
 
109
170
  await buildIndex({
110
171
  distPath: '/tmp/dist',
@@ -0,0 +1,339 @@
1
+ import path from 'path';
2
+ import { makeLogger } from './log.js';
3
+ import { collectReachableSiteAssets } from './reachability.js';
4
+ import {
5
+ getContentDir,
6
+ getFilesByExtensions,
7
+ normalizeOutputPath,
8
+ } from './util.js';
9
+ import { assertMutoolAvailable, extractPdfPages } from './pdf-text.js';
10
+ import type { SiteVariables } from './types.js';
11
+
12
/** Module-scoped logger, created from this file's path. */
const log = makeLogger(__filename);

/** Pagefind's verbose mode is switched on only when TADA_LOG_LEVEL is 'debug'. */
const PAGEFIND_VERBOSE = process.env.TADA_LOG_LEVEL === 'debug';

/** Directory name, joined onto the dist path, that receives the index files. */
const PAGEFIND_OUTPUT_SUBDIR = 'pagefind';

/** Type of the dynamically imported `pagefind` module. */
type PagefindModule = typeof import('pagefind');

/** Resolved value of `pagefind.createIndex()`. */
type PagefindCreateIndexResult = Awaited<
  ReturnType<PagefindModule['createIndex']>
>;

/** The `index` member of a `createIndex()` result; callers narrow it with NonNullable. */
type PagefindIndex = PagefindCreateIndexResult['index'];
20
+
21
/** Import promise for the `pagefind` module; stays null until first requested. */
let pagefindModulePromise: Promise<PagefindModule> | null = null;

/**
 * Returns the `pagefind` module, importing it on first use.
 *
 * The import promise is stored at module scope, so every later call returns
 * that same promise rather than importing again.
 */
function getPagefind(): Promise<PagefindModule> {
  return (pagefindModulePromise ??= import('pagefind'));
}
29
+
30
/**
 * Turns the error list reported by a Pagefind call into one failure message.
 *
 * @param step - Label for the call that was made; used as the message prefix.
 * @param errors - Error strings reported by that call, if any.
 * @returns `"<step> failed: <errors joined with ' | '>"`, or `null` when
 *   `errors` is missing or empty.
 */
function formatPagefindErrors(
  step: string,
  errors: string[] | undefined,
): string | null {
  if (errors == null || errors.length === 0) {
    return null;
  }

  const joined = errors.join(' | ');
  return `${step} failed: ${joined}`;
}
39
+
40
/**
 * Adds one HTML document to the Pagefind index.
 *
 * @param index - Index handle to add to.
 * @param htmlFile - Source path and HTML content of the document.
 * @throws Error naming the source path when the call reports any errors.
 */
async function addHtmlFile(
  index: NonNullable<PagefindIndex>,
  htmlFile: { sourcePath: string; content: string },
): Promise<void> {
  const result = await index.addHTMLFile(htmlFile);
  const failure = formatPagefindErrors(
    `index.addHTMLFile(${htmlFile.sourcePath})`,
    result.errors,
  );

  if (failure !== null) {
    throw new Error(failure);
  }
}
53
+
54
/**
 * Adds one custom record (used here for PDF content) to the Pagefind index.
 *
 * @param index - Index handle to add to.
 * @param record - URL, searchable content, language and metadata of the record.
 * @param sourcePath - Identifier used only to label the error message.
 * @throws Error naming `sourcePath` when the call reports any errors.
 */
async function addPdfRecord(
  index: NonNullable<PagefindIndex>,
  record: {
    url: string;
    content: string;
    language: string;
    meta: Record<string, string>;
  },
  sourcePath: string,
): Promise<void> {
  const result = await index.addCustomRecord(record);
  const failure = formatPagefindErrors(
    `index.addCustomRecord(${sourcePath})`,
    result.errors,
  );

  if (failure !== null) {
    throw new Error(failure);
  }
}
73
+
74
/**
 * Maps each PDF's output path to its source file path.
 *
 * Lists the `pdf` files under the content directory. Each key is the file's
 * path relative to that directory, prefixed with `/` and passed through
 * `normalizeOutputPath`; each value is the file path as listed.
 */
function getPdfSourceByOutputPath(): Map<string, string> {
  const contentDir = getContentDir();
  const pdfFiles: string[] = getFilesByExtensions(contentDir, ['pdf']);

  const sourceByOutputPath = new Map<string, string>();
  for (const filePath of pdfFiles) {
    const relPath = path.relative(contentDir, filePath);
    sourceByOutputPath.set(normalizeOutputPath(`/${relPath}`), filePath);
  }
  return sourceByOutputPath;
}
85
+
86
/** The HTML and PDF output paths selected for indexing. */
interface IndexTargets {
  reachableHtmlPaths: string[];
  reachablePdfPaths: string[];
}

/**
 * Determines which HTML pages and PDFs should go into the search index.
 *
 * @param htmlAssetsByPath - HTML content keyed by asset path.
 * @param siteVariables - Site configuration; only `basePath` is read, and a
 *   missing or empty value falls back to `/`.
 * @param pdfSourceByOutputPath - Known PDFs keyed by output path; only the
 *   keys are used.
 * @returns Empty lists when there are no HTML assets; otherwise the result of
 *   `collectReachableSiteAssets` starting from `index.html`.
 */
function collectIndexTargets(
  htmlAssetsByPath: Map<string, string>,
  siteVariables: SiteVariables,
  pdfSourceByOutputPath: Map<string, string>,
): IndexTargets {
  const hasHtmlAssets = htmlAssetsByPath.size > 0;
  if (!hasHtmlAssets) {
    return { reachableHtmlPaths: [], reachablePdfPaths: [] };
  }

  const basePath = siteVariables?.basePath || '/';
  const knownPdfPaths = new Set(pdfSourceByOutputPath.keys());

  return collectReachableSiteAssets({
    htmlAssetsByPath,
    knownPdfPaths,
    rootPath: 'index.html',
    basePath,
  });
}
107
+
108
/** Inputs for {@link buildIndex}; the last three are injection points used by tests. */
interface BuildIndexOptions {
  distPath: string;
  htmlAssetsByPath: Map<string, string>;
  reachableHtmlPaths: string[];
  reachablePdfPaths: string[];
  pdfSourceByOutputPath: Map<string, string>;
  loadPagefind?: () => Promise<PagefindModule>;
  checkMutool?: () => Promise<void>;
  extractPages?: typeof extractPdfPages;
}

/**
 * Creates a Pagefind index, fills it with the given HTML pages and PDFs, and
 * writes it to the `pagefind` directory under `distPath`.
 *
 * HTML pages are added one at a time in the order given. PDFs are indexed
 * only if `checkMutool` succeeds; otherwise a warning is logged and all PDFs
 * are skipped. A PDF with no entry in `pdfSourceByOutputPath` is skipped. A
 * PDF with no extracted text gets one record whose content is its file name;
 * otherwise it gets one record per page, with the file name prepended to the
 * content of page 1.
 *
 * The index is always deleted afterwards; a failure of that deletion is
 * ignored.
 *
 * @throws Error when index creation, adding a file or record, or writing
 *   files reports errors, or when no index is returned.
 */
async function buildIndex({
  distPath,
  htmlAssetsByPath,
  reachableHtmlPaths,
  reachablePdfPaths,
  pdfSourceByOutputPath,
  loadPagefind = getPagefind,
  checkMutool = assertMutoolAvailable,
  extractPages = extractPdfPages,
}: BuildIndexOptions): Promise<void> {
  const pagefind = await loadPagefind();
  const created = await pagefind.createIndex({
    keepIndexUrl: true,
    verbose: PAGEFIND_VERBOSE,
  });

  const createFailure = formatPagefindErrors(
    'pagefind.createIndex()',
    created.errors,
  );
  if (createFailure !== null) {
    throw new Error(createFailure);
  }

  const index = created.index;
  if (!index) {
    throw new Error('pagefind.createIndex() did not return an index');
  }

  try {
    for (const sourcePath of reachableHtmlPaths) {
      const content = htmlAssetsByPath.get(sourcePath)!;
      await addHtmlFile(index, { sourcePath, content });
    }

    // Probe for mutool only when there is at least one PDF to index.
    let pdfPathsToIndex = reachablePdfPaths;
    if (reachablePdfPaths.length > 0) {
      try {
        await checkMutool();
      } catch {
        pdfPathsToIndex = [];
        log.warn`mutool was not found; search results will not include PDFs`;
      }
    }

    for (const pdfPath of pdfPathsToIndex) {
      const sourceFilePath = pdfSourceByOutputPath.get(pdfPath);
      if (!sourceFilePath) {
        continue;
      }

      const extraction = await extractPages(sourceFilePath);
      const title = path.posix.basename(pdfPath);

      if (extraction.hasExtractedText) {
        for (const page of extraction.pages) {
          const pageUrl = `${pdfPath}#page=${page.pageNumber}`;
          // The file name is made searchable through the first page's record.
          const content =
            page.pageNumber === 1 ? `${title} ${page.content}` : page.content;
          await addPdfRecord(
            index,
            {
              url: pageUrl,
              content,
              language: 'en',
              meta: { title, page: String(page.pageNumber) },
            },
            pageUrl,
          );
        }
      } else {
        // No text could be extracted: index the PDF by file name only.
        await addPdfRecord(
          index,
          { url: pdfPath, content: title, language: 'en', meta: { title } },
          pdfPath,
        );
      }
    }

    const written = await index.writeFiles({
      outputPath: path.join(distPath, PAGEFIND_OUTPUT_SUBDIR),
    });
    const writeFailure = formatPagefindErrors(
      'index.writeFiles()',
      written.errors,
    );
    if (writeFailure !== null) {
      throw new Error(writeFailure);
    }
  } finally {
    await index.deleteIndex().catch(() => null);
  }
}
208
+
209
/** Inputs for {@link runPagefind}. */
interface RunPagefindOptions {
  siteVariables: SiteVariables;
  distPath: string;
  htmlAssetsByPath: Map<string, string>;
}

/**
 * Builds the search index once and then closes Pagefind.
 *
 * Scans the content directory for PDFs, works out which pages and PDFs are
 * reachable, logs a summary, and builds the index into `distPath`.
 *
 * Pagefind is closed whether or not indexing succeeds, so a failed build does
 * not leave it open. Errors from closing are ignored.
 *
 * @throws Whatever `collectIndexTargets` or `buildIndex` throws.
 */
export async function runPagefind({
  siteVariables,
  distPath,
  htmlAssetsByPath,
}: RunPagefindOptions): Promise<void> {
  const pdfSourceByOutputPath = getPdfSourceByOutputPath();
  const start = Date.now();

  log.debug`Finding reachable pages for search index`;
  const { reachableHtmlPaths, reachablePdfPaths } = collectIndexTargets(
    htmlAssetsByPath,
    siteVariables,
    pdfSourceByOutputPath,
  );

  const snapshotReadyAt = Date.now();

  const pageNoun = reachableHtmlPaths.length === 1 ? 'page' : 'pages';
  let message = `Building search index for ${reachableHtmlPaths.length} ${pageNoun}`;
  if (reachablePdfPaths.length > 0) {
    const pdfNoun = reachablePdfPaths.length === 1 ? 'PDF' : 'PDFs';
    message += ` and ${reachablePdfPaths.length} ${pdfNoun}`;
  }
  log.info`${message}`;

  try {
    await buildIndex({
      distPath,
      htmlAssetsByPath,
      reachableHtmlPaths,
      reachablePdfPaths,
      pdfSourceByOutputPath,
    });
  } finally {
    // Runs on failure too; the original error still propagates afterwards.
    try {
      const pagefind = await getPagefind();
      await pagefind.close();
    } catch {
      // Best-effort cleanup
    }
  }

  const finishedAt = Date.now();
  log.debug`Search index built in ${finishedAt - snapshotReadyAt}ms (${finishedAt - start}ms total)`;
}
258
+
259
/**
 * Rebuilds the search index in the background during watch mode.
 *
 * Callers record the latest build output with {@link update} and then call
 * {@link run}. Only one indexing run is active at a time: a `run()` call made
 * while one is active is remembered and triggers a single rerun afterwards.
 */
export class WatchPagefindRunner {
  private siteVariables: SiteVariables;
  private watchRunInProgress: boolean;
  private watchRunQueued: boolean;
  private distPath: string | null;
  private htmlCacheByAssetPath: Map<string, string>;

  /**
   * @param siteVariables - Site configuration passed to `collectIndexTargets`
   *   on every run.
   */
  constructor(siteVariables: SiteVariables) {
    this.siteVariables = siteVariables || {};
    this.watchRunInProgress = false;
    this.watchRunQueued = false;
    this.distPath = null;
    this.htmlCacheByAssetPath = new Map();
  }

  /**
   * Records the output directory and HTML assets that the next run will use.
   *
   * @param distPath - Directory the index is written under.
   * @param htmlAssetsByPath - HTML content keyed by asset path.
   */
  update(distPath: string, htmlAssetsByPath: Map<string, string>): void {
    this.distPath = distPath;
    this.htmlCacheByAssetPath = htmlAssetsByPath;
  }

  /**
   * Starts a background indexing run, or queues a rerun if one is active.
   *
   * Never throws and never rejects: every failure is logged as a warning.
   * If no output directory has been recorded yet, the run is skipped.
   */
  run(): void {
    if (this.watchRunInProgress) {
      this.watchRunQueued = true;
      log.debug`Indexing is still running in the background; queueing a rerun`;
      return;
    }

    // Without an output directory the write step could only fail late.
    const distPath = this.distPath;
    if (distPath === null) {
      log.warn`Pagefind skipped: no build output has been recorded yet`;
      return;
    }

    this.watchRunInProgress = true;
    this.watchRunQueued = false;
    // Copy the cache so this run indexes a fixed snapshot of the assets.
    const htmlAssetsByPath = new Map(this.htmlCacheByAssetPath);

    let pdfSourceByOutputPath: Map<string, string>;
    let start: number;
    let reachableHtmlPaths: string[];
    let reachablePdfPaths: string[];
    try {
      // Kept inside the try so a failed PDF scan resets the in-progress flag
      // instead of leaving every later run() permanently queued.
      pdfSourceByOutputPath = getPdfSourceByOutputPath();
      start = Date.now();

      log.debug`Preparing search index background snapshot`;

      ({ reachableHtmlPaths, reachablePdfPaths } = collectIndexTargets(
        htmlAssetsByPath,
        this.siteVariables,
        pdfSourceByOutputPath,
      ));
    } catch (err) {
      // Nothing can queue a rerun during this synchronous section, so there
      // is no queued run to start here.
      this.watchRunInProgress = false;
      const reason = err instanceof Error ? err.message : String(err);
      log.warn`Pagefind failed: ${reason}`;
      return;
    }

    const startedAt = start;
    const snapshotReadyAt = Date.now();
    log.debug`Building search index in background`;
    void buildIndex({
      distPath,
      htmlAssetsByPath,
      reachableHtmlPaths,
      reachablePdfPaths,
      pdfSourceByOutputPath,
    })
      .then(() => {
        const finishedAt = Date.now();
        log.debug`Search index ready after ${finishedAt - snapshotReadyAt}ms (${finishedAt - startedAt}ms total)`;
      })
      .catch((err: unknown) => {
        const failedAt = Date.now();
        const reason = err instanceof Error ? err.message : String(err);
        log.warn`Search index failed after ${failedAt - snapshotReadyAt}ms of indexing (${failedAt - startedAt}ms total): ${reason}`;
      })
      .finally(() => {
        this.watchRunInProgress = false;
        if (this.watchRunQueued) {
          log.debug`Starting queued Pagefind background rerun`;
          this.run();
        }
      });
  }
}
338
+
339
+ export { buildIndex, collectIndexTargets };
@@ -1,15 +1,30 @@
1
- const fs = require('fs');
2
- const os = require('os');
3
- const path = require('path');
4
- const { spawn } = require('child_process');
1
+ import fs from 'fs';
2
+ import os from 'os';
3
+ import path from 'path';
4
+ import { spawn } from 'child_process';
5
+
6
/** Text collected from a finished `mutool` child process. */
interface MutoolOutput {
  /** Everything the process wrote to standard output. */
  stdout: string;
  /** Everything the process wrote to standard error. */
  stderr: string;
}

/** The extracted text of a single PDF page. */
export interface PdfPage {
  /** 1-based page number. */
  pageNumber: number;
  /** Page text with whitespace normalized. */
  content: string;
}

/** Result of extracting text from a PDF. */
export interface PdfExtractResult {
  /** Pages that produced non-empty text. */
  pages: PdfPage[];
  /** True when `pages` has at least one entry. */
  hasExtractedText: boolean;
}
5
20
 
6
- let mutoolAvailabilityPromise = null;
21
+ let mutoolAvailabilityPromise: Promise<void> | null = null;
7
22
 
8
/**
 * Renders a `mutool` invocation as a single display string.
 *
 * @param args - Arguments that follow the `mutool` program name.
 * @returns The program name and each argument, JSON-quoted and separated by
 *   single spaces.
 */
function formatMutoolCommand(args: string[]): string {
  const quotedParts: string[] = [];
  for (const part of ['mutool', ...args]) {
    quotedParts.push(JSON.stringify(part));
  }
  return quotedParts.join(' ');
}
11
26
 
12
- function runMutool(args) {
27
+ function runMutool(args: string[]): Promise<MutoolOutput> {
13
28
  return new Promise((resolve, reject) => {
14
29
  const child = spawn('mutool', args, { stdio: ['ignore', 'pipe', 'pipe'] });
15
30
 
@@ -18,16 +33,16 @@ function runMutool(args) {
18
33
  let stderr = '';
19
34
 
20
35
  child.stdout.setEncoding('utf8');
21
- child.stdout.on('data', chunk => {
36
+ child.stdout.on('data', (chunk: string) => {
22
37
  stdout += chunk;
23
38
  });
24
39
 
25
40
  child.stderr.setEncoding('utf8');
26
- child.stderr.on('data', chunk => {
41
+ child.stderr.on('data', (chunk: string) => {
27
42
  stderr += chunk;
28
43
  });
29
44
 
30
- child.on('error', err => {
45
+ child.on('error', (err: NodeJS.ErrnoException) => {
31
46
  if (settled) {
32
47
  return;
33
48
  }
@@ -45,7 +60,7 @@ function runMutool(args) {
45
60
  reject(err);
46
61
  });
47
62
 
48
- child.on('close', code => {
63
+ child.on('close', (code: number | null) => {
49
64
  if (settled) {
50
65
  return;
51
66
  }
@@ -67,7 +82,7 @@ function runMutool(args) {
67
82
  });
68
83
  }
69
84
 
70
- function assertMutoolAvailable() {
85
+ export function assertMutoolAvailable(): Promise<void> {
71
86
  if (!mutoolAvailabilityPromise) {
72
87
  mutoolAvailabilityPromise = runMutool(['-v']).then(() => undefined);
73
88
  }
@@ -75,7 +90,16 @@ function assertMutoolAvailable() {
75
90
  return mutoolAvailabilityPromise;
76
91
  }
77
92
 
78
- async function listNumberedPageFiles(dir, ext) {
93
+ interface NumberedPageFile {
94
+ fileName: string;
95
+ filePath: string;
96
+ pageNum: number;
97
+ }
98
+
99
+ async function listNumberedPageFiles(
100
+ dir: string,
101
+ ext: string,
102
+ ): Promise<NumberedPageFile[]> {
79
103
  const suffix = `.${ext}`;
80
104
  const entries = await fs.promises.readdir(dir);
81
105
 
@@ -90,16 +114,16 @@ async function listNumberedPageFiles(dir, ext) {
90
114
 
91
115
  return { fileName: name, filePath: path.join(dir, name), pageNum };
92
116
  })
93
- .filter(entry => entry !== null)
117
+ .filter((entry): entry is NumberedPageFile => entry !== null)
94
118
  .sort((a, b) => a.pageNum - b.pageNum);
95
119
  }
96
120
 
97
/**
 * Normalizes whitespace in extracted text.
 *
 * @param text - Raw text to clean up.
 * @returns The text with every run of whitespace replaced by a single space
 *   and leading/trailing whitespace removed.
 */
function normalizeExtractedText(text: string): string {
  const collapsed = text.replace(/\s+/g, ' ');
  return collapsed.trim();
}
100
124
 
101
- function buildPdfPageRecords(pageTexts) {
102
- const pages = pageTexts
125
+ export function buildPdfPageRecords(pageTexts: string[]): PdfExtractResult {
126
+ const pages: PdfPage[] = pageTexts
103
127
  .map((text, i) => {
104
128
  const normalized = normalizeExtractedText(text);
105
129
  if (!normalized) {
@@ -108,19 +132,21 @@ function buildPdfPageRecords(pageTexts) {
108
132
 
109
133
  return { pageNumber: i + 1, content: normalized };
110
134
  })
111
- .filter(Boolean);
135
+ .filter((entry): entry is PdfPage => entry !== null);
112
136
 
113
137
  return { pages, hasExtractedText: pages.length > 0 };
114
138
  }
115
139
 
116
- async function extractPdfPages(pdfPath) {
140
+ export async function extractPdfPages(
141
+ pdfPath: string,
142
+ ): Promise<PdfExtractResult> {
117
143
  await assertMutoolAvailable();
118
144
 
119
145
  const tempDir = await fs.promises.mkdtemp(
120
146
  path.join(os.tmpdir(), 'pdf-text-'),
121
147
  );
122
148
  const textPattern = path.join(tempDir, 'page-%04d.txt');
123
- let processingError = null;
149
+ let processingError: unknown = null;
124
150
 
125
151
  try {
126
152
  await runMutool(['draw', '-q', '-F', 'text', '-o', textPattern, pdfPath]);
@@ -149,15 +175,9 @@ async function extractPdfPages(pdfPath) {
149
175
  } catch (cleanupErr) {
150
176
  if (!processingError) {
151
177
  console.warn(
152
- `Failed to clean up temporary PDF extraction directory ${tempDir}: ${cleanupErr.message}`,
178
+ `Failed to clean up temporary PDF extraction directory ${tempDir}: ${(cleanupErr as Error).message}`,
153
179
  );
154
180
  }
155
181
  }
156
182
  }
157
183
  }
158
-
159
- module.exports = {
160
- assertMutoolAvailable,
161
- buildPdfPageRecords,
162
- extractPdfPages,
163
- };