@nitpicker/core 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/CHANGELOG.md +8 -0
  2. package/LICENSE +191 -0
  3. package/README.md +13 -0
  4. package/lib/discover-analyze-plugins.d.ts +14 -0
  5. package/lib/discover-analyze-plugins.js +34 -0
  6. package/lib/find-nitpicker-modules-dir.d.ts +12 -0
  7. package/lib/find-nitpicker-modules-dir.js +23 -0
  8. package/lib/hooks/actions.d.ts +9 -0
  9. package/lib/hooks/actions.js +9 -0
  10. package/lib/hooks/child-process.d.ts +1 -0
  11. package/lib/hooks/child-process.js +34 -0
  12. package/lib/hooks/define-plugin.d.ts +68 -0
  13. package/lib/hooks/define-plugin.js +69 -0
  14. package/lib/hooks/index.d.ts +1 -0
  15. package/lib/hooks/index.js +1 -0
  16. package/lib/hooks/runner.d.ts +10 -0
  17. package/lib/hooks/runner.js +32 -0
  18. package/lib/import-modules.d.ts +24 -0
  19. package/lib/import-modules.js +38 -0
  20. package/lib/index.d.ts +5 -0
  21. package/lib/index.js +5 -0
  22. package/lib/load-plugin-settings.d.ts +40 -0
  23. package/lib/load-plugin-settings.js +85 -0
  24. package/lib/nitpicker.d.ts +127 -0
  25. package/lib/nitpicker.js +338 -0
  26. package/lib/page-analysis-worker.d.ts +48 -0
  27. package/lib/page-analysis-worker.js +98 -0
  28. package/lib/read-plugin-labels.d.ts +15 -0
  29. package/lib/read-plugin-labels.js +30 -0
  30. package/lib/table.d.ts +75 -0
  31. package/lib/table.js +132 -0
  32. package/lib/types.d.ts +264 -0
  33. package/lib/types.js +1 -0
  34. package/lib/url-event-bus.d.ts +32 -0
  35. package/lib/url-event-bus.js +20 -0
  36. package/lib/utils.d.ts +36 -0
  37. package/lib/utils.js +43 -0
  38. package/lib/worker/run-in-worker.d.ts +51 -0
  39. package/lib/worker/run-in-worker.js +120 -0
  40. package/lib/worker/runner.d.ts +25 -0
  41. package/lib/worker/runner.js +31 -0
  42. package/lib/worker/types.d.ts +23 -0
  43. package/lib/worker/types.js +1 -0
  44. package/lib/worker/worker.d.ts +27 -0
  45. package/lib/worker/worker.js +53 -0
  46. package/package.json +36 -0
  47. package/src/discover-analyze-plugins.spec.ts +21 -0
  48. package/src/discover-analyze-plugins.ts +37 -0
  49. package/src/hooks/define-plugin.spec.ts +38 -0
  50. package/src/hooks/define-plugin.ts +73 -0
  51. package/src/hooks/index.ts +1 -0
  52. package/src/import-modules.spec.ts +150 -0
  53. package/src/import-modules.ts +45 -0
  54. package/src/index.ts +5 -0
  55. package/src/load-plugin-settings.spec.ts +192 -0
  56. package/src/load-plugin-settings.ts +99 -0
  57. package/src/nitpicker.ts +418 -0
  58. package/src/page-analysis-worker.spec.ts +287 -0
  59. package/src/page-analysis-worker.ts +131 -0
  60. package/src/read-plugin-labels.spec.ts +151 -0
  61. package/src/read-plugin-labels.ts +37 -0
  62. package/src/table.spec.ts +83 -0
  63. package/src/table.ts +149 -0
  64. package/src/types.ts +289 -0
  65. package/src/url-event-bus.spec.ts +28 -0
  66. package/src/url-event-bus.ts +33 -0
  67. package/src/worker/run-in-worker.ts +155 -0
  68. package/src/worker/runner.ts +38 -0
  69. package/src/worker/types.ts +25 -0
  70. package/src/worker/worker.ts +64 -0
  71. package/tsconfig.json +11 -0
  72. package/tsconfig.tsbuildinfo +1 -0
package/src/types.ts ADDED
@@ -0,0 +1,289 @@
1
+ import type { Lanes } from '@d-zero/dealer';
2
+ import type { ExURL as URL } from '@d-zero/shared/parse-url';
3
+ import type { TableValue, Violation } from '@nitpicker/types';
4
+ import type { DOMWindow } from 'jsdom';
5
+
/**
 * One analyze plugin entry as declared in the user's configuration.
 *
 * Every entry points at an npm module whose default export is a
 * {@link PluginFactory}. The `settings` value is handed to that factory
 * when the plugin is initialized.
 * @see {@link ./load-plugin-settings.ts} for how plugins are discovered from cosmiconfig
 * @see {@link ./import-modules.ts} for how plugins are dynamically imported
 */
export interface Plugin {
	/**
	 * Display name of the plugin. Not used at runtime today; retained so
	 * that older config formats keep working.
	 * @deprecated Use `module` to identify plugins instead.
	 */
	name: string;

	/** npm module specifier handed to `import()` (e.g. `"@nitpicker/analyze-axe"`). */
	module: string;

	/**
	 * Absolute path of the configuration file that declared this plugin.
	 * Forwarded to the plugin so it can resolve relative paths in its own config.
	 */
	configFilePath: string;

	/**
	 * Settings for this plugin as parsed from the config file.
	 * Their shape is defined by the plugin itself (e.g. `{ lang: "ja" }` for axe).
	 */
	settings?: unknown;
}
40
+
/**
 * Options for {@link ./nitpicker.ts!Nitpicker.analyze}.
 *
 * Allows callers to provide an external {@link https://www.npmjs.com/package/@d-zero/dealer | Lanes}
 * instance for rich progress display, and a verbose flag for non-TTY environments.
 */
export interface AnalyzeOptions {
	/** Lanes instance for per-plugin progress display. If omitted, no progress is shown. */
	readonly lanes?: Lanes;

	/** When `true`, outputs plain-text progress lines instead of animated Lanes. */
	readonly verbose?: boolean;
}
54
+
/**
 * Internal configuration model used by {@link ./nitpicker.ts!Nitpicker}.
 *
 * Built by {@link ./load-plugin-settings.ts!loadPluginSettings} from the
 * user's cosmiconfig file (e.g. `.nitpickerrc.json`). The external config
 * format (`ConfigJSON` from `@nitpicker/types`) uses a `plugins.analyze`
 * object keyed by module name; this internal type normalizes it into an
 * ordered array of fully-resolved {@link Plugin} entries.
 */
export interface Config {
	/** Ordered list of analyze plugins to execute. */
	analyze: Plugin[];
}
68
+
/**
 * Runtime contract of an analyze plugin, i.e. the object returned by a
 * plugin's {@link PluginFactory}.
 *
 * Both callbacks are optional; a plugin may provide either or both:
 *
 * - **`eachPage`** is invoked inside a Worker thread and receives a parsed
 *   JSDOM `DOMWindow`, so standard DOM APIs can be used without extra
 *   parsing. Intended for DOM-dependent analysis such as markup validation,
 *   text linting, or accessibility checks.
 *
 * - **`eachUrl`** is invoked in the main thread and receives only the URL
 *   plus its external/internal flag. Intended for lightweight checks such as
 *   link validation or SEO URL pattern checks.
 * @template T - String literal union of the column keys this plugin
 *   contributes to the report table (e.g. `'title' | 'description'`).
 * @see {@link ./page-analysis-worker.ts} for how `eachPage` is called inside the worker
 * @see {@link ./nitpicker.ts} for how `eachUrl` is called from the main thread
 */
export interface AnalyzePlugin<T extends string = string> {
	/**
	 * Display label used in interactive prompts in place of the raw
	 * package name (e.g. `"axe: accessibility check"`).
	 */
	label?: string;

	/**
	 * Report columns contributed by this plugin: each key is a column
	 * identifier from `T`, each value the label shown in the header row.
	 */
	headers?: TableHeaders<T>;

	/**
	 * Analyzes one page; executed in a Worker thread.
	 * @param page - Everything known about the current page: raw HTML,
	 *   a live JSDOM window, the page URL, and progress counters.
	 * @param page.url - Parsed URL of the page being analyzed.
	 * @param page.html - Raw HTML string of the page.
	 * @param page.window - JSDOM window with the page's DOM tree. Closed after the callback returns.
	 * @param page.num - Zero-based index of the current page in the batch.
	 * @param page.total - Total number of pages in the batch.
	 * @returns Report data for this page, or `null` to skip it.
	 */
	eachPage?(page: {
		/** Parsed URL of the page being analyzed. */
		url: URL;
		/** Raw HTML string of the page. */
		html: string;
		/** JSDOM window with the page's DOM tree. Closed after the callback returns. */
		window: DOMWindow;
		/** Zero-based index of the current page in the batch. */
		num: number;
		/** Total number of pages in the batch. */
		total: number;
	}): Promise<ReportPage<T> | null> | ReportPage<T> | null;

	/**
	 * Analyzes one URL; executed in the main thread.
	 *
	 * No HTML and no DOM window are provided here, in contrast to
	 * `eachPage`. Use it for checks that need URL metadata only
	 * (e.g. URL patterns, external link policies).
	 * @param page - The URL together with its external/internal classification.
	 * @param page.url - Parsed URL being analyzed.
	 * @param page.isExternal - Whether this URL is external to the crawled site.
	 * @returns Report data for this URL, or `null` to skip it.
	 */
	eachUrl?(page: {
		/** Parsed URL being analyzed. */
		url: URL;
		/** Whether this URL is external to the crawled site. */
		isExternal: boolean;
	}): Promise<ReportPage<T> | null> | ReportPage<T> | null;
}
144
+
/**
 * The return value of a single {@link AnalyzePlugin.eachPage} or {@link AnalyzePlugin.eachUrl}
 * invocation for one page/URL.
 *
 * A plugin can contribute tabular data (displayed in spreadsheet columns)
 * and/or violation records (displayed in a dedicated violations sheet).
 * @template T - Column key union matching the plugin's `headers`.
 */
export interface ReportPage<T extends string> {
	/**
	 * Column data for this page, keyed by column identifier.
	 *
	 * NOTE(review): `TableData<T>` is a `Record<T, TableValue>`, so the type
	 * requires a value for every column key in `T`; confirm whether partial
	 * rows are meant to be allowed.
	 */
	page?: TableData<T>;
	/** Violations detected on this page (e.g. a11y issues, lint errors). */
	violations?: Violation[];
}
159
+
/**
 * Report data collected by a Worker thread, with table rows keyed by page URL.
 *
 * The Worker hands this object back to the main thread as the payload of
 * its `'finish'` message. It combines the results of every plugin whose
 * `eachPage` ran for a single page.
 * @template T - Column key union.
 * @see {@link ./page-analysis-worker.ts} for the Worker entry point that produces this
 * @see {@link ./worker/run-in-worker.ts!runInWorker} for the main-thread consumer
 */
export interface ReportPages<T extends string> {
	/** Table data from all plugins, one entry per URL. */
	pages?: TablePages<T>;
	/** Violations from all plugins, merged into one list. */
	violations?: Violation[];
}
176
+
/**
 * Factory function signature that every analyze plugin module must
 * export as its default export.
 *
 * The factory receives the user's settings object (`O`) together with the
 * path of the config file that declared the plugin, and returns an
 * {@link AnalyzePlugin} instance (or a Promise thereof). This two-phase
 * pattern allows plugins to perform async initialization (e.g.
 * loading locale files, compiling lint configs) once, then reuse
 * the resulting plugin for every page.
 *
 * Use {@link ./hooks/define-plugin.ts!definePlugin} to define a
 * plugin with full type inference.
 * @template O - Shape of the plugin's settings from the config file.
 * @template T - String literal union of column keys the plugin contributes.
 * @param options - Plugin-specific settings parsed from the config file.
 * @param configFilePath - Path to the configuration file where the plugin
 *   was declared (see {@link Plugin.configFilePath}); lets the plugin resolve
 *   relative paths found in its own settings.
 * @returns The initialized plugin, either directly or as a Promise.
 * @example
 * ```ts
 * // In @nitpicker/analyze-search/src/index.ts
 * import { definePlugin } from '@nitpicker/core';
 *
 * type Options = { keywords: string[] };
 *
 * export default definePlugin(async (options: Options) => {
 *   return {
 *     headers: { found: 'Keywords Found' },
 *     async eachPage({ html }) {
 *       const count = options.keywords.filter(k => html.includes(k)).length;
 *       return { page: { found: { value: count } } };
 *     },
 *   };
 * });
 * ```
 * @see {@link AnalyzePlugin} for the runtime interface
 * @see {@link ./hooks/define-plugin.ts!definePlugin} for the type-safe wrapper
 */
export type PluginFactory<O, T extends string = string> = (
	options: O,
	configFilePath: string,
) => Promise<AnalyzePlugin<T>> | AnalyzePlugin<T>;
215
+
/**
 * Column header definitions: maps column key to display label.
 * @example
 * ```ts
 * const headers: TableHeaders<'title' | 'desc'> = {
 *   title: 'Page Title',
 *   desc: 'Meta Description',
 * };
 * ```
 */
export type TableHeaders<K extends string> = Record<K, string>;

/** Internal Map representation of {@link TableHeaders}, used by {@link ./table.ts!Table}. */
export type TableHeaderMap<K extends string> = Map<K, string>;

/** A single row of cell values keyed by column identifier. */
export type TableData<K extends string> = Record<K, TableValue>;

/**
 * Multiple rows of table data keyed by page URL.
 * This is the serialized form used in Worker message payloads and JSON output.
 */
export type TablePages<K extends string> = Record<string, TableData<K>>;

/**
 * Internal Map representation of page-keyed table data.
 * Used by {@link ./table.ts!Table} for efficient merge operations.
 */
export type TableRow<K extends string> = Map<string, TableData<K>>;
245
+
/**
 * Data needed to start a single analyze action.
 * @internal
 */
export interface PluginExecutionContext {
	/** Name of the plugin module, in human-readable form. */
	pluginModuleName: string;
	/** File system path the plugin module resolved to. */
	pluginModulePath: string;
	/** Settings specific to this plugin; handed to the hook factory. */
	settings: unknown;
	/** Config file path in which this action was declared. */
	configFilePath: string;
	/** Temporary directory into which the archive is extracted. */
	archiveTempDir: string;
	/** The complete, resolved configuration. */
	config: Config;
}
264
+
/**
 * Event map for the {@link ./nitpicker.ts!Nitpicker} event emitter.
 *
 * Consumers can listen to these events via `nitpicker.on('writeFile', ...)`.
 * @see {@link ./nitpicker.ts!Nitpicker} which extends `TypedAwaitEventEmitter<NitpickerEvent>`
 */
export interface NitpickerEvent {
	/**
	 * Emitted after the archive file has been successfully written to disk.
	 */
	writeFile: {
		/** Absolute path to the written `.nitpicker` archive file. */
		filePath: string;
	};

	/**
	 * Emitted when a non-fatal error occurs during analysis.
	 */
	error: {
		/** Human-readable error description. */
		message: string;
		/** Original Error object, or `null` if unavailable. */
		error: Error | null;
	};
}
@@ -0,0 +1,28 @@
1
+ import { describe, it, expect, vi } from 'vitest';
2
+
3
+ import { UrlEventBus } from './url-event-bus.js';
4
+
// Behavioral tests for UrlEventBus: a single `'url'` event whose payload is
// the URL string, delivered to every registered listener.
describe('UrlEventBus', () => {
	it('emits and receives url events', async () => {
		const emitter = new UrlEventBus();
		const handler = vi.fn();

		emitter.on('url', handler);
		await emitter.emit('url', 'https://example.com/');

		expect(handler).toHaveBeenCalledOnce();
		expect(handler).toHaveBeenCalledWith('https://example.com/');
	});

	it('supports multiple listeners', async () => {
		const emitter = new UrlEventBus();
		const handler1 = vi.fn();
		const handler2 = vi.fn();

		emitter.on('url', handler1);
		emitter.on('url', handler2);
		await emitter.emit('url', 'https://example.com/page');

		// Each listener must be invoked exactly once AND receive the payload;
		// a call-count check alone would pass even if the URL were dropped.
		expect(handler1).toHaveBeenCalledOnce();
		expect(handler1).toHaveBeenCalledWith('https://example.com/page');
		expect(handler2).toHaveBeenCalledOnce();
		expect(handler2).toHaveBeenCalledWith('https://example.com/page');
	});
});
@@ -0,0 +1,33 @@
1
+ import { TypedAwaitEventEmitter as EventEmitter } from '@d-zero/shared/typed-await-event-emitter';
2
+
/**
 * Events understood by {@link UrlEventBus}.
 *
 * There is exactly one event at present: a notification about a URL.
 */
export interface UrlEventBusEvent {
	/**
	 * Fired when a URL has been discovered or is being processed.
	 * Listeners receive the URL's href as a string.
	 */
	url: string;
}
15
+
/**
 * Strongly typed event bus that carries URL discovery notifications.
 *
 * It connects Worker threads with the main thread:
 *
 * - **Worker side**: the each-page worker emits `'url'` on a bus local to the
 *   Worker. The Worker entry point ({@link ./worker/worker.ts}) subscribes to
 *   it and relays every event to the main thread with `parentPort.postMessage`.
 *
 * - **Main-thread side**: {@link ./worker/run-in-worker.ts!runInWorker} is given
 *   a bus of its own and re-emits each `'url'` message that arrives from the Worker.
 *
 * Thanks to this indirection, plugin code is identical whether it runs in a
 * Worker thread or directly in the main thread (when `useWorker` is `false`).
 * @see {@link ./worker/worker.ts} for Worker-side forwarding
 * @see {@link ./worker/run-in-worker.ts!runInWorker} for main-thread re-emission
 */
export class UrlEventBus extends EventEmitter<UrlEventBusEvent> {}
@@ -0,0 +1,155 @@
import type { UrlEventBus } from '../url-event-bus.js';

import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { Worker } from 'node:worker_threads';

import { runner } from './runner.js';
7
+
/**
 * Absolute file system path of this module.
 *
 * Derived with `fileURLToPath` rather than `URL#pathname`: `pathname` keeps
 * percent-encoding (e.g. `%20` for a space) and yields `/C:/...` on Windows,
 * neither of which is a usable file system path.
 */
const __filename = fileURLToPath(import.meta.url);
/** Directory containing this module; `worker.js` is resolved relative to it. */
const __dirname = path.dirname(__filename);

/** Resolved path to the compiled Worker thread entry point ({@link ./worker.ts}). */
const workerPath = path.resolve(__dirname, 'worker.js');

/**
 * Feature flag controlling whether plugin execution uses Worker threads.
 * When `true` (default), each plugin invocation runs in an isolated Worker,
 * providing memory isolation and crash protection. When `false`, the runner
 * executes directly in the main thread (useful for debugging).
 */
const useWorker = true;
21
+
/**
 * Argument object accepted by {@link runInWorker}.
 * @template I - Shape of the extra data that gets merged into `workerData`.
 */
export interface RunInWorkerParams<I extends Record<string, unknown>> {
	/** Absolute path of the module the Worker should execute. */
	readonly filePath: string;
	/** Zero-based position of the current item, used for progress display. */
	readonly num: number;
	/** Number of items in the whole batch. */
	readonly total: number;
	/** Bus on which `'url'` messages coming from the Worker are re-emitted. */
	readonly emitter: UrlEventBus;
	/** Plugin-specific data handed to the Worker module. */
	readonly initialData: I;
}
38
+
/**
 * Spawns a Worker thread to execute a plugin module and returns its result.
 *
 * Each call creates a new Worker, passes `filePath`, `num`, `total` and the
 * caller's `initialData` via `workerData`, and listens for messages until the
 * Worker signals completion.
 *
 * ## Message protocol
 *
 * The Worker sends two types of messages:
 * - `{ type: 'url', url: string }` - URL discovery notification, forwarded to the emitter
 * - `{ type: 'finish', result: R }` - Execution complete, resolves the Promise
 *
 * ## Settlement
 *
 * The returned Promise settles exactly once:
 * - it resolves with `result` when the Worker posts `'finish'`;
 * - it rejects with an `Error` when the Worker emits `'error'` or
 *   `'messageerror'`, when the process emits one of the watched signals or
 *   events (`SIGABRT`, `SIGLOST`, `SIGQUIT`, `disconnect`, `exit`,
 *   `uncaughtException`, `uncaughtExceptionMonitor`, `unhandledRejection`),
 *   or when the Worker exits before posting `'finish'`.
 *
 * In every case the Worker is terminated and all listeners registered by this
 * call are removed.
 *
 * ## Fallback mode
 *
 * When `useWorker` is `false`, execution delegates directly to
 * {@link ./runner.ts!runner} in the main thread.
 * @template I - Shape of the additional data merged into `workerData`.
 * @template R - Return type expected from the Worker module.
 * @param params - Parameters containing file path, progress info, emitter, and data.
 * @returns The result produced by the Worker module's default export.
 * @see {@link ./worker.ts} for the Worker-side entry point
 * @see {@link ./runner.ts!runner} for the direct (non-Worker) execution path
 */
export function runInWorker<I extends Record<string, unknown>, R>(
	params: RunInWorkerParams<I>,
): Promise<R> {
	const { filePath, num, total, emitter, initialData } = params;
	if (useWorker) {
		const worker = new Worker(workerPath, {
			workerData: {
				filePath,
				num,
				total,
				...initialData,
			},
		});
		return new Promise<R>((resolve, reject) => {
			// Several independent events can end this run (finish message, worker
			// error, worker exit, process signal). Only the first one may settle
			// the Promise and tear the Worker down.
			let settled = false;

			/** Removes every process-level listener registered by this call. */
			function detachProcessListeners() {
				process.removeListener('SIGABRT', killWorker);
				process.removeListener('SIGLOST', killWorker);
				process.removeListener('SIGQUIT', killWorker);
				process.removeListener('disconnect', killWorker);
				process.removeListener('exit', killWorker);
				process.removeListener('uncaughtException', killWorker);
				process.removeListener('uncaughtExceptionMonitor', killWorker);
				process.removeListener('unhandledRejection', killWorker);
			}

			/** Terminates the Worker and releases every listener attached to it. */
			async function disposeWorker() {
				detachProcessListeners();
				await worker.terminate();
				// Worker listeners stay attached until termination has finished so a
				// late 'error' event cannot surface as an unhandled emitter error.
				worker.removeAllListeners();
				worker.unref();
			}

			/**
			 * Aborts the run. Shared by process signals/events and Worker error
			 * events, so at runtime `sig` is not always a signal name: it can also
			 * be an exit code, a rejection reason, or an `Error`.
			 */
			async function killWorker(sig: NodeJS.Signals) {
				if (settled) {
					return;
				}
				settled = true;
				const cause: unknown = sig;
				try {
					await disposeWorker();
				} catch {
					// Already failing: the original cause below is what the caller needs.
				}

				// eslint-disable-next-line no-console
				console.log(`Kill Worker cause: %O`, cause);
				// Keep a real Error (and its stack) when there is one; otherwise wrap
				// the cause so callers never receive a bare string rejection.
				reject(cause instanceof Error ? cause : new Error(`SIG: ${String(cause)}`));
			}

			// Changed from old issue
			// @see https://github.com/nodejs/node-v0.x-archive/issues/6339
			// process.once('SIGKILL', killWorker);
			// process.once('SIGSTOP', killWorker);

			process.once('SIGABRT', killWorker);
			process.once('SIGLOST', killWorker);
			process.once('SIGQUIT', killWorker);
			process.once('disconnect', killWorker);
			process.once('exit', killWorker);
			process.once('uncaughtException', killWorker);
			process.once('uncaughtExceptionMonitor', killWorker);
			process.once('unhandledRejection', killWorker);
			worker.once('error', killWorker);
			worker.once('messageerror', killWorker);

			// A Worker that stops without posting 'finish' (for example because the
			// plugin called `process.exit()`) would otherwise leave the Promise
			// pending forever.
			worker.once('exit', (exitCode: number) => {
				if (settled) {
					return;
				}
				settled = true;
				detachProcessListeners();
				worker.removeAllListeners();
				reject(new Error(`Worker exited with code ${exitCode} before sending a result`));
			});

			worker.on('message', async (message) => {
				if (!message) {
					return;
				}
				if (message.type === 'url') {
					void emitter.emit('url', message.url);
					return;
				}
				if (message.type !== 'finish' || settled) {
					return;
				}
				settled = true;
				try {
					await disposeWorker();
				} catch (error) {
					reject(error);
					return;
				}
				resolve(message.result);
			});
		});
	}

	return runner<I, R>(
		{
			filePath,
			num,
			total,
			...initialData,
		},
		emitter,
	);
}
@@ -0,0 +1,38 @@
1
+ import type { WorkerData } from './types.js';
2
+ import type { UrlEventBus } from '../url-event-bus.js';
3
+
/**
 * Dynamically imports and executes a plugin worker module.
 *
 * This function is the final execution step in the worker pipeline:
 * 1. It `import()`s the module specified by `data.filePath`
 * 2. Calls the module's default export with the remaining data, emitter,
 *    and progress counters
 * 3. Returns the result
 *
 * The module receives a copy of `data` without the `filePath` field, so it
 * only sees its own domain-specific data (plus `num` and `total`). The
 * caller's `data` object is left untouched.
 *
 * This function is called both from the Worker thread ({@link ./worker.ts})
 * and as a direct fallback when `useWorker` is `false` in {@link ./run-in-worker.ts}.
 * @template I - Shape of the caller's initial data (minus the worker infrastructure fields).
 * @template R - Return type of the plugin module's default export.
 * @param data - Combined worker data containing the module path and plugin-specific payload.
 * @param emitter - Event emitter for URL discovery notifications (forwarded to the plugin).
 * @returns The result produced by the dynamically imported module.
 * @throws {TypeError} If the imported module's default export is not a function.
 * @see {@link ./types.ts!WorkerData} for the data shape
 * @see {@link ../page-analysis-worker.ts} for a typical module loaded by this runner
 */
export async function runner<I extends Record<string, unknown>, R>(
	data: WorkerData<I>,
	emitter: UrlEventBus,
): Promise<R> {
	// Split off `filePath` into a fresh object instead of deleting it from the
	// argument: the caller's object must not change behind its back.
	const { filePath, ...payload } = data;
	const { num, total } = data;

	const mod = await import(filePath);
	const fn: unknown = mod.default;
	if (typeof fn !== 'function') {
		throw new TypeError(
			`Module "${filePath}" must export a function as its default export`,
		);
	}
	const result = await fn(payload, emitter, num, total);
	return result;
}
@@ -0,0 +1,25 @@
/**
 * Data payload passed to a Worker thread via `workerData`.
 *
 * This type merges the worker infrastructure fields (`filePath`, `num`, `total`)
 * with the caller-supplied initial data (`I`). The `filePath` points to the
 * module whose default export will be invoked by the {@link ./runner.ts!runner}.
 * @template I - Shape of the additional data the caller provides
 *   (e.g. {@link ../page-analysis-worker.ts!PageAnalysisWorkerData}).
 * @see {@link ./run-in-worker.ts!runInWorker} for where this payload is constructed
 * @see {@link ./runner.ts!runner} for where it is consumed
 */
export type WorkerData<I extends Record<string, unknown>> = {
	/**
	 * Absolute path to the compiled JS module to `import()` and execute.
	 * The module must export a default function matching the
	 * `(data, emitter, num, total) => Promise<R>` signature.
	 */
	filePath: string;

	/** Zero-based index of the current item in the batch (for progress tracking). */
	num: number;

	/** Total number of items in the batch (for progress tracking). */
	total: number;
} & I;
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Worker thread entry point for plugin execution.
3
+ *
4
+ * This module runs inside a `new Worker(...)` created by {@link ./run-in-worker.ts!runInWorker}.
5
+ * It uses a message-based protocol to communicate with the main thread:
6
+ *
7
+ * ## Message protocol (Worker -> Main)
8
+ *
9
+ * | `type` | Payload | Description |
10
+ * |------------|------------------|------------------------------------------------|
11
+ * | `'url'` | `{ url: string }` | A URL was discovered during analysis |
12
+ * | `'finish'` | `{ result: R }` | Analysis complete, carries the plugin result |
13
+ *
14
+ * ## Lifecycle
15
+ *
16
+ * 1. Reads `workerData` containing the module path + plugin data
17
+ * 2. Creates a local {@link ../url-event-bus.ts!UrlEventBus} that forwards `'url'`
18
+ * events to the main thread via `parentPort.postMessage`
19
+ * 3. Delegates to {@link ./runner.ts!runner} for dynamic import and execution
20
+ * 4. Posts the `'finish'` message with the result
21
+ *
22
+ * The main thread terminates this Worker after receiving `'finish'`.
23
+ * @see {@link ./run-in-worker.ts!runInWorker} for the main-thread counterpart
24
+ * @see {@link ./runner.ts!runner} for the actual module loading logic
25
+ * @module
26
+ */
27
+
28
+ import type { WorkerData } from './types.js';
29
+
30
+ import { parentPort, workerData } from 'node:worker_threads';
31
+
32
+ import { UrlEventBus } from '../url-event-bus.js';
33
+
34
+ import { runner } from './runner.js';
35
+
// Fail fast: this module only makes sense as a Worker entry point. Checking
// once up front avoids importing and running the plugin just to discover at
// the end that there is no parent port to report the result to.
if (!parentPort) {
	throw new Error('Use in worker thread');
}
const port = parentPort;

/** Module path, progress counters and plugin payload supplied by the main thread. */
const data: WorkerData<Record<string, unknown>> = workerData;

const emitter = new UrlEventBus();

/**
 * Forward URL discovery events from the plugin to the main thread.
 * The main thread's {@link ../url-event-bus.ts!UrlEventBus} re-emits these
 * so that the orchestrator can track discovered URLs.
 */
emitter.on('url', (url) => {
	port.postMessage({
		type: 'url',
		url,
	});
});

// Import and execute the module named by `data.filePath`.
const result = await runner(data, emitter);

// Signal completion; the main thread terminates this Worker afterwards.
port.postMessage({
	type: 'finish',
	result,
});
package/tsconfig.json ADDED
@@ -0,0 +1,11 @@
1
+ {
2
+ "extends": "../../../tsconfig.json",
3
+ "compilerOptions": {
4
+ "composite": true,
5
+ "outDir": "./lib",
6
+ "rootDir": "./src"
7
+ },
8
+ "references": [{ "path": "../types" }, { "path": "../crawler" }],
9
+ "include": ["./src/**/*"],
10
+ "exclude": ["node_modules", "lib", "./src/**/*.spec.ts"]
11
+ }