@nitpicker/cli 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/LICENSE +191 -0
- package/README.md +21 -0
- package/bin/nitpicker.js +3 -0
- package/lib/analyze/log.d.ts +12 -0
- package/lib/analyze/log.js +24 -0
- package/lib/analyze/logger.d.ts +10 -0
- package/lib/analyze/logger.js +16 -0
- package/lib/audit.d.ts +7 -0
- package/lib/audit.js +42 -0
- package/lib/bootstrap.d.ts +9 -0
- package/lib/bootstrap.js +22 -0
- package/lib/commands/analyze.d.ts +35 -0
- package/lib/commands/analyze.js +86 -0
- package/lib/commands/crawl.d.ts +125 -0
- package/lib/commands/crawl.js +249 -0
- package/lib/commands/report.d.ts +45 -0
- package/lib/commands/report.js +62 -0
- package/lib/crawl/debug.d.ts +6 -0
- package/lib/crawl/debug.js +10 -0
- package/lib/crawl/diff.d.ts +10 -0
- package/lib/crawl/diff.js +38 -0
- package/lib/crawl/event-assignments.d.ts +17 -0
- package/lib/crawl/event-assignments.js +33 -0
- package/lib/index.d.ts +1 -0
- package/lib/index.js +28 -0
- package/lib/log.d.ts +8 -0
- package/lib/log.js +45 -0
- package/lib/logger.d.ts +10 -0
- package/lib/logger.js +16 -0
- package/package.json +45 -0
- package/src/analyze/log.spec.ts +88 -0
- package/src/analyze/log.ts +28 -0
- package/src/commands/analyze.ts +107 -0
- package/src/commands/crawl.ts +303 -0
- package/src/commands/report.ts +73 -0
- package/src/crawl/debug.ts +14 -0
- package/src/crawl/diff.ts +48 -0
- package/src/crawl/event-assignments.ts +48 -0
- package/src/index.ts +32 -0
- package/tsconfig.json +15 -0
- package/tsconfig.tsbuildinfo +1 -0
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { readList } from '@d-zero/readtext/list';
|
|
3
|
+
import { CrawlerOrchestrator } from '@nitpicker/crawler';
|
|
4
|
+
import { log, verbosely } from '../crawl/debug.js';
|
|
5
|
+
import { diff } from '../crawl/diff.js';
|
|
6
|
+
import { eventAssignments } from '../crawl/event-assignments.js';
|
|
7
|
+
/**
 * Command definition for the `crawl` sub-command.
 * Defines all CLI flags with their types, defaults, and descriptions.
 * @see {@link crawl} for the main entry point that dispatches to startCrawl/resumeCrawl/diff
 */
export const commandDef = {
    desc: 'Crawl a website',
    flags: {
        resume: {
            type: 'string',
            shortFlag: 'R',
            desc: 'Resume crawling from a stub file',
        },
        interval: {
            type: 'number',
            shortFlag: 'I',
            // Fixed typo: "crawles" -> "crawling"
            desc: 'An interval time on request when crawling',
        },
        image: {
            type: 'boolean',
            default: true,
            desc: 'Getting images (use --no-image to disable)',
        },
        fetchExternal: {
            type: 'boolean',
            default: true,
            desc: 'Fetch external links (use --no-fetch-external to disable)',
        },
        parallels: {
            type: 'number',
            shortFlag: 'P',
            desc: 'Number of parallel scraping',
        },
        recursive: {
            type: 'boolean',
            default: true,
            desc: 'Recursive crawling (use --no-recursive to disable)',
        },
        scope: {
            type: 'string',
            desc: 'Set hosts and URLs as scope',
        },
        exclude: {
            type: 'string',
            isMultiple: true,
            desc: 'Excluding page URL path (glob pattern)',
        },
        excludeKeyword: {
            type: 'string',
            isMultiple: true,
            desc: 'Exclude keyword in document of page',
        },
        excludeUrl: {
            type: 'string',
            isMultiple: true,
            desc: 'Exclude external URL prefix',
        },
        disableQueries: {
            type: 'boolean',
            shortFlag: 'Q',
            desc: 'Disable queries that the URL has',
        },
        imageFileSizeThreshold: {
            type: 'number',
            desc: 'Image file size threshold',
        },
        single: {
            type: 'boolean',
            desc: 'Single page mode',
        },
        maxExcludedDepth: {
            type: 'number',
            desc: 'Avoid crawling depths above a set number',
        },
        retry: {
            type: 'number',
            default: 3,
            desc: 'Number of retry attempts per URL on scrape failure',
        },
        list: {
            type: 'string',
            isMultiple: true,
            desc: 'Running only each page from the list',
        },
        listFile: {
            type: 'string',
            desc: 'Running only each page from the list file',
        },
        userAgent: {
            type: 'string',
            desc: 'Custom User-Agent string for HTTP requests',
        },
        ignoreRobots: {
            type: 'boolean',
            desc: 'Ignore robots.txt restrictions (use responsibly)',
        },
        verbose: {
            type: 'boolean',
            desc: 'Output verbose log to standard out',
        },
        silent: {
            type: 'boolean',
            desc: 'No output log to standard out',
        },
        diff: {
            type: 'boolean',
            desc: 'Diff mode',
        },
    },
};
|
|
117
|
+
/**
 * Sets up signal handlers for graceful shutdown and starts event logging.
 *
 * Registers SIGINT/SIGBREAK/SIGHUP/SIGABRT handlers that kill zombie
 * Chromium processes before exiting, then delegates to {@link eventAssignments}
 * for progress output.
 * @param trigger - Display label for the crawl (URL or stub file path)
 * @param orchestrator - The initialized CrawlerOrchestrator instance
 * @param config - The resolved archive configuration
 * @param logType - Output verbosity level
 */
function run(trigger, orchestrator, config, logType) {
    const onKillSignal = () => {
        // Reap browser child processes before the process dies.
        orchestrator.garbageCollect();
        process.exit();
    };
    for (const signal of ['SIGINT', 'SIGBREAK', 'SIGHUP', 'SIGABRT']) {
        process.on(signal, onKillSignal);
    }
    const configLines = Object.entries(config).map(([key, value]) => ` ${key}: ${value}`);
    const head = [`🐳 ${trigger} (New scraping)`, ...configLines];
    return eventAssignments(orchestrator, head, logType);
}
|
|
143
|
+
/**
 * Starts a fresh crawl session for the given URLs.
 *
 * Creates a CrawlerOrchestrator, runs the crawl, writes the archive,
 * and cleans up browser processes. Exits with code 1 if errors occurred.
 * @param siteUrl - One or more root URLs to crawl
 * @param flags - Parsed CLI flags from the `crawl` command
 */
async function startCrawl(siteUrl, flags) {
    const errors = [];
    const useList = !!flags.list?.length;
    const logType = flags.verbose ? 'verbose' : flags.silent ? 'silent' : 'normal';
    const options = {
        ...flags,
        scope: flags.scope?.split(',').map((part) => part.trim()),
        list: useList,
        // A fixed page list implies no link-following.
        recursive: useList ? false : flags.recursive,
    };
    const orchestrator = await CrawlerOrchestrator.crawling(siteUrl, options, (instance, config) => {
        run(config.baseUrl, instance, config, logType).catch((error) => errors.push(error));
    });
    await orchestrator.write();
    orchestrator.garbageCollect();
    if (errors.length > 0) {
        formatCrawlErrors(errors);
        process.exit(1);
    }
}
|
|
169
|
+
/**
 * Resumes a previously interrupted crawl from a stub file (temporary directory).
 *
 * Restores the crawl state from the archive, applies any flag overrides,
 * and continues crawling from where the previous session left off.
 * @param stubFilePath - Path to the stub file or temporary directory to resume from
 * @param flags - Parsed CLI flags from the `crawl` command
 */
async function resumeCrawl(stubFilePath, flags) {
    const errors = [];
    const absFilePath = path.isAbsolute(stubFilePath)
        ? stubFilePath
        : path.resolve(process.cwd(), stubFilePath);
    const logType = flags.verbose ? 'verbose' : flags.silent ? 'silent' : 'normal';
    const options = {
        ...flags,
        scope: flags.scope?.split(',').map((part) => part.trim()),
        // Resuming never re-enters list mode.
        list: false,
    };
    const orchestrator = await CrawlerOrchestrator.resume(absFilePath, options, (instance, config) => {
        run(stubFilePath, instance, config, logType).catch((error) => errors.push(error));
    });
    await orchestrator.write();
    orchestrator.garbageCollect();
    if (errors.length > 0) {
        formatCrawlErrors(errors);
        process.exit(1);
    }
}
|
|
196
|
+
/**
 * Main entry point for the `crawl` CLI command.
 *
 * Dispatches to one of four modes based on the flags:
 * 1. `--diff` mode: Compares two archive files and outputs URL lists
 * 2. `--resume` mode: Resumes a previously interrupted crawl
 * 3. `--list-file` / `--list` mode: Crawls a pre-defined URL list (non-recursive)
 * 4. Default mode: Crawls from a single root URL
 * @param args - Positional arguments (typically one or two URLs/file paths)
 * @param flags - Parsed CLI flags from the `crawl` command
 */
export async function crawl(args, flags) {
    if (flags.verbose && !flags.silent) {
        verbosely();
    }
    log('Options: %O', flags);
    if (flags.diff) {
        const a = args[0];
        const b = args[1];
        if (!a || !b) {
            throw new Error('Please provide two file paths to compare');
        }
        await diff(a, b);
        return;
    }
    if (flags.resume) {
        await resumeCrawl(flags.resume, flags);
        return;
    }
    if (flags.listFile) {
        const list = await readList(path.resolve(process.cwd(), flags.listFile));
        // Pass a copy instead of mutating the caller-owned flags object
        // (previously `flags.list = list` reassigned the parameter in place).
        await startCrawl(list, { ...flags, list });
        return;
    }
    if (flags.list && flags.list.length > 0) {
        const pageList = [...flags.list, ...args];
        await startCrawl(pageList, flags);
        return;
    }
    const siteUrl = args[0];
    if (siteUrl) {
        await startCrawl([siteUrl], flags);
        return;
    }
    // NOTE(review): reaching here (no URL and no mode flag) is a silent no-op —
    // confirm whether the CLI parser already reports missing arguments upstream.
}
|
|
242
|
+
/**
 * Prints a summary of errors that occurred during crawling to stderr.
 *
 * Previously only the error count was printed; each error's message is now
 * listed as well so failures are actionable without re-running verbosely.
 * @param errStack - Array of errors collected during the crawl session
 */
function formatCrawlErrors(errStack) {
    // eslint-disable-next-line no-console
    console.error(`\nCompleted with ${errStack.length} error(s).`);
    for (const error of errStack) {
        // Non-Error values can end up here (rejections may carry anything).
        // eslint-disable-next-line no-console
        console.error(`  - ${error instanceof Error ? error.message : String(error)}`);
    }
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import type { InferFlags } from '@d-zero/roar';
/**
 * Command definition for the `report` sub-command.
 * @see {@link report} for the main entry point
 */
export declare const commandDef: {
    readonly desc: "Generate a Google Sheets report";
    readonly flags: {
        readonly sheet: {
            readonly shortFlag: "S";
            readonly type: "string";
            readonly isRequired: true;
            readonly desc: "Google Sheets URL";
        };
        readonly credentials: {
            readonly shortFlag: "C";
            readonly type: "string";
            readonly default: "./credentials.json";
            readonly desc: "Path to credentials file (keep this file secure and out of version control)";
        };
        readonly config: {
            readonly shortFlag: "c";
            readonly type: "string";
            readonly desc: "Path to config file";
        };
        readonly limit: {
            readonly shortFlag: "l";
            readonly type: "number";
            readonly default: 100000;
            readonly desc: "Limit number of rows";
        };
    };
};
/** Flag value types inferred from {@link commandDef} by the CLI flag parser. */
type ReportFlags = InferFlags<typeof commandDef.flags>;
/**
 * Main entry point for the `report` CLI command.
 *
 * Reads a `.nitpicker` archive and generates a Google Sheets report
 * by delegating to `@nitpicker/report-google-sheets`. Requires a Google
 * Sheets URL and a service account credentials file.
 * @param args - Positional arguments; first argument is the `.nitpicker` file path
 * @param flags - Parsed CLI flags from the `report` command
 */
export declare function report(args: string[], flags: ReportFlags): Promise<void>;
export {};
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { report as runReport } from '@nitpicker/report-google-sheets';
|
|
2
|
+
/**
 * Command definition for the `report` sub-command.
 * @see {@link report} for the main entry point
 */
export const commandDef = {
    desc: 'Generate a Google Sheets report',
    flags: {
        sheet: {
            shortFlag: 'S',
            type: 'string',
            // The only required flag: there is no sensible default target sheet.
            isRequired: true,
            desc: 'Google Sheets URL',
        },
        credentials: {
            shortFlag: 'C',
            type: 'string',
            default: './credentials.json',
            desc: 'Path to credentials file (keep this file secure and out of version control)',
        },
        config: {
            shortFlag: 'c',
            type: 'string',
            desc: 'Path to config file',
        },
        limit: {
            shortFlag: 'l',
            type: 'number',
            // Default cap of 100,000 rows on report output.
            default: 100_000,
            desc: 'Limit number of rows',
        },
    },
};
|
|
34
|
+
/**
 * Main entry point for the `report` CLI command.
 *
 * Reads a `.nitpicker` archive and generates a Google Sheets report
 * by delegating to `@nitpicker/report-google-sheets`. Requires a Google
 * Sheets URL and a service account credentials file.
 *
 * Missing required arguments previously caused a silent no-op; a short
 * hint is now written to stderr so the user knows why nothing happened.
 * @param args - Positional arguments; first argument is the `.nitpicker` file path
 * @param flags - Parsed CLI flags from the `report` command
 */
export async function report(args, flags) {
    const filePath = args[0];
    if (!filePath) {
        process.stderr.write('No archive file path given. Usage: nitpicker report <file> --sheet <url>\n');
        return;
    }
    const sheetUrl = flags.sheet;
    if (!sheetUrl) {
        process.stderr.write('No Google Sheets URL given. Pass it with --sheet (-S).\n');
        return;
    }
    const credentialFilePath = flags.credentials;
    // `||` intentionally maps an empty string to null as well.
    const configFilePath = flags.config || null;
    const limit = flags.limit;
    await runReport({
        filePath,
        sheetUrl,
        credentialFilePath,
        configPath: configFilePath,
        limit,
    });
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import debug from 'debug';
/** Namespaced debug logger for the CLI layer. */
export const log = debug('Nitpicker').extend('CLI');
/**
 * Enables verbose debug output for the whole Nitpicker namespace,
 * excluding the noisiest crawler/scraper sub-channels.
 * Idempotent: does nothing when the namespace is already enabled.
 */
export function verbosely() {
    if (debug.enabled('Nitpicker')) {
        return;
    }
    debug.enable('Nitpicker*,-Nitpicker:Crawler:Deal,-Nitpicker:Scraper:DOM:Details:*,-Nitpicker:Scraper:Resource:*');
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * Compares two `.nitpicker` archives and writes their URL lists to `a.txt` and `b.txt`.
 *
 * Extracts active internal HTML pages (2xx/3xx status) from both archives,
 * sorts them in natural URL order, and writes to the current working directory.
 * The output files can then be compared using standard diff tools.
 * Note: overwrites any existing `a.txt` / `b.txt` in the current working directory.
 * @param a - File path to the first `.nitpicker` archive
 * @param b - File path to the second `.nitpicker` archive
 */
export declare function diff(a: string, b: string): Promise<void>;
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import fs from 'node:fs/promises';
|
|
2
|
+
import { sortUrl } from '@d-zero/shared/sort-url';
|
|
3
|
+
import { Archive } from '@nitpicker/crawler';
|
|
4
|
+
/**
 * Compares two `.nitpicker` archives and writes their URL lists to `a.txt` and `b.txt`.
 *
 * Extracts active internal HTML pages (2xx/3xx status) from both archives,
 * sorts them in natural URL order, and writes to the current working directory.
 * The output files can then be compared using standard diff tools.
 *
 * Both archives are now closed even when reading or writing throws
 * (previously a failure mid-way leaked the open archive handles).
 * @param a - File path to the first `.nitpicker` archive
 * @param b - File path to the second `.nitpicker` archive
 */
export async function diff(a, b) {
    const archiveA = await Archive.open({ filePath: a });
    try {
        const archiveB = await Archive.open({ filePath: b });
        try {
            const pagesA = await archiveA.getPages();
            const pagesB = await archiveB.getPages();
            const listA = pagesA.filter(isActive).map((page) => page.url.withoutHashAndAuth);
            const listB = pagesB.filter(isActive).map((page) => page.url.withoutHashAndAuth);
            // NOTE(review): items are mapped through `.withoutHashAndAuth` a second time
            // here — presumably sortUrl() yields URL-like objects; verify against its API.
            const sortedA = sortUrl(listA).map((url) => url.withoutHashAndAuth);
            const sortedB = sortUrl(listB).map((url) => url.withoutHashAndAuth);
            await fs.writeFile('a.txt', sortedA.join('\n'), 'utf8');
            await fs.writeFile('b.txt', sortedB.join('\n'), 'utf8');
        } finally {
            await archiveB.close();
        }
    } finally {
        await archiveA.close();
    }
}
|
|
27
|
+
/**
 * Filters for active internal HTML pages (status 200-399, non-external).
 * @param page - The page to check
 * @returns truthy if the page is an active internal HTML page
 */
function isActive(page) {
    if (!page.isPage() || page.isExternal) {
        return false;
    }
    const { status } = page;
    return status && status >= 200 && status < 400;
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { CrawlerOrchestrator } from '@nitpicker/crawler';
/** Output verbosity levels accepted by {@link eventAssignments}. */
type LogType = 'verbose' | 'normal' | 'silent';
/**
 * Registers event listeners on the CrawlerOrchestrator for CLI progress display.
 *
 * Outputs the initial configuration summary to stderr, then listens for
 * `error`, `writeFileStart`, and `writeFileEnd` events. Returns a Promise
 * that resolves when the archive file has been written or rejects on error.
 *
 * WHY stderr: The crawl progress output is informational and should not
 * interfere with stdout, which may be piped to other tools.
 * @param orchestrator - The CrawlerOrchestrator to listen on
 * @param initialLog - Lines to display at the start (URL + config summary)
 * @param logType - Verbosity level; `'silent'` suppresses all output
 */
export declare function eventAssignments(orchestrator: CrawlerOrchestrator, initialLog: string[], logType: LogType): Promise<void>;
export {};
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import c from 'ansi-colors';
|
|
2
|
+
/**
 * Registers event listeners on the CrawlerOrchestrator for CLI progress display.
 *
 * Outputs the initial configuration summary to stderr, then listens for
 * `error`, `writeFileStart`, and `writeFileEnd` events. Returns a Promise
 * that resolves when the archive file has been written or rejects on error.
 *
 * WHY stderr: The crawl progress output is informational and should not
 * interfere with stdout, which may be piped to other tools.
 *
 * FIX: `'silent'` previously returned immediately, which both dropped error
 * propagation and reported completion before the archive was written. Silent
 * mode now suppresses only the output; errors and completion are still tracked.
 * @param orchestrator - The CrawlerOrchestrator to listen on
 * @param initialLog - Lines to display at the start (URL + config summary)
 * @param logType - Verbosity level; `'silent'` suppresses all output
 */
export async function eventAssignments(orchestrator, initialLog, logType) {
    const silent = logType === 'silent';
    return new Promise((resolve, reject) => {
        if (!silent) {
            const [firstLine, ...restLines] = initialLog;
            process.stderr.write([c.bold(firstLine ?? ''), ...restLines.map((l) => c.dim(l))].join('\n') + '\n');
        }
        orchestrator.on('error', (error) => {
            reject(error);
        });
        orchestrator.on('writeFileStart', ({ filePath }) => {
            if (!silent) {
                process.stderr.write(`📥 Writing to: ${c.cyan(filePath)}\n`);
            }
        });
        orchestrator.on('writeFileEnd', ({ filePath }) => {
            if (!silent) {
                process.stderr.write(`📥 Done: ${c.cyan(filePath)}\n`);
            }
            resolve();
        });
    });
}
|
package/lib/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/lib/index.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { parseCli } from '@d-zero/roar';
|
|
2
|
+
import { analyze, commandDef as analyzeDef } from './commands/analyze.js';
|
|
3
|
+
import { crawl, commandDef as crawlDef } from './commands/crawl.js';
|
|
4
|
+
import { report, commandDef as reportDef } from './commands/report.js';
|
|
5
|
+
process.title = 'Nitpicker CLI';
const cli = parseCli({
    name: 'nitpicker',
    commands: {
        crawl: crawlDef,
        analyze: analyzeDef,
        report: reportDef,
    },
    onError: () => true,
});
// Dispatch table keyed by sub-command name; parseCli only yields one of the
// commands declared above (or no command at all), so an unmatched lookup is a no-op.
const handlers = {
    crawl,
    analyze,
    report,
};
const handler = handlers[cli.command];
if (handler) {
    await handler(cli.args, cli.flags);
}
|
package/lib/log.d.ts
ADDED
package/lib/log.js
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import Logger from './logger.js';
|
|
2
|
+
/**
 * Streams audit progress from a Nitpicker instance to stderr via {@link Logger}.
 *
 * Returns a Promise that only ever rejects (on the instance's `error` event);
 * it deliberately never resolves — presumably the caller races it against the
 * audit completing. TODO(review): confirm the caller's contract.
 * @param nitpicker - Event source emitting `error`, `audit`, and `writeFile` events
 *   (assumed EventEmitter-like — confirm against @nitpicker/core)
 * @param startLog - Optional line(s) written once before listening begins
 * @param verbose - When true, each audit event is logged as its own line instead
 *   of redrawing an aggregated status block
 */
export async function log(nitpicker, startLog, verbose) {
    return new Promise((_, reject) => {
        // Latest log line per plugin module; used to redraw the whole status block.
        const plugins = new Map();
        const logger = new Logger({
            // Animation frames for the %dots% / %propeller% placeholders.
            dots: ['. ', '.. ', '...'],
            propeller: ['\\', '|', '/', '-'],
        });
        if (verbose) {
            logger.verbosely();
        }
        if (startLog) {
            logger.write(startLog);
            logger.close();
        }
        nitpicker.on('error', (err) => {
            // Unwrap the underlying error when present; fall back to the message.
            const error = err.error || new Error(err.message);
            reject(error);
        });
        nitpicker.on('audit', (plugin) => {
            if (verbose) {
                logger.write(`${plugin.module}: ${plugin.log}`);
                return;
            }
            // Aggregate mode: remember this plugin's latest line and redraw all of them.
            plugins.set(plugin.module, plugin.log);
            const logs = [...plugins].map(([mod, log]) => {
                const content = log === 'Finished' ? '✨ Finished' : `%propeller% ${log}`;
                return ` 🔬 ${mod}: ${content}`;
            });
            const heading = ' Audit through%dots%';
            logger.write([heading, ...logs]);
        });
        nitpicker.on('writeFile', ({ filePath }) => {
            // close() before and after so the file notice is not overdrawn by the
            // animated status block.
            logger.close();
            logger.write(` 📥 Write file: ${filePath}`);
            logger.close();
        });
    });
}
|
package/lib/logger.d.ts
ADDED
package/lib/logger.js
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * Minimal stderr logger: prints lines verbatim after stripping the
 * `%dots%` / `%propeller%` animation placeholders.
 */
export default class Logger {
    // Toggled by verbosely(); not read within this class.
    verbose = false;
    // eslint-disable-next-line @typescript-eslint/no-unused-vars
    constructor(_options) { }
    /** Switches the logger into verbose mode. */
    verbosely() {
        this.verbose = true;
    }
    /**
     * Writes a single line or an array of lines to stderr,
     * one line per write, placeholders removed.
     */
    write(lines) {
        const rows = Array.isArray(lines) ? lines : [lines];
        for (const row of rows) {
            const cleaned = row.replaceAll(/%dots%|%propeller%/g, '');
            process.stderr.write(`${cleaned}\n`);
        }
    }
    /** No-op in this minimal implementation. */
    close() { }
}
|
package/package.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@nitpicker/cli",
|
|
3
|
+
"version": "0.4.2",
|
|
4
|
+
"description": "Web site crawler, analyzer, and reporter CLI",
|
|
5
|
+
"author": "D-ZERO",
|
|
6
|
+
"license": "Apache-2.0",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/d-zero-dev/nitpicker.git",
|
|
10
|
+
"directory": "packages/@nitpicker/cli"
|
|
11
|
+
},
|
|
12
|
+
"publishConfig": {
|
|
13
|
+
"access": "public"
|
|
14
|
+
},
|
|
15
|
+
"type": "module",
|
|
16
|
+
"exports": {
|
|
17
|
+
".": {
|
|
18
|
+
"import": "./lib/index.js",
|
|
19
|
+
"types": "./lib/index.d.ts"
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
"bin": {
|
|
23
|
+
"nitpicker": "./bin/nitpicker.js"
|
|
24
|
+
},
|
|
25
|
+
"scripts": {
|
|
26
|
+
"build": "tsc",
|
|
27
|
+
"clean": "tsc --build --clean"
|
|
28
|
+
},
|
|
29
|
+
"dependencies": {
|
|
30
|
+
"@d-zero/dealer": "1.6.3",
|
|
31
|
+
"@d-zero/readtext": "1.1.19",
|
|
32
|
+
"@d-zero/roar": "2.0.0",
|
|
33
|
+
"@d-zero/shared": "0.20.0",
|
|
34
|
+
"@nitpicker/core": "0.4.2",
|
|
35
|
+
"@nitpicker/crawler": "0.4.2",
|
|
36
|
+
"@nitpicker/report-google-sheets": "0.4.2",
|
|
37
|
+
"ansi-colors": "4.1.3",
|
|
38
|
+
"debug": "4.4.3",
|
|
39
|
+
"enquirer": "2.4.1"
|
|
40
|
+
},
|
|
41
|
+
"devDependencies": {
|
|
42
|
+
"@types/debug": "4.1.12"
|
|
43
|
+
},
|
|
44
|
+
"gitHead": "14066d0e7b9e652ddd7a5abb5f20bba682f09c0c"
|
|
45
|
+
}
|