@mdvp/cli 1.9.1 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.mjs +29 -2
- package/package.json +1 -1
package/cli.mjs
CHANGED
|
@@ -299,6 +299,9 @@ ${BOLD}Account${R}
|
|
|
299
299
|
submit <domain> --local Submit to local crawler node instead
|
|
300
300
|
|
|
301
301
|
${BOLD}Crawler${R}
|
|
302
|
+
recrawl Re-queue existing sites for recrawl (updates data)
|
|
303
|
+
recrawl linear.app Re-queue specific sites
|
|
304
|
+
recrawl --limit=100 Re-queue oldest N sites
|
|
302
305
|
hire Become a crawler node (downloads + runs worker)
|
|
303
306
|
hire --daemon Run crawler in background
|
|
304
307
|
hire --tabs=4 Run with 4 parallel tabs
|
|
@@ -357,8 +360,15 @@ async function cmdHire(opts) {
|
|
|
357
360
|
await download(extractUrl, `${dir}/extract.js`)
|
|
358
361
|
.catch(() => {})
|
|
359
362
|
|
|
360
|
-
|
|
361
|
-
|
|
363
|
+
writeFileSync(`${dir}/package.json`, '{"type":"module","dependencies":{"puppeteer":"*"}}')
|
|
364
|
+
|
|
365
|
+
const needsInstall = !existsSync(`${dir}/node_modules/puppeteer`)
|
|
366
|
+
if (needsInstall) {
|
|
367
|
+
process.stderr.write(`${DIM}Installing puppeteer (first run, ~30s)...${R}\n`)
|
|
368
|
+
await new Promise((res, rej) => {
|
|
369
|
+
const child = spawn("npm", ["install", "--prefer-offline"], { cwd: dir, stdio: "inherit" })
|
|
370
|
+
child.on("exit", (code) => code === 0 ? res() : rej(new Error(`npm install failed (${code})`)))
|
|
371
|
+
})
|
|
362
372
|
}
|
|
363
373
|
|
|
364
374
|
const apiUrl = local ? "http://localhost:7227" : API
|
|
@@ -391,6 +401,20 @@ async function cmdHire(opts) {
|
|
|
391
401
|
}
|
|
392
402
|
}
|
|
393
403
|
|
|
404
|
+
async function cmdRecrawl(opts, domains) {
|
|
405
|
+
const { apiKey } = opts
|
|
406
|
+
if (!apiKey) { console.error(`${RED}No API key. Run: npx @mdvp/cli login${R}`); process.exit(1) }
|
|
407
|
+
|
|
408
|
+
const limit = opts.limit || 50
|
|
409
|
+
const body = domains.length > 0 ? { domains } : { limit }
|
|
410
|
+
process.stderr.write(`${DIM}queuing ${domains.length > 0 ? domains.join(', ') : `up to ${limit} oldest`} for recrawl...${R}\n`)
|
|
411
|
+
|
|
412
|
+
const d = await apiPost("/crawl/recrawl", body, apiKey)
|
|
413
|
+
if (d.error) { console.error(`${RED}Error: ${d.error}${R}`); process.exit(1) }
|
|
414
|
+
console.log(`\n ${BOLD}Queued!${R} ${d.queued} sites scheduled for recrawl.`)
|
|
415
|
+
console.log(` ${DIM}Run: npx @mdvp/cli hire --tabs=4 to start crawling${R}\n`)
|
|
416
|
+
}
|
|
417
|
+
|
|
394
418
|
async function cmdSubmit(domain, opts) {
|
|
395
419
|
const { apiKey, local } = opts
|
|
396
420
|
domain = parseDomain(domain)
|
|
@@ -452,6 +476,9 @@ async function main() {
|
|
|
452
476
|
const d = await apiGet("/dataset/stats")
|
|
453
477
|
if (opts.json) { console.log(JSON.stringify(d, null, 2)); return }
|
|
454
478
|
console.log(`\n Total sites: ${d.totalSites}\n Average score: ${d.averageScore}\n`)
|
|
479
|
+
} else if (cmd === "recrawl") {
|
|
480
|
+
const domains = positional.slice(1) // optional: npx @mdvp/cli recrawl linear.app vercel.com
|
|
481
|
+
await cmdRecrawl({ ...opts, limit: parseInt(flagValues.limit || "50") || 50 }, domains)
|
|
455
482
|
} else if (cmd === "hire" || cmd === "apply" || cmd === "serve") {
|
|
456
483
|
await cmdHire(opts)
|
|
457
484
|
} else if (cmd === "label" && arg1) {
|