@lenne.tech/cli 1.12.0 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -0
- package/build/commands/config/validate.js +36 -2
- package/build/commands/fullstack/init.js +44 -18
- package/build/commands/server/create.js +53 -26
- package/build/commands/tools/crawl.js +307 -0
- package/build/extensions/server.js +72 -43
- package/build/lib/browser-fetcher.js +139 -0
- package/build/lib/crawler.js +661 -0
- package/docs/LT-ECOSYSTEM-GUIDE.md +1 -0
- package/docs/commands.md +57 -1
- package/docs/lt.config.md +37 -0
- package/package.json +8 -1
package/README.md
CHANGED
|
@@ -94,6 +94,37 @@ $ lt fullstack init --name myapp --framework-mode vendor --dry-run --noConfirm
|
|
|
94
94
|
$ lt server create --name myapp --framework-mode vendor
|
|
95
95
|
```
|
|
96
96
|
|
|
97
|
+
### Experimental: `--next` (nest-base)
|
|
98
|
+
|
|
99
|
+
Both `lt fullstack init` and `lt server create` support an experimental
|
|
100
|
+
`--next` flag that swaps the API template from
|
|
101
|
+
[`nest-server-starter`](https://github.com/lenneTech/nest-server-starter)
|
|
102
|
+
(MongoDB) to [`nest-base`](https://github.com/lenneTech/nest-base) — a new
|
|
103
|
+
NestJS stack on **Bun + Prisma 7 + Postgres + Better-Auth** with a built-in
|
|
104
|
+
`/dev` cockpit.
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
# experimental standalone api
|
|
108
|
+
$ lt server create my-next-api --next --noConfirm
|
|
109
|
+
|
|
110
|
+
# experimental fullstack (nuxt + nest-base)
|
|
111
|
+
$ lt fullstack init --name my-next-app --frontend nuxt --next --noConfirm
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
When `--next` is set the CLI:
|
|
115
|
+
|
|
116
|
+
- clones `nest-base` instead of `nest-server-starter`,
|
|
117
|
+
- forces `--api-mode Rest` and `--framework-mode npm` (other modes are not
|
|
118
|
+
applicable to nest-base),
|
|
119
|
+
- skips `nest-server-starter`-specific patching (`config.env.ts`,
|
|
120
|
+
`main.ts` Swagger setup, `meta.json`, `lt.config.json`),
|
|
121
|
+
- skips the workspace install in fullstack mode — run `pnpm install` for
|
|
122
|
+
the frontend and `bun install` for the API yourself.
|
|
123
|
+
|
|
124
|
+
This option is **experimental** and may change. The downstream `lt server
|
|
125
|
+
module/object/addProp/test/permissions` commands target the classic
|
|
126
|
+
`nest-server` layout and are not yet compatible with `nest-base`.
|
|
127
|
+
|
|
97
128
|
### Working on an existing project
|
|
98
129
|
|
|
99
130
|
All `lt server …` commands (module, object, addProp, test, permissions)
|
|
@@ -114,6 +114,22 @@ const KNOWN_KEYS = {
|
|
|
114
114
|
path: 'string',
|
|
115
115
|
},
|
|
116
116
|
},
|
|
117
|
+
tools: {
|
|
118
|
+
crawl: {
|
|
119
|
+
concurrency: 'number',
|
|
120
|
+
depth: 'number|all',
|
|
121
|
+
includeImages: 'boolean',
|
|
122
|
+
includeSitemap: 'boolean',
|
|
123
|
+
maxPages: 'number',
|
|
124
|
+
noConfirm: 'boolean',
|
|
125
|
+
out: 'string',
|
|
126
|
+
prune: 'boolean',
|
|
127
|
+
renderJs: 'boolean',
|
|
128
|
+
selector: 'string',
|
|
129
|
+
timeout: 'number',
|
|
130
|
+
},
|
|
131
|
+
noConfirm: 'boolean',
|
|
132
|
+
},
|
|
117
133
|
typescript: {
|
|
118
134
|
create: { author: 'string', noConfirm: 'boolean', updatePackages: 'boolean' },
|
|
119
135
|
},
|
|
@@ -165,8 +181,26 @@ function validateConfig(config, knownKeys, path = '') {
|
|
|
165
181
|
}
|
|
166
182
|
// Validate type
|
|
167
183
|
if (typeof expectedType === 'string') {
|
|
168
|
-
// Simple type check
|
|
169
|
-
if (expectedType
|
|
184
|
+
// Simple type check. `'a|b'` means union (e.g. "number|all").
|
|
185
|
+
if (expectedType.includes('|')) {
|
|
186
|
+
const tokens = expectedType.split('|').map((t) => t.trim());
|
|
187
|
+
const ok = tokens.some((token) => {
|
|
188
|
+
if (token === 'string')
|
|
189
|
+
return typeof value === 'string';
|
|
190
|
+
if (token === 'number')
|
|
191
|
+
return typeof value === 'number';
|
|
192
|
+
if (token === 'boolean')
|
|
193
|
+
return typeof value === 'boolean';
|
|
194
|
+
if (token === 'array')
|
|
195
|
+
return Array.isArray(value);
|
|
196
|
+
// Everything else is treated as a string literal enum member.
|
|
197
|
+
return value === token;
|
|
198
|
+
});
|
|
199
|
+
if (!ok) {
|
|
200
|
+
result.errors.push(`${currentPath}: expected ${tokens.join(' | ')}, got ${typeof value}`);
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
else if (expectedType === 'string' && typeof value !== 'string') {
|
|
170
204
|
result.errors.push(`${currentPath}: expected string, got ${typeof value}`);
|
|
171
205
|
}
|
|
172
206
|
else if (expectedType === 'boolean' && typeof value !== 'boolean') {
|
|
@@ -27,7 +27,7 @@ const NewCommand = {
|
|
|
27
27
|
// Info
|
|
28
28
|
info('Create a new fullstack workspace');
|
|
29
29
|
// Hint for non-interactive callers (e.g. Claude Code)
|
|
30
|
-
toolbox.tools.nonInteractiveHint('lt fullstack init --name <name> --frontend <nuxt|angular> --api-mode <Rest|GraphQL|Both> --framework-mode <npm|vendor> [--framework-upstream-branch <ref>] [--dry-run] --noConfirm');
|
|
30
|
+
toolbox.tools.nonInteractiveHint('lt fullstack init --name <name> --frontend <nuxt|angular> --api-mode <Rest|GraphQL|Both> --framework-mode <npm|vendor> [--framework-upstream-branch <ref>] [--next] [--dry-run] --noConfirm');
|
|
31
31
|
// Check git
|
|
32
32
|
if (!(yield git.gitInstalled())) {
|
|
33
33
|
return;
|
|
@@ -46,8 +46,9 @@ const NewCommand = {
|
|
|
46
46
|
const configFrontendLink = (_v = (_u = ltConfig === null || ltConfig === void 0 ? void 0 : ltConfig.commands) === null || _u === void 0 ? void 0 : _u.fullstack) === null || _v === void 0 ? void 0 : _v.frontendLink;
|
|
47
47
|
const configFrameworkMode = (_x = (_w = ltConfig === null || ltConfig === void 0 ? void 0 : ltConfig.commands) === null || _w === void 0 ? void 0 : _w.fullstack) === null || _x === void 0 ? void 0 : _x.frameworkMode;
|
|
48
48
|
// Parse CLI arguments
|
|
49
|
-
const { 'api-branch': cliApiBranch, 'api-copy': cliApiCopy, 'api-link': cliApiLink, 'api-mode': cliApiMode, 'dry-run': cliDryRun, 'framework-mode': cliFrameworkMode, 'framework-upstream-branch': cliFrameworkUpstreamBranch, frontend: cliFrontend, 'frontend-branch': cliFrontendBranch, 'frontend-copy': cliFrontendCopy, 'frontend-framework-mode': cliFrontendFrameworkMode, 'frontend-link': cliFrontendLink, git: cliGit, 'git-link': cliGitLink, name: cliName, } = parameters.options;
|
|
49
|
+
const { 'api-branch': cliApiBranch, 'api-copy': cliApiCopy, 'api-link': cliApiLink, 'api-mode': cliApiMode, 'dry-run': cliDryRun, 'framework-mode': cliFrameworkMode, 'framework-upstream-branch': cliFrameworkUpstreamBranch, frontend: cliFrontend, 'frontend-branch': cliFrontendBranch, 'frontend-copy': cliFrontendCopy, 'frontend-framework-mode': cliFrontendFrameworkMode, 'frontend-link': cliFrontendLink, git: cliGit, 'git-link': cliGitLink, name: cliName, next: cliNext, } = parameters.options;
|
|
50
50
|
const dryRun = cliDryRun === true || cliDryRun === 'true';
|
|
51
|
+
const experimental = cliNext === true || cliNext === 'true';
|
|
51
52
|
const frameworkUpstreamBranch = typeof cliFrameworkUpstreamBranch === 'string' && cliFrameworkUpstreamBranch.length > 0
|
|
52
53
|
? cliFrameworkUpstreamBranch
|
|
53
54
|
: undefined;
|
|
@@ -109,7 +110,11 @@ const NewCommand = {
|
|
|
109
110
|
// Determine API mode with priority: CLI > config > global > interactive (default: Rest)
|
|
110
111
|
const globalApiMode = config.getGlobalDefault(ltConfig, 'apiMode');
|
|
111
112
|
let apiMode;
|
|
112
|
-
if (
|
|
113
|
+
if (experimental) {
|
|
114
|
+
apiMode = 'Rest';
|
|
115
|
+
info('Using experimental nest-base template (Bun + Prisma + Postgres + Better-Auth)');
|
|
116
|
+
}
|
|
117
|
+
else if (cliApiMode) {
|
|
113
118
|
apiMode = cliApiMode;
|
|
114
119
|
}
|
|
115
120
|
else if (configApiMode) {
|
|
@@ -154,7 +159,10 @@ const NewCommand = {
|
|
|
154
159
|
//
|
|
155
160
|
// Default is still 'npm' until the vendoring pilot is fully evaluated.
|
|
156
161
|
let frameworkMode;
|
|
157
|
-
if (
|
|
162
|
+
if (experimental) {
|
|
163
|
+
frameworkMode = 'npm';
|
|
164
|
+
}
|
|
165
|
+
else if (cliFrameworkMode === 'npm' || cliFrameworkMode === 'vendor') {
|
|
158
166
|
frameworkMode = cliFrameworkMode;
|
|
159
167
|
}
|
|
160
168
|
else if (cliFrameworkMode) {
|
|
@@ -291,7 +299,10 @@ const NewCommand = {
|
|
|
291
299
|
info('Would execute:');
|
|
292
300
|
info(` 1. git clone lt-monorepo → ${projectDir}/`);
|
|
293
301
|
info(` 2. setup frontend (${frontend}) → ${projectDir}/projects/app`);
|
|
294
|
-
if (
|
|
302
|
+
if (experimental) {
|
|
303
|
+
info(` 3. clone nest-base (experimental) → ${projectDir}/projects/api`);
|
|
304
|
+
}
|
|
305
|
+
else if (frameworkMode === 'vendor') {
|
|
295
306
|
info(` 3. clone nest-server-starter → ${projectDir}/projects/api`);
|
|
296
307
|
info(` 4. clone @lenne.tech/nest-server${frameworkUpstreamBranch ? ` (branch/tag: ${frameworkUpstreamBranch})` : ''} → /tmp`);
|
|
297
308
|
info(` 5. vendor core/ + flatten-fix + codemod consumer imports`);
|
|
@@ -420,6 +431,7 @@ const NewCommand = {
|
|
|
420
431
|
apiMode,
|
|
421
432
|
branch: apiBranch,
|
|
422
433
|
copyPath: apiCopy,
|
|
434
|
+
experimental,
|
|
423
435
|
frameworkMode,
|
|
424
436
|
frameworkUpstreamBranch,
|
|
425
437
|
linkPath: apiLink,
|
|
@@ -466,15 +478,20 @@ const NewCommand = {
|
|
|
466
478
|
// instead.` and silently disables CVE overrides.
|
|
467
479
|
(0, hoist_workspace_pnpm_config_1.hoistWorkspacePnpmConfig)({ filesystem, projectDir, subProjects: ['projects/api', 'projects/app'] });
|
|
468
480
|
// Install all packages
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
481
|
+
if (!experimental) {
|
|
482
|
+
const installSpinner = spin('Install all packages');
|
|
483
|
+
try {
|
|
484
|
+
const detectedPm = toolbox.pm.detect(projectDir);
|
|
485
|
+
yield system.run(`cd ${projectDir} && ${toolbox.pm.install(detectedPm)} && ${toolbox.pm.run('init', detectedPm)}`);
|
|
486
|
+
installSpinner.succeed('Successfully installed all packages');
|
|
487
|
+
}
|
|
488
|
+
catch (err) {
|
|
489
|
+
installSpinner.fail(`Failed to install packages: ${err.message}`);
|
|
490
|
+
return;
|
|
491
|
+
}
|
|
474
492
|
}
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
return;
|
|
493
|
+
else {
|
|
494
|
+
info('Skipping workspace install — run `bun install` (api) and `pnpm install` (app) manually.');
|
|
478
495
|
}
|
|
479
496
|
// Post-install format pass. processApiMode (run earlier in
|
|
480
497
|
// setupServerForFullstack) and convertAppCloneToVendored rewrite
|
|
@@ -482,10 +499,10 @@ const NewCommand = {
|
|
|
482
499
|
// `pnpm run format:check` (multi-line arrays/imports after region
|
|
483
500
|
// stripping, import-path rewrites that now fit single-line). The
|
|
484
501
|
// formatter is only available after install, so we normalize here.
|
|
485
|
-
if (apiMode && filesystem.isDirectory(`${projectDir}/projects/api`)) {
|
|
502
|
+
if (!experimental && apiMode && filesystem.isDirectory(`${projectDir}/projects/api`)) {
|
|
486
503
|
yield toolbox.apiMode.formatProject(`${projectDir}/projects/api`);
|
|
487
504
|
}
|
|
488
|
-
if (isNuxt && filesystem.isDirectory(`${projectDir}/projects/app`)) {
|
|
505
|
+
if (!experimental && isNuxt && filesystem.isDirectory(`${projectDir}/projects/app`)) {
|
|
489
506
|
yield toolbox.apiMode.formatProject(`${projectDir}/projects/app`);
|
|
490
507
|
}
|
|
491
508
|
// Create initial commit after everything is set up
|
|
@@ -511,9 +528,18 @@ const NewCommand = {
|
|
|
511
528
|
success(`Generated fullstack workspace with ${frontend} in ${projectDir} with ${name} app in ${helper.msToMinutesAndSeconds(timer())}m.`);
|
|
512
529
|
info('');
|
|
513
530
|
info('Next:');
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
531
|
+
if (experimental) {
|
|
532
|
+
info(` $ cd ${projectDir}`);
|
|
533
|
+
info(' Frontend: cd projects/app && pnpm install');
|
|
534
|
+
info(' API: cd projects/api && bun install');
|
|
535
|
+
info(' Configure projects/api/.env (see .env.example)');
|
|
536
|
+
info(' Start Postgres + run prisma generate / migrate');
|
|
537
|
+
}
|
|
538
|
+
else {
|
|
539
|
+
info(` Run ${name}`);
|
|
540
|
+
info(` $ cd ${projectDir}`);
|
|
541
|
+
info(` $ ${toolbox.pm.run('start')}`);
|
|
542
|
+
}
|
|
517
543
|
info('');
|
|
518
544
|
if (!toolbox.parameters.options.fromGluegunMenu) {
|
|
519
545
|
process.exit();
|
|
@@ -42,6 +42,13 @@ const NewCommand = {
|
|
|
42
42
|
{ description: 'Git branch to clone from', flag: '--branch', required: false, type: 'string' },
|
|
43
43
|
{ description: 'Copy from local path instead of cloning', flag: '--copy', required: false, type: 'string' },
|
|
44
44
|
{ description: 'Symlink to local path instead of cloning', flag: '--link', required: false, type: 'string' },
|
|
45
|
+
{
|
|
46
|
+
default: false,
|
|
47
|
+
description: 'Use experimental nest-base template (Bun + Prisma + Postgres)',
|
|
48
|
+
flag: '--next',
|
|
49
|
+
required: false,
|
|
50
|
+
type: 'boolean',
|
|
51
|
+
},
|
|
45
52
|
{
|
|
46
53
|
default: false,
|
|
47
54
|
description: 'Skip all interactive prompts',
|
|
@@ -77,6 +84,7 @@ const NewCommand = {
|
|
|
77
84
|
const cliApiMode = parameters.options['api-mode'] || parameters.options.apiMode;
|
|
78
85
|
const cliFrameworkMode = parameters.options['framework-mode'];
|
|
79
86
|
const cliFrameworkUpstreamBranch = parameters.options['framework-upstream-branch'];
|
|
87
|
+
const experimental = parameters.options.next === true || parameters.options.next === 'true';
|
|
80
88
|
// Determine noConfirm with priority: CLI > config > global > default (false)
|
|
81
89
|
const noConfirm = config.getNoConfirm({
|
|
82
90
|
cliValue: cliNoConfirm,
|
|
@@ -88,7 +96,7 @@ const NewCommand = {
|
|
|
88
96
|
// Info
|
|
89
97
|
info('Create a new server');
|
|
90
98
|
// Hint for non-interactive callers (e.g. Claude Code)
|
|
91
|
-
toolbox.tools.nonInteractiveHint('lt server create --name <name> --api-mode <Rest|GraphQL|Both> --noConfirm');
|
|
99
|
+
toolbox.tools.nonInteractiveHint('lt server create --name <name> --api-mode <Rest|GraphQL|Both> [--next] --noConfirm');
|
|
92
100
|
// Check git
|
|
93
101
|
if (!(yield git.gitInstalled())) {
|
|
94
102
|
return;
|
|
@@ -150,7 +158,11 @@ const NewCommand = {
|
|
|
150
158
|
}
|
|
151
159
|
// Determine API mode with priority: CLI > config > global > interactive (default: Rest)
|
|
152
160
|
let apiMode;
|
|
153
|
-
if (
|
|
161
|
+
if (experimental) {
|
|
162
|
+
apiMode = 'Rest';
|
|
163
|
+
info('Using experimental nest-base template (Bun + Prisma + Postgres + Better-Auth)');
|
|
164
|
+
}
|
|
165
|
+
else if (cliApiMode) {
|
|
154
166
|
apiMode = cliApiMode;
|
|
155
167
|
}
|
|
156
168
|
else if (configApiMode) {
|
|
@@ -184,7 +196,10 @@ const NewCommand = {
|
|
|
184
196
|
// Determine framework consumption mode — same resolution cascade as
|
|
185
197
|
// lt fullstack init: CLI flag > lt.config > interactive (default npm).
|
|
186
198
|
let frameworkMode;
|
|
187
|
-
if (
|
|
199
|
+
if (experimental) {
|
|
200
|
+
frameworkMode = 'npm';
|
|
201
|
+
}
|
|
202
|
+
else if (cliFrameworkMode === 'npm' || cliFrameworkMode === 'vendor') {
|
|
188
203
|
frameworkMode = cliFrameworkMode;
|
|
189
204
|
}
|
|
190
205
|
else if (cliFrameworkMode) {
|
|
@@ -222,6 +237,7 @@ const NewCommand = {
|
|
|
222
237
|
branch,
|
|
223
238
|
copyPath,
|
|
224
239
|
description,
|
|
240
|
+
experimental,
|
|
225
241
|
frameworkMode,
|
|
226
242
|
frameworkUpstreamBranch,
|
|
227
243
|
linkPath,
|
|
@@ -279,36 +295,47 @@ const NewCommand = {
|
|
|
279
295
|
}
|
|
280
296
|
// Derive controller type from API mode and save project config
|
|
281
297
|
const controllerType = apiMode;
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
298
|
+
if (!experimental) {
|
|
299
|
+
// Create lt.config.json
|
|
300
|
+
const projectConfig = {
|
|
301
|
+
commands: {
|
|
302
|
+
server: {
|
|
303
|
+
module: {
|
|
304
|
+
controller: controllerType,
|
|
305
|
+
},
|
|
288
306
|
},
|
|
289
307
|
},
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
308
|
+
meta: {
|
|
309
|
+
apiMode,
|
|
310
|
+
version: '1.0.0',
|
|
311
|
+
},
|
|
312
|
+
};
|
|
313
|
+
const configPath = filesystem.path(projectDir, 'lt.config.json');
|
|
314
|
+
filesystem.write(configPath, projectConfig, { jsonIndent: 2 });
|
|
315
|
+
info('');
|
|
316
|
+
success(`Configuration saved to ${projectDir}/lt.config.json`);
|
|
317
|
+
info(` API mode: ${apiMode}`);
|
|
318
|
+
info(` Default controller type: ${controllerType}`);
|
|
319
|
+
}
|
|
302
320
|
// We're done, so show what to do next
|
|
303
321
|
info('');
|
|
304
322
|
success(`Generated ${name} server with lenne.Tech CLI ${meta.version()} in ${helper.msToMinutesAndSeconds(timer())}m.`);
|
|
305
323
|
info('');
|
|
306
324
|
info('Next:');
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
325
|
+
if (experimental) {
|
|
326
|
+
info(` Go to project directory: cd ${projectDir}`);
|
|
327
|
+
info(' Install dependencies: bun install');
|
|
328
|
+
info(' Configure .env (see .env.example)');
|
|
329
|
+
info(' Start Postgres + run prisma generate / migrate');
|
|
330
|
+
info(' Start server: bun run dev');
|
|
331
|
+
}
|
|
332
|
+
else {
|
|
333
|
+
info(' Start database server (e.g. MongoDB)');
|
|
334
|
+
info(` Check config: ${projectDir}/src/config.env.ts`);
|
|
335
|
+
info(` Go to project directory: cd ${projectDir}`);
|
|
336
|
+
info(` Run tests: ${toolbox.pm.run('test:e2e')}`);
|
|
337
|
+
info(` Start server: ${toolbox.pm.run('start')}`);
|
|
338
|
+
}
|
|
312
339
|
info('');
|
|
313
340
|
if (!toolbox.parameters.options.fromGluegunMenu) {
|
|
314
341
|
process.exit();
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// TypeScript async/await down-level helper: drives a generator to completion,
// resolving each yielded value as a promise and feeding the settled result
// (or rejection) back into the generator. Returns a promise for the
// generator's return value.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    const PromiseImpl = P || Promise;
    // Wrap plain values so every yielded item can be chained with .then().
    const asPromise = (value) => (value instanceof PromiseImpl
        ? value
        : new PromiseImpl((resolve) => { resolve(value); }));
    return new PromiseImpl((resolve, reject) => {
        // Either finish with the generator's return value or wait for the yielded one.
        const settle = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            asPromise(result.value).then(onFulfilled, onRejected);
        };
        const onFulfilled = (value) => {
            try {
                settle(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onRejected = (reason) => {
            try {
                settle(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        generator = generator.apply(thisArg, _arguments || []);
        settle(generator.next());
    });
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
const path_1 = require("path");
|
|
13
|
+
const crawler_1 = require("../../lib/crawler");
|
|
14
|
+
/**
 * Crawl a website (optionally following same-origin links up to a
 * configurable depth) and store the content as Markdown files for use
 * as a Claude Code knowledge base. Inspired by ../../../../chrome-md:
 * shares the defuddle + Turndown extraction pipeline but runs headless
 * from Node and follows links / sitemaps automatically.
 *
 * Every option is resolved through `config.getValue` from the CLI flag,
 * the `commands.tools.crawl` entry of lt.config and a built-in default.
 */
const NewCommand = {
    alias: ['cr'],
    description: 'Crawl site to Markdown',
    hidden: false,
    name: 'crawl',
    /**
     * Entry point of `lt tools crawl`.
     *
     * @param toolbox gluegun toolbox (config, filesystem, helper, parameters, print, prompt, tools)
     * @returns 'crawl' when only the JSON help was printed, 'crawl cancelled'
     *          when the confirmation was declined, `crawled N pages` on
     *          success, or undefined when the input was invalid / the crawl failed.
     */
    run: (toolbox) => __awaiter(void 0, void 0, void 0, function* () {
        const { config, filesystem, helper, parameters, print: { error, info, spin, success, warning }, prompt: { confirm }, tools, } = toolbox;
        if (tools.helpJson({
            aliases: ['cr'],
            description: 'Crawl a website into Markdown files (for Claude Code knowledge bases)',
            name: 'crawl',
            options: [
                {
                    description: 'Start URL (absolute http/https URL)',
                    flag: '--url',
                    required: true,
                    type: 'string',
                },
                {
                    default: '.',
                    description: 'Output directory (created if missing)',
                    flag: '--out',
                    type: 'string',
                },
                {
                    default: 0,
                    description: 'Link depth. 0 = only start page; 1 = + direct links; N = up to N hops; "all" (or -1) = follow every same-origin link until --max-pages is reached',
                    flag: '--depth',
                    type: 'number|all',
                },
                {
                    default: true,
                    description: 'Download images and inline them with local paths',
                    flag: '--images',
                    type: 'boolean',
                },
                {
                    default: true,
                    description: 'Also seed queue from <origin>/sitemap.xml',
                    flag: '--sitemap',
                    type: 'boolean',
                },
                {
                    default: 4,
                    description: 'Parallel HTTP requests',
                    flag: '--concurrency',
                    type: 'number',
                },
                {
                    default: 200,
                    description: 'Maximum number of pages to crawl (safety cap)',
                    flag: '--max-pages',
                    type: 'number',
                },
                {
                    description: 'CSS selector for the main content container',
                    flag: '--selector',
                    type: 'string',
                },
                {
                    default: 20000,
                    description: 'HTTP request timeout in ms',
                    flag: '--timeout',
                    type: 'number',
                },
                {
                    default: false,
                    description: 'Shortcut for --depth all (follows every same-origin link until --max-pages)',
                    flag: '--all',
                    type: 'boolean',
                },
                {
                    default: true,
                    description: "Render pages through a headless browser before extracting (for SPAs like Vue/Nuxt/React/Angular). Uses playwright-core with system Chrome / Edge, falling back to Playwright's bundled chromium. Disable with --no-render for plain HTTP fetches.",
                    flag: '--render',
                    type: 'boolean',
                },
                {
                    default: false,
                    description: 'If --render cannot find any browser, auto-install Playwright chromium (one-time ~170 MB download).',
                    flag: '--install-browser',
                    type: 'boolean',
                },
                {
                    default: true,
                    description: 'After a multi-page crawl, remove any .md or image files inside <outDir>/pages and <outDir>/images that were not written by this run. Disable with --no-prune to preserve old files.',
                    flag: '--prune',
                    type: 'boolean',
                },
                {
                    default: false,
                    description: 'Skip confirmation prompts',
                    flag: '--noConfirm',
                    type: 'boolean',
                },
            ],
        })) {
            return 'crawl';
        }
        tools.nonInteractiveHint('lt tools crawl <url> --out <dir> --depth 1 --noConfirm');
        const { options } = parameters;
        const ltConfig = config.loadConfig();
        const toolsConfig = ltConfig == null || ltConfig.commands == null ? undefined : ltConfig.commands.tools;
        const commandConfig = toolsConfig == null ? undefined : toolsConfig.crawl;
        const fromConfig = (key) => (commandConfig == null ? undefined : commandConfig[key]);
        // URL: positional argument > --url > interactive prompt.
        const urlInput = parameters.first ||
            options.url ||
            (yield helper.getInput(undefined, { name: 'Website URL', showError: false }));
        if (!urlInput) {
            error('No URL provided');
            return;
        }
        // A bare `--url` (parsed as `true`) or a numeric positional is not a
        // usable URL; reject it here instead of crashing in String#trim.
        if (typeof urlInput !== 'string') {
            error(`Invalid URL: ${urlInput}`);
            return;
        }
        const url = normalizeSeedUrl(urlInput);
        let parsedUrl;
        try {
            parsedUrl = new URL(url);
        }
        catch (_f) {
            error(`Invalid URL: ${urlInput}`);
            return;
        }
        // The crawler only speaks HTTP(S); refuse anything else up front.
        if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') {
            error(`Invalid URL: ${urlInput}`);
            return;
        }
        const depthRaw = config.getValue({
            // `--all` is a convenience shortcut for `--depth all`. It wins
            // over a numeric `--depth` so users can combine both.
            cliValue: parseBooleanFlag(options.all) === true ? 'all' : options.depth,
            configValue: fromConfig('depth'),
            defaultValue: 0,
        });
        const depth = parseDepth(depthRaw);
        // Boolean switches forward an explicit `true` as well as `false`, so
        // `--images` / `--sitemap` / `--render` / `--prune` on the command
        // line can re-enable a feature that lt.config turns off (assuming
        // config.getValue prefers a defined cliValue — as the CLI > config
        // priority used by the other commands suggests).
        const includeImages = config.getValue({
            cliValue: parseBooleanFlag(options.images),
            configValue: fromConfig('includeImages'),
            defaultValue: true,
        });
        const includeSitemap = config.getValue({
            cliValue: parseBooleanFlag(options.sitemap),
            configValue: fromConfig('includeSitemap'),
            defaultValue: true,
        });
        const concurrency = toFiniteNumber(config.getValue({
            cliValue: options.concurrency,
            configValue: fromConfig('concurrency'),
            defaultValue: 4,
        }), 4, 'concurrency', warning);
        const maxPages = toFiniteNumber(config.getValue({
            cliValue: options.maxPages !== null && options.maxPages !== undefined ? options.maxPages : options['max-pages'],
            configValue: fromConfig('maxPages'),
            defaultValue: 200,
        }), 200, 'max-pages', warning);
        const timeout = toFiniteNumber(config.getValue({
            cliValue: options.timeout,
            configValue: fromConfig('timeout'),
            defaultValue: 20000,
        }), 20000, 'timeout', warning);
        const selector = config.getValue({
            cliValue: options.selector,
            configValue: fromConfig('selector'),
        });
        // `--render` and `--prune` default ON — the common case is a
        // full SPA-aware knowledge-base crawl that stays in sync on
        // updates. `--no-render` / `--no-prune` opt out explicitly.
        const renderJs = config.getValue({
            cliValue: parseBooleanFlag(options.render),
            configValue: fromConfig('renderJs'),
            defaultValue: true,
        });
        const installBrowser = parseBooleanFlag(options['install-browser']) === true ||
            parseBooleanFlag(options.installBrowser) === true;
        const pruneOrphans = config.getValue({
            cliValue: parseBooleanFlag(options.prune),
            configValue: fromConfig('prune'),
            defaultValue: true,
        });
        const outValue = config.getValue({
            cliValue: options.out !== null && options.out !== undefined ? options.out : options.output,
            configValue: fromConfig('out'),
            defaultValue: filesystem.cwd(),
        });
        // path.resolve only accepts strings: a bare `--out` arrives as `true`
        // and a numeric directory name as a number.
        const outDir = (0, path_1.resolve)(!outValue || typeof outValue === 'boolean' ? filesystem.cwd() : String(outValue));
        const noConfirm = config.getNoConfirm({
            cliValue: options.noConfirm,
            commandConfig,
            config: ltConfig,
            parentConfig: toolsConfig,
        });
        info('');
        info(`Crawling: ${url}`);
        info(`Output: ${outDir}`);
        info(`Depth: ${depth === 'all' ? 'all (bounded by --max-pages)' : depth}`);
        info(`Sitemap: ${includeSitemap ? 'yes' : 'no'}`);
        info(`Images: ${includeImages ? 'yes' : 'no'}`);
        info(`Parallel: ${concurrency}`);
        info(`Max: ${maxPages} pages`);
        info(`Render: ${renderJs ? 'yes (headless browser)' : 'no (raw HTTP)'}`);
        info(`Prune: ${pruneOrphans ? 'yes (remove orphaned pages/images)' : 'no'}`);
        if (selector) {
            info(`Selector: ${selector}`);
        }
        info('');
        if (!noConfirm && !(yield confirm('Start crawl?'))) {
            return 'crawl cancelled';
        }
        const spinner = spin('Crawling...');
        const result = yield (0, crawler_1.crawlSite)({
            autoInstallBrowser: installBrowser,
            concurrency,
            depth,
            includeImages,
            includeSitemap,
            maxPages,
            onLog: (msg) => {
                spinner.text = msg;
            },
            outDir,
            prune: pruneOrphans,
            renderJs,
            selector,
            timeout,
            url,
        }).catch((err) => {
            spinner.fail('Crawl failed');
            error(err.message);
            return null;
        });
        if (!result) {
            // Mirror the success path's explicit exit, but report the failure
            // through a non-zero status so scripts can detect it.
            if (!options.fromGluegunMenu) {
                process.exit(1);
            }
            return;
        }
        spinner.succeed(`Crawl complete: ${result.pages.length} page(s)`);
        info('');
        if (result.indexFile) {
            success(`Overview: ${result.indexFile}`);
        }
        for (const page of result.pages.slice(0, 10)) {
            info(` - ${page.relativePath} (${page.url})`);
        }
        if (result.pages.length > 10) {
            info(` ... and ${result.pages.length - 10} more`);
        }
        if (result.pruned.length > 0) {
            info(`Pruned ${result.pruned.length} orphaned file(s)`);
            for (const path of result.pruned.slice(0, 5)) {
                info(` - ${path}`);
            }
            if (result.pruned.length > 5) {
                info(` ... and ${result.pruned.length - 5} more`);
            }
        }
        if (result.skipped.length > 0) {
            warning(`Skipped ${result.skipped.length} URL(s) (non-HTML or foreign origin)`);
        }
        if (result.errors.length > 0) {
            warning(`${result.errors.length} error(s):`);
            for (const err of result.errors.slice(0, 5)) {
                warning(` - ${err.url}: ${err.reason}`);
            }
        }
        if (!options.fromGluegunMenu) {
            process.exit();
        }
        return `crawled ${result.pages.length} pages`;
    }),
};
/**
 * Normalize a CLI boolean switch.
 *
 * @param value raw option value as delivered by the argument parser
 * @returns `true` for `true` / `'true'`, `false` for `false` / `'false'`,
 *          `undefined` for everything else (flag not given or unusable)
 */
function parseBooleanFlag(value) {
    if (value === true || value === 'true') {
        return true;
    }
    if (value === false || value === 'false') {
        return false;
    }
    return undefined;
}
/**
 * Coerce a numeric option, falling back to the default when the input
 * is not a finite number (e.g. `--concurrency abc`).
 *
 * @param value raw option value
 * @param fallback value used when `value` is not a finite number
 * @param label option name used in the warning message
 * @param warn callback that receives the warning text
 * @returns the finite number or `fallback`
 */
function toFiniteNumber(value, fallback, label, warn) {
    const parsed = Number(value);
    if (Number.isFinite(parsed)) {
        return parsed;
    }
    warn(`Invalid --${label} value "${value}" - using default ${fallback}`);
    return fallback;
}
|
|
277
|
+
/**
 * Turn user input into an absolute seed URL.
 *
 * Input that already carries a `scheme://` prefix is returned trimmed
 * and otherwise untouched, so the caller can validate or reject the
 * scheme; anything else (e.g. `example.com`, `localhost:3000`) is
 * assumed to be a host and gets `https://` prepended.
 *
 * @param raw user-supplied URL or host; non-strings are stringified,
 *            `null` / `undefined` are treated as an empty string
 * @returns the trimmed input, prefixed with `https://` when it has no scheme
 */
function normalizeSeedUrl(raw) {
    const trimmed = (raw === undefined || raw === null ? '' : String(raw)).trim();
    // Require `://` so that `localhost:3000` is not mistaken for a scheme.
    if (/^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed)) {
        return trimmed;
    }
    return `https://${trimmed}`;
}
|
|
283
|
+
/**
 * Parse the --depth parameter.
 *
 * Accepts non-negative numbers (floored to an integer), the string
 * "all" (case-insensitive) and negative values (treated as "all").
 * A numeric `Infinity` also means "all". Invalid values — `NaN`,
 * non-numeric strings, booleans, objects — fall back to `0` so the
 * crawl still runs against the seed URL.
 *
 * @param raw value from the CLI flag or lt.config
 * @returns a non-negative integer hop count, or the string 'all'
 */
function parseDepth(raw) {
    if (raw === undefined || raw === null) {
        return 0;
    }
    if (typeof raw === 'string') {
        const normalized = raw.trim().toLowerCase();
        if (normalized === 'all' || normalized === '-1') {
            return 'all';
        }
        const n = Number(normalized);
        if (!Number.isFinite(n)) {
            return 0;
        }
        return n < 0 ? 'all' : Math.floor(n);
    }
    if (typeof raw === 'number') {
        // NaN is an invalid value, not "unbounded": fall back to the seed page.
        if (Number.isNaN(raw)) {
            return 0;
        }
        if (raw < 0 || raw === Infinity) {
            return 'all';
        }
        return Math.floor(raw);
    }
    return 0;
}
|
|
307
|
+
exports.default = NewCommand;
|