@jackwener/opencli 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLI-CREATOR.md +88 -0
- package/README.md +3 -3
- package/README.zh-CN.md +3 -3
- package/SKILL.md +4 -2
- package/dist/build-manifest.d.ts +11 -0
- package/dist/build-manifest.js +101 -0
- package/dist/cli-manifest.json +1273 -0
- package/dist/clis/bilibili/following.d.ts +1 -0
- package/dist/clis/bilibili/following.js +41 -0
- package/dist/engine.d.ts +13 -0
- package/dist/engine.js +122 -17
- package/dist/pipeline/steps/fetch.js +57 -1
- package/dist/registry.d.ts +3 -0
- package/package.json +3 -2
- package/src/build-manifest.ts +133 -0
- package/src/clis/bilibili/following.ts +50 -0
- package/src/engine.ts +123 -17
- package/src/pipeline/steps/fetch.ts +63 -1
- package/src/registry.ts +3 -0
package/src/engine.ts
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* CLI discovery: finds YAML/TS CLI definitions and registers them.
|
|
3
|
+
*
|
|
4
|
+
* Supports two modes:
|
|
5
|
+
* 1. FAST PATH (manifest): If a pre-compiled cli-manifest.json exists,
|
|
6
|
+
* registers all YAML commands instantly without runtime YAML parsing.
|
|
7
|
+
* TS modules are loaded lazily only when their command is executed.
|
|
8
|
+
* 2. FALLBACK (filesystem scan): Traditional runtime discovery for development.
|
|
3
9
|
*/
|
|
4
10
|
|
|
5
11
|
import * as fs from 'node:fs';
|
|
@@ -9,25 +15,99 @@ import { type CliCommand, type Arg, Strategy, registerCommand } from './registry
|
|
|
9
15
|
import type { IPage } from './types.js';
|
|
10
16
|
import { executePipeline } from './pipeline.js';
|
|
11
17
|
|
|
18
|
+
/** Set of TS module paths that have been loaded */
|
|
19
|
+
const _loadedModules = new Set<string>();
|
|
20
|
+
|
|
21
|
+
/**
 * Discover and register CLI commands from each of the given directories.
 *
 * For every directory, a pre-compiled `cli-manifest.json` located one level
 * above it is tried first. When that manifest is missing, unreadable, or not
 * a JSON array, the directory is scanned at runtime via `discoverClisFromFs`
 * so that a broken manifest never leaves the directory without commands.
 *
 * @param dirs Directories that contain per-site CLI definitions.
 */
export async function discoverClis(...dirs: string[]): Promise<void> {
  for (const dir of dirs) {
    const manifestPath = path.resolve(dir, '..', 'cli-manifest.json');
    // Fast path: a usable manifest makes the filesystem scan unnecessary.
    if (fs.existsSync(manifestPath) && loadFromManifest(manifestPath, dir)) {
      continue;
    }
    // Fallback: runtime filesystem scan (no manifest, or manifest unusable).
    await discoverClisFromFs(dir);
  }
}

/** Extract a printable message from an arbitrary thrown value. */
function manifestErrorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Map a manifest strategy name onto the `Strategy` table.
 * A missing or unknown name resolves to `Strategy.COOKIE`.
 */
function resolveManifestStrategy(name: unknown) {
  const key = String(name ?? 'cookie').toUpperCase();
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- dynamic lookup by manifest-provided name
  return (Strategy as any)[key] ?? Strategy.COOKIE;
}

/**
 * Register a single manifest entry.
 *
 * - `yaml` entries carry their pipeline inline and are registered as-is.
 * - `ts` entries with a `modulePath` are registered as stubs flagged with
 *   `_lazy` / `_modulePath`; the path is resolved relative to `clisDir`.
 * - Any other entry is ignored.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- manifest entries are unvalidated JSON
function registerManifestEntry(entry: any, clisDir: string): void {
  if (entry.type === 'yaml') {
    const cmd: CliCommand = {
      site: entry.site,
      name: entry.name,
      description: entry.description ?? '',
      domain: entry.domain,
      strategy: resolveManifestStrategy(entry.strategy),
      browser: entry.browser,
      args: entry.args ?? [],
      columns: entry.columns,
      pipeline: entry.pipeline,
      timeoutSeconds: entry.timeout,
      source: `manifest:${entry.site}/${entry.name}`,
    };
    registerCommand(cmd);
    return;
  }

  if (entry.type === 'ts' && entry.modulePath) {
    const modulePath = path.resolve(clisDir, entry.modulePath);
    const cmd: CliCommand = {
      site: entry.site,
      name: entry.name,
      description: entry.description ?? '',
      domain: entry.domain,
      strategy: resolveManifestStrategy(entry.strategy),
      browser: entry.browser ?? true,
      args: entry.args ?? [],
      columns: entry.columns,
      timeoutSeconds: entry.timeout,
      source: modulePath,
      // Mark as lazy: the stub records which module to load later.
      _lazy: true,
      _modulePath: modulePath,
    };
    registerCommand(cmd);
  }
}

/**
 * Fast path: register commands from a pre-compiled manifest.
 *
 * The manifest must be a JSON array of entries. A failure to read or parse
 * the file is reported on stderr and signalled through the return value so
 * the caller can fall back to another discovery mode. A failure in a single
 * entry is reported and skipped without affecting the remaining entries.
 *
 * @param manifestPath Absolute path of the manifest file.
 * @param clisDir Directory against which `modulePath` values are resolved.
 * @returns `true` when the manifest was read and iterated, `false` when it
 *          could not be used at all.
 */
function loadFromManifest(manifestPath: string, clisDir: string): boolean {
  let manifest: unknown;
  try {
    manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf-8'));
  } catch (err: unknown) {
    process.stderr.write(`Warning: failed to load manifest ${manifestPath}: ${manifestErrorMessage(err)}\n`);
    return false;
  }

  if (!Array.isArray(manifest)) {
    process.stderr.write(`Warning: failed to load manifest ${manifestPath}: expected a JSON array\n`);
    return false;
  }

  for (const entry of manifest) {
    try {
      registerManifestEntry(entry, clisDir);
    } catch (err: unknown) {
      // One malformed entry must not discard the rest of the manifest.
      process.stderr.write(
        `Warning: skipping invalid manifest entry in ${manifestPath}: ${manifestErrorMessage(err)}\n`,
      );
    }
  }
  return true;
}
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* Fallback: traditional filesystem scan (used during development with tsx).
|
|
94
|
+
*/
|
|
95
|
+
async function discoverClisFromFs(dir: string): Promise<void> {
|
|
96
|
+
if (!fs.existsSync(dir)) return;
|
|
97
|
+
const promises: Promise<any>[] = [];
|
|
98
|
+
for (const site of fs.readdirSync(dir)) {
|
|
99
|
+
const siteDir = path.join(dir, site);
|
|
100
|
+
if (!fs.statSync(siteDir).isDirectory()) continue;
|
|
101
|
+
for (const file of fs.readdirSync(siteDir)) {
|
|
102
|
+
const filePath = path.join(siteDir, file);
|
|
103
|
+
if (file.endsWith('.yaml') || file.endsWith('.yml')) {
|
|
104
|
+
registerYamlCli(filePath, site);
|
|
105
|
+
} else if (file.endsWith('.js') && !file.endsWith('.d.js')) {
|
|
106
|
+
promises.push(
|
|
107
|
+
import(`file://${filePath}`).catch((err: any) => {
|
|
108
|
+
process.stderr.write(`Warning: failed to load module ${filePath}: ${err.message}\n`);
|
|
109
|
+
})
|
|
110
|
+
);
|
|
31
111
|
}
|
|
32
112
|
}
|
|
33
113
|
}
|
|
@@ -80,12 +160,38 @@ function registerYamlCli(filePath: string, defaultSite: string): void {
|
|
|
80
160
|
}
|
|
81
161
|
}
|
|
82
162
|
|
|
163
|
+
/**
|
|
164
|
+
* Execute a CLI command. Handles lazy-loading of TS modules.
|
|
165
|
+
*/
|
|
83
166
|
export async function executeCommand(
|
|
84
167
|
cmd: CliCommand,
|
|
85
168
|
page: IPage | null,
|
|
86
169
|
kwargs: Record<string, any>,
|
|
87
170
|
debug: boolean = false,
|
|
88
171
|
): Promise<any> {
|
|
172
|
+
// Lazy-load TS module on first execution
|
|
173
|
+
if ((cmd as any)._lazy && (cmd as any)._modulePath) {
|
|
174
|
+
const modulePath = (cmd as any)._modulePath;
|
|
175
|
+
if (!_loadedModules.has(modulePath)) {
|
|
176
|
+
try {
|
|
177
|
+
await import(`file://${modulePath}`);
|
|
178
|
+
_loadedModules.add(modulePath);
|
|
179
|
+
} catch (err: any) {
|
|
180
|
+
throw new Error(`Failed to load adapter module ${modulePath}: ${err.message}`);
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
// After loading, the module's cli() call will have updated the registry
|
|
184
|
+
// with the real func/pipeline. Re-fetch the command.
|
|
185
|
+
const { getRegistry, fullName } = await import('./registry.js');
|
|
186
|
+
const updated = getRegistry().get(fullName(cmd));
|
|
187
|
+
if (updated && updated.func) {
|
|
188
|
+
return updated.func(page, kwargs, debug);
|
|
189
|
+
}
|
|
190
|
+
if (updated && updated.pipeline) {
|
|
191
|
+
return executePipeline(page, updated.pipeline, { args: kwargs, debug });
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
|
|
89
195
|
if (cmd.func) {
|
|
90
196
|
return cmd.func(page, kwargs, debug);
|
|
91
197
|
}
|
|
@@ -56,6 +56,46 @@ async function fetchSingle(
|
|
|
56
56
|
`);
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
+
/**
 * Batch fetch: run every request inside the page through a single
 * `page.evaluate()` call instead of one call per URL.
 *
 * The generated script starts up to `concurrency` workers that pull URLs
 * from a shared index, fetch each with `credentials: "include"`, and store
 * the parsed JSON at the URL's position. A request that fails (network
 * error or non-JSON body) yields `{ error: <message> }` in its slot.
 *
 * @param page Browser page used to evaluate the generated script.
 * @param urls Fully rendered request URLs; result order matches this order.
 * @param method HTTP method applied to every request.
 * @param headers Request headers applied to every request.
 * @param concurrency Maximum number of in-flight requests; values below 1
 *                    or NaN are treated as 1 so every URL is still fetched.
 * @returns Whatever `page.evaluate()` resolves to — the script returns one
 *          result per URL.
 */
async function fetchBatchInBrowser(
  page: IPage, urls: string[], method: string,
  headers: Record<string, string>, concurrency: number,
): Promise<any[]> {
  // Every value spliced into the script is serialized as a JS literal so
  // quotes or backslashes in the input cannot break or alter the script.
  const urlsJs = JSON.stringify(urls);
  const methodJs = JSON.stringify(method);
  const headersJs = JSON.stringify(headers);
  // Zero workers would return an array of holes; always run at least one.
  const workerLimit = Number.isNaN(concurrency) || concurrency < 1 ? 1 : Math.floor(concurrency);

  return page.evaluate(`
    async () => {
      const urls = ${urlsJs};
      const method = ${methodJs};
      const headers = ${headersJs};
      const concurrency = ${workerLimit};

      const results = new Array(urls.length);
      let idx = 0;

      async function worker() {
        while (idx < urls.length) {
          const i = idx++;
          try {
            const resp = await fetch(urls[i], { method, headers, credentials: "include" });
            results[i] = await resp.json();
          } catch (e) {
            results[i] = { error: e instanceof Error ? e.message : String(e) };
          }
        }
      }

      const workers = Array.from({ length: Math.min(concurrency, urls.length) }, () => worker());
      await Promise.all(workers);
      return results;
    }
  `);
}
|
|
98
|
+
|
|
59
99
|
export async function stepFetch(page: IPage | null, params: any, data: any, args: Record<string, any>): Promise<any> {
|
|
60
100
|
const urlOrObj = typeof params === 'string' ? params : (params?.url ?? '');
|
|
61
101
|
const method = params?.method ?? 'GET';
|
|
@@ -66,9 +106,31 @@ export async function stepFetch(page: IPage | null, params: any, data: any, args
|
|
|
66
106
|
// Per-item fetch when data is array and URL references item
|
|
67
107
|
if (Array.isArray(data) && urlTemplate.includes('item')) {
|
|
68
108
|
const concurrency = typeof params?.concurrency === 'number' ? params.concurrency : 5;
|
|
109
|
+
|
|
110
|
+
// Render all URLs upfront
|
|
111
|
+
const renderedHeaders: Record<string, string> = {};
|
|
112
|
+
for (const [k, v] of Object.entries(headers)) renderedHeaders[k] = String(render(v, { args, data }));
|
|
113
|
+
const renderedParams: Record<string, string> = {};
|
|
114
|
+
for (const [k, v] of Object.entries(queryParams)) renderedParams[k] = String(render(v, { args, data }));
|
|
115
|
+
|
|
116
|
+
const urls = data.map((item: any, index: number) => {
|
|
117
|
+
let url = String(render(urlTemplate, { args, data, item, index }));
|
|
118
|
+
if (Object.keys(renderedParams).length > 0) {
|
|
119
|
+
const qs = new URLSearchParams(renderedParams).toString();
|
|
120
|
+
url = `${url}${url.includes('?') ? '&' : '?'}${qs}`;
|
|
121
|
+
}
|
|
122
|
+
return url;
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
// BATCH IPC: if browser is available, batch all fetches into a single evaluate() call
|
|
126
|
+
if (page !== null) {
|
|
127
|
+
return fetchBatchInBrowser(page, urls, method.toUpperCase(), renderedHeaders, concurrency);
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Non-browser: use concurrent pool (already optimized)
|
|
69
131
|
return mapConcurrent(data, concurrency, async (item, index) => {
|
|
70
132
|
const itemUrl = String(render(urlTemplate, { args, data, item, index }));
|
|
71
|
-
return fetchSingle(
|
|
133
|
+
return fetchSingle(null, itemUrl, method, queryParams, headers, args, data);
|
|
72
134
|
});
|
|
73
135
|
}
|
|
74
136
|
const url = render(urlOrObj, { args, data });
|
package/src/registry.ts
CHANGED