@jackwener/opencli 1.7.6 → 1.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -8
- package/README.zh-CN.md +14 -8
- package/cli-manifest.json +325 -11
- package/clis/51job/company.js +125 -0
- package/clis/51job/detail.js +108 -0
- package/clis/51job/hot.js +55 -0
- package/clis/51job/search.js +79 -0
- package/clis/51job/utils.js +302 -0
- package/clis/51job/utils.test.js +69 -0
- package/clis/bilibili/video.js +11 -4
- package/clis/bilibili/video.test.js +51 -0
- package/clis/chatgpt/image.js +1 -1
- package/clis/deepseek/ask.js +19 -13
- package/clis/deepseek/ask.test.js +93 -1
- package/clis/deepseek/utils.js +108 -23
- package/clis/deepseek/utils.test.js +109 -1
- package/clis/gemini/image.js +1 -1
- package/clis/instagram/download.js +1 -1
- package/clis/twitter/likes.js +3 -2
- package/clis/twitter/search.js +4 -2
- package/clis/twitter/search.test.js +4 -0
- package/clis/twitter/shared.js +28 -0
- package/clis/twitter/shared.test.js +96 -0
- package/clis/twitter/thread.js +3 -1
- package/clis/twitter/timeline.js +3 -2
- package/clis/twitter/tweets.js +3 -2
- package/clis/twitter/tweets.test.js +1 -1
- package/clis/web/read.js +25 -5
- package/clis/web/read.test.js +76 -0
- package/clis/weread/ai-outline.js +170 -0
- package/clis/weread/ai-outline.test.js +83 -0
- package/clis/weread/book.js +57 -44
- package/clis/weread/commands.test.js +24 -0
- package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
- package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
- package/dist/src/browser/analyze.d.ts +103 -0
- package/dist/src/browser/analyze.js +230 -0
- package/dist/src/browser/analyze.test.d.ts +1 -0
- package/dist/src/browser/analyze.test.js +164 -0
- package/dist/src/browser/article-extract.d.ts +57 -0
- package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
- package/dist/src/browser/article-extract.e2e.test.js +105 -0
- package/dist/src/browser/article-extract.js +169 -0
- package/dist/src/browser/article-extract.test.d.ts +1 -0
- package/dist/src/browser/article-extract.test.js +94 -0
- package/dist/src/browser/cdp.js +11 -2
- package/dist/src/browser/verify-fixture.d.ts +59 -0
- package/dist/src/browser/verify-fixture.js +213 -0
- package/dist/src/browser/verify-fixture.test.d.ts +1 -0
- package/dist/src/browser/verify-fixture.test.js +161 -0
- package/dist/src/cli.d.ts +32 -0
- package/dist/src/cli.js +333 -43
- package/dist/src/cli.test.js +257 -1
- package/dist/src/daemon.d.ts +3 -2
- package/dist/src/daemon.js +16 -4
- package/dist/src/daemon.test.d.ts +1 -0
- package/dist/src/daemon.test.js +19 -0
- package/dist/src/download/article-download.d.ts +12 -0
- package/dist/src/download/article-download.js +141 -17
- package/dist/src/download/article-download.test.js +196 -0
- package/dist/src/download/index.js +73 -86
- package/dist/src/errors.js +4 -2
- package/dist/src/errors.test.js +13 -0
- package/dist/src/launcher.d.ts +1 -1
- package/dist/src/launcher.js +3 -3
- package/dist/src/output.js +1 -1
- package/dist/src/output.test.js +6 -0
- package/package.json +5 -1
package/dist/src/cli.test.js
CHANGED
|
@@ -17,7 +17,7 @@ vi.mock('./browser/index.js', () => {
|
|
|
17
17
|
},
|
|
18
18
|
};
|
|
19
19
|
});
|
|
20
|
-
import { createProgram, findPackageRoot, resolveBrowserVerifyInvocation } from './cli.js';
|
|
20
|
+
import { createProgram, findPackageRoot, normalizeVerifyRows, renderVerifyPreview, resolveBrowserVerifyInvocation } from './cli.js';
|
|
21
21
|
describe('resolveBrowserVerifyInvocation', () => {
|
|
22
22
|
it('prefers the built entry declared in package metadata', () => {
|
|
23
23
|
const projectRoot = path.join('repo-root');
|
|
@@ -102,6 +102,7 @@ describe('browser tab targeting commands', () => {
|
|
|
102
102
|
getActivePage: vi.fn().mockReturnValue('tab-1'),
|
|
103
103
|
getCurrentUrl: vi.fn().mockResolvedValue('https://one.example'),
|
|
104
104
|
startNetworkCapture: vi.fn().mockResolvedValue(true),
|
|
105
|
+
getCookies: vi.fn().mockResolvedValue([]),
|
|
105
106
|
evaluate: vi.fn().mockResolvedValue({ ok: true }),
|
|
106
107
|
tabs: vi.fn().mockResolvedValue([
|
|
107
108
|
{ index: 0, page: 'tab-1', url: 'https://one.example', title: 'one', active: true },
|
|
@@ -117,6 +118,15 @@ describe('browser tab targeting commands', () => {
|
|
|
117
118
|
readNetworkCapture: vi.fn().mockResolvedValue([]),
|
|
118
119
|
};
|
|
119
120
|
});
|
|
121
|
+
function lastJsonLog() {
|
|
122
|
+
const calls = consoleLogSpy.mock.calls;
|
|
123
|
+
if (calls.length === 0)
|
|
124
|
+
throw new Error('Expected at least one console.log call');
|
|
125
|
+
const last = calls[calls.length - 1][0];
|
|
126
|
+
if (typeof last !== 'string')
|
|
127
|
+
throw new Error(`Expected string arg to console.log, got ${typeof last}`);
|
|
128
|
+
return JSON.parse(last);
|
|
129
|
+
}
|
|
120
130
|
it('binds browser commands to an explicit target tab via --tab', async () => {
|
|
121
131
|
const program = createProgram('', '');
|
|
122
132
|
await program.parseAsync(['node', 'opencli', 'browser', 'eval', '--tab', 'tab-2', 'document.title']);
|
|
@@ -233,6 +243,203 @@ describe('browser tab targeting commands', () => {
|
|
|
233
243
|
expect(browserState.page?.closeTab).not.toHaveBeenCalled();
|
|
234
244
|
expect(stderrSpy.mock.calls.flat().join('\n')).toContain('Target tab tab-stale is not part of the current browser session');
|
|
235
245
|
});
|
|
246
|
+
it('browser analyze merges HttpOnly cookie names from page.getCookies and drains stale capture before verdict', async () => {
|
|
247
|
+
browserState.page = {
|
|
248
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
249
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
250
|
+
setActivePage: vi.fn(),
|
|
251
|
+
getActivePage: vi.fn().mockReturnValue('tab-1'),
|
|
252
|
+
getCurrentUrl: vi.fn().mockResolvedValue('https://target.example'),
|
|
253
|
+
startNetworkCapture: vi.fn().mockResolvedValue(true),
|
|
254
|
+
getCookies: vi.fn().mockResolvedValue([{ name: 'cf_clearance', value: 'x', domain: '.target.example' }]),
|
|
255
|
+
evaluate: vi.fn().mockResolvedValue({
|
|
256
|
+
cookieNames: [],
|
|
257
|
+
initialState: {
|
|
258
|
+
__INITIAL_STATE__: false,
|
|
259
|
+
__NUXT__: false,
|
|
260
|
+
__NEXT_DATA__: false,
|
|
261
|
+
__APOLLO_STATE__: false,
|
|
262
|
+
},
|
|
263
|
+
title: 'Target',
|
|
264
|
+
finalUrl: 'https://target.example/',
|
|
265
|
+
}),
|
|
266
|
+
tabs: vi.fn().mockResolvedValue([{ index: 0, page: 'tab-1', url: 'https://target.example', title: 'Target', active: true }]),
|
|
267
|
+
readNetworkCapture: vi.fn()
|
|
268
|
+
.mockResolvedValueOnce([
|
|
269
|
+
{
|
|
270
|
+
url: 'https://stale.example/api/old',
|
|
271
|
+
method: 'GET',
|
|
272
|
+
responseStatus: 200,
|
|
273
|
+
responseContentType: 'application/json',
|
|
274
|
+
responsePreview: '{"stale":true}',
|
|
275
|
+
},
|
|
276
|
+
])
|
|
277
|
+
.mockResolvedValueOnce([
|
|
278
|
+
{
|
|
279
|
+
url: 'https://target.example/waf',
|
|
280
|
+
method: 'GET',
|
|
281
|
+
responseStatus: 403,
|
|
282
|
+
responseContentType: 'text/html',
|
|
283
|
+
responsePreview: 'Cloudflare Ray ID',
|
|
284
|
+
},
|
|
285
|
+
]),
|
|
286
|
+
};
|
|
287
|
+
const program = createProgram('', '');
|
|
288
|
+
await program.parseAsync(['node', 'opencli', 'browser', 'analyze', 'https://target.example/']);
|
|
289
|
+
const out = lastJsonLog();
|
|
290
|
+
expect(browserState.page?.readNetworkCapture).toHaveBeenCalledTimes(2);
|
|
291
|
+
expect(out.anti_bot.vendor).toBe('cloudflare');
|
|
292
|
+
expect(out.anti_bot.evidence).toContain('cookie:cf_clearance');
|
|
293
|
+
});
|
|
294
|
+
it('browser analyze falls back to interceptor buffer when network capture is unsupported', async () => {
|
|
295
|
+
let bufferReads = 0;
|
|
296
|
+
browserState.page = {
|
|
297
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
298
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
299
|
+
setActivePage: vi.fn(),
|
|
300
|
+
getActivePage: vi.fn().mockReturnValue('tab-1'),
|
|
301
|
+
getCurrentUrl: vi.fn().mockResolvedValue('https://target.example'),
|
|
302
|
+
startNetworkCapture: vi.fn().mockResolvedValue(false),
|
|
303
|
+
getCookies: vi.fn().mockResolvedValue([{ name: 'cf_clearance', value: 'x', domain: '.target.example' }]),
|
|
304
|
+
evaluate: vi.fn().mockImplementation(async (arg) => {
|
|
305
|
+
if (typeof arg === 'string' && arg.includes('document.cookie')) {
|
|
306
|
+
return {
|
|
307
|
+
cookieNames: [],
|
|
308
|
+
initialState: {
|
|
309
|
+
__INITIAL_STATE__: false,
|
|
310
|
+
__NUXT__: false,
|
|
311
|
+
__NEXT_DATA__: false,
|
|
312
|
+
__APOLLO_STATE__: false,
|
|
313
|
+
},
|
|
314
|
+
title: 'Target',
|
|
315
|
+
finalUrl: 'https://target.example/',
|
|
316
|
+
};
|
|
317
|
+
}
|
|
318
|
+
if (typeof arg === 'string' && arg.includes('window.__opencli_net = []')) {
|
|
319
|
+
bufferReads += 1;
|
|
320
|
+
if (bufferReads === 1) {
|
|
321
|
+
return JSON.stringify([
|
|
322
|
+
{
|
|
323
|
+
url: 'https://stale.example/api/old',
|
|
324
|
+
method: 'GET',
|
|
325
|
+
status: 200,
|
|
326
|
+
size: 12,
|
|
327
|
+
ct: 'application/json',
|
|
328
|
+
body: { stale: true },
|
|
329
|
+
},
|
|
330
|
+
]);
|
|
331
|
+
}
|
|
332
|
+
return JSON.stringify([
|
|
333
|
+
{
|
|
334
|
+
url: 'https://target.example/waf',
|
|
335
|
+
method: 'GET',
|
|
336
|
+
status: 403,
|
|
337
|
+
size: 17,
|
|
338
|
+
ct: 'text/html',
|
|
339
|
+
body: 'Cloudflare Ray ID',
|
|
340
|
+
},
|
|
341
|
+
]);
|
|
342
|
+
}
|
|
343
|
+
return undefined;
|
|
344
|
+
}),
|
|
345
|
+
tabs: vi.fn().mockResolvedValue([{ index: 0, page: 'tab-1', url: 'https://target.example', title: 'Target', active: true }]),
|
|
346
|
+
readNetworkCapture: vi.fn().mockResolvedValue([]),
|
|
347
|
+
};
|
|
348
|
+
const program = createProgram('', '');
|
|
349
|
+
await program.parseAsync(['node', 'opencli', 'browser', 'analyze', 'https://target.example/']);
|
|
350
|
+
const out = lastJsonLog();
|
|
351
|
+
expect(browserState.page?.readNetworkCapture).toHaveBeenCalledTimes(2);
|
|
352
|
+
expect(bufferReads).toBe(2);
|
|
353
|
+
expect(out.anti_bot.vendor).toBe('cloudflare');
|
|
354
|
+
expect(out.anti_bot.evidence).toContain('cookie:cf_clearance');
|
|
355
|
+
expect(out.anti_bot.evidence).toContain('body:https://target.example/waf');
|
|
356
|
+
});
|
|
357
|
+
it('browser wait xhr starts capture, injects interceptor on fallback, and ignores stale ring entries', async () => {
|
|
358
|
+
browserState.page = {
|
|
359
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
360
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
361
|
+
setActivePage: vi.fn(),
|
|
362
|
+
getActivePage: vi.fn().mockReturnValue('tab-1'),
|
|
363
|
+
getCurrentUrl: vi.fn().mockResolvedValue('https://target.example'),
|
|
364
|
+
startNetworkCapture: vi.fn().mockResolvedValue(false),
|
|
365
|
+
evaluate: vi.fn().mockResolvedValue(undefined),
|
|
366
|
+
tabs: vi.fn().mockResolvedValue([{ index: 0, page: 'tab-1', url: 'https://target.example', title: 'Target', active: true }]),
|
|
367
|
+
readNetworkCapture: vi.fn()
|
|
368
|
+
.mockResolvedValueOnce([
|
|
369
|
+
{
|
|
370
|
+
url: 'https://stale.example/api/old',
|
|
371
|
+
method: 'GET',
|
|
372
|
+
responseStatus: 200,
|
|
373
|
+
responseContentType: 'application/json',
|
|
374
|
+
responsePreview: '{"stale":true}',
|
|
375
|
+
},
|
|
376
|
+
])
|
|
377
|
+
.mockResolvedValueOnce([
|
|
378
|
+
{
|
|
379
|
+
url: 'https://target.example/api/target',
|
|
380
|
+
method: 'GET',
|
|
381
|
+
responseStatus: 200,
|
|
382
|
+
responseContentType: 'application/json',
|
|
383
|
+
responsePreview: '{"ok":true}',
|
|
384
|
+
},
|
|
385
|
+
]),
|
|
386
|
+
};
|
|
387
|
+
const program = createProgram('', '');
|
|
388
|
+
await program.parseAsync(['node', 'opencli', 'browser', 'wait', 'xhr', '/api/target', '--timeout', '900']);
|
|
389
|
+
const out = lastJsonLog();
|
|
390
|
+
expect(browserState.page?.startNetworkCapture).toHaveBeenCalledTimes(1);
|
|
391
|
+
expect(browserState.page?.evaluate).toHaveBeenCalledWith(expect.stringContaining('window.__opencli_net'));
|
|
392
|
+
expect(browserState.page?.readNetworkCapture).toHaveBeenCalledTimes(2);
|
|
393
|
+
expect(out.matched.url).toBe('https://target.example/api/target');
|
|
394
|
+
});
|
|
395
|
+
it('browser wait xhr reads interceptor buffer when network capture is unsupported', async () => {
|
|
396
|
+
let bufferReads = 0;
|
|
397
|
+
browserState.page = {
|
|
398
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
399
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
400
|
+
setActivePage: vi.fn(),
|
|
401
|
+
getActivePage: vi.fn().mockReturnValue('tab-1'),
|
|
402
|
+
getCurrentUrl: vi.fn().mockResolvedValue('https://target.example'),
|
|
403
|
+
startNetworkCapture: vi.fn().mockResolvedValue(false),
|
|
404
|
+
evaluate: vi.fn().mockImplementation(async (arg) => {
|
|
405
|
+
if (typeof arg === 'string' && arg.includes('window.__opencli_net = []')) {
|
|
406
|
+
bufferReads += 1;
|
|
407
|
+
if (bufferReads === 1) {
|
|
408
|
+
return JSON.stringify([
|
|
409
|
+
{
|
|
410
|
+
url: 'https://stale.example/api/old',
|
|
411
|
+
method: 'GET',
|
|
412
|
+
status: 200,
|
|
413
|
+
size: 12,
|
|
414
|
+
ct: 'application/json',
|
|
415
|
+
body: { stale: true },
|
|
416
|
+
},
|
|
417
|
+
]);
|
|
418
|
+
}
|
|
419
|
+
return JSON.stringify([
|
|
420
|
+
{
|
|
421
|
+
url: 'https://target.example/api/target',
|
|
422
|
+
method: 'GET',
|
|
423
|
+
status: 200,
|
|
424
|
+
size: 11,
|
|
425
|
+
ct: 'application/json',
|
|
426
|
+
body: { ok: true },
|
|
427
|
+
},
|
|
428
|
+
]);
|
|
429
|
+
}
|
|
430
|
+
return undefined;
|
|
431
|
+
}),
|
|
432
|
+
tabs: vi.fn().mockResolvedValue([{ index: 0, page: 'tab-1', url: 'https://target.example', title: 'Target', active: true }]),
|
|
433
|
+
readNetworkCapture: vi.fn().mockResolvedValue([]),
|
|
434
|
+
};
|
|
435
|
+
const program = createProgram('', '');
|
|
436
|
+
await program.parseAsync(['node', 'opencli', 'browser', 'wait', 'xhr', '/api/target', '--timeout', '900']);
|
|
437
|
+
const out = lastJsonLog();
|
|
438
|
+
expect(browserState.page?.startNetworkCapture).toHaveBeenCalledTimes(1);
|
|
439
|
+
expect(browserState.page?.readNetworkCapture).toHaveBeenCalledTimes(2);
|
|
440
|
+
expect(bufferReads).toBe(2);
|
|
441
|
+
expect(out.matched.url).toBe('https://target.example/api/target');
|
|
442
|
+
});
|
|
236
443
|
});
|
|
237
444
|
describe('browser network command', () => {
|
|
238
445
|
const consoleLogSpy = vi.spyOn(console, 'log').mockImplementation(() => { });
|
|
@@ -1045,3 +1252,52 @@ describe('findPackageRoot', () => {
|
|
|
1045
1252
|
expect(findPackageRoot(cliFile, (candidate) => exists.has(candidate))).toBe(packageRoot);
|
|
1046
1253
|
});
|
|
1047
1254
|
});
|
|
1255
|
+
describe('normalizeVerifyRows', () => {
|
|
1256
|
+
it('returns an empty array for null / primitives', () => {
|
|
1257
|
+
expect(normalizeVerifyRows(null)).toEqual([]);
|
|
1258
|
+
expect(normalizeVerifyRows(undefined)).toEqual([]);
|
|
1259
|
+
expect(normalizeVerifyRows('hello')).toEqual([]);
|
|
1260
|
+
});
|
|
1261
|
+
it('passes through array-of-objects', () => {
|
|
1262
|
+
const rows = [{ a: 1 }, { a: 2 }];
|
|
1263
|
+
expect(normalizeVerifyRows(rows)).toEqual(rows);
|
|
1264
|
+
});
|
|
1265
|
+
it('wraps array-of-primitives as { value } rows', () => {
|
|
1266
|
+
expect(normalizeVerifyRows([1, 'two', null])).toEqual([
|
|
1267
|
+
{ value: 1 }, { value: 'two' }, { value: null },
|
|
1268
|
+
]);
|
|
1269
|
+
});
|
|
1270
|
+
it('unwraps common envelope shapes', () => {
|
|
1271
|
+
expect(normalizeVerifyRows({ rows: [{ a: 1 }] })).toEqual([{ a: 1 }]);
|
|
1272
|
+
expect(normalizeVerifyRows({ items: [{ b: 2 }] })).toEqual([{ b: 2 }]);
|
|
1273
|
+
expect(normalizeVerifyRows({ data: [{ c: 3 }] })).toEqual([{ c: 3 }]);
|
|
1274
|
+
expect(normalizeVerifyRows({ results: [{ d: 4 }] })).toEqual([{ d: 4 }]);
|
|
1275
|
+
});
|
|
1276
|
+
it('wraps a single object as a one-row array', () => {
|
|
1277
|
+
expect(normalizeVerifyRows({ ok: true })).toEqual([{ ok: true }]);
|
|
1278
|
+
});
|
|
1279
|
+
});
|
|
1280
|
+
describe('renderVerifyPreview', () => {
|
|
1281
|
+
it('emits a placeholder for empty rows', () => {
|
|
1282
|
+
expect(renderVerifyPreview([])).toContain('no rows');
|
|
1283
|
+
});
|
|
1284
|
+
it('prints column headers followed by row cells', () => {
|
|
1285
|
+
const out = renderVerifyPreview([{ a: 'x', b: 1 }, { a: 'y', b: 2 }]);
|
|
1286
|
+
const lines = out.split('\n');
|
|
1287
|
+
expect(lines[0]).toContain('a');
|
|
1288
|
+
expect(lines[0]).toContain('b');
|
|
1289
|
+
expect(lines.some((l) => l.includes('x') && l.includes('1'))).toBe(true);
|
|
1290
|
+
expect(lines.some((l) => l.includes('y') && l.includes('2'))).toBe(true);
|
|
1291
|
+
});
|
|
1292
|
+
it('truncates long cells and reports hidden rows / columns', () => {
|
|
1293
|
+
const rows = Array.from({ length: 15 }, (_, i) => ({
|
|
1294
|
+
a: i, b: 'x'.repeat(100), c: i, d: i, e: i, f: i, g: i, h: i,
|
|
1295
|
+
}));
|
|
1296
|
+
const out = renderVerifyPreview(rows, { maxRows: 5, maxCols: 3, cellMax: 10 });
|
|
1297
|
+
expect(out).toContain('and 10 more row');
|
|
1298
|
+
expect(out).toContain('more column');
|
|
1299
|
+
// cell gets truncated
|
|
1300
|
+
expect(out).toContain('xxxxxxxxxx');
|
|
1301
|
+
expect(out).not.toContain('xxxxxxxxxxx'); // never 11 consecutive
|
|
1302
|
+
});
|
|
1303
|
+
});
|
package/dist/src/daemon.d.ts
CHANGED
|
@@ -9,7 +9,8 @@
|
|
|
9
9
|
* 1. Origin check — reject HTTP/WS from non chrome-extension:// origins
|
|
10
10
|
* 2. Custom header — require X-OpenCLI header (browsers can't send it
|
|
11
11
|
* without CORS preflight, which we deny)
|
|
12
|
-
* 3. No CORS headers —
|
|
12
|
+
* 3. No CORS headers on command endpoints — only /ping is readable from the
|
|
13
|
+
* Browser Bridge extension origin so the extension can probe daemon reachability
|
|
13
14
|
* 4. Body size limit — 1 MB max to prevent OOM
|
|
14
15
|
* 5. WebSocket verifyClient — reject upgrade before connection is established
|
|
15
16
|
*
|
|
@@ -18,4 +19,4 @@
|
|
|
18
19
|
* - Persistent — stays alive until explicit shutdown, SIGTERM, or uninstall
|
|
19
20
|
* - Listens on localhost:19825
|
|
20
21
|
*/
|
|
21
|
-
export
|
|
22
|
+
export declare function getResponseCorsHeaders(pathname: string, origin?: string): Record<string, string> | undefined;
|
package/dist/src/daemon.js
CHANGED
|
@@ -9,7 +9,8 @@
|
|
|
9
9
|
* 1. Origin check — reject HTTP/WS from non chrome-extension:// origins
|
|
10
10
|
* 2. Custom header — require X-OpenCLI header (browsers can't send it
|
|
11
11
|
* without CORS preflight, which we deny)
|
|
12
|
-
* 3. No CORS headers —
|
|
12
|
+
* 3. No CORS headers on command endpoints — only /ping is readable from the
|
|
13
|
+
* Browser Bridge extension origin so the extension can probe daemon reachability
|
|
13
14
|
* 4. Body size limit — 1 MB max to prevent OOM
|
|
14
15
|
* 5. WebSocket verifyClient — reject upgrade before connection is established
|
|
15
16
|
*
|
|
@@ -60,10 +61,20 @@ function readBody(req) {
|
|
|
60
61
|
reject(err); });
|
|
61
62
|
});
|
|
62
63
|
}
|
|
63
|
-
function jsonResponse(res, status, data) {
|
|
64
|
-
res.writeHead(status, { 'Content-Type': 'application/json' });
|
|
64
|
+
function jsonResponse(res, status, data, extraHeaders) {
|
|
65
|
+
res.writeHead(status, { 'Content-Type': 'application/json', ...extraHeaders });
|
|
65
66
|
res.end(JSON.stringify(data));
|
|
66
67
|
}
|
|
68
|
+
export function getResponseCorsHeaders(pathname, origin) {
|
|
69
|
+
if (pathname !== '/ping')
|
|
70
|
+
return undefined;
|
|
71
|
+
if (!origin || !origin.startsWith('chrome-extension://'))
|
|
72
|
+
return undefined;
|
|
73
|
+
return {
|
|
74
|
+
'Access-Control-Allow-Origin': origin,
|
|
75
|
+
Vary: 'Origin',
|
|
76
|
+
};
|
|
77
|
+
}
|
|
67
78
|
async function handleRequest(req, res) {
|
|
68
79
|
// ─── Security: Origin & custom-header check ──────────────────────
|
|
69
80
|
// Block browser-based CSRF: browsers always send an Origin header on
|
|
@@ -93,7 +104,7 @@ async function handleRequest(req, res) {
|
|
|
93
104
|
// Timing side-channels can reveal daemon presence to local processes, which
|
|
94
105
|
// is an accepted risk given the daemon is loopback-only and short-lived.
|
|
95
106
|
if (req.method === 'GET' && pathname === '/ping') {
|
|
96
|
-
jsonResponse(res, 200, { ok: true });
|
|
107
|
+
jsonResponse(res, 200, { ok: true }, getResponseCorsHeaders(pathname, origin));
|
|
97
108
|
return;
|
|
98
109
|
}
|
|
99
110
|
// Require custom header on all other HTTP requests. Browsers cannot attach
|
|
@@ -272,6 +283,7 @@ wss.on('connection', (ws) => {
|
|
|
272
283
|
if (extensionWs === ws) {
|
|
273
284
|
extensionWs = null;
|
|
274
285
|
extensionVersion = null;
|
|
286
|
+
extensionCompatRange = null;
|
|
275
287
|
// Reject pending requests in case 'close' does not follow this 'error'
|
|
276
288
|
for (const [, p] of pending) {
|
|
277
289
|
clearTimeout(p.timer);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { getResponseCorsHeaders } from './daemon.js';
|
|
3
|
+
describe('getResponseCorsHeaders', () => {
|
|
4
|
+
it('allows the Browser Bridge extension origin to read /ping', () => {
|
|
5
|
+
expect(getResponseCorsHeaders('/ping', 'chrome-extension://abc123')).toEqual({
|
|
6
|
+
'Access-Control-Allow-Origin': 'chrome-extension://abc123',
|
|
7
|
+
Vary: 'Origin',
|
|
8
|
+
});
|
|
9
|
+
});
|
|
10
|
+
it('does not add CORS headers for ordinary web origins', () => {
|
|
11
|
+
expect(getResponseCorsHeaders('/ping', 'https://example.com')).toBeUndefined();
|
|
12
|
+
});
|
|
13
|
+
it('does not add CORS headers when origin is absent', () => {
|
|
14
|
+
expect(getResponseCorsHeaders('/ping')).toBeUndefined();
|
|
15
|
+
});
|
|
16
|
+
it('does not add CORS headers for command endpoints even from the extension origin', () => {
|
|
17
|
+
expect(getResponseCorsHeaders('/command', 'chrome-extension://abc123')).toBeUndefined();
|
|
18
|
+
});
|
|
19
|
+
});
|
|
@@ -37,6 +37,18 @@ export interface ArticleDownloadOptions {
|
|
|
37
37
|
detectImageExt?: (url: string) => string;
|
|
38
38
|
/** Custom frontmatter labels (default: Chinese labels) */
|
|
39
39
|
frontmatterLabels?: FrontmatterLabels;
|
|
40
|
+
/**
|
|
41
|
+
* Extra CSS selectors removed from the article before Turndown conversion.
|
|
42
|
+
* Use this to drop site-specific noise the adapter can't always trim upstream
|
|
43
|
+
* (e.g. zhihu 折叠卡, weixin 赞赏栏, wiki infobox).
|
|
44
|
+
*/
|
|
45
|
+
cleanSelectors?: string[];
|
|
46
|
+
/**
|
|
47
|
+
* Write the markdown to `process.stdout` instead of a file on disk. Image
|
|
48
|
+
* download and directory creation are skipped — remote image URLs are kept
|
|
49
|
+
* as-is so the output is self-contained when piped.
|
|
50
|
+
*/
|
|
51
|
+
stdout?: boolean;
|
|
40
52
|
}
|
|
41
53
|
export interface ArticleDownloadResult {
|
|
42
54
|
title: string;
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import * as fs from 'node:fs';
|
|
9
9
|
import * as path from 'node:path';
|
|
10
10
|
import TurndownService from 'turndown';
|
|
11
|
+
import { gfm } from 'turndown-plugin-gfm';
|
|
11
12
|
import { httpDownload, sanitizeFilename } from './index.js';
|
|
12
13
|
import { formatBytes } from './progress.js';
|
|
13
14
|
const IMAGE_CONCURRENCY = 5;
|
|
@@ -19,22 +20,127 @@ const DEFAULT_LABELS = {
|
|
|
19
20
|
// ============================================================
|
|
20
21
|
// Markdown Conversion
|
|
21
22
|
// ============================================================
|
|
22
|
-
|
|
23
|
+
// Nodes that never carry article content. Turndown keeps them by default — if an
|
|
24
|
+
// adapter's contentHtml extraction misses one, CSS / scripts / widget markup
|
|
25
|
+
// ends up inline in the .md. Strip them unconditionally at the converter level.
|
|
26
|
+
// `svg` is not in HTMLElementTagNameMap, so we type-narrow manually.
|
|
27
|
+
// `header/footer/nav/aside` cover page chrome that adapters occasionally
|
|
28
|
+
// forget to trim — the article's own title/author/publishTime are supplied
|
|
29
|
+
// as separate fields on ArticleData, so duplicated nodes are redundant.
|
|
30
|
+
// `iframe` is NOT in this set — it's handled by a dedicated rule below that
|
|
31
|
+
// degrades to a link so embedded content (YouTube, Twitter, CodePen …) keeps
|
|
32
|
+
// a reachable URL in the exported markdown.
|
|
33
|
+
const STRIPPED_TAGS = [
|
|
34
|
+
'script', 'style', 'noscript',
|
|
35
|
+
'canvas',
|
|
36
|
+
'form', 'button', 'dialog',
|
|
37
|
+
'header', 'footer', 'nav', 'aside',
|
|
38
|
+
];
|
|
39
|
+
function createTurndown(configure, cleanSelectors) {
|
|
23
40
|
const td = new TurndownService({
|
|
24
41
|
headingStyle: 'atx',
|
|
25
42
|
codeBlockStyle: 'fenced',
|
|
26
43
|
bulletListMarker: '-',
|
|
27
44
|
});
|
|
45
|
+
td.use(gfm);
|
|
46
|
+
td.remove(STRIPPED_TAGS);
|
|
47
|
+
// turndown-plugin-gfm@1.0.2 emits single-tilde strikethrough (`~x~`), which
|
|
48
|
+
// is not the canonical GFM form. Override it so exported markdown is
|
|
49
|
+
// portable across common renderers.
|
|
50
|
+
td.addRule('canonicalStrikethrough', {
|
|
51
|
+
filter: (node) => ['DEL', 'S', 'STRIKE'].includes(node.nodeName),
|
|
52
|
+
replacement: (content) => `~~${content}~~`,
|
|
53
|
+
});
|
|
54
|
+
// SVG isn't in the static HTML tag map; match by name with a custom filter.
|
|
55
|
+
td.addRule('stripSvg', {
|
|
56
|
+
filter: (node) => node.nodeName === 'svg' || node.nodeName === 'SVG',
|
|
57
|
+
replacement: () => '',
|
|
58
|
+
});
|
|
28
59
|
td.addRule('linebreak', {
|
|
29
60
|
filter: 'br',
|
|
30
61
|
replacement: () => '\n',
|
|
31
62
|
});
|
|
63
|
+
// Inline base64 images would land as huge `![](data:image/...;base64,...)`
|
|
64
|
+
// strings that the image downloader can't localize. Drop them.
|
|
65
|
+
td.addRule('ignoreBase64Images', {
|
|
66
|
+
filter: (node) => {
|
|
67
|
+
if (node.nodeName !== 'IMG')
|
|
68
|
+
return false;
|
|
69
|
+
const src = node.getAttribute?.('src') ?? '';
|
|
70
|
+
return src.startsWith('data:');
|
|
71
|
+
},
|
|
72
|
+
replacement: () => '',
|
|
73
|
+
});
|
|
74
|
+
// Markdown has no native video/audio primitive. Emit inline HTML so
|
|
75
|
+
// renderers that support it (GitHub, VS Code preview …) still play the
|
|
76
|
+
// media; viewers that don't simply show the tag as text, which is still
|
|
77
|
+
// more information than dropping the node outright.
|
|
78
|
+
td.addRule('videoElement', {
|
|
79
|
+
filter: (node) => node.nodeName === 'VIDEO',
|
|
80
|
+
replacement: (_content, node) => {
|
|
81
|
+
const el = node;
|
|
82
|
+
const src = el.getAttribute('src')
|
|
83
|
+
|| el.querySelector('source')?.getAttribute('src')
|
|
84
|
+
|| '';
|
|
85
|
+
if (!src)
|
|
86
|
+
return '';
|
|
87
|
+
const poster = el.getAttribute('poster') || '';
|
|
88
|
+
return `\n<video src="${src}" controls${poster ? ` poster="${poster}"` : ''}></video>\n`;
|
|
89
|
+
},
|
|
90
|
+
});
|
|
91
|
+
td.addRule('audioElement', {
|
|
92
|
+
filter: (node) => node.nodeName === 'AUDIO',
|
|
93
|
+
replacement: (_content, node) => {
|
|
94
|
+
const el = node;
|
|
95
|
+
const src = el.getAttribute('src')
|
|
96
|
+
|| el.querySelector('source')?.getAttribute('src')
|
|
97
|
+
|| '';
|
|
98
|
+
return src ? `\n<audio src="${src}" controls></audio>\n` : '';
|
|
99
|
+
},
|
|
100
|
+
});
|
|
101
|
+
// Iframes (YouTube, Twitter, CodePen …) degrade to a markdown link so the
|
|
102
|
+
// embedded resource is still reachable from the exported file.
|
|
103
|
+
td.addRule('iframeToLink', {
|
|
104
|
+
filter: (node) => node.nodeName === 'IFRAME',
|
|
105
|
+
replacement: (_content, node) => {
|
|
106
|
+
const el = node;
|
|
107
|
+
const src = el.getAttribute('src') || '';
|
|
108
|
+
if (!src)
|
|
109
|
+
return '';
|
|
110
|
+
const title = el.getAttribute('title') || 'Embedded content';
|
|
111
|
+
return `\n[${title}](${src})\n`;
|
|
112
|
+
},
|
|
113
|
+
});
|
|
114
|
+
// Per-adapter dirty-node removal. Adapters know their site's specific noise
|
|
115
|
+
// (zhihu 折叠卡, weixin 赞赏栏, wiki 折叠 infobox …); we keep the default set
|
|
116
|
+
// empty so the generic converter stays untouched.
|
|
117
|
+
const selectorRules = (cleanSelectors ?? [])
|
|
118
|
+
.map(sel => sel.trim())
|
|
119
|
+
.filter(Boolean);
|
|
120
|
+
if (selectorRules.length > 0) {
|
|
121
|
+
td.addRule('cleanSelectors', {
|
|
122
|
+
filter: (node) => {
|
|
123
|
+
const match = node.matches;
|
|
124
|
+
if (typeof match !== 'function')
|
|
125
|
+
return false;
|
|
126
|
+
return selectorRules.some((sel) => {
|
|
127
|
+
try {
|
|
128
|
+
return match.call(node, sel);
|
|
129
|
+
}
|
|
130
|
+
catch {
|
|
131
|
+
return false;
|
|
132
|
+
}
|
|
133
|
+
});
|
|
134
|
+
},
|
|
135
|
+
replacement: () => '',
|
|
136
|
+
});
|
|
137
|
+
}
|
|
32
138
|
if (configure)
|
|
33
139
|
configure(td);
|
|
34
140
|
return td;
|
|
35
141
|
}
|
|
36
|
-
function convertToMarkdown(contentHtml, codeBlocks, configure) {
|
|
37
|
-
const td = createTurndown(configure);
|
|
142
|
+
function convertToMarkdown(contentHtml, codeBlocks, configure, cleanSelectors) {
|
|
143
|
+
const td = createTurndown(configure, cleanSelectors);
|
|
38
144
|
let md = td.turndown(contentHtml);
|
|
39
145
|
// Restore code block placeholders
|
|
40
146
|
codeBlocks.forEach((block, i) => {
|
|
@@ -44,8 +150,12 @@ function convertToMarkdown(contentHtml, codeBlocks, configure) {
|
|
|
44
150
|
});
|
|
45
151
|
// Clean up
|
|
46
152
|
md = md.replace(/\u00a0/g, ' ');
|
|
47
|
-
|
|
153
|
+
// Turndown leaves behind lone dashes / middle dots when list bullets or
|
|
154
|
+
// decorative separators lose their surrounding inline context.
|
|
155
|
+
md = md.replace(/^[ \t]*[-·][ \t]*$/gm, '');
|
|
156
|
+
md = md.replace(/^[ \t]+$/gm, '');
|
|
48
157
|
md = md.replace(/[ \t]+$/gm, '');
|
|
158
|
+
md = md.replace(/\n{3,}/g, '\n\n');
|
|
49
159
|
return md;
|
|
50
160
|
}
|
|
51
161
|
function replaceImageUrls(md, urlMap) {
|
|
@@ -120,7 +230,7 @@ async function downloadImages(imgUrls, imgDir, headers, detectExt) {
|
|
|
120
230
|
* 6. File write
|
|
121
231
|
*/
|
|
122
232
|
export async function downloadArticle(data, options) {
|
|
123
|
-
const { output, downloadImages: shouldDownloadImages = true, imageHeaders, maxTitleLength = 80, configureTurndown, detectImageExt, frontmatterLabels, } = options;
|
|
233
|
+
const { output, downloadImages: shouldDownloadImages = true, imageHeaders, maxTitleLength = 80, configureTurndown, detectImageExt, frontmatterLabels, cleanSelectors, stdout = false, } = options;
|
|
124
234
|
const labels = { ...DEFAULT_LABELS, ...frontmatterLabels };
|
|
125
235
|
if (!data.title) {
|
|
126
236
|
return [{
|
|
@@ -143,33 +253,47 @@ export async function downloadArticle(data, options) {
|
|
|
143
253
|
}];
|
|
144
254
|
}
|
|
145
255
|
// Convert HTML to Markdown
|
|
146
|
-
let markdown = convertToMarkdown(data.contentHtml, data.codeBlocks || [], configureTurndown);
|
|
147
|
-
// Prepare output directory
|
|
256
|
+
let markdown = convertToMarkdown(data.contentHtml, data.codeBlocks || [], configureTurndown, cleanSelectors);
|
|
148
257
|
const safeTitle = sanitizeFilename(data.title, maxTitleLength);
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
258
|
+
// Download images only when writing to disk. In stdout mode remote URLs
|
|
259
|
+
// stay intact so the piped output is self-contained.
|
|
260
|
+
if (!stdout && shouldDownloadImages && data.imageUrls && data.imageUrls.length > 0) {
|
|
261
|
+
const articleDir = path.join(output, safeTitle);
|
|
262
|
+
fs.mkdirSync(articleDir, { recursive: true });
|
|
153
263
|
const imagesDir = path.join(articleDir, 'images');
|
|
154
264
|
fs.mkdirSync(imagesDir, { recursive: true });
|
|
155
265
|
const urlMap = await downloadImages(data.imageUrls, imagesDir, imageHeaders, detectImageExt);
|
|
156
266
|
markdown = replaceImageUrls(markdown, urlMap);
|
|
157
267
|
}
|
|
158
|
-
// Build frontmatter with customizable labels
|
|
159
|
-
|
|
268
|
+
// Build frontmatter with customizable labels.
|
|
269
|
+
// Shape: `# Title\n[> meta\n...]\n---\n\n<markdown>` — exactly one blank
|
|
270
|
+
// line separates every section, so we never produce ≥3 consecutive newlines.
|
|
271
|
+
const headerLines = [`# ${data.title}`];
|
|
160
272
|
if (data.author)
|
|
161
273
|
headerLines.push(`> ${labels.author}: ${data.author}`);
|
|
162
274
|
if (data.publishTime)
|
|
163
275
|
headerLines.push(`> ${labels.publishTime}: ${data.publishTime}`);
|
|
164
276
|
if (data.sourceUrl)
|
|
165
277
|
headerLines.push(`> ${labels.sourceUrl}: ${data.sourceUrl}`);
|
|
166
|
-
headerLines.
|
|
167
|
-
const fullContent =
|
|
168
|
-
|
|
278
|
+
const frontmatter = headerLines.join('\n') + '\n\n---\n\n';
|
|
279
|
+
const fullContent = frontmatter + markdown;
|
|
280
|
+
const size = Buffer.byteLength(fullContent, 'utf-8');
|
|
281
|
+
if (stdout) {
|
|
282
|
+
process.stdout.write(fullContent.endsWith('\n') ? fullContent : fullContent + '\n');
|
|
283
|
+
return [{
|
|
284
|
+
title: data.title,
|
|
285
|
+
author: data.author || '-',
|
|
286
|
+
publish_time: data.publishTime || '-',
|
|
287
|
+
status: 'success',
|
|
288
|
+
size: formatBytes(size),
|
|
289
|
+
saved: '-',
|
|
290
|
+
}];
|
|
291
|
+
}
|
|
292
|
+
const articleDir = path.join(output, safeTitle);
|
|
293
|
+
fs.mkdirSync(articleDir, { recursive: true });
|
|
169
294
|
const filename = `${safeTitle}.md`;
|
|
170
295
|
const filePath = path.join(articleDir, filename);
|
|
171
296
|
fs.writeFileSync(filePath, fullContent, 'utf-8');
|
|
172
|
-
const size = Buffer.byteLength(fullContent, 'utf-8');
|
|
173
297
|
return [{
|
|
174
298
|
title: data.title,
|
|
175
299
|
author: data.author || '-',
|