@jackwener/opencli 1.7.6 → 1.7.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -8
- package/README.zh-CN.md +14 -8
- package/cli-manifest.json +469 -11
- package/clis/51job/company.js +125 -0
- package/clis/51job/detail.js +108 -0
- package/clis/51job/hot.js +55 -0
- package/clis/51job/search.js +79 -0
- package/clis/51job/utils.js +302 -0
- package/clis/51job/utils.test.js +69 -0
- package/clis/amazon/discussion.js +37 -6
- package/clis/amazon/discussion.test.js +147 -32
- package/clis/bilibili/video.js +11 -4
- package/clis/bilibili/video.test.js +51 -0
- package/clis/chatgpt/image.js +1 -1
- package/clis/chatgpt-app/ask.js +3 -19
- package/clis/chatgpt-app/ax.js +132 -1
- package/clis/chatgpt-app/ax.test.js +23 -0
- package/clis/chatgpt-app/send.js +2 -21
- package/clis/deepseek/ask.js +50 -18
- package/clis/deepseek/ask.test.js +195 -2
- package/clis/deepseek/utils.js +113 -29
- package/clis/deepseek/utils.test.js +109 -1
- package/clis/gemini/image.js +1 -1
- package/clis/instagram/download.js +1 -1
- package/clis/powerchina/search.js +250 -0
- package/clis/powerchina/search.test.js +67 -0
- package/clis/sinafinance/stock.js +5 -2
- package/clis/sinafinance/stock.test.js +59 -0
- package/clis/toutiao/articles.js +81 -0
- package/clis/toutiao/articles.test.js +23 -0
- package/clis/twitter/likes.js +3 -2
- package/clis/twitter/search.js +4 -2
- package/clis/twitter/search.test.js +4 -0
- package/clis/twitter/shared.js +28 -0
- package/clis/twitter/shared.test.js +96 -0
- package/clis/twitter/thread.js +3 -1
- package/clis/twitter/timeline.js +3 -2
- package/clis/twitter/tweets.js +3 -2
- package/clis/twitter/tweets.test.js +1 -1
- package/clis/web/read.js +25 -5
- package/clis/web/read.test.js +76 -0
- package/clis/weixin/create-draft.js +225 -0
- package/clis/weixin/drafts.js +65 -0
- package/clis/weixin/drafts.test.js +65 -0
- package/clis/weread/ai-outline.js +170 -0
- package/clis/weread/ai-outline.test.js +83 -0
- package/clis/weread/book.js +57 -44
- package/clis/weread/commands.test.js +24 -0
- package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
- package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
- package/dist/src/browser/analyze.d.ts +103 -0
- package/dist/src/browser/analyze.js +230 -0
- package/dist/src/browser/analyze.test.d.ts +1 -0
- package/dist/src/browser/analyze.test.js +164 -0
- package/dist/src/browser/article-extract.d.ts +57 -0
- package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
- package/dist/src/browser/article-extract.e2e.test.js +105 -0
- package/dist/src/browser/article-extract.js +169 -0
- package/dist/src/browser/article-extract.test.d.ts +1 -0
- package/dist/src/browser/article-extract.test.js +94 -0
- package/dist/src/browser/cdp.js +11 -2
- package/dist/src/browser/verify-fixture.d.ts +59 -0
- package/dist/src/browser/verify-fixture.js +213 -0
- package/dist/src/browser/verify-fixture.test.d.ts +1 -0
- package/dist/src/browser/verify-fixture.test.js +161 -0
- package/dist/src/cli.d.ts +32 -0
- package/dist/src/cli.js +333 -43
- package/dist/src/cli.test.js +257 -1
- package/dist/src/commanderAdapter.js +12 -0
- package/dist/src/commanderAdapter.test.js +11 -0
- package/dist/src/daemon.d.ts +3 -2
- package/dist/src/daemon.js +16 -4
- package/dist/src/daemon.test.d.ts +1 -0
- package/dist/src/daemon.test.js +19 -0
- package/dist/src/download/article-download.d.ts +12 -0
- package/dist/src/download/article-download.js +141 -17
- package/dist/src/download/article-download.test.js +196 -0
- package/dist/src/download/index.js +73 -86
- package/dist/src/errors.js +4 -2
- package/dist/src/errors.test.js +13 -0
- package/dist/src/launcher.d.ts +1 -1
- package/dist/src/launcher.js +3 -3
- package/dist/src/output.js +1 -1
- package/dist/src/output.test.js +6 -0
- package/package.json +5 -1
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Article extraction via Readability — generic `page → article HTML` pipeline.
|
|
3
|
+
*
|
|
4
|
+
* Complements `src/browser/extract.ts`: that one takes a caller-supplied
|
|
5
|
+
* selector. This one works with zero configuration on arbitrary article pages
|
|
6
|
+
* (blogs, news, docs) by running `@mozilla/readability` inside the page
|
|
7
|
+
* context via CDP evaluate.
|
|
8
|
+
*
|
|
9
|
+
* Pipeline:
|
|
10
|
+
* 1. Short-circuit non-HTML documents (`text/plain`, JSON, XML) — a page
|
|
11
|
+
* renderer wrapping a plain-text file would pollute the DOM pipeline.
|
|
12
|
+
* 2. Short-circuit the "body is a single <pre>" case, which browsers use
|
|
13
|
+
* when loading *.txt / *.md over file:// or raw.githubusercontent.com.
|
|
14
|
+
* 3. Deep-clone the document, apply caller-supplied `cleanSelectors` to the
|
|
15
|
+
* clone (preserves live page state for subsequent snapshot/click).
|
|
16
|
+
* 4. Inject Readability + isProbablyReaderable sources into the page,
|
|
17
|
+
* parse on the clone. `isProbablyReaderable` gates the parse unless
|
|
18
|
+
* `force: true`.
|
|
19
|
+
* 5. On Readability miss, walk a fallback selector chain
|
|
20
|
+
* (main → [role="main"] → #main-content → … → body) and return the
|
|
21
|
+
 *      first root with at least 80 characters of text.
|
|
22
|
+
*
|
|
23
|
+
* Readability runs in the page's own window because it needs real DOM APIs
|
|
24
|
+
* (getComputedStyle, treeWalker). Running it Node-side would require jsdom —
|
|
25
|
+
* a heavy dep the rest of OpenCLI doesn't need.
|
|
26
|
+
*/
|
|
27
|
+
import * as fs from 'node:fs';
|
|
28
|
+
import { createRequire } from 'node:module';
|
|
29
|
+
const requireFromHere = createRequire(import.meta.url);
let cachedSources = null;
/**
 * Return the text of the two `@mozilla/readability` source files.
 *
 * Both files are resolved relative to this module and read synchronously as
 * UTF-8 on the first call. The resulting `{ readability, readerable }` object
 * is memoized in `cachedSources` and handed back unchanged afterwards.
 */
function readabilitySources() {
    if (!cachedSources) {
        // Resolve both specifiers before touching the disk, then read them.
        const [readabilityPath, readerablePath] = [
            '@mozilla/readability/Readability.js',
            '@mozilla/readability/Readability-readerable.js',
        ].map((specifier) => requireFromHere.resolve(specifier));
        const readText = (file) => fs.readFileSync(file, 'utf8');
        cachedSources = {
            readability: readText(readabilityPath),
            readerable: readText(readerablePath),
        };
    }
    return cachedSources;
}
|
|
42
|
+
/**
 * Selectors tried, in this order, when Readability yields no article.
 * Used by `buildExtractArticleJs` when the caller passes no `fallbackSelectors`.
 */
export const DEFAULT_FALLBACK_SELECTORS = ['main', '[role="main"]', '#main-content', '#main', '#content', '.content', 'article', 'body'];
/** Minimum trimmed text length a fallback root needs to be accepted. */
const MIN_FALLBACK_TEXT_LENGTH = 80;
|
|
53
|
+
/**
 * Build the JS expression evaluated in-page to extract the article. Exported
 * for testability — callers on the host side should use `extractArticle`.
 *
 * The returned string is a self-invoking arrow function. When evaluated in a
 * page it yields one of:
 *   - `{ source: "raw-text", html, title }` for non-HTML documents; the body
 *     text is HTML-escaped and wrapped in `<pre>`,
 *   - `{ source: "pre", html, title }` when `<body>` has a single `<pre>` child,
 *   - `{ source: "readability", html, title, byline?, publishedTime?, siteName? }`,
 *   - `{ source: "fallback", html, title }` for the first fallback selector
 *     whose trimmed text is at least `MIN_FALLBACK_TEXT_LENGTH` characters,
 *   - `null` when nothing usable is found.
 *
 * @param {object} [options]
 * @param {string[]} [options.cleanSelectors] Selectors whose matches are
 *   removed from the cloned document before extraction (default: none).
 * @param {string[]} [options.fallbackSelectors] Ordered fallback selector
 *   chain (default: `DEFAULT_FALLBACK_SELECTORS`).
 * @param {boolean} [options.force] Skip the `isProbablyReaderable` gate.
 * @returns {string} JavaScript source of the in-page expression.
 */
export function buildExtractArticleJs(options = {}) {
    const { readability, readerable } = readabilitySources();
    const cleanSelectors = options.cleanSelectors ?? [];
    const fallbackSelectors = options.fallbackSelectors ?? DEFAULT_FALLBACK_SELECTORS;
    const force = !!options.force;
    // Library sources contain backticks and ${...} fragments, so we embed them
    // as JSON-encoded string literals and eval them inside a Function() scope.
    // This isolates their var declarations from the outer IIFE without polluting
    // window globals.
    const readabilityLit = JSON.stringify(readability);
    const readerableLit = JSON.stringify(readerable);
    const cleanLit = JSON.stringify(cleanSelectors);
    const fallbackLit = JSON.stringify(fallbackSelectors);
    const forceLit = JSON.stringify(force);
    return [
        '(() => {',
        '  const cleanSelectors = ' + cleanLit + ';',
        '  const fallbackSelectors = ' + fallbackLit + ';',
        '  const force = ' + forceLit + ';',
        '  const minFallbackText = ' + MIN_FALLBACK_TEXT_LENGTH + ';',
        '  const readabilitySrc = ' + readabilityLit + ';',
        '  const readerableSrc = ' + readerableLit + ';',
        '',
        // The replacement values must be HTML entities: the escaped text is
        // concatenated into a <pre> element in the raw-text short-circuit.
        '  function escapeHtml(s) {',
        '    return String(s).replace(/[&<>]/g, c => ({ "&": "&amp;", "<": "&lt;", ">": "&gt;" }[c]));',
        '  }',
        '',
        '  // Short-circuit 1: non-HTML document',
        '  const ct = document.contentType || "";',
        '  if (ct && ct !== "text/html" && ct !== "application/xhtml+xml") {',
        '    const body = document.body ? (document.body.textContent || "") : "";',
        '    return { source: "raw-text", html: "<pre>" + escapeHtml(body) + "</pre>", title: document.title || "" };',
        '  }',
        '',
        '  // Short-circuit 2: body is a single <pre>',
        '  if (document.body) {',
        '    const kids = document.body.children;',
        '    if (kids.length === 1 && kids[0] && kids[0].tagName === "PRE") {',
        '      return { source: "pre", html: document.body.outerHTML, title: document.title || "" };',
        '    }',
        '  }',
        '',
        '  // Deep-clone + adapter-supplied dirty-node removal',
        '  const cloneDoc = document.cloneNode(true);',
        '  for (const sel of cleanSelectors) {',
        '    try { for (const n of cloneDoc.querySelectorAll(sel)) n.remove(); }',
        '    catch (e) { /* ignore invalid selector */ }',
        '  }',
        '',
        '  // Inject Readability into an isolated Function scope and extract the',
        '  // constructors we need. Library sources use their own module.exports',
        '  // guard (if typeof module === "object"), which is falsy here.',
        '  const libs = (new Function(',
        '      readabilitySrc + "\\n" + readerableSrc + "\\nreturn {" +',
        '      "  Readability: typeof Readability !== \\"undefined\\" ? Readability : null," +',
        '      "  isProbablyReaderable: typeof isProbablyReaderable !== \\"undefined\\" ? isProbablyReaderable : null" +',
        '      " };"',
        '  ))();',
        '  const Readability = libs.Readability;',
        '  const isProbablyReaderable = libs.isProbablyReaderable;',
        '',
        '  const readerableOk = force || (typeof isProbablyReaderable === "function" ? isProbablyReaderable(cloneDoc) : true);',
        '  let article = null;',
        '  if (readerableOk && typeof Readability === "function") {',
        '    try { article = new Readability(cloneDoc).parse(); } catch (e) { article = null; }',
        '  }',
        '  if (article && article.content) {',
        '    return {',
        '      source: "readability",',
        '      html: article.content,',
        '      title: article.title || document.title || "",',
        '      byline: article.byline || undefined,',
        '      publishedTime: article.publishedTime || undefined,',
        '      siteName: article.siteName || undefined,',
        '    };',
        '  }',
        '',
        '  // Fallback chain',
        '  for (const sel of fallbackSelectors) {',
        '    let el = null;',
        '    try { el = cloneDoc.querySelector(sel); } catch (e) { continue; }',
        '    if (!el) continue;',
        '    const text = (el.textContent || "").trim();',
        '    if (text.length < minFallbackText) continue;',
        '    return { source: "fallback", html: el.outerHTML, title: document.title || "" };',
        '  }',
        '',
        '  return null;',
        '})()',
    ].join('\n');
}
|
|
148
|
+
/**
 * Evaluate the extraction expression on `page` and normalize what comes back.
 *
 * Resolves to `null` when the page returns anything other than an object
 * carrying string `html` and `source` fields. Otherwise resolves to
 * `{ html, title, source }` plus whichever of `byline`, `publishedTime` and
 * `siteName` were truthy in the payload; a non-string title becomes `''`.
 *
 * @param page Object exposing an async `evaluate(js)` method.
 * @param options Passed through to `buildExtractArticleJs`.
 */
export async function extractArticle(page, options = {}) {
    const payload = await page.evaluate(buildExtractArticleJs(options));
    const isObject = payload != null && typeof payload === 'object';
    if (!isObject) {
        return null;
    }
    const { html, source, title, byline, publishedTime, siteName } = payload;
    if (typeof html !== 'string' || typeof source !== 'string') {
        return null;
    }
    const article = { html, title: typeof title === 'string' ? title : '' };
    if (byline) {
        article.byline = byline;
    }
    if (publishedTime) {
        article.publishedTime = publishedTime;
    }
    if (siteName) {
        article.siteName = siteName;
    }
    article.source = source;
    return article;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { buildExtractArticleJs, extractArticle, DEFAULT_FALLBACK_SELECTORS, } from './article-extract.js';
|
|
3
|
+
/**
 * Minimal page double: `evaluate` records the script it was given on the
 * page's `lastJs` property and resolves to the canned `response`.
 */
function fakePage(response) {
    const page = {
        lastJs: null,
        async evaluate(js) {
            this.lastJs = js;
            return response;
        },
    };
    return page;
}
|
|
14
|
+
describe('buildExtractArticleJs', () => {
    it('embeds Readability + Readerable sources once per evaluation', () => {
        // Signature strings from the upstream @mozilla/readability sources;
        // finding them in the output means both libraries were inlined.
        const generated = buildExtractArticleJs();
        const signatures = ['function Readability(doc, options)', 'function isProbablyReaderable'];
        for (const signature of signatures) {
            expect(generated).toContain(signature);
        }
    });
    it('serializes caller-supplied options into the evaluated JS', () => {
        const options = {
            cleanSelectors: ['.ads', '#banner'],
            fallbackSelectors: ['article', 'body'],
            force: true,
        };
        const generated = buildExtractArticleJs(options);
        expect(generated).toContain('[".ads","#banner"]');
        expect(generated).toContain('["article","body"]');
        expect(generated).toContain('const force = true;');
    });
    it('uses the default fallback chain when none is supplied', () => {
        const generated = buildExtractArticleJs();
        DEFAULT_FALLBACK_SELECTORS.forEach((selector) => {
            expect(generated).toContain(JSON.stringify(selector));
        });
    });
    it('runs fallback selection against the cleaned clone', () => {
        const generated = buildExtractArticleJs({ cleanSelectors: ['.noise'] });
        expect(generated).toContain('el = cloneDoc.querySelector(sel);');
        expect(generated).not.toContain('el = document.querySelector(sel);');
    });
    it('produces syntactically valid JavaScript', () => {
        // The Function constructor parses its argument, so any syntax error in
        // the generated code — including a template-literal break-out from the
        // embedded Readability sources — makes it throw.
        const optionSets = [
            undefined,
            { force: true },
            { cleanSelectors: ['.a', '.b'], fallbackSelectors: ['main', 'body'] },
        ];
        for (const options of optionSets) {
            expect(() => new Function(buildExtractArticleJs(options))).not.toThrow();
        }
    });
});
|
|
55
|
+
describe('extractArticle (host-side)', () => {
    it('returns a normalized ExtractedArticle when the page responds with one', async () => {
        const payload = {
            source: 'readability',
            html: '<p>hello</p>',
            title: 'Hello',
            byline: 'Alice',
            publishedTime: '2026-04-22',
            siteName: 'Example',
        };
        const article = await extractArticle(fakePage({ ...payload }));
        expect(article).toEqual(payload);
    });
    it('drops undefined optional fields cleanly', async () => {
        const payload = { source: 'fallback', html: '<main>x</main>', title: 't' };
        const article = await extractArticle(fakePage({ ...payload }));
        expect(article).toEqual(payload);
        for (const absent of ['byline', 'publishedTime']) {
            expect(article).not.toHaveProperty(absent);
        }
    });
    it('returns null on a missing body or malformed payload', async () => {
        const malformed = [null, 'oops', { source: 'readability' }, { html: '<p>x</p>' }];
        for (const payload of malformed) {
            expect(await extractArticle(fakePage(payload))).toBeNull();
        }
    });
    it('defaults title to empty string when the page omits it', async () => {
        const article = await extractArticle(fakePage({ source: 'pre', html: '<body><pre>x</pre></body>' }));
        expect(article?.title).toBe('');
        expect(article?.source).toBe('pre');
    });
});
|
package/dist/src/browser/cdp.js
CHANGED
|
@@ -91,7 +91,12 @@ export class CDPBridge {
|
|
|
91
91
|
}
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
|
-
catch {
|
|
94
|
+
catch (err) {
|
|
95
|
+
if (process.env.OPENCLI_VERBOSE) {
|
|
96
|
+
// eslint-disable-next-line no-console
|
|
97
|
+
console.error('[cdp] Failed to parse WebSocket message:', err instanceof Error ? err.message : err);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
95
100
|
});
|
|
96
101
|
});
|
|
97
102
|
}
|
|
@@ -253,8 +258,12 @@ class CDPPage extends BasePage {
|
|
|
253
258
|
this._networkEntries[idx].responseBodyFullSize = fullSize;
|
|
254
259
|
this._networkEntries[idx].responseBodyTruncated = truncated;
|
|
255
260
|
}
|
|
256
|
-
}).catch(() => {
|
|
261
|
+
}).catch((err) => {
|
|
257
262
|
// Body unavailable for some requests (e.g. uploads) — non-fatal
|
|
263
|
+
if (process.env.OPENCLI_VERBOSE) {
|
|
264
|
+
// eslint-disable-next-line no-console
|
|
265
|
+
console.error(`[cdp] getResponseBody failed for ${p.requestId}:`, err instanceof Error ? err.message : err);
|
|
266
|
+
}
|
|
258
267
|
}).finally(() => {
|
|
259
268
|
this._pendingBodyFetches.delete(bodyFetch);
|
|
260
269
|
});
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/** Structural expectations checked against the rows a command produced. */
export type FixtureExpect = {
    /** Inclusive bounds on the number of rows; either bound may be omitted. */
    rowCount?: { min?: number; max?: number };
    /** Keys every row must have. */
    columns?: Array<string>;
    /** Column → allowed type names, `|`-separated (e.g. "number|string"). */
    types?: Record<string, string>;
    /**
     * Column → regular-expression source. Each present, non-null value is
     * stringified and tested against the compiled expression.
     */
    patterns?: Record<string, string>;
    /** Columns whose value must not be null, undefined, or blank after trimming. */
    notEmpty?: Array<string>;
    /**
     * Substrings that MUST NOT appear in the column value.
     *
     * Catches silent content contamination that `notEmpty` alone misses —
     * e.g. a `description` field that accidentally carries "address: ..." /
     * "category: ..." fragments from sibling DOM nodes, or a `title` that
     * bled in a navigation-breadcrumb prefix. Each entry is matched as a
     * plain substring (not a regular expression) against the stringified
     * column value.
     */
    mustNotContain?: Record<string, Array<string>>;
    /**
     * Columns whose values must be truthy. Complements `notEmpty` by also
     * catching silent `|| 0` / `|| false` fallbacks in numeric/boolean
     * fields. Fires when the column is present and its value coerces to
     * `false` in JS.
     */
    mustBeTruthy?: Array<string>;
};
|
|
28
|
+
/** Fixture arguments: an object of named flags, or a raw argv array. */
export type FixtureArgs =
    | Record<string, unknown>
    | Array<unknown>;
/** A verify fixture: optional invocation args plus optional expectations. */
export type Fixture = {
    args?: FixtureArgs;
    expect?: FixtureExpect;
};
/** One failed expectation reported by `validateRows`. */
export type ValidationFailure = {
    /** Which kind of expectation failed. */
    rule:
        | 'rowCount'
        | 'column'
        | 'type'
        | 'pattern'
        | 'notEmpty'
        | 'mustNotContain'
        | 'mustBeTruthy';
    /** Human-readable description of the failure. */
    detail: string;
    /** Index of the offending row; omitted for failures not tied to a row. */
    rowIndex?: number;
};
/** A single output row keyed by column name. */
export type Row = Record<string, unknown>;
|
|
39
|
+
/** Path of the fixture file: `<home>/.opencli/sites/<site>/verify/<command>.json`. */
export declare function fixturePath(site: string, command: string): string;
/**
 * Read and JSON-parse the fixture for `site`/`command`. Returns `null` when
 * the file does not exist; throws when reading or parsing fails.
 */
export declare function loadFixture(site: string, command: string): Fixture | null;
/**
 * Write the fixture as pretty-printed JSON, creating parent directories as
 * needed, and return the path written.
 */
export declare function writeFixture(site: string, command: string, fixture: Fixture): string;
/**
 * Derive a reasonable fixture from sample output. Used by `--write-fixture`
 * to seed a first draft the author can hand-tune.
 *
 * Heuristics:
 *   - rowCount.min = 1 if rows non-empty, else 0
 *   - columns = keys from the first row
 *   - types = for each of those columns, the type names observed across all
 *     rows, sorted and joined with "|" (e.g. "number|string" for mixed)
 *   - no auto patterns / notEmpty — author should add those deliberately
 */
export declare function deriveFixture(rows: Array<Row>, args?: FixtureArgs): Fixture;
/** Check `rows` against `fixture.expect` and return every failure found. */
export declare function validateRows(rows: Array<Row>, fixture: Fixture): Array<ValidationFailure>;
/**
 * Convert fixture args into argv tokens appended after the command name.
 * - Array form is passed through verbatim (stringified), supporting positional subjects.
 * - Object form is expanded to `--key value` pairs.
 */
export declare function expandFixtureArgs(args: FixtureArgs | undefined): Array<string>;
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verify fixture: structural expectations for `opencli browser verify` output.
|
|
3
|
+
*
|
|
4
|
+
* The adapter-author skill runbook says every published adapter must write a
|
|
5
|
+
* fixture under `~/.opencli/sites/<site>/verify/<command>.json` so later verify
|
|
6
|
+
* runs can catch shape regressions (missing columns, wrong types, bleeding
|
|
7
|
+
* values) without relying on exact content match — BBS / news / market data is
|
|
8
|
+
* too volatile for value equality.
|
|
9
|
+
*
|
|
10
|
+
* Schema:
|
|
11
|
+
* {
|
|
12
|
+
* // args can be either:
|
|
13
|
+
* // - an object of named flags: { "limit": 3 } → expands to `--limit 3`
|
|
14
|
+
* // - a raw argv array: ["123", "--limit", "3"] → passed verbatim
|
|
15
|
+
* // Use the array form for adapters that take positional subjects (e.g. <tid>, <url>, <query>).
|
|
16
|
+
* "args": { "limit": 3 },
|
|
17
|
+
* "expect": {
|
|
18
|
+
* "rowCount": { "min": 1, "max": 10 }, // inclusive bounds
|
|
19
|
+
* "columns": ["a", "b"], // every row must have these keys
|
|
20
|
+
* "types": { "a": "string", "b": "number|string" },
|
|
21
|
+
* "patterns": { "url": "^https?://" },
|
|
22
|
+
* "notEmpty": ["title", "url"], // trimmed string must be non-empty
|
|
23
|
+
* "mustNotContain": { // catch content-contamination bleed
|
|
24
|
+
* "description": ["address:", "category:"]
|
|
25
|
+
* },
|
|
26
|
+
* "mustBeTruthy": ["count"] // catch silent `|| 0` fallbacks
|
|
27
|
+
* }
|
|
28
|
+
* }
|
|
29
|
+
*/
|
|
30
|
+
import * as fs from 'node:fs';
|
|
31
|
+
import * as os from 'node:os';
|
|
32
|
+
import * as path from 'node:path';
|
|
33
|
+
/**
 * Location of the verify fixture for a site command:
 * `<home>/.opencli/sites/<site>/verify/<command>.json`.
 */
export function fixturePath(site, command) {
    const segments = [os.homedir(), '.opencli', 'sites', site, 'verify', `${command}.json`];
    return path.join(...segments);
}
|
|
36
|
+
/**
 * Load the fixture stored for `site`/`command`.
 *
 * Returns `null` when the fixture file does not exist. When the file exists
 * but cannot be read or parsed as JSON, throws an `Error` whose message names
 * the file and the underlying reason.
 */
export function loadFixture(site, command) {
    const file = fixturePath(site, command);
    if (fs.existsSync(file)) {
        try {
            return JSON.parse(fs.readFileSync(file, 'utf-8'));
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            throw new Error(`Failed to parse fixture ${file}: ${reason}`);
        }
    }
    return null;
}
|
|
49
|
+
/**
 * Persist `fixture` for `site`/`command` as 2-space-indented JSON followed by
 * a trailing newline, creating the parent directory if needed.
 *
 * @returns The path of the file that was written.
 */
export function writeFixture(site, command, fixture) {
    const target = fixturePath(site, command);
    const parentDir = path.dirname(target);
    fs.mkdirSync(parentDir, { recursive: true });
    const serialized = JSON.stringify(fixture, null, 2);
    fs.writeFileSync(target, `${serialized}\n`, 'utf-8');
    return target;
}
|
|
55
|
+
/**
 * Seed a draft fixture from sample output (used by `--write-fixture`); the
 * author is expected to hand-tune the result.
 *
 * What gets derived:
 *   - `rowCount.min` is 1 when there are rows, otherwise 0 (and nothing else
 *     is derived for empty input)
 *   - `columns` are the keys of the first row
 *   - `types` holds, for each of those columns, the type names observed
 *     across all rows, sorted and joined with "|" (e.g. "number|string")
 *   - no patterns / notEmpty — those should be added deliberately
 *
 * `args` is copied into the fixture only when truthy.
 */
export function deriveFixture(rows, args) {
    const wrap = (expect) => (args ? { args, expect } : { expect });
    if (rows.length === 0) {
        return wrap({ rowCount: { min: 0 } });
    }
    const columns = Object.keys(rows[0]);
    const types = {};
    for (const col of columns) {
        const seen = new Set();
        for (const row of rows) {
            seen.add(jsType(row[col]));
        }
        types[col] = [...seen].sort().join('|');
    }
    return wrap({ rowCount: { min: 1 }, columns, types });
}
|
|
87
|
+
/**
 * Check `rows` against `fixture.expect` and collect every violation.
 *
 * Failures are reported in this order: row-count bounds, uncompilable
 * patterns, then for each row in turn: missing columns, type mismatches,
 * pattern mismatches, empty `notEmpty` columns, forbidden substrings and
 * falsy `mustBeTruthy` columns. Type, pattern, `mustNotContain` and
 * `mustBeTruthy` checks skip columns the row does not have; pattern and
 * `mustNotContain` checks also skip null/undefined values.
 *
 * @returns An array of `{ rule, detail, rowIndex? }` objects; empty when the
 *   fixture has no `expect` section or nothing failed.
 */
export function validateRows(rows, fixture) {
    const failures = [];
    const expect = fixture.expect;
    if (!expect) {
        return failures;
    }
    const { rowCount } = expect;
    if (rowCount) {
        const { min, max } = rowCount;
        const total = rows.length;
        if (typeof min === 'number' && total < min) {
            failures.push({ rule: 'rowCount', detail: `got ${total} rows, expected at least ${min}` });
        }
        if (typeof max === 'number' && total > max) {
            failures.push({ rule: 'rowCount', detail: `got ${total} rows, expected at most ${max}` });
        }
    }
    // Compile each pattern once up front; an invalid one is reported a single
    // time here and then left out of the per-row checks.
    const compiledPatterns = {};
    for (const [col, src] of Object.entries(expect.patterns ?? {})) {
        try {
            compiledPatterns[col] = new RegExp(src);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            failures.push({ rule: 'pattern', detail: `pattern for "${col}" invalid: ${reason}` });
        }
    }
    const requiredColumns = expect.columns ?? [];
    const typeRules = Object.entries(expect.types ?? {});
    const patternRules = Object.entries(compiledPatterns);
    const nonEmptyColumns = expect.notEmpty ?? [];
    const forbiddenRules = Object.entries(expect.mustNotContain ?? {});
    const truthyColumns = expect.mustBeTruthy ?? [];
    rows.forEach((row, rowIndex) => {
        const has = (col) => col in row;
        const report = (rule, detail) => {
            failures.push({ rule, detail, rowIndex });
        };
        for (const col of requiredColumns) {
            if (!has(col)) {
                report('column', `missing column "${col}"`);
            }
        }
        for (const [col, declared] of typeRules) {
            if (!has(col)) {
                continue;
            }
            const actual = jsType(row[col]);
            if (typeMatches(actual, declared)) {
                continue;
            }
            report('type', `"${col}" is ${actual}, expected ${declared}`);
        }
        for (const [col, re] of patternRules) {
            if (!has(col)) {
                continue;
            }
            const value = row[col];
            if (value === null || value === undefined) {
                continue;
            }
            if (re.test(String(value))) {
                continue;
            }
            // Only the first 60 characters of the value are quoted in the detail.
            const shown = JSON.stringify(String(value).slice(0, 60));
            report('pattern', `"${col}"=${shown} does not match /${re.source}/`);
        }
        for (const col of nonEmptyColumns) {
            const value = row[col];
            const blank = value === null || value === undefined || String(value).trim() === '';
            if (blank) {
                report('notEmpty', `"${col}" is empty`);
            }
        }
        for (const [col, needles] of forbiddenRules) {
            if (!has(col)) {
                continue;
            }
            const value = row[col];
            if (value === null || value === undefined) {
                continue;
            }
            const haystack = String(value);
            for (const needle of needles) {
                if (!haystack.includes(needle)) {
                    continue;
                }
                report('mustNotContain', `"${col}" contains forbidden substring ${JSON.stringify(needle)}`);
            }
        }
        for (const col of truthyColumns) {
            if (has(col) && !row[col]) {
                report('mustBeTruthy', `"${col}" is falsy (${JSON.stringify(row[col])}) — likely silent fallback`);
            }
        }
    });
    return failures;
}
|
|
183
|
+
/**
 * Turn fixture args into the argv tokens that follow the command name.
 * - Missing args yield no tokens.
 * - An array is passed through in order with every element stringified,
 *   which supports positional subjects.
 * - An object becomes a `--key value` pair per entry, value stringified.
 */
export function expandFixtureArgs(args) {
    if (!args) {
        return [];
    }
    return Array.isArray(args)
        ? args.map((token) => String(token))
        : Object.entries(args).flatMap(([flag, value]) => [`--${flag}`, String(value)]);
}
|
|
199
|
+
/**
 * Type name used in fixtures: `typeof v`, except that `null` is reported as
 * 'null' and arrays as 'array' rather than 'object'.
 */
function jsType(v) {
    const primitive = typeof v;
    if (primitive !== 'object') {
        return primitive;
    }
    if (v === null) {
        return 'null';
    }
    return Array.isArray(v) ? 'array' : primitive;
}
|
|
206
|
+
/**
 * Whether `actual` satisfies a declared type spec: a `|`-separated list of
 * type names. Names are trimmed and blank ones dropped. A spec with no names
 * left, or one that lists 'any', accepts everything.
 */
function typeMatches(actual, declared) {
    const allowed = [];
    for (const part of declared.split('|')) {
        const name = part.trim();
        if (name) {
            allowed.push(name);
        }
    }
    if (allowed.length === 0) {
        return true;
    }
    return allowed.some((name) => name === 'any' || name === actual);
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|