@jackwener/opencli 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/CLI-CREATOR.md +594 -0
  2. package/README.md +124 -39
  3. package/README.zh-CN.md +151 -0
  4. package/SKILL.md +178 -102
  5. package/dist/bilibili.d.ts +6 -5
  6. package/dist/browser.d.ts +3 -1
  7. package/dist/browser.js +44 -2
  8. package/dist/cascade.d.ts +46 -0
  9. package/dist/cascade.js +180 -0
  10. package/dist/clis/bbc/news.js +42 -0
  11. package/dist/clis/bilibili/hot.yaml +38 -0
  12. package/dist/clis/boss/search.js +47 -0
  13. package/dist/clis/ctrip/search.d.ts +1 -0
  14. package/dist/clis/ctrip/search.js +62 -0
  15. package/dist/clis/hackernews/top.yaml +36 -0
  16. package/dist/clis/index.d.ts +10 -1
  17. package/dist/clis/index.js +19 -1
  18. package/dist/clis/reddit/hot.yaml +46 -0
  19. package/dist/clis/reuters/search.d.ts +1 -0
  20. package/dist/clis/reuters/search.js +52 -0
  21. package/dist/clis/smzdm/search.d.ts +1 -0
  22. package/dist/clis/smzdm/search.js +66 -0
  23. package/dist/clis/twitter/trending.yaml +40 -0
  24. package/dist/clis/v2ex/hot.yaml +25 -0
  25. package/dist/clis/v2ex/latest.yaml +25 -0
  26. package/dist/clis/v2ex/topic.yaml +27 -0
  27. package/dist/clis/weibo/hot.d.ts +1 -0
  28. package/dist/clis/weibo/hot.js +41 -0
  29. package/dist/clis/xiaohongshu/feed.yaml +32 -0
  30. package/dist/clis/xiaohongshu/notifications.yaml +38 -0
  31. package/dist/clis/xiaohongshu/search.d.ts +5 -0
  32. package/dist/clis/xiaohongshu/search.js +68 -0
  33. package/dist/clis/yahoo-finance/quote.d.ts +1 -0
  34. package/dist/clis/yahoo-finance/quote.js +74 -0
  35. package/dist/clis/youtube/search.d.ts +1 -0
  36. package/dist/clis/youtube/search.js +60 -0
  37. package/dist/clis/zhihu/hot.yaml +42 -0
  38. package/dist/clis/zhihu/question.d.ts +1 -0
  39. package/dist/clis/zhihu/question.js +39 -0
  40. package/dist/clis/zhihu/search.yaml +55 -0
  41. package/dist/engine.d.ts +2 -1
  42. package/dist/explore.d.ts +23 -13
  43. package/dist/explore.js +293 -422
  44. package/dist/generate.js +2 -1
  45. package/dist/main.js +21 -2
  46. package/dist/pipeline/executor.d.ts +9 -0
  47. package/dist/pipeline/executor.js +88 -0
  48. package/dist/pipeline/index.d.ts +5 -0
  49. package/dist/pipeline/index.js +5 -0
  50. package/dist/pipeline/steps/browser.d.ts +12 -0
  51. package/dist/pipeline/steps/browser.js +68 -0
  52. package/dist/pipeline/steps/fetch.d.ts +5 -0
  53. package/dist/pipeline/steps/fetch.js +50 -0
  54. package/dist/pipeline/steps/intercept.d.ts +5 -0
  55. package/dist/pipeline/steps/intercept.js +75 -0
  56. package/dist/pipeline/steps/tap.d.ts +12 -0
  57. package/dist/pipeline/steps/tap.js +130 -0
  58. package/dist/pipeline/steps/transform.d.ts +8 -0
  59. package/dist/pipeline/steps/transform.js +53 -0
  60. package/dist/pipeline/template.d.ts +16 -0
  61. package/dist/pipeline/template.js +115 -0
  62. package/dist/pipeline/template.test.d.ts +4 -0
  63. package/dist/pipeline/template.test.js +102 -0
  64. package/dist/pipeline/transform.test.d.ts +4 -0
  65. package/dist/pipeline/transform.test.js +90 -0
  66. package/dist/pipeline.d.ts +5 -7
  67. package/dist/pipeline.js +5 -313
  68. package/dist/registry.d.ts +3 -2
  69. package/dist/runtime.d.ts +2 -1
  70. package/dist/synthesize.d.ts +11 -8
  71. package/dist/synthesize.js +142 -118
  72. package/dist/types.d.ts +27 -0
  73. package/dist/types.js +7 -0
  74. package/package.json +9 -4
  75. package/src/bilibili.ts +9 -7
  76. package/src/browser.ts +41 -3
  77. package/src/cascade.ts +218 -0
  78. package/src/clis/bbc/news.ts +42 -0
  79. package/src/clis/boss/search.ts +47 -0
  80. package/src/clis/ctrip/search.ts +62 -0
  81. package/src/clis/index.ts +28 -1
  82. package/src/clis/reddit/hot.yaml +46 -0
  83. package/src/clis/reuters/search.ts +52 -0
  84. package/src/clis/smzdm/search.ts +66 -0
  85. package/src/clis/v2ex/hot.yaml +5 -9
  86. package/src/clis/v2ex/latest.yaml +5 -8
  87. package/src/clis/v2ex/topic.yaml +27 -0
  88. package/src/clis/weibo/hot.ts +41 -0
  89. package/src/clis/xiaohongshu/feed.yaml +32 -0
  90. package/src/clis/xiaohongshu/notifications.yaml +38 -0
  91. package/src/clis/xiaohongshu/search.ts +71 -0
  92. package/src/clis/yahoo-finance/quote.ts +74 -0
  93. package/src/clis/youtube/search.ts +60 -0
  94. package/src/clis/zhihu/hot.yaml +22 -8
  95. package/src/clis/zhihu/question.ts +45 -0
  96. package/src/clis/zhihu/search.yaml +55 -0
  97. package/src/engine.ts +2 -1
  98. package/src/explore.ts +303 -465
  99. package/src/generate.ts +3 -1
  100. package/src/main.ts +18 -2
  101. package/src/pipeline/executor.ts +98 -0
  102. package/src/pipeline/index.ts +6 -0
  103. package/src/pipeline/steps/browser.ts +67 -0
  104. package/src/pipeline/steps/fetch.ts +60 -0
  105. package/src/pipeline/steps/intercept.ts +78 -0
  106. package/src/pipeline/steps/tap.ts +137 -0
  107. package/src/pipeline/steps/transform.ts +50 -0
  108. package/src/pipeline/template.test.ts +107 -0
  109. package/src/pipeline/template.ts +101 -0
  110. package/src/pipeline/transform.test.ts +107 -0
  111. package/src/pipeline.ts +5 -292
  112. package/src/registry.ts +4 -2
  113. package/src/runtime.ts +3 -1
  114. package/src/synthesize.ts +142 -137
  115. package/src/types.ts +23 -0
  116. package/vitest.config.ts +7 -0
  117. package/dist/clis/github/search.js +0 -20
  118. package/dist/clis/zhihu/search.js +0 -58
  119. package/dist/promote.d.ts +0 -1
  120. package/dist/promote.js +0 -3
  121. package/dist/register.d.ts +0 -2
  122. package/dist/register.js +0 -2
  123. package/dist/scaffold.d.ts +0 -2
  124. package/dist/scaffold.js +0 -2
  125. package/dist/smoke.d.ts +0 -2
  126. package/dist/smoke.js +0 -2
  127. package/src/clis/github/search.ts +0 -21
  128. package/src/clis/github/trending.yaml +0 -58
  129. package/src/clis/zhihu/search.ts +0 -65
  130. package/src/promote.ts +0 -3
  131. package/src/register.ts +0 -2
  132. package/src/scaffold.ts +0 -2
  133. package/src/smoke.ts +0 -2
  134. /package/dist/clis/{github/search.d.ts → bbc/news.d.ts} +0 -0
  135. /package/dist/clis/{zhihu → boss}/search.d.ts +0 -0
package/src/cascade.ts ADDED
@@ -0,0 +1,218 @@
1
+ /**
2
+ * Strategy Cascade: automatic strategy downgrade chain.
3
+ *
4
+ * Probes an API endpoint starting from the simplest strategy (PUBLIC)
5
+ * and automatically downgrades through the strategy tiers until one works:
6
+ *
7
+ * PUBLIC → COOKIE → HEADER → INTERCEPT → UI
8
+ *
9
+ * This eliminates the need for manual strategy selection — the system
10
+ * automatically finds the minimum-privilege strategy that works.
11
+ */
12
+
13
+ import { Strategy } from './registry.js';
14
+ import type { IPage } from './types.js';
15
+
16
/**
 * Order in which strategies are probed, from the simplest (PUBLIC)
 * to the most complex (UI). Positions in this array are used as the
 * cascade's ranking of strategies.
 */
const CASCADE_ORDER: Strategy[] = [Strategy.PUBLIC, Strategy.COOKIE, Strategy.HEADER, Strategy.INTERCEPT, Strategy.UI];
24
+
25
/** Outcome of probing one endpoint with one strategy. */
interface ProbeResult {
  /** Strategy that was probed. */
  strategy: Strategy;
  /** Whether the probe is considered to have worked. */
  success: boolean;
  /** HTTP status reported by the in-page fetch, when a response was received. */
  statusCode?: number;
  /** Whether the response body parsed as non-empty JSON. */
  hasData?: boolean;
  /** Failure description, when one was recorded. */
  error?: string;
  /** Leading slice of the response body (the probe keeps the first 200 characters). */
  responsePreview?: string;
}
33
+
34
/** Aggregate outcome of running the strategy cascade against one URL. */
interface CascadeResult {
  /** Strategy selected by the cascade. */
  bestStrategy: Strategy;
  /** Every probe that was attempted, in the order it ran. */
  probes: ProbeResult[];
  /** Score in the 0..1 range; rendered as a percentage by renderCascadeResult. */
  confidence: number;
}
39
+
40
/**
 * Build the source text of the in-page probe function for a fetch-based strategy.
 *
 * The generated function fetches `url` and resolves to
 * `{ status, ok, hasData, preview }` when a response was received, or to
 * `{ ok: false, error }` when the fetch itself threw.
 */
function buildProbeScript(url: string, strategy: Strategy): string {
  // PUBLIC must not send cookies. fetch() defaults to 'same-origin', which
  // still attaches them whenever the page is on the API's own origin, so the
  // probe has to opt out explicitly.
  const credentials = strategy === Strategy.PUBLIC ? 'omit' : 'include';

  // HEADER additionally mirrors a CSRF cookie (when one exists) into the
  // commonly used CSRF request headers.
  const csrfSnippet = strategy === Strategy.HEADER
    ? `
        const cookies = document.cookie.split(';').map(c => c.trim());
        const csrf = cookies.find(c => c.startsWith('ct0=') || c.startsWith('csrf_token=') || c.startsWith('_csrf='))?.split('=').slice(1).join('=');
        if (csrf) {
          headers['X-Csrf-Token'] = csrf;
          headers['X-XSRF-Token'] = csrf;
        }`
    : '';

  return `
    async () => {
      try {
        const headers = {};${csrfSnippet}
        const resp = await fetch(${JSON.stringify(url)}, { credentials: ${JSON.stringify(credentials)}, headers });
        const status = resp.status;
        if (!resp.ok) return { status, ok: false };
        const text = await resp.text();
        let hasData = false;
        try {
          const json = JSON.parse(text);
          hasData = !!json && (Array.isArray(json) ? json.length > 0 :
            typeof json === 'object' && Object.keys(json).length > 0);
          // A non-zero "code" field is treated as an API-level error.
          if (json.code !== undefined && json.code !== 0) hasData = false;
        } catch {}
        return { status, ok: true, hasData, preview: text.slice(0, 200) };
      } catch (e) { return { ok: false, error: e.message }; }
    }
  `;
}

/**
 * Probe an endpoint with a specific strategy.
 *
 * PUBLIC, COOKIE and HEADER run a fetch inside the page via `page.evaluate`;
 * INTERCEPT and UI are reported as unsupported because they need
 * site-specific code.
 *
 * @param page     Page used to evaluate the probe script.
 * @param url      Endpoint to fetch.
 * @param strategy Strategy to probe with.
 * @param opts     Accepted for interface compatibility; `timeout` is not
 *                 applied by this function.
 * @returns A ProbeResult. `success` is true only when the response was OK and
 *          carried non-empty JSON without an API-level error code. This
 *          function does not throw: failures are reported through `error`.
 */
export async function probeEndpoint(
  page: IPage,
  url: string,
  strategy: Strategy,
  opts: { timeout?: number } = {},
): Promise<ProbeResult> {
  const result: ProbeResult = { strategy, success: false };

  try {
    switch (strategy) {
      case Strategy.PUBLIC:
      case Strategy.COOKIE:
      case Strategy.HEADER: {
        const resp = await page.evaluate(buildProbeScript(url, strategy));
        result.statusCode = resp?.status;
        result.hasData = resp?.hasData;
        // Coerce so `success` is always a real boolean, even when the page
        // returned nothing.
        result.success = Boolean(resp?.ok && resp?.hasData);
        result.responsePreview = resp?.preview;
        // Surface network-level failures caught inside the page.
        if (resp?.error) result.error = String(resp.error);
        break;
      }

      case Strategy.INTERCEPT:
      case Strategy.UI:
        // These require a site-specific implementation.
        result.success = false;
        result.error = `Strategy ${strategy} requires site-specific implementation`;
        break;
    }
  } catch (err: any) {
    result.success = false;
    result.error = err?.message ?? String(err);
  }

  return result;
}
166
+
167
/**
 * Run the cascade: try each strategy in CASCADE_ORDER until one works.
 *
 * @param page Page handed to each probe.
 * @param url  Endpoint to probe.
 * @param opts `maxStrategy` is the last strategy to try (inclusive). When it
 *             is omitted, or is not a member of CASCADE_ORDER, probing stops
 *             at HEADER so INTERCEPT/UI are never tried automatically.
 *             `opts` is forwarded unchanged to each probe.
 * @returns The first strategy whose probe succeeded, with confidence
 *          decreasing by 0.1 per tier. When none succeeds the result falls
 *          back to COOKIE with confidence 0.3.
 */
export async function cascadeProbe(
  page: IPage,
  url: string,
  opts: { maxStrategy?: Strategy; timeout?: number } = {},
): Promise<CascadeResult> {
  const defaultMaxIdx = CASCADE_ORDER.indexOf(Strategy.HEADER); // Don't auto-try INTERCEPT/UI
  const requestedIdx = opts.maxStrategy == null
    ? defaultMaxIdx
    : CASCADE_ORDER.indexOf(opts.maxStrategy);
  // indexOf yields -1 for an unknown strategy; without this guard the loop
  // below would not run and no probe would be attempted.
  const maxIdx = requestedIdx >= 0 ? requestedIdx : defaultMaxIdx;
  const lastIdx = Math.min(maxIdx, CASCADE_ORDER.length - 1);

  const probes: ProbeResult[] = [];

  for (let i = 0; i <= lastIdx; i++) {
    const strategy = CASCADE_ORDER[i];
    const probe = await probeEndpoint(page, url, strategy, opts);
    probes.push(probe);

    if (probe.success) {
      return {
        bestStrategy: strategy,
        probes,
        confidence: 1.0 - (i * 0.1), // Higher confidence for simpler strategies
      };
    }
  }

  // None worked: default to COOKIE (most common for logged-in sites).
  return {
    bestStrategy: Strategy.COOKIE,
    probes,
    confidence: 0.3,
  };
}
203
+
204
/**
 * Format a cascade result as multi-line text: one summary line naming the
 * chosen strategy and its confidence as a whole percentage, then one line
 * per probe with a pass/fail mark, the HTTP status (when truthy) and the
 * error text (when present).
 */
export function renderCascadeResult(result: CascadeResult): string {
  const percent = (result.confidence * 100).toFixed(0);
  const summary = `Strategy Cascade: ${result.bestStrategy} (${percent}% confidence)`;

  const rows = result.probes.map((entry) => {
    const mark = entry.success ? '✅' : '❌';
    const code = entry.statusCode ? ` [${entry.statusCode}]` : '';
    const reason = entry.error ? ` — ${entry.error}` : '';
    return ` ${mark} ${entry.strategy}${code}${reason}`;
  });

  return [summary, ...rows].join('\n');
}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * BBC News headlines — public RSS feed, no browser needed.
3
+ * Source: bb-sites/bbc/news.js
4
+ */
5
+ import { cli, Strategy } from '../../registry.js';
6
+
7
cli({
  site: 'bbc',
  name: 'news',
  description: 'BBC News headlines (RSS)',
  domain: 'www.bbc.com',
  strategy: Strategy.PUBLIC,
  args: [
    { name: 'limit', type: 'int', default: 20, help: 'Number of headlines (max 50)' },
  ],
  columns: ['rank', 'title', 'description', 'url'],
  /**
   * Fetch the BBC News RSS feed and return up to `limit` headlines (capped at 50).
   * Returns an empty list when the feed request is not OK. `page` is unused:
   * the feed is fetched directly rather than through the browser page.
   */
  func: async (page, kwargs) => {
    const count = Math.min(kwargs.limit || 20, 50);
    const resp = await fetch('https://feeds.bbci.co.uk/news/rss.xml');
    if (!resp.ok) return [];
    const xml = await resp.text();

    // The five predefined XML entities; decoded in one pass so "&amp;lt;"
    // becomes "&lt;" rather than "<".
    const entities: Record<string, string> = { lt: '<', gt: '>', quot: '"', apos: "'", amp: '&' };
    const decodeEntities = (text: string): string =>
      text.replace(/&(lt|gt|quot|apos|amp);/g, (_m, name) => entities[name]);

    // Read the text of the first <tag> element in `block`. CDATA content is
    // returned verbatim (an empty CDATA section yields ''), plain content is
    // entity-decoded. [\s\S] lets content span several lines.
    const readTag = (block: string, tag: string): string => {
      const pattern = new RegExp(
        `<${tag}(?:\\s[^>]*)?>\\s*(?:<!\\[CDATA\\[([\\s\\S]*?)\\]\\]>|([\\s\\S]*?))\\s*</${tag}>`,
      );
      const found = block.match(pattern);
      if (!found) return '';
      const text = found[1] !== undefined ? found[1] : decodeEntities(found[2] ?? '');
      return text.trim();
    };

    // Simple regex-based XML parsing without DOMParser (works in Node).
    const items: any[] = [];
    for (const [, block] of xml.matchAll(/<item(?:\s[^>]*)?>([\s\S]*?)<\/item>/g)) {
      if (items.length >= count) break;
      const title = readTag(block, 'title');
      if (!title) continue; // items without a title are skipped
      items.push({
        rank: items.length + 1,
        title,
        description: readTag(block, 'description').slice(0, 200),
        // Fall back to <guid> when the item has no <link>.
        url: readTag(block, 'link') || readTag(block, 'guid'),
      });
    }
    return items;
  },
});
@@ -0,0 +1,47 @@
1
+ /**
2
+ * BOSS直聘 job search — browser cookie API.
3
+ * Source: bb-sites/boss/search.js
4
+ */
5
+ import { cli, Strategy } from '../../registry.js';
6
+
7
cli({
  site: 'boss',
  name: 'search',
  description: 'BOSS直聘搜索职位',
  domain: 'www.zhipin.com',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'query', required: true, help: 'Search keyword (e.g. AI agent, 前端)' },
    { name: 'city', default: '101010100', help: 'City code (101010100=北京, 101020100=上海, 101210100=杭州, 101280100=广州)' },
    { name: 'limit', type: 'int', default: 15, help: 'Number of results' },
  ],
  columns: ['name', 'salary', 'company', 'city', 'experience', 'degree', 'boss', 'url'],
  /**
   * Open zhipin.com, call its job-list API from inside the page (so browser
   * cookies are sent) and return up to `limit` jobs. Only page 1 with
   * pageSize 15 is requested, so at most 15 rows come back. Any API or HTTP
   * error yields an empty list.
   */
  func: async (page, kwargs) => {
    // JSON.stringify produces a valid JS string literal, so quotes,
    // backslashes and newlines in user input cannot break out of the
    // evaluated script.
    const queryLiteral = JSON.stringify(String(kwargs.query));
    const cityLiteral = JSON.stringify(String(kwargs.city || '101010100'));

    await page.goto('https://www.zhipin.com');
    await page.wait(2);
    const data = await page.evaluate(`
      (async () => {
        const params = new URLSearchParams({
          scene: '1', query: ${queryLiteral},
          city: ${cityLiteral}, page: '1', pageSize: '15',
          experience: '', degree: '', payType: '', partTime: '',
          industry: '', scale: '', stage: '', position: '',
          jobType: '', salary: '', multiBusinessDistrict: '', multiSubway: ''
        });
        const resp = await fetch('/wapi/zpgeek/search/joblist.json?' + params.toString(), {credentials: 'include'});
        if (!resp.ok) return {error: 'HTTP ' + resp.status};
        const d = await resp.json();
        if (d.code !== 0) return {error: d.message || 'API error'};
        const zpData = d.zpData || {};
        return (zpData.jobList || []).map(j => ({
          name: j.jobName, salary: j.salaryDesc, company: j.brandName,
          city: j.cityName, experience: j.jobExperience, degree: j.jobDegree,
          boss: j.bossName + ' · ' + j.bossTitle,
          url: j.encryptJobId ? 'https://www.zhipin.com/job_detail/' + j.encryptJobId + '.html' : ''
        }));
      })()
    `);
    // The in-page script returns an {error} object on failure; treat that as no results.
    if (!Array.isArray(data)) return [];
    return data.slice(0, kwargs.limit || 15);
  },
});
@@ -0,0 +1,62 @@
1
+ /**
2
+ * 携程旅行搜索 — browser cookie, multi-strategy.
3
+ * Source: bb-sites/ctrip/search.js (simplified to suggestion API)
4
+ */
5
+ import { cli, Strategy } from '../../registry.js';
6
+
7
cli({
  site: 'ctrip',
  name: 'search',
  description: '携程旅行搜索',
  domain: 'www.ctrip.com',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'query', required: true, help: 'Search keyword (city or attraction)' },
    { name: 'limit', type: 'int', default: 15, help: 'Number of results' },
  ],
  columns: ['rank', 'name', 'type', 'score', 'price', 'url'],
  /**
   * Open ctrip.com, query the mobile suggestion API from inside the page and
   * flatten every array-valued category of the response into one ranked
   * list of at most `limit` rows. Any failure yields an empty list.
   */
  func: async (page, kwargs) => {
    // Coerce to a number so only a numeric literal is spliced into the script.
    const limit = Number(kwargs.limit) || 15;
    // JSON.stringify produces a valid JS string literal, so quotes,
    // backslashes and newlines in user input cannot break out of the
    // evaluated script.
    const queryLiteral = JSON.stringify(String(kwargs.query));

    await page.goto('https://www.ctrip.com');
    await page.wait(2);
    const data = await page.evaluate(`
      (async () => {
        const query = ${queryLiteral};
        const limit = ${limit};

        // Strategy 1: Suggestion API
        try {
          const suggestUrl = 'https://m.ctrip.com/restapi/h5api/searchapp/search?action=onekeyali&keyword=' + encodeURIComponent(query);
          const resp = await fetch(suggestUrl, {credentials: 'include'});
          if (resp.ok) {
            const d = await resp.json();
            const raw = d.data || d.result || d;
            if (raw && typeof raw === 'object') {
              // Flatten all result categories
              const items = [];
              for (const key of Object.keys(raw)) {
                if (items.length >= limit) break;
                const list = Array.isArray(raw[key]) ? raw[key] : [];
                for (const item of list) {
                  if (items.length >= limit) break;
                  items.push({
                    rank: items.length + 1,
                    name: item.word || item.name || item.title || '',
                    type: item.type || item.tpName || key,
                    score: item.score || '',
                    price: item.price || item.minPrice || '',
                    url: item.url || item.surl || '',
                  });
                }
              }
              if (items.length > 0) return items;
            }
          }
        } catch(e) { /* best effort: fall through to the error result below */ }

        return {error: 'No results for: ' + query};
      })()
    `);
    // The in-page script returns an {error} object on failure; treat that as no results.
    if (!Array.isArray(data)) return [];
    return data;
  },
});
package/src/clis/index.ts CHANGED
@@ -16,4 +16,31 @@ import './bilibili/user-videos.js';
16
16
  import './github/search.js';
17
17
 
18
18
  // zhihu
19
- import './zhihu/search.js';
19
+ import './zhihu/question.js';
20
+
21
+ // xiaohongshu
22
+ import './xiaohongshu/search.js';
23
+
24
+ // bbc
25
+ import './bbc/news.js';
26
+
27
+ // weibo
28
+ import './weibo/hot.js';
29
+
30
+ // boss
31
+ import './boss/search.js';
32
+
33
+ // yahoo-finance
34
+ import './yahoo-finance/quote.js';
35
+
36
+ // reuters
37
+ import './reuters/search.js';
38
+
39
+ // smzdm
40
+ import './smzdm/search.js';
41
+
42
+ // ctrip
43
+ import './ctrip/search.js';
44
+
45
+ // youtube
46
+ import './youtube/search.js';
@@ -0,0 +1,46 @@
1
# Reddit hot posts, fetched from Reddit's JSON listing inside the page
# so the browser's cookies are sent with the request.
site: reddit
name: hot
description: Reddit 热门帖子
domain: www.reddit.com

args:
  subreddit:
    type: str
    default: ""
    description: "Subreddit name (e.g. programming). Empty for frontpage"
  limit:
    type: int
    default: 20
    description: Number of posts

pipeline:
  - navigate: https://www.reddit.com

  - evaluate: |
      (async () => {
        const sub = '${{ args.subreddit }}';
        // Encode the subreddit so characters such as "/", "?" or "#" cannot
        // alter the request path or query string.
        const path = sub ? '/r/' + encodeURIComponent(sub) + '/hot.json' : '/hot.json';
        const res = await fetch(path + '?limit=${{ args.limit }}&raw_json=1', {
          credentials: 'include'
        });
        // A non-OK response yields no rows instead of a JSON parse failure.
        if (!res.ok) return [];
        const d = await res.json();
        return (d?.data?.children || []).map(c => ({
          title: c.data.title,
          subreddit: c.data.subreddit_name_prefixed,
          score: c.data.score,
          comments: c.data.num_comments,
          author: c.data.author,
          url: 'https://www.reddit.com' + c.data.permalink,
        }));
      })()

  - map:
      rank: ${{ index + 1 }}
      title: ${{ item.title }}
      subreddit: ${{ item.subreddit }}
      score: ${{ item.score }}
      comments: ${{ item.comments }}

  - limit: ${{ args.limit }}

columns: [rank, title, subreddit, score, comments]
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Reuters news search — API with HTML fallback.
3
+ * Source: bb-sites/reuters/search.js
4
+ */
5
+ import { cli, Strategy } from '../../registry.js';
6
+
7
cli({
  site: 'reuters',
  name: 'search',
  description: 'Reuters 路透社新闻搜索',
  domain: 'www.reuters.com',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'query', required: true, help: 'Search query' },
    { name: 'limit', type: 'int', default: 10, help: 'Number of results (max 40)' },
  ],
  columns: ['rank', 'title', 'date', 'section', 'url'],
  /**
   * Open reuters.com and call its article-search API from inside the page
   * (so browser cookies are sent), returning up to `limit` articles
   * (capped at 40). Any failure yields an empty list.
   */
  func: async (page, kwargs) => {
    // Coerce to a number so only a numeric literal is spliced into the script.
    const count = Math.min(Number(kwargs.limit) || 10, 40);
    // JSON.stringify produces a valid JS string literal, so quotes,
    // backslashes and newlines in user input cannot break out of the
    // evaluated script.
    const keywordLiteral = JSON.stringify(String(kwargs.query));

    await page.goto('https://www.reuters.com');
    await page.wait(2);
    const data = await page.evaluate(`
      (async () => {
        const count = ${count};
        const apiQuery = JSON.stringify({
          keyword: ${keywordLiteral},
          offset: 0, orderby: 'display_date:desc', size: count, website: 'reuters'
        });
        const apiUrl = 'https://www.reuters.com/pf/api/v3/content/fetch/articles-by-search-v2?query=' + encodeURIComponent(apiQuery);
        try {
          const resp = await fetch(apiUrl, {credentials: 'include'});
          if (resp.ok) {
            const data = await resp.json();
            const articles = data.result?.articles || data.articles || [];
            if (articles.length > 0) {
              return articles.slice(0, count).map((a, i) => ({
                rank: i + 1,
                title: a.title || a.headlines?.basic || '',
                date: (a.display_date || a.published_time || '').split('T')[0],
                section: a.taxonomy?.section?.name || '',
                url: a.canonical_url ? 'https://www.reuters.com' + a.canonical_url : '',
              }));
            }
          }
        } catch(e) { /* best effort: fall through to the error result below */ }
        return {error: 'Reuters API unavailable'};
      })()
    `);
    // The in-page script returns an {error} object on failure; treat that as no results.
    if (!Array.isArray(data)) return [];
    return data;
  },
});
@@ -0,0 +1,66 @@
1
+ /**
2
+ * 什么值得买搜索好价 — browser cookie, HTML parse.
3
+ * Source: bb-sites/smzdm/search.js
4
+ */
5
+ import { cli, Strategy } from '../../registry.js';
6
+
7
cli({
  site: 'smzdm',
  name: 'search',
  description: '什么值得买搜索好价',
  domain: 'www.smzdm.com',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'keyword', required: true, help: 'Search keyword' },
    { name: 'limit', type: 'int', default: 20, help: 'Number of results' },
  ],
  columns: ['rank', 'title', 'price', 'mall', 'comments', 'url'],
  /**
   * Open smzdm.com, request the search AJAX endpoint from inside the page
   * (trying the 'youhui' channel, then 'home') and parse the returned HTML
   * into up to `limit` rows. Any failure yields an empty list.
   */
  func: async (page, kwargs) => {
    // encodeURIComponent leaves "'" untouched, so the encoded keyword is
    // additionally emitted through JSON.stringify to get a safe JS string literal.
    const qLiteral = JSON.stringify(encodeURIComponent(String(kwargs.keyword)));
    // Coerce to a number so only a numeric literal is spliced into the script.
    const limit = Number(kwargs.limit) || 20;

    await page.goto('https://www.smzdm.com');
    await page.wait(2);
    const data = await page.evaluate(`
      (async () => {
        const q = ${qLiteral};
        const limit = ${limit};
        // Try youhui channel first, then home
        for (const channel of ['youhui', 'home']) {
          try {
            const resp = await fetch('https://search.smzdm.com/ajax/?c=' + channel + '&s=' + q + '&p=1&v=b', {
              credentials: 'include',
              headers: {'X-Requested-With': 'XMLHttpRequest'}
            });
            if (!resp.ok) continue;
            const html = await resp.text();
            if (html.indexOf('feed-row-wide') === -1) continue;
            const doc = new DOMParser().parseFromString(html, 'text/html');
            const results = [];
            for (const li of doc.querySelectorAll('li.feed-row-wide')) {
              if (results.length >= limit) break;
              const titleEl = li.querySelector('h5.feed-block-title > a')
                || li.querySelector('h5 > a');
              if (!titleEl) continue;
              const title = (titleEl.getAttribute('title') || titleEl.textContent || '').trim();
              const url = titleEl.getAttribute('href') || '';
              const priceEl = li.querySelector('.z-highlight');
              const price = priceEl ? priceEl.textContent.trim() : '';
              const extrasSpan = li.querySelector('.z-feed-foot-r .feed-block-extras span');
              const mall = extrasSpan ? extrasSpan.textContent.trim() : '';
              const commentEl = li.querySelector('.feed-btn-comment');
              const comments = commentEl ? parseInt(commentEl.textContent.trim(), 10) || 0 : 0;
              results.push({rank: results.length + 1, title, price, mall, comments, url});
            }
            if (results.length > 0) return results;
          } catch(e) { continue; }
        }
        return {error: 'No results'};
      })()
    `);
    // The in-page script returns an {error} object on failure; treat that as no results.
    if (!Array.isArray(data)) return [];
    return data;
  },
});
@@ -2,6 +2,8 @@ site: v2ex
2
2
  name: hot
3
3
  description: V2EX 热门话题
4
4
  domain: www.v2ex.com
5
+ strategy: public
6
+ browser: false
5
7
 
6
8
  args:
7
9
  limit:
@@ -10,20 +12,14 @@ args:
10
12
  description: Number of topics
11
13
 
12
14
  pipeline:
13
- - evaluate: |
14
- (async () => {
15
- const res = await fetch('https://www.v2ex.com/api/topics/hot.json');
16
- return await res.json();
17
- })()
15
+ - fetch:
16
+ url: https://www.v2ex.com/api/topics/hot.json
18
17
 
19
18
  - map:
20
19
  rank: ${{ index + 1 }}
21
20
  title: ${{ item.title }}
22
- node: ${{ item.node?.title }}
23
- author: ${{ item.member?.username }}
24
21
  replies: ${{ item.replies }}
25
- url: ${{ item.url }}
26
22
 
27
23
  - limit: ${{ args.limit }}
28
24
 
29
- columns: [rank, title, node, author, replies]
25
+ columns: [rank, title, replies]
@@ -2,6 +2,8 @@ site: v2ex
2
2
  name: latest
3
3
  description: V2EX 最新话题
4
4
  domain: www.v2ex.com
5
+ strategy: public
6
+ browser: false
5
7
 
6
8
  args:
7
9
  limit:
@@ -10,19 +12,14 @@ args:
10
12
  description: Number of topics
11
13
 
12
14
  pipeline:
13
- - evaluate: |
14
- (async () => {
15
- const res = await fetch('https://www.v2ex.com/api/topics/latest.json');
16
- return await res.json();
17
- })()
15
+ - fetch:
16
+ url: https://www.v2ex.com/api/topics/latest.json
18
17
 
19
18
  - map:
20
19
  rank: ${{ index + 1 }}
21
20
  title: ${{ item.title }}
22
- node: ${{ item.node?.title }}
23
- author: ${{ item.member?.username }}
24
21
  replies: ${{ item.replies }}
25
22
 
26
23
  - limit: ${{ args.limit }}
27
24
 
28
- columns: [rank, title, node, author, replies]
25
+ columns: [rank, title, replies]
@@ -0,0 +1,27 @@
1
# V2EX topic lookup: requests topics/show.json with the given id and
# keeps a single mapped row (title, replies, url).
site: v2ex
name: topic
description: "V2EX 主题详情和回复"
domain: www.v2ex.com
strategy: public
browser: false

args:
  id: { type: str, required: true, description: Topic ID }

pipeline:
  - fetch:
      url: "https://www.v2ex.com/api/topics/show.json"
      params: { id: "${{ args.id }}" }

  - map:
      title: "${{ item.title }}"
      replies: "${{ item.replies }}"
      url: "${{ item.url }}"

  - limit: 1

columns:
  - title
  - replies
  - url