@jackwener/opencli 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLI-CREATOR.md +100 -139
- package/dist/build-manifest.js +62 -2
- package/dist/cli-manifest.json +595 -75
- package/dist/clis/xiaohongshu/search.d.ts +5 -2
- package/dist/clis/xiaohongshu/search.js +35 -41
- package/package.json +1 -1
- package/src/build-manifest.ts +63 -2
- package/src/clis/xiaohongshu/search.ts +41 -44
package/dist/clis/xiaohongshu/search.d.ts
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Xiaohongshu search —
|
|
3
|
-
*
|
|
2
|
+
* Xiaohongshu search — DOM-based extraction from search results page.
|
|
3
|
+
* The previous Pinia store + XHR interception approach broke because
|
|
4
|
+
* the API now returns empty items. This version navigates directly to
|
|
5
|
+
* the search results page and extracts data from rendered DOM elements.
|
|
6
|
+
* Ref: https://github.com/jackwener/opencli/issues/10
|
|
4
7
|
*/
|
|
5
8
|
export {};
|
package/dist/clis/xiaohongshu/search.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Xiaohongshu search —
|
|
3
|
-
*
|
|
2
|
+
* Xiaohongshu search — DOM-based extraction from search results page.
|
|
3
|
+
* The previous Pinia store + XHR interception approach broke because
|
|
4
|
+
* the API now returns empty items. This version navigates directly to
|
|
5
|
+
* the search results page and extracts data from rendered DOM elements.
|
|
6
|
+
* Ref: https://github.com/jackwener/opencli/issues/10
|
|
4
7
|
*/
|
|
5
8
|
import { cli, Strategy } from '../../registry.js';
|
|
6
9
|
cli({
|
|
@@ -13,54 +16,45 @@ cli({
|
|
|
13
16
|
{ name: 'keyword', required: true, help: 'Search keyword' },
|
|
14
17
|
{ name: 'limit', type: 'int', default: 20, help: 'Number of results' },
|
|
15
18
|
],
|
|
16
|
-
columns: ['rank', 'title', 'author', 'likes'
|
|
19
|
+
columns: ['rank', 'title', 'author', 'likes'],
|
|
17
20
|
func: async (page, kwargs) => {
|
|
18
|
-
|
|
19
|
-
await page.
|
|
21
|
+
const keyword = encodeURIComponent(kwargs.keyword);
|
|
22
|
+
await page.goto(`https://www.xiaohongshu.com/search_result?keyword=${keyword}&source=web_search_result_notes`);
|
|
23
|
+
await page.wait(3);
|
|
24
|
+
// Scroll a couple of times to load more results
|
|
25
|
+
await page.autoScroll({ times: 2 });
|
|
20
26
|
const data = await page.evaluate(`
|
|
21
|
-
(
|
|
22
|
-
const
|
|
23
|
-
const
|
|
24
|
-
|
|
27
|
+
(() => {
|
|
28
|
+
const notes = document.querySelectorAll('section.note-item');
|
|
29
|
+
const results = [];
|
|
30
|
+
notes.forEach(el => {
|
|
31
|
+
// Skip "related searches" sections
|
|
32
|
+
if (el.classList.contains('query-note-item')) return;
|
|
25
33
|
|
|
26
|
-
|
|
27
|
-
|
|
34
|
+
const titleEl = el.querySelector('.title, .note-title, a.title');
|
|
35
|
+
const nameEl = el.querySelector('.name, .author-name, .nick-name');
|
|
36
|
+
const likesEl = el.querySelector('.count, .like-count, .like-wrapper .count');
|
|
37
|
+
const linkEl = el.querySelector('a[href*="/explore/"], a[href*="/search_result/"], a[href*="/note/"]');
|
|
28
38
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
const origSend = XMLHttpRequest.prototype.send;
|
|
32
|
-
XMLHttpRequest.prototype.open = function(m, u) { this.__url = u; return origOpen.apply(this, arguments); };
|
|
33
|
-
XMLHttpRequest.prototype.send = function(b) {
|
|
34
|
-
if (this.__url?.includes('search/notes')) {
|
|
35
|
-
const x = this;
|
|
36
|
-
const orig = x.onreadystatechange;
|
|
37
|
-
x.onreadystatechange = function() { if (x.readyState === 4 && !captured) { try { captured = JSON.parse(x.responseText); } catch {} } if (orig) orig.apply(this, arguments); };
|
|
38
|
-
}
|
|
39
|
-
return origSend.apply(this, arguments);
|
|
40
|
-
};
|
|
39
|
+
const href = linkEl?.getAttribute('href') || '';
|
|
40
|
+
const noteId = href.match(/\\/(?:explore|note)\\/([a-f0-9]+)/)?.[1] || '';
|
|
41
41
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
if (!captured?.success) return {error: captured?.msg || 'Search failed'};
|
|
52
|
-
return (captured.data?.items || []).map(i => ({
|
|
53
|
-
title: i.note_card?.display_title || '',
|
|
54
|
-
type: i.note_card?.type || '',
|
|
55
|
-
url: 'https://www.xiaohongshu.com/explore/' + i.id,
|
|
56
|
-
author: i.note_card?.user?.nickname || '',
|
|
57
|
-
likes: i.note_card?.interact_info?.liked_count || '0',
|
|
58
|
-
}));
|
|
42
|
+
results.push({
|
|
43
|
+
title: (titleEl?.textContent || '').trim(),
|
|
44
|
+
author: (nameEl?.textContent || '').trim(),
|
|
45
|
+
likes: (likesEl?.textContent || '0').trim(),
|
|
46
|
+
url: noteId ? 'https://www.xiaohongshu.com/explore/' + noteId : '',
|
|
47
|
+
});
|
|
48
|
+
});
|
|
49
|
+
return results;
|
|
59
50
|
})()
|
|
60
51
|
`);
|
|
61
52
|
if (!Array.isArray(data))
|
|
62
53
|
return [];
|
|
63
|
-
return data
|
|
54
|
+
return data
|
|
55
|
+
.filter((item) => item.title)
|
|
56
|
+
.slice(0, kwargs.limit)
|
|
57
|
+
.map((item, i) => ({
|
|
64
58
|
rank: i + 1,
|
|
65
59
|
...item,
|
|
66
60
|
}));
|
package/package.json
CHANGED
package/src/build-manifest.ts
CHANGED
|
@@ -87,10 +87,11 @@ function scanYaml(filePath: string, site: string): ManifestEntry | null {
|
|
|
87
87
|
|
|
88
88
|
function scanTs(filePath: string, site: string): ManifestEntry {
|
|
89
89
|
// TS adapters self-register via cli() at import time.
|
|
90
|
-
// We
|
|
90
|
+
// We statically parse the source to extract metadata for the manifest stub.
|
|
91
91
|
const baseName = path.basename(filePath, path.extname(filePath));
|
|
92
92
|
const relativePath = `${site}/${baseName}.js`;
|
|
93
|
-
|
|
93
|
+
|
|
94
|
+
const entry: ManifestEntry = {
|
|
94
95
|
site,
|
|
95
96
|
name: baseName,
|
|
96
97
|
description: '',
|
|
@@ -100,6 +101,66 @@ function scanTs(filePath: string, site: string): ManifestEntry {
|
|
|
100
101
|
type: 'ts',
|
|
101
102
|
modulePath: relativePath,
|
|
102
103
|
};
|
|
104
|
+
|
|
105
|
+
try {
|
|
106
|
+
const src = fs.readFileSync(filePath, 'utf-8');
|
|
107
|
+
|
|
108
|
+
// Extract description
|
|
109
|
+
const descMatch = src.match(/description\s*:\s*['"`]([^'"`]*)['"`]/);
|
|
110
|
+
if (descMatch) entry.description = descMatch[1];
|
|
111
|
+
|
|
112
|
+
// Extract domain
|
|
113
|
+
const domainMatch = src.match(/domain\s*:\s*['"`]([^'"`]*)['"`]/);
|
|
114
|
+
if (domainMatch) entry.domain = domainMatch[1];
|
|
115
|
+
|
|
116
|
+
// Extract strategy
|
|
117
|
+
const stratMatch = src.match(/strategy\s*:\s*Strategy\.(\w+)/);
|
|
118
|
+
if (stratMatch) entry.strategy = stratMatch[1].toLowerCase();
|
|
119
|
+
|
|
120
|
+
// Extract columns
|
|
121
|
+
const colMatch = src.match(/columns\s*:\s*\[([^\]]*)\]/);
|
|
122
|
+
if (colMatch) {
|
|
123
|
+
entry.columns = colMatch[1].split(',').map(s => s.trim().replace(/^['"`]|['"`]$/g, '')).filter(Boolean);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// Extract args array items: { name: '...', ... }
|
|
127
|
+
const argsBlockMatch = src.match(/args\s*:\s*\[([\s\S]*?)\]\s*,/);
|
|
128
|
+
if (argsBlockMatch) {
|
|
129
|
+
const argsBlock = argsBlockMatch[1];
|
|
130
|
+
const argRegex = /\{\s*name\s*:\s*['"`](\w+)['"`]([^}]*)\}/g;
|
|
131
|
+
let m;
|
|
132
|
+
while ((m = argRegex.exec(argsBlock)) !== null) {
|
|
133
|
+
const argName = m[1];
|
|
134
|
+
const body = m[2];
|
|
135
|
+
const typeMatch = body.match(/type\s*:\s*['"`](\w+)['"`]/);
|
|
136
|
+
const defaultMatch = body.match(/default\s*:\s*([^,}]+)/);
|
|
137
|
+
const requiredMatch = body.match(/required\s*:\s*(true|false)/);
|
|
138
|
+
const helpMatch = body.match(/help\s*:\s*['"`]([^'"`]*)['"`]/);
|
|
139
|
+
|
|
140
|
+
let defaultVal: any = undefined;
|
|
141
|
+
if (defaultMatch) {
|
|
142
|
+
const raw = defaultMatch[1].trim();
|
|
143
|
+
if (raw === 'true') defaultVal = true;
|
|
144
|
+
else if (raw === 'false') defaultVal = false;
|
|
145
|
+
else if (/^\d+$/.test(raw)) defaultVal = parseInt(raw, 10);
|
|
146
|
+
else if (/^\d+\.\d+$/.test(raw)) defaultVal = parseFloat(raw);
|
|
147
|
+
else defaultVal = raw.replace(/^['"`]|['"`]$/g, '');
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
entry.args.push({
|
|
151
|
+
name: argName,
|
|
152
|
+
type: typeMatch?.[1] ?? 'str',
|
|
153
|
+
default: defaultVal,
|
|
154
|
+
required: requiredMatch?.[1] === 'true',
|
|
155
|
+
help: helpMatch?.[1] ?? '',
|
|
156
|
+
});
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
} catch {
|
|
160
|
+
// If parsing fails, fall back to empty metadata — module will self-register at runtime
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
return entry;
|
|
103
164
|
}
|
|
104
165
|
|
|
105
166
|
// Main
|
package/src/clis/xiaohongshu/search.ts
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Xiaohongshu search —
|
|
3
|
-
*
|
|
2
|
+
* Xiaohongshu search — DOM-based extraction from search results page.
|
|
3
|
+
* The previous Pinia store + XHR interception approach broke because
|
|
4
|
+
* the API now returns empty items. This version navigates directly to
|
|
5
|
+
* the search results page and extracts data from rendered DOM elements.
|
|
6
|
+
* Ref: https://github.com/jackwener/opencli/issues/10
|
|
4
7
|
*/
|
|
5
8
|
|
|
6
9
|
import { cli, Strategy } from '../../registry.js';
|
|
@@ -15,57 +18,51 @@ cli({
|
|
|
15
18
|
{ name: 'keyword', required: true, help: 'Search keyword' },
|
|
16
19
|
{ name: 'limit', type: 'int', default: 20, help: 'Number of results' },
|
|
17
20
|
],
|
|
18
|
-
columns: ['rank', 'title', 'author', 'likes'
|
|
21
|
+
columns: ['rank', 'title', 'author', 'likes'],
|
|
19
22
|
func: async (page, kwargs) => {
|
|
20
|
-
|
|
21
|
-
await page.
|
|
23
|
+
const keyword = encodeURIComponent(kwargs.keyword);
|
|
24
|
+
await page.goto(
|
|
25
|
+
`https://www.xiaohongshu.com/search_result?keyword=${keyword}&source=web_search_result_notes`
|
|
26
|
+
);
|
|
27
|
+
await page.wait(3);
|
|
22
28
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
const app = document.querySelector('#app')?.__vue_app__;
|
|
26
|
-
const pinia = app?.config?.globalProperties?.$pinia;
|
|
27
|
-
if (!pinia?._s) return {error: 'Page not ready'};
|
|
29
|
+
// Scroll a couple of times to load more results
|
|
30
|
+
await page.autoScroll({ times: 2 });
|
|
28
31
|
|
|
29
|
-
|
|
30
|
-
|
|
32
|
+
const data = await page.evaluate(`
|
|
33
|
+
(() => {
|
|
34
|
+
const notes = document.querySelectorAll('section.note-item');
|
|
35
|
+
const results = [];
|
|
36
|
+
notes.forEach(el => {
|
|
37
|
+
// Skip "related searches" sections
|
|
38
|
+
if (el.classList.contains('query-note-item')) return;
|
|
31
39
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
XMLHttpRequest.prototype.send = function(b) {
|
|
37
|
-
if (this.__url?.includes('search/notes')) {
|
|
38
|
-
const x = this;
|
|
39
|
-
const orig = x.onreadystatechange;
|
|
40
|
-
x.onreadystatechange = function() { if (x.readyState === 4 && !captured) { try { captured = JSON.parse(x.responseText); } catch {} } if (orig) orig.apply(this, arguments); };
|
|
41
|
-
}
|
|
42
|
-
return origSend.apply(this, arguments);
|
|
43
|
-
};
|
|
40
|
+
const titleEl = el.querySelector('.title, .note-title, a.title');
|
|
41
|
+
const nameEl = el.querySelector('.name, .author-name, .nick-name');
|
|
42
|
+
const likesEl = el.querySelector('.count, .like-count, .like-wrapper .count');
|
|
43
|
+
const linkEl = el.querySelector('a[href*="/explore/"], a[href*="/search_result/"], a[href*="/note/"]');
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
await searchStore.loadMore();
|
|
48
|
-
await new Promise(r => setTimeout(r, 800));
|
|
49
|
-
} finally {
|
|
50
|
-
XMLHttpRequest.prototype.open = origOpen;
|
|
51
|
-
XMLHttpRequest.prototype.send = origSend;
|
|
52
|
-
}
|
|
45
|
+
const href = linkEl?.getAttribute('href') || '';
|
|
46
|
+
const noteId = href.match(/\\/(?:explore|note)\\/([a-f0-9]+)/)?.[1] || '';
|
|
53
47
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
48
|
+
results.push({
|
|
49
|
+
title: (titleEl?.textContent || '').trim(),
|
|
50
|
+
author: (nameEl?.textContent || '').trim(),
|
|
51
|
+
likes: (likesEl?.textContent || '0').trim(),
|
|
52
|
+
url: noteId ? 'https://www.xiaohongshu.com/explore/' + noteId : '',
|
|
53
|
+
});
|
|
54
|
+
});
|
|
55
|
+
return results;
|
|
62
56
|
})()
|
|
63
57
|
`);
|
|
64
58
|
|
|
65
59
|
if (!Array.isArray(data)) return [];
|
|
66
|
-
return data
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
60
|
+
return data
|
|
61
|
+
.filter((item: any) => item.title)
|
|
62
|
+
.slice(0, kwargs.limit)
|
|
63
|
+
.map((item: any, i: number) => ({
|
|
64
|
+
rank: i + 1,
|
|
65
|
+
...item,
|
|
66
|
+
}));
|
|
70
67
|
},
|
|
71
68
|
});
|