@jackwener/opencli 0.4.1 → 0.4.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,5 +1,8 @@
1
1
  /**
2
- * Xiaohongshu search — trigger search via Pinia store + XHR interception.
3
- * Inspired by bb-sites/xiaohongshu/search.js but adapted for opencli pipeline.
2
+ * Xiaohongshu search — DOM-based extraction from search results page.
3
+ * The previous Pinia store + XHR interception approach broke because
4
+ * the API now returns empty items. This version navigates directly to
5
+ * the search results page and extracts data from rendered DOM elements.
6
+ * Ref: https://github.com/jackwener/opencli/issues/10
4
7
  */
5
8
  export {};
@@ -1,6 +1,9 @@
1
1
  /**
2
- * Xiaohongshu search — trigger search via Pinia store + XHR interception.
3
- * Inspired by bb-sites/xiaohongshu/search.js but adapted for opencli pipeline.
2
+ * Xiaohongshu search — DOM-based extraction from search results page.
3
+ * The previous Pinia store + XHR interception approach broke because
4
+ * the API now returns empty items. This version navigates directly to
5
+ * the search results page and extracts data from rendered DOM elements.
6
+ * Ref: https://github.com/jackwener/opencli/issues/10
4
7
  */
5
8
  import { cli, Strategy } from '../../registry.js';
6
9
  cli({
@@ -13,54 +16,45 @@ cli({
13
16
  { name: 'keyword', required: true, help: 'Search keyword' },
14
17
  { name: 'limit', type: 'int', default: 20, help: 'Number of results' },
15
18
  ],
16
- columns: ['rank', 'title', 'author', 'likes', 'type'],
19
+ columns: ['rank', 'title', 'author', 'likes'],
17
20
  func: async (page, kwargs) => {
18
- await page.goto('https://www.xiaohongshu.com');
19
- await page.wait(2);
21
+ const keyword = encodeURIComponent(kwargs.keyword);
22
+ await page.goto(`https://www.xiaohongshu.com/search_result?keyword=${keyword}&source=web_search_result_notes`);
23
+ await page.wait(3);
24
+ // Scroll a couple of times to load more results
25
+ await page.autoScroll({ times: 2 });
20
26
  const data = await page.evaluate(`
21
- (async () => {
22
- const app = document.querySelector('#app')?.__vue_app__;
23
- const pinia = app?.config?.globalProperties?.$pinia;
24
- if (!pinia?._s) return {error: 'Page not ready'};
27
+ (() => {
28
+ const notes = document.querySelectorAll('section.note-item');
29
+ const results = [];
30
+ notes.forEach(el => {
31
+ // Skip "related searches" sections
32
+ if (el.classList.contains('query-note-item')) return;
25
33
 
26
- const searchStore = pinia._s.get('search');
27
- if (!searchStore) return {error: 'Search store not found'};
34
+ const titleEl = el.querySelector('.title, .note-title, a.title');
35
+ const nameEl = el.querySelector('.name, .author-name, .nick-name');
36
+ const likesEl = el.querySelector('.count, .like-count, .like-wrapper .count');
37
+ const linkEl = el.querySelector('a[href*="/explore/"], a[href*="/search_result/"], a[href*="/note/"]');
28
38
 
29
- let captured = null;
30
- const origOpen = XMLHttpRequest.prototype.open;
31
- const origSend = XMLHttpRequest.prototype.send;
32
- XMLHttpRequest.prototype.open = function(m, u) { this.__url = u; return origOpen.apply(this, arguments); };
33
- XMLHttpRequest.prototype.send = function(b) {
34
- if (this.__url?.includes('search/notes')) {
35
- const x = this;
36
- const orig = x.onreadystatechange;
37
- x.onreadystatechange = function() { if (x.readyState === 4 && !captured) { try { captured = JSON.parse(x.responseText); } catch {} } if (orig) orig.apply(this, arguments); };
38
- }
39
- return origSend.apply(this, arguments);
40
- };
39
+ const href = linkEl?.getAttribute('href') || '';
40
+ const noteId = href.match(/\\/(?:explore|note)\\/([a-f0-9]+)/)?.[1] || '';
41
41
 
42
- try {
43
- searchStore.mutateSearchValue('${kwargs.keyword}');
44
- await searchStore.loadMore();
45
- await new Promise(r => setTimeout(r, 800));
46
- } finally {
47
- XMLHttpRequest.prototype.open = origOpen;
48
- XMLHttpRequest.prototype.send = origSend;
49
- }
50
-
51
- if (!captured?.success) return {error: captured?.msg || 'Search failed'};
52
- return (captured.data?.items || []).map(i => ({
53
- title: i.note_card?.display_title || '',
54
- type: i.note_card?.type || '',
55
- url: 'https://www.xiaohongshu.com/explore/' + i.id,
56
- author: i.note_card?.user?.nickname || '',
57
- likes: i.note_card?.interact_info?.liked_count || '0',
58
- }));
42
+ results.push({
43
+ title: (titleEl?.textContent || '').trim(),
44
+ author: (nameEl?.textContent || '').trim(),
45
+ likes: (likesEl?.textContent || '0').trim(),
46
+ url: noteId ? 'https://www.xiaohongshu.com/explore/' + noteId : '',
47
+ });
48
+ });
49
+ return results;
59
50
  })()
60
51
  `);
61
52
  if (!Array.isArray(data))
62
53
  return [];
63
- return data.slice(0, kwargs.limit).map((item, i) => ({
54
+ return data
55
+ .filter((item) => item.title)
56
+ .slice(0, kwargs.limit)
57
+ .map((item, i) => ({
64
58
  rank: i + 1,
65
59
  ...item,
66
60
  }));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jackwener/opencli",
3
- "version": "0.4.1",
3
+ "version": "0.4.2",
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },
@@ -87,10 +87,11 @@ function scanYaml(filePath: string, site: string): ManifestEntry | null {
87
87
 
88
88
  function scanTs(filePath: string, site: string): ManifestEntry {
89
89
  // TS adapters self-register via cli() at import time.
90
- // We record their module path for lazy dynamic import.
90
+ // We statically parse the source to extract metadata for the manifest stub.
91
91
  const baseName = path.basename(filePath, path.extname(filePath));
92
92
  const relativePath = `${site}/${baseName}.js`;
93
- return {
93
+
94
+ const entry: ManifestEntry = {
94
95
  site,
95
96
  name: baseName,
96
97
  description: '',
@@ -100,6 +101,66 @@ function scanTs(filePath: string, site: string): ManifestEntry {
100
101
  type: 'ts',
101
102
  modulePath: relativePath,
102
103
  };
104
+
105
+ try {
106
+ const src = fs.readFileSync(filePath, 'utf-8');
107
+
108
+ // Extract description
109
+ const descMatch = src.match(/description\s*:\s*['"`]([^'"`]*)['"`]/);
110
+ if (descMatch) entry.description = descMatch[1];
111
+
112
+ // Extract domain
113
+ const domainMatch = src.match(/domain\s*:\s*['"`]([^'"`]*)['"`]/);
114
+ if (domainMatch) entry.domain = domainMatch[1];
115
+
116
+ // Extract strategy
117
+ const stratMatch = src.match(/strategy\s*:\s*Strategy\.(\w+)/);
118
+ if (stratMatch) entry.strategy = stratMatch[1].toLowerCase();
119
+
120
+ // Extract columns
121
+ const colMatch = src.match(/columns\s*:\s*\[([^\]]*)\]/);
122
+ if (colMatch) {
123
+ entry.columns = colMatch[1].split(',').map(s => s.trim().replace(/^['"`]|['"`]$/g, '')).filter(Boolean);
124
+ }
125
+
126
+ // Extract args array items: { name: '...', ... }
127
+ const argsBlockMatch = src.match(/args\s*:\s*\[([\s\S]*?)\]\s*,/);
128
+ if (argsBlockMatch) {
129
+ const argsBlock = argsBlockMatch[1];
130
+ const argRegex = /\{\s*name\s*:\s*['"`](\w+)['"`]([^}]*)\}/g;
131
+ let m;
132
+ while ((m = argRegex.exec(argsBlock)) !== null) {
133
+ const argName = m[1];
134
+ const body = m[2];
135
+ const typeMatch = body.match(/type\s*:\s*['"`](\w+)['"`]/);
136
+ const defaultMatch = body.match(/default\s*:\s*([^,}]+)/);
137
+ const requiredMatch = body.match(/required\s*:\s*(true|false)/);
138
+ const helpMatch = body.match(/help\s*:\s*['"`]([^'"`]*)['"`]/);
139
+
140
+ let defaultVal: any = undefined;
141
+ if (defaultMatch) {
142
+ const raw = defaultMatch[1].trim();
143
+ if (raw === 'true') defaultVal = true;
144
+ else if (raw === 'false') defaultVal = false;
145
+ else if (/^\d+$/.test(raw)) defaultVal = parseInt(raw, 10);
146
+ else if (/^\d+\.\d+$/.test(raw)) defaultVal = parseFloat(raw);
147
+ else defaultVal = raw.replace(/^['"`]|['"`]$/g, '');
148
+ }
149
+
150
+ entry.args.push({
151
+ name: argName,
152
+ type: typeMatch?.[1] ?? 'str',
153
+ default: defaultVal,
154
+ required: requiredMatch?.[1] === 'true',
155
+ help: helpMatch?.[1] ?? '',
156
+ });
157
+ }
158
+ }
159
+ } catch {
160
+ // If parsing fails, fall back to empty metadata — module will self-register at runtime
161
+ }
162
+
163
+ return entry;
103
164
  }
104
165
 
105
166
  // Main
@@ -1,6 +1,9 @@
1
1
  /**
2
- * Xiaohongshu search — trigger search via Pinia store + XHR interception.
3
- * Inspired by bb-sites/xiaohongshu/search.js but adapted for opencli pipeline.
2
+ * Xiaohongshu search — DOM-based extraction from search results page.
3
+ * The previous Pinia store + XHR interception approach broke because
4
+ * the API now returns empty items. This version navigates directly to
5
+ * the search results page and extracts data from rendered DOM elements.
6
+ * Ref: https://github.com/jackwener/opencli/issues/10
4
7
  */
5
8
 
6
9
  import { cli, Strategy } from '../../registry.js';
@@ -15,57 +18,51 @@ cli({
15
18
  { name: 'keyword', required: true, help: 'Search keyword' },
16
19
  { name: 'limit', type: 'int', default: 20, help: 'Number of results' },
17
20
  ],
18
- columns: ['rank', 'title', 'author', 'likes', 'type'],
21
+ columns: ['rank', 'title', 'author', 'likes'],
19
22
  func: async (page, kwargs) => {
20
- await page.goto('https://www.xiaohongshu.com');
21
- await page.wait(2);
23
+ const keyword = encodeURIComponent(kwargs.keyword);
24
+ await page.goto(
25
+ `https://www.xiaohongshu.com/search_result?keyword=${keyword}&source=web_search_result_notes`
26
+ );
27
+ await page.wait(3);
22
28
 
23
- const data = await page.evaluate(`
24
- (async () => {
25
- const app = document.querySelector('#app')?.__vue_app__;
26
- const pinia = app?.config?.globalProperties?.$pinia;
27
- if (!pinia?._s) return {error: 'Page not ready'};
29
+ // Scroll a couple of times to load more results
30
+ await page.autoScroll({ times: 2 });
28
31
 
29
- const searchStore = pinia._s.get('search');
30
- if (!searchStore) return {error: 'Search store not found'};
32
+ const data = await page.evaluate(`
33
+ (() => {
34
+ const notes = document.querySelectorAll('section.note-item');
35
+ const results = [];
36
+ notes.forEach(el => {
37
+ // Skip "related searches" sections
38
+ if (el.classList.contains('query-note-item')) return;
31
39
 
32
- let captured = null;
33
- const origOpen = XMLHttpRequest.prototype.open;
34
- const origSend = XMLHttpRequest.prototype.send;
35
- XMLHttpRequest.prototype.open = function(m, u) { this.__url = u; return origOpen.apply(this, arguments); };
36
- XMLHttpRequest.prototype.send = function(b) {
37
- if (this.__url?.includes('search/notes')) {
38
- const x = this;
39
- const orig = x.onreadystatechange;
40
- x.onreadystatechange = function() { if (x.readyState === 4 && !captured) { try { captured = JSON.parse(x.responseText); } catch {} } if (orig) orig.apply(this, arguments); };
41
- }
42
- return origSend.apply(this, arguments);
43
- };
40
+ const titleEl = el.querySelector('.title, .note-title, a.title');
41
+ const nameEl = el.querySelector('.name, .author-name, .nick-name');
42
+ const likesEl = el.querySelector('.count, .like-count, .like-wrapper .count');
43
+ const linkEl = el.querySelector('a[href*="/explore/"], a[href*="/search_result/"], a[href*="/note/"]');
44
44
 
45
- try {
46
- searchStore.mutateSearchValue('${kwargs.keyword}');
47
- await searchStore.loadMore();
48
- await new Promise(r => setTimeout(r, 800));
49
- } finally {
50
- XMLHttpRequest.prototype.open = origOpen;
51
- XMLHttpRequest.prototype.send = origSend;
52
- }
45
+ const href = linkEl?.getAttribute('href') || '';
46
+ const noteId = href.match(/\\/(?:explore|note)\\/([a-f0-9]+)/)?.[1] || '';
53
47
 
54
- if (!captured?.success) return {error: captured?.msg || 'Search failed'};
55
- return (captured.data?.items || []).map(i => ({
56
- title: i.note_card?.display_title || '',
57
- type: i.note_card?.type || '',
58
- url: 'https://www.xiaohongshu.com/explore/' + i.id,
59
- author: i.note_card?.user?.nickname || '',
60
- likes: i.note_card?.interact_info?.liked_count || '0',
61
- }));
48
+ results.push({
49
+ title: (titleEl?.textContent || '').trim(),
50
+ author: (nameEl?.textContent || '').trim(),
51
+ likes: (likesEl?.textContent || '0').trim(),
52
+ url: noteId ? 'https://www.xiaohongshu.com/explore/' + noteId : '',
53
+ });
54
+ });
55
+ return results;
62
56
  })()
63
57
  `);
64
58
 
65
59
  if (!Array.isArray(data)) return [];
66
- return data.slice(0, kwargs.limit).map((item: any, i: number) => ({
67
- rank: i + 1,
68
- ...item,
69
- }));
60
+ return data
61
+ .filter((item: any) => item.title)
62
+ .slice(0, kwargs.limit)
63
+ .map((item: any, i: number) => ({
64
+ rank: i + 1,
65
+ ...item,
66
+ }));
70
67
  },
71
68
  });