@optima-chat/scout-cli 0.1.11 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -2
- package/dist/commands/browser.d.ts +3 -0
- package/dist/commands/browser.d.ts.map +1 -0
- package/dist/commands/browser.js +614 -0
- package/dist/commands/browser.js.map +1 -0
- package/dist/commands/browser.test.d.ts +2 -0
- package/dist/commands/browser.test.d.ts.map +1 -0
- package/dist/commands/browser.test.js +228 -0
- package/dist/commands/browser.test.js.map +1 -0
- package/dist/commands/info.d.ts +3 -0
- package/dist/commands/info.d.ts.map +1 -0
- package/dist/commands/info.js +23 -0
- package/dist/commands/info.js.map +1 -0
- package/dist/commands/supplier-search.d.ts +3 -0
- package/dist/commands/supplier-search.d.ts.map +1 -0
- package/dist/commands/supplier-search.js +76 -0
- package/dist/commands/supplier-search.js.map +1 -0
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -1
- package/dist/utils/api.d.ts +4 -0
- package/dist/utils/api.d.ts.map +1 -1
- package/dist/utils/api.js +13 -0
- package/dist/utils/api.js.map +1 -1
- package/dist/utils/config.d.ts +18 -0
- package/dist/utils/config.d.ts.map +1 -1
- package/dist/utils/config.js +55 -2
- package/dist/utils/config.js.map +1 -1
- package/package.json +5 -2
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Optima Scout CLI
|
|
2
2
|
|
|
3
|
-
AI-powered Amazon product research tool for Claude Code and LLMs.
|
|
3
|
+
AI-powered Amazon product research and 1688 supplier sourcing tool for Claude Code and LLMs.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -14,11 +14,14 @@ npm install -g @optima-chat/scout-cli
|
|
|
14
14
|
# Initialize Claude Code skills
|
|
15
15
|
scout init
|
|
16
16
|
|
|
17
|
-
# Search products
|
|
17
|
+
# Search Amazon products
|
|
18
18
|
scout search "coffee maker"
|
|
19
19
|
|
|
20
20
|
# Get product details
|
|
21
21
|
scout product B01GJOMWVA
|
|
22
|
+
|
|
23
|
+
# Search 1688 suppliers
|
|
24
|
+
scout supplier-search "咖啡机"
|
|
22
25
|
```
|
|
23
26
|
|
|
24
27
|
## Commands
|
|
@@ -57,6 +60,20 @@ Get detailed product information.
|
|
|
57
60
|
scout product B004YAVF8I --domain amazon.com
|
|
58
61
|
```
|
|
59
62
|
|
|
63
|
+
### `scout supplier-search <keyword>`
|
|
64
|
+
|
|
65
|
+
Search for suppliers on 1688.com.
|
|
66
|
+
|
|
67
|
+
**Options:**
|
|
68
|
+
- `-l, --limit <number>` - Result limit (default: `20`, max: `100`)
|
|
69
|
+
- `-f, --format <format>` - Output: `json` | `text`
|
|
70
|
+
|
|
71
|
+
**Example:**
|
|
72
|
+
```bash
|
|
73
|
+
scout supplier-search "咖啡机" --limit 10
|
|
74
|
+
scout supplier-search "蓝牙耳机" --format json
|
|
75
|
+
```
|
|
76
|
+
|
|
60
77
|
## Configuration
|
|
61
78
|
|
|
62
79
|
Set API endpoint via environment variable:
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"browser.d.ts","sourceRoot":"","sources":["../../src/commands/browser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAGpC,eAAO,MAAM,cAAc,SAC+B,CAAC"}
|
|
@@ -0,0 +1,614 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
import { api } from '../utils/api.js';
|
|
3
|
+
/**
 * Root `browser` command. The subcommands in this module are registered on it.
 */
const browserCommand = new Command('browser');
browserCommand.description('Interact with user browser via extension');
export { browserCommand };
|
|
5
|
+
/**
 * Query page elements in a browser session through `POST /api/browser/query`.
 *
 * This is a best-effort helper for the scrapers: a failed request is reported
 * as "no matches" so that one missing section does not abort the whole scrape.
 *
 * @param {string} session - Session ID forwarded to the API.
 * @param {string} selector - CSS selector to match.
 * @param {string[]|undefined} attrs - Attributes to extract, sent as `attributes`.
 * @param {number} [limit=200] - Maximum number of elements requested.
 * @returns {Promise<Array>} The API result, or an empty array when the request
 *   fails or the payload is not an array.
 */
async function queryElements(session, selector, attrs, limit = 200) {
    try {
        const result = await api.post('/api/browser/query', {
            session,
            selector,
            attributes: attrs,
            limit,
        });
        // Every caller iterates the result with for...of; a non-array payload
        // (null, error object, ...) would otherwise blow up at the call site.
        return Array.isArray(result) ? result : [];
    }
    catch {
        // Deliberately swallowed: see the best-effort note above.
        return [];
    }
}
|
|
19
|
+
/**
 * Download every image and video URL listed on `product` into `outputDir`.
 *
 * Files are named `image_NN.<ext>` / `video_NN.mp4` (1-based, zero padded).
 * Each download is attempted independently; a failure is logged to stderr and
 * does not stop the remaining downloads.
 *
 * @param {{images?: string[], videos?: string[]}} product - Media URL lists; a
 *   missing list is treated as empty.
 * @param {string} outputDir - Existing directory the files are written to.
 * @param {string} referer - Value sent as the `Referer` request header.
 * @returns {Promise<{images: number, videos: number}>} Number of files saved.
 */
async function downloadMedia(product, outputDir, referer) {
    const fs = await import('fs');
    const path = await import('path');
    const https = await import('https');
    const http = await import('http');

    // Fetch one URL to destPath. Resolves once the file is fully written and
    // closed; rejects on any request, response or write failure.
    const downloadFile = (url, destPath) => new Promise((resolve, reject) => {
        const client = url.startsWith('https') ? https : http;
        let file;
        let settled = false;
        const succeed = () => {
            settled = true;
            resolve();
        };
        const fail = (err) => {
            if (settled)
                return;
            settled = true;
            if (!file) {
                reject(err);
                return;
            }
            // Do not leave a truncated file behind; unlink errors are irrelevant here.
            file.destroy();
            fs.unlink(destPath, () => reject(err));
        };
        client
            .get(url, { headers: { 'User-Agent': 'Mozilla/5.0', Referer: referer } }, (response) => {
                if (response.statusCode !== 200) {
                    // Drain the body so the socket is released.
                    response.resume();
                    fail(new Error(`HTTP ${response.statusCode}`));
                    return;
                }
                // Only create the file once there is a body to write, so failed
                // requests never leave empty files in the output directory.
                file = fs.createWriteStream(destPath);
                file.on('error', fail);
                response.on('error', fail);
                file.on('finish', () => file.close(succeed));
                response.pipe(file);
            })
            .on('error', fail);
    });

    // Download a list of URLs sequentially and return how many succeeded.
    const saveAll = async (urls, prefix, extensionFor) => {
        let saved = 0;
        for (const [index, url] of urls.entries()) {
            const stem = `${prefix}_${String(index + 1).padStart(2, '0')}`;
            try {
                const fileName = `${stem}.${extensionFor(url)}`;
                await downloadFile(url, path.join(outputDir, fileName));
                saved++;
                console.error(` ✓ ${fileName}`);
            }
            catch (e) {
                console.error(` ✗ ${stem}: ${e}`);
            }
        }
        return saved;
    };

    const imageExtension = (url) => {
        if (url.includes('.webp'))
            return 'webp';
        return url.includes('.png') ? 'png' : 'jpg';
    };
    const images = await saveAll(product.images ?? [], 'image', imageExtension);
    const videos = await saveAll(product.videos ?? [], 'video', () => 'mp4');
    return { images, videos };
}
|
|
78
|
+
// ============================================
|
|
79
|
+
// Basic browser commands
|
|
80
|
+
// ============================================
|
|
81
|
+
// Status check: print the connection status of one browser session as JSON.
// Exits with code 1 when no session ID is available or the request fails.
browserCommand
    .command('status')
    .description('Check browser connection status')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .action(async (options) => {
    try {
        if (!options.session) {
            console.error(JSON.stringify({ error: 'Session ID is required. Use --session or set SESSION_ID env var.' }));
            process.exit(1);
        }
        // Encode the ID so characters such as `&`, `#` or spaces cannot
        // truncate or corrupt the query string.
        const session = encodeURIComponent(options.session);
        const result = await api.get(`/api/browser/status?session=${session}`);
        console.log(JSON.stringify(result, null, 2));
    }
    catch (error) {
        console.error(JSON.stringify({ error: error instanceof Error ? error.message : String(error) }));
        process.exit(1);
    }
});
|
|
100
|
+
// List connections: pretty-print whatever GET /api/browser/connections returns.
browserCommand
    .command('connections')
    .description('List all connected browsers')
    .action(async () => {
    try {
        const connections = await api.get('/api/browser/connections');
        const pretty = JSON.stringify(connections, null, 2);
        console.log(pretty);
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        console.error(JSON.stringify({ error: message }));
        process.exit(1);
    }
});
|
|
114
|
+
// Screenshot: ask the API for a capture of the current page, decode the
// returned data URL and write the image to disk. Prints
// `{ success, path, size }` on success; exits with code 1 on any failure.
browserCommand
    .command('screenshot')
    .description('Capture current page screenshot and save to file')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .option('-f, --format <format>', 'Image format (png|jpeg)', 'png')
    .option('-o, --output <path>', 'Output file path (default: ./screenshot-<timestamp>.<format>)')
    .action(async (options) => {
    try {
        if (!options.session) {
            console.error(JSON.stringify({ error: 'Session ID is required. Use --session or set SESSION_ID env var.' }));
            process.exit(1);
        }
        const result = await api.post('/api/browser/screenshot', {
            session: options.session,
            format: options.format,
        });
        // Fail with a readable message instead of a TypeError when the
        // response carries no image data.
        if (typeof result?.dataUrl !== 'string') {
            throw new Error('Screenshot response did not contain a dataUrl');
        }
        // Strip the `data:image/<type>;base64,` prefix to get the raw payload.
        const base64Data = result.dataUrl.replace(/^data:image\/\w+;base64,/, '');
        const buffer = Buffer.from(base64Data, 'base64');
        // `:` and `.` are replaced so the timestamp is a safe file name.
        const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
        const outputPath = options.output || `./screenshot-${timestamp}.${options.format}`;
        const fs = await import('fs');
        fs.writeFileSync(outputPath, buffer);
        console.log(JSON.stringify({ success: true, path: outputPath, size: buffer.length }));
    }
    catch (error) {
        console.error(JSON.stringify({ error: error instanceof Error ? error.message : String(error) }));
        process.exit(1);
    }
});
|
|
144
|
+
// Query elements: run a CSS selector against the session's page and print the
// API result as JSON. `--attributes` is a comma-separated list, `--schema` a
// JSON document; both are forwarded to POST /api/browser/query.
browserCommand
    .command('query <selector>')
    .description('Query page elements using CSS selector')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .option('-a, --attributes <attrs>', 'Attributes to extract (comma-separated)')
    .option('--schema <json>', 'Extraction schema (JSON format)')
    // Wrap parseInt: commander calls the parser with (value, previous), so a
    // bare `parseInt` would receive the previous value as its radix.
    .option('-l, --limit <n>', 'Maximum number of elements', (v) => parseInt(v, 10))
    .action(async (selector, options) => {
    try {
        if (!options.session) {
            console.error(JSON.stringify({ error: 'Session ID is required. Use --session or set SESSION_ID env var.' }));
            process.exit(1);
        }
        // A non-numeric limit would otherwise be serialized as `null`.
        if (Number.isNaN(options.limit)) {
            throw new Error('--limit must be an integer');
        }
        // Tolerate "a, b" and stray commas in the attribute list.
        const attributes = options.attributes
            ?.split(',')
            .map((name) => name.trim())
            .filter(Boolean);
        const result = await api.post('/api/browser/query', {
            session: options.session,
            selector,
            attributes,
            schema: options.schema ? JSON.parse(options.schema) : undefined,
            limit: options.limit,
        });
        console.log(JSON.stringify(result, null, 2));
    }
    catch (error) {
        console.error(JSON.stringify({ error: error instanceof Error ? error.message : String(error) }));
        process.exit(1);
    }
});
|
|
172
|
+
// Click element: click the `--index`-th match of a CSS selector (default 0)
// and print the API response as compact JSON.
browserCommand
    .command('click <selector>')
    .description('Click a page element')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .option('-i, --index <n>', 'Element index (when multiple matches)', (raw) => Number.parseInt(raw, 10), 0)
    .action(async (selector, { session, index }) => {
    try {
        if (!session) {
            const missing = { error: 'Session ID is required. Use --session or set SESSION_ID env var.' };
            console.error(JSON.stringify(missing));
            process.exit(1);
        }
        const payload = { session, selector, index };
        const outcome = await api.post('/api/browser/click', payload);
        console.log(JSON.stringify(outcome));
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        console.error(JSON.stringify({ error: message }));
        process.exit(1);
    }
});
|
|
196
|
+
// Scroll page: scroll the session's page in one direction by a pixel distance
// (defaults: down, 500) and print the API response as compact JSON.
browserCommand
    .command('scroll')
    .description('Scroll the page')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .option('-d, --direction <dir>', 'Direction: up/down/left/right', 'down')
    .option('--distance <px>', 'Distance in pixels', (raw) => Number.parseInt(raw, 10), 500)
    .action(async ({ session, direction, distance }) => {
    try {
        if (!session) {
            const missing = { error: 'Session ID is required. Use --session or set SESSION_ID env var.' };
            console.error(JSON.stringify(missing));
            process.exit(1);
        }
        const payload = { session, direction, distance };
        const outcome = await api.post('/api/browser/scroll', payload);
        console.log(JSON.stringify(outcome));
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        console.error(JSON.stringify({ error: message }));
        process.exit(1);
    }
});
|
|
221
|
+
// Navigate: point the session's browser at a URL and print the API response
// as compact JSON.
browserCommand
    .command('navigate <url>')
    .description('Navigate to a URL')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .action(async (url, { session }) => {
    try {
        if (!session) {
            const missing = { error: 'Session ID is required. Use --session or set SESSION_ID env var.' };
            console.error(JSON.stringify(missing));
            process.exit(1);
        }
        const payload = { session, url };
        const outcome = await api.post('/api/browser/navigate', payload);
        console.log(JSON.stringify(outcome));
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        console.error(JSON.stringify({ error: message }));
        process.exit(1);
    }
});
|
|
243
|
+
// Page info: pretty-print what POST /api/browser/page-info reports for the
// session's current page.
browserCommand
    .command('page-info')
    .description('Get current page information')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .action(async ({ session }) => {
    try {
        if (!session) {
            const missing = { error: 'Session ID is required. Use --session or set SESSION_ID env var.' };
            console.error(JSON.stringify(missing));
            process.exit(1);
        }
        const info = await api.post('/api/browser/page-info', { session });
        console.log(JSON.stringify(info, null, 2));
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        console.error(JSON.stringify({ error: message }));
        process.exit(1);
    }
});
|
|
264
|
+
// ============================================
|
|
265
|
+
// Platform-specific scrapers
|
|
266
|
+
// ============================================
|
|
267
|
+
// Substrings that mark an image URL as site chrome rather than product media
// (Taobao/Tmall). Matching is a plain, case-sensitive substring test.
const TAOBAO_UI_PATTERNS = [
    '/tps/', 'tps-', '-tps-',
    'icon', 'logo', '.gif', 'avatar',
    'TB1', 'atmosphere', 'storag-merlin', '-2-tps-',
    'shopmanager', // shop logo/management images
    'O1CN01KsDwNS', // common UI element
    'O1CN01Dqo1gd', // common UI element
    'O1CN01z163bz', // badge/tag
    'O1CN012pqGiT', // UI element
];
/**
 * Tell whether an image URL looks like a UI asset.
 *
 * @param {string} url - Image URL to inspect.
 * @returns {boolean} True when the URL contains any entry of TAOBAO_UI_PATTERNS.
 */
function isUiImage(url) {
    for (const pattern of TAOBAO_UI_PATTERNS) {
        if (url.includes(pattern)) {
            return true;
        }
    }
    return false;
}
|
|
290
|
+
/**
 * Tell whether an image URL looks like a product photo from the seller's store.
 *
 * @param {string} url - Normalized image URL.
 * @param {string|undefined} sellerId - Seller ID, when one was detected.
 * @returns {boolean} True for alicdn URLs that are not UI assets and either
 *   contain the seller ID or live under a product-image path without a small
 *   thumbnail size marker.
 */
function isProductImage(url, sellerId) {
    // Anything outside alicdn, or recognised as UI chrome, is rejected outright.
    if (!url.includes('alicdn') || isUiImage(url)) {
        return false;
    }
    // A URL carrying the seller ID is accepted without further checks.
    if (sellerId && url.includes(sellerId)) {
        return true;
    }
    const productPaths = ['/bao/uploaded/', '/imgextra/'];
    if (!productPaths.some((segment) => url.includes(segment))) {
        return false;
    }
    // Small size markers are likely thumbnails/icons.
    const thumbnailMarkers = ['-96-', '-64-', '-48-'];
    return !thumbnailMarkers.some((marker) => url.includes(marker));
}
|
|
310
|
+
// Taobao / Tmall: scrape the product detail page currently open in the session.
// Collects title, price, image/video URLs and a fixed set of spec fields, writes
// them to <output>/product.json and optionally downloads the media. Progress
// goes to stderr; the final summary is the only thing printed to stdout.
browserCommand
    .command('taobao')
    .description('Scrape product from Taobao/Tmall detail page')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .option('-o, --output <dir>', 'Output directory (default: ./taobao-<itemId>)')
    .option('-d, --download', 'Download images and videos', false)
    .action(async (options) => {
    try {
        if (!options.session) {
            console.error(JSON.stringify({ error: 'Session ID is required. Use --session or set SESSION_ID env var.' }));
            process.exit(1);
        }
        const fs = await import('fs');
        const path = await import('path');
        console.error('抓取页面信息...');
        const pageInfo = await api.post('/api/browser/page-info', {
            session: options.session,
        });
        // Validate URL
        if (!pageInfo.url.includes('taobao.com') && !pageInfo.url.includes('tmall.com')) {
            console.error(JSON.stringify({ error: '当前页面不是淘宝/天猫商品页' }));
            process.exit(1);
        }
        // Anchor on the parameter boundary so `skuId=` / `spm_id=` are never
        // mistaken for the item's own `id=` parameter.
        const itemIdMatch = pageInfo.url.match(/[?&]id=(\d+)/);
        const itemId = itemIdMatch ? itemIdMatch[1] : 'unknown';
        const title = pageInfo.title.replace(/-淘宝网|-天猫.*$/g, '').trim();
        if (itemId === 'unknown') {
            console.error(JSON.stringify({ error: '无法从 URL 提取商品 ID' }));
            process.exit(1);
        }
        const product = {
            platform: 'taobao',
            item_id: itemId,
            title,
            url: `https://item.taobao.com/item.htm?id=${itemId}`,
            price: {},
            images: [],
            videos: [],
            specs: {},
            scraped_at: new Date().toISOString(),
        };
        // Price. The yen sign is matched in both its half-width (U+00A5) and
        // full-width (U+FFE5) forms, and decimals are kept, matching the 1688 scraper.
        console.error('抓取价格...');
        const priceElements = await queryElements(options.session, "[class*='price'], [class*='Price']", undefined, 20);
        for (const el of priceElements) {
            const text = String(el.text || '');
            const couponMatch = text.match(/券后[\u00A5\uFFE5]?(\d+(?:\.\d+)?)/);
            if (couponMatch)
                product.price.current = parseFloat(couponMatch[1]);
            const origMatch = text.match(/优惠前[\u00A5\uFFE5]?(\d+(?:\.\d+)?)/);
            if (origMatch)
                product.price.original = parseFloat(origMatch[1]);
            // Fall back to any bare price only while nothing labelled was found.
            if (!product.price.current && !product.price.original) {
                const priceMatch = text.match(/[\u00A5\uFFE5](\d+(?:\.\d+)?)/);
                if (priceMatch)
                    product.price.current = parseFloat(priceMatch[1]);
            }
        }
        // Scroll to load lazy images (detail description images)
        console.error('滚动页面加载详情图...');
        for (let i = 0; i < 5; i++) {
            await api.post('/api/browser/scroll', {
                session: options.session,
                direction: 'down',
                distance: 1500,
            });
            await new Promise((r) => setTimeout(r, 500)); // wait for images to load
        }
        // Scroll back to top
        await api.post('/api/browser/scroll', {
            session: options.session,
            direction: 'up',
            distance: 10000,
        });
        // Extract seller ID from existing images for better filtering
        let sellerId;
        const testImages = await queryElements(options.session, 'img', ['@src'], 50);
        for (const el of testImages) {
            const src = String(el.src || '');
            // Seller ID pattern: /i1/12345678/ or similar
            const match = src.match(/\/i\d\/(\d{6,})\//);
            if (match) {
                sellerId = match[1];
                console.error(`检测到卖家ID: ${sellerId}`);
                break;
            }
        }
        // Images - with better filtering
        console.error('抓取图片...');
        const imageElements = await queryElements(options.session, 'img', ['@src', '@data-src'], 300);
        const seenImages = new Set();
        for (const el of imageElements) {
            let src = String(el.src || el['data-src'] || '');
            if (!src)
                continue;
            // Normalize URL: force https, drop the `_WxH...` size suffix and any query string
            if (src.startsWith('//'))
                src = 'https:' + src;
            src = src.replace(/_\d+x\d+[^/]*$/, '').replace(/\?.*$/, '');
            // Check if it's a product image
            if (!isProductImage(src, sellerId))
                continue;
            // Dedupe and add
            if (src.length > 40 && !seenImages.has(src)) {
                seenImages.add(src);
                product.images.push(src);
            }
        }
        // Also look for detail description images (often in iframes or specific containers)
        console.error('抓取详情图...');
        const detailImages = await queryElements(options.session, '#desc img, .detail-content img, [class*="descV8"] img, [class*="description"] img', ['@src', '@data-src'], 200);
        for (const el of detailImages) {
            let src = String(el.src || el['data-src'] || '');
            if (!src || !src.includes('alicdn'))
                continue;
            if (src.startsWith('//'))
                src = 'https:' + src;
            src = src.replace(/\?.*$/, '');
            if (src.length > 40 && !seenImages.has(src) && !isUiImage(src)) {
                seenImages.add(src);
                product.images.push(src);
            }
        }
        // Videos
        console.error('抓取视频...');
        const videoElements = await queryElements(options.session, 'video, source', ['@src', '@data-src'], 20);
        const seenVideos = new Set();
        for (const el of videoElements) {
            let src = String(el.src || el['data-src'] || '');
            if (!src)
                continue;
            // Normalize before the duplicate check so `//host/x` and
            // `https://host/x` count as the same video.
            if (src.startsWith('//'))
                src = 'https:' + src;
            if (seenVideos.has(src))
                continue;
            seenVideos.add(src);
            product.videos.push(src);
        }
        // Specs: for each known label, capture the text after it up to the
        // first character that starts another label.
        console.error('抓取规格...');
        const specElements = await queryElements(options.session, "[class*='sku'], [class*='Sku'], [class*='attr']", undefined, 50);
        const specKeys = ['开关类型', '插头类型', '颜色分类', '尺寸', '材质', '品牌', '型号', '产地'];
        for (const el of specElements) {
            const text = String(el.text || '');
            for (const key of specKeys) {
                if (text.includes(key) && !product.specs[key]) {
                    const match = text.match(new RegExp(`${key}([^开插颜尺材品型产]+)`));
                    if (match)
                        product.specs[key] = match[1].trim().slice(0, 60);
                }
            }
        }
        // Save
        const outputDir = options.output || `./taobao-${itemId}`;
        if (!fs.existsSync(outputDir))
            fs.mkdirSync(outputDir, { recursive: true });
        fs.writeFileSync(path.join(outputDir, 'product.json'), JSON.stringify(product, null, 2));
        // Download
        if (options.download) {
            console.error('下载媒体文件...');
            const result = await downloadMedia(product, outputDir, 'https://item.taobao.com/');
            console.error(`下载完成: ${result.images}/${product.images.length} 图片, ${result.videos}/${product.videos.length} 视频`);
        }
        console.log(JSON.stringify({
            success: true,
            platform: 'taobao',
            item_id: product.item_id,
            title: product.title,
            price: product.price,
            images_count: product.images.length,
            videos_count: product.videos.length,
            specs_count: Object.keys(product.specs).length,
            output_dir: outputDir,
        }));
    }
    catch (error) {
        console.error(JSON.stringify({ error: error instanceof Error ? error.message : String(error) }));
        process.exit(1);
    }
});
|
|
489
|
+
// 1688 (Alibaba China): scrape the offer detail page currently open in the
// session. Collects title, first price found, image/video URLs and a fixed set
// of spec fields, writes them to <output>/product.json and optionally downloads
// the media. Progress goes to stderr; the final summary is printed to stdout.
browserCommand
    .command('1688')
    .description('Scrape product from 1688.com detail page')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .option('-o, --output <dir>', 'Output directory (default: ./1688-<itemId>)')
    .option('-d, --download', 'Download images and videos', false)
    .action(async (options) => {
    try {
        if (!options.session) {
            console.error(JSON.stringify({ error: 'Session ID is required. Use --session or set SESSION_ID env var.' }));
            process.exit(1);
        }
        const fs = await import('fs');
        const path = await import('path');
        console.error('抓取页面信息...');
        const pageInfo = await api.post('/api/browser/page-info', {
            session: options.session,
        });
        if (!pageInfo.url.includes('1688.com')) {
            console.error(JSON.stringify({ error: '当前页面不是 1688 商品页' }));
            process.exit(1);
        }
        // 1688 URL pattern: /offer/123456.html or offerId=123456
        const itemIdMatch = pageInfo.url.match(/offer\/(\d+)\.html/) || pageInfo.url.match(/offerId=(\d+)/);
        const itemId = itemIdMatch ? itemIdMatch[1] : 'unknown';
        const title = pageInfo.title.replace(/-阿里巴巴|-1688\.com/g, '').trim();
        if (itemId === 'unknown') {
            console.error(JSON.stringify({ error: '无法从 URL 提取商品 ID' }));
            process.exit(1);
        }
        const product = {
            platform: '1688',
            item_id: itemId,
            title,
            url: `https://detail.1688.com/offer/${itemId}.html`,
            price: {},
            images: [],
            videos: [],
            specs: {},
            scraped_at: new Date().toISOString(),
        };
        // Price - 1688 uses different price patterns (tiered / wholesale
        // pricing); only the first price found is kept. The yen sign is matched
        // in both its half-width (U+00A5) and full-width (U+FFE5) forms.
        console.error('抓取价格...');
        const priceElements = await queryElements(options.session, "[class*='price'], [class*='Price']", undefined, 30);
        for (const el of priceElements) {
            const text = String(el.text || '');
            const priceMatch = text.match(/[\u00A5\uFFE5](\d+\.?\d*)/);
            if (priceMatch && !product.price.current) {
                product.price.current = parseFloat(priceMatch[1]);
            }
        }
        // Images - 1688 uses cbu01.alicdn.com
        console.error('抓取图片...');
        const imageElements = await queryElements(options.session, 'img', ['@src', '@data-src'], 200);
        const seenImages = new Set();
        for (const el of imageElements) {
            let src = String(el.src || el['data-src'] || '');
            if (!src.includes('alicdn') && !src.includes('1688.com'))
                continue;
            // Skip obvious UI assets.
            if (['icon', 'logo', '/tps/', '.gif', 'avatar'].some((x) => src.includes(x)))
                continue;
            // Drop the `_WxH...` size suffix and any query string, then force https.
            src = src.replace(/_\d+x\d+[^/]*$/, '').replace(/\?.*$/, '');
            if (src.startsWith('//'))
                src = 'https:' + src;
            if (src.length > 40 && !seenImages.has(src)) {
                seenImages.add(src);
                product.images.push(src);
            }
        }
        // Videos
        console.error('抓取视频...');
        const videoElements = await queryElements(options.session, 'video, source', ['@src', '@data-src'], 20);
        const seenVideos = new Set();
        for (const el of videoElements) {
            let src = String(el.src || el['data-src'] || '');
            if (!src)
                continue;
            // Normalize before the duplicate check so `//host/x` and
            // `https://host/x` count as the same video.
            if (src.startsWith('//'))
                src = 'https:' + src;
            if (seenVideos.has(src))
                continue;
            seenVideos.add(src);
            product.videos.push(src);
        }
        // Specs: for each known label, skip an optional half-/full-width colon
        // and capture the text up to whitespace or the start of another label.
        console.error('抓取规格...');
        const specElements = await queryElements(options.session, "[class*='attr'], [class*='sku'], [class*='prop']", undefined, 50);
        const specKeys = ['颜色', '尺码', '尺寸', '材质', '品牌', '型号', '产地', '货号'];
        for (const el of specElements) {
            const text = String(el.text || '');
            for (const key of specKeys) {
                if (text.includes(key) && !product.specs[key]) {
                    const match = text.match(new RegExp(`${key}[:\\uFF1A]*([^颜尺材品型产货\\s]+)`));
                    if (match)
                        product.specs[key] = match[1].trim().slice(0, 60);
                }
            }
        }
        // Save
        const outputDir = options.output || `./1688-${itemId}`;
        if (!fs.existsSync(outputDir))
            fs.mkdirSync(outputDir, { recursive: true });
        fs.writeFileSync(path.join(outputDir, 'product.json'), JSON.stringify(product, null, 2));
        if (options.download) {
            console.error('下载媒体文件...');
            const result = await downloadMedia(product, outputDir, 'https://detail.1688.com/');
            console.error(`下载完成: ${result.images}/${product.images.length} 图片, ${result.videos}/${product.videos.length} 视频`);
        }
        console.log(JSON.stringify({
            success: true,
            platform: '1688',
            item_id: product.item_id,
            title: product.title,
            price: product.price,
            images_count: product.images.length,
            videos_count: product.videos.length,
            specs_count: Object.keys(product.specs).length,
            output_dir: outputDir,
        }));
    }
    catch (error) {
        console.error(JSON.stringify({ error: error instanceof Error ? error.message : String(error) }));
        process.exit(1);
    }
});
|
|
614
|
+
//# sourceMappingURL=browser.js.map
|