@optima-chat/scout-cli 0.1.11 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Optima Scout CLI
2
2
 
3
- AI-powered Amazon product research tool for Claude Code and LLMs.
3
+ AI-powered Amazon product research and 1688 supplier sourcing tool for Claude Code and LLMs.
4
4
 
5
5
  ## Installation
6
6
 
@@ -14,11 +14,14 @@ npm install -g @optima-chat/scout-cli
14
14
  # Initialize Claude Code skills
15
15
  scout init
16
16
 
17
- # Search products
17
+ # Search Amazon products
18
18
  scout search "coffee maker"
19
19
 
20
20
  # Get product details
21
21
  scout product B01GJOMWVA
22
+
23
+ # Search 1688 suppliers
24
+ scout supplier-search "咖啡机"
22
25
  ```
23
26
 
24
27
  ## Commands
@@ -57,6 +60,20 @@ Get detailed product information.
57
60
  scout product B004YAVF8I --domain amazon.com
58
61
  ```
59
62
 
63
+ ### `scout supplier-search <keyword>`
64
+
65
+ Search for suppliers on 1688.com.
66
+
67
+ **Options:**
68
+ - `-l, --limit <number>` - Result limit (default: `20`, max: `100`)
69
+ - `-f, --format <format>` - Output: `json` | `text`
70
+
71
+ **Example:**
72
+ ```bash
73
+ scout supplier-search "咖啡机" --limit 10
74
+ scout supplier-search "蓝牙耳机" --format json
75
+ ```
76
+
60
77
  ## Configuration
61
78
 
62
79
  Set API endpoint via environment variable:
@@ -0,0 +1,3 @@
1
+ import { Command } from 'commander';
2
+ export declare const browserCommand: Command;
3
+ //# sourceMappingURL=browser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"browser.d.ts","sourceRoot":"","sources":["../../src/commands/browser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAGpC,eAAO,MAAM,cAAc,SAC+B,CAAC"}
@@ -0,0 +1,614 @@
1
+ import { Command } from 'commander';
2
+ import { api } from '../utils/api.js';
3
// Root `browser` command; every subcommand in this file is registered on it.
export const browserCommand = new Command('browser');
browserCommand.description('Interact with user browser via extension');
5
/**
 * Query elements of the page attached to a browser session by POSTing to
 * `/api/browser/query`.
 *
 * This helper is best-effort: a failed request yields an empty list instead of
 * throwing, so a scraper can continue with whatever else it manages to collect.
 *
 * @param session  Session ID forwarded to the API.
 * @param selector CSS selector forwarded to the API.
 * @param attrs    Attribute names to extract, sent as `attributes` (may be undefined).
 * @param limit    Maximum number of elements to request (default 200).
 * @returns The array returned by the API, or `[]` when the request fails or
 *          the response is not an array.
 */
async function queryElements(session, selector, attrs, limit = 200) {
    try {
        const result = await api.post('/api/browser/query', {
            session,
            selector,
            attributes: attrs,
            limit,
        });
        // Callers iterate the result with for...of; a null/object response would
        // throw there, so anything that is not an array is treated as "no elements".
        return Array.isArray(result) ? result : [];
    }
    catch {
        return [];
    }
}
19
/**
 * Download every image and video URL listed on a scraped product into `outputDir`.
 *
 * Images are saved as `image_NN.<webp|png|jpg>` (extension guessed from the URL)
 * and videos as `video_NN.mp4`, numbered from 01 in list order. Each download is
 * attempted in sequence; a failure is logged to stderr and skipped, it does not
 * abort the remaining downloads.
 *
 * @param product   Object with `images` and `videos` arrays of URL strings.
 * @param outputDir Directory the files are written to (must already exist).
 * @param referer   Value sent as the `Referer` request header.
 * @returns `{ images, videos }` — the number of files successfully downloaded.
 */
async function downloadMedia(product, outputDir, referer) {
    const fs = await import('fs');
    const path = await import('path');
    const https = await import('https');
    const http = await import('http');
    const downloadFile = (url, destPath) => {
        return new Promise((resolve, reject) => {
            const protocol = url.startsWith('https') ? https : http;
            // Remove whatever was written so far, then report the failure.
            const failAndCleanUp = (err) => fs.unlink(destPath, () => reject(err));
            protocol
                .get(url, { headers: { 'User-Agent': 'Mozilla/5.0', Referer: referer } }, (response) => {
                    if (response.statusCode !== 200) {
                        // Drain the unwanted body so the socket is released.
                        response.resume();
                        reject(new Error(`HTTP ${response.statusCode}`));
                        return;
                    }
                    // Open the destination only once there is a body to store, so a
                    // failed request never leaves an empty file behind.
                    const file = fs.createWriteStream(destPath);
                    file.on('error', (err) => {
                        response.destroy();
                        failAndCleanUp(err);
                    });
                    response.on('error', (err) => {
                        file.destroy();
                        failAndCleanUp(err);
                    });
                    file.on('finish', () => {
                        file.close();
                        resolve(undefined);
                    });
                    response.pipe(file);
                })
                .on('error', reject);
        });
    };
    // Shared loop for both media kinds: returns how many URLs were saved.
    const fetchAll = async (urls, kind, extensionOf) => {
        let succeeded = 0;
        for (let i = 0; i < urls.length; i++) {
            const label = `${kind}_${String(i + 1).padStart(2, '0')}`;
            const fileName = `${label}.${extensionOf(urls[i])}`;
            try {
                await downloadFile(urls[i], path.join(outputDir, fileName));
                succeeded++;
                console.error(` ✓ ${fileName}`);
            }
            catch (e) {
                console.error(` ✗ ${label}: ${e}`);
            }
        }
        return succeeded;
    };
    const images = await fetchAll(product.images, 'image', (url) => url.includes('.webp') ? 'webp' : url.includes('.png') ? 'png' : 'jpg');
    const videos = await fetchAll(product.videos, 'video', () => 'mp4');
    return { images, videos };
}
78
+ // ============================================
79
+ // Basic browser commands
80
+ // ============================================
81
// Status check
// Prints the result of GET /api/browser/status for the given session as
// pretty-printed JSON on stdout. Errors are printed as JSON on stderr and the
// process exits with code 1.
browserCommand
    .command('status')
    .description('Check browser connection status')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .action(async (options) => {
        try {
            if (!options.session) {
                console.error(JSON.stringify({ error: 'Session ID is required. Use --session or set SESSION_ID env var.' }));
                process.exit(1);
            }
            // The session ID comes from the command line / environment; escape it so
            // characters such as '&', '#' or spaces cannot alter the query string.
            const result = await api.get(`/api/browser/status?session=${encodeURIComponent(options.session)}`);
            console.log(JSON.stringify(result, null, 2));
        }
        catch (error) {
            console.error(JSON.stringify({ error: error instanceof Error ? error.message : String(error) }));
            process.exit(1);
        }
    });
100
// List connections
// Fetches /api/browser/connections and prints the response as indented JSON;
// on failure prints a JSON error to stderr and exits with code 1.
browserCommand
    .command('connections')
    .description('List all connected browsers')
    .action(async () => {
        try {
            const connections = await api.get('/api/browser/connections');
            console.log(JSON.stringify(connections, null, 2));
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            console.error(JSON.stringify({ error: message }));
            process.exit(1);
        }
    });
114
// Screenshot
// Requests a screenshot from /api/browser/screenshot, decodes the returned
// data URL and writes the image bytes to disk. Prints a JSON summary
// ({ success, path, size }) on stdout.
browserCommand
    .command('screenshot')
    .description('Capture current page screenshot and save to file')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .option('-f, --format <format>', 'Image format (png|jpeg)', 'png')
    .option('-o, --output <path>', 'Output file path (default: ./screenshot-<timestamp>.<format>)')
    .action(async (opts) => {
        try {
            if (!opts.session) {
                console.error(JSON.stringify({ error: 'Session ID is required. Use --session or set SESSION_ID env var.' }));
                process.exit(1);
            }
            const shot = await api.post('/api/browser/screenshot', {
                session: opts.session,
                format: opts.format,
            });
            // Strip the "data:image/...;base64," prefix and decode the payload.
            const imageBytes = Buffer.from(shot.dataUrl.replace(/^data:image\/\w+;base64,/, ''), 'base64');
            // ISO timestamp with ':' and '.' replaced so it is usable in a file name.
            const stamp = new Date().toISOString().replace(/[:.]/g, '-');
            const target = opts.output || `./screenshot-${stamp}.${opts.format}`;
            const fs = await import('fs');
            fs.writeFileSync(target, imageBytes);
            console.log(JSON.stringify({ success: true, path: target, size: imageBytes.length }));
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            console.error(JSON.stringify({ error: message }));
            process.exit(1);
        }
    });
144
// Query elements
// Sends a CSS selector (plus optional attribute list, extraction schema and
// limit) to /api/browser/query and prints the response as indented JSON.
// An invalid --schema JSON string is reported through the same error path as
// a failed request.
browserCommand
    .command('query <selector>')
    .description('Query page elements using CSS selector')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .option('-a, --attributes <attrs>', 'Attributes to extract (comma-separated)')
    .option('--schema <json>', 'Extraction schema (JSON format)')
    // commander calls the parser as (value, previousValue); passing parseInt
    // directly would use the previous value as the radix. Wrap it with an
    // explicit base 10, as the click/scroll commands do.
    .option('-l, --limit <n>', 'Maximum number of elements', (v) => parseInt(v, 10))
    .action(async (selector, options) => {
        try {
            if (!options.session) {
                console.error(JSON.stringify({ error: 'Session ID is required. Use --session or set SESSION_ID env var.' }));
                process.exit(1);
            }
            const result = await api.post('/api/browser/query', {
                session: options.session,
                selector,
                attributes: options.attributes?.split(','),
                schema: options.schema ? JSON.parse(options.schema) : undefined,
                limit: options.limit,
            });
            console.log(JSON.stringify(result, null, 2));
        }
        catch (error) {
            console.error(JSON.stringify({ error: error instanceof Error ? error.message : String(error) }));
            process.exit(1);
        }
    });
172
// Click element
// Asks /api/browser/click to click the index-th element matching the selector
// and prints the response as single-line JSON.
browserCommand
    .command('click <selector>')
    .description('Click a page element')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .option('-i, --index <n>', 'Element index (when multiple matches)', (raw) => parseInt(raw, 10), 0)
    .action(async (selector, opts) => {
        try {
            if (!opts.session) {
                console.error(JSON.stringify({ error: 'Session ID is required. Use --session or set SESSION_ID env var.' }));
                process.exit(1);
            }
            const payload = { session: opts.session, selector, index: opts.index };
            const outcome = await api.post('/api/browser/click', payload);
            console.log(JSON.stringify(outcome));
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            console.error(JSON.stringify({ error: message }));
            process.exit(1);
        }
    });
196
// Scroll page
// Forwards a direction and pixel distance to /api/browser/scroll and prints
// the response as single-line JSON.
browserCommand
    .command('scroll')
    .description('Scroll the page')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .option('-d, --direction <dir>', 'Direction: up/down/left/right', 'down')
    .option('--distance <px>', 'Distance in pixels', (raw) => parseInt(raw, 10), 500)
    .action(async (opts) => {
        try {
            if (!opts.session) {
                console.error(JSON.stringify({ error: 'Session ID is required. Use --session or set SESSION_ID env var.' }));
                process.exit(1);
            }
            const payload = {
                session: opts.session,
                direction: opts.direction,
                distance: opts.distance,
            };
            const outcome = await api.post('/api/browser/scroll', payload);
            console.log(JSON.stringify(outcome));
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            console.error(JSON.stringify({ error: message }));
            process.exit(1);
        }
    });
221
// Navigate
// Sends the target URL to /api/browser/navigate for the given session and
// prints the response as single-line JSON.
browserCommand
    .command('navigate <url>')
    .description('Navigate to a URL')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .action(async (url, opts) => {
        try {
            if (!opts.session) {
                console.error(JSON.stringify({ error: 'Session ID is required. Use --session or set SESSION_ID env var.' }));
                process.exit(1);
            }
            const payload = { session: opts.session, url };
            const outcome = await api.post('/api/browser/navigate', payload);
            console.log(JSON.stringify(outcome));
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            console.error(JSON.stringify({ error: message }));
            process.exit(1);
        }
    });
243
// Page info
// Fetches /api/browser/page-info for the given session and prints the
// response as indented JSON.
browserCommand
    .command('page-info')
    .description('Get current page information')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .action(async (opts) => {
        try {
            if (!opts.session) {
                console.error(JSON.stringify({ error: 'Session ID is required. Use --session or set SESSION_ID env var.' }));
                process.exit(1);
            }
            const payload = { session: opts.session };
            const info = await api.post('/api/browser/page-info', payload);
            console.log(JSON.stringify(info, null, 2));
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            console.error(JSON.stringify({ error: message }));
            process.exit(1);
        }
    });
264
+ // ============================================
265
+ // Platform-specific scrapers
266
+ // ============================================
267
// Substrings that mark an image URL as page chrome (icons, logos, badges, ...)
// rather than product media on Taobao/Tmall pages.
const TAOBAO_UI_PATTERNS = [
    '/tps/',
    'tps-',
    '-tps-',
    'icon',
    'logo',
    '.gif',
    'avatar',
    'TB1',
    'atmosphere',
    'storag-merlin',
    '-2-tps-',
    'shopmanager', // shop logo/management images
    'O1CN01KsDwNS', // common UI element
    'O1CN01Dqo1gd', // common UI element
    'O1CN01z163bz', // badge/tag
    'O1CN012pqGiT', // UI element
];
/**
 * Report whether a URL contains any of the UI-image substrings.
 * The match is a plain, case-sensitive substring test.
 */
function isUiImage(url) {
    for (const marker of TAOBAO_UI_PATTERNS) {
        if (url.includes(marker)) {
            return true;
        }
    }
    return false;
}
290
/**
 * Decide whether an image URL should be kept as a product photo.
 *
 * A URL qualifies when it contains "alicdn", is not flagged by isUiImage, and
 * either contains the seller ID or sits under `/bao/uploaded/` or `/imgextra/`
 * without one of the small-size markers (-96-, -64-, -48-).
 *
 * @param url      Image URL to classify.
 * @param sellerId Optional seller ID; any URL containing it is accepted.
 */
function isProductImage(url, sellerId) {
    const fromAliCdn = url.includes('alicdn');
    if (!fromAliCdn || isUiImage(url)) {
        return false;
    }
    if (sellerId && url.includes(sellerId)) {
        return true;
    }
    const inUploadPath = ['/bao/uploaded/', '/imgextra/'].some((segment) => url.includes(segment));
    if (!inUploadPath) {
        return false;
    }
    // Small sizes are likely thumbnails or icons.
    const looksTiny = ['-96-', '-64-', '-48-'].some((marker) => url.includes(marker));
    return !looksTiny;
}
310
// Taobao / Tmall
// Scrapes the product page currently open in the browser session: title, price,
// gallery/detail images, videos and a fixed set of spec fields. The result is
// written to <output>/product.json; with --download the media files are fetched
// as well. A one-line JSON summary goes to stdout, progress messages to stderr.
browserCommand
    .command('taobao')
    .description('Scrape product from Taobao/Tmall detail page')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .option('-o, --output <dir>', 'Output directory (default: ./taobao-<itemId>)')
    .option('-d, --download', 'Download images and videos', false)
    .action(async (options) => {
        try {
            if (!options.session) {
                console.error(JSON.stringify({ error: 'Session ID is required. Use --session or set SESSION_ID env var.' }));
                process.exit(1);
            }
            const fs = await import('fs');
            const path = await import('path');
            console.error('抓取页面信息...');
            const pageInfo = await api.post('/api/browser/page-info', {
                session: options.session,
            });
            // Validate URL
            if (!pageInfo.url.includes('taobao.com') && !pageInfo.url.includes('tmall.com')) {
                console.error(JSON.stringify({ error: '当前页面不是淘宝/天猫商品页' }));
                process.exit(1);
            }
            // Anchor on '?' / '&' so that parameters such as skuId=... are not
            // mistaken for the item ID.
            const itemIdMatch = pageInfo.url.match(/[?&]id=(\d+)/);
            const itemId = itemIdMatch ? itemIdMatch[1] : 'unknown';
            const title = pageInfo.title.replace(/-淘宝网|-天猫.*$/g, '').trim();
            if (itemId === 'unknown') {
                console.error(JSON.stringify({ error: '无法从 URL 提取商品 ID' }));
                process.exit(1);
            }
            const product = {
                platform: 'taobao',
                item_id: itemId,
                title,
                url: `https://item.taobao.com/item.htm?id=${itemId}`,
                price: {},
                images: [],
                videos: [],
                specs: {},
                scraped_at: new Date().toISOString(),
            };
            // Price
            // The currency class accepts both the half-width yen sign (U+00A5) and the
            // full-width one (U+FFE5); the amount keeps its decimal part.
            console.error('抓取价格...');
            const priceElements = await queryElements(options.session, "[class*='price'], [class*='Price']", undefined, 20);
            for (const el of priceElements) {
                const text = String(el.text || '');
                const couponMatch = text.match(/券后[¥\uFFE5]?(\d+(?:\.\d+)?)/);
                if (couponMatch)
                    product.price.current = parseFloat(couponMatch[1]);
                const origMatch = text.match(/优惠前[¥\uFFE5]?(\d+(?:\.\d+)?)/);
                if (origMatch)
                    product.price.original = parseFloat(origMatch[1]);
                if (!product.price.current && !product.price.original) {
                    const priceMatch = text.match(/[¥\uFFE5](\d+(?:\.\d+)?)/);
                    if (priceMatch)
                        product.price.current = parseFloat(priceMatch[1]);
                }
            }
            // Scroll to load lazy images (detail description images)
            console.error('滚动页面加载详情图...');
            for (let i = 0; i < 5; i++) {
                await api.post('/api/browser/scroll', {
                    session: options.session,
                    direction: 'down',
                    distance: 1500,
                });
                await new Promise((r) => setTimeout(r, 500)); // wait for images to load
            }
            // Scroll back to top
            await api.post('/api/browser/scroll', {
                session: options.session,
                direction: 'up',
                distance: 10000,
            });
            // Extract seller ID from existing images for better filtering
            let sellerId;
            const testImages = await queryElements(options.session, 'img', ['@src'], 50);
            for (const el of testImages) {
                const src = String(el.src || '');
                // Seller ID pattern: /i1/12345678/ or similar
                const match = src.match(/\/i\d\/(\d{6,})\//);
                if (match) {
                    sellerId = match[1];
                    console.error(`检测到卖家ID: ${sellerId}`);
                    break;
                }
            }
            // Images - with better filtering
            console.error('抓取图片...');
            const imageElements = await queryElements(options.session, 'img', ['@src', '@data-src'], 300);
            const seenImages = new Set();
            for (const el of imageElements) {
                let src = String(el.src || el['data-src'] || '');
                if (!src)
                    continue;
                // Normalize URL: force https, drop the "_WxH..." size suffix and the query string
                if (src.startsWith('//'))
                    src = 'https:' + src;
                src = src.replace(/_\d+x\d+[^/]*$/, '').replace(/\?.*$/, '');
                // Check if it's a product image
                if (!isProductImage(src, sellerId))
                    continue;
                // Dedupe and add
                if (src.length > 40 && !seenImages.has(src)) {
                    seenImages.add(src);
                    product.images.push(src);
                }
            }
            // Also look for detail description images (often in iframes or specific containers)
            console.error('抓取详情图...');
            const detailImages = await queryElements(options.session, '#desc img, .detail-content img, [class*="descV8"] img, [class*="description"] img', ['@src', '@data-src'], 200);
            for (const el of detailImages) {
                let src = String(el.src || el['data-src'] || '');
                if (!src || !src.includes('alicdn'))
                    continue;
                if (src.startsWith('//'))
                    src = 'https:' + src;
                src = src.replace(/\?.*$/, '');
                if (src.length > 40 && !seenImages.has(src) && !isUiImage(src)) {
                    seenImages.add(src);
                    product.images.push(src);
                }
            }
            // Videos
            console.error('抓取视频...');
            const videoElements = await queryElements(options.session, 'video, source', ['@src', '@data-src'], 20);
            const seenVideos = new Set();
            for (const el of videoElements) {
                let src = String(el.src || el['data-src'] || '');
                if (!src)
                    continue;
                // Normalize before the duplicate check so that "//host/x" and
                // "https://host/x" count as the same video.
                if (src.startsWith('//'))
                    src = 'https:' + src;
                if (!seenVideos.has(src)) {
                    seenVideos.add(src);
                    product.videos.push(src);
                }
            }
            // Specs
            console.error('抓取规格...');
            const specElements = await queryElements(options.session, "[class*='sku'], [class*='Sku'], [class*='attr']", undefined, 50);
            const specKeys = ['开关类型', '插头类型', '颜色分类', '尺寸', '材质', '品牌', '型号', '产地'];
            for (const el of specElements) {
                const text = String(el.text || '');
                for (const key of specKeys) {
                    // First match wins; the value runs until the first character that
                    // starts another known key.
                    if (text.includes(key) && !product.specs[key]) {
                        const match = text.match(new RegExp(`${key}([^开插颜尺材品型产]+)`));
                        if (match)
                            product.specs[key] = match[1].trim().slice(0, 60);
                    }
                }
            }
            // Save
            const outputDir = options.output || `./taobao-${itemId}`;
            if (!fs.existsSync(outputDir))
                fs.mkdirSync(outputDir, { recursive: true });
            fs.writeFileSync(path.join(outputDir, 'product.json'), JSON.stringify(product, null, 2));
            // Download
            if (options.download) {
                console.error('下载媒体文件...');
                const result = await downloadMedia(product, outputDir, 'https://item.taobao.com/');
                console.error(`下载完成: ${result.images}/${product.images.length} 图片, ${result.videos}/${product.videos.length} 视频`);
            }
            console.log(JSON.stringify({
                success: true,
                platform: 'taobao',
                item_id: product.item_id,
                title: product.title,
                price: product.price,
                images_count: product.images.length,
                videos_count: product.videos.length,
                specs_count: Object.keys(product.specs).length,
                output_dir: outputDir,
            }));
        }
        catch (error) {
            console.error(JSON.stringify({ error: error instanceof Error ? error.message : String(error) }));
            process.exit(1);
        }
    });
489
// 1688 (Alibaba China)
// Scrapes the 1688.com offer page currently open in the browser session: title,
// first listed price, images, videos and a fixed set of spec fields. The result
// is written to <output>/product.json; with --download the media files are
// fetched as well. A one-line JSON summary goes to stdout, progress to stderr.
browserCommand
    .command('1688')
    .description('Scrape product from 1688.com detail page')
    .option('-s, --session <id>', 'Session ID', process.env.SESSION_ID)
    .option('-o, --output <dir>', 'Output directory (default: ./1688-<itemId>)')
    .option('-d, --download', 'Download images and videos', false)
    .action(async (options) => {
        try {
            if (!options.session) {
                console.error(JSON.stringify({ error: 'Session ID is required. Use --session or set SESSION_ID env var.' }));
                process.exit(1);
            }
            const fs = await import('fs');
            const path = await import('path');
            console.error('抓取页面信息...');
            const pageInfo = await api.post('/api/browser/page-info', {
                session: options.session,
            });
            if (!pageInfo.url.includes('1688.com')) {
                console.error(JSON.stringify({ error: '当前页面不是 1688 商品页' }));
                process.exit(1);
            }
            // 1688 URL pattern: /offer/123456.html or offerId=123456
            const itemIdMatch = pageInfo.url.match(/offer\/(\d+)\.html/) || pageInfo.url.match(/offerId=(\d+)/);
            const itemId = itemIdMatch ? itemIdMatch[1] : 'unknown';
            const title = pageInfo.title.replace(/-阿里巴巴|-1688\.com/g, '').trim();
            if (itemId === 'unknown') {
                console.error(JSON.stringify({ error: '无法从 URL 提取商品 ID' }));
                process.exit(1);
            }
            const product = {
                platform: '1688',
                item_id: itemId,
                title,
                url: `https://detail.1688.com/offer/${itemId}.html`,
                price: {},
                images: [],
                videos: [],
                specs: {},
                scraped_at: new Date().toISOString(),
            };
            // Price - 1688 uses different price patterns (tiered / wholesale prices).
            // Only the first price found is kept. The currency class accepts both the
            // half-width yen sign (U+00A5) and the full-width one (U+FFE5).
            console.error('抓取价格...');
            const priceElements = await queryElements(options.session, "[class*='price'], [class*='Price']", undefined, 30);
            for (const el of priceElements) {
                const text = String(el.text || '');
                const priceMatch = text.match(/[¥\uFFE5](\d+\.?\d*)/);
                if (priceMatch && !product.price.current) {
                    product.price.current = parseFloat(priceMatch[1]);
                }
            }
            // Images - 1688 uses cbu01.alicdn.com
            console.error('抓取图片...');
            const imageElements = await queryElements(options.session, 'img', ['@src', '@data-src'], 200);
            const seenImages = new Set();
            for (const el of imageElements) {
                let src = String(el.src || el['data-src'] || '');
                if (!src.includes('alicdn') && !src.includes('1688.com'))
                    continue;
                // Skip obvious page chrome.
                if (['icon', 'logo', '/tps/', '.gif', 'avatar'].some((x) => src.includes(x)))
                    continue;
                // Drop the "_WxH..." size suffix and the query string, then force https.
                src = src.replace(/_\d+x\d+[^/]*$/, '').replace(/\?.*$/, '');
                if (src.startsWith('//'))
                    src = 'https:' + src;
                if (src.length > 40 && !seenImages.has(src)) {
                    seenImages.add(src);
                    product.images.push(src);
                }
            }
            // Videos
            console.error('抓取视频...');
            const videoElements = await queryElements(options.session, 'video, source', ['@src', '@data-src'], 20);
            const seenVideos = new Set();
            for (const el of videoElements) {
                let src = String(el.src || el['data-src'] || '');
                if (!src)
                    continue;
                // Normalize before the duplicate check so that "//host/x" and
                // "https://host/x" count as the same video.
                if (src.startsWith('//'))
                    src = 'https:' + src;
                if (!seenVideos.has(src)) {
                    seenVideos.add(src);
                    product.videos.push(src);
                }
            }
            // Specs
            console.error('抓取规格...');
            const specElements = await queryElements(options.session, "[class*='attr'], [class*='sku'], [class*='prop']", undefined, 50);
            const specKeys = ['颜色', '尺码', '尺寸', '材质', '品牌', '型号', '产地', '货号'];
            for (const el of specElements) {
                const text = String(el.text || '');
                for (const key of specKeys) {
                    // First match wins. The key may be followed by half-width or
                    // full-width (U+FF1A) colons; the value ends at whitespace or at the
                    // first character that starts another known key.
                    if (text.includes(key) && !product.specs[key]) {
                        const match = text.match(new RegExp(`${key}[:\uFF1A]*([^颜尺材品型产货\\s]+)`));
                        if (match)
                            product.specs[key] = match[1].trim().slice(0, 60);
                    }
                }
            }
            // Save
            const outputDir = options.output || `./1688-${itemId}`;
            if (!fs.existsSync(outputDir))
                fs.mkdirSync(outputDir, { recursive: true });
            fs.writeFileSync(path.join(outputDir, 'product.json'), JSON.stringify(product, null, 2));
            if (options.download) {
                console.error('下载媒体文件...');
                const result = await downloadMedia(product, outputDir, 'https://detail.1688.com/');
                console.error(`下载完成: ${result.images}/${product.images.length} 图片, ${result.videos}/${product.videos.length} 视频`);
            }
            console.log(JSON.stringify({
                success: true,
                platform: '1688',
                item_id: product.item_id,
                title: product.title,
                price: product.price,
                images_count: product.images.length,
                videos_count: product.videos.length,
                specs_count: Object.keys(product.specs).length,
                output_dir: outputDir,
            }));
        }
        catch (error) {
            console.error(JSON.stringify({ error: error instanceof Error ? error.message : String(error) }));
            process.exit(1);
        }
    });
614
+ //# sourceMappingURL=browser.js.map