@dyyz1993/agent-browser 0.23.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/README.md +108 -0
  2. package/bin/agent-browser-darwin-arm64 +0 -0
  3. package/dist/__tests__/utils/free-port.d.ts +2 -0
  4. package/dist/__tests__/utils/free-port.d.ts.map +1 -0
  5. package/dist/__tests__/utils/free-port.js +18 -0
  6. package/dist/__tests__/utils/free-port.js.map +1 -0
  7. package/dist/__tests__/utils/parseCli.d.ts.map +1 -1
  8. package/dist/__tests__/utils/parseCli.js +0 -8
  9. package/dist/__tests__/utils/parseCli.js.map +1 -1
  10. package/dist/actions.d.ts.map +1 -1
  11. package/dist/actions.js +32 -12
  12. package/dist/actions.js.map +1 -1
  13. package/dist/browser.d.ts.map +1 -1
  14. package/dist/browser.js +12 -17
  15. package/dist/browser.js.map +1 -1
  16. package/dist/cli/commands.d.ts.map +1 -1
  17. package/dist/cli/commands.js +11 -13
  18. package/dist/cli/commands.js.map +1 -1
  19. package/dist/cli/connection.d.ts.map +1 -1
  20. package/dist/cli/connection.js +86 -47
  21. package/dist/cli/connection.js.map +1 -1
  22. package/dist/cli/flags.d.ts +1 -0
  23. package/dist/cli/flags.d.ts.map +1 -1
  24. package/dist/cli/flags.js +8 -1
  25. package/dist/cli/flags.js.map +1 -1
  26. package/dist/cli/help.d.ts.map +1 -1
  27. package/dist/cli/help.js +75 -23
  28. package/dist/cli/help.js.map +1 -1
  29. package/dist/cli/output.d.ts.map +1 -1
  30. package/dist/cli/output.js +0 -32
  31. package/dist/cli/output.js.map +1 -1
  32. package/dist/cli.js +150 -15
  33. package/dist/cli.js.map +1 -1
  34. package/dist/daemon.d.ts.map +1 -1
  35. package/dist/daemon.js +285 -280
  36. package/dist/daemon.js.map +1 -1
  37. package/dist/flow/exporters/cypress.d.ts +9 -0
  38. package/dist/flow/exporters/cypress.d.ts.map +1 -0
  39. package/dist/flow/exporters/cypress.js +256 -0
  40. package/dist/flow/exporters/cypress.js.map +1 -0
  41. package/dist/flow/exporters/index.d.ts +2 -0
  42. package/dist/flow/exporters/index.d.ts.map +1 -1
  43. package/dist/flow/exporters/index.js +2 -0
  44. package/dist/flow/exporters/index.js.map +1 -1
  45. package/dist/flow/exporters/selenium.d.ts +9 -0
  46. package/dist/flow/exporters/selenium.d.ts.map +1 -0
  47. package/dist/flow/exporters/selenium.js +298 -0
  48. package/dist/flow/exporters/selenium.js.map +1 -0
  49. package/dist/flow/flow-executor.d.ts +2 -0
  50. package/dist/flow/flow-executor.d.ts.map +1 -1
  51. package/dist/flow/flow-executor.js +143 -49
  52. package/dist/flow/flow-executor.js.map +1 -1
  53. package/dist/flow/index.d.ts +1 -1
  54. package/dist/flow/index.d.ts.map +1 -1
  55. package/dist/flow/index.js +1 -1
  56. package/dist/flow/index.js.map +1 -1
  57. package/dist/flow/output.js.map +1 -1
  58. package/dist/flow/plugin-system.d.ts.map +1 -1
  59. package/dist/flow/plugin-system.js.map +1 -1
  60. package/dist/flow/presets/console-capture.js +31 -0
  61. package/dist/flow/presets/fetch-capture.js +78 -0
  62. package/dist/flow/presets/sse-stream.js +67 -0
  63. package/dist/flow/presets/xhr-only.js +34 -0
  64. package/dist/flow/recorder-to-flow.js.map +1 -1
  65. package/dist/flow/site-manager.js.map +1 -1
  66. package/dist/flow/types.d.ts +15 -0
  67. package/dist/flow/types.d.ts.map +1 -1
  68. package/dist/flow/yaml-parser.d.ts.map +1 -1
  69. package/dist/flow/yaml-parser.js +2 -0
  70. package/dist/flow/yaml-parser.js.map +1 -1
  71. package/dist/human-mouse.d.ts.map +1 -1
  72. package/dist/protocol.d.ts.map +1 -1
  73. package/dist/protocol.js +1 -12
  74. package/dist/protocol.js.map +1 -1
  75. package/dist/rc-config.d.ts.map +1 -1
  76. package/dist/rc-config.js +1 -2
  77. package/dist/rc-config.js.map +1 -1
  78. package/dist/snapshot-store.d.ts +6 -0
  79. package/dist/snapshot-store.d.ts.map +1 -1
  80. package/dist/snapshot-store.js +15 -0
  81. package/dist/snapshot-store.js.map +1 -1
  82. package/dist/snapshot.d.ts.map +1 -1
  83. package/dist/snapshot.js +48 -30
  84. package/dist/snapshot.js.map +1 -1
  85. package/dist/stream-server-standalone.d.ts.map +1 -1
  86. package/dist/stream-server-standalone.js.map +1 -1
  87. package/dist/stream-server.d.ts.map +1 -1
  88. package/dist/stream-server.js +38 -13
  89. package/dist/stream-server.js.map +1 -1
  90. package/dist/test-live.js +5 -5
  91. package/dist/test-live.js.map +1 -1
  92. package/dist/types.d.ts +2 -10
  93. package/dist/types.d.ts.map +1 -1
  94. package/dist/types.js.map +1 -1
  95. package/dist/viewer-script.d.ts.map +1 -1
  96. package/dist/viewer-script.js +8 -2
  97. package/dist/viewer-script.js.map +1 -1
  98. package/package.json +12 -3
  99. package/scripts/check_goods_container.js +35 -0
  100. package/scripts/check_page_content.js +36 -0
  101. package/scripts/click_applause_rate.js +30 -0
  102. package/scripts/explore_jd_page.js +31 -0
  103. package/scripts/extract_all_jd_data.js +80 -0
  104. package/scripts/extract_jd_product_detail.js +62 -0
  105. package/scripts/extract_jd_products_correct_links.js +78 -0
  106. package/scripts/extract_jd_products_final.js +80 -0
  107. package/scripts/extract_jd_reviews.js +48 -0
  108. package/scripts/extract_jd_seafood_final.js +78 -0
  109. package/scripts/extract_multiple_products.js +77 -0
  110. package/scripts/extract_products_no_scroll.js +68 -0
  111. package/scripts/extract_products_simple.js +68 -0
  112. package/scripts/find_applause_rate.js +26 -0
  113. package/scripts/find_jd_links.js +28 -0
  114. package/scripts/find_main_content.js +20 -0
  115. package/scripts/find_product_cards.js +38 -0
  116. package/scripts/find_root_content.js +26 -0
  117. package/scripts/find_unique_products.js +55 -0
  118. package/scripts/get_jd_product_detail.js +16 -0
  119. package/scripts/get_jd_products.js +23 -0
  120. package/scripts/get_jd_seafood_products.js +44 -0
  121. package/scripts/get_product_details_from_images.js +54 -0
  122. package/scripts/verify-form.sh +67 -0
  123. package/scripts/verify-login.sh +65 -0
  124. package/scripts/verify-recording.sh +80 -0
  125. package/scripts/verify-upload.sh +41 -0
  126. package/skills/agent-browser/SKILL.md +49 -0
  127. package/bin/agent-browser-linux-x64 +0 -0
@@ -0,0 +1,36 @@
(() => {
  // Page probe. Samples up to 50 elements whose class attribute mentions
  // "product", "item" or "card" and whose trimmed text is longer than 10
  // characters, plus the first 10 anchors whose href contains "item.jd" or
  // "product.jd". Returns { visibleElements, linksWithProductInClass }.
  const MAX_ELEMENTS = 50;
  const MAX_LINKS = 10;
  const CLASS_HINTS = ['product', 'item', 'card'];

  const result = {
    visibleElements: [],
    linksWithProductInClass: []
  };

  for (const el of document.querySelectorAll('*')) {
    if (result.visibleElements.length >= MAX_ELEMENTS) break;

    // Read the attribute instead of el.className: on SVG elements className
    // is an SVGAnimatedString, which stringifies to
    // "[object SVGAnimatedString]" and would never match a hint.
    const className = el.getAttribute('class') || '';
    const text = el.textContent?.trim() || '';

    const looksLikeProduct = CLASS_HINTS.some((hint) => className.includes(hint));
    if (looksLikeProduct && text.length > 10) {
      result.visibleElements.push({
        tagName: el.tagName,
        className: className.substring(0, 100),
        text: text.substring(0, 100)
      });
    }
  }

  const productLinks = document.querySelectorAll('a[href*="item.jd"], a[href*="product.jd"]');
  for (const link of Array.from(productLinks).slice(0, MAX_LINKS)) {
    // HTML anchors expose href as a resolved string; SVG <a> exposes an
    // SVGAnimatedString, so fall back to the raw attribute there.
    const href = typeof link.href === 'string' ? link.href : (link.getAttribute('href') || '');
    result.linksWithProductInClass.push({
      href: href.substring(0, 100),
      text: link.textContent?.substring(0, 50)
    });
  }

  return result;
})()
@@ -0,0 +1,30 @@
(async () => {
  // Clicks the first `.applause-rate` element, waits three seconds, then
  // reports that element's text together with the first node whose class
  // contains "popup", "modal" or "dialog" (null when there is none).
  const target = document.querySelector('.applause-rate');

  if (!target) {
    return {
      elementFound: false,
      message: 'applause-rate element not found'
    };
  }

  target.click();

  // Fixed pause before the page is inspected again.
  await new Promise((done) => setTimeout(done, 3000));

  const report = {
    elementFound: true,
    elementText: target.textContent,
    popup: null
  };

  const overlay = document.querySelector('[class*="popup"], [class*="modal"], [class*="dialog"]');
  report.popup = overlay
    ? { className: overlay.className, text: overlay.textContent?.substring(0, 500) }
    : null;

  return report;
})()
@@ -0,0 +1,31 @@
(() => {
  // Exploration helper. Describes the first five elements whose class contains
  // "product", "item" or "card", and dumps the nearest such ancestor of the
  // first anchor whose href contains "item.jd" (null when there is none).
  const CONTAINER_SELECTOR = '[class*="product"], [class*="item"], [class*="card"]';

  const describe = (node) => ({
    className: node.className,
    tagName: node.tagName,
    innerHTML: node.innerHTML.substring(0, 200)
  });

  const productContainers = Array.from(document.querySelectorAll(CONTAINER_SELECTOR))
    .slice(0, 5)
    .map(describe);

  let sampleProduct = null;
  const firstLink = document.querySelector('a[href*="item.jd"]');
  const owner = firstLink ? firstLink.closest(CONTAINER_SELECTOR) : null;
  if (owner) {
    sampleProduct = {
      className: owner.className,
      innerHTML: owner.innerHTML.substring(0, 500)
    };
  }

  return { productContainers, sampleProduct };
})()
@@ -0,0 +1,80 @@
(() => {
  // Scrapes a product detail page by pattern-matching the text of <body>.
  // Returns { url, title, basic: { title, shop, price, sales },
  // reviews: { total, good_percent, good, neutral, bad }, region }.
  // Fields that cannot be found stay as empty strings; `basic.sales` is never
  // filled in by this script.

  // Currency sign in half-width (U+00A5) and full-width (U+FFE5) form.
  const PRICE_RE = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  // Product name followed by up to 200 characters, stopping at a currency sign.
  const TITLE_RE = /首鲜道斑节虾[^\u00A5\uFFE5]{0,200}/;
  const SHOP_RE = /首鲜道旗舰店/;
  // Review total wrapped in ASCII or full-width (U+FF08 / U+FF09) brackets.
  const TOTAL_REVIEWS_RE = /买家评价[(\uFF08](\d+[+万])[)\uFF09]/;
  const GOOD_PERCENT_RE = /超(\d+)%买家赞不绝口/;
  const REGION_RE = /[京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼]\s*[^\s]{0,10}(?:市|省|区|县|街道)/;

  // Which result field each group of label keywords feeds.
  const REVIEW_LABELS = [
    { key: 'good', labels: ['好评', 'positive'] },
    { key: 'neutral', labels: ['中评', 'neutral'] },
    { key: 'bad', labels: ['差评', 'negative'] }
  ];

  const result = {
    url: window.location.href,
    title: document.title,
    basic: {
      title: '',
      shop: '',
      price: '',
      sales: ''
    },
    reviews: {
      total: '',
      good_percent: '',
      good: '',
      neutral: '',
      bad: ''
    },
    region: ''
  };

  // A document without a <body> yields empty text instead of a TypeError.
  const allText = document.body?.textContent ?? '';

  const titleMatch = allText.match(TITLE_RE);
  if (titleMatch) {
    result.basic.title = titleMatch[0].trim().substring(0, 100);
  }

  const shopMatch = allText.match(SHOP_RE);
  if (shopMatch) {
    result.basic.shop = shopMatch[0];
  }

  const priceMatch = allText.match(PRICE_RE);
  if (priceMatch) {
    result.basic.price = priceMatch[1];
  }

  const totalReviewMatch = allText.match(TOTAL_REVIEWS_RE);
  if (totalReviewMatch) {
    result.reviews.total = totalReviewMatch[1];
  }

  const goodPercentMatch = allText.match(GOOD_PERCENT_RE);
  if (goodPercentMatch) {
    result.reviews.good_percent = goodPercentMatch[1];
  }

  const regionMatch = allText.match(REGION_RE);
  if (regionMatch) {
    result.region = regionMatch[0].trim();
  }

  for (const item of document.querySelectorAll('[class*="filter-item"], [class*="comment-item"]')) {
    const text = item.textContent || '';
    const firstNumber = text.match(/(\d+)/);
    if (!firstNumber) continue;

    for (const { key, labels } of REVIEW_LABELS) {
      // The first element that supplies a number for a category wins.
      if (!result.reviews[key] && labels.some((label) => text.includes(label))) {
        result.reviews[key] = firstNumber[1];
      }
    }
  }

  return result;
})()
@@ -0,0 +1,62 @@
(() => {
  // Scrapes price, shop, review counts and region from a product detail page.
  // Returns { url, title, price, shop, sales, reviews: { good, neutral, bad },
  // region }. Fields that cannot be found stay as empty strings; `sales` is
  // never filled in by this script.

  // Currency sign in half-width (U+00A5) and full-width (U+FFE5) form.
  const PRICE_RE = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SHOP_RE = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // "<label>: <count>" with an ASCII or full-width (U+FF1A) colon.
  const REVIEW_PATTERNS = [
    { key: 'good', pattern: /好评[:\uFF1A]\s*(\d+)/ },
    { key: 'neutral', pattern: /中评[:\uFF1A]\s*(\d+)/ },
    { key: 'bad', pattern: /差评[:\uFF1A]\s*(\d+)/ }
  ];

  const result = {
    url: window.location.href,
    title: document.title,
    price: '',
    shop: '',
    sales: '',
    reviews: {
      good: '',
      neutral: '',
      bad: ''
    },
    region: ''
  };

  // Read the page text once; a missing <body> yields empty text.
  const bodyText = document.body?.textContent ?? '';

  const priceMatch = bodyText.match(PRICE_RE);
  if (priceMatch) {
    result.price = priceMatch[1];
  }

  const shopMatch = bodyText.match(SHOP_RE);
  if (shopMatch) {
    result.shop = shopMatch[1];
  }

  for (const el of document.querySelectorAll('[class*="comment"], [class*="review"], [class*="rating"]')) {
    const text = el.textContent || '';

    for (const { key, pattern } of REVIEW_PATTERNS) {
      // Keep the first count found for each category.
      if (result.reviews[key]) continue;
      const match = text.match(pattern);
      if (match) {
        result.reviews[key] = match[1];
      }
    }
  }

  for (const el of document.querySelectorAll('[class*="address"], [class*="region"], [class*="location"]')) {
    const text = el.textContent || '';
    if (text.length > 0 && text.length < 100) {
      result.region = text.trim();
      // Whitespace-only text trims to '', so keep looking in that case.
      if (result.region) break;
    }
  }

  return result;
})()
@@ -0,0 +1,78 @@
(async () => {
  // Scrolls the page in three steps (presumably so lazily loaded cards can
  // render), then builds one record per unique product link found inside the
  // `._goodsContainer_1p2ae_1` containers.
  // Resolves to { keyword, crawlTime, products, total }.

  // Currency sign in half-width (U+00A5) and full-width (U+FFE5) form.
  const PRICE_RE = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_RE = /已售([\d+万]+)/;
  const SHOP_RE = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // Fragments stripped from the card text before it is used as the title.
  const TITLE_NOISE = [
    /已售[\d+万]+/,
    /[\d+万]+人加购/,
    /[\d+万]+人种草/,
    /[\d+万]+人浏览/,
    /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/,
    /搜同款|关注|对比/g
  ];
  // [vertical scroll offset, pause in ms] pairs, applied in order.
  const SCROLL_STEPS = [[500, 2000], [2000, 2000], [4000, 3000]];

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Drops everything up to and including the first "广告" label, then removes
  // the noise fragments.
  const cleanTitle = (text) => {
    const adIndex = text.indexOf('广告');
    const start = adIndex >= 0 ? adIndex + 2 : 0;
    return TITLE_NOISE
      .reduce((title, noise) => title.replace(noise, ''), text.substring(start).trim())
      .trim();
  };

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  for (const [top, pauseMs] of SCROLL_STEPS) {
    window.scrollTo(0, top);
    await sleep(pauseMs);
  }

  const seenLinks = new Set();

  document.querySelectorAll('._goodsContainer_1p2ae_1').forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    // id is the container's 1-based position, so skipped cards leave gaps.
    const product = { id: index + 1, link: href };

    const img = container.querySelector('img');
    const src = img ? (img.getAttribute('data-src') || img.src) : '';
    if (src) product.image = src.substring(0, 150);

    const priceMatch = text.match(PRICE_RE);
    if (priceMatch) product.price = priceMatch[1];

    const salesMatch = text.match(SALES_RE);
    if (salesMatch) product.sales = salesMatch[1];

    const shopMatch = text.match(SHOP_RE);
    if (shopMatch) product.shop = shopMatch[1];

    const title = cleanTitle(text);
    if (title.length > 5) product.title = title.substring(0, 100);

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  result.total = result.products.length;
  return result;
})()
@@ -0,0 +1,80 @@
(async () => {
  // Scrolls slightly, then to the bottom of the page, and builds one record
  // per unique product link found inside the `._goodsContainer_1p2ae_1`
  // containers. The title is the card text that precedes the first currency
  // sign. Resolves to { keyword, crawlTime, products }.

  // Currency sign in half-width (U+00A5) and full-width (U+FFE5) form.
  const CURRENCY_RE = /[\u00A5\uFFE5]/;
  const PRICE_RE = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_RE = /已售([\d+万]+)/;
  const SHOP_RE = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // Fragments stripped from the card text before it is used as the title.
  const TITLE_NOISE = [
    /已售[\d+万]+/,
    /[\d+万]+人加购/,
    /[\d+万]+人种草/,
    /[\d+万]+人浏览/,
    /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/,
    /搜同款|关注|对比/g
  ];

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Drops everything up to and including the first "广告" label, keeps only
  // the part before the first currency sign, then removes the noise fragments.
  const cleanTitle = (text) => {
    const adIndex = text.indexOf('广告');
    const start = adIndex >= 0 ? adIndex + 2 : 0;
    // split() always returns at least one element, so [0] is always defined.
    const beforePrice = text.substring(start).trim().split(CURRENCY_RE)[0].trim();
    return TITLE_NOISE
      .reduce((title, noise) => title.replace(noise, ''), beforePrice)
      .trim();
  };

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  window.scrollTo(0, 100);
  await sleep(2000);

  window.scrollTo(0, document.body.scrollHeight);
  await sleep(3000);

  const seenLinks = new Set();

  document.querySelectorAll('._goodsContainer_1p2ae_1').forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    // id is the container's 1-based position, so skipped cards leave gaps.
    const product = { id: index + 1, link: href };

    const img = container.querySelector('img');
    const src = img ? (img.getAttribute('data-src') || img.src) : '';
    if (src) product.image = src.substring(0, 150);

    const priceMatch = text.match(PRICE_RE);
    if (priceMatch) product.price = priceMatch[1];

    const salesMatch = text.match(SALES_RE);
    if (salesMatch) product.sales = salesMatch[1];

    const shopMatch = text.match(SHOP_RE);
    if (shopMatch) product.shop = shopMatch[1];

    const title = cleanTitle(text);
    if (title.length > 5) product.title = title.substring(0, 100);

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()
@@ -0,0 +1,48 @@
(() => {
  // Collects review statistics from the current page.
  // Returns { reviews: { total, good, neutral, bad }, allReviewText }, where
  // allReviewText holds the raw text of every filter/tab element inspected,
  // followed by the text of the first summary element (if any).

  // Review total wrapped in ASCII or full-width (U+FF08 / U+FF09) brackets.
  const TOTAL_REVIEWS_RE = /买家评价[(\uFF08](\d+[+万])[)\uFF09]/;
  // Which result field each group of label keywords feeds.
  const REVIEW_LABELS = [
    { key: 'good', labels: ['好评', 'positive'] },
    { key: 'neutral', labels: ['中评', 'neutral'] },
    { key: 'bad', labels: ['差评', 'negative'] }
  ];

  const result = {
    reviews: {
      total: '',
      good: '',
      neutral: '',
      bad: ''
    },
    allReviewText: []
  };

  // A document without a <body> yields empty text instead of a TypeError.
  const allText = document.body?.textContent ?? '';

  const totalMatch = allText.match(TOTAL_REVIEWS_RE);
  if (totalMatch) {
    result.reviews.total = totalMatch[1];
  }

  for (const btn of document.querySelectorAll('[class*="filter"], [class*="tab"]')) {
    const text = btn.textContent || '';
    result.allReviewText.push(text);

    const firstNumber = text.match(/(\d+)/);
    if (!firstNumber) continue;

    for (const { key, labels } of REVIEW_LABELS) {
      // Later elements overwrite earlier ones: the last match wins.
      if (labels.some((label) => text.includes(label))) {
        result.reviews[key] = firstNumber[1];
      }
    }
  }

  const commentSummary = document.querySelector('[class*="comment-summary"], [class*="summary"]');
  if (commentSummary) {
    result.allReviewText.push(commentSummary.textContent || '');
  }

  return result;
})()
@@ -0,0 +1,78 @@
(async () => {
  // Scrolls the page in three steps (presumably so lazily loaded cards can
  // render), then builds one record per unique product link found inside the
  // `._goodsContainer_1p2ae_1` containers.
  // Resolves to { keyword, crawlTime, products, total }.

  // Currency sign in half-width (U+00A5) and full-width (U+FFE5) form.
  const PRICE_RE = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_RE = /已售([\d+万]+)/;
  const SHOP_RE = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // Fragments stripped from the card text before it is used as the title.
  const TITLE_NOISE = [
    /已售[\d+万]+/,
    /[\d+万]+人加购/,
    /[\d+万]+人种草/,
    /[\d+万]+人浏览/,
    /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/,
    /搜同款|关注|对比/g
  ];
  // [vertical scroll offset, pause in ms] pairs, applied in order.
  const SCROLL_STEPS = [[500, 2000], [2000, 2000], [4000, 3000]];

  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Drops everything up to and including the first "广告" label, then removes
  // the noise fragments.
  const toTitle = (text) => {
    const adIndex = text.indexOf('广告');
    let title = text.substring(adIndex >= 0 ? adIndex + 2 : 0).trim();
    for (const noise of TITLE_NOISE) {
      title = title.replace(noise, '');
    }
    return title.trim();
  };

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  for (const [top, pauseMs] of SCROLL_STEPS) {
    window.scrollTo(0, top);
    await pause(pauseMs);
  }

  const seenLinks = new Set();

  document.querySelectorAll('._goodsContainer_1p2ae_1').forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    // id is the container's 1-based position, so skipped cards leave gaps.
    const product = { id: index + 1, link: href };

    const img = container.querySelector('img');
    const src = img ? (img.getAttribute('data-src') || img.src) : '';
    if (src) product.image = src.substring(0, 150);

    const priceMatch = text.match(PRICE_RE);
    if (priceMatch) product.price = priceMatch[1];

    const salesMatch = text.match(SALES_RE);
    if (salesMatch) product.sales = salesMatch[1];

    const shopMatch = text.match(SHOP_RE);
    if (shopMatch) product.shop = shopMatch[1];

    const title = toTitle(text);
    if (title.length > 5) product.title = title.substring(0, 100);

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  result.total = result.products.length;
  return result;
})()
@@ -0,0 +1,77 @@
(async () => {
  // Scrolls the page in three steps (presumably so lazily loaded cards can
  // render), then builds one record per unique product link found inside the
  // `._goodsContainer_1p2ae_1` containers.
  // Resolves to { keyword, crawlTime, products }.

  // Currency sign in half-width (U+00A5) and full-width (U+FFE5) form.
  const PRICE_RE = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_RE = /已售([\d+万]+)/;
  const SHOP_RE = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // Fragments stripped from the card text before it is used as the title.
  const TITLE_NOISE = [
    /已售[\d+万]+/,
    /[\d+万]+人加购/,
    /[\d+万]+人种草/,
    /[\d+万]+人浏览/,
    /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/,
    /搜同款|关注|对比/g
  ];
  // [vertical scroll offset, pause in ms] pairs, applied in order.
  const SCROLL_STEPS = [[500, 2000], [2000, 2000], [4000, 3000]];

  const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Drops everything up to and including the first "广告" label, then removes
  // the noise fragments.
  const extractTitle = (text) => {
    const adIndex = text.indexOf('广告');
    const start = adIndex >= 0 ? adIndex + 2 : 0;
    return TITLE_NOISE
      .reduce((title, noise) => title.replace(noise, ''), text.substring(start).trim())
      .trim();
  };

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  for (const [top, pauseMs] of SCROLL_STEPS) {
    window.scrollTo(0, top);
    await wait(pauseMs);
  }

  const seenLinks = new Set();

  document.querySelectorAll('._goodsContainer_1p2ae_1').forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    // id is the container's 1-based position, so skipped cards leave gaps.
    const product = { id: index + 1, link: href };

    const img = container.querySelector('img');
    const src = img ? (img.getAttribute('data-src') || img.src) : '';
    if (src) product.image = src.substring(0, 150);

    const priceMatch = text.match(PRICE_RE);
    if (priceMatch) product.price = priceMatch[1];

    const salesMatch = text.match(SALES_RE);
    if (salesMatch) product.sales = salesMatch[1];

    const shopMatch = text.match(SHOP_RE);
    if (shopMatch) product.shop = shopMatch[1];

    const title = extractTitle(text);
    if (title.length > 5) product.title = title.substring(0, 100);

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()
@@ -0,0 +1,68 @@
(() => {
  // Builds one record per unique product link found inside the
  // `._goodsContainer_1p2ae_1` containers currently in the DOM, without
  // scrolling or waiting. Returns { keyword, crawlTime, products }.

  // Currency sign in half-width (U+00A5) and full-width (U+FFE5) form.
  const PRICE_RE = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_RE = /已售([\d+万]+)/;
  const SHOP_RE = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // Fragments stripped from the card text before it is used as the title.
  const TITLE_NOISE = [
    /已售[\d+万]+/,
    /[\d+万]+人加购/,
    /[\d+万]+人种草/,
    /[\d+万]+人浏览/,
    /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/,
    /搜同款|关注|对比/g
  ];

  // Drops everything up to and including the first "广告" label, then removes
  // the noise fragments.
  const cleanTitle = (text) => {
    const adIndex = text.indexOf('广告');
    const start = adIndex >= 0 ? adIndex + 2 : 0;
    return TITLE_NOISE
      .reduce((title, noise) => title.replace(noise, ''), text.substring(start).trim())
      .trim();
  };

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  const seenLinks = new Set();

  document.querySelectorAll('._goodsContainer_1p2ae_1').forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    // id is the container's 1-based position, so skipped cards leave gaps.
    const product = { id: index + 1, link: href };

    const img = container.querySelector('img');
    const src = img ? (img.getAttribute('data-src') || img.src) : '';
    if (src) product.image = src.substring(0, 150);

    const priceMatch = text.match(PRICE_RE);
    if (priceMatch) product.price = priceMatch[1];

    const salesMatch = text.match(SALES_RE);
    if (salesMatch) product.sales = salesMatch[1];

    const shopMatch = text.match(SHOP_RE);
    if (shopMatch) product.shop = shopMatch[1];

    const title = cleanTitle(text);
    if (title.length > 5) product.title = title.substring(0, 100);

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()