@dyyz1993/agent-browser 0.24.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/agent-browser-darwin-arm64 +0 -0
- package/dist/__tests__/e2e/utils/test-helpers.d.ts +2 -2
- package/dist/__tests__/e2e/utils/test-helpers.d.ts.map +1 -1
- package/dist/__tests__/e2e/utils/test-helpers.js +6 -4
- package/dist/__tests__/e2e/utils/test-helpers.js.map +1 -1
- package/dist/actions/advanced.d.ts +73 -0
- package/dist/actions/advanced.d.ts.map +1 -0
- package/dist/actions/advanced.js +390 -0
- package/dist/actions/advanced.js.map +1 -0
- package/dist/actions/context.d.ts +36 -0
- package/dist/actions/context.d.ts.map +1 -0
- package/dist/actions/context.js +164 -0
- package/dist/actions/context.js.map +1 -0
- package/dist/actions/crawl.d.ts +8 -0
- package/dist/actions/crawl.d.ts.map +1 -0
- package/dist/actions/crawl.js +290 -0
- package/dist/actions/crawl.js.map +1 -0
- package/dist/actions/elements.d.ts +11 -0
- package/dist/actions/elements.d.ts.map +1 -0
- package/dist/actions/elements.js +78 -0
- package/dist/actions/elements.js.map +1 -0
- package/dist/actions/flow.d.ts +4 -0
- package/dist/actions/flow.d.ts.map +1 -0
- package/dist/actions/flow.js +170 -0
- package/dist/actions/flow.js.map +1 -0
- package/dist/actions/index.d.ts +7 -0
- package/dist/actions/index.d.ts.map +1 -0
- package/dist/actions/index.js +323 -0
- package/dist/actions/index.js.map +1 -0
- package/dist/actions/interact.d.ts +4 -0
- package/dist/actions/interact.d.ts.map +1 -0
- package/dist/actions/interact.js +162 -0
- package/dist/actions/interact.js.map +1 -0
- package/dist/actions/interaction.d.ts +31 -0
- package/dist/actions/interaction.d.ts.map +1 -0
- package/dist/actions/interaction.js +477 -0
- package/dist/actions/interaction.js.map +1 -0
- package/dist/actions/locators.d.ts +14 -0
- package/dist/actions/locators.d.ts.map +1 -0
- package/dist/actions/locators.js +310 -0
- package/dist/actions/locators.js.map +1 -0
- package/dist/actions/map.d.ts +4 -0
- package/dist/actions/map.d.ts.map +1 -0
- package/dist/actions/map.js +79 -0
- package/dist/actions/map.js.map +1 -0
- package/dist/actions/meta.d.ts +44 -0
- package/dist/actions/meta.d.ts.map +1 -0
- package/dist/actions/meta.js +190 -0
- package/dist/actions/meta.js.map +1 -0
- package/dist/actions/mouse.d.ts +8 -0
- package/dist/actions/mouse.d.ts.map +1 -0
- package/dist/actions/mouse.js +52 -0
- package/dist/actions/mouse.js.map +1 -0
- package/dist/actions/recorder.d.ts +20 -0
- package/dist/actions/recorder.d.ts.map +1 -0
- package/dist/actions/recorder.js +231 -0
- package/dist/actions/recorder.js.map +1 -0
- package/dist/actions/recording.d.ts +6 -0
- package/dist/actions/recording.d.ts.map +1 -0
- package/dist/actions/recording.js +22 -0
- package/dist/actions/recording.js.map +1 -0
- package/dist/actions/scrape.d.ts +10 -0
- package/dist/actions/scrape.d.ts.map +1 -0
- package/dist/actions/scrape.js +39 -0
- package/dist/actions/scrape.js.map +1 -0
- package/dist/actions/screencast.d.ts +8 -0
- package/dist/actions/screencast.d.ts.map +1 -0
- package/dist/actions/screencast.js +56 -0
- package/dist/actions/screencast.js.map +1 -0
- package/dist/actions/search.d.ts +4 -0
- package/dist/actions/search.d.ts.map +1 -0
- package/dist/actions/search.js +129 -0
- package/dist/actions/search.js.map +1 -0
- package/dist/actions/storage.d.ts +14 -0
- package/dist/actions/storage.d.ts.map +1 -0
- package/dist/actions/storage.js +63 -0
- package/dist/actions/storage.js.map +1 -0
- package/dist/actions/tabs.d.ts +16 -0
- package/dist/actions/tabs.d.ts.map +1 -0
- package/dist/actions/tabs.js +47 -0
- package/dist/actions/tabs.js.map +1 -0
- package/dist/actions/utils.d.ts +15 -0
- package/dist/actions/utils.d.ts.map +1 -0
- package/dist/actions/utils.js +234 -0
- package/dist/actions/utils.js.map +1 -0
- package/dist/browser/browser-manager.d.ts +249 -0
- package/dist/browser/browser-manager.d.ts.map +1 -0
- package/dist/browser/browser-manager.js +1251 -0
- package/dist/browser/browser-manager.js.map +1 -0
- package/dist/browser/index.d.ts +3 -0
- package/dist/browser/index.d.ts.map +1 -0
- package/dist/browser/index.js +2 -0
- package/dist/browser/index.js.map +1 -0
- package/dist/browser/network-tracker.d.ts +39 -0
- package/dist/browser/network-tracker.d.ts.map +1 -0
- package/dist/browser/network-tracker.js +287 -0
- package/dist/browser/network-tracker.js.map +1 -0
- package/dist/browser/providers.d.ts +27 -0
- package/dist/browser/providers.d.ts.map +1 -0
- package/dist/browser/providers.js +293 -0
- package/dist/browser/providers.js.map +1 -0
- package/dist/browser/recorder-manager.d.ts +69 -0
- package/dist/browser/recorder-manager.d.ts.map +1 -0
- package/dist/browser/recorder-manager.js +755 -0
- package/dist/browser/recorder-manager.js.map +1 -0
- package/dist/browser/recording-manager.d.ts +46 -0
- package/dist/browser/recording-manager.d.ts.map +1 -0
- package/dist/browser/recording-manager.js +156 -0
- package/dist/browser/recording-manager.js.map +1 -0
- package/dist/browser/screencast-manager.d.ts +49 -0
- package/dist/browser/screencast-manager.d.ts.map +1 -0
- package/dist/browser/screencast-manager.js +131 -0
- package/dist/browser/screencast-manager.js.map +1 -0
- package/dist/browser/types.d.ts +101 -0
- package/dist/browser/types.d.ts.map +1 -0
- package/dist/browser/types.js +2 -0
- package/dist/browser/types.js.map +1 -0
- package/dist/browser-events.d.ts +25 -0
- package/dist/browser-events.d.ts.map +1 -0
- package/dist/browser-events.js +15 -0
- package/dist/browser-events.js.map +1 -0
- package/dist/cli/commands.d.ts.map +1 -1
- package/dist/cli/commands.js +145 -1
- package/dist/cli/commands.js.map +1 -1
- package/dist/cli/connection.d.ts.map +1 -1
- package/dist/cli/connection.js +15 -22
- package/dist/cli/connection.js.map +1 -1
- package/dist/cli/flags.d.ts +1 -0
- package/dist/cli/flags.d.ts.map +1 -1
- package/dist/cli/flags.js +8 -0
- package/dist/cli/flags.js.map +1 -1
- package/dist/cli/help.d.ts.map +1 -1
- package/dist/cli/help.js +204 -4
- package/dist/cli/help.js.map +1 -1
- package/dist/cli/output.d.ts.map +1 -1
- package/dist/cli/output.js +72 -0
- package/dist/cli/output.js.map +1 -1
- package/dist/cli.js +149 -14
- package/dist/cli.js.map +1 -1
- package/dist/daemon.d.ts +1 -1
- package/dist/daemon.d.ts.map +1 -1
- package/dist/daemon.js +12 -13
- package/dist/daemon.js.map +1 -1
- package/dist/flow/exporters/playwright.d.ts +23 -1
- package/dist/flow/exporters/playwright.d.ts.map +1 -1
- package/dist/flow/exporters/playwright.js +333 -85
- package/dist/flow/exporters/playwright.js.map +1 -1
- package/dist/flow/exporters/python.d.ts +22 -0
- package/dist/flow/exporters/python.d.ts.map +1 -1
- package/dist/flow/exporters/python.js +325 -74
- package/dist/flow/exporters/python.js.map +1 -1
- package/dist/flow/exporters/selenium.d.ts.map +1 -1
- package/dist/flow/exporters/selenium.js +0 -1
- package/dist/flow/exporters/selenium.js.map +1 -1
- package/dist/flow/flow-executor.d.ts +1 -1
- package/dist/flow/flow-executor.d.ts.map +1 -1
- package/dist/flow/flow-executor.js +11 -11
- package/dist/flow/flow-executor.js.map +1 -1
- package/dist/flow/output.js.map +1 -1
- package/dist/flow/plugin-system.d.ts +1 -1
- package/dist/flow/plugin-system.d.ts.map +1 -1
- package/dist/flow/plugin-system.js +2 -2
- package/dist/flow/plugin-system.js.map +1 -1
- package/dist/flow/plugins/logging-plugin.js +1 -1
- package/dist/flow/plugins/logging-plugin.js.map +1 -1
- package/dist/flow/presets/console-capture.js +50 -0
- package/dist/flow/presets/fetch-capture.js +107 -0
- package/dist/flow/presets/sse-stream.js +85 -0
- package/dist/flow/presets/xhr-only.js +44 -0
- package/dist/flow/recorder-to-flow.d.ts.map +1 -1
- package/dist/flow/recorder-to-flow.js +1 -3
- package/dist/flow/recorder-to-flow.js.map +1 -1
- package/dist/flow/site-manager.d.ts.map +1 -1
- package/dist/flow/site-manager.js +6 -2
- package/dist/flow/site-manager.js.map +1 -1
- package/dist/human-mouse.d.ts +1 -1
- package/dist/human-mouse.d.ts.map +1 -1
- package/dist/human-mouse.js +2 -2
- package/dist/human-mouse.js.map +1 -1
- package/dist/protocol.d.ts.map +1 -1
- package/dist/protocol.js +91 -1
- package/dist/protocol.js.map +1 -1
- package/dist/rc-config.js +4 -4
- package/dist/rc-config.js.map +1 -1
- package/dist/recorder/inject.js +31 -5
- package/dist/snapshot.d.ts.map +1 -1
- package/dist/snapshot.js +3 -4
- package/dist/snapshot.js.map +1 -1
- package/dist/stream-server-standalone.d.ts +1 -1
- package/dist/stream-server-standalone.d.ts.map +1 -1
- package/dist/stream-server-standalone.js +42 -23
- package/dist/stream-server-standalone.js.map +1 -1
- package/dist/stream-server.d.ts +1 -1
- package/dist/stream-server.d.ts.map +1 -1
- package/dist/stream-server.js +26 -21
- package/dist/stream-server.js.map +1 -1
- package/dist/test-live.js +9 -3
- package/dist/test-live.js.map +1 -1
- package/dist/types.d.ts +123 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +4 -3
- package/scripts/README.md +66 -0
- package/scripts/check_goods_container.js +35 -0
- package/scripts/check_page_content.js +36 -0
- package/scripts/click_applause_rate.js +30 -0
- package/scripts/copy-flow-presets.js +25 -0
- package/scripts/douyin-flow-test.sh +72 -0
- package/scripts/douyin-test.sh +101 -0
- package/scripts/explore_jd_page.js +31 -0
- package/scripts/extract_all_jd_data.js +80 -0
- package/scripts/extract_jd_product_detail.js +62 -0
- package/scripts/extract_jd_products_correct_links.js +78 -0
- package/scripts/extract_jd_products_final.js +80 -0
- package/scripts/extract_jd_reviews.js +48 -0
- package/scripts/extract_jd_seafood_final.js +78 -0
- package/scripts/extract_multiple_products.js +77 -0
- package/scripts/extract_products_no_scroll.js +68 -0
- package/scripts/extract_products_simple.js +68 -0
- package/scripts/find_applause_rate.js +26 -0
- package/scripts/find_jd_links.js +28 -0
- package/scripts/find_main_content.js +20 -0
- package/scripts/find_product_cards.js +38 -0
- package/scripts/find_root_content.js +26 -0
- package/scripts/find_unique_products.js +55 -0
- package/scripts/get_jd_product_detail.js +16 -0
- package/scripts/get_jd_products.js +23 -0
- package/scripts/get_jd_seafood_products.js +44 -0
- package/scripts/get_product_details_from_images.js +54 -0
- package/scripts/verify-form.sh +67 -0
- package/scripts/verify-login.sh +65 -0
- package/scripts/verify-recording.sh +80 -0
- package/scripts/verify-upload.sh +41 -0
- package/bin/agent-browser-darwin-x64 +0 -0
- package/bin/agent-browser-linux-arm64 +0 -0
- package/bin/agent-browser-linux-x64 +0 -0
- package/bin/agent-browser-win32-x64.exe +0 -0
- package/dist/actions.d.ts +0 -51
- package/dist/actions.d.ts.map +0 -1
- package/dist/actions.js +0 -2662
- package/dist/actions.js.map +0 -1
- package/dist/browser.d.ts +0 -651
- package/dist/browser.d.ts.map +0 -1
- package/dist/browser.js +0 -3088
- package/dist/browser.js.map +0 -1
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
(() => {
  /*
   * Collects summary fields from the product page currently loaded in the
   * browser and returns them as a plain object:
   *   { url, title, price, shop, sales, reviews: { good, neutral, bad }, region }
   * Every scraped field stays '' when nothing matching is found.
   * `sales` is part of the returned shape but this script never fills it.
   */

  // Currency sign in half-width (U+00A5) and full-width (U+FFE5) form, written
  // as escapes so the two variants cannot silently collapse into one character.
  const PRICE_PATTERN = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  // 2-10 non-space characters followed by one of the known shop suffixes.
  const SHOP_PATTERN = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // [result key, label that must appear in the text, label + colon + count].
  // The colon is accepted in ASCII and full-width (U+FF1A) form.
  const REVIEW_PATTERNS = [
    ['good', '好评', /好评[:\uFF1A]\s*(\d+)/],
    ['neutral', '中评', /中评[:\uFF1A]\s*(\d+)/],
    ['bad', '差评', /差评[:\uFF1A]\s*(\d+)/]
  ];
  const MAX_REGION_LENGTH = 100;

  const result = {
    url: window.location.href,
    title: document.title,
    price: '',
    shop: '',
    sales: '',
    reviews: {
      good: '',
      neutral: '',
      bad: ''
    },
    region: ''
  };

  // Price and shop are taken from the first match anywhere in the page text.
  const pageText = document.body.textContent;

  const priceMatch = pageText.match(PRICE_PATTERN);
  if (priceMatch) {
    result.price = priceMatch[1];
  }

  const shopMatch = pageText.match(SHOP_PATTERN);
  if (shopMatch) {
    result.shop = shopMatch[1];
  }

  // Review counts: the first element that yields a count for a bucket wins.
  const reviewElements = document.querySelectorAll('[class*="comment"], [class*="review"], [class*="rating"]');
  reviewElements.forEach((el) => {
    const text = el.textContent || '';

    REVIEW_PATTERNS.forEach(([field, label, pattern]) => {
      if (result.reviews[field] || !text.includes(label)) {
        return;
      }
      const match = text.match(pattern);
      if (match) {
        result.reviews[field] = match[1];
      }
    });
  });

  // Region: first candidate whose trimmed text is non-empty and short.
  // The length is measured after trimming so surrounding markup whitespace
  // neither disqualifies a short value nor lets a blank one through.
  const regionElements = document.querySelectorAll('[class*="address"], [class*="region"], [class*="location"]');
  for (const el of regionElements) {
    const text = (el.textContent || '').trim();
    if (text.length > 0 && text.length < MAX_REGION_LENGTH) {
      result.region = text;
      break;
    }
  }

  return result;
})()
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
(async () => {
  /*
   * Scrolls the current listing page in three steps, pausing after each one,
   * then extracts one record per product container that has a link.
   * Resolves to { keyword, crawlTime, products, total }.
   * Each product has `id` and `link`, plus `image`, `price`, `sales`, `shop`
   * and `title` when they could be extracted. Containers whose link was already
   * seen are skipped, and a product is kept only if it has a title or a price.
   * `id` is the container's 1-based position in the DOM, so ids may have gaps.
   */

  const CONTAINER_SELECTOR = '._goodsContainer_1p2ae_1';
  // [vertical scroll offset in px, pause afterwards in ms]
  const SCROLL_STEPS = [
    [500, 2000],
    [2000, 2000],
    [4000, 3000]
  ];
  // Currency sign in half-width (U+00A5) and full-width (U+FFE5) form, written
  // as escapes so the two variants cannot silently collapse into one character.
  const PRICE_PATTERN = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_PATTERN = /已售([\d\+万]+)/;
  const SHOP_PATTERN = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // Fragments removed from the container text to leave the title. Only the
  // last pattern is global; the others remove their first occurrence only.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/,
    /搜同款|关注|对比/g
  ];
  const MIN_TITLE_LENGTH = 5;
  const MAX_TITLE_LENGTH = 100;
  const MAX_IMAGE_LENGTH = 150;

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Returns the cleaned title, or undefined when too little text remains.
  const extractTitle = (text) => {
    // Skip everything up to and including the two-character ad marker.
    const adIndex = text.indexOf('广告');
    const titleStart = adIndex >= 0 ? adIndex + 2 : 0;
    let title = text.substring(titleStart).trim();
    TITLE_NOISE.forEach((noise) => {
      title = title.replace(noise, '');
    });
    title = title.trim();
    return title.length > MIN_TITLE_LENGTH ? title.substring(0, MAX_TITLE_LENGTH) : undefined;
  };

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  for (const [offset, pauseMs] of SCROLL_STEPS) {
    window.scrollTo(0, offset);
    await sleep(pauseMs);
  }

  const seenLinks = new Set();

  document.querySelectorAll(CONTAINER_SELECTOR).forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      // Prefer the data-src attribute over the src property when it is set.
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, MAX_IMAGE_LENGTH);
    }

    const priceMatch = text.match(PRICE_PATTERN);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_PATTERN);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_PATTERN);
    if (shopMatch) {
      product.shop = shopMatch[1];
    }

    const title = extractTitle(text);
    if (title) {
      product.title = title;
    }

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  result.total = result.products.length;
  return result;
})()
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
(async () => {
  /*
   * Scrolls slightly, then to the bottom of the page, pausing after each
   * scroll, and extracts one record per product container that has a link.
   * Resolves to { keyword, crawlTime, products }.
   * Each product has `id` and `link`, plus `image`, `price`, `sales`, `shop`
   * and `title` when they could be extracted. The title is the container text
   * before the first currency sign, with known noise fragments removed.
   * `id` is the container's 1-based position in the DOM, so ids may have gaps.
   */

  const CONTAINER_SELECTOR = '._goodsContainer_1p2ae_1';
  // Currency sign in half-width (U+00A5) and full-width (U+FFE5) form, written
  // as escapes so the two variants cannot silently collapse into one character.
  const CURRENCY_SIGN = /[\u00A5\uFFE5]/;
  const PRICE_PATTERN = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_PATTERN = /已售([\d\+万]+)/;
  const SHOP_PATTERN = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // Fragments removed from the title. Only the last pattern is global; the
  // others remove their first occurrence only.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/,
    /搜同款|关注|对比/g
  ];
  const MIN_TITLE_LENGTH = 5;
  const MAX_TITLE_LENGTH = 100;
  const MAX_IMAGE_LENGTH = 150;

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Returns the cleaned title, or undefined when too little text remains.
  const extractTitle = (text) => {
    // Skip everything up to and including the two-character ad marker.
    const adIndex = text.indexOf('广告');
    const titleStart = adIndex >= 0 ? adIndex + 2 : 0;
    const titleText = text.substring(titleStart).trim();

    // split() always returns at least one element, so [0] is always defined.
    let title = titleText.split(CURRENCY_SIGN)[0].trim();
    TITLE_NOISE.forEach((noise) => {
      title = title.replace(noise, '');
    });
    title = title.trim();
    return title.length > MIN_TITLE_LENGTH ? title.substring(0, MAX_TITLE_LENGTH) : undefined;
  };

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  window.scrollTo(0, 100);
  await sleep(2000);

  // The page height is read only after the first pause has elapsed.
  window.scrollTo(0, document.body.scrollHeight);
  await sleep(3000);

  const seenLinks = new Set();

  document.querySelectorAll(CONTAINER_SELECTOR).forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';

    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      // Prefer the data-src attribute over the src property when it is set.
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, MAX_IMAGE_LENGTH);
    }

    const priceMatch = text.match(PRICE_PATTERN);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_PATTERN);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_PATTERN);
    if (shopMatch) {
      product.shop = shopMatch[1];
    }

    const title = extractTitle(text);
    if (title) {
      product.title = title;
    }

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
(() => {
  /*
   * Collects review statistics from the current page and returns
   *   { reviews: { total, good, neutral, bad }, allReviewText }
   * `allReviewText` holds the raw text of every filter/tab element inspected,
   * plus the text of the first summary element when one exists.
   * Counts stay '' when nothing matching is found.
   */

  // "买家评价" followed by a bracketed count such as (500+), (2万), (2万+) or
  // (1000). Brackets are accepted in ASCII and full-width (U+FF08 / U+FF09)
  // form, written as escapes so the variants cannot collapse into one. The
  // numeric part may carry any run of "+" / "万" suffix characters, or none.
  const TOTAL_PATTERN = /买家评价[(\uFF08](\d+(?:\.\d+)?[+万]*)[)\uFF09]/;
  const FIRST_NUMBER = /(\d+)/;
  // [result key, Chinese label, English label]
  const BUCKETS = [
    ['good', '好评', 'positive'],
    ['neutral', '中评', 'neutral'],
    ['bad', '差评', 'negative']
  ];

  const result = {
    reviews: {
      total: '',
      good: '',
      neutral: '',
      bad: ''
    },
    allReviewText: []
  };

  const allText = document.body.textContent;

  const totalMatch = allText.match(TOTAL_PATTERN);
  if (totalMatch) {
    result.reviews.total = totalMatch[1];
  }

  const reviewButtons = document.querySelectorAll('[class*="filter"], [class*="tab"]');

  reviewButtons.forEach((btn) => {
    const text = btn.textContent || '';
    result.allReviewText.push(text);

    // The count is the first run of digits in the element text, wherever it
    // sits relative to the label. An element containing several labels gives
    // every matching bucket the same number, and later elements overwrite
    // earlier ones.
    const match = text.match(FIRST_NUMBER);
    if (!match) return;

    BUCKETS.forEach(([field, zhLabel, enLabel]) => {
      if (text.includes(zhLabel) || text.includes(enLabel)) {
        result.reviews[field] = match[1];
      }
    });
  });

  const commentSummary = document.querySelector('[class*="comment-summary"], [class*="summary"]');
  if (commentSummary) {
    const summaryText = commentSummary.textContent || '';
    result.allReviewText.push(summaryText);
  }

  return result;
})()
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
(async () => {
  /*
   * Scrolls the current listing page in three steps, pausing after each one,
   * then extracts one record per product container that has a link.
   * Resolves to { keyword, crawlTime, products, total }.
   * Each product has `id` and `link`, plus `image`, `price`, `sales`, `shop`
   * and `title` when they could be extracted. Containers whose link was already
   * seen are skipped, and a product is kept only if it has a title or a price.
   * `id` is the container's 1-based position in the DOM, so ids may have gaps.
   */

  const CONTAINER_SELECTOR = '._goodsContainer_1p2ae_1';
  // [vertical scroll offset in px, pause afterwards in ms]
  const SCROLL_STEPS = [
    [500, 2000],
    [2000, 2000],
    [4000, 3000]
  ];
  // Currency sign in half-width (U+00A5) and full-width (U+FFE5) form, written
  // as escapes so the two variants cannot silently collapse into one character.
  const PRICE_PATTERN = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_PATTERN = /已售([\d\+万]+)/;
  const SHOP_PATTERN = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // Fragments removed from the container text to leave the title. Only the
  // last pattern is global; the others remove their first occurrence only.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/,
    /搜同款|关注|对比/g
  ];
  const MIN_TITLE_LENGTH = 5;
  const MAX_TITLE_LENGTH = 100;
  const MAX_IMAGE_LENGTH = 150;

  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Returns the cleaned title, or undefined when too little text remains.
  const cleanTitle = (text) => {
    // Skip everything up to and including the two-character ad marker.
    const adIndex = text.indexOf('广告');
    const titleStart = adIndex >= 0 ? adIndex + 2 : 0;
    let title = text.substring(titleStart).trim();
    TITLE_NOISE.forEach((noise) => {
      title = title.replace(noise, '');
    });
    title = title.trim();
    return title.length > MIN_TITLE_LENGTH ? title.substring(0, MAX_TITLE_LENGTH) : undefined;
  };

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  for (const [offset, pauseMs] of SCROLL_STEPS) {
    window.scrollTo(0, offset);
    await pause(pauseMs);
  }

  const seenLinks = new Set();

  document.querySelectorAll(CONTAINER_SELECTOR).forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      // Prefer the data-src attribute over the src property when it is set.
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, MAX_IMAGE_LENGTH);
    }

    const priceMatch = text.match(PRICE_PATTERN);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_PATTERN);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_PATTERN);
    if (shopMatch) {
      product.shop = shopMatch[1];
    }

    const title = cleanTitle(text);
    if (title) {
      product.title = title;
    }

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  result.total = result.products.length;
  return result;
})()
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
(async () => {
  /*
   * Scrolls the current listing page in three steps, pausing after each one,
   * then extracts one record per product container that has a link.
   * Resolves to { keyword, crawlTime, products }.
   * Each product has `id` and `link`, plus `image`, `price`, `sales`, `shop`
   * and `title` when they could be extracted. Containers whose link was already
   * seen are skipped, and a product is kept only if it has a title or a price.
   * `id` is the container's 1-based position in the DOM, so ids may have gaps.
   */

  const CONTAINER_SELECTOR = '._goodsContainer_1p2ae_1';
  // [vertical scroll offset in px, pause afterwards in ms]
  const SCROLL_STEPS = [
    [500, 2000],
    [2000, 2000],
    [4000, 3000]
  ];
  // Currency sign in half-width (U+00A5) and full-width (U+FFE5) form, written
  // as escapes so the two variants cannot silently collapse into one character.
  const PRICE_PATTERN = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_PATTERN = /已售([\d\+万]+)/;
  const SHOP_PATTERN = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // Fragments removed from the container text to leave the title. Only the
  // last pattern is global; the others remove their first occurrence only.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/,
    /搜同款|关注|对比/g
  ];
  const MIN_TITLE_LENGTH = 5;
  const MAX_TITLE_LENGTH = 100;
  const MAX_IMAGE_LENGTH = 150;

  const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Returns the cleaned title, or undefined when too little text remains.
  const buildTitle = (text) => {
    // Skip everything up to and including the two-character ad marker.
    const adIndex = text.indexOf('广告');
    const titleStart = adIndex >= 0 ? adIndex + 2 : 0;
    let title = text.substring(titleStart).trim();
    TITLE_NOISE.forEach((noise) => {
      title = title.replace(noise, '');
    });
    title = title.trim();
    return title.length > MIN_TITLE_LENGTH ? title.substring(0, MAX_TITLE_LENGTH) : undefined;
  };

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  for (const [offset, pauseMs] of SCROLL_STEPS) {
    window.scrollTo(0, offset);
    await wait(pauseMs);
  }

  const seenLinks = new Set();

  document.querySelectorAll(CONTAINER_SELECTOR).forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      // Prefer the data-src attribute over the src property when it is set.
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, MAX_IMAGE_LENGTH);
    }

    const priceMatch = text.match(PRICE_PATTERN);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_PATTERN);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_PATTERN);
    if (shopMatch) {
      product.shop = shopMatch[1];
    }

    const title = buildTitle(text);
    if (title) {
      product.title = title;
    }

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
(() => {
  /*
   * Extracts one record per product container already present in the DOM,
   * without scrolling or waiting.
   * Returns { keyword, crawlTime, products }.
   * Each product has `id` and `link`, plus `image`, `price`, `sales`, `shop`
   * and `title` when they could be extracted. Containers whose link was already
   * seen are skipped, and a product is kept only if it has a title or a price.
   * `id` is the container's 1-based position in the DOM, so ids may have gaps.
   */

  const CONTAINER_SELECTOR = '._goodsContainer_1p2ae_1';
  // Currency sign in half-width (U+00A5) and full-width (U+FFE5) form, written
  // as escapes so the two variants cannot silently collapse into one character.
  const PRICE_PATTERN = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_PATTERN = /已售([\d\+万]+)/;
  const SHOP_PATTERN = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // Fragments removed from the container text to leave the title. Only the
  // last pattern is global; the others remove their first occurrence only.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/,
    /搜同款|关注|对比/g
  ];
  const MIN_TITLE_LENGTH = 5;
  const MAX_TITLE_LENGTH = 100;
  const MAX_IMAGE_LENGTH = 150;

  // Returns the cleaned title, or undefined when too little text remains.
  const extractTitle = (text) => {
    // Skip everything up to and including the two-character ad marker.
    const adIndex = text.indexOf('广告');
    const titleStart = adIndex >= 0 ? adIndex + 2 : 0;
    let title = text.substring(titleStart).trim();
    TITLE_NOISE.forEach((noise) => {
      title = title.replace(noise, '');
    });
    title = title.trim();
    return title.length > MIN_TITLE_LENGTH ? title.substring(0, MAX_TITLE_LENGTH) : undefined;
  };

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  const seenLinks = new Set();

  document.querySelectorAll(CONTAINER_SELECTOR).forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      // Prefer the data-src attribute over the src property when it is set.
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, MAX_IMAGE_LENGTH);
    }

    const priceMatch = text.match(PRICE_PATTERN);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_PATTERN);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_PATTERN);
    if (shopMatch) {
      product.shop = shopMatch[1];
    }

    const title = extractTitle(text);
    if (title) {
      product.title = title;
    }

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
(() => {
  /*
   * Synchronously extracts one record per product container found in the
   * current DOM.
   * Returns { keyword, crawlTime, products }.
   * Each product has `id` and `link`, plus `image`, `price`, `sales`, `shop`
   * and `title` when they could be extracted. Containers whose link was already
   * seen are skipped, and a product is kept only if it has a title or a price.
   * `id` is the container's 1-based position in the DOM, so ids may have gaps.
   */

  const CONTAINER_SELECTOR = '._goodsContainer_1p2ae_1';
  // Currency sign in half-width (U+00A5) and full-width (U+FFE5) form, written
  // as escapes so the two variants cannot silently collapse into one character.
  const PRICE_PATTERN = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_PATTERN = /已售([\d\+万]+)/;
  const SHOP_PATTERN = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // Fragments removed from the container text to leave the title. Only the
  // last pattern is global; the others remove their first occurrence only.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/,
    /搜同款|关注|对比/g
  ];
  const MIN_TITLE_LENGTH = 5;
  const MAX_TITLE_LENGTH = 100;
  const MAX_IMAGE_LENGTH = 150;

  // Returns the cleaned title, or undefined when too little text remains.
  const cleanTitle = (text) => {
    // Skip everything up to and including the two-character ad marker.
    const adIndex = text.indexOf('广告');
    const titleStart = adIndex >= 0 ? adIndex + 2 : 0;
    let title = text.substring(titleStart).trim();
    TITLE_NOISE.forEach((noise) => {
      title = title.replace(noise, '');
    });
    title = title.trim();
    return title.length > MIN_TITLE_LENGTH ? title.substring(0, MAX_TITLE_LENGTH) : undefined;
  };

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  const seenLinks = new Set();

  document.querySelectorAll(CONTAINER_SELECTOR).forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      // Prefer the data-src attribute over the src property when it is set.
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, MAX_IMAGE_LENGTH);
    }

    const priceMatch = text.match(PRICE_PATTERN);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_PATTERN);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_PATTERN);
    if (shopMatch) {
      product.shop = shopMatch[1];
    }

    const title = cleanTitle(text);
    if (title) {
      product.title = title;
    }

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
(() => {
  /*
   * Looks for the `.applause-rate` element.
   * When present, returns its text and the first 500 characters of its markup.
   * When absent, returns up to 10 elements whose class attribute contains
   * "applause" or "rate", so a likely replacement selector can be spotted.
   */
  const target = document.querySelector('.applause-rate');

  if (!target) {
    // Describe every loosely matching element, then keep the first ten.
    const candidates = Array.from(
      document.querySelectorAll('[class*="applause"], [class*="rate"]'),
      (node) => ({
        className: node.className,
        text: node.textContent?.substring(0, 100)
      })
    );

    return {
      elementFound: false,
      message: 'applause-rate element not found',
      similarElements: candidates.slice(0, 10)
    };
  }

  return {
    elementFound: true,
    elementText: target.textContent,
    elementHTML: target.outerHTML.substring(0, 500)
  };
})()
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
(() => {
  /*
   * Surveys anchors whose href contains "jd".
   * Returns { allLinks, jdLinks, productLinks }:
   *   allLinks     - the first 20 such anchors (href cut to 100 chars, text to 50)
   *   jdLinks      - every such anchor whose href contains "item.jd" or
   *                  "product", with its parent element's class name
   *   productLinks - always an empty array; this script never fills it
   */
  const anchors = Array.from(document.querySelectorAll('a[href*="jd"]'));

  const looksLikeProduct = (anchor) =>
    anchor.href.includes('item.jd') || anchor.href.includes('product');

  const allLinks = anchors.slice(0, 20).map((anchor) => ({
    href: anchor.href.substring(0, 100),
    text: anchor.textContent?.substring(0, 50)
  }));

  const jdLinks = anchors.filter(looksLikeProduct).map((anchor) => ({
    href: anchor.href.substring(0, 100),
    text: anchor.textContent?.substring(0, 50),
    parentClass: anchor.parentElement?.className
  }));

  return {
    allLinks,
    jdLinks,
    productLinks: []
  };
})()
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
(() => {
  /*
   * Describes the first 30 direct children of <body>.
   * Returns { bodyChildren }, one entry per child with its position, tag name,
   * id, class name and number of child elements.
   */
  const topLevelNodes = Array.from(document.body.children).slice(0, 30);

  return {
    bodyChildren: topLevelNodes.map((node, position) => ({
      index: position,
      tagName: node.tagName,
      id: node.id,
      className: node.className,
      childrenCount: node.children.length
    }))
  };
})()
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
(() => {
  /*
   * Probes the page for a product-list container.
   * Returns { bodyClasses, children } and, when a container is found, also
   * `mainContainer` with its id, class name and child count.
   * `children` describes the container's first 5 children, or the first 20
   * <div> elements under <body> when no container matches.
   */

  // Summary of one element; innerHTML is cut to its first 200 characters.
  const snapshot = (node, index) => ({
    index,
    tagName: node.tagName,
    className: node.className,
    id: node.id,
    innerHTML: node.innerHTML.substring(0, 200)
  });

  const report = {
    bodyClasses: document.body.className,
    children: []
  };

  const container = document.querySelector('main, #J_goodsList, [id*="goods"], [id*="product"]');

  let sample;
  if (container) {
    report.mainContainer = {
      id: container.id,
      className: container.className,
      childrenCount: container.children.length
    };
    sample = Array.from(container.children).slice(0, 5);
  } else {
    sample = Array.from(document.body.querySelectorAll('div')).slice(0, 20);
  }

  sample.forEach((node, index) => {
    report.children.push(snapshot(node, index));
  });

  return report;
})()
|