@dyyz1993/agent-browser 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/README.md +907 -0
- package/bin/agent-browser-darwin-arm64 +0 -0
- package/bin/agent-browser.js +120 -0
- package/dist/__tests__/e2e/utils/test-helpers.d.ts +5 -0
- package/dist/__tests__/e2e/utils/test-helpers.d.ts.map +1 -0
- package/dist/__tests__/e2e/utils/test-helpers.js +22 -0
- package/dist/__tests__/e2e/utils/test-helpers.js.map +1 -0
- package/dist/__tests__/test-iframe.d.ts +2 -0
- package/dist/__tests__/test-iframe.d.ts.map +1 -0
- package/dist/__tests__/test-iframe.js +52 -0
- package/dist/__tests__/test-iframe.js.map +1 -0
- package/dist/__tests__/utils/parseCli.d.ts +20 -0
- package/dist/__tests__/utils/parseCli.d.ts.map +1 -0
- package/dist/__tests__/utils/parseCli.js +1086 -0
- package/dist/__tests__/utils/parseCli.js.map +1 -0
- package/dist/actions.d.ts +50 -0
- package/dist/actions.d.ts.map +1 -0
- package/dist/actions.js +2164 -0
- package/dist/actions.js.map +1 -0
- package/dist/browser.d.ts +556 -0
- package/dist/browser.d.ts.map +1 -0
- package/dist/browser.js +2599 -0
- package/dist/browser.js.map +1 -0
- package/dist/cli/commands.d.ts +8 -0
- package/dist/cli/commands.d.ts.map +1 -0
- package/dist/cli/commands.js +1038 -0
- package/dist/cli/commands.js.map +1 -0
- package/dist/cli/connection.d.ts +50 -0
- package/dist/cli/connection.d.ts.map +1 -0
- package/dist/cli/connection.js +595 -0
- package/dist/cli/connection.js.map +1 -0
- package/dist/cli/flags.d.ts +36 -0
- package/dist/cli/flags.d.ts.map +1 -0
- package/dist/cli/flags.js +206 -0
- package/dist/cli/flags.js.map +1 -0
- package/dist/cli/help.d.ts +4 -0
- package/dist/cli/help.d.ts.map +1 -0
- package/dist/cli/help.js +1024 -0
- package/dist/cli/help.js.map +1 -0
- package/dist/cli/output.d.ts +14 -0
- package/dist/cli/output.d.ts.map +1 -0
- package/dist/cli/output.js +456 -0
- package/dist/cli/output.js.map +1 -0
- package/dist/cli-new.d.ts +3 -0
- package/dist/cli-new.d.ts.map +1 -0
- package/dist/cli-new.js +308 -0
- package/dist/cli-new.js.map +1 -0
- package/dist/cli-old.d.ts +3 -0
- package/dist/cli-old.d.ts.map +1 -0
- package/dist/cli-old.js +1101 -0
- package/dist/cli-old.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +403 -0
- package/dist/cli.js.map +1 -0
- package/dist/content-detection.d.ts +18 -0
- package/dist/content-detection.d.ts.map +1 -0
- package/dist/content-detection.js +68 -0
- package/dist/content-detection.js.map +1 -0
- package/dist/daemon.d.ts +55 -0
- package/dist/daemon.d.ts.map +1 -0
- package/dist/daemon.js +426 -0
- package/dist/daemon.js.map +1 -0
- package/dist/diff.d.ts +42 -0
- package/dist/diff.d.ts.map +1 -0
- package/dist/diff.js +166 -0
- package/dist/diff.js.map +1 -0
- package/dist/human-mouse.d.ts +31 -0
- package/dist/human-mouse.d.ts.map +1 -0
- package/dist/human-mouse.js +184 -0
- package/dist/human-mouse.js.map +1 -0
- package/dist/ios-actions.d.ts +11 -0
- package/dist/ios-actions.d.ts.map +1 -0
- package/dist/ios-actions.js +228 -0
- package/dist/ios-actions.js.map +1 -0
- package/dist/ios-manager.d.ts +266 -0
- package/dist/ios-manager.d.ts.map +1 -0
- package/dist/ios-manager.js +1076 -0
- package/dist/ios-manager.js.map +1 -0
- package/dist/message-bridge.d.ts +10 -0
- package/dist/message-bridge.d.ts.map +1 -0
- package/dist/message-bridge.js +60 -0
- package/dist/message-bridge.js.map +1 -0
- package/dist/protocol.d.ts +26 -0
- package/dist/protocol.d.ts.map +1 -0
- package/dist/protocol.js +912 -0
- package/dist/protocol.js.map +1 -0
- package/dist/recorder/binding.d.ts +24 -0
- package/dist/recorder/binding.d.ts.map +1 -0
- package/dist/recorder/binding.js +215 -0
- package/dist/recorder/binding.js.map +1 -0
- package/dist/recorder/index.d.ts +4 -0
- package/dist/recorder/index.d.ts.map +1 -0
- package/dist/recorder/index.js +4 -0
- package/dist/recorder/index.js.map +1 -0
- package/dist/recorder/inject.js +1913 -0
- package/dist/recorder/recorder.d.ts +19 -0
- package/dist/recorder/recorder.d.ts.map +1 -0
- package/dist/recorder/recorder.js +101 -0
- package/dist/recorder/recorder.js.map +1 -0
- package/dist/recorder/store.d.ts +22 -0
- package/dist/recorder/store.d.ts.map +1 -0
- package/dist/recorder/store.js +150 -0
- package/dist/recorder/store.js.map +1 -0
- package/dist/recorder/types.d.ts +73 -0
- package/dist/recorder/types.d.ts.map +1 -0
- package/dist/recorder/types.js +5 -0
- package/dist/recorder/types.js.map +1 -0
- package/dist/snapshot.d.ts +81 -0
- package/dist/snapshot.d.ts.map +1 -0
- package/dist/snapshot.js +1348 -0
- package/dist/snapshot.js.map +1 -0
- package/dist/stream-server-standalone.d.ts +38 -0
- package/dist/stream-server-standalone.d.ts.map +1 -0
- package/dist/stream-server-standalone.js +494 -0
- package/dist/stream-server-standalone.js.map +1 -0
- package/dist/stream-server.d.ts +214 -0
- package/dist/stream-server.d.ts.map +1 -0
- package/dist/stream-server.js +811 -0
- package/dist/stream-server.js.map +1 -0
- package/dist/types.d.ts +914 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +4 -0
- package/dist/types.js.map +1 -0
- package/dist/viewer-html.d.ts +2 -0
- package/dist/viewer-html.d.ts.map +1 -0
- package/dist/viewer-html.js +185 -0
- package/dist/viewer-html.js.map +1 -0
- package/dist/viewer-script.d.ts +47 -0
- package/dist/viewer-script.d.ts.map +1 -0
- package/dist/viewer-script.js +586 -0
- package/dist/viewer-script.js.map +1 -0
- package/package.json +86 -0
- package/scripts/build-all-platforms.sh +68 -0
- package/scripts/check-version-sync.js +39 -0
- package/scripts/check_goods_container.js +35 -0
- package/scripts/check_page_content.js +36 -0
- package/scripts/click_applause_rate.js +30 -0
- package/scripts/copy-native.js +36 -0
- package/scripts/copy-recorder.js +21 -0
- package/scripts/e2e-test-recorder.ts +584 -0
- package/scripts/explore_jd_page.js +31 -0
- package/scripts/extract_all_jd_data.js +80 -0
- package/scripts/extract_jd_product_detail.js +62 -0
- package/scripts/extract_jd_products_correct_links.js +78 -0
- package/scripts/extract_jd_products_final.js +80 -0
- package/scripts/extract_jd_reviews.js +48 -0
- package/scripts/extract_jd_seafood_final.js +78 -0
- package/scripts/extract_multiple_products.js +77 -0
- package/scripts/extract_products_no_scroll.js +68 -0
- package/scripts/extract_products_simple.js +68 -0
- package/scripts/find_applause_rate.js +26 -0
- package/scripts/find_jd_links.js +28 -0
- package/scripts/find_main_content.js +20 -0
- package/scripts/find_product_cards.js +38 -0
- package/scripts/find_root_content.js +26 -0
- package/scripts/find_unique_products.js +55 -0
- package/scripts/get_jd_product_detail.js +16 -0
- package/scripts/get_jd_products.js +23 -0
- package/scripts/get_jd_seafood_products.js +44 -0
- package/scripts/get_product_details_from_images.js +54 -0
- package/scripts/postinstall.js +235 -0
- package/scripts/scroll_and_get_products.js +47 -0
- package/scripts/scroll_deep_and_find.js +45 -0
- package/scripts/sync-version.js +69 -0
- package/scripts/verify-baidu-enter.ts +116 -0
- package/skills/agent-browser/SKILL.md +310 -0
- package/skills/agent-browser/references/authentication.md +198 -0
- package/skills/agent-browser/references/commands.md +471 -0
- package/skills/agent-browser/references/data-extraction.md +377 -0
- package/skills/agent-browser/references/proxy-support.md +188 -0
- package/skills/agent-browser/references/session-management.md +197 -0
- package/skills/agent-browser/references/snapshot-refs.md +379 -0
- package/skills/agent-browser/references/video-recording.md +173 -0
- package/skills/agent-browser/templates/api-interception.sh +53 -0
- package/skills/agent-browser/templates/authenticated-session.sh +97 -0
- package/skills/agent-browser/templates/capture-workflow.sh +69 -0
- package/skills/agent-browser/templates/data-extraction.sh +210 -0
- package/skills/agent-browser/templates/form-automation.sh +62 -0
- package/skills/skill-creator/LICENSE.txt +202 -0
- package/skills/skill-creator/SKILL.md +356 -0
- package/skills/skill-creator/references/output-patterns.md +82 -0
- package/skills/skill-creator/references/workflows.md +28 -0
- package/skills/skill-creator/scripts/init_skill.py +303 -0
- package/skills/skill-creator/scripts/package_skill.py +113 -0
- package/skills/skill-creator/scripts/quick_validate.py +95 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
(() => {
  // Collects summary fields for the product page currently loaded in the
  // browser and returns them as a plain object:
  //   { url, title, price, shop, sales, reviews: { good, neutral, bad }, region }
  // Every field defaults to '' and is only filled when a match is found.
  // Note: `sales` is declared for the result shape but nothing in this script
  // populates it, so it is always returned as ''.

  // Accept both the half-width (U+00A5) and the full-width (U+FFE5) yen sign.
  // The character class previously listed the same sign twice, so prices
  // written with the full-width form were never matched.
  const PRICE_PATTERN = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SHOP_PATTERN = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;

  // [result key, label that must appear in the text, count pattern].
  // The separator may be an ASCII colon or a full-width colon (U+FF1A).
  const REVIEW_BUCKETS = [
    ['good', '好评', /好评[:\uFF1A]\s*(\d+)/],
    ['neutral', '中评', /中评[:\uFF1A]\s*(\d+)/],
    ['bad', '差评', /差评[:\uFF1A]\s*(\d+)/]
  ];

  const result = {
    url: window.location.href,
    title: document.title,
    price: '',
    shop: '',
    sales: '',
    reviews: {
      good: '',
      neutral: '',
      bad: ''
    },
    region: ''
  };

  // Read the page text once; both the price and the shop lookups scan it.
  const pageText = document.body.textContent;

  const priceMatch = pageText.match(PRICE_PATTERN);
  if (priceMatch) {
    result.price = priceMatch[1];
  }

  const shopMatch = pageText.match(SHOP_PATTERN);
  if (shopMatch) {
    result.shop = shopMatch[1];
  }

  // First matching element wins for each bucket; later elements are ignored
  // once a bucket has a value.
  const reviewElements = document.querySelectorAll('[class*="comment"], [class*="review"], [class*="rating"]');
  reviewElements.forEach((el) => {
    const text = el.textContent || '';

    for (const [key, label, pattern] of REVIEW_BUCKETS) {
      if (result.reviews[key] || !text.includes(label)) continue;

      const match = text.match(pattern);
      if (match) {
        result.reviews[key] = match[1];
      }
    }
  });

  // Take the first short, non-empty text among address-like elements.
  // The length limit is applied to the trimmed text so surrounding whitespace
  // does not disqualify an otherwise short value.
  const regionElements = document.querySelectorAll('[class*="address"], [class*="region"], [class*="location"]');
  for (const el of regionElements) {
    const text = (el.textContent || '').trim();
    if (text.length > 0 && text.length < 100) {
      result.region = text;
      break;
    }
  }

  return result;
})()
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
(async () => {
  // Scrolls the page in three steps so lazily rendered cards can appear,
  // then collects one record per product container and resolves to:
  //   { keyword, crawlTime, products: [...], total }
  // Each product carries `id` and `link`, plus `image`, `price`, `sales`,
  // `shop` and `title` when they could be derived from the card.

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Accept both the half-width (U+00A5) and the full-width (U+FFE5) yen sign.
  // The character class previously listed the same sign twice, so prices
  // written with the full-width form were never matched.
  const PRICE_PATTERN = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_PATTERN = /已售([\d\+万]+)/;
  const SHOP_PATTERN = /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/;

  // Fragments removed from the card text, in this order, to leave the title.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    SHOP_PATTERN,
    /搜同款|关注|对比/g
  ];

  // [vertical scroll offset, pause in ms after scrolling there].
  const SCROLL_STEPS = [
    [500, 2000],
    [2000, 2000],
    [4000, 3000]
  ];

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  for (const [offset, pause] of SCROLL_STEPS) {
    window.scrollTo(0, offset);
    await sleep(pause);
  }

  // NOTE(review): the selector looks like a build-generated class name and
  // may need updating when the target page is rebuilt — confirm.
  const containers = document.querySelectorAll('._goodsContainer_1p2ae_1');
  const seenLinks = new Set();

  containers.forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    // Keep only the first card seen for each link.
    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';

    // `id` is the container's 1-based position, so ids have gaps wherever a
    // card was skipped.
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, 150);
    }

    const priceMatch = text.match(PRICE_PATTERN);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_PATTERN);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_PATTERN);
    if (shopMatch) {
      product.shop = shopMatch[0];
    }

    // The title is whatever follows the '广告' marker (or the whole text when
    // the marker is absent) once the known noise fragments are stripped.
    const adIndex = text.indexOf('广告');
    let title = text.substring(adIndex >= 0 ? adIndex + 2 : 0).trim();
    for (const noise of TITLE_NOISE) {
      title = title.replace(noise, '');
    }
    title = title.trim();

    if (title.length > 5) {
      product.title = title.substring(0, 100);
    }

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  result.total = result.products.length;
  return result;
})()
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
(async () => {
  // Scrolls near the top, then to the bottom of the page, and afterwards
  // collects one record per product container. Resolves to:
  //   { keyword, crawlTime, products: [...] }
  // Each product carries `id` and `link`, plus `image`, `price`, `sales`,
  // `shop` and `title` when they could be derived from the card.

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Accept both the half-width (U+00A5) and the full-width (U+FFE5) yen sign.
  // The character classes previously listed the same sign twice, so the
  // full-width form was neither matched as a price nor used to cut the title.
  const PRICE_PATTERN = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const YEN_SIGN = /[\u00A5\uFFE5]/;
  const SALES_PATTERN = /已售([\d\+万]+)/;
  const SHOP_PATTERN = /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/;

  // Fragments removed from the title text, in this order.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    SHOP_PATTERN,
    /搜同款|关注|对比/g
  ];

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  window.scrollTo(0, 100);
  await sleep(2000);

  // scrollHeight is read only after the first pause, so it reflects whatever
  // was rendered in the meantime.
  window.scrollTo(0, document.body.scrollHeight);
  await sleep(3000);

  // NOTE(review): the selector looks like a build-generated class name and
  // may need updating when the target page is rebuilt — confirm.
  const containers = document.querySelectorAll('._goodsContainer_1p2ae_1');
  const seenLinks = new Set();

  containers.forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    // Keep only the first card seen for each link.
    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';

    // `id` is the container's 1-based position, so ids have gaps wherever a
    // card was skipped.
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, 150);
    }

    const priceMatch = text.match(PRICE_PATTERN);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_PATTERN);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_PATTERN);
    if (shopMatch) {
      product.shop = shopMatch[0];
    }

    // The title is the text after the '广告' marker (or from the start when
    // the marker is absent) up to the first yen sign. split() always yields
    // at least one element, so no length guard is needed.
    const adIndex = text.indexOf('广告');
    const titleText = text.substring(adIndex >= 0 ? adIndex + 2 : 0).trim();

    let title = titleText.split(YEN_SIGN)[0].trim();
    for (const noise of TITLE_NOISE) {
      title = title.replace(noise, '');
    }
    title = title.trim();

    if (title.length > 5) {
      product.title = title.substring(0, 100);
    }

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
(() => {
  // Collects review counters from the page currently loaded in the browser
  // and returns:
  //   { reviews: { total, good, neutral, bad }, allReviewText: [...] }
  // Counters default to ''. `allReviewText` holds the raw text of every
  // inspected filter/tab element plus the first summary element, which is
  // useful for checking what the counters were derived from.

  // Total shown as "买家评价(<count>)". Parentheses may be ASCII or full-width
  // (U+FF08 / U+FF09); the class previously listed the ASCII form twice.
  // The count may be a bare number or carry a 万 and/or + suffix (for example
  // 200+, 2万, 1.5万+); the earlier pattern required exactly one suffix
  // character and therefore rejected both "123" and "1万+".
  const TOTAL_PATTERN = /买家评价[(\uFF08](\d+(?:\.\d+)?万?\+?)[)\uFF09]/;

  // For each bucket: the labels that select it, and a pattern that picks the
  // number written directly after one of those labels (only whitespace, a
  // colon or an opening parenthesis may sit in between).
  const BUCKETS = [
    { key: 'good', labels: ['好评', 'positive'], anchored: /(?:好评|positive)[\s:\uFF1A(\uFF08]*(\d+)/ },
    { key: 'neutral', labels: ['中评', 'neutral'], anchored: /(?:中评|neutral)[\s:\uFF1A(\uFF08]*(\d+)/ },
    { key: 'bad', labels: ['差评', 'negative'], anchored: /(?:差评|negative)[\s:\uFF1A(\uFF08]*(\d+)/ }
  ];
  const FIRST_NUMBER = /(\d+)/;

  const result = {
    reviews: {
      total: '',
      good: '',
      neutral: '',
      bad: ''
    },
    allReviewText: []
  };

  const allText = document.body.textContent;

  const totalMatch = allText.match(TOTAL_PATTERN);
  if (totalMatch) {
    result.reviews.total = totalMatch[1];
  }

  const reviewButtons = document.querySelectorAll('[class*="filter"], [class*="tab"]');

  reviewButtons.forEach((btn) => {
    const text = btn.textContent || '';
    result.allReviewText.push(text);

    for (const { key, labels, anchored } of BUCKETS) {
      if (!labels.some((label) => text.includes(label))) continue;

      // Prefer the number attached to this bucket's label, so an element
      // whose text holds several counters does not give every bucket the
      // same first number. Fall back to the first number in the text when
      // no number directly follows the label.
      const match = text.match(anchored) || text.match(FIRST_NUMBER);
      // A later matching element overwrites an earlier one.
      if (match) result.reviews[key] = match[1];
    }
  });

  const commentSummary = document.querySelector('[class*="comment-summary"], [class*="summary"]');
  if (commentSummary) {
    const summaryText = commentSummary.textContent || '';
    result.allReviewText.push(summaryText);
  }

  return result;
})()
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
(async () => {
  // Scrolls the page in three steps so lazily rendered cards can appear,
  // then collects one record per product container and resolves to:
  //   { keyword, crawlTime, products: [...], total }
  // Each product carries `id` and `link`, plus `image`, `price`, `sales`,
  // `shop` and `title` when they could be derived from the card.

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Accept both the half-width (U+00A5) and the full-width (U+FFE5) yen sign.
  // The character class previously listed the same sign twice, so prices
  // written with the full-width form were never matched.
  const PRICE_PATTERN = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_PATTERN = /已售([\d\+万]+)/;
  const SHOP_PATTERN = /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/;

  // Fragments removed from the card text, in this order, to leave the title.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    SHOP_PATTERN,
    /搜同款|关注|对比/g
  ];

  // [vertical scroll offset, pause in ms after scrolling there].
  const SCROLL_STEPS = [
    [500, 2000],
    [2000, 2000],
    [4000, 3000]
  ];

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  for (const [offset, pause] of SCROLL_STEPS) {
    window.scrollTo(0, offset);
    await sleep(pause);
  }

  // NOTE(review): the selector looks like a build-generated class name and
  // may need updating when the target page is rebuilt — confirm.
  const containers = document.querySelectorAll('._goodsContainer_1p2ae_1');
  const seenLinks = new Set();

  containers.forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    // Keep only the first card seen for each link.
    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';

    // `id` is the container's 1-based position, so ids have gaps wherever a
    // card was skipped.
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, 150);
    }

    const priceMatch = text.match(PRICE_PATTERN);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_PATTERN);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_PATTERN);
    if (shopMatch) {
      product.shop = shopMatch[0];
    }

    // The title is whatever follows the '广告' marker (or the whole text when
    // the marker is absent) once the known noise fragments are stripped.
    const adIndex = text.indexOf('广告');
    let title = text.substring(adIndex >= 0 ? adIndex + 2 : 0).trim();
    for (const noise of TITLE_NOISE) {
      title = title.replace(noise, '');
    }
    title = title.trim();

    if (title.length > 5) {
      product.title = title.substring(0, 100);
    }

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  result.total = result.products.length;
  return result;
})()
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
(async () => {
  // Scrolls the page in three steps so lazily rendered cards can appear,
  // then collects one record per product container and resolves to:
  //   { keyword, crawlTime, products: [...] }
  // Each product carries `id` and `link`, plus `image`, `price`, `sales`,
  // `shop` and `title` when they could be derived from the card.

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Accept both the half-width (U+00A5) and the full-width (U+FFE5) yen sign.
  // The character class previously listed the same sign twice, so prices
  // written with the full-width form were never matched.
  const PRICE_PATTERN = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_PATTERN = /已售([\d\+万]+)/;
  const SHOP_PATTERN = /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/;

  // Fragments removed from the card text, in this order, to leave the title.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    SHOP_PATTERN,
    /搜同款|关注|对比/g
  ];

  // [vertical scroll offset, pause in ms after scrolling there].
  const SCROLL_STEPS = [
    [500, 2000],
    [2000, 2000],
    [4000, 3000]
  ];

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  for (const [offset, pause] of SCROLL_STEPS) {
    window.scrollTo(0, offset);
    await sleep(pause);
  }

  // NOTE(review): the selector looks like a build-generated class name and
  // may need updating when the target page is rebuilt — confirm.
  const containers = document.querySelectorAll('._goodsContainer_1p2ae_1');
  const seenLinks = new Set();

  containers.forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    // Keep only the first card seen for each link.
    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';

    // `id` is the container's 1-based position, so ids have gaps wherever a
    // card was skipped.
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, 150);
    }

    const priceMatch = text.match(PRICE_PATTERN);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_PATTERN);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_PATTERN);
    if (shopMatch) {
      product.shop = shopMatch[0];
    }

    // The title is whatever follows the '广告' marker (or the whole text when
    // the marker is absent) once the known noise fragments are stripped.
    const adIndex = text.indexOf('广告');
    let title = text.substring(adIndex >= 0 ? adIndex + 2 : 0).trim();
    for (const noise of TITLE_NOISE) {
      title = title.replace(noise, '');
    }
    title = title.trim();

    if (title.length > 5) {
      product.title = title.substring(0, 100);
    }

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
(() => {
  // Collects one record per product container that is already in the DOM
  // (no scrolling, no waiting) and returns:
  //   { keyword, crawlTime, products: [...] }
  // Each product carries `id` and `link`, plus `image`, `price`, `sales`,
  // `shop` and `title` when they could be derived from the card.

  // Accept both the half-width (U+00A5) and the full-width (U+FFE5) yen sign.
  // The character class previously listed the same sign twice, so prices
  // written with the full-width form were never matched.
  const PRICE_PATTERN = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_PATTERN = /已售([\d\+万]+)/;
  const SHOP_PATTERN = /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/;

  // Fragments removed from the card text, in this order, to leave the title.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    SHOP_PATTERN,
    /搜同款|关注|对比/g
  ];

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  // NOTE(review): the selector looks like a build-generated class name and
  // may need updating when the target page is rebuilt — confirm.
  const containers = document.querySelectorAll('._goodsContainer_1p2ae_1');
  const seenLinks = new Set();

  containers.forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    // Keep only the first card seen for each link.
    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';

    // `id` is the container's 1-based position, so ids have gaps wherever a
    // card was skipped.
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, 150);
    }

    const priceMatch = text.match(PRICE_PATTERN);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_PATTERN);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_PATTERN);
    if (shopMatch) {
      product.shop = shopMatch[0];
    }

    // The title is whatever follows the '广告' marker (or the whole text when
    // the marker is absent) once the known noise fragments are stripped.
    const adIndex = text.indexOf('广告');
    let title = text.substring(adIndex >= 0 ? adIndex + 2 : 0).trim();
    for (const noise of TITLE_NOISE) {
      title = title.replace(noise, '');
    }
    title = title.trim();

    if (title.length > 5) {
      product.title = title.substring(0, 100);
    }

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
(() => {
  // Collects one record per product container that is already in the DOM
  // (no scrolling, no waiting) and returns:
  //   { keyword, crawlTime, products: [...] }
  // Each product carries `id` and `link`, plus `image`, `price`, `sales`,
  // `shop` and `title` when they could be derived from the card.

  // Accept both the half-width (U+00A5) and the full-width (U+FFE5) yen sign.
  // The character class previously listed the same sign twice, so prices
  // written with the full-width form were never matched.
  const PRICE_PATTERN = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_PATTERN = /已售([\d\+万]+)/;
  const SHOP_PATTERN = /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/;

  // Fragments removed from the card text, in this order, to leave the title.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    SHOP_PATTERN,
    /搜同款|关注|对比/g
  ];

  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  // NOTE(review): the selector looks like a build-generated class name and
  // may need updating when the target page is rebuilt — confirm.
  const containers = document.querySelectorAll('._goodsContainer_1p2ae_1');
  const seenLinks = new Set();

  containers.forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    // Keep only the first card seen for each link.
    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';

    // `id` is the container's 1-based position, so ids have gaps wherever a
    // card was skipped.
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, 150);
    }

    const priceMatch = text.match(PRICE_PATTERN);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_PATTERN);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_PATTERN);
    if (shopMatch) {
      product.shop = shopMatch[0];
    }

    // The title is whatever follows the '广告' marker (or the whole text when
    // the marker is absent) once the known noise fragments are stripped.
    const adIndex = text.indexOf('广告');
    let title = text.substring(adIndex >= 0 ? adIndex + 2 : 0).trim();
    for (const noise of TITLE_NOISE) {
      title = title.replace(noise, '');
    }
    title = title.trim();

    if (title.length > 5) {
      product.title = title.substring(0, 100);
    }

    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
(() => {
  // Looks for an element with the exact class `applause-rate`.
  // Found: returns its text and the first 500 characters of its outer HTML.
  // Not found: returns up to ten elements whose class attribute contains
  // "applause" or "rate", each with its className and first 100 characters
  // of text, to help locate the right selector.
  const exact = document.querySelector('.applause-rate');

  if (exact) {
    return {
      elementFound: true,
      elementText: exact.textContent,
      elementHTML: exact.outerHTML.substring(0, 500)
    };
  }

  const similarElements = Array.from(
    document.querySelectorAll('[class*="applause"], [class*="rate"]')
  )
    .slice(0, 10)
    .map((candidate) => ({
      className: candidate.className,
      text: candidate.textContent?.substring(0, 100)
    }));

  return {
    elementFound: false,
    message: 'applause-rate element not found',
    similarElements
  };
})()
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
(() => {
  // Surveys anchors whose href contains "jd" and returns:
  //   allLinks     - the first 20 such anchors (href cut to 100 chars, text to 50)
  //   jdLinks      - every such anchor whose href contains 'item.jd' or
  //                  'product', with the parent element's className added
  //   productLinks - always an empty array; nothing in this script fills it
  const allLinks = [];
  const jdLinks = [];

  // Short description shared by both lists.
  const summarize = (anchor) => ({
    href: anchor.href.substring(0, 100),
    text: anchor.textContent?.substring(0, 50)
  });

  let position = 0;
  for (const anchor of document.querySelectorAll('a[href*="jd"]')) {
    if (position < 20) {
      allLinks.push(summarize(anchor));
    }
    position += 1;

    const url = anchor.href;
    const looksLikeProduct = url.includes('item.jd') || url.includes('product');
    if (looksLikeProduct) {
      jdLinks.push({
        ...summarize(anchor),
        parentClass: anchor.parentElement?.className
      });
    }
  }

  return {
    allLinks,
    jdLinks,
    productLinks: []
  };
})()
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
(() => {
  // Describes the first 30 direct children of <body>: position, tag name,
  // id, className and number of child elements. Returns { bodyChildren }.
  const bodyChildren = Array.from(document.body.children)
    .slice(0, 30)
    .map((node, position) => ({
      index: position,
      tagName: node.tagName,
      id: node.id,
      className: node.className,
      childrenCount: node.children.length
    }));

  return { bodyChildren };
})()
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
(() => {
  // Locates a likely product-list container and samples its structure.
  // Returns { bodyClasses, children } and, when a container is found, also
  // `mainContainer` (its id, className and child count).
  //   - container found: `children` describes its first 5 child elements
  //   - otherwise:       `children` describes the first 20 <div>s in <body>
  // Each entry holds index, tagName, className, id and the first 200
  // characters of innerHTML.
  const snapshot = (node, index) => ({
    index,
    tagName: node.tagName,
    className: node.className,
    id: node.id,
    innerHTML: node.innerHTML.substring(0, 200)
  });

  const result = {
    bodyClasses: document.body.className,
    children: []
  };

  const container = document.querySelector('main, #J_goodsList, [id*="goods"], [id*="product"]');

  if (!container) {
    result.children = Array.from(document.body.querySelectorAll('div'))
      .slice(0, 20)
      .map(snapshot);
    return result;
  }

  result.mainContainer = {
    id: container.id,
    className: container.className,
    childrenCount: container.children.length
  };
  result.children = Array.from(container.children)
    .slice(0, 5)
    .map(snapshot);

  return result;
})()
|