@dyyz1993/agent-browser 0.24.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/agent-browser-darwin-arm64 +0 -0
- package/dist/actions.js +2 -2
- package/dist/actions.js.map +1 -1
- package/dist/cli/commands.d.ts.map +1 -1
- package/dist/cli/commands.js +5 -0
- package/dist/cli/commands.js.map +1 -1
- package/dist/cli/flags.d.ts +1 -0
- package/dist/cli/flags.d.ts.map +1 -1
- package/dist/cli/flags.js +8 -0
- package/dist/cli/flags.js.map +1 -1
- package/dist/cli/help.d.ts.map +1 -1
- package/dist/cli/help.js +51 -1
- package/dist/cli/help.js.map +1 -1
- package/dist/cli.js +150 -14
- package/dist/cli.js.map +1 -1
- package/dist/flow/presets/console-capture.js +31 -0
- package/dist/flow/presets/fetch-capture.js +78 -0
- package/dist/flow/presets/sse-stream.js +67 -0
- package/dist/flow/presets/xhr-only.js +34 -0
- package/dist/ios-actions.d.ts +11 -0
- package/dist/ios-actions.d.ts.map +1 -0
- package/dist/ios-actions.js +228 -0
- package/dist/ios-actions.js.map +1 -0
- package/dist/ios-manager.d.ts +266 -0
- package/dist/ios-manager.d.ts.map +1 -0
- package/dist/ios-manager.js +1076 -0
- package/dist/ios-manager.js.map +1 -0
- package/dist/protocol.js +1 -1
- package/dist/protocol.js.map +1 -1
- package/dist/types.d.ts +1 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/scripts/check_goods_container.js +35 -0
- package/scripts/check_page_content.js +36 -0
- package/scripts/click_applause_rate.js +30 -0
- package/scripts/explore_jd_page.js +31 -0
- package/scripts/extract_all_jd_data.js +80 -0
- package/scripts/extract_jd_product_detail.js +62 -0
- package/scripts/extract_jd_products_correct_links.js +78 -0
- package/scripts/extract_jd_products_final.js +80 -0
- package/scripts/extract_jd_reviews.js +48 -0
- package/scripts/extract_jd_seafood_final.js +78 -0
- package/scripts/extract_multiple_products.js +77 -0
- package/scripts/extract_products_no_scroll.js +68 -0
- package/scripts/extract_products_simple.js +68 -0
- package/scripts/find_applause_rate.js +26 -0
- package/scripts/find_jd_links.js +28 -0
- package/scripts/find_main_content.js +20 -0
- package/scripts/find_product_cards.js +38 -0
- package/scripts/find_root_content.js +26 -0
- package/scripts/find_unique_products.js +55 -0
- package/scripts/get_jd_product_detail.js +16 -0
- package/scripts/get_jd_products.js +23 -0
- package/scripts/get_jd_seafood_products.js +44 -0
- package/scripts/get_product_details_from_images.js +54 -0
- package/scripts/verify-form.sh +67 -0
- package/scripts/verify-login.sh +65 -0
- package/scripts/verify-recording.sh +80 -0
- package/scripts/verify-upload.sh +41 -0
- package/bin/agent-browser-darwin-x64 +0 -0
- package/bin/agent-browser-linux-arm64 +0 -0
- package/bin/agent-browser-linux-x64 +0 -0
- package/bin/agent-browser-win32-x64.exe +0 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
(() => {
  // Scrapes summary fields for the page currently loaded in the browser and
  // returns them as a plain object. Every field starts as '' and is filled in
  // only when a matching text pattern or element is found.
  // NOTE(review): `sales` is declared but nothing below populates it.
  const result = {
    url: window.location.href,
    title: document.title,
    price: '',
    shop: '',
    sales: '',
    reviews: {
      good: '',
      neutral: '',
      bad: ''
    },
    region: ''
  };

  const pageText = document.body.textContent;

  // Accept both the half-width (U+00A5) and the full-width (U+FFE5) yen sign.
  // Escapes keep the two code points distinct even if this file is
  // Unicode-normalised.
  const priceMatch = pageText.match(/[\u00A5\uFFE5]\s*(\d+\.?\d*)/);
  if (priceMatch) {
    result.price = priceMatch[1];
  }

  // First run of 2-10 non-space characters that ends in a known shop suffix.
  const shopMatch = pageText.match(/([^\s]{2,10}(?:旗舰店|专营店|京东自营))/);
  if (shopMatch) {
    result.shop = shopMatch[1];
  }

  // One pattern per review bucket; the separator may be an ASCII colon or
  // the full-width colon (U+FF1A).
  const reviewPatterns = {
    good: /好评[:\uFF1A]\s*(\d+)/,
    neutral: /中评[:\uFF1A]\s*(\d+)/,
    bad: /差评[:\uFF1A]\s*(\d+)/
  };

  const reviewElements = document.querySelectorAll('[class*="comment"], [class*="review"], [class*="rating"]');
  reviewElements.forEach((el) => {
    const text = el.textContent || '';
    for (const [bucket, pattern] of Object.entries(reviewPatterns)) {
      // The first element that yields a count wins for each bucket.
      if (result.reviews[bucket]) continue;
      const match = text.match(pattern);
      if (match) {
        result.reviews[bucket] = match[1];
      }
    }
  });

  // Use the first candidate whose trimmed text is non-empty and shorter than
  // 100 characters. Trimming before the length check keeps surrounding
  // whitespace from counting against the limit.
  const regionElements = document.querySelectorAll('[class*="address"], [class*="region"], [class*="location"]');
  for (const el of regionElements) {
    const text = (el.textContent || '').trim();
    if (text.length > 0 && text.length < 100) {
      result.region = text;
      break;
    }
  }

  return result;
})()
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
(async () => {
  // Scrolls the page in three steps, then builds one record per unique
  // product card (elements with class `_goodsContainer_1p2ae_1`). Resolves to
  // { keyword, crawlTime, products, total }.
  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  // Half-width (U+00A5) or full-width (U+FFE5) yen sign followed by a number.
  const PRICE_RE = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_RE = /已售([\d\+万]+)/;
  const SHOP_RE = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // Fragments removed from the card text, in this order, to leave the title.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/,
    /搜同款|关注|对比/g
  ];

  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // [scroll offset, wait in ms] — presumably gives lazily rendered cards time
  // to appear; confirm against the target page.
  for (const [offset, delay] of [[500, 2000], [2000, 2000], [4000, 3000]]) {
    window.scrollTo(0, offset);
    await pause(delay);
  }

  const containers = document.querySelectorAll('._goodsContainer_1p2ae_1');
  const seenLinks = new Set();

  containers.forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    // De-duplicate on the resolved link of the card's first anchor.
    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    // `id` is the 1-based position in the NodeList, so ids have gaps whenever
    // a card is skipped.
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, 150);
    }

    const priceMatch = text.match(PRICE_RE);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_RE);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_RE);
    if (shopMatch) {
      product.shop = shopMatch[1];
    }

    // The title is whatever follows the "广告" marker (or the whole text when
    // the marker is absent) once the noise fragments are stripped.
    const adIndex = text.indexOf('广告');
    const titleStart = adIndex >= 0 ? adIndex + 2 : 0;
    const title = TITLE_NOISE
      .reduce((acc, pattern) => acc.replace(pattern, ''), text.substring(titleStart).trim())
      .trim();

    if (title.length > 5) {
      product.title = title.substring(0, 100);
    }

    // Keep only cards that produced a title or a price.
    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  result.total = result.products.length;
  return result;
})()
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
(async () => {
  // Scrolls near the top and then to the bottom of the page, then builds one
  // record per unique product card (elements with class
  // `_goodsContainer_1p2ae_1`). Resolves to { keyword, crawlTime, products }.
  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  // Half-width (U+00A5) and full-width (U+FFE5) yen signs are both accepted.
  const YEN_SIGN = /[\u00A5\uFFE5]/;
  const PRICE_RE = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_RE = /已售([\d\+万]+)/;
  const SHOP_RE = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // Fragments removed from the title candidate, in this order.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/,
    /搜同款|关注|对比/g
  ];

  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  window.scrollTo(0, 100);
  await pause(2000);

  // scrollHeight is read only now, after the first wait, so it reflects
  // whatever the page has rendered by then.
  window.scrollTo(0, document.body.scrollHeight);
  await pause(3000);

  const containers = document.querySelectorAll('._goodsContainer_1p2ae_1');
  const seenLinks = new Set();

  containers.forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    // De-duplicate on the resolved link of the card's first anchor.
    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';

    // `id` is the 1-based position in the NodeList, so ids have gaps whenever
    // a card is skipped.
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, 150);
    }

    const priceMatch = text.match(PRICE_RE);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_RE);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_RE);
    if (shopMatch) {
      product.shop = shopMatch[1];
    }

    // Title candidate: text after the "广告" marker (or all of it), cut at
    // the first yen sign. split() always yields at least one element, so
    // index 0 is safe.
    const adIndex = text.indexOf('广告');
    const titleStart = adIndex >= 0 ? adIndex + 2 : 0;
    const beforePrice = text.substring(titleStart).trim().split(YEN_SIGN)[0].trim();
    const title = TITLE_NOISE
      .reduce((acc, pattern) => acc.replace(pattern, ''), beforePrice)
      .trim();

    if (title.length > 5) {
      product.title = title.substring(0, 100);
    }

    // Keep only cards that produced a title or a price.
    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
(() => {
  // Collects review counters from the current page: the overall total from
  // the page text, and good/neutral/bad counts from elements whose class
  // contains "filter" or "tab". The raw text of every inspected element is
  // also returned in `allReviewText`.
  const result = {
    reviews: {
      total: '',
      good: '',
      neutral: '',
      bad: ''
    },
    allReviewText: []
  };

  const allText = document.body.textContent;

  // Parentheses may be ASCII or full-width (U+FF08 / U+FF09). The count may
  // have a decimal part and zero or more "+" / "万" suffix characters, so
  // "(500)", "(500+)" and "(2万+)" are all captured.
  const totalMatch = allText.match(/买家评价[(\uFF08](\d+(?:\.\d+)?[+万]*)[)\uFF09]/);
  if (totalMatch) {
    result.reviews.total = totalMatch[1];
  }

  // [result key, Chinese label, English label]
  const buckets = [
    ['good', '好评', 'positive'],
    ['neutral', '中评', 'neutral'],
    ['bad', '差评', 'negative']
  ];

  const reviewButtons = document.querySelectorAll('[class*="filter"], [class*="tab"]');

  reviewButtons.forEach((btn) => {
    const text = btn.textContent || '';
    result.allReviewText.push(text);

    // The first run of digits in the element's text is taken as the count.
    // A later matching element overwrites an earlier one.
    const firstNumber = text.match(/(\d+)/);
    if (!firstNumber) return;

    for (const [key, zhLabel, enLabel] of buckets) {
      if (text.includes(zhLabel) || text.includes(enLabel)) {
        result.reviews[key] = firstNumber[1];
      }
    }
  });

  // Only the first matching summary element is recorded.
  const commentSummary = document.querySelector('[class*="comment-summary"], [class*="summary"]');
  if (commentSummary) {
    const summaryText = commentSummary.textContent || '';
    result.allReviewText.push(summaryText);
  }

  return result;
})()
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
(async () => {
  // Scrolls the page in three steps, then extracts one record per unique
  // product card (elements with class `_goodsContainer_1p2ae_1`). Resolves to
  // { keyword, crawlTime, products, total }.
  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Removes counters, shop names and action labels from the card text. The
  // replacements run in this fixed order.
  const stripNoise = (raw) => raw
    .replace(/已售[\d\+万]+/, '')
    .replace(/[\d\+万]+人加购/, '')
    .replace(/[\d\+万]+人种草/, '')
    .replace(/[\d\+万]+人浏览/, '')
    .replace(/[^\s]{2,10}(?:旗舰店|专营店|京东自营)/, '')
    .replace(/搜同款|关注|对比/g, '')
    .trim();

  // Scroll offsets paired with the wait (ms) that follows each — presumably
  // to let lazily rendered cards appear; confirm against the target page.
  const scrollPlan = [
    { top: 500, delay: 2000 },
    { top: 2000, delay: 2000 },
    { top: 4000, delay: 3000 }
  ];
  for (const { top, delay } of scrollPlan) {
    window.scrollTo(0, top);
    await wait(delay);
  }

  const containers = document.querySelectorAll('._goodsContainer_1p2ae_1');
  const seenLinks = new Set();

  containers.forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    // Skip cards whose first anchor resolves to an already-seen link.
    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    // `id` is the 1-based NodeList position; skipped cards leave gaps.
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, 150);
    }

    // Both the half-width (U+00A5) and full-width (U+FFE5) yen sign count as
    // a price marker.
    const priceMatch = text.match(/[\u00A5\uFFE5]\s*(\d+\.?\d*)/);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(/已售([\d\+万]+)/);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(/([^\s]{2,10}(?:旗舰店|专营店|京东自营))/);
    if (shopMatch) {
      product.shop = shopMatch[1];
    }

    // Title: text after the "广告" marker (or all of it) with noise removed.
    const adIndex = text.indexOf('广告');
    const titleStart = adIndex >= 0 ? adIndex + 2 : 0;
    const title = stripNoise(text.substring(titleStart).trim());

    if (title.length > 5) {
      product.title = title.substring(0, 100);
    }

    // Keep only cards that produced a title or a price.
    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  result.total = result.products.length;
  return result;
})()
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
(async () => {
  // Scrolls the page in three steps, then extracts one record per unique
  // product card (elements with class `_goodsContainer_1p2ae_1`). Resolves to
  // { keyword, crawlTime, products }.
  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Returns the first capture group of `pattern` in `text`, or undefined.
  const firstGroup = (text, pattern) => {
    const match = text.match(pattern);
    return match ? match[1] : undefined;
  };

  // Removes counters, shop names and action labels from the card text. The
  // replacements run in this fixed order.
  const stripNoise = (raw) => raw
    .replace(/已售[\d\+万]+/, '')
    .replace(/[\d\+万]+人加购/, '')
    .replace(/[\d\+万]+人种草/, '')
    .replace(/[\d\+万]+人浏览/, '')
    .replace(/[^\s]{2,10}(?:旗舰店|专营店|京东自营)/, '')
    .replace(/搜同款|关注|对比/g, '')
    .trim();

  // [scroll offset, wait in ms] — presumably gives lazily rendered cards time
  // to appear; confirm against the target page.
  for (const [top, delay] of [[500, 2000], [2000, 2000], [4000, 3000]]) {
    window.scrollTo(0, top);
    await wait(delay);
  }

  const containers = document.querySelectorAll('._goodsContainer_1p2ae_1');
  const seenLinks = new Set();

  containers.forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    // Skip cards whose first anchor resolves to an already-seen link.
    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    // `id` is the 1-based NodeList position; skipped cards leave gaps.
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, 150);
    }

    // Both the half-width (U+00A5) and full-width (U+FFE5) yen sign count as
    // a price marker.
    const price = firstGroup(text, /[\u00A5\uFFE5]\s*(\d+\.?\d*)/);
    if (price !== undefined) {
      product.price = price;
    }

    const sales = firstGroup(text, /已售([\d\+万]+)/);
    if (sales !== undefined) {
      product.sales = sales;
    }

    const shop = firstGroup(text, /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/);
    if (shop !== undefined) {
      product.shop = shop;
    }

    // Title: text after the "广告" marker (or all of it) with noise removed.
    const adIndex = text.indexOf('广告');
    const titleStart = adIndex >= 0 ? adIndex + 2 : 0;
    const title = stripNoise(text.substring(titleStart).trim());

    if (title.length > 5) {
      product.title = title.substring(0, 100);
    }

    // Keep only cards that produced a title or a price.
    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
(() => {
  // Extracts one record per unique product card (elements with class
  // `_goodsContainer_1p2ae_1`) from the DOM as it currently stands, without
  // scrolling. Returns { keyword, crawlTime, products }.
  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  // Half-width (U+00A5) or full-width (U+FFE5) yen sign followed by a number.
  const PRICE_RE = /[\u00A5\uFFE5]\s*(\d+\.?\d*)/;
  const SALES_RE = /已售([\d\+万]+)/;
  const SHOP_RE = /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/;
  // Fragments removed from the card text, in this order, to leave the title.
  const TITLE_NOISE = [
    /已售[\d\+万]+/,
    /[\d\+万]+人加购/,
    /[\d\+万]+人种草/,
    /[\d\+万]+人浏览/,
    /[^\s]{2,10}(?:旗舰店|专营店|京东自营)/,
    /搜同款|关注|对比/g
  ];

  const containers = document.querySelectorAll('._goodsContainer_1p2ae_1');
  const seenLinks = new Set();

  containers.forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    // De-duplicate on the resolved link of the card's first anchor.
    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    // `id` is the 1-based position in the NodeList, so ids have gaps whenever
    // a card is skipped.
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, 150);
    }

    const priceMatch = text.match(PRICE_RE);
    if (priceMatch) {
      product.price = priceMatch[1];
    }

    const salesMatch = text.match(SALES_RE);
    if (salesMatch) {
      product.sales = salesMatch[1];
    }

    const shopMatch = text.match(SHOP_RE);
    if (shopMatch) {
      product.shop = shopMatch[1];
    }

    // The title is whatever follows the "广告" marker (or the whole text when
    // the marker is absent) once the noise fragments are stripped.
    const adIndex = text.indexOf('广告');
    const titleStart = adIndex >= 0 ? adIndex + 2 : 0;
    let title = text.substring(titleStart).trim();
    for (const pattern of TITLE_NOISE) {
      title = title.replace(pattern, '');
    }
    title = title.trim();

    if (title.length > 5) {
      product.title = title.substring(0, 100);
    }

    // Keep only cards that produced a title or a price.
    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
(() => {
  // Reads the product cards (elements with class `_goodsContainer_1p2ae_1`)
  // already present in the DOM and returns one record per unique link as
  // { keyword, crawlTime, products }.
  const result = {
    keyword: "生鲜海鲜",
    crawlTime: new Date().toISOString(),
    products: []
  };

  // Returns the first capture group of `pattern` in `text`, or undefined.
  const firstGroup = (text, pattern) => {
    const match = text.match(pattern);
    return match ? match[1] : undefined;
  };

  // Removes counters, shop names and action labels from the card text. The
  // replacements run in this fixed order.
  const stripNoise = (raw) => raw
    .replace(/已售[\d\+万]+/, '')
    .replace(/[\d\+万]+人加购/, '')
    .replace(/[\d\+万]+人种草/, '')
    .replace(/[\d\+万]+人浏览/, '')
    .replace(/[^\s]{2,10}(?:旗舰店|专营店|京东自营)/, '')
    .replace(/搜同款|关注|对比/g, '')
    .trim();

  const containers = document.querySelectorAll('._goodsContainer_1p2ae_1');
  const seenLinks = new Set();

  containers.forEach((container, index) => {
    const link = container.querySelector('a[href]');
    if (!link) return;

    // Skip cards whose first anchor resolves to an already-seen link.
    const href = link.href;
    if (seenLinks.has(href)) return;
    seenLinks.add(href);

    const text = container.textContent || '';
    // `id` is the 1-based NodeList position; skipped cards leave gaps.
    const product = {
      id: index + 1,
      link: href
    };

    const img = container.querySelector('img');
    if (img) {
      const src = img.getAttribute('data-src') || img.src;
      if (src) product.image = src.substring(0, 150);
    }

    // Both the half-width (U+00A5) and full-width (U+FFE5) yen sign count as
    // a price marker.
    const price = firstGroup(text, /[\u00A5\uFFE5]\s*(\d+\.?\d*)/);
    if (price !== undefined) {
      product.price = price;
    }

    const sales = firstGroup(text, /已售([\d\+万]+)/);
    if (sales !== undefined) {
      product.sales = sales;
    }

    const shop = firstGroup(text, /([^\s]{2,10}(?:旗舰店|专营店|京东自营))/);
    if (shop !== undefined) {
      product.shop = shop;
    }

    // Title: text after the "广告" marker (or all of it) with noise removed.
    const adIndex = text.indexOf('广告');
    const titleStart = adIndex >= 0 ? adIndex + 2 : 0;
    const title = stripNoise(text.substring(titleStart).trim());

    if (title.length > 5) {
      product.title = title.substring(0, 100);
    }

    // Keep only cards that produced a title or a price.
    if (product.title || product.price) {
      result.products.push(product);
    }
  });

  return result;
})()
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
(() => {
  // Looks for the element with class `applause-rate`. When it exists, returns
  // its text and the first 500 characters of its outer HTML. Otherwise
  // returns up to ten elements whose class contains "applause" or "rate".
  const target = document.querySelector('.applause-rate');

  if (!target) {
    const candidates = Array.from(
      document.querySelectorAll('[class*="applause"], [class*="rate"]')
    );

    return {
      elementFound: false,
      message: 'applause-rate element not found',
      similarElements: candidates.slice(0, 10).map((node) => ({
        className: node.className,
        text: node.textContent?.substring(0, 100)
      }))
    };
  }

  return {
    elementFound: true,
    elementText: target.textContent,
    elementHTML: target.outerHTML.substring(0, 500)
  };
})()
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
(() => {
  // Surveys anchors whose href contains "jd". `allLinks` holds the first 20
  // of them; `jdLinks` holds every one whose resolved href contains
  // "item.jd" or "product". `productLinks` is returned empty — nothing here
  // fills it.
  const anchors = Array.from(document.querySelectorAll('a[href*="jd"]'));

  const describe = (anchor) => ({
    href: anchor.href.substring(0, 100),
    text: anchor.textContent?.substring(0, 50)
  });

  const looksLikeProduct = (anchor) =>
    anchor.href.includes('item.jd') || anchor.href.includes('product');

  return {
    allLinks: anchors.slice(0, 20).map(describe),
    jdLinks: anchors.filter(looksLikeProduct).map((anchor) => ({
      ...describe(anchor),
      parentClass: anchor.parentElement?.className
    })),
    productLinks: []
  };
})()
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
(() => {
  // Summarises up to the first 30 direct children of <body>: position, tag
  // name, id, class name and number of child elements.
  const summaries = Array.from(document.body.children)
    .slice(0, 30)
    .map((node, position) => ({
      index: position,
      tagName: node.tagName,
      id: node.id,
      className: node.className,
      childrenCount: node.children.length
    }));

  return { bodyChildren: summaries };
})()
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
(() => {
  // Inspects the page's main container. When one is found (a <main>,
  // `#J_goodsList`, or any element whose id contains "goods" or "product"),
  // reports it plus its first five children. Otherwise reports the first
  // twenty <div> elements under <body>.
  const report = {
    bodyClasses: document.body.className,
    children: []
  };

  // Summary of one element; innerHTML is capped at 200 characters.
  const summarise = (node, index) => ({
    index,
    tagName: node.tagName,
    className: node.className,
    id: node.id,
    innerHTML: node.innerHTML.substring(0, 200)
  });

  const container = document.querySelector('main, #J_goodsList, [id*="goods"], [id*="product"]');

  let sample;
  if (container) {
    report.mainContainer = {
      id: container.id,
      className: container.className,
      childrenCount: container.children.length
    };
    sample = Array.from(container.children).slice(0, 5);
  } else {
    sample = Array.from(document.body.querySelectorAll('div')).slice(0, 20);
  }

  report.children.push(...sample.map(summarise));

  return report;
})()
|