channel-worker 2.5.12 → 2.5.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/api-client.js +11 -1
- package/lib/command-poller.js +106 -0
- package/lib/shopee-scraper.js +247 -0
- package/package.json +1 -1
- package/scripts/upload_facebook.js +27 -12
package/lib/api-client.js
CHANGED
|
@@ -68,7 +68,7 @@ class ApiClient {
|
|
|
68
68
|
async getNextCommand(workerId) {
|
|
69
69
|
// Daemon-handled types. `_pw` variants route to the Playwright pipeline
|
|
70
70
|
// (lib/playwright-runner → scripts/<base>.js) instead of the extension.
|
|
71
|
-
const workerTypes = 'launch_profile,close_profile,launch_veo3_profile,set_profile_proxy,save_file,set_thumbnail,set_tags,set_file_input,click_and_upload,type_text,verify_logins,update_extension,sync_youtube_stats,restart_worker,upload_youtube_pw,upload_tiktok_pw,upload_facebook_pw';
|
|
71
|
+
const workerTypes = 'launch_profile,close_profile,launch_veo3_profile,set_profile_proxy,save_file,set_thumbnail,set_tags,set_file_input,click_and_upload,type_text,verify_logins,update_extension,sync_youtube_stats,restart_worker,upload_youtube_pw,upload_tiktok_pw,upload_facebook_pw,scrape_affiliate_products,ingest_shopee_product';
|
|
72
72
|
return this.request('GET', `/workers/commands?worker_id=${workerId}&types=${encodeURIComponent(workerTypes)}`);
|
|
73
73
|
}
|
|
74
74
|
|
|
@@ -84,6 +84,16 @@ class ApiClient {
|
|
|
84
84
|
return this.request('POST', `/extension/commands/${commandId}/result`, { status, result: result || {}, error: error || null });
|
|
85
85
|
}
|
|
86
86
|
|
|
87
|
+
// Shopee scraper callbacks — worker ships parsed products, API upserts into
|
|
88
|
+
// the global AffiliateProduct kho (dedup by shop_id+item_id).
|
|
89
|
+
async upsertAffiliateProducts(products, meta = {}) {
|
|
90
|
+
return this.request('POST', '/products/worker-upsert', { products, ...meta });
|
|
91
|
+
}
|
|
92
|
+
// Single PDP ingest result (Flow 2). idea_id links it back to the fashion idea.
|
|
93
|
+
async ingestShopeeResult(product, meta = {}) {
|
|
94
|
+
return this.request('POST', '/products/worker-ingest', { product, ...meta });
|
|
95
|
+
}
|
|
96
|
+
|
|
87
97
|
// Return the calling daemon's own Worker doc — primarily for reading
|
|
88
98
|
// parallel_limit (the per-daemon scene-generation concurrency cap that
|
|
89
99
|
// replaced the legacy global flowkit_max_concurrent setting).
|
package/lib/command-poller.js
CHANGED
|
@@ -104,6 +104,12 @@ class CommandPoller {
|
|
|
104
104
|
case 'set_profile_proxy':
|
|
105
105
|
await this.handleSetProfileProxy(command);
|
|
106
106
|
break;
|
|
107
|
+
case 'scrape_affiliate_products':
|
|
108
|
+
await this.handleScrapeAffiliateProducts(command);
|
|
109
|
+
break;
|
|
110
|
+
case 'ingest_shopee_product':
|
|
111
|
+
await this.handleIngestShopeeProduct(command);
|
|
112
|
+
break;
|
|
107
113
|
default:
|
|
108
114
|
// Playwright-based pipeline: any command whose type ends in '_pw'
|
|
109
115
|
// is routed to scripts/<base>.js (BrowserClaw-style automation
|
|
@@ -129,6 +135,106 @@ class CommandPoller {
|
|
|
129
135
|
// for upload + page-management tasks). Command type 'upload_youtube_pw' maps
|
|
130
136
|
// to scripts/upload_youtube.js, etc. The script handles its own profile
|
|
131
137
|
// launch via NST + CDP attach; the daemon orchestrates + ships the result.
|
|
138
|
+
// Lazy-init the NST manager (shared with the _pw + launch handlers).
|
|
139
|
+
async _ensureNst(command) {
|
|
140
|
+
if (this.nst) return true;
|
|
141
|
+
try {
|
|
142
|
+
const apiKey = await this.api.getSetting('nst_api_key');
|
|
143
|
+
if (apiKey) { const NstManager = require('./nst-manager'); this.nst = new NstManager(apiKey); }
|
|
144
|
+
} catch {}
|
|
145
|
+
if (!this.nst) {
|
|
146
|
+
await this.api.updateCommand(command._id, { status: 'failed', error: 'NST API key not configured' });
|
|
147
|
+
return false;
|
|
148
|
+
}
|
|
149
|
+
return true;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
// Ensure a profile (by name) is running and return a puppeteer-core browser
|
|
153
|
+
// attached over CDP via its local debug port. Worker runs ON the same box as
|
|
154
|
+
// Nstbrowser, so localhost:<remoteDebuggingPort> is reachable directly.
|
|
155
|
+
async _connectNstProfileByName(name) {
|
|
156
|
+
const puppeteer = require('puppeteer-core');
|
|
157
|
+
const profileId = await this.nst.findProfile(name);
|
|
158
|
+
if (!profileId) throw new Error(`NST profile "${name}" not found`);
|
|
159
|
+
let running = (await this.nst.getRunningBrowsers()).find(b => b.profileId === profileId);
|
|
160
|
+
if (!running) {
|
|
161
|
+
await this.nst.launchProfile(profileId);
|
|
162
|
+
// brief wait for the debug port to come up
|
|
163
|
+
for (let i = 0; i < 10 && !running; i++) {
|
|
164
|
+
await new Promise(r => setTimeout(r, 1500));
|
|
165
|
+
running = (await this.nst.getRunningBrowsers()).find(b => b.profileId === profileId);
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
const port = running?.remoteDebuggingPort;
|
|
169
|
+
if (!port) throw new Error(`No debug port for profile "${name}" (launch failed?)`);
|
|
170
|
+
const browser = await puppeteer.connect({ browserURL: `http://127.0.0.1:${port}`, defaultViewport: null });
|
|
171
|
+
return { browser, disconnect: () => browser.disconnect() };
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
// Flow 1 — quét affiliate offer list (sort theo hoa hồng) → upsert vào kho.
|
|
175
|
+
async handleScrapeAffiliateProducts(command) {
|
|
176
|
+
const payload = command.payload || {};
|
|
177
|
+
if (!(await this._ensureNst(command))) return;
|
|
178
|
+
const scraper = require('./shopee-scraper');
|
|
179
|
+
const profileName = payload.profile_name || (await this.api.getSetting('shopee_affiliate_profile').catch(() => null)) || 'Shopee1';
|
|
180
|
+
let conn;
|
|
181
|
+
try {
|
|
182
|
+
conn = await this._connectNstProfileByName(profileName);
|
|
183
|
+
const res = await scraper.scrapeOffers(conn.browser, {
|
|
184
|
+
category: payload.category || 'women_clothes', // Tier-1 default: quần áo nữ
|
|
185
|
+
match_id: payload.match_id || null,
|
|
186
|
+
sort_types: payload.sort_types || [2, 3], // commission + sales, merged
|
|
187
|
+
pages: payload.pages ?? 2,
|
|
188
|
+
page_limit: payload.page_limit ?? 20,
|
|
189
|
+
delay_ms: payload.delay_ms ?? 2500, // human-like pacing (anti-bot)
|
|
190
|
+
filters: payload.filters || {},
|
|
191
|
+
});
|
|
192
|
+
if (res.status === 'needs_verify') {
|
|
193
|
+
// Anti-bot bounce — DON'T fail/retry into more captchas. Report cleanly
|
|
194
|
+
// so the dashboard can prompt the operator to re-verify via VNC.
|
|
195
|
+
console.warn(`[shopee] needs_verify: ${res.reason}`);
|
|
196
|
+
if (res.products?.length) await this.api.upsertAffiliateProducts(res.products, { user_id: command.user_id });
|
|
197
|
+
await this.api.updateCommand(command._id, { status: 'done', result: { needs_verify: true, reason: res.reason, count: res.products?.length || 0 } });
|
|
198
|
+
return;
|
|
199
|
+
}
|
|
200
|
+
console.log(`[shopee] scraped ${res.raw_count} raw → ${res.products.length} after filters from ${profileName}`);
|
|
201
|
+
await this.api.upsertAffiliateProducts(res.products, { user_id: command.user_id });
|
|
202
|
+
await this.api.updateCommand(command._id, { status: 'done', result: { count: res.products.length, raw: res.raw_count } });
|
|
203
|
+
} catch (err) {
|
|
204
|
+
console.error(`[shopee] scrape failed: ${err.message}`);
|
|
205
|
+
await this.api.updateCommand(command._id, { status: 'failed', error: String(err.message || err).slice(0, 500) });
|
|
206
|
+
} finally {
|
|
207
|
+
if (conn) conn.disconnect();
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// Flow 2 — ingest 1 sản phẩm từ link bất kỳ (bắt PDP get_pc) → full product.
|
|
212
|
+
async handleIngestShopeeProduct(command) {
|
|
213
|
+
const payload = command.payload || {};
|
|
214
|
+
if (!(await this._ensureNst(command))) return;
|
|
215
|
+
const scraper = require('./shopee-scraper');
|
|
216
|
+
const profileName = payload.profile_name || (await this.api.getSetting('shopee_shopping_profile').catch(() => null)) || 'Shopee1';
|
|
217
|
+
let ids = (payload.shop_id && payload.item_id) ? { shop_id: payload.shop_id, item_id: payload.item_id } : null;
|
|
218
|
+
if (!ids && payload.url) ids = scraper.parseProductUrl(payload.url);
|
|
219
|
+
if (!ids) {
|
|
220
|
+
await this.api.updateCommand(command._id, { status: 'failed', error: 'need {shop_id,item_id} or a parseable {url}' });
|
|
221
|
+
return;
|
|
222
|
+
}
|
|
223
|
+
let conn;
|
|
224
|
+
try {
|
|
225
|
+
conn = await this._connectNstProfileByName(profileName);
|
|
226
|
+
const product = await scraper.ingestProduct(conn.browser, ids);
|
|
227
|
+
console.log(`[shopee] ingested: ${product.name} (${product.images.length} imgs)`);
|
|
228
|
+
await this.api.ingestShopeeResult(product, { user_id: command.user_id, idea_id: payload.idea_id || null });
|
|
229
|
+
await this.api.updateCommand(command._id, { status: 'done', result: { name: product.name, images: product.images.length } });
|
|
230
|
+
} catch (err) {
|
|
231
|
+
console.error(`[shopee] ingest failed: ${err.message}`);
|
|
232
|
+
await this.api.updateCommand(command._id, { status: 'failed', error: String(err.message || err).slice(0, 500) });
|
|
233
|
+
} finally {
|
|
234
|
+
if (conn) conn.disconnect();
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
|
|
132
238
|
async handlePlaywrightCommand(command) {
|
|
133
239
|
const { runPlaywrightScript } = require('./playwright-runner');
|
|
134
240
|
const payload = command.payload || {};
|
|
package/lib/shopee-scraper.js
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
// Shopee scraper — runs against a logged-in Nstbrowser profile via CDP.
|
|
2
|
+
//
|
|
3
|
+
// Two jobs, both proven against the live "Shopee1" profile on relabs03:
|
|
4
|
+
// 1. ingestProduct — open a PDP, capture the api/v4/pdp/get_pc response the
|
|
5
|
+
// page itself fires (signed headers we can't forge by hand) → full product.
|
|
6
|
+
// 2. scrapeOffers — in-page fetch of affiliate.shopee.vn offer list (only
|
|
7
|
+
// needs the session cookie) → products + commission %, sorted/paginated.
|
|
8
|
+
//
|
|
9
|
+
// Pattern mirrors the cookie-bridge XHS capture: don't replay the API by hand,
|
|
10
|
+
// let the real page sign it and grab the response at the network layer.
|
|
11
|
+
|
|
12
|
+
const IMG_BASE = 'https://down-vn.img.susercontent.com/file/';
|
|
13
|
+
const imgUrl = (h) => (h && !/^https?:/.test(h) ? IMG_BASE + h : h);
|
|
14
|
+
|
|
15
|
+
// Map a Shopee breadcrumb / fe_category path → our coarse bucket. Keyword match
|
|
16
|
+
// is intentionally loose (VN + EN) so new sub-categories still land somewhere.
|
|
17
|
+
// Ordered keyword rules — the FIRST matching pattern wins, so the narrower
// buckets (shoes, beauty, accessory) are tried before the broad fashion one.
// Each pattern is recompiled from its NFC-normalized source so it compares
// like-for-like with the NFC-normalized input in categoryGroup().
const CATEGORY_RULES = [
  ['shoes', /giày|dép|sandal|boot|sneaker|shoe/],
  ['beauty', /son|kem|skincare|makeup|mỹ phẩm|làm đẹp|beauty|nước hoa|dưỡng/],
  ['accessory', /túi|ví|kính|mũ|nón|trang sức|phụ kiện|đồng hồ|thắt lưng|accessor/],
  ['fashion', /thời trang|áo|quần|váy|đầm|đồ|set|fashion|apparel|clothing|đồ ngủ|đồ lót/],
].map(([group, pattern]) => [group, new RegExp(pattern.source.normalize('NFC'))]);

/**
 * Map free text (category path and/or product name) to a coarse bucket.
 *
 * @param {string} [text] - text to classify; null/undefined/non-strings are
 *   coerced to a string instead of throwing.
 * @returns {'shoes'|'beauty'|'accessory'|'fashion'|'other'} the first bucket
 *   whose keyword pattern matches, or 'other' when none does.
 */
function categoryGroup(text = '') {
  // Normalize to NFC so decomposed Vietnamese diacritics (NFD) still match.
  const haystack = String(text ?? '').normalize('NFC').toLowerCase();
  for (const [group, pattern] of CATEGORY_RULES) {
    if (pattern.test(haystack)) return group;
  }
  return 'other';
}
|
|
25
|
+
|
|
26
|
+
// "13%" / "13.5%" / 0.13 → 13 (numeric percent). Tolerant of both encodings.
|
|
27
|
+
/**
 * Convert a commission value to a numeric percent rounded to 2 decimals.
 *
 * Accepted encodings:
 *   - number <= 1          → treated as a fraction (0.13 → 13)
 *   - number > 1           → already a percent (13.5 → 13.5)
 *   - string with '%'      → explicit percent, never rescaled ("0.5%" → 0.5)
 *   - string without '%'   → a decimal below 1 is a fraction ("0.13" → 13),
 *                            anything else is a percent ("13" → 13)
 *
 * @param {number|string|null|undefined} v - raw value.
 * @returns {number} percent, or 0 for null/undefined/unparseable/non-finite input.
 */
function pctToNumber(v) {
  if (v == null) return 0;

  if (typeof v === 'number') {
    // NaN / ±Infinity would otherwise leak through toFixed as NaN / Infinity.
    if (!Number.isFinite(v)) return 0;
    return v <= 1 ? +(v * 100).toFixed(2) : +v.toFixed(2);
  }

  const raw = String(v);
  const hasPercentSign = raw.includes('%');
  const s = raw.replace('%', '').trim();
  const n = Number.parseFloat(s);
  if (!Number.isFinite(n)) return 0;

  // Only bare decimals are fractions; a value written with '%' is already a percent.
  const isFraction = !hasPercentSign && s.includes('.') && n < 1;
  return isFraction ? +(n * 100).toFixed(2) : +n.toFixed(2);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Convert a raw PDP `item` object into the flat product record this module emits.
 *
 * @param {object} item - the `data.item` object captured from the PDP response.
 * @returns {object} product record with ids (as strings), name, description,
 *   brand, category path + coarse group, prices (raw value / 100000), sold
 *   count, rating, cover image, de-duplicated image list, variants, attribute
 *   strings ("name: value") and size-chart image.
 */
function parsePdpItem(item) {
  const toId = (value) => (value == null ? '' : String(value));
  // Resolve the name once so `name` and `category_group` use the same text
  // (title first, falling back to name).
  const name = (item.title || item.name || '').trim();
  const cat = (item.categories || item.fe_categories || [])
    .map((c) => c.display_name)
    .filter(Boolean)
    .join(' > ');
  const cover = imgUrl(item.image);
  const variants = (item.tier_variations || []).map((t) => ({
    name: t.name,
    options: t.options || [],
    images: (t.images || []).map(imgUrl),
  }));
  // Gallery = cover + all variant images; duplicates are removed below.
  const gallery = [cover, ...variants.flatMap((v) => v.images)].filter(Boolean);
  const attributes = (item.attributes || [])
    .map((a) => ({ key: a?.name ?? '', value: a?.value ?? '' }))
    // Drop attributes with neither a name nor a value (missing fields included).
    .filter(({ key, value }) => key !== '' || value !== '')
    .map(({ key, value }) => `${key}: ${value}`);

  return {
    // Missing ids become '' rather than the literal string "undefined".
    shop_id: toId(item.shop_id),
    item_id: toId(item.item_id),
    name,
    description: (item.description || '').trim(),
    brand: item.brand || '',
    category: cat,
    category_group: categoryGroup(`${cat} ${name}`),
    price: (item.price || 0) / 100000,
    price_before_discount: (item.price_before_discount || 0) / 100000,
    sold_count: item.historical_sold ?? item.sold ?? null,
    rating: item.item_rating?.rating_star ?? null,
    cover_image: cover,
    images: [...new Set(gallery)],
    variants,
    attributes,
    size_chart_image: imgUrl(item.size_chart),
  };
}
|
|
65
|
+
|
|
66
|
+
// Open the PDP and resolve with the get_pc payload the page fires. We listen at
|
|
67
|
+
// the response layer because a hand-rolled fetch gets error 90309999 (no sig).
|
|
68
|
+
/**
 * Open a product page in a new tab and resolve with the parsed product taken
 * from the `/api/v4/pdp/get_pc` response the page itself triggers.
 *
 * @param {object} browser - attached browser exposing `newPage()`.
 * @param {object} [opts]
 * @param {string|number} opts.shop_id - shop id (required).
 * @param {string|number} opts.item_id - item id (required).
 * @param {number} [opts.timeoutMs=60000] - used both as the navigation timeout
 *   and as the maximum wait for the get_pc response.
 * @returns {Promise<object>} parsed product, with `product_url` added.
 * @throws {Error} when ids are missing or no get_pc payload is captured in time.
 */
async function ingestProduct(browser, { shop_id, item_id, timeoutMs = 60000 } = {}) {
  if (!shop_id || !item_id) throw new Error('ingestProduct needs shop_id + item_id');
  // Encode the ids so unexpected characters cannot alter the URL path.
  const productUrl = `https://shopee.vn/product/${encodeURIComponent(String(shop_id))}/${encodeURIComponent(String(item_id))}`;
  const page = await browser.newPage();
  let timer = null;
  try {
    const captured = new Promise((resolve) => {
      // Register the listener BEFORE navigating so the response is not missed.
      page.on('response', async (res) => {
        if (!res.url().includes('/api/v4/pdp/get_pc')) return;
        try {
          const j = await res.json();
          if (j?.data?.item) resolve(j.data.item);
        } catch { /* body not readable as JSON — keep waiting */ }
      });
      timer = setTimeout(() => resolve(null), timeoutMs);
    });
    await page.goto(productUrl, { waitUntil: 'domcontentloaded', timeout: timeoutMs });
    const item = await captured;
    if (!item) throw new Error('PDP get_pc not captured (login expired or product gone)');
    const parsed = parsePdpItem(item);
    parsed.product_url = productUrl;
    return parsed;
  } finally {
    // Don't leave the timeout pending (and holding the page) after we are done.
    clearTimeout(timer);
    await page.close().catch(() => {});
  }
}
|
|
89
|
+
|
|
90
|
+
// Parse one row of /api/v3/offer/product/list. Price fields come back as
|
|
91
|
+
// STRINGS in the *100000 scale (e.g. "10000100000" = 100,001 VND), same as PDP.
|
|
92
|
+
/**
 * Convert one row of the affiliate offer list into the flat product record.
 *
 * @param {object} row - raw offer row; the product card is read from
 *   `batch_item_for_item_card_full` or `item_card_displayed_asset`.
 * @returns {object} product record: ids (strings, '' when absent), name,
 *   coarse category group (from the name), product/affiliate links, commission
 *   rates (percent), estimated commission value, prices (raw value / 100000),
 *   sold count, rating, shop name, cover image and de-duplicated image list.
 */
function parseOfferRow(row) {
  const card = row.batch_item_for_item_card_full || row.item_card_displayed_asset || {};
  const shopId = String(card.shopid || card.shop_id || '');
  const itemId = String(row.item_id || card.itemid || card.item_id || '');
  const rate = pctToNumber(row.default_commission_rate ?? row.max_commission_rate ?? row.commission_rate);
  const sellerRate = pctToNumber(row.seller_commission_rate);
  const price = (Number(card.price || card.price_min) || 0) / 100000;
  const priceBefore = (Number(card.price_before_discount || card.price_min_before_discount) || 0) / 100000;
  const cover = card.image ? imgUrl(card.image) : '';
  const gallery = (card.images || []).map(imgUrl).filter(Boolean);
  const name = (card.name || '').trim();
  // Coerce before toFixed: a string rating would otherwise throw a TypeError.
  const ratingStar = Number(card.item_rating?.rating_star);
  const rating = Number.isFinite(ratingStar) && ratingStar !== 0 ? +ratingStar.toFixed(2) : null;

  return {
    shop_id: shopId,
    item_id: itemId,
    name,
    category_group: categoryGroup(name),
    product_url: row.product_link || (shopId && itemId ? `https://shopee.vn/product/${shopId}/${itemId}` : ''),
    affiliate_link: row.long_link || row.offer_link || '',
    commission_rate: rate,
    commission_seller_rate: sellerRate,
    commission_value: price && rate ? Math.round(price * rate / 100) : null,
    price,
    price_before_discount: priceBefore,
    sold_count: card.historical_sold ?? card.sold ?? null,
    rating,
    shop_name: card.shop_name || '',
    cover_image: cover,
    images: gallery.length ? [...new Set([cover, ...gallery])].filter(Boolean) : (cover ? [cover] : []),
  };
}
|
|
122
|
+
|
|
123
|
+
// Affiliate offer-page category tabs → match_id (read from rc-tabs node keys,
|
|
124
|
+
// confirmed live). Tier-1 fashion focus first; the offer page has NO dedicated
|
|
125
|
+
// Shoes/Bags/Accessories tab → those come later via keyword search.
|
|
126
|
+
const AFFILIATE_CATEGORIES = {
|
|
127
|
+
women_clothes: { match_id: 100017, label: 'Quần áo nữ', group: 'fashion' },
|
|
128
|
+
beauty: { match_id: 100630, label: 'Làm đẹp', group: 'beauty' },
|
|
129
|
+
home_living: { match_id: 100636, label: 'Nhà cửa', group: 'other' },
|
|
130
|
+
grocery: { match_id: 100629, label: 'Tạp hoá', group: 'other' },
|
|
131
|
+
};
|
|
132
|
+
|
|
133
|
+
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
|
|
134
|
+
|
|
135
|
+
// True when the session got bounced to the anti-bot captcha (scene=crawler_item)
|
|
136
|
+
// or logged out — in either case the offer API returns 404/empty and we must
|
|
137
|
+
// STOP (not keep hammering) and ask the operator to re-verify via VNC.
|
|
138
|
+
/**
 * Decide whether the session looks blocked.
 *
 * @param {string} [pageUrl] - current page URL; blocked when it contains a
 *   captcha-verify path, a buyer-login path, or the `is_from_login` marker.
 * @param {{status: number}|null} [res] - optional fetch result; blocked when
 *   its HTTP status is 403 or 404.
 * @returns {boolean} true when either signal indicates a blocked session.
 */
function isBlockedState(pageUrl, res) {
  const url = pageUrl || '';
  const bouncedByUrl = /\/verify\/captcha|\/buyer\/login|is_from_login/.test(url);
  if (bouncedByUrl) return true;
  if (!res) return false;
  return [403, 404].includes(res.status);
}
|
|
143
|
+
|
|
144
|
+
// Apply the Flow-1 hard filters (criteria) to parsed rows. Defaults are lenient;
|
|
145
|
+
// the controller passes the agreed thresholds (commission/price/sold/rating/imgs).
|
|
146
|
+
/**
 * Keep only the rows that satisfy every threshold present in `f`.
 * A threshold that is null/undefined is not applied.
 *
 * @param {Array<object>} rows - parsed product rows.
 * @param {object} [f] - optional thresholds: `category_groups` (allow-list),
 *   `min_commission`, `price_min`, `price_max`, `min_sold`, `min_rating`,
 *   `min_images`.
 * @returns {Array<object>} the rows that pass, in their original order.
 */
function applyFilters(rows, f = {}) {
  // Each predicate answers "should this row be rejected?".
  const rejections = [
    (p) => Boolean(f.category_groups?.length) && !f.category_groups.includes(p.category_group),
    (p) => f.min_commission != null && (p.commission_rate || 0) < f.min_commission,
    (p) => f.price_min != null && (p.price || 0) < f.price_min,
    (p) => f.price_max != null && (p.price || 0) > f.price_max,
    (p) => f.min_sold != null && (p.sold_count || 0) < f.min_sold,
    // Rows with an unknown (null) rating are not rejected by min_rating.
    (p) => f.min_rating != null && p.rating != null && p.rating < f.min_rating,
    (p) => f.min_images != null && (p.images?.length || 0) < f.min_images,
  ];
  return rows.filter((p) => !rejections.some((rejects) => rejects(p)));
}
|
|
158
|
+
|
|
159
|
+
// In-page fetch the affiliate offer list — anti-bot aware. Pulls one or more
|
|
160
|
+
// sort_types (2=commission, 3=sales), paginates gently with a human-like delay,
|
|
161
|
+
// merges + dedups by item_id, then applies hard filters. Returns
|
|
162
|
+
// { status: 'ok'|'needs_verify', products: [...], raw_count }
|
|
163
|
+
// Never throws on a blocked session — returns status:'needs_verify' so the
|
|
164
|
+
// worker reports it cleanly instead of failing + retrying into more captchas.
|
|
165
|
+
/**
 * Fetch the affiliate offer list from inside an affiliate.shopee.vn page,
 * for one or more sort types and pages, merge rows by `item_id`, and apply
 * the hard filters.
 *
 * A blocked session does not throw: the function returns
 * `status: 'needs_verify'` together with whatever was collected so far.
 * Both the 'ok' and the 'needs_verify' results carry FILTERED products.
 *
 * @param {object} browser - attached browser exposing `pages()` / `newPage()`.
 * @param {object} [opts]
 * @param {string|null} [opts.category] - key in AFFILIATE_CATEGORIES.
 * @param {number|null} [opts.match_id] - explicit category id; wins over `category`.
 * @param {number[]|number} [opts.sort_types=[2]] - sort types to pull and merge.
 * @param {number} [opts.pages=2] - max pages per sort type.
 * @param {number} [opts.page_limit=20] - rows per page.
 * @param {number} [opts.delay_ms=2500] - pause between requests and between sort passes.
 * @param {object} [opts.filters] - thresholds forwarded to applyFilters; when a
 *   known `category` is given and no `category_groups` filter is set, it
 *   defaults to that category's own group.
 * @param {number} [opts.timeoutMs=45000] - navigation timeout.
 * @param {number|null} [opts.sort_type] - legacy single sort type; overrides `sort_types`.
 * @returns {Promise<{status: 'ok'|'needs_verify', products: object[], raw_count: number, reason?: string}>}
 */
async function scrapeOffers(browser, {
  category = null,
  match_id = null,
  sort_types = [2],
  pages = 2,
  page_limit = 20,
  delay_ms = 2500,
  filters = {},
  timeoutMs = 45000,
  // legacy single-sort param (back-compat with the first build)
  sort_type = null,
} = {}) {
  // Accept a single value as well as an array so a scalar does not throw in for...of.
  const requestedSorts = sort_type != null ? sort_type : sort_types;
  const sortList = Array.isArray(requestedSorts) ? requestedSorts : [requestedSorts ?? 2];

  const cat = category && AFFILIATE_CATEGORIES[category] ? AFFILIATE_CATEGORIES[category] : null;
  const mid = match_id || cat?.match_id || null;

  // Default the category_groups filter to the category's own group unless the
  // caller supplied one.
  const baseFilters = filters || {};
  const effectiveFilters = cat && !baseFilters.category_groups
    ? { ...baseFilters, category_groups: [cat.group] }
    : baseFilters;

  const pagesOpen = await browser.pages();
  let page = pagesOpen.find((p) => p.url().includes('affiliate.shopee.vn'));
  let opened = false;
  if (!page) {
    page = await browser.newPage();
    opened = true;
  }

  try {
    if (!page.url().includes('affiliate.shopee.vn/offer')) {
      // Best-effort navigation: a failure here is caught by the blocked-state
      // check or by the first fetch below.
      await page.goto('https://affiliate.shopee.vn/offer/product_offer', { waitUntil: 'networkidle2', timeout: timeoutMs }).catch(() => {});
      await sleep(1500);
    }
    if (isBlockedState(page.url(), null)) {
      return { status: 'needs_verify', products: [], raw_count: 0, reason: 'session at captcha/login — re-verify via VNC' };
    }

    const byId = new Map();
    let rawCount = 0;
    // Same filtering for partial and complete results, so rows returned on a
    // mid-scrape block obey the hard filters too.
    const filteredSoFar = () => applyFilters([...byId.values()], effectiveFilters);

    for (const st of sortList) {
      for (let i = 0; i < pages; i++) {
        const offset = i * page_limit;
        // Runs inside the page so the request carries the session cookies.
        const res = await page.evaluate(async (q) => {
          const cat = q.mid ? `&list_type=3&match_type=2&match_id=${q.mid}` : '&list_type=0';
          const url = `/api/v3/offer/product/list?sort_type=${q.st}&page_offset=${q.offset}&page_limit=${q.page_limit}&client_type=1${cat}`;
          try {
            const r = await fetch(url, { credentials: 'include' });
            return { status: r.status, json: await r.json().catch(() => null) };
          } catch (e) { return { status: 0, error: String(e.message) }; }
        }, { st, offset, page_limit, mid });

        if (isBlockedState(page.url(), res) || res.json?.code === 90309999) {
          return { status: 'needs_verify', products: filteredSoFar(), raw_count: rawCount, reason: `blocked mid-scrape (http ${res.status}, code ${res.json?.code})` };
        }
        if (res.status !== 200 || res.json?.code !== 0) {
          // soft stop — treat as end-of-data for this sort, move on
          break;
        }
        const list = res.json?.data?.list || [];
        rawCount += list.length;
        for (const row of list.map(parseOfferRow)) {
          // First occurrence wins when the same item shows up under several sorts.
          if (row.item_id && !byId.has(row.item_id)) byId.set(row.item_id, row);
        }
        if (list.length < page_limit) break; // ran out
        await sleep(delay_ms); // pace between page fetches
      }
      await sleep(delay_ms); // pace between sort passes
    }

    return { status: 'ok', products: filteredSoFar(), raw_count: rawCount };
  } finally {
    // Only close the tab if this call opened it.
    if (opened) await page.close().catch(() => {});
  }
}
|
|
236
|
+
|
|
237
|
+
// Parse a Shopee product URL → { shop_id, item_id }. Accepts
|
|
238
|
+
// /product/<shop>/<item> and i.<shop>.<item> forms.
|
|
239
|
+
/**
 * Extract `{ shop_id, item_id }` from a Shopee product URL.
 *
 * Supported forms:
 *   - `/product/<shop>/<item>`
 *   - `...-i.<shop>.<item>` (the `i.` marker must start at a word boundary,
 *     so text such as `wifi.6.0` is not mistaken for ids)
 *
 * @param {string} [url] - URL or path; null/undefined/non-strings yield null.
 * @returns {{shop_id: string, item_id: string}|null} ids as digit strings, or
 *   null when neither form is found.
 */
function parseProductUrl(url = '') {
  if (typeof url !== 'string') return null;
  const patterns = [/\/product\/(\d+)\/(\d+)/, /\bi\.(\d+)\.(\d+)/];
  for (const pattern of patterns) {
    const m = url.match(pattern);
    if (m) return { shop_id: m[1], item_id: m[2] };
  }
  return null;
}
|
|
246
|
+
|
|
247
|
+
module.exports = { ingestProduct, scrapeOffers, parseProductUrl, parseOfferRow, parsePdpItem, categoryGroup, pctToNumber, AFFILIATE_CATEGORIES, applyFilters };
|
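package/package.json
CHANGED
(hunk not shown in this listing; +1 -1 per the file summary above)
package/scripts/upload_facebook.js
CHANGED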
|
@@ -187,7 +187,7 @@ async function run({ page, payload, log }) {
|
|
|
187
187
|
} = payload || {};
|
|
188
188
|
if (!video_url) throw new Error('No video_url provided');
|
|
189
189
|
|
|
190
|
-
log('info', '[fb-pw] selectors version=2026.06.
|
|
190
|
+
log('info', '[fb-pw] selectors version=2026.06.12a-thumb-miss-soft');
|
|
191
191
|
|
|
192
192
|
page.on('dialog', (d) => { d.accept().catch(() => {}); });
|
|
193
193
|
|
|
@@ -820,12 +820,18 @@ async function run({ page, payload, log }) {
|
|
|
820
820
|
// attach a custom thumbnail in the Page-wall composer.
|
|
821
821
|
if (!customThumbDone && thumbPath && /^https?:\/\/(www\.)?facebook\.com\/?($|\?|#)/.test(page.url())) {
|
|
822
822
|
const editBtn = await page.evaluate(() => {
|
|
823
|
-
// Look for the thumbnail-edit button inside
|
|
824
|
-
//
|
|
825
|
-
//
|
|
826
|
-
//
|
|
827
|
-
//
|
|
828
|
-
const
|
|
823
|
+
// Look for the thumbnail-edit button inside the REEL COMPOSER dialog
|
|
824
|
+
// only — scanning all dialogs risked matching a button on the
|
|
825
|
+
// page-wall photo composer (which would post a separate "Tin dạng
|
|
826
|
+
// ảnh" alongside the Reel). The Reel composer dialog contains the
|
|
827
|
+
// header text "Tạo thước phim" / "Create reel".
|
|
828
|
+
const allDlgs = document.querySelectorAll("[role='dialog']");
|
|
829
|
+
const dlgs = [];
|
|
830
|
+
for (const d of allDlgs) {
|
|
831
|
+
const txt = (d.innerText || '').slice(0, 400);
|
|
832
|
+
if (/Tạo thước phim|Create reel|Create a reel/i.test(txt)) dlgs.push(d);
|
|
833
|
+
}
|
|
834
|
+
if (dlgs.length === 0) return null;
|
|
829
835
|
for (const dlg of dlgs) {
|
|
830
836
|
const r = dlg.getBoundingClientRect();
|
|
831
837
|
if (r.width < 8 || r.height < 8) continue;
|
|
@@ -1001,9 +1007,15 @@ async function run({ page, payload, log }) {
|
|
|
1001
1007
|
}
|
|
1002
1008
|
|
|
1003
1009
|
// Thumbnail step handling (BS composer legacy).
|
|
1004
|
-
|
|
1010
|
+
// SAFETY GATE — only run on business.facebook.com. On the Page-wall flow
|
|
1011
|
+
// (facebook.com root), the page-wide setInputFiles selectors below would
|
|
1012
|
+
// match the regular "Tạo bài viết" photo composer's hidden file input
|
|
1013
|
+
// and accidentally publish a separate "Tin dạng ảnh" post alongside the
|
|
1014
|
+
// Reel (observed bug: 2 posts at same minute — one image-only, one Reel).
|
|
1015
|
+
const isBSComposer = /^https?:\/\/business\.facebook\.com/.test(page.url());
|
|
1016
|
+
const onThumbStep = isBSComposer ? await firstVisible(page.locator(
|
|
1005
1017
|
"[aria-label='Hình thu nhỏ tạo tự động 1'], [aria-label*='Auto-generated thumbnail'], div[role='button']:has-text('Tải hình ảnh lên'), div[role='button']:has-text('Upload image')"
|
|
1006
|
-
), 3);
|
|
1018
|
+
), 3) : null;
|
|
1007
1019
|
if (onThumbStep && !customThumbDone) {
|
|
1008
1020
|
if (thumbPath) {
|
|
1009
1021
|
log('info', '[fb-pw] thumbnail step — uploading custom thumb…');
|
|
@@ -1314,9 +1326,12 @@ async function run({ page, payload, log }) {
|
|
|
1314
1326
|
// output. (Observed on reel 1506614811005729: step 1 → click Tiếp
|
|
1315
1327
|
// → publish, the thumb-edit pill never appeared.)
|
|
1316
1328
|
if (thumbPath && !customThumbDone) {
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1329
|
+
// Custom thumb couldn't be applied (FB didn't render the "Chỉnh sửa
|
|
1330
|
+
// hình thu nhỏ" overlay). DON'T refuse to publish — the strict
|
|
1331
|
+
// failure shipped false "failed" results (the reel still published)
|
|
1332
|
+
// and double-posts on retry. Publish with FB's auto-thumbnail; the
|
|
1333
|
+
// custom artwork is a nice-to-have, not worth a hard failure.
|
|
1334
|
+
log('warn', `[fb-pw] custom thumb skipped — "Chỉnh sửa hình thu nhỏ" overlay never appeared (step ${step + 1}); publishing with FB auto-thumb`);
|
|
1320
1335
|
}
|
|
1321
1336
|
log('info', `[fb-pw] click publish "${pub.verb}" via "${pub.sel}" (step ${step + 1})`);
|
|
1322
1337
|
// Snapshot the captured-IDs list RIGHT BEFORE the publish click. The
|