channel-worker 2.5.13 → 2.5.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/command-poller.js +18 -10
- package/lib/shopee-scraper.js +102 -20
- package/package.json +1 -1
- package/scripts/upload_facebook.js +7 -4
package/lib/command-poller.js
CHANGED
|
@@ -180,18 +180,26 @@ class CommandPoller {
|
|
|
180
180
|
let conn;
|
|
181
181
|
try {
|
|
182
182
|
conn = await this._connectNstProfileByName(profileName);
|
|
183
|
-
const
|
|
184
|
-
|
|
183
|
+
const res = await scraper.scrapeOffers(conn.browser, {
|
|
184
|
+
category: payload.category || 'women_clothes', // Tier-1 default: quần áo nữ
|
|
185
|
+
match_id: payload.match_id || null,
|
|
186
|
+
sort_types: payload.sort_types || [2, 3], // commission + sales, merged
|
|
187
|
+
pages: payload.pages ?? 2,
|
|
185
188
|
page_limit: payload.page_limit ?? 20,
|
|
186
|
-
|
|
187
|
-
|
|
189
|
+
delay_ms: payload.delay_ms ?? 2500, // human-like pacing (anti-bot)
|
|
190
|
+
filters: payload.filters || {},
|
|
188
191
|
});
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
192
|
+
if (res.status === 'needs_verify') {
|
|
193
|
+
// Anti-bot bounce — DON'T fail/retry into more captchas. Report cleanly
|
|
194
|
+
// so the dashboard can prompt the operator to re-verify via VNC.
|
|
195
|
+
console.warn(`[shopee] needs_verify: ${res.reason}`);
|
|
196
|
+
if (res.products?.length) await this.api.upsertAffiliateProducts(res.products, { user_id: command.user_id });
|
|
197
|
+
await this.api.updateCommand(command._id, { status: 'done', result: { needs_verify: true, reason: res.reason, count: res.products?.length || 0 } });
|
|
198
|
+
return;
|
|
199
|
+
}
|
|
200
|
+
console.log(`[shopee] scraped ${res.raw_count} raw → ${res.products.length} after filters from ${profileName}`);
|
|
201
|
+
await this.api.upsertAffiliateProducts(res.products, { user_id: command.user_id });
|
|
202
|
+
await this.api.updateCommand(command._id, { status: 'done', result: { count: res.products.length, raw: res.raw_count } });
|
|
195
203
|
} catch (err) {
|
|
196
204
|
console.error(`[shopee] scrape failed: ${err.message}`);
|
|
197
205
|
await this.api.updateCommand(command._id, { status: 'failed', error: String(err.message || err).slice(0, 500) });
|
package/lib/shopee-scraper.js
CHANGED
|
@@ -120,33 +120,115 @@ function parseOfferRow(row) {
|
|
|
120
120
|
};
|
|
121
121
|
}
|
|
122
122
|
|
|
123
|
-
//
|
|
124
|
-
//
|
|
125
|
-
|
|
123
|
+
// Category tabs of the affiliate offer page, keyed by the slug callers pass as
// `category`. Each entry carries the tab's `match_id` (per the original author:
// read from the rc-tabs node keys and confirmed live), its display label, and
// the coarse group it belongs to.
// Tier-1 focus is fashion. The offer page has no dedicated
// Shoes/Bags/Accessories tab, so those are meant to be reached later through
// keyword search rather than through this table.
const AFFILIATE_CATEGORIES = {
  women_clothes: {
    match_id: 100017,
    label: 'Quần áo nữ',
    group: 'fashion',
  },
  beauty: {
    match_id: 100630,
    label: 'Làm đẹp',
    group: 'beauty',
  },
  home_living: {
    match_id: 100636,
    label: 'Nhà cửa',
    group: 'other',
  },
  grocery: {
    match_id: 100629,
    label: 'Tạp hoá',
    group: 'other',
  },
};
|
|
132
|
+
|
|
133
|
+
// Promise-based pause: resolves (with no value) once `ms` milliseconds have elapsed.
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
|
|
134
|
+
|
|
135
|
+
// True when the session got bounced to the anti-bot captcha (scene=crawler_item)
|
|
136
|
+
// or logged out — in either case the offer API returns 404/empty and we must
|
|
137
|
+
// STOP (not keep hammering) and ask the operator to re-verify via VNC.
|
|
138
|
+
/**
 * Report whether the session looks blocked, judging by the current page URL
 * and, optionally, the result of the last API call.
 *
 * The session counts as blocked when the URL contains a captcha-verification
 * path, the buyer login path, or the `is_from_login` marker, or when `res`
 * carries HTTP status 403 or 404.
 *
 * @param {string|null|undefined} pageUrl - URL the page is currently on; a
 *   falsy value is treated as the empty string.
 * @param {{status?: number}|null|undefined} res - Last fetch result, or a
 *   falsy value when only the URL should be inspected.
 * @returns {boolean} `true` when the caller should stop and ask for
 *   re-verification, `false` otherwise.
 */
function isBlockedState(pageUrl, res) {
  const currentUrl = pageUrl || '';
  const onBlockedPage = /\/verify\/captcha|\/buyer\/login|is_from_login/.test(currentUrl);
  if (onBlockedPage) {
    return true;
  }
  // Only look at the status when a result object was actually supplied.
  const httpStatus = res ? res.status : undefined;
  return httpStatus === 403 || httpStatus === 404;
}
|
|
143
|
+
|
|
144
|
+
// Apply the Flow-1 hard filters (criteria) to parsed rows. Defaults are lenient;
|
|
145
|
+
// the controller passes the agreed thresholds (commission/price/sold/rating/imgs).
|
|
146
|
+
/**
 * Apply the Flow-1 hard filters (criteria) to parsed offer rows.
 *
 * Every criterion is optional; one that is `null`/`undefined` is not checked.
 * A numeric field missing from a row is treated as 0 for comparison, with one
 * exception: a row whose `rating` is `null`/`undefined` is never rejected by
 * `min_rating`.
 *
 * @param {Array<object>|null|undefined} rows - Parsed rows; a nullish value
 *   yields an empty result instead of throwing.
 * @param {object|null} [f] - Filter criteria; `null` behaves like `{}`.
 * @param {string[]} [f.category_groups] - When non-empty, keep only rows whose
 *   `category_group` is listed.
 * @param {number} [f.min_commission] - Minimum `commission_rate`.
 * @param {number} [f.price_min] - Minimum `price`.
 * @param {number} [f.price_max] - Maximum `price`.
 * @param {number} [f.min_sold] - Minimum `sold_count`.
 * @param {number} [f.min_rating] - Minimum `rating` (rows without one pass).
 * @param {number} [f.min_images] - Minimum number of entries in `images`.
 * @returns {Array<object>} A new array holding the rows that pass every check.
 */
function applyFilters(rows, f = {}) {
  // The `= {}` default only covers `undefined`; an explicit `null` would
  // otherwise throw on the first property read below.
  const criteria = f ?? {};
  return (rows ?? []).filter((p) => {
    if (criteria.category_groups?.length && !criteria.category_groups.includes(p.category_group)) return false;
    if (criteria.min_commission != null && (p.commission_rate || 0) < criteria.min_commission) return false;
    if (criteria.price_min != null && (p.price || 0) < criteria.price_min) return false;
    if (criteria.price_max != null && (p.price || 0) > criteria.price_max) return false;
    if (criteria.min_sold != null && (p.sold_count || 0) < criteria.min_sold) return false;
    // Rows with no rating are deliberately let through.
    if (criteria.min_rating != null && p.rating != null && p.rating < criteria.min_rating) return false;
    if (criteria.min_images != null && (p.images?.length || 0) < criteria.min_images) return false;
    return true;
  });
}
|
|
158
|
+
|
|
159
|
+
// In-page fetch the affiliate offer list — anti-bot aware. Pulls one or more
|
|
160
|
+
// sort_types (2=commission, 3=sales), paginates gently with a human-like delay,
|
|
161
|
+
// merges + dedups by item_id, then applies hard filters. Returns
|
|
162
|
+
// { status: 'ok'|'needs_verify', products: [...], raw_count }
|
|
163
|
+
// Never throws on a blocked session — returns status:'needs_verify' so the
|
|
164
|
+
// worker reports it cleanly instead of failing + retrying into more captchas.
|
|
165
|
+
async function scrapeOffers(browser, {
|
|
166
|
+
category = null, // key in AFFILIATE_CATEGORIES (e.g. 'women_clothes')
|
|
167
|
+
match_id = null, // explicit category id override
|
|
168
|
+
sort_types = [2], // [2]=commission; pass [2,3] to merge commission+sales
|
|
169
|
+
pages = 2,
|
|
170
|
+
page_limit = 20,
|
|
171
|
+
delay_ms = 2500, // human-like pause between calls (anti-bot)
|
|
172
|
+
filters = {},
|
|
173
|
+
timeoutMs = 45000,
|
|
174
|
+
// legacy single-sort param (back-compat with the first build)
|
|
175
|
+
sort_type = null,
|
|
176
|
+
} = {}) {
|
|
177
|
+
if (sort_type != null) sort_types = [sort_type];
|
|
178
|
+
const cat = category && AFFILIATE_CATEGORIES[category] ? AFFILIATE_CATEGORIES[category] : null;
|
|
179
|
+
const mid = match_id || cat?.match_id || null;
|
|
180
|
+
// Default the category_groups filter to the category's own group (so a
|
|
181
|
+
// Women-Clothes scrape keeps only fashion rows even though the tab mixes in
|
|
182
|
+
// a few accessories), unless the caller overrides.
|
|
183
|
+
if (cat && !filters.category_groups) filters = { ...filters, category_groups: [cat.group] };
|
|
184
|
+
|
|
126
185
|
const pagesOpen = await browser.pages();
|
|
127
186
|
let page = pagesOpen.find((p) => p.url().includes('affiliate.shopee.vn'));
|
|
128
187
|
let opened = false;
|
|
129
188
|
if (!page) { page = await browser.newPage(); opened = true; }
|
|
130
189
|
try {
|
|
131
|
-
if (!page.url().includes('affiliate.shopee.vn')) {
|
|
132
|
-
await page.goto('https://affiliate.shopee.vn/offer/product_offer', { waitUntil: 'networkidle2', timeout: timeoutMs });
|
|
190
|
+
if (!page.url().includes('affiliate.shopee.vn/offer')) {
|
|
191
|
+
await page.goto('https://affiliate.shopee.vn/offer/product_offer', { waitUntil: 'networkidle2', timeout: timeoutMs }).catch(() => {});
|
|
192
|
+
await sleep(1500);
|
|
193
|
+
}
|
|
194
|
+
if (isBlockedState(page.url(), null)) {
|
|
195
|
+
return { status: 'needs_verify', products: [], raw_count: 0, reason: 'session at captcha/login — re-verify via VNC' };
|
|
133
196
|
}
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
const
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
197
|
+
|
|
198
|
+
const byId = new Map();
|
|
199
|
+
let rawCount = 0;
|
|
200
|
+
for (const st of sort_types) {
|
|
201
|
+
for (let i = 0; i < pages; i++) {
|
|
202
|
+
const offset = i * page_limit;
|
|
203
|
+
const res = await page.evaluate(async (q) => {
|
|
204
|
+
const cat = q.mid ? `&list_type=3&match_type=2&match_id=${q.mid}` : '&list_type=0';
|
|
205
|
+
const url = `/api/v3/offer/product/list?sort_type=${q.st}&page_offset=${q.offset}&page_limit=${q.page_limit}&client_type=1${cat}`;
|
|
206
|
+
try {
|
|
207
|
+
const r = await fetch(url, { credentials: 'include' });
|
|
208
|
+
return { status: r.status, json: await r.json().catch(() => null) };
|
|
209
|
+
} catch (e) { return { status: 0, error: String(e.message) }; }
|
|
210
|
+
}, { st, offset, page_limit, mid });
|
|
211
|
+
|
|
212
|
+
if (isBlockedState(page.url(), res) || res.json?.code === 90309999) {
|
|
213
|
+
return { status: 'needs_verify', products: [...byId.values()], raw_count: rawCount, reason: `blocked mid-scrape (http ${res.status}, code ${res.json?.code})` };
|
|
214
|
+
}
|
|
215
|
+
if (res.status !== 200 || res.json?.code !== 0) {
|
|
216
|
+
// soft stop — treat as end-of-data for this sort, move on
|
|
217
|
+
break;
|
|
218
|
+
}
|
|
219
|
+
const list = res.json?.data?.list || [];
|
|
220
|
+
rawCount += list.length;
|
|
221
|
+
for (const row of list.map(parseOfferRow)) {
|
|
222
|
+
if (row.item_id && !byId.has(row.item_id)) byId.set(row.item_id, row);
|
|
223
|
+
}
|
|
224
|
+
if (list.length < page_limit) break; // ran out
|
|
225
|
+
await sleep(delay_ms); // pace between page fetches
|
|
144
226
|
}
|
|
145
|
-
|
|
146
|
-
all.push(...list.map(parseOfferRow).filter((p) => p.item_id));
|
|
147
|
-
if (list.length < page_limit) break; // ran out
|
|
227
|
+
await sleep(delay_ms); // pace between sort passes
|
|
148
228
|
}
|
|
149
|
-
|
|
229
|
+
|
|
230
|
+
const products = applyFilters([...byId.values()], filters);
|
|
231
|
+
return { status: 'ok', products, raw_count: rawCount };
|
|
150
232
|
} finally {
|
|
151
233
|
if (opened) await page.close().catch(() => {});
|
|
152
234
|
}
|
|
@@ -162,4 +244,4 @@ function parseProductUrl(url = '') {
|
|
|
162
244
|
return null;
|
|
163
245
|
}
|
|
164
246
|
|
|
165
|
-
module.exports = { ingestProduct, scrapeOffers, parseProductUrl, parseOfferRow, parsePdpItem, categoryGroup, pctToNumber };
|
|
247
|
+
module.exports = { ingestProduct, scrapeOffers, parseProductUrl, parseOfferRow, parsePdpItem, categoryGroup, pctToNumber, AFFILIATE_CATEGORIES, applyFilters };
|
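package/package.json
CHANGED
(The hunk for this file was not captured in this extract. Per the file list above it is a one-line change, presumably the version bump from 2.5.13 to 2.5.14.)
package/scripts/upload_facebook.js
CHANGED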
|
@@ -187,7 +187,7 @@ async function run({ page, payload, log }) {
|
|
|
187
187
|
} = payload || {};
|
|
188
188
|
if (!video_url) throw new Error('No video_url provided');
|
|
189
189
|
|
|
190
|
-
log('info', '[fb-pw] selectors version=2026.06.
|
|
190
|
+
log('info', '[fb-pw] selectors version=2026.06.12a-thumb-miss-soft');
|
|
191
191
|
|
|
192
192
|
page.on('dialog', (d) => { d.accept().catch(() => {}); });
|
|
193
193
|
|
|
@@ -1326,9 +1326,12 @@ async function run({ page, payload, log }) {
|
|
|
1326
1326
|
// output. (Observed on reel 1506614811005729: step 1 → click Tiếp
|
|
1327
1327
|
// → publish, the thumb-edit pill never appeared.)
|
|
1328
1328
|
if (thumbPath && !customThumbDone) {
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1329
|
+
// Custom thumb couldn't be applied (FB didn't render the "Chỉnh sửa
|
|
1330
|
+
// hình thu nhỏ" overlay). DON'T refuse to publish — the strict
|
|
1331
|
+
// failure shipped false "failed" results (the reel still published)
|
|
1332
|
+
// and double-posts on retry. Publish with FB's auto-thumbnail; the
|
|
1333
|
+
// custom artwork is a nice-to-have, not worth a hard failure.
|
|
1334
|
+
log('warn', `[fb-pw] custom thumb skipped — "Chỉnh sửa hình thu nhỏ" overlay never appeared (step ${step + 1}); publishing with FB auto-thumb`);
|
|
1332
1335
|
}
|
|
1333
1336
|
log('info', `[fb-pw] click publish "${pub.verb}" via "${pub.sel}" (step ${step + 1})`);
|
|
1334
1337
|
// Snapshot the captured-IDs list RIGHT BEFORE the publish click. The
|