amaprice 1.0.9 → 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/scraper.js +100 -0
package/package.json
CHANGED
package/src/scraper.js
CHANGED
|
@@ -7,9 +7,14 @@ const PRICE_SELECTORS = [
|
|
|
7
7
|
'#corePriceDisplay_desktop_feature_div .apex-pricetopay-value .a-offscreen',
|
|
8
8
|
'#corePrice_feature_div .a-price .a-offscreen',
|
|
9
9
|
'#corePriceDisplay_desktop_feature_div .a-price .a-offscreen',
|
|
10
|
+
'#corePrice_feature_div .a-price',
|
|
11
|
+
'#corePriceDisplay_desktop_feature_div .a-price',
|
|
10
12
|
'#buybox .a-price .a-offscreen',
|
|
13
|
+
'#buybox .a-price',
|
|
11
14
|
'#desktop_buybox .a-price .a-offscreen',
|
|
15
|
+
'#desktop_buybox .a-price',
|
|
12
16
|
'#newAccordionRow .a-price .a-offscreen',
|
|
17
|
+
'#newAccordionRow .a-price',
|
|
13
18
|
'#priceblock_ourprice',
|
|
14
19
|
'#priceblock_dealprice',
|
|
15
20
|
];
|
|
@@ -110,6 +115,19 @@ function matchesAny(value, patterns) {
|
|
|
110
115
|
return patterns.some((pattern) => pattern.test(value));
|
|
111
116
|
}
|
|
112
117
|
|
|
118
|
+
/**
 * Turns the body of a JSON string literal (the text found between the quotes
 * in inline page markup) into plain price text.
 *
 * The input is first passed through `cleanPriceText`. JSON escape sequences
 * such as `\u20ac` or `\/` are then decoded with `JSON.parse`, and the decoded
 * text is passed through `cleanPriceText` again. If the text is not a valid
 * JSON string body (for example it ends in a stray backslash or contains a
 * raw control character), the cleaned input is returned undecoded.
 *
 * @param {*} raw - Captured string body; nullish/falsy values are treated as ''.
 * @returns {string} The decoded and cleaned text, or the cleaned input when
 *   decoding is not possible.
 */
function normalizeJsonStringValue(raw) {
  const cleaned = cleanPriceText(String(raw || ''));
  if (!cleaned) return cleaned;
  try {
    // Keep existing escape pairs (backslash + following character) untouched so
    // JSON.parse can decode them; only a bare quote has to be escaped to keep
    // the synthesized literal well-formed. Escaping the backslashes themselves
    // would turn the parse into a round trip that decodes nothing.
    const literalBody = cleaned.replace(/\\[\s\S]|"/g, (token) => (token === '"' ? '\\"' : token));
    return cleanPriceText(JSON.parse(`"${literalBody}"`));
  } catch {
    // Not decodable as a JSON string body: fall back to the cleaned text.
    return cleaned;
  }
}
|
|
130
|
+
|
|
113
131
|
function cleanPriceText(value) {
|
|
114
132
|
return String(value || '')
|
|
115
133
|
.replace(/ /g, ' ')
|
|
@@ -245,6 +263,77 @@ async function pickPriceFromTwisterData(page, fallbackCurrency) {
|
|
|
245
263
|
return null;
|
|
246
264
|
}
|
|
247
265
|
|
|
266
|
+
/**
 * Ranks a price found in inline markup by the text surrounding it.
 *
 * A context mentioning buybox / core-price style identifiers earns +80, a
 * context mentioning used / strike-through / "was" price markers costs -60,
 * and the candidate's position (`index`) is subtracted so earlier matches
 * win ties.
 *
 * @param {*} context - Markup snippet around the match; compared case-insensitively.
 * @param {number} index - Ordinal of the match within its scan.
 * @returns {number} The candidate's score (higher is better).
 */
function scoreMarkupCandidate(context, index) {
  const haystack = String(context || '').toLowerCase();
  const primaryBonus = /desktop_buybox_group_1|buybox|coreprice|pricetopay|apex/.test(haystack) ? 80 : 0;
  const secondaryPenalty = /used|buying options|basisprice|a-text-price|strike|wasprice/.test(haystack) ? 60 : 0;
  return primaryBonus - secondaryPenalty - index;
}
|
|
274
|
+
|
|
275
|
+
/**
 * Scans raw page HTML for price data embedded as JSON-like text and returns
 * the best-scoring usable price.
 *
 * Two patterns are scanned, in this order:
 *   1. `"displayPrice":"<text>"`
 *   2. `"priceAmount":<number>,"currencySymbol":"<symbol>"`
 *
 * Every match whose text parses (via `parsePrice`) to a finite, positive
 * `numeric` becomes a candidate. Candidates are scored by
 * `scoreMarkupCandidate` using the markup from 140 characters before to 24
 * characters after the match start, together with the match's ordinal within
 * its own scan (unusable matches still consume an ordinal).
 *
 * @param {*} html - Page markup; falsy values yield `null`.
 * @param {*} fallbackCurrency - Passed through unchanged to `parsePrice`.
 * @returns {{text: string, parsed: object}|null} The highest-scoring
 *   candidate, or `null` when nothing usable was found.
 */
function pickPriceFromInlineMarkup(html, fallbackCurrency) {
  const source = String(html || '');
  if (source.length === 0) return null;

  const isUsablePrice = (price) =>
    Boolean(price) && Number.isFinite(price.numeric) && price.numeric > 0;
  const contextAround = (offset) =>
    source.slice(Math.max(0, offset - 140), Math.min(source.length, offset + 24));

  const candidates = [];

  // Pass 1: pre-formatted display strings.
  const displayHits = [...source.matchAll(/"displayPrice"\s*:\s*"([^"]+)"/g)];
  for (const [ordinal, hit] of displayHits.entries()) {
    const text = normalizeJsonStringValue(hit[1]);
    const parsed = parsePrice(text, fallbackCurrency);
    if (isUsablePrice(parsed)) {
      candidates.push({
        text,
        parsed,
        score: scoreMarkupCandidate(contextAround(hit.index), ordinal),
      });
    }
  }

  // Pass 2: numeric amount immediately followed by its currency symbol.
  const amountHits = [
    ...source.matchAll(/"priceAmount"\s*:\s*([0-9]+(?:\.[0-9]+)?)\s*,\s*"currencySymbol"\s*:\s*"([^"]+)"/g),
  ];
  for (const [ordinal, hit] of amountHits.entries()) {
    const amount = Number(hit[1]);
    const symbol = normalizeJsonStringValue(hit[2]);
    if (!(Number.isFinite(amount) && amount > 0)) continue;

    const text = symbol ? `${symbol} ${amount}` : String(amount);
    const parsed = parsePrice(text, fallbackCurrency);
    if (!isUsablePrice(parsed)) continue;

    candidates.push({
      text,
      parsed,
      score: scoreMarkupCandidate(contextAround(hit.index), ordinal),
    });
  }

  if (candidates.length === 0) return null;

  // Highest score first; equal scores keep insertion order.
  candidates.sort((left, right) => right.score - left.score);
  const [{ text, parsed }] = candidates;
  return { text, parsed };
}
|
|
336
|
+
|
|
248
337
|
async function waitForScrapeSignals(page) {
|
|
249
338
|
await Promise.race([
|
|
250
339
|
page.waitForSelector('#productTitle', { timeout: 4500 }).catch(() => null),
|
|
@@ -337,6 +426,15 @@ async function scrapePageOnce(page, url, prefs) {
|
|
|
337
426
|
}
|
|
338
427
|
}
|
|
339
428
|
|
|
429
|
+
if (!parsed) {
|
|
430
|
+
const html = await page.content().catch(() => '');
|
|
431
|
+
const markupCandidate = pickPriceFromInlineMarkup(html, prefs.currency);
|
|
432
|
+
if (markupCandidate) {
|
|
433
|
+
priceRaw = markupCandidate.text;
|
|
434
|
+
parsed = markupCandidate.parsed;
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
|
|
340
438
|
const asin = extractAsin(url);
|
|
341
439
|
const pageTitle = await page.title();
|
|
342
440
|
const finalUrl = page.url();
|
|
@@ -478,9 +576,11 @@ module.exports = { scrapePrice };
|
|
|
478
576
|
// Internal helpers attached to the module under `__test`; presumably exposed
// so unit tests can call them directly (confirm against the test suite).
const testOnlyExports = {
  PRICE_SELECTORS,
  cleanPriceText,
  normalizeJsonStringValue,
  scorePriceCandidate,
  chooseBestPriceCandidate,
  parseTwisterPriceData,
  pickPriceFromInlineMarkup,
  detectBlockedPage,
  shouldRetryNoPrice,
};
module.exports.__test = testOnlyExports;
|