amaprice 1.0.9 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/scraper.js +100 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "amaprice",
3
- "version": "1.0.9",
3
+ "version": "1.0.10",
4
4
  "description": "CLI tool to scrape and track Amazon product prices",
5
5
  "main": "src/scraper.js",
6
6
  "type": "commonjs",
package/src/scraper.js CHANGED
@@ -7,9 +7,14 @@ const PRICE_SELECTORS = [
7
7
  '#corePriceDisplay_desktop_feature_div .apex-pricetopay-value .a-offscreen',
8
8
  '#corePrice_feature_div .a-price .a-offscreen',
9
9
  '#corePriceDisplay_desktop_feature_div .a-price .a-offscreen',
10
+ '#corePrice_feature_div .a-price',
11
+ '#corePriceDisplay_desktop_feature_div .a-price',
10
12
  '#buybox .a-price .a-offscreen',
13
+ '#buybox .a-price',
11
14
  '#desktop_buybox .a-price .a-offscreen',
15
+ '#desktop_buybox .a-price',
12
16
  '#newAccordionRow .a-price .a-offscreen',
17
+ '#newAccordionRow .a-price',
13
18
  '#priceblock_ourprice',
14
19
  '#priceblock_dealprice',
15
20
  ];
@@ -110,6 +115,19 @@ function matchesAny(value, patterns) {
110
115
  return patterns.some((pattern) => pattern.test(value));
111
116
  }
112
117
 
118
/**
 * Decodes the raw contents of a JSON string literal (the text found between
 * the quotes in page markup) and normalizes it as price text.
 *
 * The value is parsed as-is so that JSON escapes such as `\u20ac`, `\u00a0`
 * or `\/` are turned into the characters they stand for. Escaping the
 * backslashes before parsing would make the parse a no-op round trip and
 * leave those escapes in the output.
 *
 * @param {*} raw - Captured string contents; nullish/falsy values are treated as ''.
 * @returns {string} Cleaned, decoded text. If the value is not a valid JSON
 *   string body (e.g. it holds an unescaped quote or an unknown escape), the
 *   cleaned but undecoded text is returned instead.
 */
function normalizeJsonStringValue(raw) {
  const cleaned = cleanPriceText(String(raw || ''));
  if (!cleaned) return cleaned;
  try {
    // Wrap in quotes only; the input is already in JSON-escaped form.
    const decoded = JSON.parse(`"${cleaned}"`);
    // Decoding can introduce characters (e.g. a non-breaking space) that need cleaning.
    return cleanPriceText(decoded);
  } catch {
    // Best effort: not decodable as a JSON string, keep the cleaned text.
    return cleaned;
  }
}
130
+
113
131
  function cleanPriceText(value) {
114
132
  return String(value || '')
115
133
  .replace(/ /g, ' ')
@@ -245,6 +263,77 @@ async function pickPriceFromTwisterData(page, fallbackCurrency) {
245
263
  return null;
246
264
  }
247
265
 
266
/**
 * Scores a price found in inline markup from the text surrounding it.
 *
 * @param {*} context - Markup around the match; compared case-insensitively.
 * @param {number} index - Ordinal of the match; subtracted so earlier matches rank higher.
 * @returns {number} +80 when the context names a primary price container,
 *   -60 when it names a used/list/strike-through price, minus `index`.
 */
function scoreMarkupCandidate(context, index) {
  const haystack = String(context || '').toLowerCase();
  const primaryBonus = /desktop_buybox_group_1|buybox|coreprice|pricetopay|apex/.test(haystack) ? 80 : 0;
  const secondaryPenalty = /used|buying options|basisprice|a-text-price|strike|wasprice/.test(haystack) ? 60 : 0;
  return primaryBonus - secondaryPenalty - index;
}
274
+
275
/**
 * Runs one global regex over the page source and turns every usable match
 * into a scored price candidate.
 *
 * @param {string} source - Full page markup.
 * @param {RegExp} pattern - Fresh `/g` regex (its `lastIndex` must start at 0).
 * @param {function(RegExpExecArray): (string|null)} toText - Builds the price
 *   text for a match, or returns null to skip the match.
 * @param {*} fallbackCurrency - Passed through to `parsePrice`.
 * @returns {Array<{text: string, parsed: object, score: number}>} Candidates in match order.
 */
function collectMarkupCandidates(source, pattern, toText, fallbackCurrency) {
  const found = [];
  let ordinal = 0;
  let match = null;
  while ((match = pattern.exec(source)) !== null) {
    // Every match consumes an ordinal, including the ones that get skipped.
    const position = ordinal;
    ordinal += 1;

    const text = toText(match);
    if (text === null) continue;

    const parsed = parsePrice(text, fallbackCurrency);
    if (!parsed || !Number.isFinite(parsed.numeric) || parsed.numeric <= 0) continue;

    // Score on the 140 characters before the match start and the 24 after it.
    const context = source.slice(
      Math.max(0, match.index - 140),
      Math.min(source.length, match.index + 24),
    );
    found.push({ text, parsed, score: scoreMarkupCandidate(context, position) });
  }
  return found;
}

/**
 * Extracts the most plausible price from JSON fragments embedded in the page
 * markup.
 *
 * Two shapes are scanned: `"displayPrice":"<text>"` and
 * `"priceAmount":<number>,"currencySymbol":"<symbol>"`. Each match that parses
 * to a positive finite price is scored with `scoreMarkupCandidate`; the
 * highest score wins and ties go to the earliest candidate (all
 * `displayPrice` matches come before `priceAmount` matches).
 *
 * @param {*} html - Page markup; falsy values yield null.
 * @param {*} fallbackCurrency - Passed through to `parsePrice`.
 * @returns {{text: string, parsed: object}|null} Winning price text and its
 *   parsed form, or null when no usable price is found.
 */
function pickPriceFromInlineMarkup(html, fallbackCurrency) {
  const source = String(html || '');
  if (!source) return null;

  const displayCandidates = collectMarkupCandidates(
    source,
    /"displayPrice"\s*:\s*"([^"]+)"/g,
    (match) => normalizeJsonStringValue(match[1]),
    fallbackCurrency,
  );

  const amountCandidates = collectMarkupCandidates(
    source,
    /"priceAmount"\s*:\s*([0-9]+(?:\.[0-9]+)?)\s*,\s*"currencySymbol"\s*:\s*"([^"]+)"/g,
    (match) => {
      const amount = Number(match[1]);
      const symbol = normalizeJsonStringValue(match[2]);
      if (!Number.isFinite(amount) || amount <= 0) return null;
      return symbol ? `${symbol} ${amount}` : String(amount);
    },
    fallbackCurrency,
  );

  const candidates = [...displayCandidates, ...amountCandidates];
  if (candidates.length === 0) return null;

  // Single pass with a strict comparison keeps the earliest candidate on ties.
  let best = candidates[0];
  for (const candidate of candidates) {
    if (candidate.score > best.score) best = candidate;
  }

  return {
    text: best.text,
    parsed: best.parsed,
  };
}
336
+
248
337
  async function waitForScrapeSignals(page) {
249
338
  await Promise.race([
250
339
  page.waitForSelector('#productTitle', { timeout: 4500 }).catch(() => null),
@@ -337,6 +426,15 @@ async function scrapePageOnce(page, url, prefs) {
337
426
  }
338
427
  }
339
428
 
429
+ if (!parsed) {
430
+ const html = await page.content().catch(() => '');
431
+ const markupCandidate = pickPriceFromInlineMarkup(html, prefs.currency);
432
+ if (markupCandidate) {
433
+ priceRaw = markupCandidate.text;
434
+ parsed = markupCandidate.parsed;
435
+ }
436
+ }
437
+
340
438
  const asin = extractAsin(url);
341
439
  const pageTitle = await page.title();
342
440
  const finalUrl = page.url();
@@ -478,9 +576,11 @@ module.exports = { scrapePrice };
478
576
  // Internal helpers exposed under `__test` in addition to the public
  // `scrapePrice` export; presumably consumed by the unit tests (not visible here).
  module.exports.__test = {
479
577
  PRICE_SELECTORS,
480
578
  cleanPriceText,
579
+ normalizeJsonStringValue,
481
580
  scorePriceCandidate,
482
581
  chooseBestPriceCandidate,
483
582
  parseTwisterPriceData,
583
+ pickPriceFromInlineMarkup,
484
584
  detectBlockedPage,
485
585
  shouldRetryNoPrice,
486
586
  };