amaprice 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +48 -0
- package/bin/cli.js +17 -0
- package/package.json +36 -0
- package/src/commands/history.js +51 -0
- package/src/commands/list.js +41 -0
- package/src/commands/price.js +58 -0
- package/src/commands/track.js +59 -0
- package/src/config.js +4 -0
- package/src/db.js +92 -0
- package/src/format.js +73 -0
- package/src/scraper.js +58 -0
- package/src/url.js +40 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 amaprice contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# amaprice
|
|
2
|
+
|
|
3
|
+
CLI tool to look up and track Amazon product prices.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install -g amaprice
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# One-shot price lookup
|
|
15
|
+
amaprice price "https://www.amazon.de/dp/B0DZ5P7JD6"
|
|
16
|
+
|
|
17
|
+
# JSON output (for scripts / AI agents)
|
|
18
|
+
amaprice price "https://www.amazon.de/dp/B0DZ5P7JD6" --json
|
|
19
|
+
|
|
20
|
+
# Track a product's price over time
|
|
21
|
+
amaprice track "https://www.amazon.de/dp/B0DZ5P7JD6"
|
|
22
|
+
|
|
23
|
+
# View price history
|
|
24
|
+
amaprice history B0DZ5P7JD6
|
|
25
|
+
|
|
26
|
+
# List all tracked products
|
|
27
|
+
amaprice list
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Commands
|
|
31
|
+
|
|
32
|
+
| Command | Description |
|
|
33
|
+
|---|---|
|
|
34
|
+
| `amaprice price <url>` | One-shot price lookup |
|
|
35
|
+
| `amaprice track <url>` | Track a product's price |
|
|
36
|
+
| `amaprice history <url\|asin>` | Show price history (`--limit N`, default 30) |
|
|
37
|
+
| `amaprice list` | Show all tracked products with latest price |
|
|
38
|
+
|
|
39
|
+
All commands support `--json` for machine-readable output.
|
|
40
|
+
|
|
41
|
+
## Requirements
|
|
42
|
+
|
|
43
|
+
- Node.js >= 18
|
|
44
|
+
- Chromium is installed automatically via Playwright
|
|
45
|
+
|
|
46
|
+
## License
|
|
47
|
+
|
|
48
|
+
MIT
|
package/bin/cli.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
const { program } = require('commander');
|
|
4
|
+
const pkg = require('../package.json');
|
|
5
|
+
|
|
6
|
+
program
  .name('amaprice')
  .description('CLI tool to scrape and track Amazon product prices')
  .version(pkg.version);

// Register commands: each module exports a function that attaches one
// sub-command to the shared commander program.
require('../src/commands/price')(program);
require('../src/commands/track')(program);
require('../src/commands/history')(program);
require('../src/commands/list')(program);

// The registered actions are async, so use parseAsync() and handle a rejected
// promise here instead of leaving it as an unhandled rejection.
program.parseAsync().catch((err) => {
  console.error(`Error: ${err.message}`);
  process.exit(1);
});
|
package/package.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "amaprice",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "CLI tool to scrape and track Amazon product prices",
|
|
5
|
+
"main": "src/scraper.js",
|
|
6
|
+
"type": "commonjs",
|
|
7
|
+
"bin": {
|
|
8
|
+
"amaprice": "./bin/cli.js"
|
|
9
|
+
},
|
|
10
|
+
"scripts": {
|
|
11
|
+
"postinstall": "npx --yes playwright install chromium"
|
|
12
|
+
},
|
|
13
|
+
"files": [
|
|
14
|
+
"bin/",
|
|
15
|
+
"src/",
|
|
16
|
+
"README.md",
|
|
17
|
+
"LICENSE"
|
|
18
|
+
],
|
|
19
|
+
"engines": {
|
|
20
|
+
"node": ">=18"
|
|
21
|
+
},
|
|
22
|
+
"keywords": [
|
|
23
|
+
"amazon",
|
|
24
|
+
"price",
|
|
25
|
+
"tracker",
|
|
26
|
+
"scraper",
|
|
27
|
+
"cli"
|
|
28
|
+
],
|
|
29
|
+
"author": "",
|
|
30
|
+
"license": "MIT",
|
|
31
|
+
"dependencies": {
|
|
32
|
+
"playwright": "^1.58.2",
|
|
33
|
+
"commander": "^13.1.0",
|
|
34
|
+
"@supabase/supabase-js": "^2.49.4"
|
|
35
|
+
}
|
|
36
|
+
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
const { extractAsin } = require('../url');
|
|
2
|
+
const { getPriceHistory } = require('../db');
|
|
3
|
+
const { formatPrice } = require('../format');
|
|
4
|
+
|
|
5
|
+
module.exports = function (program) {
|
|
6
|
+
program
|
|
7
|
+
.command('history <url-or-asin>')
|
|
8
|
+
.description('Show price history for a product')
|
|
9
|
+
.option('--limit <n>', 'Number of entries to show', '30')
|
|
10
|
+
.option('--json', 'Output as JSON')
|
|
11
|
+
.action(async (urlOrAsin, opts) => {
|
|
12
|
+
const asin = extractAsin(urlOrAsin);
|
|
13
|
+
if (!asin) {
|
|
14
|
+
console.error('Error: Could not extract ASIN from input.');
|
|
15
|
+
process.exit(1);
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
try {
|
|
19
|
+
const { product, history } = await getPriceHistory(asin, parseInt(opts.limit, 10));
|
|
20
|
+
|
|
21
|
+
if (opts.json) {
|
|
22
|
+
console.log(JSON.stringify({
|
|
23
|
+
product: product.title,
|
|
24
|
+
asin,
|
|
25
|
+
url: product.url,
|
|
26
|
+
history: history.map((h) => ({
|
|
27
|
+
price: parseFloat(h.price),
|
|
28
|
+
currency: h.currency,
|
|
29
|
+
scrapedAt: h.scraped_at,
|
|
30
|
+
})),
|
|
31
|
+
}));
|
|
32
|
+
} else {
|
|
33
|
+
console.log(`Price history for: ${product.title}\n`);
|
|
34
|
+
|
|
35
|
+
if (history.length === 0) {
|
|
36
|
+
console.log('No price history found.');
|
|
37
|
+
return;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
for (const entry of history) {
|
|
41
|
+
const date = new Date(entry.scraped_at).toLocaleString();
|
|
42
|
+
const price = formatPrice(parseFloat(entry.price), entry.currency);
|
|
43
|
+
console.log(` ${date} ${price}`);
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
} catch (err) {
|
|
47
|
+
console.error(`Error: ${err.message}`);
|
|
48
|
+
process.exit(1);
|
|
49
|
+
}
|
|
50
|
+
});
|
|
51
|
+
};
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
const { listProducts } = require('../db');
|
|
2
|
+
const { formatPrice } = require('../format');
|
|
3
|
+
|
|
4
|
+
module.exports = function (program) {
|
|
5
|
+
program
|
|
6
|
+
.command('list')
|
|
7
|
+
.description('Show all tracked products with latest price')
|
|
8
|
+
.option('--json', 'Output as JSON')
|
|
9
|
+
.action(async (opts) => {
|
|
10
|
+
try {
|
|
11
|
+
const products = await listProducts();
|
|
12
|
+
|
|
13
|
+
if (opts.json) {
|
|
14
|
+
console.log(JSON.stringify(products.map((p) => ({
|
|
15
|
+
asin: p.asin,
|
|
16
|
+
title: p.title,
|
|
17
|
+
url: p.url,
|
|
18
|
+
domain: p.domain,
|
|
19
|
+
latestPrice: p.latestPrice ? parseFloat(p.latestPrice.price) : null,
|
|
20
|
+
currency: p.latestPrice?.currency ?? null,
|
|
21
|
+
lastScraped: p.latestPrice?.scraped_at ?? null,
|
|
22
|
+
}))));
|
|
23
|
+
} else {
|
|
24
|
+
if (products.length === 0) {
|
|
25
|
+
console.log('No tracked products. Use `amaprice track <url>` to start tracking.');
|
|
26
|
+
return;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
for (const p of products) {
|
|
30
|
+
const price = p.latestPrice
|
|
31
|
+
? formatPrice(parseFloat(p.latestPrice.price), p.latestPrice.currency)
|
|
32
|
+
: 'N/A';
|
|
33
|
+
console.log(` ${p.asin} ${price} ${p.title}`);
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
} catch (err) {
|
|
37
|
+
console.error(`Error: ${err.message}`);
|
|
38
|
+
process.exit(1);
|
|
39
|
+
}
|
|
40
|
+
});
|
|
41
|
+
};
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
const { isAmazonUrl } = require('../url');
|
|
2
|
+
const { scrapePrice } = require('../scraper');
|
|
3
|
+
const { upsertProduct, insertPrice } = require('../db');
|
|
4
|
+
|
|
5
|
+
module.exports = function (program) {
|
|
6
|
+
program
|
|
7
|
+
.command('price <url>')
|
|
8
|
+
.description('One-shot price lookup for an Amazon product')
|
|
9
|
+
.option('--json', 'Output as JSON')
|
|
10
|
+
.action(async (url, opts) => {
|
|
11
|
+
if (!isAmazonUrl(url)) {
|
|
12
|
+
console.error('Error: URL does not appear to be an Amazon link.');
|
|
13
|
+
process.exit(1);
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
try {
|
|
17
|
+
const result = await scrapePrice(url);
|
|
18
|
+
|
|
19
|
+
if (opts.json) {
|
|
20
|
+
console.log(JSON.stringify({
|
|
21
|
+
product: result.title,
|
|
22
|
+
price: result.priceRaw ?? 'Not found',
|
|
23
|
+
priceNumeric: result.price?.numeric ?? null,
|
|
24
|
+
currency: result.price?.currency ?? null,
|
|
25
|
+
url: result.url,
|
|
26
|
+
asin: result.asin,
|
|
27
|
+
scrapedAt: new Date().toISOString(),
|
|
28
|
+
}));
|
|
29
|
+
} else {
|
|
30
|
+
console.log(`Product: ${result.title}`);
|
|
31
|
+
console.log(`Price: ${result.priceRaw ?? 'Not found'}`);
|
|
32
|
+
console.log(`URL: ${result.url}`);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// Silently record to Supabase for data gathering
|
|
36
|
+
if (result.price && result.asin) {
|
|
37
|
+
try {
|
|
38
|
+
const product = await upsertProduct({
|
|
39
|
+
asin: result.asin,
|
|
40
|
+
title: result.title,
|
|
41
|
+
url: result.url,
|
|
42
|
+
domain: result.domain,
|
|
43
|
+
});
|
|
44
|
+
await insertPrice({
|
|
45
|
+
productId: product.id,
|
|
46
|
+
price: result.price.numeric,
|
|
47
|
+
currency: result.price.currency,
|
|
48
|
+
});
|
|
49
|
+
} catch {
|
|
50
|
+
// Silent — don't disrupt the user experience
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
} catch (err) {
|
|
54
|
+
console.error(`Error scraping price: ${err.message}`);
|
|
55
|
+
process.exit(1);
|
|
56
|
+
}
|
|
57
|
+
});
|
|
58
|
+
};
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
const { isAmazonUrl, extractAsin, extractDomain } = require('../url');
|
|
2
|
+
const { scrapePrice } = require('../scraper');
|
|
3
|
+
const { upsertProduct, insertPrice } = require('../db');
|
|
4
|
+
|
|
5
|
+
module.exports = function (program) {
|
|
6
|
+
program
|
|
7
|
+
.command('track <url>')
|
|
8
|
+
.description('Save product + current price to Supabase')
|
|
9
|
+
.option('--json', 'Output as JSON')
|
|
10
|
+
.action(async (url, opts) => {
|
|
11
|
+
if (!isAmazonUrl(url)) {
|
|
12
|
+
console.error('Error: URL does not appear to be an Amazon link.');
|
|
13
|
+
process.exit(1);
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
try {
|
|
17
|
+
const result = await scrapePrice(url);
|
|
18
|
+
|
|
19
|
+
if (!result.price) {
|
|
20
|
+
console.error('Error: Could not extract price from the page.');
|
|
21
|
+
process.exit(1);
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
const product = await upsertProduct({
|
|
25
|
+
asin: result.asin,
|
|
26
|
+
title: result.title,
|
|
27
|
+
url: result.url,
|
|
28
|
+
domain: result.domain,
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
const priceRecord = await insertPrice({
|
|
32
|
+
productId: product.id,
|
|
33
|
+
price: result.price.numeric,
|
|
34
|
+
currency: result.price.currency,
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
if (opts.json) {
|
|
38
|
+
console.log(JSON.stringify({
|
|
39
|
+
product: result.title,
|
|
40
|
+
asin: result.asin,
|
|
41
|
+
price: result.priceRaw,
|
|
42
|
+
priceNumeric: result.price.numeric,
|
|
43
|
+
currency: result.price.currency,
|
|
44
|
+
productId: product.id,
|
|
45
|
+
priceRecordId: priceRecord.id,
|
|
46
|
+
trackedAt: priceRecord.scraped_at,
|
|
47
|
+
}));
|
|
48
|
+
} else {
|
|
49
|
+
console.log(`Tracking: ${result.title}`);
|
|
50
|
+
console.log(`ASIN: ${result.asin}`);
|
|
51
|
+
console.log(`Price: ${result.priceRaw}`);
|
|
52
|
+
console.log(`Saved to Supabase.`);
|
|
53
|
+
}
|
|
54
|
+
} catch (err) {
|
|
55
|
+
console.error(`Error: ${err.message}`);
|
|
56
|
+
process.exit(1);
|
|
57
|
+
}
|
|
58
|
+
});
|
|
59
|
+
};
|
package/src/config.js
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
// Supabase project endpoint. AMAPRICE_SUPABASE_URL overrides the built-in
// default so the tool can be pointed at a different project.
const SUPABASE_URL = process.env.AMAPRICE_SUPABASE_URL
  || 'https://fetgmcukbeetwdahrkhe.supabase.co';

// API key sent with every request. The built-in default is a JWT whose payload
// declares the "anon" role; AMAPRICE_SUPABASE_KEY overrides it.
const SUPABASE_KEY = process.env.AMAPRICE_SUPABASE_KEY
  || 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImZldGdtY3VrYmVldHdkYWhya2hlIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NzE2MTQ2MTQsImV4cCI6MjA4NzE5MDYxNH0.KOymOB5I05eO_MMXyVHkQ2PukXkDIbFVKmukOI71r4Y';
|
|
3
|
+
|
|
4
|
+
module.exports = { SUPABASE_URL, SUPABASE_KEY };
|
package/src/db.js
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
const { createClient } = require('@supabase/supabase-js');
|
|
2
|
+
const { SUPABASE_URL, SUPABASE_KEY } = require('./config');
|
|
3
|
+
|
|
4
|
+
// Module-wide Supabase client; stays null until the first getClient() call.
let _client = null;

/**
 * Return the shared Supabase client, creating it on first use from
 * SUPABASE_URL and SUPABASE_KEY. Later calls return the same instance.
 */
function getClient() {
  if (!_client) {
    _client = createClient(SUPABASE_URL, SUPABASE_KEY);
  }
  return _client;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Insert a product, or update the existing row that has the same ASIN.
 *
 * @param {{ asin: string, title: string, url: string, domain: string }} product
 * @returns {Promise<object>} The stored product row.
 * @throws {Error} When Supabase reports an error for the upsert.
 */
async function upsertProduct({ asin, title, url, domain }) {
  const row = { asin, title, url, domain };

  const response = await getClient()
    .from('products')
    .upsert(row, { onConflict: 'asin' })
    .select()
    .single();

  if (response.error) {
    throw new Error(`Supabase products error: ${response.error.message}`);
  }
  return response.data;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Append one price observation for a product.
 *
 * @param {{ productId: *, price: number, currency: string }} entry
 *   `productId` is written to the `product_id` column.
 * @returns {Promise<object>} The inserted price_history row.
 * @throws {Error} When Supabase reports an error for the insert.
 */
async function insertPrice({ productId, price, currency }) {
  const row = { product_id: productId, price, currency };

  const response = await getClient()
    .from('price_history')
    .insert(row)
    .select()
    .single();

  if (response.error) {
    throw new Error(`Supabase price_history error: ${response.error.message}`);
  }
  return response.data;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Get the stored price history for a product, newest entry first.
 *
 * @param {string} asin - ASIN used to look up the product row.
 * @param {number} [limit=30] - Maximum number of entries; anything that is
 *   not a positive integer falls back to 30.
 * @returns {Promise<{ product: object, history: object[] }>} The product
 *   (id, title, url) and its price entries (price, currency, scraped_at).
 * @throws {Error} When no product has that ASIN, or when a query fails.
 */
async function getPriceHistory(asin, limit = 30) {
  const supabase = getClient();
  const maxRows = Number.isInteger(limit) && limit > 0 ? limit : 30;

  // maybeSingle() yields `data: null` without an error when nothing matches,
  // which lets a missing product be told apart from a failed query.
  const { data: product, error: pErr } = await supabase
    .from('products')
    .select('id, title, url')
    .eq('asin', asin)
    .maybeSingle();
  if (pErr) throw new Error(`Supabase products error: ${pErr.message}`);
  if (!product) throw new Error(`Product not found for ASIN ${asin}`);

  const { data: history, error: hErr } = await supabase
    .from('price_history')
    .select('price, currency, scraped_at')
    .eq('product_id', product.id)
    .order('scraped_at', { ascending: false })
    .limit(maxRows);
  if (hErr) throw new Error(`Supabase price_history error: ${hErr.message}`);

  return { product, history: history ?? [] };
}
|
|
62
|
+
|
|
63
|
+
/**
 * List all tracked products, newest first, each with its latest price.
 *
 * @returns {Promise<object[]>} Product rows extended with `latestPrice`
 *   ({ price, currency, scraped_at }), or `latestPrice: null` when the
 *   product has no price entries.
 * @throws {Error} When the product query or any price query fails.
 */
async function listProducts() {
  const supabase = getClient();
  const { data: products, error } = await supabase
    .from('products')
    .select('id, asin, title, url, domain, created_at')
    .order('created_at', { ascending: false });
  if (error) throw new Error(`Supabase products error: ${error.message}`);

  const results = [];
  // One query per product, run sequentially, fetching only the newest entry.
  for (const product of products ?? []) {
    const { data: prices, error: priceErr } = await supabase
      .from('price_history')
      .select('price, currency, scraped_at')
      .eq('product_id', product.id)
      .order('scraped_at', { ascending: false })
      .limit(1);
    // Report a failed lookup instead of presenting it as "no price yet".
    if (priceErr) throw new Error(`Supabase price_history error: ${priceErr.message}`);

    results.push({
      ...product,
      latestPrice: prices?.[0] ?? null,
    });
  }

  return results;
}
|
|
91
|
+
|
|
92
|
+
module.exports = { getClient, upsertProduct, insertPrice, getPriceHistory, listProducts };
|
package/src/format.js
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
// Currency symbol → ISO code mapping.
// Multi-character symbols are listed first on purpose: 'CA$', 'A$' and 'R$'
// all contain '$' (and 'CA$' contains 'A$'), so a scan in insertion order
// must reach them before the plain '$' entry.
const CURRENCY_MAP = {
  'CA$': 'CAD',
  'A$': 'AUD',
  'R$': 'BRL',
  '€': 'EUR',
  '$': 'USD',
  '£': 'GBP',
  '¥': 'JPY',
  '￥': 'JPY', // full-width yen sign
  '₹': 'INR',
};
|
|
12
|
+
|
|
13
|
+
/**
 * Parse a price string like "€249,00" or "$1,299.99" into structured data.
 *
 * @param {string} raw - Price text to parse.
 * @returns {{ display: string, numeric: number, currency: string } | null}
 *   The trimmed input, its numeric value and an ISO currency code, or null
 *   when `raw` is not a non-empty string or contains no parseable number.
 *   `currency` falls back to 'EUR' when no known symbol is present.
 */
function parsePrice(raw) {
  if (!raw || typeof raw !== 'string') return null;

  const trimmed = raw.trim();

  // Detect currency. Longer symbols are tested first because 'R$', 'A$' and
  // 'CA$' all contain '$'; scanning in map order would report them as USD.
  const symbols = Object.keys(CURRENCY_MAP).sort((a, b) => b.length - a.length);
  const matched = symbols.find((symbol) => trimmed.includes(symbol));
  const currency = matched ? CURRENCY_MAP[matched] : 'EUR';

  // Extract numeric portion: strip everything except digits, commas, dots
  const numStr = trimmed.replace(/[^\d.,]/g, '');
  if (!numStr) return null;

  // Determine how to read the separators:
  //   "1,299" / "1.299.000" → grouping only, no decimals
  //   "1.299,00" / "249,00" → comma is decimal (EU)
  //   "1,299.00" / "249.00" → dot is decimal (US)
  const groupingOnly = /^[1-9]\d{0,2}(?:,\d{3})+$/.test(numStr)
    || /^[1-9]\d{0,2}(?:\.\d{3})+$/.test(numStr);

  let normalized;
  if (groupingOnly) {
    normalized = numStr.replace(/[.,]/g, '');
  } else if (numStr.lastIndexOf(',') > numStr.lastIndexOf('.')) {
    normalized = numStr.replace(/\./g, '').replace(',', '.');
  } else {
    normalized = numStr.replace(/,/g, '');
  }

  const numeric = Number.parseFloat(normalized);
  if (Number.isNaN(numeric)) return null;

  return {
    display: trimmed,
    numeric,
    currency,
  };
}
|
|
61
|
+
|
|
62
|
+
/**
 * Format a numeric price with currency for display.
 *
 * @param {number} numeric - Amount to format.
 * @param {string} [currency='EUR'] - ISO currency code; null is treated as
 *   'EUR'.
 * @returns {string} The amount with two decimals, prefixed by the currency
 *   symbol (EUR uses a decimal comma). A code without a known symbol is
 *   prefixed by the code itself, e.g. "SEK 10.00". A non-finite or
 *   non-numeric amount yields 'N/A'.
 */
function formatPrice(numeric, currency = 'EUR') {
  if (typeof numeric !== 'number' || !Number.isFinite(numeric)) return 'N/A';

  const code = currency ?? 'EUR';
  const amount = numeric.toFixed(2);
  const symbol = Object.keys(CURRENCY_MAP).find((s) => CURRENCY_MAP[s] === code);

  // Showing "€" for a currency we have no symbol for would misstate the
  // price, so fall back to the ISO code.
  if (symbol === undefined) return `${code} ${amount}`;

  if (code === 'EUR') {
    return `${symbol}${amount.replace('.', ',')}`;
  }
  return `${symbol}${amount}`;
}
|
|
72
|
+
|
|
73
|
+
module.exports = { parsePrice, formatPrice };
|
package/src/scraper.js
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
const { chromium } = require('playwright');
|
|
2
|
+
const { parsePrice } = require('./format');
|
|
3
|
+
const { extractAsin, extractDomain } = require('./url');
|
|
4
|
+
|
|
5
|
+
// CSS selectors tried in order; the first one that yields non-empty text wins.
const PRICE_SELECTORS = [
  '#corePrice_feature_div .a-price .a-offscreen',
  '#corePriceDisplay_desktop_feature_div .a-price .a-offscreen',
  '.a-price .a-offscreen',
  '#priceblock_ourprice',
  '#priceblock_dealprice',
];

// Upper bound, in milliseconds, on waiting for a price element after load.
const PRICE_WAIT_MS = 3000;

// Return the trimmed text of the first element matching `selector`, or ''
// when there is no such element or it has no text (textContent() may be null).
async function readText(page, selector) {
  const el = await page.$(selector);
  if (!el) return '';
  const text = await el.textContent();
  return text ? text.trim() : '';
}

/**
 * Scrape product title and price from an Amazon URL.
 *
 * @param {string} url - Product page URL to open.
 * @returns {Promise<{ title: string, priceRaw: string|null, price: object|null,
 *   asin: string|null, domain: string, url: string }>}
 *   `title` is 'Unknown' when no title text is found, `priceRaw` is null when
 *   no selector yields text, and `price` is whatever parsePrice() returns.
 * @throws Propagates browser launch and navigation errors; the browser is
 *   always closed.
 */
async function scrapePrice(url) {
  const browser = await chromium.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });

    // Wait for one of the id-scoped price elements instead of sleeping for a
    // fixed time. The generic '.a-price' selector is left out of the wait
    // since it is not scoped to one container. On timeout, carry on and let
    // the selector loop below decide.
    const scopedSelectors = PRICE_SELECTORS.filter((sel) => sel.startsWith('#'));
    await page
      .waitForSelector(scopedSelectors.join(', '), { state: 'attached', timeout: PRICE_WAIT_MS })
      .catch(() => null);

    // Product title
    const title = (await readText(page, '#productTitle')) || 'Unknown';

    // Price — try selectors in order of specificity
    let priceRaw = null;
    for (const sel of PRICE_SELECTORS) {
      const text = await readText(page, sel);
      if (text) {
        priceRaw = text;
        break;
      }
    }

    return {
      title,
      priceRaw,
      price: parsePrice(priceRaw),
      asin: extractAsin(url),
      domain: extractDomain(url),
      url,
    };
  } finally {
    await browser.close();
  }
}
|
|
57
|
+
|
|
58
|
+
module.exports = { scrapePrice };
|
package/src/url.js
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
// Amazon storefront hostnames (without "www.") accepted by isAmazonUrl().
const AMAZON_DOMAINS = [
  'amazon.de', 'amazon.com', 'amazon.co.uk', 'amazon.fr',
  'amazon.it', 'amazon.es', 'amazon.nl', 'amazon.co.jp',
  'amazon.ca', 'amazon.com.au', 'amazon.in', 'amazon.com.br',
];

// Captures the 10-character ASIN that follows /dp/, /gp/product/ or /ASIN/.
const ASIN_REGEX = /(?:\/(?:dp|gp\/product|ASIN)\/)([A-Z0-9]{10})/i;

/**
 * Check whether `url` is an http(s) URL on a known Amazon storefront.
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean} True when the URL parses, uses http: or https:, and its
 *   hostname is one of AMAZON_DOMAINS with or without a "www." prefix.
 */
function isAmazonUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return false;
  }

  // A hostname match alone would also accept e.g. file://www.amazon.de/...,
  // which must never be passed on to the browser.
  if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') return false;

  return AMAZON_DOMAINS.some((d) => parsed.hostname === d || parsed.hostname === `www.${d}`);
}
|
|
17
|
+
|
|
18
|
+
/**
 * Extract an ASIN from a bare ASIN or from a product URL.
 *
 * @param {string} urlOrAsin - A 10-character ASIN or a URL matched by
 *   ASIN_REGEX. Surrounding whitespace is ignored.
 * @returns {string|null} The upper-cased ASIN, or null when the input is not
 *   a string or contains no ASIN.
 */
function extractAsin(urlOrAsin) {
  if (typeof urlOrAsin !== 'string') return null;

  const input = urlOrAsin.trim();

  // If it's already a bare ASIN (10 alphanumeric chars)
  if (/^[A-Z0-9]{10}$/i.test(input)) {
    return input.toUpperCase();
  }

  const match = input.match(ASIN_REGEX);
  return match ? match[1].toUpperCase() : null;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Return the hostname of `url` without a leading "www.".
 * Input that cannot be parsed as a URL yields the default 'amazon.de'.
 */
function extractDomain(url) {
  let hostname;
  try {
    ({ hostname } = new URL(url));
  } catch {
    return 'amazon.de';
  }
  return hostname.startsWith('www.') ? hostname.slice('www.'.length) : hostname;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Build the "https://www.<domain>/dp/<asin>" product URL for an ASIN.
 * `domain` defaults to 'amazon.de'.
 */
function canonicalUrl(asin, domain = 'amazon.de') {
  const host = `www.${domain}`;
  const path = `/dp/${asin}`;
  return `https://${host}${path}`;
}
|
|
39
|
+
|
|
40
|
+
module.exports = { isAmazonUrl, extractAsin, extractDomain, canonicalUrl, AMAZON_DOMAINS };
|