amaprice 1.0.15 → 1.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -10
- package/package.json +6 -1
- package/src/db.js +61 -0
- package/src/orchestrator/runner.js +122 -0
- package/src/url.js +262 -21
package/README.md
CHANGED
|
@@ -29,13 +29,13 @@ npx amaprice price "https://www.amazon.de/dp/B0DZ5P7JD6"
|
|
|
29
29
|
## Quickstart
|
|
30
30
|
|
|
31
31
|
```bash
|
|
32
|
-
# one-shot lookup
|
|
32
|
+
# one-shot lookup (no subscription)
|
|
33
33
|
amaprice price "https://www.amazon.de/dp/B0DZ5P7JD6"
|
|
34
34
|
|
|
35
|
-
# start tracking
|
|
35
|
+
# start tracking + subscribe current user + auto-start background collector
|
|
36
36
|
amaprice track B0DZ5P7JD6 --tier daily
|
|
37
37
|
|
|
38
|
-
# subscribe
|
|
38
|
+
# or subscribe directly to an existing shared catalog product
|
|
39
39
|
amaprice subscribe B0DZ5P7JD6
|
|
40
40
|
|
|
41
41
|
# show history
|
|
@@ -43,6 +43,16 @@ amaprice history B0DZ5P7JD6 --limit 30
|
|
|
43
43
|
|
|
44
44
|
# list tracked products
|
|
45
45
|
amaprice list
|
|
46
|
+
|
|
47
|
+
# list all subscriptions for current user (including paused)
|
|
48
|
+
amaprice subscriptions --all
|
|
49
|
+
|
|
50
|
+
# stop one product subscription for current user
|
|
51
|
+
amaprice unsubscribe B0DZ5P7JD6
|
|
52
|
+
|
|
53
|
+
# stop/start background collector service
|
|
54
|
+
amaprice background off
|
|
55
|
+
amaprice background on
|
|
46
56
|
```
|
|
47
57
|
|
|
48
58
|
## Input Modes
|
|
@@ -59,19 +69,47 @@ Short links from Amazon apps (for example `amzn.eu`, `amzn.to`, `a.co`) are acce
|
|
|
59
69
|
| Command | Description |
|
|
60
70
|
|---|---|
|
|
61
71
|
| `amaprice [url\|asin]` | Shortcut for `amaprice price [url\|asin]` |
|
|
62
|
-
| `amaprice price [url\|asin]` | One-shot lookup and silent history insert |
|
|
63
|
-
| `amaprice track [url\|asin]` | Track product + current
|
|
72
|
+
| `amaprice price [url\|asin]` | One-shot lookup (no subscription) and silent history insert |
|
|
73
|
+
| `amaprice track [url\|asin]` | Track product + subscribe current user + auto-start background (`--tier`, `--manual-tier`, `--auto-tier`, `--inactive`) |
|
|
64
74
|
| `amaprice subscribe [url\|asin]` | Subscribe current user to shared product catalog entry |
|
|
65
75
|
| `amaprice unsubscribe <url\|asin>` | Disable current user subscription |
|
|
66
76
|
| `amaprice subscriptions` | List user subscriptions with latest known prices |
|
|
67
77
|
| `amaprice history <url\|asin>` | Show history (`--limit N`) |
|
|
68
|
-
| `amaprice list` | List
|
|
78
|
+
| `amaprice list` | List current user subscriptions + latest price (default view) |
|
|
79
|
+
| `amaprice list --global` | List global shared catalog tracked products |
|
|
69
80
|
| `amaprice sync --limit <n>` | Run background sync for due products |
|
|
70
81
|
| `amaprice background <on\|off\|status>` | Manage true background collector service |
|
|
71
82
|
| `amaprice tier <url\|asin> <hourly\|daily\|weekly>` | Set tier/status (`--auto`, `--manual`, `--activate`, `--deactivate`) |
|
|
72
83
|
|
|
73
84
|
All commands support `--json`.
|
|
74
85
|
|
|
86
|
+
## Most Common User Flows
|
|
87
|
+
|
|
88
|
+
### One-time Price Check (No Subscription)
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
amaprice price B0DZ5P7JD6
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
This returns the current price immediately and does not create a user subscription.
|
|
95
|
+
|
|
96
|
+
### Subscribe + Run in Background
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
amaprice track B0DZ5P7JD6
|
|
100
|
+
amaprice background status --json
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
`track` (and `subscribe`) auto-starts the background collector on macOS (`launchd`).
|
|
104
|
+
You can close your terminal after this; the service keeps running.
|
|
105
|
+
|
|
106
|
+
### Stop Product + Stop Background Service
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
amaprice unsubscribe B0DZ5P7JD6
|
|
110
|
+
amaprice background off
|
|
111
|
+
```
|
|
112
|
+
|
|
75
113
|
## Background Service (Auto)
|
|
76
114
|
|
|
77
115
|
`track` and `subscribe` automatically ensure a true background collector service is running.
|
|
@@ -145,7 +183,13 @@ Run this SQL in Supabase SQL Editor:
|
|
|
145
183
|
|
|
146
184
|
`supabase/migrations/20260220_add_price_history_currency.sql`
|
|
147
185
|
|
|
148
|
-
|
|
186
|
+
`supabase/migrations/20260222_add_hybrid_orchestration.sql`
|
|
187
|
+
|
|
188
|
+
`supabase/migrations/20260223_enforce_collector_first_claiming.sql`
|
|
189
|
+
|
|
190
|
+
`supabase/migrations/20260225_add_web_track_requests.sql`
|
|
191
|
+
|
|
192
|
+
These migrations add tier fields, indexes, telemetry, worker health rollups, `price_history.currency`, collector orchestration tables/functions, strict collector-first claim policy, and website intake request tracking.
|
|
149
193
|
|
|
150
194
|
Note: these files are additive migrations and expect existing `products` + `price_history` tables.
|
|
151
195
|
|
|
@@ -218,6 +262,18 @@ Environment variables used by the npm package:
|
|
|
218
262
|
|
|
219
263
|
For production background workers, prefer the Supabase **service role key**.
|
|
220
264
|
|
|
265
|
+
Website API runtime (Next.js server routes):
|
|
266
|
+
|
|
267
|
+
| Variable | Default | Used by | Notes |
|
|
268
|
+
|---|---|---|---|
|
|
269
|
+
| `SUPABASE_URL` | none | website API routes | Server-side Supabase project URL |
|
|
270
|
+
| `SUPABASE_SERVICE_ROLE_KEY` | none | website API routes | Required for secure writes (`/api/v1/track-requests`) |
|
|
271
|
+
| `WEB_TRACK_RATE_LIMIT_WINDOW_SECONDS` | `300` | website API routes | Sliding rate-limit window |
|
|
272
|
+
| `WEB_TRACK_RATE_LIMIT_MAX_REQUESTS_PER_IP` | `8` | website API routes | Rate-limit threshold per IP hash |
|
|
273
|
+
| `WEB_TRACK_RATE_LIMIT_MAX_REQUESTS_PER_VISITOR` | `12` | website API routes | Rate-limit threshold per visitor cookie |
|
|
274
|
+
| `WEB_TRACK_IP_HASH_PEPPER` | `amaprice-default-pepper` | website API routes | Pepper for deterministic IP hashing; set a unique secret value in production, because the default shown here is publicly known |
|
|
275
|
+
| `TURNSTILE_SECRET_KEY` | none | website API routes | Optional CAPTCHA verification for intake endpoint |
|
|
276
|
+
|
|
221
277
|
## Railway Worker Deployment
|
|
222
278
|
|
|
223
279
|
This repo includes:
|
|
@@ -250,19 +306,28 @@ npm run worker:once
|
|
|
250
306
|
|
|
251
307
|
## Vercel Website Deployment (`amaprice.sh`)
|
|
252
308
|
|
|
253
|
-
|
|
309
|
+
The website is a server-capable Next.js app in `website/` with API routes.
|
|
254
310
|
|
|
255
311
|
Steps:
|
|
256
312
|
1. Import the repo in Vercel.
|
|
257
313
|
2. Leave the project at repo root (deployment is controlled by root `vercel.json`).
|
|
258
314
|
3. Set website env vars:
|
|
259
|
-
- `
|
|
260
|
-
- `
|
|
315
|
+
- `SUPABASE_URL`
|
|
316
|
+
- `SUPABASE_SERVICE_ROLE_KEY`
|
|
261
317
|
- `NEXT_PUBLIC_GOOGLE_SITE_VERIFICATION` (for Google Search Console verification meta tag)
|
|
262
318
|
4. Deploy.
|
|
263
319
|
5. Add domain `amaprice.sh` in Vercel Domains and assign to this project.
|
|
264
320
|
6. Set `www.amaprice.sh` redirect to `amaprice.sh`.
|
|
265
321
|
|
|
322
|
+
### Website API Endpoints
|
|
323
|
+
|
|
324
|
+
- `POST /api/v1/track-requests` - submit Amazon URL/ASIN intake request
|
|
325
|
+
- `GET /api/v1/track-requests/:id` - poll request state (`queued`, `collecting`, `live`, ...)
|
|
326
|
+
- `GET /api/v1/products` - live tracked products for website explorer
|
|
327
|
+
- `GET /api/v1/products/:productId/history` - historical points for one product
|
|
328
|
+
- `GET /api/v1/prices/recent` - recent global price points
|
|
329
|
+
- `GET /api/v1/worker-health` - worker health rollup row
|
|
330
|
+
|
|
266
331
|
Local website development:
|
|
267
332
|
|
|
268
333
|
```bash
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "amaprice",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.17",
|
|
4
4
|
"description": "CLI tool to scrape and track Amazon product prices",
|
|
5
5
|
"main": "src/scraper.js",
|
|
6
6
|
"type": "commonjs",
|
|
@@ -39,5 +39,10 @@
|
|
|
39
39
|
"playwright": "^1.58.2",
|
|
40
40
|
"commander": "^13.1.0",
|
|
41
41
|
"@supabase/supabase-js": "^2.49.4"
|
|
42
|
+
},
|
|
43
|
+
"devDependencies": {
|
|
44
|
+
"next": "16.1.6",
|
|
45
|
+
"react": "19.2.4",
|
|
46
|
+
"react-dom": "19.2.4"
|
|
42
47
|
}
|
|
43
48
|
}
|
package/src/db.js
CHANGED
|
@@ -54,6 +54,20 @@ function isMissingHybridSchema(error) {
|
|
|
54
54
|
);
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
+
/**
 * Decide whether a Supabase/PostgREST error means the optional
 * `web_track_requests` schema has not been migrated yet.
 *
 * Recognised signals:
 *   - codes `42P01` / `42703` (Postgres undefined table / undefined column)
 *   - codes `PGRST205` / `PGRST204` (PostgREST schema-cache misses;
 *     NOTE(review): confirm the exact meaning against the PostgREST version in use)
 *   - messages of the form "relation ... does not exist" or
 *     "column ... does not exist"
 *   - messages that name `web_track_requests` AND say it is missing
 *
 * A message that merely mentions `web_track_requests` (for example
 * "permission denied for table web_track_requests") is deliberately NOT
 * treated as a missing schema, so callers surface it instead of silently
 * returning an empty result.
 *
 * @param {{ code?: unknown, message?: unknown } | null | undefined} error
 * @returns {boolean} true when the error indicates a missing table/column.
 */
function isMissingWebTrackSchema(error) {
  const code = String(error?.code || '');
  const message = String(error?.message || '');

  const missingSchemaCodes = ['42P01', 'PGRST205', '42703', 'PGRST204'];
  if (missingSchemaCodes.includes(code)) {
    return true;
  }

  if (
    /relation .* does not exist/i.test(message)
    || /column .* does not exist/i.test(message)
  ) {
    return true;
  }

  // Only trust a bare table-name mention when the message also says
  // the object could not be found.
  return (
    /web_track_requests/i.test(message)
    && /(does not exist|could not find|not found|schema cache)/i.test(message)
  );
}
|
|
70
|
+
|
|
57
71
|
/**
|
|
58
72
|
* Upsert a product by ASIN. Returns the product row.
|
|
59
73
|
*/
|
|
@@ -535,6 +549,51 @@ async function heartbeatCollector({
|
|
|
535
549
|
return data;
|
|
536
550
|
}
|
|
537
551
|
|
|
552
|
+
/**
 * Fetch website track requests that are still `queued` and have no
 * `product_id` yet, oldest first (ordered by `created_at`).
 *
 * @param {number} [limit=20] Maximum rows to return; values that are not a
 *   positive number fall back to 20 and the result is floored at 1.
 * @returns {Promise<object[]>} Matching rows, or an empty array when there
 *   are none or when the `web_track_requests` schema is not installed.
 * @throws {Error} For any other Supabase error.
 */
async function listPendingWebTrackRequests(limit = 20) {
  const client = getClient();
  const rowCap = Math.max(1, Number(limit) || 20);

  const result = await client
    .from('web_track_requests')
    .select('id, raw_input, status, status_reason, request_meta, queued_at, created_at')
    .eq('status', 'queued')
    .is('product_id', null)
    .order('created_at', { ascending: true })
    .limit(rowCap);

  const failure = result.error;
  if (!failure) {
    return result.data || [];
  }

  // A missing table/column just means the web intake feature is not set up.
  if (isMissingWebTrackSchema(failure)) {
    return [];
  }

  throw new Error(`Supabase web_track_requests error: ${failure.message}`);
}
|
|
573
|
+
|
|
574
|
+
/**
 * Apply a partial update to one `web_track_requests` row and return the
 * updated row. `updated_at` is always stamped with the current time.
 *
 * The patch is passed through `cleanPayload` before being sent
 * (NOTE(review): `cleanPayload` is defined outside this view; confirm how it
 * treats `null`/`undefined` fields).
 *
 * @param {string|number} requestId Value matched against the `id` column.
 * @param {object} patch Column values to write.
 * @returns {Promise<object|null>} The updated row, or `null` when the
 *   `web_track_requests` schema is not installed.
 * @throws {Error} For any other Supabase error.
 */
async function updateWebTrackRequestById(requestId, patch) {
  const client = getClient();
  const stamped = { ...patch, updated_at: new Date().toISOString() };
  const payload = cleanPayload(stamped);

  const result = await client
    .from('web_track_requests')
    .update(payload)
    .eq('id', requestId)
    .select()
    .single();

  const failure = result.error;
  if (!failure) {
    return result.data;
  }

  if (isMissingWebTrackSchema(failure)) {
    return null;
  }

  throw new Error(`Supabase web_track_requests error: ${failure.message}`);
}
|
|
596
|
+
|
|
538
597
|
async function enqueueDueCollectionJobs(limit = 20) {
|
|
539
598
|
const supabase = getClient();
|
|
540
599
|
const safeLimit = Math.max(1, Number(limit) || 20);
|
|
@@ -753,6 +812,8 @@ module.exports = {
|
|
|
753
812
|
upsertCollector,
|
|
754
813
|
getCollectorById,
|
|
755
814
|
heartbeatCollector,
|
|
815
|
+
listPendingWebTrackRequests,
|
|
816
|
+
updateWebTrackRequestById,
|
|
756
817
|
enqueueDueCollectionJobs,
|
|
757
818
|
claimCollectionJobs,
|
|
758
819
|
completeCollectionJob,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
const { runCollectionPipeline } = require('../extractors/pipeline');
|
|
2
2
|
const {
|
|
3
|
+
getProductByAsin,
|
|
3
4
|
enqueueDueCollectionJobs,
|
|
4
5
|
claimCollectionJobs,
|
|
5
6
|
completeCollectionJob,
|
|
@@ -7,10 +8,14 @@ const {
|
|
|
7
8
|
insertCollectionAttempt,
|
|
8
9
|
insertPrice,
|
|
9
10
|
insertScrapeAttempt,
|
|
11
|
+
listPendingWebTrackRequests,
|
|
10
12
|
upsertProductLatestPrice,
|
|
13
|
+
upsertProduct,
|
|
14
|
+
updateWebTrackRequestById,
|
|
11
15
|
updateProductById,
|
|
12
16
|
getRecentPrices,
|
|
13
17
|
} = require('../db');
|
|
18
|
+
const { normalizeAmazonInput, isAmazonUrl, extractDomain } = require('../url');
|
|
14
19
|
const {
|
|
15
20
|
normalizeTier,
|
|
16
21
|
computeNextScrapeAt,
|
|
@@ -78,6 +83,119 @@ function nextJobStateAfterFailure(job) {
|
|
|
78
83
|
return attempts >= maxAttempts ? 'dead' : 'queued';
|
|
79
84
|
}
|
|
80
85
|
|
|
86
|
+
/**
 * Current wall-clock time as an ISO-8601 UTC string.
 *
 * @returns {string}
 */
function nowIso() {
  const current = new Date();
  return current.toISOString();
}
|
|
89
|
+
|
|
90
|
+
/**
 * Return a shallow copy of `value` when it is a non-array object;
 * otherwise return a fresh empty object.
 *
 * @param {unknown} value
 * @returns {object}
 */
function cleanJsonObject(value) {
  const isObjectLike = Boolean(value) && typeof value === 'object' && !Array.isArray(value);
  return isObjectLike ? { ...value } : {};
}
|
|
96
|
+
|
|
97
|
+
/**
 * Resolve queued website track requests into catalog products.
 *
 * For every pending request (status `queued`, no `product_id`):
 *   1. Empty input, or input that cannot be normalized to an ASIN, is marked
 *      `rejected` with reason `invalid_amazon_input`.
 *   2. Otherwise the product is re-activated (if the ASIN already exists) or
 *      created, scheduled for an immediate scrape, and linked to the request,
 *      which stays `queued`.
 *
 * Failures are isolated per request: if the product lookup/upsert throws or
 * yields no row, that request is counted as `failed`, left untouched so a
 * later run can retry it, and the rest of the batch is still processed.
 *
 * Request status updates are best-effort; their errors are intentionally
 * ignored so one failed write cannot stop the batch.
 *
 * @param {{ limit?: number }} [options]
 * @param {number} [options.limit=20] Maximum requests to process (min 1).
 * @returns {Promise<{ processed: number, resolved: number, rejected: number, failed: number }>}
 */
async function resolvePendingWebTrackRequests({ limit = 20 } = {}) {
  const safeLimit = Math.max(1, Number(limit) || 20);
  const pending = await listPendingWebTrackRequests(safeLimit);
  if (!pending || pending.length === 0) {
    return { processed: 0, resolved: 0, rejected: 0, failed: 0 };
  }

  // Shared "mark as rejected" write for both rejection paths.
  const rejectRequest = (request, lastError, resolutionError) => {
    const finishedAt = nowIso();
    return updateWebTrackRequestById(request.id, {
      status: 'rejected',
      status_reason: 'invalid_amazon_input',
      last_error: lastError,
      completed_at: finishedAt,
      request_meta: {
        ...cleanJsonObject(request.request_meta),
        resolution_stage: 'collector',
        resolution_error: resolutionError,
        resolution_finished_at: finishedAt,
      },
    }).catch(() => {
      // Best-effort status write; the request is picked up again next run.
    });
  };

  let resolved = 0;
  let rejected = 0;
  let failed = 0;

  for (const request of pending) {
    const rawInput = String(request.raw_input || '').trim();
    if (!rawInput) {
      rejected += 1;
      await rejectRequest(request, 'Input is empty and could not be resolved.', 'empty_input');
      continue;
    }

    const defaultDomain = isAmazonUrl(rawInput) ? extractDomain(rawInput) : 'amazon.de';
    let normalized = null;
    try {
      normalized = await normalizeAmazonInput(rawInput, defaultDomain);
    } catch {
      normalized = null;
    }

    if (!normalized) {
      rejected += 1;
      await rejectRequest(
        request,
        'Could not resolve Amazon URL to a product ASIN.',
        'asin_not_found',
      );
      continue;
    }

    const now = nowIso();
    let product = null;
    try {
      const existingProduct = await getProductByAsin(normalized.asin);
      product = existingProduct
        ? await updateProductById(existingProduct.id, {
          is_active: true,
          next_scrape_at: now,
          domain: normalized.domain,
          url: normalized.url,
        })
        : await upsertProduct({
          asin: normalized.asin,
          title: `ASIN ${normalized.asin}`,
          url: normalized.url,
          domain: normalized.domain,
          tier: 'daily',
          tierMode: 'auto',
          isActive: true,
          nextScrapeAt: now,
        });
    } catch {
      // Storage failure for this one request must not abort the batch.
      product = null;
    }

    if (!product?.id) {
      // Leave the request queued and unlinked so the next run retries it.
      failed += 1;
      continue;
    }

    resolved += 1;
    await updateWebTrackRequestById(request.id, {
      asin: normalized.asin,
      domain: normalized.domain,
      normalized_url: normalized.url,
      product_id: product.id,
      status: 'queued',
      status_reason: null,
      last_error: null,
      queued_at: request.queued_at || now,
      completed_at: null,
      request_meta: {
        ...cleanJsonObject(request.request_meta),
        resolution_stage: 'collector',
        resolution_source: 'normalize_amazon_input',
        resolution_finished_at: now,
      },
    }).catch(() => {
      // Best-effort link write; without it the request is re-resolved next run.
    });
  }

  return {
    processed: pending.length,
    resolved,
    rejected,
    failed,
  };
}
|
|
198
|
+
|
|
81
199
|
async function processClaimedJob(job, {
|
|
82
200
|
collectorId = null,
|
|
83
201
|
executor = 'railway',
|
|
@@ -267,6 +385,9 @@ async function runOrchestratedSync({
|
|
|
267
385
|
} = {}) {
|
|
268
386
|
const safeLimit = Math.max(1, Number(limit) || 20);
|
|
269
387
|
|
|
388
|
+
await resolvePendingWebTrackRequests({
|
|
389
|
+
limit: Math.max(safeLimit * 2, safeLimit),
|
|
390
|
+
}).catch(() => {});
|
|
270
391
|
await requeueExpiredCollectionJobs(200).catch(() => {});
|
|
271
392
|
await enqueueDueCollectionJobs(Math.max(safeLimit * 2, safeLimit)).catch(() => {});
|
|
272
393
|
|
|
@@ -319,4 +440,5 @@ module.exports.__test = {
|
|
|
319
440
|
classifyFailure,
|
|
320
441
|
buildNoPriceErrorMessage,
|
|
321
442
|
nextJobStateAfterFailure,
|
|
443
|
+
cleanJsonObject,
|
|
322
444
|
};
|
package/src/url.js
CHANGED
|
@@ -5,7 +5,37 @@ const AMAZON_DOMAINS = [
|
|
|
5
5
|
];
|
|
6
6
|
const AMAZON_SHORT_DOMAINS = ['amzn.eu', 'amzn.to', 'a.co'];
|
|
7
7
|
|
|
8
|
-
const
|
|
8
|
+
const ASIN_PATH_REGEX = /(?:\/(?:dp|gp\/product|gp\/aw\/d|ASIN)\/)([A-Z0-9]{10})(?=[/?]|$)/i;
|
|
9
|
+
const DIRECT_ASIN_REGEX = /^[A-Z0-9]{10}$/i;
|
|
10
|
+
const QUERY_ASIN_KEYS = new Set(['asin', 'pd_rd_i']);
|
|
11
|
+
const QUERY_NESTED_URL_KEYS = new Set(['url', 'u', 'redirecturl', 'path']);
|
|
12
|
+
const HTML_ASIN_PATTERNS = [
|
|
13
|
+
/<link[^>]+rel=["']canonical["'][^>]+href=["'][^"']*(?:\/dp\/|\/gp\/product\/|\/gp\/aw\/d\/)([A-Z0-9]{10})(?=[/?"'&]|$)/i,
|
|
14
|
+
/<link[^>]+href=["'][^"']*(?:\/dp\/|\/gp\/product\/|\/gp\/aw\/d\/)([A-Z0-9]{10})(?=[/?"'&]|$)[^>]*rel=["']canonical["']/i,
|
|
15
|
+
/"(?:currentAsin|parentAsin|landingAsin)"\s*:\s*"([A-Z0-9]{10})"/i,
|
|
16
|
+
/"(?:canonicalUrl|productUrl|dpUrl|redirectUrl)"\s*:\s*"[^"]*(?:\/dp\/|\/gp\/product\/|\/gp\/aw\/d\/)([A-Z0-9]{10})(?=[/?"&]|$)/i,
|
|
17
|
+
];
|
|
18
|
+
const FETCH_TIMEOUT_MS = 9000;
|
|
19
|
+
const BROWSERISH_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36';
|
|
20
|
+
|
|
21
|
+
const DOMAIN_ACCEPT_LANGUAGE = {
|
|
22
|
+
'amazon.de': 'de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7',
|
|
23
|
+
'amazon.fr': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
|
|
24
|
+
'amazon.it': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7',
|
|
25
|
+
'amazon.es': 'es-ES,es;q=0.9,en-US;q=0.8,en;q=0.7',
|
|
26
|
+
'amazon.nl': 'nl-NL,nl;q=0.9,en-US;q=0.8,en;q=0.7',
|
|
27
|
+
'amazon.co.jp': 'ja-JP,ja;q=0.9,en-US;q=0.7,en;q=0.6',
|
|
28
|
+
'amazon.com.br': 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7',
|
|
29
|
+
'amazon.co.uk': 'en-GB,en;q=0.9',
|
|
30
|
+
'amazon.com': 'en-US,en;q=0.9',
|
|
31
|
+
'amazon.ca': 'en-CA,en;q=0.9,fr-CA;q=0.6',
|
|
32
|
+
'amazon.com.au': 'en-AU,en;q=0.9',
|
|
33
|
+
'amazon.in': 'en-IN,en;q=0.9',
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
/**
 * Coerce a value to a string and strip surrounding whitespace.
 * Any falsy input (`null`, `undefined`, `''`, `0`, `false`) yields `''`.
 *
 * @param {unknown} value
 * @returns {string}
 */
function safeTrim(value) {
  const text = value ? String(value) : '';
  return text.trim();
}
|
|
9
39
|
|
|
10
40
|
function isAmazonUrl(url) {
|
|
11
41
|
try {
|
|
@@ -19,22 +49,97 @@ function isAmazonUrl(url) {
|
|
|
19
49
|
function isAmazonShortUrl(url) {
|
|
20
50
|
try {
|
|
21
51
|
const parsed = new URL(url);
|
|
22
|
-
const hostname = parsed.hostname.replace(/^www
|
|
52
|
+
const hostname = parsed.hostname.replace(/^www\./i, '').toLowerCase();
|
|
23
53
|
return AMAZON_SHORT_DOMAINS.includes(hostname);
|
|
24
54
|
} catch {
|
|
25
55
|
return false;
|
|
26
56
|
}
|
|
27
57
|
}
|
|
28
58
|
|
|
29
|
-
function
|
|
30
|
-
|
|
31
|
-
if (
|
|
32
|
-
return
|
|
59
|
+
/**
 * Extract an ASIN from a bare ASIN or from a URL/path fragment.
 *
 * A value that is exactly ten alphanumeric characters is taken as the ASIN;
 * otherwise the value is searched with `ASIN_PATH_REGEX`
 * (`/dp/`, `/gp/product/`, `/gp/aw/d/`, `/ASIN/` segments).
 *
 * @param {unknown} value
 * @returns {string|null} Upper-cased ASIN, or `null` when none is found.
 */
function extractAsinFromPathLikeValue(value) {
  const candidate = safeTrim(value);
  if (candidate === '') {
    return null;
  }

  if (DIRECT_ASIN_REGEX.test(candidate)) {
    return candidate.toUpperCase();
  }

  const found = ASIN_PATH_REGEX.exec(candidate);
  if (found === null) {
    return null;
  }
  return found[1].toUpperCase();
}
|
|
37
72
|
|
|
73
|
+
/**
 * Look for an ASIN in a URL's query string.
 *
 * Parameters are scanned in order; the first hit wins:
 *   - a key in `QUERY_ASIN_KEYS` whose value is a bare ASIN
 *   - a key in `QUERY_NESTED_URL_KEYS` whose value (as-is, then after one
 *     extra `decodeURIComponent`) contains an ASIN path segment or bare ASIN
 *
 * @param {URL} parsed Parsed URL whose `searchParams` are inspected.
 * @returns {string|null} Upper-cased ASIN, or `null` when none is found.
 */
function extractAsinFromQueryParams(parsed) {
  for (const pair of parsed.searchParams) {
    const name = safeTrim(pair[0]).toLowerCase();
    const payload = safeTrim(pair[1]);
    if (name === '' || payload === '') {
      continue;
    }

    if (QUERY_ASIN_KEYS.has(name) && DIRECT_ASIN_REGEX.test(payload)) {
      return payload.toUpperCase();
    }

    if (QUERY_NESTED_URL_KEYS.has(name)) {
      const asIs = extractAsinFromPathLikeValue(payload);
      if (asIs) {
        return asIs;
      }

      try {
        const afterDecode = extractAsinFromPathLikeValue(decodeURIComponent(payload));
        if (afterDecode) {
          return afterDecode;
        }
      } catch {
        // Ignore decode failures for non-encoded payloads.
      }
    }
  }

  return null;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Scan an HTML document for an ASIN using `HTML_ASIN_PATTERNS`, tried in
 * order; the first pattern that captures an ASIN wins.
 *
 * @param {unknown} html Page markup.
 * @returns {string|null} Upper-cased ASIN, or `null` when nothing matches.
 */
function extractAsinFromHtml(html) {
  const markup = safeTrim(html);
  if (markup === '') {
    return null;
  }

  for (let index = 0; index < HTML_ASIN_PATTERNS.length; index += 1) {
    const captured = markup.match(HTML_ASIN_PATTERNS[index])?.[1];
    if (captured) {
      return captured.toUpperCase();
    }
  }

  return null;
}
|
|
123
|
+
|
|
124
|
+
/**
 * Extract an ASIN from user input that may be a bare ASIN or a URL.
 *
 * The path-style extraction is tried first; if it finds nothing and the
 * input parses as a URL, the query parameters are inspected.
 *
 * @param {unknown} urlOrAsin
 * @returns {string|null} Upper-cased ASIN, or `null` when none is found.
 */
function extractAsin(urlOrAsin) {
  const input = safeTrim(urlOrAsin);
  if (input === '') {
    return null;
  }

  const fromPath = extractAsinFromPathLikeValue(input);
  if (fromPath) {
    return fromPath;
  }

  try {
    return extractAsinFromQueryParams(new URL(input));
  } catch {
    // Not a parseable URL (or query inspection failed): no ASIN.
    return null;
  }
}
|
|
142
|
+
|
|
38
143
|
function extractDomain(url) {
|
|
39
144
|
try {
|
|
40
145
|
const parsed = new URL(url);
|
|
@@ -48,39 +153,175 @@ function canonicalUrl(asin, domain = 'amazon.de') {
|
|
|
48
153
|
return `https://www.${domain}/dp/${asin}`;
|
|
49
154
|
}
|
|
50
155
|
|
|
156
|
+
/**
 * Whether a URL is either an Amazon short link or a regular Amazon URL.
 *
 * @param {string} url
 * @returns {boolean}
 */
function isResolvableAmazonUrl(url) {
  if (isAmazonShortUrl(url)) {
    return true;
  }
  return isAmazonUrl(url);
}
|
|
159
|
+
|
|
160
|
+
/**
 * Pick the marketplace domain for a URL: the URL's own domain when it is an
 * Amazon URL, otherwise the `'amazon.de'` default.
 *
 * @param {string} url
 * @returns {string}
 */
function defaultDomainForUrl(url) {
  if (!isAmazonUrl(url)) {
    return 'amazon.de';
  }
  return extractDomain(url);
}
|
|
163
|
+
|
|
164
|
+
/**
 * Build browser-like request headers for resolving an Amazon link.
 * `accept-language` follows the URL's marketplace domain and falls back to
 * US English for domains without an entry in `DOMAIN_ACCEPT_LANGUAGE`.
 *
 * @param {string} url
 * @returns {Record<string, string>}
 */
function buildResolveHeaders(url) {
  const marketplace = defaultDomainForUrl(url);
  const acceptLanguage = DOMAIN_ACCEPT_LANGUAGE[marketplace] || 'en-US,en;q=0.9';

  const headers = {};
  headers.accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
  headers['accept-language'] = acceptLanguage;
  headers['cache-control'] = 'no-cache';
  headers.pragma = 'no-cache';
  headers['user-agent'] = BROWSERISH_USER_AGENT;
  return headers;
}
|
|
174
|
+
|
|
175
|
+
/**
 * `fetch` with a hard deadline of `FETCH_TIMEOUT_MS` that never throws.
 *
 * The deadline uses `AbortSignal.timeout`, so it stays armed after the
 * response headers arrive and also bounds reading the body. A manual timer
 * cleared once `fetch` resolved would leave a stalled body download
 * unbounded.
 *
 * A caller-supplied `options.signal` is combined with the deadline when the
 * runtime provides `AbortSignal.any`; otherwise only the deadline applies.
 *
 * @param {string|URL} url Resource to request.
 * @param {RequestInit} [options] Extra `fetch` options.
 * @returns {Promise<Response|null>} The response, or `null` on any failure
 *   (network error, invalid URL, abort/timeout).
 */
async function fetchWithTimeout(url, options = {}) {
  const deadline = AbortSignal.timeout(FETCH_TIMEOUT_MS);
  const callerSignal = options?.signal;
  const signal = callerSignal && typeof AbortSignal.any === 'function'
    ? AbortSignal.any([callerSignal, deadline])
    : deadline;

  try {
    return await fetch(url, {
      ...options,
      signal,
    });
  } catch {
    // Resolution is best-effort: callers treat `null` as "could not fetch".
    return null;
  }
}
|
|
192
|
+
|
|
193
|
+
/**
 * Read a fetch response body and try to find an ASIN in its HTML.
 *
 * Responses that declare a content type other than `text/html` are skipped,
 * as are response-like objects without a `text()` method. Only the first
 * 512,000 characters of the body are scanned.
 *
 * @param {Response} response
 * @returns {Promise<string|null>} Upper-cased ASIN, or `null` when the body
 *   is not HTML, cannot be read, or contains no recognisable ASIN.
 */
async function extractAsinFromResponse(response) {
  let contentType = '';
  if (response.headers?.get) {
    contentType = String(response.headers.get('content-type') || '').toLowerCase();
  }

  const declaresNonHtml = contentType !== '' && !contentType.includes('text/html');
  if (declaresNonHtml) {
    return null;
  }

  if (typeof response.text !== 'function') {
    return null;
  }

  try {
    const raw = await response.text();
    const head = String(raw || '').slice(0, 512_000);
    return extractAsinFromHtml(head);
  } catch {
    return null;
  }
}
|
|
212
|
+
|
|
213
|
+
/**
 * Turn an Amazon URL that already carries an ASIN into its canonical
 * product URL.
 *
 * @param {string} url Candidate URL.
 * @param {string} [fallbackDomain='amazon.de'] Domain used when none can be
 *   extracted from `url`.
 * @returns {string|null} Canonical product URL, or `null` when `url` has no
 *   ASIN or is not an Amazon URL.
 */
function resolveAsinFromAmazonUrl(url, fallbackDomain = 'amazon.de') {
  const asin = extractAsin(url);
  const usable = Boolean(asin) && isAmazonUrl(url);
  if (!usable) {
    return null;
  }

  return canonicalUrl(asin, extractDomain(url) || fallbackDomain);
}
|
|
222
|
+
|
|
223
|
+
/**
 * Resolve an Amazon (short) link by letting `fetch` follow redirects.
 *
 * Resolution order:
 *   1. If the final URL is an Amazon URL containing an ASIN, return its
 *      canonical form without reading the body.
 *   2. If the final URL is a known non-Amazon host, give up: markup served by
 *      a third party is not trusted as a source of ASINs. This matches the
 *      manual redirect loop in `resolveAmazonShortUrl`, which stops at
 *      non-Amazon hosts.
 *   3. Otherwise scan the HTML body for an ASIN.
 *
 * The response body is cancelled on every path that does not consume it, so
 * the underlying connection is not left open.
 *
 * @param {string} url Amazon URL or short link.
 * @returns {Promise<string|null>} Canonical product URL, or `null` when the
 *   link could not be resolved this way.
 */
async function resolveByFollowingRedirects(url) {
  const originDomain = defaultDomainForUrl(url);
  const response = await fetchWithTimeout(url, {
    method: 'GET',
    redirect: 'follow',
    headers: buildResolveHeaders(url),
  });
  if (!response) {
    return null;
  }

  const discardBody = async () => {
    try {
      await response.body?.cancel?.();
    } catch {
      // Ignore body cancellation failures (e.g. body already consumed).
    }
  };

  const responseUrl = safeTrim(response.url);
  const fromUrl = resolveAsinFromAmazonUrl(responseUrl, originDomain);
  if (fromUrl) {
    await discardBody();
    return fromUrl;
  }

  // An empty response URL (e.g. a response-like object without `url`) is
  // still allowed through to the HTML scan below.
  if (responseUrl && !isResolvableAmazonUrl(responseUrl)) {
    await discardBody();
    return null;
  }

  const htmlAsin = await extractAsinFromResponse(response);
  if (!htmlAsin) {
    // The body may be unread when the content type was not HTML.
    await discardBody();
    return null;
  }

  const domain = isAmazonUrl(responseUrl)
    ? extractDomain(responseUrl)
    : originDomain;
  return canonicalUrl(htmlAsin, domain);
}
|
|
255
|
+
|
|
51
256
|
async function resolveAmazonShortUrl(url, maxRedirects = 8) {
|
|
52
|
-
if (!
|
|
257
|
+
if (!isResolvableAmazonUrl(url)) {
|
|
258
|
+
return url;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
const followed = await resolveByFollowingRedirects(url);
|
|
262
|
+
if (followed) {
|
|
263
|
+
return followed;
|
|
264
|
+
}
|
|
53
265
|
|
|
54
266
|
let current = url;
|
|
267
|
+
const originDomain = defaultDomainForUrl(url);
|
|
268
|
+
const visited = new Set();
|
|
55
269
|
for (let i = 0; i < maxRedirects; i += 1) {
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
270
|
+
if (visited.has(current)) {
|
|
271
|
+
return current;
|
|
272
|
+
}
|
|
273
|
+
visited.add(current);
|
|
274
|
+
|
|
275
|
+
const response = await fetchWithTimeout(current, {
|
|
276
|
+
method: 'GET',
|
|
277
|
+
redirect: 'manual',
|
|
278
|
+
headers: buildResolveHeaders(current),
|
|
279
|
+
});
|
|
280
|
+
if (!response) {
|
|
63
281
|
return current;
|
|
64
282
|
}
|
|
65
283
|
|
|
284
|
+
const fromResponseUrl = resolveAsinFromAmazonUrl(
|
|
285
|
+
safeTrim(response.url),
|
|
286
|
+
originDomain,
|
|
287
|
+
);
|
|
288
|
+
if (fromResponseUrl) {
|
|
289
|
+
try {
|
|
290
|
+
await response.body?.cancel?.();
|
|
291
|
+
} catch {
|
|
292
|
+
// Ignore body cancellation failures.
|
|
293
|
+
}
|
|
294
|
+
return fromResponseUrl;
|
|
295
|
+
}
|
|
296
|
+
|
|
66
297
|
const location = response.headers?.get ? response.headers.get('location') : null;
|
|
298
|
+
if (!location || response.status < 300 || response.status > 399) {
|
|
299
|
+
const htmlAsin = await extractAsinFromResponse(response);
|
|
300
|
+
if (htmlAsin) {
|
|
301
|
+
const domain = isAmazonUrl(current) ? extractDomain(current) : originDomain;
|
|
302
|
+
return canonicalUrl(htmlAsin, domain);
|
|
303
|
+
}
|
|
304
|
+
return current;
|
|
305
|
+
}
|
|
306
|
+
|
|
67
307
|
try {
|
|
68
308
|
await response.body?.cancel?.();
|
|
69
309
|
} catch {
|
|
70
310
|
// Ignore body cancellation failures.
|
|
71
311
|
}
|
|
72
312
|
|
|
73
|
-
if (!location || response.status < 300 || response.status > 399) {
|
|
74
|
-
return current;
|
|
75
|
-
}
|
|
76
|
-
|
|
77
313
|
try {
|
|
78
314
|
current = new URL(location, current).toString();
|
|
79
315
|
} catch {
|
|
80
316
|
return current;
|
|
81
317
|
}
|
|
82
318
|
|
|
83
|
-
|
|
319
|
+
const fromLocation = resolveAsinFromAmazonUrl(current, originDomain);
|
|
320
|
+
if (fromLocation) {
|
|
321
|
+
return fromLocation;
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
if (!isResolvableAmazonUrl(current)) {
|
|
84
325
|
return current;
|
|
85
326
|
}
|
|
86
327
|
}
|
|
@@ -89,7 +330,7 @@ async function resolveAmazonShortUrl(url, maxRedirects = 8) {
|
|
|
89
330
|
}
|
|
90
331
|
|
|
91
332
|
async function normalizeAmazonInput(input, defaultDomain = 'amazon.de') {
|
|
92
|
-
const raw =
|
|
333
|
+
const raw = safeTrim(input);
|
|
93
334
|
if (!raw) return null;
|
|
94
335
|
|
|
95
336
|
const asin = extractAsin(raw);
|