amaprice 1.0.11 → 1.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/bin/cli.js +2 -0
- package/package.json +1 -1
- package/src/background/service.js +505 -0
- package/src/collector/client.js +3 -3
- package/src/collector/daemon-entry.js +22 -0
- package/src/collector/state.js +1 -0
- package/src/commands/background.js +97 -0
- package/src/commands/collector.js +5 -5
- package/src/commands/subscribe.js +8 -0
- package/src/commands/track.js +8 -0
- package/src/extractors/pipeline.js +93 -2
- package/src/extractors/vision.js +15 -6
- package/src/orchestrator/runner.js +1 -0
package/README.md
CHANGED
|
@@ -35,6 +35,9 @@ amaprice price "https://www.amazon.de/dp/B0DZ5P7JD6"
|
|
|
35
35
|
# start tracking with a tier
|
|
36
36
|
amaprice track B0DZ5P7JD6 --tier daily
|
|
37
37
|
|
|
38
|
+
# subscribe current user to shared catalog product
|
|
39
|
+
amaprice subscribe B0DZ5P7JD6
|
|
40
|
+
|
|
38
41
|
# show history
|
|
39
42
|
amaprice history B0DZ5P7JD6 --limit 30
|
|
40
43
|
|
|
@@ -58,13 +61,35 @@ Short links from Amazon apps (for example `amzn.eu`, `amzn.to`, `a.co`) are acce
|
|
|
58
61
|
| `amaprice [url\|asin]` | Shortcut for `amaprice price [url\|asin]` |
|
|
59
62
|
| `amaprice price [url\|asin]` | One-shot lookup and silent history insert |
|
|
60
63
|
| `amaprice track [url\|asin]` | Track product + current price (`--tier`, `--manual-tier`, `--auto-tier`, `--inactive`) |
|
|
64
|
+
| `amaprice subscribe [url\|asin]` | Subscribe current user to shared product catalog entry |
|
|
65
|
+
| `amaprice unsubscribe <url\|asin>` | Disable current user subscription |
|
|
66
|
+
| `amaprice subscriptions` | List user subscriptions with latest known prices |
|
|
61
67
|
| `amaprice history <url\|asin>` | Show history (`--limit N`) |
|
|
62
68
|
| `amaprice list` | List tracked products + latest price |
|
|
63
69
|
| `amaprice sync --limit <n>` | Run background sync for due products |
|
|
70
|
+
| `amaprice background <on\|off\|status>` | Manage true background collector service |
|
|
64
71
|
| `amaprice tier <url\|asin> <hourly\|daily\|weekly>` | Set tier/status (`--auto`, `--manual`, `--activate`, `--deactivate`) |
|
|
65
72
|
|
|
66
73
|
All commands support `--json`.
|
|
67
74
|
|
|
75
|
+
## Background Service (Auto)
|
|
76
|
+
|
|
77
|
+
`track` and `subscribe` automatically ensure a true background collector service is running.
|
|
78
|
+
|
|
79
|
+
This service:
|
|
80
|
+
- keeps running after terminal close
|
|
81
|
+
- survives shell sessions
|
|
82
|
+
- polls queue jobs every `180s` by default
|
|
83
|
+
- currently uses `launchd`, so it is available on macOS only (other platforms report the service as unsupported)
|
|
84
|
+
|
|
85
|
+
Simple control commands:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
amaprice background status
|
|
89
|
+
amaprice background on
|
|
90
|
+
amaprice background off
|
|
91
|
+
```
|
|
92
|
+
|
|
68
93
|
## Currently Supported Store
|
|
69
94
|
|
|
70
95
|
Amazon domains:
|
|
@@ -177,12 +202,18 @@ Environment variables used by the npm package:
|
|
|
177
202
|
| `SYNC_INTERVAL_MINUTES` | `5` | `src/worker.js` | Worker loop interval |
|
|
178
203
|
| `SYNC_LIMIT` | `20` | `src/worker.js`, `amaprice sync --limit` | Max due products per run |
|
|
179
204
|
| `SYNC_RUN_ONCE` | `0` | `src/worker.js` | Set `1` for single run and exit |
|
|
205
|
+
| `AMAPRICE_AUTO_BACKGROUND` | `1` | `track`, `subscribe` | Set `0` to disable auto background startup |
|
|
206
|
+
| `COLLECTOR_POLL_SECONDS` | `180` | background collector service | Queue poll interval in seconds (clamped to 30–3600) |
|
|
207
|
+
| `COLLECTOR_LIMIT` | `10` | background collector service | Max claimed jobs per poll (clamped to 1–100) |
|
|
180
208
|
| `VISION_FALLBACK_ENABLED` | `0` | `src/extractors/pipeline.js` | Enable screenshot + vision fallback when HTML/JSON extraction fails |
|
|
181
209
|
| `OPENROUTER_API_KEY` | none | `src/extractors/vision.js` | Preferred vision provider key |
|
|
182
210
|
| `VISION_MODEL` | `google/gemini-3-flash-preview` | `src/extractors/vision.js` | OpenRouter model ID for vision extraction |
|
|
183
211
|
| `VISION_PROVIDER` | auto | `src/extractors/vision.js` | Optional force value: `openrouter` or `openai` |
|
|
184
212
|
| `OPENROUTER_HTTP_REFERER` | none | `src/extractors/vision.js` | Optional OpenRouter attribution header |
|
|
185
213
|
| `OPENROUTER_TITLE` | none | `src/extractors/vision.js` | Optional OpenRouter attribution header |
|
|
214
|
+
| `VISION_GUARDRAIL_ENABLED` | `1` | `src/extractors/pipeline.js` | Reject suspicious vision outputs before DB writes |
|
|
215
|
+
| `VISION_GUARDRAIL_MIN_CONFIDENCE` | `0.92` | `src/extractors/pipeline.js` | Minimum confidence required for vision price acceptance |
|
|
216
|
+
| `VISION_GUARDRAIL_MAX_REL_DELTA` | `0.5` | `src/extractors/pipeline.js` | Max relative delta vs last known price before rejecting vision price |
|
|
186
217
|
| `OPENAI_API_KEY` | none | `src/extractors/vision.js` | Legacy fallback if `OPENROUTER_API_KEY` is unset |
|
|
187
218
|
|
|
188
219
|
For production background workers, prefer the Supabase **service role key**.
|
|
@@ -202,6 +233,9 @@ Steps:
|
|
|
202
233
|
- `VISION_FALLBACK_ENABLED=1`
|
|
203
234
|
- `OPENROUTER_API_KEY=<your-openrouter-key>`
|
|
204
235
|
- `VISION_MODEL=google/gemini-3-flash-preview`
|
|
236
|
+
- `VISION_GUARDRAIL_ENABLED=1`
|
|
237
|
+
- `VISION_GUARDRAIL_MIN_CONFIDENCE=0.92`
|
|
238
|
+
- `VISION_GUARDRAIL_MAX_REL_DELTA=0.5`
|
|
205
239
|
4. Ensure builder is Dockerfile (root `Dockerfile`).
|
|
206
240
|
5. Deploy.
|
|
207
241
|
6. Confirm logs show `[worker] processed=...`.
|
package/bin/cli.js
CHANGED
|
@@ -12,6 +12,7 @@ const KNOWN_COMMANDS = new Set([
|
|
|
12
12
|
'subscribe',
|
|
13
13
|
'unsubscribe',
|
|
14
14
|
'subscriptions',
|
|
15
|
+
'background',
|
|
15
16
|
'collector',
|
|
16
17
|
'help',
|
|
17
18
|
]);
|
|
@@ -40,6 +41,7 @@ require('../src/commands/tier')(program);
|
|
|
40
41
|
require('../src/commands/subscribe')(program);
|
|
41
42
|
require('../src/commands/unsubscribe')(program);
|
|
42
43
|
require('../src/commands/subscriptions')(program);
|
|
44
|
+
require('../src/commands/background')(program);
|
|
43
45
|
require('../src/commands/collector')(program);
|
|
44
46
|
|
|
45
47
|
program.parse();
|
package/package.json
CHANGED
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
const fs = require('fs/promises');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const os = require('os');
|
|
4
|
+
const { execFile } = require('child_process');
|
|
5
|
+
const { promisify } = require('util');
|
|
6
|
+
|
|
7
|
+
const { getUserId } = require('../user-context');
|
|
8
|
+
const { readCollectorState, writeCollectorState, getStateDir } = require('../collector/state');
|
|
9
|
+
const { upsertCollector, heartbeatCollector, getCollectorById } = require('../db');
|
|
10
|
+
|
|
11
|
+
// Promise-returning wrapper around child_process.execFile; used to invoke `launchctl`.
const execFileAsync = promisify(execFile);

// Defaults and bounds for the background collector's queue polling.
const DEFAULT_COLLECTOR_LIMIT = 10; // jobs claimed per poll when nothing is configured
const DEFAULT_POLL_SECONDS = 180; // seconds between polls when nothing is configured
const MIN_POLL_SECONDS = 30; // lower clamp applied by resolvePollSeconds
const MAX_POLL_SECONDS = 3600; // upper clamp applied by resolvePollSeconds
|
|
17
|
+
|
|
18
|
+
/**
 * Turn an arbitrary value into a token that is safe to embed in a launchd label.
 *
 * The value is stringified, trimmed and lowercased; every run of characters
 * outside `[a-z0-9._-]` collapses into a single dash, and leading/trailing
 * dashes are stripped.
 *
 * @param {*} value - Raw value; falsy values are treated as an empty string.
 * @returns {string} The sanitized token, or `'user'` when nothing usable remains.
 */
function sanitizeLabelPart(value) {
  const cleaned = String(value || '')
    .trim()
    .toLowerCase()
    .replace(/[^a-z0-9._-]+/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-+|-+$/g, '');

  return cleaned || 'user';
}
|
|
26
|
+
|
|
27
|
+
/**
 * Resolve the maximum number of jobs the collector claims per poll.
 *
 * @param {number|string|null} [value=null] - Explicit limit; when nullish the
 *   `COLLECTOR_LIMIT` environment variable is consulted instead.
 * @returns {number} An integer clamped to 1..100, or `DEFAULT_COLLECTOR_LIMIT`
 *   when the input is blank or not a finite number.
 */
function resolveCollectorLimit(value = null) {
  const raw = value ?? process.env.COLLECTOR_LIMIT;

  // Number('') is 0, which would silently clamp to 1; treat a blank string as "not set".
  if (typeof raw === 'string' && raw.trim() === '') return DEFAULT_COLLECTOR_LIMIT;

  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) return DEFAULT_COLLECTOR_LIMIT;

  return Math.min(100, Math.max(1, Math.round(parsed)));
}
|
|
32
|
+
|
|
33
|
+
/**
 * Resolve the queue poll interval in seconds.
 *
 * @param {number|string|null} [value=null] - Explicit interval; when nullish the
 *   `COLLECTOR_POLL_SECONDS` environment variable is consulted instead.
 * @returns {number} An integer clamped to MIN_POLL_SECONDS..MAX_POLL_SECONDS, or
 *   `DEFAULT_POLL_SECONDS` when the input is blank or not a finite number.
 */
function resolvePollSeconds(value = null) {
  const raw = value ?? process.env.COLLECTOR_POLL_SECONDS;

  // Number('') is 0, which would silently clamp to the minimum; treat blank as "not set".
  if (typeof raw === 'string' && raw.trim() === '') return DEFAULT_POLL_SECONDS;

  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) return DEFAULT_POLL_SECONDS;

  return Math.min(MAX_POLL_SECONDS, Math.max(MIN_POLL_SECONDS, Math.round(parsed)));
}
|
|
38
|
+
|
|
39
|
+
/**
 * Build the collector name used when neither the caller nor saved state supplies one.
 * @returns {string} The machine hostname suffixed with `-collector`.
 */
function getDefaultCollectorName() {
  const host = os.hostname();
  return `${host}-collector`;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Locate the daemon entry script relative to this module's directory.
 * @returns {string} Path to `../collector/daemon-entry.js` resolved against `__dirname`.
 */
function getDaemonEntryPath() {
  const relativeEntry = '../collector/daemon-entry.js';
  return path.join(__dirname, relativeEntry);
}
|
|
46
|
+
|
|
47
|
+
/**
 * Derive the per-user launchd job label.
 * @param {*} userId - User identifier; sanitized before being embedded in the label.
 * @returns {string} `sh.amaprice.collector.<sanitized user id>`.
 */
function getLaunchdLabel(userId) {
  const suffix = sanitizeLabelPart(userId);
  return `sh.amaprice.collector.${suffix}`;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Compute where the LaunchAgent plist for a label lives.
 * @param {string} label - launchd job label.
 * @returns {string} `<home>/Library/LaunchAgents/<label>.plist`.
 */
function getLaunchdPlistPath(label) {
  const agentsDir = path.join(os.homedir(), 'Library', 'LaunchAgents');
  return path.join(agentsDir, `${label}.plist`);
}
|
|
54
|
+
|
|
55
|
+
/**
 * Compute the log file that receives the daemon's output.
 * @returns {string} `collector-daemon.log` inside the collector state directory.
 */
function getDaemonLogPath() {
  const stateDir = getStateDir();
  return path.join(stateDir, 'collector-daemon.log');
}
|
|
58
|
+
|
|
59
|
+
/**
 * Escape a value for safe inclusion in XML text or attribute content.
 *
 * Replaces the five XML-reserved characters (`&`, `<`, `>`, `"`, `'`) with
 * their predefined entities. A single pass is used so an ampersand that is
 * part of an emitted entity is never escaped a second time.
 *
 * @param {*} value - Value to escape; it is stringified first.
 * @returns {string} The escaped string.
 */
function xmlEscape(value) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&apos;',
  };

  return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
|
67
|
+
|
|
68
|
+
/**
 * Render a launchd property list for a keep-alive agent.
 *
 * The job is configured with `RunAtLoad` and `KeepAlive` both true. Every
 * interpolated value is passed through `xmlEscape`.
 *
 * @param {object} options
 * @param {string} options.label - launchd job label.
 * @param {string[]} [options.programArguments] - Executable followed by its arguments.
 * @param {string} options.stdoutPath - File receiving standard output.
 * @param {string} options.stderrPath - File receiving standard error.
 * @param {Object<string, *>} [options.environment={}] - Environment variables;
 *   entries that are nullish or blank after trimming are omitted.
 * @returns {string} The plist XML document.
 */
function renderLaunchdPlist({
  label,
  programArguments,
  stdoutPath,
  stderrPath,
  environment = {},
}) {
  const argLines = (programArguments || [])
    .map((arg) => `    <string>${xmlEscape(arg)}</string>`);

  const envLines = Object.entries(environment || {})
    .filter(([, value]) => value != null && String(value).trim() !== '')
    .map(([key, value]) => (
      `    <key>${xmlEscape(key)}</key>\n    <string>${xmlEscape(value)}</string>`
    ));

  // The EnvironmentVariables dict is emitted only when at least one entry survives the filter.
  const envXml = envLines.length > 0
    ? `\n  <key>EnvironmentVariables</key>\n  <dict>\n${envLines.join('\n')}\n  </dict>`
    : '';

  return `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
  <key>Label</key>
  <string>${xmlEscape(label)}</string>
  <key>ProgramArguments</key>
  <array>
${argLines.join('\n')}
  </array>
  <key>RunAtLoad</key>
  <true/>
  <key>KeepAlive</key>
  <true/>
  <key>StandardOutPath</key>
  <string>${xmlEscape(stdoutPath)}</string>
  <key>StandardErrorPath</key>
  <string>${xmlEscape(stderrPath)}</string>${envXml}
</dict>
</plist>
`;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Report whether the launchd backend can be used.
 * @param {string} [platform=process.platform] - Platform identifier to test.
 * @returns {boolean} True only for `'darwin'`.
 */
function isLaunchdSupported(platform = process.platform) {
  const LAUNCHD_PLATFORM = 'darwin';
  return platform === LAUNCHD_PLATFORM;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Build the launchd domain target for the current user.
 * @returns {string} `gui/<uid>` for the uid of the running process.
 * @throws {Error} When `process.getuid` is unavailable on this platform.
 */
function getLaunchdDomain() {
  if (typeof process.getuid === 'function') {
    return `gui/${process.getuid()}`;
  }
  throw new Error('launchd requires a POSIX uid');
}
|
|
123
|
+
|
|
124
|
+
/**
 * Build the `launchctl` service target for a label.
 * @param {string} label - launchd job label.
 * @returns {string} `<domain>/<label>`, with the domain from getLaunchdDomain().
 */
function buildServiceTarget(label) {
  const domain = getLaunchdDomain();
  return `${domain}/${label}`;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Run `launchctl` with the given arguments.
 *
 * @param {string[]} args - Arguments passed to `launchctl`.
 * @param {object} [options]
 * @param {boolean} [options.allowFailure=false] - When true, a failed invocation
 *   resolves to `{ ok: false, stdout, stderr, error }` instead of throwing.
 * @returns {Promise<{ok: boolean, stdout: string, stderr: string, error?: Error}>}
 * @throws {Error} When the command fails and `allowFailure` is false. The
 *   original error is attached as `cause`.
 */
async function runLaunchctl(args, { allowFailure = false } = {}) {
  let result;
  try {
    result = await execFileAsync('launchctl', args);
  } catch (err) {
    if (allowFailure) {
      return {
        ok: false,
        stdout: String(err.stdout || ''),
        stderr: String(err.stderr || ''),
        error: err,
      };
    }

    const detail = String(err.stderr || err.message || '').trim();
    // Keep the underlying error reachable for callers that need the exit code or stack.
    throw new Error(
      `launchctl ${args.join(' ')} failed: ${detail || 'unknown error'}`,
      { cause: err },
    );
  }

  return {
    ok: true,
    stdout: String(result.stdout || ''),
    stderr: String(result.stderr || ''),
  };
}
|
|
149
|
+
|
|
150
|
+
/**
 * Decide whether a failed `launchctl` result only means the job was already loaded.
 *
 * Matches the phrase "already loaded" case-insensitively in stderr or stdout
 * (which also covers "service already loaded").
 * NOTE(review): some launchctl versions may report a duplicate bootstrap with a
 * different message — confirm against the macOS releases being targeted.
 *
 * @param {{stdout?: string, stderr?: string}|null|undefined} result - Result
 *   shaped like the objects returned by runLaunchctl.
 * @returns {boolean} True when the output contains "already loaded".
 */
function isAlreadyLoadedError(result) {
  const combined = `${result?.stderr || ''}\n${result?.stdout || ''}`.toLowerCase();
  return combined.includes('already loaded');
}
|
|
154
|
+
|
|
155
|
+
/**
 * Build the environment handed to the background daemon.
 *
 * Always sets `AMAPRICE_USER_ID`; additionally copies each allow-listed
 * variable from `process.env` when it is set to a non-empty value.
 *
 * @param {string} userId - Value stored under `AMAPRICE_USER_ID`.
 * @returns {Object<string, string>} Environment map for the daemon.
 */
function pickDaemonEnvironment(userId) {
  // Only these variables are forwarded; everything else in process.env is left out.
  const allowList = [
    'SUPABASE_URL',
    'SUPABASE_KEY',
    'SUPABASE_ANON_KEY',
    'ORCHESTRATOR_ENABLED',
    'VISION_FALLBACK_ENABLED',
    'OPENROUTER_API_KEY',
    'VISION_MODEL',
    'VISION_PROVIDER',
    'OPENROUTER_HTTP_REFERER',
    'OPENROUTER_TITLE',
    'VISION_GUARDRAIL_ENABLED',
    'VISION_GUARDRAIL_MIN_CONFIDENCE',
    'VISION_GUARDRAIL_MAX_REL_DELTA',
    'PATH',
  ];

  const env = { AMAPRICE_USER_ID: userId };
  for (const name of allowList) {
    const current = process.env[name];
    if (current) {
      env[name] = current;
    }
  }
  return env;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Register (or refresh) this machine's collector and persist the local state.
 *
 * Reads the saved collector state, upserts the collector record with it, then
 * writes the merged state back. Values passed in take precedence over saved
 * values, which take precedence over built-in defaults.
 *
 * @param {object} [options]
 * @param {string} [options.userId=getUserId()] - Owner of the collector.
 * @param {string|null} [options.collectorName=null] - Name override.
 * @param {string} [options.status='active'] - Status stored remotely and locally.
 * @param {object|null} [options.capabilities=null] - Capability flags override.
 * @returns {Promise<{collector: object, state: object, statePath: *}>} The
 *   upserted collector, the state that was written, and whatever
 *   writeCollectorState returned.
 */
async function ensureCollectorEnabled({
  userId = getUserId(),
  collectorName = null,
  status = 'active',
  capabilities = null,
} = {}) {
  const existing = await readCollectorState();

  const defaultCapabilities = {
    html_json: true,
    vision: true,
    railway_dom: true,
  };

  const collector = await upsertCollector({
    collectorId: existing?.collectorId || null,
    userId,
    name: collectorName || existing?.name || getDefaultCollectorName(),
    kind: 'cli',
    status,
    capabilities: capabilities || existing?.capabilities || defaultCapabilities,
    metadata: {
      platform: process.platform,
      node: process.version,
    },
  });

  const state = {
    collectorId: collector.id,
    userId,
    name: collector.name,
    status,
    capabilities: collector.capabilities,
    // Preserve the first-enabled timestamp across repeated calls.
    enabledAt: existing?.enabledAt || new Date().toISOString(),
    updatedAt: new Date().toISOString(),
    // Carry over any background-service settings already saved.
    background: { ...(existing?.background || {}) },
  };

  const statePath = await writeCollectorState(state);
  return { collector, state, statePath };
}
|
|
221
|
+
|
|
222
|
+
/**
 * Inspect the launchd job for a label.
 *
 * `installed` reflects whether the plist file is accessible. `loaded` reflects
 * whether `launchctl print` succeeded. `running` additionally requires the
 * print output to contain `state = running` or `pid = <digits>`.
 *
 * @param {object} options
 * @param {string} options.label - launchd job label.
 * @returns {Promise<{backend: string, label: string, plistPath: string,
 *   installed: boolean, loaded: boolean, running: boolean}>}
 */
async function getLaunchdServiceStatus({ label }) {
  const plistPath = getLaunchdPlistPath(label);
  const base = { backend: 'launchd', label, plistPath };

  const installed = await fs.access(plistPath).then(() => true, () => false);
  if (!installed) {
    // Without a plist the job is not queried at all.
    return { ...base, installed: false, loaded: false, running: false };
  }

  const print = await runLaunchctl(['print', buildServiceTarget(label)], { allowFailure: true });
  const output = `${print.stdout}\n${print.stderr}`;
  const loaded = print.ok;
  const looksRunning = /state = running/i.test(output) || /pid = \d+/i.test(output);

  return { ...base, installed: true, loaded, running: loaded && looksRunning };
}
|
|
256
|
+
|
|
257
|
+
/**
 * Install and start the launchd job that runs the collector daemon.
 *
 * Writes the plist, bootstraps it into the user's launchd domain, enables the
 * service and kickstarts it; if kickstart fails, falls back to `launchctl start`.
 *
 * NOTE(review): when the job is already loaded, bootstrap is skipped and the job
 * is only restarted — confirm whether launchd picks up a changed plist
 * (e.g. new --poll-seconds) without a bootout first.
 *
 * @param {object} options
 * @param {string} options.label - launchd job label.
 * @param {number} options.pollSeconds - Passed to the daemon as `--poll-seconds`.
 * @param {number} options.limit - Passed to the daemon as `--limit`.
 * @param {string} options.userId - User id exported to the daemon environment.
 * @returns {Promise<object>} The status reported by getLaunchdServiceStatus.
 * @throws {Error} When bootstrap fails for a reason other than "already loaded".
 */
async function enableLaunchdService({
  label,
  pollSeconds,
  limit,
  userId,
}) {
  const plistPath = getLaunchdPlistPath(label);
  const logPath = getDaemonLogPath();
  const daemonEntry = getDaemonEntryPath();

  await fs.mkdir(path.dirname(plistPath), { recursive: true });
  await fs.mkdir(path.dirname(logPath), { recursive: true });

  const plist = renderLaunchdPlist({
    label,
    programArguments: [
      process.execPath,
      daemonEntry,
      '--limit',
      String(limit),
      '--poll-seconds',
      String(pollSeconds),
    ],
    // stdout and stderr share one log file.
    stdoutPath: logPath,
    stderrPath: logPath,
    environment: pickDaemonEnvironment(userId),
  });

  // The plist embeds API keys from the environment, so keep it owner-only.
  await fs.writeFile(plistPath, plist, { encoding: 'utf8', mode: 0o600 });
  // writeFile's mode applies only when the file is created; tighten an existing plist too.
  await fs.chmod(plistPath, 0o600);

  const bootstrap = await runLaunchctl(['bootstrap', getLaunchdDomain(), plistPath], { allowFailure: true });
  if (!bootstrap.ok && !isAlreadyLoadedError(bootstrap)) {
    throw new Error(
      `Could not bootstrap launchd service: ${bootstrap.stderr || bootstrap.stdout || 'unknown error'}`,
      { cause: bootstrap.error },
    );
  }

  const target = buildServiceTarget(label);
  await runLaunchctl(['enable', target], { allowFailure: true });

  // -k restarts the job if it is already running.
  const kick = await runLaunchctl(['kickstart', '-k', target], { allowFailure: true });
  if (!kick.ok) {
    await runLaunchctl(['start', label], { allowFailure: true });
  }

  return getLaunchdServiceStatus({ label });
}
|
|
299
|
+
|
|
300
|
+
/**
 * Stop and uninstall the launchd job for a label.
 *
 * Boots the job out of the user's launchd domain, marks it disabled, and
 * deletes its plist. Failures from `launchctl` are tolerated; a plist that is
 * already missing is not an error.
 *
 * @param {object} options
 * @param {string} options.label - launchd job label.
 * @returns {Promise<object>} The status reported by getLaunchdServiceStatus afterwards.
 * @throws {Error} When deleting the plist fails for a reason other than ENOENT.
 */
async function disableLaunchdService({ label }) {
  const plistPath = getLaunchdPlistPath(label);
  const target = buildServiceTarget(label);

  await runLaunchctl(['bootout', target], { allowFailure: true });
  await runLaunchctl(['disable', target], { allowFailure: true });

  try {
    await fs.unlink(plistPath);
  } catch (err) {
    const alreadyGone = Boolean(err) && err.code === 'ENOENT';
    if (!alreadyGone) {
      throw err;
    }
  }

  return getLaunchdServiceStatus({ label });
}
|
|
314
|
+
|
|
315
|
+
/**
 * Make sure the background collector service is installed and started.
 *
 * On platforms without launchd support this returns immediately with
 * `supported: false`. Otherwise it registers the collector, installs and starts
 * the launchd job, sends a best-effort heartbeat, and records the background
 * settings in the local collector state.
 *
 * @param {object} [options]
 * @param {string} [options.userId=getUserId()] - Owner of the collector.
 * @param {string|null} [options.collectorName=null] - Collector name override.
 * @param {number|string|null} [options.pollSeconds=null] - Poll interval; resolved by resolvePollSeconds.
 * @param {number|string|null} [options.limit=null] - Jobs per poll; resolved by resolveCollectorLimit.
 * @returns {Promise<object>} `{ supported: false, running: false, reason }` when
 *   unsupported, otherwise `{ supported: true, running, service, statePath,
 *   collectorId, pollSeconds, limit }`.
 */
async function ensureBackgroundOn({
  userId = getUserId(),
  collectorName = null,
  pollSeconds = null,
  limit = null,
} = {}) {
  if (!isLaunchdSupported()) {
    return {
      supported: false,
      running: false,
      reason: `unsupported_platform:${process.platform}`,
    };
  }

  const safePollSeconds = resolvePollSeconds(pollSeconds);
  const safeLimit = resolveCollectorLimit(limit);
  const label = getLaunchdLabel(userId);

  const { collector, statePath } = await ensureCollectorEnabled({
    userId,
    collectorName,
    status: 'active',
  });

  const service = await enableLaunchdService({
    label,
    pollSeconds: safePollSeconds,
    limit: safeLimit,
    userId,
  });

  // Best effort: a failed heartbeat must not undo a successfully started service.
  await heartbeatCollector({
    collectorId: collector.id,
    status: 'active',
  }).catch(() => {});

  // Re-read so the state just written by ensureCollectorEnabled is preserved.
  const local = await readCollectorState();
  await writeCollectorState({
    ...(local || {}),
    collectorId: collector.id,
    userId,
    name: collector.name,
    status: 'active',
    background: {
      enabled: true,
      backend: 'launchd',
      label,
      pollSeconds: safePollSeconds,
      limit: safeLimit,
      updatedAt: new Date().toISOString(),
    },
    updatedAt: new Date().toISOString(),
  });

  return {
    supported: true,
    running: service.running,
    service,
    statePath,
    collectorId: collector.id,
    pollSeconds: safePollSeconds,
    limit: safeLimit,
  };
}
|
|
378
|
+
|
|
379
|
+
/**
 * Stop the background collector service and mark the collector as paused.
 *
 * Uses the label saved in local state when present, otherwise derives it from
 * the user id. The heartbeat is best effort, and local state is only rewritten
 * when a state already exists.
 *
 * @param {object} [options]
 * @param {string} [options.userId=getUserId()] - User whose service is stopped.
 * @returns {Promise<{supported: boolean, running: boolean, service: object}>}
 *   `running` is always false; `service` is the launchd status after disabling
 *   (or a placeholder on unsupported platforms).
 */
async function ensureBackgroundOff({
  userId = getUserId(),
} = {}) {
  const state = await readCollectorState();
  const label = state?.background?.label || getLaunchdLabel(userId);
  const supported = isLaunchdSupported();

  const service = supported
    ? await disableLaunchdService({ label })
    : {
      backend: 'launchd',
      label,
      installed: false,
      loaded: false,
      running: false,
    };

  if (state?.collectorId) {
    // Best effort: failing to reach the backend must not block turning the service off.
    await heartbeatCollector({
      collectorId: state.collectorId,
      status: 'paused',
    }).catch(() => {});
  }

  if (state) {
    await writeCollectorState({
      ...state,
      status: 'paused',
      background: {
        ...(state.background || {}),
        enabled: false,
        backend: 'launchd',
        label,
        updatedAt: new Date().toISOString(),
      },
      updatedAt: new Date().toISOString(),
    });
  }

  return {
    supported: isLaunchdSupported(),
    running: false,
    service,
  };
}
|
|
425
|
+
|
|
426
|
+
/**
 * Collect local, remote and service status for the background collector.
 *
 * The remote lookup is best effort: any failure yields `remote: null`.
 *
 * @param {object} [options]
 * @param {string} [options.userId=getUserId()] - User whose status is reported.
 * @returns {Promise<{supported: boolean, userId: string, local: object|null,
 *   remote: object|null, service: object}>}
 */
async function getBackgroundStatus({
  userId = getUserId(),
} = {}) {
  const state = await readCollectorState();

  let remote = null;
  if (state?.collectorId) {
    remote = await getCollectorById(state.collectorId).catch(() => null);
  }

  const label = state?.background?.label || getLaunchdLabel(userId);

  let service;
  if (isLaunchdSupported()) {
    service = await getLaunchdServiceStatus({ label });
  } else {
    // No backend exists on this platform, hence `backend: null`.
    service = {
      backend: null,
      label,
      installed: false,
      loaded: false,
      running: false,
    };
  }

  return {
    supported: isLaunchdSupported(),
    userId,
    local: state,
    remote,
    service,
  };
}
|
|
453
|
+
|
|
454
|
+
/**
 * Report whether commands may start the background service automatically.
 * @returns {boolean} False only when `AMAPRICE_AUTO_BACKGROUND` is exactly `'0'`.
 */
function isAutoBackgroundEnabled() {
  const flag = process.env.AMAPRICE_AUTO_BACKGROUND;
  return flag !== '0';
}
|
|
457
|
+
|
|
458
|
+
/**
 * Start the background service unless automatic startup is disabled; never throws.
 *
 * Used by commands that must still succeed when service setup fails: any error
 * from ensureBackgroundOn is reported in the result instead of being raised.
 *
 * @param {object} [options]
 * @param {string} [options.userId=getUserId()] - User whose service is started.
 * @returns {Promise<object>} `{ attempted: false, running: false, reason }` when
 *   disabled by environment; `{ attempted: true, ...report }` on success;
 *   `{ attempted: true, running: false, error }` on failure.
 */
async function maybeEnsureBackgroundOn({
  userId = getUserId(),
} = {}) {
  if (!isAutoBackgroundEnabled()) {
    return {
      attempted: false,
      running: false,
      reason: 'disabled_by_env',
    };
  }

  let report;
  try {
    report = await ensureBackgroundOn({ userId });
  } catch (err) {
    return {
      attempted: true,
      running: false,
      error: err.message,
    };
  }

  return { attempted: true, ...report };
}
|
|
483
|
+
|
|
484
|
+
module.exports = {
|
|
485
|
+
DEFAULT_COLLECTOR_LIMIT,
|
|
486
|
+
DEFAULT_POLL_SECONDS,
|
|
487
|
+
MIN_POLL_SECONDS,
|
|
488
|
+
MAX_POLL_SECONDS,
|
|
489
|
+
ensureBackgroundOn,
|
|
490
|
+
ensureBackgroundOff,
|
|
491
|
+
getBackgroundStatus,
|
|
492
|
+
maybeEnsureBackgroundOn,
|
|
493
|
+
resolveCollectorLimit,
|
|
494
|
+
resolvePollSeconds,
|
|
495
|
+
};
|
|
496
|
+
|
|
497
|
+
module.exports.__test = {
|
|
498
|
+
sanitizeLabelPart,
|
|
499
|
+
resolveCollectorLimit,
|
|
500
|
+
resolvePollSeconds,
|
|
501
|
+
getLaunchdLabel,
|
|
502
|
+
getLaunchdPlistPath,
|
|
503
|
+
renderLaunchdPlist,
|
|
504
|
+
isLaunchdSupported,
|
|
505
|
+
};
|
package/src/collector/client.js
CHANGED
|
@@ -14,7 +14,7 @@ async function ensureCollectorState() {
|
|
|
14
14
|
return state;
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
-
async function runCollectorOnce({ limit =
|
|
17
|
+
async function runCollectorOnce({ limit = 10 } = {}) {
|
|
18
18
|
const state = await ensureCollectorState();
|
|
19
19
|
await heartbeatCollector({
|
|
20
20
|
collectorId: state.collectorId,
|
|
@@ -42,8 +42,8 @@ async function runCollectorOnce({ limit = 5 } = {}) {
|
|
|
42
42
|
});
|
|
43
43
|
}
|
|
44
44
|
|
|
45
|
-
async function runCollectorLoop({ limit =
|
|
46
|
-
const safePollMs = Math.max(
|
|
45
|
+
async function runCollectorLoop({ limit = 10, pollSeconds = 180 } = {}) {
|
|
46
|
+
const safePollMs = Math.max(30, Number(pollSeconds) || 180) * 1000;
|
|
47
47
|
|
|
48
48
|
while (true) {
|
|
49
49
|
const started = Date.now();
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
const { runCollectorLoop } = require('./client');
|
|
4
|
+
|
|
5
|
+
/**
 * Read a numeric command-line option of the form `<name> <value>` from process.argv.
 *
 * @param {string} name - Option name to look for, e.g. `'--limit'`.
 * @param {number} fallback - Returned when the option is absent or its value
 *   is not a finite number.
 * @returns {number} The parsed value or the fallback.
 */
function parseArg(name, fallback) {
  const position = process.argv.indexOf(name);
  if (position === -1) return fallback;

  // A missing value (option is the last argument) becomes NaN and falls back.
  const parsed = Number(process.argv[position + 1]);
  return Number.isFinite(parsed) ? parsed : fallback;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Daemon entry point: resolve the limit and poll interval, then run the collector loop.
 *
 * Command-line options take precedence over the `COLLECTOR_LIMIT` and
 * `COLLECTOR_POLL_SECONDS` environment variables, which take precedence over
 * the built-in defaults (10 jobs, 180 seconds). The limit is floored at 1 and
 * the poll interval at 30 seconds.
 *
 * @returns {Promise<void>} Settles when runCollectorLoop settles.
 */
async function main() {
  const envLimit = Number(process.env.COLLECTOR_LIMIT) || 10;
  const envPollSeconds = Number(process.env.COLLECTOR_POLL_SECONDS) || 180;

  const limit = Math.max(1, Math.round(parseArg('--limit', envLimit)));
  const pollSeconds = Math.max(30, Math.round(parseArg('--poll-seconds', envPollSeconds)));

  await runCollectorLoop({ limit, pollSeconds });
}
|
|
18
|
+
|
|
19
|
+
// Start the daemon; any unhandled failure is logged and ends the process with exit code 1.
main().catch((err) => {
  // Tolerate non-Error rejections (e.g. a thrown string) when building the log line.
  const reason = err?.message ?? String(err);
  console.error(`[collector-daemon] fatal=${reason}`);
  process.exit(1);
});
|
package/src/collector/state.js
CHANGED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
const { getUserId } = require('../user-context');
|
|
2
|
+
const {
|
|
3
|
+
ensureBackgroundOn,
|
|
4
|
+
ensureBackgroundOff,
|
|
5
|
+
getBackgroundStatus,
|
|
6
|
+
resolveCollectorLimit,
|
|
7
|
+
resolvePollSeconds,
|
|
8
|
+
} = require('../background/service');
|
|
9
|
+
|
|
10
|
+
module.exports = function (program) {
|
|
11
|
+
program
|
|
12
|
+
.command('background <action>')
|
|
13
|
+
.description('Manage automatic background collector service (on|off|status)')
|
|
14
|
+
.option('--poll-seconds <n>', 'Polling interval in seconds (default: 180)')
|
|
15
|
+
.option('--limit <n>', 'Max jobs per poll (default: 10)')
|
|
16
|
+
.option('--json', 'Output as JSON')
|
|
17
|
+
.action(async (action, opts) => {
|
|
18
|
+
const normalizedAction = String(action || '').trim().toLowerCase();
|
|
19
|
+
const userId = getUserId();
|
|
20
|
+
|
|
21
|
+
try {
|
|
22
|
+
if (normalizedAction === 'on') {
|
|
23
|
+
const report = await ensureBackgroundOn({
|
|
24
|
+
userId,
|
|
25
|
+
pollSeconds: opts.pollSeconds,
|
|
26
|
+
limit: opts.limit,
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
if (opts.json) {
|
|
30
|
+
console.log(JSON.stringify(report));
|
|
31
|
+
return;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
if (!report.supported) {
|
|
35
|
+
console.log(`Background service unsupported on platform: ${process.platform}`);
|
|
36
|
+
return;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
const pollSeconds = resolvePollSeconds(opts.pollSeconds);
|
|
40
|
+
const limit = resolveCollectorLimit(opts.limit);
|
|
41
|
+
console.log(`Background collector ON (poll=${pollSeconds}s limit=${limit})`);
|
|
42
|
+
return;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
if (normalizedAction === 'off') {
|
|
46
|
+
const report = await ensureBackgroundOff({ userId });
|
|
47
|
+
|
|
48
|
+
if (opts.json) {
|
|
49
|
+
console.log(JSON.stringify(report));
|
|
50
|
+
return;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
if (!report.supported) {
|
|
54
|
+
console.log(`Background service unsupported on platform: ${process.platform}`);
|
|
55
|
+
return;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
console.log('Background collector OFF');
|
|
59
|
+
return;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
if (normalizedAction === 'status') {
|
|
63
|
+
const report = await getBackgroundStatus({ userId });
|
|
64
|
+
if (opts.json) {
|
|
65
|
+
console.log(JSON.stringify(report));
|
|
66
|
+
return;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
if (!report.supported) {
|
|
70
|
+
console.log(`Background service unsupported on platform: ${process.platform}`);
|
|
71
|
+
return;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
console.log(`Background collector: ${report.service.running ? 'running' : 'stopped'}`);
|
|
75
|
+
if (report.local?.background?.pollSeconds) {
|
|
76
|
+
console.log(`Poll interval: ${report.local.background.pollSeconds}s`);
|
|
77
|
+
}
|
|
78
|
+
if (report.local?.background?.limit) {
|
|
79
|
+
console.log(`Poll limit: ${report.local.background.limit}`);
|
|
80
|
+
}
|
|
81
|
+
if (report.local?.collectorId) {
|
|
82
|
+
console.log(`Collector ID: ${report.local.collectorId}`);
|
|
83
|
+
}
|
|
84
|
+
if (report.remote?.last_seen_at) {
|
|
85
|
+
console.log(`Last heartbeat: ${report.remote.last_seen_at}`);
|
|
86
|
+
}
|
|
87
|
+
return;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
console.error('Unknown background action. Use: on, off, status.');
|
|
91
|
+
process.exit(1);
|
|
92
|
+
} catch (err) {
|
|
93
|
+
console.error(`Error: ${err.message}`);
|
|
94
|
+
process.exit(1);
|
|
95
|
+
}
|
|
96
|
+
});
|
|
97
|
+
};
|
|
@@ -23,16 +23,16 @@ function getDefaultCollectorName() {
|
|
|
23
23
|
module.exports = function (program) {
|
|
24
24
|
program
|
|
25
25
|
.command('collector <action>')
|
|
26
|
-
.description('Manage local collector
|
|
26
|
+
.description('Manage local collector process (advanced/debug)')
|
|
27
27
|
.option('--name <name>', 'Collector name override')
|
|
28
|
-
.option('--limit <n>', 'Max jobs per loop/once run', '
|
|
29
|
-
.option('--poll-seconds <n>', 'Polling interval for start loop', '
|
|
28
|
+
.option('--limit <n>', 'Max jobs per loop/once run', '10')
|
|
29
|
+
.option('--poll-seconds <n>', 'Polling interval for start loop', '180')
|
|
30
30
|
.option('--json', 'Output as JSON')
|
|
31
31
|
.action(async (action, opts) => {
|
|
32
32
|
const normalizedAction = String(action || '').trim().toLowerCase();
|
|
33
33
|
const userId = getUserId();
|
|
34
|
-
const limit = Math.max(1, Number(opts.limit) ||
|
|
35
|
-
const pollSeconds = Math.max(
|
|
34
|
+
const limit = Math.max(1, Number(opts.limit) || 10);
|
|
35
|
+
const pollSeconds = Math.max(30, Number(opts.pollSeconds) || 180);
|
|
36
36
|
|
|
37
37
|
try {
|
|
38
38
|
if (normalizedAction === 'enable') {
|
|
@@ -10,6 +10,7 @@ const {
|
|
|
10
10
|
upsertUserSubscription,
|
|
11
11
|
} = require('../db');
|
|
12
12
|
const { getUserId } = require('../user-context');
|
|
13
|
+
const { maybeEnsureBackgroundOn } = require('../background/service');
|
|
13
14
|
const { normalizeTier, computeNextScrapeAt } = require('../tiering');
|
|
14
15
|
|
|
15
16
|
module.exports = function (program) {
|
|
@@ -95,6 +96,7 @@ module.exports = function (program) {
|
|
|
95
96
|
tierPref: selectedTier,
|
|
96
97
|
isActive: true,
|
|
97
98
|
});
|
|
99
|
+
const background = await maybeEnsureBackgroundOn({ userId });
|
|
98
100
|
|
|
99
101
|
if (opts.json) {
|
|
100
102
|
console.log(JSON.stringify({
|
|
@@ -111,6 +113,7 @@ module.exports = function (program) {
|
|
|
111
113
|
},
|
|
112
114
|
initialPrice: initial?.price?.numeric || null,
|
|
113
115
|
initialCurrency: initial?.price?.currency || null,
|
|
116
|
+
background,
|
|
114
117
|
}));
|
|
115
118
|
return;
|
|
116
119
|
}
|
|
@@ -118,6 +121,11 @@ module.exports = function (program) {
|
|
|
118
121
|
console.log(`Subscribed: ${product.asin} (${product.title})`);
|
|
119
122
|
console.log(`User: ${userId}`);
|
|
120
123
|
console.log(`Tier pref: ${subscription.tier_pref || 'default'}`);
|
|
124
|
+
if (background.running) {
|
|
125
|
+
console.log(`Background: running (${background.pollSeconds || 180}s poll)`);
|
|
126
|
+
} else if (background.attempted && background.error) {
|
|
127
|
+
console.log(`Background: setup failed (${background.error})`);
|
|
128
|
+
}
|
|
121
129
|
} catch (err) {
|
|
122
130
|
console.error(`Error: ${err.message}`);
|
|
123
131
|
process.exit(1);
|
package/src/commands/track.js
CHANGED
|
@@ -9,6 +9,7 @@ const {
|
|
|
9
9
|
upsertProductLatestPrice,
|
|
10
10
|
} = require('../db');
|
|
11
11
|
const { getUserId } = require('../user-context');
|
|
12
|
+
const { maybeEnsureBackgroundOn } = require('../background/service');
|
|
12
13
|
const { normalizeTier, computeNextScrapeAt } = require('../tiering');
|
|
13
14
|
|
|
14
15
|
module.exports = function (program) {
|
|
@@ -74,6 +75,7 @@ module.exports = function (program) {
|
|
|
74
75
|
const nextTier = normalizeTier(product.tier, selectedTier || 'daily');
|
|
75
76
|
const userId = getUserId();
|
|
76
77
|
let subscription = null;
|
|
78
|
+
const background = await maybeEnsureBackgroundOn({ userId });
|
|
77
79
|
try {
|
|
78
80
|
await updateProductById(product.id, {
|
|
79
81
|
last_price: result.price.numeric,
|
|
@@ -108,12 +110,18 @@ module.exports = function (program) {
|
|
|
108
110
|
active: opts.inactive ? false : (product.is_active ?? true),
|
|
109
111
|
userId,
|
|
110
112
|
subscribed: Boolean(subscription),
|
|
113
|
+
background,
|
|
111
114
|
}));
|
|
112
115
|
} else {
|
|
113
116
|
console.log(`Tracking: ${result.title}`);
|
|
114
117
|
console.log(`ASIN: ${result.asin}`);
|
|
115
118
|
console.log(`Price: ${result.priceRaw}`);
|
|
116
119
|
console.log(`Tier: ${nextTier}`);
|
|
120
|
+
if (background.running) {
|
|
121
|
+
console.log(`Background collector: running (${background.pollSeconds || 180}s poll)`);
|
|
122
|
+
} else if (background.attempted && background.error) {
|
|
123
|
+
console.log(`Background collector: setup failed (${background.error})`);
|
|
124
|
+
}
|
|
117
125
|
console.log(`Saved to Supabase.`);
|
|
118
126
|
}
|
|
119
127
|
} catch (err) {
|
|
@@ -106,14 +106,81 @@ function normalizeScraperResult(result, method) {
|
|
|
106
106
|
};
|
|
107
107
|
}
|
|
108
108
|
|
|
109
|
+
/**
 * Coerce a value to a finite number.
 *
 * Missing values (`null`, `undefined`) and blank strings yield `null`. Without
 * this guard `Number(null)` and `Number('')` both evaluate to 0, so an absent
 * price or baseline would be reported as a real value of 0.
 *
 * @param {*} value - Candidate numeric value (number, numeric string, etc.).
 * @returns {number|null} The finite number, or `null` when the value is
 *   missing, blank, or does not convert to a finite number.
 */
function toFiniteNumber(value) {
  if (value == null) return null;
  if (typeof value === 'string' && value.trim() === '') return null;

  const numeric = Number(value);
  return Number.isFinite(numeric) ? numeric : null;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Read a floating-point setting from an environment variable.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value returned when the variable is unset, blank,
 *   or not a finite number.
 * @param {{min?: number|null, max?: number|null}} [bounds] - Optional inclusive
 *   bounds; a parsed value outside them is clamped to the nearest bound.
 * @returns {number} The parsed (and clamped) value, or `fallback`.
 */
function readFloatEnv(name, fallback, { min = null, max = null } = {}) {
  const raw = process.env[name];

  // An empty or whitespace-only value means "not configured". Number('') is 0,
  // which would otherwise bypass the fallback and be clamped to `min`.
  if (raw == null || String(raw).trim() === '') return fallback;

  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) return fallback;
  if (min != null && parsed < min) return min;
  if (max != null && parsed > max) return max;
  return parsed;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Report whether vision guardrails are active.
 *
 * Guardrails are on unless the VISION_GUARDRAIL_ENABLED environment variable
 * is exactly the string '0'; any other value, or no value, leaves them on.
 *
 * @returns {boolean} `false` only when the variable equals '0'.
 */
function isVisionGuardrailEnabled() {
  const { VISION_GUARDRAIL_ENABLED: flag } = process.env;
  const explicitlyDisabled = flag === '0';
  return !explicitlyDisabled;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Resolve the minimum confidence for vision guardrails.
 *
 * Delegates to `readFloatEnv` for VISION_GUARDRAIL_MIN_CONFIDENCE with a
 * default of 0.92 and bounds of 0 to 1.
 *
 * @returns {number} The confidence threshold returned by `readFloatEnv`.
 */
function getVisionMinConfidence() {
  const defaultThreshold = 0.92;
  const bounds = { min: 0, max: 1 };
  return readFloatEnv('VISION_GUARDRAIL_MIN_CONFIDENCE', defaultThreshold, bounds);
}
|
|
130
|
+
|
|
131
|
+
/**
 * Resolve the largest allowed relative difference between a vision price and
 * the baseline price.
 *
 * Delegates to `readFloatEnv` for VISION_GUARDRAIL_MAX_REL_DELTA with a
 * default of 0.5 and bounds of 0 to 10.
 *
 * @returns {number} The relative-delta limit returned by `readFloatEnv`.
 */
function getVisionMaxRelativeDelta() {
  const defaultLimit = 0.5;
  const bounds = { min: 0, max: 10 };
  return readFloatEnv('VISION_GUARDRAIL_MAX_REL_DELTA', defaultLimit, bounds);
}
|
|
134
|
+
|
|
135
|
+
/**
 * Decide whether a vision-extracted price may be accepted.
 *
 * The result is accepted without checks when guardrails are disabled, or when
 * it is missing, not produced by the 'vision' method, or carries no price.
 * Otherwise two checks run in order:
 *   1. confidence must be at least `minConfidence` (a missing or non-numeric
 *      confidence counts as 0);
 *   2. when both the extracted price and a positive baseline are available,
 *      their relative difference must not exceed `maxRelativeDelta`.
 *
 * @param {object|null} result - Normalized scraper result.
 * @param {object} [options]
 * @param {number|null} [options.baselinePrice=null] - Reference price to compare against.
 * @param {boolean} [options.enabled] - Defaults to `isVisionGuardrailEnabled()`.
 * @param {number} [options.minConfidence] - Defaults to `getVisionMinConfidence()`.
 * @param {number} [options.maxRelativeDelta] - Defaults to `getVisionMaxRelativeDelta()`.
 * @returns {{accepted: boolean, reason: string|null}} `reason` is `null` when
 *   accepted, otherwise a `low_confidence:…` or `relative_delta:…` tag.
 */
function evaluateVisionGuardrails(result, {
  baselinePrice = null,
  enabled = isVisionGuardrailEnabled(),
  minConfidence = getVisionMinConfidence(),
  maxRelativeDelta = getVisionMaxRelativeDelta(),
} = {}) {
  const pass = () => ({ accepted: true, reason: null });
  const fail = (reason) => ({ accepted: false, reason });

  if (!enabled) {
    return pass();
  }

  // Guardrails only apply to vision results that actually carry a price.
  const isVisionPrice = Boolean(result) && result.method === 'vision' && Boolean(result.price);
  if (!isVisionPrice) {
    return pass();
  }

  const score = toFiniteNumber(result.confidence) || 0;
  if (score < minConfidence) {
    return fail(`low_confidence:${score.toFixed(3)}<${Number(minConfidence).toFixed(3)}`);
  }

  const observed = toFiniteNumber(result.price?.numeric);
  const reference = toFiniteNumber(baselinePrice);

  // Without a usable pair of prices there is nothing to compare against.
  const comparable = observed != null && reference != null && reference > 0;
  if (!comparable) {
    return pass();
  }

  const drift = Math.abs(observed - reference) / reference;
  return drift > maxRelativeDelta
    ? fail(`relative_delta:${drift.toFixed(3)}>${Number(maxRelativeDelta).toFixed(3)}`)
    : pass();
}
|
|
173
|
+
|
|
109
174
|
async function runCollectionPipeline({
|
|
110
175
|
url,
|
|
111
176
|
domain = null,
|
|
112
177
|
allowVision = true,
|
|
113
178
|
allowRailwayDomFallback = true,
|
|
179
|
+
baselinePrice = null,
|
|
114
180
|
}) {
|
|
115
181
|
const effectiveDomain = domain || extractDomain(url);
|
|
116
182
|
const fallbackCurrency = fallbackCurrencyForDomain(effectiveDomain);
|
|
183
|
+
let rejectedVisionResult = null;
|
|
117
184
|
|
|
118
185
|
const htmlJsonResult = normalizeScraperResult(
|
|
119
186
|
await runHtmlJsonExtraction(url, { fallbackCurrency }),
|
|
@@ -139,9 +206,30 @@ async function runCollectionPipeline({
|
|
|
139
206
|
finalUrl: shot.finalUrl,
|
|
140
207
|
}, 'vision');
|
|
141
208
|
|
|
142
|
-
if (normalizedVision.
|
|
209
|
+
if (normalizedVision.blockedSignal) {
|
|
143
210
|
return normalizedVision;
|
|
144
211
|
}
|
|
212
|
+
|
|
213
|
+
if (normalizedVision.price) {
|
|
214
|
+
const guardrail = evaluateVisionGuardrails(normalizedVision, { baselinePrice });
|
|
215
|
+
if (guardrail.accepted) {
|
|
216
|
+
return normalizedVision;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
rejectedVisionResult = {
|
|
220
|
+
...normalizedVision,
|
|
221
|
+
status: 'no_price',
|
|
222
|
+
price: null,
|
|
223
|
+
blockedSignal: false,
|
|
224
|
+
blockedReason: null,
|
|
225
|
+
debug: {
|
|
226
|
+
...(normalizedVision.debug || {}),
|
|
227
|
+
guardrail: 'rejected',
|
|
228
|
+
guardrailReason: guardrail.reason,
|
|
229
|
+
baselinePrice: toFiniteNumber(baselinePrice),
|
|
230
|
+
},
|
|
231
|
+
};
|
|
232
|
+
}
|
|
145
233
|
} catch (err) {
|
|
146
234
|
// Continue to DOM fallback.
|
|
147
235
|
}
|
|
@@ -152,7 +240,7 @@ async function runCollectionPipeline({
|
|
|
152
240
|
return normalizeScraperResult(domResult, 'railway_dom');
|
|
153
241
|
}
|
|
154
242
|
|
|
155
|
-
return htmlJsonResult;
|
|
243
|
+
return rejectedVisionResult || htmlJsonResult;
|
|
156
244
|
}
|
|
157
245
|
|
|
158
246
|
module.exports = {
|
|
@@ -163,4 +251,7 @@ module.exports = {
|
|
|
163
251
|
module.exports.__test = {
|
|
164
252
|
fallbackCurrencyForDomain,
|
|
165
253
|
normalizeScraperResult,
|
|
254
|
+
evaluateVisionGuardrails,
|
|
255
|
+
getVisionMinConfidence,
|
|
256
|
+
getVisionMaxRelativeDelta,
|
|
166
257
|
};
|
package/src/extractors/vision.js
CHANGED
|
@@ -185,6 +185,19 @@ function isVisionEnabled() {
|
|
|
185
185
|
return process.env.VISION_FALLBACK_ENABLED === '1';
|
|
186
186
|
}
|
|
187
187
|
|
|
188
|
+
/**
 * Assemble the instruction text sent with a product screenshot to the vision model.
 *
 * The prompt is a fixed sequence of sentences joined by single spaces: the
 * task, the required JSON keys, the price format, which price to use and
 * which to ignore, blocked-page and ambiguity handling, and the confidence range.
 *
 * @returns {string} The complete single-line prompt.
 */
function buildVisionPrompt() {
  const task = 'You extract the final payable price from an Amazon product-detail screenshot.';
  const outputFormat = 'Respond with JSON only using exactly keys: price, currency, confidence, is_blocked, reason, raw_text.';
  const priceFormat = 'price must be a decimal number (dot separator), or null when uncertain.';
  const priceSource = 'Only use the main buy-box product price for the shown product.';
  const exclusions = 'Ignore list/strike prices, "from" ranges, installment/monthly values, coupons, shipping, used/new offers, bundle prices, and sponsored/related product prices.';
  const blockedPages = 'If the page is captcha/challenge/login/cookie-wall and price is not clearly visible, set is_blocked=true and price=null.';
  const ambiguity = 'If multiple plausible prices exist, set price=null.';
  const confidenceRange = 'confidence must be a number between 0 and 1.';

  const sentences = [
    task,
    outputFormat,
    priceFormat,
    priceSource,
    exclusions,
    blockedPages,
    ambiguity,
    confidenceRange,
  ];
  return sentences.join(' ');
}
|
|
200
|
+
|
|
188
201
|
async function requestOpenRouter({ apiKey, model, prompt, base64 }) {
|
|
189
202
|
const response = await fetch('https://openrouter.ai/api/v1/chat/completions', {
|
|
190
203
|
method: 'POST',
|
|
@@ -291,12 +304,7 @@ async function extractPriceFromScreenshotBuffer(imageBuffer, {
|
|
|
291
304
|
};
|
|
292
305
|
}
|
|
293
306
|
|
|
294
|
-
const prompt =
|
|
295
|
-
'Extract the currently visible final product price from this e-commerce screenshot.',
|
|
296
|
-
'Respond with JSON only and keys: price, currency, confidence, is_blocked, reason, raw_text.',
|
|
297
|
-
'Use decimal number for price (example: 79.99).',
|
|
298
|
-
'If price is not clearly visible, set price=null and confidence<=0.5.',
|
|
299
|
-
].join(' ');
|
|
307
|
+
const prompt = buildVisionPrompt();
|
|
300
308
|
|
|
301
309
|
const transport = selected.name === 'openrouter'
|
|
302
310
|
? await requestOpenRouter({
|
|
@@ -352,6 +360,7 @@ module.exports = {
|
|
|
352
360
|
};
|
|
353
361
|
|
|
354
362
|
module.exports.__test = {
|
|
363
|
+
buildVisionPrompt,
|
|
355
364
|
extractJsonBlock,
|
|
356
365
|
extractOutputText,
|
|
357
366
|
getProvider,
|