amaprice 1.0.11 → 1.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/package.json +1 -1
- package/src/extractors/pipeline.js +93 -2
- package/src/extractors/vision.js +15 -6
- package/src/orchestrator/runner.js +1 -0
package/README.md
CHANGED
|
@@ -183,6 +183,9 @@ Environment variables used by the npm package:
|
|
|
183
183
|
| `VISION_PROVIDER` | auto | `src/extractors/vision.js` | Optional force value: `openrouter` or `openai` |
|
|
184
184
|
| `OPENROUTER_HTTP_REFERER` | none | `src/extractors/vision.js` | Optional OpenRouter attribution header |
|
|
185
185
|
| `OPENROUTER_TITLE` | none | `src/extractors/vision.js` | Optional OpenRouter attribution header |
|
|
186
|
+
| `VISION_GUARDRAIL_ENABLED` | `1` | `src/extractors/pipeline.js` | Reject suspicious vision outputs before DB writes |
|
|
187
|
+
| `VISION_GUARDRAIL_MIN_CONFIDENCE` | `0.92` | `src/extractors/pipeline.js` | Minimum confidence required for vision price acceptance |
|
|
188
|
+
| `VISION_GUARDRAIL_MAX_REL_DELTA` | `0.5` | `src/extractors/pipeline.js` | Max relative delta vs last known price before rejecting vision price |
|
|
186
189
|
| `OPENAI_API_KEY` | none | `src/extractors/vision.js` | Legacy fallback if `OPENROUTER_API_KEY` is unset |
|
|
187
190
|
|
|
188
191
|
For production background workers, prefer the Supabase **service role key**.
|
|
@@ -202,6 +205,9 @@ Steps:
|
|
|
202
205
|
- `VISION_FALLBACK_ENABLED=1`
|
|
203
206
|
- `OPENROUTER_API_KEY=<your-openrouter-key>`
|
|
204
207
|
- `VISION_MODEL=google/gemini-3-flash-preview`
|
|
208
|
+
- `VISION_GUARDRAIL_ENABLED=1`
|
|
209
|
+
- `VISION_GUARDRAIL_MIN_CONFIDENCE=0.92`
|
|
210
|
+
- `VISION_GUARDRAIL_MAX_REL_DELTA=0.5`
|
|
205
211
|
4. Ensure builder is Dockerfile (root `Dockerfile`).
|
|
206
212
|
5. Deploy.
|
|
207
213
|
6. Confirm logs show `[worker] processed=...`.
|
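package/package.json
CHANGED
(hunk not captured in this extract; the summary above lists this file as +1 -1)
package/src/extractors/pipeline.js
CHANGED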
|
@@ -106,14 +106,81 @@ function normalizeScraperResult(result, method) {
|
|
|
106
106
|
};
|
|
107
107
|
}
|
|
108
108
|
|
|
109
|
+
/**
 * Coerce a value to a finite number.
 *
 * @param {*} value - Candidate value (number, numeric string, etc.).
 * @returns {number|null} The finite numeric value, or `null` when the input
 *   is missing, blank, or does not convert to a finite number.
 */
function toFiniteNumber(value) {
  // Number(null), Number('') and Number('   ') all evaluate to 0, which would
  // turn "no value" into a real-looking 0. Treat those inputs as absent.
  if (value == null) return null;
  if (typeof value === 'string' && value.trim() === '') return null;

  const numeric = Number(value);
  return Number.isFinite(numeric) ? numeric : null;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Read a floating-point setting from `process.env`.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value returned when the variable is unset,
 *   blank, or not a finite number. Returned as-is (not clamped).
 * @param {{min?: number|null, max?: number|null}} [bounds] - Optional
 *   inclusive bounds; a parsed value outside them is clamped to the bound.
 * @returns {number} The parsed (and clamped) value, or `fallback`.
 */
function readFloatEnv(name, fallback, { min = null, max = null } = {}) {
  const raw = process.env[name];

  // Number('') and Number('   ') are 0, so a variable that is declared but
  // left blank must be treated as unset rather than as an explicit zero.
  if (raw == null || String(raw).trim() === '') return fallback;

  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) return fallback;
  if (min != null && parsed < min) return min;
  if (max != null && parsed > max) return max;
  return parsed;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Report whether vision guardrails are switched on.
 *
 * Guardrails are on unless `VISION_GUARDRAIL_ENABLED` is exactly the string
 * `'0'`; an unset variable or any other value leaves them enabled.
 *
 * @returns {boolean} `false` only when the variable equals `'0'`.
 */
function isVisionGuardrailEnabled() {
  const { VISION_GUARDRAIL_ENABLED: flag } = process.env;
  const explicitlyDisabled = flag === '0';
  return !explicitlyDisabled;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Resolve the minimum confidence threshold for vision results.
 *
 * Reads `VISION_GUARDRAIL_MIN_CONFIDENCE` through `readFloatEnv`, with a
 * fallback of 0.92 and bounds of 0 to 1.
 *
 * @returns {number} Whatever `readFloatEnv` yields for this setting.
 */
function getVisionMinConfidence() {
  const confidenceBounds = { min: 0, max: 1 };
  const defaultThreshold = 0.92;
  return readFloatEnv('VISION_GUARDRAIL_MIN_CONFIDENCE', defaultThreshold, confidenceBounds);
}
|
|
130
|
+
|
|
131
|
+
/**
 * Resolve the maximum relative price delta allowed for vision results.
 *
 * Reads `VISION_GUARDRAIL_MAX_REL_DELTA` through `readFloatEnv`, with a
 * fallback of 0.5 and bounds of 0 to 10.
 *
 * @returns {number} Whatever `readFloatEnv` yields for this setting.
 */
function getVisionMaxRelativeDelta() {
  const deltaBounds = { min: 0, max: 10 };
  const defaultDelta = 0.5;
  return readFloatEnv('VISION_GUARDRAIL_MAX_REL_DELTA', defaultDelta, deltaBounds);
}
|
|
134
|
+
|
|
135
|
+
/**
 * Decide whether a vision-extracted price should be accepted.
 *
 * Results that are not vision results with a price, or any result when the
 * guardrails are disabled, are accepted without further checks. Otherwise two
 * checks run in order:
 *   1. confidence must be at least `minConfidence`;
 *   2. when both the extracted price and a positive baseline are available,
 *      |extracted - baseline| / baseline must not exceed `maxRelativeDelta`.
 *
 * @param {object} result - Normalized extraction result; the fields read here
 *   are `method`, `price`, `price.numeric` and `confidence`.
 * @param {object} [options]
 * @param {*} [options.baselinePrice=null] - Last known price to compare with.
 * @param {boolean} [options.enabled] - Defaults to `isVisionGuardrailEnabled()`.
 * @param {number} [options.minConfidence] - Defaults to `getVisionMinConfidence()`.
 * @param {number} [options.maxRelativeDelta] - Defaults to `getVisionMaxRelativeDelta()`.
 * @returns {{accepted: boolean, reason: (string|null)}} `reason` is `null`
 *   when accepted, otherwise a `low_confidence:…` or `relative_delta:…` tag.
 */
function evaluateVisionGuardrails(result, {
  baselinePrice = null,
  enabled = isVisionGuardrailEnabled(),
  minConfidence = getVisionMinConfidence(),
  maxRelativeDelta = getVisionMaxRelativeDelta(),
} = {}) {
  const accept = () => ({ accepted: true, reason: null });
  const reject = (reason) => ({ accepted: false, reason });

  if (!enabled) return accept();

  // Only vision results that actually carry a price are subject to checks.
  const isVisionPrice = Boolean(result) && result.method === 'vision' && Boolean(result.price);
  if (!isVisionPrice) return accept();

  // A missing or unparseable confidence counts as zero.
  const parsedConfidence = toFiniteNumber(result.confidence);
  const confidence = parsedConfidence ? parsedConfidence : 0;
  if (confidence < minConfidence) {
    const shown = confidence.toFixed(3);
    const required = Number(minConfidence).toFixed(3);
    return reject(`low_confidence:${shown}<${required}`);
  }

  // The delta check needs both numbers and a strictly positive baseline.
  const baseline = toFiniteNumber(baselinePrice);
  const extracted = toFiniteNumber(result.price.numeric);
  const comparable = extracted != null && baseline != null && baseline > 0;
  if (!comparable) return accept();

  const relativeDelta = Math.abs(extracted - baseline) / baseline;
  return relativeDelta > maxRelativeDelta
    ? reject(`relative_delta:${relativeDelta.toFixed(3)}>${Number(maxRelativeDelta).toFixed(3)}`)
    : accept();
}
|
|
173
|
+
|
|
109
174
|
async function runCollectionPipeline({
|
|
110
175
|
url,
|
|
111
176
|
domain = null,
|
|
112
177
|
allowVision = true,
|
|
113
178
|
allowRailwayDomFallback = true,
|
|
179
|
+
baselinePrice = null,
|
|
114
180
|
}) {
|
|
115
181
|
const effectiveDomain = domain || extractDomain(url);
|
|
116
182
|
const fallbackCurrency = fallbackCurrencyForDomain(effectiveDomain);
|
|
183
|
+
let rejectedVisionResult = null;
|
|
117
184
|
|
|
118
185
|
const htmlJsonResult = normalizeScraperResult(
|
|
119
186
|
await runHtmlJsonExtraction(url, { fallbackCurrency }),
|
|
@@ -139,9 +206,30 @@ async function runCollectionPipeline({
|
|
|
139
206
|
finalUrl: shot.finalUrl,
|
|
140
207
|
}, 'vision');
|
|
141
208
|
|
|
142
|
-
if (normalizedVision.
|
|
209
|
+
if (normalizedVision.blockedSignal) {
|
|
143
210
|
return normalizedVision;
|
|
144
211
|
}
|
|
212
|
+
|
|
213
|
+
if (normalizedVision.price) {
|
|
214
|
+
const guardrail = evaluateVisionGuardrails(normalizedVision, { baselinePrice });
|
|
215
|
+
if (guardrail.accepted) {
|
|
216
|
+
return normalizedVision;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
rejectedVisionResult = {
|
|
220
|
+
...normalizedVision,
|
|
221
|
+
status: 'no_price',
|
|
222
|
+
price: null,
|
|
223
|
+
blockedSignal: false,
|
|
224
|
+
blockedReason: null,
|
|
225
|
+
debug: {
|
|
226
|
+
...(normalizedVision.debug || {}),
|
|
227
|
+
guardrail: 'rejected',
|
|
228
|
+
guardrailReason: guardrail.reason,
|
|
229
|
+
baselinePrice: toFiniteNumber(baselinePrice),
|
|
230
|
+
},
|
|
231
|
+
};
|
|
232
|
+
}
|
|
145
233
|
} catch (err) {
|
|
146
234
|
// Continue to DOM fallback.
|
|
147
235
|
}
|
|
@@ -152,7 +240,7 @@ async function runCollectionPipeline({
|
|
|
152
240
|
return normalizeScraperResult(domResult, 'railway_dom');
|
|
153
241
|
}
|
|
154
242
|
|
|
155
|
-
return htmlJsonResult;
|
|
243
|
+
return rejectedVisionResult || htmlJsonResult;
|
|
156
244
|
}
|
|
157
245
|
|
|
158
246
|
module.exports = {
|
|
@@ -163,4 +251,7 @@ module.exports = {
|
|
|
163
251
|
module.exports.__test = {
|
|
164
252
|
fallbackCurrencyForDomain,
|
|
165
253
|
normalizeScraperResult,
|
|
254
|
+
evaluateVisionGuardrails,
|
|
255
|
+
getVisionMinConfidence,
|
|
256
|
+
getVisionMaxRelativeDelta,
|
|
166
257
|
};
|
package/src/extractors/vision.js
CHANGED
|
@@ -185,6 +185,19 @@ function isVisionEnabled() {
|
|
|
185
185
|
return process.env.VISION_FALLBACK_ENABLED === '1';
|
|
186
186
|
}
|
|
187
187
|
|
|
188
|
+
/**
 * Assemble the instruction prompt sent with a screenshot to the vision model.
 *
 * The prompt is a fixed sequence of sentences joined by single spaces. It
 * covers the task, the required JSON keys, which price to read and which to
 * ignore, how to flag blocked pages, and the confidence range.
 *
 * @returns {string} The full prompt text.
 */
function buildVisionPrompt() {
  const task = 'You extract the final payable price from an Amazon product-detail screenshot.';
  const responseShape = 'Respond with JSON only using exactly keys: price, currency, confidence, is_blocked, reason, raw_text.';
  const priceFormat = 'price must be a decimal number (dot separator), or null when uncertain.';
  const priceSource = 'Only use the main buy-box product price for the shown product.';
  const exclusions = 'Ignore list/strike prices, "from" ranges, installment/monthly values, coupons, shipping, used/new offers, bundle prices, and sponsored/related product prices.';
  const blockedPages = 'If the page is captcha/challenge/login/cookie-wall and price is not clearly visible, set is_blocked=true and price=null.';
  const ambiguity = 'If multiple plausible prices exist, set price=null.';
  const confidenceRange = 'confidence must be a number between 0 and 1.';

  return `${task} ${responseShape} ${priceFormat} ${priceSource} ${exclusions} ${blockedPages} ${ambiguity} ${confidenceRange}`;
}
|
|
200
|
+
|
|
188
201
|
async function requestOpenRouter({ apiKey, model, prompt, base64 }) {
|
|
189
202
|
const response = await fetch('https://openrouter.ai/api/v1/chat/completions', {
|
|
190
203
|
method: 'POST',
|
|
@@ -291,12 +304,7 @@ async function extractPriceFromScreenshotBuffer(imageBuffer, {
|
|
|
291
304
|
};
|
|
292
305
|
}
|
|
293
306
|
|
|
294
|
-
const prompt =
|
|
295
|
-
'Extract the currently visible final product price from this e-commerce screenshot.',
|
|
296
|
-
'Respond with JSON only and keys: price, currency, confidence, is_blocked, reason, raw_text.',
|
|
297
|
-
'Use decimal number for price (example: 79.99).',
|
|
298
|
-
'If price is not clearly visible, set price=null and confidence<=0.5.',
|
|
299
|
-
].join(' ');
|
|
307
|
+
const prompt = buildVisionPrompt();
|
|
300
308
|
|
|
301
309
|
const transport = selected.name === 'openrouter'
|
|
302
310
|
? await requestOpenRouter({
|
|
@@ -352,6 +360,7 @@ module.exports = {
|
|
|
352
360
|
};
|
|
353
361
|
|
|
354
362
|
module.exports.__test = {
|
|
363
|
+
buildVisionPrompt,
|
|
355
364
|
extractJsonBlock,
|
|
356
365
|
extractOutputText,
|
|
357
366
|
getProvider,
|