@factorypure/client-helpers 1.1.7 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +382 -0
- package/dist/index.js +1225 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -3,6 +3,173 @@ import { startOfDay, subDays } from 'date-fns';
|
|
|
3
3
|
import { Index } from 'flexsearch';
|
|
4
4
|
import EnglishPreset from 'flexsearch/lang/en';
|
|
5
5
|
import { z } from 'zod';
|
|
6
|
+
/**
|
|
7
|
+
* @packageDocumentation
|
|
8
|
+
* @module @factorypure/client-helpers
|
|
9
|
+
*
|
|
10
|
+
* # Filter System v2
|
|
11
|
+
*
|
|
12
|
+
* This package provides an extensible, rule-based filtering system for scrape results.
|
|
13
|
+
*
|
|
14
|
+
* ## Key Concepts
|
|
15
|
+
*
|
|
16
|
+
* ### Filter Severity
|
|
17
|
+
* - **BLOCK**: Always hides the result, cannot be overridden
|
|
18
|
+
* - **WARNING**: Shows result with warning indicator, can be overridden by override rules
|
|
19
|
+
* - **INFO**: Metadata only, doesn't affect visibility
|
|
20
|
+
*
|
|
21
|
+
* ### Visibility States
|
|
22
|
+
* - **HIDDEN**: Result is filtered out
|
|
23
|
+
* - **VISIBLE_WITH_WARNINGS**: Result is shown but has warning indicators
|
|
24
|
+
* - **VISIBLE**: Result is shown without issues
|
|
25
|
+
*
|
|
26
|
+
* ### Filter Rules
|
|
27
|
+
* Rules implement the FilterRule interface and are evaluated against results.
|
|
28
|
+
* Each rule can:
|
|
29
|
+
* - Have a severity level (BLOCK, WARNING, INFO)
|
|
30
|
+
* - Have a priority (higher priority rules evaluated first)
|
|
31
|
+
* - Be enabled/disabled
|
|
32
|
+
* - Specify which override rules can override it
|
|
33
|
+
*
|
|
34
|
+
* ### Override Rules
|
|
35
|
+
* Special INFO-level rules that can override WARNING-level rules.
|
|
36
|
+
* Examples: SKU match, calculated SKU match, alternate SKU match
|
|
37
|
+
*
|
|
38
|
+
* ## Quick Start
|
|
39
|
+
*
|
|
40
|
+
* ### Using the Default Engine
|
|
41
|
+
* ```typescript
|
|
42
|
+
* import { filterScrapeResultsV2, createDefaultFilterEngine } from '@factorypure/client-helpers'
|
|
43
|
+
*
|
|
44
|
+
* const filteredResults = filterScrapeResultsV2({
|
|
45
|
+
* scrapeResults,
|
|
46
|
+
* variant,
|
|
47
|
+
* variantScrapeOptions,
|
|
48
|
+
* vendorScrapeOptions,
|
|
49
|
+
* globalScrapeOptions,
|
|
50
|
+
* })
|
|
51
|
+
* ```
|
|
52
|
+
*
|
|
53
|
+
* ### Custom Configuration
|
|
54
|
+
* ```typescript
|
|
55
|
+
* const filteredResults = filterScrapeResultsV2({
|
|
56
|
+
* scrapeResults,
|
|
57
|
+
* variant,
|
|
58
|
+
* variantScrapeOptions,
|
|
59
|
+
* vendorScrapeOptions,
|
|
60
|
+
* globalScrapeOptions,
|
|
61
|
+
* filterConfig: {
|
|
62
|
+
* rules: [
|
|
63
|
+
* { id: 'high_price_outlier', enabled: false },
|
|
64
|
+
* { id: 'competitor_exclusion', severity: FilterSeverity.BLOCK },
|
|
65
|
+
* { id: 'refurbished_used', priority: 100 },
|
|
66
|
+
* ]
|
|
67
|
+
* }
|
|
68
|
+
* })
|
|
69
|
+
* ```
|
|
70
|
+
*
|
|
71
|
+
* ### Adding Custom Rules
|
|
72
|
+
* ```typescript
|
|
73
|
+
* import { createCustomFilterRule, FilterSeverity } from '@factorypure/client-helpers'
|
|
74
|
+
*
|
|
75
|
+
* const myCustomRule = createCustomFilterRule({
|
|
76
|
+
* id: 'my_custom_rule',
|
|
77
|
+
* name: 'My Custom Rule',
|
|
78
|
+
* description: 'Filters results based on custom logic',
|
|
79
|
+
* severity: FilterSeverity.WARNING,
|
|
80
|
+
* priority: 50,
|
|
81
|
+
* canBeOverridden: true,
|
|
82
|
+
* overridableBy: ['sku_match'],
|
|
83
|
+
* evaluate: (context) => {
|
|
84
|
+
* if (context.result.title.includes('bad-keyword')) {
|
|
85
|
+
* return {
|
|
86
|
+
* ruleId: 'my_custom_rule',
|
|
87
|
+
* severity: FilterSeverity.WARNING,
|
|
88
|
+
* message: 'Contains bad keyword',
|
|
89
|
+
* metadata: { keyword: 'bad-keyword' },
|
|
90
|
+
* timestamp: new Date().toISOString(),
|
|
91
|
+
* }
|
|
92
|
+
* }
|
|
93
|
+
* return null
|
|
94
|
+
* }
|
|
95
|
+
* })
|
|
96
|
+
*
|
|
97
|
+
* const filteredResults = filterScrapeResultsV2({
|
|
98
|
+
* scrapeResults,
|
|
99
|
+
* variant,
|
|
100
|
+
* variantScrapeOptions,
|
|
101
|
+
* vendorScrapeOptions,
|
|
102
|
+
* globalScrapeOptions,
|
|
103
|
+
* customRules: [myCustomRule],
|
|
104
|
+
* })
|
|
105
|
+
* ```
|
|
106
|
+
*
|
|
107
|
+
* ### Using the Builder Pattern
|
|
108
|
+
* ```typescript
|
|
109
|
+
* import { FilterRuleBuilder, FilterSeverity } from '@factorypure/client-helpers'
|
|
110
|
+
*
|
|
111
|
+
* const myRule = new FilterRuleBuilder()
|
|
112
|
+
* .id('price_above_threshold')
|
|
113
|
+
* .name('Price Above Threshold')
|
|
114
|
+
* .description('Filters results above a price threshold')
|
|
115
|
+
* .severity(FilterSeverity.WARNING)
|
|
116
|
+
* .priority(80)
|
|
117
|
+
* .canBeOverridden(true)
|
|
118
|
+
* .overridableBy(['sku_match'])
|
|
119
|
+
* .evaluate((context) => {
|
|
120
|
+
* if (context.result.extracted_price > 10000) {
|
|
121
|
+
* return {
|
|
122
|
+
* ruleId: 'price_above_threshold',
|
|
123
|
+
* severity: FilterSeverity.WARNING,
|
|
124
|
+
* message: 'Price exceeds $10,000',
|
|
125
|
+
* metadata: { price: context.result.extracted_price },
|
|
126
|
+
* timestamp: new Date().toISOString(),
|
|
127
|
+
* }
|
|
128
|
+
* }
|
|
129
|
+
* return null
|
|
130
|
+
* })
|
|
131
|
+
* .build()
|
|
132
|
+
* ```
|
|
133
|
+
*
|
|
134
|
+
* ## Built-in Rules
|
|
135
|
+
*
|
|
136
|
+
* ### Block Rules (Cannot be overridden)
|
|
137
|
+
* - `high_price_outlier`: Filters results >15% more expensive than variant
|
|
138
|
+
* - `low_price_outlier`: Filters results significantly cheaper than variant
|
|
139
|
+
* - `date_outlier`: Filters results outside date window
|
|
140
|
+
* - `duplicate`: Filters duplicate results
|
|
141
|
+
* - `scam_source_exclusion`: Filters known scam sources
|
|
142
|
+
* - `manually_ignored`: Filters manually ignored/excluded results
|
|
143
|
+
* - `out_of_stock`: Filters out of stock items
|
|
144
|
+
*
|
|
145
|
+
* ### Warning Rules (Can be overridden)
|
|
146
|
+
* - `competitor_exclusion`: Filters excluded competitors
|
|
147
|
+
* - `search_exclusion`: Filters results not matching search criteria
|
|
148
|
+
* - `skip_sku`: Filters results with SKUs from skip list
|
|
149
|
+
* - `vendor_exclusion`: Filters results with excluded vendor names
|
|
150
|
+
* - `brand_mismatch`: Filters results with mismatched brands
|
|
151
|
+
* - `calculated_sku_mismatch`: Filters results with mismatched calculated SKUs
|
|
152
|
+
* - `critical_spec_mismatch`: Filters results with mismatched critical specs
|
|
153
|
+
* - `refurbished_used`: Filters refurbished/used items
|
|
154
|
+
*
|
|
155
|
+
* ### Override Rules
|
|
156
|
+
* - `sku_match`: SKU found in title
|
|
157
|
+
* - `calculated_sku_match`: Calculated SKU matches variant SKU
|
|
158
|
+
* - `alt_sku_match`: Alternate SKU matches
|
|
159
|
+
* - `product_id_match`: Product ID is linked
|
|
160
|
+
*
|
|
161
|
+
* ## Migration from Legacy System
|
|
162
|
+
*
|
|
163
|
+
* The legacy `filterScrapeResults` function is still available for backward compatibility.
|
|
164
|
+
* Results include both old fields (`hide_reasons`, `hide_override_reasons`) and new fields
|
|
165
|
+
* (`filter_results`, `visibility_state`) during the transition period.
|
|
166
|
+
*
|
|
167
|
+
* To migrate:
|
|
168
|
+
* 1. Replace `filterScrapeResults` with `filterScrapeResultsV2`
|
|
169
|
+
* 2. Update code to use `visibility_state` instead of `ignore_result`
|
|
170
|
+
* 3. Use `filter_results` for detailed filter information instead of string arrays
|
|
171
|
+
* 4. Configure rules as needed using `filterConfig` parameter
|
|
172
|
+
*/
|
|
6
173
|
export const regexUnitResultSchema = z.object({
|
|
7
174
|
value: z.string(),
|
|
8
175
|
source: z.string(),
|
|
@@ -22,6 +189,40 @@ export const regexUnitResultsSchema = z.object({
|
|
|
22
189
|
mile: z.array(regexUnitResultSchema),
|
|
23
190
|
ah: z.array(regexUnitResultSchema),
|
|
24
191
|
});
|
|
192
|
+
// ===============================
|
|
193
|
+
// New Filter System - Core Types (Must come before schemas that use them)
|
|
194
|
+
// ===============================
|
|
195
|
+
/**
 * Severity levels a filter rule can attach to its result.
 * - BLOCK: the result is hidden unless the rule allows an override and one is present
 * - WARNING: the result stays visible but is flagged with a warning
 * - INFO: metadata only, does not affect visibility
 *
 * Declared with `var` (as the compiled enum was) so the binding stays hoisted
 * for code that references it earlier in the module.
 */
export var FilterSeverity = {
    BLOCK: 'BLOCK',
    WARNING: 'WARNING',
    INFO: 'INFO',
};
|
|
207
|
+
/**
 * Visibility outcome assigned to a result once its filter results are evaluated.
 * - HIDDEN: the result is filtered out
 * - VISIBLE_WITH_WARNINGS: the result is shown with warning indicators
 * - VISIBLE: the result is shown without issues
 *
 * Declared with `var` (as the compiled enum was) so the binding stays hoisted.
 */
export var VisibilityState = {
    HIDDEN: 'HIDDEN',
    VISIBLE_WITH_WARNINGS: 'VISIBLE_WITH_WARNINGS',
    VISIBLE: 'VISIBLE',
};
|
|
216
|
+
/**
 * Zod schema for the structured record a filter rule returns when it fires.
 *
 * Fields: the id of the rule that produced it, its severity, a human-readable
 * message, optional free-form metadata keyed by string, and a timestamp string.
 */
export const filterResultSchema = z.object({
    ruleId: z.string(),
    severity: z.nativeEnum(FilterSeverity),
    message: z.string(),
    metadata: z.optional(z.record(z.string(), z.any())),
    timestamp: z.string(),
});
|
|
25
226
|
export const scrapeResultsSchema = z.object({
|
|
26
227
|
id: z.number(),
|
|
27
228
|
scrape_id: z.number(),
|
|
@@ -57,6 +258,9 @@ export const scrapeResultsSchema = z.object({
|
|
|
57
258
|
ignore_reasons: z.array(z.string()),
|
|
58
259
|
hide_reasons: z.array(z.string()),
|
|
59
260
|
hide_override_reasons: z.array(z.string()),
|
|
261
|
+
// New filter system fields
|
|
262
|
+
filter_results: z.array(filterResultSchema).optional(),
|
|
263
|
+
visibility_state: z.nativeEnum(VisibilityState).optional(),
|
|
60
264
|
regexUnitResults: z.nullable(regexUnitResultsSchema),
|
|
61
265
|
// add these properties as null to make ts play nice with the ImmersiveScrapeResultsType union
|
|
62
266
|
google_product_link: z.null().optional(),
|
|
@@ -121,6 +325,9 @@ export const immersiveScrapeResultsSchema = z.object({
|
|
|
121
325
|
ignore_reasons: z.array(z.string()),
|
|
122
326
|
hide_reasons: z.array(z.string()),
|
|
123
327
|
hide_override_reasons: z.array(z.string()),
|
|
328
|
+
// New filter system fields
|
|
329
|
+
filter_results: z.array(filterResultSchema).optional(),
|
|
330
|
+
visibility_state: z.nativeEnum(VisibilityState).optional(),
|
|
124
331
|
brand: z.string().nullable(),
|
|
125
332
|
company_id: z.number().nullable(),
|
|
126
333
|
regexUnitResults: z.nullable(regexUnitResultsSchema),
|
|
@@ -163,6 +370,21 @@ export const globalScrapeOptionsSchema = z.object({
|
|
|
163
370
|
})),
|
|
164
371
|
scam_sources: z.array(z.string()),
|
|
165
372
|
});
|
|
373
|
+
/**
 * Zod schema for the overall filter configuration.
 *
 * `rules` is an optional list of per-rule overrides; each entry names a rule
 * by `id` and may override `enabled`, `severity`, `priority` and free-form
 * `parameters`. `globalParameters` is an optional free-form record.
 */
export const filterConfigurationSchema = z.object({
    rules: z.optional(
        z.array(
            z.object({
                id: z.string(),
                enabled: z.optional(z.boolean()),
                severity: z.optional(z.nativeEnum(FilterSeverity)),
                priority: z.optional(z.number()),
                parameters: z.optional(z.record(z.string(), z.any())),
            }),
        ),
    ),
    globalParameters: z.optional(z.record(z.string(), z.any())),
});
|
|
166
388
|
export const HIDE_REASONS = {
|
|
167
389
|
IGNORED: 'Ignored',
|
|
168
390
|
HIGH_PRICE_OUTLIER: 'Too Expensive',
|
|
@@ -187,6 +409,7 @@ export const HIDE_OVERRIDE_REASONS = {
|
|
|
187
409
|
ALT_SKU_MATCH: 'Alt SKU Match',
|
|
188
410
|
CALCULATED_SKU_MATCH: 'Calculated SKU Match',
|
|
189
411
|
CALCULATED_SKU_PARTIAL_MATCH: 'Calculated SKU Partial Match',
|
|
412
|
+
CALCULATED_ALT_SKU_MATCH: 'Calc. Alt SKU Match',
|
|
190
413
|
};
|
|
191
414
|
const HIDE_ALWAYS_MAP = {
|
|
192
415
|
[HIDE_REASONS.HIGH_PRICE_OUTLIER]: true,
|
|
@@ -208,6 +431,1004 @@ const HIDE_ALWAYS_MAP = {
|
|
|
208
431
|
export const TOO_CHEAP_MULTIPLIER = 0.75;
|
|
209
432
|
export const TOO_EXPENSIVE_MULTIPLIER = 1.15;
|
|
210
433
|
// const wattages = Array.from({ length: 41 }, (_, i) => (5000 + i * 500).toString())
|
|
434
|
+
// ===============================
|
|
435
|
+
// Filter Rule Registry
|
|
436
|
+
// ===============================
|
|
437
|
+
/**
 * Registry of filter rules together with the per-rule configuration
 * overrides applied on top of them.
 *
 * Rules are stored by `rule.id`; registering a rule with an existing id
 * replaces the previous one. Configuration entries are stored by rule id as
 * well and are never written back onto the rule objects themselves.
 */
export class FilterRuleRegistry {
    /** Registered rules keyed by rule id. */
    rules = new Map();
    /** Per-rule configuration overrides keyed by rule id. */
    configurations = new Map();
    /**
     * Register a new filter rule (replaces any rule with the same id).
     * @param {object} rule - Rule object; must expose an `id`.
     */
    registerRule(rule) {
        this.rules.set(rule.id, rule);
    }
    /**
     * Get a rule by ID.
     * @param {string} id
     * @returns {object|undefined} The rule, or `undefined` when not registered.
     */
    getRule(id) {
        return this.rules.get(id);
    }
    /**
     * Get all registered rules in registration order.
     * @returns {object[]}
     */
    getAllRules() {
        return [...this.rules.values()];
    }
    /**
     * Get all enabled rules sorted by effective priority, highest first.
     *
     * A rule is enabled unless either the rule itself or its configuration
     * entry sets `enabled` to `false`. The effective priority is the
     * configured `priority` override when present, otherwise the rule's own
     * `priority`; a rule with neither is treated as priority 0 so the
     * comparator never produces NaN.
     * @returns {object[]}
     */
    getEnabledRules() {
        const effectivePriority = (rule) => this.configurations.get(rule.id)?.priority ?? rule.priority ?? 0;
        return this.getAllRules()
            .filter((rule) => this.configurations.get(rule.id)?.enabled !== false && rule.enabled !== false)
            .sort((a, b) => effectivePriority(b) - effectivePriority(a));
    }
    /**
     * Apply configuration to rules. Each entry replaces any previously stored
     * entry for the same rule id. A missing config or missing `rules` list is
     * a no-op.
     * @param {{ rules?: Array<{ id: string }> }} [config]
     */
    applyConfiguration(config) {
        for (const ruleConfig of config?.rules ?? []) {
            this.configurations.set(ruleConfig.id, ruleConfig);
        }
    }
    /**
     * Get the stored configuration entry for a rule.
     * @param {string} ruleId
     * @returns {object|undefined}
     */
    getRuleConfig(ruleId) {
        return this.configurations.get(ruleId);
    }
}
|
|
487
|
+
// ===============================
|
|
488
|
+
// Filter Engine
|
|
489
|
+
// ===============================
|
|
490
|
+
/**
 * Engine that runs the registry's enabled rules against scrape results and
 * derives each result's visibility state from the outcomes.
 */
export class FilterEngine {
    /** Rule registry supplying the enabled rules and rule lookups by id. */
    registry;
    /**
     * @param {object} registry - Must provide `getEnabledRules()` and `getRule(id)`.
     */
    constructor(registry) {
        this.registry = registry;
    }
    /**
     * Evaluate all enabled rules for a single result.
     *
     * A rule that throws is logged and skipped so that one faulty rule does
     * not prevent the remaining rules from running.
     * @param {object} context - Evaluation context handed to every rule.
     * @returns {object[]} The non-empty rule results, in rule evaluation order.
     */
    evaluateResult(context) {
        const results = [];
        const rules = this.registry.getEnabledRules();
        for (const rule of rules) {
            try {
                const result = rule.evaluate(context);
                if (result) {
                    results.push(result);
                }
            }
            catch (error) {
                console.error(`Error evaluating rule ${rule.id}:`, error);
            }
        }
        return results;
    }
    /**
     * Evaluate rules for a batch of results.
     *
     * Each result object is updated in place (and returned) with the new
     * fields `filter_results` / `visibility_state` and with the legacy fields
     * `hide_reasons`, `hide_override_reasons` and `ignore_result`.
     * @returns {object[]} The same result objects, in input order.
     */
    evaluateBatch(results, variant, variantScrapeOptions, vendorScrapeOptions, globalScrapeOptions) {
        return results.map((result) => {
            const context = {
                result,
                variant,
                variantScrapeOptions,
                vendorScrapeOptions,
                globalScrapeOptions,
            };
            const filterResults = this.evaluateResult(context);
            const visibilityState = this.calculateVisibility(filterResults);
            // Populate new fields
            result.filter_results = filterResults;
            result.visibility_state = visibilityState;
            // Maintain backward compatibility with old fields
            result.hide_reasons = filterResults
                .filter((fr) => fr.severity === FilterSeverity.BLOCK || fr.severity === FilterSeverity.WARNING)
                .map((fr) => fr.message);
            result.hide_override_reasons = filterResults.filter((fr) => fr.metadata?.isOverride).map((fr) => fr.message);
            result.ignore_result = visibilityState === VisibilityState.HIDDEN;
            return result;
        });
    }
    /**
     * Calculate visibility state based on filter results.
     *
     * - No results: VISIBLE.
     * - Any BLOCK result that is still in force: HIDDEN. A BLOCK is lifted
     *   only when its rule is registered, has `canBeOverridden` set, and one
     *   of the ids in its `overridableBy` list belongs to a registered rule
     *   that produced a result flagged `metadata.isOverride`.
     * - Otherwise any WARNING result: VISIBLE_WITH_WARNINGS (overrides do not
     *   remove warnings).
     * - Otherwise: VISIBLE.
     * @param {object[]} filterResults
     * @returns {string} One of the VisibilityState values.
     */
    calculateVisibility(filterResults) {
        if (filterResults.length === 0) {
            return VisibilityState.VISIBLE;
        }
        // Ids of registered rules that produced an override result.
        const activeOverrideIds = filterResults
            .filter((fr) => fr.metadata?.isOverride)
            .map((fr) => this.registry.getRule(fr.ruleId))
            .filter((overrideRule) => overrideRule)
            .map((overrideRule) => overrideRule.id);
        const isStillBlocking = (blockResult) => {
            const rule = this.registry.getRule(blockResult.ruleId);
            if (!rule || !rule.canBeOverridden) {
                return true;
            }
            // A rule flagged overridable but without a list names no overrides.
            const allowedOverrides = rule.overridableBy ?? [];
            return !activeOverrideIds.some((id) => allowedOverrides.includes(id));
        };
        const hasActiveBlock = filterResults.some((fr) => fr.severity === FilterSeverity.BLOCK && isStillBlocking(fr));
        if (hasActiveBlock) {
            return VisibilityState.HIDDEN;
        }
        const hasWarning = filterResults.some((fr) => fr.severity === FilterSeverity.WARNING);
        return hasWarning ? VisibilityState.VISIBLE_WITH_WARNINGS : VisibilityState.VISIBLE;
    }
}
|
|
590
|
+
// ===============================
|
|
591
|
+
// Built-in Filter Rules
|
|
592
|
+
// ===============================
|
|
593
|
+
/**
 * Built-in BLOCK rule: flags a result whose extracted price is greater than
 * the variant price multiplied by TOO_EXPENSIVE_MULTIPLIER.
 */
class HighPriceOutlierRule {
    id = 'high_price_outlier';
    name = 'High Price Outlier';
    description = 'Filters results that are significantly more expensive than the variant price';
    severity = FilterSeverity.BLOCK;
    priority = 100;
    enabled = true;
    canBeOverridden = false;
    overridableBy = [];
    /**
     * @param {object} context - Reads `result.extracted_price` and `variant.price`.
     * @returns {object|null} A filter result when the price is too high, otherwise `null`.
     */
    evaluate(context) {
        const { result, variant } = context;
        const threshold = TOO_EXPENSIVE_MULTIPLIER;
        const withinLimit = !(result.extracted_price > variant.price * threshold);
        if (withinLimit) {
            return null;
        }
        const metadata = {
            variantPrice: variant.price,
            resultPrice: result.extracted_price,
            threshold,
        };
        return {
            ruleId: this.id,
            severity: this.severity,
            message: HIDE_REASONS.HIGH_PRICE_OUTLIER,
            metadata,
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
624
|
+
/**
 * Built-in BLOCK rule: flags a result whose extracted price is lower than the
 * variant price multiplied by the undercut threshold returned by
 * `getUndercutThreshold` for that variant price.
 */
class LowPriceOutlierRule {
    id = 'low_price_outlier';
    name = 'Low Price Outlier';
    description = 'Filters results that are significantly cheaper than the variant price';
    severity = FilterSeverity.BLOCK;
    priority = 100;
    enabled = true;
    canBeOverridden = false;
    overridableBy = [];
    /**
     * @param {object} context - Reads `result.extracted_price`, `variant.price`
     *   and `globalScrapeOptions.undercut_threshold_ranges`.
     * @returns {object|null} A filter result when the price is too low, otherwise `null`.
     */
    evaluate(context) {
        // globalScrapeOptions may be absent (the scam-source rule guards it the
        // same way); fall back to an empty range list instead of throwing.
        const thresholdRanges = context.globalScrapeOptions?.undercut_threshold_ranges || [];
        const multiplier = getUndercutThreshold(context.variant.price, thresholdRanges);
        const isTooCheap = context.result.extracted_price < context.variant.price * multiplier;
        if (!isTooCheap) {
            return null;
        }
        return {
            ruleId: this.id,
            severity: this.severity,
            message: HIDE_REASONS.LOW_PRICE_OUTLIER,
            metadata: {
                variantPrice: context.variant.price,
                resultPrice: context.result.extracted_price,
                threshold: multiplier,
            },
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
655
|
+
/**
 * Built-in BLOCK rule: flags a result whose `created_at` date is earlier than
 * the start of the day `day_window` days before now.
 */
class DateOutlierRule {
    id = 'date_outlier';
    name = 'Date Outlier';
    description = 'Filters results outside the specified date window';
    severity = FilterSeverity.BLOCK;
    priority = 90;
    enabled = true;
    canBeOverridden = false;
    overridableBy = [];
    /**
     * @param {object} context - Reads `result.created_at` and
     *   `variantScrapeOptions.day_window`.
     * @returns {object|null} A filter result when the listing is too old, otherwise `null`.
     */
    evaluate(context) {
        const listedAt = new Date(context.result.created_at);
        // A missing, non-numeric or zero day_window falls back to 3 days.
        const dayWindow = Number(context.variantScrapeOptions.day_window) || 3;
        const cutoff = startOfDay(subDays(new Date(), dayWindow));
        const isRecentEnough = !(listedAt < cutoff);
        if (isRecentEnough) {
            return null;
        }
        const metadata = {
            itemDate: listedAt.toISOString(),
            threshold: cutoff.toISOString(),
            dayWindow,
        };
        return {
            ruleId: this.id,
            severity: this.severity,
            message: HIDE_REASONS.DATE_OUTLIER,
            metadata,
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
687
|
+
/**
 * Built-in WARNING rule: flags a result whose source equals (ignoring case)
 * one of the variant's `competitor_exclusions` entries.
 */
class CompetitorExclusionRule {
    id = 'competitor_exclusion';
    name = 'Competitor Exclusion';
    description = 'Filters results from excluded competitor sources';
    severity = FilterSeverity.WARNING;
    priority = 80;
    enabled = true;
    canBeOverridden = true;
    overridableBy = ['sku_match', 'calculated_sku_match', 'alt_sku_match'];
    /**
     * @param {object} context - Reads `result.source` and
     *   `variantScrapeOptions.competitor_exclusions`.
     * @returns {object|null} A filter result when the source is excluded, otherwise `null`.
     */
    evaluate(context) {
        const { source } = context.result;
        const normalizedSource = source.toLowerCase();
        // Falsy entries in the exclusion list are ignored.
        const matchesSource = (exclusion) => exclusion && normalizedSource === exclusion.toLowerCase();
        if (!context.variantScrapeOptions.competitor_exclusions.some(matchesSource)) {
            return null;
        }
        return {
            ruleId: this.id,
            severity: this.severity,
            message: HIDE_REASONS.COMPETITOR_EXCLUSION,
            metadata: { source: context.result.source },
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
714
|
+
/**
 * Built-in BLOCK rule describing duplicate results.
 *
 * This class only carries the rule's metadata: `evaluate` never flags
 * anything, because duplicate detection needs the whole batch and is
 * performed by batch processing rather than per result.
 */
class DuplicateRule {
    id = 'duplicate';
    name = 'Duplicate';
    description = 'Marks duplicate results based on source, title, and price';
    severity = FilterSeverity.BLOCK;
    priority = 70;
    enabled = true;
    canBeOverridden = false;
    overridableBy = [];
    /**
     * Placeholder evaluation; the context is not inspected.
     * @param {object} context - Unused.
     * @returns {null} Always `null`.
     */
    evaluate(context) {
        return null;
    }
}
|
|
732
|
+
/**
 * Built-in WARNING rule describing search-criteria exclusions.
 *
 * This class only carries the rule's metadata: `evaluate` never flags
 * anything, because the check relies on FlexSearch and is performed by batch
 * processing rather than per result.
 */
class SearchExclusionRule {
    id = 'search_exclusion';
    name = 'Search Exclusion';
    description = 'Filters results that do not match search criteria';
    severity = FilterSeverity.WARNING;
    priority = 60;
    enabled = true;
    canBeOverridden = true;
    overridableBy = ['sku_match', 'calculated_sku_match'];
    /**
     * Placeholder evaluation; the context is not inspected.
     * @param {object} context - Unused.
     * @returns {null} Always `null`.
     */
    evaluate(context) {
        return null;
    }
}
|
|
749
|
+
/**
 * Built-in WARNING rule: flags a result whose title contains a SKU from the
 * vendor's skip lists (`default_skip_skus` and `vendor_skip_skus`), excluding
 * the variant's own SKU.
 *
 * A SKU only counts as present when it is surrounded on both sides by one of
 * the delimiter characters: space, `/`, `&`, `=`, `"`, `'` or a backtick.
 */
class SkipSkuRule {
    id = 'skip_sku';
    name = 'Skip SKU';
    description = 'Filters results containing SKUs from the skip list';
    severity = FilterSeverity.WARNING;
    priority = 50;
    enabled = true;
    canBeOverridden = true;
    overridableBy = ['sku_match', 'calculated_sku_match'];
    /**
     * @param {object} context - Reads `result.title`, `variant.sku` and the two
     *   skip lists on `vendorScrapeOptions`.
     * @returns {object|null} A filter result when a skip-list SKU appears in the
     *   title, otherwise `null`.
     */
    evaluate(context) {
        const title = context.result.title;
        if (typeof title !== 'string' || title === '') {
            return null;
        }
        const variantSku = (context.variant.sku ?? '').toLowerCase();
        const { default_skip_skus: defaultSkips, vendor_skip_skus: vendorSkips } = context.vendorScrapeOptions;
        // Blank or non-string entries are dropped: an empty SKU would otherwise
        // build a pattern that matches any two adjacent delimiters.
        const skipSkus = [...(defaultSkips ?? []), ...(vendorSkips ?? [])].filter((sku) => typeof sku === 'string' && sku.trim() !== '' && sku.toLowerCase() !== variantSku);
        const delimiter = '[ /&="\'`]';
        const hasExclusion = skipSkus.some((sku) => {
            const escapedSku = sku.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
            return new RegExp(`${delimiter}${escapedSku}${delimiter}`, 'i').test(title);
        });
        if (!hasExclusion) {
            return null;
        }
        return {
            ruleId: this.id,
            severity: this.severity,
            message: HIDE_REASONS.SKIP_SKU,
            metadata: { title },
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
786
|
+
/**
|
|
787
|
+
* Rule for vendor exclusions
|
|
788
|
+
*/
|
|
789
|
+
class VendorExclusionRule {
|
|
790
|
+
id = 'vendor_exclusion';
|
|
791
|
+
name = 'Vendor Exclusion';
|
|
792
|
+
description = 'Filters results containing excluded vendor names';
|
|
793
|
+
severity = FilterSeverity.WARNING;
|
|
794
|
+
priority = 50;
|
|
795
|
+
enabled = true;
|
|
796
|
+
canBeOverridden = true;
|
|
797
|
+
overridableBy = ['sku_match', 'calculated_sku_match'];
|
|
798
|
+
evaluate(context) {
|
|
799
|
+
const nots = [];
|
|
800
|
+
const formatted = context.vendorScrapeOptions.default_skip_vendors
|
|
801
|
+
.filter((s) => s.toLowerCase() !== context.variant.vendor.toLowerCase())
|
|
802
|
+
.map((s) => `${s}`);
|
|
803
|
+
nots.push(...formatted);
|
|
804
|
+
const hasExclusion = nots.some((vendor) => {
|
|
805
|
+
const escaped = vendor.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
|
|
806
|
+
const matchRegex = new RegExp(`(?: |\/|\&|\=|"|'|\`)${escaped}(?: |\/|\&|\=|"|'|\`)`, 'gi');
|
|
807
|
+
const matches = context.result.title?.match(matchRegex);
|
|
808
|
+
return matches && matches.length > 0;
|
|
809
|
+
});
|
|
810
|
+
if (hasExclusion) {
|
|
811
|
+
return {
|
|
812
|
+
ruleId: this.id,
|
|
813
|
+
severity: this.severity,
|
|
814
|
+
message: HIDE_REASONS.VENDOR_EXCLUSION,
|
|
815
|
+
metadata: { title: context.result.title },
|
|
816
|
+
timestamp: new Date().toISOString(),
|
|
817
|
+
};
|
|
818
|
+
}
|
|
819
|
+
return null;
|
|
820
|
+
}
|
|
821
|
+
}
|
|
822
|
+
/**
 * Built-in BLOCK rule: flags a result whose source equals (ignoring case) one
 * of the global `scam_sources` entries.
 */
class ScamSourceExclusionRule {
    id = 'scam_source_exclusion';
    name = 'Scam Source Exclusion';
    description = 'Filters results from known scam sources';
    severity = FilterSeverity.BLOCK;
    priority = 95;
    enabled = true;
    canBeOverridden = false;
    overridableBy = [];
    /**
     * @param {object} context - Reads `result.source` and
     *   `globalScrapeOptions.scam_sources` (treated as empty when absent).
     * @returns {object|null} A filter result when the source is a listed scam
     *   source, otherwise `null`.
     */
    evaluate(context) {
        const normalizedSource = context.result.source.toLowerCase();
        const knownScamSources = context.globalScrapeOptions?.scam_sources || [];
        // Falsy entries in the list are ignored.
        const matchesSource = (entry) => entry && normalizedSource === entry.toLowerCase();
        if (!knownScamSources.some(matchesSource)) {
            return null;
        }
        return {
            ruleId: this.id,
            severity: this.severity,
            message: HIDE_REASONS.SCAM_SOURCE_EXCLUSION,
            metadata: { source: context.result.source },
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
850
|
+
/**
 * Built-in BLOCK rule: flags a result that matches one of the variant's
 * stored `result_ignore_keys`.
 *
 * Each ignore key lists its field names as a JSON array in `key_parts`. The
 * stored values of those fields are joined with `-` and compared to the same
 * join built from the current variant id followed by the result's values for
 * every part other than `variant_id` (`price` is read from `extracted_price`).
 */
class ManuallyIgnoredRule {
    id = 'manually_ignored';
    name = 'Manually Ignored';
    description = 'Filters results that have been manually ignored or excluded';
    severity = FilterSeverity.BLOCK;
    priority = 110;
    enabled = true;
    canBeOverridden = false;
    overridableBy = [];
    /**
     * @param {object} context - Reads `variantScrapeOptions.result_ignore_keys`,
     *   `variant.id` and fields of `result` named by each key's parts.
     * @returns {object|null} A filter result for the first matching ignore key,
     *   otherwise `null`.
     */
    evaluate(context) {
        const ignoreKeys = context.variantScrapeOptions.result_ignore_keys;
        if (!ignoreKeys || ignoreKeys.length === 0) {
            return null;
        }
        for (const key of ignoreKeys) {
            const variantId = context.variant.id;
            const keyParts = JSON.parse(key.key_parts);
            const storedValues = [];
            const resultValues = [variantId];
            for (const part of keyParts) {
                storedValues.push(key[part]);
                if (part === 'variant_id') {
                    continue;
                }
                const resultField = part === 'price' ? 'extracted_price' : part;
                resultValues.push(context.result[resultField]);
            }
            if (storedValues.join('-') !== resultValues.join('-')) {
                continue;
            }
            // The stored reason selects which of the two messages is reported.
            const message = key.reason === 'Ignored' ? HIDE_REASONS.MANUALLY_IGNORED : HIDE_REASONS.MANUALLY_EXCLUDED;
            return {
                ruleId: this.id,
                severity: this.severity,
                message,
                metadata: { reason: key.reason, keyParts },
                timestamp: new Date().toISOString(),
            };
        }
        return null;
    }
}
|
|
893
|
+
/**
 * Warning-level rule that flags a result whose reported brand is neither the
 * variant's vendor nor one of the vendor's configured brand-name alternates.
 * Results that carry no brand are never flagged.
 */
class BrandMismatchRule {
    id = 'brand_mismatch';
    name = 'Brand Mismatch';
    description = 'Filters results with mismatched brands';
    severity = FilterSeverity.WARNING;
    priority = 40;
    enabled = true;
    canBeOverridden = true;
    overridableBy = ['sku_match', 'calculated_sku_match', 'alt_sku_match'];
    /**
     * @param {object} context - Evaluation context; reads `result.brand`,
     *   `variant.vendor` and `vendorScrapeOptions.brand_name_alternates`.
     * @returns {object|null} A filter result when the brand (compared
     *   case-insensitively) is not among the accepted names, otherwise `null`.
     */
    evaluate(context) {
        const { brand } = context.result;
        if (!brand) {
            return null;
        }
        // Accepted names: the vendor first, then every non-empty alternate, all lower-cased.
        const acceptedBrands = [];
        const vendorName = context.variant.vendor;
        if (vendorName) {
            acceptedBrands.push(vendorName.toLowerCase());
        }
        for (const alternate of context.vendorScrapeOptions.brand_name_alternates || []) {
            if (alternate) {
                acceptedBrands.push(alternate.toLowerCase());
            }
        }
        if (acceptedBrands.includes(brand.toLowerCase())) {
            return null;
        }
        return {
            ruleId: this.id,
            severity: this.severity,
            message: HIDE_REASONS.CALCULATED_BRAND_MISMATCH,
            metadata: {
                resultBrand: brand,
                expectedBrands: acceptedBrands,
            },
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
930
|
+
/**
 * Block-level rule that flags results whose `details_and_offers` contains the
 * text/entry "Out of stock online".
 */
class OutOfStockRule {
    id = 'out_of_stock';
    name = 'Out of Stock';
    description = 'Filters results that are out of stock online';
    severity = FilterSeverity.BLOCK;
    priority = 85;
    enabled = true;
    canBeOverridden = false;
    overridableBy = [];
    /**
     * @param {object} context - Evaluation context; reads `result.details_and_offers`.
     * @returns {object|null} A filter result when the out-of-stock marker is
     *   present, otherwise `null`.
     */
    evaluate(context) {
        const offerDetails = context.result.details_and_offers;
        const isOutOfStock = offerDetails && offerDetails.includes('Out of stock online');
        if (!isOutOfStock) {
            return null;
        }
        return {
            ruleId: this.id,
            severity: this.severity,
            message: HIDE_REASONS.OUT_OF_STOCK_ONLINE,
            metadata: { details: offerDetails },
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
956
|
+
/**
 * Warning-level rule that flags a result whose calculated SKU is present but
 * differs (case-insensitively) from the variant's SKU.
 */
class CalculatedSkuMismatchRule {
    id = 'calculated_sku_mismatch';
    name = 'Calculated SKU Mismatch';
    description = 'Filters results with mismatched calculated SKUs';
    severity = FilterSeverity.WARNING;
    priority = 30;
    enabled = true;
    canBeOverridden = true;
    overridableBy = ['sku_match', 'calculated_sku_match'];
    /**
     * @param {object} context - Evaluation context; reads
     *   `result.calculated_sku` and `variant.sku`.
     * @returns {object|null} A filter result on mismatch; `null` when the
     *   result has no calculated SKU or the two SKUs are equal ignoring case.
     */
    evaluate(context) {
        const { calculated_sku: calculatedSku } = context.result;
        if (!calculatedSku) {
            return null;
        }
        const normalizedCalculated = calculatedSku.toLowerCase();
        const normalizedVariant = context.variant.sku.toLowerCase();
        if (normalizedCalculated === normalizedVariant) {
            return null;
        }
        return {
            ruleId: this.id,
            severity: this.severity,
            message: HIDE_REASONS.CALCULATED_SKU_MISMATCH,
            metadata: {
                calculatedSku,
                variantSku: context.variant.sku,
            },
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
985
|
+
/**
 * Warning-level rule that flags a result when `getRegexUnitResultsIgnore`
 * reports that its extracted unit values conflict with the variant's.
 */
class CriticalSpecMismatchRule {
    id = 'critical_spec_mismatch';
    name = 'Critical Spec Mismatch';
    description = 'Filters results with mismatched critical specifications';
    severity = FilterSeverity.WARNING;
    priority = 35;
    enabled = true;
    canBeOverridden = true;
    overridableBy = ['sku_match', 'calculated_sku_match'];
    /**
     * @param {object} context - Evaluation context; reads `regexUnitResults`
     *   from both `result` and `variant`.
     * @returns {object|null} A filter result carrying the mismatch reasons, or
     *   `null` when either side has no unit results or no conflict is reported.
     */
    evaluate(context) {
        const resultUnits = context.result.regexUnitResults;
        if (!resultUnits) {
            return null;
        }
        const variantUnits = context.variant.regexUnitResults;
        if (!variantUnits) {
            return null;
        }
        const [shouldIgnore, mismatchReasons] = getRegexUnitResultsIgnore(resultUnits, variantUnits);
        if (!shouldIgnore) {
            return null;
        }
        return {
            ruleId: this.id,
            severity: this.severity,
            message: HIDE_REASONS.CRITICAL_SPEC_MISMATCH,
            metadata: { reasons: mismatchReasons },
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
1014
|
+
/**
 * Warning-level rule that flags results which look refurbished or used: either
 * a keyword from `getRefurbishedKeywords(variant)` appears as a whole word in
 * the title, or `details_and_offers` contains "Pre-owned".
 */
class RefurbishedUsedRule {
    id = 'refurbished_used';
    name = 'Refurbished/Used';
    description = 'Filters refurbished or used items';
    severity = FilterSeverity.WARNING;
    priority = 25;
    enabled = true;
    canBeOverridden = true;
    overridableBy = ['sku_match', 'calculated_sku_match'];
    /**
     * @param {object} context - Evaluation context; reads `variant`,
     *   `result.title` and `result.details_and_offers`.
     * @returns {object|null} A filter result when the item looks refurbished or
     *   pre-owned, otherwise `null`.
     */
    evaluate(context) {
        const keywords = getRefurbishedKeywords(context.variant);
        // Lower-case the title and blank out everything except a-z, 0-9 and '-'
        // so keywords are compared against whole whitespace-separated words.
        const normalizedTitle = context.result.title?.toLowerCase().replace(/[^a-z0-9-]/g, ' ') ?? '';
        const titleWords = new Set(normalizedTitle.split(/\s+/));
        const offerDetails = context.result.details_and_offers;
        const hasKeyword = keywords.some((keyword) => titleWords.has(keyword));
        const isPreOwned = offerDetails && offerDetails.includes('Pre-owned');
        if (!hasKeyword && !isPreOwned) {
            return null;
        }
        return {
            ruleId: this.id,
            severity: this.severity,
            message: HIDE_REASONS.REFURBISHED_USED,
            metadata: { title: context.result.title },
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
1044
|
+
// ===============================
|
|
1045
|
+
// Override Rules
|
|
1046
|
+
// ===============================
|
|
1047
|
+
/**
 * Info-level override rule that fires when the variant's SKU appears in the
 * result title as a standalone token (delimited by a word boundary or by
 * brackets/parentheses/braces), compared case-insensitively.
 */
class SkuMatchOverrideRule {
    id = 'sku_match';
    name = 'SKU Match';
    description = 'Overrides hide reasons when SKU is found in title';
    severity = FilterSeverity.INFO;
    priority = 200;
    enabled = true;
    canBeOverridden = false;
    overridableBy = [];
    /**
     * @param {object} context - Evaluation context; reads `variant.sku` and
     *   `result.title`.
     * @returns {object|null} An override filter result when the SKU is found in
     *   the title; `null` otherwise, including when the SKU or title is missing
     *   or empty.
     */
    evaluate(context) {
        const sku = context.variant.sku;
        const title = context.result.title;
        // An empty SKU would build a pattern that matches nearly any title, and a
        // missing one cannot be searched for at all, so neither may override.
        if (!sku || !title) {
            return null;
        }
        // Escape every regex metacharacter so the SKU is matched literally; SKUs
        // containing '.', '(', '[' etc. must not alter (or break) the pattern.
        const escapedSku = String(sku).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        const skuRegex = new RegExp(`(?:\\b|[\\(\\[\\{])${escapedSku}(?:\\b|[\\)\\]\\}])`, 'i');
        if (!skuRegex.test(title)) {
            return null;
        }
        return {
            ruleId: this.id,
            severity: this.severity,
            message: HIDE_OVERRIDE_REASONS.SKU_MATCH,
            metadata: { isOverride: true, sku },
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
1074
|
+
/**
 * Info-level override rule that fires when the result's calculated SKU equals
 * the variant's SKU, compared case-insensitively.
 */
class CalculatedSkuMatchOverrideRule {
    id = 'calculated_sku_match';
    name = 'Calculated SKU Match';
    description = 'Overrides hide reasons when calculated SKU matches';
    severity = FilterSeverity.INFO;
    priority = 200;
    enabled = true;
    canBeOverridden = false;
    overridableBy = [];
    /**
     * @param {object} context - Evaluation context; reads `variant.sku` and
     *   `result.calculated_sku`.
     * @returns {object|null} An override filter result when both SKUs are
     *   present and equal ignoring case, otherwise `null`.
     */
    evaluate(context) {
        const variantSku = context.variant.sku;
        const calculatedSku = context.result.calculated_sku;
        // A missing SKU used to throw, and two empty SKUs compared equal; neither
        // is a genuine match, so bail out before comparing.
        if (!variantSku || !calculatedSku) {
            return null;
        }
        if (variantSku.toLowerCase() !== calculatedSku.toLowerCase()) {
            return null;
        }
        return {
            ruleId: this.id,
            severity: this.severity,
            message: HIDE_OVERRIDE_REASONS.CALCULATED_SKU_MATCH,
            metadata: { isOverride: true, calculatedSku },
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
1100
|
+
/**
 * Info-level override rule that fires when one of the variant's configured
 * alternate SKUs either appears in the result title or equals the result's
 * calculated SKU (both compared case-insensitively).
 */
class AltSkuMatchOverrideRule {
    id = 'alt_sku_match';
    name = 'Alt SKU Match';
    description = 'Overrides hide reasons when alternate SKU matches';
    severity = FilterSeverity.INFO;
    priority = 200;
    enabled = true;
    canBeOverridden = false;
    overridableBy = [];
    /**
     * @param {object} context - Evaluation context; reads
     *   `variantScrapeOptions.sku_alternates`, `result.title` and
     *   `result.calculated_sku`.
     * @returns {object|null} An override filter result (title match takes
     *   precedence over calculated-SKU match), or `null` when nothing matches
     *   or no usable alternates are configured.
     */
    evaluate(context) {
        // Blank alternates are dropped: ''.includes-style matching would otherwise
        // succeed against every title and override all warnings.
        const alternates = (context.variantScrapeOptions.sku_alternates ?? [])
            .filter((alt) => typeof alt === 'string' && alt.trim() !== '')
            .map((alt) => alt.toLowerCase());
        if (alternates.length === 0) {
            return null;
        }
        // Check title match
        const normalizedTitle = context.result.title?.toLowerCase();
        if (normalizedTitle && alternates.some((alt) => normalizedTitle.includes(alt))) {
            return {
                ruleId: this.id,
                severity: this.severity,
                message: HIDE_OVERRIDE_REASONS.ALT_SKU_MATCH,
                metadata: { isOverride: true },
                timestamp: new Date().toISOString(),
            };
        }
        // Check calculated SKU match
        const calculatedSku = context.result.calculated_sku;
        const normalizedCalculated = calculatedSku?.toLowerCase();
        if (normalizedCalculated && alternates.includes(normalizedCalculated)) {
            return {
                ruleId: this.id,
                severity: this.severity,
                message: HIDE_OVERRIDE_REASONS.CALCULATED_ALT_SKU_MATCH,
                metadata: { isOverride: true, calculatedSku },
                timestamp: new Date().toISOString(),
            };
        }
        return null;
    }
}
|
|
1138
|
+
/**
 * Info-level override rule that fires when the result's `found_product_id` is
 * listed in the variant's `found_product_ids`.
 */
class ProductIdMatchOverrideRule {
    id = 'product_id_match';
    name = 'Product ID Match';
    description = 'Overrides hide reasons when product ID is linked';
    severity = FilterSeverity.INFO;
    priority = 200;
    enabled = true;
    canBeOverridden = false;
    overridableBy = [];
    /**
     * @param {object} context - Evaluation context; reads
     *   `variant.found_product_ids` and `result.found_product_id`.
     * @returns {object|null} An override filter result when the id is linked,
     *   otherwise `null` (also when the variant has no linked ids).
     */
    evaluate(context) {
        const linkedIds = context.variant.found_product_ids;
        if (!linkedIds) {
            return null;
        }
        const candidateId = context.result.found_product_id;
        if (!linkedIds.includes(candidateId)) {
            return null;
        }
        return {
            ruleId: this.id,
            severity: this.severity,
            message: 'Product Id Linked',
            metadata: { isOverride: true, foundProductId: candidateId },
            timestamp: new Date().toISOString(),
        };
    }
}
|
|
1164
|
+
// ===============================
|
|
1165
|
+
// Registry Factory
|
|
1166
|
+
// ===============================
|
|
1167
|
+
/**
 * Build a fresh FilterRuleRegistry containing one new instance of every
 * built-in rule: the filter rules first, then the override rules.
 *
 * @returns {FilterRuleRegistry} The populated registry.
 */
export function createDefaultFilterRegistry() {
    const registry = new FilterRuleRegistry();
    // Registration order is kept exactly as listed here.
    const filterRuleClasses = [
        HighPriceOutlierRule,
        LowPriceOutlierRule,
        DateOutlierRule,
        CompetitorExclusionRule,
        DuplicateRule,
        SearchExclusionRule,
        SkipSkuRule,
        VendorExclusionRule,
        ScamSourceExclusionRule,
        ManuallyIgnoredRule,
        BrandMismatchRule,
        OutOfStockRule,
        CalculatedSkuMismatchRule,
        CriticalSpecMismatchRule,
        RefurbishedUsedRule,
    ];
    const overrideRuleClasses = [
        SkuMatchOverrideRule,
        CalculatedSkuMatchOverrideRule,
        AltSkuMatchOverrideRule,
        ProductIdMatchOverrideRule,
    ];
    for (const RuleClass of [...filterRuleClasses, ...overrideRuleClasses]) {
        registry.registerRule(new RuleClass());
    }
    return registry;
}
|
|
1195
|
+
/**
 * Build a FilterEngine backed by the default rule registry.
 *
 * @param {object} [config] - Optional configuration; when truthy it is passed
 *   to the registry's `applyConfiguration` before the engine is created.
 * @returns {FilterEngine} An engine wrapping the (optionally configured) registry.
 */
export function createDefaultFilterEngine(config) {
    const defaultRegistry = createDefaultFilterRegistry();
    const hasConfig = Boolean(config);
    if (hasConfig) {
        defaultRegistry.applyConfiguration(config);
    }
    const engine = new FilterEngine(defaultRegistry);
    return engine;
}
|
|
1205
|
+
// ===============================
|
|
1206
|
+
// Extensibility Helpers
|
|
1207
|
+
// ===============================
|
|
1208
|
+
/**
 * Fluent builder for custom filter rules.
 *
 * Each setter stores one field and returns the builder for chaining.
 * Defaults: `enabled: true`, `priority: 50`, `canBeOverridden: false`,
 * `overridableBy: []`. `id`, `name`, `description`, `severity` and `evaluate`
 * must be supplied before calling `build()`.
 */
export class FilterRuleBuilder {
    rule = {
        enabled: true,
        priority: 50,
        canBeOverridden: false,
        overridableBy: [],
    };
    /** @param {string} id - Rule identifier. @returns {this} */
    id(id) {
        this.rule.id = id;
        return this;
    }
    /** @param {string} name - Rule name. @returns {this} */
    name(name) {
        this.rule.name = name;
        return this;
    }
    /** @param {string} description - Rule description. @returns {this} */
    description(description) {
        this.rule.description = description;
        return this;
    }
    /** @param {*} severity - Severity value stored on the rule. @returns {this} */
    severity(severity) {
        this.rule.severity = severity;
        return this;
    }
    /** @param {number} priority - Rule priority. @returns {this} */
    priority(priority) {
        this.rule.priority = priority;
        return this;
    }
    /** @param {boolean} enabled - Whether the rule is enabled. @returns {this} */
    enabled(enabled) {
        this.rule.enabled = enabled;
        return this;
    }
    /** @param {boolean} canBeOverridden - Whether the rule can be overridden. @returns {this} */
    canBeOverridden(canBeOverridden) {
        this.rule.canBeOverridden = canBeOverridden;
        return this;
    }
    /** @param {string[]} ruleIds - Ids of rules allowed to override this one. @returns {this} */
    overridableBy(ruleIds) {
        this.rule.overridableBy = ruleIds;
        return this;
    }
    /** @param {Function} evaluateFn - Function stored as the rule's `evaluate`. @returns {this} */
    evaluate(evaluateFn) {
        this.rule.evaluate = evaluateFn;
        return this;
    }
    /**
     * Validate the collected fields and produce the rule.
     *
     * @returns {object} A new rule object. It is a snapshot: later calls on this
     *   builder (or edits to the array passed to `overridableBy`) do not affect
     *   rules that were already built.
     * @throws {Error} When id, name, description, severity or evaluate is missing.
     */
    build() {
        if (!this.rule.id || !this.rule.name || !this.rule.description || !this.rule.severity || !this.rule.evaluate) {
            throw new Error('FilterRuleBuilder: Missing required fields');
        }
        // Return a copy rather than the builder's internal state so that reusing
        // the builder cannot silently mutate a rule that is already registered.
        return {
            ...this.rule,
            overridableBy: [...(this.rule.overridableBy ?? [])],
        };
    }
}
|
|
1261
|
+
/**
 * Create a rule object from a plain config without writing a rule class.
 *
 * @param {object} config - Must supply `id`, `name`, `description`, `severity`
 *   and `evaluate`; `priority` (default 50), `canBeOverridden` (default false)
 *   and `overridableBy` (default []) are optional.
 * @returns {object} The rule object; `enabled` is always `true`.
 */
export function createCustomFilterRule(config) {
    const { id, name, description, severity } = config;
    const priority = config.priority ?? 50;
    const canBeOverridden = config.canBeOverridden ?? false;
    const overridableBy = config.overridableBy ?? [];
    const { evaluate } = config;
    return {
        id,
        name,
        description,
        severity,
        priority,
        enabled: true,
        canBeOverridden,
        overridableBy,
        evaluate,
    };
}
|
|
1277
|
+
// ===============================
|
|
1278
|
+
// Enhanced Batch Filtering (New API)
|
|
1279
|
+
// ===============================
|
|
1280
|
+
/**
 * Run scrape results through the rule-based filter engine.
 *
 * Builds a default registry, registers any `customRules`, applies
 * `filterConfig`, then processes the results in three stages: duplicate
 * marking, search-exclusion marking, and the engine's batch evaluation.
 *
 * @param {object} params
 * @param {Array} params.scrapeResults - Results to filter.
 * @param {object} params.variant - Variant the results were scraped for.
 * @param {object} params.variantScrapeOptions - Per-variant scrape options.
 * @param {object} params.vendorScrapeOptions - Per-vendor scrape options.
 * @param {object} params.globalScrapeOptions - Global scrape options.
 * @param {object} [params.filterConfig] - Optional registry configuration.
 * @param {Array} [params.customRules] - Optional extra rules to register.
 * @returns {Array} Whatever the engine's `evaluateBatch` returns.
 */
export const filterScrapeResultsV2 = ({ scrapeResults, variant, variantScrapeOptions, vendorScrapeOptions, globalScrapeOptions, filterConfig, customRules, }) => {
    const registry = createDefaultFilterRegistry();
    // Custom rules go in before configuration is applied.
    if (customRules) {
        customRules.forEach((customRule) => {
            registry.registerRule(customRule);
        });
    }
    if (filterConfig) {
        registry.applyConfiguration(filterConfig);
    }
    const engine = new FilterEngine(registry);
    // Duplicates and search exclusions are batch-level checks handled outside the engine.
    const deduplicated = handleDuplicatesV2(scrapeResults);
    const searchChecked = handleSearchExclusionsV2(deduplicated, variantScrapeOptions, variant, registry);
    // Everything else is evaluated per result by the engine.
    return engine.evaluateBatch(searchChecked, variant, variantScrapeOptions, vendorScrapeOptions, globalScrapeOptions);
};
|
|
1302
|
+
/**
 * Mark duplicate results for the rule-based filter system.
 *
 * Results are grouped by `source-title-extracted_price`. Within a group only
 * the entry with the latest `created_at` is left unmarked; every other entry
 * gets a BLOCK-severity `duplicate` record pushed onto its `filter_results`
 * (the array is created when absent). Items are mutated in place.
 *
 * @param {Array<object>} results - Scrape results to inspect.
 * @returns {Array<object>} The same `results` array.
 */
function handleDuplicatesV2(results) {
    // Newest item seen so far for each duplicate key.
    const newestByKey = new Map();
    // Append the duplicate marker to one item.
    const markAsDuplicate = (item, key) => {
        if (!item.filter_results) {
            item.filter_results = [];
        }
        item.filter_results.push({
            ruleId: 'duplicate',
            severity: FilterSeverity.BLOCK,
            message: HIDE_REASONS.DUPLICATE,
            metadata: { key },
            timestamp: new Date().toISOString(),
        });
    };
    for (const item of results) {
        const key = `${item.source}-${item.title}-${item.extracted_price}`;
        const keptItem = newestByKey.get(key);
        if (!keptItem) {
            newestByKey.set(key, item);
            continue;
        }
        if (new Date(item.created_at) > new Date(keptItem.created_at)) {
            // The kept item is already the object we need to mark; looking it up
            // again by `id` was O(n) per duplicate and hit the wrong item whenever
            // ids were missing or not unique.
            markAsDuplicate(keptItem, key);
            newestByKey.set(key, item);
        }
        else {
            markAsDuplicate(item, key);
        }
    }
    return results;
}
|
|
1348
|
+
/**
 * Mark results that fail the search-exclusion query.
 *
 * Titles are indexed with FlexSearch and the query
 * `(any of options.match_values) AND variant.sku NOT (any of options.search_exclusions)`
 * is resolved. Every item whose index is not in the resolved set gets a
 * `search_exclusion` record pushed onto its `filter_results` (created when
 * absent), using the severity of the registered `search_exclusion` rule. When
 * that rule is not registered, no record is pushed. Items are mutated in place.
 *
 * NOTE(review): `options.match_values` must be a non-empty array; otherwise the
 * chained query is never seeded and the `.and(...)` call below throws.
 *
 * @param {Array<object>} dataToSearch - Results to check; `title` is indexed.
 * @param {object} options - Reads `search_exclusions` and `match_values`.
 * @param {object} variant - Reads `sku`.
 * @param {object} registry - Rule registry; `getRule('search_exclusion')` is used.
 * @returns {Array<object>} The same `dataToSearch` array (returned untouched
 *   when there are no search exclusions).
 */
function handleSearchExclusionsV2(dataToSearch, options, variant, registry) {
    if (!options.search_exclusions || options.search_exclusions.length === 0) {
        return dataToSearch;
    }
    // Tokenizer: lower-case, turn '/' and '-' into spaces, drop everything that
    // is not a-z, 0-9, ',' or whitespace, then split on whitespace.
    const customEncoder = (content) => content
        .toLowerCase()
        .replace(/[\/-]/g, ' ')
        .replace(/[^a-z0-9,\/\s]/gi, '')
        .split(/\s+/);
    const index = new Index({
        // @ts-ignore
        charset: EnglishPreset,
        encode: customEncoder,
        tokenize: 'strict',
    });
    dataToSearch.forEach((item, id) => {
        index.add(id, item.title);
    });
    const searchTerms = options.match_values;
    // OR together all match values; the first term seeds the chain.
    let query = null;
    for (const term of searchTerms) {
        query = query === null
            ? index.search(term, { resolve: false, suggest: true })
            : query.or({ index, query: term, resolve: false, suggest: true });
    }
    query = query.and({
        index,
        query: variant.sku,
        resolve: false,
        suggest: true,
    });
    for (const term of options.search_exclusions) {
        query = query.not({ index, query: term, resolve: false });
    }
    // The limit must cover every indexed item: with a fixed cap of 1000, matching
    // items beyond the cap were missing from the result and wrongly flagged.
    const limit = Math.max(1000, dataToSearch.length);
    const matchedIds = new Set(query.resolve({ limit }));
    const rule = registry.getRule('search_exclusion');
    dataToSearch.forEach((element, idx) => {
        if (matchedIds.has(idx)) {
            return;
        }
        if (!element.filter_results) {
            element.filter_results = [];
        }
        if (rule) {
            element.filter_results.push({
                ruleId: 'search_exclusion',
                severity: rule.severity,
                message: HIDE_REASONS.SEARCH_EXCLUSION,
                metadata: { searchTerms },
                timestamp: new Date().toISOString(),
            });
        }
    });
    return dataToSearch;
}
|
|
1429
|
+
// ===============================
|
|
1430
|
+
// Legacy Filter Functions (Maintained for backward compatibility)
|
|
1431
|
+
// ===============================
|
|
211
1432
|
export const filterScrapeResults = ({ scrapeResults, variant, variantScrapeOptions, vendorScrapeOptions, globalScrapeOptions, }) => {
|
|
212
1433
|
let filteredResults = scrapeResults;
|
|
213
1434
|
filteredResults = filterPriceOutliers(filteredResults, variant.price, globalScrapeOptions);
|
|
@@ -558,9 +1779,12 @@ export const calculateHideOverrideReasons = (result, variant, sku_alternates) =>
|
|
|
558
1779
|
// TODO ALEX - Does this work? Is it doing anything?
|
|
559
1780
|
hide_override_reasons.push('Product Id Linked');
|
|
560
1781
|
}
|
|
561
|
-
if (sku_alternates.some((alt) => result
|
|
1782
|
+
if (sku_alternates.some((alt) => result?.title?.toLowerCase().includes(alt.toLowerCase()))) {
|
|
562
1783
|
hide_override_reasons.push(HIDE_OVERRIDE_REASONS.ALT_SKU_MATCH);
|
|
563
1784
|
}
|
|
1785
|
+
if (sku_alternates.some((alt) => result?.calculated_sku?.toLowerCase() === alt.toLowerCase())) {
|
|
1786
|
+
hide_override_reasons.push(HIDE_OVERRIDE_REASONS.CALCULATED_ALT_SKU_MATCH);
|
|
1787
|
+
}
|
|
564
1788
|
return hide_override_reasons;
|
|
565
1789
|
};
|
|
566
1790
|
function getRegexUnitResultsIgnore(resultRegexResults, variantRegexResults, criticalUnits = ['ton', 'cc', 'hp']) {
|