@factorypure/client-helpers 1.1.7 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3,6 +3,173 @@ import { startOfDay, subDays } from 'date-fns';
3
3
  import { Index } from 'flexsearch';
4
4
  import EnglishPreset from 'flexsearch/lang/en';
5
5
  import { z } from 'zod';
6
+ /**
7
+ * @packageDocumentation
8
+ * @module @factorypure/client-helpers
9
+ *
10
+ * # Filter System v2
11
+ *
12
+ * This package provides an extensible, rule-based filtering system for scrape results.
13
+ *
14
+ * ## Key Concepts
15
+ *
16
+ * ### Filter Severity
17
+ * - **BLOCK**: Always hides the result, cannot be overridden
18
+ * - **WARNING**: Shows result with warning indicator, can be overridden by override rules
19
+ * - **INFO**: Metadata only, doesn't affect visibility
20
+ *
21
+ * ### Visibility States
22
+ * - **HIDDEN**: Result is filtered out
23
+ * - **VISIBLE_WITH_WARNINGS**: Result is shown but has warning indicators
24
+ * - **VISIBLE**: Result is shown without issues
25
+ *
26
+ * ### Filter Rules
27
+ * Rules implement the FilterRule interface and are evaluated against results.
28
+ * Each rule can:
29
+ * - Have a severity level (BLOCK, WARNING, INFO)
30
+ * - Have a priority (higher priority rules evaluated first)
31
+ * - Be enabled/disabled
32
+ * - Specify which override rules can override it
33
+ *
34
+ * ### Override Rules
35
+ * Special INFO-level rules that can override WARNING-level rules.
36
+ * Examples: SKU match, calculated SKU match, alternate SKU match
37
+ *
38
+ * ## Quick Start
39
+ *
40
+ * ### Using the Default Engine
41
+ * ```typescript
42
+ * import { filterScrapeResultsV2, createDefaultFilterEngine } from '@factorypure/client-helpers'
43
+ *
44
+ * const filteredResults = filterScrapeResultsV2({
45
+ * scrapeResults,
46
+ * variant,
47
+ * variantScrapeOptions,
48
+ * vendorScrapeOptions,
49
+ * globalScrapeOptions,
50
+ * })
51
+ * ```
52
+ *
53
+ * ### Custom Configuration
54
+ * ```typescript
55
+ * const filteredResults = filterScrapeResultsV2({
56
+ * scrapeResults,
57
+ * variant,
58
+ * variantScrapeOptions,
59
+ * vendorScrapeOptions,
60
+ * globalScrapeOptions,
61
+ * filterConfig: {
62
+ * rules: [
63
+ * { id: 'high_price_outlier', enabled: false },
64
+ * { id: 'competitor_exclusion', severity: FilterSeverity.BLOCK },
65
+ * { id: 'refurbished_used', priority: 100 },
66
+ * ]
67
+ * }
68
+ * })
69
+ * ```
70
+ *
71
+ * ### Adding Custom Rules
72
+ * ```typescript
73
+ * import { createCustomFilterRule, FilterSeverity } from '@factorypure/client-helpers'
74
+ *
75
+ * const myCustomRule = createCustomFilterRule({
76
+ * id: 'my_custom_rule',
77
+ * name: 'My Custom Rule',
78
+ * description: 'Filters results based on custom logic',
79
+ * severity: FilterSeverity.WARNING,
80
+ * priority: 50,
81
+ * canBeOverridden: true,
82
+ * overridableBy: ['sku_match'],
83
+ * evaluate: (context) => {
84
+ * if (context.result.title.includes('bad-keyword')) {
85
+ * return {
86
+ * ruleId: 'my_custom_rule',
87
+ * severity: FilterSeverity.WARNING,
88
+ * message: 'Contains bad keyword',
89
+ * metadata: { keyword: 'bad-keyword' },
90
+ * timestamp: new Date().toISOString(),
91
+ * }
92
+ * }
93
+ * return null
94
+ * }
95
+ * })
96
+ *
97
+ * const filteredResults = filterScrapeResultsV2({
98
+ * scrapeResults,
99
+ * variant,
100
+ * variantScrapeOptions,
101
+ * vendorScrapeOptions,
102
+ * globalScrapeOptions,
103
+ * customRules: [myCustomRule],
104
+ * })
105
+ * ```
106
+ *
107
+ * ### Using the Builder Pattern
108
+ * ```typescript
109
+ * import { FilterRuleBuilder, FilterSeverity } from '@factorypure/client-helpers'
110
+ *
111
+ * const myRule = new FilterRuleBuilder()
112
+ * .id('price_above_threshold')
113
+ * .name('Price Above Threshold')
114
+ * .description('Filters results above a price threshold')
115
+ * .severity(FilterSeverity.WARNING)
116
+ * .priority(80)
117
+ * .canBeOverridden(true)
118
+ * .overridableBy(['sku_match'])
119
+ * .evaluate((context) => {
120
+ * if (context.result.extracted_price > 10000) {
121
+ * return {
122
+ * ruleId: 'price_above_threshold',
123
+ * severity: FilterSeverity.WARNING,
124
+ * message: 'Price exceeds $10,000',
125
+ * metadata: { price: context.result.extracted_price },
126
+ * timestamp: new Date().toISOString(),
127
+ * }
128
+ * }
129
+ * return null
130
+ * })
131
+ * .build()
132
+ * ```
133
+ *
134
+ * ## Built-in Rules
135
+ *
136
+ * ### Block Rules (Cannot be overridden)
137
+ * - `high_price_outlier`: Filters results >15% more expensive than variant
138
+ * - `low_price_outlier`: Filters results significantly cheaper than variant
139
+ * - `date_outlier`: Filters results outside date window
140
+ * - `duplicate`: Filters duplicate results
141
+ * - `scam_source_exclusion`: Filters known scam sources
142
+ * - `manually_ignored`: Filters manually ignored/excluded results
143
+ * - `out_of_stock`: Filters out of stock items
144
+ *
145
+ * ### Warning Rules (Can be overridden)
146
+ * - `competitor_exclusion`: Filters excluded competitors
147
+ * - `search_exclusion`: Filters results not matching search criteria
148
+ * - `skip_sku`: Filters results with SKUs from skip list
149
+ * - `vendor_exclusion`: Filters results with excluded vendor names
150
+ * - `brand_mismatch`: Filters results with mismatched brands
151
+ * - `calculated_sku_mismatch`: Filters results with mismatched calculated SKUs
152
+ * - `critical_spec_mismatch`: Filters results with mismatched critical specs
153
+ * - `refurbished_used`: Filters refurbished/used items
154
+ *
155
+ * ### Override Rules
156
+ * - `sku_match`: SKU found in title
157
+ * - `calculated_sku_match`: Calculated SKU matches variant SKU
158
+ * - `alt_sku_match`: Alternate SKU matches
159
+ * - `product_id_match`: Product ID is linked
160
+ *
161
+ * ## Migration from Legacy System
162
+ *
163
+ * The legacy `filterScrapeResults` function is still available for backward compatibility.
164
+ * Results include both old fields (`hide_reasons`, `hide_override_reasons`) and new fields
165
+ * (`filter_results`, `visibility_state`) during the transition period.
166
+ *
167
+ * To migrate:
168
+ * 1. Replace `filterScrapeResults` with `filterScrapeResultsV2`
169
+ * 2. Update code to use `visibility_state` instead of `ignore_result`
170
+ * 3. Use `filter_results` for detailed filter information instead of string arrays
171
+ * 4. Configure rules as needed using `filterConfig` parameter
172
+ */
6
173
  export const regexUnitResultSchema = z.object({
7
174
  value: z.string(),
8
175
  source: z.string(),
@@ -22,6 +189,40 @@ export const regexUnitResultsSchema = z.object({
22
189
  mile: z.array(regexUnitResultSchema),
23
190
  ah: z.array(regexUnitResultSchema),
24
191
  });
192
+ // ===============================
193
+ // New Filter System - Core Types (Must come before schemas that use them)
194
+ // ===============================
195
/**
 * Severity levels for filter rules.
 * - BLOCK: Always hides the result, cannot be overridden
 * - WARNING: Shows result with warning indicator, can be overridden
 * - INFO: Metadata only, doesn't affect visibility
 *
 * Exposed as a frozen string-valued object (each key maps to its own name) so
 * the shared enum cannot be reassigned or mutated at runtime.
 */
export const FilterSeverity = Object.freeze({
  BLOCK: 'BLOCK',
  WARNING: 'WARNING',
  INFO: 'INFO',
});
207
/**
 * Visibility states for results after filter evaluation.
 *
 * Exposed as a frozen string-valued object (each key maps to its own name) so
 * the shared enum cannot be reassigned or mutated at runtime.
 */
export const VisibilityState = Object.freeze({
  HIDDEN: 'HIDDEN',
  VISIBLE_WITH_WARNINGS: 'VISIBLE_WITH_WARNINGS',
  VISIBLE: 'VISIBLE',
});
216
/**
 * Zod schema for the structured record a filter rule returns when it fires.
 *
 * Fields:
 * - `ruleId`: identifier of the rule that produced the record
 * - `severity`: one of the `FilterSeverity` values
 * - `message`: human-readable reason string
 * - `metadata`: optional free-form string-keyed bag of extra details
 * - `timestamp`: string; the rules in this file fill it with `new Date().toISOString()`
 */
export const filterResultSchema = z.object({
  ruleId: z.string(),
  severity: z.nativeEnum(FilterSeverity),
  message: z.string(),
  metadata: z.optional(z.record(z.string(), z.any())),
  timestamp: z.string(),
});
25
226
  export const scrapeResultsSchema = z.object({
26
227
  id: z.number(),
27
228
  scrape_id: z.number(),
@@ -57,6 +258,9 @@ export const scrapeResultsSchema = z.object({
57
258
  ignore_reasons: z.array(z.string()),
58
259
  hide_reasons: z.array(z.string()),
59
260
  hide_override_reasons: z.array(z.string()),
261
+ // New filter system fields
262
+ filter_results: z.array(filterResultSchema).optional(),
263
+ visibility_state: z.nativeEnum(VisibilityState).optional(),
60
264
  regexUnitResults: z.nullable(regexUnitResultsSchema),
61
265
  // add these properties as null to make ts play nice with the ImmersiveScrapeResultsType union
62
266
  google_product_link: z.null().optional(),
@@ -121,6 +325,9 @@ export const immersiveScrapeResultsSchema = z.object({
121
325
  ignore_reasons: z.array(z.string()),
122
326
  hide_reasons: z.array(z.string()),
123
327
  hide_override_reasons: z.array(z.string()),
328
+ // New filter system fields
329
+ filter_results: z.array(filterResultSchema).optional(),
330
+ visibility_state: z.nativeEnum(VisibilityState).optional(),
124
331
  brand: z.string().nullable(),
125
332
  company_id: z.number().nullable(),
126
333
  regexUnitResults: z.nullable(regexUnitResultsSchema),
@@ -163,6 +370,21 @@ export const globalScrapeOptionsSchema = z.object({
163
370
  })),
164
371
  scam_sources: z.array(z.string()),
165
372
  });
373
/**
 * Zod schema for the overall filter configuration.
 *
 * - `rules`: optional list of per-rule entries; each entry needs an `id` and may
 *   carry `enabled`, `severity`, `priority`, and a free-form `parameters` bag.
 * - `globalParameters`: optional free-form string-keyed bag.
 */
export const filterConfigurationSchema = z.object({
  rules: z.optional(
    z.array(
      z.object({
        id: z.string(),
        enabled: z.optional(z.boolean()),
        severity: z.optional(z.nativeEnum(FilterSeverity)),
        priority: z.optional(z.number()),
        parameters: z.optional(z.record(z.string(), z.any())),
      }),
    ),
  ),
  globalParameters: z.optional(z.record(z.string(), z.any())),
});
166
388
  export const HIDE_REASONS = {
167
389
  IGNORED: 'Ignored',
168
390
  HIGH_PRICE_OUTLIER: 'Too Expensive',
@@ -187,6 +409,7 @@ export const HIDE_OVERRIDE_REASONS = {
187
409
  ALT_SKU_MATCH: 'Alt SKU Match',
188
410
  CALCULATED_SKU_MATCH: 'Calculated SKU Match',
189
411
  CALCULATED_SKU_PARTIAL_MATCH: 'Calculated SKU Partial Match',
412
+ CALCULATED_ALT_SKU_MATCH: 'Calc. Alt SKU Match',
190
413
  };
191
414
  const HIDE_ALWAYS_MAP = {
192
415
  [HIDE_REASONS.HIGH_PRICE_OUTLIER]: true,
@@ -208,6 +431,1004 @@ const HIDE_ALWAYS_MAP = {
208
431
  export const TOO_CHEAP_MULTIPLIER = 0.75;
209
432
  export const TOO_EXPENSIVE_MULTIPLIER = 1.15;
210
433
  // const wattages = Array.from({ length: 41 }, (_, i) => (5000 + i * 500).toString())
434
+ // ===============================
435
+ // Filter Rule Registry
436
+ // ===============================
437
/**
 * Registry for filter rules with configuration management.
 *
 * Rules are stored by `rule.id`; per-rule configuration entries are stored
 * separately by the same id and consulted when listing enabled rules.
 */
export class FilterRuleRegistry {
  /** Registered rules keyed by rule id. */
  rules = new Map();
  /** Per-rule configuration entries keyed by rule id. */
  configurations = new Map();

  /**
   * Register a new filter rule. A rule registered under an existing id replaces it.
   * @param {object} rule - Rule object; must expose an `id`.
   */
  registerRule(rule) {
    this.rules.set(rule.id, rule);
  }

  /**
   * Get a rule by ID.
   * @param {string} id
   * @returns {object | undefined} The rule, or `undefined` when not registered.
   */
  getRule(id) {
    return this.rules.get(id);
  }

  /**
   * Get all registered rules in registration order.
   * @returns {object[]}
   */
  getAllRules() {
    return [...this.rules.values()];
  }

  /**
   * Get all enabled rules sorted by effective priority, highest first.
   *
   * A rule is enabled unless either the rule itself or its configuration entry
   * sets `enabled` to exactly `false`. The effective priority is the configured
   * `priority` when one is present, otherwise the rule's own `priority`; a
   * missing or non-finite priority is treated as 0 so the comparator never
   * yields `NaN`.
   * @returns {object[]}
   */
  getEnabledRules() {
    const effectivePriority = (rule) => {
      const priority = this.configurations.get(rule.id)?.priority ?? rule.priority;
      return Number.isFinite(priority) ? priority : 0;
    };
    const isEnabled = (rule) =>
      this.configurations.get(rule.id)?.enabled !== false && rule.enabled !== false;

    return [...this.rules.values()]
      .filter(isEnabled)
      .sort((a, b) => effectivePriority(b) - effectivePriority(a));
  }

  /**
   * Apply configuration to rules. Each entry in `config.rules` replaces any
   * previously stored entry with the same id; a config without `rules` is a no-op.
   * @param {{ rules?: Array<{ id: string }> }} config
   */
  applyConfiguration(config) {
    for (const ruleConfig of config.rules ?? []) {
      this.configurations.set(ruleConfig.id, ruleConfig);
    }
  }

  /**
   * Get the stored configuration entry for a rule.
   * @param {string} ruleId
   * @returns {object | undefined}
   */
  getRuleConfig(ruleId) {
    return this.configurations.get(ruleId);
  }
}
487
+ // ===============================
488
+ // Filter Engine
489
+ // ===============================
490
/**
 * Engine for evaluating filter rules and determining visibility.
 *
 * The engine uses two registry methods: `getEnabledRules()` to obtain the rules
 * to run and `getRule(id)` to look up override metadata for a filter result.
 */
export class FilterEngine {
  registry;

  /**
   * @param {object} registry - Rule registry exposing `getEnabledRules()` and `getRule(id)`.
   */
  constructor(registry) {
    this.registry = registry;
  }

  /**
   * Evaluate all enabled rules for a single result.
   *
   * A rule that throws is logged via `console.error` and skipped, so one faulty
   * rule does not prevent the remaining rules from running.
   * @param {object} context - Evaluation context handed unchanged to each rule.
   * @returns {object[]} The truthy values returned by the rules, in rule order.
   */
  evaluateResult(context) {
    const results = [];
    for (const rule of this.registry.getEnabledRules()) {
      try {
        const result = rule.evaluate(context);
        if (result) {
          results.push(result);
        }
      } catch (error) {
        console.error(`Error evaluating rule ${rule.id}:`, error);
      }
    }
    return results;
  }

  /**
   * Evaluate rules for a batch of results.
   *
   * Each result object is mutated in place: `filter_results` and
   * `visibility_state` receive the new-system output, while `hide_reasons`,
   * `hide_override_reasons` and `ignore_result` are rewritten for backward
   * compatibility with the legacy fields.
   * @returns {object[]} The same result objects, in input order.
   */
  evaluateBatch(results, variant, variantScrapeOptions, vendorScrapeOptions, globalScrapeOptions) {
    return results.map((result) => {
      const filterResults = this.evaluateResult({
        result,
        variant,
        variantScrapeOptions,
        vendorScrapeOptions,
        globalScrapeOptions,
      });
      const visibilityState = this.calculateVisibility(filterResults);

      // Populate new fields
      result.filter_results = filterResults;
      result.visibility_state = visibilityState;

      // Maintain backward compatibility with old fields
      result.hide_reasons = filterResults
        .filter((fr) => fr.severity === FilterSeverity.BLOCK || fr.severity === FilterSeverity.WARNING)
        .map((fr) => fr.message);
      result.hide_override_reasons = filterResults
        .filter((fr) => fr.metadata?.isOverride)
        .map((fr) => fr.message);
      result.ignore_result = visibilityState === VisibilityState.HIDDEN;
      return result;
    });
  }

  /**
   * Calculate visibility state based on filter results.
   *
   * A BLOCK or WARNING result counts as overridden only when its rule is
   * registered, has `canBeOverridden` set, and lists in `overridableBy` the id
   * of a registered rule that produced an override result
   * (`metadata.isOverride`) in the same set.
   *
   * - Any BLOCK result that is not overridden -> HIDDEN
   * - Otherwise any WARNING result that is not overridden -> VISIBLE_WITH_WARNINGS
   * - Otherwise -> VISIBLE
   * @param {object[]} filterResults
   * @returns {string} One of the `VisibilityState` values.
   */
  calculateVisibility(filterResults) {
    if (filterResults.length === 0) {
      return VisibilityState.VISIBLE;
    }

    // Ids of registered override rules that fired for this result.
    const presentOverrideIds = new Set();
    for (const fr of filterResults) {
      if (!fr.metadata?.isOverride) {
        continue;
      }
      const overrideRule = this.registry.getRule(fr.ruleId);
      if (overrideRule) {
        presentOverrideIds.add(overrideRule.id);
      }
    }

    const isOverridden = (filterResult) => {
      const rule = this.registry.getRule(filterResult.ruleId);
      if (!rule || !rule.canBeOverridden) {
        return false;
      }
      // Rules may omit `overridableBy`; treat that as "no override accepted".
      return (rule.overridableBy ?? []).some((id) => presentOverrideIds.has(id));
    };
    const hasActive = (severity) =>
      filterResults.some((fr) => fr.severity === severity && !isOverridden(fr));

    if (hasActive(FilterSeverity.BLOCK)) {
      return VisibilityState.HIDDEN;
    }
    if (hasActive(FilterSeverity.WARNING)) {
      return VisibilityState.VISIBLE_WITH_WARNINGS;
    }
    return VisibilityState.VISIBLE;
  }
}
590
+ // ===============================
591
+ // Built-in Filter Rules
592
+ // ===============================
593
/**
 * BLOCK rule that flags results priced above the variant price multiplied by
 * `TOO_EXPENSIVE_MULTIPLIER`.
 */
class HighPriceOutlierRule {
  id = 'high_price_outlier';
  name = 'High Price Outlier';
  description = 'Filters results that are significantly more expensive than the variant price';
  severity = FilterSeverity.BLOCK;
  priority = 100;
  enabled = true;
  canBeOverridden = false;
  overridableBy = [];

  /**
   * @param {object} context - Reads `result.extracted_price` and `variant.price`.
   * @returns {object | null} A filter result when the price exceeds the ceiling, else `null`.
   */
  evaluate(context) {
    const { result, variant } = context;
    const threshold = TOO_EXPENSIVE_MULTIPLIER;
    const exceedsCeiling = result.extracted_price > variant.price * threshold;
    if (!exceedsCeiling) {
      return null;
    }
    return {
      ruleId: this.id,
      severity: this.severity,
      message: HIDE_REASONS.HIGH_PRICE_OUTLIER,
      metadata: {
        variantPrice: variant.price,
        resultPrice: result.extracted_price,
        threshold,
      },
      timestamp: new Date().toISOString(),
    };
  }
}
624
/**
 * BLOCK rule that flags results priced below the variant price multiplied by the
 * undercut threshold returned from `getUndercutThreshold`.
 */
class LowPriceOutlierRule {
  id = 'low_price_outlier';
  name = 'Low Price Outlier';
  description = 'Filters results that are significantly cheaper than the variant price';
  severity = FilterSeverity.BLOCK;
  priority = 100;
  enabled = true;
  canBeOverridden = false;
  overridableBy = [];

  /**
   * @param {object} context - Reads `result.extracted_price`, `variant.price` and
   *   the optional `globalScrapeOptions.undercut_threshold_ranges`.
   * @returns {object | null} A filter result when the price is below the floor, else `null`.
   */
  evaluate(context) {
    const { result, variant } = context;
    // globalScrapeOptions is treated as optional elsewhere in this file; fall back
    // to an empty range list instead of throwing when it is absent.
    const ranges = context.globalScrapeOptions?.undercut_threshold_ranges || [];
    const multiplier = getUndercutThreshold(variant.price, ranges);
    const isTooCheap = result.extracted_price < variant.price * multiplier;
    if (!isTooCheap) {
      return null;
    }
    return {
      ruleId: this.id,
      severity: this.severity,
      message: HIDE_REASONS.LOW_PRICE_OUTLIER,
      metadata: {
        variantPrice: variant.price,
        resultPrice: result.extracted_price,
        threshold: multiplier,
      },
      timestamp: new Date().toISOString(),
    };
  }
}
655
/**
 * BLOCK rule that flags results whose `created_at` is older than the start of the
 * day `day_window` days ago (3 days when `day_window` is missing, zero or not numeric).
 */
class DateOutlierRule {
  id = 'date_outlier';
  name = 'Date Outlier';
  description = 'Filters results outside the specified date window';
  severity = FilterSeverity.BLOCK;
  priority = 90;
  enabled = true;
  canBeOverridden = false;
  overridableBy = [];

  /**
   * @param {object} context - Reads `result.created_at` and `variantScrapeOptions.day_window`.
   * @returns {object | null} A filter result when the listing predates the cutoff, else `null`.
   */
  evaluate(context) {
    const listedAt = new Date(context.result.created_at);
    const dayWindow = Number(context.variantScrapeOptions.day_window) || 3;
    const cutoff = startOfDay(subDays(new Date(), dayWindow));
    // Written as a negated "<" so an unparseable date (NaN comparison) is not flagged.
    if (!(listedAt < cutoff)) {
      return null;
    }
    return {
      ruleId: this.id,
      severity: this.severity,
      message: HIDE_REASONS.DATE_OUTLIER,
      metadata: {
        itemDate: listedAt.toISOString(),
        threshold: cutoff.toISOString(),
        dayWindow,
      },
      timestamp: new Date().toISOString(),
    };
  }
}
687
/**
 * WARNING rule that flags results whose source equals (case-insensitively) one of
 * the variant's `competitor_exclusions` entries.
 */
class CompetitorExclusionRule {
  id = 'competitor_exclusion';
  name = 'Competitor Exclusion';
  description = 'Filters results from excluded competitor sources';
  severity = FilterSeverity.WARNING;
  priority = 80;
  enabled = true;
  canBeOverridden = true;
  overridableBy = ['sku_match', 'calculated_sku_match', 'alt_sku_match'];

  /**
   * @param {object} context - Reads `result.source` and `variantScrapeOptions.competitor_exclusions`.
   * @returns {object | null} A filter result when the source is excluded, else `null`.
   */
  evaluate(context) {
    const { source } = context.result;
    const normalizedSource = source.toLowerCase();
    // Falsy entries in the exclusion list are ignored.
    const matchesSource = (exclusion) => exclusion && normalizedSource === exclusion.toLowerCase();
    if (!context.variantScrapeOptions.competitor_exclusions.some(matchesSource)) {
      return null;
    }
    return {
      ruleId: this.id,
      severity: this.severity,
      message: HIDE_REASONS.COMPETITOR_EXCLUSION,
      metadata: { source },
      timestamp: new Date().toISOString(),
    };
  }
}
714
/**
 * BLOCK rule descriptor for duplicate results.
 *
 * Per-result evaluation never fires: according to the original note this rule
 * needs batch context, and duplicate detection is performed during batch
 * processing (not part of this class).
 */
class DuplicateRule {
  id = 'duplicate';
  name = 'Duplicate';
  description = 'Marks duplicate results based on source, title, and price';
  severity = FilterSeverity.BLOCK;
  priority = 70;
  enabled = true;
  canBeOverridden = false;
  overridableBy = [];

  /**
   * Placeholder; the context is not inspected.
   * @returns {null} Always `null`.
   */
  evaluate(_context) {
    return null;
  }
}
732
/**
 * WARNING rule descriptor for search exclusions.
 *
 * Per-result evaluation never fires: according to the original note the check is
 * handled in batch processing because it requires FlexSearch (not part of this class).
 */
class SearchExclusionRule {
  id = 'search_exclusion';
  name = 'Search Exclusion';
  description = 'Filters results that do not match search criteria';
  severity = FilterSeverity.WARNING;
  priority = 60;
  enabled = true;
  canBeOverridden = true;
  overridableBy = ['sku_match', 'calculated_sku_match'];

  /**
   * Placeholder; the context is not inspected.
   * @returns {null} Always `null`.
   */
  evaluate(_context) {
    return null;
  }
}
749
/**
 * WARNING rule that flags results whose title contains a SKU from the vendor's
 * skip lists (default and vendor-specific), excluding the variant's own SKU.
 */
class SkipSkuRule {
  id = 'skip_sku';
  name = 'Skip SKU';
  description = 'Filters results containing SKUs from the skip list';
  severity = FilterSeverity.WARNING;
  priority = 50;
  enabled = true;
  canBeOverridden = true;
  overridableBy = ['sku_match', 'calculated_sku_match'];

  /**
   * @param {object} context - Reads `vendorScrapeOptions.default_skip_skus`,
   *   `vendorScrapeOptions.vendor_skip_skus`, `variant.sku` and `result.title`.
   * @returns {object | null} A filter result when a skipped SKU appears in the title, else `null`.
   */
  evaluate(context) {
    // The variant's own SKU must never count as a skip hit.
    const isForeignSku = (candidate) => candidate.toLowerCase() !== context.variant.sku.toLowerCase();
    const skipList = [
      ...context.vendorScrapeOptions.default_skip_skus.filter(isForeignSku).map((sku) => String(sku)),
      ...context.vendorScrapeOptions.vendor_skip_skus.filter(isForeignSku),
    ];

    const titleContainsSkippedSku = skipList.some((sku) => {
      const escapedSku = sku.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
      // The SKU must be delimited on both sides by a space, slash, ampersand,
      // equals sign or quote character.
      const skuRegex = new RegExp(`(?: |\/|\&|\=|"|'|\`)${escapedSku}(?: |\/|\&|\=|"|'|\`)`, 'gi');
      const hits = context.result.title?.match(skuRegex);
      return Boolean(hits) && hits.length > 0;
    });
    if (!titleContainsSkippedSku) {
      return null;
    }
    return {
      ruleId: this.id,
      severity: this.severity,
      message: HIDE_REASONS.SKIP_SKU,
      metadata: { title: context.result.title },
      timestamp: new Date().toISOString(),
    };
  }
}
786
/**
 * WARNING rule that flags results whose title contains a vendor name from
 * `default_skip_vendors`, excluding the variant's own vendor.
 */
class VendorExclusionRule {
  id = 'vendor_exclusion';
  name = 'Vendor Exclusion';
  description = 'Filters results containing excluded vendor names';
  severity = FilterSeverity.WARNING;
  priority = 50;
  enabled = true;
  canBeOverridden = true;
  overridableBy = ['sku_match', 'calculated_sku_match'];

  /**
   * @param {object} context - Reads `vendorScrapeOptions.default_skip_vendors`,
   *   `variant.vendor` (may be null or empty) and `result.title`.
   * @returns {object | null} A filter result when an excluded vendor appears in the title, else `null`.
   */
  evaluate(context) {
    // A variant without a vendor simply has no "own vendor" to exempt; it must
    // not make the rule throw and be skipped.
    const ownVendor = context.variant.vendor ? context.variant.vendor.toLowerCase() : null;
    const excludedVendors = (context.vendorScrapeOptions.default_skip_vendors ?? [])
      .filter((vendor) => vendor.toLowerCase() !== ownVendor)
      .map((vendor) => String(vendor));

    const titleContainsExcludedVendor = excludedVendors.some((vendor) => {
      const escaped = vendor.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
      // The vendor name must be delimited on both sides by a space, slash,
      // ampersand, equals sign or quote character.
      const matchRegex = new RegExp(`(?: |\/|\&|\=|"|'|\`)${escaped}(?: |\/|\&|\=|"|'|\`)`, 'gi');
      const matches = context.result.title?.match(matchRegex);
      return Boolean(matches) && matches.length > 0;
    });
    if (!titleContainsExcludedVendor) {
      return null;
    }
    return {
      ruleId: this.id,
      severity: this.severity,
      message: HIDE_REASONS.VENDOR_EXCLUSION,
      metadata: { title: context.result.title },
      timestamp: new Date().toISOString(),
    };
  }
}
822
/**
 * BLOCK rule that flags results whose source equals (case-insensitively) one of
 * the global `scam_sources` entries.
 */
class ScamSourceExclusionRule {
  id = 'scam_source_exclusion';
  name = 'Scam Source Exclusion';
  description = 'Filters results from known scam sources';
  severity = FilterSeverity.BLOCK;
  priority = 95;
  enabled = true;
  canBeOverridden = false;
  overridableBy = [];

  /**
   * @param {object} context - Reads `result.source` and the optional
   *   `globalScrapeOptions.scam_sources`.
   * @returns {object | null} A filter result when the source is a listed scam source, else `null`.
   */
  evaluate(context) {
    const { source } = context.result;
    const normalizedSource = source.toLowerCase();
    const knownScamSources = context.globalScrapeOptions?.scam_sources || [];
    // Falsy entries in the list are ignored.
    const matchesSource = (entry) => entry && normalizedSource === entry.toLowerCase();
    if (!knownScamSources.some(matchesSource)) {
      return null;
    }
    return {
      ruleId: this.id,
      severity: this.severity,
      message: HIDE_REASONS.SCAM_SOURCE_EXCLUSION,
      metadata: { source },
      timestamp: new Date().toISOString(),
    };
  }
}
850
/**
 * BLOCK rule that flags results matching one of the variant's manually recorded
 * ignore keys (`variantScrapeOptions.result_ignore_keys`).
 *
 * Each ignore key names its comparison fields in `key_parts` (a JSON-encoded
 * array of field names; an already-parsed array is accepted too). The key's
 * values for those fields are joined with '-' and compared with the same join
 * built from the variant id followed by the result's values for every part other
 * than 'variant_id' (the 'price' part reads `result.extracted_price`).
 */
class ManuallyIgnoredRule {
  id = 'manually_ignored';
  name = 'Manually Ignored';
  description = 'Filters results that have been manually ignored or excluded';
  severity = FilterSeverity.BLOCK;
  priority = 110;
  enabled = true;
  canBeOverridden = false;
  overridableBy = [];

  /**
   * @param {object} context - Reads `variantScrapeOptions.result_ignore_keys`,
   *   `variant.id` and fields of `result`.
   * @returns {object | null} A filter result for the first matching ignore key, else `null`.
   */
  evaluate(context) {
    const ignoreKeys = context.variantScrapeOptions.result_ignore_keys;
    if (!ignoreKeys || ignoreKeys.length === 0) {
      return null;
    }

    for (const key of ignoreKeys) {
      let keyParts;
      try {
        keyParts = Array.isArray(key.key_parts) ? key.key_parts : JSON.parse(key.key_parts);
      } catch (error) {
        // One malformed entry must not stop the remaining ignore keys from being checked.
        console.error(`Skipping result ignore key with unparsable key_parts:`, error);
        continue;
      }
      if (!Array.isArray(keyParts)) {
        continue;
      }

      const keyValues = keyParts.map((part) => key[part]);
      const resultKeyValues = [
        context.variant.id,
        ...keyParts
          .filter((part) => part !== 'variant_id')
          .map((part) => context.result[part === 'price' ? 'extracted_price' : part]),
      ];
      if (keyValues.join('-') !== resultKeyValues.join('-')) {
        continue;
      }

      const message = key.reason === 'Ignored' ? HIDE_REASONS.MANUALLY_IGNORED : HIDE_REASONS.MANUALLY_EXCLUDED;
      return {
        ruleId: this.id,
        severity: this.severity,
        message,
        metadata: { reason: key.reason, keyParts },
        timestamp: new Date().toISOString(),
      };
    }
    return null;
  }
}
893
/**
 * WARNING rule that flags results whose brand is set but matches neither the
 * variant's vendor nor any of the vendor's `brand_name_alternates`
 * (case-insensitive comparison). Results without a brand are never flagged.
 */
class BrandMismatchRule {
  id = 'brand_mismatch';
  name = 'Brand Mismatch';
  description = 'Filters results with mismatched brands';
  severity = FilterSeverity.WARNING;
  priority = 40;
  enabled = true;
  canBeOverridden = true;
  overridableBy = ['sku_match', 'calculated_sku_match', 'alt_sku_match'];

  /**
   * @param {object} context - Reads `result.brand`, `variant.vendor` and
   *   `vendorScrapeOptions.brand_name_alternates`.
   * @returns {object | null} A filter result when the brand is not among the expected ones, else `null`.
   */
  evaluate(context) {
    const { brand } = context.result;
    if (!brand) {
      return null;
    }
    const ownVendor = context.variant.vendor ? context.variant.vendor.toLowerCase() : null;
    const alternates = context.vendorScrapeOptions.brand_name_alternates || [];
    // Empty/missing names are dropped before normalizing.
    const expectedBrands = [ownVendor, ...alternates]
      .filter(Boolean)
      .map((name) => name.toLowerCase());
    if (expectedBrands.includes(brand.toLowerCase())) {
      return null;
    }
    return {
      ruleId: this.id,
      severity: this.severity,
      message: HIDE_REASONS.CALCULATED_BRAND_MISMATCH,
      metadata: {
        resultBrand: brand,
        expectedBrands,
      },
      timestamp: new Date().toISOString(),
    };
  }
}
930
/**
 * BLOCK rule that flags results whose `details_and_offers` contains the text
 * 'Out of stock online'.
 */
class OutOfStockRule {
  id = 'out_of_stock';
  name = 'Out of Stock';
  description = 'Filters results that are out of stock online';
  severity = FilterSeverity.BLOCK;
  priority = 85;
  enabled = true;
  canBeOverridden = false;
  overridableBy = [];

  /**
   * @param {object} context - Reads `result.details_and_offers`.
   * @returns {object | null} A filter result when the out-of-stock marker is present, else `null`.
   */
  evaluate(context) {
    const offerDetails = context.result.details_and_offers;
    if (!offerDetails || !offerDetails.includes('Out of stock online')) {
      return null;
    }
    return {
      ruleId: this.id,
      severity: this.severity,
      message: HIDE_REASONS.OUT_OF_STOCK_ONLINE,
      metadata: { details: offerDetails },
      timestamp: new Date().toISOString(),
    };
  }
}
956
/**
 * WARNING rule that flags results carrying a calculated SKU that differs
 * (case-insensitively) from the variant SKU. Results without a calculated SKU
 * are never flagged.
 */
class CalculatedSkuMismatchRule {
  id = 'calculated_sku_mismatch';
  name = 'Calculated SKU Mismatch';
  description = 'Filters results with mismatched calculated SKUs';
  severity = FilterSeverity.WARNING;
  priority = 30;
  enabled = true;
  canBeOverridden = true;
  overridableBy = ['sku_match', 'calculated_sku_match'];

  /**
   * @param {object} context - Reads `result.calculated_sku` and `variant.sku`.
   * @returns {object | null} A filter result on mismatch, else `null`.
   */
  evaluate(context) {
    const calculatedSku = context.result.calculated_sku;
    if (!calculatedSku) {
      return null;
    }
    if (calculatedSku.toLowerCase() === context.variant.sku.toLowerCase()) {
      return null;
    }
    return {
      ruleId: this.id,
      severity: this.severity,
      message: HIDE_REASONS.CALCULATED_SKU_MISMATCH,
      metadata: {
        calculatedSku,
        variantSku: context.variant.sku,
      },
      timestamp: new Date().toISOString(),
    };
  }
}
985
/**
 * WARNING rule that flags results for which `getRegexUnitResultsIgnore` reports
 * an ignore when given the result's and the variant's `regexUnitResults`.
 * The rule does nothing when either side lacks `regexUnitResults`.
 */
class CriticalSpecMismatchRule {
  id = 'critical_spec_mismatch';
  name = 'Critical Spec Mismatch';
  description = 'Filters results with mismatched critical specifications';
  severity = FilterSeverity.WARNING;
  priority = 35;
  enabled = true;
  canBeOverridden = true;
  overridableBy = ['sku_match', 'calculated_sku_match'];

  /**
   * @param {object} context - Reads `result.regexUnitResults` and `variant.regexUnitResults`.
   * @returns {object | null} A filter result carrying the reported reasons, else `null`.
   */
  evaluate(context) {
    const resultUnits = context.result.regexUnitResults;
    if (!resultUnits) {
      return null;
    }
    const variantUnits = context.variant.regexUnitResults;
    if (!variantUnits) {
      return null;
    }
    const [shouldIgnore, reasons] = getRegexUnitResultsIgnore(resultUnits, variantUnits);
    if (!shouldIgnore) {
      return null;
    }
    return {
      ruleId: this.id,
      severity: this.severity,
      message: HIDE_REASONS.CRITICAL_SPEC_MISMATCH,
      metadata: { reasons },
      timestamp: new Date().toISOString(),
    };
  }
}
1014
/**
 * WARNING rule that flags results whose title contains one of the keywords from
 * `getRefurbishedKeywords(variant)` as a whole token, or whose
 * `details_and_offers` contains 'Pre-owned'.
 */
class RefurbishedUsedRule {
  id = 'refurbished_used';
  name = 'Refurbished/Used';
  description = 'Filters refurbished or used items';
  severity = FilterSeverity.WARNING;
  priority = 25;
  enabled = true;
  canBeOverridden = true;
  overridableBy = ['sku_match', 'calculated_sku_match'];

  /**
   * @param {object} context - Reads `variant`, `result.title` and `result.details_and_offers`.
   * @returns {object | null} A filter result when the item looks refurbished/used, else `null`.
   */
  evaluate(context) {
    const keywords = getRefurbishedKeywords(context.variant);
    // Lower-case the title and turn everything except a-z, 0-9 and '-' into
    // spaces, then compare whole whitespace-separated tokens.
    const normalizedTitle = context.result.title?.toLowerCase().replace(/[^a-z0-9-]/g, ' ') ?? '';
    const titleTokens = normalizedTitle.split(/\s+/);
    const offerDetails = context.result.details_and_offers;

    const keywordInTitle = keywords.some((keyword) => titleTokens.includes(keyword));
    const markedPreOwned = offerDetails && offerDetails.includes('Pre-owned');
    if (!keywordInTitle && !markedPreOwned) {
      return null;
    }
    return {
      ruleId: this.id,
      severity: this.severity,
      message: HIDE_REASONS.REFURBISHED_USED,
      metadata: { title: context.result.title },
      timestamp: new Date().toISOString(),
    };
  }
}
1044
+ // ===============================
1045
+ // Override Rules
1046
+ // ===============================
1047
/**
 * Override rule (INFO severity) that fires when the variant SKU appears in the
 * result title as a standalone token, optionally wrapped in brackets.
 * Matching is case-insensitive and treats the SKU as literal text.
 */
class SkuMatchOverrideRule {
  id = 'sku_match';
  name = 'SKU Match';
  description = 'Overrides hide reasons when SKU is found in title';
  severity = FilterSeverity.INFO;
  priority = 200;
  enabled = true;
  canBeOverridden = false;
  overridableBy = [];

  /**
   * @param {object} context - Reads `variant.sku` and `result.title`.
   * @returns {object | null} An override result (`metadata.isOverride === true`)
   *   when the SKU is found, else `null`. A missing/empty SKU or a non-string
   *   title never matches.
   */
  evaluate(context) {
    const { sku } = context.variant;
    const { title } = context.result;
    // An empty SKU would reduce the pattern to a bare word boundary and match
    // every title; a non-string title would be coerced (e.g. to "null").
    if (typeof sku !== 'string' || sku === '' || typeof title !== 'string') {
      return null;
    }
    // Escape every regex metacharacter so SKUs such as "AB.12" or "XP(4500" are
    // matched literally instead of mis-matching or throwing a SyntaxError.
    const escapedSku = sku.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const skuRegex = new RegExp(`(?:\\b|[\\(\\[\\{])${escapedSku}(?:\\b|[\\)\\]\\}])`, 'i');
    if (!skuRegex.test(title)) {
      return null;
    }
    return {
      ruleId: this.id,
      severity: this.severity,
      message: HIDE_OVERRIDE_REASONS.SKU_MATCH,
      metadata: { isOverride: true, sku },
      timestamp: new Date().toISOString(),
    };
  }
}
1074
/**
 * Override rule that fires when the result's `calculated_sku` equals the
 * variant SKU, compared case-insensitively.
 */
class CalculatedSkuMatchOverrideRule {
    id = 'calculated_sku_match';
    name = 'Calculated SKU Match';
    description = 'Overrides hide reasons when calculated SKU matches';
    severity = FilterSeverity.INFO;
    priority = 200;
    enabled = true;
    canBeOverridden = false;
    overridableBy = [];
    /**
     * @param {object} context - Evaluation context; reads `context.variant.sku` and `context.result.calculated_sku`.
     * @returns {object|null} An override result when both SKUs are present and equal, otherwise `null`.
     */
    evaluate(context) {
        const variantSku = context.variant.sku;
        const calculatedSku = context.result.calculated_sku;
        // A missing or empty SKU on either side cannot be a meaningful match
        // (and "" === "" must not be reported as an override).
        if (typeof variantSku !== 'string' || variantSku === '' ||
            typeof calculatedSku !== 'string' || calculatedSku === '') {
            return null;
        }
        if (variantSku.toLowerCase() !== calculatedSku.toLowerCase()) {
            return null;
        }
        return {
            ruleId: this.id,
            severity: this.severity,
            message: HIDE_OVERRIDE_REASONS.CALCULATED_SKU_MATCH,
            metadata: { isOverride: true, calculatedSku },
            timestamp: new Date().toISOString(),
        };
    }
}
1100
/**
 * Override rule that fires when one of the variant's alternate SKUs matches the result.
 *
 * Two checks are made, in this order:
 *  1. the lower-cased title contains an alternate SKU (substring match);
 *  2. the lower-cased `calculated_sku` equals an alternate SKU.
 */
class AltSkuMatchOverrideRule {
    id = 'alt_sku_match';
    name = 'Alt SKU Match';
    description = 'Overrides hide reasons when alternate SKU matches';
    severity = FilterSeverity.INFO;
    priority = 200;
    enabled = true;
    canBeOverridden = false;
    overridableBy = [];
    /**
     * @param {object} context - Evaluation context; reads `context.variantScrapeOptions.sku_alternates`,
     *   `context.result.title` and `context.result.calculated_sku`.
     * @returns {object|null} An override result for the first check that matches, otherwise `null`.
     */
    evaluate(context) {
        const configuredAlternates = context.variantScrapeOptions.sku_alternates;
        // Blank alternates are dropped: "".includes-style checks are always true, which would
        // turn a stray empty entry into an override for every single result.
        const alternates = (Array.isArray(configuredAlternates) ? configuredAlternates : [])
            .filter((alt) => typeof alt === 'string' && alt.trim() !== '')
            .map((alt) => alt.toLowerCase());
        if (alternates.length === 0) {
            return null;
        }
        const result = context.result;
        // Check title match
        const title = result.title?.toLowerCase();
        if (title !== undefined && alternates.some((alt) => title.includes(alt))) {
            return {
                ruleId: this.id,
                severity: this.severity,
                message: HIDE_OVERRIDE_REASONS.ALT_SKU_MATCH,
                metadata: { isOverride: true },
                timestamp: new Date().toISOString(),
            };
        }
        // Check calculated SKU match
        const calculatedSku = result.calculated_sku?.toLowerCase();
        if (calculatedSku !== undefined && alternates.includes(calculatedSku)) {
            return {
                ruleId: this.id,
                severity: this.severity,
                message: HIDE_OVERRIDE_REASONS.CALCULATED_ALT_SKU_MATCH,
                metadata: { isOverride: true, calculatedSku: result.calculated_sku },
                timestamp: new Date().toISOString(),
            };
        }
        return null;
    }
}
1138
/**
 * Override rule that fires when the result's `found_product_id` is one of the
 * product ids listed on the variant (`variant.found_product_ids`).
 */
class ProductIdMatchOverrideRule {
    id = 'product_id_match';
    name = 'Product ID Match';
    description = 'Overrides hide reasons when product ID is linked';
    severity = FilterSeverity.INFO;
    priority = 200;
    enabled = true;
    canBeOverridden = false;
    overridableBy = [];
    /**
     * @param {object} context - Evaluation context; reads `context.variant.found_product_ids`
     *   and `context.result.found_product_id`.
     * @returns {object|null} An override result when the product id is linked, otherwise `null`.
     */
    evaluate(context) {
        const linkedProductIds = context.variant.found_product_ids;
        // Nothing to compare against when the variant carries no linked ids.
        if (!linkedProductIds) {
            return null;
        }
        if (!linkedProductIds.includes(context.result.found_product_id)) {
            return null;
        }
        return {
            ruleId: this.id,
            severity: this.severity,
            message: 'Product Id Linked',
            metadata: { isOverride: true, foundProductId: context.result.found_product_id },
            timestamp: new Date().toISOString(),
        };
    }
}
1164
+ // ===============================
1165
+ // Registry Factory
1166
+ // ===============================
1167
/**
 * Build a fresh FilterRuleRegistry containing one new instance of every built-in rule.
 *
 * Rules are registered in the order listed below: the filter rules first,
 * then the override rules.
 *
 * @returns {FilterRuleRegistry} The populated registry.
 */
export function createDefaultFilterRegistry() {
    const registry = new FilterRuleRegistry();
    const filterRuleClasses = [
        HighPriceOutlierRule,
        LowPriceOutlierRule,
        DateOutlierRule,
        CompetitorExclusionRule,
        DuplicateRule,
        SearchExclusionRule,
        SkipSkuRule,
        VendorExclusionRule,
        ScamSourceExclusionRule,
        ManuallyIgnoredRule,
        BrandMismatchRule,
        OutOfStockRule,
        CalculatedSkuMismatchRule,
        CriticalSpecMismatchRule,
        RefurbishedUsedRule,
    ];
    const overrideRuleClasses = [
        SkuMatchOverrideRule,
        CalculatedSkuMatchOverrideRule,
        AltSkuMatchOverrideRule,
        ProductIdMatchOverrideRule,
    ];
    // Instantiate and register one rule at a time, filter rules before override rules.
    for (const RuleClass of [...filterRuleClasses, ...overrideRuleClasses]) {
        registry.registerRule(new RuleClass());
    }
    return registry;
}
1195
/**
 * Create a FilterEngine backed by the default rule registry.
 *
 * @param {object} [config] - Optional configuration; when truthy it is passed to
 *   `registry.applyConfiguration` before the engine is constructed.
 * @returns {FilterEngine} The engine wrapping the (optionally configured) default registry.
 */
export function createDefaultFilterEngine(config) {
    const defaultRegistry = createDefaultFilterRegistry();
    if (!config) {
        return new FilterEngine(defaultRegistry);
    }
    defaultRegistry.applyConfiguration(config);
    return new FilterEngine(defaultRegistry);
}
1205
+ // ===============================
1206
+ // Extensibility Helpers
1207
+ // ===============================
1208
/**
 * Fluent builder for custom filter rules.
 *
 * Every setter stores one field and returns the builder so calls can be chained.
 * Defaults: `enabled: true`, `priority: 50`, `canBeOverridden: false`, `overridableBy: []`.
 * `id`, `name`, `description`, `severity` and `evaluate` must be set before `build()`.
 */
export class FilterRuleBuilder {
    rule = {
        enabled: true,
        priority: 50,
        canBeOverridden: false,
        overridableBy: [],
    };
    /** @param {string} id - Rule identifier. @returns {FilterRuleBuilder} this */
    id(id) {
        this.rule.id = id;
        return this;
    }
    /** @param {string} name - Rule name. @returns {FilterRuleBuilder} this */
    name(name) {
        this.rule.name = name;
        return this;
    }
    /** @param {string} description - Rule description. @returns {FilterRuleBuilder} this */
    description(description) {
        this.rule.description = description;
        return this;
    }
    /** @param {*} severity - Rule severity value. @returns {FilterRuleBuilder} this */
    severity(severity) {
        this.rule.severity = severity;
        return this;
    }
    /** @param {number} priority - Rule priority. @returns {FilterRuleBuilder} this */
    priority(priority) {
        this.rule.priority = priority;
        return this;
    }
    /** @param {boolean} enabled - Whether the rule is enabled. @returns {FilterRuleBuilder} this */
    enabled(enabled) {
        this.rule.enabled = enabled;
        return this;
    }
    /** @param {boolean} canBeOverridden - Whether the rule can be overridden. @returns {FilterRuleBuilder} this */
    canBeOverridden(canBeOverridden) {
        this.rule.canBeOverridden = canBeOverridden;
        return this;
    }
    /** @param {string[]} ruleIds - Ids of rules allowed to override this one. @returns {FilterRuleBuilder} this */
    overridableBy(ruleIds) {
        this.rule.overridableBy = ruleIds;
        return this;
    }
    /** @param {Function} evaluateFn - The rule's evaluate function. @returns {FilterRuleBuilder} this */
    evaluate(evaluateFn) {
        this.rule.evaluate = evaluateFn;
        return this;
    }
    /**
     * Produce the rule object.
     *
     * @returns {object} A new rule object; it does not share state with the builder,
     *   so the builder can be reused without changing rules that were already built.
     * @throws {Error} When id, name, description, severity or evaluate is missing.
     */
    build() {
        if (!this.rule.id || !this.rule.name || !this.rule.description || !this.rule.severity || !this.rule.evaluate) {
            throw new Error('FilterRuleBuilder: Missing required fields');
        }
        const { overridableBy } = this.rule;
        // Return a snapshot rather than the internal object: otherwise later setter calls
        // on this builder would silently rewrite a rule that is already registered.
        return {
            ...this.rule,
            overridableBy: Array.isArray(overridableBy) ? [...overridableBy] : overridableBy,
        };
    }
}
1261
/**
 * Simple factory for creating custom rules without implementing the full interface.
 *
 * @param {object} config - Rule definition.
 * @param {string} config.id - Rule identifier.
 * @param {string} config.name - Rule name.
 * @param {string} config.description - Rule description.
 * @param {*} config.severity - Rule severity value.
 * @param {number} [config.priority=50] - Rule priority.
 * @param {boolean} [config.enabled=true] - Whether the rule starts enabled.
 * @param {boolean} [config.canBeOverridden=false] - Whether the rule can be overridden.
 * @param {string[]} [config.overridableBy=[]] - Ids of rules allowed to override this one.
 * @param {Function} config.evaluate - The rule's evaluate function.
 * @returns {object} A rule object with defaults applied.
 */
export function createCustomFilterRule(config) {
    return {
        id: config.id,
        name: config.name,
        description: config.description,
        severity: config.severity,
        priority: config.priority ?? 50,
        // Honour an explicit `enabled` flag; omitted means enabled, as before.
        enabled: config.enabled ?? true,
        canBeOverridden: config.canBeOverridden ?? false,
        overridableBy: config.overridableBy ?? [],
        evaluate: config.evaluate,
    };
}
1277
+ // ===============================
1278
+ // Enhanced Batch Filtering (New API)
1279
+ // ===============================
1280
/**
 * Run scrape results through the rule-based filter engine.
 *
 * Steps, in order: build the default registry, register any `customRules`, apply
 * `filterConfig`, construct the engine, annotate duplicates, annotate search
 * exclusions, then let the engine evaluate the batch.
 *
 * @param {object} params
 * @param {object[]} params.scrapeResults - Results to filter; passed to the batch handlers and the engine.
 * @param {object} params.variant - Variant the results are evaluated against.
 * @param {object} params.variantScrapeOptions - Variant-level scrape options.
 * @param {object} params.vendorScrapeOptions - Vendor-level scrape options.
 * @param {object} params.globalScrapeOptions - Global scrape options.
 * @param {object} [params.filterConfig] - Optional registry configuration.
 * @param {object[]} [params.customRules] - Optional extra rules to register.
 * @returns {*} Whatever `engine.evaluateBatch` returns for the annotated results.
 */
export const filterScrapeResultsV2 = ({ scrapeResults, variant, variantScrapeOptions, vendorScrapeOptions, globalScrapeOptions, filterConfig, customRules, }) => {
    const ruleRegistry = createDefaultFilterRegistry();
    // Custom rules go in before the configuration is applied.
    if (customRules) {
        customRules.forEach((customRule) => ruleRegistry.registerRule(customRule));
    }
    if (filterConfig) {
        ruleRegistry.applyConfiguration(filterConfig);
    }
    const filterEngine = new FilterEngine(ruleRegistry);
    // Duplicates and search exclusions need the whole batch, so they run outside the engine.
    const dedupedResults = handleDuplicatesV2(scrapeResults);
    const annotatedResults = handleSearchExclusionsV2(dedupedResults, variantScrapeOptions, variant, ruleRegistry);
    // Everything else is evaluated per result by the engine.
    return filterEngine.evaluateBatch(annotatedResults, variant, variantScrapeOptions, vendorScrapeOptions, globalScrapeOptions);
};
1302
/**
 * Annotate duplicate results with a BLOCK-level `duplicate` filter result.
 *
 * Two results are duplicates when they share the same `source`, `title` and
 * `extracted_price`. Within a group the item with the latest `created_at` is kept
 * unmarked; on ties (or unparsable dates) the item seen first is kept.
 * Items are annotated in place and the same array is returned.
 *
 * @param {object[]} results - Scrape results; mutated by appending to `filter_results`.
 * @returns {object[]} The same `results` array.
 */
function handleDuplicatesV2(results) {
    // Newest item seen so far for each dedupe key.
    const newestByKey = new Map();
    const markAsDuplicate = (item, key) => {
        if (!item.filter_results) {
            item.filter_results = [];
        }
        item.filter_results.push({
            ruleId: 'duplicate',
            severity: FilterSeverity.BLOCK,
            message: HIDE_REASONS.DUPLICATE,
            metadata: { key },
            timestamp: new Date().toISOString(),
        });
    };
    results.forEach((item) => {
        const key = `${item.source}-${item.title}-${item.extracted_price}`;
        const existingItem = newestByKey.get(key);
        if (!existingItem) {
            newestByKey.set(key, item);
            return;
        }
        if (new Date(item.created_at) > new Date(existingItem.created_at)) {
            // The tracked item is itself an element of `results`, so mark it directly.
            // Looking it up again by `id` was O(n) per duplicate and could pick an
            // unrelated item when ids are missing or not unique.
            markAsDuplicate(existingItem, key);
            newestByKey.set(key, item);
        }
        else {
            markAsDuplicate(item, key);
        }
    });
    return results;
}
1348
/**
 * Annotate results that fail the variant's search query with a `search_exclusion` filter result.
 *
 * The query is `(match_values[0] OR match_values[1] ...) AND variant.sku`, minus every
 * term in `options.search_exclusions`, evaluated against a FlexSearch index of the
 * result titles. Results not returned by the query are annotated in place.
 * When there are no search exclusions the input is returned untouched.
 *
 * @param {object[]} dataToSearch - Results to check; indexed by `title`, mutated via `filter_results`.
 * @param {object} options - Variant scrape options; reads `search_exclusions` and `match_values`.
 * @param {object} variant - Variant; reads `sku`.
 * @param {object} registry - Rule registry; `getRule('search_exclusion')` supplies the severity.
 * @returns {object[]} The same `dataToSearch` array.
 */
function handleSearchExclusionsV2(dataToSearch, options, variant, registry) {
    if (!options.search_exclusions || options.search_exclusions.length === 0) {
        return dataToSearch;
    }
    // Lower-case, treat "/" and "-" as separators, strip everything except
    // letters, digits, commas and whitespace, then split on whitespace.
    const customEncoder = (content) => content
        .toLowerCase()
        .replace(/[\/-]/g, ' ')
        .replace(/[^a-z0-9,\/\s]/gi, '')
        .split(/\s+/);
    const index = new Index({
        // @ts-ignore
        charset: EnglishPreset,
        encode: customEncoder,
        tokenize: 'strict',
    });
    dataToSearch.forEach((item, id) => {
        index.add(id, item.title);
    });
    const searchTerms = options.match_values ?? [];
    let query = null;
    for (const term of searchTerms) {
        query = query === null
            ? index.search(term, { resolve: false, suggest: true })
            : query.or({ index, query: term, resolve: false, suggest: true });
    }
    // NOTE(review): with no match values the chain used to crash on `null.and(...)`.
    // It is now seeded with the SKU query alone — confirm that is the intended meaning.
    query = query === null
        ? index.search(variant.sku, { resolve: false, suggest: true })
        : query.and({ index, query: variant.sku, resolve: false, suggest: true });
    for (const excludedTerm of options.search_exclusions) {
        query = query.not({ index, query: excludedTerm, resolve: false });
    }
    // The limit must cover the whole batch; a fixed 1000 would wrongly flag every
    // matching result beyond the first thousand as excluded.
    const matchedIds = new Set(query.resolve({ limit: Math.max(1000, dataToSearch.length) }));
    // Looked up once; when the rule is not registered, nothing is pushed.
    const rule = registry.getRule('search_exclusion');
    dataToSearch.forEach((element, idx) => {
        if (matchedIds.has(idx)) {
            return;
        }
        if (!element.filter_results) {
            element.filter_results = [];
        }
        if (rule) {
            element.filter_results.push({
                ruleId: 'search_exclusion',
                severity: rule.severity,
                message: HIDE_REASONS.SEARCH_EXCLUSION,
                metadata: { searchTerms: searchTerms },
                timestamp: new Date().toISOString(),
            });
        }
    });
    return dataToSearch;
}
1429
+ // ===============================
1430
+ // Legacy Filter Functions (Maintained for backward compatibility)
1431
+ // ===============================
211
1432
  export const filterScrapeResults = ({ scrapeResults, variant, variantScrapeOptions, vendorScrapeOptions, globalScrapeOptions, }) => {
212
1433
  let filteredResults = scrapeResults;
213
1434
  filteredResults = filterPriceOutliers(filteredResults, variant.price, globalScrapeOptions);
@@ -558,9 +1779,12 @@ export const calculateHideOverrideReasons = (result, variant, sku_alternates) =>
558
1779
  // TODO ALEX - Does this work? Is it doing anything?
559
1780
  hide_override_reasons.push('Product Id Linked');
560
1781
  }
561
- if (sku_alternates.some((alt) => result.title.includes(alt))) {
1782
+ if (sku_alternates.some((alt) => result?.title?.toLowerCase().includes(alt.toLowerCase()))) {
562
1783
  hide_override_reasons.push(HIDE_OVERRIDE_REASONS.ALT_SKU_MATCH);
563
1784
  }
1785
+ if (sku_alternates.some((alt) => result?.calculated_sku?.toLowerCase() === alt.toLowerCase())) {
1786
+ hide_override_reasons.push(HIDE_OVERRIDE_REASONS.CALCULATED_ALT_SKU_MATCH);
1787
+ }
564
1788
  return hide_override_reasons;
565
1789
  };
566
1790
  function getRegexUnitResultsIgnore(resultRegexResults, variantRegexResults, criticalUnits = ['ton', 'cc', 'hp']) {