price_scanner 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8617b68f1e73ccf328691ccae1583660f280c8bb0976109e27b4bfbb39e28aa5
4
- data.tar.gz: '091ee84ae22a07123028cbf482cf4978089e7d3b3a65e944a4f6ad099935a67e'
3
+ metadata.gz: fe537217d5cc6562f1f6198123b1ea1a9de43c61452abc9764f0bb695df3873b
4
+ data.tar.gz: 0cfeeb21a40fdfeeac475dffdf281d2740f5339465f1ddf45f9649632ffe5585
5
5
  SHA512:
6
- metadata.gz: 25d2830a0f48aa2549c3a6a70886c48dffa75ed6377a8237d470d80c0d5f1d4d5def6fd643ad48beeae3cca4c07eef1ec11ddedd85fe8d72cd421fb608667431
7
- data.tar.gz: 33e40fa33ce128411f19f9f1678e97dace81134d195bfd95ea7ec34d2f1d6eba59246c0bc4cc21b9f66f00e41c4490007114428164b052eaa5fef67fa449ebc4
6
+ metadata.gz: 5d6f7530f8339d8cdc023fd8dabb282ce64004d26d5698b8cce943e58ab50017ead499fae745e18189ed4f9604f84ad395a8fd1aa61d66cc588eba40b3844d29
7
+ data.tar.gz: 420cb69e1d0c16fa5a83cadb0897f7d96dc45836646207e0d97c706c503b1e56e02cf23b0f67507b6ec6f792c9a61c2c3dc8959a6e8c9a734220dc923cb20347
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.3.4
4
+
5
+ - Fix false promotions from duplicated price ranges in accessibility markup (e.g. WooCommerce `screen-reader-text`)
6
+ - Range filter now removes all prices whose values match detected range values, not just the directly connected pair
7
+ - Prevents "2,90€ – 16,90€" + "Plage de prix : 2,90€ à 16,90€" from producing a false 83% discount
8
+
3
9
  ## 0.3.0
4
10
 
5
11
  - Add `include_per_unit:` option to `extract_prices_from_text` — allows including per-unit prices (`£46.00/M`, `29,99 zł/kg`) that are filtered by default
@@ -3,10 +3,13 @@
3
3
  module PriceScanner
4
4
  # Extracts prices from text using regex patterns with smart filtering.
5
5
  module Detector
6
+ # Whitespace accepted as a thousands separator or between the currency symbol and the amount: any \s whitespace, NBSP (\u00a0), narrow NBSP (\u202f)
7
+ SP = "[\\s\\u00a0\\u202f]"
8
+
6
9
  PRICE_PATTERN = /
7
- (?:zł|pln|€|\$|£)[\s\u00a0]*(?:\d{1,3}(?:[.,\s\u00a0]\d{3})+|\d{1,4})(?:[.,]\d{1,2})? |
8
- (?<![a-zA-Z\d])(?:\d{1,3}(?:[.,\s\u00a0]\d{3})+|\d{1,4})[.,]\d{2}[\s\u00a0]*(?:zł|pln|€|\$|£|eur|usd|gbp)(?!\d) |
9
- (?<![a-zA-Z\d])(?:\d{1,3}(?:[.,\s\u00a0]\d{3})+|\d{1,4})[\s\u00a0]*(?:zł|pln|€|\$|£)(?!\d)
10
+ (?:zł|pln|€|\$|£)#{SP}*(?:\d{1,3}(?:[.,#{SP}]\d{3})+|\d{1,4})(?:[.,]\d{1,2})? |
11
+ (?<![a-zA-Z\d])(?:\d{1,3}(?:[.,#{SP}]\d{3})+|\d{1,4})[.,]\d{2}#{SP}*(?:zł|pln|€|\$|£|eur|usd|gbp)(?!\d) |
12
+ (?<![a-zA-Z\d])(?:\d{1,3}(?:[.,#{SP}]\d{3})+|\d{1,4})#{SP}*(?:zł|pln|€|\$|£)(?!\d)
10
13
  /ix
11
14
 
12
15
  PER_UNIT_PATTERN = %r{(?:/\s*|za\s+)(?:kg|g|mg|l|ml|szt|m[²³23]?|cm|mm|op|opak|pcs|pc|unit|each|ea|kaps|tabl|tab)\b}i
@@ -113,7 +116,13 @@ module PriceScanner
113
116
  return prices if prices.size < MIN_PRICES_FOR_RANGE
114
117
 
115
118
  range_indices = find_range_indices(prices, text)
116
- prices.reject.with_index { |_, idx| range_indices.include?(idx) }
119
+ return prices if range_indices.empty?
120
+
121
+ # Remove range prices AND any duplicates with same values.
122
+ # Handles accessibility markup (e.g. screen-reader-text) that repeats
123
+ # range prices with non-dash separators like "à" or "to".
124
+ range_values = range_indices.map { |idx| prices[idx][:value] }.to_set
125
+ prices.reject.with_index { |price, idx| range_indices.include?(idx) || range_values.include?(price[:value]) }
117
126
  end
118
127
 
119
128
  def find_range_indices(prices, text)
@@ -17,13 +17,14 @@ module PriceScanner
17
17
  MULTIPLE_SPACES = /\s{2,}/
18
18
  COLLAPSE_WHITESPACE = /\s+/
19
19
  NBSP = "\u00a0"
20
+ NNBSP = "\u202f"
20
21
  DECIMAL_PLACES = 2
21
22
  THOUSANDS_GROUP = /.{1,3}/
22
23
 
23
24
  module_function
24
25
 
25
26
  def normalized_price(value)
26
- text = value.to_s.tr(NBSP, " ").strip
27
+ text = value.to_s.tr(NBSP + NNBSP, " ").strip
27
28
  return nil if text.empty?
28
29
 
29
30
  clean = clean_price_text(text)
@@ -83,7 +84,7 @@ module PriceScanner
83
84
  end
84
85
 
85
86
  def strip_single_price(cleaned, price)
86
- normalized = price.to_s.tr(NBSP, " ").strip
87
+ normalized = price.to_s.tr(NBSP + NNBSP, " ").strip
87
88
  return cleaned if normalized.empty?
88
89
 
89
90
  result = cleaned.gsub(normalized, "").gsub(normalized.delete(" "), "")
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PriceScanner
4
- VERSION = "0.3.2"
4
+ VERSION = "0.3.4"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: price_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justyna