price_scanner 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5866ddf8fd84a9bc437fc7abb5cb6e137ce3d9f9f36d1ff778e1e8db668d0ce1
4
- data.tar.gz: 92ee758d982507e59b5a3da296d5390f6b1af276ebe52359e3718909b5186271
3
+ metadata.gz: 2014de33dd81d654b4fd82992474605dd110ff5154299dec1bfca80ec17f91db
4
+ data.tar.gz: b341b9072d034078d95c8a335d0d8f9d9b62bd2d00dc4379441a48cebe3091b9
5
5
  SHA512:
6
- metadata.gz: 151ba2ea359f8e9bfe5c44b541de9c51e83c8316a8237ca682f00f1e042ac52c2a6328fd778541ab569a7bf577caed233e83d127acf822aabd493c7d519fa6c7
7
- data.tar.gz: 98e055152d925a8ed027f2baab2523d7e77804a6ea139985d296d3bb36273aa44450e80b8093ac18b23f1661b3350dafb62392948a04cdd869d31cf235d7b5b5
6
+ metadata.gz: edffb337ea738fa8541612a49d1a2417f40c4d3463ef4b0509eef224e34172278f1ea585b2c34e470aea0feb457540d7be2218a4fdc97f2fb2e67e8c75244190
7
+ data.tar.gz: e0a1d51e6a441400d41c6c77c25ca9a5d083ec8e2d758a76685321281e4e14c7bf0a05d3bbac27cd60ecc04e0babf8f2bcd0aad23e471246fcbf6f957fcc890c
data/CHANGELOG.md ADDED
@@ -0,0 +1,35 @@
1
+ # Changelog
2
+
3
+ ## 0.2.3
4
+
5
+ - Fix comma used as a thousands separator not being recognized in PRICE_PATTERN (`7,999.00 €` was parsed as `999.00 €`)
6
+ - Affects prices in English/international format: `$1,299.99`, `8,289.00 €`, etc.
7
+ - Safe change: requires exactly 3 digits after separator, so decimal commas (`19,99 zł`) still work correctly
8
+
9
+ ## 0.2.2
10
+
11
+ - Fix negative price detection with spaced dash ("- 1.040 zł") — savings badges with space between minus and price were not filtered
12
+ - Refactor `negative_price?` with `rindex_non_space` helper (DRY)
13
+ - Distinguish range separators ("Pack of 3 - 29,99 zł") from negative prices
14
+
15
+ ## 0.2.1
16
+
17
+ - Fix false price extraction from model numbers (IP65, HC940, H265, 2K 30MP)
18
+ - Prevent digits before currency symbol from being matched as prices
19
+
20
+ ## 0.2.0
21
+
22
+ - Remove ConsentDetector from gem (moved to smart_offers app)
23
+
24
+ ## 0.1.1
25
+
26
+ - Remove rubycritic dependency
27
+ - Auto-require all price_scanner modules
28
+
29
+ ## 0.1.0
30
+
31
+ - Initial release
32
+ - `PriceScanner::Parser` — normalize prices, extract currency, strip price mentions
33
+ - `PriceScanner::Detector` — extract prices from text, filter negatives/per-unit/ranges/savings
34
+ - Multi-currency support: PLN, EUR, USD, GBP
35
+ - Smart filtering: negative prices, per-unit prices, price ranges, savings amounts
@@ -4,9 +4,9 @@ module PriceScanner
4
4
  # Extracts prices from text using regex patterns with smart filtering.
5
5
  module Detector
6
6
  PRICE_PATTERN = /
7
- (?:zł|pln|€|\$|£)[\s\u00a0]*(?:\d{1,3}(?:[.\s\u00a0]\d{3})+|\d{1,4})(?:[.,]\d{1,2})? |
8
- (?<![a-zA-Z\d])(?:\d{1,3}(?:[.\s\u00a0]\d{3})+|\d{1,4})[.,]\d{2}[\s\u00a0]*(?:zł|pln|€|\$|£|eur|usd|gbp)(?!\d) |
9
- (?<![a-zA-Z\d])(?:\d{1,3}(?:[.\s\u00a0]\d{3})+|\d{1,4})[\s\u00a0]*(?:zł|pln|€|\$|£)(?!\d)
7
+ (?:zł|pln|€|\$|£)[\s\u00a0]*(?:\d{1,3}(?:[.,\s\u00a0]\d{3})+|\d{1,4})(?:[.,]\d{1,2})? |
8
+ (?<![a-zA-Z\d])(?:\d{1,3}(?:[.,\s\u00a0]\d{3})+|\d{1,4})[.,]\d{2}[\s\u00a0]*(?:zł|pln|€|\$|£|eur|usd|gbp)(?!\d) |
9
+ (?<![a-zA-Z\d])(?:\d{1,3}(?:[.,\s\u00a0]\d{3})+|\d{1,4})[\s\u00a0]*(?:zł|pln|€|\$|£)(?!\d)
10
10
  /ix
11
11
 
12
12
  PER_UNIT_PATTERN = %r{(?:/\s*|za\s+)(?:kg|g|mg|l|ml|szt|m[²³23]?|cm|mm|op|opak|pcs|pc|unit|each|ea|kaps|tabl|tab)\b}i
@@ -74,23 +74,19 @@ module PriceScanner
74
74
  def negative_price?(text_str, match_index)
75
75
  return false unless match_index.positive?
76
76
 
77
- # Direct prefix: "-1.040,00 "
78
- return true if NEGATIVE_PREFIXES.include?(text_str[match_index - 1])
77
+ # Find the non-whitespace char before price: "-1.040" or "- 1.040"
78
+ dash_pos = rindex_non_space(text_str, match_index - 1)
79
+ return false unless dash_pos && NEGATIVE_PREFIXES.include?(text_str[dash_pos])
79
80
 
80
- # Spaced prefix: "- 1.040 zł" only when dash is at start or preceded by non-digit
81
- # "Pack of 3 - 29,99 zł" → dash after digit = range separator, not negative
82
- i = match_index - 1
83
- i -= 1 while i >= 0 && text_str[i] =~ /\s/
84
- return false unless i >= 0 && NEGATIVE_PREFIXES.include?(text_str[i])
85
-
86
- # Dash at start of text = negative
87
- return true if i == 0
81
+ # Dash at start of text = negative; after digit = range separator ("3 - 29,99")
82
+ before_dash = rindex_non_space(text_str, dash_pos - 1)
83
+ before_dash.nil? || text_str[before_dash] !~ /\d/
84
+ end
88
85
 
89
- # Check what's before the dash (skip whitespace)
90
- j = i - 1
91
- j -= 1 while j >= 0 && text_str[j] =~ /\s/
92
- # Dash after digit = range separator ("3 - 29,99"), not negative
93
- j < 0 || text_str[j] !~ /\d/
86
+ def rindex_non_space(text_str, from)
87
+ i = from
88
+ i -= 1 while i >= 0 && text_str[i] =~ /\s/
89
+ i >= 0 ? i : nil
94
90
  end
95
91
 
96
92
  def per_unit_price?(text_str, match_end)
@@ -151,7 +147,7 @@ module PriceScanner
151
147
  end
152
148
 
153
149
  private_class_method :scan_raw_prices, :find_price_at, :build_price_result,
154
- :negative_price?, :per_unit_price?,
150
+ :negative_price?, :rindex_non_space, :per_unit_price?,
155
151
  :filter_range_prices, :find_range_indices, :range_between?,
156
152
  :filter_savings_by_difference, :savings_amount?, :matches_savings_pattern?
157
153
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PriceScanner
4
- VERSION = "0.2.2"
4
+ VERSION = "0.2.3"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: price_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justyna
@@ -21,6 +21,7 @@ files:
21
21
  - ".reek.yml"
22
22
  - ".rspec"
23
23
  - ".rubocop.yml"
24
+ - CHANGELOG.md
24
25
  - LICENSE
25
26
  - README.md
26
27
  - Rakefile