price_scanner 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/lib/price_scanner/detector.rb +19 -5
- data/lib/price_scanner/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2014de33dd81d654b4fd82992474605dd110ff5154299dec1bfca80ec17f91db
|
|
4
|
+
data.tar.gz: b341b9072d034078d95c8a335d0d8f9d9b62bd2d00dc4379441a48cebe3091b9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: edffb337ea738fa8541612a49d1a2417f40c4d3463ef4b0509eef224e34172278f1ea585b2c34e470aea0feb457540d7be2218a4fdc97f2fb2e67e8c75244190
|
|
7
|
+
data.tar.gz: e0a1d51e6a441400d41c6c77c25ca9a5d083ec8e2d758a76685321281e4e14c7bf0a05d3bbac27cd60ecc04e0babf8f2bcd0aad23e471246fcbf6f957fcc890c
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.2.3
|
|
4
|
+
|
|
5
|
+
- Fix comma-as-thousands-separator not recognized in PRICE_PATTERN (`7,999.00 €` → was parsed as `999.00 €`)
|
|
6
|
+
- Affects prices in English/international format: `$1,299.99`, `8,289.00 €`, etc.
|
|
7
|
+
- Safe change: requires exactly 3 digits after separator, so decimal commas (`19,99 zł`) still work correctly
|
|
8
|
+
|
|
9
|
+
## 0.2.2
|
|
10
|
+
|
|
11
|
+
- Fix negative price detection with spaced dash ("- 1.040 zł") — savings badges with space between minus and price were not filtered
|
|
12
|
+
- Refactor `negative_price?` with `rindex_non_space` helper (DRY)
|
|
13
|
+
- Distinguish range separators ("Pack of 3 - 29,99 zł") from negative prices
|
|
14
|
+
|
|
15
|
+
## 0.2.1
|
|
16
|
+
|
|
17
|
+
- Fix false price extraction from model numbers (IP65, HC940, H265, 2K 30MP)
|
|
18
|
+
- Prevent digits before currency symbol from being matched as prices
|
|
19
|
+
|
|
20
|
+
## 0.2.0
|
|
21
|
+
|
|
22
|
+
- Remove ConsentDetector from gem (moved to smart_offers app)
|
|
23
|
+
|
|
24
|
+
## 0.1.1
|
|
25
|
+
|
|
26
|
+
- Remove rubycritic dependency
|
|
27
|
+
- Auto-require all price_scanner modules
|
|
28
|
+
|
|
29
|
+
## 0.1.0
|
|
30
|
+
|
|
31
|
+
- Initial release
|
|
32
|
+
- `PriceScanner::Parser` — normalize prices, extract currency, strip price mentions
|
|
33
|
+
- `PriceScanner::Detector` — extract prices from text, filter negatives/per-unit/ranges/savings
|
|
34
|
+
- Multi-currency support: PLN, EUR, USD, GBP
|
|
35
|
+
- Smart filtering: negative prices, per-unit prices, price ranges, savings amounts
|
data/lib/price_scanner/detector.rb
CHANGED
|
@@ -4,9 +4,9 @@ module PriceScanner
|
|
|
4
4
|
# Extracts prices from text using regex patterns with smart filtering.
|
|
5
5
|
module Detector
|
|
6
6
|
PRICE_PATTERN = /
|
|
7
|
-
(?:zł|pln|€|\$|£)[\s\u00a0]*(?:\d{1,3}(?:[
|
|
8
|
-
(?<![a-zA-Z\d])(?:\d{1,3}(?:[
|
|
9
|
-
(?<![a-zA-Z\d])(?:\d{1,3}(?:[
|
|
7
|
+
(?:zł|pln|€|\$|£)[\s\u00a0]*(?:\d{1,3}(?:[.,\s\u00a0]\d{3})+|\d{1,4})(?:[.,]\d{1,2})? |
|
|
8
|
+
(?<![a-zA-Z\d])(?:\d{1,3}(?:[.,\s\u00a0]\d{3})+|\d{1,4})[.,]\d{2}[\s\u00a0]*(?:zł|pln|€|\$|£|eur|usd|gbp)(?!\d) |
|
|
9
|
+
(?<![a-zA-Z\d])(?:\d{1,3}(?:[.,\s\u00a0]\d{3})+|\d{1,4})[\s\u00a0]*(?:zł|pln|€|\$|£)(?!\d)
|
|
10
10
|
/ix
|
|
11
11
|
|
|
12
12
|
PER_UNIT_PATTERN = %r{(?:/\s*|za\s+)(?:kg|g|mg|l|ml|szt|m[²³23]?|cm|mm|op|opak|pcs|pc|unit|each|ea|kaps|tabl|tab)\b}i
|
|
@@ -72,7 +72,21 @@ module PriceScanner
|
|
|
72
72
|
end
|
|
73
73
|
|
|
74
74
|
def negative_price?(text_str, match_index)
|
|
75
|
-
match_index.positive?
|
|
75
|
+
return false unless match_index.positive?
|
|
76
|
+
|
|
77
|
+
# Find the non-whitespace char before price: "-1.040" or "- 1.040"
|
|
78
|
+
dash_pos = rindex_non_space(text_str, match_index - 1)
|
|
79
|
+
return false unless dash_pos && NEGATIVE_PREFIXES.include?(text_str[dash_pos])
|
|
80
|
+
|
|
81
|
+
# Dash at start of text = negative; after digit = range separator ("3 - 29,99")
|
|
82
|
+
before_dash = rindex_non_space(text_str, dash_pos - 1)
|
|
83
|
+
before_dash.nil? || text_str[before_dash] !~ /\d/
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def rindex_non_space(text_str, from)
|
|
87
|
+
i = from
|
|
88
|
+
i -= 1 while i >= 0 && text_str[i] =~ /\s/
|
|
89
|
+
i >= 0 ? i : nil
|
|
76
90
|
end
|
|
77
91
|
|
|
78
92
|
def per_unit_price?(text_str, match_end)
|
|
@@ -133,7 +147,7 @@ module PriceScanner
|
|
|
133
147
|
end
|
|
134
148
|
|
|
135
149
|
private_class_method :scan_raw_prices, :find_price_at, :build_price_result,
|
|
136
|
-
:negative_price?, :per_unit_price?,
|
|
150
|
+
:negative_price?, :rindex_non_space, :per_unit_price?,
|
|
137
151
|
:filter_range_prices, :find_range_indices, :range_between?,
|
|
138
152
|
:filter_savings_by_difference, :savings_amount?, :matches_savings_pattern?
|
|
139
153
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: price_scanner
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.1
|
|
4
|
+
version: 0.2.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Justyna
|
|
@@ -21,6 +21,7 @@ files:
|
|
|
21
21
|
- ".reek.yml"
|
|
22
22
|
- ".rspec"
|
|
23
23
|
- ".rubocop.yml"
|
|
24
|
+
- CHANGELOG.md
|
|
24
25
|
- LICENSE
|
|
25
26
|
- README.md
|
|
26
27
|
- Rakefile
|