price_scanner 0.3.0 → 0.3.1
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/price_scanner/detector.rb +16 -1
- data/lib/price_scanner/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 933c597623a0441a0554f9cc3b408134180022e69788f15b4e3d785fcf22ab83
|
|
4
|
+
data.tar.gz: 8dc569cea4f591cd1cd11db0eba535a1b166f9913e9e6069155b1cdf08421709
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6bafb309f4ef2d2296cc7981bf44bbf8eb42e185f4995809cc0e8aa4e0aa80ce25299bda7996892d42c03a225f4b961e4c8d7dcdc98f8329145c7e09ce6a6314
|
|
7
|
+
data.tar.gz: 48fce8a5a51cacb32fe062b380da562e72dcab7a44e2c5ea4bc0a3544bb3f854257c4b86491e9d6cccd4dc2ae056329092bad4189c5434ab6e2873718e137aed
|
|
@@ -14,6 +14,10 @@ module PriceScanner
|
|
|
14
14
|
|
|
15
15
|
NEGATIVE_PREFIXES = ["-", "\u2212"].freeze
|
|
16
16
|
|
|
17
|
+
# Prefixes that indicate the following price is a savings amount, not a product price.
|
|
18
|
+
# "Oszczędzasz 6.40 PLN" = "You save 6.40 PLN" — not the product price.
|
|
19
|
+
SAVINGS_PREFIX_PATTERN = /(?:oszcz[eę]dzasz|zaoszcz[eę]d[zź]|savings?|you\s+save|rabat|discount|remise|risparmio|ahorro|sparen|sie\s+sparen)\s*:?\s*\z/i
|
|
20
|
+
|
|
17
21
|
RANGE_SEPARATOR_PATTERN = /\s*[–—]\s*|\s+-\s+/
|
|
18
22
|
|
|
19
23
|
TEXT_AFTER_LOOKAHEAD = 200
|
|
@@ -65,6 +69,7 @@ module PriceScanner
|
|
|
65
69
|
return unless value
|
|
66
70
|
|
|
67
71
|
return if negative_price?(text_str, match_index)
|
|
72
|
+
return if savings_prefix?(text_str, match_index)
|
|
68
73
|
return if !include_per_unit && per_unit_price?(text_str, match_index + match_str.length)
|
|
69
74
|
|
|
70
75
|
clean_text = match_str.gsub(Parser::COLLAPSE_WHITESPACE, " ").strip
|
|
@@ -89,6 +94,16 @@ module PriceScanner
|
|
|
89
94
|
i >= 0 ? i : nil
|
|
90
95
|
end
|
|
91
96
|
|
|
97
|
+
# Check if text before the price contains a savings prefix like "Oszczędzasz" or "You save"
|
|
98
|
+
def savings_prefix?(text_str, match_index)
|
|
99
|
+
return false unless match_index > 3
|
|
100
|
+
|
|
101
|
+
# Look at up to 30 chars before the price match
|
|
102
|
+
lookback_start = [match_index - 30, 0].max
|
|
103
|
+
text_before = text_str[lookback_start...match_index]
|
|
104
|
+
text_before.match?(SAVINGS_PREFIX_PATTERN)
|
|
105
|
+
end
|
|
106
|
+
|
|
92
107
|
def per_unit_price?(text_str, match_end)
|
|
93
108
|
text_after = text_str[match_end, TEXT_AFTER_LOOKAHEAD].to_s.gsub(Parser::COLLAPSE_WHITESPACE, " ").lstrip
|
|
94
109
|
text_after.match?(PER_UNIT_ANCHOR)
|
|
@@ -147,7 +162,7 @@ module PriceScanner
|
|
|
147
162
|
end
|
|
148
163
|
|
|
149
164
|
private_class_method :scan_raw_prices, :find_price_at, :build_price_result,
|
|
150
|
-
:negative_price?, :rindex_non_space, :per_unit_price?,
|
|
165
|
+
:negative_price?, :rindex_non_space, :savings_prefix?, :per_unit_price?,
|
|
151
166
|
:filter_range_prices, :find_range_indices, :range_between?,
|
|
152
167
|
:filter_savings_by_difference, :savings_amount?, :matches_savings_pattern?
|
|
153
168
|
end
|