price_scanner 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/price_scanner/detector.rb +21 -3
- data/lib/price_scanner/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5866ddf8fd84a9bc437fc7abb5cb6e137ce3d9f9f36d1ff778e1e8db668d0ce1
|
|
4
|
+
data.tar.gz: 92ee758d982507e59b5a3da296d5390f6b1af276ebe52359e3718909b5186271
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 151ba2ea359f8e9bfe5c44b541de9c51e83c8316a8237ca682f00f1e042ac52c2a6328fd778541ab569a7bf577caed233e83d127acf822aabd493c7d519fa6c7
|
|
7
|
+
data.tar.gz: 98e055152d925a8ed027f2baab2523d7e77804a6ea139985d296d3bb36273aa44450e80b8093ac18b23f1661b3350dafb62392948a04cdd869d31cf235d7b5b5
|
|
@@ -5,8 +5,8 @@ module PriceScanner
|
|
|
5
5
|
module Detector
|
|
6
6
|
PRICE_PATTERN = /
|
|
7
7
|
(?:zł|pln|€|\$|£)[\s\u00a0]*(?:\d{1,3}(?:[.\s\u00a0]\d{3})+|\d{1,4})(?:[.,]\d{1,2})? |
|
|
8
|
-
(?<![a-zA-Z])(?:\d{1,3}(?:[.\s\u00a0]\d{3})+|\d{1,4})[.,]\d{2}[\s\u00a0]*(?:zł|pln|€|\$|£|eur|usd|gbp)(?!\d) |
|
|
9
|
-
(?<![a-zA-Z])(?:\d{1,3}(?:[.\s\u00a0]\d{3})+|\d{1,4})[\s\u00a0]*(?:zł|pln|€|\$|£)(?!\d)
|
|
8
|
+
(?<![a-zA-Z\d])(?:\d{1,3}(?:[.\s\u00a0]\d{3})+|\d{1,4})[.,]\d{2}[\s\u00a0]*(?:zł|pln|€|\$|£|eur|usd|gbp)(?!\d) |
|
|
9
|
+
(?<![a-zA-Z\d])(?:\d{1,3}(?:[.\s\u00a0]\d{3})+|\d{1,4})[\s\u00a0]*(?:zł|pln|€|\$|£)(?!\d)
|
|
10
10
|
/ix
|
|
11
11
|
|
|
12
12
|
PER_UNIT_PATTERN = %r{(?:/\s*|za\s+)(?:kg|g|mg|l|ml|szt|m[²³23]?|cm|mm|op|opak|pcs|pc|unit|each|ea|kaps|tabl|tab)\b}i
|
|
@@ -72,7 +72,25 @@ module PriceScanner
|
|
|
72
72
|
end
|
|
73
73
|
|
|
74
74
|
def negative_price?(text_str, match_index)
|
|
75
|
-
match_index.positive?
|
|
75
|
+
return false unless match_index.positive?
|
|
76
|
+
|
|
77
|
+
# Direct prefix: "-1.040,00 zł"
|
|
78
|
+
return true if NEGATIVE_PREFIXES.include?(text_str[match_index - 1])
|
|
79
|
+
|
|
80
|
+
# Spaced prefix: "- 1.040 zł" — only when dash is at start or preceded by non-digit
|
|
81
|
+
# "Pack of 3 - 29,99 zł" → dash after digit = range separator, not negative
|
|
82
|
+
i = match_index - 1
|
|
83
|
+
i -= 1 while i >= 0 && text_str[i] =~ /\s/
|
|
84
|
+
return false unless i >= 0 && NEGATIVE_PREFIXES.include?(text_str[i])
|
|
85
|
+
|
|
86
|
+
# Dash at start of text = negative
|
|
87
|
+
return true if i == 0
|
|
88
|
+
|
|
89
|
+
# Check what's before the dash (skip whitespace)
|
|
90
|
+
j = i - 1
|
|
91
|
+
j -= 1 while j >= 0 && text_str[j] =~ /\s/
|
|
92
|
+
# Dash after digit = range separator ("3 - 29,99"), not negative
|
|
93
|
+
j < 0 || text_str[j] !~ /\d/
|
|
76
94
|
end
|
|
77
95
|
|
|
78
96
|
def per_unit_price?(text_str, match_end)
|