price_scanner 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +8 -1
- data/lib/price_scanner/detector.rb +8 -8
- data/lib/price_scanner/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 253513985856fa4e2c504df5157eefcf971e1f686fe3869ec5a93b047603294b
|
|
4
|
+
data.tar.gz: b098c087d64fc1ed716575576ce5cfd0b30d59b9d667858ac71a203ea42d4373
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2f5cf01095f4ed5beb298262ce385ed43b3cadecbc130590c57a8dfac705ff0dcbe5a74a0534cb4426ef05aba8621d74bae99626709664949a48f31b9a88d432
|
|
7
|
+
data.tar.gz: 27d95b2ce089306059edb5606fc1acd8034dfbf7bb343e253e821edfc259f3389ec1fcc3bde08a1cd3dd25270da206e9bc569d53817a012b3e5e2714b9602f7e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.3.0
|
|
4
|
+
|
|
5
|
+
- Add `include_per_unit:` option to `extract_prices_from_text` — allows including per-unit prices (`£46.00/M`, `29,99 zł/kg`), which are filtered out by default
|
|
6
|
+
|
|
3
7
|
## 0.2.3
|
|
4
8
|
|
|
5
9
|
- Fix comma used as a thousands separator not being recognized by PRICE_PATTERN (`7,999.00 €` was parsed as `999.00 €`)
|
data/README.md
CHANGED
|
@@ -134,7 +134,7 @@ PriceScanner.scan("Was 449,00 zł, now 349,00 zł. You save 100,00 zł!")
|
|
|
134
134
|
|
|
135
135
|
### Per-unit prices
|
|
136
136
|
|
|
137
|
-
Prices followed by a unit indicator are filtered out.
|
|
137
|
+
Prices followed by a unit indicator are filtered out by default.
|
|
138
138
|
|
|
139
139
|
Supported units: `kg`, `g`, `mg`, `l`, `ml`, `szt`, `m`, `m²`, `m³`, `cm`, `mm`, `op`, `opak`, `pcs`, `pc`, `unit`, `each`, `ea`, `kaps`, `tabl`, `tab`
|
|
140
140
|
|
|
@@ -146,6 +146,13 @@ PriceScanner.scan("32,74 zł/kg — buy 500g for 16,37 zł")
|
|
|
146
146
|
# 32,74 zł/kg is excluded
|
|
147
147
|
```
|
|
148
148
|
|
|
149
|
+
For products priced exclusively per unit (e.g., fabrics sold per meter, bulk goods per kg), pass `include_per_unit: true`:
|
|
150
|
+
|
|
151
|
+
```ruby
|
|
152
|
+
PriceScanner::Detector.extract_prices_from_text("£46.00/M £13.55/M", include_per_unit: true)
|
|
153
|
+
# => [{text: "£46.00/M", value: 46.0}, {text: "£13.55/M", value: 13.55}]
|
|
154
|
+
```
|
|
155
|
+
|
|
149
156
|
### Deduplication
|
|
150
157
|
|
|
151
158
|
If the same price value appears multiple times, only one occurrence is kept.
|
data/lib/price_scanner/detector.rb
CHANGED
|
@@ -26,9 +26,9 @@ module PriceScanner
|
|
|
26
26
|
|
|
27
27
|
module_function
|
|
28
28
|
|
|
29
|
-
def extract_prices_from_text(text)
|
|
29
|
+
def extract_prices_from_text(text, include_per_unit: false)
|
|
30
30
|
text_str = text.to_s
|
|
31
|
-
raw_prices = scan_raw_prices(text_str)
|
|
31
|
+
raw_prices = scan_raw_prices(text_str, include_per_unit: include_per_unit)
|
|
32
32
|
filtered = filter_range_prices(raw_prices, text_str)
|
|
33
33
|
unique = filtered.uniq { |price| price[:value] }
|
|
34
34
|
filter_savings_by_difference(unique)
|
|
@@ -38,34 +38,34 @@ module PriceScanner
|
|
|
38
38
|
text.to_s.match?(PRICE_PATTERN)
|
|
39
39
|
end
|
|
40
40
|
|
|
41
|
-
def scan_raw_prices(text_str)
|
|
41
|
+
def scan_raw_prices(text_str, include_per_unit: false)
|
|
42
42
|
results = []
|
|
43
43
|
last_end = 0
|
|
44
44
|
|
|
45
45
|
text_str.scan(PRICE_PATTERN) do |match_str|
|
|
46
|
-
result, last_end = find_price_at(text_str, match_str, last_end)
|
|
46
|
+
result, last_end = find_price_at(text_str, match_str, last_end, include_per_unit: include_per_unit)
|
|
47
47
|
results << result if result
|
|
48
48
|
end
|
|
49
49
|
|
|
50
50
|
results
|
|
51
51
|
end
|
|
52
52
|
|
|
53
|
-
def find_price_at(text_str, match_str, search_from)
|
|
53
|
+
def find_price_at(text_str, match_str, search_from, include_per_unit: false)
|
|
54
54
|
return [nil, search_from] if match_str.empty?
|
|
55
55
|
|
|
56
56
|
match_index = text_str.index(match_str, search_from)
|
|
57
57
|
return [nil, search_from] unless match_index
|
|
58
58
|
|
|
59
59
|
match_end = match_index + match_str.length
|
|
60
|
-
[build_price_result(text_str, match_str, match_index), match_end]
|
|
60
|
+
[build_price_result(text_str, match_str, match_index, include_per_unit: include_per_unit), match_end]
|
|
61
61
|
end
|
|
62
62
|
|
|
63
|
-
def build_price_result(text_str, match_str, match_index)
|
|
63
|
+
def build_price_result(text_str, match_str, match_index, include_per_unit: false)
|
|
64
64
|
value = Parser.normalized_price(match_str)
|
|
65
65
|
return unless value
|
|
66
66
|
|
|
67
67
|
return if negative_price?(text_str, match_index)
|
|
68
|
-
return if per_unit_price?(text_str, match_index + match_str.length)
|
|
68
|
+
return if !include_per_unit && per_unit_price?(text_str, match_index + match_str.length)
|
|
69
69
|
|
|
70
70
|
clean_text = match_str.gsub(Parser::COLLAPSE_WHITESPACE, " ").strip
|
|
71
71
|
{ text: clean_text, value: value, position: match_index }
|