price_scanner 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2014de33dd81d654b4fd82992474605dd110ff5154299dec1bfca80ec17f91db
4
- data.tar.gz: b341b9072d034078d95c8a335d0d8f9d9b62bd2d00dc4379441a48cebe3091b9
3
+ metadata.gz: 253513985856fa4e2c504df5157eefcf971e1f686fe3869ec5a93b047603294b
4
+ data.tar.gz: b098c087d64fc1ed716575576ce5cfd0b30d59b9d667858ac71a203ea42d4373
5
5
  SHA512:
6
- metadata.gz: edffb337ea738fa8541612a49d1a2417f40c4d3463ef4b0509eef224e34172278f1ea585b2c34e470aea0feb457540d7be2218a4fdc97f2fb2e67e8c75244190
7
- data.tar.gz: e0a1d51e6a441400d41c6c77c25ca9a5d083ec8e2d758a76685321281e4e14c7bf0a05d3bbac27cd60ecc04e0babf8f2bcd0aad23e471246fcbf6f957fcc890c
6
+ metadata.gz: 2f5cf01095f4ed5beb298262ce385ed43b3cadecbc130590c57a8dfac705ff0dcbe5a74a0534cb4426ef05aba8621d74bae99626709664949a48f31b9a88d432
7
+ data.tar.gz: 27d95b2ce089306059edb5606fc1acd8034dfbf7bb343e253e821edfc259f3389ec1fcc3bde08a1cd3dd25270da206e9bc569d53817a012b3e5e2714b9602f7e
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.3.0
4
+
5
+ - Add `include_per_unit:` option to `extract_prices_from_text` — allows including per-unit prices (`£46.00/M`, `29,99 zł/kg`) that are filtered by default
6
+
3
7
  ## 0.2.3
4
8
 
5
9
  - Fix comma-as-thousands-separator not recognized in PRICE_PATTERN (`7,999.00 €` → was parsed as `999.00 €`)
data/README.md CHANGED
@@ -134,7 +134,7 @@ PriceScanner.scan("Was 449,00 zł, now 349,00 zł. You save 100,00 zł!")
134
134
 
135
135
  ### Per-unit prices
136
136
 
137
- Prices followed by a unit indicator are filtered out.
137
+ Prices followed by a unit indicator are filtered out by default.
138
138
 
139
139
  Supported units: `kg`, `g`, `mg`, `l`, `ml`, `szt`, `m`, `m²`, `m³`, `cm`, `mm`, `op`, `opak`, `pcs`, `pc`, `unit`, `each`, `ea`, `kaps`, `tabl`, `tab`
140
140
 
@@ -146,6 +146,13 @@ PriceScanner.scan("32,74 zł/kg — buy 500g for 16,37 zł")
146
146
  # 32,74 zł/kg is excluded
147
147
  ```
148
148
 
149
+ For products priced exclusively per unit (e.g., fabrics sold per meter, bulk goods per kg), pass `include_per_unit: true`:
150
+
151
+ ```ruby
152
+ PriceScanner::Detector.extract_prices_from_text("£46.00/M £13.55/M", include_per_unit: true)
153
+ # => [{text: "£46.00/M", value: 46.0}, {text: "£13.55/M", value: 13.55}]
154
+ ```
155
+
149
156
  ### Deduplication
150
157
 
151
158
  If the same price value appears multiple times, only one occurrence is kept.
@@ -26,9 +26,9 @@ module PriceScanner
26
26
 
27
27
  module_function
28
28
 
29
- def extract_prices_from_text(text)
29
+ def extract_prices_from_text(text, include_per_unit: false)
30
30
  text_str = text.to_s
31
- raw_prices = scan_raw_prices(text_str)
31
+ raw_prices = scan_raw_prices(text_str, include_per_unit: include_per_unit)
32
32
  filtered = filter_range_prices(raw_prices, text_str)
33
33
  unique = filtered.uniq { |price| price[:value] }
34
34
  filter_savings_by_difference(unique)
@@ -38,34 +38,34 @@ module PriceScanner
38
38
  text.to_s.match?(PRICE_PATTERN)
39
39
  end
40
40
 
41
- def scan_raw_prices(text_str)
41
+ def scan_raw_prices(text_str, include_per_unit: false)
42
42
  results = []
43
43
  last_end = 0
44
44
 
45
45
  text_str.scan(PRICE_PATTERN) do |match_str|
46
- result, last_end = find_price_at(text_str, match_str, last_end)
46
+ result, last_end = find_price_at(text_str, match_str, last_end, include_per_unit: include_per_unit)
47
47
  results << result if result
48
48
  end
49
49
 
50
50
  results
51
51
  end
52
52
 
53
- def find_price_at(text_str, match_str, search_from)
53
+ def find_price_at(text_str, match_str, search_from, include_per_unit: false)
54
54
  return [nil, search_from] if match_str.empty?
55
55
 
56
56
  match_index = text_str.index(match_str, search_from)
57
57
  return [nil, search_from] unless match_index
58
58
 
59
59
  match_end = match_index + match_str.length
60
- [build_price_result(text_str, match_str, match_index), match_end]
60
+ [build_price_result(text_str, match_str, match_index, include_per_unit: include_per_unit), match_end]
61
61
  end
62
62
 
63
- def build_price_result(text_str, match_str, match_index)
63
+ def build_price_result(text_str, match_str, match_index, include_per_unit: false)
64
64
  value = Parser.normalized_price(match_str)
65
65
  return unless value
66
66
 
67
67
  return if negative_price?(text_str, match_index)
68
- return if per_unit_price?(text_str, match_index + match_str.length)
68
+ return if !include_per_unit && per_unit_price?(text_str, match_index + match_str.length)
69
69
 
70
70
  clean_text = match_str.gsub(Parser::COLLAPSE_WHITESPACE, " ").strip
71
71
  { text: clean_text, value: value, position: match_index }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PriceScanner
4
- VERSION = "0.2.3"
4
+ VERSION = "0.3.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: price_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justyna