price_scanner 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8075def2aebf13ff9b49f2e8641d755f3aec22ac96fb9ebeb194ddedc9d5c9df
4
- data.tar.gz: 286a6680e672cd12c0c330ce7158c0907598056043e27ed21d75619828078a7c
3
+ metadata.gz: da472f5717e7a550415ded0a5cc6355452ef12bfd40d308f6f8b6e80c1235c1e
4
+ data.tar.gz: d3b9c31a31d35a170d81d3545713a1c7107e8dc1b2a6a59cd840b42b1081a279
5
5
  SHA512:
6
- metadata.gz: 96c5d3fd0e62555a8dd1163c163c807cd93803689c6fa35fdf036f81166637621997873133960ae4998cfe16245e3620066187b9d415ae332e03372b6b56207e
7
- data.tar.gz: '08d105bb00220c1bdcdb13b453b5ff5f315f35ddce836e1d8a965a8cb94984f640b04a9e7e0cfcc3ddbe85d0be0f018d863ce4ce6db5f172a18017288ac2ffa8'
6
+ metadata.gz: be61008b61a95e5584ff2f4dd9caf76e3eae425625a1834f72815bf255450851b880bf62acea0baad269523d14c8c4e1610674fa073cc7bddb6695052e52fb5e
7
+ data.tar.gz: 93c2af4245b1ab5a1bada8fa1203b80ca414064ff104c720e9d87307320b8a5e8560da631a7d1285c89262e3ce6a18a1197ac8d58ede95fb8c886013f8d58ee7
data/README.md CHANGED
@@ -35,16 +35,6 @@ PriceScanner.contains_price?("Only 99,00 zł") # => true
35
35
  PriceScanner.contains_price?("No price here") # => false
36
36
  ```
37
37
 
38
- ### GDPR consent detection (optional, requires nokogiri)
39
-
40
- ```ruby
41
- require "nokogiri"
42
-
43
- doc = Nokogiri::HTML(html)
44
- node = doc.css(".cookie-banner").first
45
- PriceScanner::ConsentDetector.consent_node?(node) # => true/false
46
- ```
47
-
48
38
  ### Advanced API
49
39
 
50
40
  For finer control, use `Detector` and `Parser` modules directly.
@@ -162,7 +152,7 @@ If the same price value appears multiple times, only one occurrence is kept.
162
152
 
163
153
  ## Features
164
154
 
165
- - **Zero dependencies** (nokogiri optional, only for consent detection)
155
+ - **Zero dependencies**
166
156
  - Case-insensitive currency matching
167
157
  - Handles regular spaces, non-breaking spaces (NBSP), and mixed whitespace
168
158
  - Tracks position of each price in the source text
data/lib/price_scanner/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PriceScanner
4
- VERSION = "0.1.1"
4
+ VERSION = "0.2.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: price_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justyna
@@ -25,7 +25,6 @@ files:
25
25
  - README.md
26
26
  - Rakefile
27
27
  - lib/price_scanner.rb
28
- - lib/price_scanner/consent_detector.rb
29
28
  - lib/price_scanner/detector.rb
30
29
  - lib/price_scanner/parser.rb
31
30
  - lib/price_scanner/version.rb
data/lib/price_scanner/consent_detector.rb DELETED
@@ -1,65 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module PriceScanner
4
- # Detects GDPR/cookie consent banners in HTML nodes (requires nokogiri).
5
- module ConsentDetector
6
- CONSENT_TEXT_REGEX = /
7
- \bcookie\b|\bcookies\b|\bconsent\b|\bgdpr\b|\bprivacy\b|\btracking\b|\bpreferences\b|\bpersonaliz|marketing\s+cookies|
8
- do\s+not\s+sell|opt\s+out|opt\s+in|cookie\s+policy|privacy\s+policy|
9
- \bciasteczk(?:a|i|ami|ach|om)?\b|\bprywatn|\bzgod(?:a|y|ę|zie)?\b|\brodo\b
10
- /ix
11
- CONSENT_ACTION_REGEX = /
12
- \baccept\b|\bagree\b|\ballow\b|\bmanage\b|\bpreferences\b|\bdecline\b|\breject\b|\bok\b|\bokay\b|\bcontinue\b|save\s+preferences|
13
- accept\s+all|allow\s+all|got\s+it|\brozumiem\b|\bzgadzam\b|\bakceptuj|\bzaakceptuj|\bodrzuc|\bodmow
14
- /ix
15
- CONSENT_ATTR_REGEX = /
16
- cookie|consent|gdpr|privacy|cmp|onetrust|trustarc|cookielaw|cookiebot|osano|
17
- quantcast|usercentrics|didomi|cookieyes|termly|iubenda|shopify-pc__banner
18
- /ix
19
-
20
- ANCESTOR_DEPTH = 3
21
-
22
- module_function
23
-
24
- def consent_node?(node)
25
- return false unless node
26
-
27
- nodes = [node] + node.ancestors.take(ANCESTOR_DEPTH)
28
- hits = detect_hits(nodes)
29
- text_hit = hits[:text]
30
- attr_hit = hits[:attr]
31
- return false unless text_hit || attr_hit
32
-
33
- (text_hit && hits[:action]) || attr_hit
34
- end
35
-
36
- def detect_hits(nodes)
37
- result = { text: false, attr: false, action: false }
38
- nodes.each do |item|
39
- result[:text] ||= item.text.to_s.match?(CONSENT_TEXT_REGEX)
40
- result[:attr] ||= attribute_text(item).match?(CONSENT_ATTR_REGEX)
41
- result[:action] ||= action_button?(item)
42
- end
43
- result
44
- end
45
-
46
- ATTR_KEYS = %w[id class role aria-label aria-modal].freeze
47
- ACTION_SELECTOR = "button, [role='button'], input[type='button'], input[type='submit'], a"
48
-
49
- def attribute_text(node)
50
- ATTR_KEYS.filter_map { |key| node[key] }.join(" ")
51
- end
52
-
53
- def action_button?(node)
54
- node.css(ACTION_SELECTOR).any? do |button|
55
- collect_text(button).match?(CONSENT_ACTION_REGEX)
56
- end
57
- end
58
-
59
- def collect_text(node)
60
- [node.text, node["aria-label"], node["title"], node["value"]].compact.join(" ")
61
- end
62
-
63
- private_class_method :detect_hits, :attribute_text, :action_button?, :collect_text
64
- end
65
- end