price_scanner 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -11
- data/lib/price_scanner/version.rb +1 -1
- metadata +1 -2
- data/lib/price_scanner/consent_detector.rb +0 -65
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: da472f5717e7a550415ded0a5cc6355452ef12bfd40d308f6f8b6e80c1235c1e
|
|
4
|
+
data.tar.gz: d3b9c31a31d35a170d81d3545713a1c7107e8dc1b2a6a59cd840b42b1081a279
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: be61008b61a95e5584ff2f4dd9caf76e3eae425625a1834f72815bf255450851b880bf62acea0baad269523d14c8c4e1610674fa073cc7bddb6695052e52fb5e
|
|
7
|
+
data.tar.gz: 93c2af4245b1ab5a1bada8fa1203b80ca414064ff104c720e9d87307320b8a5e8560da631a7d1285c89262e3ce6a18a1197ac8d58ede95fb8c886013f8d58ee7
|
data/README.md
CHANGED
|
@@ -35,16 +35,6 @@ PriceScanner.contains_price?("Only 99,00 zł") # => true
|
|
|
35
35
|
PriceScanner.contains_price?("No price here") # => false
|
|
36
36
|
```
|
|
37
37
|
|
|
38
|
-
### GDPR consent detection (optional, requires nokogiri)
|
|
39
|
-
|
|
40
|
-
```ruby
|
|
41
|
-
require "nokogiri"
|
|
42
|
-
|
|
43
|
-
doc = Nokogiri::HTML(html)
|
|
44
|
-
node = doc.css(".cookie-banner").first
|
|
45
|
-
PriceScanner::ConsentDetector.consent_node?(node) # => true/false
|
|
46
|
-
```
|
|
47
|
-
|
|
48
38
|
### Advanced API
|
|
49
39
|
|
|
50
40
|
For finer control, use `Detector` and `Parser` modules directly.
|
|
@@ -162,7 +152,7 @@ If the same price value appears multiple times, only one occurrence is kept.
|
|
|
162
152
|
|
|
163
153
|
## Features
|
|
164
154
|
|
|
165
|
-
- **Zero dependencies**
|
|
155
|
+
- **Zero dependencies**
|
|
166
156
|
- Case-insensitive currency matching
|
|
167
157
|
- Handles regular spaces, non-breaking spaces (NBSP), and mixed whitespace
|
|
168
158
|
- Tracks position of each price in the source text
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: price_scanner
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.1
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Justyna
|
|
@@ -25,7 +25,6 @@ files:
|
|
|
25
25
|
- README.md
|
|
26
26
|
- Rakefile
|
|
27
27
|
- lib/price_scanner.rb
|
|
28
|
-
- lib/price_scanner/consent_detector.rb
|
|
29
28
|
- lib/price_scanner/detector.rb
|
|
30
29
|
- lib/price_scanner/parser.rb
|
|
31
30
|
- lib/price_scanner/version.rb
|
|
data/lib/price_scanner/consent_detector.rb
DELETED
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module PriceScanner
|
|
4
|
-
# Detects GDPR/cookie consent banners in HTML nodes (requires nokogiri).
|
|
5
|
-
module ConsentDetector
|
|
6
|
-
CONSENT_TEXT_REGEX = /
|
|
7
|
-
\bcookie\b|\bcookies\b|\bconsent\b|\bgdpr\b|\bprivacy\b|\btracking\b|\bpreferences\b|\bpersonaliz|marketing\s+cookies|
|
|
8
|
-
do\s+not\s+sell|opt\s+out|opt\s+in|cookie\s+policy|privacy\s+policy|
|
|
9
|
-
\bciasteczk(?:a|i|ami|ach|om)?\b|\bprywatn|\bzgod(?:a|y|ę|zie)?\b|\brodo\b
|
|
10
|
-
/ix
|
|
11
|
-
CONSENT_ACTION_REGEX = /
|
|
12
|
-
\baccept\b|\bagree\b|\ballow\b|\bmanage\b|\bpreferences\b|\bdecline\b|\breject\b|\bok\b|\bokay\b|\bcontinue\b|save\s+preferences|
|
|
13
|
-
accept\s+all|allow\s+all|got\s+it|\brozumiem\b|\bzgadzam\b|\bakceptuj|\bzaakceptuj|\bodrzuc|\bodmow
|
|
14
|
-
/ix
|
|
15
|
-
CONSENT_ATTR_REGEX = /
|
|
16
|
-
cookie|consent|gdpr|privacy|cmp|onetrust|trustarc|cookielaw|cookiebot|osano|
|
|
17
|
-
quantcast|usercentrics|didomi|cookieyes|termly|iubenda|shopify-pc__banner
|
|
18
|
-
/ix
|
|
19
|
-
|
|
20
|
-
ANCESTOR_DEPTH = 3
|
|
21
|
-
|
|
22
|
-
module_function
|
|
23
|
-
|
|
24
|
-
def consent_node?(node)
|
|
25
|
-
return false unless node
|
|
26
|
-
|
|
27
|
-
nodes = [node] + node.ancestors.take(ANCESTOR_DEPTH)
|
|
28
|
-
hits = detect_hits(nodes)
|
|
29
|
-
text_hit = hits[:text]
|
|
30
|
-
attr_hit = hits[:attr]
|
|
31
|
-
return false unless text_hit || attr_hit
|
|
32
|
-
|
|
33
|
-
(text_hit && hits[:action]) || attr_hit
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
def detect_hits(nodes)
|
|
37
|
-
result = { text: false, attr: false, action: false }
|
|
38
|
-
nodes.each do |item|
|
|
39
|
-
result[:text] ||= item.text.to_s.match?(CONSENT_TEXT_REGEX)
|
|
40
|
-
result[:attr] ||= attribute_text(item).match?(CONSENT_ATTR_REGEX)
|
|
41
|
-
result[:action] ||= action_button?(item)
|
|
42
|
-
end
|
|
43
|
-
result
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
ATTR_KEYS = %w[id class role aria-label aria-modal].freeze
|
|
47
|
-
ACTION_SELECTOR = "button, [role='button'], input[type='button'], input[type='submit'], a"
|
|
48
|
-
|
|
49
|
-
def attribute_text(node)
|
|
50
|
-
ATTR_KEYS.filter_map { |key| node[key] }.join(" ")
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
def action_button?(node)
|
|
54
|
-
node.css(ACTION_SELECTOR).any? do |button|
|
|
55
|
-
collect_text(button).match?(CONSENT_ACTION_REGEX)
|
|
56
|
-
end
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
def collect_text(node)
|
|
60
|
-
[node.text, node["aria-label"], node["title"], node["value"]].compact.join(" ")
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
private_class_method :detect_hits, :attribute_text, :action_button?, :collect_text
|
|
64
|
-
end
|
|
65
|
-
end
|