digger 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/digger/pattern.rb +19 -8
- data/lib/digger/version.rb +1 -1
- data/spec/pattern_spec.rb +6 -0
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 025bfc4f8d6dbf55994363070b67a450fd421457c4f12bf9b714fe965aefaad6
|
|
4
|
+
data.tar.gz: a8464f07a53c332ddb40457998b830b1dff99661431a7608d9dc075175f5e9f1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d84a2f8ee2d65c16d79b66795fc618b07fb5ba4517d902eec7e17107b69c240b36f68728de49f451fd143ddeb872743a906b608d413446971e5f38a632cf6fbe
|
|
7
|
+
data.tar.gz: 4f36e035bab956bd6702024d951eb9963f2f97812f355e00b0840148eaa4fef07949abac0f874f889c88703addd13ea2bc69efa61bac3872b14f371f7fa47ded
|
data/lib/digger/pattern.rb
CHANGED
|
@@ -30,8 +30,8 @@ module Digger
|
|
|
30
30
|
|
|
31
31
|
MATCH_MAX = 3
|
|
32
32
|
|
|
33
|
-
TYPES_REGEXP = 0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many]
|
|
34
|
-
TYPES_CSS = %w[css_one css_many].freeze
|
|
33
|
+
TYPES_REGEXP = 0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many match_all]
|
|
34
|
+
TYPES_CSS = %w[css_one css_many css_all].freeze
|
|
35
35
|
TYPES_JSON = %w[json jsonp].freeze
|
|
36
36
|
TYPES_OTHER = %w[cookie plain lines header body].freeze
|
|
37
37
|
|
|
@@ -61,7 +61,7 @@ module Digger
|
|
|
61
61
|
end
|
|
62
62
|
|
|
63
63
|
def get_plain(page)
|
|
64
|
-
safe_block.call(page.doc
|
|
64
|
+
safe_block.call(page.doc&.text)
|
|
65
65
|
end
|
|
66
66
|
|
|
67
67
|
def get_lines(page)
|
|
@@ -82,25 +82,36 @@ module Digger
|
|
|
82
82
|
end
|
|
83
83
|
|
|
84
84
|
def css_match(doc)
|
|
85
|
-
block = safe_block { |node| node&.content&.strip }
|
|
86
85
|
# content is Nokogiri::HTML::Document
|
|
87
86
|
contents = doc.css(value)
|
|
88
87
|
if type == 'css_many'
|
|
89
|
-
|
|
88
|
+
block = safe_block { |node| node&.content&.strip }
|
|
89
|
+
contents.map { |node| block.call(node) }
|
|
90
|
+
elsif type == 'css_all'
|
|
91
|
+
block = safe_block
|
|
92
|
+
block.call(contents)
|
|
90
93
|
else
|
|
94
|
+
block = safe_block { |node| node&.content&.strip }
|
|
91
95
|
block.call(contents.first)
|
|
92
96
|
end
|
|
93
97
|
end
|
|
94
98
|
|
|
95
99
|
def regexp_match(body)
|
|
96
|
-
block = safe_block(&:strip)
|
|
97
100
|
# content is String
|
|
98
|
-
if type == 'match_many'
|
|
101
|
+
if %w[match_many match_all].include? type
|
|
99
102
|
regexp = value.is_a?(Regexp) ? value : Regexp.new(value.to_s)
|
|
100
|
-
body.gsub(regexp).to_a
|
|
103
|
+
matches = body.gsub(regexp).to_a
|
|
104
|
+
if type == 'match_many'
|
|
105
|
+
block = safe_block(&:strip)
|
|
106
|
+
matches.map { |node| block.call(node) }
|
|
107
|
+
else
|
|
108
|
+
block = safe_block
|
|
109
|
+
block.call(matches)
|
|
110
|
+
end
|
|
101
111
|
else
|
|
102
112
|
index = TYPES_REGEXP.index(type)
|
|
103
113
|
matches = body.match(value)
|
|
114
|
+
block = safe_block(&:strip)
|
|
104
115
|
block.call(matches[index]) unless matches.nil?
|
|
105
116
|
end
|
|
106
117
|
end
|
data/lib/digger/version.rb
CHANGED
data/spec/pattern_spec.rb
CHANGED
|
@@ -24,4 +24,10 @@ describe Digger::Pattern do
|
|
|
24
24
|
p3 = Digger::Pattern.new({ type: 'plain' })
|
|
25
25
|
expect(p3.match_page(page).length).to be > 100
|
|
26
26
|
end
|
|
27
|
+
|
|
28
|
+
it 'match_all & css_all' do
|
|
29
|
+
p = Digger::Pattern.new({ type: 'match_all', value: '[\d]+' })
|
|
30
|
+
m = p.regexp_match('123,12,1')
|
|
31
|
+
expect(m.length).to eq(3)
|
|
32
|
+
end
|
|
27
33
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: digger
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.1
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- binz
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2022-
|
|
11
|
+
date: 2022-03-07 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|