digger 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/digger/pattern.rb +19 -8
- data/lib/digger/version.rb +1 -1
- data/spec/pattern_spec.rb +6 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 025bfc4f8d6dbf55994363070b67a450fd421457c4f12bf9b714fe965aefaad6
|
4
|
+
data.tar.gz: a8464f07a53c332ddb40457998b830b1dff99661431a7608d9dc075175f5e9f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d84a2f8ee2d65c16d79b66795fc618b07fb5ba4517d902eec7e17107b69c240b36f68728de49f451fd143ddeb872743a906b608d413446971e5f38a632cf6fbe
|
7
|
+
data.tar.gz: 4f36e035bab956bd6702024d951eb9963f2f97812f355e00b0840148eaa4fef07949abac0f874f889c88703addd13ea2bc69efa61bac3872b14f371f7fa47ded
|
data/lib/digger/pattern.rb
CHANGED
@@ -30,8 +30,8 @@ module Digger
|
|
30
30
|
|
31
31
|
MATCH_MAX = 3
|
32
32
|
|
33
|
-
TYPES_REGEXP = 0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many]
|
34
|
-
TYPES_CSS = %w[css_one css_many].freeze
|
33
|
+
TYPES_REGEXP = 0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many match_all]
|
34
|
+
TYPES_CSS = %w[css_one css_many css_all].freeze
|
35
35
|
TYPES_JSON = %w[json jsonp].freeze
|
36
36
|
TYPES_OTHER = %w[cookie plain lines header body].freeze
|
37
37
|
|
@@ -61,7 +61,7 @@ module Digger
|
|
61
61
|
end
|
62
62
|
|
63
63
|
def get_plain(page)
|
64
|
-
safe_block.call(page.doc
|
64
|
+
safe_block.call(page.doc&.text)
|
65
65
|
end
|
66
66
|
|
67
67
|
def get_lines(page)
|
@@ -82,25 +82,36 @@ module Digger
|
|
82
82
|
end
|
83
83
|
|
84
84
|
def css_match(doc)
|
85
|
-
block = safe_block { |node| node&.content&.strip }
|
86
85
|
# content is Nokogiri::HTML::Document
|
87
86
|
contents = doc.css(value)
|
88
87
|
if type == 'css_many'
|
89
|
-
|
88
|
+
block = safe_block { |node| node&.content&.strip }
|
89
|
+
contents.map { |node| block.call(node) }
|
90
|
+
elsif type == 'css_all'
|
91
|
+
block = safe_block
|
92
|
+
block.call(contents)
|
90
93
|
else
|
94
|
+
block = safe_block { |node| node&.content&.strip }
|
91
95
|
block.call(contents.first)
|
92
96
|
end
|
93
97
|
end
|
94
98
|
|
95
99
|
def regexp_match(body)
|
96
|
-
block = safe_block(&:strip)
|
97
100
|
# content is String
|
98
|
-
if
|
101
|
+
if %w[match_many match_all].include? type
|
99
102
|
regexp = value.is_a?(Regexp) ? value : Regexp.new(value.to_s)
|
100
|
-
body.gsub(regexp).to_a
|
103
|
+
matches = body.gsub(regexp).to_a
|
104
|
+
if type == 'match_many'
|
105
|
+
block = safe_block(&:strip)
|
106
|
+
matches.map { |node| block.call(node) }
|
107
|
+
else
|
108
|
+
block = safe_block
|
109
|
+
block.call(matches)
|
110
|
+
end
|
101
111
|
else
|
102
112
|
index = TYPES_REGEXP.index(type)
|
103
113
|
matches = body.match(value)
|
114
|
+
block = safe_block(&:strip)
|
104
115
|
block.call(matches[index]) unless matches.nil?
|
105
116
|
end
|
106
117
|
end
|
data/lib/digger/version.rb
CHANGED
data/spec/pattern_spec.rb
CHANGED
@@ -24,4 +24,10 @@ describe Digger::Pattern do
|
|
24
24
|
p3 = Digger::Pattern.new({ type: 'plain' })
|
25
25
|
expect(p3.match_page(page).length).to be > 100
|
26
26
|
end
|
27
|
+
|
28
|
+
it 'match_all & css_all' do
|
29
|
+
p = Digger::Pattern.new({ type: 'match_all', value: '[\d]+' })
|
30
|
+
m = p.regexp_match('123,12,1')
|
31
|
+
expect(m.length).to eq(3)
|
32
|
+
end
|
27
33
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- binz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-03-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|