digger 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b036f2d202aa8360cf3b07822168fb8e3e3e084d4b2f5ccb5eca026dd4c47981
4
- data.tar.gz: 85d13e567add73e38c25738852dc3b30e3ad1900579dca815d5aef4cb16dbca8
3
+ metadata.gz: 025bfc4f8d6dbf55994363070b67a450fd421457c4f12bf9b714fe965aefaad6
4
+ data.tar.gz: a8464f07a53c332ddb40457998b830b1dff99661431a7608d9dc075175f5e9f1
5
5
  SHA512:
6
- metadata.gz: decb9abb96f56dc7f75ba95151b02c7415018a789583da1324ab4786317d7e0bfc272a83b47399b51672bdb30b3d90c502f25358d7438b9ee26a8d39764a58dd
7
- data.tar.gz: 73f7dfd179175bcda4a24a120cad95a61093f17166e389e44b37a158ff8ee1ea927844d9b85773914f14d99473776f11752c7aa32fadfec1b8b703e584591c3f
6
+ metadata.gz: d84a2f8ee2d65c16d79b66795fc618b07fb5ba4517d902eec7e17107b69c240b36f68728de49f451fd143ddeb872743a906b608d413446971e5f38a632cf6fbe
7
+ data.tar.gz: 4f36e035bab956bd6702024d951eb9963f2f97812f355e00b0840148eaa4fef07949abac0f874f889c88703addd13ea2bc69efa61bac3872b14f371f7fa47ded
@@ -30,8 +30,8 @@ module Digger
30
30
 
31
31
  MATCH_MAX = 3
32
32
 
33
- TYPES_REGEXP = 0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many]
34
- TYPES_CSS = %w[css_one css_many].freeze
33
+ TYPES_REGEXP = 0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many match_all]
34
+ TYPES_CSS = %w[css_one css_many css_all].freeze
35
35
  TYPES_JSON = %w[json jsonp].freeze
36
36
  TYPES_OTHER = %w[cookie plain lines header body].freeze
37
37
 
@@ -61,7 +61,7 @@ module Digger
61
61
  end
62
62
 
63
63
  def get_plain(page)
64
- safe_block.call(page.doc.text)
64
+ safe_block.call(page.doc&.text)
65
65
  end
66
66
 
67
67
  def get_lines(page)
@@ -82,25 +82,36 @@ module Digger
82
82
  end
83
83
 
84
84
  def css_match(doc)
85
- block = safe_block { |node| node&.content&.strip }
86
85
  # content is Nokogiri::HTML::Document
87
86
  contents = doc.css(value)
88
87
  if type == 'css_many'
89
- contents.map { |node| block.call(node) }.uniq
88
+ block = safe_block { |node| node&.content&.strip }
89
+ contents.map { |node| block.call(node) }
90
+ elsif type == 'css_all'
91
+ block = safe_block
92
+ block.call(contents)
90
93
  else
94
+ block = safe_block { |node| node&.content&.strip }
91
95
  block.call(contents.first)
92
96
  end
93
97
  end
94
98
 
95
99
  def regexp_match(body)
96
- block = safe_block(&:strip)
97
100
  # content is String
98
- if type == 'match_many'
101
+ if %w[match_many match_all].include? type
99
102
  regexp = value.is_a?(Regexp) ? value : Regexp.new(value.to_s)
100
- body.gsub(regexp).to_a.map { |node| block.call(node) }.uniq
103
+ matches = body.gsub(regexp).to_a
104
+ if type == 'match_many'
105
+ block = safe_block(&:strip)
106
+ matches.map { |node| block.call(node) }
107
+ else
108
+ block = safe_block
109
+ block.call(matches)
110
+ end
101
111
  else
102
112
  index = TYPES_REGEXP.index(type)
103
113
  matches = body.match(value)
114
+ block = safe_block(&:strip)
104
115
  block.call(matches[index]) unless matches.nil?
105
116
  end
106
117
  end
@@ -1,3 +1,3 @@
1
1
  module Digger
2
- VERSION = '0.2.1'.freeze
2
+ VERSION = '0.2.2'.freeze
3
3
  end
data/spec/pattern_spec.rb CHANGED
@@ -24,4 +24,10 @@ describe Digger::Pattern do
24
24
  p3 = Digger::Pattern.new({ type: 'plain' })
25
25
  expect(p3.match_page(page).length).to be > 100
26
26
  end
27
+
28
+ it 'match_all & css_all' do
29
+ p = Digger::Pattern.new({ type: 'match_all', value: '[\d]+' })
30
+ m = p.regexp_match('123,12,1')
31
+ expect(m.length).to eq(3)
32
+ end
27
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - binz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-01-17 00:00:00.000000000 Z
11
+ date: 2022-03-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake