digger 0.2.1 → 0.2.2

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b036f2d202aa8360cf3b07822168fb8e3e3e084d4b2f5ccb5eca026dd4c47981
4
- data.tar.gz: 85d13e567add73e38c25738852dc3b30e3ad1900579dca815d5aef4cb16dbca8
3
+ metadata.gz: 025bfc4f8d6dbf55994363070b67a450fd421457c4f12bf9b714fe965aefaad6
4
+ data.tar.gz: a8464f07a53c332ddb40457998b830b1dff99661431a7608d9dc075175f5e9f1
5
5
  SHA512:
6
- metadata.gz: decb9abb96f56dc7f75ba95151b02c7415018a789583da1324ab4786317d7e0bfc272a83b47399b51672bdb30b3d90c502f25358d7438b9ee26a8d39764a58dd
7
- data.tar.gz: 73f7dfd179175bcda4a24a120cad95a61093f17166e389e44b37a158ff8ee1ea927844d9b85773914f14d99473776f11752c7aa32fadfec1b8b703e584591c3f
6
+ metadata.gz: d84a2f8ee2d65c16d79b66795fc618b07fb5ba4517d902eec7e17107b69c240b36f68728de49f451fd143ddeb872743a906b608d413446971e5f38a632cf6fbe
7
+ data.tar.gz: 4f36e035bab956bd6702024d951eb9963f2f97812f355e00b0840148eaa4fef07949abac0f874f889c88703addd13ea2bc69efa61bac3872b14f371f7fa47ded
@@ -30,8 +30,8 @@ module Digger
30
30
 
31
31
  MATCH_MAX = 3
32
32
 
33
- TYPES_REGEXP = 0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many]
34
- TYPES_CSS = %w[css_one css_many].freeze
33
+ TYPES_REGEXP = 0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many match_all]
34
+ TYPES_CSS = %w[css_one css_many css_all].freeze
35
35
  TYPES_JSON = %w[json jsonp].freeze
36
36
  TYPES_OTHER = %w[cookie plain lines header body].freeze
37
37
 
@@ -61,7 +61,7 @@ module Digger
61
61
  end
62
62
 
63
63
  def get_plain(page)
64
- safe_block.call(page.doc.text)
64
+ safe_block.call(page.doc&.text)
65
65
  end
66
66
 
67
67
  def get_lines(page)
@@ -82,25 +82,36 @@ module Digger
82
82
  end
83
83
 
84
84
  def css_match(doc)
85
- block = safe_block { |node| node&.content&.strip }
86
85
  # content is Nokogiri::HTML::Document
87
86
  contents = doc.css(value)
88
87
  if type == 'css_many'
89
- contents.map { |node| block.call(node) }.uniq
88
+ block = safe_block { |node| node&.content&.strip }
89
+ contents.map { |node| block.call(node) }
90
+ elsif type == 'css_all'
91
+ block = safe_block
92
+ block.call(contents)
90
93
  else
94
+ block = safe_block { |node| node&.content&.strip }
91
95
  block.call(contents.first)
92
96
  end
93
97
  end
94
98
 
95
99
  def regexp_match(body)
96
- block = safe_block(&:strip)
97
100
  # content is String
98
- if type == 'match_many'
101
+ if %w[match_many match_all].include? type
99
102
  regexp = value.is_a?(Regexp) ? value : Regexp.new(value.to_s)
100
- body.gsub(regexp).to_a.map { |node| block.call(node) }.uniq
103
+ matches = body.gsub(regexp).to_a
104
+ if type == 'match_many'
105
+ block = safe_block(&:strip)
106
+ matches.map { |node| block.call(node) }
107
+ else
108
+ block = safe_block
109
+ block.call(matches)
110
+ end
101
111
  else
102
112
  index = TYPES_REGEXP.index(type)
103
113
  matches = body.match(value)
114
+ block = safe_block(&:strip)
104
115
  block.call(matches[index]) unless matches.nil?
105
116
  end
106
117
  end
@@ -1,3 +1,3 @@
1
1
  module Digger
2
- VERSION = '0.2.1'.freeze
2
+ VERSION = '0.2.2'.freeze
3
3
  end
data/spec/pattern_spec.rb CHANGED
@@ -24,4 +24,10 @@ describe Digger::Pattern do
24
24
  p3 = Digger::Pattern.new({ type: 'plain' })
25
25
  expect(p3.match_page(page).length).to be > 100
26
26
  end
27
+
28
+ it 'match_all & css_all' do
29
+ p = Digger::Pattern.new({ type: 'match_all', value: '[\d]+' })
30
+ m = p.regexp_match('123,12,1')
31
+ expect(m.length).to eq(3)
32
+ end
27
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - binz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-01-17 00:00:00.000000000 Z
11
+ date: 2022-03-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake