digger 0.1.5 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5c4a94163a25d4b53ad5b477040f69b8fccca026adc313f8f61759317c1bf198
4
- data.tar.gz: 307b443277c16708103c172e5fb4ef4d833f7d2631a7f85779570a3cbeac8925
3
+ metadata.gz: ed96af1c5ae92569e1de4885958ac5852864c045f6c6337d5f17d91747d8ed80
4
+ data.tar.gz: 82003ae80f54cd3f9b805757e5dcb4c7894bba91c4c376cf08ebd43e6de6e80b
5
5
  SHA512:
6
- metadata.gz: 9da40123fd09615d0c69ca5104d1141b82981813ef8d175bc567a0f35e8f7dd868ce235bc0f265308e2133710f63c3ce8325d6b528ae060eccb77904c12e3139
7
- data.tar.gz: c05be67df6db25345acfdc3615690c0467029dd40ffde262863923b6f0786696a16fd547277ec4d3764ed39b399174dd28e10ca7eb31ffb55f773e08b04f2986
6
+ metadata.gz: b7aad69fd46c7d1688026ece2e1efe14d7dea29b42f94656d794655e12a92677bd3e1034f0c776bf197bcd75c96bd49377df399433bd2e1c13507520af1addc5
7
+ data.tar.gz: 60055a69ec3ad77e80fc4f1b50bb3a6c298e2274827ae21b64d1d82dae53a1d5338ccc99bcdb09c7d2e946abd007a488ff3034a753bc54586e9b651aeb3c5ce7
data/digger.gemspec CHANGED
@@ -1,26 +1,27 @@
1
1
  # coding: utf-8
2
+
2
3
  lib = File.expand_path('../lib', __FILE__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
5
  require 'digger/version'
5
6
 
6
7
  Gem::Specification.new do |spec|
7
- spec.name = "digger"
8
+ spec.name = 'digger'
8
9
  spec.version = Digger::VERSION
9
- spec.authors = ["binz"]
10
- spec.email = ["xinkiang@gmail.com"]
10
+ spec.authors = ['binz']
11
+ spec.email = ['xinkiang@gmail.com']
11
12
  spec.summary = %q{Dig need stractual infomation from web page.}
12
13
  spec.description = %q{}
13
- spec.homepage = ""
14
- spec.license = "MIT"
14
+ spec.homepage = ''
15
+ spec.license = 'MIT'
15
16
 
16
17
  spec.files = `git ls-files -z`.split("\x0")
17
18
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
19
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
- spec.require_paths = ["lib"]
20
+ spec.require_paths = ['lib']
20
21
 
21
- spec.add_development_dependency "bundler", "~> 2.0"
22
- spec.add_development_dependency "rake", "~> 10.0"
22
+ spec.add_development_dependency 'rake', '>= 12.3.3'
23
+ spec.add_development_dependency 'bundler', '~> 2.0'
23
24
 
24
- spec.add_runtime_dependency 'nokogiri', '~> 1.6'
25
25
  spec.add_runtime_dependency 'http-cookie', '~> 1.0'
26
+ spec.add_runtime_dependency 'nokogiri', '~> 1.6'
26
27
  end
data/lib/digger/index.rb CHANGED
@@ -8,33 +8,63 @@ module Digger
8
8
 
9
9
  def urls
10
10
  @urls ||= begin
11
- args = self.args.map{|a| (a.respond_to? :each) ? a.to_a : [a]}
12
- args.shift.product(*args).map{|arg| pattern_applied_url(arg)}
11
+ args = self.args.map { |a| a.respond_to?(:each) ? a.to_a : [a] }
12
+ args.shift.product(*args).map { |arg| pattern_applied_url(arg) }
13
13
  end
14
14
  end
15
15
 
16
16
  def pattern_applied_url(arg)
17
- pattern.gsub('*').each_with_index{|_, i| arg[i]}
17
+ pattern.gsub('*').each_with_index { |_, i| arg[i] }
18
+ end
19
+
20
+ def self.slow_down(entities, conf = {}, &block)
21
+ raise NoBlockError, 'No block given' unless block
22
+
23
+ config = {
24
+ sleep_range_seconds: 4...10, # 随机等待时间范围
25
+ fail_max_cnt: 10, # 最多失败次数
26
+ fail_unit_seconds: 10 * 60, # 失败等待时间
27
+ when_fail: ->(ent, e, failed_cnt) {}
28
+ }.merge(conf)
29
+ failed_cnt = 0
30
+ cursor = 0
31
+ result = []
32
+ while cursor < entities.length
33
+ begin
34
+ result << block.call(entities[cursor])
35
+ rescue StandardError => e
36
+ failed_cnt += 1
37
+ config[:when_fail].call(entities[cursor], e, failed_cnt)
38
+ break if failed_cnt >= config[:fail_max_cnt]
39
+
40
+ sleep(failed_cnt * config[:fail_unit_seconds])
41
+ else
42
+ cursor += 1
43
+ sleep(rand(config[:sleep_range_seconds]))
44
+ end
45
+ end
46
+ result
18
47
  end
19
48
 
20
49
  def self.batch(entities, cocurrence = 1, &block)
21
- raise NoBlockError, "No block given" unless block
50
+ raise NoBlockError, 'No block given' unless block
22
51
 
23
52
  if cocurrence > 1
24
- results = {}
25
- entities.each_slice(cocurrence) do |group|
53
+ results = Array.new(entities.size)
54
+ entities.each_slice(cocurrence).with_index do |group, idx1|
26
55
  threads = []
27
- group.each do |entity|
56
+ group.each_with_index do |entity, idx2|
57
+ index = idx1 * cocurrence + idx2
28
58
  threads << Thread.new(entity) do |ent|
29
- results[ent] = block.call(ent)
59
+ results[index] = block.call(ent)
30
60
  end
31
61
  end
32
- threads.each{|thread| thread.join}
62
+ threads.each(&:join)
33
63
  end
34
- entities.map{|ent| results[ent]}
64
+ results
35
65
  else
36
- entities.map{|ent| block.call(ent) }
66
+ entities.map { |ent| block.call(ent) }
37
67
  end
38
68
  end
39
69
  end
40
- end
70
+ end
data/lib/digger/model.rb CHANGED
@@ -1,16 +1,19 @@
1
1
 
2
2
  module Digger
3
3
  class Model
4
- @@digger_config = {'pattern'=>{}, 'index'=>{}}
4
+ @@digger_config = {
5
+ 'pattern' => {},
6
+ 'index' => {}
7
+ }
5
8
 
6
9
  class << self
7
10
  # patterns
8
11
  def pattern_config
9
- @@digger_config['pattern'][self.name] ||= {}
12
+ @@digger_config['pattern'][name] ||= {}
10
13
  end
11
14
 
12
15
  Pattern::TYPES.each do |method|
13
- define_method method, ->(pairs, &block){
16
+ define_method method, -> (pairs, &block) {
14
17
  pairs.each_pair do |key, value|
15
18
  pattern_config[key] = Pattern.new(type: method, value: value, block: block)
16
19
  end
@@ -18,21 +21,22 @@ module Digger
18
21
  end
19
22
 
20
23
  def validate_presence(*keys)
21
- keys_all = pattern_config.keys
22
- raise "Pattern keys #{(keys - keys_all).join(', ')} should be present" unless keys.all?{|k| keys_all.include?(k) }
24
+ is_all = pattern_config.keys.all? { |k| keys.include?(k) }
25
+ raise "Pattern keys #{(keys - keys_all).join(', ')} should be present" unless is_all
23
26
  end
24
27
 
25
28
  def validate_includeness(*keys)
26
- raise "Pattern keys #{(pattern_config.keys - keys).join(', ')} should not be included" unless pattern_config.keys.all?{|k| keys.include?(k)}
29
+ is_all = pattern_config.keys.all? { |k| keys.include?(k) }
30
+ raise "Pattern keys #{(pattern_config.keys - keys).join(', ')} should not be included" if is_all
27
31
  end
28
32
 
29
33
  # index page
30
34
  def index_config
31
- @@digger_config['index'][self.name]
35
+ @@digger_config['index'][name]
32
36
  end
33
37
 
34
38
  def index_page(pattern, *args)
35
- @@digger_config['index'][self.name] = Index.new(pattern, args)
39
+ @@digger_config['index'][name] = Index.new(pattern, args)
36
40
  end
37
41
 
38
42
  def index_page?
@@ -55,13 +59,15 @@ module Digger
55
59
  end
56
60
 
57
61
  def dig_urls(urls, cocurrence = 1, opts = {})
58
- Index.batch(urls, cocurrence){|url| dig_url(url, opts) }
62
+ Index.batch(urls, cocurrence) { |url| dig_url(url, opts) }
59
63
  end
60
64
 
61
65
  def dig(cocurrence = 1)
62
66
  if self.class.index_page?
63
- self.class.index_config.process(cocurrence){|url| dig_url(url) }
67
+ self.class.index_config.process(cocurrence) do |url|
68
+ dig_url(url)
69
+ end
64
70
  end
65
71
  end
66
72
  end
67
- end
73
+ end
data/lib/digger/page.rb CHANGED
@@ -3,6 +3,8 @@ require 'json'
3
3
  require 'ostruct'
4
4
  require 'set'
5
5
  require 'kconv'
6
+ require 'uri'
7
+ require 'http/cookie'
6
8
 
7
9
  # https://github.com/taganaka/polipus/blob/master/lib/polipus/page.rb
8
10
  module Digger
@@ -27,16 +29,12 @@ module Digger
27
29
  # OpenStruct it holds users defined data
28
30
  attr_accessor :user_data
29
31
 
30
- attr_accessor :aliases
31
-
32
- attr_accessor :domain_aliases
32
+ attr_accessor :aliases, :domain_aliases, :fetched_at
33
33
 
34
34
  # Whether the current page should be stored
35
35
  # Default: true
36
36
  attr_accessor :storable
37
37
 
38
- attr_accessor :fetched_at
39
-
40
38
  #
41
39
  # Create a new page
42
40
  #
@@ -60,7 +58,7 @@ module Digger
60
58
  end
61
59
 
62
60
  def title
63
- doc.title if doc
61
+ doc&.title
64
62
  end
65
63
 
66
64
  #
@@ -74,6 +72,7 @@ module Digger
74
72
  doc.search('//a[@href]').each do |a|
75
73
  u = a['href']
76
74
  next if u.nil? || u.empty?
75
+
77
76
  abs = to_absolute(u) rescue next
78
77
  @links << abs if abs && in_domain?(abs)
79
78
  end
@@ -100,7 +99,11 @@ module Digger
100
99
  end
101
100
 
102
101
  def jsonp
103
- @jsonp ||= JSON.parse body.match(/^[^\(]+?\((.+)\)[^\)]*$/)[1]
102
+ @jsonp ||= JSON.parse body.match(/^[^(]+?\((.+)\)[^)]*$/)[1]
103
+ end
104
+
105
+ def cookies
106
+ @cookies ||= (headers['set-cookie'] || []).flat_map { |c| ::HTTP::Cookie.parse(c, url) }
104
107
  end
105
108
 
106
109
  #
@@ -162,7 +165,7 @@ module Digger
162
165
  # returns +false+ otherwise.
163
166
  #
164
167
  def not_found?
165
- 404 == @code
168
+ @code == 404
166
169
  end
167
170
 
168
171
  #
@@ -176,6 +179,7 @@ module Digger
176
179
  end unless @base
177
180
 
178
181
  return nil if @base && @base.to_s.empty?
182
+
179
183
  @base
180
184
  end
181
185
 
@@ -186,16 +190,7 @@ module Digger
186
190
  def to_absolute(link)
187
191
  return nil if link.nil?
188
192
 
189
- # link = link.to_s.encode('utf-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '')
190
-
191
- # remove anchor
192
- link =
193
- begin
194
- URI.encode(URI.decode(link.to_s.gsub(/#[a-zA-Z0-9_-]*$/, '')))
195
- rescue URI::Error
196
- return nil
197
- end
198
-
193
+ link = link.to_s.encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '').gsub(/#[\w]*$/, '')
199
194
  relative = begin
200
195
  URI(link)
201
196
  rescue URI::Error
@@ -253,6 +248,7 @@ module Digger
253
248
 
254
249
  def expired?(ttl)
255
250
  return false if fetched_at.nil?
251
+
256
252
  (Time.now.to_i - ttl) > fetched_at
257
253
  end
258
254
 
@@ -282,4 +278,4 @@ module Digger
282
278
  from_hash hash
283
279
  end
284
280
  end
285
- end
281
+ end
@@ -1,111 +1,120 @@
1
1
  require 'nokogiri'
2
2
 
3
3
  module Digger
4
+ # Extractor patterns definition
4
5
  class Pattern
5
6
  attr_accessor :type, :value, :block
6
7
 
7
8
  def initialize(hash = {})
8
- hash.each_pair{|key, value| send("#{key}=", value) if %w{type value block}.include?(key.to_s)}
9
+ hash.each_pair { |key, value| send("#{key}=", value) if %w[type value block].include?(key.to_s)}
9
10
  end
10
11
 
11
- def safe_block
12
- block && begin
13
- if block.respond_to?(:call)
14
- block
15
- elsif block.strip == '' #
16
- nil
17
- else
18
- proc{ $SAFE = 2; eval block }.call
19
- end
20
- rescue StandardError
21
- nil
12
+ def safe_block(&default_block)
13
+ if block.nil? || (block.is_a?(String) && block.strip.empty?)
14
+ default_block || ->(v) { v }
15
+ elsif block.respond_to?(:call)
16
+ block
17
+ else
18
+ proc {
19
+ $SAFE = 2
20
+ eval block
21
+ }.call
22
22
  end
23
23
  end
24
24
 
25
25
  def self.wrap(hash)
26
- Hash[hash.map{|key, value| [key, value.is_a?(Pattern) ? value : Pattern.new(value)]}]
26
+ hash.transform_values { |value| value.is_a?(Pattern) ? value : Pattern.new(value) }
27
27
  end
28
28
 
29
29
  MATCH_MAX = 3
30
30
 
31
- TYPES_REGEXP = 0.upto(MATCH_MAX).map{|i| "match_#{i}"} + %w{match_many}
32
- TYPES_CSS = %w{css_one css_many}
33
- TYPES_JSON = %w{json jsonp}
34
-
35
- TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON
31
+ TYPES_REGEXP = 0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many]
32
+ TYPES_CSS = %w[css_one css_many].freeze
33
+ TYPES_JSON = %w[json jsonp].freeze
34
+
35
+ TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON + ['cookie']
36
+
37
+ def match_page(page)
38
+ return unless page.success?
36
39
 
37
- def match_page(page, &callback)
38
- blk = callback || safe_block
39
40
  if TYPES_REGEXP.include?(type) # regular expression
40
- blk ||= ->(text){ text.strip }
41
- # content is String
42
- if type == 'match_many'
43
- match = page.body.gsub(value).to_a
44
- else
45
- index = TYPES_REGEXP.index(type)
46
- matches = page.body.match(value)
47
- match = matches.nil? ? nil : matches[index]
48
- end
41
+ regexp_match(page.body)
49
42
  elsif TYPES_CSS.include?(type) # css expression
50
- blk ||= ->(node){ node.content.strip }
51
- # content is Nokogiri::HTML::Document
52
- if type == 'css_one'
53
- match = page.doc.css(value).first
54
- else
55
- match = page.doc.css(value)
56
- end
43
+ css_match(page.doc)
57
44
  elsif TYPES_JSON.include?(type)
58
- json = page.send(type)
59
- match = json_fetch(json, value)
45
+ json_match(page)
46
+ else
47
+ cookie_get(page.cookies)
48
+ end
49
+ end
50
+
51
+ def cookie_get(cookies)
52
+ cookie = cookies.find { |c| c.name == value }&.value
53
+ safe_block.call(cookie)
54
+ end
55
+
56
+ def json_match(page)
57
+ json = page.send(type)
58
+ keys = json_index_keys(value)
59
+ match = json_fetch(json, keys)
60
+ safe_block.call(match)
61
+ end
62
+
63
+ def css_match(doc)
64
+ block = safe_block { |node| node.content.strip }
65
+ # content is Nokogiri::HTML::Document
66
+ contents = doc.css(value)
67
+ if type == 'css_many'
68
+ contents.map { |node| block.call(node) }.uniq
69
+ else
70
+ block.call(contents.first)
60
71
  end
61
- if match.nil?
62
- nil
63
- elsif %w{css_many match_many}.include? type
64
- match.map{|node| blk.call(node) }.uniq
72
+ end
73
+
74
+ def regexp_match(body)
75
+ block = safe_block(&:strip)
76
+ # content is String
77
+ if type == 'match_many'
78
+ body.gsub(value).to_a.map { |node| block.call(node) }.uniq
65
79
  else
66
- blk.call(match)
80
+ index = TYPES_REGEXP.index(type)
81
+ matches = body.match(value)
82
+ block.call(matches[index]) unless matches.nil?
67
83
  end
68
- rescue
69
- nil
70
84
  end
71
85
 
72
86
  def json_fetch(json, keys)
73
- if keys.is_a? String
74
- # parse json keys like '$.k1.k2[0]'
75
- parts = keys.match(/^\$[\S]*$/)[0].scan(/(\.([\w]+)|\[([\d]+)\])/).map do |p|
76
- p[1].nil? ? { index: p[2].to_i } : { key: p[1] }
77
- end
78
- json_fetch(json, parts)
79
- elsif keys.is_a? Array
80
- if keys.length == 0
81
- json
82
- else
83
- pt = keys.shift
84
- json_fetch(json[pt[:index] || pt[:key]], keys)
85
- end
87
+ if keys.empty?
88
+ json
89
+ else
90
+ pt = keys.shift
91
+ json_fetch(json[pt[:index] || pt[:key]], keys)
86
92
  end
87
93
  end
88
94
 
95
+ def json_index_keys(keys)
96
+ keys.to_s.match(/^\$\S*$/)[0].scan(/(\.(\w+)|\[(\d+)\])/).map do |p|
97
+ p[1].nil? ? { index: p[2].to_i } : { key: p[1] }
98
+ end
99
+ end
100
+
101
+ private :json_index_keys, :json_fetch
102
+
103
+ # Nokogiri node methods
89
104
  class Nokogiri::XML::Node
90
- %w{one many}.each do |name|
91
- define_method "inner_#{name}" do |css, &block|
92
- callback = ->(node) do
93
- if node
94
- (block || ->(n){n.text.strip}).call(node)
95
- else
96
- nil
97
- end
98
- end
105
+ %w[one many].each do |name|
106
+ define_method "inner_#{name}" do |css, &block|
107
+ callback = ->(node) { (block || ->(n) { n.text.strip }).call(node) if node }
99
108
  if name == 'one' # inner_one
100
109
  callback.call(self.css(css).first)
101
110
  else # inner_many
102
- self.css(css).map{|node| callback.call(node)}
111
+ self.css(css).map { |node| callback.call(node) }
103
112
  end
104
113
  end
105
114
  end
106
115
  def source
107
116
  to_xml
108
117
  end
109
- end # nokogiri
118
+ end
110
119
  end
111
- end
120
+ end
@@ -1,3 +1,3 @@
1
1
  module Digger
2
- VERSION = "0.1.5"
2
+ VERSION = '0.1.9'.freeze
3
3
  end
data/spec/digger_spec.rb CHANGED
@@ -1,15 +1,14 @@
1
1
  require 'digger'
2
2
 
3
3
  http = Digger::HTTP.new
4
- page = http.fetch_page('http://nan.so/')
4
+ page = http.fetch_page('http://www.baidu.com/')
5
5
 
6
- pattern = Digger::Pattern.new({type: 'css_many', value: '.sites>a>span' })
6
+ pattern = Digger::Pattern.new({ type: 'css_many', value: '#s-top-left>a' })
7
7
 
8
8
  class Item < Digger::Model
9
- css_many sites: '.sites>a>span'
10
- css_one logo: '.logo'
9
+ css_many sites: '#s-top-left>a'
11
10
  validate_presence :sites
12
- validate_includeness :sites, :logo
11
+ validate_includeness :sites
13
12
  end
14
13
 
15
14
  describe Digger do
@@ -19,12 +18,12 @@ describe Digger do
19
18
 
20
19
  it "pattern should match content" do
21
20
  sites = pattern.match_page(page)
22
- expect(sites.include?('百度网盘')).to eq(true)
21
+ expect(sites.include?('新闻')).to eq(true)
23
22
  end
24
23
 
25
24
  it "model should dig content" do
26
25
  item = Item.new.match_page(page)
27
- expect(item[:sites].include?('读远')).to be(true)
26
+ expect(item[:sites].include?('新闻')).to be(true)
28
27
  end
29
28
 
30
29
  it "validation support" do
@@ -0,0 +1,28 @@
1
+ require 'digger'
2
+
3
+ describe Digger::Index do
4
+ it 'batch digger' do
5
+ list = [1, 2, 3, 4, 5, 6, 7, 8]
6
+ pt = Digger::Index.batch(list, 3) do |num|
7
+ sleep(rand(1..3))
8
+ "##{num}"
9
+ end
10
+ expect(pt.join).to eq(list.map { |num| "##{num}" }.join)
11
+ end
12
+
13
+ it 'slow down' do
14
+ list = [1, 2, 3, 4]
15
+ conf = {
16
+ sleep_range_seconds: 1...2,
17
+ fail_unit_seconds: 1,
18
+ fail_max_cnt: 2,
19
+ when_fail: ->(_, e, nth) { puts "#{nth}: #{e.message}" }
20
+ }
21
+ pt = Digger::Index.slow_down(list, conf) do |num|
22
+ raise 'error' if num == 3
23
+ num
24
+ end
25
+ p pt
26
+ expect(pt.size).to eq(2)
27
+ end
28
+ end
data/spec/page_spec.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  require 'digger'
2
2
  require 'json'
3
+ require 'uri'
4
+ require 'cgi'
3
5
 
4
6
  describe Digger::Page do
5
7
  it 'page json' do
@@ -11,4 +13,15 @@ describe Digger::Page do
11
13
  expect(j1.json['b'][0]).to eq(1)
12
14
  expect(j2.jsonp['b'][1]).to eq(2)
13
15
  end
14
- end
16
+
17
+ it 'fetch baidu' do
18
+ http = Digger::HTTP.new
19
+ page = http.fetch_page('http://baidu.com/')
20
+ expect(page.code).to eq(200)
21
+ end
22
+
23
+ # it 'page uri' do
24
+ # link = 'https://www.baidu.com/s?wd=%E5%93%88%E5%93%88#hello'
25
+ # link = link.to_s.encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '').gsub(/#\w*$/, '')
26
+ # end
27
+ end
data/spec/pattern_spec.rb CHANGED
@@ -2,14 +2,19 @@ require 'digger'
2
2
  require 'json'
3
3
 
4
4
  describe Digger::Pattern do
5
- it 'json fetch' do
6
- json = JSON.parse('{"a":1,"b":[1,2,3]}')
7
- pt = Digger::Pattern.new
8
- expect(pt.json_fetch(json, '$')['a']).to eq(1)
9
- expect(pt.json_fetch(json, '$.a')).to eq(1)
10
- expect(pt.json_fetch(json, '$.b').length).to eq(3)
11
- expect(pt.json_fetch(json, '$.b[2]')).to eq(3)
12
- end
13
-
5
+ # it 'json fetch' do
6
+ # json = JSON.parse('[{"a":1,"b":[1,2,3]}]')
7
+ # pt = Digger::Pattern.new
8
+ # expect(pt.json_fetch(json, '$[0]')['a']).to eq(1)
9
+ # expect(pt.json_fetch(json, '$[0].a')).to eq(1)
10
+ # expect(pt.json_fetch(json, '$[0].b').length).to eq(3)
11
+ # expect(pt.json_fetch(json, '$[0].b[2]')).to eq(3)
12
+ # end
14
13
 
15
- end
14
+ it 'parse cookoe' do
15
+ page = Digger::HTTP.new.fetch_page('https://xueqiu.com/')
16
+ pt = Digger::Pattern.new({ type: 'cookie', value: 'xq_a_token', block: ->(v) { "!!#{v}" } })
17
+ result = pt.match_page(page)
18
+ expect(result.length).to eq(42)
19
+ end
20
+ end
metadata CHANGED
@@ -1,71 +1,71 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - binz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-12-25 00:00:00.000000000 Z
11
+ date: 2021-12-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
14
+ name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '2.0'
19
+ version: 12.3.3
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '2.0'
26
+ version: 12.3.3
27
27
  - !ruby/object:Gem::Dependency
28
- name: rake
28
+ name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '2.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '2.0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: nokogiri
42
+ name: http-cookie
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1.6'
47
+ version: '1.0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '1.6'
54
+ version: '1.0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: http-cookie
56
+ name: nokogiri
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '1.0'
61
+ version: '1.6'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '1.0'
68
+ version: '1.6'
69
69
  description: ''
70
70
  email:
71
71
  - xinkiang@gmail.com
@@ -87,6 +87,7 @@ files:
87
87
  - lib/digger/pattern.rb
88
88
  - lib/digger/version.rb
89
89
  - spec/digger_spec.rb
90
+ - spec/index_spec.rb
90
91
  - spec/page_spec.rb
91
92
  - spec/pattern_spec.rb
92
93
  - spec/validate_spec.rb
@@ -115,6 +116,7 @@ specification_version: 4
115
116
  summary: Dig need stractual infomation from web page.
116
117
  test_files:
117
118
  - spec/digger_spec.rb
119
+ - spec/index_spec.rb
118
120
  - spec/page_spec.rb
119
121
  - spec/pattern_spec.rb
120
122
  - spec/validate_spec.rb