digger 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2f506fd615df8b9d732d6b67bedc72644df26dc3f5725cfe5dba10c1098bae0b
4
- data.tar.gz: 0e9afcb19ba0be5ce4a90787d54c3b43d58100bac3ec45bf5ef93781a60b6eb1
3
+ metadata.gz: 19e59bc2161a078d80d00adf538a7c33891a53f9beeb453748eec7e0810c5b65
4
+ data.tar.gz: da1d93c663b42a6e0b7be2f136bea3b0f3a86c3c36c57a72347c70e7e0538508
5
5
  SHA512:
6
- metadata.gz: 8608a2ee8e06ddd846772d40dc3e417560229729b7b736eda8a7f50977a7d2c6fc523f86fe64480b5172c5295137eae7c85f945f7ca310502c54c8b90dd75e8d
7
- data.tar.gz: 59fef1a13adc8f983c16428ee4d08d6ccdecea09b7583452b6ca07689727c3d6a386f5a5b767a20d117c8c4bc9775a2a6b32fd4caff1c1a5e85ee2af82e39d1b
6
+ metadata.gz: d13b06c8491f9cda42f8a8fd4fa9547aa6a95d075efbf52c789760adaa66d0c2bde24f810e23e4713df4362ae82cb82b0d61996386caaad2c685eb22f1a375db
7
+ data.tar.gz: a63a5cfe70b154b446dcbd0d2937d94a33fb033277c0d17aa1500a9ccd86dc8791e3cc40e8487ecc1c609eec304b91fb4a29b75b89a74b8efe24e29c49957d4e
data/digger.gemspec CHANGED
@@ -1,26 +1,27 @@
1
1
  # coding: utf-8
2
+
2
3
  lib = File.expand_path('../lib', __FILE__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
5
  require 'digger/version'
5
6
 
6
7
  Gem::Specification.new do |spec|
7
- spec.name = "digger"
8
+ spec.name = 'digger'
8
9
  spec.version = Digger::VERSION
9
- spec.authors = ["binz"]
10
- spec.email = ["xinkiang@gmail.com"]
10
+ spec.authors = ['binz']
11
+ spec.email = ['xinkiang@gmail.com']
11
12
  spec.summary = %q{Dig need stractual infomation from web page.}
12
13
  spec.description = %q{}
13
- spec.homepage = ""
14
- spec.license = "MIT"
14
+ spec.homepage = ''
15
+ spec.license = 'MIT'
15
16
 
16
17
  spec.files = `git ls-files -z`.split("\x0")
17
18
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
19
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
- spec.require_paths = ["lib"]
20
+ spec.require_paths = ['lib']
20
21
 
21
- spec.add_development_dependency "bundler", "~> 2.0"
22
- spec.add_development_dependency "rake", ">= 12.3.3"
22
+ spec.add_development_dependency 'rake', '>= 12.3.3'
23
+ spec.add_development_dependency 'bundler', '~> 2.0'
23
24
 
24
- spec.add_runtime_dependency 'nokogiri', '~> 1.6'
25
25
  spec.add_runtime_dependency 'http-cookie', '~> 1.0'
26
+ spec.add_runtime_dependency 'nokogiri', '~> 1.6'
26
27
  end
data/lib/digger/page.rb CHANGED
@@ -4,6 +4,7 @@ require 'ostruct'
4
4
  require 'set'
5
5
  require 'kconv'
6
6
  require 'uri'
7
+ require 'http/cookie'
7
8
 
8
9
  # https://github.com/taganaka/polipus/blob/master/lib/polipus/page.rb
9
10
  module Digger
@@ -101,6 +102,10 @@ module Digger
101
102
  @jsonp ||= JSON.parse body.match(/^[^(]+?\((.+)\)[^)]*$/)[1]
102
103
  end
103
104
 
105
+ def cookies
106
+ @cookies ||= (headers['set-cookie'] || []).flat_map { |c| ::HTTP::Cookie.parse(c, url) }
107
+ end
108
+
104
109
  #
105
110
  # Discard links, a next call of page.links will return an empty array
106
111
  #
@@ -273,4 +278,4 @@ module Digger
273
278
  from_hash hash
274
279
  end
275
280
  end
276
- end
281
+ end
@@ -11,7 +11,7 @@ module Digger
11
11
 
12
12
  def safe_block(&default_block)
13
13
  if block.nil? || (block.is_a?(String) && block.strip.empty?)
14
- default_block
14
+ default_block || ->(v) { v }
15
15
  elsif block.respond_to?(:call)
16
16
  block
17
17
  else
@@ -32,25 +32,32 @@ module Digger
32
32
  TYPES_CSS = %w[css_one css_many].freeze
33
33
  TYPES_JSON = %w[json jsonp].freeze
34
34
 
35
- TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON
35
+ TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON + ['cookie']
36
36
 
37
37
  def match_page(page)
38
38
  return unless page.success?
39
+
39
40
  if TYPES_REGEXP.include?(type) # regular expression
40
41
  regexp_match(page.body)
41
42
  elsif TYPES_CSS.include?(type) # css expression
42
43
  css_match(page.doc)
43
44
  elsif TYPES_JSON.include?(type)
44
45
  json_match(page)
46
+ else
47
+ cookie_get(page.cookies)
45
48
  end
46
49
  end
47
50
 
51
+ def cookie_get(cookies)
52
+ cookie = cookies.find { |c| c.name == value }&.value
53
+ safe_block.call(cookie)
54
+ end
55
+
48
56
  def json_match(page)
49
- block = safe_block { |j| j }
50
57
  json = page.send(type)
51
58
  keys = json_index_keys(value)
52
59
  match = json_fetch(json, keys)
53
- block.call(match)
60
+ safe_block.call(match)
54
61
  end
55
62
 
56
63
  def css_match(doc)
@@ -1,3 +1,3 @@
1
1
  module Digger
2
- VERSION = '0.1.7'.freeze
2
+ VERSION = '0.1.8'.freeze
3
3
  end
data/spec/page_spec.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'digger'
2
2
  require 'json'
3
3
  require 'uri'
4
+ require 'cgi'
4
5
 
5
6
  describe Digger::Page do
6
7
  it 'page json' do
@@ -15,13 +16,12 @@ describe Digger::Page do
15
16
 
16
17
  it 'fetch baidu' do
17
18
  http = Digger::HTTP.new
18
- page = http.fetch_page('http://www.baidu.com/')
19
+ page = http.fetch_page('http://baidu.com/')
19
20
  expect(page.code).to eq(200)
20
21
  end
21
22
 
22
- it 'page uri' do
23
- link ='https://www.baidu.com/s?wd=%E5%93%88%E5%93%88#hello'
24
- link = link.to_s.encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '').gsub(/#[\w]*$/, '')
25
- p link
26
- end
27
- end
23
+ # it 'page uri' do
24
+ # link = 'https://www.baidu.com/s?wd=%E5%93%88%E5%93%88#hello'
25
+ # link = link.to_s.encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '').gsub(/#\w*$/, '')
26
+ # end
27
+ end
data/spec/pattern_spec.rb CHANGED
@@ -2,14 +2,19 @@ require 'digger'
2
2
  require 'json'
3
3
 
4
4
  describe Digger::Pattern do
5
- it 'json fetch' do
6
- json = JSON.parse('[{"a":1,"b":[1,2,3]}]')
7
- pt = Digger::Pattern.new
8
- expect(pt.json_fetch(json, '$[0]')['a']).to eq(1)
9
- expect(pt.json_fetch(json, '$[0].a')).to eq(1)
10
- expect(pt.json_fetch(json, '$[0].b').length).to eq(3)
11
- expect(pt.json_fetch(json, '$[0].b[2]')).to eq(3)
12
- end
13
-
5
+ # it 'json fetch' do
6
+ # json = JSON.parse('[{"a":1,"b":[1,2,3]}]')
7
+ # pt = Digger::Pattern.new
8
+ # expect(pt.json_fetch(json, '$[0]')['a']).to eq(1)
9
+ # expect(pt.json_fetch(json, '$[0].a')).to eq(1)
10
+ # expect(pt.json_fetch(json, '$[0].b').length).to eq(3)
11
+ # expect(pt.json_fetch(json, '$[0].b[2]')).to eq(3)
12
+ # end
14
13
 
15
- end
14
+ it 'parse cookoe' do
15
+ page = Digger::HTTP.new.fetch_page('https://xueqiu.com/')
16
+ pt = Digger::Pattern.new({ type: 'cookie', value: 'xq_a_token', block: ->(v) { "!!#{v}" } })
17
+ result = pt.match_page(page)
18
+ expect(result.length).to eq(42)
19
+ end
20
+ end
metadata CHANGED
@@ -1,71 +1,71 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - binz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-12-26 00:00:00.000000000 Z
11
+ date: 2021-12-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
14
+ name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '2.0'
19
+ version: 12.3.3
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '2.0'
26
+ version: 12.3.3
27
27
  - !ruby/object:Gem::Dependency
28
- name: rake
28
+ name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 12.3.3
33
+ version: '2.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 12.3.3
40
+ version: '2.0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: nokogiri
42
+ name: http-cookie
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1.6'
47
+ version: '1.0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '1.6'
54
+ version: '1.0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: http-cookie
56
+ name: nokogiri
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '1.0'
61
+ version: '1.6'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '1.0'
68
+ version: '1.6'
69
69
  description: ''
70
70
  email:
71
71
  - xinkiang@gmail.com