digger 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 586e17ec8338bf102a91c8ee241632619a55b6b7
4
- data.tar.gz: d93040c059eaf4c4e34cc329c4938a324feea142
2
+ SHA256:
3
+ metadata.gz: 5c4a94163a25d4b53ad5b477040f69b8fccca026adc313f8f61759317c1bf198
4
+ data.tar.gz: 307b443277c16708103c172e5fb4ef4d833f7d2631a7f85779570a3cbeac8925
5
5
  SHA512:
6
- metadata.gz: 089a7c6b6b1f8904202cfa8f362cc30d39b1dbfaaff5be721ded38f9448fe03aed23c4986ff587ab60499bc661a7abe104d3096f9eb9bbd0fa91dae18abcd933
7
- data.tar.gz: 7ed6a2c324d55e634d86d1a450c12af045cba6049905e3a97d4eca3fe025fb11f92812c0e88d9c41cf5fa00eb44f454efb7efcfc598ad0b05257a6b0c6328a19
6
+ metadata.gz: 9da40123fd09615d0c69ca5104d1141b82981813ef8d175bc567a0f35e8f7dd868ce235bc0f265308e2133710f63c3ce8325d6b528ae060eccb77904c12e3139
7
+ data.tar.gz: c05be67df6db25345acfdc3615690c0467029dd40ffde262863923b6f0786696a16fd547277ec4d3764ed39b399174dd28e10ca7eb31ffb55f773e08b04f2986
data/digger.gemspec CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_development_dependency "bundler", "~> 1.7"
21
+ spec.add_development_dependency "bundler", "~> 2.0"
22
22
  spec.add_development_dependency "rake", "~> 10.0"
23
23
 
24
24
  spec.add_runtime_dependency 'nokogiri', '~> 1.6'
data/lib/digger/page.rb CHANGED
@@ -95,7 +95,13 @@ module Digger
95
95
  end
96
96
  end
97
97
 
98
+ def json
99
+ @json ||= JSON.parse body
100
+ end
98
101
 
102
+ def jsonp
103
+ @jsonp ||= JSON.parse body.match(/^[^\(]+?\((.+)\)[^\)]*$/)[1]
104
+ end
99
105
 
100
106
  #
101
107
  # Discard links, a next call of page.links will return an empty array
@@ -27,33 +27,36 @@ module Digger
27
27
  end
28
28
 
29
29
  MATCH_MAX = 3
30
-
31
- TYPES = 0.upto(MATCH_MAX).map{|i| "match_#{i}"} + %w{match_many css_one css_many}
32
30
 
33
- def regexp?
34
- TYPES.index(type) <= MATCH_MAX + 1 # match_many in addition
35
- end
31
+ TYPES_REGEXP = 0.upto(MATCH_MAX).map{|i| "match_#{i}"} + %w{match_many}
32
+ TYPES_CSS = %w{css_one css_many}
33
+ TYPES_JSON = %w{json jsonp}
34
+
35
+ TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON
36
36
 
37
37
  def match_page(page, &callback)
38
38
  blk = callback || safe_block
39
- if regexp? # regular expression
40
- index = TYPES.index(type)
41
- blk ||= ->(text){text.strip}
39
+ if TYPES_REGEXP.include?(type) # regular expression
40
+ blk ||= ->(text){ text.strip }
42
41
  # content is String
43
42
  if type == 'match_many'
44
43
  match = page.body.gsub(value).to_a
45
44
  else
45
+ index = TYPES_REGEXP.index(type)
46
46
  matches = page.body.match(value)
47
47
  match = matches.nil? ? nil : matches[index]
48
48
  end
49
- else # css expression
50
- blk ||= ->(node){node.content.strip}
49
+ elsif TYPES_CSS.include?(type) # css expression
50
+ blk ||= ->(node){ node.content.strip }
51
51
  # content is Nokogiri::HTML::Document
52
52
  if type == 'css_one'
53
53
  match = page.doc.css(value).first
54
- elsif type == 'css_many' # css_many
54
+ else
55
55
  match = page.doc.css(value)
56
56
  end
57
+ elsif TYPES_JSON.include?(type)
58
+ json = page.send(type)
59
+ match = json_fetch(json, value)
57
60
  end
58
61
  if match.nil?
59
62
  nil
@@ -66,6 +69,23 @@ module Digger
66
69
  nil
67
70
  end
68
71
 
72
+ def json_fetch(json, keys)
73
+ if keys.is_a? String
74
+ # parse json keys like '$.k1.k2[0]'
75
+ parts = keys.match(/^\$[\S]*$/)[0].scan(/(\.([\w]+)|\[([\d]+)\])/).map do |p|
76
+ p[1].nil? ? { index: p[2].to_i } : { key: p[1] }
77
+ end
78
+ json_fetch(json, parts)
79
+ elsif keys.is_a? Array
80
+ if keys.length == 0
81
+ json
82
+ else
83
+ pt = keys.shift
84
+ json_fetch(json[pt[:index] || pt[:key]], keys)
85
+ end
86
+ end
87
+ end
88
+
69
89
  class Nokogiri::XML::Node
70
90
  %w{one many}.each do |name|
71
91
  define_method "inner_#{name}" do |css, &block|
@@ -1,3 +1,3 @@
1
1
  module Digger
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
data/spec/page_spec.rb ADDED
@@ -0,0 +1,14 @@
1
+ require 'digger'
2
+ require 'json'
3
+
4
+ describe Digger::Page do
5
+ it 'page json' do
6
+ json_str = '{"a":1,"b":[1,2,3]}'
7
+ j1 = Digger::Page.new('', body: json_str)
8
+ j2 = Digger::Page.new('', body: "hello(#{json_str});")
9
+ expect(j1.json['a']).to eq(1)
10
+ expect(j2.jsonp['a']).to eq(1)
11
+ expect(j1.json['b'][0]).to eq(1)
12
+ expect(j2.jsonp['b'][1]).to eq(2)
13
+ end
14
+ end
@@ -0,0 +1,15 @@
1
+ require 'digger'
2
+ require 'json'
3
+
4
+ describe Digger::Pattern do
5
+ it 'json fetch' do
6
+ json = JSON.parse('{"a":1,"b":[1,2,3]}')
7
+ pt = Digger::Pattern.new
8
+ expect(pt.json_fetch(json, '$')['a']).to eq(1)
9
+ expect(pt.json_fetch(json, '$.a')).to eq(1)
10
+ expect(pt.json_fetch(json, '$.b').length).to eq(3)
11
+ expect(pt.json_fetch(json, '$.b[2]')).to eq(3)
12
+ end
13
+
14
+
15
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - binz
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-01 00:00:00.000000000 Z
11
+ date: 2021-12-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.7'
19
+ version: '2.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.7'
26
+ version: '2.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -87,12 +87,14 @@ files:
87
87
  - lib/digger/pattern.rb
88
88
  - lib/digger/version.rb
89
89
  - spec/digger_spec.rb
90
+ - spec/page_spec.rb
91
+ - spec/pattern_spec.rb
90
92
  - spec/validate_spec.rb
91
93
  homepage: ''
92
94
  licenses:
93
95
  - MIT
94
96
  metadata: {}
95
- post_install_message:
97
+ post_install_message:
96
98
  rdoc_options: []
97
99
  require_paths:
98
100
  - lib
@@ -107,11 +109,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
107
109
  - !ruby/object:Gem::Version
108
110
  version: '0'
109
111
  requirements: []
110
- rubyforge_project:
111
- rubygems_version: 2.2.2
112
- signing_key:
112
+ rubygems_version: 3.2.32
113
+ signing_key:
113
114
  specification_version: 4
114
115
  summary: Dig need stractual infomation from web page.
115
116
  test_files:
116
117
  - spec/digger_spec.rb
118
+ - spec/page_spec.rb
119
+ - spec/pattern_spec.rb
117
120
  - spec/validate_spec.rb