digger 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 586e17ec8338bf102a91c8ee241632619a55b6b7
4
- data.tar.gz: d93040c059eaf4c4e34cc329c4938a324feea142
2
+ SHA256:
3
+ metadata.gz: 5c4a94163a25d4b53ad5b477040f69b8fccca026adc313f8f61759317c1bf198
4
+ data.tar.gz: 307b443277c16708103c172e5fb4ef4d833f7d2631a7f85779570a3cbeac8925
5
5
  SHA512:
6
- metadata.gz: 089a7c6b6b1f8904202cfa8f362cc30d39b1dbfaaff5be721ded38f9448fe03aed23c4986ff587ab60499bc661a7abe104d3096f9eb9bbd0fa91dae18abcd933
7
- data.tar.gz: 7ed6a2c324d55e634d86d1a450c12af045cba6049905e3a97d4eca3fe025fb11f92812c0e88d9c41cf5fa00eb44f454efb7efcfc598ad0b05257a6b0c6328a19
6
+ metadata.gz: 9da40123fd09615d0c69ca5104d1141b82981813ef8d175bc567a0f35e8f7dd868ce235bc0f265308e2133710f63c3ce8325d6b528ae060eccb77904c12e3139
7
+ data.tar.gz: c05be67df6db25345acfdc3615690c0467029dd40ffde262863923b6f0786696a16fd547277ec4d3764ed39b399174dd28e10ca7eb31ffb55f773e08b04f2986
data/digger.gemspec CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_development_dependency "bundler", "~> 1.7"
21
+ spec.add_development_dependency "bundler", "~> 2.0"
22
22
  spec.add_development_dependency "rake", "~> 10.0"
23
23
 
24
24
  spec.add_runtime_dependency 'nokogiri', '~> 1.6'
data/lib/digger/page.rb CHANGED
@@ -95,7 +95,13 @@ module Digger
95
95
  end
96
96
  end
97
97
 
98
# Parses the page body as JSON, caching the parsed value in @json.
#
# A cached value that is nil or false is not considered cached, so such a
# body is parsed again on the next call.
#
# @return [Object] the value JSON.parse produces for +body+
def json
  return @json if @json

  @json = JSON.parse(body)
end
98
101
 
102
# Parses a JSONP response body such as `callback({...});`, caching the
# parsed payload in @jsonp.
#
# The payload is the text between the first "(" that follows the callback
# name and the last ")" of the wrapper.
#
# @return [Object] the value JSON.parse produces for the wrapped payload
# @raise [JSON::ParserError] when the body is not a `name(...)` wrapper or
#   the wrapped payload is not valid JSON
def jsonp
  @jsonp ||= begin
    text = body
    # Try the single-line form first so bodies that already parsed keep
    # yielding exactly the same payload; fall back to a form where "."
    # spans newlines so pretty-printed (multi-line) payloads are accepted.
    wrapper = text.match(/^[^\(]+?\((.+)\)[^\)]*$/) ||
              text.match(/\A[^(]+?\((.+)\)[^)]*\z/m)
    # Without this guard a non-JSONP body fails with NoMethodError on nil.
    raise JSON::ParserError, 'body is not a JSONP callback wrapper' if wrapper.nil?

    JSON.parse(wrapper[1])
  end
end
99
105
 
100
106
  #
101
107
  # Discard links, a next call of page.links will return an empty array
@@ -27,33 +27,36 @@ module Digger
27
27
  end
28
28
 
29
29
  MATCH_MAX = 3
30
-
31
- TYPES = 0.upto(MATCH_MAX).map{|i| "match_#{i}"} + %w{match_many css_one css_many}
32
30
 
33
- def regexp?
34
- TYPES.index(type) <= MATCH_MAX + 1 # match_many in addition
35
- end
31
+ TYPES_REGEXP = 0.upto(MATCH_MAX).map{|i| "match_#{i}"} + %w{match_many}
32
+ TYPES_CSS = %w{css_one css_many}
33
+ TYPES_JSON = %w{json jsonp}
34
+
35
+ TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON
36
36
 
37
37
  def match_page(page, &callback)
38
38
  blk = callback || safe_block
39
- if regexp? # regular expression
40
- index = TYPES.index(type)
41
- blk ||= ->(text){text.strip}
39
+ if TYPES_REGEXP.include?(type) # regular expression
40
+ blk ||= ->(text){ text.strip }
42
41
  # content is String
43
42
  if type == 'match_many'
44
43
  match = page.body.gsub(value).to_a
45
44
  else
45
+ index = TYPES_REGEXP.index(type)
46
46
  matches = page.body.match(value)
47
47
  match = matches.nil? ? nil : matches[index]
48
48
  end
49
- else # css expression
50
- blk ||= ->(node){node.content.strip}
49
+ elsif TYPES_CSS.include?(type) # css expression
50
+ blk ||= ->(node){ node.content.strip }
51
51
  # content is Nokogiri::HTML::Document
52
52
  if type == 'css_one'
53
53
  match = page.doc.css(value).first
54
- elsif type == 'css_many' # css_many
54
+ else
55
55
  match = page.doc.css(value)
56
56
  end
57
+ elsif TYPES_JSON.include?(type)
58
+ json = page.send(type)
59
+ match = json_fetch(json, value)
57
60
  end
58
61
  if match.nil?
59
62
  nil
@@ -66,6 +69,23 @@ module Digger
66
69
  nil
67
70
  end
68
71
 
72
# Walks a parsed JSON structure (nested Hash/Array) along a path.
#
# @param json [Object] parsed JSON value to descend into
# @param keys [String, Array<Hash>] either a path string such as
#   '$.k1.k2[0]' ('$' alone selects the whole document), or an already
#   parsed list of steps, each `{ key: 'name' }` or `{ index: 0 }`.
#   A step list passed in is left unmodified.
# @return [Object, nil] the value found at the path; nil when a value on the
#   way is nil, or when +keys+ is neither a String nor an Array
# @raise [ArgumentError] when a String path does not start with '$' or
#   contains whitespace
def json_fetch(json, keys)
  steps =
    case keys
    when String
      # Anchor on the whole string (a single trailing newline is tolerated)
      # so text on a preceding line cannot smuggle an invalid path through.
      unless keys =~ /\A\$\S*\Z/
        raise ArgumentError, "invalid JSON path: #{keys.inspect}"
      end

      # Each match is [key, index]: '.name' fills key, '[3]' fills index.
      keys.scan(/\.(\w+)|\[(\d+)\]/).map do |key, index|
        key.nil? ? { index: index.to_i } : { key: key }
      end
    when Array
      keys
    else
      return nil
    end

  # Fold over the steps instead of shifting them off, so the caller's Array
  # can be reused for further lookups.
  steps.reduce(json) do |node, step|
    break nil if node.nil? # a missing intermediate value ends the walk

    node[step[:index] || step[:key]]
  end
end
88
+
69
89
  class Nokogiri::XML::Node
70
90
  %w{one many}.each do |name|
71
91
  define_method "inner_#{name}" do |css, &block|
@@ -1,3 +1,3 @@
1
1
  module Digger
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
data/spec/page_spec.rb ADDED
@@ -0,0 +1,14 @@
1
require 'digger'
require 'json'

# Checks that Digger::Page exposes a plain JSON body through #json and a
# callback-wrapped body through #jsonp.
describe Digger::Page do
  it 'page json' do
    payload = '{"a":1,"b":[1,2,3]}'
    plain_page = Digger::Page.new('', body: payload)
    wrapped_page = Digger::Page.new('', body: "hello(#{payload});")

    expect(plain_page.json['a']).to eq(1)
    expect(wrapped_page.jsonp['a']).to eq(1)
    expect(plain_page.json['b'][0]).to eq(1)
    expect(wrapped_page.jsonp['b'][1]).to eq(2)
  end
end
@@ -0,0 +1,15 @@
1
require 'digger'
require 'json'

# Checks Digger::Pattern#json_fetch against '$'-rooted path strings.
describe Digger::Pattern do
  it 'json fetch' do
    document = JSON.parse('{"a":1,"b":[1,2,3]}')
    pattern = Digger::Pattern.new

    expect(pattern.json_fetch(document, '$')['a']).to eq(1)
    expect(pattern.json_fetch(document, '$.a')).to eq(1)
    expect(pattern.json_fetch(document, '$.b').length).to eq(3)
    expect(pattern.json_fetch(document, '$.b[2]')).to eq(3)
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - binz
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-01 00:00:00.000000000 Z
11
+ date: 2021-12-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.7'
19
+ version: '2.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.7'
26
+ version: '2.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -87,12 +87,14 @@ files:
87
87
  - lib/digger/pattern.rb
88
88
  - lib/digger/version.rb
89
89
  - spec/digger_spec.rb
90
+ - spec/page_spec.rb
91
+ - spec/pattern_spec.rb
90
92
  - spec/validate_spec.rb
91
93
  homepage: ''
92
94
  licenses:
93
95
  - MIT
94
96
  metadata: {}
95
- post_install_message:
97
+ post_install_message:
96
98
  rdoc_options: []
97
99
  require_paths:
98
100
  - lib
@@ -107,11 +109,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
107
109
  - !ruby/object:Gem::Version
108
110
  version: '0'
109
111
  requirements: []
110
- rubyforge_project:
111
- rubygems_version: 2.2.2
112
- signing_key:
112
+ rubygems_version: 3.2.32
113
+ signing_key:
113
114
  specification_version: 4
114
115
  summary: Dig need stractual infomation from web page.
115
116
  test_files:
116
117
  - spec/digger_spec.rb
118
+ - spec/page_spec.rb
119
+ - spec/pattern_spec.rb
117
120
  - spec/validate_spec.rb