digger 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/digger.gemspec +1 -1
- data/lib/digger/page.rb +6 -0
- data/lib/digger/pattern.rb +31 -11
- data/lib/digger/version.rb +1 -1
- data/spec/page_spec.rb +14 -0
- data/spec/pattern_spec.rb +15 -0
- metadata +12 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 5c4a94163a25d4b53ad5b477040f69b8fccca026adc313f8f61759317c1bf198
|
4
|
+
data.tar.gz: 307b443277c16708103c172e5fb4ef4d833f7d2631a7f85779570a3cbeac8925
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9da40123fd09615d0c69ca5104d1141b82981813ef8d175bc567a0f35e8f7dd868ce235bc0f265308e2133710f63c3ce8325d6b528ae060eccb77904c12e3139
|
7
|
+
data.tar.gz: c05be67df6db25345acfdc3615690c0467029dd40ffde262863923b6f0786696a16fd547277ec4d3764ed39b399174dd28e10ca7eb31ffb55f773e08b04f2986
|
data/digger.gemspec
CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_development_dependency "bundler", "~>
|
21
|
+
spec.add_development_dependency "bundler", "~> 2.0"
|
22
22
|
spec.add_development_dependency "rake", "~> 10.0"
|
23
23
|
|
24
24
|
spec.add_runtime_dependency 'nokogiri', '~> 1.6'
|
data/lib/digger/page.rb
CHANGED
@@ -95,7 +95,13 @@ module Digger
|
|
95
95
|
end
|
96
96
|
end
|
97
97
|
|
98
|
+
def json
|
99
|
+
@json ||= JSON.parse body
|
100
|
+
end
|
98
101
|
|
102
|
+
def jsonp
|
103
|
+
@jsonp ||= JSON.parse body.match(/^[^\(]+?\((.+)\)[^\)]*$/)[1]
|
104
|
+
end
|
99
105
|
|
100
106
|
#
|
101
107
|
# Discard links, a next call of page.links will return an empty array
|
data/lib/digger/pattern.rb
CHANGED
@@ -27,33 +27,36 @@ module Digger
|
|
27
27
|
end
|
28
28
|
|
29
29
|
MATCH_MAX = 3
|
30
|
-
|
31
|
-
TYPES = 0.upto(MATCH_MAX).map{|i| "match_#{i}"} + %w{match_many css_one css_many}
|
32
30
|
|
33
|
-
|
34
|
-
|
35
|
-
|
31
|
+
TYPES_REGEXP = 0.upto(MATCH_MAX).map{|i| "match_#{i}"} + %w{match_many}
|
32
|
+
TYPES_CSS = %w{css_one css_many}
|
33
|
+
TYPES_JSON = %w{json jsonp}
|
34
|
+
|
35
|
+
TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON
|
36
36
|
|
37
37
|
def match_page(page, &callback)
|
38
38
|
blk = callback || safe_block
|
39
|
-
if
|
40
|
-
|
41
|
-
blk ||= ->(text){text.strip}
|
39
|
+
if TYPES_REGEXP.include?(type) # regular expression
|
40
|
+
blk ||= ->(text){ text.strip }
|
42
41
|
# content is String
|
43
42
|
if type == 'match_many'
|
44
43
|
match = page.body.gsub(value).to_a
|
45
44
|
else
|
45
|
+
index = TYPES_REGEXP.index(type)
|
46
46
|
matches = page.body.match(value)
|
47
47
|
match = matches.nil? ? nil : matches[index]
|
48
48
|
end
|
49
|
-
|
50
|
-
blk ||= ->(node){node.content.strip}
|
49
|
+
elsif TYPES_CSS.include?(type) # css expression
|
50
|
+
blk ||= ->(node){ node.content.strip }
|
51
51
|
# content is Nokogiri::HTML::Document
|
52
52
|
if type == 'css_one'
|
53
53
|
match = page.doc.css(value).first
|
54
|
-
|
54
|
+
else
|
55
55
|
match = page.doc.css(value)
|
56
56
|
end
|
57
|
+
elsif TYPES_JSON.include?(type)
|
58
|
+
json = page.send(type)
|
59
|
+
match = json_fetch(json, value)
|
57
60
|
end
|
58
61
|
if match.nil?
|
59
62
|
nil
|
@@ -66,6 +69,23 @@ module Digger
|
|
66
69
|
nil
|
67
70
|
end
|
68
71
|
|
72
|
+
def json_fetch(json, keys)
|
73
|
+
if keys.is_a? String
|
74
|
+
# parse json keys like '$.k1.k2[0]'
|
75
|
+
parts = keys.match(/^\$[\S]*$/)[0].scan(/(\.([\w]+)|\[([\d]+)\])/).map do |p|
|
76
|
+
p[1].nil? ? { index: p[2].to_i } : { key: p[1] }
|
77
|
+
end
|
78
|
+
json_fetch(json, parts)
|
79
|
+
elsif keys.is_a? Array
|
80
|
+
if keys.length == 0
|
81
|
+
json
|
82
|
+
else
|
83
|
+
pt = keys.shift
|
84
|
+
json_fetch(json[pt[:index] || pt[:key]], keys)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
69
89
|
class Nokogiri::XML::Node
|
70
90
|
%w{one many}.each do |name|
|
71
91
|
define_method "inner_#{name}" do |css, &block|
|
data/lib/digger/version.rb
CHANGED
data/spec/page_spec.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'digger'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
describe Digger::Page do
|
5
|
+
it 'page json' do
|
6
|
+
json_str = '{"a":1,"b":[1,2,3]}'
|
7
|
+
j1 = Digger::Page.new('', body: json_str)
|
8
|
+
j2 = Digger::Page.new('', body: "hello(#{json_str});")
|
9
|
+
expect(j1.json['a']).to eq(1)
|
10
|
+
expect(j2.jsonp['a']).to eq(1)
|
11
|
+
expect(j1.json['b'][0]).to eq(1)
|
12
|
+
expect(j2.jsonp['b'][1]).to eq(2)
|
13
|
+
end
|
14
|
+
end
|
data/spec/pattern_spec.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'digger'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
describe Digger::Pattern do
|
5
|
+
it 'json fetch' do
|
6
|
+
json = JSON.parse('{"a":1,"b":[1,2,3]}')
|
7
|
+
pt = Digger::Pattern.new
|
8
|
+
expect(pt.json_fetch(json, '$')['a']).to eq(1)
|
9
|
+
expect(pt.json_fetch(json, '$.a')).to eq(1)
|
10
|
+
expect(pt.json_fetch(json, '$.b').length).to eq(3)
|
11
|
+
expect(pt.json_fetch(json, '$.b[2]')).to eq(3)
|
12
|
+
end
|
13
|
+
|
14
|
+
|
15
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- binz
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-12-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '2.0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '2.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -87,12 +87,14 @@ files:
|
|
87
87
|
- lib/digger/pattern.rb
|
88
88
|
- lib/digger/version.rb
|
89
89
|
- spec/digger_spec.rb
|
90
|
+
- spec/page_spec.rb
|
91
|
+
- spec/pattern_spec.rb
|
90
92
|
- spec/validate_spec.rb
|
91
93
|
homepage: ''
|
92
94
|
licenses:
|
93
95
|
- MIT
|
94
96
|
metadata: {}
|
95
|
-
post_install_message:
|
97
|
+
post_install_message:
|
96
98
|
rdoc_options: []
|
97
99
|
require_paths:
|
98
100
|
- lib
|
@@ -107,11 +109,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
107
109
|
- !ruby/object:Gem::Version
|
108
110
|
version: '0'
|
109
111
|
requirements: []
|
110
|
-
|
111
|
-
|
112
|
-
signing_key:
|
112
|
+
rubygems_version: 3.2.32
|
113
|
+
signing_key:
|
113
114
|
specification_version: 4
|
114
115
|
summary: Dig need stractual infomation from web page.
|
115
116
|
test_files:
|
116
117
|
- spec/digger_spec.rb
|
118
|
+
- spec/page_spec.rb
|
119
|
+
- spec/pattern_spec.rb
|
117
120
|
- spec/validate_spec.rb
|