digger 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/digger.gemspec +1 -1
- data/lib/digger/page.rb +2 -10
- data/lib/digger/version.rb +1 -1
- data/spec/digger_spec.rb +6 -7
- data/spec/page_spec.rb +13 -0
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f3e89f179fa868ecd2879180d1fbfbf03ba0ebee3731b9c8b4741d22663ff4aa
|
4
|
+
data.tar.gz: 1b27e4a1446e9835203bf5497aeebc3bc4ab58998a0fc443eeaaf7e7ec86c2c7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5671e5d2484ca744e5c75f97beeb473e1970291fc094c0274714af27cc847ef47bcf3b1b81534e6f299f35648fdf3aabec9d90777e4b2fbe49dc2629c048f610
|
7
|
+
data.tar.gz: 496534bb394d17792dc7173759b83c0dd62b8569a241b2a6a57e60a14ea99ddafd7e030fe16480c5782ec319f6dc1d3563919f7202026fb008522dd26cae6c01
|
data/digger.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 2.0"
|
22
|
-
spec.add_development_dependency "rake", "
|
22
|
+
spec.add_development_dependency "rake", ">= 12.3.3"
|
23
23
|
|
24
24
|
spec.add_runtime_dependency 'nokogiri', '~> 1.6'
|
25
25
|
spec.add_runtime_dependency 'http-cookie', '~> 1.0'
|
data/lib/digger/page.rb
CHANGED
@@ -3,6 +3,7 @@ require 'json'
|
|
3
3
|
require 'ostruct'
|
4
4
|
require 'set'
|
5
5
|
require 'kconv'
|
6
|
+
require 'uri'
|
6
7
|
|
7
8
|
# https://github.com/taganaka/polipus/blob/master/lib/polipus/page.rb
|
8
9
|
module Digger
|
@@ -186,16 +187,7 @@ module Digger
|
|
186
187
|
def to_absolute(link)
|
187
188
|
return nil if link.nil?
|
188
189
|
|
189
|
-
|
190
|
-
|
191
|
-
# remove anchor
|
192
|
-
link =
|
193
|
-
begin
|
194
|
-
URI.encode(URI.decode(link.to_s.gsub(/#[a-zA-Z0-9_-]*$/, '')))
|
195
|
-
rescue URI::Error
|
196
|
-
return nil
|
197
|
-
end
|
198
|
-
|
190
|
+
link = link.to_s.encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '').gsub(/#[\w]*$/, '')
|
199
191
|
relative = begin
|
200
192
|
URI(link)
|
201
193
|
rescue URI::Error
|
data/lib/digger/version.rb
CHANGED
data/spec/digger_spec.rb
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
require 'digger'
|
2
2
|
|
3
3
|
http = Digger::HTTP.new
|
4
|
-
page = http.fetch_page('http://
|
4
|
+
page = http.fetch_page('http://www.baidu.com/')
|
5
5
|
|
6
|
-
pattern = Digger::Pattern.new({type: 'css_many', value: '
|
6
|
+
pattern = Digger::Pattern.new({ type: 'css_many', value: '#s-top-left>a' })
|
7
7
|
|
8
8
|
class Item < Digger::Model
|
9
|
-
css_many sites: '
|
10
|
-
css_one logo: '.logo'
|
9
|
+
css_many sites: '#s-top-left>a'
|
11
10
|
validate_presence :sites
|
12
|
-
validate_includeness :sites
|
11
|
+
validate_includeness :sites
|
13
12
|
end
|
14
13
|
|
15
14
|
describe Digger do
|
@@ -19,12 +18,12 @@ describe Digger do
|
|
19
18
|
|
20
19
|
it "pattern should match content" do
|
21
20
|
sites = pattern.match_page(page)
|
22
|
-
expect(sites.include?('
|
21
|
+
expect(sites.include?('新闻')).to eq(true)
|
23
22
|
end
|
24
23
|
|
25
24
|
it "model should dig content" do
|
26
25
|
item = Item.new.match_page(page)
|
27
|
-
expect(item[:sites].include?('
|
26
|
+
expect(item[:sites].include?('新闻')).to be(true)
|
28
27
|
end
|
29
28
|
|
30
29
|
it "validation support" do
|
data/spec/page_spec.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'digger'
|
2
2
|
require 'json'
|
3
|
+
require 'uri'
|
3
4
|
|
4
5
|
describe Digger::Page do
|
5
6
|
it 'page json' do
|
@@ -11,4 +12,16 @@ describe Digger::Page do
|
|
11
12
|
expect(j1.json['b'][0]).to eq(1)
|
12
13
|
expect(j2.jsonp['b'][1]).to eq(2)
|
13
14
|
end
|
15
|
+
|
16
|
+
it 'fetch baidu' do
|
17
|
+
http = Digger::HTTP.new
|
18
|
+
page = http.fetch_page('http://www.baidu.com/')
|
19
|
+
expect(page.code).to eq(200)
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'page uri' do
|
23
|
+
link ='https://www.baidu.com/s?wd=%E5%93%88%E5%93%88#hello'
|
24
|
+
link = link.to_s.encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '').gsub(/#[\w]*$/, '')
|
25
|
+
p link
|
26
|
+
end
|
14
27
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- binz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-12-
|
11
|
+
date: 2021-12-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -28,16 +28,16 @@ dependencies:
|
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 12.3.3
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 12.3.3
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: nokogiri
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|