digger 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/digger.gemspec +1 -1
- data/lib/digger/page.rb +2 -10
- data/lib/digger/version.rb +1 -1
- data/spec/digger_spec.rb +6 -7
- data/spec/page_spec.rb +13 -0
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f3e89f179fa868ecd2879180d1fbfbf03ba0ebee3731b9c8b4741d22663ff4aa
|
4
|
+
data.tar.gz: 1b27e4a1446e9835203bf5497aeebc3bc4ab58998a0fc443eeaaf7e7ec86c2c7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5671e5d2484ca744e5c75f97beeb473e1970291fc094c0274714af27cc847ef47bcf3b1b81534e6f299f35648fdf3aabec9d90777e4b2fbe49dc2629c048f610
|
7
|
+
data.tar.gz: 496534bb394d17792dc7173759b83c0dd62b8569a241b2a6a57e60a14ea99ddafd7e030fe16480c5782ec319f6dc1d3563919f7202026fb008522dd26cae6c01
|
data/digger.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 2.0"
|
22
|
-
spec.add_development_dependency "rake", "
|
22
|
+
spec.add_development_dependency "rake", ">= 12.3.3"
|
23
23
|
|
24
24
|
spec.add_runtime_dependency 'nokogiri', '~> 1.6'
|
25
25
|
spec.add_runtime_dependency 'http-cookie', '~> 1.0'
|
data/lib/digger/page.rb
CHANGED
@@ -3,6 +3,7 @@ require 'json'
|
|
3
3
|
require 'ostruct'
|
4
4
|
require 'set'
|
5
5
|
require 'kconv'
|
6
|
+
require 'uri'
|
6
7
|
|
7
8
|
# https://github.com/taganaka/polipus/blob/master/lib/polipus/page.rb
|
8
9
|
module Digger
|
@@ -186,16 +187,7 @@ module Digger
|
|
186
187
|
def to_absolute(link)
|
187
188
|
return nil if link.nil?
|
188
189
|
|
189
|
-
|
190
|
-
|
191
|
-
# remove anchor
|
192
|
-
link =
|
193
|
-
begin
|
194
|
-
URI.encode(URI.decode(link.to_s.gsub(/#[a-zA-Z0-9_-]*$/, '')))
|
195
|
-
rescue URI::Error
|
196
|
-
return nil
|
197
|
-
end
|
198
|
-
|
190
|
+
link = link.to_s.encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '').gsub(/#[\w]*$/, '')
|
199
191
|
relative = begin
|
200
192
|
URI(link)
|
201
193
|
rescue URI::Error
|
data/lib/digger/version.rb
CHANGED
data/spec/digger_spec.rb
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
require 'digger'
|
2
2
|
|
3
3
|
http = Digger::HTTP.new
|
4
|
-
page = http.fetch_page('http://
|
4
|
+
page = http.fetch_page('http://www.baidu.com/')
|
5
5
|
|
6
|
-
pattern = Digger::Pattern.new({type: 'css_many', value: '
|
6
|
+
pattern = Digger::Pattern.new({ type: 'css_many', value: '#s-top-left>a' })
|
7
7
|
|
8
8
|
class Item < Digger::Model
|
9
|
-
css_many sites: '
|
10
|
-
css_one logo: '.logo'
|
9
|
+
css_many sites: '#s-top-left>a'
|
11
10
|
validate_presence :sites
|
12
|
-
validate_includeness :sites
|
11
|
+
validate_includeness :sites
|
13
12
|
end
|
14
13
|
|
15
14
|
describe Digger do
|
@@ -19,12 +18,12 @@ describe Digger do
|
|
19
18
|
|
20
19
|
it "pattern should match content" do
|
21
20
|
sites = pattern.match_page(page)
|
22
|
-
expect(sites.include?('
|
21
|
+
expect(sites.include?('新闻')).to eq(true)
|
23
22
|
end
|
24
23
|
|
25
24
|
it "model should dig content" do
|
26
25
|
item = Item.new.match_page(page)
|
27
|
-
expect(item[:sites].include?('
|
26
|
+
expect(item[:sites].include?('新闻')).to be(true)
|
28
27
|
end
|
29
28
|
|
30
29
|
it "validation support" do
|
data/spec/page_spec.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'digger'
|
2
2
|
require 'json'
|
3
|
+
require 'uri'
|
3
4
|
|
4
5
|
describe Digger::Page do
|
5
6
|
it 'page json' do
|
@@ -11,4 +12,16 @@ describe Digger::Page do
|
|
11
12
|
expect(j1.json['b'][0]).to eq(1)
|
12
13
|
expect(j2.jsonp['b'][1]).to eq(2)
|
13
14
|
end
|
15
|
+
|
16
|
+
it 'fetch baidu' do
|
17
|
+
http = Digger::HTTP.new
|
18
|
+
page = http.fetch_page('http://www.baidu.com/')
|
19
|
+
expect(page.code).to eq(200)
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'page uri' do
|
23
|
+
link ='https://www.baidu.com/s?wd=%E5%93%88%E5%93%88#hello'
|
24
|
+
link = link.to_s.encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '').gsub(/#[\w]*$/, '')
|
25
|
+
p link
|
26
|
+
end
|
14
27
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- binz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-12-
|
11
|
+
date: 2021-12-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -28,16 +28,16 @@ dependencies:
|
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 12.3.3
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 12.3.3
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: nokogiri
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|