digger 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5c4a94163a25d4b53ad5b477040f69b8fccca026adc313f8f61759317c1bf198
4
- data.tar.gz: 307b443277c16708103c172e5fb4ef4d833f7d2631a7f85779570a3cbeac8925
3
+ metadata.gz: f3e89f179fa868ecd2879180d1fbfbf03ba0ebee3731b9c8b4741d22663ff4aa
4
+ data.tar.gz: 1b27e4a1446e9835203bf5497aeebc3bc4ab58998a0fc443eeaaf7e7ec86c2c7
5
5
  SHA512:
6
- metadata.gz: 9da40123fd09615d0c69ca5104d1141b82981813ef8d175bc567a0f35e8f7dd868ce235bc0f265308e2133710f63c3ce8325d6b528ae060eccb77904c12e3139
7
- data.tar.gz: c05be67df6db25345acfdc3615690c0467029dd40ffde262863923b6f0786696a16fd547277ec4d3764ed39b399174dd28e10ca7eb31ffb55f773e08b04f2986
6
+ metadata.gz: 5671e5d2484ca744e5c75f97beeb473e1970291fc094c0274714af27cc847ef47bcf3b1b81534e6f299f35648fdf3aabec9d90777e4b2fbe49dc2629c048f610
7
+ data.tar.gz: 496534bb394d17792dc7173759b83c0dd62b8569a241b2a6a57e60a14ea99ddafd7e030fe16480c5782ec319f6dc1d3563919f7202026fb008522dd26cae6c01
data/digger.gemspec CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 2.0"
22
- spec.add_development_dependency "rake", "~> 10.0"
22
+ spec.add_development_dependency "rake", ">= 12.3.3"
23
23
 
24
24
  spec.add_runtime_dependency 'nokogiri', '~> 1.6'
25
25
  spec.add_runtime_dependency 'http-cookie', '~> 1.0'
data/lib/digger/page.rb CHANGED
@@ -3,6 +3,7 @@ require 'json'
3
3
  require 'ostruct'
4
4
  require 'set'
5
5
  require 'kconv'
6
+ require 'uri'
6
7
 
7
8
  # https://github.com/taganaka/polipus/blob/master/lib/polipus/page.rb
8
9
  module Digger
@@ -186,16 +187,7 @@ module Digger
186
187
  def to_absolute(link)
187
188
  return nil if link.nil?
188
189
 
189
- # link = link.to_s.encode('utf-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '')
190
-
191
- # remove anchor
192
- link =
193
- begin
194
- URI.encode(URI.decode(link.to_s.gsub(/#[a-zA-Z0-9_-]*$/, '')))
195
- rescue URI::Error
196
- return nil
197
- end
198
-
190
+ link = link.to_s.encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '').gsub(/#[\w]*$/, '')
199
191
  relative = begin
200
192
  URI(link)
201
193
  rescue URI::Error
@@ -1,3 +1,3 @@
1
1
  module Digger
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
data/spec/digger_spec.rb CHANGED
@@ -1,15 +1,14 @@
1
1
  require 'digger'
2
2
 
3
3
  http = Digger::HTTP.new
4
- page = http.fetch_page('http://nan.so/')
4
+ page = http.fetch_page('http://www.baidu.com/')
5
5
 
6
- pattern = Digger::Pattern.new({type: 'css_many', value: '.sites>a>span' })
6
+ pattern = Digger::Pattern.new({ type: 'css_many', value: '#s-top-left>a' })
7
7
 
8
8
  class Item < Digger::Model
9
- css_many sites: '.sites>a>span'
10
- css_one logo: '.logo'
9
+ css_many sites: '#s-top-left>a'
11
10
  validate_presence :sites
12
- validate_includeness :sites, :logo
11
+ validate_includeness :sites
13
12
  end
14
13
 
15
14
  describe Digger do
@@ -19,12 +18,12 @@ describe Digger do
19
18
 
20
19
  it "pattern should match content" do
21
20
  sites = pattern.match_page(page)
22
- expect(sites.include?('百度网盘')).to eq(true)
21
+ expect(sites.include?('新闻')).to eq(true)
23
22
  end
24
23
 
25
24
  it "model should dig content" do
26
25
  item = Item.new.match_page(page)
27
- expect(item[:sites].include?('读远')).to be(true)
26
+ expect(item[:sites].include?('新闻')).to be(true)
28
27
  end
29
28
 
30
29
  it "validation support" do
data/spec/page_spec.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'digger'
2
2
  require 'json'
3
+ require 'uri'
3
4
 
4
5
  describe Digger::Page do
5
6
  it 'page json' do
@@ -11,4 +12,16 @@ describe Digger::Page do
11
12
  expect(j1.json['b'][0]).to eq(1)
12
13
  expect(j2.jsonp['b'][1]).to eq(2)
13
14
  end
15
+
16
+ it 'fetch baidu' do
17
+ http = Digger::HTTP.new
18
+ page = http.fetch_page('http://www.baidu.com/')
19
+ expect(page.code).to eq(200)
20
+ end
21
+
22
+ it 'page uri' do
23
+ link ='https://www.baidu.com/s?wd=%E5%93%88%E5%93%88#hello'
24
+ link = link.to_s.encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '').gsub(/#[\w]*$/, '')
25
+ p link
26
+ end
14
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - binz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-12-25 00:00:00.000000000 Z
11
+ date: 2021-12-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -28,16 +28,16 @@ dependencies:
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: 12.3.3
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: 12.3.3
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: nokogiri
43
43
  requirement: !ruby/object:Gem::Requirement