digger 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5c4a94163a25d4b53ad5b477040f69b8fccca026adc313f8f61759317c1bf198
4
- data.tar.gz: 307b443277c16708103c172e5fb4ef4d833f7d2631a7f85779570a3cbeac8925
3
+ metadata.gz: f3e89f179fa868ecd2879180d1fbfbf03ba0ebee3731b9c8b4741d22663ff4aa
4
+ data.tar.gz: 1b27e4a1446e9835203bf5497aeebc3bc4ab58998a0fc443eeaaf7e7ec86c2c7
5
5
  SHA512:
6
- metadata.gz: 9da40123fd09615d0c69ca5104d1141b82981813ef8d175bc567a0f35e8f7dd868ce235bc0f265308e2133710f63c3ce8325d6b528ae060eccb77904c12e3139
7
- data.tar.gz: c05be67df6db25345acfdc3615690c0467029dd40ffde262863923b6f0786696a16fd547277ec4d3764ed39b399174dd28e10ca7eb31ffb55f773e08b04f2986
6
+ metadata.gz: 5671e5d2484ca744e5c75f97beeb473e1970291fc094c0274714af27cc847ef47bcf3b1b81534e6f299f35648fdf3aabec9d90777e4b2fbe49dc2629c048f610
7
+ data.tar.gz: 496534bb394d17792dc7173759b83c0dd62b8569a241b2a6a57e60a14ea99ddafd7e030fe16480c5782ec319f6dc1d3563919f7202026fb008522dd26cae6c01
data/digger.gemspec CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 2.0"
22
- spec.add_development_dependency "rake", "~> 10.0"
22
+ spec.add_development_dependency "rake", ">= 12.3.3"
23
23
 
24
24
  spec.add_runtime_dependency 'nokogiri', '~> 1.6'
25
25
  spec.add_runtime_dependency 'http-cookie', '~> 1.0'
data/lib/digger/page.rb CHANGED
@@ -3,6 +3,7 @@ require 'json'
3
3
  require 'ostruct'
4
4
  require 'set'
5
5
  require 'kconv'
6
+ require 'uri'
6
7
 
7
8
  # https://github.com/taganaka/polipus/blob/master/lib/polipus/page.rb
8
9
  module Digger
@@ -186,16 +187,7 @@ module Digger
186
187
  def to_absolute(link)
187
188
  return nil if link.nil?
188
189
 
189
- # link = link.to_s.encode('utf-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '')
190
-
191
- # remove anchor
192
- link =
193
- begin
194
- URI.encode(URI.decode(link.to_s.gsub(/#[a-zA-Z0-9_-]*$/, '')))
195
- rescue URI::Error
196
- return nil
197
- end
198
-
190
+ link = link.to_s.encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '').gsub(/#[\w]*$/, '')
199
191
  relative = begin
200
192
  URI(link)
201
193
  rescue URI::Error
@@ -1,3 +1,3 @@
1
1
  module Digger
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
data/spec/digger_spec.rb CHANGED
@@ -1,15 +1,14 @@
1
1
  require 'digger'
2
2
 
3
3
  http = Digger::HTTP.new
4
- page = http.fetch_page('http://nan.so/')
4
+ page = http.fetch_page('http://www.baidu.com/')
5
5
 
6
- pattern = Digger::Pattern.new({type: 'css_many', value: '.sites>a>span' })
6
+ pattern = Digger::Pattern.new({ type: 'css_many', value: '#s-top-left>a' })
7
7
 
8
8
  class Item < Digger::Model
9
- css_many sites: '.sites>a>span'
10
- css_one logo: '.logo'
9
+ css_many sites: '#s-top-left>a'
11
10
  validate_presence :sites
12
- validate_includeness :sites, :logo
11
+ validate_includeness :sites
13
12
  end
14
13
 
15
14
  describe Digger do
@@ -19,12 +18,12 @@ describe Digger do
19
18
 
20
19
  it "pattern should match content" do
21
20
  sites = pattern.match_page(page)
22
- expect(sites.include?('百度网盘')).to eq(true)
21
+ expect(sites.include?('新闻')).to eq(true)
23
22
  end
24
23
 
25
24
  it "model should dig content" do
26
25
  item = Item.new.match_page(page)
27
- expect(item[:sites].include?('读远')).to be(true)
26
+ expect(item[:sites].include?('新闻')).to be(true)
28
27
  end
29
28
 
30
29
  it "validation support" do
data/spec/page_spec.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'digger'
2
2
  require 'json'
3
+ require 'uri'
3
4
 
4
5
  describe Digger::Page do
5
6
  it 'page json' do
@@ -11,4 +12,16 @@ describe Digger::Page do
11
12
  expect(j1.json['b'][0]).to eq(1)
12
13
  expect(j2.jsonp['b'][1]).to eq(2)
13
14
  end
15
+
16
+ it 'fetch baidu' do
17
+ http = Digger::HTTP.new
18
+ page = http.fetch_page('http://www.baidu.com/')
19
+ expect(page.code).to eq(200)
20
+ end
21
+
22
+ it 'page uri' do
23
+ link ='https://www.baidu.com/s?wd=%E5%93%88%E5%93%88#hello'
24
+ link = link.to_s.encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '').gsub(/#[\w]*$/, '')
25
+ p link
26
+ end
14
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - binz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-12-25 00:00:00.000000000 Z
11
+ date: 2021-12-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -28,16 +28,16 @@ dependencies:
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: 12.3.3
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: 12.3.3
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: nokogiri
43
43
  requirement: !ruby/object:Gem::Requirement