url_canonicalize 0.1.5 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -2
- data/Gemfile +4 -3
- data/Guardfile +1 -0
- data/LICENSE +1 -1
- data/README.md +7 -0
- data/Rakefile +1 -0
- data/circle.yml +1 -3
- data/lib/monkey_patches/addressable/uri.rb +1 -0
- data/lib/monkey_patches/string.rb +1 -0
- data/lib/monkey_patches/uri.rb +1 -0
- data/lib/url_canonicalize.rb +1 -0
- data/lib/url_canonicalize/exception.rb +1 -0
- data/lib/url_canonicalize/http.rb +1 -0
- data/lib/url_canonicalize/request.rb +3 -2
- data/lib/url_canonicalize/response.rb +1 -0
- data/lib/url_canonicalize/uri.rb +1 -0
- data/lib/url_canonicalize/version.rb +2 -1
- data/url_canonicalize.gemspec +3 -3
- metadata +9 -10
- data/.ruby-version +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1493f185d340eabc6a80605d684beab885d5801f
|
4
|
+
data.tar.gz: fdaf73d4b81441aea7bacb84ce0319fca52f7c68
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a838ce734c8d63474f7673ada3e027724042ab16651f6608178dafbdff8f695bc979e1177b057f926ad2c7f04de4d0006d803c663f03bfc93945ecc62ecff0d6
|
7
|
+
data.tar.gz: df6a62963d4d43adab2df50bacf771351a9ee4f788a3ff1e0e7f6144aeee945ed21a22368511d141235f49646a44e653faf7bff99e37cc76f78f7b729448b6e1
|
data/.gitignore
CHANGED
@@ -45,8 +45,8 @@ build-iPhoneSimulator/
|
|
45
45
|
# for a library or gem, you might want to ignore these files since the code is
|
46
46
|
# intended to run in multiple environments; otherwise, check them in:
|
47
47
|
# Gemfile.lock
|
48
|
-
|
49
|
-
|
48
|
+
.ruby-version
|
49
|
+
.ruby-gemset
|
50
50
|
|
51
51
|
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
52
52
|
.rvmrc
|
data/Gemfile
CHANGED
@@ -1,17 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
source 'https://rubygems.org'
|
2
3
|
|
3
4
|
gemspec
|
4
5
|
|
5
6
|
group :test do
|
7
|
+
gem 'coveralls', require: false
|
6
8
|
gem 'rspec'
|
7
9
|
gem 'rspec_junit_formatter'
|
10
|
+
gem 'simplecov', '~> 0.13'
|
8
11
|
gem 'webmock'
|
9
|
-
gem 'simplecov'
|
10
|
-
gem 'coveralls', require: false
|
11
12
|
end
|
12
13
|
|
13
14
|
local_gemfile = 'Gemfile.local'
|
14
15
|
|
15
16
|
if File.exist?(local_gemfile)
|
16
|
-
eval(File.read(local_gemfile)) # rubocop:disable
|
17
|
+
eval(File.read(local_gemfile)) # rubocop:disable Security/Eval
|
17
18
|
end
|
data/Guardfile
CHANGED
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,11 @@
|
|
1
1
|
# URLCanonicalize
|
2
|
+
[](https://rubygems.org/gems/url_canonicalize)
|
3
|
+
[](https://rubygems.org/gems/url_canonicalize)
|
4
|
+
[](https://circleci.com/gh/dominicsayers/url_canonicalize)
|
5
|
+
[](https://codeclimate.com/github/dominicsayers/url_canonicalize)
|
6
|
+
[](https://coveralls.io/github/Xenapto/url_canonicalize?branch=master)
|
7
|
+
[](https://dependencyci.com/github/dominicsayers/url_canonicalize)
|
8
|
+
[](https://hakiri.io/github/dominicsayers/url_canonicalize/master)
|
2
9
|
|
3
10
|
URLCanonicalize is a Ruby gem that finds the canonical version of a URL. It
|
4
11
|
provides `canonicalize` methods for the String, URI::HTTP, URI::HTTPS and
|
data/Rakefile
CHANGED
data/circle.yml
CHANGED
@@ -1,14 +1,12 @@
|
|
1
1
|
machine:
|
2
2
|
ruby:
|
3
|
-
version: 2.
|
4
|
-
|
3
|
+
version: 2.4.0
|
5
4
|
dependencies:
|
6
5
|
pre:
|
7
6
|
- echo "export rvm_ignore_gemsets_flag=1" >> ~/.rvmrc
|
8
7
|
- gem install bundler
|
9
8
|
override:
|
10
9
|
- bundle check --path=vendor/bundle || bundle install --path=vendor/bundle --jobs=4 --retry=3 --full-index
|
11
|
-
|
12
10
|
test:
|
13
11
|
override:
|
14
12
|
- bundle exec rspec:
|
data/lib/monkey_patches/uri.rb
CHANGED
data/lib/url_canonicalize.rb
CHANGED
data/lib/url_canonicalize/request.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module URLCanonicalize
|
2
3
|
# Make an HTTP request
|
3
4
|
class Request
|
@@ -38,7 +39,7 @@ module URLCanonicalize
|
|
38
39
|
end
|
39
40
|
|
40
41
|
def handle_success
|
41
|
-
@canonical_url = $LAST_MATCH_INFO['url'] if response['link']
|
42
|
+
@canonical_url = $LAST_MATCH_INFO['url'] if (response['link'] || '').match?(/<(?<url>.+)>\s*;\s*rel="canonical"/i)
|
42
43
|
|
43
44
|
if http_method == :head
|
44
45
|
self.http_method = :get
|
@@ -147,7 +148,7 @@ module URLCanonicalize
|
|
147
148
|
# requester after a few attempts. For these sites we'll use GET requests
|
148
149
|
# only
|
149
150
|
def check_http_method
|
150
|
-
@http_method = :get if host
|
151
|
+
@http_method = :get if host.match?(/(linkedin|crunchbase).com/)
|
151
152
|
end
|
152
153
|
|
153
154
|
def relative_to_absolute(partial_url)
|
data/lib/url_canonicalize/uri.rb
CHANGED
data/url_canonicalize.gemspec
CHANGED
@@ -7,12 +7,12 @@ Gem::Specification.new do |s|
|
|
7
7
|
s.name = 'url_canonicalize'
|
8
8
|
s.version = URLCanonicalize::VERSION
|
9
9
|
s.authors = ['Dominic Sayers']
|
10
|
-
s.email = ['
|
10
|
+
s.email = ['dominic@sayers.cc']
|
11
11
|
s.summary = 'Finds the canonical version of a URL'
|
12
12
|
s.description = 'Rubygem that finds the canonical version of a URL by '\
|
13
13
|
'providing #canonicalize methods for the String, URI::HTTP'\
|
14
14
|
', URI::HTTPS and Addressable::URI classes'
|
15
|
-
s.homepage = 'https://github.com/
|
15
|
+
s.homepage = 'https://github.com/dominicsayers/url_canonicalize'
|
16
16
|
s.license = 'MIT'
|
17
17
|
|
18
18
|
s.files = `git ls-files`.split($RS).reject do |file|
|
@@ -24,5 +24,5 @@ Gem::Specification.new do |s|
|
|
24
24
|
s.require_paths = ['lib']
|
25
25
|
|
26
26
|
s.add_dependency 'addressable', '~> 2' # To normalize URLs
|
27
|
-
s.add_dependency 'nokogiri', '
|
27
|
+
s.add_dependency 'nokogiri', '>= 1.6.8' # To look for <link rel="canonical" ...> in HTML
|
28
28
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_canonicalize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dominic Sayers
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-03-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -28,20 +28,20 @@ dependencies:
|
|
28
28
|
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 1.6.8
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 1.6.8
|
41
41
|
description: 'Rubygem that finds the canonical version of a URL by providing #canonicalize
|
42
42
|
methods for the String, URI::HTTP, URI::HTTPS and Addressable::URI classes'
|
43
43
|
email:
|
44
|
-
-
|
44
|
+
- dominic@sayers.cc
|
45
45
|
executables: []
|
46
46
|
extensions: []
|
47
47
|
extra_rdoc_files: []
|
@@ -51,7 +51,6 @@ files:
|
|
51
51
|
- ".rspec"
|
52
52
|
- ".rubocop.yml"
|
53
53
|
- ".ruby-gemset"
|
54
|
-
- ".ruby-version"
|
55
54
|
- Gemfile
|
56
55
|
- Gemfile.local.example
|
57
56
|
- Guardfile
|
@@ -70,7 +69,7 @@ files:
|
|
70
69
|
- lib/url_canonicalize/uri.rb
|
71
70
|
- lib/url_canonicalize/version.rb
|
72
71
|
- url_canonicalize.gemspec
|
73
|
-
homepage: https://github.com/
|
72
|
+
homepage: https://github.com/dominicsayers/url_canonicalize
|
74
73
|
licenses:
|
75
74
|
- MIT
|
76
75
|
metadata: {}
|
@@ -90,7 +89,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
89
|
version: '0'
|
91
90
|
requirements: []
|
92
91
|
rubyforge_project:
|
93
|
-
rubygems_version: 2.6.
|
92
|
+
rubygems_version: 2.6.10
|
94
93
|
signing_key:
|
95
94
|
specification_version: 4
|
96
95
|
summary: Finds the canonical version of a URL
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
2.1.9
|