richurls 0.5.5 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/Gemfile.lock +29 -26
- data/README.md +18 -1
- data/lib/patron_browser.rb +25 -0
- data/lib/richurls.rb +23 -1
- data/lib/url_fetcher.rb +12 -9
- data/lib/url_helper.rb +16 -18
- data/richurls.gemspec +2 -1
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fae0a9f1de4986a531f74b56b352b51a004f6aedd3fc3dad551019afba58676f
|
4
|
+
data.tar.gz: 65c3b687abfe9de86e4101437eaa0530a46bff9203683e9b6fb69234fa9f39b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 380a50c4f02c34a16d92cb10a748cd0356dc4ac9eddfa6390f2d5cb95c930a907910f076ae68f3f9af4028dfd5fbab6712cd6650e9aa1a5664eed0204dcffa7f
|
7
|
+
data.tar.gz: 01c47002c302abd9a667003726276e597a7ec510f522d27d31af20078df7bfbede75a5a7c39433af48927c6ac57d418a336d5016bbf1bf5ab3ed410abf403428
|
data/.rubocop.yml
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
AllCops:
|
2
2
|
TargetRubyVersion: 2.7
|
3
|
+
NewCops: disable
|
3
4
|
|
4
5
|
Lint/SuppressedException:
|
5
6
|
Enabled: false
|
@@ -37,3 +38,5 @@ Naming/MemoizedInstanceVariableName:
|
|
37
38
|
Enabled: false
|
38
39
|
Style/RegexpLiteral:
|
39
40
|
Enabled: false
|
41
|
+
Style/IfUnlessModifier:
|
42
|
+
Enabled: false
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
richurls (0.5.5)
|
4
|
+
richurls (0.6.1)
|
5
5
|
addressable (~> 2)
|
6
6
|
oj (~> 3)
|
7
7
|
ox (~> 2)
|
@@ -12,41 +12,44 @@ GEM
|
|
12
12
|
specs:
|
13
13
|
addressable (2.7.0)
|
14
14
|
public_suffix (>= 2.0.2, < 5.0)
|
15
|
-
ast (2.4.
|
16
|
-
diff-lcs (1.
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
ast (~> 2.4.0)
|
15
|
+
ast (2.4.2)
|
16
|
+
diff-lcs (1.4.4)
|
17
|
+
oj (3.11.3)
|
18
|
+
ox (2.14.2)
|
19
|
+
parallel (1.20.1)
|
20
|
+
parser (3.0.0.0)
|
21
|
+
ast (~> 2.4.1)
|
23
22
|
patron (0.13.3)
|
24
|
-
public_suffix (4.0.
|
23
|
+
public_suffix (4.0.6)
|
25
24
|
rainbow (3.0.0)
|
26
|
-
redis (4.
|
25
|
+
redis (4.2.5)
|
26
|
+
regexp_parser (2.1.1)
|
27
27
|
rexml (3.2.4)
|
28
|
-
rspec (3.
|
29
|
-
rspec-core (~> 3.
|
30
|
-
rspec-expectations (~> 3.
|
31
|
-
rspec-mocks (~> 3.
|
32
|
-
rspec-core (3.
|
33
|
-
rspec-support (~> 3.
|
34
|
-
rspec-expectations (3.
|
28
|
+
rspec (3.10.0)
|
29
|
+
rspec-core (~> 3.10.0)
|
30
|
+
rspec-expectations (~> 3.10.0)
|
31
|
+
rspec-mocks (~> 3.10.0)
|
32
|
+
rspec-core (3.10.1)
|
33
|
+
rspec-support (~> 3.10.0)
|
34
|
+
rspec-expectations (3.10.1)
|
35
35
|
diff-lcs (>= 1.2.0, < 2.0)
|
36
|
-
rspec-support (~> 3.
|
37
|
-
rspec-mocks (3.
|
36
|
+
rspec-support (~> 3.10.0)
|
37
|
+
rspec-mocks (3.10.2)
|
38
38
|
diff-lcs (>= 1.2.0, < 2.0)
|
39
|
-
rspec-support (~> 3.
|
40
|
-
rspec-support (3.
|
41
|
-
rubocop (0.
|
42
|
-
jaro_winkler (~> 1.5.1)
|
39
|
+
rspec-support (~> 3.10.0)
|
40
|
+
rspec-support (3.10.2)
|
41
|
+
rubocop (0.93.1)
|
43
42
|
parallel (~> 1.10)
|
44
|
-
parser (>= 2.7.
|
43
|
+
parser (>= 2.7.1.5)
|
45
44
|
rainbow (>= 2.2.2, < 4.0)
|
45
|
+
regexp_parser (>= 1.8)
|
46
46
|
rexml
|
47
|
+
rubocop-ast (>= 0.6.0)
|
47
48
|
ruby-progressbar (~> 1.7)
|
48
49
|
unicode-display_width (>= 1.4.0, < 2.0)
|
49
|
-
|
50
|
+
rubocop-ast (1.4.1)
|
51
|
+
parser (>= 2.7.1.5)
|
52
|
+
ruby-progressbar (1.11.0)
|
50
53
|
unicode-display_width (1.7.0)
|
51
54
|
|
52
55
|
PLATFORMS
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# richurls [![Build Status](https://travis-ci.com/WeTransfer/richurls.svg?branch=
|
1
|
+
# richurls [![Build Status](https://travis-ci.com/WeTransfer/richurls.svg?branch=main)](https://travis-ci.com/WeTransfer/richurls)
|
2
2
|
A gem which can enrich urls with speed.
|
3
3
|
|
4
4
|
**Installation**
|
@@ -85,3 +85,20 @@ RichUrls.enrich('https://wetransfer.com', cache_time: 3600)
|
|
85
85
|
This `cache_time` will be accessible through the `time` parameters in the `set`
|
86
86
|
and `extend` methods on the `Cache::Wrapper`-instance and can be used as you
|
87
87
|
please.
|
88
|
+
|
89
|
+
**Swapping browsers**
|
90
|
+
|
91
|
+
The default browser is `curl` in the form of `Patron`. However if you feel
|
92
|
+
like swapping to a different 'browser' like `HTTParty`, `RestClient` or something
|
93
|
+
like `Ferrum` feel free to do so. You can swap it by doing:
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
class CustomBrowser < RichUrls::Browser
|
97
|
+
def remote_call(url)
|
98
|
+
# Please make sure to return the variables in the function as such:
|
99
|
+
[status, redirected_url, body]
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
RichUrls.browser = CustomBrowser.new
|
104
|
+
```
|
data/lib/patron_browser.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'patron'
|
2
|
+
|
3
|
+
module RichUrls
|
4
|
+
DEFAULT_TIMEOUT = 10 # seconds
|
5
|
+
|
6
|
+
class Browser
|
7
|
+
def remote_call(_url)
|
8
|
+
raise NotImplementedError,
|
9
|
+
'subclasses of Browser need a remote_call method'
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
class PatronBrowser < Browser
|
14
|
+
def remote_call(url)
|
15
|
+
session = Patron::Session.new(
|
16
|
+
timeout: DEFAULT_TIMEOUT,
|
17
|
+
headers: RichUrls.headers
|
18
|
+
)
|
19
|
+
|
20
|
+
response = session.get(url)
|
21
|
+
|
22
|
+
[response.status, response.url, response.body]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/lib/richurls.rb
CHANGED
@@ -5,6 +5,7 @@ require 'digest'
|
|
5
5
|
require_relative 'cache'
|
6
6
|
require_relative 'url_fetcher'
|
7
7
|
require_relative 'body_decorator'
|
8
|
+
require_relative 'patron_browser'
|
8
9
|
|
9
10
|
module RichUrls
|
10
11
|
class MalformedURLError < StandardError; end
|
@@ -13,6 +14,19 @@ module RichUrls
|
|
13
14
|
@cache || Cache::None.new
|
14
15
|
end
|
15
16
|
|
17
|
+
def self.browser=(browser)
|
18
|
+
unless browser.is_a? Browser
|
19
|
+
raise ArgumentError,
|
20
|
+
'browser needs to be of a RichUrls::Browser type'
|
21
|
+
end
|
22
|
+
|
23
|
+
@browser ||= browser
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.browser
|
27
|
+
@browser || PatronBrowser.new
|
28
|
+
end
|
29
|
+
|
16
30
|
def self.cache=(wrapper)
|
17
31
|
unless wrapper.is_a? Cache::Wrapper
|
18
32
|
raise ArgumentError,
|
@@ -22,11 +36,19 @@ module RichUrls
|
|
22
36
|
@cache ||= wrapper
|
23
37
|
end
|
24
38
|
|
39
|
+
def self.headers=(headers)
|
40
|
+
@headers ||= headers
|
41
|
+
end
|
42
|
+
|
43
|
+
def self.headers
|
44
|
+
@headers || {}
|
45
|
+
end
|
46
|
+
|
25
47
|
def self.enrich(url, filter: [], cache_time: nil)
|
26
48
|
unless URI::DEFAULT_PARSER.make_regexp.match?(url)
|
27
49
|
raise MalformedURLError, "this url is malformed: #{url}"
|
28
50
|
end
|
29
51
|
|
30
|
-
UrlFetcher.fetch(url, filter, cache_time)
|
52
|
+
UrlFetcher.fetch(url, filter, browser: browser, cache_time: cache_time)
|
31
53
|
end
|
32
54
|
end
|
data/lib/url_fetcher.rb
CHANGED
@@ -6,15 +6,21 @@ module RichUrls
|
|
6
6
|
|
7
7
|
class UrlFetcherError < StandardError; end
|
8
8
|
|
9
|
-
def self.fetch(
|
10
|
-
|
9
|
+
def self.fetch(
|
10
|
+
url,
|
11
|
+
attributes = [],
|
12
|
+
browser: PatronBrowser.new,
|
13
|
+
cache_time: nil
|
14
|
+
)
|
15
|
+
new(url, attributes, browser, cache_time).fetch
|
11
16
|
end
|
12
17
|
|
13
18
|
private_class_method :new
|
14
19
|
|
15
|
-
def initialize(url, attributes, cache_time)
|
20
|
+
def initialize(url, attributes, browser, cache_time)
|
16
21
|
@url = url
|
17
22
|
@attributes = attributes
|
23
|
+
@browser = browser
|
18
24
|
@cache_time = cache_time
|
19
25
|
end
|
20
26
|
|
@@ -36,13 +42,10 @@ module RichUrls
|
|
36
42
|
end
|
37
43
|
|
38
44
|
def patron_call
|
39
|
-
|
40
|
-
response = session.get(@url)
|
45
|
+
status, return_url, body = @browser.remote_call(@url)
|
41
46
|
|
42
|
-
if
|
43
|
-
decorated = BodyDecorator.decorate(
|
44
|
-
response.url, response.body, @attributes
|
45
|
-
)
|
47
|
+
if status < 400
|
48
|
+
decorated = BodyDecorator.decorate(return_url, body, @attributes)
|
46
49
|
RichUrls.cache.set(digest, Oj.dump(decorated), @cache_time)
|
47
50
|
decorated
|
48
51
|
else
|
data/lib/url_helper.rb
CHANGED
@@ -9,34 +9,32 @@ class UrlHelper
|
|
9
9
|
|
10
10
|
def initialize(domain, url)
|
11
11
|
@domain = domain
|
12
|
-
|
13
|
-
# In some rare cases it appears to be that URL's are ending with a
|
14
|
-
# single whitespace character resulting in an invalid URL.
|
15
|
-
@url = url&.strip
|
12
|
+
@url = url
|
16
13
|
end
|
17
14
|
|
18
15
|
def url
|
19
16
|
return if @url.nil?
|
20
|
-
return Addressable::URI.escape(@url) if valid_url?
|
21
17
|
|
22
|
-
|
18
|
+
parsed = Addressable::URI.parse(@url)
|
19
|
+
full_url = valid?(parsed) ? parsed.to_s : domain_uri
|
20
|
+
Addressable::URI.escape(full_url)
|
21
|
+
rescue Addressable::URI::InvalidURIError
|
23
22
|
end
|
24
23
|
|
25
24
|
private
|
26
25
|
|
27
|
-
def
|
28
|
-
|
29
|
-
base = domain_uri.scheme + '://' + domain_uri.host
|
30
|
-
escaped_url = Addressable::URI.escape(@url)
|
31
|
-
|
32
|
-
if @url.start_with?('/')
|
33
|
-
base + escaped_url
|
34
|
-
else
|
35
|
-
base + domain_uri.path + '/' + escaped_url
|
36
|
-
end
|
26
|
+
def valid?(parsed)
|
27
|
+
parsed.host && (parsed.scheme || @url.start_with?('//'))
|
37
28
|
end
|
38
29
|
|
39
|
-
def
|
40
|
-
|
30
|
+
def domain_uri
|
31
|
+
domain = Addressable::URI.parse(@domain)
|
32
|
+
domain.query = nil
|
33
|
+
domain.path = if @url.start_with?('/')
|
34
|
+
@url
|
35
|
+
else
|
36
|
+
domain.path + '/' + @url
|
37
|
+
end
|
38
|
+
domain.to_s
|
41
39
|
end
|
42
40
|
end
|
data/richurls.gemspec
CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |spec|
|
5
5
|
spec.name = 'richurls'
|
6
|
-
spec.version = '0.5.5'
|
6
|
+
spec.version = '0.6.1'
|
7
7
|
spec.authors = ['grdw']
|
8
8
|
spec.email = ['gerard@wetransfer.com']
|
9
9
|
|
@@ -11,6 +11,7 @@ Gem::Specification.new do |spec|
|
|
11
11
|
spec.description = 'Service which enriches URLs fast and cheap'
|
12
12
|
spec.homepage = 'https://github.com/wetransfer/richurls'
|
13
13
|
spec.license = 'GPL-3.0'
|
14
|
+
spec.required_ruby_version = '~> 2.7'
|
14
15
|
|
15
16
|
spec.metadata['homepage_uri'] = spec.homepage
|
16
17
|
spec.metadata['source_code_uri'] = 'https://github.com/wetransfer/richurls'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: richurls
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.5
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- grdw
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -153,6 +153,7 @@ files:
|
|
153
153
|
- lib/parsers/property.rb
|
154
154
|
- lib/parsers/provider_display_parser.rb
|
155
155
|
- lib/parsers/url.rb
|
156
|
+
- lib/patron_browser.rb
|
156
157
|
- lib/richurls.rb
|
157
158
|
- lib/url_fetcher.rb
|
158
159
|
- lib/url_helper.rb
|
@@ -165,15 +166,15 @@ metadata:
|
|
165
166
|
homepage_uri: https://github.com/wetransfer/richurls
|
166
167
|
source_code_uri: https://github.com/wetransfer/richurls
|
167
168
|
changelog_uri: https://github.com/wetransfer/richurls/CHANGELOG.md
|
168
|
-
post_install_message:
|
169
|
+
post_install_message:
|
169
170
|
rdoc_options: []
|
170
171
|
require_paths:
|
171
172
|
- lib
|
172
173
|
required_ruby_version: !ruby/object:Gem::Requirement
|
173
174
|
requirements:
|
174
|
-
- - "
|
175
|
+
- - "~>"
|
175
176
|
- !ruby/object:Gem::Version
|
176
|
-
version: '
|
177
|
+
version: '2.7'
|
177
178
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
178
179
|
requirements:
|
179
180
|
- - ">="
|
@@ -181,7 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
181
182
|
version: '0'
|
182
183
|
requirements: []
|
183
184
|
rubygems_version: 3.1.2
|
184
|
-
signing_key:
|
185
|
+
signing_key:
|
185
186
|
specification_version: 4
|
186
187
|
summary: Service which enriches URLs
|
187
188
|
test_files: []
|