richurls 0.5.8 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +18 -1
- data/lib/patron_browser.rb +25 -0
- data/lib/richurls.rb +15 -1
- data/lib/url_fetcher.rb +12 -13
- data/richurls.gemspec +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 145d6a2838880f17633feaaf112591a56742fc575994381b9ff6f643cc57cfeb
|
4
|
+
data.tar.gz: 0cc73d7141e804757dc96be1fad2f16e0efdb89632f7d7fb2ed8b40d26a82282
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c914e3714a4fa9d46cfe1d64ec84291c2161ab430e91519f636170a848c0f6dfcb57c5c54ec511382386e1dbab9bb0d83de76474f0b531277e23f7f1c4374d70
|
7
|
+
data.tar.gz: aec44007b71df681a63452331fbe4a3fa2c38d3e7203308f5a6e0e5c664d9c146dbff12fd00dd78da48f8e8f46846ec6eaec9648ee51254f2d82eee2b6454ac2
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
richurls (0.5.8)
|
4
|
+
richurls (0.6.0)
|
5
5
|
addressable (~> 2)
|
6
6
|
oj (~> 3)
|
7
7
|
ox (~> 2)
|
@@ -14,7 +14,7 @@ GEM
|
|
14
14
|
public_suffix (>= 2.0.2, < 5.0)
|
15
15
|
ast (2.4.0)
|
16
16
|
diff-lcs (1.3)
|
17
|
-
oj (3.10.
|
17
|
+
oj (3.10.8)
|
18
18
|
ox (2.13.2)
|
19
19
|
parallel (1.19.1)
|
20
20
|
parser (2.7.1.2)
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# richurls [![Build Status](https://travis-ci.com/WeTransfer/richurls.svg?branch=
|
1
|
+
# richurls [![Build Status](https://travis-ci.com/WeTransfer/richurls.svg?branch=main)](https://travis-ci.com/WeTransfer/richurls)
|
2
2
|
A gem which can enrich urls with speed.
|
3
3
|
|
4
4
|
**Installation**
|
@@ -85,3 +85,20 @@ RichUrls.enrich('https://wetransfer.com', cache_time: 3600)
|
|
85
85
|
This `cache_time` will be accessible through the `time` parameters in the `set`
|
86
86
|
and `extend` methods on the `Cache::Wrapper`-instance and can be used as you
|
87
87
|
please.
|
88
|
+
|
89
|
+
**Swapping browsers**
|
90
|
+
|
91
|
+
The default browser is `curl` in the form of `Patron`. However if you feel
|
92
|
+
like swapping to a different 'browser' like `HTTParty`, `RestClient` or something
|
93
|
+
like `Ferrum` feel free to do so. You can swap it by doing:
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
class CustomBrowser < RichUrls::Browser
|
97
|
+
def remote_call(url)
|
98
|
+
# Please make sure to return the variables in the function as such:
|
99
|
+
[status, redirected_url, body]
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
RichUrls.browser = CustomBrowser.new
|
104
|
+
```
|
data/lib/patron_browser.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'patron'
|
2
|
+
|
3
|
+
module RichUrls
|
4
|
+
DEFAULT_TIMEOUT = 10 # seconds
|
5
|
+
|
6
|
+
class Browser
|
7
|
+
def remote_call(_url)
|
8
|
+
raise NotImplementedError,
|
9
|
+
'subclasses of Browser need a remote_call method'
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
class PatronBrowser < Browser
|
14
|
+
def remote_call(url)
|
15
|
+
session = Patron::Session.new(
|
16
|
+
timeout: DEFAULT_TIMEOUT,
|
17
|
+
headers: RichUrls.headers
|
18
|
+
)
|
19
|
+
|
20
|
+
response = session.get(url)
|
21
|
+
|
22
|
+
[response.status, response.url, response.body]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/lib/richurls.rb
CHANGED
@@ -5,6 +5,7 @@ require 'digest'
|
|
5
5
|
require_relative 'cache'
|
6
6
|
require_relative 'url_fetcher'
|
7
7
|
require_relative 'body_decorator'
|
8
|
+
require_relative 'patron_browser'
|
8
9
|
|
9
10
|
module RichUrls
|
10
11
|
class MalformedURLError < StandardError; end
|
@@ -13,6 +14,19 @@ module RichUrls
|
|
13
14
|
@cache || Cache::None.new
|
14
15
|
end
|
15
16
|
|
17
|
+
def self.browser=(browser)
|
18
|
+
unless browser.is_a? Browser
|
19
|
+
raise ArgumentError,
|
20
|
+
'browser needs to be of a RichUrls::Browser type'
|
21
|
+
end
|
22
|
+
|
23
|
+
@browser ||= browser
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.browser
|
27
|
+
@browser || PatronBrowser.new
|
28
|
+
end
|
29
|
+
|
16
30
|
def self.cache=(wrapper)
|
17
31
|
unless wrapper.is_a? Cache::Wrapper
|
18
32
|
raise ArgumentError,
|
@@ -35,6 +49,6 @@ module RichUrls
|
|
35
49
|
raise MalformedURLError, "this url is malformed: #{url}"
|
36
50
|
end
|
37
51
|
|
38
|
-
UrlFetcher.fetch(url, filter, cache_time)
|
52
|
+
UrlFetcher.fetch(url, filter, browser: browser, cache_time: cache_time)
|
39
53
|
end
|
40
54
|
end
|
data/lib/url_fetcher.rb
CHANGED
@@ -6,15 +6,21 @@ module RichUrls
|
|
6
6
|
|
7
7
|
class UrlFetcherError < StandardError; end
|
8
8
|
|
9
|
-
def self.fetch(
|
10
|
-
|
9
|
+
def self.fetch(
|
10
|
+
url,
|
11
|
+
attributes = [],
|
12
|
+
browser: PatronBrowser.new,
|
13
|
+
cache_time: nil
|
14
|
+
)
|
15
|
+
new(url, attributes, browser, cache_time).fetch
|
11
16
|
end
|
12
17
|
|
13
18
|
private_class_method :new
|
14
19
|
|
15
|
-
def initialize(url, attributes, cache_time)
|
20
|
+
def initialize(url, attributes, browser, cache_time)
|
16
21
|
@url = url
|
17
22
|
@attributes = attributes
|
23
|
+
@browser = browser
|
18
24
|
@cache_time = cache_time
|
19
25
|
end
|
20
26
|
|
@@ -36,17 +42,10 @@ module RichUrls
|
|
36
42
|
end
|
37
43
|
|
38
44
|
def patron_call
|
39
|
-
|
40
|
-
timeout: DEFAULT_TIMEOUT,
|
41
|
-
headers: RichUrls.headers
|
42
|
-
)
|
45
|
+
status, return_url, body = @browser.remote_call(@url)
|
43
46
|
|
44
|
-
|
45
|
-
|
46
|
-
if response.status < 400
|
47
|
-
decorated = BodyDecorator.decorate(
|
48
|
-
response.url, response.body, @attributes
|
49
|
-
)
|
47
|
+
if status < 400
|
48
|
+
decorated = BodyDecorator.decorate(return_url, body, @attributes)
|
50
49
|
RichUrls.cache.set(digest, Oj.dump(decorated), @cache_time)
|
51
50
|
decorated
|
52
51
|
else
|
data/richurls.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: richurls
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.8
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- grdw
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-08-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -153,6 +153,7 @@ files:
|
|
153
153
|
- lib/parsers/property.rb
|
154
154
|
- lib/parsers/provider_display_parser.rb
|
155
155
|
- lib/parsers/url.rb
|
156
|
+
- lib/patron_browser.rb
|
156
157
|
- lib/richurls.rb
|
157
158
|
- lib/url_fetcher.rb
|
158
159
|
- lib/url_helper.rb
|