richurls 0.5.5 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 693d986c664aa571150f5c426a7ee496125e615321c1156d2c0eaf0b3f0db6e7
4
- data.tar.gz: 4699ba24e5a8f2ed3ccfd2e8aa632c4243033e72d0f6db5b54e5faa16313514d
3
+ metadata.gz: fae0a9f1de4986a531f74b56b352b51a004f6aedd3fc3dad551019afba58676f
4
+ data.tar.gz: 65c3b687abfe9de86e4101437eaa0530a46bff9203683e9b6fb69234fa9f39b7
5
5
  SHA512:
6
- metadata.gz: b5d7b35375f42c0bdfecdbd05179c984c1ec0d05669c3067280bbcea38bf9f9a34e79794ce587d4f23d0b26dfd1f45a664a6575551eccea7038bd89f3a168eb1
7
- data.tar.gz: 04c2167a8ee0dad5a80447522a96d188a9687771e49609bf0857aa0d223fb47659a73de66225b376ce23000271f8dbb7546f48e72cc3ace62a1aa1fd6e130b92
6
+ metadata.gz: 380a50c4f02c34a16d92cb10a748cd0356dc4ac9eddfa6390f2d5cb95c930a907910f076ae68f3f9af4028dfd5fbab6712cd6650e9aa1a5664eed0204dcffa7f
7
+ data.tar.gz: 01c47002c302abd9a667003726276e597a7ec510f522d27d31af20078df7bfbede75a5a7c39433af48927c6ac57d418a336d5016bbf1bf5ab3ed410abf403428
data/.rubocop.yml CHANGED
@@ -1,5 +1,6 @@
1
1
  AllCops:
2
2
  TargetRubyVersion: 2.7
3
+ NewCops: disable
3
4
 
4
5
  Lint/SuppressedException:
5
6
  Enabled: false
@@ -37,3 +38,5 @@ Naming/MemoizedInstanceVariableName:
37
38
  Enabled: false
38
39
  Style/RegexpLiteral:
39
40
  Enabled: false
41
+ Style/IfUnlessModifier:
42
+ Enabled: false
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- richurls (0.5.5)
4
+ richurls (0.6.1)
5
5
  addressable (~> 2)
6
6
  oj (~> 3)
7
7
  ox (~> 2)
@@ -12,41 +12,44 @@ GEM
12
12
  specs:
13
13
  addressable (2.7.0)
14
14
  public_suffix (>= 2.0.2, < 5.0)
15
- ast (2.4.0)
16
- diff-lcs (1.3)
17
- jaro_winkler (1.5.4)
18
- oj (3.10.6)
19
- ox (2.13.2)
20
- parallel (1.19.1)
21
- parser (2.7.1.1)
22
- ast (~> 2.4.0)
15
+ ast (2.4.2)
16
+ diff-lcs (1.4.4)
17
+ oj (3.11.3)
18
+ ox (2.14.2)
19
+ parallel (1.20.1)
20
+ parser (3.0.0.0)
21
+ ast (~> 2.4.1)
23
22
  patron (0.13.3)
24
- public_suffix (4.0.4)
23
+ public_suffix (4.0.6)
25
24
  rainbow (3.0.0)
26
- redis (4.1.4)
25
+ redis (4.2.5)
26
+ regexp_parser (2.1.1)
27
27
  rexml (3.2.4)
28
- rspec (3.9.0)
29
- rspec-core (~> 3.9.0)
30
- rspec-expectations (~> 3.9.0)
31
- rspec-mocks (~> 3.9.0)
32
- rspec-core (3.9.1)
33
- rspec-support (~> 3.9.1)
34
- rspec-expectations (3.9.1)
28
+ rspec (3.10.0)
29
+ rspec-core (~> 3.10.0)
30
+ rspec-expectations (~> 3.10.0)
31
+ rspec-mocks (~> 3.10.0)
32
+ rspec-core (3.10.1)
33
+ rspec-support (~> 3.10.0)
34
+ rspec-expectations (3.10.1)
35
35
  diff-lcs (>= 1.2.0, < 2.0)
36
- rspec-support (~> 3.9.0)
37
- rspec-mocks (3.9.1)
36
+ rspec-support (~> 3.10.0)
37
+ rspec-mocks (3.10.2)
38
38
  diff-lcs (>= 1.2.0, < 2.0)
39
- rspec-support (~> 3.9.0)
40
- rspec-support (3.9.2)
41
- rubocop (0.82.0)
42
- jaro_winkler (~> 1.5.1)
39
+ rspec-support (~> 3.10.0)
40
+ rspec-support (3.10.2)
41
+ rubocop (0.93.1)
43
42
  parallel (~> 1.10)
44
- parser (>= 2.7.0.1)
43
+ parser (>= 2.7.1.5)
45
44
  rainbow (>= 2.2.2, < 4.0)
45
+ regexp_parser (>= 1.8)
46
46
  rexml
47
+ rubocop-ast (>= 0.6.0)
47
48
  ruby-progressbar (~> 1.7)
48
49
  unicode-display_width (>= 1.4.0, < 2.0)
49
- ruby-progressbar (1.10.1)
50
+ rubocop-ast (1.4.1)
51
+ parser (>= 2.7.1.5)
52
+ ruby-progressbar (1.11.0)
50
53
  unicode-display_width (1.7.0)
51
54
 
52
55
  PLATFORMS
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # richurls [![Build Status](https://travis-ci.com/WeTransfer/richurls.svg?branch=master)](https://travis-ci.com/WeTransfer/richurls)
1
+ # richurls [![Build Status](https://travis-ci.com/WeTransfer/richurls.svg?branch=main)](https://travis-ci.com/WeTransfer/richurls)
2
2
  A gem which can enrich urls with speed.
3
3
 
4
4
  **Installation**
@@ -85,3 +85,20 @@ RichUrls.enrich('https://wetransfer.com', cache_time: 3600)
85
85
  This `cache_time` will be accessible through the `time` parameters in the `set`
86
86
  and `extend` methods on the `Cache::Wrapper`-instance and can be used as you
87
87
  please.
88
+
89
+ **Swapping browsers**
90
+
91
+ The default browser is `curl` in the form of `Patron`. However, if you feel
92
+ like swapping to a different 'browser' like `HTTParty`, `RestClient` or something
93
+ like `Ferrum`, feel free to do so. You can swap it by doing:
94
+
95
+ ```ruby
96
+ class CustomBrowser < RichUrls::Browser
97
+ def remote_call(url)
98
+ # Please make sure to return the variables in the function as such:
99
+ [status, redirected_url, body]
100
+ end
101
+ end
102
+
103
+ RichUrls.browser = CustomBrowser.new
104
+ ```
@@ -0,0 +1,25 @@
1
+ require 'patron'
2
+
3
+ module RichUrls
4
+ DEFAULT_TIMEOUT = 10 # seconds
5
+
6
+ class Browser
7
+ def remote_call(_url)
8
+ raise NotImplementedError,
9
+ 'subclasses of Browser need a remote_call method'
10
+ end
11
+ end
12
+
13
+ class PatronBrowser < Browser
14
+ def remote_call(url)
15
+ session = Patron::Session.new(
16
+ timeout: DEFAULT_TIMEOUT,
17
+ headers: RichUrls.headers
18
+ )
19
+
20
+ response = session.get(url)
21
+
22
+ [response.status, response.url, response.body]
23
+ end
24
+ end
25
+ end
data/lib/richurls.rb CHANGED
@@ -5,6 +5,7 @@ require 'digest'
5
5
  require_relative 'cache'
6
6
  require_relative 'url_fetcher'
7
7
  require_relative 'body_decorator'
8
+ require_relative 'patron_browser'
8
9
 
9
10
  module RichUrls
10
11
  class MalformedURLError < StandardError; end
@@ -13,6 +14,19 @@ module RichUrls
13
14
  @cache || Cache::None.new
14
15
  end
15
16
 
17
+ def self.browser=(browser)
18
+ unless browser.is_a? Browser
19
+ raise ArgumentError,
20
+ 'browser needs to be of a RichUrls::Browser type'
21
+ end
22
+
23
+ @browser ||= browser
24
+ end
25
+
26
+ def self.browser
27
+ @browser || PatronBrowser.new
28
+ end
29
+
16
30
  def self.cache=(wrapper)
17
31
  unless wrapper.is_a? Cache::Wrapper
18
32
  raise ArgumentError,
@@ -22,11 +36,19 @@ module RichUrls
22
36
  @cache ||= wrapper
23
37
  end
24
38
 
39
+ def self.headers=(headers)
40
+ @headers ||= headers
41
+ end
42
+
43
+ def self.headers
44
+ @headers || {}
45
+ end
46
+
25
47
  def self.enrich(url, filter: [], cache_time: nil)
26
48
  unless URI::DEFAULT_PARSER.make_regexp.match?(url)
27
49
  raise MalformedURLError, "this url is malformed: #{url}"
28
50
  end
29
51
 
30
- UrlFetcher.fetch(url, filter, cache_time)
52
+ UrlFetcher.fetch(url, filter, browser: browser, cache_time: cache_time)
31
53
  end
32
54
  end
data/lib/url_fetcher.rb CHANGED
@@ -6,15 +6,21 @@ module RichUrls
6
6
 
7
7
  class UrlFetcherError < StandardError; end
8
8
 
9
- def self.fetch(url, attributes = [], cache_time = nil)
10
- new(url, attributes, cache_time).fetch
9
+ def self.fetch(
10
+ url,
11
+ attributes = [],
12
+ browser: PatronBrowser.new,
13
+ cache_time: nil
14
+ )
15
+ new(url, attributes, browser, cache_time).fetch
11
16
  end
12
17
 
13
18
  private_class_method :new
14
19
 
15
- def initialize(url, attributes, cache_time)
20
+ def initialize(url, attributes, browser, cache_time)
16
21
  @url = url
17
22
  @attributes = attributes
23
+ @browser = browser
18
24
  @cache_time = cache_time
19
25
  end
20
26
 
@@ -36,13 +42,10 @@ module RichUrls
36
42
  end
37
43
 
38
44
  def patron_call
39
- session = Patron::Session.new(timeout: DEFAULT_TIMEOUT)
40
- response = session.get(@url)
45
+ status, return_url, body = @browser.remote_call(@url)
41
46
 
42
- if response.status < 400
43
- decorated = BodyDecorator.decorate(
44
- response.url, response.body, @attributes
45
- )
47
+ if status < 400
48
+ decorated = BodyDecorator.decorate(return_url, body, @attributes)
46
49
  RichUrls.cache.set(digest, Oj.dump(decorated), @cache_time)
47
50
  decorated
48
51
  else
data/lib/url_helper.rb CHANGED
@@ -9,34 +9,32 @@ class UrlHelper
9
9
 
10
10
  def initialize(domain, url)
11
11
  @domain = domain
12
-
13
- # In some rare cases it appears to be that URL's are ending with a
14
- # single whitespace character resulting in an invalid URL.
15
- @url = url&.strip
12
+ @url = url
16
13
  end
17
14
 
18
15
  def url
19
16
  return if @url.nil?
20
- return Addressable::URI.escape(@url) if valid_url?
21
17
 
22
- build_url
18
+ parsed = Addressable::URI.parse(@url)
19
+ full_url = valid?(parsed) ? parsed.to_s : domain_uri
20
+ Addressable::URI.escape(full_url)
21
+ rescue Addressable::URI::InvalidURIError
23
22
  end
24
23
 
25
24
  private
26
25
 
27
- def build_url
28
- domain_uri = URI(@domain)
29
- base = domain_uri.scheme + '://' + domain_uri.host
30
- escaped_url = Addressable::URI.escape(@url)
31
-
32
- if @url.start_with?('/')
33
- base + escaped_url
34
- else
35
- base + domain_uri.path + '/' + escaped_url
36
- end
26
+ def valid?(parsed)
27
+ parsed.host && (parsed.scheme || @url.start_with?('//'))
37
28
  end
38
29
 
39
- def valid_url?
40
- @url.start_with?('//') || @url =~ URI::DEFAULT_PARSER.make_regexp
30
+ def domain_uri
31
+ domain = Addressable::URI.parse(@domain)
32
+ domain.query = nil
33
+ domain.path = if @url.start_with?('/')
34
+ @url
35
+ else
36
+ domain.path + '/' + @url
37
+ end
38
+ domain.to_s
41
39
  end
42
40
  end
data/richurls.gemspec CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |spec|
5
5
  spec.name = 'richurls'
6
- spec.version = '0.5.5'
6
+ spec.version = '0.6.1'
7
7
  spec.authors = ['grdw']
8
8
  spec.email = ['gerard@wetransfer.com']
9
9
 
@@ -11,6 +11,7 @@ Gem::Specification.new do |spec|
11
11
  spec.description = 'Service which enriches URLs fast and cheap'
12
12
  spec.homepage = 'https://github.com/wetransfer/richurls'
13
13
  spec.license = 'GPL-3.0'
14
+ spec.required_ruby_version = '~> 2.7'
14
15
 
15
16
  spec.metadata['homepage_uri'] = spec.homepage
16
17
  spec.metadata['source_code_uri'] = 'https://github.com/wetransfer/richurls'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: richurls
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.5
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - grdw
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-08 00:00:00.000000000 Z
11
+ date: 2021-03-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -153,6 +153,7 @@ files:
153
153
  - lib/parsers/property.rb
154
154
  - lib/parsers/provider_display_parser.rb
155
155
  - lib/parsers/url.rb
156
+ - lib/patron_browser.rb
156
157
  - lib/richurls.rb
157
158
  - lib/url_fetcher.rb
158
159
  - lib/url_helper.rb
@@ -165,15 +166,15 @@ metadata:
165
166
  homepage_uri: https://github.com/wetransfer/richurls
166
167
  source_code_uri: https://github.com/wetransfer/richurls
167
168
  changelog_uri: https://github.com/wetransfer/richurls/CHANGELOG.md
168
- post_install_message:
169
+ post_install_message:
169
170
  rdoc_options: []
170
171
  require_paths:
171
172
  - lib
172
173
  required_ruby_version: !ruby/object:Gem::Requirement
173
174
  requirements:
174
- - - ">="
175
+ - - "~>"
175
176
  - !ruby/object:Gem::Version
176
- version: '0'
177
+ version: '2.7'
177
178
  required_rubygems_version: !ruby/object:Gem::Requirement
178
179
  requirements:
179
180
  - - ">="
@@ -181,7 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
181
182
  version: '0'
182
183
  requirements: []
183
184
  rubygems_version: 3.1.2
184
- signing_key:
185
+ signing_key:
185
186
  specification_version: 4
186
187
  summary: Service which enriches URLs
187
188
  test_files: []