richurls 0.5.5 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 693d986c664aa571150f5c426a7ee496125e615321c1156d2c0eaf0b3f0db6e7
4
- data.tar.gz: 4699ba24e5a8f2ed3ccfd2e8aa632c4243033e72d0f6db5b54e5faa16313514d
3
+ metadata.gz: fae0a9f1de4986a531f74b56b352b51a004f6aedd3fc3dad551019afba58676f
4
+ data.tar.gz: 65c3b687abfe9de86e4101437eaa0530a46bff9203683e9b6fb69234fa9f39b7
5
5
  SHA512:
6
- metadata.gz: b5d7b35375f42c0bdfecdbd05179c984c1ec0d05669c3067280bbcea38bf9f9a34e79794ce587d4f23d0b26dfd1f45a664a6575551eccea7038bd89f3a168eb1
7
- data.tar.gz: 04c2167a8ee0dad5a80447522a96d188a9687771e49609bf0857aa0d223fb47659a73de66225b376ce23000271f8dbb7546f48e72cc3ace62a1aa1fd6e130b92
6
+ metadata.gz: 380a50c4f02c34a16d92cb10a748cd0356dc4ac9eddfa6390f2d5cb95c930a907910f076ae68f3f9af4028dfd5fbab6712cd6650e9aa1a5664eed0204dcffa7f
7
+ data.tar.gz: 01c47002c302abd9a667003726276e597a7ec510f522d27d31af20078df7bfbede75a5a7c39433af48927c6ac57d418a336d5016bbf1bf5ab3ed410abf403428
data/.rubocop.yml CHANGED
@@ -1,5 +1,6 @@
1
1
  AllCops:
2
2
  TargetRubyVersion: 2.7
3
+ NewCops: disable
3
4
 
4
5
  Lint/SuppressedException:
5
6
  Enabled: false
@@ -37,3 +38,5 @@ Naming/MemoizedInstanceVariableName:
37
38
  Enabled: false
38
39
  Style/RegexpLiteral:
39
40
  Enabled: false
41
+ Style/IfUnlessModifier:
42
+ Enabled: false
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- richurls (0.5.5)
4
+ richurls (0.6.1)
5
5
  addressable (~> 2)
6
6
  oj (~> 3)
7
7
  ox (~> 2)
@@ -12,41 +12,44 @@ GEM
12
12
  specs:
13
13
  addressable (2.7.0)
14
14
  public_suffix (>= 2.0.2, < 5.0)
15
- ast (2.4.0)
16
- diff-lcs (1.3)
17
- jaro_winkler (1.5.4)
18
- oj (3.10.6)
19
- ox (2.13.2)
20
- parallel (1.19.1)
21
- parser (2.7.1.1)
22
- ast (~> 2.4.0)
15
+ ast (2.4.2)
16
+ diff-lcs (1.4.4)
17
+ oj (3.11.3)
18
+ ox (2.14.2)
19
+ parallel (1.20.1)
20
+ parser (3.0.0.0)
21
+ ast (~> 2.4.1)
23
22
  patron (0.13.3)
24
- public_suffix (4.0.4)
23
+ public_suffix (4.0.6)
25
24
  rainbow (3.0.0)
26
- redis (4.1.4)
25
+ redis (4.2.5)
26
+ regexp_parser (2.1.1)
27
27
  rexml (3.2.4)
28
- rspec (3.9.0)
29
- rspec-core (~> 3.9.0)
30
- rspec-expectations (~> 3.9.0)
31
- rspec-mocks (~> 3.9.0)
32
- rspec-core (3.9.1)
33
- rspec-support (~> 3.9.1)
34
- rspec-expectations (3.9.1)
28
+ rspec (3.10.0)
29
+ rspec-core (~> 3.10.0)
30
+ rspec-expectations (~> 3.10.0)
31
+ rspec-mocks (~> 3.10.0)
32
+ rspec-core (3.10.1)
33
+ rspec-support (~> 3.10.0)
34
+ rspec-expectations (3.10.1)
35
35
  diff-lcs (>= 1.2.0, < 2.0)
36
- rspec-support (~> 3.9.0)
37
- rspec-mocks (3.9.1)
36
+ rspec-support (~> 3.10.0)
37
+ rspec-mocks (3.10.2)
38
38
  diff-lcs (>= 1.2.0, < 2.0)
39
- rspec-support (~> 3.9.0)
40
- rspec-support (3.9.2)
41
- rubocop (0.82.0)
42
- jaro_winkler (~> 1.5.1)
39
+ rspec-support (~> 3.10.0)
40
+ rspec-support (3.10.2)
41
+ rubocop (0.93.1)
43
42
  parallel (~> 1.10)
44
- parser (>= 2.7.0.1)
43
+ parser (>= 2.7.1.5)
45
44
  rainbow (>= 2.2.2, < 4.0)
45
+ regexp_parser (>= 1.8)
46
46
  rexml
47
+ rubocop-ast (>= 0.6.0)
47
48
  ruby-progressbar (~> 1.7)
48
49
  unicode-display_width (>= 1.4.0, < 2.0)
49
- ruby-progressbar (1.10.1)
50
+ rubocop-ast (1.4.1)
51
+ parser (>= 2.7.1.5)
52
+ ruby-progressbar (1.11.0)
50
53
  unicode-display_width (1.7.0)
51
54
 
52
55
  PLATFORMS
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # richurls [![Build Status](https://travis-ci.com/WeTransfer/richurls.svg?branch=master)](https://travis-ci.com/WeTransfer/richurls)
1
+ # richurls [![Build Status](https://travis-ci.com/WeTransfer/richurls.svg?branch=main)](https://travis-ci.com/WeTransfer/richurls)
2
2
  A gem which can enrich urls with speed.
3
3
 
4
4
  **Installation**
@@ -85,3 +85,20 @@ RichUrls.enrich('https://wetransfer.com', cache_time: 3600)
85
85
  This `cache_time` will be accessible through the `time` parameters in the `set`
86
86
  and `extend` methods on the `Cache::Wrapper`-instance and can be used as you
87
87
  please.
88
+
89
+ **Swapping browsers**
90
+
91
+ The default browser is `curl` in the form of `Patron`. However, if you feel
92
+ like swapping to a different 'browser' like `HTTParty`, `RestClient` or something
93
+ like `Ferrum`, feel free to do so. You can swap it as follows:
94
+
95
+ ```ruby
96
+ class CustomBrowser < RichUrls::Browser
97
+ def remote_call(url)
98
+ # Please make sure to return the variables in the function as such:
99
+ [status, redirected_url, body]
100
+ end
101
+ end
102
+
103
+ RichUrls.browser = CustomBrowser.new
104
+ ```
data/lib/patron_browser.rb ADDED
@@ -0,0 +1,25 @@
1
+ require 'patron'
2
+
3
+ module RichUrls
4
+ DEFAULT_TIMEOUT = 10 # seconds
5
+
6
+ class Browser
7
+ def remote_call(_url)
8
+ raise NotImplementedError,
9
+ 'subclasses of Browser need a remote_call method'
10
+ end
11
+ end
12
+
13
+ class PatronBrowser < Browser
14
+ def remote_call(url)
15
+ session = Patron::Session.new(
16
+ timeout: DEFAULT_TIMEOUT,
17
+ headers: RichUrls.headers
18
+ )
19
+
20
+ response = session.get(url)
21
+
22
+ [response.status, response.url, response.body]
23
+ end
24
+ end
25
+ end
data/lib/richurls.rb CHANGED
@@ -5,6 +5,7 @@ require 'digest'
5
5
  require_relative 'cache'
6
6
  require_relative 'url_fetcher'
7
7
  require_relative 'body_decorator'
8
+ require_relative 'patron_browser'
8
9
 
9
10
  module RichUrls
10
11
  class MalformedURLError < StandardError; end
@@ -13,6 +14,19 @@ module RichUrls
13
14
  @cache || Cache::None.new
14
15
  end
15
16
 
17
+ def self.browser=(browser)
18
+ unless browser.is_a? Browser
19
+ raise ArgumentError,
20
+ 'browser needs to be of a RichUrls::Browser type'
21
+ end
22
+
23
+ @browser ||= browser
24
+ end
25
+
26
+ def self.browser
27
+ @browser || PatronBrowser.new
28
+ end
29
+
16
30
  def self.cache=(wrapper)
17
31
  unless wrapper.is_a? Cache::Wrapper
18
32
  raise ArgumentError,
@@ -22,11 +36,19 @@ module RichUrls
22
36
  @cache ||= wrapper
23
37
  end
24
38
 
39
+ def self.headers=(headers)
40
+ @headers ||= headers
41
+ end
42
+
43
+ def self.headers
44
+ @headers || {}
45
+ end
46
+
25
47
  def self.enrich(url, filter: [], cache_time: nil)
26
48
  unless URI::DEFAULT_PARSER.make_regexp.match?(url)
27
49
  raise MalformedURLError, "this url is malformed: #{url}"
28
50
  end
29
51
 
30
- UrlFetcher.fetch(url, filter, cache_time)
52
+ UrlFetcher.fetch(url, filter, browser: browser, cache_time: cache_time)
31
53
  end
32
54
  end
data/lib/url_fetcher.rb CHANGED
@@ -6,15 +6,21 @@ module RichUrls
6
6
 
7
7
  class UrlFetcherError < StandardError; end
8
8
 
9
- def self.fetch(url, attributes = [], cache_time = nil)
10
- new(url, attributes, cache_time).fetch
9
+ def self.fetch(
10
+ url,
11
+ attributes = [],
12
+ browser: PatronBrowser.new,
13
+ cache_time: nil
14
+ )
15
+ new(url, attributes, browser, cache_time).fetch
11
16
  end
12
17
 
13
18
  private_class_method :new
14
19
 
15
- def initialize(url, attributes, cache_time)
20
+ def initialize(url, attributes, browser, cache_time)
16
21
  @url = url
17
22
  @attributes = attributes
23
+ @browser = browser
18
24
  @cache_time = cache_time
19
25
  end
20
26
 
@@ -36,13 +42,10 @@ module RichUrls
36
42
  end
37
43
 
38
44
  def patron_call
39
- session = Patron::Session.new(timeout: DEFAULT_TIMEOUT)
40
- response = session.get(@url)
45
+ status, return_url, body = @browser.remote_call(@url)
41
46
 
42
- if response.status < 400
43
- decorated = BodyDecorator.decorate(
44
- response.url, response.body, @attributes
45
- )
47
+ if status < 400
48
+ decorated = BodyDecorator.decorate(return_url, body, @attributes)
46
49
  RichUrls.cache.set(digest, Oj.dump(decorated), @cache_time)
47
50
  decorated
48
51
  else
data/lib/url_helper.rb CHANGED
@@ -9,34 +9,32 @@ class UrlHelper
9
9
 
10
10
  def initialize(domain, url)
11
11
  @domain = domain
12
-
13
- # In some rare cases it appears to be that URL's are ending with a
14
- # single whitespace character resulting in an invalid URL.
15
- @url = url&.strip
12
+ @url = url
16
13
  end
17
14
 
18
15
  def url
19
16
  return if @url.nil?
20
- return Addressable::URI.escape(@url) if valid_url?
21
17
 
22
- build_url
18
+ parsed = Addressable::URI.parse(@url)
19
+ full_url = valid?(parsed) ? parsed.to_s : domain_uri
20
+ Addressable::URI.escape(full_url)
21
+ rescue Addressable::URI::InvalidURIError
23
22
  end
24
23
 
25
24
  private
26
25
 
27
- def build_url
28
- domain_uri = URI(@domain)
29
- base = domain_uri.scheme + '://' + domain_uri.host
30
- escaped_url = Addressable::URI.escape(@url)
31
-
32
- if @url.start_with?('/')
33
- base + escaped_url
34
- else
35
- base + domain_uri.path + '/' + escaped_url
36
- end
26
+ def valid?(parsed)
27
+ parsed.host && (parsed.scheme || @url.start_with?('//'))
37
28
  end
38
29
 
39
- def valid_url?
40
- @url.start_with?('//') || @url =~ URI::DEFAULT_PARSER.make_regexp
30
+ def domain_uri
31
+ domain = Addressable::URI.parse(@domain)
32
+ domain.query = nil
33
+ domain.path = if @url.start_with?('/')
34
+ @url
35
+ else
36
+ domain.path + '/' + @url
37
+ end
38
+ domain.to_s
41
39
  end
42
40
  end
data/richurls.gemspec CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |spec|
5
5
  spec.name = 'richurls'
6
- spec.version = '0.5.5'
6
+ spec.version = '0.6.1'
7
7
  spec.authors = ['grdw']
8
8
  spec.email = ['gerard@wetransfer.com']
9
9
 
@@ -11,6 +11,7 @@ Gem::Specification.new do |spec|
11
11
  spec.description = 'Service which enriches URLs fast and cheap'
12
12
  spec.homepage = 'https://github.com/wetransfer/richurls'
13
13
  spec.license = 'GPL-3.0'
14
+ spec.required_ruby_version = '~> 2.7'
14
15
 
15
16
  spec.metadata['homepage_uri'] = spec.homepage
16
17
  spec.metadata['source_code_uri'] = 'https://github.com/wetransfer/richurls'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: richurls
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.5
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - grdw
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-08 00:00:00.000000000 Z
11
+ date: 2021-03-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -153,6 +153,7 @@ files:
153
153
  - lib/parsers/property.rb
154
154
  - lib/parsers/provider_display_parser.rb
155
155
  - lib/parsers/url.rb
156
+ - lib/patron_browser.rb
156
157
  - lib/richurls.rb
157
158
  - lib/url_fetcher.rb
158
159
  - lib/url_helper.rb
@@ -165,15 +166,15 @@ metadata:
165
166
  homepage_uri: https://github.com/wetransfer/richurls
166
167
  source_code_uri: https://github.com/wetransfer/richurls
167
168
  changelog_uri: https://github.com/wetransfer/richurls/CHANGELOG.md
168
- post_install_message:
169
+ post_install_message:
169
170
  rdoc_options: []
170
171
  require_paths:
171
172
  - lib
172
173
  required_ruby_version: !ruby/object:Gem::Requirement
173
174
  requirements:
174
- - - ">="
175
+ - - "~>"
175
176
  - !ruby/object:Gem::Version
176
- version: '0'
177
+ version: '2.7'
177
178
  required_rubygems_version: !ruby/object:Gem::Requirement
178
179
  requirements:
179
180
  - - ">="
@@ -181,7 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
181
182
  version: '0'
182
183
  requirements: []
183
184
  rubygems_version: 3.1.2
184
- signing_key:
185
+ signing_key:
185
186
  specification_version: 4
186
187
  summary: Service which enriches URLs
187
188
  test_files: []