embiggen 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a28132ab5fd7ca7bd8f3e4b0d7594b74398bae6afc4da9d59ebec70fb10f701
4
- data.tar.gz: fd7e54727d47b932edef1c211379c792cd925b5c9b8c4f2171f8b22dcc7e42cf
3
+ metadata.gz: 2bf4ba25d8acb0f2f5e168445f2e857fd255503a6d666d418c530a84381282a9
4
+ data.tar.gz: d6d0816294987d333ae32179c6101cbd29e9591e16eb7cafb9e1bf211b92f544
5
5
  SHA512:
6
- metadata.gz: 8c616360339cd2d4295af9e5f79d96470eed0c15909bf58b8eff3921b99e9f4e69a1264bed3457a19ea3296e589e7a12aa089b0309aca023df820491782c278f
7
- data.tar.gz: b12bbe5fd3e40446dc4aa1922d1b989bd7a576de66c9da3351c495209f48b240e0688b713c6e1ecb1d42744d00df8a0c8fa4033ee2aa28e53b368f78a09c0073
6
+ metadata.gz: 244b5a974ab6fbb38cbcce0c5e88c4f651e75b6f05a21c3e3fc67159875659dbc0d089ed7e856a85a1fe2db6b5881a5acd72af35320e91137bb4929ce7b45d1e
7
+ data.tar.gz: b4c0a86839b657d7f37b0d0cac6b8db4db0c2585696b5dc51643cd6e5a4b40a549ed40f9f1ca3d2616edb8dcae45d36822a1a5536909f99b085657a0fc84c984
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  A Ruby library to expand shortened URLs.
4
4
 
5
- **Current version:** 1.9.0
5
+ **Current version:** 1.10.0
6
6
  **Supported Ruby versions:** >= 2.7
7
7
 
8
8
  ## Installation
@@ -61,6 +61,40 @@ Embiggen.configure do |config|
61
61
  end
62
62
  ```
63
63
 
64
+ ## Non-redirect shorteners
65
+
66
+ Some URL shorteners (such as [LinkedIn's lnkd.in](https://lnkd.in)) do not
67
+ issue HTTP redirects when used with an external domain.
68
+ Instead, they serve an HTML page containing the
69
+ destination URL in a known element. Embiggen supports these by fetching the
70
+ page and extracting the link via a CSS selector.
71
+
72
+ Embiggen ships with a default list of known non-redirect shorteners in
73
+ [`non_redirect_shorteners.yml`](https://github.com/altmetric/embiggen/blob/master/non_redirect_shorteners.yml),
74
+ mapping each domain to the CSS selector used to locate the destination link.
75
+
76
+ ```ruby
77
+ # Expanding a LinkedIn shortened URL
78
+ Embiggen::URI('https://lnkd.in/eB25Z2yS').expand
79
+ #=> #<URI::HTTPS https://example.com/article>
80
+ ```
81
+
82
+ You can add your own non-redirect shorteners or remove existing ones via
83
+ `Embiggen.configure`:
84
+
85
+ ```ruby
86
+ Embiggen.configure do |config|
87
+ # Add a new non-redirect shortener
88
+ config.non_redirect_shorteners['myshorten.er'] = 'article a.destination'
89
+
90
+ # Remove a specific shortener
91
+ config.non_redirect_shorteners.delete('lnkd.in')
92
+
93
+ # Opt out of the feature entirely
94
+ config.non_redirect_shorteners.clear
95
+ end
96
+ ```
97
+
64
98
  ## Shorteners
65
99
 
66
100
  Embiggen ships with a default list of URL shortening service domains (c.f.
@@ -191,7 +225,9 @@ Override the following settings:
191
225
  * `redirects`: the default number of redirects to follow (can be overridden by
192
226
  passing options to `Embiggen::URI#expand`);
193
227
  * `shorteners`: the list of domains of shortening services, c.f.
194
- [Shorteners](#shorteners).
228
+ [Shorteners](#shorteners);
229
+ * `non_redirect_shorteners`: the mapping of non-redirect shortener domains to
230
+ CSS selectors, c.f. [Non-redirect shorteners](#non-redirect-shorteners).
195
231
 
196
232
  ## Acknowledgements
197
233
 
@@ -1,10 +1,12 @@
1
1
  # encoding: utf-8
2
2
  require 'embiggen/shortener_list'
3
+ require 'embiggen/non_redirect_shortener_list'
4
+ require 'yaml'
3
5
 
4
6
  module Embiggen
5
7
  class Configuration
6
8
  class << self
7
- attr_writer :timeout, :redirects, :shorteners
9
+ attr_writer :timeout, :redirects, :shorteners, :non_redirect_shorteners
8
10
  end
9
11
 
10
12
  def self.timeout
@@ -24,5 +26,14 @@ module Embiggen
24
26
  file_path = File.expand_path('../../shorteners.txt', __dir__)
25
27
  File.readlines(file_path).map(&:chomp)
26
28
  end
29
+
30
+ def self.non_redirect_shorteners
31
+ @non_redirect_shorteners ||= NonRedirectShortenerList.new(non_redirect_shorteners_from_file)
32
+ end
33
+
34
+ def self.non_redirect_shorteners_from_file
35
+ file_path = File.expand_path('../../non_redirect_shorteners.yml', __dir__)
36
+ YAML.safe_load(File.read(file_path))
37
+ end
27
38
  end
28
39
  end
@@ -0,0 +1,42 @@
1
+ require 'embiggen/error'
2
+ require 'net/http'
3
+ require 'nokogiri'
4
+
5
+ module Embiggen
6
+ class HtmlClient
7
+ attr_reader :uri
8
+
9
+ def initialize(uri)
10
+ @uri = uri
11
+ @http = ::Net::HTTP.new(uri.host, uri.port)
12
+ @http.use_ssl = true if uri.scheme == 'https'
13
+ end
14
+
15
+ def follow(timeout, selector)
16
+ response = request(timeout)
17
+ return unless response.is_a?(::Net::HTTPOK)
18
+
19
+ document = Nokogiri::HTML(response.body)
20
+ element = document.at_css(selector)
21
+ element&.[]('href')
22
+ rescue ::Timeout::Error => e
23
+ raise NetworkError.new(
24
+ "Timeout::Error: could not follow #{uri}: #{e.message}", uri
25
+ )
26
+ rescue StandardError => e
27
+ raise NetworkError.new(
28
+ "StandardError: could not follow #{uri}: #{e.message}", uri
29
+ )
30
+ end
31
+
32
+ private
33
+
34
+ def request(timeout)
35
+ request = ::Net::HTTP::Get.new(uri.request_uri)
36
+ @http.open_timeout = timeout
37
+ @http.read_timeout = timeout
38
+
39
+ @http.request(request)
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,41 @@
1
+ module Embiggen
2
+ class NonRedirectShortenerList
3
+ attr_reader :domains
4
+
5
+ def initialize(domains)
6
+ @domains = domains.to_h.transform_keys { |domain| host_pattern(domain) }
7
+ end
8
+
9
+ def supported?(uri)
10
+ !selector_for(uri).nil?
11
+ end
12
+
13
+ def selector_for(uri)
14
+ _, selector = domains.find { |pattern, _| uri.host =~ pattern }
15
+ selector
16
+ end
17
+
18
+ def []=(domain, selector)
19
+ domains[host_pattern(domain)] = selector
20
+ end
21
+
22
+ def delete(domain)
23
+ domains.delete(host_pattern(domain))
24
+ end
25
+
26
+ def clear
27
+ domains.clear
28
+ self
29
+ end
30
+
31
+ def size
32
+ domains.size
33
+ end
34
+
35
+ private
36
+
37
+ def host_pattern(domain)
38
+ /\b#{Regexp.escape(domain)}\z/i
39
+ end
40
+ end
41
+ end
@@ -1,4 +1,5 @@
1
1
  require 'forwardable'
2
+ require 'set'
2
3
 
3
4
  module Embiggen
4
5
  class ShortenerList
@@ -16,7 +17,15 @@ module Embiggen
16
17
  end
17
18
 
18
19
  def +(other)
19
- self.class.new(domains + other)
20
+ other_patterns = if other.respond_to?(:domains)
21
+ other.domains
22
+ else
23
+ Set.new(other.map { |d| host_pattern(d) })
24
+ end
25
+
26
+ self.class.allocate.tap do |result|
27
+ result.instance_variable_set(:@domains, domains | other_patterns)
28
+ end
20
29
  end
21
30
 
22
31
  def <<(domain)
@@ -32,7 +41,7 @@ module Embiggen
32
41
  def_delegators :domains, :size, :empty?, :each
33
42
 
34
43
  def host_pattern(domain)
35
- /\b#{domain}\z/i
44
+ /\b#{Regexp.escape(domain)}\z/i
36
45
  end
37
46
  end
38
47
  end
data/lib/embiggen/uri.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'embiggen/configuration'
2
2
  require 'embiggen/error'
3
+ require 'embiggen/html_client'
3
4
  require 'embiggen/http_client'
4
5
  require 'addressable/uri'
5
6
  require 'uri'
@@ -44,6 +45,14 @@ module Embiggen
44
45
  timeout = request_options.fetch(:timeout) { Configuration.timeout }
45
46
 
46
47
  location = http_client.follow(timeout)
48
+
49
+ if location.nil?
50
+ non_redirect_shorteners = Configuration.non_redirect_shorteners
51
+ if non_redirect_shorteners.supported?(uri)
52
+ location = HtmlClient.new(uri).follow(timeout, non_redirect_shorteners.selector_for(uri))
53
+ end
54
+ end
55
+
47
56
  unless followable?(location)
48
57
  fail BadShortenedURI.new(
49
58
  "following #{uri} did not redirect", uri
@@ -0,0 +1,2 @@
1
+ lnkd.in: "main a"
2
+ ebx.sh: "body a#urlToFollow"
@@ -0,0 +1,47 @@
1
+ # encoding: utf-8
2
+ require 'embiggen/html_client'
3
+
4
+ module Embiggen
5
+ RSpec.describe HtmlClient do
6
+ describe '#follow' do
7
+ let(:uri) { URI('https://lnkd.in/eB25Z2yS') }
8
+ let(:client) { described_class.new(uri) }
9
+
10
+ it 'returns the href from the matched element' do
11
+ stub_request(:get, 'https://lnkd.in/eB25Z2yS')
12
+ .to_return(
13
+ status: 200,
14
+ body: '<html><body><main><a href="https://example.com/article">https://example.com/article</a></main></body></html>',
15
+ headers: { 'Content-Type' => 'text/html' }
16
+ )
17
+
18
+ expect(client.follow(1, 'main a')).to eq('https://example.com/article')
19
+ end
20
+
21
+ it 'returns nil when the response is not 200 OK' do
22
+ stub_request(:get, 'https://lnkd.in/eB25Z2yS').to_return(status: 302, headers: { 'Location' => 'https://example.com' })
23
+
24
+ expect(client.follow(1, 'main a')).to be_nil
25
+ end
26
+
27
+ it 'returns nil when no element matches the selector' do
28
+ stub_request(:get, 'https://lnkd.in/eB25Z2yS')
29
+ .to_return(status: 200, body: '<html><body><p>No link here</p></body></html>')
30
+
31
+ expect(client.follow(1, 'main a')).to be_nil
32
+ end
33
+
34
+ it 'raises a network error if the URI times out' do
35
+ stub_request(:get, 'https://lnkd.in/eB25Z2yS').to_timeout
36
+
37
+ expect { client.follow(1, 'main a') }.to raise_error(NetworkError)
38
+ end
39
+
40
+ it 'raises a network error if the connection resets' do
41
+ stub_request(:get, 'https://lnkd.in/eB25Z2yS').to_raise(::Errno::ECONNRESET)
42
+
43
+ expect { client.follow(1, 'main a') }.to raise_error(NetworkError)
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,46 @@
1
+ require 'embiggen/non_redirect_shortener_list'
2
+
3
+ RSpec.describe Embiggen::NonRedirectShortenerList do
4
+ describe '#supported?' do
5
+ it 'returns true for a URI whose host is in the list' do
6
+ list = described_class.new('lnkd.in' => 'main a')
7
+
8
+ expect(list.supported?(URI('https://lnkd.in/eB25Z2yS'))).to be(true)
9
+ end
10
+
11
+ it 'returns false for a URI whose host is not in the list' do
12
+ list = described_class.new('lnkd.in' => 'main a')
13
+
14
+ expect(list.supported?(URI('https://example.com/foo'))).to be(false)
15
+ end
16
+ end
17
+
18
+ describe '#selector_for' do
19
+ it 'returns the selector for a matching URI' do
20
+ list = described_class.new('lnkd.in' => 'main a')
21
+
22
+ expect(list.selector_for(URI('https://lnkd.in/eB25Z2yS'))).to eq('main a')
23
+ end
24
+
25
+ it 'returns nil for a non-matching URI' do
26
+ list = described_class.new('lnkd.in' => 'main a')
27
+
28
+ expect(list.selector_for(URI('https://example.com/foo'))).to be_nil
29
+ end
30
+
31
+ it 'returns nil if the URI only matches due to an unescaped dot' do
32
+ list = described_class.new('lnkd.in' => 'main a')
33
+
34
+ expect(list.selector_for(URI('https://lnkdXin/foo'))).to be_nil
35
+ end
36
+ end
37
+
38
+ describe '#clear' do
39
+ it 'removes all entries' do
40
+ list = described_class.new('lnkd.in' => 'main a')
41
+ list.clear
42
+
43
+ expect(list.size).to eq(0)
44
+ end
45
+ end
46
+ end
@@ -33,6 +33,12 @@ RSpec.describe Embiggen::ShortenerList do
33
33
 
34
34
  expect(list).to include(URI('http://www.bit.ly/foo'))
35
35
  end
36
+
37
+ it 'returns false if a URL host only matches due to an unescaped dot' do
38
+ list = described_class.new(%w[i.ea.com])
39
+
40
+ expect(list).not_to include(URI('http://www.ikea.com/foo'))
41
+ end
36
42
  end
37
43
 
38
44
  describe '#<<' do
@@ -82,6 +88,20 @@ RSpec.describe Embiggen::ShortenerList do
82
88
 
83
89
  expect(list).to include(URI('http://a.com/foo'))
84
90
  end
91
+
92
+ it 'retains the original domains when combining two lists' do
93
+ list = described_class.new(%w[bit.ly])
94
+ list += described_class.new(%w[a.com])
95
+
96
+ expect(list).to include(URI('http://bit.ly/foo'))
97
+ end
98
+
99
+ it 'returns a list with the combined size' do
100
+ list = described_class.new(%w[bit.ly])
101
+ list += described_class.new(%w[a.com])
102
+
103
+ expect(list.size).to eq(2)
104
+ end
85
105
  end
86
106
 
87
107
  it 'is enumerable for 1.8 compatiblity' do
@@ -209,6 +209,26 @@ module Embiggen
209
209
  expect(uri.expand).to eq(URI('http://www.altmetric.com'))
210
210
  end
211
211
 
212
+ it 'expands non-redirect shorteners by parsing HTML' do
213
+ stub_request(:get, 'https://lnkd.in/eB25Z2yS')
214
+ .to_return(
215
+ status: 200,
216
+ body: '<html><body><main><a href="https://example.com/article">https://example.com/article</a></main></body></html>',
217
+ headers: { 'Content-Type' => 'text/html' }
218
+ )
219
+ uri = described_class.new(URI('https://lnkd.in/eB25Z2yS'))
220
+
221
+ expect(uri.expand).to eq(URI('https://example.com/article'))
222
+ end
223
+
224
+ it 'raises an error if the non-redirect shortener HTML contains no matching element' do
225
+ stub_request(:get, 'https://lnkd.in/bad')
226
+ .to_return(status: 200, body: '<html><body><p>No link</p></body></html>')
227
+ uri = described_class.new(URI('https://lnkd.in/bad'))
228
+
229
+ expect { uri.expand }.to raise_error(BadShortenedURI)
230
+ end
231
+
212
232
  after do
213
233
  Configuration.redirects = 5
214
234
  Configuration.shorteners.delete('altmetric.it')
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embiggen
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.0
4
+ version: 1.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Mucur
8
8
  - Jonathan Hernandez
9
- autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2026-04-23 00:00:00.000000000 Z
11
+ date: 1980-01-02 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: addressable
@@ -25,6 +24,20 @@ dependencies:
25
24
  - - "~>"
26
25
  - !ruby/object:Gem::Version
27
26
  version: '2.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
28
41
  - !ruby/object:Gem::Dependency
29
42
  name: rspec
30
43
  requirement: !ruby/object:Gem::Requirement
@@ -94,11 +107,16 @@ files:
94
107
  - lib/embiggen.rb
95
108
  - lib/embiggen/configuration.rb
96
109
  - lib/embiggen/error.rb
110
+ - lib/embiggen/html_client.rb
97
111
  - lib/embiggen/http_client.rb
112
+ - lib/embiggen/non_redirect_shortener_list.rb
98
113
  - lib/embiggen/shortener_list.rb
99
114
  - lib/embiggen/uri.rb
115
+ - non_redirect_shorteners.yml
100
116
  - shorteners.txt
101
117
  - spec/embiggen/configuration_spec.rb
118
+ - spec/embiggen/html_client_spec.rb
119
+ - spec/embiggen/non_redirect_shortener_list_spec.rb
102
120
  - spec/embiggen/shortener_list_spec.rb
103
121
  - spec/embiggen/uri_spec.rb
104
122
  - spec/embiggen_spec.rb
@@ -107,7 +125,6 @@ homepage: https://github.com/altmetric/embiggen
107
125
  licenses:
108
126
  - MIT
109
127
  metadata: {}
110
- post_install_message:
111
128
  rdoc_options: []
112
129
  require_paths:
113
130
  - lib
@@ -122,13 +139,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
122
139
  - !ruby/object:Gem::Version
123
140
  version: '0'
124
141
  requirements: []
125
- rubygems_version: 3.1.6
126
- signing_key:
142
+ rubygems_version: 3.6.9
127
143
  specification_version: 4
128
144
  summary: A library to expand shortened URLs
129
145
  test_files:
130
- - spec/spec_helper.rb
131
- - spec/embiggen_spec.rb
132
146
  - spec/embiggen/configuration_spec.rb
147
+ - spec/embiggen/html_client_spec.rb
148
+ - spec/embiggen/non_redirect_shortener_list_spec.rb
133
149
  - spec/embiggen/shortener_list_spec.rb
134
150
  - spec/embiggen/uri_spec.rb
151
+ - spec/embiggen_spec.rb
152
+ - spec/spec_helper.rb