true_url 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 496c04ece239c5302c9548b0a4b7d93630125172
4
+ data.tar.gz: 74c68ce403f401579bbc9b354b283595b5516302
5
+ SHA512:
6
+ metadata.gz: cfbf6e4099c911e66ff6061ecd98b0e98c60b8411659904d57d76c9bf1b4a2da207f0a19b9f11597ff8bf22a8ae716a7d49706b0a46730b6e824c99e970542cd
7
+ data.tar.gz: c186b1d04c781de99d61d64b9b3052e2f667ba140cba10c05db5e51ab0b43beddc0ee20d2318a534b84ed7ac8019294949a4fd18bcfe50a4492230431e13db6a
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
15
+ build.bat
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format doc
3
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Resolve gems from the public RubyGems index.
source 'https://rubygems.org'

# All dependencies are declared in true_url.gemspec; pull them in from there.
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Jonathan Wong
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1 @@
1
+ # true_url
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
require 'rspec/core/rake_task'

# Defines the `spec` task, which runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` runs the specs.
task :default => :spec
@@ -0,0 +1,39 @@
1
class TrueURL
  # Mutable state shared by the strategies during one canonicalisation run:
  # the URL as originally supplied, the URL currently being worked on, the
  # effective options, any extra attributes a strategy discovered, and a flag
  # saying whether a strategy has declared the working URL final.
  class Context
    attr_reader :original_url, :options, :attributes, :working_url

    # @param original_url [String, Addressable::URI] URL supplied by the caller
    # @param options [Hash] options hash, stored as given
    def initialize(original_url, options)
      @original_url = parse(original_url)
      @options = options
      @finalized = false
      @attributes = {}

      set_working_url(original_url)
    end

    # Replaces the working URL.
    #
    # @param url [String, Addressable::URI] new URL; may be relative when
    #   +base_url+ is given
    # @param base_url [String, Addressable::URI, nil] URL that +url+ is
    #   resolved against
    # @return [Addressable::URI] whatever `normalize` returns for the new
    #   working URL
    def set_working_url(url, base_url = nil)
      @working_url = base_url.nil? ? parse(url) : parse(base_url).join(parse(url))

      # If the URL has no scheme, then we assume HTTP
      if @working_url.scheme.nil?
        @working_url = url.to_s.start_with?('//') ? parse("http:#{url}") : parse("http://#{url}")
      end

      # NOTE(review): Addressable documents `normalize` as returning a new URI
      # rather than changing the receiver, so this only determines the method's
      # return value and @working_url itself stays un-normalised. Confirm
      # whether `normalize!` was intended before changing it; doing so would
      # alter the canonical strings this gem produces.
      @working_url.normalize
    end

    # Marks the working URL as final.
    def finalize
      @finalized = true
    end

    # @return [Boolean] whether #finalize has been called
    def finalized?
      @finalized
    end

    private

    # Coerces +url+ into an Addressable::URI owned by this context.
    #
    # A URI that is passed in is duplicated: strategies assign to `scheme`,
    # `path` and `fragment` on the working URL, and without the copy those
    # writes would also hit #original_url and the caller's own object.
    def parse(url)
      url.is_a?(Addressable::URI) ? url.dup : Addressable::URI.parse(url)
    end
  end
end
@@ -0,0 +1,56 @@
1
class TrueURL
  module Strategy
    # Rewrites DailyMotion video and playlist URLs (including dai.ly short
    # links and embed URLs) to their canonical https://www.dailymotion.com form.
    class DailyMotion
      VIDEO_PREFIX = '/video/'.freeze
      EMBED_PREFIX = '/embed/video/'.freeze
      PLAYLIST_PREFIX = '/playlist/'.freeze

      # Inspects the context's working URL. When a video or playlist ID can be
      # extracted, the working URL is replaced and the context finalized; for
      # videos an :embed_url attribute is recorded as well. The scheme is
      # forced to HTTPS in every case.
      #
      # @param context [TrueURL::Context]
      def execute(context)
        url = context.working_url
        path = url.path

        if url.host == 'dai.ly'
          video_id = presence(path[1..-1])
        elsif path.start_with?(VIDEO_PREFIX)
          video_id = presence(clean_video_id(path))
        elsif path.start_with?(EMBED_PREFIX)
          # Only /embed/video/<id> carries a video ID; other /embed/ paths used
          # to be sliced at a fixed offset and produced garbage IDs.
          video_id = presence(path[EMBED_PREFIX.length..-1])
        elsif path.start_with?(PLAYLIST_PREFIX)
          playlist_id = presence(clean_playlist_id(path))
        end

        if video_id
          context.set_working_url("https://www.dailymotion.com/video/#{video_id}")
          context.finalize
          context.attributes[:embed_url] = "https://www.dailymotion.com/embed/video/#{video_id}"
        elsif playlist_id
          context.set_working_url("https://www.dailymotion.com/playlist/#{playlist_id}")
          context.finalize
        end

        # DailyMotion supports both HTTP and HTTPS and doesn't redirect between them, so we prefer HTTPS
        context.working_url.scheme = 'https'
      end

      # Extracts the video ID from a "/video/<id>[_<slug>]" path.
      #
      # @param path [String]
      # @return [String, nil] text after "/video/" up to the first underscore
      def clean_video_id(path)
        cut = path.index('_')
        cut ? path[7..cut - 1] : path[7..-1]
      end

      # Extracts the playlist ID from a "/playlist/<id>[_<slug>][/<page>]" path.
      # The ID ends at whichever of "_" or "/" comes first.
      #
      # @param path [String]
      # @return [String] the ID, or "" when none is present
      def clean_playlist_id(path)
        path[PLAYLIST_PREFIX.length..-1].to_s[/\A[^_\/]*/]
      end

      private

      # Returns +value+ unless it is nil or empty, so blank IDs never finalize.
      def presence(value)
        value unless value.nil? || value.empty?
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
class TrueURL
  module Strategy
    # Rewrites Nico Video watch and thumb_watch URLs to the canonical
    # http://www.nicovideo.jp/watch/<id> form.
    class NicoVideo
      # Takes the video ID from the path segment after "/watch/" or
      # "/thumb_watch/". When one is found, the working URL is replaced, the
      # context is finalized and an :embed_url attribute is recorded. The
      # scheme is forced to HTTP in every case.
      #
      # @param context [TrueURL::Context]
      def execute(context)
        path = context.working_url.path
        watch_page = path.start_with?('/watch/', '/thumb_watch/')
        video_id = watch_page ? path.split('/')[2] : nil

        point_at_video(context, video_id) if video_id

        # Nico Video only supports HTTP
        context.working_url.scheme = 'http'
      end

      private

      # Points the context at the watch page for +video_id+ and finalizes it.
      def point_at_video(context, video_id)
        context.set_working_url("http://www.nicovideo.jp/watch/#{video_id}")
        context.finalize
        context.attributes[:embed_url] = "http://embed.nicovideo.jp/watch/#{video_id}"
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
class TrueURL
  module Strategy
    # Rewrites links to individual tweets to the canonical
    # https://twitter.com/<lowercased user>/status/<id> form.
    class Twitter
      # "/<user>/status/<digits>" at the start of the path. The captures are
      # what end up in the canonical URL, so anything after the digits
      # (e.g. "/photo/1" or stray characters) is dropped.
      STATUS_PATH = %r{\A/(\w+)/status/(\d+)}

      # Collapses a hashbang fragment into the path, then canonicalises and
      # finalizes the context when the path points at a tweet.
      #
      # @param context [TrueURL::Context]
      def execute(context)
        collapse_hashbang(context.working_url)

        tweet = STATUS_PATH.match(context.working_url.path)
        return if tweet.nil?

        context.set_working_url("https://twitter.com/#{tweet[1].downcase}/status/#{tweet[2]}")
        context.finalize
      end

      private

      # Special handling to collapse Twitter hashbang (#!) URLs: a fragment of
      # "!/user/status/1" becomes the path "/user/status/1".
      def collapse_hashbang(url)
        fragment = url.fragment
        return unless fragment && fragment.start_with?('!/')

        url.path = fragment[1..-1]
        url.fragment = nil
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
class TrueURL
  module Strategy
    # Rewrites Vimeo video URLs (direct, channel and player/embed forms) to
    # the canonical https://vimeo.com/<id> form.
    class Vimeo
      # player.vimeo.com/video/<digits>; matching the digits explicitly keeps
      # empty IDs and trailing path segments out of the canonical URL.
      PLAYER_PATH = %r{\A/video/(\d+)}
      CHANNEL_PATH = %r{\A/channels/\w+/(\d+)\z}
      DIRECT_PATH = %r{\A/(\d+)\z}

      # When a numeric video ID can be extracted, replaces the working URL,
      # finalizes the context and records an :embed_url attribute. The scheme
      # is forced to HTTPS in every case.
      #
      # @param context [TrueURL::Context]
      def execute(context)
        url = context.working_url
        path = url.path

        video_id =
          if url.host == 'player.vimeo.com'
            path[PLAYER_PATH, 1]
          else
            path[CHANNEL_PATH, 1] || path[DIRECT_PATH, 1]
          end

        if video_id
          context.set_working_url("https://vimeo.com/#{video_id}")
          context.finalize
          context.attributes[:embed_url] = "https://player.vimeo.com/video/#{video_id}"
        end

        # Vimeo supports both HTTP and HTTPS and doesn't redirect between them, so we prefer HTTPS
        context.working_url.scheme = 'https'
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
class TrueURL
  module Strategy
    # Rewrites YouTube video and playlist URLs (watch pages, youtu.be short
    # links and embed URLs) to their canonical https://www.youtube.com form,
    # and records the matching embed URLs as attributes.
    class YouTube
      # When a video or playlist ID can be extracted from the working URL,
      # replaces the working URL, finalizes the context and records
      # :embed_url and :embed_url_private attributes. URLs without a usable
      # ID are left untouched.
      #
      # @param context [TrueURL::Context]
      def execute(context)
        url = context.working_url
        path = url.path

        if url.host == 'youtu.be'
          video_id = path[1..-1]
        elsif path == '/watch'
          video_id = query_value(url, 'v')
        elsif path == '/playlist'
          playlist_id = query_value(url, 'list')
        elsif path.start_with?('/embed/videoseries')
          # Must be tested before the generic /embed/ branch below.
          playlist_id = query_value(url, 'list')
        elsif path.start_with?('/embed/')
          video_id = path[7..-1]
        end

        if present?(video_id)
          context.set_working_url("https://www.youtube.com/watch?v=#{video_id}")
          context.finalize
          context.attributes[:embed_url] = "https://www.youtube.com/embed/#{video_id}"
          context.attributes[:embed_url_private] = "https://www.youtube-nocookie.com/embed/#{video_id}"
        elsif present?(playlist_id)
          context.set_working_url("https://www.youtube.com/playlist?list=#{playlist_id}")
          context.finalize
          context.attributes[:embed_url] = "https://www.youtube.com/embed/videoseries?list=#{playlist_id}"
          context.attributes[:embed_url_private] = "https://www.youtube-nocookie.com/embed/videoseries?list=#{playlist_id}"
        end
      end

      private

      # Looks up a query parameter; `query_values` is nil when the URL has no
      # query string, which previously raised NoMethodError.
      def query_value(url, key)
        (url.query_values || {})[key]
      end

      # True when +id+ is neither nil nor empty, so "youtu.be/" or "/embed/"
      # do not finalize to an ID-less URL.
      def present?(id)
        !id.nil? && !id.empty?
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require 'true_url/strategy/dailymotion'
2
+ require 'true_url/strategy/nicovideo'
3
+ require 'true_url/strategy/twitter'
4
+ require 'true_url/strategy/vimeo'
5
+ require 'true_url/strategy/youtube'
6
+
7
class TrueURL
  module Strategy
    # Builds the default strategy table.
    #
    # Each entry is a [host pattern, strategy instance] pair; a strategy is
    # run when the working URL's host matches its pattern. Patterns ending in
    # a bare domain also accept its subdomains ("www.", "m.", "player." ...).
    # Dots are escaped and every pattern is anchored on a domain boundary so
    # look-alike hosts such as "notyoutube.com" or "daixly" do not match.
    #
    # @return [Array<Array(Regexp, Object)>] a fresh list on every call
    def self.default_list
      [
        [/(\A|\.)youtube\.com\z/, TrueURL::Strategy::YouTube.new],
        [/(\A|\.)youtube-nocookie\.com\z/, TrueURL::Strategy::YouTube.new],
        [/\Ayoutu\.be\z/, TrueURL::Strategy::YouTube.new],
        [/(\A|\.)dailymotion\.com\z/, TrueURL::Strategy::DailyMotion.new],
        [/\Adai\.ly\z/, TrueURL::Strategy::DailyMotion.new],
        [/(\A|\.)vimeo\.com\z/, TrueURL::Strategy::Vimeo.new],
        [/(\A|\.)nicovideo\.jp\z/, TrueURL::Strategy::NicoVideo.new],
        [/\Anico\.ms\z/, TrueURL::Strategy::NicoVideo.new],
        [/(\A|\.)twitter\.com\z/, TrueURL::Strategy::Twitter.new]
      ]
    end
  end
end
@@ -0,0 +1,3 @@
1
class TrueURL
  # Gem version string; read by true_url.gemspec.
  VERSION = '0.0.1'.freeze
end
data/lib/true_url.rb ADDED
@@ -0,0 +1,140 @@
1
+ require 'addressable/uri'
2
+
3
# Finds the cleanest, most likely canonical form of a URL.
#
# Site-specific strategies are tried first. If none of them finalizes the
# URL and fetching is enabled, the URL is fetched (following redirects) and a
# canonical URL is looked for in the Link response header, then in the HTML
# (<link rel="canonical"> or og:url), falling back to the final response URI.
# The strategies then run once more, and finally the optional scheme
# override, fragment removal and tracking-parameter cleanup are applied.
class TrueURL
  # true_url/version defines the constant VERSION, so that is the name the
  # autoload has to be registered under.
  autoload :VERSION, 'true_url/version'
  autoload :Context, 'true_url/context'
  autoload :Strategy, 'true_url/strategy'

  attr_accessor :context, :strategies

  OPTIONS = {
    scheme_override: nil, # Possible choices: "https", "http", nil (preserve scheme)
    fetch: true           # Whether to fetch the URL
  }.freeze

  # Query parameters stripped from the final URL.
  QUERY_VALUES_TO_REMOVE = %w(
    utm_source
    utm_medium
    utm_term
    utm_content
    utm_campaign
    sms_ss
    awesm
    xtor
    PHPSESSID
  ).freeze

  # One "<target>; params" link-value inside a Link header. Params run up to
  # the next "<", so a header carrying several comma-separated links is
  # split into one match per link.
  LINK_VALUE = /<([^>]*)>([^<]*)/
  # A rel parameter naming "canonical", quoted (possibly among several
  # space-separated relation types) or unquoted.
  CANONICAL_REL = /;\s*rel\s*=\s*(?:"[^"]*\bcanonical\b[^"]*"|canonical\b)/i
  private_constant :LINK_VALUE, :CANONICAL_REL

  # @param url [String, Addressable::URI] the URL to canonicalise
  # @param options [Hash] overrides for OPTIONS (:scheme_override, :fetch)
  def initialize(url, options = {})
    @context = TrueURL::Context.new(url, OPTIONS.merge(options))
    @strategies = TrueURL::Strategy.default_list
    @executed = false
  end

  # @return [String] the canonical URL; computed on first use
  def canonical
    execute
    @context.working_url.to_s
  end

  # @return [Hash] extra data recorded by strategies (e.g. :embed_url);
  #   computed on first use
  def attributes
    execute
    @context.attributes
  end

  private

  # Runs the whole pipeline once; later calls are no-ops.
  def execute
    return if @executed

    execute_strategies

    if !@context.finalized? && attempt_fetch?
      fetch
      execute_strategies
    end

    scheme_override
    remove_fragments
    clean_query_values

    @executed = true
  end

  # Runs each strategy whose host pattern matches the working URL, stopping
  # as soon as one of them finalizes the context.
  def execute_strategies
    @strategies.each do |match_criteria, strategy|
      break if @context.finalized?

      strategy.execute(@context) if strategy_match?(match_criteria)
    end
  end

  # A nil criteria matches every URL; otherwise the working URL's host must
  # match the pattern. Returns a truthy/falsy value, not a strict Boolean.
  def strategy_match?(match_criteria)
    return true if match_criteria.nil?

    host = @context.working_url.host
    host.nil? ? false : host.match(match_criteria)
  end

  def attempt_fetch?
    return false unless @context.options[:fetch]

    # Must at least have a host, otherwise we can't find the site to crawl
    return false if @context.working_url.host.nil?

    # We only support HTTP or HTTPS
    %w(http https).include?(@context.working_url.scheme)
  end

  # Fetches the working URL, following redirects, and replaces it with the
  # best canonical candidate found. Relative candidates are resolved against
  # the URL that was fetched.
  def fetch
    # Loaded lazily so the library is only needed when a fetch actually happens.
    require 'http' unless defined? HTTP

    starting_url = @context.working_url

    response = HTTP.follow.get(starting_url)

    canonical_url = find_canonical_header(response.headers) || find_canonical_url(response.to_s) || response.uri
    @context.set_working_url(canonical_url, starting_url)
  end

  # Returns the target of the first Link header entry whose rel includes
  # "canonical", or nil. Handles a String or Array header value, several
  # links in one header, and extra parameters after rel.
  def find_canonical_header(headers)
    Array(headers['Link']).each do |value|
      value.to_s.scan(LINK_VALUE) do |target, params|
        return target if params =~ CANONICAL_REL
      end
    end

    nil
  end

  # Returns the href of <link rel="canonical">, else the content of
  # <meta property="og:url">, else nil.
  def find_canonical_url(html)
    # Loaded lazily so the library is only needed when HTML is actually parsed.
    require 'nokogiri' unless defined? Nokogiri::HTML

    doc = Nokogiri::HTML(html)

    elem = doc.at('link[rel="canonical"]')
    canonical_url = elem['href'] unless elem.nil?

    elem = doc.at('meta[property="og:url"]')
    og_url = elem['content'] unless elem.nil?

    canonical_url || og_url
  end

  def scheme_override
    forced = @context.options[:scheme_override]
    @context.working_url.scheme = forced unless forced.nil?
  end

  def remove_fragments
    @context.working_url.fragment = nil
  end

  # Drops the parameters listed in QUERY_VALUES_TO_REMOVE; if nothing is
  # left the query string is removed altogether.
  def clean_query_values
    query_values = @context.working_url.query_values
    return if query_values.nil?

    QUERY_VALUES_TO_REMOVE.each { |name| query_values.delete(name) }
    @context.working_url.query_values = query_values.empty? ? nil : query_values
  end
end
@@ -0,0 +1,96 @@
1
+ require 'true_url'
2
+
3
+ # This file was generated by the `rspec --init` command. Conventionally, all
4
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
5
+ # The generated `.rspec` file contains `--require spec_helper` which will cause
6
+ # this file to always be loaded, without a need to explicitly require it in any
7
+ # files.
8
+ #
9
+ # Given that it is always loaded, you are encouraged to keep this file as
10
+ # light-weight as possible. Requiring heavyweight dependencies from this file
11
+ # will add to the boot time of your test suite on EVERY test run, even for an
12
+ # individual file that may not need all of that loaded. Instead, consider making
13
+ # a separate helper file that requires the additional dependencies and performs
14
+ # the additional setup, and require it from the spec files that actually need
15
+ # it.
16
+ #
17
+ # The `.rspec` file also contains a few flags that are not defaults but that
18
+ # users commonly want.
19
+ #
20
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
21
RSpec.configure do |config|
  # rspec-expectations config goes here. You can use an alternate
  # assertion/expectation library such as wrong or the stdlib/minitest
  # assertions if you prefer.
  config.expect_with(:rspec) do |expectations|
    # This option will default to `true` in RSpec 4. It makes the `description`
    # and `failure_message` of custom matchers include text for helper methods
    # defined using `chain`, e.g.:
    #     be_bigger_than(2).and_smaller_than(4).description
    #     # => "be bigger than 2 and smaller than 4"
    # ...rather than:
    #     # => "be bigger than 2"
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  # rspec-mocks config goes here. You can use an alternate test double
  # library (such as bogus or mocha) by changing the `mock_with` option here.
  config.mock_with(:rspec) do |mocks|
    # Prevents you from mocking or stubbing a method that does not exist on
    # a real object. This is generally recommended, and will default to
    # `true` in RSpec 4.
    mocks.verify_partial_doubles = true
  end

  # The settings below are suggested to provide a good initial experience
  # with RSpec, but feel free to customize to your heart's content. They are
  # all disabled here; uncomment the ones you want.
  #
  #   # These two settings work together to allow you to limit a spec run
  #   # to individual examples or groups you care about by tagging them with
  #   # `:focus` metadata. When nothing is tagged with `:focus`, all examples
  #   # get run.
  #   config.filter_run :focus
  #   config.run_all_when_everything_filtered = true
  #
  #   # Allows RSpec to persist some state between runs in order to support
  #   # the `--only-failures` and `--next-failure` CLI options. We recommend
  #   # you configure your source control system to ignore this file.
  #   config.example_status_persistence_file_path = "spec/examples.txt"
  #
  #   # Limits the available syntax to the non-monkey patched syntax that is
  #   # recommended. For more details, see:
  #   #   - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/
  #   #   - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
  #   #   - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode
  #   config.disable_monkey_patching!
  #
  #   # This setting enables warnings. It's recommended, but in some cases may
  #   # be too noisy due to issues in dependencies.
  #   config.warnings = true
  #
  #   # Many RSpec users commonly either run the entire suite or an individual
  #   # file, and it's useful to allow more verbose output when running an
  #   # individual spec file.
  #   if config.files_to_run.one?
  #     # Use the documentation formatter for detailed output,
  #     # unless a formatter has already been configured
  #     # (e.g. via a command-line flag).
  #     config.default_formatter = 'doc'
  #   end
  #
  #   # Print the 10 slowest examples and example groups at the
  #   # end of the spec run, to help surface which specs are running
  #   # particularly slow.
  #   config.profile_examples = 10
  #
  #   # Run specs in random order to surface order dependencies. If you find an
  #   # order dependency and want to debug it, you can fix the order by providing
  #   # the seed, which is printed after each run.
  #   #     --seed 1234
  #   config.order = :random
  #
  #   # Seed global randomization in this process using the `--seed` CLI option.
  #   # Setting this allows you to use `--seed` to deterministically reproduce
  #   # test failures related to randomization by passing the same `--seed` value
  #   # as the one that triggered the failure.
  #   Kernel.srand config.seed
end
@@ -0,0 +1,204 @@
1
+ require 'spec_helper'
2
+
3
# Spec shorthand ("get canonical"): builds a TrueURL for +unclean_url+ with
# the given +options+ and returns its canonical URL.
def gc(unclean_url, options = {})
  resolver = TrueURL.new(unclean_url, options)
  resolver.canonical
end
6
+
7
describe TrueURL do
  # Expects every URL in +urls+, in order, to canonicalise to +expected+.
  # Any keyword arguments are passed to TrueURL as options.
  def expect_canonical(expected, *urls, **options)
    urls.each { |url| expect(gc(url, options)).to eq expected }
  end

  describe 'YouTube' do
    it 'supports direct video links' do
      expect_canonical(
        'https://www.youtube.com/watch?v=RDocnbkHjhI',
        'https://www.youtube.com/watch?v=RDocnbkHjhI',
        'https://www.youtube.com/watch?v=RDocnbkHjhI&feature=youtu.be&list=PLs4hTtftqnlAkiQNdWn6bbKUr-P1wuSm0',
        'https://youtu.be/RDocnbkHjhI?list=PLs4hTtftqnlAkiQNdWn6bbKUr-P1wuSm0'
      )
    end

    it 'supports embedded video links' do
      expect_canonical(
        'https://www.youtube.com/watch?v=RDocnbkHjhI',
        'https://www.youtube.com/embed/RDocnbkHjhI?list=PLs4hTtftqnlAkiQNdWn6bbKUr-P1wuSm0',
        'https://www.youtube-nocookie.com/embed/RDocnbkHjhI?list=PLs4hTtftqnlAkiQNdWn6bbKUr-P1wuSm0&amp;controls=0&amp;showinfo=0'
      )
    end

    it 'supports direct playlist links' do
      # The same URL is checked twice, exactly as in the original spec.
      expect_canonical(
        'https://www.youtube.com/playlist?list=PLVL8S3lUHf0RqD7TZ6hohWk8Sd3asaqnY',
        'https://www.youtube.com/playlist?list=PLVL8S3lUHf0RqD7TZ6hohWk8Sd3asaqnY',
        'https://www.youtube.com/playlist?list=PLVL8S3lUHf0RqD7TZ6hohWk8Sd3asaqnY'
      )
    end

    it 'supports embedded playlist links' do
      expect_canonical(
        'https://www.youtube.com/playlist?list=PLVL8S3lUHf0RqD7TZ6hohWk8Sd3asaqnY',
        'https://www.youtube.com/embed/videoseries?list=PLVL8S3lUHf0RqD7TZ6hohWk8Sd3asaqnY',
        'https://www.youtube-nocookie.com/embed/videoseries?list=PLVL8S3lUHf0RqD7TZ6hohWk8Sd3asaqnY'
      )
    end

    it 'supports direct channel links' do
      expect_canonical(
        'https://www.youtube.com/user/WatchMojo',
        'https://www.youtube.com/channel/UCaWd5_7JhbQBe4dknZhsHJg'
      )
    end

    it 'supports retrieving embed links as attributes' do
      playlist = TrueURL.new('https://www.youtube-nocookie.com/embed/videoseries?list=PLVL8S3lUHf0RqD7TZ6hohWk8Sd3asaqnY')
      expect(playlist.attributes[:embed_url]).to eq 'https://www.youtube.com/embed/videoseries?list=PLVL8S3lUHf0RqD7TZ6hohWk8Sd3asaqnY'

      video = TrueURL.new('https://www.youtube.com/embed/RDocnbkHjhI?list=PLs4hTtftqnlAkiQNdWn6bbKUr-P1wuSm0&amp;controls=0&amp;showinfo=0')
      expect(video.attributes[:embed_url_private]).to eq 'https://www.youtube-nocookie.com/embed/RDocnbkHjhI'
    end
  end

  describe 'DailyMotion' do
    it 'supports direct video links' do
      expect_canonical(
        'https://www.dailymotion.com/video/x2k01a9',
        'http://dai.ly/x2k01a9',
        'http://www.dailymotion.com/video/x2k01a9_battlefield-what-s-it-like-to-be-in-a-real-life-video-game_fun'
      )
    end

    it 'supports embedded video links' do
      expect_canonical(
        'https://www.dailymotion.com/video/x2k01a9',
        'http://www.dailymotion.com/embed/video/x2k01a9?autoPlay=1&start=40'
      )
    end

    it 'supports direct playlist links' do
      expect_canonical(
        'https://www.dailymotion.com/playlist/x1ybux',
        'https://www.dailymotion.com/playlist/x1ybux/1#video=xlbw3e',
        'https://www.dailymotion.com/playlist/x1ybux',
        'http://www.dailymotion.com/playlist/x1ybux_ODNandfinally_amazing-world-records/1#video=xlbw3e'
      )
    end

    it 'supports retrieving embed links as attributes' do
      video = TrueURL.new('http://www.dailymotion.com/video/x2k01a9_battlefield-what-s-it-like-to-be-in-a-real-life-video-game_fun')
      expect(video.attributes[:embed_url]).to eq 'https://www.dailymotion.com/embed/video/x2k01a9'
    end

    it 'supports force HTTPS' do
      expect_canonical(
        'https://www.dailymotion.com/ODNandfinally',
        'http://www.dailymotion.com/ODNandfinally'
      )
    end
  end

  describe 'Vimeo' do
    it 'supports direct video links' do
      expect_canonical(
        'https://vimeo.com/122258599',
        'https://vimeo.com/channels/staffpicks/122258599',
        'http://vimeo.com/122258599'
      )
    end

    it 'supports embedded video links' do
      expect_canonical(
        'https://vimeo.com/122258599',
        'https://player.vimeo.com/video/122258599?loop=1&color=c9ff23&title=0'
      )
    end

    it "supports Vimeo's relative canonical links" do
      expect_canonical(
        'https://vimeo.com/channels/staffpicks',
        'http://vimeo.com/channels/staffpicks?some=silly&params=here'
      )
    end

    it 'supports retrieving embed links as attributes' do
      video = TrueURL.new('https://vimeo.com/channels/staffpicks/122258599')
      expect(video.attributes[:embed_url]).to eq 'https://player.vimeo.com/video/122258599'
    end

    it 'supports force HTTPS' do
      expect_canonical(
        'https://vimeo.com/user3190002',
        'http://vimeo.com/user3190002'
      )
    end
  end

  describe 'Nico Nico Douga' do
    it 'should work with direct and embedded video links' do
      expect_canonical(
        'http://www.nicovideo.jp/watch/sm25956031',
        'http://ext.nicovideo.jp/thumb_watch/sm25956031?w=490&h=307',
        'http://embed.nicovideo.jp/watch/sm25956031/script?w=490&h=307&redirect=1',
        'http://embed.nicovideo.jp/watch/sm25956031?oldScript=1'
      )
    end

    it 'supports retrieving embed links as attributes' do
      video = TrueURL.new('http://ext.nicovideo.jp/thumb_watch/sm25956031?w=490&h=307')
      expect(video.attributes[:embed_url]).to eq 'http://embed.nicovideo.jp/watch/sm25956031'
    end
  end

  describe 'Twitter' do
    it 'supports direct links to tweets' do
      expect_canonical(
        'https://twitter.com/gangsta_project/status/578483098284748801',
        'https://twitter.com/GANGSTA_Project/status/578483098284748801/photo/1',
        'https://twitter.com/GANGSTA_Project/status/578483098284748801/'
      )
    end

    it 'should handle hashbangs' do
      expect_canonical(
        'https://twitter.com/gangsta_project/status/578483098284748801',
        'https://twitter.com/#!/GANGSTA_Project/status/578483098284748801/'
      )
    end
  end

  describe 'URL Shorteners' do
    it 'should work with t.co' do
      expect_canonical(
        'http://www.prdaily.com/Main/Articles/3_essential_skills_for_todays_PR_pro__18404.aspx',
        'http://t.co/fvaGuRa5Za',
        'https://t.co/fvaGuRa5Za'
      )
    end

    it 'should work with fb.me' do
      expect_canonical(
        'https://www.facebook.com/aksuperdance/posts/1388968827814771',
        'http://fb.me/8qm5kW89k'
      )
    end

    it 'should work with ift.tt' do
      expect_canonical(
        'http://tedxtaipei.com/articles/the_best_kindergarten_you_have_ever_seen/',
        'http://ift.tt/2iCbPy8'
      )
    end

    it 'should work with compounded URL shorteners' do
      # http://bit.ly/2iCKic3 --> http://youtu.be/jLhjsPjR-xk?list=PLVL8S3lUHf0RqD7TZ6hohWk8Sd3asaqnY
      expect_canonical(
        'https://www.youtube.com/watch?v=jLhjsPjR-xk',
        'https://t.co/g4NYtZE3lW'
      )
    end
  end

  describe 'WordPress' do
    it 'supports missing trailing slashes' do
      expect_canonical(
        'http://wowjapan.asia/2015/04/anime-gargantia-on-the-verdurous-planet-2nd-season-cancelled/',
        'http://wowjapan.asia/2015/04/anime-gargantia-on-the-verdurous-planet-2nd-season-cancelled'
      )
    end
  end

  describe 'Blogger' do
    it 'supports missing localized Blogger domains' do
      expect_canonical(
        'http://thevikiblog.blogspot.com/2015/12/soompi-ios-android.html',
        'http://thevikiblog.blogspot.sg/2015/12/soompi-ios-android.html'
      )
    end
  end

  describe 'Other Scenarios' do
    it 'supports missing schemes' do
      expect_canonical(
        'http://wowjapan.asia/2015/04/anime-gargantia-on-the-verdurous-planet-2nd-season-cancelled/',
        '//wowjapan.asia/2015/04/anime-gargantia-on-the-verdurous-planet-2nd-season-cancelled/'
      )
    end

    it 'supports scheme override' do
      expect_canonical(
        'https://wowjapan.asia/2015/04/anime-gargantia-on-the-verdurous-planet-2nd-season-cancelled/',
        '//wowjapan.asia/2015/04/anime-gargantia-on-the-verdurous-planet-2nd-season-cancelled/',
        scheme_override: 'https'
      )
    end

    it 'supports CDJapan' do
      expect_canonical(
        'http://www.cdjapan.co.jp/product/MDR-1012',
        'http://www.cdjapan.co.jp/aff/click.cgi/e86NDzbdSLQ/4323/A323439/detailview.html?KEY=MDR-1012'
      )
    end

    it 'supports MyAnimeList' do
      expect_canonical(
        'https://myanimelist.net/forum/?topicid=1371295',
        'https://myanimelist.net/forum?topicid=1371295&goto=newpost'
      )
    end

    it 'supports URLs with escapable characters' do
      expect_canonical(
        'http://goboiano.com/news/2568-attack-on-titan%2527s-first-live-action-trailer-finally-launches',
        "http://media.goboiano.com/news/2568-attack-on-titan's-first-live-action-trailer-finally-launches"
      )

      expect_canonical(
        'http://randomc.net/image/Kekkai%20Sensen/Kekkai%20Sensen%20-%2001%20-%20Large%2001.jpg',
        'http://randomc.net/image/Kekkai Sensen/Kekkai Sensen - 01 - Large 01.jpg'
      )
    end

    it 'supports canonical HTTP headers' do
      expect_canonical(
        'http://www.seoreviewtools.com/canonical-url-location-checker/',
        'http://www.seoreviewtools.com/tests/canonical-header.php'
      )
    end
  end
end
data/true_url.gemspec ADDED
@@ -0,0 +1,22 @@
1
require File.expand_path('../lib/true_url/version', __FILE__)

Gem::Specification.new do |spec|
  spec.name          = 'true_url'
  spec.version       = TrueURL::VERSION
  spec.authors       = ['Jonathan Wong']
  spec.email         = ['jonathan@armchairtheorist.com']
  spec.summary       = 'A multi-strategy approach to find the absolutely cleanest and most likely canonical URL of any given URL.'
  spec.homepage      = 'http://github.com/armchairtheorist/true_url'
  spec.license       = 'MIT'

  spec.files         = `git ls-files`.split("\n")
  # Derived from the file list rather than a second git call: the previous
  # "{spec}/*" pathspec matched nothing, leaving test_files empty.
  spec.test_files    = spec.files.grep(%r{\Aspec/})
  spec.require_paths = ['lib']

  # "~> 0" means ">= 0, < 1", which can only resolve to 0.x releases; the
  # suite relies on RSpec 3 (`expect` syntax, `rspec/core/rake_task`).
  spec.add_development_dependency 'rspec', '~> 3.0'
  spec.add_development_dependency 'rake', '>= 10.0'
  # NOTE(review): lib/true_url.rb requires 'http' and 'nokogiri' lazily when a
  # URL is fetched, and fetching is on by default. Confirm whether these are
  # meant to stay optional or should become runtime dependencies.
  spec.add_development_dependency 'http', '~> 2.1', '>= 2.1.0'
  spec.add_development_dependency 'nokogiri', '~> 1.6', '>= 1.6.8'

  spec.add_runtime_dependency 'addressable', '~> 2.4', '>= 2.4.0'
end
metadata ADDED
@@ -0,0 +1,151 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: true_url
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Jonathan Wong
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-12-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: http
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.1'
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: 2.1.0
51
+ type: :development
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - "~>"
56
+ - !ruby/object:Gem::Version
57
+ version: '2.1'
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: 2.1.0
61
+ - !ruby/object:Gem::Dependency
62
+ name: nokogiri
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '1.6'
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: 1.6.8
71
+ type: :development
72
+ prerelease: false
73
+ version_requirements: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - "~>"
76
+ - !ruby/object:Gem::Version
77
+ version: '1.6'
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: 1.6.8
81
+ - !ruby/object:Gem::Dependency
82
+ name: addressable
83
+ requirement: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - "~>"
86
+ - !ruby/object:Gem::Version
87
+ version: '2.4'
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: 2.4.0
91
+ type: :runtime
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - "~>"
96
+ - !ruby/object:Gem::Version
97
+ version: '2.4'
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: 2.4.0
101
+ description:
102
+ email:
103
+ - jonathan@armchairtheorist.com
104
+ executables: []
105
+ extensions: []
106
+ extra_rdoc_files: []
107
+ files:
108
+ - ".gitignore"
109
+ - ".rspec"
110
+ - Gemfile
111
+ - LICENSE.txt
112
+ - README.md
113
+ - Rakefile
114
+ - lib/true_url.rb
115
+ - lib/true_url/context.rb
116
+ - lib/true_url/strategy.rb
117
+ - lib/true_url/strategy/dailymotion.rb
118
+ - lib/true_url/strategy/nicovideo.rb
119
+ - lib/true_url/strategy/twitter.rb
120
+ - lib/true_url/strategy/vimeo.rb
121
+ - lib/true_url/strategy/youtube.rb
122
+ - lib/true_url/version.rb
123
+ - spec/spec_helper.rb
124
+ - spec/true_url_spec.rb
125
+ - true_url.gemspec
126
+ homepage: http://github.com/armchairtheorist/true_url
127
+ licenses:
128
+ - MIT
129
+ metadata: {}
130
+ post_install_message:
131
+ rdoc_options: []
132
+ require_paths:
133
+ - lib
134
+ required_ruby_version: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ required_rubygems_version: !ruby/object:Gem::Requirement
140
+ requirements:
141
+ - - ">="
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ requirements: []
145
+ rubyforge_project:
146
+ rubygems_version: 2.5.1
147
+ signing_key:
148
+ specification_version: 4
149
+ summary: A multi-strategy approach to find the absolutely cleanest and most likely
150
+ canonical URL of any given URL.
151
+ test_files: []