true_url 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bba8a863389ecda1e8c6de716c3243d68e22af78
4
- data.tar.gz: 620de279d14cefa673b61b9024b903c1131452d4
3
+ metadata.gz: fe4a8259bda1a0227d7c85cebd88661f208bd4ce
4
+ data.tar.gz: 14b26af3018b5d04472c06a0cbb6a925cf06332a
5
5
  SHA512:
6
- metadata.gz: db04004b8be8e1071fb05cd5c6f39406f22023d1f46bb2ece6f6f9d5f01a33e056f5182e68d76148a9f6ac077c85f68ab494f1982744459cb9de57113ff85910
7
- data.tar.gz: 54dafb4f8b4656cde6318bbff7f3f656ce869df3efe510b38b59ac96c59c7788b3bc65ed5a4293e6749c6fabaea199faa1a8ef5e60918f2e638a4453b336793b
6
+ metadata.gz: b6a37a1a3087e12d242949196983b3d65da44c76f7c82c17f7e1e3058c28cc398b0ed87dbd80a8d2ada9895c22490e04a8bef1d3aeae3be0031a950bf064cfe3
7
+ data.tar.gz: 6770fdfb57aff9f0a471986bc61e980c6a15400f0ec15b841c325f0def08202c24f6b00da25d9ceb7c21082a21587a11d2115fe9629bb65c96e6f40eb0f04653
@@ -0,0 +1,38 @@
1
+ require 'http'
2
+ require 'nokogiri'
3
+
4
class TrueURL
  # Fetches a URL over HTTP and works out which URL the server (or the page
  # itself) declares as canonical.
  #
  # Precedence, highest first:
  #   1. a +Link+ response header with +rel="canonical"+
  #   2. a <link rel="canonical"> element in the returned HTML
  #   3. a <meta property="og:url"> element in the returned HTML
  #   4. the URI of the response itself
  module Fetch
    extend self

    # One link-value of a Link header: "<target>" followed by its
    # ";"-separated parameters. Quoted parameter values may contain commas,
    # so they are consumed as a unit instead of ending the link-value.
    LINK_VALUE = /<([^>]*)>((?:[^,"]|"[^"]*")*)/

    # The "rel" parameter of a link-value, either quoted or bare.
    REL_PARAM = /;\s*rel\s*=\s*(?:"([^"]*)"|([^\s;,"]+))/i

    # Requests the context's working URL and stores the canonical URL found.
    #
    # @param context [#working_url, #set_working_url] object holding the URL
    #   being resolved (defined elsewhere in the gem)
    # @return whatever +context.set_working_url+ returns
    def execute(context)
      starting_url = context.working_url

      response = HTTP.follow.get(starting_url)

      canonical_url = find_canonical_header(response.headers) ||
                      find_canonical_url(response.to_s) ||
                      response.uri
      context.set_working_url(canonical_url, starting_url)
    end

    # Extracts the canonical URL advertised through +Link+ response headers.
    #
    # Handles a single header string, an array of header strings, and several
    # comma-separated link-values inside one header. The relation type is
    # matched case-insensitively and may be quoted or unquoted, alone or in a
    # space-separated list, and followed by further parameters.
    #
    # @param headers [#[]] header collection; +headers['Link']+ may be nil,
    #   a String or an Array of Strings
    # @return [String, nil] target of the first canonical link, or nil
    def find_canonical_header(headers)
      Array(headers['Link']).each do |header|
        header.to_s.scan(LINK_VALUE) do |target, params|
          url = target.strip
          # An empty target carries no usable URL; keep looking so the
          # caller's fallbacks still get a chance.
          return url if !url.empty? && canonical_rel?(params)
        end
      end
      nil
    end

    # Extracts the canonical URL declared inside an HTML document.
    #
    # @param html [String] document markup
    # @return [String, nil] the <link rel="canonical"> href if present and
    #   non-blank, otherwise the og:url meta content, otherwise nil
    def find_canonical_url(html)
      doc = Nokogiri::HTML(html)

      attribute_value(doc.at('link[rel="canonical"]'), 'href') ||
        attribute_value(doc.at('meta[property="og:url"]'), 'content')
    end

    private

    # True when the parameters of a link-value name "canonical" among the
    # relation types of their first rel parameter.
    def canonical_rel?(params)
      match = REL_PARAM.match(params)
      return false unless match

      (match[1] || match[2]).split.any? { |rel| rel.casecmp('canonical').zero? }
    end

    # Returns the stripped attribute value, or nil when the element or the
    # attribute is missing or blank (a blank string would otherwise be truthy
    # and mask the next fallback).
    def attribute_value(elem, name)
      return if elem.nil?

      value = elem[name].to_s.strip
      value unless value.empty?
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  class TrueURL
2
- VERSION = '0.0.3'.freeze
2
+ VERSION = '0.0.4'.freeze
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: true_url
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Wong
@@ -116,6 +116,7 @@ files:
116
116
  - Rakefile
117
117
  - lib/true_url.rb
118
118
  - lib/true_url/context.rb
119
+ - lib/true_url/fetch.rb
119
120
  - lib/true_url/strategy.rb
120
121
  - lib/true_url/strategy/dailymotion.rb
121
122
  - lib/true_url/strategy/nicovideo.rb