true_url 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/true_url/fetch.rb +38 -0
- data/lib/true_url/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fe4a8259bda1a0227d7c85cebd88661f208bd4ce
|
4
|
+
data.tar.gz: 14b26af3018b5d04472c06a0cbb6a925cf06332a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b6a37a1a3087e12d242949196983b3d65da44c76f7c82c17f7e1e3058c28cc398b0ed87dbd80a8d2ada9895c22490e04a8bef1d3aeae3be0031a950bf064cfe3
|
7
|
+
data.tar.gz: 6770fdfb57aff9f0a471986bc61e980c6a15400f0ec15b841c325f0def08202c24f6b00da25d9ceb7c21082a21587a11d2115fe9629bb65c96e6f40eb0f04653
|
data/lib/true_url/fetch.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'http'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
class TrueURL
  # Resolves a URL to its canonical form by fetching it over HTTP and
  # inspecting the response for a canonical hint.
  #
  # The module extends itself, so every public method can be called directly
  # on the module (e.g. `TrueURL::Fetch.execute(context)`).
  module Fetch
    extend self

    # One link-value of a `Link` header: the `<target>` followed by everything
    # up to the next `<` (i.e. that target's parameters).
    LINK_VALUE = /<([^>]*)>([^<]*)/.freeze

    # The `rel` parameter of a link-value, either quoted (`rel="a b"`) or
    # bare (`rel=a`). The parameter name is matched case-insensitively.
    REL_PARAM = /;\s*rel\s*=\s*(?:"([^"]*)"|([^\s;,"]+))/i.freeze

    # Fetches the context's working URL (following redirects) and replaces it
    # with the best canonical URL found.
    #
    # Candidates are tried in order: the `Link` response header, the HTML
    # body (`<link rel="canonical">`, then `<meta property="og:url">`), and
    # finally the URI of the response itself.
    #
    # @param context [#working_url, #set_working_url] the resolution context;
    #   the result is handed to `context.set_working_url(canonical, original)`
    # @return whatever `context.set_working_url` returns
    def execute(context)
      starting_url = context.working_url

      response = HTTP.follow
                     .get(starting_url)

      canonical_url = find_canonical_header(response.headers) || find_canonical_url(response.to_s) || response.uri
      context.set_working_url(canonical_url, starting_url)
    end

    # Extracts the canonical URL advertised by a `Link` header, if any.
    #
    # Accepts the header as a single String or an Array of Strings, and
    # handles several comma-separated link-values in one header line. The
    # `rel` parameter may be quoted or bare, may list several relation types,
    # and does not have to be the last parameter.
    #
    # @param headers [#[]] response headers, looked up with the key 'Link'
    # @return [String, nil] the target of the first link whose `rel` includes
    #   "canonical", or nil when there is none
    def find_canonical_header(headers)
      # Array() folds the nil / String / Array cases into a single loop.
      Array(headers['Link']).each do |link|
        link.scan(LINK_VALUE) do |target, params|
          return target if !target.empty? && canonical_rel?(params)
        end
      end
      nil
    end

    # Extracts a canonical URL from an HTML document.
    #
    # Prefers `<link rel="canonical" href="...">` and falls back to
    # `<meta property="og:url" content="...">`. Missing or blank attribute
    # values are treated as absent so the caller can fall through to its
    # next candidate.
    #
    # @param html [String] the HTML source to inspect
    # @return [String, nil] the canonical URL, or nil when the document does
    #   not declare one
    def find_canonical_url(html)
      doc = Nokogiri::HTML(html)

      attribute_value(doc.at('link[rel="canonical"]'), 'href') ||
        attribute_value(doc.at('meta[property="og:url"]'), 'content')
    end

    private

    # True when the parameters of a link-value declare the "canonical"
    # relation type (case-insensitive, possibly among several types).
    def canonical_rel?(params)
      match = REL_PARAM.match(params)
      return false unless match

      (match[1] || match[2]).split.any? { |rel| rel.casecmp('canonical').zero? }
    end

    # Returns the named attribute of +elem+, or nil when the element is
    # missing or the attribute is absent or blank.
    def attribute_value(elem, name)
      value = elem && elem[name]
      value unless value.nil? || value.strip.empty?
    end
  end
end
|
data/lib/true_url/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: true_url
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Wong
|
@@ -116,6 +116,7 @@ files:
|
|
116
116
|
- Rakefile
|
117
117
|
- lib/true_url.rb
|
118
118
|
- lib/true_url/context.rb
|
119
|
+
- lib/true_url/fetch.rb
|
119
120
|
- lib/true_url/strategy.rb
|
120
121
|
- lib/true_url/strategy/dailymotion.rb
|
121
122
|
- lib/true_url/strategy/nicovideo.rb
|