raev 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +0 -1
- data/Gemfile.lock +0 -2
- data/README.md +4 -11
- data/VERSION +1 -1
- data/lib/raev/url.rb +27 -26
- data/lib/raev.rb +0 -3
- data/raev.gemspec +3 -6
- data/test/test_url.rb +11 -20
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0e99b301f4885c807426b83c31d4d9c5404c763e
|
4
|
+
data.tar.gz: cd0e1a7a522efdda1869c704625aa2921227696e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e33460b3be3e68a48e6cec8d24e69f70595490959addc494e657ef12fb10f8085bd5dec51bc9c74d257e0ca12c5645bd6beca15c58dcb50f8b752dd81ae3eb27
|
7
|
+
data.tar.gz: bd0ec76ed70319849b758d4250a5dc3ad65f453e3138e0ffeedb074c8f704b756ff12be57c5670048dbb8f31ea59c62db59014699cc358ee6b8d2803aa7a0a5e
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -60,7 +60,6 @@ GEM
|
|
60
60
|
rack (2.0.3)
|
61
61
|
rake (12.1.0)
|
62
62
|
rdoc (5.1.0)
|
63
|
-
redirect_follower (0.1.1)
|
64
63
|
sanitize (4.5.0)
|
65
64
|
crass (~> 1.0.2)
|
66
65
|
nokogiri (>= 1.4.4)
|
@@ -87,7 +86,6 @@ DEPENDENCIES
|
|
87
86
|
jeweler (= 2.3.7)
|
88
87
|
json (>= 2.1.0)
|
89
88
|
nokogiri (>= 1.8.0)
|
90
|
-
redirect_follower (>= 0.1.1)
|
91
89
|
sanitize (>= 2.1.0)
|
92
90
|
shoulda
|
93
91
|
test-unit (= 3.2.4)
|
data/README.md
CHANGED
@@ -22,31 +22,24 @@ Usage
|
|
22
22
|
Get the domain name from an url without the `www.` subdomain.
|
23
23
|
|
24
24
|
```ruby
|
25
|
-
Raev.
|
25
|
+
Raev::Url.base("http://indiegames.com/2011/05/c418_minecraft_volume_alpha.html")
|
26
26
|
# => "indiegames.com"
|
27
27
|
```
|
28
28
|
|
29
29
|
Remove UTM analytics parameters from an url.
|
30
30
|
|
31
31
|
```ruby
|
32
|
-
Raev.
|
33
|
-
# => "http://
|
32
|
+
Raev::Url.remove_utm("http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+ignfeeds%2Fgames+%28IGN+Videogames%29")
|
33
|
+
# => "http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change"
|
34
34
|
```
|
35
35
|
|
36
36
|
Resolve a shortened or proxied url.
|
37
37
|
|
38
38
|
```ruby
|
39
|
-
Raev.url("http://sbn.to/WRgXfl").
|
39
|
+
Raev.url("http://sbn.to/WRgXfl").url
|
40
40
|
# => "http://www.polygon.com/features/2013/3/25/4128022/gdc-gathering-of-game-makers"
|
41
41
|
```
|
42
42
|
|
43
|
-
Resolve a shortend or proxied url and remove UTM analytics parameters.
|
44
|
-
|
45
|
-
```ruby
|
46
|
-
url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/").resolved_and_clean
|
47
|
-
# => "http://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/"
|
48
|
-
```
|
49
|
-
|
50
43
|
Fetch Twitter handle from url.
|
51
44
|
|
52
45
|
```ruby
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.4
|
1
|
+
0.3.0
|
data/lib/raev/url.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "chronic"
|
2
2
|
require "json"
|
3
3
|
require "sanitize"
|
4
|
+
require 'net/http'
|
4
5
|
|
5
6
|
module Raev
|
6
7
|
|
@@ -33,48 +34,33 @@ module Raev
|
|
33
34
|
REGEX_PAGE_TITLE = / +/
|
34
35
|
|
35
36
|
attr_reader :url
|
37
|
+
attr_reader :body
|
36
38
|
attr_reader :doc
|
37
39
|
|
38
40
|
def initialize(url)
|
39
|
-
|
41
|
+
fetch(url)
|
42
|
+
@url = Url.remove_utm(@url)
|
40
43
|
@doc = nil
|
41
44
|
@linked_data = nil
|
42
45
|
end
|
43
46
|
|
44
|
-
def base
|
45
|
-
base_url =
|
47
|
+
def self.base(url)
|
48
|
+
base_url = url.split('/'.freeze)[2]
|
46
49
|
base_url.gsub!('www.'.freeze, ''.freeze) unless base_url.nil?
|
47
50
|
base_url
|
48
51
|
end
|
49
52
|
|
50
|
-
def
|
51
|
-
unless
|
52
|
-
utm_index =
|
53
|
+
def self.remove_utm(url)
|
54
|
+
unless url.nil?
|
55
|
+
utm_index = url.index(REGEX_UTM)
|
53
56
|
unless(utm_index.nil?)
|
54
|
-
|
57
|
+
url = url.slice(0, utm_index)
|
55
58
|
end
|
56
59
|
end
|
57
60
|
|
58
|
-
|
61
|
+
url
|
59
62
|
end
|
60
63
|
|
61
|
-
def resolved
|
62
|
-
unless @url.nil?
|
63
|
-
begin
|
64
|
-
return RedirectFollower(@url, 5)
|
65
|
-
rescue => ex
|
66
|
-
puts "Could not resolve #{@url}. #{ex.class}: #{ex.message}"
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
@url
|
71
|
-
end
|
72
|
-
|
73
|
-
def resolved_and_clean
|
74
|
-
resolved_url = Url.new(self.resolved)
|
75
|
-
resolved_url.clean
|
76
|
-
end
|
77
|
-
|
78
64
|
def without_http
|
79
65
|
@url.sub("http://".freeze, "".freeze)
|
80
66
|
end
|
@@ -232,7 +218,7 @@ module Raev
|
|
232
218
|
|
233
219
|
def document
|
234
220
|
if @doc.nil?
|
235
|
-
@doc = Nokogiri::HTML(
|
221
|
+
@doc = Nokogiri::HTML(@body)
|
236
222
|
end
|
237
223
|
|
238
224
|
@doc
|
@@ -252,5 +238,20 @@ module Raev
|
|
252
238
|
@linked_data
|
253
239
|
end
|
254
240
|
|
241
|
+
def fetch(uri_str, limit = 10)
|
242
|
+
raise ArgumentError, 'too many HTTP redirects' if limit == 0
|
243
|
+
|
244
|
+
response = Net::HTTP.get_response(URI(uri_str))
|
245
|
+
|
246
|
+
case response
|
247
|
+
when Net::HTTPSuccess then
|
248
|
+
@url = uri_str
|
249
|
+
@body = response.body
|
250
|
+
when Net::HTTPRedirection then
|
251
|
+
fetch(response['location'], limit - 1)
|
252
|
+
else
|
253
|
+
# TODO handle Not Found
|
254
|
+
end
|
255
|
+
end
|
255
256
|
end
|
256
257
|
end
|
data/lib/raev.rb
CHANGED
data/raev.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: raev 0.2.4 ruby lib
|
5
|
+
# stub: raev 0.3.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "raev".freeze
|
9
|
-
s.version = "0.2.4"
|
9
|
+
s.version = "0.3.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib".freeze]
|
13
13
|
s.authors = ["Andreas Zecher".freeze]
|
14
|
-
s.date = "2017-09-
|
14
|
+
s.date = "2017-09-19"
|
15
15
|
s.description = "Fetch, parse and normalize meta data from websites.".freeze
|
16
16
|
s.email = "andreas@madebypixelate.com".freeze
|
17
17
|
s.extra_rdoc_files = [
|
@@ -48,7 +48,6 @@ Gem::Specification.new do |s|
|
|
48
48
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
49
|
s.add_runtime_dependency(%q<json>.freeze, [">= 2.1.0"])
|
50
50
|
s.add_runtime_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
|
51
|
-
s.add_runtime_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
|
52
51
|
s.add_runtime_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
|
53
52
|
s.add_runtime_dependency(%q<chronic>.freeze, [">= 0.10.2"])
|
54
53
|
s.add_development_dependency(%q<shoulda>.freeze, [">= 0"])
|
@@ -58,7 +57,6 @@ Gem::Specification.new do |s|
|
|
58
57
|
else
|
59
58
|
s.add_dependency(%q<json>.freeze, [">= 2.1.0"])
|
60
59
|
s.add_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
|
61
|
-
s.add_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
|
62
60
|
s.add_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
|
63
61
|
s.add_dependency(%q<chronic>.freeze, [">= 0.10.2"])
|
64
62
|
s.add_dependency(%q<shoulda>.freeze, [">= 0"])
|
@@ -69,7 +67,6 @@ Gem::Specification.new do |s|
|
|
69
67
|
else
|
70
68
|
s.add_dependency(%q<json>.freeze, [">= 2.1.0"])
|
71
69
|
s.add_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
|
72
|
-
s.add_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
|
73
70
|
s.add_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
|
74
71
|
s.add_dependency(%q<chronic>.freeze, [">= 0.10.2"])
|
75
72
|
s.add_dependency(%q<shoulda>.freeze, [">= 0"])
|
data/test/test_url.rb
CHANGED
@@ -5,29 +5,20 @@ require 'raev'
|
|
5
5
|
|
6
6
|
class UrlTest < Test::Unit::TestCase
|
7
7
|
should "parse base url" do
|
8
|
-
|
9
|
-
assert_equal url.base, "indiegames.com"
|
8
|
+
assert_equal Raev::Url.base("http://indiegames.com/2011/05/c418_minecraft_volume_alpha.html"), "indiegames.com"
|
10
9
|
end
|
11
10
|
|
12
11
|
should "clean url" do
|
13
|
-
|
14
|
-
assert_equal "http://
|
15
|
-
|
16
|
-
url = Raev.url("http://games.ign.com/articles/117/1178937p1.html?RSSwhen2011-06-24_082700&RSSid=1178937&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+ignfeeds%2Fgames+%28IGN+Videogames%29")
|
17
|
-
assert_equal "http://games.ign.com/articles/117/1178937p1.html?RSSwhen2011-06-24_082700&RSSid=1178937", url.clean
|
18
|
-
|
19
|
-
url = Raev.url("http://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html")
|
20
|
-
assert_equal "http://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html", url.clean
|
12
|
+
cleaned_url = Raev::Url.remove_utm("http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+ignfeeds%2Fgames+%28IGN+Videogames%29")
|
13
|
+
assert_equal "http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change", cleaned_url
|
21
14
|
end
|
22
|
-
|
23
|
-
should "resolve url" do
|
24
|
-
url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/")
|
25
|
-
assert_equal "https://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+fingergaming+%28FingerGaming%29", url.resolved
|
26
|
-
end
|
27
|
-
|
28
|
-
should "resolve and clean url" do
|
15
|
+
|
16
|
+
should "resolve url" do
|
29
17
|
url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/")
|
30
|
-
assert_equal "https://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/", url.resolved_and_clean
|
18
|
+
assert_equal "https://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/", url.url
|
19
|
+
|
20
|
+
url = Raev.url("http://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html")
|
21
|
+
assert_equal "https://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html", url.url
|
31
22
|
end
|
32
23
|
|
33
24
|
should "get twitter handle" do
|
@@ -36,8 +27,8 @@ class UrlTest < Test::Unit::TestCase
|
|
36
27
|
end
|
37
28
|
|
38
29
|
should "get rss feed" do
|
39
|
-
url = Raev.url("
|
40
|
-
assert_equal "
|
30
|
+
url = Raev.url("https://www.polygon.com")
|
31
|
+
assert_equal "https://www.polygon.com/rss/index.xml", url.feed
|
41
32
|
|
42
33
|
url = Raev.url("http://arstechnica.com")
|
43
34
|
assert_equal "http://feeds.arstechnica.com/arstechnica/index/", url.feed
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: raev
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.4
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andreas Zecher
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-09-
|
11
|
+
date: 2017-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -38,20 +38,6 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 1.8.0
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: redirect_follower
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: 0.1.1
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: 0.1.1
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
42
|
name: sanitize
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|