raev 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +0 -1
- data/Gemfile.lock +0 -2
- data/README.md +4 -11
- data/VERSION +1 -1
- data/lib/raev/url.rb +27 -26
- data/lib/raev.rb +0 -3
- data/raev.gemspec +3 -6
- data/test/test_url.rb +11 -20
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0e99b301f4885c807426b83c31d4d9c5404c763e
|
4
|
+
data.tar.gz: cd0e1a7a522efdda1869c704625aa2921227696e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e33460b3be3e68a48e6cec8d24e69f70595490959addc494e657ef12fb10f8085bd5dec51bc9c74d257e0ca12c5645bd6beca15c58dcb50f8b752dd81ae3eb27
|
7
|
+
data.tar.gz: bd0ec76ed70319849b758d4250a5dc3ad65f453e3138e0ffeedb074c8f704b756ff12be57c5670048dbb8f31ea59c62db59014699cc358ee6b8d2803aa7a0a5e
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -60,7 +60,6 @@ GEM
|
|
60
60
|
rack (2.0.3)
|
61
61
|
rake (12.1.0)
|
62
62
|
rdoc (5.1.0)
|
63
|
-
redirect_follower (0.1.1)
|
64
63
|
sanitize (4.5.0)
|
65
64
|
crass (~> 1.0.2)
|
66
65
|
nokogiri (>= 1.4.4)
|
@@ -87,7 +86,6 @@ DEPENDENCIES
|
|
87
86
|
jeweler (= 2.3.7)
|
88
87
|
json (>= 2.1.0)
|
89
88
|
nokogiri (>= 1.8.0)
|
90
|
-
redirect_follower (>= 0.1.1)
|
91
89
|
sanitize (>= 2.1.0)
|
92
90
|
shoulda
|
93
91
|
test-unit (= 3.2.4)
|
data/README.md
CHANGED
@@ -22,31 +22,24 @@ Usage
|
|
22
22
|
Get the domain name from an url without the `www.` subdomain.
|
23
23
|
|
24
24
|
```ruby
|
25
|
-
Raev.
|
25
|
+
Raev::Url.base("http://indiegames.com/2011/05/c418_minecraft_volume_alpha.html")
|
26
26
|
# => "indiegames.com"
|
27
27
|
```
|
28
28
|
|
29
29
|
Remove UTM analytics parameters from an url.
|
30
30
|
|
31
31
|
```ruby
|
32
|
-
Raev.
|
33
|
-
# => "http://
|
32
|
+
Raev::Url.remove_utm("http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+ignfeeds%2Fgames+%28IGN+Videogames%29")
|
33
|
+
# => "http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change"
|
34
34
|
```
|
35
35
|
|
36
36
|
Resolve a shortened or proxied url.
|
37
37
|
|
38
38
|
```ruby
|
39
|
-
Raev.url("http://sbn.to/WRgXfl").
|
39
|
+
Raev.url("http://sbn.to/WRgXfl").url
|
40
40
|
# => "http://www.polygon.com/features/2013/3/25/4128022/gdc-gathering-of-game-makers"
|
41
41
|
```
|
42
42
|
|
43
|
-
Resolve a shortend or proxied url and remove UTM analytics parameters.
|
44
|
-
|
45
|
-
```ruby
|
46
|
-
url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/").resolved_and_clean
|
47
|
-
# => "http://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/"
|
48
|
-
```
|
49
|
-
|
50
43
|
Fetch Twitter handle from url.
|
51
44
|
|
52
45
|
```ruby
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
data/lib/raev/url.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "chronic"
|
2
2
|
require "json"
|
3
3
|
require "sanitize"
|
4
|
+
require 'net/http'
|
4
5
|
|
5
6
|
module Raev
|
6
7
|
|
@@ -33,48 +34,33 @@ module Raev
|
|
33
34
|
REGEX_PAGE_TITLE = / +/
|
34
35
|
|
35
36
|
attr_reader :url
|
37
|
+
attr_reader :body
|
36
38
|
attr_reader :doc
|
37
39
|
|
38
40
|
def initialize(url)
|
39
|
-
|
41
|
+
fetch(url)
|
42
|
+
@url = Url.remove_utm(@url)
|
40
43
|
@doc = nil
|
41
44
|
@linked_data = nil
|
42
45
|
end
|
43
46
|
|
44
|
-
def base
|
45
|
-
base_url =
|
47
|
+
def self.base(url)
|
48
|
+
base_url = url.split('/'.freeze)[2]
|
46
49
|
base_url.gsub!('www.'.freeze, ''.freeze) unless base_url.nil?
|
47
50
|
base_url
|
48
51
|
end
|
49
52
|
|
50
|
-
def
|
51
|
-
unless
|
52
|
-
utm_index =
|
53
|
+
def self.remove_utm(url)
|
54
|
+
unless url.nil?
|
55
|
+
utm_index = url.index(REGEX_UTM)
|
53
56
|
unless(utm_index.nil?)
|
54
|
-
|
57
|
+
url = url.slice(0, utm_index)
|
55
58
|
end
|
56
59
|
end
|
57
60
|
|
58
|
-
|
61
|
+
url
|
59
62
|
end
|
60
63
|
|
61
|
-
def resolved
|
62
|
-
unless @url.nil?
|
63
|
-
begin
|
64
|
-
return RedirectFollower(@url, 5)
|
65
|
-
rescue => ex
|
66
|
-
puts "Could not resolve #{@url}. #{ex.class}: #{ex.message}"
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
@url
|
71
|
-
end
|
72
|
-
|
73
|
-
def resolved_and_clean
|
74
|
-
resolved_url = Url.new(self.resolved)
|
75
|
-
resolved_url.clean
|
76
|
-
end
|
77
|
-
|
78
64
|
def without_http
|
79
65
|
@url.sub("http://".freeze, "".freeze)
|
80
66
|
end
|
@@ -232,7 +218,7 @@ module Raev
|
|
232
218
|
|
233
219
|
def document
|
234
220
|
if @doc.nil?
|
235
|
-
@doc = Nokogiri::HTML(
|
221
|
+
@doc = Nokogiri::HTML(@body)
|
236
222
|
end
|
237
223
|
|
238
224
|
@doc
|
@@ -252,5 +238,20 @@ module Raev
|
|
252
238
|
@linked_data
|
253
239
|
end
|
254
240
|
|
241
|
+
def fetch(uri_str, limit = 10)
|
242
|
+
raise ArgumentError, 'too many HTTP redirects' if limit == 0
|
243
|
+
|
244
|
+
response = Net::HTTP.get_response(URI(uri_str))
|
245
|
+
|
246
|
+
case response
|
247
|
+
when Net::HTTPSuccess then
|
248
|
+
@url = uri_str
|
249
|
+
@body = response.body
|
250
|
+
when Net::HTTPRedirection then
|
251
|
+
fetch(response['location'], limit - 1)
|
252
|
+
else
|
253
|
+
# TODO handle Not Found
|
254
|
+
end
|
255
|
+
end
|
255
256
|
end
|
256
257
|
end
|
data/lib/raev.rb
CHANGED
data/raev.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: raev 0.
|
5
|
+
# stub: raev 0.3.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "raev".freeze
|
9
|
-
s.version = "0.
|
9
|
+
s.version = "0.3.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib".freeze]
|
13
13
|
s.authors = ["Andreas Zecher".freeze]
|
14
|
-
s.date = "2017-09-
|
14
|
+
s.date = "2017-09-19"
|
15
15
|
s.description = "Fetch, parse and normalize meta data from websites.".freeze
|
16
16
|
s.email = "andreas@madebypixelate.com".freeze
|
17
17
|
s.extra_rdoc_files = [
|
@@ -48,7 +48,6 @@ Gem::Specification.new do |s|
|
|
48
48
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
49
|
s.add_runtime_dependency(%q<json>.freeze, [">= 2.1.0"])
|
50
50
|
s.add_runtime_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
|
51
|
-
s.add_runtime_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
|
52
51
|
s.add_runtime_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
|
53
52
|
s.add_runtime_dependency(%q<chronic>.freeze, [">= 0.10.2"])
|
54
53
|
s.add_development_dependency(%q<shoulda>.freeze, [">= 0"])
|
@@ -58,7 +57,6 @@ Gem::Specification.new do |s|
|
|
58
57
|
else
|
59
58
|
s.add_dependency(%q<json>.freeze, [">= 2.1.0"])
|
60
59
|
s.add_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
|
61
|
-
s.add_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
|
62
60
|
s.add_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
|
63
61
|
s.add_dependency(%q<chronic>.freeze, [">= 0.10.2"])
|
64
62
|
s.add_dependency(%q<shoulda>.freeze, [">= 0"])
|
@@ -69,7 +67,6 @@ Gem::Specification.new do |s|
|
|
69
67
|
else
|
70
68
|
s.add_dependency(%q<json>.freeze, [">= 2.1.0"])
|
71
69
|
s.add_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
|
72
|
-
s.add_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
|
73
70
|
s.add_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
|
74
71
|
s.add_dependency(%q<chronic>.freeze, [">= 0.10.2"])
|
75
72
|
s.add_dependency(%q<shoulda>.freeze, [">= 0"])
|
data/test/test_url.rb
CHANGED
@@ -5,29 +5,20 @@ require 'raev'
|
|
5
5
|
|
6
6
|
class UrlTest < Test::Unit::TestCase
|
7
7
|
should "parse base url" do
|
8
|
-
|
9
|
-
assert_equal url.base, "indiegames.com"
|
8
|
+
assert_equal Raev::Url.base("http://indiegames.com/2011/05/c418_minecraft_volume_alpha.html"), "indiegames.com"
|
10
9
|
end
|
11
10
|
|
12
11
|
should "clean url" do
|
13
|
-
|
14
|
-
assert_equal "http://
|
15
|
-
|
16
|
-
url = Raev.url("http://games.ign.com/articles/117/1178937p1.html?RSSwhen2011-06-24_082700&RSSid=1178937&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+ignfeeds%2Fgames+%28IGN+Videogames%29")
|
17
|
-
assert_equal "http://games.ign.com/articles/117/1178937p1.html?RSSwhen2011-06-24_082700&RSSid=1178937", url.clean
|
18
|
-
|
19
|
-
url = Raev.url("http://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html")
|
20
|
-
assert_equal "http://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html", url.clean
|
12
|
+
cleaned_url = Raev::Url.remove_utm("http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+ignfeeds%2Fgames+%28IGN+Videogames%29")
|
13
|
+
assert_equal "http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change", cleaned_url
|
21
14
|
end
|
22
|
-
|
23
|
-
should "resolve url" do
|
24
|
-
url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/")
|
25
|
-
assert_equal "https://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+fingergaming+%28FingerGaming%29", url.resolved
|
26
|
-
end
|
27
|
-
|
28
|
-
should "resolve and clean url" do
|
15
|
+
|
16
|
+
should "resolve url" do
|
29
17
|
url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/")
|
30
|
-
assert_equal "https://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/", url.
|
18
|
+
assert_equal "https://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/", url.url
|
19
|
+
|
20
|
+
url = Raev.url("http://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html")
|
21
|
+
assert_equal "https://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html", url.url
|
31
22
|
end
|
32
23
|
|
33
24
|
should "get twitter handle" do
|
@@ -36,8 +27,8 @@ class UrlTest < Test::Unit::TestCase
|
|
36
27
|
end
|
37
28
|
|
38
29
|
should "get rss feed" do
|
39
|
-
url = Raev.url("
|
40
|
-
assert_equal "
|
30
|
+
url = Raev.url("https://www.polygon.com")
|
31
|
+
assert_equal "https://www.polygon.com/rss/index.xml", url.feed
|
41
32
|
|
42
33
|
url = Raev.url("http://arstechnica.com")
|
43
34
|
assert_equal "http://feeds.arstechnica.com/arstechnica/index/", url.feed
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: raev
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andreas Zecher
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-09-
|
11
|
+
date: 2017-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -38,20 +38,6 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 1.8.0
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: redirect_follower
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: 0.1.1
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: 0.1.1
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
42
|
name: sanitize
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|