raev 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e3c9accf41425df615fb4f1b864fc8075fe0940b
4
- data.tar.gz: d8c643c647b0ef2d46a02c23003a9d266ed1818e
3
+ metadata.gz: 0e99b301f4885c807426b83c31d4d9c5404c763e
4
+ data.tar.gz: cd0e1a7a522efdda1869c704625aa2921227696e
5
5
  SHA512:
6
- metadata.gz: a107b4380e62a65f2ce83c1477a16f8f083d2bc32e4665fe63e74838f797af94c8e52292601aa7414e88da686ca6e78238c65e228d123ef1e6b38890c4c93eac
7
- data.tar.gz: e2d2964362e7d42de36f44f0feb4b8265d418faa0245eeb2e7d47fb80a433f45993cd176b54ba52b1e0a2f4d7f9cc2c14d54fe88cfffe7a35d8b264afc6a1446
6
+ metadata.gz: e33460b3be3e68a48e6cec8d24e69f70595490959addc494e657ef12fb10f8085bd5dec51bc9c74d257e0ca12c5645bd6beca15c58dcb50f8b752dd81ae3eb27
7
+ data.tar.gz: bd0ec76ed70319849b758d4250a5dc3ad65f453e3138e0ffeedb074c8f704b756ff12be57c5670048dbb8f31ea59c62db59014699cc358ee6b8d2803aa7a0a5e
data/Gemfile CHANGED
@@ -2,7 +2,6 @@ source "http://rubygems.org"
2
2
 
3
3
  gem "json", '>= 2.1.0'
4
4
  gem "nokogiri", ">= 1.8.0"
5
- gem "redirect_follower", ">= 0.1.1"
6
5
  gem "sanitize", ">= 2.1.0"
7
6
  gem "chronic", ">=0.10.2"
8
7
 
data/Gemfile.lock CHANGED
@@ -60,7 +60,6 @@ GEM
60
60
  rack (2.0.3)
61
61
  rake (12.1.0)
62
62
  rdoc (5.1.0)
63
- redirect_follower (0.1.1)
64
63
  sanitize (4.5.0)
65
64
  crass (~> 1.0.2)
66
65
  nokogiri (>= 1.4.4)
@@ -87,7 +86,6 @@ DEPENDENCIES
87
86
  jeweler (= 2.3.7)
88
87
  json (>= 2.1.0)
89
88
  nokogiri (>= 1.8.0)
90
- redirect_follower (>= 0.1.1)
91
89
  sanitize (>= 2.1.0)
92
90
  shoulda
93
91
  test-unit (= 3.2.4)
data/README.md CHANGED
@@ -22,31 +22,24 @@ Usage
22
22
  Get the domain name from an url without the `www.` subdomain.
23
23
 
24
24
  ```ruby
25
- Raev.url("http://indiegames.com/2011/05/c418_minecraft_volume_alpha.html").base
25
+ Raev::Url.base("http://indiegames.com/2011/05/c418_minecraft_volume_alpha.html")
26
26
  # => "indiegames.com"
27
27
  ```
28
28
 
29
29
  Remove UTM analytics parameters from an url.
30
30
 
31
31
  ```ruby
32
- Raev.url("http://ipodtouchlab.com/2011/01/iphone-ipad-app-sale-20110117.html?utm_campaign=touch_lab_bot&utm_medium=twitter&utm_source=am6_feedtweet").clean
33
- # => "http://ipodtouchlab.com/2011/01/iphone-ipad-app-sale-20110117.html"
32
+ Raev::Url.remove_utm("http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+ignfeeds%2Fgames+%28IGN+Videogames%29")
33
+ # => "http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change"
34
34
  ```
35
35
 
36
36
  Resolve a shortened or proxied url.
37
37
 
38
38
  ```ruby
39
- Raev.url("http://sbn.to/WRgXfl").resolved
39
+ Raev.url("http://sbn.to/WRgXfl").url
40
40
  # => "http://www.polygon.com/features/2013/3/25/4128022/gdc-gathering-of-game-makers"
41
41
  ```
42
42
 
43
- Resolve a shortend or proxied url and remove UTM analytics parameters.
44
-
45
- ```ruby
46
- url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/").resolved_and_clean
47
- # => "http://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/"
48
- ```
49
-
50
43
  Fetch Twitter handle from url.
51
44
 
52
45
  ```ruby
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.4
1
+ 0.3.0
data/lib/raev/url.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require "chronic"
2
2
  require "json"
3
3
  require "sanitize"
4
+ require 'net/http'
4
5
 
5
6
  module Raev
6
7
 
@@ -33,48 +34,33 @@ module Raev
33
34
  REGEX_PAGE_TITLE = / +/
34
35
 
35
36
  attr_reader :url
37
+ attr_reader :body
36
38
  attr_reader :doc
37
39
 
38
40
  def initialize(url)
39
- @url = url
41
+ fetch(url)
42
+ @url = Url.remove_utm(@url)
40
43
  @doc = nil
41
44
  @linked_data = nil
42
45
  end
43
46
 
44
- def base
45
- base_url = @url.split('/'.freeze)[2]
47
+ def self.base(url)
48
+ base_url = url.split('/'.freeze)[2]
46
49
  base_url.gsub!('www.'.freeze, ''.freeze) unless base_url.nil?
47
50
  base_url
48
51
  end
49
52
 
50
- def clean
51
- unless @url.nil?
52
- utm_index = @url.index(REGEX_UTM)
53
+ def self.remove_utm(url)
54
+ unless url.nil?
55
+ utm_index = url.index(REGEX_UTM)
53
56
  unless(utm_index.nil?)
54
- return url.slice(0, utm_index)
57
+ url = url.slice(0, utm_index)
55
58
  end
56
59
  end
57
60
 
58
- @url
61
+ url
59
62
  end
60
63
 
61
- def resolved
62
- unless @url.nil?
63
- begin
64
- return RedirectFollower(@url, 5)
65
- rescue => ex
66
- puts "Could not resolve #{@url}. #{ex.class}: #{ex.message}"
67
- end
68
- end
69
-
70
- @url
71
- end
72
-
73
- def resolved_and_clean
74
- resolved_url = Url.new(self.resolved)
75
- resolved_url.clean
76
- end
77
-
78
64
  def without_http
79
65
  @url.sub("http://".freeze, "".freeze)
80
66
  end
@@ -232,7 +218,7 @@ module Raev
232
218
 
233
219
  def document
234
220
  if @doc.nil?
235
- @doc = Nokogiri::HTML(open(@url))
221
+ @doc = Nokogiri::HTML(@body)
236
222
  end
237
223
 
238
224
  @doc
@@ -252,5 +238,20 @@ module Raev
252
238
  @linked_data
253
239
  end
254
240
 
241
+ def fetch(uri_str, limit = 10)
242
+ raise ArgumentError, 'too many HTTP redirects' if limit == 0
243
+
244
+ response = Net::HTTP.get_response(URI(uri_str))
245
+
246
+ case response
247
+ when Net::HTTPSuccess then
248
+ @url = uri_str
249
+ @body = response.body
250
+ when Net::HTTPRedirection then
251
+ fetch(response['location'], limit - 1)
252
+ else
253
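+ # TODO: handle responses that are neither success nor redirect (e.g. 404 Not Found); this branch assigns neither @url nor @body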
254
+ end
255
+ end
255
256
  end
256
257
  end
data/lib/raev.rb CHANGED
@@ -1,7 +1,4 @@
1
1
  require 'nokogiri'
2
- require 'redirect_follower'
3
- require 'open-uri'
4
-
5
2
  require 'raev/article'
6
3
  require 'raev/author'
7
4
  require 'raev/parser'
data/raev.gemspec CHANGED
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: raev 0.2.4 ruby lib
5
+ # stub: raev 0.3.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "raev".freeze
9
- s.version = "0.2.4"
9
+ s.version = "0.3.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib".freeze]
13
13
  s.authors = ["Andreas Zecher".freeze]
14
- s.date = "2017-09-18"
14
+ s.date = "2017-09-19"
15
15
  s.description = "Fetch, parse and normalize meta data from websites.".freeze
16
16
  s.email = "andreas@madebypixelate.com".freeze
17
17
  s.extra_rdoc_files = [
@@ -48,7 +48,6 @@ Gem::Specification.new do |s|
48
48
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
49
  s.add_runtime_dependency(%q<json>.freeze, [">= 2.1.0"])
50
50
  s.add_runtime_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
51
- s.add_runtime_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
52
51
  s.add_runtime_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
53
52
  s.add_runtime_dependency(%q<chronic>.freeze, [">= 0.10.2"])
54
53
  s.add_development_dependency(%q<shoulda>.freeze, [">= 0"])
@@ -58,7 +57,6 @@ Gem::Specification.new do |s|
58
57
  else
59
58
  s.add_dependency(%q<json>.freeze, [">= 2.1.0"])
60
59
  s.add_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
61
- s.add_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
62
60
  s.add_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
63
61
  s.add_dependency(%q<chronic>.freeze, [">= 0.10.2"])
64
62
  s.add_dependency(%q<shoulda>.freeze, [">= 0"])
@@ -69,7 +67,6 @@ Gem::Specification.new do |s|
69
67
  else
70
68
  s.add_dependency(%q<json>.freeze, [">= 2.1.0"])
71
69
  s.add_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
72
- s.add_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
73
70
  s.add_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
74
71
  s.add_dependency(%q<chronic>.freeze, [">= 0.10.2"])
75
72
  s.add_dependency(%q<shoulda>.freeze, [">= 0"])
data/test/test_url.rb CHANGED
@@ -5,29 +5,20 @@ require 'raev'
5
5
 
6
6
  class UrlTest < Test::Unit::TestCase
7
7
  should "parse base url" do
8
- url = Raev.url("http://indiegames.com/2011/05/c418_minecraft_volume_alpha.html")
9
- assert_equal url.base, "indiegames.com"
8
+ assert_equal Raev::Url.base("http://indiegames.com/2011/05/c418_minecraft_volume_alpha.html"), "indiegames.com"
10
9
  end
11
10
 
12
11
  should "clean url" do
13
- url = Raev.url("http://ipodtouchlab.com/2011/01/iphone-ipad-app-sale-20110117.html?utm_campaign=touch_lab_bot&utm_medium=twitter&utm_source=am6_feedtweet")
14
- assert_equal "http://ipodtouchlab.com/2011/01/iphone-ipad-app-sale-20110117.html", url.clean
15
-
16
- url = Raev.url("http://games.ign.com/articles/117/1178937p1.html?RSSwhen2011-06-24_082700&RSSid=1178937&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+ignfeeds%2Fgames+%28IGN+Videogames%29")
17
- assert_equal "http://games.ign.com/articles/117/1178937p1.html?RSSwhen2011-06-24_082700&RSSid=1178937", url.clean
18
-
19
- url = Raev.url("http://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html")
20
- assert_equal "http://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html", url.clean
12
+ cleaned_url = Raev::Url.remove_utm("http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+ignfeeds%2Fgames+%28IGN+Videogames%29")
13
+ assert_equal "http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change", cleaned_url
21
14
  end
22
-
23
- should "resolve url" do
24
- url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/")
25
- assert_equal "https://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+fingergaming+%28FingerGaming%29", url.resolved
26
- end
27
-
28
- should "resolve and clean url" do
15
+
16
+ should "resolve url" do
29
17
  url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/")
30
- assert_equal "https://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/", url.resolved_and_clean
18
+ assert_equal "https://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/", url.url
19
+
20
+ url = Raev.url("http://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html")
21
+ assert_equal "https://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html", url.url
31
22
  end
32
23
 
33
24
  should "get twitter handle" do
@@ -36,8 +27,8 @@ class UrlTest < Test::Unit::TestCase
36
27
  end
37
28
 
38
29
  should "get rss feed" do
39
- url = Raev.url("http://www.polygon.com")
40
- assert_equal "http://www.polygon.com/rss/index.xml", url.feed
30
+ url = Raev.url("https://www.polygon.com")
31
+ assert_equal "https://www.polygon.com/rss/index.xml", url.feed
41
32
 
42
33
  url = Raev.url("http://arstechnica.com")
43
34
  assert_equal "http://feeds.arstechnica.com/arstechnica/index/", url.feed
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: raev
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andreas Zecher
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-09-18 00:00:00.000000000 Z
11
+ date: 2017-09-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -38,20 +38,6 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: 1.8.0
41
- - !ruby/object:Gem::Dependency
42
- name: redirect_follower
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: 0.1.1
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: 0.1.1
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: sanitize
57
43
  requirement: !ruby/object:Gem::Requirement