raev 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e3c9accf41425df615fb4f1b864fc8075fe0940b
4
- data.tar.gz: d8c643c647b0ef2d46a02c23003a9d266ed1818e
3
+ metadata.gz: 0e99b301f4885c807426b83c31d4d9c5404c763e
4
+ data.tar.gz: cd0e1a7a522efdda1869c704625aa2921227696e
5
5
  SHA512:
6
- metadata.gz: a107b4380e62a65f2ce83c1477a16f8f083d2bc32e4665fe63e74838f797af94c8e52292601aa7414e88da686ca6e78238c65e228d123ef1e6b38890c4c93eac
7
- data.tar.gz: e2d2964362e7d42de36f44f0feb4b8265d418faa0245eeb2e7d47fb80a433f45993cd176b54ba52b1e0a2f4d7f9cc2c14d54fe88cfffe7a35d8b264afc6a1446
6
+ metadata.gz: e33460b3be3e68a48e6cec8d24e69f70595490959addc494e657ef12fb10f8085bd5dec51bc9c74d257e0ca12c5645bd6beca15c58dcb50f8b752dd81ae3eb27
7
+ data.tar.gz: bd0ec76ed70319849b758d4250a5dc3ad65f453e3138e0ffeedb074c8f704b756ff12be57c5670048dbb8f31ea59c62db59014699cc358ee6b8d2803aa7a0a5e
data/Gemfile CHANGED
@@ -2,7 +2,6 @@ source "http://rubygems.org"
2
2
 
3
3
  gem "json", '>= 2.1.0'
4
4
  gem "nokogiri", ">= 1.8.0"
5
- gem "redirect_follower", ">= 0.1.1"
6
5
  gem "sanitize", ">= 2.1.0"
7
6
  gem "chronic", ">=0.10.2"
8
7
 
data/Gemfile.lock CHANGED
@@ -60,7 +60,6 @@ GEM
60
60
  rack (2.0.3)
61
61
  rake (12.1.0)
62
62
  rdoc (5.1.0)
63
- redirect_follower (0.1.1)
64
63
  sanitize (4.5.0)
65
64
  crass (~> 1.0.2)
66
65
  nokogiri (>= 1.4.4)
@@ -87,7 +86,6 @@ DEPENDENCIES
87
86
  jeweler (= 2.3.7)
88
87
  json (>= 2.1.0)
89
88
  nokogiri (>= 1.8.0)
90
- redirect_follower (>= 0.1.1)
91
89
  sanitize (>= 2.1.0)
92
90
  shoulda
93
91
  test-unit (= 3.2.4)
data/README.md CHANGED
@@ -22,31 +22,24 @@ Usage
22
22
  Get the domain name from an url without the `www.` subdomain.
23
23
 
24
24
  ```ruby
25
- Raev.url("http://indiegames.com/2011/05/c418_minecraft_volume_alpha.html").base
25
+ Raev::Url.base("http://indiegames.com/2011/05/c418_minecraft_volume_alpha.html")
26
26
  # => "indiegames.com"
27
27
  ```
28
28
 
29
29
  Remove UTM analytics parameters from an url.
30
30
 
31
31
  ```ruby
32
- Raev.url("http://ipodtouchlab.com/2011/01/iphone-ipad-app-sale-20110117.html?utm_campaign=touch_lab_bot&utm_medium=twitter&utm_source=am6_feedtweet").clean
33
- # => "http://ipodtouchlab.com/2011/01/iphone-ipad-app-sale-20110117.html"
32
+ Raev::Url.remove_utm("http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+ignfeeds%2Fgames+%28IGN+Videogames%29")
33
+ # => "http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change"
34
34
  ```
35
35
 
36
36
  Resolve a shortened or proxied url.
37
37
 
38
38
  ```ruby
39
- Raev.url("http://sbn.to/WRgXfl").resolved
39
+ Raev.url("http://sbn.to/WRgXfl").url
40
40
  # => "http://www.polygon.com/features/2013/3/25/4128022/gdc-gathering-of-game-makers"
41
41
  ```
42
42
 
43
- Resolve a shortend or proxied url and remove UTM analytics parameters.
44
-
45
- ```ruby
46
- url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/").resolved_and_clean
47
- # => "http://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/"
48
- ```
49
-
50
43
  Fetch Twitter handle from url.
51
44
 
52
45
  ```ruby
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.4
1
+ 0.3.0
data/lib/raev/url.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require "chronic"
2
2
  require "json"
3
3
  require "sanitize"
4
+ require 'net/http'
4
5
 
5
6
  module Raev
6
7
 
@@ -33,48 +34,33 @@ module Raev
33
34
  REGEX_PAGE_TITLE = / +/
34
35
 
35
36
  attr_reader :url
37
+ attr_reader :body
36
38
  attr_reader :doc
37
39
 
38
40
  def initialize(url)
39
- @url = url
41
+ fetch(url)
42
+ @url = Url.remove_utm(@url)
40
43
  @doc = nil
41
44
  @linked_data = nil
42
45
  end
43
46
 
44
- def base
45
- base_url = @url.split('/'.freeze)[2]
47
+ def self.base(url)
48
+ base_url = url.split('/'.freeze)[2]
46
49
  base_url.gsub!('www.'.freeze, ''.freeze) unless base_url.nil?
47
50
  base_url
48
51
  end
49
52
 
50
- def clean
51
- unless @url.nil?
52
- utm_index = @url.index(REGEX_UTM)
53
+ def self.remove_utm(url)
54
+ unless url.nil?
55
+ utm_index = url.index(REGEX_UTM)
53
56
  unless(utm_index.nil?)
54
- return url.slice(0, utm_index)
57
+ url = url.slice(0, utm_index)
55
58
  end
56
59
  end
57
60
 
58
- @url
61
+ url
59
62
  end
60
63
 
61
- def resolved
62
- unless @url.nil?
63
- begin
64
- return RedirectFollower(@url, 5)
65
- rescue => ex
66
- puts "Could not resolve #{@url}. #{ex.class}: #{ex.message}"
67
- end
68
- end
69
-
70
- @url
71
- end
72
-
73
- def resolved_and_clean
74
- resolved_url = Url.new(self.resolved)
75
- resolved_url.clean
76
- end
77
-
78
64
  def without_http
79
65
  @url.sub("http://".freeze, "".freeze)
80
66
  end
@@ -232,7 +218,7 @@ module Raev
232
218
 
233
219
  def document
234
220
  if @doc.nil?
235
- @doc = Nokogiri::HTML(open(@url))
221
+ @doc = Nokogiri::HTML(@body)
236
222
  end
237
223
 
238
224
  @doc
@@ -252,5 +238,20 @@ module Raev
252
238
  @linked_data
253
239
  end
254
240
 
241
+ def fetch(uri_str, limit = 10)
242
+ raise ArgumentError, 'too many HTTP redirects' if limit == 0
243
+
244
+ response = Net::HTTP.get_response(URI(uri_str))
245
+
246
+ case response
247
+ when Net::HTTPSuccess then
248
+ @url = uri_str
249
+ @body = response.body
250
+ when Net::HTTPRedirection then
251
+ fetch(response['location'], limit - 1)
252
+ else
253
+ # TODO handle Not Found
254
+ end
255
+ end
255
256
  end
256
257
  end
data/lib/raev.rb CHANGED
@@ -1,7 +1,4 @@
1
1
  require 'nokogiri'
2
- require 'redirect_follower'
3
- require 'open-uri'
4
-
5
2
  require 'raev/article'
6
3
  require 'raev/author'
7
4
  require 'raev/parser'
data/raev.gemspec CHANGED
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: raev 0.2.4 ruby lib
5
+ # stub: raev 0.3.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "raev".freeze
9
- s.version = "0.2.4"
9
+ s.version = "0.3.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib".freeze]
13
13
  s.authors = ["Andreas Zecher".freeze]
14
- s.date = "2017-09-18"
14
+ s.date = "2017-09-19"
15
15
  s.description = "Fetch, parse and normalize meta data from websites.".freeze
16
16
  s.email = "andreas@madebypixelate.com".freeze
17
17
  s.extra_rdoc_files = [
@@ -48,7 +48,6 @@ Gem::Specification.new do |s|
48
48
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
49
  s.add_runtime_dependency(%q<json>.freeze, [">= 2.1.0"])
50
50
  s.add_runtime_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
51
- s.add_runtime_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
52
51
  s.add_runtime_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
53
52
  s.add_runtime_dependency(%q<chronic>.freeze, [">= 0.10.2"])
54
53
  s.add_development_dependency(%q<shoulda>.freeze, [">= 0"])
@@ -58,7 +57,6 @@ Gem::Specification.new do |s|
58
57
  else
59
58
  s.add_dependency(%q<json>.freeze, [">= 2.1.0"])
60
59
  s.add_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
61
- s.add_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
62
60
  s.add_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
63
61
  s.add_dependency(%q<chronic>.freeze, [">= 0.10.2"])
64
62
  s.add_dependency(%q<shoulda>.freeze, [">= 0"])
@@ -69,7 +67,6 @@ Gem::Specification.new do |s|
69
67
  else
70
68
  s.add_dependency(%q<json>.freeze, [">= 2.1.0"])
71
69
  s.add_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
72
- s.add_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
73
70
  s.add_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
74
71
  s.add_dependency(%q<chronic>.freeze, [">= 0.10.2"])
75
72
  s.add_dependency(%q<shoulda>.freeze, [">= 0"])
data/test/test_url.rb CHANGED
@@ -5,29 +5,20 @@ require 'raev'
5
5
 
6
6
  class UrlTest < Test::Unit::TestCase
7
7
  should "parse base url" do
8
- url = Raev.url("http://indiegames.com/2011/05/c418_minecraft_volume_alpha.html")
9
- assert_equal url.base, "indiegames.com"
8
+ assert_equal Raev::Url.base("http://indiegames.com/2011/05/c418_minecraft_volume_alpha.html"), "indiegames.com"
10
9
  end
11
10
 
12
11
  should "clean url" do
13
- url = Raev.url("http://ipodtouchlab.com/2011/01/iphone-ipad-app-sale-20110117.html?utm_campaign=touch_lab_bot&utm_medium=twitter&utm_source=am6_feedtweet")
14
- assert_equal "http://ipodtouchlab.com/2011/01/iphone-ipad-app-sale-20110117.html", url.clean
15
-
16
- url = Raev.url("http://games.ign.com/articles/117/1178937p1.html?RSSwhen2011-06-24_082700&RSSid=1178937&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+ignfeeds%2Fgames+%28IGN+Videogames%29")
17
- assert_equal "http://games.ign.com/articles/117/1178937p1.html?RSSwhen2011-06-24_082700&RSSid=1178937", url.clean
18
-
19
- url = Raev.url("http://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html")
20
- assert_equal "http://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html", url.clean
12
+ cleaned_url = Raev::Url.remove_utm("http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+ignfeeds%2Fgames+%28IGN+Videogames%29")
13
+ assert_equal "http://www.ign.com/articles/2011/06/24/new-controllers-for-the-disabled-debuts-and-promises-change", cleaned_url
21
14
  end
22
-
23
- should "resolve url" do
24
- url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/")
25
- assert_equal "https://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+fingergaming+%28FingerGaming%29", url.resolved
26
- end
27
-
28
- should "resolve and clean url" do
15
+
16
+ should "resolve url" do
29
17
  url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/")
30
- assert_equal "https://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/", url.resolved_and_clean
18
+ assert_equal "https://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/", url.url
19
+
20
+ url = Raev.url("http://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html")
21
+ assert_equal "https://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html", url.url
31
22
  end
32
23
 
33
24
  should "get twitter handle" do
@@ -36,8 +27,8 @@ class UrlTest < Test::Unit::TestCase
36
27
  end
37
28
 
38
29
  should "get rss feed" do
39
- url = Raev.url("http://www.polygon.com")
40
- assert_equal "http://www.polygon.com/rss/index.xml", url.feed
30
+ url = Raev.url("https://www.polygon.com")
31
+ assert_equal "https://www.polygon.com/rss/index.xml", url.feed
41
32
 
42
33
  url = Raev.url("http://arstechnica.com")
43
34
  assert_equal "http://feeds.arstechnica.com/arstechnica/index/", url.feed
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: raev
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andreas Zecher
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-09-18 00:00:00.000000000 Z
11
+ date: 2017-09-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -38,20 +38,6 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: 1.8.0
41
- - !ruby/object:Gem::Dependency
42
- name: redirect_follower
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: 0.1.1
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: 0.1.1
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: sanitize
57
43
  requirement: !ruby/object:Gem::Requirement