raev 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +12 -0
- data/Gemfile.lock +26 -0
- data/LICENSE.txt +20 -0
- data/README.md +68 -0
- data/Rakefile +56 -0
- data/VERSION +1 -0
- data/lib/raev/author.rb +37 -0
- data/lib/raev/parser.rb +10 -0
- data/lib/raev/url.rb +94 -0
- data/lib/raev.rb +17 -0
- data/raev.gemspec +78 -0
- data/test/helper.rb +18 -0
- data/test/test_raev.rb +77 -0
- metadata +197 -0
data/.document
ADDED
data/Gemfile
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Fetch gems over HTTPS rather than plain HTTP so that downloads are
# transport-encrypted.
source "https://rubygems.org"

# Runtime dependencies
gem "nokogiri", ">= 1.5.3"
gem "redirect_follower", ">= 0.1.1"

# Tooling that is only needed for developing, testing and releasing the gem
group :development do
  gem "shoulda", ">= 0"
  gem "bundler", "~> 1.3.4"
  gem "jeweler", "~> 1.6.4"
  gem "rcov", ">= 0"
  gem "test-unit", "~> 2.5.4"
end
|
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
GEM
|
|
2
|
+
remote: http://rubygems.org/
|
|
3
|
+
specs:
|
|
4
|
+
git (1.2.5)
|
|
5
|
+
jeweler (1.6.4)
|
|
6
|
+
bundler (~> 1.0)
|
|
7
|
+
git (>= 1.2.5)
|
|
8
|
+
rake
|
|
9
|
+
nokogiri (1.5.9)
|
|
10
|
+
rake (0.9.2)
|
|
11
|
+
rcov (0.9.10)
|
|
12
|
+
redirect_follower (0.1.1)
|
|
13
|
+
shoulda (2.11.3)
|
|
14
|
+
test-unit (2.5.4)
|
|
15
|
+
|
|
16
|
+
PLATFORMS
|
|
17
|
+
ruby
|
|
18
|
+
|
|
19
|
+
DEPENDENCIES
|
|
20
|
+
bundler (~> 1.3.4)
|
|
21
|
+
jeweler (~> 1.6.4)
|
|
22
|
+
nokogiri (>= 1.5.3)
|
|
23
|
+
rcov
|
|
24
|
+
redirect_follower (>= 0.1.1)
|
|
25
|
+
shoulda
|
|
26
|
+
test-unit (~> 2.5.4)
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Copyright (c) 2011 Andreas Zecher
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
4
|
+
a copy of this software and associated documentation files (the
|
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
9
|
+
the following conditions:
|
|
10
|
+
|
|
11
|
+
The above copyright notice and this permission notice shall be
|
|
12
|
+
included in all copies or substantial portions of the Software.
|
|
13
|
+
|
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
Raev
|
|
2
|
+
====
|
|
3
|
+
|
|
4
|
+
Raev is a Ruby gem for fetching, parsing and normalizing meta data from websites. It was extracted from http://promoterapp.com.
|
|
5
|
+
|
|
6
|
+
Usage
|
|
7
|
+
-----
|
|
8
|
+
|
|
9
|
+
Get the domain name from a URL without the `www.` subdomain.
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
Raev.url("http://indiegames.com/2011/05/c418_minecraft_volume_alpha.html").base
|
|
13
|
+
# => "indiegames.com"
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
Remove UTM analytics parameters from a URL.
|
|
17
|
+
|
|
18
|
+
```ruby
|
|
19
|
+
Raev.url("http://ipodtouchlab.com/2011/01/iphone-ipad-app-sale-20110117.html?utm_campaign=touch_lab_bot&utm_medium=twitter&utm_source=am6_feedtweet").clean
|
|
20
|
+
# => "http://ipodtouchlab.com/2011/01/iphone-ipad-app-sale-20110117.html"
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Resolve a shortened or proxied url.
|
|
24
|
+
|
|
25
|
+
```ruby
|
|
26
|
+
Raev.url("http://sbn.to/WRgXfl").resolved
|
|
27
|
+
# => "http://www.polygon.com/features/2013/3/25/4128022/gdc-gathering-of-game-makers"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Resolve a shortened or proxied url and remove UTM analytics parameters.
|
|
31
|
+
|
|
32
|
+
```ruby
|
|
33
|
+
url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/").resolved_and_clean
|
|
34
|
+
# => "http://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/"
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Fetch Twitter handle from url.
|
|
38
|
+
|
|
39
|
+
```ruby
|
|
40
|
+
Raev.url("http://www.polygon.com").twitter
|
|
41
|
+
# => "polygon"
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Fetch RSS feed from url.
|
|
45
|
+
|
|
46
|
+
```ruby
|
|
47
|
+
Raev.url("http://www.polygon.com").feed
|
|
48
|
+
# => "http://www.polygon.com/rss/index.xml"
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Normalize author name. Capitalizes name, strips whitespace, ignores email addresses and removes silly nicknames in quotes. Returns nil for empty strings or non-names like *Editor* or *Staff*.
|
|
52
|
+
|
|
53
|
+
```ruby
|
|
54
|
+
Raev.normalize_author("andreas@somedomain.com (Andreas)")
|
|
55
|
+
# => "Andreas"
|
|
56
|
+
|
|
57
|
+
Raev.normalize_author("andreas")
|
|
58
|
+
# => "Andreas"
|
|
59
|
+
|
|
60
|
+
Raev.normalize_author("Andreas 'Pixelate' Zecher")
|
|
61
|
+
# => "Andreas Zecher"
|
|
62
|
+
|
|
63
|
+
Raev.normalize_author("Editor")
|
|
64
|
+
# => nil
|
|
65
|
+
|
|
66
|
+
Raev.normalize_author(" ")
|
|
67
|
+
# => nil
|
|
68
|
+
```
|
data/Rakefile
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
require 'rubygems'
require 'bundler'

# Activate the gems from the Gemfile; abort with a hint when some are missing.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "raev"
  gem.homepage = "http://github.com/pixelate/raev"
  gem.license = "MIT"
  gem.summary = "Fetch, parse and normalize meta data from websites."
  gem.description = "Fetch, parse and normalize meta data from websites."
  gem.email = "andreas@madebypixelate.com"
  gem.authors = ["Andreas Zecher"]

  # Dependencies are defined in the Gemfile only. Declaring them here as well
  # makes the generated gemspec list nokogiri and redirect_follower twice,
  # with conflicting version constraints for nokogiri.
end
Jeweler::RubygemsDotOrgTasks.new

# rake test
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# rake rcov
require 'rcov/rcovtask'
Rcov::RcovTask.new do |test|
  test.libs << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
  test.rcov_opts << '--exclude "gems/*"'
end

task :default => :test

# rake rdoc
# Prefer the task class shipped with RDoc; fall back to the legacy Rake one
# on installations whose RDoc does not provide rdoc/task.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "raev #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.1.10
|
data/lib/raev/author.rb
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
module Raev

  # Normalizes author names as they appear in feeds and on web pages.
  class Author

    # Lower-cased strings that show up in author fields but are not names
    # of people.
    NO_AUTHORS = ["admin", "blogs", "editor", "staff"].freeze

    # Normalizes an author name.
    #
    # * surrounding whitespace is stripped
    # * for the notation "andreas@somedomain.com (Andreas)" only the part in
    #   parentheses is kept
    # * nicknames in single or double quotes are removed
    # * every word is capitalized (the rest of each word is lower-cased)
    #
    # author_name - String or nil
    #
    # Returns the normalized String, or nil when author_name is nil, when
    # nothing is left after normalization, or when the result is one of
    # NO_AUTHORS.
    def self.normalize_name(author_name)
      return nil if author_name.nil?

      author = author_name.strip

      # Parse notation "andreas@somedomain.com (Andreas)"
      m = /\((.*)\)/.match(author)
      author = m[1] unless m.nil?

      # Remove quoted nicknames. A nickname has to start after whitespace (or
      # at the very beginning) and end before whitespace (or at the very end),
      # so apostrophes inside a name such as O'Brien are left alone.
      author = author.gsub(/(\A|\s)"[^"]*"(?=\s|\z)/, '\1')
      author = author.gsub(/(\A|\s)'[^']*'(?=\s|\z)/, '\1')

      # Capitalize; split(' ') also collapses runs of whitespace
      author = author.split(' ').map(&:capitalize).join(' ')

      # Checked last so that "staff@somedomain.com (Staff)" and
      # "someone@somedomain.com ()" are rejected as well
      return nil if author.empty?
      return nil if NO_AUTHORS.include?(author.downcase)

      author
    end

  end
end
|
data/lib/raev/parser.rb
ADDED
data/lib/raev/url.rb
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
require 'open-uri'

module Raev

  # Wraps a URL string and offers helpers to normalize it and to extract
  # meta data (Twitter handle, RSS feed) from the page it points to.
  class Url

    # The URL this object was created with.
    attr_reader :url
    # The parsed page; nil until one of the page-based helpers fetched it.
    attr_reader :doc

    def initialize(url)
      @doc = nil
      @url = url
    end

    # Returns the host part of the URL without a leading "www.", or nil when
    # the URL is nil or has no host part.
    def base
      return nil if @url.nil?

      host = @url.split('/')[2]
      # Only strip a leading "www."; a "www." elsewhere in the host is part
      # of the name.
      host && host.sub(/\Awww\./, '')
    end

    # Returns the URL without its utm_* query parameters. All other query
    # parameters and the fragment are kept. A URL without utm_* parameters
    # (or a nil URL) is returned unchanged.
    def clean
      return @url if @url.nil?

      head, hash_mark, fragment = @url.partition('#')
      path, _question_mark, query = head.partition('?')

      params = query.split('&')
      kept = params.reject { |param| param.start_with?('utm_') }
      # Nothing to remove: hand back the original string untouched
      return @url if kept.length == params.length

      cleaned = path
      cleaned += "?#{kept.join('&')}" unless kept.empty?
      cleaned + hash_mark + fragment
    end

    # Returns whatever RedirectFollower yields for the URL (the second
    # argument, 5, is presumably the redirect limit -- confirm against the
    # redirect_follower gem). Falls back to the original URL when the URL is
    # nil or resolving raises; the error is reported on stderr.
    def resolved
      unless @url.nil?
        begin
          return RedirectFollower(@url, 5)
        rescue => ex
          warn "Could not resolve #{@url}. #{ex.class}: #{ex.message}"
        end
      end

      @url
    end

    # Resolves the URL and removes utm_* parameters from the result.
    def resolved_and_clean
      Url.new(resolved).clean
    end

    # Returns the URL without a leading "http://", or nil for a nil URL.
    def without_http
      return nil if @url.nil?

      @url.sub(/\Ahttp:\/\//, '')
    end

    # Returns the last path segment of the first link on the page whose href
    # matches "twitter.com" (matching is done by Raev::Parser), or nil when
    # there is no such link.
    def twitter
      node = document.css('a:match_href("twitter.com")', Raev::Parser.new).first
      return nil unless node

      node["href"].split('/').last
    end

    # Returns the href of the page's RSS <link rel="alternate"> element, or,
    # when there is none, of the first link matching "http://feeds.".
    # An href starting with "/" is resolved against the page URL.
    # Returns nil when no feed is found.
    def feed
      node = document.css('link[type="application/rss+xml"][rel="alternate"]').first
      node ||= document.css('a:match_href("http://feeds.")', Raev::Parser.new).first

      feed_url = node && node["href"]
      feed_url = absolute_url(feed_url) if feed_url && feed_url[0, 1] == "/"
      feed_url
    end

    private

    # Fetches and parses the page once; later calls reuse the parsed document.
    # The URL is opened through URI rather than Kernel#open, so a string such
    # as "| command" can not start a subprocess, and the connection is closed
    # as soon as the page has been parsed.
    def document
      @doc ||= URI.parse(@url).open { |io| Nokogiri::HTML(io) }
    end

    # Resolves a "/"-prefixed reference against the page URL. Falls back to
    # plain concatenation when the URLs can not be parsed.
    def absolute_url(path)
      URI.join(@url, path).to_s
    rescue URI::Error
      @url + path
    end
  end
end
|
data/lib/raev.rb
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
require 'redirect_follower'
|
|
3
|
+
require 'open-uri'
|
|
4
|
+
|
|
5
|
+
require 'raev/author'
|
|
6
|
+
require 'raev/parser'
|
|
7
|
+
require 'raev/url'
|
|
8
|
+
|
|
9
|
+
# Entry points of the gem.
module Raev
  # Wraps url in a Raev::Url.
  #
  # url - the URL, passed on to Raev::Url.new unchanged
  #
  # Returns the new Raev::Url.
  def self.url(url)
    Raev::Url.new(url)
  end

  # Normalizes an author name; shortcut for Raev::Author.normalize_name.
  #
  # author_name - the raw author name, passed on unchanged
  #
  # Returns whatever Raev::Author.normalize_name returns.
  def self.normalize_author(author_name)
    Raev::Author.normalize_name(author_name)
  end
end
|
data/raev.gemspec
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Generated by jeweler
|
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
|
4
|
+
# -*- encoding: utf-8 -*-
|
|
5
|
+
|
|
6
|
+
Gem::Specification.new do |s|
|
|
7
|
+
s.name = %q{raev}
|
|
8
|
+
s.version = "0.1.10"
|
|
9
|
+
|
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
|
+
s.authors = ["Andreas Zecher"]
|
|
12
|
+
s.date = %q{2013-06-15}
|
|
13
|
+
s.description = %q{Fetch, parse and normalize meta data from websites.}
|
|
14
|
+
s.email = %q{andreas@madebypixelate.com}
|
|
15
|
+
s.extra_rdoc_files = [
|
|
16
|
+
"LICENSE.txt",
|
|
17
|
+
"README.md"
|
|
18
|
+
]
|
|
19
|
+
s.files = [
|
|
20
|
+
".document",
|
|
21
|
+
"Gemfile",
|
|
22
|
+
"Gemfile.lock",
|
|
23
|
+
"LICENSE.txt",
|
|
24
|
+
"README.md",
|
|
25
|
+
"Rakefile",
|
|
26
|
+
"VERSION",
|
|
27
|
+
"lib/raev.rb",
|
|
28
|
+
"lib/raev/author.rb",
|
|
29
|
+
"lib/raev/parser.rb",
|
|
30
|
+
"lib/raev/url.rb",
|
|
31
|
+
"raev.gemspec",
|
|
32
|
+
"test/helper.rb",
|
|
33
|
+
"test/test_raev.rb"
|
|
34
|
+
]
|
|
35
|
+
s.homepage = %q{http://github.com/pixelate/raev}
|
|
36
|
+
s.licenses = ["MIT"]
|
|
37
|
+
s.require_paths = ["lib"]
|
|
38
|
+
s.rubygems_version = %q{1.3.6}
|
|
39
|
+
s.summary = %q{Fetch, parse and normalize meta data from websites.}
|
|
40
|
+
|
|
41
|
+
if s.respond_to? :specification_version then
|
|
42
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
|
43
|
+
s.specification_version = 3
|
|
44
|
+
|
|
45
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
|
46
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 1.5.3"])
|
|
47
|
+
s.add_runtime_dependency(%q<redirect_follower>, [">= 0.1.1"])
|
|
48
|
+
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
|
49
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.3.4"])
|
|
50
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
|
51
|
+
s.add_development_dependency(%q<rcov>, [">= 0"])
|
|
52
|
+
s.add_development_dependency(%q<test-unit>, ["~> 2.5.4"])
|
|
53
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.4"])
|
|
54
|
+
s.add_runtime_dependency(%q<redirect_follower>, [">= 0.1.1"])
|
|
55
|
+
else
|
|
56
|
+
s.add_dependency(%q<nokogiri>, [">= 1.5.3"])
|
|
57
|
+
s.add_dependency(%q<redirect_follower>, [">= 0.1.1"])
|
|
58
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
|
59
|
+
s.add_dependency(%q<bundler>, ["~> 1.3.4"])
|
|
60
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
|
61
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
|
62
|
+
s.add_dependency(%q<test-unit>, ["~> 2.5.4"])
|
|
63
|
+
s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
|
|
64
|
+
s.add_dependency(%q<redirect_follower>, [">= 0.1.1"])
|
|
65
|
+
end
|
|
66
|
+
else
|
|
67
|
+
s.add_dependency(%q<nokogiri>, [">= 1.5.3"])
|
|
68
|
+
s.add_dependency(%q<redirect_follower>, [">= 0.1.1"])
|
|
69
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
|
70
|
+
s.add_dependency(%q<bundler>, ["~> 1.3.4"])
|
|
71
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
|
72
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
|
73
|
+
s.add_dependency(%q<test-unit>, ["~> 2.5.4"])
|
|
74
|
+
s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
|
|
75
|
+
s.add_dependency(%q<redirect_follower>, [">= 0.1.1"])
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
|
data/test/helper.rb
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
require 'rubygems'
require 'bundler'

# Activate the gems of the default and development groups. When Bundler
# reports a problem, print it together with a hint and exit with Bundler's
# status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'test/unit'
require 'shoulda'

# Put the test directory and the gem's lib directory (in that order) at the
# front of the load path.
test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(test_dir, File.join(test_dir, '..', 'lib'))
require 'raev'

# Reopened without additions; a place for helpers shared by all test cases.
class Test::Unit::TestCase
end
|
data/test/test_raev.rb
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
|
|
3
|
+
require 'helper'
|
|
4
|
+
require 'raev/author'
|
|
5
|
+
require 'raev/url'
|
|
6
|
+
|
|
7
|
+
# Tests for the public Raev API.
#
# The "resolve", "twitter" and "rss feed" tests call methods that fetch the
# given URLs, so they need network access and depend on the current content
# of those sites.
class TestRaev < Test::Unit::TestCase
  should "parse base url" do
    url = Raev.url("http://indiegames.com/2011/05/c418_minecraft_volume_alpha.html")
    # expected value first, so that failure messages read correctly
    assert_equal "indiegames.com", url.base
  end

  should "clean url" do
    url = Raev.url("http://ipodtouchlab.com/2011/01/iphone-ipad-app-sale-20110117.html?utm_campaign=touch_lab_bot&utm_medium=twitter&utm_source=am6_feedtweet")
    assert_equal "http://ipodtouchlab.com/2011/01/iphone-ipad-app-sale-20110117.html", url.clean

    url = Raev.url("http://games.ign.com/articles/117/1178937p1.html?RSSwhen2011-06-24_082700&RSSid=1178937&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+ignfeeds%2Fgames+%28IGN+Videogames%29")
    assert_equal "http://games.ign.com/articles/117/1178937p1.html?RSSwhen2011-06-24_082700&RSSid=1178937", url.clean

    url = Raev.url("http://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html")
    assert_equal "http://boingboing.net/2011/08/09/ea-tried-to-buy-minecraft-studio.html", url.clean
  end

  should "resolve url" do
    url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/")
    assert_equal "http://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+fingergaming+%28FingerGaming%29", url.resolved
  end

  should "resolve and clean url" do
    url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/")
    assert_equal "http://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/", url.resolved_and_clean
  end

  should "get twitter handle" do
    url = Raev.url("http://www.polygon.com")
    assert_equal "polygon", url.twitter

    url = Raev.url("http://penny-arcade.com/report")
    assert_equal "thepareport", url.twitter

    url = Raev.url("http://kotaku.com")
    assert_nil url.twitter
  end

  should "get rss feed" do
    url = Raev.url("http://www.polygon.com")
    assert_equal "http://www.polygon.com/rss/index.xml", url.feed

    url = Raev.url("http://penny-arcade.com/report")
    assert_equal "http://feeds.penny-arcade.com/pa-report", url.feed

    url = Raev.url("http://arstechnica.com")
    assert_equal "http://feeds.arstechnica.com/arstechnica/index/", url.feed

    url = Raev.url("http://www.edge-online.com")
    assert_equal "http://www.edge-online.com/feed/", url.feed
  end

  should "get twitter and rss feed" do
    # both lookups on the same Raev::Url object
    url = Raev.url("http://www.polygon.com")
    assert_equal "http://www.polygon.com/rss/index.xml", url.feed
    assert_equal "polygon", url.twitter
  end

  should "parse author from rss entry" do
    assert_equal "Andreas", Raev.normalize_author("andreas@somedomain.com (Andreas)")
    assert_equal "Andreas Zecher", Raev.normalize_author("Andreas \"Pixelate\" Zecher")
    assert_equal "Andreas Zecher", Raev.normalize_author("Andreas 'Pixelate' Zecher")
    assert_equal "Andreas", Raev.normalize_author("andreas")
    assert_nil Raev.normalize_author("Admin")
    assert_nil Raev.normalize_author("Blogs")
    assert_nil Raev.normalize_author("Editor")
    assert_nil Raev.normalize_author("Staff")
    assert_nil Raev.normalize_author(" ")
    assert_nil Raev.normalize_author(nil)
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: raev
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
prerelease: false
|
|
5
|
+
segments:
|
|
6
|
+
- 0
|
|
7
|
+
- 1
|
|
8
|
+
- 10
|
|
9
|
+
version: 0.1.10
|
|
10
|
+
platform: ruby
|
|
11
|
+
authors:
|
|
12
|
+
- Andreas Zecher
|
|
13
|
+
autorequire:
|
|
14
|
+
bindir: bin
|
|
15
|
+
cert_chain: []
|
|
16
|
+
|
|
17
|
+
date: 2013-06-15 00:00:00 +02:00
|
|
18
|
+
default_executable:
|
|
19
|
+
dependencies:
|
|
20
|
+
- !ruby/object:Gem::Dependency
|
|
21
|
+
type: :runtime
|
|
22
|
+
version_requirements: &id001 !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
segments:
|
|
27
|
+
- 1
|
|
28
|
+
- 5
|
|
29
|
+
- 3
|
|
30
|
+
version: 1.5.3
|
|
31
|
+
name: nokogiri
|
|
32
|
+
requirement: *id001
|
|
33
|
+
prerelease: false
|
|
34
|
+
- !ruby/object:Gem::Dependency
|
|
35
|
+
type: :runtime
|
|
36
|
+
version_requirements: &id002 !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
segments:
|
|
41
|
+
- 0
|
|
42
|
+
- 1
|
|
43
|
+
- 1
|
|
44
|
+
version: 0.1.1
|
|
45
|
+
name: redirect_follower
|
|
46
|
+
requirement: *id002
|
|
47
|
+
prerelease: false
|
|
48
|
+
- !ruby/object:Gem::Dependency
|
|
49
|
+
type: :development
|
|
50
|
+
version_requirements: &id003 !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ">="
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
segments:
|
|
55
|
+
- 0
|
|
56
|
+
version: "0"
|
|
57
|
+
name: shoulda
|
|
58
|
+
requirement: *id003
|
|
59
|
+
prerelease: false
|
|
60
|
+
- !ruby/object:Gem::Dependency
|
|
61
|
+
type: :development
|
|
62
|
+
version_requirements: &id004 !ruby/object:Gem::Requirement
|
|
63
|
+
requirements:
|
|
64
|
+
- - ~>
|
|
65
|
+
- !ruby/object:Gem::Version
|
|
66
|
+
segments:
|
|
67
|
+
- 1
|
|
68
|
+
- 3
|
|
69
|
+
- 4
|
|
70
|
+
version: 1.3.4
|
|
71
|
+
name: bundler
|
|
72
|
+
requirement: *id004
|
|
73
|
+
prerelease: false
|
|
74
|
+
- !ruby/object:Gem::Dependency
|
|
75
|
+
type: :development
|
|
76
|
+
version_requirements: &id005 !ruby/object:Gem::Requirement
|
|
77
|
+
requirements:
|
|
78
|
+
- - ~>
|
|
79
|
+
- !ruby/object:Gem::Version
|
|
80
|
+
segments:
|
|
81
|
+
- 1
|
|
82
|
+
- 6
|
|
83
|
+
- 4
|
|
84
|
+
version: 1.6.4
|
|
85
|
+
name: jeweler
|
|
86
|
+
requirement: *id005
|
|
87
|
+
prerelease: false
|
|
88
|
+
- !ruby/object:Gem::Dependency
|
|
89
|
+
type: :development
|
|
90
|
+
version_requirements: &id006 !ruby/object:Gem::Requirement
|
|
91
|
+
requirements:
|
|
92
|
+
- - ">="
|
|
93
|
+
- !ruby/object:Gem::Version
|
|
94
|
+
segments:
|
|
95
|
+
- 0
|
|
96
|
+
version: "0"
|
|
97
|
+
name: rcov
|
|
98
|
+
requirement: *id006
|
|
99
|
+
prerelease: false
|
|
100
|
+
- !ruby/object:Gem::Dependency
|
|
101
|
+
type: :development
|
|
102
|
+
version_requirements: &id007 !ruby/object:Gem::Requirement
|
|
103
|
+
requirements:
|
|
104
|
+
- - ~>
|
|
105
|
+
- !ruby/object:Gem::Version
|
|
106
|
+
segments:
|
|
107
|
+
- 2
|
|
108
|
+
- 5
|
|
109
|
+
- 4
|
|
110
|
+
version: 2.5.4
|
|
111
|
+
name: test-unit
|
|
112
|
+
requirement: *id007
|
|
113
|
+
prerelease: false
|
|
114
|
+
- !ruby/object:Gem::Dependency
|
|
115
|
+
type: :runtime
|
|
116
|
+
version_requirements: &id008 !ruby/object:Gem::Requirement
|
|
117
|
+
requirements:
|
|
118
|
+
- - ">="
|
|
119
|
+
- !ruby/object:Gem::Version
|
|
120
|
+
segments:
|
|
121
|
+
- 1
|
|
122
|
+
- 4
|
|
123
|
+
- 4
|
|
124
|
+
version: 1.4.4
|
|
125
|
+
name: nokogiri
|
|
126
|
+
requirement: *id008
|
|
127
|
+
prerelease: false
|
|
128
|
+
- !ruby/object:Gem::Dependency
|
|
129
|
+
type: :runtime
|
|
130
|
+
version_requirements: &id009 !ruby/object:Gem::Requirement
|
|
131
|
+
requirements:
|
|
132
|
+
- - ">="
|
|
133
|
+
- !ruby/object:Gem::Version
|
|
134
|
+
segments:
|
|
135
|
+
- 0
|
|
136
|
+
- 1
|
|
137
|
+
- 1
|
|
138
|
+
version: 0.1.1
|
|
139
|
+
name: redirect_follower
|
|
140
|
+
requirement: *id009
|
|
141
|
+
prerelease: false
|
|
142
|
+
description: Fetch, parse and normalize meta data from websites.
|
|
143
|
+
email: andreas@madebypixelate.com
|
|
144
|
+
executables: []
|
|
145
|
+
|
|
146
|
+
extensions: []
|
|
147
|
+
|
|
148
|
+
extra_rdoc_files:
|
|
149
|
+
- LICENSE.txt
|
|
150
|
+
- README.md
|
|
151
|
+
files:
|
|
152
|
+
- .document
|
|
153
|
+
- Gemfile
|
|
154
|
+
- Gemfile.lock
|
|
155
|
+
- LICENSE.txt
|
|
156
|
+
- README.md
|
|
157
|
+
- Rakefile
|
|
158
|
+
- VERSION
|
|
159
|
+
- lib/raev.rb
|
|
160
|
+
- lib/raev/author.rb
|
|
161
|
+
- lib/raev/parser.rb
|
|
162
|
+
- lib/raev/url.rb
|
|
163
|
+
- raev.gemspec
|
|
164
|
+
- test/helper.rb
|
|
165
|
+
- test/test_raev.rb
|
|
166
|
+
has_rdoc: true
|
|
167
|
+
homepage: http://github.com/pixelate/raev
|
|
168
|
+
licenses:
|
|
169
|
+
- MIT
|
|
170
|
+
post_install_message:
|
|
171
|
+
rdoc_options: []
|
|
172
|
+
|
|
173
|
+
require_paths:
|
|
174
|
+
- lib
|
|
175
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
176
|
+
requirements:
|
|
177
|
+
- - ">="
|
|
178
|
+
- !ruby/object:Gem::Version
|
|
179
|
+
segments:
|
|
180
|
+
- 0
|
|
181
|
+
version: "0"
|
|
182
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
183
|
+
requirements:
|
|
184
|
+
- - ">="
|
|
185
|
+
- !ruby/object:Gem::Version
|
|
186
|
+
segments:
|
|
187
|
+
- 0
|
|
188
|
+
version: "0"
|
|
189
|
+
requirements: []
|
|
190
|
+
|
|
191
|
+
rubyforge_project:
|
|
192
|
+
rubygems_version: 1.3.6
|
|
193
|
+
signing_key:
|
|
194
|
+
specification_version: 3
|
|
195
|
+
summary: Fetch, parse and normalize meta data from websites.
|
|
196
|
+
test_files: []
|
|
197
|
+
|