raev 0.1.10 → 0.1.11
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +20 -0
- data/VERSION +1 -1
- data/lib/raev/url.rb +34 -0
- data/raev.gemspec +2 -2
- data/test/test_raev.rb +33 -1
- metadata +3 -3
data/README.md
CHANGED
@@ -3,6 +3,19 @@ Raev
|
|
3
3
|
|
4
4
|
Raev is a Ruby gem for fetching, parsing and normalizing meta data from websites. It was extracted from http://promoterapp.com.
|
5
5
|
|
6
|
+
Install
|
7
|
+
--------
|
8
|
+
|
9
|
+
```shell
|
10
|
+
gem install raev
|
11
|
+
```
|
12
|
+
or add the following line to Gemfile:
|
13
|
+
|
14
|
+
```ruby
|
15
|
+
gem 'raev'
|
16
|
+
```
|
17
|
+
and run `bundle install` from your shell.
|
18
|
+
|
6
19
|
Usage
|
7
20
|
-----
|
8
21
|
|
@@ -48,6 +61,13 @@ Raev.url("http://www.polygon.com").feed
|
|
48
61
|
# => "http://www.polygon.com/rss/index.xml"
|
49
62
|
```
|
50
63
|
|
64
|
+
Fetch headline from url. Removes double spaces.
|
65
|
+
|
66
|
+
```ruby
|
67
|
+
Raev.url("http://www.polygon.com/e3-2013/2013/6/14/4429126/the-indie-eight-ps4").headline
|
68
|
+
# => "The Indie Eight: Polygon talks with the showcase indies launching on PS4"
|
69
|
+
```
|
70
|
+
|
51
71
|
Normalize author name. Capitalizes name, strips whitespace, ignores email addresses and removes silly nicknames in quotes. Returns nil for empty strings or non-names like *Editor* or *Staff*.
|
52
72
|
|
53
73
|
```
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.10
|
1
|
+
0.1.11
|
data/lib/raev/url.rb
CHANGED
@@ -81,6 +81,40 @@ module Raev
|
|
81
81
|
feed_url
|
82
82
|
end
|
83
83
|
|
84
|
+
def headline
|
85
|
+
page_title = nil
|
86
|
+
|
87
|
+
node = document.css(".twitter-share-button")
|
88
|
+
|
89
|
+
if node.first
|
90
|
+
if node.first['data-text']
|
91
|
+
page_title = node.first['data-text']
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
if page_title.nil?
|
96
|
+
document.css("head meta").each do |meta|
|
97
|
+
if meta['property'] == 'og:title' || meta['property'] == 'twitter:title'
|
98
|
+
page_title = meta['content']
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
if page_title.nil?
|
104
|
+
node = document.css("#article h1, a[rel=\"bookmark\"], h2[itemprop=\"name\"]")
|
105
|
+
|
106
|
+
if node.first
|
107
|
+
page_title = node.first.content
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
unless page_title.nil?
|
112
|
+
page_title.gsub!(/ +/, ' ')
|
113
|
+
end
|
114
|
+
|
115
|
+
page_title
|
116
|
+
end
|
117
|
+
|
84
118
|
private
|
85
119
|
|
86
120
|
def document
|
data/raev.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{raev}
|
8
|
-
s.version = "0.1.10"
|
8
|
+
s.version = "0.1.11"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Andreas Zecher"]
|
12
|
-
s.date = %q{2013-06-
|
12
|
+
s.date = %q{2013-06-16}
|
13
13
|
s.description = %q{Fetch, parse and normalize meta data from websites.}
|
14
14
|
s.email = %q{andreas@madebypixelate.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/test/test_raev.rb
CHANGED
@@ -56,6 +56,38 @@ class TestRaev < Test::Unit::TestCase
|
|
56
56
|
assert_equal "http://www.edge-online.com/feed/", url.feed
|
57
57
|
end
|
58
58
|
|
59
|
+
should "get headline from url" do
|
60
|
+
url = Raev.url("http://www.polygon.com/e3-2013/2013/6/14/4429126/the-indie-eight-ps4")
|
61
|
+
assert_equal "The Indie Eight: Polygon talks with the showcase indies launching on PS4", url.headline
|
62
|
+
|
63
|
+
url = Raev.url("http://penny-arcade.com/report/article/the-future-improved-hands-on-with-the-high-definition-oculus-rift-dev-kit")
|
64
|
+
assert_equal "The future, improved: hands on with the high-definition Oculus Rift dev kit", url.headline
|
65
|
+
|
66
|
+
url = Raev.url("http://kotaku.com/the-world-of-a-link-to-the-past-has-changed-in-the-new-513424187")
|
67
|
+
assert_equal "The World of A Link To The Past Has Changed in the New 3DS Zelda", url.headline
|
68
|
+
|
69
|
+
url = Raev.url("http://arstechnica.com/gaming/2012/03/journey-a-hauntingly-beautiful-art-house-film-disguised-as-a-game/")
|
70
|
+
assert_equal "Journey: A hauntingly beautiful art house film disguised as a game", url.headline
|
71
|
+
|
72
|
+
url = Raev.url("http://www.creativeapplications.net/games/below-new-from-the-creators-of-sword-sworcery/")
|
73
|
+
assert_equal "Below – New from the creators of Sword & Sworcery", url.headline
|
74
|
+
|
75
|
+
url = Raev.url("http://www.edge-online.com/news/e3-2013-watch-the-first-hohokum-gameplay-footage-here/")
|
76
|
+
assert_equal "E3 2013: watch the first Hohokum gameplay footage here", url.headline
|
77
|
+
|
78
|
+
url = Raev.url("http://www.giantbomb.com/videos/e3-2013-fez-ii-announcement-teaser/2300-7606/")
|
79
|
+
assert_equal "E3 2013: Fez II Announcement Teaser", url.headline
|
80
|
+
|
81
|
+
url = Raev.url("http://indiegames.com/2013/06/indie_fund_backing_for_two_new.html")
|
82
|
+
assert_equal "Indie Fund backing two new titles for Double Fine", url.headline
|
83
|
+
|
84
|
+
url = Raev.url("http://killscreendaily.com/articles/news/cheat-sheet-614/")
|
85
|
+
assert_equal "Pixels on canvas, Spielberg's predictions, and Polytron's glorious tease", url.headline
|
86
|
+
|
87
|
+
url = Raev.url("http://www.rockpapershotgun.com/2013/06/05/i-spy-an-open-beta-for-spy-party/")
|
88
|
+
assert_equal "I Spy An Open Beta For Spy Party", url.headline
|
89
|
+
end
|
90
|
+
|
59
91
|
should "get twitter and rss feed" do
|
60
92
|
url = Raev.url("http://www.polygon.com")
|
61
93
|
assert_equal "http://www.polygon.com/rss/index.xml", url.feed
|
@@ -73,5 +105,5 @@ class TestRaev < Test::Unit::TestCase
|
|
73
105
|
assert_equal nil, Raev.normalize_author("Staff")
|
74
106
|
assert_equal nil, Raev.normalize_author(" ")
|
75
107
|
assert_equal nil, Raev.normalize_author(nil)
|
76
|
-
end
|
108
|
+
end
|
77
109
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
- 10
|
9
|
-
version: 0.1.10
|
8
|
+
- 11
|
9
|
+
version: 0.1.11
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Andreas Zecher
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2013-06-
|
17
|
+
date: 2013-06-16 00:00:00 +02:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|