raev 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +6 -6
- data/Gemfile.lock +46 -41
- data/README.md +10 -0
- data/VERSION +1 -1
- data/lib/raev/article.rb +18 -22
- data/lib/raev/author.rb +20 -9
- data/lib/raev/parser.rb +1 -1
- data/lib/raev/url.rb +58 -52
- data/raev.gemspec +40 -40
- data/test/test_url.rb +14 -17
- metadata +17 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e3c9accf41425df615fb4f1b864fc8075fe0940b
|
4
|
+
data.tar.gz: d8c643c647b0ef2d46a02c23003a9d266ed1818e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a107b4380e62a65f2ce83c1477a16f8f083d2bc32e4665fe63e74838f797af94c8e52292601aa7414e88da686ca6e78238c65e228d123ef1e6b38890c4c93eac
|
7
|
+
data.tar.gz: e2d2964362e7d42de36f44f0feb4b8265d418faa0245eeb2e7d47fb80a433f45993cd176b54ba52b1e0a2f4d7f9cc2c14d54fe88cfffe7a35d8b264afc6a1446
|
data/Gemfile
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
2
|
|
3
|
-
gem "json", '>= 1.
|
4
|
-
gem "nokogiri", ">= 1.
|
3
|
+
gem "json", '>= 2.1.0'
|
4
|
+
gem "nokogiri", ">= 1.8.0"
|
5
5
|
gem "redirect_follower", ">= 0.1.1"
|
6
6
|
gem "sanitize", ">= 2.1.0"
|
7
|
-
gem "chronic", ">=0.
|
7
|
+
gem "chronic", ">=0.10.2"
|
8
8
|
|
9
9
|
group :development do
|
10
10
|
gem "shoulda", ">= 0"
|
11
|
-
gem "bundler", "~> 1.
|
12
|
-
gem "jeweler", "2.
|
13
|
-
gem "test-unit", "
|
11
|
+
gem "bundler", "~> 1.14.6"
|
12
|
+
gem "jeweler", "2.3.7"
|
13
|
+
gem "test-unit", "3.2.4"
|
14
14
|
end
|
data/Gemfile.lock
CHANGED
@@ -1,91 +1,96 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
-
activesupport (
|
4
|
+
activesupport (5.1.4)
|
5
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
5
6
|
i18n (~> 0.7)
|
6
|
-
json (~> 1.7, >= 1.7.7)
|
7
7
|
minitest (~> 5.1)
|
8
|
-
thread_safe (~> 0.3, >= 0.3.4)
|
9
8
|
tzinfo (~> 1.1)
|
10
9
|
addressable (2.4.0)
|
11
|
-
builder (3.2.
|
10
|
+
builder (3.2.3)
|
12
11
|
chronic (0.10.2)
|
12
|
+
concurrent-ruby (1.0.5)
|
13
13
|
crass (1.0.2)
|
14
14
|
descendants_tracker (0.0.4)
|
15
15
|
thread_safe (~> 0.3, >= 0.3.1)
|
16
16
|
faraday (0.9.2)
|
17
17
|
multipart-post (>= 1.2, < 3)
|
18
18
|
git (1.3.0)
|
19
|
-
github_api (0.
|
19
|
+
github_api (0.16.0)
|
20
20
|
addressable (~> 2.4.0)
|
21
21
|
descendants_tracker (~> 0.0.4)
|
22
22
|
faraday (~> 0.8, < 0.10)
|
23
23
|
hashie (>= 3.4)
|
24
|
-
|
25
|
-
|
24
|
+
mime-types (>= 1.16, < 3.0)
|
25
|
+
oauth2 (~> 1.0)
|
26
|
+
hashie (3.5.6)
|
26
27
|
highline (1.7.8)
|
27
|
-
i18n (0.
|
28
|
-
jeweler (2.
|
28
|
+
i18n (0.8.6)
|
29
|
+
jeweler (2.3.7)
|
29
30
|
builder
|
30
|
-
bundler (>= 1
|
31
|
+
bundler (>= 1)
|
31
32
|
git (>= 1.2.5)
|
32
|
-
github_api
|
33
|
+
github_api (~> 0.16.0)
|
33
34
|
highline (>= 1.6.15)
|
34
35
|
nokogiri (>= 1.5.10)
|
36
|
+
psych (~> 2.2)
|
35
37
|
rake
|
36
38
|
rdoc
|
37
|
-
|
38
|
-
json (1.
|
39
|
-
jwt (1.5.
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
39
|
+
semver2
|
40
|
+
json (2.1.0)
|
41
|
+
jwt (1.5.6)
|
42
|
+
mime-types (2.99.3)
|
43
|
+
mini_portile2 (2.2.0)
|
44
|
+
minitest (5.10.3)
|
45
|
+
multi_json (1.12.2)
|
46
|
+
multi_xml (0.6.0)
|
44
47
|
multipart-post (2.0.0)
|
45
|
-
nokogiri (1.
|
46
|
-
mini_portile2 (~> 2.
|
47
|
-
nokogumbo (1.4.
|
48
|
+
nokogiri (1.8.0)
|
49
|
+
mini_portile2 (~> 2.2.0)
|
50
|
+
nokogumbo (1.4.13)
|
48
51
|
nokogiri
|
49
|
-
oauth2 (1.
|
50
|
-
faraday (>= 0.8, < 0.
|
51
|
-
jwt (~> 1.0
|
52
|
+
oauth2 (1.4.0)
|
53
|
+
faraday (>= 0.8, < 0.13)
|
54
|
+
jwt (~> 1.0)
|
52
55
|
multi_json (~> 1.3)
|
53
56
|
multi_xml (~> 0.5)
|
54
57
|
rack (>= 1.2, < 3)
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
58
|
+
power_assert (1.1.0)
|
59
|
+
psych (2.2.4)
|
60
|
+
rack (2.0.3)
|
61
|
+
rake (12.1.0)
|
62
|
+
rdoc (5.1.0)
|
59
63
|
redirect_follower (0.1.1)
|
60
|
-
sanitize (4.0
|
64
|
+
sanitize (4.5.0)
|
61
65
|
crass (~> 1.0.2)
|
62
66
|
nokogiri (>= 1.4.4)
|
63
67
|
nokogumbo (~> 1.4.1)
|
64
|
-
|
68
|
+
semver2 (3.4.2)
|
65
69
|
shoulda (3.5.0)
|
66
70
|
shoulda-context (~> 1.0, >= 1.0.1)
|
67
71
|
shoulda-matchers (>= 1.4.1, < 3.0)
|
68
|
-
shoulda-context (1.2.
|
72
|
+
shoulda-context (1.2.2)
|
69
73
|
shoulda-matchers (2.8.0)
|
70
74
|
activesupport (>= 3.0.0)
|
71
|
-
test-unit (2.
|
72
|
-
|
73
|
-
|
75
|
+
test-unit (3.2.4)
|
76
|
+
power_assert
|
77
|
+
thread_safe (0.3.6)
|
78
|
+
tzinfo (1.2.3)
|
74
79
|
thread_safe (~> 0.1)
|
75
80
|
|
76
81
|
PLATFORMS
|
77
82
|
ruby
|
78
83
|
|
79
84
|
DEPENDENCIES
|
80
|
-
bundler (~> 1.
|
81
|
-
chronic (>= 0.
|
82
|
-
jeweler (= 2.
|
83
|
-
json (>= 1.
|
84
|
-
nokogiri (>= 1.
|
85
|
+
bundler (~> 1.14.6)
|
86
|
+
chronic (>= 0.10.2)
|
87
|
+
jeweler (= 2.3.7)
|
88
|
+
json (>= 2.1.0)
|
89
|
+
nokogiri (>= 1.8.0)
|
85
90
|
redirect_follower (>= 0.1.1)
|
86
91
|
sanitize (>= 2.1.0)
|
87
92
|
shoulda
|
88
|
-
test-unit (
|
93
|
+
test-unit (= 3.2.4)
|
89
94
|
|
90
95
|
BUNDLED WITH
|
91
|
-
1.
|
96
|
+
1.14.6
|
data/README.md
CHANGED
@@ -68,6 +68,16 @@ Raev.url("http://www.polygon.com/e3-2013/2013/6/14/4429126/the-indie-eight-ps4")
|
|
68
68
|
# => "The Indie Eight: Polygon talks with the showcase indies launching on PS4"
|
69
69
|
```
|
70
70
|
|
71
|
+
Parse review scores.
|
72
|
+
|
73
|
+
```ruby
|
74
|
+
review = Raev.url("http://www.gamesradar.com/superhot-review/")
|
75
|
+
review.ratingValue
|
76
|
+
# => 4.5
|
77
|
+
review.bestRating
|
78
|
+
# => 5.0
|
79
|
+
```
|
80
|
+
|
71
81
|
Normalize author name. Capitalizes name, strips whitespace, ignores email addresses and removes silly nicknames in quotes. Returns nil for empty strings or non-names like *Editor* or *Staff*.
|
72
82
|
|
73
83
|
```
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.
|
1
|
+
0.2.4
|
data/lib/raev/article.rb
CHANGED
@@ -2,65 +2,61 @@ module Raev
|
|
2
2
|
|
3
3
|
class Article
|
4
4
|
|
5
|
+
REGEX_NODE_EMPTY = /\A *\z/
|
6
|
+
|
5
7
|
attr_reader :body
|
6
8
|
attr_reader :doc
|
7
|
-
|
9
|
+
|
8
10
|
def initialize(body)
|
9
|
-
|
10
|
-
|
11
|
-
|
11
|
+
@doc = Nokogiri::HTML::DocumentFragment.parse(
|
12
|
+
replace_non_breaking_space(body)
|
13
|
+
)
|
12
14
|
|
13
|
-
|
14
|
-
|
15
|
-
|
15
|
+
replace_divs_with_paragraphs(@doc)
|
16
|
+
remove_empty_paragraphs(@doc)
|
17
|
+
remove_extra_linebreaks(@doc)
|
16
18
|
|
17
|
-
@body = @doc.to_s.gsub("\n", "")
|
19
|
+
@body = @doc.to_s.gsub("\n".freeze, "".freeze)
|
18
20
|
end
|
19
21
|
|
20
22
|
private
|
21
23
|
|
22
24
|
def replace_non_breaking_space(str)
|
23
|
-
str.gsub(" ", " ")
|
25
|
+
str.gsub(" ".freeze, " ".freeze)
|
24
26
|
end
|
25
27
|
|
26
28
|
def replace_divs_with_paragraphs(doc)
|
27
|
-
doc.css("div").each do |node|
|
28
|
-
if node.css("p").length == 0
|
29
|
-
node.name = "p"
|
29
|
+
doc.css("div".freeze).each do |node|
|
30
|
+
if node.css("p".freeze).length == 0
|
31
|
+
node.name = "p".freeze
|
30
32
|
end
|
31
33
|
end
|
32
|
-
|
33
|
-
doc
|
34
34
|
end
|
35
35
|
|
36
36
|
def remove_empty_paragraphs(doc)
|
37
|
-
doc.css("p").each do |node|
|
37
|
+
doc.css("p".freeze).each do |node|
|
38
38
|
if node_empty?(node)
|
39
39
|
node.remove
|
40
40
|
end
|
41
41
|
end
|
42
|
-
|
43
|
-
doc
|
44
42
|
end
|
45
43
|
|
46
44
|
def remove_extra_linebreaks(doc)
|
47
|
-
doc.css("br").each do |node|
|
45
|
+
doc.css("br".freeze).each do |node|
|
48
46
|
next_node = node.next
|
49
47
|
|
50
48
|
if next_node
|
51
|
-
if next_node.matches?("br") || node_empty?(next_node)
|
49
|
+
if next_node.matches?("br".freeze) || node_empty?(next_node)
|
52
50
|
node.remove
|
53
51
|
end
|
54
52
|
else
|
55
53
|
node.remove
|
56
54
|
end
|
57
55
|
end
|
58
|
-
|
59
|
-
doc
|
60
56
|
end
|
61
57
|
|
62
58
|
def node_empty?(node)
|
63
|
-
node.element_children.empty? &&
|
59
|
+
node.element_children.empty? && REGEX_NODE_EMPTY.match(node.inner_text)
|
64
60
|
end
|
65
61
|
|
66
62
|
end
|
data/lib/raev/author.rb
CHANGED
@@ -2,6 +2,17 @@ module Raev
|
|
2
2
|
|
3
3
|
class Author
|
4
4
|
|
5
|
+
NO_AUTHOR_STRINGS = [
|
6
|
+
"admin".freeze,
|
7
|
+
"blogs".freeze,
|
8
|
+
"editor".freeze,
|
9
|
+
"staff".freeze
|
10
|
+
]
|
11
|
+
|
12
|
+
REGEX_EMAIL_WITH_NAME = /\((.*)\)/
|
13
|
+
REGEX_QUOTES = /\'(.*)\'/
|
14
|
+
REGEX_DOUBLE_QUOTES = /\"(.*)\"/
|
15
|
+
|
5
16
|
def self.normalize_name author_name
|
6
17
|
if author_name.nil?
|
7
18
|
return nil
|
@@ -12,28 +23,28 @@ module Raev
|
|
12
23
|
return nil
|
13
24
|
end
|
14
25
|
|
15
|
-
# Ignore common strings that are not names of people
|
16
|
-
|
17
|
-
|
18
|
-
if no_authors.include?(author.downcase)
|
26
|
+
# Ignore common strings that are not names of people
|
27
|
+
if NO_AUTHOR_STRINGS.include?(author.downcase)
|
19
28
|
return nil
|
20
29
|
end
|
21
30
|
end
|
22
31
|
|
23
32
|
# Parse notation "andreas@somedomain.com (Andreas)"
|
24
|
-
m =
|
33
|
+
m = REGEX_EMAIL_WITH_NAME.match(author)
|
25
34
|
unless m.nil?
|
26
35
|
author = m[1]
|
27
36
|
end
|
28
|
-
|
37
|
+
|
29
38
|
# Remove nickname quotes
|
30
|
-
author
|
39
|
+
author.gsub!(REGEX_DOUBLE_QUOTES, "".freeze)
|
40
|
+
author.gsub!(REGEX_QUOTES, "".freeze)
|
41
|
+
author.gsub!(" ".freeze, " ".freeze)
|
31
42
|
|
32
43
|
# Remove "by"
|
33
|
-
author
|
44
|
+
author.gsub!("by ".freeze, "".freeze)
|
34
45
|
|
35
46
|
# Capitalize
|
36
|
-
return author.split(' ').map(&:capitalize).join(' ')
|
47
|
+
return author.split(' '.freeze).map(&:capitalize).join(' '.freeze)
|
37
48
|
end
|
38
49
|
|
39
50
|
end
|
data/lib/raev/parser.rb
CHANGED
data/lib/raev/url.rb
CHANGED
@@ -5,9 +5,35 @@ require "sanitize"
|
|
5
5
|
module Raev
|
6
6
|
|
7
7
|
class Url
|
8
|
+
|
9
|
+
AUTHOR_CSS_SELECTORS = [
|
10
|
+
'.c-byline__item a'.freeze,
|
11
|
+
'.author-info .name'.freeze,
|
12
|
+
'.author-top a'.freeze,
|
13
|
+
'.yt-user-info a'.freeze,
|
14
|
+
'a[rel~="author"]'.freeze,
|
15
|
+
'a[itemprop~="author"]'.freeze,
|
16
|
+
'.author h3 a'.freeze,
|
17
|
+
'.author'.freeze,
|
18
|
+
'.posted-by a'.freeze,
|
19
|
+
'.entryAuthor a'.freeze,
|
20
|
+
'a.names'.freeze,
|
21
|
+
'a.byline-author'.freeze,
|
22
|
+
'.byline a'.freeze,
|
23
|
+
'.author.vcard a'.freeze,
|
24
|
+
'p.info a'.freeze,
|
25
|
+
'.author-name'.freeze,
|
26
|
+
'.upcased'.freeze,
|
27
|
+
'a[rel~="nofollow"]'.freeze
|
28
|
+
]
|
29
|
+
|
30
|
+
REGEX_UTM = /(\?|&)utm_/
|
31
|
+
REGEX_URL_DATE = /[0-9]{4}\/[0-9]{1,2}\/[0-9]{1,2}/
|
32
|
+
REGEX_ENTRY_DATE = /[^a-zA-Z0-9\s]/
|
33
|
+
REGEX_PAGE_TITLE = / +/
|
34
|
+
|
8
35
|
attr_reader :url
|
9
36
|
attr_reader :doc
|
10
|
-
attr_reader :linked_data
|
11
37
|
|
12
38
|
def initialize(url)
|
13
39
|
@url = url
|
@@ -16,14 +42,14 @@ module Raev
|
|
16
42
|
end
|
17
43
|
|
18
44
|
def base
|
19
|
-
base_url = @url.split('/')[2]
|
20
|
-
base_url
|
45
|
+
base_url = @url.split('/'.freeze)[2]
|
46
|
+
base_url.gsub!('www.'.freeze, ''.freeze) unless base_url.nil?
|
21
47
|
base_url
|
22
48
|
end
|
23
49
|
|
24
50
|
def clean
|
25
51
|
unless @url.nil?
|
26
|
-
utm_index = @url.index(
|
52
|
+
utm_index = @url.index(REGEX_UTM)
|
27
53
|
unless(utm_index.nil?)
|
28
54
|
return url.slice(0, utm_index)
|
29
55
|
end
|
@@ -50,15 +76,15 @@ module Raev
|
|
50
76
|
end
|
51
77
|
|
52
78
|
def without_http
|
53
|
-
@url.sub("http://", "")
|
79
|
+
@url.sub("http://".freeze, "".freeze)
|
54
80
|
end
|
55
81
|
|
56
82
|
def twitter
|
57
|
-
node = document.css('a:match_href("twitter.com")', Raev::Parser.new)
|
83
|
+
node = document.css('a:match_href("twitter.com")'.freeze, Raev::Parser.new)
|
58
84
|
|
59
85
|
if node.first
|
60
86
|
twitter_url = node.first["href"]
|
61
|
-
twitter_url.split('/').last
|
87
|
+
twitter_url.split('/'.freeze).last
|
62
88
|
else
|
63
89
|
nil
|
64
90
|
end
|
@@ -67,19 +93,19 @@ module Raev
|
|
67
93
|
def feed
|
68
94
|
feed_url = nil
|
69
95
|
|
70
|
-
node = document.css('link[type="application/rss+xml"][rel="alternate"]')
|
96
|
+
node = document.css('link[type="application/rss+xml"][rel="alternate"]'.freeze)
|
71
97
|
|
72
98
|
if node.first
|
73
99
|
feed_url = node.first["href"]
|
74
100
|
else
|
75
|
-
node = document.css('a:match_href("http://feeds.")', Raev::Parser.new)
|
101
|
+
node = document.css('a:match_href("http://feeds.")'.freeze, Raev::Parser.new)
|
76
102
|
|
77
103
|
if node.first
|
78
104
|
feed_url = node.first["href"]
|
79
105
|
end
|
80
106
|
end
|
81
107
|
|
82
|
-
if feed_url && feed_url[0,1] == "/"
|
108
|
+
if feed_url && feed_url[0,1] == "/".freeze
|
83
109
|
feed_url = @url + feed_url
|
84
110
|
end
|
85
111
|
|
@@ -93,7 +119,7 @@ module Raev
|
|
93
119
|
|
94
120
|
page_title = nil
|
95
121
|
|
96
|
-
node = document.css(".twitter-share-button")
|
122
|
+
node = document.css(".twitter-share-button".freeze)
|
97
123
|
|
98
124
|
if node.first
|
99
125
|
if node.first['data-text']
|
@@ -102,15 +128,15 @@ module Raev
|
|
102
128
|
end
|
103
129
|
|
104
130
|
if page_title.nil?
|
105
|
-
document.css("head meta").each do |meta|
|
106
|
-
if meta['property'] == 'og:title' || meta['property'] == 'twitter:title'
|
131
|
+
document.css("head meta".freeze).each do |meta|
|
132
|
+
if meta['property'] == 'og:title'.freeze || meta['property'] == 'twitter:title'.freeze
|
107
133
|
page_title = meta['content']
|
108
134
|
end
|
109
135
|
end
|
110
136
|
end
|
111
137
|
|
112
138
|
if page_title.nil?
|
113
|
-
node = document.css("#article h1, a[rel=\"bookmark\"], h2[itemprop=\"name\"]")
|
139
|
+
node = document.css("#article h1, a[rel=\"bookmark\"], h2[itemprop=\"name\"]".freeze)
|
114
140
|
|
115
141
|
if node.first
|
116
142
|
page_title = node.first.content
|
@@ -118,7 +144,7 @@ module Raev
|
|
118
144
|
end
|
119
145
|
|
120
146
|
unless page_title.nil?
|
121
|
-
page_title.gsub!(
|
147
|
+
page_title.gsub!(REGEX_PAGE_TITLE, ' '.freeze)
|
122
148
|
end
|
123
149
|
|
124
150
|
page_title
|
@@ -129,20 +155,20 @@ module Raev
|
|
129
155
|
return Date.parse(linked_data["datePublished"])
|
130
156
|
end
|
131
157
|
|
132
|
-
date_elements = @url.match(
|
158
|
+
date_elements = @url.match(REGEX_URL_DATE).to_s.split("/".freeze)
|
133
159
|
|
134
160
|
if date_elements.size == 3
|
135
161
|
return Date.new(date_elements[0].to_i, date_elements[1].to_i, date_elements[2].to_i)
|
136
162
|
else
|
137
|
-
node = document.search("meta[itemprop='datePublished'], meta[name='pub_date']").first
|
163
|
+
node = document.search("meta[itemprop='datePublished'], meta[name='pub_date']".freeze).first
|
138
164
|
|
139
165
|
if node
|
140
|
-
return Date.parse(node.attribute("content"))
|
166
|
+
return Date.parse(node.attribute("content".freeze))
|
141
167
|
else
|
142
|
-
node = document.search(".entryDate, .entrydate").first
|
168
|
+
node = document.search(".entryDate, .entrydate".freeze).first
|
143
169
|
|
144
170
|
if node
|
145
|
-
return Chronic.parse(node.content.gsub(
|
171
|
+
return Chronic.parse(node.content.gsub(REGEX_ENTRY_DATE, "".freeze).strip)
|
146
172
|
end
|
147
173
|
end
|
148
174
|
end
|
@@ -151,33 +177,13 @@ module Raev
|
|
151
177
|
end
|
152
178
|
|
153
179
|
def author
|
154
|
-
node = document.search('meta[name="author"]').first
|
180
|
+
node = document.search('meta[name="author"]'.freeze).first
|
155
181
|
|
156
|
-
if node && node.attribute("content")
|
157
|
-
return node.attribute("content").value
|
182
|
+
if node && node.attribute("content".freeze)
|
183
|
+
return node.attribute("content".freeze).value
|
158
184
|
end
|
159
185
|
|
160
|
-
|
161
|
-
'.author-info .name',
|
162
|
-
'.author-top a',
|
163
|
-
'.yt-user-info a',
|
164
|
-
'a[rel~="author"]',
|
165
|
-
'a[itemprop~="author"]',
|
166
|
-
'.author h3 a',
|
167
|
-
'.author',
|
168
|
-
'.posted-by a',
|
169
|
-
'.entryAuthor a',
|
170
|
-
'a.names',
|
171
|
-
'a.byline-author',
|
172
|
-
'.byline a',
|
173
|
-
'.author.vcard a',
|
174
|
-
'p.info a',
|
175
|
-
'.author-name',
|
176
|
-
'.upcased',
|
177
|
-
'a[rel~="nofollow"]'
|
178
|
-
]
|
179
|
-
|
180
|
-
node = document.search(cssSelectors.join(", ")).first
|
186
|
+
node = document.search(AUTHOR_CSS_SELECTORS.join(", ".freeze)).first
|
181
187
|
|
182
188
|
if node
|
183
189
|
words = node.content.split.size
|
@@ -187,15 +193,15 @@ module Raev
|
|
187
193
|
end
|
188
194
|
end
|
189
195
|
|
190
|
-
""
|
196
|
+
"".freeze
|
191
197
|
end
|
192
198
|
|
193
199
|
def ratingValue
|
194
|
-
node = document.search('*[itemprop="ratingValue"]').first
|
200
|
+
node = document.search('*[itemprop="ratingValue"]'.freeze).first
|
195
201
|
|
196
202
|
if node
|
197
|
-
if node.attribute("content")
|
198
|
-
value = node.attribute("content").value
|
203
|
+
if node.attribute("content".freeze)
|
204
|
+
value = node.attribute("content".freeze).value
|
199
205
|
else
|
200
206
|
value = node.content
|
201
207
|
end
|
@@ -209,11 +215,11 @@ module Raev
|
|
209
215
|
end
|
210
216
|
|
211
217
|
def bestRating
|
212
|
-
node = document.search('*[itemprop="bestRating"]').first
|
218
|
+
node = document.search('*[itemprop="bestRating"]'.freeze).first
|
213
219
|
|
214
220
|
if node
|
215
|
-
if node.attribute("content")
|
216
|
-
value = node.attribute("content").value
|
221
|
+
if node.attribute("content".freeze)
|
222
|
+
value = node.attribute("content".freeze).value
|
217
223
|
|
218
224
|
if value
|
219
225
|
return value.to_f
|
@@ -236,7 +242,7 @@ module Raev
|
|
236
242
|
|
237
243
|
def linked_data
|
238
244
|
if @linked_data.nil?
|
239
|
-
node = document.css("script[type=\"application/ld+json\"]")
|
245
|
+
node = document.css("script[type=\"application/ld+json\"]".freeze)
|
240
246
|
|
241
247
|
if node.first
|
242
248
|
@linked_data = JSON.parse(node.first.content)
|
data/raev.gemspec
CHANGED
@@ -2,18 +2,18 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: raev 0.2.
|
5
|
+
# stub: raev 0.2.4 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
|
-
s.name = "raev"
|
9
|
-
s.version = "0.2.
|
8
|
+
s.name = "raev".freeze
|
9
|
+
s.version = "0.2.4"
|
10
10
|
|
11
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
|
-
s.require_paths = ["lib"]
|
13
|
-
s.authors = ["Andreas Zecher"]
|
14
|
-
s.date = "
|
15
|
-
s.description = "Fetch, parse and normalize meta data from websites."
|
16
|
-
s.email = "andreas@madebypixelate.com"
|
11
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
12
|
+
s.require_paths = ["lib".freeze]
|
13
|
+
s.authors = ["Andreas Zecher".freeze]
|
14
|
+
s.date = "2017-09-18"
|
15
|
+
s.description = "Fetch, parse and normalize meta data from websites.".freeze
|
16
|
+
s.email = "andreas@madebypixelate.com".freeze
|
17
17
|
s.extra_rdoc_files = [
|
18
18
|
"LICENSE.txt",
|
19
19
|
"README.md"
|
@@ -37,45 +37,45 @@ Gem::Specification.new do |s|
|
|
37
37
|
"test/test_author.rb",
|
38
38
|
"test/test_url.rb"
|
39
39
|
]
|
40
|
-
s.homepage = "http://github.com/pixelate/raev"
|
41
|
-
s.licenses = ["MIT"]
|
42
|
-
s.rubygems_version = "2.
|
43
|
-
s.summary = "Fetch, parse and normalize meta data from websites."
|
40
|
+
s.homepage = "http://github.com/pixelate/raev".freeze
|
41
|
+
s.licenses = ["MIT".freeze]
|
42
|
+
s.rubygems_version = "2.6.13".freeze
|
43
|
+
s.summary = "Fetch, parse and normalize meta data from websites.".freeze
|
44
44
|
|
45
45
|
if s.respond_to? :specification_version then
|
46
46
|
s.specification_version = 4
|
47
47
|
|
48
48
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
|
-
s.add_runtime_dependency(%q<json
|
50
|
-
s.add_runtime_dependency(%q<nokogiri
|
51
|
-
s.add_runtime_dependency(%q<redirect_follower
|
52
|
-
s.add_runtime_dependency(%q<sanitize
|
53
|
-
s.add_runtime_dependency(%q<chronic
|
54
|
-
s.add_development_dependency(%q<shoulda
|
55
|
-
s.add_development_dependency(%q<bundler
|
56
|
-
s.add_development_dependency(%q<jeweler
|
57
|
-
s.add_development_dependency(%q<test-unit
|
49
|
+
s.add_runtime_dependency(%q<json>.freeze, [">= 2.1.0"])
|
50
|
+
s.add_runtime_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
|
51
|
+
s.add_runtime_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
|
52
|
+
s.add_runtime_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
|
53
|
+
s.add_runtime_dependency(%q<chronic>.freeze, [">= 0.10.2"])
|
54
|
+
s.add_development_dependency(%q<shoulda>.freeze, [">= 0"])
|
55
|
+
s.add_development_dependency(%q<bundler>.freeze, ["~> 1.14.6"])
|
56
|
+
s.add_development_dependency(%q<jeweler>.freeze, ["= 2.3.7"])
|
57
|
+
s.add_development_dependency(%q<test-unit>.freeze, ["= 3.2.4"])
|
58
58
|
else
|
59
|
-
s.add_dependency(%q<json
|
60
|
-
s.add_dependency(%q<nokogiri
|
61
|
-
s.add_dependency(%q<redirect_follower
|
62
|
-
s.add_dependency(%q<sanitize
|
63
|
-
s.add_dependency(%q<chronic
|
64
|
-
s.add_dependency(%q<shoulda
|
65
|
-
s.add_dependency(%q<bundler
|
66
|
-
s.add_dependency(%q<jeweler
|
67
|
-
s.add_dependency(%q<test-unit
|
59
|
+
s.add_dependency(%q<json>.freeze, [">= 2.1.0"])
|
60
|
+
s.add_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
|
61
|
+
s.add_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
|
62
|
+
s.add_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
|
63
|
+
s.add_dependency(%q<chronic>.freeze, [">= 0.10.2"])
|
64
|
+
s.add_dependency(%q<shoulda>.freeze, [">= 0"])
|
65
|
+
s.add_dependency(%q<bundler>.freeze, ["~> 1.14.6"])
|
66
|
+
s.add_dependency(%q<jeweler>.freeze, ["= 2.3.7"])
|
67
|
+
s.add_dependency(%q<test-unit>.freeze, ["= 3.2.4"])
|
68
68
|
end
|
69
69
|
else
|
70
|
-
s.add_dependency(%q<json
|
71
|
-
s.add_dependency(%q<nokogiri
|
72
|
-
s.add_dependency(%q<redirect_follower
|
73
|
-
s.add_dependency(%q<sanitize
|
74
|
-
s.add_dependency(%q<chronic
|
75
|
-
s.add_dependency(%q<shoulda
|
76
|
-
s.add_dependency(%q<bundler
|
77
|
-
s.add_dependency(%q<jeweler
|
78
|
-
s.add_dependency(%q<test-unit
|
70
|
+
s.add_dependency(%q<json>.freeze, [">= 2.1.0"])
|
71
|
+
s.add_dependency(%q<nokogiri>.freeze, [">= 1.8.0"])
|
72
|
+
s.add_dependency(%q<redirect_follower>.freeze, [">= 0.1.1"])
|
73
|
+
s.add_dependency(%q<sanitize>.freeze, [">= 2.1.0"])
|
74
|
+
s.add_dependency(%q<chronic>.freeze, [">= 0.10.2"])
|
75
|
+
s.add_dependency(%q<shoulda>.freeze, [">= 0"])
|
76
|
+
s.add_dependency(%q<bundler>.freeze, ["~> 1.14.6"])
|
77
|
+
s.add_dependency(%q<jeweler>.freeze, ["= 2.3.7"])
|
78
|
+
s.add_dependency(%q<test-unit>.freeze, ["= 3.2.4"])
|
79
79
|
end
|
80
80
|
end
|
81
81
|
|
data/test/test_url.rb
CHANGED
@@ -22,17 +22,17 @@ class UrlTest < Test::Unit::TestCase
|
|
22
22
|
|
23
23
|
should "resolve url" do
|
24
24
|
url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/")
|
25
|
-
assert_equal "
|
25
|
+
assert_equal "https://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+fingergaming+%28FingerGaming%29", url.resolved
|
26
26
|
end
|
27
27
|
|
28
28
|
should "resolve and clean url" do
|
29
29
|
url = Raev.url("http://feedproxy.google.com/~r/fingergaming/~3/nBkNwBLq-U8/")
|
30
|
-
assert_equal "
|
30
|
+
assert_equal "https://www.gamasutra.com/topic/smartphone-tablet/fg/2011/01/21/zynga-acquires-drop7-developer-areacode/", url.resolved_and_clean
|
31
31
|
end
|
32
32
|
|
33
33
|
should "get twitter handle" do
|
34
|
-
url = Raev.url("
|
35
|
-
assert_equal
|
34
|
+
url = Raev.url("https://www.polygon.com")
|
35
|
+
assert_equal "Polygon", url.twitter
|
36
36
|
end
|
37
37
|
|
38
38
|
should "get rss feed" do
|
@@ -43,14 +43,14 @@ class UrlTest < Test::Unit::TestCase
|
|
43
43
|
assert_equal "http://feeds.arstechnica.com/arstechnica/index/", url.feed
|
44
44
|
|
45
45
|
url = Raev.url("http://www.kotaku.com")
|
46
|
-
assert_equal "
|
46
|
+
assert_equal "https://kotaku.com/rss", url.feed
|
47
47
|
end
|
48
48
|
|
49
49
|
should "get headline from url" do
|
50
|
-
url = Raev.url("
|
50
|
+
url = Raev.url("https://www.polygon.com/e3-2013/2013/6/14/4429126/the-indie-eight-ps4")
|
51
51
|
assert_equal "The Indie Eight: Polygon talks with the showcase indies launching on PS4", url.headline
|
52
52
|
|
53
|
-
url = Raev.url("
|
53
|
+
url = Raev.url("https://kotaku.com/the-world-of-a-link-to-the-past-has-changed-in-the-new-513424187")
|
54
54
|
assert_equal "The World of A Link To The Past Has Changed in the New 3DS Zelda", url.headline
|
55
55
|
|
56
56
|
url = Raev.url("http://arstechnica.com/gaming/2012/03/journey-a-hauntingly-beautiful-art-house-film-disguised-as-a-game/")
|
@@ -65,7 +65,7 @@ class UrlTest < Test::Unit::TestCase
|
|
65
65
|
url = Raev.url("http://indiegames.com/2013/06/indie_fund_backing_for_two_new.html")
|
66
66
|
assert_equal "Indie Fund backing two new titles for Double Fine", url.headline
|
67
67
|
|
68
|
-
url = Raev.url("https://
|
68
|
+
url = Raev.url("https://killscreen.com/articles/news/cheat-sheet-614/")
|
69
69
|
assert_equal "Pixels on canvas, Spielberg's predictions, and Polytron's glorious tease - Kill Screen", url.headline
|
70
70
|
|
71
71
|
url = Raev.url("https://www.rockpapershotgun.com/2013/06/05/i-spy-an-open-beta-for-spy-party/")
|
@@ -73,21 +73,18 @@ class UrlTest < Test::Unit::TestCase
|
|
73
73
|
end
|
74
74
|
|
75
75
|
should "get pubdate from url" do
|
76
|
-
url = Raev.url("
|
76
|
+
url = Raev.url("https://www.polygon.com/2015/5/18/8620223/witcher-3-guide-witcher-2-witcher")
|
77
77
|
assert_equal_date Date.new(2015, 5, 18), url.pubdate
|
78
78
|
|
79
|
-
url = Raev.url("
|
79
|
+
url = Raev.url("https://kotaku.com/this-week-destiny-got-a-hell-of-a-lot-better-1706391634")
|
80
80
|
assert_equal_date Date.new(2015, 5, 23), url.pubdate
|
81
81
|
|
82
82
|
url = Raev.url("https://www.rockpapershotgun.com/2014/07/03/beauty-beheld-future-unfolding/")
|
83
83
|
assert_equal_date Date.new(2014, 7, 3), url.pubdate
|
84
84
|
|
85
|
-
url = Raev.url("
|
85
|
+
url = Raev.url("https://jayisgames.com/review/the-black-forest-finding-friends.php")
|
86
86
|
assert_equal_date Date.new(2009, 12, 9), url.pubdate
|
87
87
|
|
88
|
-
url = Raev.url("http://boingboing.net/2007/03/21/understanding-games.html")
|
89
|
-
assert_equal_date Date.new(2007, 3, 21), url.pubdate
|
90
|
-
|
91
88
|
url = Raev.url("http://www.wired.com/2014/09/upcoming-a-gorgeous-adventure-game-that-mutates-for-each-player/")
|
92
89
|
assert_equal_date Date.new(2014, 9, 4), url.pubdate
|
93
90
|
|
@@ -102,17 +99,17 @@ class UrlTest < Test::Unit::TestCase
|
|
102
99
|
url = Raev.url("http://www.polygon.com/features/2013/3/25/4128022/gdc-gathering-of-game-makers")
|
103
100
|
assert_equal "Colin Campbell", url.author
|
104
101
|
|
105
|
-
url = Raev.url("
|
102
|
+
url = Raev.url("https://kotaku.com/worth-reading-some-kickstarters-are-lying-about-game-b-1706340013")
|
106
103
|
assert_equal "Patrick Klepek", url.author
|
107
104
|
|
108
|
-
url = Raev.url("https://
|
105
|
+
url = Raev.url("https://killscreen.com/articles/future-unfolding-wonder/")
|
109
106
|
assert_equal "Jess Joho", url.author
|
110
107
|
|
111
108
|
url = Raev.url("http://www.creativeapplications.net/games/future-unfolding-procedurally-generated-world-both-beautiful-and-dangerous/")
|
112
109
|
assert_equal "Filip Visnjic", Raev::normalize_author(url.author)
|
113
110
|
|
114
111
|
url = Raev.url("https://www.youtube.com/watch?v=FmZYPMsq5m4")
|
115
|
-
assert_equal "
|
112
|
+
assert_equal "PsiSyn", url.author
|
116
113
|
end
|
117
114
|
|
118
115
|
should "get score" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: raev
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andreas Zecher
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-09-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -16,28 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1.
|
19
|
+
version: 2.1.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1.
|
26
|
+
version: 2.1.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.
|
33
|
+
version: 1.8.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 1.
|
40
|
+
version: 1.8.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: redirect_follower
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,14 +72,14 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 0.
|
75
|
+
version: 0.10.2
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 0.
|
82
|
+
version: 0.10.2
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: shoulda
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -100,42 +100,42 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: 1.
|
103
|
+
version: 1.14.6
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: 1.
|
110
|
+
version: 1.14.6
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: jeweler
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
115
|
- - '='
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: 2.
|
117
|
+
version: 2.3.7
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - '='
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: 2.
|
124
|
+
version: 2.3.7
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: test-unit
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
|
-
- -
|
129
|
+
- - '='
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: 2.
|
131
|
+
version: 3.2.4
|
132
132
|
type: :development
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
|
-
- -
|
136
|
+
- - '='
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: 2.
|
138
|
+
version: 3.2.4
|
139
139
|
description: Fetch, parse and normalize meta data from websites.
|
140
140
|
email: andreas@madebypixelate.com
|
141
141
|
executables: []
|
@@ -181,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
181
181
|
version: '0'
|
182
182
|
requirements: []
|
183
183
|
rubyforge_project:
|
184
|
-
rubygems_version: 2.
|
184
|
+
rubygems_version: 2.6.13
|
185
185
|
signing_key:
|
186
186
|
specification_version: 4
|
187
187
|
summary: Fetch, parse and normalize meta data from websites.
|