raev 0.1.12 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 55c78f5f5edfe18398bc184a0a8268978182d09f
4
- data.tar.gz: 550566eba663b2f9cc5930f98be4f95a9c023ddd
3
+ metadata.gz: 64988202ffa1289c5d9e0f6fe59767e1bf6c2b2d
4
+ data.tar.gz: 1fc08d2929d84fbb849a81c6bd4a843eb952b644
5
5
  SHA512:
6
- metadata.gz: c83322a8f28c89ad3e28fa7739b244a17c711c68cca5e5d2060f7403ad44b7323eece461c69659f345916d5908c401a7e6a898ce73f1dd9f855a4344334bfa3f
7
- data.tar.gz: 529f05c783b388f391a22864efb10968d879163bbb8b41bf02f2d61decd83eb07780eb6a4aaccf7298b1729006c1ac3c69b9f50a024436b36d98ce95238a9fab
6
+ metadata.gz: 42cf3c6f84cc0061b7b5b51e43796546c9e03d1a5064d20b187783f065a354a63b90cbe8ae91254436a915d75c0459dae6eabfb6a9ffbb95e6197add643245fa
7
+ data.tar.gz: f97e2730ca39ad035a0891701cdfc65cf7b1208cd313299a26146079ecaf38d5f4b9f69a36e23017be70f58153452cdbb4391b44c1b4dfcedfeb193ca15c0874
data/Gemfile CHANGED
@@ -1,11 +1,14 @@
1
1
  source "http://rubygems.org"
2
2
 
3
+ gem "json", '>= 1.8.1'
3
4
  gem "nokogiri", ">= 1.6.1"
4
5
  gem "redirect_follower", ">= 0.1.1"
6
+ gem "sanitize", ">= 2.1.0"
7
+ gem "chronic", ">=0.9.1"
5
8
 
6
9
  group :development do
7
10
  gem "shoulda", ">= 0"
8
- gem "bundler", "~> 1.6.3"
11
+ gem "bundler", "~> 1.7.2"
9
12
  gem "jeweler", "2.0.1"
10
13
  gem "test-unit", "~> 2.5.4"
11
14
  end
data/Gemfile.lock CHANGED
@@ -9,6 +9,8 @@ GEM
9
9
  tzinfo (~> 1.1)
10
10
  addressable (2.3.6)
11
11
  builder (3.2.2)
12
+ chronic (0.10.2)
13
+ crass (1.0.2)
12
14
  descendants_tracker (0.0.4)
13
15
  thread_safe (~> 0.3, >= 0.3.1)
14
16
  faraday (0.9.0)
@@ -43,6 +45,8 @@ GEM
43
45
  multipart-post (2.0.0)
44
46
  nokogiri (1.6.4.1)
45
47
  mini_portile (~> 0.6.0)
48
+ nokogumbo (1.4.1)
49
+ nokogiri
46
50
  oauth2 (1.0.0)
47
51
  faraday (>= 0.8, < 0.10)
48
52
  jwt (~> 1.0)
@@ -54,6 +58,10 @@ GEM
54
58
  rdoc (4.1.2)
55
59
  json (~> 1.4)
56
60
  redirect_follower (0.1.1)
61
+ sanitize (4.0.0)
62
+ crass (~> 1.0.2)
63
+ nokogiri (>= 1.4.4)
64
+ nokogumbo (= 1.4.1)
57
65
  shoulda (3.5.0)
58
66
  shoulda-context (~> 1.0, >= 1.0.1)
59
67
  shoulda-matchers (>= 1.4.1, < 3.0)
@@ -69,9 +77,12 @@ PLATFORMS
69
77
  ruby
70
78
 
71
79
  DEPENDENCIES
72
- bundler (~> 1.6.3)
80
+ bundler (~> 1.7.2)
81
+ chronic (>= 0.9.1)
73
82
  jeweler (= 2.0.1)
83
+ json (>= 1.8.1)
74
84
  nokogiri (>= 1.6.1)
75
85
  redirect_follower (>= 0.1.1)
86
+ sanitize (>= 2.1.0)
76
87
  shoulda
77
88
  test-unit (~> 2.5.4)
data/Rakefile CHANGED
@@ -21,10 +21,6 @@ Jeweler::Tasks.new do |gem|
21
21
  gem.description = "Fetch, parse and normalize meta data from websites."
22
22
  gem.email = "andreas@madebypixelate.com"
23
23
  gem.authors = ["Andreas Zecher"]
24
-
25
- # dependencies defined in Gemfile
26
- gem.add_dependency 'nokogiri', '>= 1.4.4'
27
- gem.add_dependency 'redirect_follower', '>= 0.1.1'
28
24
  end
29
25
  Jeweler::RubygemsDotOrgTasks.new
30
26
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.12
1
+ 0.2.1
data/lib/raev/url.rb CHANGED
@@ -1,13 +1,18 @@
1
+ require "chronic"
2
+ require "json"
3
+ require "sanitize"
4
+
1
5
  module Raev
2
6
 
3
7
  class Url
4
-
5
8
  attr_reader :url
6
9
  attr_reader :doc
10
+ attr_reader :linked_data
7
11
 
8
12
  def initialize(url)
9
- @doc = nil
10
13
  @url = url
14
+ @doc = nil
15
+ @linked_data = nil
11
16
  end
12
17
 
13
18
  def base
@@ -82,6 +87,10 @@ module Raev
82
87
  end
83
88
 
84
89
  def headline
90
+ if linked_data && linked_data["headline"]
91
+ return Sanitize.clean(linked_data["headline"])
92
+ end
93
+
85
94
  page_title = nil
86
95
 
87
96
  node = document.css(".twitter-share-button")
@@ -114,15 +123,88 @@ module Raev
114
123
 
115
124
  page_title
116
125
  end
126
+
127
# Best-effort publication date of the page.
#
# Strategies are tried in order and the first one that yields a value wins:
#   1. "datePublished" from the page's JSON-LD data (linked_data)
#   2. a YYYY/M/D segment in the URL itself
#   3. the content attribute of <meta itemprop="datePublished"> or
#      <meta name="pub_date">
#   4. the text of an .entryDate / .entrydate element, parsed by Chronic
#
# A strategy whose value cannot be interpreted as a date is skipped instead
# of raising, so one malformed hint does not hide the remaining ones.
#
# Returns a Date for strategies 1-3, whatever Chronic.parse returns for
# strategy 4, or nil when nothing usable is found.
def pubdate
  # Date.parse raises ArgumentError on unparseable text; treat that as
  # "no date here" so the next strategy gets a chance.
  parse_date = lambda do |text|
    begin
      Date.parse(text.to_s)
    rescue ArgumentError
      nil
    end
  end

  # 1. JSON-LD. The top-level JSON-LD value may also be an array, in which
  #    case there is no "datePublished" key to read directly.
  if linked_data.is_a?(Hash) && linked_data["datePublished"]
    parsed = parse_date.call(linked_data["datePublished"])
    return parsed if parsed
  end

  # 2. Date embedded in the URL path. Only accept it when the numbers form
  #    a real calendar date (e.g. reject 2015/13/45).
  url_date = @url.match(%r{([0-9]{4})/([0-9]{1,2})/([0-9]{1,2})})
  if url_date
    year, month, day = url_date.captures.map(&:to_i)
    return Date.new(year, month, day) if Date.valid_date?(year, month, day)
  end

  # 3. Publication date exposed through meta tags.
  meta = document.search("meta[itemprop='datePublished'], meta[name='pub_date']").first
  if meta
    parsed = parse_date.call(meta["content"])
    return parsed if parsed
  end

  # 4. Free-text date element; punctuation is stripped before handing the
  #    text to Chronic.
  entry = document.search(".entryDate, .entrydate").first
  return Chronic.parse(entry.content.gsub(/[^a-zA-Z0-9\s]/, "").strip) if entry

  nil
end
117
152
 
153
# Author name scraped from the page, or "" when none is found.
#
# All candidate selectors are combined into one query and the first matching
# node is used. A node with more than four whitespace-separated words is
# rejected. An accepted value is sanitized, stripped and cut to 256 characters.
def author
  selectors = [
    '.author-info .name',
    '.author-top a',
    '.yt-user-name',
    'a[rel~="author"]',
    'a[itemprop~="author"]',
    '.author h3 a',
    '.author',
    '.posted-by a',
    '.entryAuthor a',
    'a.names',
    'a.byline-author',
    '.byline a',
    '.author.vcard a',
    'p.info a',
    '.author-name',
    '.upcased',
    'a[rel~="nofollow"]'
  ]

  match = document.search(selectors.join(", ")).first
  return "" unless match

  # Longer text is rejected rather than returned as a name.
  return "" if match.content.split.size > 4

  Sanitize.clean(match.content).strip[0..255]
end
186
+
118
187
  private
119
188
 
120
189
  def document
121
190
  if @doc.nil?
122
191
  @doc = Nokogiri::HTML(open(@url))
123
- else
124
- @doc
125
192
  end
193
+
194
+ @doc
195
+ end
196
+
197
# Parsed JSON-LD data of the page, memoized in @linked_data.
#
# Reads the first <script type="application/ld+json"> element of the
# document and parses its content as JSON.
#
# Returns the parsed value, or nil when the page has no such script or its
# content is not valid JSON. Malformed JSON is treated as "no linked data"
# so that callers can fall back to their other extraction strategies.
def linked_data
  return @linked_data unless @linked_data.nil?

  script = document.css("script[type=\"application/ld+json\"]").first
  return nil unless script

  @linked_data = begin
    JSON.parse(script.content)
  rescue JSON::ParserError
    nil
  end
end
208
+
127
209
  end
128
210
  end
data/raev.gemspec CHANGED
@@ -2,14 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
+ # stub: raev 0.2.1 ruby lib
5
6
 
6
7
  Gem::Specification.new do |s|
7
8
  s.name = "raev"
8
- s.version = "0.1.12"
9
+ s.version = "0.2.1"
9
10
 
10
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
+ s.require_paths = ["lib"]
11
13
  s.authors = ["Andreas Zecher"]
12
- s.date = "2014-11-26"
14
+ s.date = "2015-08-20"
13
15
  s.description = "Fetch, parse and normalize meta data from websites."
14
16
  s.email = "andreas@madebypixelate.com"
15
17
  s.extra_rdoc_files = [
@@ -37,41 +39,43 @@ Gem::Specification.new do |s|
37
39
  ]
38
40
  s.homepage = "http://github.com/pixelate/raev"
39
41
  s.licenses = ["MIT"]
40
- s.require_paths = ["lib"]
41
- s.rubygems_version = "2.0.3"
42
+ s.rubygems_version = "2.4.5"
42
43
  s.summary = "Fetch, parse and normalize meta data from websites."
43
44
 
44
45
  if s.respond_to? :specification_version then
45
46
  s.specification_version = 4
46
47
 
47
48
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
+ s.add_runtime_dependency(%q<json>, [">= 1.8.1"])
48
50
  s.add_runtime_dependency(%q<nokogiri>, [">= 1.6.1"])
49
51
  s.add_runtime_dependency(%q<redirect_follower>, [">= 0.1.1"])
52
+ s.add_runtime_dependency(%q<sanitize>, [">= 2.1.0"])
53
+ s.add_runtime_dependency(%q<chronic>, [">= 0.9.1"])
50
54
  s.add_development_dependency(%q<shoulda>, [">= 0"])
51
- s.add_development_dependency(%q<bundler>, ["~> 1.6.3"])
55
+ s.add_development_dependency(%q<bundler>, ["~> 1.7.2"])
52
56
  s.add_development_dependency(%q<jeweler>, ["= 2.0.1"])
53
57
  s.add_development_dependency(%q<test-unit>, ["~> 2.5.4"])
54
- s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.4"])
55
- s.add_runtime_dependency(%q<redirect_follower>, [">= 0.1.1"])
56
58
  else
59
+ s.add_dependency(%q<json>, [">= 1.8.1"])
57
60
  s.add_dependency(%q<nokogiri>, [">= 1.6.1"])
58
61
  s.add_dependency(%q<redirect_follower>, [">= 0.1.1"])
62
+ s.add_dependency(%q<sanitize>, [">= 2.1.0"])
63
+ s.add_dependency(%q<chronic>, [">= 0.9.1"])
59
64
  s.add_dependency(%q<shoulda>, [">= 0"])
60
- s.add_dependency(%q<bundler>, ["~> 1.6.3"])
65
+ s.add_dependency(%q<bundler>, ["~> 1.7.2"])
61
66
  s.add_dependency(%q<jeweler>, ["= 2.0.1"])
62
67
  s.add_dependency(%q<test-unit>, ["~> 2.5.4"])
63
- s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
64
- s.add_dependency(%q<redirect_follower>, [">= 0.1.1"])
65
68
  end
66
69
  else
70
+ s.add_dependency(%q<json>, [">= 1.8.1"])
67
71
  s.add_dependency(%q<nokogiri>, [">= 1.6.1"])
68
72
  s.add_dependency(%q<redirect_follower>, [">= 0.1.1"])
73
+ s.add_dependency(%q<sanitize>, [">= 2.1.0"])
74
+ s.add_dependency(%q<chronic>, [">= 0.9.1"])
69
75
  s.add_dependency(%q<shoulda>, [">= 0"])
70
- s.add_dependency(%q<bundler>, ["~> 1.6.3"])
76
+ s.add_dependency(%q<bundler>, ["~> 1.7.2"])
71
77
  s.add_dependency(%q<jeweler>, ["= 2.0.1"])
72
78
  s.add_dependency(%q<test-unit>, ["~> 2.5.4"])
73
- s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
74
- s.add_dependency(%q<redirect_follower>, [">= 0.1.1"])
75
79
  end
76
80
  end
77
81
 
data/test/test_url.rb CHANGED
@@ -42,8 +42,8 @@ class UrlTest < Test::Unit::TestCase
42
42
  url = Raev.url("http://arstechnica.com")
43
43
  assert_equal "http://feeds.arstechnica.com/arstechnica/index/", url.feed
44
44
 
45
- url = Raev.url("http://www.edge-online.com")
46
- assert_equal "http://www.edge-online.com/feed/", url.feed
45
+ url = Raev.url("http://www.kotaku.com")
46
+ assert_equal "http://feeds.gawker.com/kotaku/full", url.feed
47
47
  end
48
48
 
49
49
  should "get headline from url" do
@@ -59,9 +59,6 @@ class UrlTest < Test::Unit::TestCase
59
59
  url = Raev.url("http://www.creativeapplications.net/games/below-new-from-the-creators-of-sword-sworcery/")
60
60
  assert_equal "Below – New from the creators of Sword & Sworcery", url.headline
61
61
 
62
- url = Raev.url("http://www.edge-online.com/news/e3-2013-watch-the-first-hohokum-gameplay-footage-here/")
63
- assert_equal "E3 2013: watch the first Hohokum gameplay footage here", url.headline
64
-
65
62
  url = Raev.url("http://www.giantbomb.com/videos/e3-2013-fez-ii-announcement-teaser/2300-7606/")
66
63
  assert_equal "E3 2013: Fez II Announcement Teaser", url.headline
67
64
 
@@ -74,4 +71,51 @@ class UrlTest < Test::Unit::TestCase
74
71
  url = Raev.url("http://www.rockpapershotgun.com/2013/06/05/i-spy-an-open-beta-for-spy-party/")
75
72
  assert_equal "I Spy An Open Beta For Spy Party", url.headline
76
73
  end
74
+
75
+ should "get pubdate from url" do
76
+ url = Raev.url("http://www.polygon.com/2015/5/18/8620223/witcher-3-guide-witcher-2-witcher")
77
+ assert_equal_date Date.new(2015, 5, 18), url.pubdate
78
+
79
+ url = Raev.url("http://kotaku.com/this-week-destiny-got-a-hell-of-a-lot-better-1706391634")
80
+ assert_equal_date Date.new(2015, 5, 23), url.pubdate
81
+
82
+ url = Raev.url("http://www.rockpapershotgun.com/2014/07/03/beauty-beheld-future-unfolding/")
83
+ assert_equal_date Date.new(2014, 7, 3), url.pubdate
84
+
85
+ url = Raev.url("http://jayisgames.com/review/the-black-forest-finding-friends.php")
86
+ assert_equal_date Date.new(2009, 12, 9), url.pubdate
87
+
88
+ url = Raev.url("http://boingboing.net/2007/03/21/understanding-games.html")
89
+ assert_equal_date Date.new(2007, 3, 21), url.pubdate
90
+
91
+ url = Raev.url("http://www.wired.com/2014/09/upcoming-a-gorgeous-adventure-game-that-mutates-for-each-player/")
92
+ assert_equal_date Date.new(2014, 9, 4), url.pubdate
93
+
94
+ url = Raev.url("http://www.pcgamer.com/harebrained-schemes-hints-at-something-new-from-an-old-franchise/")
95
+ assert_equal_date Date.new(2015, 5, 21), url.pubdate
96
+ end
97
+
98
+ should "get author from url" do
99
+ url = Raev.url("http://www.rockpapershotgun.com/2014/07/03/beauty-beheld-future-unfolding/")
100
+ assert_equal "Adam Smith", url.author
101
+
102
+ url = Raev.url("http://www.polygon.com/features/2013/3/25/4128022/gdc-gathering-of-game-makers")
103
+ assert_equal "Colin Campbell", url.author
104
+
105
+ url = Raev.url("http://kotaku.com/worth-reading-some-kickstarters-are-lying-about-game-b-1706340013")
106
+ assert_equal "Patrick Klepek", url.author
107
+
108
+ url = Raev.url("http://killscreendaily.com/articles/future-unfolding-wonder/")
109
+ assert_equal "Jess Joho", url.author
110
+
111
+ url = Raev.url("http://www.creativeapplications.net/games/future-unfolding-procedurally-generated-world-both-beautiful-and-dangerous/")
112
+ assert_equal "Filip Visnjic", url.author
113
+ end
114
+
115
+ private
116
+
117
# Asserts that two date-like values fall on the same calendar day.
#
# Both arguments only need to respond to #strftime; they are compared
# through their '%Y-%m-%d' rendering, so any time-of-day component is
# ignored.
def assert_equal_date(expected, actual)
  assert_equal expected.strftime('%Y-%m-%d'),
               actual.strftime('%Y-%m-%d'),
               "expected #{actual.inspect} to fall on #{expected.strftime('%Y-%m-%d')}"
end
120
+
77
121
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: raev
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.12
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andreas Zecher
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-26 00:00:00.000000000 Z
11
+ date: 2015-08-20 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: json
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.8.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 1.8.1
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: nokogiri
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -39,89 +53,89 @@ dependencies:
39
53
  - !ruby/object:Gem::Version
40
54
  version: 0.1.1
41
55
  - !ruby/object:Gem::Dependency
42
- name: shoulda
56
+ name: sanitize
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
59
  - - '>='
46
60
  - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
61
+ version: 2.1.0
62
+ type: :runtime
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
66
  - - '>='
53
67
  - !ruby/object:Gem::Version
54
- version: '0'
68
+ version: 2.1.0
55
69
  - !ruby/object:Gem::Dependency
56
- name: bundler
70
+ name: chronic
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
- - - ~>
73
+ - - '>='
60
74
  - !ruby/object:Gem::Version
61
- version: 1.6.3
62
- type: :development
75
+ version: 0.9.1
76
+ type: :runtime
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
- - - ~>
80
+ - - '>='
67
81
  - !ruby/object:Gem::Version
68
- version: 1.6.3
82
+ version: 0.9.1
69
83
  - !ruby/object:Gem::Dependency
70
- name: jeweler
84
+ name: shoulda
71
85
  requirement: !ruby/object:Gem::Requirement
72
86
  requirements:
73
- - - '='
87
+ - - '>='
74
88
  - !ruby/object:Gem::Version
75
- version: 2.0.1
89
+ version: '0'
76
90
  type: :development
77
91
  prerelease: false
78
92
  version_requirements: !ruby/object:Gem::Requirement
79
93
  requirements:
80
- - - '='
94
+ - - '>='
81
95
  - !ruby/object:Gem::Version
82
- version: 2.0.1
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
- name: test-unit
98
+ name: bundler
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
101
  - - ~>
88
102
  - !ruby/object:Gem::Version
89
- version: 2.5.4
103
+ version: 1.7.2
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
108
  - - ~>
95
109
  - !ruby/object:Gem::Version
96
- version: 2.5.4
110
+ version: 1.7.2
97
111
  - !ruby/object:Gem::Dependency
98
- name: nokogiri
112
+ name: jeweler
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
- - - '>='
115
+ - - '='
102
116
  - !ruby/object:Gem::Version
103
- version: 1.4.4
104
- type: :runtime
117
+ version: 2.0.1
118
+ type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
- - - '>='
122
+ - - '='
109
123
  - !ruby/object:Gem::Version
110
- version: 1.4.4
124
+ version: 2.0.1
111
125
  - !ruby/object:Gem::Dependency
112
- name: redirect_follower
126
+ name: test-unit
113
127
  requirement: !ruby/object:Gem::Requirement
114
128
  requirements:
115
- - - '>='
129
+ - - ~>
116
130
  - !ruby/object:Gem::Version
117
- version: 0.1.1
118
- type: :runtime
131
+ version: 2.5.4
132
+ type: :development
119
133
  prerelease: false
120
134
  version_requirements: !ruby/object:Gem::Requirement
121
135
  requirements:
122
- - - '>='
136
+ - - ~>
123
137
  - !ruby/object:Gem::Version
124
- version: 0.1.1
138
+ version: 2.5.4
125
139
  description: Fetch, parse and normalize meta data from websites.
126
140
  email: andreas@madebypixelate.com
127
141
  executables: []
@@ -167,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
167
181
  version: '0'
168
182
  requirements: []
169
183
  rubyforge_project:
170
- rubygems_version: 2.0.3
184
+ rubygems_version: 2.4.5
171
185
  signing_key:
172
186
  specification_version: 4
173
187
  summary: Fetch, parse and normalize meta data from websites.