raev 0.1.12 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 55c78f5f5edfe18398bc184a0a8268978182d09f
4
- data.tar.gz: 550566eba663b2f9cc5930f98be4f95a9c023ddd
3
+ metadata.gz: 64988202ffa1289c5d9e0f6fe59767e1bf6c2b2d
4
+ data.tar.gz: 1fc08d2929d84fbb849a81c6bd4a843eb952b644
5
5
  SHA512:
6
- metadata.gz: c83322a8f28c89ad3e28fa7739b244a17c711c68cca5e5d2060f7403ad44b7323eece461c69659f345916d5908c401a7e6a898ce73f1dd9f855a4344334bfa3f
7
- data.tar.gz: 529f05c783b388f391a22864efb10968d879163bbb8b41bf02f2d61decd83eb07780eb6a4aaccf7298b1729006c1ac3c69b9f50a024436b36d98ce95238a9fab
6
+ metadata.gz: 42cf3c6f84cc0061b7b5b51e43796546c9e03d1a5064d20b187783f065a354a63b90cbe8ae91254436a915d75c0459dae6eabfb6a9ffbb95e6197add643245fa
7
+ data.tar.gz: f97e2730ca39ad035a0891701cdfc65cf7b1208cd313299a26146079ecaf38d5f4b9f69a36e23017be70f58153452cdbb4391b44c1b4dfcedfeb193ca15c0874
data/Gemfile CHANGED
@@ -1,11 +1,14 @@
1
1
  source "http://rubygems.org"
2
2
 
3
+ gem "json", '>= 1.8.1'
3
4
  gem "nokogiri", ">= 1.6.1"
4
5
  gem "redirect_follower", ">= 0.1.1"
6
+ gem "sanitize", ">= 2.1.0"
7
+ gem "chronic", ">=0.9.1"
5
8
 
6
9
  group :development do
7
10
  gem "shoulda", ">= 0"
8
- gem "bundler", "~> 1.6.3"
11
+ gem "bundler", "~> 1.7.2"
9
12
  gem "jeweler", "2.0.1"
10
13
  gem "test-unit", "~> 2.5.4"
11
14
  end
data/Gemfile.lock CHANGED
@@ -9,6 +9,8 @@ GEM
9
9
  tzinfo (~> 1.1)
10
10
  addressable (2.3.6)
11
11
  builder (3.2.2)
12
+ chronic (0.10.2)
13
+ crass (1.0.2)
12
14
  descendants_tracker (0.0.4)
13
15
  thread_safe (~> 0.3, >= 0.3.1)
14
16
  faraday (0.9.0)
@@ -43,6 +45,8 @@ GEM
43
45
  multipart-post (2.0.0)
44
46
  nokogiri (1.6.4.1)
45
47
  mini_portile (~> 0.6.0)
48
+ nokogumbo (1.4.1)
49
+ nokogiri
46
50
  oauth2 (1.0.0)
47
51
  faraday (>= 0.8, < 0.10)
48
52
  jwt (~> 1.0)
@@ -54,6 +58,10 @@ GEM
54
58
  rdoc (4.1.2)
55
59
  json (~> 1.4)
56
60
  redirect_follower (0.1.1)
61
+ sanitize (4.0.0)
62
+ crass (~> 1.0.2)
63
+ nokogiri (>= 1.4.4)
64
+ nokogumbo (= 1.4.1)
57
65
  shoulda (3.5.0)
58
66
  shoulda-context (~> 1.0, >= 1.0.1)
59
67
  shoulda-matchers (>= 1.4.1, < 3.0)
@@ -69,9 +77,12 @@ PLATFORMS
69
77
  ruby
70
78
 
71
79
  DEPENDENCIES
72
- bundler (~> 1.6.3)
80
+ bundler (~> 1.7.2)
81
+ chronic (>= 0.9.1)
73
82
  jeweler (= 2.0.1)
83
+ json (>= 1.8.1)
74
84
  nokogiri (>= 1.6.1)
75
85
  redirect_follower (>= 0.1.1)
86
+ sanitize (>= 2.1.0)
76
87
  shoulda
77
88
  test-unit (~> 2.5.4)
data/Rakefile CHANGED
@@ -21,10 +21,6 @@ Jeweler::Tasks.new do |gem|
21
21
  gem.description = "Fetch, parse and normalize meta data from websites."
22
22
  gem.email = "andreas@madebypixelate.com"
23
23
  gem.authors = ["Andreas Zecher"]
24
-
25
- # dependencies defined in Gemfile
26
- gem.add_dependency 'nokogiri', '>= 1.4.4'
27
- gem.add_dependency 'redirect_follower', '>= 0.1.1'
28
24
  end
29
25
  Jeweler::RubygemsDotOrgTasks.new
30
26
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.12
1
+ 0.2.1
data/lib/raev/url.rb CHANGED
@@ -1,13 +1,18 @@
1
+ require "chronic"
2
+ require "json"
3
+ require "sanitize"
4
+
1
5
  module Raev
2
6
 
3
7
  class Url
4
-
5
8
  attr_reader :url
6
9
  attr_reader :doc
10
+ attr_reader :linked_data
7
11
 
8
12
  def initialize(url)
9
- @doc = nil
10
13
  @url = url
14
+ @doc = nil
15
+ @linked_data = nil
11
16
  end
12
17
 
13
18
  def base
@@ -82,6 +87,10 @@ module Raev
82
87
  end
83
88
 
84
89
  def headline
90
+ if linked_data && linked_data["headline"]
91
+ return Sanitize.clean(linked_data["headline"])
92
+ end
93
+
85
94
  page_title = nil
86
95
 
87
96
  node = document.css(".twitter-share-button")
@@ -114,15 +123,88 @@ module Raev
114
123
 
115
124
  page_title
116
125
  end
126
+
127
+ def pubdate
128
+ if linked_data && linked_data["datePublished"]
129
+ return Date.parse(linked_data["datePublished"])
130
+ end
131
+
132
+ date_elements = @url.match(/[0-9]{4}\/[0-9]{1,2}\/[0-9]{1,2}/).to_s.split("/")
133
+
134
+ if date_elements.size == 3
135
+ return Date.new(date_elements[0].to_i, date_elements[1].to_i, date_elements[2].to_i)
136
+ else
137
+ node = document.search("meta[itemprop='datePublished'], meta[name='pub_date']").first
138
+
139
+ if node
140
+ return Date.parse(node.attribute("content"))
141
+ else
142
+ node = document.search(".entryDate, .entrydate").first
143
+
144
+ if node
145
+ return Chronic.parse(node.content.gsub(/[^a-zA-Z0-9\s]/,"").strip)
146
+ end
147
+ end
148
+ end
149
+
150
+ nil
151
+ end
117
152
 
153
+ def author
154
+ cssSelectors = [
155
+ '.author-info .name',
156
+ '.author-top a',
157
+ '.yt-user-name',
158
+ 'a[rel~="author"]',
159
+ 'a[itemprop~="author"]',
160
+ '.author h3 a',
161
+ '.author',
162
+ '.posted-by a',
163
+ '.entryAuthor a',
164
+ 'a.names',
165
+ 'a.byline-author',
166
+ '.byline a',
167
+ '.author.vcard a',
168
+ 'p.info a',
169
+ '.author-name',
170
+ '.upcased',
171
+ 'a[rel~="nofollow"]'
172
+ ]
173
+
174
+ node = document.search(cssSelectors.join(", ")).first
175
+
176
+ if node
177
+ words = node.content.split.size
178
+
179
+ if words <= 4
180
+ return Sanitize.clean(node.content).strip[0..255]
181
+ end
182
+ end
183
+
184
+ ""
185
+ end
186
+
118
187
  private
119
188
 
120
189
  def document
121
190
  if @doc.nil?
122
191
  @doc = Nokogiri::HTML(open(@url))
123
- else
124
- @doc
125
192
  end
193
+
194
+ @doc
195
+ end
196
+
197
+ def linked_data
198
+ if @linked_data.nil?
199
+ node = document.css("script[type=\"application/ld+json\"]")
200
+
201
+ if node.first
202
+ @linked_data = JSON.parse(node.first.content)
203
+ end
204
+ end
205
+
206
+ @linked_data
126
207
  end
208
+
127
209
  end
128
210
  end
data/raev.gemspec CHANGED
@@ -2,14 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
+ # stub: raev 0.2.1 ruby lib
5
6
 
6
7
  Gem::Specification.new do |s|
7
8
  s.name = "raev"
8
- s.version = "0.1.12"
9
+ s.version = "0.2.1"
9
10
 
10
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
+ s.require_paths = ["lib"]
11
13
  s.authors = ["Andreas Zecher"]
12
- s.date = "2014-11-26"
14
+ s.date = "2015-08-20"
13
15
  s.description = "Fetch, parse and normalize meta data from websites."
14
16
  s.email = "andreas@madebypixelate.com"
15
17
  s.extra_rdoc_files = [
@@ -37,41 +39,43 @@ Gem::Specification.new do |s|
37
39
  ]
38
40
  s.homepage = "http://github.com/pixelate/raev"
39
41
  s.licenses = ["MIT"]
40
- s.require_paths = ["lib"]
41
- s.rubygems_version = "2.0.3"
42
+ s.rubygems_version = "2.4.5"
42
43
  s.summary = "Fetch, parse and normalize meta data from websites."
43
44
 
44
45
  if s.respond_to? :specification_version then
45
46
  s.specification_version = 4
46
47
 
47
48
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
+ s.add_runtime_dependency(%q<json>, [">= 1.8.1"])
48
50
  s.add_runtime_dependency(%q<nokogiri>, [">= 1.6.1"])
49
51
  s.add_runtime_dependency(%q<redirect_follower>, [">= 0.1.1"])
52
+ s.add_runtime_dependency(%q<sanitize>, [">= 2.1.0"])
53
+ s.add_runtime_dependency(%q<chronic>, [">= 0.9.1"])
50
54
  s.add_development_dependency(%q<shoulda>, [">= 0"])
51
- s.add_development_dependency(%q<bundler>, ["~> 1.6.3"])
55
+ s.add_development_dependency(%q<bundler>, ["~> 1.7.2"])
52
56
  s.add_development_dependency(%q<jeweler>, ["= 2.0.1"])
53
57
  s.add_development_dependency(%q<test-unit>, ["~> 2.5.4"])
54
- s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.4"])
55
- s.add_runtime_dependency(%q<redirect_follower>, [">= 0.1.1"])
56
58
  else
59
+ s.add_dependency(%q<json>, [">= 1.8.1"])
57
60
  s.add_dependency(%q<nokogiri>, [">= 1.6.1"])
58
61
  s.add_dependency(%q<redirect_follower>, [">= 0.1.1"])
62
+ s.add_dependency(%q<sanitize>, [">= 2.1.0"])
63
+ s.add_dependency(%q<chronic>, [">= 0.9.1"])
59
64
  s.add_dependency(%q<shoulda>, [">= 0"])
60
- s.add_dependency(%q<bundler>, ["~> 1.6.3"])
65
+ s.add_dependency(%q<bundler>, ["~> 1.7.2"])
61
66
  s.add_dependency(%q<jeweler>, ["= 2.0.1"])
62
67
  s.add_dependency(%q<test-unit>, ["~> 2.5.4"])
63
- s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
64
- s.add_dependency(%q<redirect_follower>, [">= 0.1.1"])
65
68
  end
66
69
  else
70
+ s.add_dependency(%q<json>, [">= 1.8.1"])
67
71
  s.add_dependency(%q<nokogiri>, [">= 1.6.1"])
68
72
  s.add_dependency(%q<redirect_follower>, [">= 0.1.1"])
73
+ s.add_dependency(%q<sanitize>, [">= 2.1.0"])
74
+ s.add_dependency(%q<chronic>, [">= 0.9.1"])
69
75
  s.add_dependency(%q<shoulda>, [">= 0"])
70
- s.add_dependency(%q<bundler>, ["~> 1.6.3"])
76
+ s.add_dependency(%q<bundler>, ["~> 1.7.2"])
71
77
  s.add_dependency(%q<jeweler>, ["= 2.0.1"])
72
78
  s.add_dependency(%q<test-unit>, ["~> 2.5.4"])
73
- s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
74
- s.add_dependency(%q<redirect_follower>, [">= 0.1.1"])
75
79
  end
76
80
  end
77
81
 
data/test/test_url.rb CHANGED
@@ -42,8 +42,8 @@ class UrlTest < Test::Unit::TestCase
42
42
  url = Raev.url("http://arstechnica.com")
43
43
  assert_equal "http://feeds.arstechnica.com/arstechnica/index/", url.feed
44
44
 
45
- url = Raev.url("http://www.edge-online.com")
46
- assert_equal "http://www.edge-online.com/feed/", url.feed
45
+ url = Raev.url("http://www.kotaku.com")
46
+ assert_equal "http://feeds.gawker.com/kotaku/full", url.feed
47
47
  end
48
48
 
49
49
  should "get headline from url" do
@@ -59,9 +59,6 @@ class UrlTest < Test::Unit::TestCase
59
59
  url = Raev.url("http://www.creativeapplications.net/games/below-new-from-the-creators-of-sword-sworcery/")
60
60
  assert_equal "Below – New from the creators of Sword & Sworcery", url.headline
61
61
 
62
- url = Raev.url("http://www.edge-online.com/news/e3-2013-watch-the-first-hohokum-gameplay-footage-here/")
63
- assert_equal "E3 2013: watch the first Hohokum gameplay footage here", url.headline
64
-
65
62
  url = Raev.url("http://www.giantbomb.com/videos/e3-2013-fez-ii-announcement-teaser/2300-7606/")
66
63
  assert_equal "E3 2013: Fez II Announcement Teaser", url.headline
67
64
 
@@ -74,4 +71,51 @@ class UrlTest < Test::Unit::TestCase
74
71
  url = Raev.url("http://www.rockpapershotgun.com/2013/06/05/i-spy-an-open-beta-for-spy-party/")
75
72
  assert_equal "I Spy An Open Beta For Spy Party", url.headline
76
73
  end
74
+
75
+ should "get pubdate from url" do
76
+ url = Raev.url("http://www.polygon.com/2015/5/18/8620223/witcher-3-guide-witcher-2-witcher")
77
+ assert_equal_date Date.new(2015, 5, 18), url.pubdate
78
+
79
+ url = Raev.url("http://kotaku.com/this-week-destiny-got-a-hell-of-a-lot-better-1706391634")
80
+ assert_equal_date Date.new(2015, 5, 23), url.pubdate
81
+
82
+ url = Raev.url("http://www.rockpapershotgun.com/2014/07/03/beauty-beheld-future-unfolding/")
83
+ assert_equal_date Date.new(2014, 7, 3), url.pubdate
84
+
85
+ url = Raev.url("http://jayisgames.com/review/the-black-forest-finding-friends.php")
86
+ assert_equal_date Date.new(2009, 12, 9), url.pubdate
87
+
88
+ url = Raev.url("http://boingboing.net/2007/03/21/understanding-games.html")
89
+ assert_equal_date Date.new(2007, 3, 21), url.pubdate
90
+
91
+ url = Raev.url("http://www.wired.com/2014/09/upcoming-a-gorgeous-adventure-game-that-mutates-for-each-player/")
92
+ assert_equal_date Date.new(2014, 9, 4), url.pubdate
93
+
94
+ url = Raev.url("http://www.pcgamer.com/harebrained-schemes-hints-at-something-new-from-an-old-franchise/")
95
+ assert_equal_date Date.new(2015, 5, 21), url.pubdate
96
+ end
97
+
98
+ should "get author from url" do
99
+ url = Raev.url("http://www.rockpapershotgun.com/2014/07/03/beauty-beheld-future-unfolding/")
100
+ assert_equal "Adam Smith", url.author
101
+
102
+ url = Raev.url("http://www.polygon.com/features/2013/3/25/4128022/gdc-gathering-of-game-makers")
103
+ assert_equal "Colin Campbell", url.author
104
+
105
+ url = Raev.url("http://kotaku.com/worth-reading-some-kickstarters-are-lying-about-game-b-1706340013")
106
+ assert_equal "Patrick Klepek", url.author
107
+
108
+ url = Raev.url("http://killscreendaily.com/articles/future-unfolding-wonder/")
109
+ assert_equal "Jess Joho", url.author
110
+
111
+ url = Raev.url("http://www.creativeapplications.net/games/future-unfolding-procedurally-generated-world-both-beautiful-and-dangerous/")
112
+ assert_equal "Filip Visnjic", url.author
113
+ end
114
+
115
+ private
116
+
117
+ def assert_equal_date dateA, dateB
118
+ assert_equal dateA.strftime('%Y-%m-%d'), dateB.strftime('%Y-%m-%d')
119
+ end
120
+
77
121
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: raev
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.12
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andreas Zecher
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-26 00:00:00.000000000 Z
11
+ date: 2015-08-20 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: json
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.8.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 1.8.1
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: nokogiri
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -39,89 +53,89 @@ dependencies:
39
53
  - !ruby/object:Gem::Version
40
54
  version: 0.1.1
41
55
  - !ruby/object:Gem::Dependency
42
- name: shoulda
56
+ name: sanitize
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
59
  - - '>='
46
60
  - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
61
+ version: 2.1.0
62
+ type: :runtime
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
66
  - - '>='
53
67
  - !ruby/object:Gem::Version
54
- version: '0'
68
+ version: 2.1.0
55
69
  - !ruby/object:Gem::Dependency
56
- name: bundler
70
+ name: chronic
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
- - - ~>
73
+ - - '>='
60
74
  - !ruby/object:Gem::Version
61
- version: 1.6.3
62
- type: :development
75
+ version: 0.9.1
76
+ type: :runtime
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
- - - ~>
80
+ - - '>='
67
81
  - !ruby/object:Gem::Version
68
- version: 1.6.3
82
+ version: 0.9.1
69
83
  - !ruby/object:Gem::Dependency
70
- name: jeweler
84
+ name: shoulda
71
85
  requirement: !ruby/object:Gem::Requirement
72
86
  requirements:
73
- - - '='
87
+ - - '>='
74
88
  - !ruby/object:Gem::Version
75
- version: 2.0.1
89
+ version: '0'
76
90
  type: :development
77
91
  prerelease: false
78
92
  version_requirements: !ruby/object:Gem::Requirement
79
93
  requirements:
80
- - - '='
94
+ - - '>='
81
95
  - !ruby/object:Gem::Version
82
- version: 2.0.1
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
- name: test-unit
98
+ name: bundler
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
101
  - - ~>
88
102
  - !ruby/object:Gem::Version
89
- version: 2.5.4
103
+ version: 1.7.2
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
108
  - - ~>
95
109
  - !ruby/object:Gem::Version
96
- version: 2.5.4
110
+ version: 1.7.2
97
111
  - !ruby/object:Gem::Dependency
98
- name: nokogiri
112
+ name: jeweler
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
- - - '>='
115
+ - - '='
102
116
  - !ruby/object:Gem::Version
103
- version: 1.4.4
104
- type: :runtime
117
+ version: 2.0.1
118
+ type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
- - - '>='
122
+ - - '='
109
123
  - !ruby/object:Gem::Version
110
- version: 1.4.4
124
+ version: 2.0.1
111
125
  - !ruby/object:Gem::Dependency
112
- name: redirect_follower
126
+ name: test-unit
113
127
  requirement: !ruby/object:Gem::Requirement
114
128
  requirements:
115
- - - '>='
129
+ - - ~>
116
130
  - !ruby/object:Gem::Version
117
- version: 0.1.1
118
- type: :runtime
131
+ version: 2.5.4
132
+ type: :development
119
133
  prerelease: false
120
134
  version_requirements: !ruby/object:Gem::Requirement
121
135
  requirements:
122
- - - '>='
136
+ - - ~>
123
137
  - !ruby/object:Gem::Version
124
- version: 0.1.1
138
+ version: 2.5.4
125
139
  description: Fetch, parse and normalize meta data from websites.
126
140
  email: andreas@madebypixelate.com
127
141
  executables: []
@@ -167,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
167
181
  version: '0'
168
182
  requirements: []
169
183
  rubyforge_project:
170
- rubygems_version: 2.0.3
184
+ rubygems_version: 2.4.5
171
185
  signing_key:
172
186
  specification_version: 4
173
187
  summary: Fetch, parse and normalize meta data from websites.