google_news 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +0 -2
- data/README.md +19 -1
- data/Rakefile +16 -0
- data/google_news.gemspec +16 -15
- data/lib/google_news/version.rb +1 -1
- data/lib/google_news.rb +43 -5
- metadata +17 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6168625d6e729f8fa19728bb27caed64749d600c55550edb80c4370775b2eb96
|
|
4
|
+
data.tar.gz: 91c7cdb1d9b6ad61e621f3c914cbf117c2dd7aabc86bc5e62e4856a500d2f6f0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 447e5eb18577d9cb5c0cfce07e68c1527224de5113838ea173d4a2b2f1fe84ff1a84bd58000358b0a5d8d8a257bdb43b0ae5fd13f7be4ad00b5c505021d3764f
|
|
7
|
+
data.tar.gz: 2736ba54da2962201979a53d57029c15d52cb4c2985ea0690e6731887cfff04aab582848e7d860a87c2f410a71ab6126c5d84a40942f4082ac7c104b7c28d91e
|
data/.ruby-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.4.6
|
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
|
@@ -73,7 +73,25 @@ require 'google_news'
|
|
|
73
73
|
|
|
74
74
|
results = GoogleNews.headlines n: 5 # get top 5 news in default language (country: us, language: en)
|
|
75
75
|
puts results
|
|
76
|
-
# => [
|
|
76
|
+
# => [
|
|
77
|
+
# {
|
|
78
|
+
# :title => "Title of the news article",
|
|
79
|
+
# :link => "https://link.to/the/article",
|
|
80
|
+
# :pub_date => Time("Mon, 01 Jan 2024 00:00:00 GMT"),
|
|
81
|
+
# :descriptions => [
|
|
82
|
+
# {
|
|
83
|
+
# :title => "First article, like :title",
|
|
84
|
+
# :link => "https://link.to/the/first/article",
|
|
85
|
+
# :author => "Author Name",
|
|
86
|
+
# },
|
|
87
|
+
# {
|
|
88
|
+
# :title => "Second article, same topic, similar to first",
|
|
89
|
+
# :link => "https://link.to/the/second/article",
|
|
90
|
+
# :author => "Another Author Name",
|
|
91
|
+
# },
|
|
92
|
+
# ...
|
|
93
|
+
# ],
|
|
94
|
+
# ]
|
|
77
95
|
```
|
|
78
96
|
|
|
79
97
|
|
data/Rakefile
CHANGED
|
@@ -14,4 +14,20 @@ task :console do
|
|
|
14
14
|
Pry.start
|
|
15
15
|
end
|
|
16
16
|
|
|
17
|
+
namespace :example do
|
|
18
|
+
task :headlines do
|
|
19
|
+
require_relative 'lib/google_news'
|
|
20
|
+
headlines = GoogleNews.headlines(n: 10, language: 'de', country: 'de')
|
|
21
|
+
headlines.each do |item|
|
|
22
|
+
puts "Title: #{item[:title]}"
|
|
23
|
+
puts "Link: #{item[:link]}"
|
|
24
|
+
puts "Published At: #{item[:pub_date].class}"
|
|
25
|
+
puts "Descriptions: #{item[:descriptions]}"
|
|
26
|
+
puts "-" * 40
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
|
|
17
33
|
RSpec::Core::RakeTask.new(:spec)
|
data/google_news.gemspec
CHANGED
|
@@ -4,15 +4,15 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
|
4
4
|
require 'google_news/version'
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |spec|
|
|
7
|
-
spec.name
|
|
8
|
-
spec.version
|
|
9
|
-
spec.executables
|
|
10
|
-
spec.authors
|
|
11
|
-
spec.email
|
|
7
|
+
spec.name = "google_news"
|
|
8
|
+
spec.version = GoogleNews::VERSION
|
|
9
|
+
spec.executables = %w[google_news]
|
|
10
|
+
spec.authors = ["Matthäus J. N. Beyrle"]
|
|
11
|
+
spec.email = ["google_news.gemspec@mail.magynhard.de"]
|
|
12
12
|
|
|
13
|
-
spec.summary
|
|
14
|
-
spec.homepage
|
|
15
|
-
spec.license
|
|
13
|
+
spec.summary = %q{Get Google News headlines from Ruby or the command line.}
|
|
14
|
+
spec.homepage = "https://github.com/magynhard/google_news"
|
|
15
|
+
spec.license = "MIT"
|
|
16
16
|
|
|
17
17
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
|
|
18
18
|
spec.metadata['allowed_push_host'] = "https://rubygems.org"
|
|
@@ -26,13 +26,14 @@ Gem::Specification.new do |spec|
|
|
|
26
26
|
|
|
27
27
|
spec.require_paths = ['lib']
|
|
28
28
|
|
|
29
|
-
spec.add_runtime_dependency 'rss',
|
|
29
|
+
spec.add_runtime_dependency 'rss', '>= 0.2.6'
|
|
30
30
|
spec.add_runtime_dependency 'open-uri', '>= 0.1.0'
|
|
31
31
|
|
|
32
|
-
spec.add_development_dependency 'bundler',
|
|
33
|
-
spec.add_development_dependency 'rake',
|
|
34
|
-
spec.add_development_dependency 'rspec',
|
|
35
|
-
spec.add_development_dependency 'pry',
|
|
36
|
-
spec.add_development_dependency 'fiddle',
|
|
37
|
-
spec.add_development_dependency 'ostruct',
|
|
32
|
+
spec.add_development_dependency 'bundler', '>= 2.7.1'
|
|
33
|
+
spec.add_development_dependency 'rake', '~> 12.0'
|
|
34
|
+
spec.add_development_dependency 'rspec', '~> 3.0'
|
|
35
|
+
spec.add_development_dependency 'pry', '~> 0.15.2'
|
|
36
|
+
spec.add_development_dependency 'fiddle', '~> 1.1.8'
|
|
37
|
+
spec.add_development_dependency 'ostruct', '~> 0.6.3'
|
|
38
|
+
spec.add_development_dependency 'nokogiri', '~> 1.18'
|
|
38
39
|
end
|
data/lib/google_news/version.rb
CHANGED
data/lib/google_news.rb
CHANGED
|
@@ -5,14 +5,15 @@
|
|
|
5
5
|
require 'open-uri'
|
|
6
6
|
require 'rss'
|
|
7
7
|
require 'uri'
|
|
8
|
+
require 'nokogiri'
|
|
8
9
|
|
|
9
10
|
require_relative 'google_news/version'
|
|
10
11
|
|
|
11
12
|
module GoogleNews
|
|
12
13
|
HEADLINES_RSS = 'https://news.google.com/news/rss'.freeze
|
|
13
|
-
TOPICS_RSS
|
|
14
|
-
GEO_RSS
|
|
15
|
-
SEARCH_RSS
|
|
14
|
+
TOPICS_RSS = 'https://news.google.com/news/rss/headlines/section/topic/'.freeze
|
|
15
|
+
GEO_RSS = 'https://news.google.com/news/rss/headlines/section/geo/'.freeze
|
|
16
|
+
SEARCH_RSS = 'https://news.google.com/rss/search?q='.freeze
|
|
16
17
|
|
|
17
18
|
TOPICS = %w[WORLD NATION BUSINESS TECHNOLOGY ENTERTAINMENT SPORTS SCIENCE HEALTH].freeze
|
|
18
19
|
|
|
@@ -201,17 +202,18 @@ module GoogleNews
|
|
|
201
202
|
# Convert an RSS item to a hash
|
|
202
203
|
#
|
|
203
204
|
# @param [RSS::Rss::Channel::Item] item
|
|
204
|
-
# @return [Hash] with keys :title, :link, :pub_date, :
|
|
205
|
+
# @return [Hash] with keys :title, :link, :pub_date, :descriptions [:title, :link, :author], and :raw_item
|
|
205
206
|
#
|
|
206
207
|
def self.item_to_hash(item)
|
|
207
208
|
{
|
|
208
209
|
title: item.title,
|
|
209
210
|
link: extract_link(item),
|
|
210
211
|
pub_date: (item.respond_to?(:pubDate) ? item.pubDate : nil),
|
|
211
|
-
|
|
212
|
+
descriptions: (item.respond_to?(:description) ? parse_description(item.description) : nil),
|
|
212
213
|
raw_item: item
|
|
213
214
|
}
|
|
214
215
|
end
|
|
216
|
+
|
|
215
217
|
#
|
|
216
218
|
# Extract the link from an RSS item
|
|
217
219
|
#
|
|
@@ -225,6 +227,42 @@ module GoogleNews
|
|
|
225
227
|
nil
|
|
226
228
|
end
|
|
227
229
|
|
|
230
|
+
#
|
|
231
|
+
# The description can consist of an <a> tag with a link, containing the title, followed by a <font> tag with the author's name.
|
|
232
|
+
#
|
|
233
|
+
# But it can also consist of several news items inside an <ol> list, containing <li> items with an <a> tag followed by a <font> tag each.
|
|
234
|
+
#
|
|
235
|
+
# This method creates an array of hashes with :title, :link and :author keys for each news item found in the description.
|
|
236
|
+
#
|
|
237
|
+
# @param [String] description as HTML
|
|
238
|
+
# @return [Array<Hash>, nil] array of news items with :title, :link and :author keys, or nil if description is nil
|
|
239
|
+
#
|
|
240
|
+
def self.parse_description(description)
|
|
241
|
+
return nil if description.nil?
|
|
242
|
+
doc = Nokogiri::HTML(description)
|
|
243
|
+
news_items = []
|
|
244
|
+
if doc.at_css('ol')
|
|
245
|
+
doc.css('ol li').each do |li|
|
|
246
|
+
a_tag = li.at_css('a')
|
|
247
|
+
font_tag = li.at_css('font')
|
|
248
|
+
news_items << {
|
|
249
|
+
title: a_tag ? a_tag.text : nil,
|
|
250
|
+
link: a_tag ? a_tag['href'] : nil,
|
|
251
|
+
author: font_tag ? font_tag.text : nil
|
|
252
|
+
}
|
|
253
|
+
end
|
|
254
|
+
else
|
|
255
|
+
a_tag = doc.at_css('a')
|
|
256
|
+
font_tag = doc.at_css('font')
|
|
257
|
+
news_items << {
|
|
258
|
+
title: a_tag ? a_tag.text : nil,
|
|
259
|
+
link: a_tag ? a_tag['href'] : nil,
|
|
260
|
+
author: font_tag ? font_tag.text : nil
|
|
261
|
+
}
|
|
262
|
+
end
|
|
263
|
+
news_items
|
|
264
|
+
end
|
|
265
|
+
|
|
228
266
|
#
|
|
229
267
|
# Default User-Agent string for HTTP requests
|
|
230
268
|
#
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: google_news
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0
|
|
4
|
+
version: 0.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Matthäus J. N. Beyrle
|
|
@@ -121,6 +121,20 @@ dependencies:
|
|
|
121
121
|
- - "~>"
|
|
122
122
|
- !ruby/object:Gem::Version
|
|
123
123
|
version: 0.6.3
|
|
124
|
+
- !ruby/object:Gem::Dependency
|
|
125
|
+
name: nokogiri
|
|
126
|
+
requirement: !ruby/object:Gem::Requirement
|
|
127
|
+
requirements:
|
|
128
|
+
- - "~>"
|
|
129
|
+
- !ruby/object:Gem::Version
|
|
130
|
+
version: '1.18'
|
|
131
|
+
type: :development
|
|
132
|
+
prerelease: false
|
|
133
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
134
|
+
requirements:
|
|
135
|
+
- - "~>"
|
|
136
|
+
- !ruby/object:Gem::Version
|
|
137
|
+
version: '1.18'
|
|
124
138
|
email:
|
|
125
139
|
- google_news.gemspec@mail.magynhard.de
|
|
126
140
|
executables:
|
|
@@ -131,6 +145,7 @@ files:
|
|
|
131
145
|
- ".gitattributes"
|
|
132
146
|
- ".gitignore"
|
|
133
147
|
- ".rspec"
|
|
148
|
+
- ".ruby-version"
|
|
134
149
|
- ".travis.yml"
|
|
135
150
|
- CHANGELOG.md
|
|
136
151
|
- CODE_OF_CONDUCT.md
|
|
@@ -169,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
169
184
|
- !ruby/object:Gem::Version
|
|
170
185
|
version: '0'
|
|
171
186
|
requirements: []
|
|
172
|
-
rubygems_version: 3.7.
|
|
187
|
+
rubygems_version: 3.7.2
|
|
173
188
|
specification_version: 4
|
|
174
189
|
summary: Get Google News headlines from Ruby or the command line.
|
|
175
190
|
test_files: []
|