google_news 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d82fabbe3456707329c9d55f22433ff8cf03ecc2528f35f805f3a33c5a006e4d
4
- data.tar.gz: 05ac38bfcec8edc401ba73d13f4b95bc45d5caebee93e72c6626d90a38d2324c
3
+ metadata.gz: 6168625d6e729f8fa19728bb27caed64749d600c55550edb80c4370775b2eb96
4
+ data.tar.gz: 91c7cdb1d9b6ad61e621f3c914cbf117c2dd7aabc86bc5e62e4856a500d2f6f0
5
5
  SHA512:
6
- metadata.gz: 81a86f5527947e62673d615d6efb9fe7c2bc3c4206cfa60a805f88e8eb6cc3b360a9d73b30b232c87a551d2527130a2316694f07bc597fa8f7c21c72ad60f402
7
- data.tar.gz: 4640d37f2b90cee7a4081ca023ecb892da77bdabbff2b3d19049dbf674bb6651d0b916db4b08227d06cb1ca8aee7129002b7cc8b546e88811a8134ee19239c09
6
+ metadata.gz: 447e5eb18577d9cb5c0cfce07e68c1527224de5113838ea173d4a2b2f1fe84ff1a84bd58000358b0a5d8d8a257bdb43b0ae5fd13f7be4ad00b5c505021d3764f
7
+ data.tar.gz: 2736ba54da2962201979a53d57029c15d52cb4c2985ea0690e6731887cfff04aab582848e7d860a87c2f410a71ab6126c5d84a40942f4082ac7c104b7c28d91e
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.4.6
data/CHANGELOG.md CHANGED
@@ -1,2 +1,5 @@
1
+ # 0.1.0
2
+ - refactored headline output to have parsed description(s) as Array with Hashes instead of one string
3
+
1
4
  # 0.0.1
2
5
  - initial alpha release with basic features, most is untested
data/Gemfile CHANGED
@@ -2,5 +2,3 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in google_news.gemspec
4
4
  gemspec
5
-
6
-
data/README.md CHANGED
@@ -73,7 +73,25 @@ require 'google_news'
73
73
 
74
74
  results = GoogleNews.headlines n: 5 # get top 5 news in default language (country: us, language: en)
75
75
  puts results
76
- # => [{"title"=>"Title of the news article", "link"=>"https://link.to/the/article", "pubDate"=>"Wed, 01 Jan 2024 00:00:00 GMT", "source"=>"Source Name"}, ...]
76
+ # => [
77
+ # {
78
+ # :title => "Title of the news article",
79
+ # :link => "https://link.to/the/article",
80
+ # :pub_date => Time("Wed, 01 Jan 2024 00:00:00 GMT"),
81
+ # :descriptions => [
82
+ # {
83
+ # :title => "First article, like :title",
84
+ # :link => "https://link.to/the/first/article",
85
+ # :author => "Author Name",
86
+ # },
87
+ # {
88
+ # :title => "Second article, same topic, similar to first",
89
+ # :link => "https://link.to/the/second/article",
90
+ # :author => "Another Author Name",
91
+ # },
92
+ # ...
93
+ # ],
94
+ # ]
77
95
  ```
78
96
 
79
97
 
data/Rakefile CHANGED
@@ -14,4 +14,20 @@ task :console do
14
14
  Pry.start
15
15
  end
16
16
 
17
+ namespace :example do
18
+ task :headlines do
19
+ require_relative 'lib/google_news'
20
+ headlines = GoogleNews.headlines(n: 10, language: 'de', country: 'de')
21
+ headlines.each do |item|
22
+ puts "Title: #{item[:title]}"
23
+ puts "Link: #{item[:link]}"
24
+ puts "Published At: #{item[:pub_date].class}"
25
+ puts "Descriptions: #{item[:descriptions]}"
26
+ puts "-" * 40
27
+ end
28
+ end
29
+
30
+ end
31
+
32
+
17
33
  RSpec::Core::RakeTask.new(:spec)
data/google_news.gemspec CHANGED
@@ -4,15 +4,15 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'google_news/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
- spec.name = "google_news"
8
- spec.version = GoogleNews::VERSION
9
- spec.executables = %w[google_news]
10
- spec.authors = ["Matthäus J. N. Beyrle"]
11
- spec.email = ["google_news.gemspec@mail.magynhard.de"]
7
+ spec.name = "google_news"
8
+ spec.version = GoogleNews::VERSION
9
+ spec.executables = %w[google_news]
10
+ spec.authors = ["Matthäus J. N. Beyrle"]
11
+ spec.email = ["google_news.gemspec@mail.magynhard.de"]
12
12
 
13
- spec.summary = %q{Get Google News headlines from Ruby or the command line.}
14
- spec.homepage = "https://github.com/magynhard/google_news"
15
- spec.license = "MIT"
13
+ spec.summary = %q{Get Google News headlines from Ruby or the command line.}
14
+ spec.homepage = "https://github.com/magynhard/google_news"
15
+ spec.license = "MIT"
16
16
 
17
17
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
18
18
  spec.metadata['allowed_push_host'] = "https://rubygems.org"
@@ -26,13 +26,14 @@ Gem::Specification.new do |spec|
26
26
 
27
27
  spec.require_paths = ['lib']
28
28
 
29
- spec.add_runtime_dependency 'rss', '>= 0.2.6'
29
+ spec.add_runtime_dependency 'rss', '>= 0.2.6'
30
30
  spec.add_runtime_dependency 'open-uri', '>= 0.1.0'
31
31
 
32
- spec.add_development_dependency 'bundler', '>= 2.7.1'
33
- spec.add_development_dependency 'rake', '~> 12.0'
34
- spec.add_development_dependency 'rspec', '~> 3.0'
35
- spec.add_development_dependency 'pry', '~> 0.15.2'
36
- spec.add_development_dependency 'fiddle', '~> 1.1.8'
37
- spec.add_development_dependency 'ostruct', '~> 0.6.3'
32
+ spec.add_development_dependency 'bundler', '>= 2.7.1'
33
+ spec.add_development_dependency 'rake', '~> 12.0'
34
+ spec.add_development_dependency 'rspec', '~> 3.0'
35
+ spec.add_development_dependency 'pry', '~> 0.15.2'
36
+ spec.add_development_dependency 'fiddle', '~> 1.1.8'
37
+ spec.add_development_dependency 'ostruct', '~> 0.6.3'
38
+ spec.add_development_dependency 'nokogiri', '~> 1.18'
38
39
  end
@@ -1,3 +1,3 @@
1
1
  module GoogleNews
2
- VERSION = '0.0.2'.freeze
2
+ VERSION = '0.1.0'.freeze
3
3
  end
data/lib/google_news.rb CHANGED
@@ -5,14 +5,15 @@
5
5
  require 'open-uri'
6
6
  require 'rss'
7
7
  require 'uri'
8
+ require 'nokogiri'
8
9
 
9
10
  require_relative 'google_news/version'
10
11
 
11
12
  module GoogleNews
12
13
  HEADLINES_RSS = 'https://news.google.com/news/rss'.freeze
13
- TOPICS_RSS = 'https://news.google.com/news/rss/headlines/section/topic/'.freeze
14
- GEO_RSS = 'https://news.google.com/news/rss/headlines/section/geo/'.freeze
15
- SEARCH_RSS = 'https://news.google.com/rss/search?q='.freeze
14
+ TOPICS_RSS = 'https://news.google.com/news/rss/headlines/section/topic/'.freeze
15
+ GEO_RSS = 'https://news.google.com/news/rss/headlines/section/geo/'.freeze
16
+ SEARCH_RSS = 'https://news.google.com/rss/search?q='.freeze
16
17
 
17
18
  TOPICS = %w[WORLD NATION BUSINESS TECHNOLOGY ENTERTAINMENT SPORTS SCIENCE HEALTH].freeze
18
19
 
@@ -201,17 +202,18 @@ module GoogleNews
201
202
  # Convert an RSS item to a hash
202
203
  #
203
204
  # @param [RSS::Rss::Channel::Item] item
204
- # @return [Hash] with keys :title, :link, :pub_date, :description, and :raw_item
205
+ # @return [Hash] with keys :title, :link, :pub_date, :descriptions [:title, :link, :author], and :raw_item
205
206
  #
206
207
  def self.item_to_hash(item)
207
208
  {
208
209
  title: item.title,
209
210
  link: extract_link(item),
210
211
  pub_date: (item.respond_to?(:pubDate) ? item.pubDate : nil),
211
- description: (item.respond_to?(:description) ? item.description : nil),
212
+ descriptions: (item.respond_to?(:description) ? parse_description(item.description) : nil),
212
213
  raw_item: item
213
214
  }
214
215
  end
216
+
215
217
  #
216
218
  # Extract the link from an RSS item
217
219
  #
@@ -225,6 +227,42 @@ module GoogleNews
225
227
  nil
226
228
  end
227
229
 
230
+ #
231
+ # Description can consist of an <a> tag with a link, containing the title, followed by a <font> tag with the author's name.
232
+ #
233
+ # But it can also consist of several news items inside an <ol> list, containing <li> items with an <a> tag, followed by a <font> tag each.
234
+ #
235
+ # This method creates an array of hashes with :title, :link and :author keys for each news item found in the description.
236
+ #
237
+ # @param [String] description as HTML
238
+ # @return [Array<Hash>, nil] array of news items with :title, :link and :author keys, or nil if description is nil
239
+ #
240
+ def self.parse_description(description)
241
+ return nil if description.nil?
242
+ doc = Nokogiri::HTML(description)
243
+ news_items = []
244
+ if doc.at_css('ol')
245
+ doc.css('ol li').each do |li|
246
+ a_tag = li.at_css('a')
247
+ font_tag = li.at_css('font')
248
+ news_items << {
249
+ title: a_tag ? a_tag.text : nil,
250
+ link: a_tag ? a_tag['href'] : nil,
251
+ author: font_tag ? font_tag.text : nil
252
+ }
253
+ end
254
+ else
255
+ a_tag = doc.at_css('a')
256
+ font_tag = doc.at_css('font')
257
+ news_items << {
258
+ title: a_tag ? a_tag.text : nil,
259
+ link: a_tag ? a_tag['href'] : nil,
260
+ author: font_tag ? font_tag.text : nil
261
+ }
262
+ end
263
+ news_items
264
+ end
265
+
228
266
  #
229
267
  # Default User-Agent string for HTTP requests
230
268
  #
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google_news
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthäus J. N. Beyrle
@@ -121,6 +121,20 @@ dependencies:
121
121
  - - "~>"
122
122
  - !ruby/object:Gem::Version
123
123
  version: 0.6.3
124
+ - !ruby/object:Gem::Dependency
125
+ name: nokogiri
126
+ requirement: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '1.18'
131
+ type: :development
132
+ prerelease: false
133
+ version_requirements: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '1.18'
124
138
  email:
125
139
  - google_news.gemspec@mail.magynhard.de
126
140
  executables:
@@ -131,6 +145,7 @@ files:
131
145
  - ".gitattributes"
132
146
  - ".gitignore"
133
147
  - ".rspec"
148
+ - ".ruby-version"
134
149
  - ".travis.yml"
135
150
  - CHANGELOG.md
136
151
  - CODE_OF_CONDUCT.md
@@ -169,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
169
184
  - !ruby/object:Gem::Version
170
185
  version: '0'
171
186
  requirements: []
172
- rubygems_version: 3.7.1
187
+ rubygems_version: 3.7.2
173
188
  specification_version: 4
174
189
  summary: Get Google News headlines from Ruby or the command line.
175
190
  test_files: []