buzzwords 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 28d272a397f727d163056668a9b1998d4c79835c
4
+ data.tar.gz: 473704a343deca9793d00414f4bebe81b24cfd95
5
+ SHA512:
6
+ metadata.gz: 79103b449a23f0c2888fead2a6c78ead5bc4259f42463e38ce09d2462e52698a5cd6b29dcb74b956c51b5b18b44f403410e3d8610ef6216cd574479efa312fb0
7
+ data.tar.gz: 6a0383662d3a023780d9684c7f305394f8d8e09cd9f3d7c85bc66e7c36408678ecb7738e80e0431c94932a972463e52bc3490b3e63a80fd21cc1587aa89d4045
@@ -0,0 +1,80 @@
1
+ require 'mechanize'
2
+
3
+ require_relative 'buzzwords/ny_times.rb'
4
+ require_relative 'buzzwords/washington_post.rb'
5
+ require_relative 'buzzwords/cnn.rb'
6
+ require_relative 'buzzwords/reuters.rb'
7
+ require_relative 'buzzwords/stopwords.rb'
8
+
9
# Aggregates headlines from several news sites, filters stopwords, and
# reports the most frequent remaining words as the day's "buzzwords".
class Buzzwords
  # Single shared Mechanize agent reused by all scraper classes.
  MECH = Mechanize.new

  # Class-level state (class instance variables, not shared @@class variables).
  @aggregate_headlines = []
  @word_occurrences = Hash.new(0)
  @top_words = nil

  class << self
    attr_accessor :aggregate_headlines, :word_occurrences, :top_words
  end

  # Runs the whole pipeline: scrape -> clean -> filter -> count -> report.
  def self.generate_buzz
    display_loading
    retrieve_nytimes_headlines
    retrieve_wapo_headlines
    retrieve_cnn_headlines
    retrieve_reuters_headlines
    filter_stopwords
    count_word_occurrences
    determine_top_words
    display_top_words
  end

  def self.display_loading
    puts "Loading current buzzwords..."
  end

  def self.retrieve_nytimes_headlines
    self.aggregate_headlines += parse_headlines(NYTimes.new.headlines)
  end

  def self.retrieve_wapo_headlines
    self.aggregate_headlines += parse_headlines(WashingtonPost.new.headlines)
  end

  def self.retrieve_cnn_headlines
    self.aggregate_headlines += parse_headlines(CNN.new.headlines)
  end

  def self.retrieve_reuters_headlines
    self.aggregate_headlines += parse_headlines(Reuters.new.headlines)
  end

  # Splits headline strings into words, stripping leading/trailing
  # non-word characters and possessive "'s".
  # BUGFIX: the original called String#delete('/\A[\W]/'), which treats its
  # argument as a tr-style character set — it deleted the literal characters
  # / \ A [ W ] rather than anchoring a pattern. gsub with real regexes
  # does what was intended.
  def self.parse_headlines(data)
    data.flat_map(&:split).map do |word|
      word.gsub(/\A\W+|\W+\z/, '').gsub(/'s/, '')
    end
  end

  # Keeps only tokens Stopwords considers meaningful.
  def self.filter_stopwords
    self.aggregate_headlines = aggregate_headlines.select do |word|
      Stopwords.valid?(word)
    end
  end

  # Accumulates counts into word_occurrences (a Hash with default 0), so
  # repeated calls keep adding to the existing tallies.
  def self.count_word_occurrences
    aggregate_headlines.each do |word|
      word_occurrences[word] += 1
    end
  end

  # Top 20 words by frequency, dropping any that occur fewer than 3 times.
  def self.determine_top_words
    self.top_words = word_occurrences
                     .sort_by { |_word, count| -count }
                     .take(20)
                     .reject { |_word, count| count < 3 }
                     .map { |word, _count| word }
  end

  def self.display_top_words
    puts "Today's top buzzwords are: "
    puts top_words.map { |word| " - #{word}" }
  end
end
@@ -0,0 +1,15 @@
1
# Scrapes the CNN U.S. news page for headline text.
class CNN
  # BUGFIX: dropped the dead `attr_reader :headlines` — the explicit
  # #headlines method below overrode it and @headlines was never assigned.
  attr_reader :webpage

  CNN_US_HEADLINES_URL = "https://www.cnn.com/us"

  # Fetches the page once at construction time via the shared agent.
  def initialize
    @webpage = Buzzwords::MECH.get(CNN_US_HEADLINES_URL)
  end

  # Returns up to 25 downcased, whitespace-stripped headline strings.
  def headlines
    webpage.search('.cd__headline-text').map do |headline|
      headline.text.downcase.strip
    end.first(25)
  end
end
@@ -0,0 +1,15 @@
1
# Scrapes the New York Times U.S. section for headline text.
class NYTimes
  # BUGFIX: dropped the dead `attr_reader :headlines` — the explicit
  # #headlines method below overrode it and @headlines was never assigned.
  attr_reader :webpage

  US_HEADLINE_URL = "https://www.nytimes.com/section/us"

  # Fetches the page once at construction time via the shared agent.
  def initialize
    @webpage = Buzzwords::MECH.get(US_HEADLINE_URL)
  end

  # Downcased, whitespace-stripped headline strings from the initial set.
  def headlines
    webpage.search('.initial-set .headline').map do |headline|
      headline.text.downcase.strip
    end
  end
end
@@ -0,0 +1,15 @@
1
# Scrapes the Reuters U.S. news feed for headline text.
class Reuters
  # BUGFIX: dropped the dead `attr_reader :headlines` — the explicit
  # #headlines method below overrode it and @headlines was never assigned.
  attr_reader :webpage

  US_HEADLINE_URL = "https://www.reuters.com/news/us"

  # Fetches the page once at construction time via the shared agent.
  def initialize
    @webpage = Buzzwords::MECH.get(US_HEADLINE_URL)
  end

  # Downcased, whitespace-stripped headline strings from the feed items.
  def headlines
    webpage.search('.FeedItemHeadline_full a').map do |headline|
      headline.text.downcase.strip
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # https://github.com/brez/stopwords
2
+ # Copyright ©2011 John Bresnik
3
+
4
+ # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
5
+ # files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy,
6
+ # modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
7
+ # is furnished to do so, subject to the following conditions:
8
+
9
+ # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
10
+
11
+ # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
12
+ # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
13
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
14
+ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15
+
16
# Stopword filtering: a list of common English words (plus a few
# news-specific additions) and a token-shape validator.
module Stopwords

  STOP_WORDS = [
    'a', 'am', 'cannot','into','our','thus','about','co','is','ours','to','above',
    'could','it','ourselves','together','across','down','its','out','too',
    'after','during','itself','over','toward','afterwards','each','last','own',
    'towards','again','eg','latter','per','under','against','either','latterly',
    'perhaps','until','all','else','least','rather','up','almost','elsewhere',
    'less','same','upon','alone','enough','ltd','seem','us','along','etc',
    'many','seemed','very','already','even','may','seeming','via','also','ever',
    'me','seems','was','although','every','meanwhile','several','we','always',
    'everyone','might','she','well','among','everything','more','should','were',
    'amongst','everywhere','moreover','since','what','an','except','most','so',
    'whatever','and','few','mostly','some','when','another','first','much',
    'somehow','whence','any','for','must','someone','whenever','anyhow',
    'former','my','something','where','anyone','formerly','myself','sometime',
    'whereafter','anything','from','namely','sometimes','whereas','anywhere',
    'further','neither','somewhere','whereby','are','had','never','still',
    'wherein','around','has','nevertheless','such','whereupon','as','have',
    'next','than','wherever','at','he','no','that','whether','be','hence',
    'nobody','the','whither','became','her','none','their','which','because',
    'here','noone','them','while','become','hereafter','nor','themselves','who',
    'becomes','hereby','not','then','whoever','becoming','herein','nothing',
    'thence','whole','been','hereupon','now','there','whom','before','hers',
    'nowhere','thereafter','whose','beforehand','herself','of','thereby','why',
    'behind','him','off','therefore','will','being','himself','often','therein',
    'with','below','his','on','thereupon','within','beside','how','once',
    'these','without','besides','however','one','they','would','between','i',
    'only','this','yet','beyond','ie','onto','those','you','both','if','or',
    'though','your','but','in','other','through','yours','by','inc','others',
    'throughout','yourself','can','indeed','otherwise','thru','yourselves', 'says',
    'new', 'shows', 'man', 'woman', 'got', 'see', 'people'
  ].freeze

  # Accepts lowercase words, hyphenated tokens, and letter/digit mixes.
  # BUGFIX: anchored with \A/\z instead of ^/$ — ^ and $ match per-line in
  # Ruby, so the old pattern would accept tokens containing newlines.
  # Alternatives the original left open-ended at the tail stay open-ended.
  TOKEN_REGEXP = /\A(?:[a-z]+\z|\w+-\w+|[a-z]+[0-9]+[a-z]+\z|[0-9]+[a-z]+|[a-z]+[0-9]+\z)/

  # True when the token is a stopword.
  def self.is?(token)
    STOP_WORDS.include?(token)
  end

  # True when the token is well-formed (matches TOKEN_REGEXP) and is not a
  # stopword. Rewritten from `(=~ ... ) == 0 and !member?` — `and` has
  # surprisingly low precedence and `match?` avoids allocating MatchData.
  def self.valid?(token)
    TOKEN_REGEXP.match?(token) && !is?(token)
  end

end
@@ -0,0 +1,15 @@
1
# Scrapes the Washington Post front page for headline text.
class WashingtonPost
  # BUGFIX: dropped the dead `attr_reader :headlines` — the explicit
  # #headlines method below overrode it and @headlines was never assigned.
  attr_reader :webpage

  WAPO_URL = "https://www.washingtonpost.com"

  # Fetches the page once at construction time via the shared agent.
  def initialize
    @webpage = Buzzwords::MECH.get(WAPO_URL)
  end

  # Downcased, whitespace-stripped headline strings from the main content.
  def headlines
    webpage.search('#main-content .headline a').map do |headline|
      headline.text.downcase.strip
    end
  end
end
@@ -0,0 +1,116 @@
1
+ require 'buzzwords'
2
+
3
# Integration-style specs: these hit the live news sites via Mechanize.
describe 'buzzwords' do
  context '.display_loading' do
    it 'displays load message' do
      # BUGFIX: escaped the dots — the unescaped /.../ matched any 3 chars.
      expect { Buzzwords.display_loading }.to output(/Loading current buzzwords\.\.\.\n/).to_stdout
    end
  end

  context 'retrieve headlines' do
    before(:each) { Buzzwords.aggregate_headlines = [] }

    context '.retrieve_nytimes_headlines' do
      it 'adds NYT headlines to aggregate' do
        Buzzwords.retrieve_nytimes_headlines
        expect(Buzzwords.aggregate_headlines).not_to be_empty
      end
    end

    context '.retrieve_wapo_headlines' do
      it 'adds Wapo headlines to aggregate' do
        Buzzwords.retrieve_wapo_headlines
        expect(Buzzwords.aggregate_headlines).not_to be_empty
      end
    end

    context '.retrieve_cnn_headlines' do
      it 'adds CNN headlines to aggregate' do
        Buzzwords.retrieve_cnn_headlines
        expect(Buzzwords.aggregate_headlines).not_to be_empty
      end
    end

    context '.retrieve_reuters_headlines' do
      it 'adds Reuters headlines to aggregate' do
        Buzzwords.retrieve_reuters_headlines
        expect(Buzzwords.aggregate_headlines).not_to be_empty
      end
    end
  end

  context '.parse_headlines' do
    before(:all) { @headlines = Buzzwords.parse_headlines(NYTimes.new.headlines) }

    it 'should return an array' do
      expect(@headlines).to be_an_instance_of(Array)
    end

    it 'should only include properly formatted words' do
      # BUGFIX: the original asserted the word didn't *include* a literal
      # regex-looking string ('/(\A[\W]||[\W]\z)/'), which is vacuously
      # true for any headline — assert against real patterns instead.
      @headlines.each do |headline|
        expect(headline).not_to match(/\A\W|\W\z/)
        expect(headline).not_to include("'s")
      end
    end
  end

  context '.filter_stopwords' do
    before { Buzzwords.retrieve_nytimes_headlines }

    it 'should filter stopwords from aggregate headline list' do
      Buzzwords.filter_stopwords
      expect(Buzzwords.aggregate_headlines).not_to include(*Stopwords::STOP_WORDS)
    end
  end

  context '.count_word_occurrences' do
    before(:all) do
      Buzzwords.retrieve_cnn_headlines
      Buzzwords.count_word_occurrences
    end

    it 'should return a hash' do
      expect(Buzzwords.word_occurrences).to be_an_instance_of(Hash)
      expect(Buzzwords.word_occurrences).not_to be_empty
    end

    it 'should have integer values in the hash' do
      expect(Buzzwords.word_occurrences.values).to all(be_an(Integer))
    end
  end

  context '.determine_top_words' do
    before(:all) do
      Buzzwords.retrieve_nytimes_headlines
      Buzzwords.retrieve_cnn_headlines
      Buzzwords.retrieve_reuters_headlines
      Buzzwords.retrieve_wapo_headlines
      Buzzwords.filter_stopwords
      Buzzwords.count_word_occurrences
      @top_words = Buzzwords.determine_top_words
    end

    it 'should return a non-empty array' do
      expect(@top_words).to be_an_instance_of(Array)
      # BUGFIX: the description promised "non-empty" but nothing checked it.
      expect(@top_words).not_to be_empty
    end

    it 'should return less than 21 words' do
      expect(@top_words.length).to be <= 20
    end

    it 'should return list of unique words' do
      # BUGFIX: `match` on two arrays is looser than intended — use eq.
      expect(@top_words.uniq).to eq(@top_words)
    end
  end

  context '.display_top_words' do
    it 'displays title heading' do
      expect { Buzzwords.display_top_words }.to output(/Today's top buzzwords are: \n/).to_stdout
    end

    it 'displays list of words' do
      Buzzwords.top_words = ['hello', 'hi', 'yes']
      expect { Buzzwords.display_top_words }.to output(/ - hello\n - hi\n - yes\n/).to_stdout
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require './lib/buzzwords/cnn.rb'
2
+
3
describe 'cnn' do
  context '#headlines' do
    # Fetched lazily, once per example; this hits the live CNN site.
    let(:headlines) { CNN.new.headlines }

    it 'returns a non-empty array' do
      expect(headlines).to be_an_instance_of(Array)
      expect(headlines).not_to be_empty
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require './lib/buzzwords/ny_times.rb'
2
+
3
describe 'NY Times' do
  context '#headlines' do
    # Fetched lazily, once per example; this hits the live NYT site.
    let(:headlines) { NYTimes.new.headlines }

    it 'returns a non-empty array' do
      expect(headlines).to be_an_instance_of(Array)
      expect(headlines).not_to be_empty
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require './lib/buzzwords/reuters.rb'
2
+
3
describe 'Reuters' do
  context '#headlines' do
    # Fetched lazily, once per example; this hits the live Reuters site.
    let(:headlines) { Reuters.new.headlines }

    it 'returns a non-empty array' do
      expect(headlines).to be_an_instance_of(Array)
      expect(headlines).not_to be_empty
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require './lib/buzzwords/washington_post.rb'
2
+
3
describe 'Washington Post' do
  context '#headlines' do
    # Fetched lazily, once per example; this hits the live WaPo site.
    let(:headlines) { WashingtonPost.new.headlines }

    it 'returns a non-empty array' do
      expect(headlines).to be_an_instance_of(Array)
      expect(headlines).not_to be_empty
    end
  end
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: buzzwords
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Elizabeth Tackett
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-10-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 2.7.6
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '2.7'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 2.7.6
33
+ - !ruby/object:Gem::Dependency
34
+ name: rspec
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '3.8'
40
+ type: :development
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '3.8'
47
+ description: A simple IRB tool that generates and displays the day's most popular
48
+ buzzwords from news publications including The New York Times, The Washington Post,
49
+ CNN, and Reuters.
50
+ email: emctackett@gmail.com
51
+ executables: []
52
+ extensions: []
53
+ extra_rdoc_files: []
54
+ files:
55
+ - lib/buzzwords.rb
56
+ - lib/buzzwords/cnn.rb
57
+ - lib/buzzwords/ny_times.rb
58
+ - lib/buzzwords/reuters.rb
59
+ - lib/buzzwords/stopwords.rb
60
+ - lib/buzzwords/washington_post.rb
61
+ - spec/buzzwords_spec.rb
62
+ - spec/cnn_spec.rb
63
+ - spec/ny_times_spec.rb
64
+ - spec/reuters_spec.rb
65
+ - spec/washington_post_spec.rb
66
+ homepage: http://github.com/emctackett
67
+ licenses:
68
+ - MIT
69
+ metadata: {}
70
+ post_install_message:
71
+ rdoc_options: []
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - "~>"
77
+ - !ruby/object:Gem::Version
78
+ version: 2.4.2
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ requirements: []
85
+ rubyforge_project:
86
+ rubygems_version: 2.6.13
87
+ signing_key:
88
+ specification_version: 4
89
+ summary: A basic IRB tool to instantly generate the day's media buzzwords.
90
+ test_files:
91
+ - spec/buzzwords_spec.rb
92
+ - spec/cnn_spec.rb
93
+ - spec/ny_times_spec.rb
94
+ - spec/reuters_spec.rb
95
+ - spec/washington_post_spec.rb