buzzwords 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 28d272a397f727d163056668a9b1998d4c79835c
4
+ data.tar.gz: 473704a343deca9793d00414f4bebe81b24cfd95
5
+ SHA512:
6
+ metadata.gz: 79103b449a23f0c2888fead2a6c78ead5bc4259f42463e38ce09d2462e52698a5cd6b29dcb74b956c51b5b18b44f403410e3d8610ef6216cd574479efa312fb0
7
+ data.tar.gz: 6a0383662d3a023780d9684c7f305394f8d8e09cd9f3d7c85bc66e7c36408678ecb7738e80e0431c94932a972463e52bc3490b3e63a80fd21cc1587aa89d4045
@@ -0,0 +1,80 @@
1
+ require 'mechanize'
2
+
3
+ require_relative 'buzzwords/ny_times.rb'
4
+ require_relative 'buzzwords/washington_post.rb'
5
+ require_relative 'buzzwords/cnn.rb'
6
+ require_relative 'buzzwords/reuters.rb'
7
+ require_relative 'buzzwords/stopwords.rb'
8
+
9
# Aggregates headlines from several news sites, filters stopwords, and
# reports the most frequent remaining words as the day's "buzzwords".
class Buzzwords
  # Single shared Mechanize agent reused by all scraper classes.
  MECH = Mechanize.new

  # Class-level state (class instance variables, not shared @@class variables).
  @aggregate_headlines = []
  @word_occurrences = Hash.new(0)
  @top_words = nil

  class << self
    attr_accessor :aggregate_headlines, :word_occurrences, :top_words
  end

  # Runs the whole pipeline: scrape -> clean -> filter -> count -> report.
  def self.generate_buzz
    display_loading
    retrieve_nytimes_headlines
    retrieve_wapo_headlines
    retrieve_cnn_headlines
    retrieve_reuters_headlines
    filter_stopwords
    count_word_occurrences
    determine_top_words
    display_top_words
  end

  def self.display_loading
    puts "Loading current buzzwords..."
  end

  def self.retrieve_nytimes_headlines
    self.aggregate_headlines += parse_headlines(NYTimes.new.headlines)
  end

  def self.retrieve_wapo_headlines
    self.aggregate_headlines += parse_headlines(WashingtonPost.new.headlines)
  end

  def self.retrieve_cnn_headlines
    self.aggregate_headlines += parse_headlines(CNN.new.headlines)
  end

  def self.retrieve_reuters_headlines
    self.aggregate_headlines += parse_headlines(Reuters.new.headlines)
  end

  # Splits headline strings into words, stripping leading/trailing
  # non-word characters and possessive "'s".
  # BUGFIX: the original called String#delete('/\A[\W]/'), which treats its
  # argument as a tr-style character set — it deleted the literal characters
  # / \ A [ W ] rather than anchoring a pattern. gsub with real regexes
  # does what was intended.
  def self.parse_headlines(data)
    data.flat_map(&:split).map do |word|
      word.gsub(/\A\W+|\W+\z/, '').gsub(/'s/, '')
    end
  end

  # Keeps only tokens Stopwords considers meaningful.
  def self.filter_stopwords
    self.aggregate_headlines = aggregate_headlines.select do |word|
      Stopwords.valid?(word)
    end
  end

  # Accumulates counts into word_occurrences (a Hash with default 0), so
  # repeated calls keep adding to the existing tallies.
  def self.count_word_occurrences
    aggregate_headlines.each do |word|
      word_occurrences[word] += 1
    end
  end

  # Top 20 words by frequency, dropping any that occur fewer than 3 times.
  def self.determine_top_words
    self.top_words = word_occurrences
                     .sort_by { |_word, count| -count }
                     .take(20)
                     .reject { |_word, count| count < 3 }
                     .map { |word, _count| word }
  end

  def self.display_top_words
    puts "Today's top buzzwords are: "
    puts top_words.map { |word| " - #{word}" }
  end
end
@@ -0,0 +1,15 @@
1
# Scrapes the CNN U.S. news page for headline text.
class CNN
  # BUGFIX: dropped the dead `attr_reader :headlines` — the explicit
  # #headlines method below overrode it and @headlines was never assigned.
  attr_reader :webpage

  CNN_US_HEADLINES_URL = "https://www.cnn.com/us"

  # Fetches the page once at construction time via the shared agent.
  def initialize
    @webpage = Buzzwords::MECH.get(CNN_US_HEADLINES_URL)
  end

  # Returns up to 25 downcased, whitespace-stripped headline strings.
  def headlines
    webpage.search('.cd__headline-text').map do |headline|
      headline.text.downcase.strip
    end.first(25)
  end
end
@@ -0,0 +1,15 @@
1
# Scrapes the New York Times U.S. section for headline text.
class NYTimes
  # BUGFIX: dropped the dead `attr_reader :headlines` — the explicit
  # #headlines method below overrode it and @headlines was never assigned.
  attr_reader :webpage

  US_HEADLINE_URL = "https://www.nytimes.com/section/us"

  # Fetches the page once at construction time via the shared agent.
  def initialize
    @webpage = Buzzwords::MECH.get(US_HEADLINE_URL)
  end

  # Downcased, whitespace-stripped headline strings from the initial set.
  def headlines
    webpage.search('.initial-set .headline').map do |headline|
      headline.text.downcase.strip
    end
  end
end
@@ -0,0 +1,15 @@
1
# Scrapes the Reuters U.S. news feed for headline text.
class Reuters
  # BUGFIX: dropped the dead `attr_reader :headlines` — the explicit
  # #headlines method below overrode it and @headlines was never assigned.
  attr_reader :webpage

  US_HEADLINE_URL = "https://www.reuters.com/news/us"

  # Fetches the page once at construction time via the shared agent.
  def initialize
    @webpage = Buzzwords::MECH.get(US_HEADLINE_URL)
  end

  # Downcased, whitespace-stripped headline strings from the feed items.
  def headlines
    webpage.search('.FeedItemHeadline_full a').map do |headline|
      headline.text.downcase.strip
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # https://github.com/brez/stopwords
2
+ # Copyright ©2011 John Bresnik
3
+
4
+ # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
5
+ # files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy,
6
+ # modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
7
+ # is furnished to do so, subject to the following conditions:
8
+
9
+ # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
10
+
11
+ # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
12
+ # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
13
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
14
+ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15
+
16
# Stopword filtering: a list of common English words (plus a few
# news-specific additions) and a token-shape validator.
module Stopwords

  STOP_WORDS = [
    'a', 'am', 'cannot','into','our','thus','about','co','is','ours','to','above',
    'could','it','ourselves','together','across','down','its','out','too',
    'after','during','itself','over','toward','afterwards','each','last','own',
    'towards','again','eg','latter','per','under','against','either','latterly',
    'perhaps','until','all','else','least','rather','up','almost','elsewhere',
    'less','same','upon','alone','enough','ltd','seem','us','along','etc',
    'many','seemed','very','already','even','may','seeming','via','also','ever',
    'me','seems','was','although','every','meanwhile','several','we','always',
    'everyone','might','she','well','among','everything','more','should','were',
    'amongst','everywhere','moreover','since','what','an','except','most','so',
    'whatever','and','few','mostly','some','when','another','first','much',
    'somehow','whence','any','for','must','someone','whenever','anyhow',
    'former','my','something','where','anyone','formerly','myself','sometime',
    'whereafter','anything','from','namely','sometimes','whereas','anywhere',
    'further','neither','somewhere','whereby','are','had','never','still',
    'wherein','around','has','nevertheless','such','whereupon','as','have',
    'next','than','wherever','at','he','no','that','whether','be','hence',
    'nobody','the','whither','became','her','none','their','which','because',
    'here','noone','them','while','become','hereafter','nor','themselves','who',
    'becomes','hereby','not','then','whoever','becoming','herein','nothing',
    'thence','whole','been','hereupon','now','there','whom','before','hers',
    'nowhere','thereafter','whose','beforehand','herself','of','thereby','why',
    'behind','him','off','therefore','will','being','himself','often','therein',
    'with','below','his','on','thereupon','within','beside','how','once',
    'these','without','besides','however','one','they','would','between','i',
    'only','this','yet','beyond','ie','onto','those','you','both','if','or',
    'though','your','but','in','other','through','yours','by','inc','others',
    'throughout','yourself','can','indeed','otherwise','thru','yourselves', 'says',
    'new', 'shows', 'man', 'woman', 'got', 'see', 'people'
  ].freeze

  # Accepts lowercase words, hyphenated tokens, and letter/digit mixes.
  # BUGFIX: anchored with \A/\z instead of ^/$ — ^ and $ match per-line in
  # Ruby, so the old pattern would accept tokens containing newlines.
  # Alternatives the original left open-ended at the tail stay open-ended.
  TOKEN_REGEXP = /\A(?:[a-z]+\z|\w+-\w+|[a-z]+[0-9]+[a-z]+\z|[0-9]+[a-z]+|[a-z]+[0-9]+\z)/

  # True when the token is a stopword.
  def self.is?(token)
    STOP_WORDS.include?(token)
  end

  # True when the token is well-formed (matches TOKEN_REGEXP) and is not a
  # stopword. Rewritten from `(=~ ... ) == 0 and !member?` — `and` has
  # surprisingly low precedence and `match?` avoids allocating MatchData.
  def self.valid?(token)
    TOKEN_REGEXP.match?(token) && !is?(token)
  end

end
@@ -0,0 +1,15 @@
1
# Scrapes the Washington Post front page for headline text.
class WashingtonPost
  # BUGFIX: dropped the dead `attr_reader :headlines` — the explicit
  # #headlines method below overrode it and @headlines was never assigned.
  attr_reader :webpage

  WAPO_URL = "https://www.washingtonpost.com"

  # Fetches the page once at construction time via the shared agent.
  def initialize
    @webpage = Buzzwords::MECH.get(WAPO_URL)
  end

  # Downcased, whitespace-stripped headline strings from the main content.
  def headlines
    webpage.search('#main-content .headline a').map do |headline|
      headline.text.downcase.strip
    end
  end
end
@@ -0,0 +1,116 @@
1
+ require 'buzzwords'
2
+
3
# Integration-style specs: these hit the live news sites via Mechanize.
describe 'buzzwords' do
  context '.display_loading' do
    it 'displays load message' do
      # BUGFIX: escaped the dots — the unescaped /.../ matched any 3 chars.
      expect { Buzzwords.display_loading }.to output(/Loading current buzzwords\.\.\.\n/).to_stdout
    end
  end

  context 'retrieve headlines' do
    before(:each) { Buzzwords.aggregate_headlines = [] }

    context '.retrieve_nytimes_headlines' do
      it 'adds NYT headlines to aggregate' do
        Buzzwords.retrieve_nytimes_headlines
        expect(Buzzwords.aggregate_headlines).not_to be_empty
      end
    end

    context '.retrieve_wapo_headlines' do
      it 'adds Wapo headlines to aggregate' do
        Buzzwords.retrieve_wapo_headlines
        expect(Buzzwords.aggregate_headlines).not_to be_empty
      end
    end

    context '.retrieve_cnn_headlines' do
      it 'adds CNN headlines to aggregate' do
        Buzzwords.retrieve_cnn_headlines
        expect(Buzzwords.aggregate_headlines).not_to be_empty
      end
    end

    context '.retrieve_reuters_headlines' do
      it 'adds Reuters headlines to aggregate' do
        Buzzwords.retrieve_reuters_headlines
        expect(Buzzwords.aggregate_headlines).not_to be_empty
      end
    end
  end

  context '.parse_headlines' do
    before(:all) { @headlines = Buzzwords.parse_headlines(NYTimes.new.headlines) }

    it 'should return an array' do
      expect(@headlines).to be_an_instance_of(Array)
    end

    it 'should only include properly formatted words' do
      # BUGFIX: the original asserted the word didn't *include* a literal
      # regex-looking string ('/(\A[\W]||[\W]\z)/'), which is vacuously
      # true for any headline — assert against real patterns instead.
      @headlines.each do |headline|
        expect(headline).not_to match(/\A\W|\W\z/)
        expect(headline).not_to include("'s")
      end
    end
  end

  context '.filter_stopwords' do
    before { Buzzwords.retrieve_nytimes_headlines }

    it 'should filter stopwords from aggregate headline list' do
      Buzzwords.filter_stopwords
      expect(Buzzwords.aggregate_headlines).not_to include(*Stopwords::STOP_WORDS)
    end
  end

  context '.count_word_occurrences' do
    before(:all) do
      Buzzwords.retrieve_cnn_headlines
      Buzzwords.count_word_occurrences
    end

    it 'should return a hash' do
      expect(Buzzwords.word_occurrences).to be_an_instance_of(Hash)
      expect(Buzzwords.word_occurrences).not_to be_empty
    end

    it 'should have integer values in the hash' do
      expect(Buzzwords.word_occurrences.values).to all(be_an(Integer))
    end
  end

  context '.determine_top_words' do
    before(:all) do
      Buzzwords.retrieve_nytimes_headlines
      Buzzwords.retrieve_cnn_headlines
      Buzzwords.retrieve_reuters_headlines
      Buzzwords.retrieve_wapo_headlines
      Buzzwords.filter_stopwords
      Buzzwords.count_word_occurrences
      @top_words = Buzzwords.determine_top_words
    end

    it 'should return a non-empty array' do
      expect(@top_words).to be_an_instance_of(Array)
      # BUGFIX: the description promised "non-empty" but nothing checked it.
      expect(@top_words).not_to be_empty
    end

    it 'should return less than 21 words' do
      expect(@top_words.length).to be <= 20
    end

    it 'should return list of unique words' do
      # BUGFIX: `match` on two arrays is looser than intended — use eq.
      expect(@top_words.uniq).to eq(@top_words)
    end
  end

  context '.display_top_words' do
    it 'displays title heading' do
      expect { Buzzwords.display_top_words }.to output(/Today's top buzzwords are: \n/).to_stdout
    end

    it 'displays list of words' do
      Buzzwords.top_words = ['hello', 'hi', 'yes']
      expect { Buzzwords.display_top_words }.to output(/ - hello\n - hi\n - yes\n/).to_stdout
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require './lib/buzzwords/cnn.rb'
2
+
3
describe 'cnn' do
  context '#headlines' do
    # Fetched lazily, once per example; this hits the live CNN site.
    let(:headlines) { CNN.new.headlines }

    it 'returns a non-empty array' do
      expect(headlines).to be_an_instance_of(Array)
      expect(headlines).not_to be_empty
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require './lib/buzzwords/ny_times.rb'
2
+
3
describe 'NY Times' do
  context '#headlines' do
    # Fetched lazily, once per example; this hits the live NYT site.
    let(:headlines) { NYTimes.new.headlines }

    it 'returns a non-empty array' do
      expect(headlines).to be_an_instance_of(Array)
      expect(headlines).not_to be_empty
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require './lib/buzzwords/reuters.rb'
2
+
3
describe 'Reuters' do
  context '#headlines' do
    # Fetched lazily, once per example; this hits the live Reuters site.
    let(:headlines) { Reuters.new.headlines }

    it 'returns a non-empty array' do
      expect(headlines).to be_an_instance_of(Array)
      expect(headlines).not_to be_empty
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require './lib/buzzwords/washington_post.rb'
2
+
3
describe 'Washington Post' do
  context '#headlines' do
    # Fetched lazily, once per example; this hits the live WaPo site.
    let(:headlines) { WashingtonPost.new.headlines }

    it 'returns a non-empty array' do
      expect(headlines).to be_an_instance_of(Array)
      expect(headlines).not_to be_empty
    end
  end
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: buzzwords
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Elizabeth Tackett
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-10-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 2.7.6
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '2.7'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 2.7.6
33
+ - !ruby/object:Gem::Dependency
34
+ name: rspec
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '3.8'
40
+ type: :development
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '3.8'
47
+ description: A simple IRB tool that generates and displays the day's most popular
48
+ buzzwords from news publications including The New York Times, The Washington Post,
49
+ CNN, and Reuters.
50
+ email: emctackett@gmail.com
51
+ executables: []
52
+ extensions: []
53
+ extra_rdoc_files: []
54
+ files:
55
+ - lib/buzzwords.rb
56
+ - lib/buzzwords/cnn.rb
57
+ - lib/buzzwords/ny_times.rb
58
+ - lib/buzzwords/reuters.rb
59
+ - lib/buzzwords/stopwords.rb
60
+ - lib/buzzwords/washington_post.rb
61
+ - spec/buzzwords_spec.rb
62
+ - spec/cnn_spec.rb
63
+ - spec/ny_times_spec.rb
64
+ - spec/reuters_spec.rb
65
+ - spec/washington_post_spec.rb
66
+ homepage: http://github.com/emctackett
67
+ licenses:
68
+ - MIT
69
+ metadata: {}
70
+ post_install_message:
71
+ rdoc_options: []
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - "~>"
77
+ - !ruby/object:Gem::Version
78
+ version: 2.4.2
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ requirements: []
85
+ rubyforge_project:
86
+ rubygems_version: 2.6.13
87
+ signing_key:
88
+ specification_version: 4
89
+ summary: A basic IRB tool to instantly generate the day's media buzzwords.
90
+ test_files:
91
+ - spec/buzzwords_spec.rb
92
+ - spec/cnn_spec.rb
93
+ - spec/ny_times_spec.rb
94
+ - spec/reuters_spec.rb
95
+ - spec/washington_post_spec.rb