RubyGems - top-headlines - Versions diffs - 0.1.1 → 0.1.2 - Mend

top-headlines 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 2a2a2df27d4eb8df01ffb013a7922f875a5f6df8
-  data.tar.gz: cc8e7ab0f3f42bdb9e840ea7bf9a046968518702
+  metadata.gz: 95addc3e40b6793ecd773848f2394e2b2903d032
+  data.tar.gz: 63e758e38c0b5d7017bbe6ae76f8fb0621ed4d41
 SHA512:
-  metadata.gz: 00415e1a0c2754063dcb051eb6baa2997ab9d42210f4894ebaca5b63ddc67b80c765519cb6b28545c1206cba69de059e697b074f7acaabcb270c46740ff63553
-  data.tar.gz: a385c06973c60440be61041f1a83b92e734a4ce27ed4b7c73307b8ae4c549da7b80a806481bed0c3505e9448141b0bec563a7d13fcd37d6d4519cab53a4d10e6
+  metadata.gz: cd46edc49f87d1b58a3cda6e4416ce6f15c56e387260ad2d2ce1d24e3de0c7452ecfda7bd61c2a3521ad6e0d3136c833c13a30951bc876a2f7052d5dab4ef5cc
+  data.tar.gz: e487345aa1328e64da21b8ac2d2bbb994a502ed7574aeba167dbe84dbde072126e305506f60c1572951ccff9ddfafa0b6ee65e40844b4abec0d6a733ff54240e

data/README.md CHANGED Viewed

@@ -20,13 +20,14 @@ Or install it yourself as:
 ## Usage
-In lib/top-headlines/source.rb, users will find a SOURCES hash. Users may add their own favorite news sources to the hash, so that gethe gem dynamically scrapes even more headlines.
+In lib/top-headlines/source.rb, users will find a SOURCES hash. Users may add their own favorite news sources to the hash, so that the gem dynamically scrapes even more headlines.
+```
 If you'd like to view how I made the gem, here's a set of videos that captures almost all of it:
   1) https://drive.google.com/file/d/0B-xsMiWmDyyzcGk3MmlTc0xQOXM/view?usp=sharing
   2) https://drive.google.com/file/d/0B-xsMiWmDyyzNDFyS01icFMtams/view?usp=sharing
   3) https://drive.google.com/file/d/0B-xsMiWmDyyzU0VGNGJ5QkpaOUU/view?usp=sharing
   4) https://drive.google.com/file/d/0B-xsMiWmDyyzbEdzX0ZlOVcwM2M/view?usp=sharing
+```
 ## Development

data/lib/top-headlines/cli.rb CHANGED Viewed

@@ -99,8 +99,9 @@ class TopHeadlines::CLI
         puts "\nSelect another headline number to open full article in the browser."
         print "YOUR SELECTION: "
         @num = gets.strip.upcase
+        @input = @num if @num == "EXIT"
       end
-    invalid_entry if @num != "EXIT"
+    invalid_entry unless @input == "EXIT"
   end
   def invalid_entry

data/lib/top-headlines/source.rb CHANGED Viewed

@@ -4,28 +4,81 @@ class TopHeadlines::Source
     "CNN" => {
       url: "http://www.cnn.com/",
       headlines_selector: "div.column.zn__column--idx-1 span.cd__headline-text",
-      urls_selector: "div.column.zn__column--idx-1"
+      urls_selector: "div.column.zn__column--idx-1",
+      child_selector: "a"
       },
     "MSNBC" => {
       url: "http://www.msnbc.com/",
       headlines_selector: "span.featured-slider-menu__item__link__title",
-      urls_selector: "ul.featured-slider-menu"
+      urls_selector: "ul.featured-slider-menu",
+      child_selector: "a"
     },
     "FOX" => {
       url: "http://www.foxnews.com/",
       headlines_selector: "section#trending li a",
-      urls_selector: "section#trending li"
+      urls_selector: "section#trending li",
+      child_selector: "a"
     },
     "NYTIMES" => {
       url: "http://www.nytimes.com/",
       headlines_selector: "section#top-news h2.story-heading a",
-      urls_selector: "section#top-news h2.story-heading"
+      urls_selector: "section#top-news h2.story-heading",
+      child_selector: "a"
     },
     "BLOOMBERG" => {
       url: "http://www.bloomberg.com/",
       headlines_selector: "section.top-news-v3 h1 a",
-      urls_selector: "section.top-news-v3 h1"
-    }
+      urls_selector: "section.top-news-v3 h1",
+      child_selector: "a"
+    },
+    "GUARDIAN" => {
+      url: "http://www.theguardian.com/us",
+      headlines_selector: "section#headlines div.fc-container__inner div.fc-item__container a.u-faux-block-link__overlay.js-headline-text",
+      urls_selector: "section#headlines div.fc-container__inner div.fc-item__container",
+      child_selector: "a.u-faux-block-link__overlay.js-headline-text"
+    },
+    "HUFF POST" => {
+      url: "http://www.huffingtonpost.com/",
+      headlines_selector: "div#center_entries_container h2 a",
+      urls_selector: "div#center_entries_container h2",
+      child_selector: "a"
+    },
+    "FORBES" => {
+      url: "http://www.forbes.com/",
+      headlines_selector: "h4",
+      urls_selector: "h4",
+      child_selector: "a"
+    },
+    "WSJ" => {
+      url: "http://www.wsj.com/",
+      headlines_selector: "a.wsj-headline-link",
+      urls_selector: "div.cb-col",
+      child_selector: "a.wsj-headline-link"
+    },
+    # "REDDIT" => { ## 429 Error
+    #   url: "https://www.reddit.com/r/news/",
+    #   headlines_selector: "p.title a.title.may-blank",
+    #   urls_selector: "p.title",
+    #   child_selector: "a"
+    # },
+    # "BBC" => {
+    #   url: "http://www.bbc.com/news",
+    #   headlines_selector: "div.column--primary span.title-link__title-text",
+    #   urls_selector: "div.column--primary",
+    #   child_selector: "a.title-link" ## NEEDS WORK returns e.g. /news/world-middle-east-36180184
+    # },
+    # "CBS" => {
+    #   url: "http://www.cbsnews.com/",
+    #   headlines_selector: "div.col-5.nocontent h3.title",
+    #   urls_selector: "div.col-5.nocontent",
+    #   child_selector: "a" ## NEEDS WORK – only select a child of parent h3.title
+    # },
+    # "YAHOO" => {
+    #   url: "https://www.yahoo.com/news/",
+    #   headlines_selector: "div#mrt-node-Col1-1-WideHero h3",
+    #   urls_selector: "div#mrt-node-Col1-1-WideHero",
+    #   child_selector: "a" ## NEEDS WORK – only select a child of parent h3
+    # },
   }
   def self.all
@@ -46,15 +99,16 @@ class TopHeadlines::Source
     headlines_selector = source[:headlines_selector]
     doc = Nokogiri::HTML(open(page_url))
-    headlines = doc.css(headlines_selector).map {|headline| headline.text}
+    headlines = doc.css(headlines_selector).map {|headline| headline.text.gsub("â", "'").gsub(/\n/,"").gsub(/\t/,"").strip}
   end
   def self.scrape_urls(source)
     source = SOURCES[source]
     page_url = source[:url]
     urls_selector = source[:urls_selector]
+    child_selector = source[:child_selector]
     doc = Nokogiri::HTML(open(page_url))
-    urls = doc.css(urls_selector).children.css('a').map {|url| url.attribute('href').value[0] == 'h' ? url.attribute('href').value : page_url + url.attribute('href').value}
+    urls = doc.css(urls_selector).children.css(child_selector).map {|url| url.attribute('href').value[0] == 'h' ? url.attribute('href').value : page_url + url.attribute('href').value}
   end
 end

data/lib/top-headlines/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module TopHeadlines
-  VERSION = "0.1.1"
+  VERSION = "0.1.2"
 end

data/top-headlines-0.1.1.gem ADDED Viewed

Binary file

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: top-headlines
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - zachnewburgh
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-04-30 00:00:00.000000000 Z
+date: 2016-05-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -104,6 +104,7 @@ files:
 - lib/top-headlines/source.rb
 - lib/top-headlines/version.rb
 - top-headlines-0.1.0.gem
+- top-headlines-0.1.1.gem
 - top-headlines.gemspec
 homepage: https://github.com/zachnewburgh/top-headlines-cli-gem
 licenses: