RubyGems - top-headlines - Versions diffs - 0.1.1 → 0.1.2 - Mend

top-headlines 0.1.1 → 0.1.2

Files changed (7)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 2a2a2df27d4eb8df01ffb013a7922f875a5f6df8
-  data.tar.gz: cc8e7ab0f3f42bdb9e840ea7bf9a046968518702
+  metadata.gz: 95addc3e40b6793ecd773848f2394e2b2903d032
+  data.tar.gz: 63e758e38c0b5d7017bbe6ae76f8fb0621ed4d41
 SHA512:
-  metadata.gz: 00415e1a0c2754063dcb051eb6baa2997ab9d42210f4894ebaca5b63ddc67b80c765519cb6b28545c1206cba69de059e697b074f7acaabcb270c46740ff63553
-  data.tar.gz: a385c06973c60440be61041f1a83b92e734a4ce27ed4b7c73307b8ae4c549da7b80a806481bed0c3505e9448141b0bec563a7d13fcd37d6d4519cab53a4d10e6
+  metadata.gz: cd46edc49f87d1b58a3cda6e4416ce6f15c56e387260ad2d2ce1d24e3de0c7452ecfda7bd61c2a3521ad6e0d3136c833c13a30951bc876a2f7052d5dab4ef5cc
+  data.tar.gz: e487345aa1328e64da21b8ac2d2bbb994a502ed7574aeba167dbe84dbde072126e305506f60c1572951ccff9ddfafa0b6ee65e40844b4abec0d6a733ff54240e

data/README.md CHANGED Viewed

@@ -20,13 +20,14 @@ Or install it yourself as:
 ## Usage
-In lib/top-headlines/source.rb, users will find a SOURCES hash. Users may add their own favorite news sources to the hash, so that gethe gem dynamically scrapes even more headlines.
+In lib/top-headlines/source.rb, users will find a SOURCES hash. Users may add their own favorite news sources to the hash, so that the gem dynamically scrapes even more headlines.
+```
 If you'd like to view how I made the gem, here's a set of videos that captures almost all of it:
   1) https://drive.google.com/file/d/0B-xsMiWmDyyzcGk3MmlTc0xQOXM/view?usp=sharing
   2) https://drive.google.com/file/d/0B-xsMiWmDyyzNDFyS01icFMtams/view?usp=sharing
   3) https://drive.google.com/file/d/0B-xsMiWmDyyzU0VGNGJ5QkpaOUU/view?usp=sharing
   4) https://drive.google.com/file/d/0B-xsMiWmDyyzbEdzX0ZlOVcwM2M/view?usp=sharing
+```
 ## Development

data/lib/top-headlines/cli.rb CHANGED Viewed

@@ -99,8 +99,9 @@ class TopHeadlines::CLI
         puts "\nSelect another headline number to open full article in the browser."
         print "YOUR SELECTION: "
         @num = gets.strip.upcase
+        @input = @num if @num == "EXIT"
       end
-    invalid_entry if @num != "EXIT"
+    invalid_entry unless @input == "EXIT"
   end
   def invalid_entry

data/lib/top-headlines/source.rb CHANGED Viewed

@@ -4,28 +4,81 @@ class TopHeadlines::Source
     "CNN" => {
       url: "http://www.cnn.com/",
       headlines_selector: "div.column.zn__column--idx-1 span.cd__headline-text",
-      urls_selector: "div.column.zn__column--idx-1"
+      urls_selector: "div.column.zn__column--idx-1",
+      child_selector: "a"
       },
     "MSNBC" => {
       url: "http://www.msnbc.com/",
       headlines_selector: "span.featured-slider-menu__item__link__title",
-      urls_selector: "ul.featured-slider-menu"
+      urls_selector: "ul.featured-slider-menu",
+      child_selector: "a"
     },
     "FOX" => {
       url: "http://www.foxnews.com/",
       headlines_selector: "section#trending li a",
-      urls_selector: "section#trending li"
+      urls_selector: "section#trending li",
+      child_selector: "a"
     },
     "NYTIMES" => {
       url: "http://www.nytimes.com/",
       headlines_selector: "section#top-news h2.story-heading a",
-      urls_selector: "section#top-news h2.story-heading"
+      urls_selector: "section#top-news h2.story-heading",
+      child_selector: "a"
     },
     "BLOOMBERG" => {
       url: "http://www.bloomberg.com/",
       headlines_selector: "section.top-news-v3 h1 a",
-      urls_selector: "section.top-news-v3 h1"
-    }
+      urls_selector: "section.top-news-v3 h1",
+      child_selector: "a"
+    },
+    "GUARDIAN" => {
+      url: "http://www.theguardian.com/us",
+      headlines_selector: "section#headlines div.fc-container__inner div.fc-item__container a.u-faux-block-link__overlay.js-headline-text",
+      urls_selector: "section#headlines div.fc-container__inner div.fc-item__container",
+      child_selector: "a.u-faux-block-link__overlay.js-headline-text"
+    },
+    "HUFF POST" => {
+      url: "http://www.huffingtonpost.com/",
+      headlines_selector: "div#center_entries_container h2 a",
+      urls_selector: "div#center_entries_container h2",
+      child_selector: "a"
+    },
+    "FORBES" => {
+      url: "http://www.forbes.com/",
+      headlines_selector: "h4",
+      urls_selector: "h4",
+      child_selector: "a"
+    },
+    "WSJ" => {
+      url: "http://www.wsj.com/",
+      headlines_selector: "a.wsj-headline-link",
+      urls_selector: "div.cb-col",
+      child_selector: "a.wsj-headline-link"
+    },
+    # "REDDIT" => { ## 429 Error
+    #   url: "https://www.reddit.com/r/news/",
+    #   headlines_selector: "p.title a.title.may-blank",
+    #   urls_selector: "p.title",
+    #   child_selector: "a"
+    # },
+    # "BBC" => {
+    #   url: "http://www.bbc.com/news",
+    #   headlines_selector: "div.column--primary span.title-link__title-text",
+    #   urls_selector: "div.column--primary",
+    #   child_selector: "a.title-link" ## NEEDS WORK returns e.g. /news/world-middle-east-36180184
+    # },
+    # "CBS" => {
+    #   url: "http://www.cbsnews.com/",
+    #   headlines_selector: "div.col-5.nocontent h3.title",
+    #   urls_selector: "div.col-5.nocontent",
+    #   child_selector: "a" ## NEEDS WORK – only select a child of parent h3.title
+    # },
+    # "YAHOO" => {
+    #   url: "https://www.yahoo.com/news/",
+    #   headlines_selector: "div#mrt-node-Col1-1-WideHero h3",
+    #   urls_selector: "div#mrt-node-Col1-1-WideHero",
+    #   child_selector: "a" ## NEEDS WORK – only select a child of parent h3
+    # },
   }
   def self.all
@@ -46,15 +99,16 @@ class TopHeadlines::Source
     headlines_selector = source[:headlines_selector]
     doc = Nokogiri::HTML(open(page_url))
-    headlines = doc.css(headlines_selector).map {|headline| headline.text}
+    headlines = doc.css(headlines_selector).map {|headline| headline.text.gsub("â", "'").gsub(/\n/,"").gsub(/\t/,"").strip}
   end
   def self.scrape_urls(source)
     source = SOURCES[source]
     page_url = source[:url]
     urls_selector = source[:urls_selector]
+    child_selector = source[:child_selector]
     doc = Nokogiri::HTML(open(page_url))
-    urls = doc.css(urls_selector).children.css('a').map {|url| url.attribute('href').value[0] == 'h' ? url.attribute('href').value : page_url + url.attribute('href').value}
+    urls = doc.css(urls_selector).children.css(child_selector).map {|url| url.attribute('href').value[0] == 'h' ? url.attribute('href').value : page_url + url.attribute('href').value}
   end
 end

data/lib/top-headlines/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module TopHeadlines
-  VERSION = "0.1.1"
+  VERSION = "0.1.2"
 end

data/top-headlines-0.1.1.gem ADDED Viewed

Binary file

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: top-headlines
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - zachnewburgh
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-04-30 00:00:00.000000000 Z
+date: 2016-05-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -104,6 +104,7 @@ files:
 - lib/top-headlines/source.rb
 - lib/top-headlines/version.rb
 - top-headlines-0.1.0.gem
+- top-headlines-0.1.1.gem
 - top-headlines.gemspec
 homepage: https://github.com/zachnewburgh/top-headlines-cli-gem
 licenses: