top-headlines 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2a2a2df27d4eb8df01ffb013a7922f875a5f6df8
4
- data.tar.gz: cc8e7ab0f3f42bdb9e840ea7bf9a046968518702
3
+ metadata.gz: 95addc3e40b6793ecd773848f2394e2b2903d032
4
+ data.tar.gz: 63e758e38c0b5d7017bbe6ae76f8fb0621ed4d41
5
5
  SHA512:
6
- metadata.gz: 00415e1a0c2754063dcb051eb6baa2997ab9d42210f4894ebaca5b63ddc67b80c765519cb6b28545c1206cba69de059e697b074f7acaabcb270c46740ff63553
7
- data.tar.gz: a385c06973c60440be61041f1a83b92e734a4ce27ed4b7c73307b8ae4c549da7b80a806481bed0c3505e9448141b0bec563a7d13fcd37d6d4519cab53a4d10e6
6
+ metadata.gz: cd46edc49f87d1b58a3cda6e4416ce6f15c56e387260ad2d2ce1d24e3de0c7452ecfda7bd61c2a3521ad6e0d3136c833c13a30951bc876a2f7052d5dab4ef5cc
7
+ data.tar.gz: e487345aa1328e64da21b8ac2d2bbb994a502ed7574aeba167dbe84dbde072126e305506f60c1572951ccff9ddfafa0b6ee65e40844b4abec0d6a733ff54240e
data/README.md CHANGED
@@ -20,13 +20,14 @@ Or install it yourself as:
20
20
 
21
21
  ## Usage
22
22
 
23
- In lib/top-headlines/source.rb, users will find a SOURCES hash. Users may add their own favorite news sources to the hash, so that gethe gem dynamically scrapes even more headlines.
24
-
23
+ In lib/top-headlines/source.rb, users will find a SOURCES hash. Users may add their own favorite news sources to the hash, so that the gem dynamically scrapes even more headlines.
24
+ ```
25
25
  If you'd like to view how I made the gem, here's a set of videos that captures almost all of it:
26
26
  1) https://drive.google.com/file/d/0B-xsMiWmDyyzcGk3MmlTc0xQOXM/view?usp=sharing
27
27
  2) https://drive.google.com/file/d/0B-xsMiWmDyyzNDFyS01icFMtams/view?usp=sharing
28
28
  3) https://drive.google.com/file/d/0B-xsMiWmDyyzU0VGNGJ5QkpaOUU/view?usp=sharing
29
29
  4) https://drive.google.com/file/d/0B-xsMiWmDyyzbEdzX0ZlOVcwM2M/view?usp=sharing
30
+ ```
30
31
 
31
32
  ## Development
32
33
 
@@ -99,8 +99,9 @@ class TopHeadlines::CLI
99
99
  puts "\nSelect another headline number to open full article in the browser."
100
100
  print "YOUR SELECTION: "
101
101
  @num = gets.strip.upcase
102
+ @input = @num if @num == "EXIT"
102
103
  end
103
- invalid_entry if @num != "EXIT"
104
+ invalid_entry unless @input == "EXIT"
104
105
  end
105
106
 
106
107
  def invalid_entry
@@ -4,28 +4,81 @@ class TopHeadlines::Source
4
4
  "CNN" => {
5
5
  url: "http://www.cnn.com/",
6
6
  headlines_selector: "div.column.zn__column--idx-1 span.cd__headline-text",
7
- urls_selector: "div.column.zn__column--idx-1"
7
+ urls_selector: "div.column.zn__column--idx-1",
8
+ child_selector: "a"
8
9
  },
9
10
  "MSNBC" => {
10
11
  url: "http://www.msnbc.com/",
11
12
  headlines_selector: "span.featured-slider-menu__item__link__title",
12
- urls_selector: "ul.featured-slider-menu"
13
+ urls_selector: "ul.featured-slider-menu",
14
+ child_selector: "a"
13
15
  },
14
16
  "FOX" => {
15
17
  url: "http://www.foxnews.com/",
16
18
  headlines_selector: "section#trending li a",
17
- urls_selector: "section#trending li"
19
+ urls_selector: "section#trending li",
20
+ child_selector: "a"
18
21
  },
19
22
  "NYTIMES" => {
20
23
  url: "http://www.nytimes.com/",
21
24
  headlines_selector: "section#top-news h2.story-heading a",
22
- urls_selector: "section#top-news h2.story-heading"
25
+ urls_selector: "section#top-news h2.story-heading",
26
+ child_selector: "a"
23
27
  },
24
28
  "BLOOMBERG" => {
25
29
  url: "http://www.bloomberg.com/",
26
30
  headlines_selector: "section.top-news-v3 h1 a",
27
- urls_selector: "section.top-news-v3 h1"
28
- }
31
+ urls_selector: "section.top-news-v3 h1",
32
+ child_selector: "a"
33
+ },
34
+ "GUARDIAN" => {
35
+ url: "http://www.theguardian.com/us",
36
+ headlines_selector: "section#headlines div.fc-container__inner div.fc-item__container a.u-faux-block-link__overlay.js-headline-text",
37
+ urls_selector: "section#headlines div.fc-container__inner div.fc-item__container",
38
+ child_selector: "a.u-faux-block-link__overlay.js-headline-text"
39
+ },
40
+ "HUFF POST" => {
41
+ url: "http://www.huffingtonpost.com/",
42
+ headlines_selector: "div#center_entries_container h2 a",
43
+ urls_selector: "div#center_entries_container h2",
44
+ child_selector: "a"
45
+ },
46
+ "FORBES" => {
47
+ url: "http://www.forbes.com/",
48
+ headlines_selector: "h4",
49
+ urls_selector: "h4",
50
+ child_selector: "a"
51
+ },
52
+ "WSJ" => {
53
+ url: "http://www.wsj.com/",
54
+ headlines_selector: "a.wsj-headline-link",
55
+ urls_selector: "div.cb-col",
56
+ child_selector: "a.wsj-headline-link"
57
+ },
58
+ # "REDDIT" => { ## 429 Error
59
+ # url: "https://www.reddit.com/r/news/",
60
+ # headlines_selector: "p.title a.title.may-blank",
61
+ # urls_selector: "p.title",
62
+ # child_selector: "a"
63
+ # },
64
+ # "BBC" => {
65
+ # url: "http://www.bbc.com/news",
66
+ # headlines_selector: "div.column--primary span.title-link__title-text",
67
+ # urls_selector: "div.column--primary",
68
+ # child_selector: "a.title-link" ## NEEDS WORK returns e.g. /news/world-middle-east-36180184
69
+ # },
70
+ # "CBS" => {
71
+ # url: "http://www.cbsnews.com/",
72
+ # headlines_selector: "div.col-5.nocontent h3.title",
73
+ # urls_selector: "div.col-5.nocontent",
74
+ # child_selector: "a" ## NEEDS WORK – only select a child of parent h3.title
75
+ # },
76
+ # "YAHOO" => {
77
+ # url: "https://www.yahoo.com/news/",
78
+ # headlines_selector: "div#mrt-node-Col1-1-WideHero h3",
79
+ # urls_selector: "div#mrt-node-Col1-1-WideHero",
80
+ # child_selector: "a" ## NEEDS WORK – only select a child of parent h3
81
+ # },
29
82
  }
30
83
 
31
84
  def self.all
@@ -46,15 +99,16 @@ class TopHeadlines::Source
46
99
  headlines_selector = source[:headlines_selector]
47
100
 
48
101
  doc = Nokogiri::HTML(open(page_url))
49
- headlines = doc.css(headlines_selector).map {|headline| headline.text}
102
+ headlines = doc.css(headlines_selector).map {|headline| headline.text.gsub("â", "'").gsub(/\n/,"").gsub(/\t/,"").strip}
50
103
  end
51
104
 
52
105
  def self.scrape_urls(source)
53
106
  source = SOURCES[source]
54
107
  page_url = source[:url]
55
108
  urls_selector = source[:urls_selector]
109
+ child_selector = source[:child_selector]
56
110
 
57
111
  doc = Nokogiri::HTML(open(page_url))
58
- urls = doc.css(urls_selector).children.css('a').map {|url| url.attribute('href').value[0] == 'h' ? url.attribute('href').value : page_url + url.attribute('href').value}
112
+ urls = doc.css(urls_selector).children.css(child_selector).map {|url| url.attribute('href').value[0] == 'h' ? url.attribute('href').value : page_url + url.attribute('href').value}
59
113
  end
60
114
  end
@@ -1,3 +1,3 @@
1
1
  module TopHeadlines
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: top-headlines
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - zachnewburgh
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-04-30 00:00:00.000000000 Z
11
+ date: 2016-05-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -104,6 +104,7 @@ files:
104
104
  - lib/top-headlines/source.rb
105
105
  - lib/top-headlines/version.rb
106
106
  - top-headlines-0.1.0.gem
107
+ - top-headlines-0.1.1.gem
107
108
  - top-headlines.gemspec
108
109
  homepage: https://github.com/zachnewburgh/top-headlines-cli-gem
109
110
  licenses: