top-headlines 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2a2a2df27d4eb8df01ffb013a7922f875a5f6df8
4
- data.tar.gz: cc8e7ab0f3f42bdb9e840ea7bf9a046968518702
3
+ metadata.gz: 95addc3e40b6793ecd773848f2394e2b2903d032
4
+ data.tar.gz: 63e758e38c0b5d7017bbe6ae76f8fb0621ed4d41
5
5
  SHA512:
6
- metadata.gz: 00415e1a0c2754063dcb051eb6baa2997ab9d42210f4894ebaca5b63ddc67b80c765519cb6b28545c1206cba69de059e697b074f7acaabcb270c46740ff63553
7
- data.tar.gz: a385c06973c60440be61041f1a83b92e734a4ce27ed4b7c73307b8ae4c549da7b80a806481bed0c3505e9448141b0bec563a7d13fcd37d6d4519cab53a4d10e6
6
+ metadata.gz: cd46edc49f87d1b58a3cda6e4416ce6f15c56e387260ad2d2ce1d24e3de0c7452ecfda7bd61c2a3521ad6e0d3136c833c13a30951bc876a2f7052d5dab4ef5cc
7
+ data.tar.gz: e487345aa1328e64da21b8ac2d2bbb994a502ed7574aeba167dbe84dbde072126e305506f60c1572951ccff9ddfafa0b6ee65e40844b4abec0d6a733ff54240e
data/README.md CHANGED
@@ -20,13 +20,14 @@ Or install it yourself as:
20
20
 
21
21
  ## Usage
22
22
 
23
- In lib/top-headlines/source.rb, users will find a SOURCES hash. Users may add their own favorite news sources to the hash, so that gethe gem dynamically scrapes even more headlines.
24
-
23
+ In lib/top-headlines/source.rb, users will find a SOURCES hash. Users may add their own favorite news sources to the hash, so that the gem dynamically scrapes even more headlines.
24
+ ```
25
25
  If you'd like to view how I made the gem, here's a set of videos that captures almost all of it:
26
26
  1) https://drive.google.com/file/d/0B-xsMiWmDyyzcGk3MmlTc0xQOXM/view?usp=sharing
27
27
  2) https://drive.google.com/file/d/0B-xsMiWmDyyzNDFyS01icFMtams/view?usp=sharing
28
28
  3) https://drive.google.com/file/d/0B-xsMiWmDyyzU0VGNGJ5QkpaOUU/view?usp=sharing
29
29
  4) https://drive.google.com/file/d/0B-xsMiWmDyyzbEdzX0ZlOVcwM2M/view?usp=sharing
30
+ ```
30
31
 
31
32
  ## Development
32
33
 
@@ -99,8 +99,9 @@ class TopHeadlines::CLI
99
99
  puts "\nSelect another headline number to open full article in the browser."
100
100
  print "YOUR SELECTION: "
101
101
  @num = gets.strip.upcase
102
+ @input = @num if @num == "EXIT"
102
103
  end
103
- invalid_entry if @num != "EXIT"
104
+ invalid_entry unless @input == "EXIT"
104
105
  end
105
106
 
106
107
  def invalid_entry
data/lib/top-headlines/source.rb CHANGED
@@ -4,28 +4,81 @@ class TopHeadlines::Source
4
4
  "CNN" => {
5
5
  url: "http://www.cnn.com/",
6
6
  headlines_selector: "div.column.zn__column--idx-1 span.cd__headline-text",
7
- urls_selector: "div.column.zn__column--idx-1"
7
+ urls_selector: "div.column.zn__column--idx-1",
8
+ child_selector: "a"
8
9
  },
9
10
  "MSNBC" => {
10
11
  url: "http://www.msnbc.com/",
11
12
  headlines_selector: "span.featured-slider-menu__item__link__title",
12
- urls_selector: "ul.featured-slider-menu"
13
+ urls_selector: "ul.featured-slider-menu",
14
+ child_selector: "a"
13
15
  },
14
16
  "FOX" => {
15
17
  url: "http://www.foxnews.com/",
16
18
  headlines_selector: "section#trending li a",
17
- urls_selector: "section#trending li"
19
+ urls_selector: "section#trending li",
20
+ child_selector: "a"
18
21
  },
19
22
  "NYTIMES" => {
20
23
  url: "http://www.nytimes.com/",
21
24
  headlines_selector: "section#top-news h2.story-heading a",
22
- urls_selector: "section#top-news h2.story-heading"
25
+ urls_selector: "section#top-news h2.story-heading",
26
+ child_selector: "a"
23
27
  },
24
28
  "BLOOMBERG" => {
25
29
  url: "http://www.bloomberg.com/",
26
30
  headlines_selector: "section.top-news-v3 h1 a",
27
- urls_selector: "section.top-news-v3 h1"
28
- }
31
+ urls_selector: "section.top-news-v3 h1",
32
+ child_selector: "a"
33
+ },
34
+ "GUARDIAN" => {
35
+ url: "http://www.theguardian.com/us",
36
+ headlines_selector: "section#headlines div.fc-container__inner div.fc-item__container a.u-faux-block-link__overlay.js-headline-text",
37
+ urls_selector: "section#headlines div.fc-container__inner div.fc-item__container",
38
+ child_selector: "a.u-faux-block-link__overlay.js-headline-text"
39
+ },
40
+ "HUFF POST" => {
41
+ url: "http://www.huffingtonpost.com/",
42
+ headlines_selector: "div#center_entries_container h2 a",
43
+ urls_selector: "div#center_entries_container h2",
44
+ child_selector: "a"
45
+ },
46
+ "FORBES" => {
47
+ url: "http://www.forbes.com/",
48
+ headlines_selector: "h4",
49
+ urls_selector: "h4",
50
+ child_selector: "a"
51
+ },
52
+ "WSJ" => {
53
+ url: "http://www.wsj.com/",
54
+ headlines_selector: "a.wsj-headline-link",
55
+ urls_selector: "div.cb-col",
56
+ child_selector: "a.wsj-headline-link"
57
+ },
58
+ # "REDDIT" => { ## 429 Error
59
+ # url: "https://www.reddit.com/r/news/",
60
+ # headlines_selector: "p.title a.title.may-blank",
61
+ # urls_selector: "p.title",
62
+ # child_selector: "a"
63
+ # },
64
+ # "BBC" => {
65
+ # url: "http://www.bbc.com/news",
66
+ # headlines_selector: "div.column--primary span.title-link__title-text",
67
+ # urls_selector: "div.column--primary",
68
+ # child_selector: "a.title-link" ## NEEDS WORK returns e.g. /news/world-middle-east-36180184
69
+ # },
70
+ # "CBS" => {
71
+ # url: "http://www.cbsnews.com/",
72
+ # headlines_selector: "div.col-5.nocontent h3.title",
73
+ # urls_selector: "div.col-5.nocontent",
74
+ # child_selector: "a" ## NEEDS WORK – only select a child of parent h3.title
75
+ # },
76
+ # "YAHOO" => {
77
+ # url: "https://www.yahoo.com/news/",
78
+ # headlines_selector: "div#mrt-node-Col1-1-WideHero h3",
79
+ # urls_selector: "div#mrt-node-Col1-1-WideHero",
80
+ # child_selector: "a" ## NEEDS WORK – only select a child of parent h3
81
+ # },
29
82
  }
30
83
 
31
84
  def self.all
@@ -46,15 +99,16 @@ class TopHeadlines::Source
46
99
  headlines_selector = source[:headlines_selector]
47
100
 
48
101
  doc = Nokogiri::HTML(open(page_url))
49
- headlines = doc.css(headlines_selector).map {|headline| headline.text}
102
+ headlines = doc.css(headlines_selector).map {|headline| headline.text.gsub("â", "'").gsub(/\n/,"").gsub(/\t/,"").strip}
50
103
  end
51
104
 
52
105
  def self.scrape_urls(source)
53
106
  source = SOURCES[source]
54
107
  page_url = source[:url]
55
108
  urls_selector = source[:urls_selector]
109
+ child_selector = source[:child_selector]
56
110
 
57
111
  doc = Nokogiri::HTML(open(page_url))
58
- urls = doc.css(urls_selector).children.css('a').map {|url| url.attribute('href').value[0] == 'h' ? url.attribute('href').value : page_url + url.attribute('href').value}
112
+ urls = doc.css(urls_selector).children.css(child_selector).map {|url| url.attribute('href').value[0] == 'h' ? url.attribute('href').value : page_url + url.attribute('href').value}
59
113
  end
60
114
  end
data/lib/top-headlines/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module TopHeadlines
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: top-headlines
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - zachnewburgh
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-04-30 00:00:00.000000000 Z
11
+ date: 2016-05-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -104,6 +104,7 @@ files:
104
104
  - lib/top-headlines/source.rb
105
105
  - lib/top-headlines/version.rb
106
106
  - top-headlines-0.1.0.gem
107
+ - top-headlines-0.1.1.gem
107
108
  - top-headlines.gemspec
108
109
  homepage: https://github.com/zachnewburgh/top-headlines-cli-gem
109
110
  licenses: