sport_headlines 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sport_headlines/cli.rb +5 -11
- data/lib/sport_headlines/scraper.rb +2 -4
- data/lib/sport_headlines/site.rb +1 -22
- data/lib/sport_headlines/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c4570739b5730954331ea2da8dce7ea88f5773c6
|
|
4
|
+
data.tar.gz: 28b40ebd16b513ab529f23ff0a0f970014ebc875
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 14948ea2982ce98a0ed03bf4da39da65b4ad3718c91d4d37f478b43307d70051417ad14db83b97922e3e480013d77cc2f288896c227f6397af5716b6e4ae1278
|
|
7
|
+
data.tar.gz: 0c76d614001d5c52bdc0214f7d31c5bb7d685440b2a222d3967738bc6c2177980d9307608572850a3f21f3da6fca7c1e48473841fd5e0d159bb6b95819e8aa95
|
data/lib/sport_headlines/cli.rb
CHANGED
|
@@ -7,7 +7,7 @@ class SportHeadlines::CLI
|
|
|
7
7
|
end
|
|
8
8
|
|
|
9
9
|
def start
|
|
10
|
-
|
|
10
|
+
sites = SportHeadlines::Site.all
|
|
11
11
|
scraper = SportHeadlines::Scraper
|
|
12
12
|
puts ""
|
|
13
13
|
input = nil
|
|
@@ -18,19 +18,13 @@ class SportHeadlines::CLI
|
|
|
18
18
|
puts ""
|
|
19
19
|
input = gets.strip
|
|
20
20
|
if input.to_i.between?(1,sites.size)
|
|
21
|
-
|
|
22
|
-
site.scrape_headlines!
|
|
23
|
-
# scraper.scrape_site_headlines(sites[input.to_i-1])
|
|
24
|
-
|
|
21
|
+
scraper.scrape_site_headlines(sites[input.to_i-1])
|
|
25
22
|
puts "Select an article to read its content."
|
|
26
23
|
puts ""
|
|
27
|
-
|
|
24
|
+
sites[input.to_i-1].list_articles
|
|
28
25
|
article_input = gets.strip
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
article.print_content
|
|
32
|
-
|
|
33
|
-
# sites[input.to_i-1].articles[article_input.to_i - 1].print_content
|
|
26
|
+
scraper.scrape_article(sites[input.to_i-1].articles[article_input.to_i - 1])
|
|
27
|
+
sites[input.to_i-1].articles[article_input.to_i - 1].print_content
|
|
34
28
|
end
|
|
35
29
|
end
|
|
36
30
|
end
|
|
data/lib/sport_headlines/scraper.rb
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
module SportHeadlines::Scraper
|
|
2
2
|
|
|
3
3
|
def self.scrape_site_headlines(site)
|
|
4
|
-
binding.pry
|
|
5
4
|
doc = Nokogiri::HTML(open(site.site_url))
|
|
6
5
|
site.clear_articles
|
|
7
6
|
if site.site_name == "ESPN"
|
|
@@ -9,9 +8,8 @@ module SportHeadlines::Scraper
|
|
|
9
8
|
new_article ||= SportHeadlines::Article.new
|
|
10
9
|
new_article.title ||= headline.search("a").text
|
|
11
10
|
new_article.article_url ||= site.site_url + headline.search("a").attribute("href").value
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
new_article.site = site # has two jobs, first, association the site with the article, then inform the site of the new article
|
|
11
|
+
site.add_article(new_article)
|
|
12
|
+
new_article.site = site
|
|
15
13
|
end
|
|
16
14
|
elsif site.site_name == "Bleacher Report"
|
|
17
15
|
doc.search(".headlineArticles li").each do |headline|
|
data/lib/sport_headlines/site.rb
CHANGED
|
@@ -6,7 +6,7 @@ class SportHeadlines::Site
|
|
|
6
6
|
"Bleacher Report" => "http://bleacherreport.com/",
|
|
7
7
|
"Pro Football Talk" => "http://profootballtalk.nbcsports.com/"
|
|
8
8
|
}
|
|
9
|
-
|
|
9
|
+
|
|
10
10
|
@@all = []
|
|
11
11
|
|
|
12
12
|
def initialize(site_name, site_url)
|
|
@@ -31,9 +31,7 @@ class SportHeadlines::Site
|
|
|
31
31
|
end
|
|
32
32
|
|
|
33
33
|
def add_article(article)
|
|
34
|
-
# self is the site, article is the article
|
|
35
34
|
self.articles << article
|
|
36
|
-
article.site = self # reciprocal relationship - when we add ana rticle to a site, we also inform the article of it's relationship to this site
|
|
37
35
|
end
|
|
38
36
|
|
|
39
37
|
def list_articles
|
|
@@ -42,23 +40,4 @@ class SportHeadlines::Site
|
|
|
42
40
|
end
|
|
43
41
|
end
|
|
44
42
|
|
|
45
|
-
def self.find(input)
|
|
46
|
-
self.all[input.to_i-1]
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def scrape_headlines!
|
|
50
|
-
SportHeadlines::Scraper.scrape_site_headlines(self)
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
def find_article(input)
|
|
54
|
-
self.articles[input.to_i - 1]
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
def scrape_article(input)
|
|
58
|
-
# we want this method to both scrape the article and return it.
|
|
59
|
-
article = self.find_article(input)
|
|
60
|
-
SportHeadlines::Scraper.scrape_article(article)
|
|
61
|
-
article
|
|
62
|
-
end
|
|
63
|
-
|
|
64
43
|
end
|