nytimes_cli 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/console +14 -0
- data/bin/nytimes_cli +10 -0
- data/bin/setup +8 -0
- data/lib/article.rb +84 -0
- data/lib/nytimes_cli/cli.rb +163 -0
- data/lib/nytimes_cli/navagation.rb +32 -0
- data/lib/nytimes_cli/version.rb +3 -0
- data/lib/nytimes_cli.rb +0 -0
- data/lib/scraper.rb +69 -0
- metadata +177 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 485c4d2f7a55939cdd8e07e1d4dfa0abb180527d
|
4
|
+
data.tar.gz: ee2ad28060dbcb739b3c3578ffbf42066768f9a3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 66367a084ca445b35f9474dd40d25e395637078836dbf8cd6df71d7b837a9d27cc9d134f99c458d1a84551f363b76fcd036f73dc7753aa9b2ee40a3706ff3b17
|
7
|
+
data.tar.gz: 7e0e385204a68906811263591d24ad0102a37b4783dd0fe0a0b9aa95bbbb72c3fb0303f1f314a39c6aba77ae4f70e8db1251942c931341ae36d93578dd545a7c
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "nytimes_cli"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/nytimes_cli
ADDED
data/bin/setup
ADDED
data/lib/article.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# In-memory model of one article, plus a class-level registry of the
# headline articles built by create_articles_from_array.
class Article

  attr_accessor :title, :author, :url, :story

  # Registry of headline articles shared by the class-level helpers below.
  @@articles = []

  # Note the positional order: title, url, author, story.
  def initialize(title = nil, url = nil, author = nil, story = nil)
    @title = title
    @author = author
    @url = url
    @story = story
  end

  # Builds an Article (title and url only) for each hash in +array+ and
  # appends it to the registry.
  #
  # A hash whose :url is already present in the registry is skipped, so
  # calling this repeatedly with the same scrape result does not pile up
  # duplicate entries.
  #
  # Returns the registry array itself (the same object as Article.all).
  def self.create_articles_from_array(array)
    array.each do |article|
      url = article[:url]
      next if url && @@articles.any? { |known| known.url == url }

      @@articles << Article.new(article[:title], url)
    end
    @@articles
  end

  # Returns the registry of headline articles.
  def self.all
    @@articles
  end

  # Builds a standalone Article from a hash with :title, :url, :author and
  # :story keys. The story text is word-wrapped; the article is NOT added
  # to the registry.
  def self.create_article_from_hash(hash)
    a = Article.new
    a.title = hash[:title]
    a.url = hash[:url]
    a.author = hash[:author]
    a.story = word_wrap(hash[:story])
    a
  end

  # Wraps every newline-separated paragraph of +text+ to 78 columns.
  # Each wrapped paragraph is prefixed with a newline, so a non-empty
  # result always starts with "\n". A nil +text+ is treated as empty.
  def self.word_wrap(text)
    text.to_s.split("\n").map { |paragraph| "\n" + p_wrap(paragraph, 78) }.join
  end

  # Greedy word wrap, adapted from the Ruby Cookbook:
  # https://www.safaribooksonline.com/library/view/ruby-cookbook/0596523696/ch01s15.html
  #
  # Splits +s+ on whitespace and packs words into lines of at most +width+
  # characters, joined by "\n". A single word longer than +width+ is kept
  # whole on a line of its own.
  def self.p_wrap(s, width = 78)
    lines = []
    line = nil
    s.to_s.split(/\s+/).each do |word|
      if line.nil? || line.empty?
        # Nothing on the current line yet: never flush an empty line, even
        # when the word alone exceeds the width.
        line = word.dup
      elsif line.size + word.size >= width
        lines << line
        line = word.dup
      else
        line << " " << word
      end
    end
    lines << (line || "")
    lines.join("\n")
  end

end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
# cli controller
|
2
|
+
|
3
|
+
# Command-line controller: prints front-page headlines, pages through them
# and displays individual articles, driven by commands read from stdin.
class NytimesCli::CLI

  # Only the writer is generated: the reader name is taken by #index(num)
  # below, which would override a generated reader anyway.
  attr_writer :index

  # Entry point: prints the banner and the first ten headlines, sets up
  # paging state, then enters the interactive command loop.
  def call
    2.times { puts "" }
    puts "Welcome to Nytimes CLI"
    puts "All articles © The New York Times"
    puts "http://www.nytimes.com"
    2.times { puts "" }
    front
    2.times { puts "" }
    puts "There are currently " + num_articles + " articles on the front page of nytimes.com."
    puts ""
    index(get_articles.length)
    interface_logic
  end

  # Scrapes the front page and registers the headlines as Article objects.
  # Returns the list produced by Article.create_articles_from_array.
  def get_articles_from_front_page
    Article.create_articles_from_array(Scraper.scrape_front_page)
  end

  # Number of registered articles, as a String.
  def num_articles
    get_articles.length.to_s
  end

  # Converts a 1-based user entry (String or Integer) to a 0-based index.
  def input_to_index(num)
    num.to_i - 1
  end

  # Prints the first ten headlines.
  def front
    print_articles(0, 9)
  end

  # Prints the numbered headlines in the inclusive index range
  # first_index..last_index.
  #
  # The front page is scraped only when no articles are registered yet;
  # later listings reuse the registered articles instead of hitting the
  # network again.
  def print_articles(first_index, last_index)
    articles = get_articles
    articles = get_articles_from_front_page if articles.empty?

    # A range starting past the end of the list slices to nil.
    (articles[first_index..last_index] || []).each do |article|
      position = articles.index(article) + 1
      puts position.to_s + ". " + article.title
    end
    2.times { puts "" }
  end

  # Creates the paging state for +num+ articles.
  def index(num)
    @index = Navagation.new(num)
  end

  # All registered articles.
  def get_articles
    Article.all
  end

  # URL of the article at 1-based position +num+.
  def get_article_url(num)
    get_articles[input_to_index(num)].url
  end

  # Scrapes +url+ and returns the resulting Article.
  def get_article(url)
    Article.create_article_from_hash(Scraper.scrape_article(url))
  end

  # Fetches and prints the article at 1-based position +num+. Positions
  # outside the registered list are reported as invalid input instead of
  # raising or silently wrapping around to the end of the list.
  def print_article(num)
    position = input_to_index(num)
    unless position >= 0 && position < get_articles.length
      invalid_entry
      return
    end

    a = get_article(get_article_url(num))
    2.times { puts "" }
    puts a.title
    puts ""
    puts a.author
    puts ""
    puts a.story
    2.times { puts "" }
    puts "Link to nytimes.com: " + a.url
    2.times { puts "" }
  end

  # Prints the list of supported commands.
  def show_manual
    puts "man -> displays manual"
    puts "front -> shows first ten headlines"
    puts "more -> shows 10 more headlines"
    puts "less -> shows previous 10 headlines"
    puts "exit -> terminates program"
    2.times { puts "" }
    puts "All articles © The New York Times"
    puts "http://www.nytimes.com"
    2.times { puts "" }
  end

  # Advances the paging window and prints it.
  def more
    @index.more
    print_articles(@index.start_index, @index.end_index)
  end

  # Moves the paging window back and prints it.
  def less
    @index.less
    print_articles(@index.start_index, @index.end_index)
  end

  # Interactive loop: reads one command per line until "exit" or
  # end-of-input.
  def interface_logic
    loop do
      puts "Enter the number you would like to read or 'man' to see a list of commands."
      input = gets
      # gets returns nil at end-of-input (e.g. Ctrl-D); treat that as "exit".
      answer = input.nil? ? "exit" : input.chomp
      command = answer.upcase

      if command == "EXIT"
        puts ""
        puts "Ciao. Auf Wiedersehen. Goodbye."
        2.times { puts "" }
        exit
      elsif answer.to_i > 0
        # String#to_i yields 0 for non-numeric text, so any positive value
        # is an article number. (The former Fixnum class check no longer
        # works: Fixnum was removed from Ruby.)
        print_article(answer)
      elsif command == "MAN"
        show_manual
      elsif command == "MORE"
        more
      elsif command == "LESS"
        less
      elsif command == "FRONT"
        print_articles(0, 9)
      else
        invalid_entry
      end
    end
  end

  private

  # Tells the user their input was not understood.
  def invalid_entry
    2.times { puts "" }
    puts "That was not a valid entry. Please try again."
    2.times { puts "" }
  end

end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Tracks which window of headlines is currently shown. start_index and
# end_index are inclusive, 0-based positions into the article list.
class Navagation

  attr_accessor :start_index, :end_index, :articles_length

  # length - total number of articles available for paging.
  def initialize(length)
    @start_index = 0
    @end_index = 9
    @articles_length = length
  end

  # Moves the window forward by ten. When a full step would run past the
  # end of the list, the window is pinned to the last ten articles (or to
  # the whole list when it holds fewer than ten).
  def more
    if @end_index + 10 < @articles_length
      @start_index += 10
      @end_index += 10
    else
      # Valid indexes stop at length - 1; clamp at 0 so short or empty
      # lists never produce a negative index.
      @start_index = [@articles_length - 10, 0].max
      @end_index = [@articles_length - 1, 0].max
    end
  end

  # Moves the window back by ten, or resets it to the first ten articles
  # when fewer than ten precede the current window.
  def less
    if @start_index >= 10
      @start_index -= 10
      @end_index = @start_index + 9
    else
      @start_index = 0
      @end_index = 9
    end
  end

end
|
data/lib/nytimes_cli.rb
ADDED
File without changes
|
data/lib/scraper.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
|
3
|
+
# Fetches nytimes.com pages with Mechanize and extracts headline and
# article data as plain hashes.
class Scraper

  attr_accessor :title, :url, :author, :story

  BASE_URL = "http://www.nytimes.com"

  # Fetches BASE_URL and returns an array of { :title, :url } hashes, one
  # per ".story-heading" element that has link text and an href.
  def self.scrape_front_page
    agent = Mechanize.new
    index = agent.get(BASE_URL)
    front_page_articles = []

    index.css(".story-heading").each do |story|
      links = story.css("a")
      title = links.text.strip
      # Skip headings without link text (including whitespace-only text).
      next if title.empty?

      href = links.attribute('href')
      # An anchor without an href cannot be opened later; skip it rather
      # than calling .value on nil.
      next if href.nil?

      front_page_articles << { :title => title, :url => href.value }
    end

    front_page_articles
  end

  # Fetches +url+ and returns a hash with :title, :author, :url and :story.
  # The story is built from the <p> and <h4> descendants of ".story-body",
  # each followed by a blank line.
  def self.scrape_article(url)
    agent = Mechanize.new
    article = agent.get(url)

    story_hash = {}
    story_hash[:title] = article.search("//*[@id='headline']").text
    story_hash[:author] = article.search('.byline-author').text
    story_hash[:url] = url

    article_string = String.new

    article.search(".story-body *").each do |paragraph|
      text = paragraph.children.text
      # Skip text that is already part of the story; this also skips
      # empty nodes, because every string includes "".
      next if article_string.include?(text)

      if paragraph.name == "p" && text != "Advertisement"
        article_string << text + "\n" + "\n"
      elsif paragraph.name == "h4"
        article_string << text + "\n" + "\n"
      end
    end

    story_hash[:story] = article_string
    story_hash
  end

end
|
69
|
+
|
metadata
ADDED
@@ -0,0 +1,177 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nytimes_cli
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.8
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- interestinall
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-06-08 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.12'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.12'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: require_all
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.3'
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: 1.3.3
|
65
|
+
type: :development
|
66
|
+
prerelease: false
|
67
|
+
version_requirements: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - "~>"
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '1.3'
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: 1.3.3
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: mechanize
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - "~>"
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '2.7'
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: 2.7.4
|
85
|
+
type: :development
|
86
|
+
prerelease: false
|
87
|
+
version_requirements: !ruby/object:Gem::Requirement
|
88
|
+
requirements:
|
89
|
+
- - "~>"
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '2.7'
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: 2.7.4
|
95
|
+
- !ruby/object:Gem::Dependency
|
96
|
+
name: require_all
|
97
|
+
requirement: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - "~>"
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '1.3'
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: 1.3.3
|
105
|
+
type: :runtime
|
106
|
+
prerelease: false
|
107
|
+
version_requirements: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - "~>"
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '1.3'
|
112
|
+
- - ">="
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: 1.3.3
|
115
|
+
- !ruby/object:Gem::Dependency
|
116
|
+
name: mechanize
|
117
|
+
requirement: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
119
|
+
- - "~>"
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '2.7'
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 2.7.4
|
125
|
+
type: :runtime
|
126
|
+
prerelease: false
|
127
|
+
version_requirements: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '2.7'
|
132
|
+
- - ">="
|
133
|
+
- !ruby/object:Gem::Version
|
134
|
+
version: 2.7.4
|
135
|
+
description: Allows users to browse nytimes.com and read articles from the command
|
136
|
+
line.
|
137
|
+
email:
|
138
|
+
- justin.lefurjah@gmail.com
|
139
|
+
executables:
|
140
|
+
- nytimes_cli
|
141
|
+
extensions: []
|
142
|
+
extra_rdoc_files: []
|
143
|
+
files:
|
144
|
+
- bin/console
|
145
|
+
- bin/nytimes_cli
|
146
|
+
- bin/setup
|
147
|
+
- lib/article.rb
|
148
|
+
- lib/nytimes_cli.rb
|
149
|
+
- lib/nytimes_cli/cli.rb
|
150
|
+
- lib/nytimes_cli/navagation.rb
|
151
|
+
- lib/nytimes_cli/version.rb
|
152
|
+
- lib/scraper.rb
|
153
|
+
homepage: https://github.com/interestinall/nytimes_cli
|
154
|
+
licenses:
|
155
|
+
- MIT
|
156
|
+
metadata: {}
|
157
|
+
post_install_message:
|
158
|
+
rdoc_options: []
|
159
|
+
require_paths:
|
160
|
+
- lib
|
161
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
162
|
+
requirements:
|
163
|
+
- - ">="
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '0'
|
166
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
|
+
requirements:
|
168
|
+
- - ">="
|
169
|
+
- !ruby/object:Gem::Version
|
170
|
+
version: '0'
|
171
|
+
requirements: []
|
172
|
+
rubyforge_project:
|
173
|
+
rubygems_version: 2.6.4
|
174
|
+
signing_key:
|
175
|
+
specification_version: 4
|
176
|
+
summary: Command line interface for nytimes.com
|
177
|
+
test_files: []
|