random_poetry_scraper 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8d58c193c1849127eed0b58051f1ebca61a2b2e0
4
+ data.tar.gz: 37f25de62e6b6b7874850d45ca49d4f35810f7df
5
+ SHA512:
6
+ metadata.gz: 4324444b189d84102775c9859b740a43a7e5b2fcfbf5c6e9d506339f7d5c8639ea668106032dc551ecf0e93dd519dbe2d48918dcf220f70bd27a1e3aeb476023
7
+ data.tar.gz: 809411354811b7779f3922eb3cd7d8ade7068baffdb073b44a67c9f8b08392873dee27b6c556f9a578b0ee955ec9ed9442b9516004ce7111070809ce6baa7830
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby

# Executable entry point for the random_poetry_scraper gem.
#
# Bundler is only activated when a Gemfile sits next to this checkout
# (i.e. during development). The packaged gem does not ship a Gemfile, and
# requiring "bundler/setup" unconditionally makes the installed executable
# fail (or pick up an unrelated Gemfile) when run from an arbitrary directory.
gemfile = File.expand_path("../Gemfile", __dir__)
if File.exist?(gemfile)
  ENV["BUNDLE_GEMFILE"] ||= gemfile
  require "bundler/setup"
end

require "random_poetry_scraper"

# Parse command-line options and run the requested mode.
CorpusGenerator::CLI.new.call
@@ -0,0 +1,4 @@
1
# Namespace for mixins shared between CorpusGenerator classes.
#
# Written with nested definitions (instead of the compact
# `module CorpusGenerator::Concerns` form) so this file can be loaded on its
# own without raising NameError when CorpusGenerator is not defined yet.
module CorpusGenerator
  module Concerns
    # Placeholder mixin; it currently defines no behavior.
    module Displayable
    end
  end
end
@@ -0,0 +1,16 @@
1
# Library entry point: loads third-party dependencies and every
# CorpusGenerator component in dependency order.

# pry is declared only as a development dependency of the gem, so it is not
# guaranteed to be installed alongside a released copy. Load it when it is
# available and carry on without it otherwise.
begin
  require 'pry'
rescue LoadError
  nil
end

require 'nokogiri'
require 'open-uri'
require 'trollop'
require 'json'

# version must come first: it defines the CorpusGenerator namespace that the
# remaining files extend.
require "random_poetry_scraper/version"
require "concerns/concerns"
require "random_poetry_scraper/cli"
require "random_poetry_scraper/poet"
require "random_poetry_scraper/poem"
require "random_poetry_scraper/scraper"

# Top-level namespace of the gem. The classes themselves live in the files
# required above.
module CorpusGenerator
end
@@ -0,0 +1,283 @@
1
module CorpusGenerator
  # Command-line front end of the gem.
  #
  # Two modes are supported:
  # * JSON mode     - scrape N poems and print them as a JSON document.
  # * pleasure mode - scrape N poems, then open an interactive reader that
  #                   lets the user browse the poems by title or by poet.
  class CLI
    # Lists backing the numbered menus shown to the user. A number typed by
    # the user is an index (1-based) into the matching list.
    attr_accessor :current_poems_alphabetized, :current_poets_alphabetized

    # Scraping stops after this many failed attempts in a row, so a broken or
    # unreachable page layout cannot keep the program looping forever.
    MAX_CONSECUTIVE_FAILURES = 5

    # Validates the options and dispatches to the requested mode.
    #
    # @param commandline_options [Hash, nil] already-parsed options
    #   (+:num_poems+, +:json+, +:pleasure+). When nil they are parsed from
    #   the process arguments.
    # @return [String, nil] the JSON document in JSON mode, otherwise nil.
    def call(commandline_options = nil)
      commandline_options ||= accept_command_line_options

      if commandline_options.empty?
        handle_no_options_passed_message
      elsif commandline_options[:num_poems].nil?
        handle_no_num_poems_message
      elsif commandline_options[:json] && commandline_options[:pleasure]
        handle_json_and_pleasure_message
      else
        handle_valid_command_line_options(commandline_options)
      end
    end

    # Parses the process arguments with Trollop.
    #
    # @return [Hash] only the options that ended up with a truthy value, so
    #   an invocation without any flags yields an empty hash.
    def accept_command_line_options
      opts = Trollop::options do
        version <<~EOS
          📖 Random Poetry Scraper
          Version 1.0 | July 2018
          Hayden Betts | @hayden_betts
        EOS

        banner <<~EOS
          \n📖 Random Poetry Scraper is a command line gem which returns the text of poems scraped from poemhunter.com.
          \n
          Usage:
          \n
        EOS
        opt :num_poems, "Number of poems to return", :type => :integer
        opt :json, "Output poems and their attributes directly to json"
        opt :pleasure, "Scrape poems and then enter a CLI for pleasure reading"
      end
      opts.select { |_option, value| value }
    end

    # Explains that at least the poem count and an output format are needed.
    def handle_no_options_passed_message
      puts ""
      puts "📖 Random Poetry Scraper requires you to pass in options indicating"
      puts "the number of poems you would like to return, and their desired output format."
      puts "Run with --help for help."
      puts ""
    end

    # Explains that the two output modes are mutually exclusive.
    def handle_json_and_pleasure_message
      puts "Cannot run with both the --json and --pleasure flags selected"
      puts "Run with --help for help."
    end

    # Explains that the number of poems is mandatory.
    def handle_no_num_poems_message
      puts "Cannot run without a --num-poems selected"
      puts "Run with --help for help."
    end

    # Explains that one of the two output modes has to be chosen. Without
    # this message a bare --num-poems invocation ended silently.
    def handle_no_output_format_message
      puts "Cannot run without either the --json or --pleasure flag selected"
      puts "Run with --help for help."
    end

    # Runs the mode selected by an already validated option hash.
    #
    # @param commandline_options [Hash] must contain +:num_poems+.
    # @return [String, nil] the JSON document in JSON mode, otherwise nil.
    def handle_valid_command_line_options(commandline_options)
      if commandline_options[:pleasure]
        get_poems_with_status_updates(commandline_options[:num_poems])
        pleasure_reading_menu
      elsif commandline_options[:json]
        # No progress output here: stdout must contain nothing but the JSON.
        get_poems_without_status_updates(commandline_options[:num_poems])
        json = CorpusGenerator::Poem.poems_to_json(current_poems_alphabetized)
        puts json
        json
      else
        handle_no_output_format_message
      end
    end

    # Scrapes +num_poems+ poems, reporting progress on stdout.
    #
    # @param num_poems [Integer]
    # @return [Array] all known poems sorted by name.
    def get_poems_with_status_updates(num_poems)
      fetch_poems(num_poems) do |fetched_count|
        if fetched_count
          puts "#{fetched_count} poem(s) fetched succesfully."
        else
          puts "Failed. Trying again."
        end
      end

      set_current_poems_alphabetically
    end

    # Scrapes +num_poems+ poems without printing anything to stdout.
    #
    # @param num_poems [Integer]
    # @return [Array] all known poems sorted by name.
    def get_poems_without_status_updates(num_poems)
      fetch_poems(num_poems)
      set_current_poems_alphabetically
    end

    # Resets the poem menu to every poem scraped so far, sorted by name.
    def set_current_poems_alphabetically
      self.current_poems_alphabetized = CorpusGenerator::Poem.all.sort_by(&:name)
    end

    ##
    # => The pleasure reading interface
    ##

    # Top-level interactive menu; loops until the user types 'exit'.
    def pleasure_reading_menu
      input = nil

      until input == 'exit'
        pleasure_reading_menu_instructions
        input = read_input

        case input
        when 'poems' then poem_selection_menu
        when 'poets' then poet_selection_menu
        when 'exit'  then goodbye
        else puts "Invalid input! Please select a valid option!"
        end
      end
    end

    # Numbered poem menu; loops until the user types 'menu'.
    #
    # @param poet unused; accepted only so existing callers keep working.
    def poem_selection_menu(poet = nil)
      input = nil

      until input == 'menu'
        poem_selection_instructions
        list_current_poems

        input = read_input
        choice = input.to_i

        if choice.between?(1, current_poems_alphabetized.size)
          display_poem(current_poems_alphabetized[choice - 1])
          quit_or_continue_reading
        elsif input == 'exit'
          goodbye
        elsif input != 'menu'
          puts "Please enter a valid poem selection!"
        end
      end
    end

    # Numbered poet menu; loops until the user types 'menu'.
    def poet_selection_menu
      input = nil

      until input == 'menu'
        poet_selection_instructions
        list_poets_alphabetically

        input = read_input
        choice = input.to_i

        if choice.between?(1, current_poets_alphabetized.size)
          set_poems_alphabetically_by_poet(current_poets_alphabetized[choice - 1])
          poem_selection_menu
          # The poem list was narrowed to a single poet above. Restore the
          # full list so the top-level 'poems' menu shows every poem again.
          set_current_poems_alphabetically
        elsif input == 'exit'
          goodbye
        elsif input != 'menu'
          puts "Please enter a valid poet selection!"
        end

        puts "To return to the poet selection menu, enter 'menu'"
      end
    end

    ##
    # => Pleasure Reading Interface General Helpers
    ##

    # Prints the banner followed by the top-level menu choices.
    def pleasure_reading_menu_instructions
      puts pleasure_reading_header

      puts "How would you like to find poems to read?"
      puts "To see a list of poems, type 'poems', and press enter."
      puts "To see a list of poets, type 'poets', and press enter."
      puts "To end the program, type 'exit', and press enter."
      puts ""
    end

    # @return [String] contents of the banner file stored beside this file.
    def pleasure_reading_header
      File.read(File.join(__dir__, 'pleasure_reading_header'))
    end

    # Prints the choices available in the poem menu.
    def poem_selection_instructions
      puts "\nType the number of a poem to read it."
      puts "Or type menu to go up one menu."
      puts "Or type exit to end the program."
      puts ""
    end

    # Says goodbye and terminates the process.
    def goodbye
      puts "Thanks for using Pleasure reader!"
      exit
    end

    ##
    # => Pleasure Reading Interface Poem Selection Menu Helpers
    ##

    # Prints the current poem list as "<number>. <title> - <poet>".
    def list_current_poems
      current_poems_alphabetized.each.with_index(1) do |poem, index|
        puts "#{index}. #{poem.name} - #{poem.poet.name}"
      end
      puts ""
    end

    # Prints one poem framed by rows of asterisks as wide as its title line.
    def display_poem(poem)
      title_string = "#{poem.name} - #{poem.poet.name}"
      divider = "*" * title_string.length

      puts ""
      puts divider
      puts title_string
      puts divider
      puts ""
      puts "\n#{poem.text}"
      puts ""
      puts divider
    end

    # Pauses after a poem; exits on 'exit', any other input continues.
    def quit_or_continue_reading
      puts ""
      puts "To exit now, type exit"
      puts "To return to the list of poems, press enter"
      puts ""

      goodbye if read_input == 'exit'
    end

    ##
    # => Pleasure Reading Interface Poet Selection Menu Helpers
    ##

    # Prints the choices available in the poet menu.
    def poet_selection_instructions
      puts ""
      puts "Type the number of a poet whose poems you would like to read."
      puts "Or type menu to go up one menu."
      puts "Or type exit to end the program."
      puts ""
    end

    # Refreshes the poet list (sorted by name) and prints it numbered.
    def list_poets_alphabetically
      self.current_poets_alphabetized = CorpusGenerator::Poet.all.sort_by(&:name)
      current_poets_alphabetized.each.with_index(1) do |poet, index|
        puts "#{index}. #{poet.name}"
      end
      puts ""
    end

    # Narrows the poem menu to the poems of +selected_poet+, sorted by name.
    def set_poems_alphabetically_by_poet(selected_poet)
      self.current_poems_alphabetized = selected_poet.poems.sort_by(&:name)
    end

    private

    # Scrapes pages until +num_poems+ poems have been created.
    #
    # The previous implementation decremented the block variable of
    # Integer#times to "retry", which has no effect, and tested the always
    # truthy result of Poem.new. Failures are now detected on the scraper
    # result and retried for real, up to MAX_CONSECUTIVE_FAILURES in a row.
    #
    # Yields the running count after every success and nil after every
    # failure that will be retried.
    def fetch_poems(num_poems)
      fetched = 0
      failures_in_a_row = 0

      while fetched < num_poems
        poem_attributes = CorpusGenerator::Scraper.new.scrape_poem_page

        if poem_attributes
          CorpusGenerator::Poem.new(poem_attributes)
          fetched += 1
          failures_in_a_row = 0
          yield fetched if block_given?
        else
          failures_in_a_row += 1
          if failures_in_a_row >= MAX_CONSECUTIVE_FAILURES
            # stderr keeps stdout clean for JSON mode.
            warn "Giving up after #{failures_in_a_row} consecutive failed attempts."
            break
          end
          yield nil if block_given?
        end
      end
    end

    # Reads one line from standard input with surrounding whitespace removed.
    # End of input is treated as 'exit' so a closed stdin ends the program
    # instead of raising NoMethodError on nil.
    def read_input
      line = $stdin.gets
      line ? line.strip : 'exit'
    end
  end
end
@@ -0,0 +1,6 @@
1
+ ____ _ ____ _
2
+ | _ \ | | ___ __ _ ___ _ _ _ __ ___ | _ \ ___ __ _ __| | ___ _ __
3
+ | |_) || | / _ \ / _` |/ __|| | | || '__|/ _ \ | |_) |/ _ \ / _` | / _` | / _ \| '__|
4
+ | __/ | || __/| (_| |\__ \| |_| || | | __/ | _ <| __/| (_| || (_| || __/| |
5
+ |_| |_| \___| \__,_||___/ \__,_||_| \___| |_| \_\\___| \__,_| \__,_| \___||_|
6
+
@@ -0,0 +1,53 @@
1
module CorpusGenerator
  # A scraped poem. Every instance registers itself in a class-wide list as
  # soon as it is created.
  class Poem
    attr_accessor :name, :text
    # The writer for +poet+ is defined by hand below, so only the reader is
    # generated here (generating both triggers a "method redefined" warning).
    attr_reader :poet

    @@all = []

    # Builds a poem from a hash of attributes and registers it.
    #
    # @param attributes_hash [Hash] attribute name => value; every key must
    #   have a matching public writer (+name+, +text+, +poet+).
    def initialize(attributes_hash)
      attributes_hash.each do |attribute, value|
        public_send("#{attribute}=", value)
      end
      self.class.all << self
    end

    # Links the poem to its poet, reusing an existing poet with the same
    # name when there is one, and adds the poem to that poet's list.
    #
    # @param poet_attributes [Hash] poet attributes; the +:name+ entry is
    #   what Poet.find_or_create uses to look up an existing poet.
    def poet=(poet_attributes)
      @poet = CorpusGenerator::Poet.find_or_create(poet_attributes)
      @poet.add_poem(self)
    end

    # @return [Hash] string-keyed representation of the poem. The "poet"
    #   entry always has the same shape; its values are nil when the poem
    #   has no poet (previously this case raised NoMethodError).
    def as_hash
      {
        "name" => name,
        "text" => text,
        "poet" => {
          "name" => poet&.name,
          "profile_url" => poet&.profile_url,
        }
      }
    end

    # Class Methods

    # @return [Array<Poem>] every poem created so far, in creation order.
    def self.all
      @@all
    end

    # @param poems [Array<Poem>]
    # @return [Hash] +{"poems" => [poem hashes]}+
    def self.poems_to_hash(poems)
      { "poems" => poems.map(&:as_hash) }
    end

    # @param poems [Array<Poem>]
    # @return [String] JSON encoding of {poems_to_hash}.
    def self.poems_to_json(poems)
      poems_to_hash(poems).to_json
    end
  end
end
@@ -0,0 +1,40 @@
1
module CorpusGenerator
  # A poet together with the poems collected for them. Every instance
  # registers itself in a class-wide list as soon as it is created.
  class Poet
    attr_accessor :name, :profile_url, :poems

    @@all = []

    # Builds a poet from a hash of attributes and registers it.
    #
    # @param attributes_hash [Hash] attribute name => value; every key must
    #   have a matching public writer (+name+, +profile_url+, +poems+).
    def initialize(attributes_hash)
      self.poems = []

      attributes_hash.each do |attribute, value|
        public_send("#{attribute}=", value)
      end
      self.class.all << self
    end

    # Adds +poem+ to this poet's list unless it is already present.
    #
    # The previous duplicate check shadowed +poem+ with its block parameter
    # and compared each stored poem with the poet itself, so it never
    # matched and the same poem could be added repeatedly.
    def add_poem(poem)
      poems << poem unless poems.include?(poem)
    end

    # Class Methods

    # @return [Array<Poet>] every poet created so far, in creation order.
    def self.all
      @@all
    end

    # @return [Poet, nil] the first poet with exactly this name, if any.
    def self.find_by_name(name)
      all.find { |poet| poet.name == name }
    end

    # Returns the poet named +attributes[:name]+, creating it from
    # +attributes+ when no such poet exists yet.
    #
    # @param attributes [Hash] must be a hash; the lookup uses its +:name+
    #   entry.
    # @return [Poet]
    def self.find_or_create(attributes)
      find_by_name(attributes[:name]) || new(attributes)
    end
  end
end
@@ -0,0 +1,22 @@
1
module CorpusGenerator
  # Downloads one random poem page and extracts the poem's attributes.
  class Scraper
    attr_accessor :html_doc

    ROOT_LINK = "https://w0.poemhunter.com"
    BROWSE_LINK = ROOT_LINK + "/members/random-poem/"

    # @param html_doc [#css, nil] an already parsed document. When omitted
    #   (the default, and what existing callers do) BROWSE_LINK is downloaded
    #   and parsed with Nokogiri.
    def initialize(html_doc = nil)
      # URI#open (provided by open-uri) replaces Kernel#open, which stopped
      # accepting URLs in Ruby 3.0. The block form closes the download handle
      # once the page has been parsed.
      self.html_doc = html_doc || URI.parse(BROWSE_LINK).open { |page| Nokogiri::HTML(page) }
    end

    # Extracts the poem from the loaded page.
    #
    # @return [Hash, nil] +{name:, text:, poet: {name:, profile_url:}}+, or
    #   nil when the page has no ".poem" element at all. (The previous check
    #   compared the hash with an empty array and could never yield nil; a
    #   page without a poem raised NoMethodError on the missing link.)
    def scrape_poem_page
      poem_nodes = html_doc.css(".poem")
      return nil if poem_nodes.empty?

      # attr returns nil when no matching link carries an href.
      profile_link = html_doc.css(".poem a").attr("href")

      {
        name: poem_nodes.css("h2").text,
        # Turn <br> into newlines and drop the CRLF + tab runs left over
        # from the page's own source indentation.
        text: poem_nodes.css("p").inner_html.gsub("<br>", "\n").gsub(/\r\n[\t]+/, ""),
        poet: {
          name: html_doc.css(".poet").text,
          profile_url: profile_link && ROOT_LINK + profile_link.value
        }
      }
    end
  end
end
@@ -0,0 +1,2 @@
1
# Root namespace of the random_poetry_scraper gem.
module CorpusGenerator
  # Released version of the gem; matches the version recorded in the
  # package metadata.
  VERSION = "0.1.0"
end
metadata ADDED
@@ -0,0 +1,154 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: random_poetry_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Hayden Betts
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-07-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.16'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.16'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: nokogiri
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 1.8.4
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 1.8.4
83
+ - !ruby/object:Gem::Dependency
84
+ name: trollop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '2.0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '2.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: json
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '2.0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '2.0'
111
+ description: Random Poetry Scraper is a command line gem which returns the attributes
112
+ of a configurable number of poems scraped from poemhunter.com. The gem allows you
113
+ to consume the poems either through a JSON dump or through a command line "pleasure
114
+ reading" interface.
115
+ email: haydenbetts@gmail.com
116
+ executables:
117
+ - random_poetry_scraper
118
+ extensions: []
119
+ extra_rdoc_files: []
120
+ files:
121
+ - bin/random_poetry_scraper
122
+ - lib/concerns/concerns.rb
123
+ - lib/random_poetry_scraper.rb
124
+ - lib/random_poetry_scraper/cli.rb
125
+ - lib/random_poetry_scraper/pleasure_reading_header
126
+ - lib/random_poetry_scraper/poem.rb
127
+ - lib/random_poetry_scraper/poet.rb
128
+ - lib/random_poetry_scraper/scraper.rb
129
+ - lib/random_poetry_scraper/version.rb
130
+ homepage: https://github.com/haydenbetts/poetry-scraper-cli-app
131
+ licenses:
132
+ - MIT
133
+ metadata: {}
134
+ post_install_message:
135
+ rdoc_options: []
136
+ require_paths:
137
+ - lib
138
+ required_ruby_version: !ruby/object:Gem::Requirement
139
+ requirements:
140
+ - - ">="
141
+ - !ruby/object:Gem::Version
142
+ version: '0'
143
+ required_rubygems_version: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - ">="
146
+ - !ruby/object:Gem::Version
147
+ version: '0'
148
+ requirements: []
149
+ rubyforge_project:
150
+ rubygems_version: 2.6.13
151
+ signing_key:
152
+ specification_version: 4
153
+ summary: Scrape and return a configurable number of random poems from poemhunter.com/
154
+ test_files: []