nhkore 0.3.7 → 0.3.11

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,23 +1,11 @@
1
- #!/usr/bin/env ruby
2
1
  # encoding: UTF-8
3
2
  # frozen_string_literal: true
4
3
 
5
4
  #--
6
5
  # This file is part of NHKore.
7
- # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
8
- #
9
- # NHKore is free software: you can redistribute it and/or modify
10
- # it under the terms of the GNU Lesser General Public License as published by
11
- # the Free Software Foundation, either version 3 of the License, or
12
- # (at your option) any later version.
13
- #
14
- # NHKore is distributed in the hope that it will be useful,
15
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- # GNU Lesser General Public License for more details.
18
- #
19
- # You should have received a copy of the GNU Lesser General Public License
20
- # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
6
+ # Copyright (c) 2020-2021 Jonathan Bradley Whited
7
+ #
8
+ # SPDX-License-Identifier: LGPL-3.0-or-later
21
9
  #++
22
10
 
23
11
 
@@ -30,122 +18,122 @@ require 'nhkore/util'
30
18
  module NHKore
31
19
  module CLI
32
20
  ###
33
- # @author Jonathan Bradley Whited (@esotericpig)
21
+ # @author Jonathan Bradley Whited
34
22
  # @since 0.3.0
35
23
  ###
36
24
  module SearchCmd
37
- def build_search_cmd()
25
+ def build_search_cmd
38
26
  app = self
39
-
40
- @search_cmd = @app_cmd.define_command() do
27
+
28
+ @search_cmd = @app_cmd.define_command do
41
29
  name 'search'
42
30
  usage 'search [OPTIONS] [COMMAND]...'
43
31
  aliases :se,:sea
44
32
  summary "Search for links to NHK News Web (Easy) (aliases: #{app.color_alias('se sea')})"
45
-
46
- description <<-EOD
33
+
34
+ description <<-DESC
47
35
  Search for links (using a Search Engine, etc.) to NHK News Web (Easy) &
48
36
  save to folder: #{SearchLinks::DEFAULT_DIR}
49
- EOD
50
-
51
- option :i,:in,<<-EOD,argument: :required,transform: -> (value) do
37
+ DESC
38
+
39
+ option :i,:in,<<-DESC,argument: :required,transform: lambda { |value|
52
40
  file to read instead of URL (for offline testing and/or slow internet;
53
41
  see '--show-*' options)
54
- EOD
42
+ DESC
55
43
  app.check_empty_opt(:in,value)
56
- end
57
- option :o,:out,<<-EOD,argument: :required,transform: -> (value) do
44
+ }
45
+ option :o,:out,<<-DESC,argument: :required,transform: lambda { |value|
58
46
  'directory/file' to save links to; if you only specify a directory or a file, it will attach the
59
47
  appropriate default directory/file name
60
48
  (defaults: #{SearchLinks::DEFAULT_YASASHII_FILE}, #{SearchLinks::DEFAULT_FUTSUU_FILE})
61
- EOD
49
+ DESC
62
50
  app.check_empty_opt(:out,value)
63
- end
51
+ }
64
52
  option :r,:results,'number of results per page to request from search',argument: :required,
65
- default: SearchScraper::DEFAULT_RESULT_COUNT,transform: -> (value) do
66
- value = value.to_i()
67
- value = 1 if value < 1
68
- value
69
- end
70
- option nil,:'show-count',<<-EOD
53
+ default: SearchScraper::DEFAULT_RESULT_COUNT,transform: lambda { |value|
54
+ value = value.to_i
55
+ value = 1 if value < 1
56
+ value
57
+ }
58
+ option nil,:'show-count',<<-DESC
71
59
  show the number of links scraped and exit;
72
60
  useful for manually writing/updating scripts (but not for use in a variable);
73
61
  implies '--dry-run' option
74
- EOD
75
- option nil,:'show-urls',<<-EOD
62
+ DESC
63
+ option nil,:'show-urls',<<-DESC
76
64
  show the URLs -- if any -- used when searching & scraping and exit;
77
65
  you can download these for offline testing and/or slow internet
78
66
  (see '--in' option)
79
- EOD
80
-
67
+ DESC
68
+
81
69
  run do |opts,args,cmd|
82
- opts.each() do |key,value|
83
- key = key.to_s()
84
-
70
+ opts.each do |key,value|
71
+ key = key.to_s
72
+
85
73
  if key.include?('show')
86
74
  raise CLIError,"must specify a sub command for option[#{key}]"
87
75
  end
88
76
  end
89
-
77
+
90
78
  puts cmd.help
91
79
  end
92
80
  end
93
-
94
- @search_easy_cmd = @search_cmd.define_command() do
81
+
82
+ @search_easy_cmd = @search_cmd.define_command do
95
83
  name 'easy'
96
84
  usage 'easy [OPTIONS] [COMMAND]...'
97
85
  aliases :e,:ez
98
86
  summary "Search for NHK News Web Easy (Yasashii) links (aliases: #{app.color_alias('e ez')})"
99
-
100
- description <<-EOD
87
+
88
+ description <<-DESC
101
89
  Search for NHK News Web Easy (Yasashii) links &
102
90
  save to file: #{SearchLinks::DEFAULT_YASASHII_FILE}
103
- EOD
104
-
91
+ DESC
92
+
105
93
  run do |opts,args,cmd|
106
94
  app.refresh_cmd(opts,args,cmd)
107
- app.run_search_help()
95
+ app.run_search_help
108
96
  end
109
97
  end
110
-
111
- @search_regular_cmd = @search_cmd.define_command() do
98
+
99
+ @search_regular_cmd = @search_cmd.define_command do
112
100
  name 'regular'
113
101
  usage 'regular [OPTIONS] [COMMAND]...'
114
102
  aliases :r,:reg
115
103
  summary "Search for NHK News Web Regular (Futsuu) links (aliases: #{app.color_alias('r reg')})"
116
-
117
- description <<-EOD
104
+
105
+ description <<-DESC
118
106
  Search for NHK News Web Regular (Futsuu) links &
119
107
  save to file: #{SearchLinks::DEFAULT_FUTSUU_FILE}
120
- EOD
121
-
108
+ DESC
109
+
122
110
  run do |opts,args,cmd|
123
111
  app.refresh_cmd(opts,args,cmd)
124
- app.run_search_help()
112
+ app.run_search_help
125
113
  end
126
114
  end
127
-
128
- @search_bing_cmd = Cri::Command.define() do
115
+
116
+ @search_bing_cmd = Cri::Command.define do
129
117
  name 'bing'
130
118
  usage 'bing [OPTIONS] [COMMAND]...'
131
119
  aliases :b
132
120
  summary "Search bing.com for links (aliases: #{app.color_alias('b')})"
133
-
134
- description <<-EOD
121
+
122
+ description <<-DESC
135
123
  Search bing.com for links & save to folder: #{SearchLinks::DEFAULT_DIR}
136
- EOD
137
-
124
+ DESC
125
+
138
126
  run do |opts,args,cmd|
139
127
  app.refresh_cmd(opts,args,cmd)
140
- app.run_search_cmd(cmd.supercommand.name.to_sym(),:bing)
128
+ app.run_search_cmd(cmd.supercommand.name.to_sym,:bing)
141
129
  end
142
130
  end
143
-
131
+
144
132
  # dup()/clone() must be called for `cmd.supercommand` to work appropriately.
145
- @search_easy_cmd.add_command @search_bing_cmd.dup()
146
- @search_regular_cmd.add_command @search_bing_cmd.dup()
133
+ @search_easy_cmd.add_command @search_bing_cmd.dup
134
+ @search_regular_cmd.add_command @search_bing_cmd.dup
147
135
  end
148
-
136
+
149
137
  def run_search_cmd(nhk_type,search_type)
150
138
  case nhk_type
151
139
  when :easy
@@ -153,130 +141,132 @@ module CLI
153
141
  when :regular
154
142
  nhk_type = :futsuu
155
143
  end
156
-
144
+
157
145
  return if show_search_urls(search_type)
158
-
146
+
159
147
  @cmd_opts[:dry_run] = true if @cmd_opts[:show_count]
160
-
148
+
161
149
  build_in_file(:in)
162
-
150
+
163
151
  case nhk_type
164
152
  when :futsuu
165
- build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_FUTSUU_FILENAME)
153
+ build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,
154
+ default_filename: SearchLinks::DEFAULT_FUTSUU_FILENAME)
166
155
  when :yasashii
167
- build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_YASASHII_FILENAME)
156
+ build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,
157
+ default_filename: SearchLinks::DEFAULT_YASASHII_FILENAME)
168
158
  else
169
159
  raise ArgumentError,"invalid nhk_type[#{nhk_type}]"
170
160
  end
171
-
161
+
172
162
  return unless check_in_file(:in,empty_ok: true)
173
163
  return unless check_out_file(:out)
174
-
164
+
175
165
  dry_run = @cmd_opts[:dry_run]
176
166
  in_file = @cmd_opts[:in]
177
167
  out_file = @cmd_opts[:out]
178
168
  result_count = @cmd_opts[:results]
179
- result_count = SearchScraper::DEFAULT_RESULT_COUNT if result_count.nil?()
169
+ result_count = SearchScraper::DEFAULT_RESULT_COUNT if result_count.nil?
180
170
  show_count = @cmd_opts[:show_count]
181
-
171
+
182
172
  start_spin("Scraping #{search_type}") unless show_count
183
-
184
- is_file = !in_file.nil?()
173
+
174
+ is_file = !in_file.nil?
185
175
  links = nil
186
176
  new_links = [] # For --dry-run
187
- next_page = NextPage.new()
177
+ next_page = NextPage.new
188
178
  page_count = 0
189
179
  page_num = 1
190
180
  url = in_file # nil will use default URL, else a file
191
-
181
+
192
182
  # Load previous links for 'scraped?' vars.
193
183
  if File.exist?(out_file)
194
184
  links = SearchLinks.load_file(out_file)
195
185
  else
196
- links = SearchLinks.new()
186
+ links = SearchLinks.new
197
187
  end
198
-
188
+
199
189
  links_count = links.length
200
-
190
+
201
191
  if show_count
202
192
  scraped_count = 0
203
-
204
- links.links.values.each() do |link|
205
- scraped_count += 1 if link.scraped?()
193
+
194
+ links.links.each_value do |link|
195
+ scraped_count += 1 if link.scraped?
206
196
  end
207
-
197
+
208
198
  puts "#{scraped_count} of #{links_count} links scraped."
209
-
199
+
210
200
  return
211
201
  end
212
-
213
- range = (0..10000) # Do a range to prevent an infinite loop; ichiman!
214
-
202
+
203
+ range = (0..10_000) # Do a range to prevent an infinite loop; ichiman!
204
+
215
205
  case search_type
216
206
  # Anything that extends SearchScraper.
217
207
  when :bing
218
- range.each() do
208
+ range.each do
219
209
  scraper = nil
220
-
210
+
221
211
  case search_type
222
212
  when :bing
223
213
  scraper = BingScraper.new(nhk_type,count: result_count,is_file: is_file,url: url,**@scraper_kargs)
224
214
  else
225
215
  raise NHKore::Error,"internal code broken; add missing search_type[#{search_type}]"
226
216
  end
227
-
217
+
228
218
  next_page = scraper.scrape(links,next_page)
229
-
219
+
230
220
  new_links.concat(links.links.values[links_count..-1])
231
221
  links_count = links.length
232
222
  page_count = next_page.count if next_page.count > 0
233
-
234
- update_spin_detail(" (page=#{page_num}, count=#{page_count}, links=#{links.length}, " +
235
- "new_links=#{new_links.length})")
236
-
237
- break if next_page.empty?()
238
-
223
+
224
+ update_spin_detail(" (page=#{page_num}, count=#{page_count}, links=#{links.length}," \
225
+ " new_links=#{new_links.length})")
226
+
227
+ break if next_page.empty?
228
+
239
229
  page_num += 1
240
230
  url = next_page.url
241
-
242
- sleep_scraper()
231
+
232
+ sleep_scraper
243
233
  end
244
234
  else
245
235
  raise ArgumentError,"invalid search_type[#{search_type}]"
246
236
  end
247
-
248
- stop_spin()
237
+
238
+ stop_spin
249
239
  puts
250
240
  puts 'Last URL scraped:'
251
241
  puts "> #{url}"
252
242
  puts
253
-
243
+
254
244
  if dry_run
255
- new_links.each() do |link|
245
+ new_links.each do |link|
256
246
  puts link.to_s(mini: true)
257
247
  end
258
248
  else
259
249
  links.save_file(out_file)
260
-
250
+
261
251
  puts 'Saved scraped links to file:'
262
252
  puts "> #{out_file}"
263
253
  end
264
254
  end
265
-
266
- def run_search_help()
255
+
256
+ def run_search_help
267
257
  if @cmd_opts[:show_count] || @cmd_opts[:show_urls]
268
- run_search_cmd(@cmd.name.to_sym(),nil)
258
+ run_search_cmd(@cmd.name.to_sym,nil)
269
259
  else
270
260
  puts @cmd.help
271
261
  end
272
262
  end
273
-
263
+
274
264
  def show_search_urls(search_type)
275
265
  return false unless @cmd_opts[:show_urls]
276
-
266
+
277
267
  count = @cmd_opts[:results]
278
- count = SearchScraper::DEFAULT_RESULT_COUNT if count.nil?()
279
-
268
+ count = SearchScraper::DEFAULT_RESULT_COUNT if count.nil?
269
+
280
270
  case search_type
281
271
  when :bing
282
272
  puts 'Bing:'
@@ -285,7 +275,7 @@ module CLI
285
275
  else
286
276
  raise CLIError,'must specify a sub command for option[show-urls]'
287
277
  end
288
-
278
+
289
279
  return true
290
280
  end
291
281
  end