nhkore 0.2.0 → 0.3.0

@@ -46,15 +46,16 @@ require 'nhkore/search_link'
  require 'nhkore/search_scraper'
  require 'nhkore/sifter'
  require 'nhkore/splitter'
+ require 'nhkore/user_agents'
  require 'nhkore/util'
  require 'nhkore/variator'
  require 'nhkore/version'
  require 'nhkore/word'

- require 'nhkore/cli/bing_cmd'
  require 'nhkore/cli/fx_cmd'
  require 'nhkore/cli/get_cmd'
  require 'nhkore/cli/news_cmd'
+ require 'nhkore/cli/search_cmd'
  require 'nhkore/cli/sift_cmd'

@@ -24,6 +24,7 @@
  require 'cri'
  require 'highline'
  require 'rainbow'
+ require 'set'
  require 'tty-progressbar'
  require 'tty-spinner'

@@ -31,10 +32,10 @@ require 'nhkore/error'
  require 'nhkore/util'
  require 'nhkore/version'

- require 'nhkore/cli/bing_cmd'
  require 'nhkore/cli/fx_cmd'
  require 'nhkore/cli/get_cmd'
  require 'nhkore/cli/news_cmd'
+ require 'nhkore/cli/search_cmd'
  require 'nhkore/cli/sift_cmd'

@@ -47,30 +48,20 @@ module NHKore
  end

  ###
- # For disabling color output.
+ # For disabling/enabling color output.
  #
  # @author Jonathan Bradley Whited (@esotericpig)
- # @since 0.2.0
+ # @since 0.2.1
  ###
- module CriStringFormatterExt
- def blue(str)
- return str
- end
-
- def bold(str)
- return str
- end
+ module CriColorExt
+ @@color = true

- def green(str)
- return str
+ def color=(color)
+ @@color = color
  end

- def red(str)
- return str
- end
-
- def yellow(str)
- return str
+ def color?(io)
+ return @@color
  end
  end

@@ -79,14 +70,19 @@ module NHKore
  # @since 0.2.0
  ###
  class App
- include CLI::BingCmd
  include CLI::FXCmd
  include CLI::GetCmd
  include CLI::NewsCmd
+ include CLI::SearchCmd
  include CLI::SiftCmd

  NAME = 'nhkore'

+ DEFAULT_SLEEP_TIME = 0.1 # So that sites don't ban us (i.e., think we are human)
+
+ COLOR_OPTS = [:c,:color]
+ NO_COLOR_OPTS = [:C,:'no-color']
+
  SPINNER_MSG = '[:spinner] :title:detail...'
  CLASSIC_SPINNER = TTY::Spinner.new(SPINNER_MSG,format: :classic)
  DEFAULT_SPINNER = TTY::Spinner.new(SPINNER_MSG,interval: 5,
@@ -94,8 +90,9 @@ module NHKore
  NO_SPINNER = {} # Still outputs status & stores tokens
  NO_SPINNER_MSG = '%{title}%{detail}...'

- DEFAULT_SLEEP_TIME = 0.1 # So that sites don't ban us (i.e., think we are human)
-
+ attr_reader :cmd
+ attr_reader :cmd_args
+ attr_reader :cmd_opts
  attr_accessor :progress_bar
  attr_accessor :scraper_kargs
  attr_accessor :sleep_time
@@ -119,10 +116,10 @@

  build_app_cmd()

- build_bing_cmd()
  build_fx_cmd()
  build_get_cmd()
  build_news_cmd()
+ build_search_cmd()
  build_sift_cmd()
  build_version_cmd()

@@ -130,18 +127,24 @@
  end

  def autodetect_color()
- disable = false
+ Cri::Platform.singleton_class.prepend(CriColorExt)
+
+ color = nil # Must be nil, not true/false

- if !$stdout.tty?() || ENV['TERM'] == 'dumb'
- disable = true
- elsif !@args.empty?()
+ if !@args.empty?()
  # Kind of hacky, but necessary for Rainbow.

- no_color_args = Set['-C','--no-color']
+ color_opts = opts_to_set(COLOR_OPTS)
+ no_color_opts = opts_to_set(NO_COLOR_OPTS)

  @args.each() do |arg|
- if no_color_args.include?(arg)
- disable = true
+ if color_opts.include?(arg)
+ color = true
+ break
+ end
+
+ if no_color_opts.include?(arg)
+ color = false
  break
  end

@@ -149,11 +152,11 @@ module NHKore
  end
  end

- if disable
- disable_color()
- else
- @rainbow.enabled = true # Force it in case Rainbow auto-disabled it
+ if color.nil?()
+ color = ($stdout.tty?() && ENV['TERM'] != 'dumb')
  end
+
+ enable_color(color)
  end

  def build_app_cmd()
@@ -171,12 +174,15 @@ module NHKore
  This is similar to a core word/vocabulary list.
  EOD

- flag :c,:'classic-fx',<<-EOD do |value,cmd|
+ flag :s,:'classic-fx',<<-EOD do |value,cmd|
  use classic spinner/progress special effects (in case of no Unicode support) when running long tasks
  EOD
  app.progress_bar = :classic
  app.spinner = CLASSIC_SPINNER
  end
+ flag COLOR_OPTS[0],COLOR_OPTS[1],%q{force color output (for commands like '| less -R')} do |value,cmd|
+ app.enable_color(true)
+ end
  flag :n,:'dry-run',<<-EOD
  do a dry run without making changes; do not write to files, create directories, etc.
  EOD
@@ -194,8 +200,8 @@

  app.scraper_kargs[:max_retries] = value
  end
- flag :C,:'no-color','disable color output' do |value,cmd|
- app.disable_color()
+ flag NO_COLOR_OPTS[0],NO_COLOR_OPTS[1],'disable color output' do |value,cmd|
+ app.enable_color(false)
  end
  flag :X,:'no-fx','disable spinner/progress special effects when running long tasks' do |value,cmd|
  app.progress_bar = :no
@@ -223,7 +229,7 @@
  app.sleep_time = value.to_f()
  app.sleep_time = 0.0 if app.sleep_time < 0.0
  end
- option :t,:'timeout',<<-EOD,argument: :required do |value,cmd|
+ option :t,:timeout,<<-EOD,argument: :required do |value,cmd|
  seconds for all URL timeouts: [open, read] (-1 or decimal >= 0)
  EOD
  value = value.to_f()
@@ -232,6 +238,14 @@
  app.scraper_kargs[:open_timeout] = value
  app.scraper_kargs[:read_timeout] = value
  end
+ option :u,:'user-agent',<<-EOD,argument: :required do |value,cmd|
+ HTTP header field 'User-Agent' to use instead of a random one
+ EOD
+ value = app.check_empty_opt(:'user-agent',value)
+
+ app.scraper_kargs[:header] ||= {}
+ app.scraper_kargs[:header]['user-agent'] = value
+ end
  # Big V, not small.
  flag :V,:version,'show the version and exit' do |value,cmd|
  app.show_version()
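
The new '-u/--user-agent' option above only stores the value under scraper_kargs[:header]. A minimal, hypothetical sketch (illustrative names, not nhkore's actual Scraper internals) of how such kargs could be forwarded to an open-uri request:

    require 'open-uri'

    # Hypothetical helper; assumes the scraper forwards :header and the
    # timeout kargs to open-uri. OpenURI accepts request header fields as
    # string keys, e.g. 'User-Agent'.
    def fetch(url,scraper_kargs = {})
      header = scraper_kargs[:header] || {}

      opts = { 'User-Agent' => header['user-agent'] || 'example-agent/1.0' }
      opts[:open_timeout] = scraper_kargs[:open_timeout] if scraper_kargs[:open_timeout]
      opts[:read_timeout] = scraper_kargs[:read_timeout] if scraper_kargs[:read_timeout]

      URI.open(url,opts,&:read)
    end

For example, fetch('https://www3.nhk.or.jp/news/easy/',header: { 'user-agent' => 'Mozilla/5.0' }) would send that exact User-Agent string.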
@@ -399,8 +413,8 @@

  force = @cmd_opts[:force]

- if !force && Dir.exist?(out_dir)
- puts 'Warning: output directory already exists!'
+ if !force && Dir.exist?(out_dir) && !Dir.empty?(out_dir)
+ puts 'Warning: output directory already exists with files!'
  puts ' : Files inside of this directory may be overwritten!'
  puts "> '#{out_dir}'"

@@ -478,9 +492,18 @@
  return color(str).green
  end

- def disable_color()
- Cri::StringFormatter.prepend(CriStringFormatterExt)
- @rainbow.enabled = false
+ def enable_color(enabled)
+ Cri::Platform.color = enabled
+ @rainbow.enabled = enabled
+ end
+
+ def opts_to_set(ary)
+ set = Set.new()
+
+ set.add("-#{ary[0].to_s()}") unless ary[0].nil?()
+ set.add("--#{ary[1].to_s()}") unless ary[1].nil?()
+
+ return set
  end

  def refresh_cmd(opts,args,cmd)
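
enable_color() works because autodetect_color() (above) prepends CriColorExt onto Cri::Platform's singleton class. A standalone sketch of that prepend pattern, using generic names rather than Cri's real classes:

    # Prepending a module onto a class's singleton class puts the module's
    # methods ahead of methods defined with `def self.color?` in the lookup
    # chain, so they can override class-level behavior.
    module ForcedColor
      @@color = true

      def color=(color)
        @@color = color
      end

      def color?(_io)
        return @@color
      end
    end

    class Platform
      def self.color?(io)
        io.tty?()
      end
    end

    Platform.singleton_class.prepend(ForcedColor)

    Platform.color = false
    puts Platform.color?($stdout) # => false, regardless of TTY detection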
@@ -124,7 +124,7 @@ module NHKore
  # - https://www3.nhk.or.jp/news/easy/k10012118911000/k10012118911000.html
  # - '</p><br><「<ruby>台風<rt>たいふう</rt></ruby>'

- @str_or_io = @str_or_io.read() if @str_or_io.respond_to?(:read)
+ read()

  # To add a new one, simply add '|(...)' on a newline and test $#.
  @str_or_io = @str_or_io.gsub(/
@@ -281,7 +281,7 @@ module NHKore
  scraper = DictScraper.new(dict_url,missingno: @missingno,parse_url: false,**@kargs)
  rescue OpenURI::HTTPError => e
  if retries == 0 && e.to_s().include?('404')
- @str_or_io = @str_or_io.read() if @str_or_io.respond_to?(:read)
+ read()

  scraper = ArticleScraper.new(@url,str_or_io: @str_or_io,**@kargs)

@@ -38,7 +38,7 @@ module CLI

  description <<-EOD
  Test if the special effects work on your command line:\n
- - #{App::NAME} [-c/-X] fx
+ - #{App::NAME} [-s/-X] fx
  EOD

  flag :a,:all,'test all special effects regardless of global options'
@@ -99,15 +99,14 @@ module CLI

  return if dry_run

- Tempfile.create([App::NAME,'.zip'],binmode: true) do |file|
+ Tempfile.create(["#{App::NAME}_",'.zip'],binmode: true) do |file|
  puts
- puts 'Downloading to temp file:'
+ puts "Downloading #{GET_URL_FILENAME} to temp file:"
  puts "> #{file.path}"
- puts

  len = down.size
- len = DEFAULT_GET_LENGTH if len.nil?()
- bar = build_progress_bar("Downloading #{GET_URL_FILENAME}",download: true,total: len)
+ len = DEFAULT_GET_LENGTH if len.nil?() || len < 1
+ bar = build_progress_bar('> Downloading',download: true,total: len)

  bar.start()

@@ -120,9 +119,12 @@ module CLI
  file.close()
  bar.finish()

- start_spin("Extracting #{GET_URL_FILENAME}")
+ puts
+ puts "Extracting #{GET_URL_FILENAME}..."

- Zip.on_exists_proc = force # true will force overwriting files on extract()
+ # We manually ask the user whether to overwrite each file, so set this to
+ # true so that Zip extract() will force overwrites and not raise an error.
+ Zip.on_exists_proc = true

  Zip::File.open(file) do |zip_file|
  zip_file.each() do |entry|
@@ -130,17 +132,30 @@ module CLI
  raise ZipError,"unsafe entry name[#{entry.name}] in Zip file"
  end

- name = File.basename(entry.name)
+ name = Util.strip_web_str(File.basename(entry.name))
+
+ next if name.empty?()
+
+ out_file = File.join(out_dir,name)

- update_spin_detail(" (file=#{name})")
+ puts "> #{name}"

- entry.extract(File.join(out_dir,name))
+ if !force && File.exist?(out_file)
+ puts
+ puts 'Warning: output file already exists!'
+ puts "> '#{out_file}'"
+
+ overwrite = @high.agree('Overwrite this file (yes/no)? ')
+ puts
+
+ next unless overwrite
+ end
+
+ entry.extract(out_file)
  end
  end

- stop_spin()
  puts
-
  puts "Extracted #{GET_URL_FILENAME} to directory:"
  puts "> #{out_dir}"
  end
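
The extraction loop above combines rubyzip's Zip.on_exists_proc with a per-file HighLine prompt. A trimmed-down, self-contained sketch of the same pattern ('backup.zip' and 'out' are placeholder names):

    require 'fileutils'
    require 'highline'
    require 'zip' # rubyzip

    high = HighLine.new
    Zip.on_exists_proc = true # force overwrites; the user is asked per file instead

    FileUtils.mkdir_p('out')

    Zip::File.open('backup.zip') do |zip_file|
      zip_file.each() do |entry|
        name = File.basename(entry.name).strip()

        next if name.empty?()

        out_file = File.join('out',name)

        if File.exist?(out_file)
          next unless high.agree("Overwrite '#{out_file}' (yes/no)? ")
        end

        entry.extract(out_file)
      end
    end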
@@ -82,8 +82,8 @@ module CLI
  value
  end
  option :l,:links,<<-EOD,argument: :required,transform: -> (value) do
- 'directory/file' of article links (from a Search Engine) to scrape (see '#{App::NAME} bing';
- defaults: #{SearchLinks::DEFAULT_BING_YASASHII_FILE}, #{SearchLinks::DEFAULT_BING_FUTSUU_FILE})
+ 'directory/file' of article links to scrape (see '#{App::NAME} search';
+ defaults: #{SearchLinks::DEFAULT_YASASHII_FILE}, #{SearchLinks::DEFAULT_FUTSUU_FILE})
  EOD
  app.check_empty_opt(:links,value)
  end
@@ -170,12 +170,12 @@ module CLI

  case type
  when :futsuu
- build_in_file(:links,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_BING_FUTSUU_FILENAME)
+ build_in_file(:links,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_FUTSUU_FILENAME)
  build_out_file(:out,default_dir: News::DEFAULT_DIR,default_filename: FutsuuNews::DEFAULT_FILENAME)

  news_name = 'Regular'
  when :yasashii
- build_in_file(:links,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_BING_YASASHII_FILENAME)
+ build_in_file(:links,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_YASASHII_FILENAME)
  build_out_file(:out,default_dir: News::DEFAULT_DIR,default_filename: YasashiiNews::DEFAULT_FILENAME)

  news_name = 'Easy'
@@ -236,10 +236,22 @@ module CLI
  })

  if url.nil?()
- links.each() do |key,link|
+ # Why store each() and do `links_len` instead of `links_len - 1`?
+ #
+ # If links contains 5 entries and you scrape all 5, then the output of
+ # update_spin_detail() will end on 4, so all of this complexity is so
+ # that update_spin_detail() only needs to be written/updated on one line.
+
+ links_each = links.links.values.each()
+ links_len = links.length()
+
+ 0.upto(links_len) do |i|
  update_spin_detail(" (scraped=#{scrape_count}, considered=#{link_count += 1})")

- break if scrape_count >= max_scrapes
+ break if i >= links_len || scrape_count >= max_scrapes
+
+ link = links_each.next()
+
  next if !like.nil?() && !link.url.to_s().downcase().include?(like)
  next if !redo_scrapes && scraped_news_article?(news,link)

@@ -248,7 +260,7 @@ module CLI
  if (new_url = scrape_news_article(url,link: link,new_articles: new_articles,news: news))
  # --show-dict
  url = new_url
- scrape_count = max_scrapes - 1
+ scrape_count = max_scrapes - 1 # Break on next iteration for update_spin_detail()
  end

  # Break on next iteration for update_spin_detail().
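
The counting trick in the two hunks above (0.upto(links_len) plus an external enumerator) exists so the spinner detail gets one final update after the last link. A standalone sketch of the pattern with placeholder data, outside of nhkore's SearchLinks class:

    links = %w[ a b c d e ] # placeholder data
    links_each = links.each()
    links_len = links.length()
    scraped = 0

    0.upto(links_len) do |i|
      print("\r(scraped=#{scraped} of #{links_len})")

      break if i >= links_len # runs once more than there are links

      links_each.next() # the link that would be scraped on this pass
      scraped += 1
    end

    puts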
@@ -31,37 +31,37 @@ module NHKore
  module CLI
  ###
  # @author Jonathan Bradley Whited (@esotericpig)
- # @since 0.2.0
+ # @since 0.3.0
  ###
- module BingCmd
- def build_bing_cmd()
+ module SearchCmd
+ def build_search_cmd()
  app = self

- @bing_cmd = @app_cmd.define_command() do
- name 'bing'
- usage 'bing [OPTIONS] [COMMAND]...'
- aliases :b
- summary "Search bing.com for links to NHK News Web (Easy) (aliases: #{app.color_alias('b')})"
+ @search_cmd = @app_cmd.define_command() do
+ name 'search'
+ usage 'search [OPTIONS] [COMMAND]...'
+ aliases :se,:sea
+ summary "Search for links to NHK News Web (Easy) (aliases: #{app.color_alias('se sea')})"

  description <<-EOD
- Search bing.com for links to NHK News Web (Easy) &
+ Search for links (using a Search Engine, etc.) to NHK News Web (Easy) &
  save to folder: #{SearchLinks::DEFAULT_DIR}
  EOD

  option :i,:in,<<-EOD,argument: :required,transform: -> (value) do
- HTML file to read instead of URL (for offline testing and/or slow internet;
- see '--show-urls' option)
+ file to read instead of URL (for offline testing and/or slow internet;
+ see '--show-*' options)
  EOD
  app.check_empty_opt(:in,value)
  end
  option :o,:out,<<-EOD,argument: :required,transform: -> (value) do
  'directory/file' to save links to; if you only specify a directory or a file, it will attach the
  appropriate default directory/file name
- (defaults: #{SearchLinks::DEFAULT_BING_YASASHII_FILE}, #{SearchLinks::DEFAULT_BING_FUTSUU_FILE})
+ (defaults: #{SearchLinks::DEFAULT_YASASHII_FILE}, #{SearchLinks::DEFAULT_FUTSUU_FILE})
  EOD
  app.check_empty_opt(:out,value)
  end
- option :r,:results,'number of results per page to request from Bing',argument: :required,
+ option :r,:results,'number of results per page to request from search',argument: :required,
  default: SearchScraper::DEFAULT_RESULT_COUNT,transform: -> (value) do
  value = value.to_i()
  value = 1 if value < 1
@@ -72,21 +72,26 @@ module CLI
  useful for manually writing/updating scripts (but not for use in a variable);
  implies '--dry-run' option
  EOD
- option nil,:'show-urls',<<-EOD do |value,cmd|
- show the URLs used when scraping and exit; you can download these for offline testing and/or
- slow internet (see '--in' option)
+ option nil,:'show-urls',<<-EOD
+ show the URLs -- if any -- used when searching & scraping and exit;
+ you can download these for offline testing and/or slow internet
+ (see '--in' option)
  EOD
- puts "Easy: #{BingScraper.build_url(SearchScraper::YASASHII_SITE)}"
- puts "Regular: #{BingScraper.build_url(SearchScraper::FUTSUU_SITE)}"
- exit
- end

  run do |opts,args,cmd|
+ opts.each() do |key,value|
+ key = key.to_s()
+
+ if key.include?('show')
+ raise CLIError.new("must specify a sub command for option[#{key}]")
+ end
+ end
+
  puts cmd.help
  end
  end

- @bing_easy_cmd = @bing_cmd.define_command() do
+ @search_easy_cmd = @search_cmd.define_command() do
  name 'easy'
  usage 'easy [OPTIONS] [COMMAND]...'
  aliases :e,:ez
@@ -94,16 +99,16 @@ module CLI

  description <<-EOD
  Search for NHK News Web Easy (Yasashii) links &
- save to file: #{SearchLinks::DEFAULT_BING_YASASHII_FILE}
+ save to file: #{SearchLinks::DEFAULT_YASASHII_FILE}
  EOD

  run do |opts,args,cmd|
  app.refresh_cmd(opts,args,cmd)
- app.run_bing_cmd(:yasashii)
+ app.run_search_help()
  end
  end

- @bing_regular_cmd = @bing_cmd.define_command() do
+ @search_regular_cmd = @search_cmd.define_command() do
  name 'regular'
  usage 'regular [OPTIONS] [COMMAND]...'
  aliases :r,:reg
@@ -111,28 +116,57 @@ module CLI

  description <<-EOD
  Search for NHK News Web Regular (Futsuu) links &
- save to file: #{SearchLinks::DEFAULT_BING_FUTSUU_FILE}
+ save to file: #{SearchLinks::DEFAULT_FUTSUU_FILE}
  EOD

  run do |opts,args,cmd|
  app.refresh_cmd(opts,args,cmd)
- app.run_bing_cmd(:futsuu)
+ app.run_search_help()
  end
  end
+
+ @search_bing_cmd = Cri::Command.define() do
+ name 'bing'
+ usage 'bing [OPTIONS] [COMMAND]...'
+ aliases :b
+ summary "Search bing.com for links (aliases: #{app.color_alias('b')})"
+
+ description <<-EOD
+ Search bing.com for links & save to folder: #{SearchLinks::DEFAULT_DIR}
+ EOD
+
+ run do |opts,args,cmd|
+ app.refresh_cmd(opts,args,cmd)
+ app.run_search_cmd(cmd.supercommand.name.to_sym(),:bing)
+ end
+ end
+
+ # dup()/clone() must be called for `cmd.supercommand` to work appropriately.
+ @search_easy_cmd.add_command @search_bing_cmd.dup()
+ @search_regular_cmd.add_command @search_bing_cmd.dup()
  end

- def run_bing_cmd(type)
+ def run_search_cmd(nhk_type,search_type)
+ case nhk_type
+ when :easy
+ nhk_type = :yasashii
+ when :regular
+ nhk_type = :futsuu
+ end
+
+ return if show_search_urls(search_type)
+
  @cmd_opts[:dry_run] = true if @cmd_opts[:show_count]

  build_in_file(:in)

- case type
+ case nhk_type
  when :futsuu
- build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_BING_FUTSUU_FILENAME)
+ build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_FUTSUU_FILENAME)
  when :yasashii
- build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_BING_YASASHII_FILENAME)
+ build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_YASASHII_FILENAME)
  else
- raise ArgumentError,"invalid type[#{type}]"
+ raise ArgumentError,"invalid nhk_type[#{nhk_type}]"
  end

  return unless check_in_file(:in,empty_ok: true)
@@ -145,7 +179,7 @@ module CLI
  result_count = SearchScraper::DEFAULT_RESULT_COUNT if result_count.nil?()
  show_count = @cmd_opts[:show_count]

- start_spin('Scraping bing.com') unless show_count
+ start_spin("Scraping #{search_type}") unless show_count

  is_file = !in_file.nil?()
  links = nil
@@ -176,30 +210,43 @@ module CLI
  return
  end

- # Do a range to prevent an infinite loop. Ichiman!
- (0..10000).each() do
- scraper = BingScraper.new(type,count: result_count,is_file: is_file,url: url,**@scraper_kargs)
-
- next_page = scraper.scrape(links,next_page)
-
- new_links.concat(links.links.values[links_count..-1])
- links_count = links.length
- page_count = next_page.count if next_page.count > 0
-
- update_spin_detail(" (page=#{page_num}, count=#{page_count}, links=#{links.length}, " +
- "new_links=#{new_links.length})")
-
- break if next_page.empty?()
-
- page_num += 1
- url = next_page.url
-
- sleep_scraper()
+ range = (0..10000) # Do a range to prevent an infinite loop; ichiman!
+
+ case search_type
+ # Anything that extends SearchScraper.
+ when :bing
+ range.each() do
+ scraper = nil
+
+ case search_type
+ when :bing
+ scraper = BingScraper.new(nhk_type,count: result_count,is_file: is_file,url: url,**@scraper_kargs)
+ else
+ raise NHKore::Error,"internal code broken; add missing search_type[#{search_type}]"
+ end
+
+ next_page = scraper.scrape(links,next_page)
+
+ new_links.concat(links.links.values[links_count..-1])
+ links_count = links.length
+ page_count = next_page.count if next_page.count > 0
+
+ update_spin_detail(" (page=#{page_num}, count=#{page_count}, links=#{links.length}, " +
+ "new_links=#{new_links.length})")
+
+ break if next_page.empty?()
+
+ page_num += 1
+ url = next_page.url
+
+ sleep_scraper()
+ end
+ else
+ raise ArgumentError,"invalid search_type[#{search_type}]"
  end

  stop_spin()
  puts
-
  puts 'Last URL scraped:'
  puts "> #{url}"
  puts
@@ -215,6 +262,32 @@ module CLI
  puts "> #{out_file}"
  end
  end
+
+ def run_search_help()
+ if @cmd_opts[:show_count] || @cmd_opts[:show_urls]
+ run_search_cmd(@cmd.name.to_sym(),nil)
+ else
+ puts @cmd.help
+ end
+ end
+
+ def show_search_urls(search_type)
+ return false unless @cmd_opts[:show_urls]
+
+ count = @cmd_opts[:results]
+ count = SearchScraper::DEFAULT_RESULT_COUNT if count.nil?()
+
+ case search_type
+ when :bing
+ puts 'Bing:'
+ puts "> Easy: #{BingScraper.build_url(SearchScraper::YASASHII_SITE,count: count)}"
+ puts "> Regular: #{BingScraper.build_url(SearchScraper::FUTSUU_SITE,count: count)}"
+ else
+ raise CLIError.new('must specify a sub command for option[show-urls]')
+ end
+
+ return true
+ end
  end
  end
  end