nhkore 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -46,15 +46,16 @@ require 'nhkore/search_link'
  require 'nhkore/search_scraper'
  require 'nhkore/sifter'
  require 'nhkore/splitter'
+ require 'nhkore/user_agents'
  require 'nhkore/util'
  require 'nhkore/variator'
  require 'nhkore/version'
  require 'nhkore/word'

- require 'nhkore/cli/bing_cmd'
  require 'nhkore/cli/fx_cmd'
  require 'nhkore/cli/get_cmd'
  require 'nhkore/cli/news_cmd'
+ require 'nhkore/cli/search_cmd'
  require 'nhkore/cli/sift_cmd'


@@ -24,6 +24,7 @@
  require 'cri'
  require 'highline'
  require 'rainbow'
+ require 'set'
  require 'tty-progressbar'
  require 'tty-spinner'

@@ -31,10 +32,10 @@ require 'nhkore/error'
  require 'nhkore/util'
  require 'nhkore/version'

- require 'nhkore/cli/bing_cmd'
  require 'nhkore/cli/fx_cmd'
  require 'nhkore/cli/get_cmd'
  require 'nhkore/cli/news_cmd'
+ require 'nhkore/cli/search_cmd'
  require 'nhkore/cli/sift_cmd'


@@ -47,30 +48,20 @@ module NHKore
  end

  ###
- # For disabling color output.
+ # For disabling/enabling color output.
  #
  # @author Jonathan Bradley Whited (@esotericpig)
- # @since 0.2.0
+ # @since 0.2.1
  ###
- module CriStringFormatterExt
- def blue(str)
- return str
- end
-
- def bold(str)
- return str
- end
+ module CriColorExt
+ @@color = true

- def green(str)
- return str
+ def color=(color)
+ @@color = color
  end

- def red(str)
- return str
- end
-
- def yellow(str)
- return str
+ def color?(io)
+ return @@color
  end
  end

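Note: the new CriColorExt is wired in by prepending it onto Cri::Platform's singleton class (see the autodetect_color() hunk further down), so its color?(io) is found before the original class-level method. A minimal standalone sketch of that prepend-on-a-singleton-class technique, using a made-up Greeter class rather than Cri's real internals:

    class Greeter
      def self.loud?
        false
      end
    end

    module LoudnessOverride
      @@loud = true

      def loud?
        @@loud # class variable lives on the override module, like @@color above
      end
    end

    # Prepending to the singleton class puts the override ahead of Greeter.loud?.
    Greeter.singleton_class.prepend(LoudnessOverride)

    puts Greeter.loud? # => true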
@@ -79,14 +70,19 @@ module NHKore
  # @since 0.2.0
  ###
  class App
- include CLI::BingCmd
  include CLI::FXCmd
  include CLI::GetCmd
  include CLI::NewsCmd
+ include CLI::SearchCmd
  include CLI::SiftCmd

  NAME = 'nhkore'

+ DEFAULT_SLEEP_TIME = 0.1 # So that sites don't ban us (i.e., think we are human)
+
+ COLOR_OPTS = [:c,:color]
+ NO_COLOR_OPTS = [:C,:'no-color']
+
  SPINNER_MSG = '[:spinner] :title:detail...'
  CLASSIC_SPINNER = TTY::Spinner.new(SPINNER_MSG,format: :classic)
  DEFAULT_SPINNER = TTY::Spinner.new(SPINNER_MSG,interval: 5,
@@ -94,8 +90,9 @@ module NHKore
  NO_SPINNER = {} # Still outputs status & stores tokens
  NO_SPINNER_MSG = '%{title}%{detail}...'

- DEFAULT_SLEEP_TIME = 0.1 # So that sites don't ban us (i.e., think we are human)
-
+ attr_reader :cmd
+ attr_reader :cmd_args
+ attr_reader :cmd_opts
  attr_accessor :progress_bar
  attr_accessor :scraper_kargs
  attr_accessor :sleep_time
@@ -119,10 +116,10 @@ module NHKore

  build_app_cmd()

- build_bing_cmd()
  build_fx_cmd()
  build_get_cmd()
  build_news_cmd()
+ build_search_cmd()
  build_sift_cmd()
  build_version_cmd()

@@ -130,18 +127,24 @@ module NHKore
  end

  def autodetect_color()
- disable = false
+ Cri::Platform.singleton_class.prepend(CriColorExt)
+
+ color = nil # Must be nil, not true/false

- if !$stdout.tty?() || ENV['TERM'] == 'dumb'
- disable = true
- elsif !@args.empty?()
+ if !@args.empty?()
  # Kind of hacky, but necessary for Rainbow.

- no_color_args = Set['-C','--no-color']
+ color_opts = opts_to_set(COLOR_OPTS)
+ no_color_opts = opts_to_set(NO_COLOR_OPTS)

  @args.each() do |arg|
- if no_color_args.include?(arg)
- disable = true
+ if color_opts.include?(arg)
+ color = true
+ break
+ end
+
+ if no_color_opts.include?(arg)
+ color = false
  break
  end

@@ -149,11 +152,11 @@ module NHKore
  end
  end

- if disable
- disable_color()
- else
- @rainbow.enabled = true # Force it in case Rainbow auto-disabled it
+ if color.nil?()
+ color = ($stdout.tty?() && ENV['TERM'] != 'dumb')
  end
+
+ enable_color(color)
  end

  def build_app_cmd()
@@ -171,12 +174,15 @@ module NHKore
  This is similar to a core word/vocabulary list.
  EOD

- flag :c,:'classic-fx',<<-EOD do |value,cmd|
+ flag :s,:'classic-fx',<<-EOD do |value,cmd|
  use classic spinner/progress special effects (in case of no Unicode support) when running long tasks
  EOD
  app.progress_bar = :classic
  app.spinner = CLASSIC_SPINNER
  end
+ flag COLOR_OPTS[0],COLOR_OPTS[1],%q{force color output (for commands like '| less -R')} do |value,cmd|
+ app.enable_color(true)
+ end
  flag :n,:'dry-run',<<-EOD
  do a dry run without making changes; do not write to files, create directories, etc.
  EOD
@@ -194,8 +200,8 @@ module NHKore

  app.scraper_kargs[:max_retries] = value
  end
- flag :C,:'no-color','disable color output' do |value,cmd|
- app.disable_color()
+ flag NO_COLOR_OPTS[0],NO_COLOR_OPTS[1],'disable color output' do |value,cmd|
+ app.enable_color(false)
  end
  flag :X,:'no-fx','disable spinner/progress special effects when running long tasks' do |value,cmd|
  app.progress_bar = :no
@@ -223,7 +229,7 @@ module NHKore
  app.sleep_time = value.to_f()
  app.sleep_time = 0.0 if app.sleep_time < 0.0
  end
- option :t,:'timeout',<<-EOD,argument: :required do |value,cmd|
+ option :t,:timeout,<<-EOD,argument: :required do |value,cmd|
  seconds for all URL timeouts: [open, read] (-1 or decimal >= 0)
  EOD
  value = value.to_f()
@@ -232,6 +238,14 @@ module NHKore
  app.scraper_kargs[:open_timeout] = value
  app.scraper_kargs[:read_timeout] = value
  end
+ option :u,:'user-agent',<<-EOD,argument: :required do |value,cmd|
+ HTTP header field 'User-Agent' to use instead of a random one
+ EOD
+ value = app.check_empty_opt(:'user-agent',value)
+
+ app.scraper_kargs[:header] ||= {}
+ app.scraper_kargs[:header]['user-agent'] = value
+ end
  # Big V, not small.
  flag :V,:version,'show the version and exit' do |value,cmd|
  app.show_version()
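Note: the new -u/--user-agent option only stores a string-keyed 'user-agent' field in scraper_kargs; since the sibling options feed :open_timeout/:read_timeout into the same hash, those kargs presumably end up as open-uri options, where string keys are sent as HTTP header fields. A hedged sketch of that open-uri behavior (the URL and agent string here are just examples):

    require 'open-uri'

    opts = {
      :open_timeout => 30,                 # symbol keys are open-uri options
      :read_timeout => 30,
      'user-agent'  => 'ExampleAgent/1.0', # string keys become HTTP headers
    }

    html = URI.open('https://www3.nhk.or.jp/news/easy/',opts).read
    puts html.bytesize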
@@ -399,8 +413,8 @@ module NHKore

  force = @cmd_opts[:force]

- if !force && Dir.exist?(out_dir)
- puts 'Warning: output directory already exists!'
+ if !force && Dir.exist?(out_dir) && !Dir.empty?(out_dir)
+ puts 'Warning: output directory already exists with files!'
  puts ' : Files inside of this directory may be overwritten!'
  puts "> '#{out_dir}'"

@@ -478,9 +492,18 @@ module NHKore
  return color(str).green
  end

- def disable_color()
- Cri::StringFormatter.prepend(CriStringFormatterExt)
- @rainbow.enabled = false
+ def enable_color(enabled)
+ Cri::Platform.color = enabled
+ @rainbow.enabled = enabled
+ end
+
+ def opts_to_set(ary)
+ set = Set.new()
+
+ set.add("-#{ary[0].to_s()}") unless ary[0].nil?()
+ set.add("--#{ary[1].to_s()}") unless ary[1].nil?()
+
+ return set
  end

  def refresh_cmd(opts,args,cmd)
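Note: opts_to_set() just expands an option pair like NO_COLOR_OPTS into the literal argument strings that autodetect_color() scans @args for before Cri has parsed anything; the first match wins, and if neither flag is present the TTY/TERM check decides. Roughly:

    require 'set'

    COLOR_OPTS = [:c,:color]
    NO_COLOR_OPTS = [:C,:'no-color']

    def opts_to_set(ary)
      set = Set.new
      set.add("-#{ary[0]}") unless ary[0].nil?
      set.add("--#{ary[1]}") unless ary[1].nil?
      set
    end

    p opts_to_set(COLOR_OPTS)    # => #<Set: {"-c", "--color"}>
    p opts_to_set(NO_COLOR_OPTS) # => #<Set: {"-C", "--no-color"}>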
@@ -124,7 +124,7 @@ module NHKore
  # - https://www3.nhk.or.jp/news/easy/k10012118911000/k10012118911000.html
  # - '</p><br><「<ruby>台風<rt>たいふう</rt></ruby>'

- @str_or_io = @str_or_io.read() if @str_or_io.respond_to?(:read)
+ read()

  # To add a new one, simply add '|(...)' on a newline and test $#.
  @str_or_io = @str_or_io.gsub(/
@@ -281,7 +281,7 @@ module NHKore
  scraper = DictScraper.new(dict_url,missingno: @missingno,parse_url: false,**@kargs)
  rescue OpenURI::HTTPError => e
  if retries == 0 && e.to_s().include?('404')
- @str_or_io = @str_or_io.read() if @str_or_io.respond_to?(:read)
+ read()

  scraper = ArticleScraper.new(@url,str_or_io: @str_or_io,**@kargs)

@@ -38,7 +38,7 @@ module CLI

  description <<-EOD
  Test if the special effects work on your command line:\n
- - #{App::NAME} [-c/-X] fx
+ - #{App::NAME} [-s/-X] fx
  EOD

  flag :a,:all,'test all special effects regardless of global options'
@@ -99,15 +99,14 @@ module CLI

  return if dry_run

- Tempfile.create([App::NAME,'.zip'],binmode: true) do |file|
+ Tempfile.create(["#{App::NAME}_",'.zip'],binmode: true) do |file|
  puts
- puts 'Downloading to temp file:'
+ puts "Downloading #{GET_URL_FILENAME} to temp file:"
  puts "> #{file.path}"
- puts

  len = down.size
- len = DEFAULT_GET_LENGTH if len.nil?()
- bar = build_progress_bar("Downloading #{GET_URL_FILENAME}",download: true,total: len)
+ len = DEFAULT_GET_LENGTH if len.nil?() || len < 1
+ bar = build_progress_bar('> Downloading',download: true,total: len)

  bar.start()

@@ -120,9 +119,12 @@ module CLI
  file.close()
  bar.finish()

- start_spin("Extracting #{GET_URL_FILENAME}")
+ puts
+ puts "Extracting #{GET_URL_FILENAME}..."

- Zip.on_exists_proc = force # true will force overwriting files on extract()
+ # We manually ask the user whether to overwrite each file, so set this to
+ # true so that Zip extract() will force overwrites and not raise an error.
+ Zip.on_exists_proc = true

  Zip::File.open(file) do |zip_file|
  zip_file.each() do |entry|
@@ -130,17 +132,30 @@ module CLI
  raise ZipError,"unsafe entry name[#{entry.name}] in Zip file"
  end

- name = File.basename(entry.name)
+ name = Util.strip_web_str(File.basename(entry.name))
+
+ next if name.empty?()
+
+ out_file = File.join(out_dir,name)

- update_spin_detail(" (file=#{name})")
+ puts "> #{name}"

- entry.extract(File.join(out_dir,name))
+ if !force && File.exist?(out_file)
+ puts
+ puts 'Warning: output file already exists!'
+ puts "> '#{out_file}'"
+
+ overwrite = @high.agree('Overwrite this file (yes/no)? ')
+ puts
+
+ next unless overwrite
+ end
+
+ entry.extract(out_file)
  end
  end

- stop_spin()
  puts
-
  puts "Extracted #{GET_URL_FILENAME} to directory:"
  puts "> #{out_dir}"
  end
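Note: because each existing file is now confirmed interactively (via HighLine's agree, stored in @high), Zip.on_exists_proc stays pinned to true so rubyzip itself never refuses an overwrite. A rough sketch of the prompt-then-extract flow in isolation, with made-up file names standing in for zip entries:

    require 'highline'

    high = HighLine.new
    force = false

    ['example1.csv','example2.yml'].each do |name|
      out_file = File.join('core',name)

      if !force && File.exist?(out_file)
        next unless high.agree("Overwrite '#{out_file}' (yes/no)? ")
      end

      puts "> #{out_file}" # entry.extract(out_file) would go here in the real command
    end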
@@ -82,8 +82,8 @@ module CLI
  value
  end
  option :l,:links,<<-EOD,argument: :required,transform: -> (value) do
- 'directory/file' of article links (from a Search Engine) to scrape (see '#{App::NAME} bing';
- defaults: #{SearchLinks::DEFAULT_BING_YASASHII_FILE}, #{SearchLinks::DEFAULT_BING_FUTSUU_FILE})
+ 'directory/file' of article links to scrape (see '#{App::NAME} search';
+ defaults: #{SearchLinks::DEFAULT_YASASHII_FILE}, #{SearchLinks::DEFAULT_FUTSUU_FILE})
  EOD
  app.check_empty_opt(:links,value)
  end
@@ -170,12 +170,12 @@ module CLI

  case type
  when :futsuu
- build_in_file(:links,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_BING_FUTSUU_FILENAME)
+ build_in_file(:links,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_FUTSUU_FILENAME)
  build_out_file(:out,default_dir: News::DEFAULT_DIR,default_filename: FutsuuNews::DEFAULT_FILENAME)

  news_name = 'Regular'
  when :yasashii
- build_in_file(:links,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_BING_YASASHII_FILENAME)
+ build_in_file(:links,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_YASASHII_FILENAME)
  build_out_file(:out,default_dir: News::DEFAULT_DIR,default_filename: YasashiiNews::DEFAULT_FILENAME)

  news_name = 'Easy'
@@ -236,10 +236,22 @@ module CLI
  })

  if url.nil?()
- links.each() do |key,link|
+ # Why store each() and do `links_len` instead of `links-len - 1`?
+ #
+ # If links contains 5 entries and you scrape all 5, then the output of
+ # update_spin_detail() will end on 4, so all of this complexity is so
+ # that update_spin_detail() only needs to be written/updated on one line.
+
+ links_each = links.links.values.each()
+ links_len = links.length()
+
+ 0.upto(links_len) do |i|
  update_spin_detail(" (scraped=#{scrape_count}, considered=#{link_count += 1})")

- break if scrape_count >= max_scrapes
+ break if i >= links_len || scrape_count >= max_scrapes
+
+ link = links_each.next()
+
  next if !like.nil?() && !link.url.to_s().downcase().include?(like)
  next if !redo_scrapes && scraped_news_article?(news,link)

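Note: the comment block above is the whole point of the odd 0.upto(links_len) loop -- iterating one index past the end while pulling entries from a stored enumerator lets the status line run one final time with the finished counts before breaking. The same pattern with plain integers standing in for links:

    links = [10, 20, 30]

    links_each = links.each # an Enumerator we advance manually
    links_len = links.length
    scraped = 0

    0.upto(links_len) do |i|
      puts "(scraped=#{scraped}, considered=#{i})" # also runs once after the last element

      break if i >= links_len

      links_each.next # the real code scrapes this link
      scraped += 1
    end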
@@ -248,7 +260,7 @@ module CLI
  if (new_url = scrape_news_article(url,link: link,new_articles: new_articles,news: news))
  # --show-dict
  url = new_url
- scrape_count = max_scrapes - 1
+ scrape_count = max_scrapes - 1 # Break on next iteration for update_spin_detail()
  end

  # Break on next iteration for update_spin_detail().
@@ -31,37 +31,37 @@ module NHKore
  module CLI
  ###
  # @author Jonathan Bradley Whited (@esotericpig)
- # @since 0.2.0
+ # @since 0.3.0
  ###
- module BingCmd
- def build_bing_cmd()
+ module SearchCmd
+ def build_search_cmd()
  app = self

- @bing_cmd = @app_cmd.define_command() do
- name 'bing'
- usage 'bing [OPTIONS] [COMMAND]...'
- aliases :b
- summary "Search bing.com for links to NHK News Web (Easy) (aliases: #{app.color_alias('b')})"
+ @search_cmd = @app_cmd.define_command() do
+ name 'search'
+ usage 'search [OPTIONS] [COMMAND]...'
+ aliases :se,:sea
+ summary "Search for links to NHK News Web (Easy) (aliases: #{app.color_alias('se sea')})"

  description <<-EOD
- Search bing.com for links to NHK News Web (Easy) &
+ Search for links (using a Search Engine, etc.) to NHK News Web (Easy) &
  save to folder: #{SearchLinks::DEFAULT_DIR}
  EOD

  option :i,:in,<<-EOD,argument: :required,transform: -> (value) do
- HTML file to read instead of URL (for offline testing and/or slow internet;
- see '--show-urls' option)
+ file to read instead of URL (for offline testing and/or slow internet;
+ see '--show-*' options)
  EOD
  app.check_empty_opt(:in,value)
  end
  option :o,:out,<<-EOD,argument: :required,transform: -> (value) do
  'directory/file' to save links to; if you only specify a directory or a file, it will attach the
  appropriate default directory/file name
- (defaults: #{SearchLinks::DEFAULT_BING_YASASHII_FILE}, #{SearchLinks::DEFAULT_BING_FUTSUU_FILE})
+ (defaults: #{SearchLinks::DEFAULT_YASASHII_FILE}, #{SearchLinks::DEFAULT_FUTSUU_FILE})
  EOD
  app.check_empty_opt(:out,value)
  end
- option :r,:results,'number of results per page to request from Bing',argument: :required,
+ option :r,:results,'number of results per page to request from search',argument: :required,
  default: SearchScraper::DEFAULT_RESULT_COUNT,transform: -> (value) do
  value = value.to_i()
  value = 1 if value < 1
@@ -72,21 +72,26 @@ module CLI
  useful for manually writing/updating scripts (but not for use in a variable);
  implies '--dry-run' option
  EOD
- option nil,:'show-urls',<<-EOD do |value,cmd|
- show the URLs used when scraping and exit; you can download these for offline testing and/or
- slow internet (see '--in' option)
+ option nil,:'show-urls',<<-EOD
+ show the URLs -- if any -- used when searching & scraping and exit;
+ you can download these for offline testing and/or slow internet
+ (see '--in' option)
  EOD
- puts "Easy: #{BingScraper.build_url(SearchScraper::YASASHII_SITE)}"
- puts "Regular: #{BingScraper.build_url(SearchScraper::FUTSUU_SITE)}"
- exit
- end

  run do |opts,args,cmd|
+ opts.each() do |key,value|
+ key = key.to_s()
+
+ if key.include?('show')
+ raise CLIError.new("must specify a sub command for option[#{key}]")
+ end
+ end
+
  puts cmd.help
  end
  end

- @bing_easy_cmd = @bing_cmd.define_command() do
+ @search_easy_cmd = @search_cmd.define_command() do
  name 'easy'
  usage 'easy [OPTIONS] [COMMAND]...'
  aliases :e,:ez
@@ -94,16 +99,16 @@ module CLI

  description <<-EOD
  Search for NHK News Web Easy (Yasashii) links &
- save to file: #{SearchLinks::DEFAULT_BING_YASASHII_FILE}
+ save to file: #{SearchLinks::DEFAULT_YASASHII_FILE}
  EOD

  run do |opts,args,cmd|
  app.refresh_cmd(opts,args,cmd)
- app.run_bing_cmd(:yasashii)
+ app.run_search_help()
  end
  end

- @bing_regular_cmd = @bing_cmd.define_command() do
+ @search_regular_cmd = @search_cmd.define_command() do
  name 'regular'
  usage 'regular [OPTIONS] [COMMAND]...'
  aliases :r,:reg
@@ -111,28 +116,57 @@ module CLI

  description <<-EOD
  Search for NHK News Web Regular (Futsuu) links &
- save to file: #{SearchLinks::DEFAULT_BING_FUTSUU_FILE}
+ save to file: #{SearchLinks::DEFAULT_FUTSUU_FILE}
  EOD

  run do |opts,args,cmd|
  app.refresh_cmd(opts,args,cmd)
- app.run_bing_cmd(:futsuu)
+ app.run_search_help()
  end
  end
+
+ @search_bing_cmd = Cri::Command.define() do
+ name 'bing'
+ usage 'bing [OPTIONS] [COMMAND]...'
+ aliases :b
+ summary "Search bing.com for links (aliases: #{app.color_alias('b')})"
+
+ description <<-EOD
+ Search bing.com for links & save to folder: #{SearchLinks::DEFAULT_DIR}
+ EOD
+
+ run do |opts,args,cmd|
+ app.refresh_cmd(opts,args,cmd)
+ app.run_search_cmd(cmd.supercommand.name.to_sym(),:bing)
+ end
+ end
+
+ # dup()/clone() must be called for `cmd.supercommand` to work appropriately.
+ @search_easy_cmd.add_command @search_bing_cmd.dup()
+ @search_regular_cmd.add_command @search_bing_cmd.dup()
  end

- def run_bing_cmd(type)
+ def run_search_cmd(nhk_type,search_type)
+ case nhk_type
+ when :easy
+ nhk_type = :yasashii
+ when :regular
+ nhk_type = :futsuu
+ end
+
+ return if show_search_urls(search_type)
+
  @cmd_opts[:dry_run] = true if @cmd_opts[:show_count]

  build_in_file(:in)

- case type
+ case nhk_type
  when :futsuu
- build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_BING_FUTSUU_FILENAME)
+ build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_FUTSUU_FILENAME)
  when :yasashii
- build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_BING_YASASHII_FILENAME)
+ build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_YASASHII_FILENAME)
  else
- raise ArgumentError,"invalid type[#{type}]"
+ raise ArgumentError,"invalid nhk_type[#{nhk_type}]"
  end

  return unless check_in_file(:in,empty_ok: true)
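Note: the shared 'bing' subcommand above is defined once with Cri::Command.define(), dup()'d, and attached under both 'easy' and 'regular'; run_search_cmd() then recovers which parent it was invoked under from cmd.supercommand.name. A condensed sketch of that Cri pattern (names shortened; not the actual nhkore commands):

    require 'cri'

    search = Cri::Command.define do
      name 'search'
      summary 'parent command'
    end

    easy = search.define_command do
      name 'easy'
      summary 'easy articles'
    end

    regular = search.define_command do
      name 'regular'
      summary 'regular articles'
    end

    bing = Cri::Command.define do
      name 'bing'
      summary 'search with Bing'
      run do |_opts,_args,cmd|
        # The parent ('easy' or 'regular') decides which NHK site to target.
        puts "bing search under: #{cmd.supercommand.name}"
      end
    end

    # dup() so each copy gets its own supercommand.
    easy.add_command(bing.dup)
    regular.add_command(bing.dup)

    search.run(%w[easy bing])    # prints: bing search under: easy
    search.run(%w[regular bing]) # prints: bing search under: regular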
@@ -145,7 +179,7 @@ module CLI
  result_count = SearchScraper::DEFAULT_RESULT_COUNT if result_count.nil?()
  show_count = @cmd_opts[:show_count]

- start_spin('Scraping bing.com') unless show_count
+ start_spin("Scraping #{search_type}") unless show_count

  is_file = !in_file.nil?()
  links = nil
@@ -176,30 +210,43 @@ module CLI
  return
  end

- # Do a range to prevent an infinite loop. Ichiman!
- (0..10000).each() do
- scraper = BingScraper.new(type,count: result_count,is_file: is_file,url: url,**@scraper_kargs)
-
- next_page = scraper.scrape(links,next_page)
-
- new_links.concat(links.links.values[links_count..-1])
- links_count = links.length
- page_count = next_page.count if next_page.count > 0
-
- update_spin_detail(" (page=#{page_num}, count=#{page_count}, links=#{links.length}, " +
- "new_links=#{new_links.length})")
-
- break if next_page.empty?()
-
- page_num += 1
- url = next_page.url
-
- sleep_scraper()
+ range = (0..10000) # Do a range to prevent an infinite loop; ichiman!
+
+ case search_type
+ # Anything that extends SearchScraper.
+ when :bing
+ range.each() do
+ scraper = nil
+
+ case search_type
+ when :bing
+ scraper = BingScraper.new(nhk_type,count: result_count,is_file: is_file,url: url,**@scraper_kargs)
+ else
+ raise NHKore::Error,"internal code broken; add missing search_type[#{search_type}]"
+ end
+
+ next_page = scraper.scrape(links,next_page)
+
+ new_links.concat(links.links.values[links_count..-1])
+ links_count = links.length
+ page_count = next_page.count if next_page.count > 0
+
+ update_spin_detail(" (page=#{page_num}, count=#{page_count}, links=#{links.length}, " +
+ "new_links=#{new_links.length})")
+
+ break if next_page.empty?()
+
+ page_num += 1
+ url = next_page.url
+
+ sleep_scraper()
+ end
+ else
+ raise ArgumentError,"invalid search_type[#{search_type}]"
  end

  stop_spin()
  puts
-
  puts 'Last URL scraped:'
  puts "> #{url}"
  puts
@@ -215,6 +262,32 @@ module CLI
  puts "> #{out_file}"
  end
  end
+
+ def run_search_help()
+ if @cmd_opts[:show_count] || @cmd_opts[:show_urls]
+ run_search_cmd(@cmd.name.to_sym(),nil)
+ else
+ puts @cmd.help
+ end
+ end
+
+ def show_search_urls(search_type)
+ return false unless @cmd_opts[:show_urls]
+
+ count = @cmd_opts[:results]
+ count = SearchScraper::DEFAULT_RESULT_COUNT if count.nil?()
+
+ case search_type
+ when :bing
+ puts 'Bing:'
+ puts "> Easy: #{BingScraper.build_url(SearchScraper::YASASHII_SITE,count: count)}"
+ puts "> Regular: #{BingScraper.build_url(SearchScraper::FUTSUU_SITE,count: count)}"
+ else
+ raise CLIError.new('must specify a sub command for option[show-urls]')
+ end
+
+ return true
+ end
  end
  end
  end