nhkore 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/nhkore/app.rb ADDED
@@ -0,0 +1,616 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of NHKore.
7
+ # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # NHKore is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU Lesser General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # NHKore is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU Lesser General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU Lesser General Public License
20
+ # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'cri'
25
+ require 'highline'
26
+ require 'rainbow'
27
+ require 'tty-progressbar'
28
+ require 'tty-spinner'
29
+
30
+ require 'nhkore/error'
31
+ require 'nhkore/util'
32
+ require 'nhkore/version'
33
+
34
+ require 'nhkore/cli/bing_cmd'
35
+ require 'nhkore/cli/fx_cmd'
36
+ require 'nhkore/cli/get_cmd'
37
+ require 'nhkore/cli/news_cmd'
38
+ require 'nhkore/cli/sift_cmd'
39
+
40
+
41
+ module NHKore
42
###
# Namespace for the command mixins (CLI::BingCmd, CLI::FXCmd, CLI::GetCmd,
# CLI::NewsCmd, and CLI::SiftCmd) that App includes.
#
# @author Jonathan Bradley Whited (@esotericpig)
# @since  0.2.0
###
module CLI; end
48
+
49
###
# For disabling color output.
#
# Each color/format helper below hands the given string back untouched.
# App#disable_color prepends this module onto Cri::StringFormatter so that
# these versions take precedence.
#
# @author Jonathan Bradley Whited (@esotericpig)
# @since  0.2.0
###
module CriStringFormatterExt
  # One identity method per helper: takes exactly one string, returns it as is.
  %i[blue bold green red yellow].each do |style|
    define_method(style) { |str| str }
  end
end
76
+
77
+ ###
78
+ # @author Jonathan Bradley Whited (@esotericpig)
79
+ # @since 0.2.0
80
+ ###
81
+ class App
82
+ include CLI::BingCmd
83
+ include CLI::FXCmd
84
+ include CLI::GetCmd
85
+ include CLI::NewsCmd
86
+ include CLI::SiftCmd
87
+
88
# Program name used in usage/help text and in the version line.
NAME = 'nhkore'

# Template shared by both TTY spinners; :title and :detail are filled in by
# start_spin()/update_spin_detail().
SPINNER_MSG = '[:spinner] :title:detail...'
CLASSIC_SPINNER = TTY::Spinner.new(SPINNER_MSG, format: :classic)
DEFAULT_SPINNER = TTY::Spinner.new(
  SPINNER_MSG,
  interval: 5,
  frames: ['〜〜〜', '日〜〜', '日本〜', '日本語']
)
NO_SPINNER = {} # Still outputs status & stores tokens
NO_SPINNER_MSG = '%{title}%{detail}...'

DEFAULT_SLEEP_TIME = 0.1 # So that sites don't ban us (i.e., think we are human)

# Settings that the global command-line options change on this instance.
attr_accessor :progress_bar, :scraper_kargs, :sleep_time, :spinner
103
+
104
# Set the default state, decide whether color output should be used, and
# then register the root command followed by every sub command.
#
# @param args [Array<String>] command-line arguments to parse (defaults to ARGV)
def initialize(args = ARGV)
  super()

  @args = args

  # Filled in later by refresh_cmd().
  @cmd = @cmd_args = @cmd_opts = nil

  @high = HighLine.new
  @rainbow = Rainbow.new

  @progress_bar = :default # [:default, :classic, :no]
  @scraper_kargs = {}
  @sleep_time = DEFAULT_SLEEP_TIME
  @spinner = DEFAULT_SPINNER

  autodetect_color

  # The root command must exist before the sub commands are attached to it.
  build_app_cmd

  build_bing_cmd
  build_fx_cmd
  build_get_cmd
  build_news_cmd
  build_sift_cmd
  build_version_cmd

  @app_cmd.add_command Cri::Command.new_basic_help
end
131
+
132
# Decide up front whether color output should be on or off.
#
# Color is turned off when stdout is not a TTY, when TERM is 'dumb', or when
# '-C'/'--no-color' appears in the arguments before a bare '--'. Otherwise,
# Rainbow is explicitly enabled.
#
# A plain array is used for the flag lookup (instead of Set) so that this
# method does not depend on 'set' having been required somewhere else.
def autodetect_color
  no_color_flags = %w[-C --no-color]

  # Kind of hacky, but necessary for Rainbow.
  # Only the arguments in front of '--' are looked at.
  disable = !$stdout.tty? ||
            ENV['TERM'] == 'dumb' ||
            @args.take_while { |arg| arg != '--' }
                 .any? { |arg| no_color_flags.include?(arg) }

  if disable
    disable_color
  else
    @rainbow.enabled = true # Force it in case Rainbow auto-disabled it
  end
end
158
+
159
# Define the root command, with all of the global flags/options, and store it
# in @app_cmd.
#
# This instance is captured in the local +app+ so that the option blocks can
# write their settings back onto it.
def build_app_cmd
  app = self

  # Shared by the timeout options: a negative number is stored as nil.
  to_timeout = lambda do |raw|
    secs = raw.to_f
    secs < 0.0 ? nil : secs
  end

  @app_cmd = Cri::Command.define do
    name NAME
    usage "#{NAME} [OPTIONS] [COMMAND]..."
    summary 'NHK News Web (Easy) scraper for Japanese language learners.'

    description <<~EOD
      Scrapes NHK News Web (Easy) to create a list of each word and its
      frequency (how many times it was used) for Japanese language learners.

      This is similar to a core word/vocabulary list.
    EOD

    flag :c, :'classic-fx', <<~EOD do |_value, _cmd|
      use classic spinner/progress special effects (in case of no Unicode support) when running long tasks
    EOD
      app.progress_bar = :classic
      app.spinner = CLASSIC_SPINNER
    end
    flag :n, :'dry-run', <<~EOD
      do a dry run without making changes; do not write to files, create directories, etc.
    EOD
    # Big F because dangerous.
    flag :F, :force, "force overwriting files, creating directories, etc. (don't prompt); dangerous!"
    flag :h, :help, 'show this help' do |_value, cmd|
      puts cmd.help
      exit
    end
    option :m, :'max-retry', <<~EOD, argument: :required, default: 3 do |value, _cmd|
      maximum number of times to retry URLs (-1 or integer >= 0)
    EOD
      retries = value.to_i

      # A negative number is stored as nil.
      app.scraper_kargs[:max_retries] = retries < 0 ? nil : retries
    end
    flag :C, :'no-color', 'disable color output' do |_value, _cmd|
      app.disable_color
    end
    flag :X, :'no-fx', 'disable spinner/progress special effects when running long tasks' do |_value, _cmd|
      app.progress_bar = :no
      app.spinner = NO_SPINNER
    end
    option :o, :'open-timeout', <<~EOD, argument: :required do |value, _cmd|
      seconds for URL open timeouts (-1 or decimal >= 0)
    EOD
      app.scraper_kargs[:open_timeout] = to_timeout.call(value)
    end
    option :r, :'read-timeout', <<~EOD, argument: :required do |value, _cmd|
      seconds for URL read timeouts (-1 or decimal >= 0)
    EOD
      app.scraper_kargs[:read_timeout] = to_timeout.call(value)
    end
    option :z, :sleep, <<~EOD, argument: :required, default: DEFAULT_SLEEP_TIME do |value, _cmd|
      seconds to sleep per scrape (i.e., per page/article) so don't get banned (i.e., fake being human)
    EOD
      secs = value.to_f

      # Never sleep for a negative amount of time.
      app.sleep_time = secs < 0.0 ? 0.0 : secs
    end
    option :t, :timeout, <<~EOD, argument: :required do |value, _cmd|
      seconds for all URL timeouts: [open, read] (-1 or decimal >= 0)
    EOD
      secs = to_timeout.call(value)

      app.scraper_kargs[:open_timeout] = secs
      app.scraper_kargs[:read_timeout] = secs
    end
    # Big V, not small.
    flag :V, :version, 'show the version and exit' do |_value, _cmd|
      app.show_version
      exit
    end

    # Running with no sub command just prints the help.
    run do |_opts, _args, cmd|
      puts cmd.help
    end
  end
end
246
+
247
# Normalize the directory stored in @cmd_opts[opt_key].
#
# The option's value and the default are both run through
# Util.strip_web_str(); the default is used if the option is then empty. The
# result is expanded to an absolute path and written back to the option.
#
# @param opt_key     [Symbol] key of the option in @cmd_opts
# @param default_dir [String] directory to use if the option is empty
# @return [String] the directory that was stored in @cmd_opts[opt_key]
def build_dir(opt_key, default_dir: '.')
  # Protect against fat-fingering.
  fallback = Util.strip_web_str(default_dir)
  chosen = Util.strip_web_str(@cmd_opts[opt_key].to_s)

  chosen = fallback if chosen.empty?

  # '~' will expand to home, etc.
  chosen = File.expand_path(chosen) unless chosen.nil?

  @cmd_opts[opt_key] = chosen
end
259
+
260
# Normalize the file path stored in @cmd_opts[opt_key].
#
# - Empty option: the default dir + default filename is used, or nil if there
#   is no default filename.
# - Directory: the default filename is appended to it.
# - Filename only: it is placed inside of the default dir.
# - Anything else: used as is.
#
# A non-nil result is expanded to an absolute path. The result is written back
# to the option.
#
# @param opt_key          [Symbol] key of the option in @cmd_opts
# @param default_dir      [String] directory to use if none was given
# @param default_filename [String] filename to use if none was given
# @return [String,nil] the path that was stored in @cmd_opts[opt_key]
def build_file(opt_key, default_dir: '.', default_filename: '')
  # Protect against fat-fingering.
  base_dir = Util.strip_web_str(default_dir)
  base_name = Util.strip_web_str(default_filename)
  path = Util.strip_web_str(@cmd_opts[opt_key].to_s)

  path =
    if path.empty?
      # Do not check base_dir.empty?().
      # nil is very important for BingScraper.init()!
      base_name.empty? ? nil : File.join(base_dir, base_name)
    elsif File.directory?(path) || Util.dir_str?(path)
      # Directory?
      File.join(path, base_name)
    elsif Util.filename_str?(path)
      # File name only? (no directory)
      File.join(base_dir, path)
    else
      # Passed in both: 'directory/file'
      path
    end

  # '~' will expand to home, etc.
  path = File.expand_path(path) unless path.nil?

  @cmd_opts[opt_key] = path
end
289
+
290
# Build an input directory option; simply forwards to build_dir().
#
# @param opt_key [Symbol] key of the option in @cmd_opts
# @param kargs   [Hash]   keyword args passed through untouched
def build_in_dir(opt_key, **kargs)
  build_dir(opt_key, **kargs)
end

# Build an input file option; simply forwards to build_file().
#
# @param opt_key [Symbol] key of the option in @cmd_opts
# @param kargs   [Hash]   keyword args passed through untouched
def build_in_file(opt_key, **kargs)
  build_file(opt_key, **kargs)
end

# Build an output directory option; simply forwards to build_dir().
#
# @param opt_key [Symbol] key of the option in @cmd_opts
# @param kargs   [Hash]   keyword args passed through untouched
def build_out_dir(opt_key, **kargs)
  build_dir(opt_key, **kargs)
end

# Build an output file option; simply forwards to build_file().
#
# @param opt_key [Symbol] key of the option in @cmd_opts
# @param kargs   [Hash]   keyword args passed through untouched
def build_out_file(opt_key, **kargs)
  build_file(opt_key, **kargs)
end
305
+
306
# Create a progress bar of the requested type.
#
# @param title    [String]  text shown in front of the bar
# @param download [Boolean] true to also show the byte rate and to set the
#                           bar's interval to 1
# @param total    [Integer] total amount of work
# @param type     [Symbol]  :default or :classic for a TTY::ProgressBar;
#                           anything else (i.e., :no) for a NoProgressBar
# @param width    [Integer] width passed to TTY::ProgressBar
# @param kargs    [Hash]    extra keyword args passed to the bar's constructor
# @return [TTY::ProgressBar,NoProgressBar] the new progress bar
def build_progress_bar(title, download: false, total: 100, type: @progress_bar, width: 33, **kargs)
  fancy = (type == :default)

  # :no
  return NoProgressBar.new(title, total: total, **kargs) unless fancy || type == :classic

  bar_fmt = +"#{title} [:bar] :percent :eta"
  bar_fmt << ' :byte_rate/s' if download

  TTY::ProgressBar.new(bar_fmt, total: total, width: width, **kargs) do |config|
    # Only the default type gets the custom bar characters.
    if fancy
      config.incomplete = '.'
      config.complete = '/'
      config.head = 'o'
    end

    #config.frequency = 5 # For a big download, set this
    config.interval = 1 if download
  end
end
327
+
328
# Define the 'version' sub command (alias: 'v') on the root command and store
# it in @version_cmd. Running it prints the version via show_version().
def build_version_cmd
  app = self

  @version_cmd = @app_cmd.define_command do
    name 'version'
    usage 'version [OPTIONS] [COMMAND]...'
    aliases :v
    summary "Show the version and exit (aliases: #{app.color_alias('v')})"

    run { |_opts, _args, _cmd| app.show_version }
  end
end
342
+
343
# Make sure that an option's value is not empty.
#
# @param key   [Symbol,String] name of the option (only used in the error message)
# @param value [String,nil]    value to check
# @return [String] the value after Util.strip_web_str()
# @raise [CLIError] if the value is nil or is empty after being stripped
def check_empty_opt(key, value)
  value = Util.strip_web_str(value) unless value.nil?

  return value unless value.nil? || value.empty?

  raise CLIError, "option[#{key}] cannot be empty[#{value}]"
end
352
+
353
# Validate the input file stored in @cmd_opts[opt_key].
#
# @param opt_key  [Symbol]  key of the option in @cmd_opts
# @param empty_ok [Boolean] true to accept an empty value, in which case the
#                           option is reset to nil
# @return [true] if the checks pass
# @raise [CLIError] if the value is empty (and +empty_ok+ is false), if the
#                   file does not exist, or if it is a directory
def check_in_file(opt_key, empty_ok: false)
  in_file = @cmd_opts[opt_key]

  if Util.empty_web_str?(in_file)
    raise CLIError, "empty input path name[#{in_file}] in option[#{opt_key}]" unless empty_ok

    @cmd_opts[opt_key] = nil # nil is very important for BingScraper.init()!

    return true
  end

  in_file = Util.strip_web_str(in_file)

  unless File.exist?(in_file)
    raise CLIError, "input file[#{in_file}] does not exist for option[#{opt_key}]"
  end
  if File.directory?(in_file)
    raise CLIError, "input file[#{in_file}] cannot be a directory for option[#{opt_key}]"
  end

  true
end
378
+
379
# Validate the output directory stored in @cmd_opts[opt_key], prompting the
# user and creating the directory as needed.
#
# - Dry run: only prints the directory; nothing is created.
# - Without the force option: asks before using an existing directory and
#   before creating a missing one.
#
# @param opt_key [Symbol] key of the option in @cmd_opts
# @return [Boolean] true to continue; false if the user answered no to a prompt
# @raise [CLIError] if the value is empty or is an existing file
def check_out_dir(opt_key)
  out_dir = @cmd_opts[opt_key]

  raise CLIError, "empty output directory[#{out_dir}] in option[#{opt_key}]" if Util.empty_web_str?(out_dir)

  out_dir = Util.strip_web_str(out_dir)

  if File.file?(out_dir)
    raise CLIError, "output directory[#{out_dir}] cannot be a file for option[#{opt_key}]"
  end

  if @cmd_opts[:dry_run]
    puts 'No changes written (dry run).'
    puts "> #{out_dir}"
    puts

    return true
  end

  # Only ask questions when not forced.
  prompt = !@cmd_opts[:force]

  if prompt && Dir.exist?(out_dir)
    puts 'Warning: output directory already exists!'
    puts ' : Files inside of this directory may be overwritten!'
    puts "> '#{out_dir}'"

    return false unless @high.agree('Is this okay (yes/no)? ')
    puts
  end

  unless Dir.exist?(out_dir)
    if prompt
      puts 'Output directory does not exist.'
      puts "> '#{out_dir}'"

      return false unless @high.agree('Create this directory (yes/no)? ')
    end

    FileUtils.mkdir_p(out_dir, verbose: true)
    puts
  end

  true
end
425
+
426
# Validate the output file stored in @cmd_opts[opt_key], prompting the user
# and creating the file's parent directory as needed.
#
# - Dry run: only prints the file; nothing is created.
# - Without the force option: asks before overwriting an existing file and
#   before creating a missing parent directory.
#
# @param opt_key [Symbol] key of the option in @cmd_opts
# @return [Boolean] true to continue; false if the user answered no to a prompt
# @raise [CLIError] if the value is empty or is an existing directory
def check_out_file(opt_key)
  out_file = @cmd_opts[opt_key]

  raise CLIError, "empty output path name[#{out_file}] in option[#{opt_key}]" if Util.empty_web_str?(out_file)

  out_file = Util.strip_web_str(out_file)

  if File.directory?(out_file)
    raise CLIError, "output file[#{out_file}] cannot be a directory for option[#{opt_key}]"
  end

  if @cmd_opts[:dry_run]
    puts 'No changes written (dry run).'
    puts "> #{out_file}"
    puts

    return true
  end

  # Only ask questions when not forced.
  prompt = !@cmd_opts[:force]
  parent_dir = File.dirname(out_file)

  if prompt && File.exist?(out_file)
    puts 'Warning: output file already exists!'
    puts "> '#{out_file}'"

    return false unless @high.agree('Overwrite this file (yes/no)? ')
    puts
  end

  unless Dir.exist?(parent_dir)
    if prompt
      puts 'Output directory does not exist.'
      puts "> '#{parent_dir}'"

      return false unless @high.agree('Create this directory (yes/no)? ')
    end

    FileUtils.mkdir_p(parent_dir, verbose: true)
    puts
  end

  true
end
472
+
473
# Wrap a string with this app's Rainbow instance so that it can be styled.
#
# @param str [String] text to wrap
# @return the result of @rainbow.wrap()
def color(str)
  @rainbow.wrap(str)
end

# Style a command alias (green) for use in help text.
#
# @param str [String] the alias
# @return the wrapped string after calling green on it
def color_alias(str)
  color(str).green
end

# Turn off color output: prepend the pass-through formatter onto
# Cri::StringFormatter and disable this app's Rainbow instance.
def disable_color
  Cri::StringFormatter.prepend(CriStringFormatterExt)
  @rainbow.enabled = false
end
485
+
486
# Remember the command that is currently running, along with its args/options.
#
# Option keys are converted from dashes to underscores,
# so don't have to type @cmd_opts[:'dry-run'] all the time.
#
# @param opts [Hash]  parsed options, keyed by option name
# @param args [Array] remaining arguments
# @param cmd  [Cri::Command] the command being run
# @return [self]
def refresh_cmd(opts, args, cmd)
  @cmd = cmd
  @cmd_args = args
  @cmd_opts = opts.each_with_object({}) do |(key, value), renamed|
    renamed[key.to_s.tr('-', '_').to_sym] = value
  end

  self
end
505
+
506
# Run the root command with the arguments given to the constructor.
def run
  @app_cmd.run(@args)
end

# Print the program name and version on one line.
def show_version
  puts "#{NAME} v#{VERSION}"
end

# Pause for @sleep_time seconds between scrapes.
def sleep_scraper
  sleep(@sleep_time)
end
517
+
518
# Start showing that a long task is running.
#
# If @spinner is a Hash (i.e., NO_SPINNER), the title/detail are stored in it
# and a single status line is printed; else, the spinner's tokens are updated
# and it is set to spin automatically.
#
# @param title  [String] name of the task
# @param detail [String] extra text shown right after the title
def start_spin(title, detail: '')
  if !@spinner.is_a?(Hash)
    @spinner.update(title: title, detail: detail)
    @spinner.auto_spin
  else
    @spinner[:detail] = detail
    @spinner[:title] = title

    puts(NO_SPINNER_MSG % @spinner)
  end
end

# Finish the current task with a 'done!' message.
#
# If @spinner is a Hash, the status line is printed again with ' done!' at
# the end; else, the spinner is reset and then stopped with 'done!'.
def stop_spin
  if !@spinner.is_a?(Hash)
    @spinner.reset
    @spinner.stop('done!')
  else
    puts "#{NO_SPINNER_MSG % @spinner} done!"
  end
end

# Change only the detail text of the current task.
#
# If @spinner is a Hash, the new status line is printed right away; else,
# only the spinner's :detail token is changed.
#
# @param detail [String] new text to show right after the title
def update_spin_detail(detail)
  if !@spinner.is_a?(Hash)
    @spinner.tokens[:detail] = detail
  else
    @spinner[:detail] = detail

    puts(NO_SPINNER_MSG % @spinner)
  end
end
548
+ end
549
+
550
###
# A plain-text stand-in for a progress bar that prints a line such as
# "title... 50%" to stdout, but only each time the percent passes the next
# multiple of PUT_INTERVAL (and at 99% or more).
#
# @author Jonathan Bradley Whited (@esotericpig)
# @since  0.2.0
###
class NoProgressBar
  MSG = '%{title}... %{percent}%%'
  PUT_INTERVAL = 100.0 / 6.25
  MAX_PUT_INTERVAL = 100.0 + PUT_INTERVAL + 1.0

  # @param title  [String]  text shown in front of the percent
  # @param total  [Integer] amount of work that equals 100%
  # @param tokens [Hash]    extra tokens; merged in last, so they can override
  #                         the built-in ones
  def initialize(title, total:, **tokens)
    super()

    @tokens = {title: title, total: total}

    reset

    @tokens.merge!(tokens)
  end

  # Set the progress, the percent, and the last printed percent back to 0.
  def reset
    @tokens[:advance] = 0
    @tokens[:percent] = 0
    @tokens[:progress] = 0
  end

  # Add to the progress (never going above the total) and print the new
  # percent if it has passed the next output threshold.
  #
  # A total that is nil, 0, or negative is treated as already complete (100%),
  # instead of raising an error from dividing by it.
  #
  # @param progress [Integer] amount of work to add
  def advance(progress = 1)
    total = @tokens[:total]
    progress = @tokens[:progress] + progress
    progress = total if !total.nil? && progress > total

    percent =
      if total.to_f > 0.0
        (progress.to_f / total.to_f * 100.0).round
      else
        100 # Nothing to do, so nothing left to do.
      end

    @tokens[:percent] = percent
    @tokens[:progress] = progress

    if percent < 99.0
      # Only output at certain intervals:
      # find the first threshold above the last percent that was printed.
      last_put = @tokens[:advance]
      threshold = 0.0

      threshold += PUT_INTERVAL while threshold <= MAX_PUT_INTERVAL && last_put >= threshold

      # Don't output until that threshold has been reached.
      return if threshold <= MAX_PUT_INTERVAL && percent < threshold
    end

    @tokens[:advance] = percent

    puts to_s
  end

  # Jump to the end by advancing a full total's worth of progress.
  def finish
    advance(@tokens[:total])
  end

  # Print the current status line.
  def start
    puts to_s
  end

  # @return [String] MSG filled in with the current tokens
  def to_s
    MSG % @tokens
  end
end
616
+ end