nhkore 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/nhkore/app.rb ADDED
@@ -0,0 +1,616 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of NHKore.
7
+ # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # NHKore is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU Lesser General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # NHKore is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU Lesser General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU Lesser General Public License
20
+ # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'cri'
25
+ require 'highline'
26
+ require 'rainbow'
27
+ require 'tty-progressbar'
28
+ require 'tty-spinner'
29
+
30
+ require 'nhkore/error'
31
+ require 'nhkore/util'
32
+ require 'nhkore/version'
33
+
34
+ require 'nhkore/cli/bing_cmd'
35
+ require 'nhkore/cli/fx_cmd'
36
+ require 'nhkore/cli/get_cmd'
37
+ require 'nhkore/cli/news_cmd'
38
+ require 'nhkore/cli/sift_cmd'
39
+
40
+
41
+ module NHKore
42
###
# Namespace for the command-line mixins that {App} includes
# (+CLI::BingCmd+, +CLI::FXCmd+, +CLI::GetCmd+, +CLI::NewsCmd+, +CLI::SiftCmd+).
# It defines nothing itself here.
#
# @author Jonathan Bradley Whited (@esotericpig)
# @since  0.2.0
###
module CLI; end
48
+
49
###
# For disabling color output.
#
# Every color/style helper below hands its argument back untouched.
# {App#disable_color} prepends this module to +Cri::StringFormatter+.
#
# @author Jonathan Bradley Whited (@esotericpig)
# @since  0.2.0
###
module CriStringFormatterExt
  # One pass-through method per style: each takes a string and returns it as is.
  %i[blue bold green red yellow].each do |style|
    define_method(style) { |str| str }
  end
end
76
+
77
+ ###
78
+ # @author Jonathan Bradley Whited (@esotericpig)
79
+ # @since 0.2.0
80
+ ###
81
+ class App
82
+ include CLI::BingCmd
83
+ include CLI::FXCmd
84
+ include CLI::GetCmd
85
+ include CLI::NewsCmd
86
+ include CLI::SiftCmd
87
+
88
# Program name; used for the root command's name/usage and the version line.
NAME = 'nhkore'

# Template shared by both animated spinners.
SPINNER_MSG = '[:spinner] :title:detail...'
CLASSIC_SPINNER = TTY::Spinner.new(SPINNER_MSG,format: :classic)
DEFAULT_SPINNER = TTY::Spinner.new(
  SPINNER_MSG,
  interval: 5,
  frames: ['〜〜〜','日〜〜','日本〜','日本語']
)
NO_SPINNER = {} # Still outputs status & stores tokens
NO_SPINNER_MSG = '%{title}%{detail}...' # Plain-text format used with NO_SPINNER

DEFAULT_SLEEP_TIME = 0.1 # So that sites don't ban us (i.e., think we are human)

attr_accessor :progress_bar,:scraper_kargs,:sleep_time,:spinner
103
+
104
# Prepares the application: stores the raw arguments, sets the defaults,
# settles whether color is used, and builds the root command plus every
# sub command.
#
# @param args [Array<String>] the command-line arguments (defaults to +ARGV+)
def initialize(args=ARGV)
  super()

  # Raw input & per-command state (the latter is filled in by #refresh_cmd).
  @args = args
  @cmd = nil
  @cmd_args = nil
  @cmd_opts = nil

  # Terminal helpers.
  @high = HighLine.new
  @rainbow = Rainbow.new

  # Settings that the global options can change.
  @progress_bar = :default # [:default, :classic, :no]
  @scraper_kargs = {}
  @sleep_time = DEFAULT_SLEEP_TIME
  @spinner = DEFAULT_SPINNER

  autodetect_color

  # The root command comes first; #build_version_cmd attaches to it.
  build_app_cmd

  build_bing_cmd
  build_fx_cmd
  build_get_cmd
  build_news_cmd
  build_sift_cmd
  build_version_cmd

  @app_cmd.add_command Cri::Command.new_basic_help
end
131
+
132
# Decides whether color output should be enabled and applies the decision.
#
# Color is disabled when any of the following holds:
# - standard output is not a TTY;
# - the +TERM+ environment variable is 'dumb';
# - '-C' or '--no-color' appears in +@args+ before a '--' argument.
#
# Otherwise color is forced on, in case Rainbow auto-disabled it.
#
# The flags are kept in a plain frozen Array (not a Set), so this method
# does not rely on the 'set' library having been loaded by someone else.
def autodetect_color
  no_color_flags = %w[-C --no-color].freeze

  disable = !$stdout.tty? || ENV['TERM'] == 'dumb'

  unless disable
    # Kind of hacky, but necessary for Rainbow: the raw arguments are
    # scanned by hand here. Everything from '--' onward is ignored.
    disable = @args.take_while { |arg| arg != '--' }
                   .any? { |arg| no_color_flags.include?(arg) }
  end

  if disable
    disable_color
  else
    @rainbow.enabled = true # Force it in case Rainbow auto-disabled it
  end
end
158
+
159
# Builds the root command (+@app_cmd+) together with the global flags and
# options. Running the root command without a sub command prints its help.
#
# The option blocks write into this App through the +app+ local;
# presumably Cri evaluates the definition block with a different +self+
# (TODO confirm against Cri).
def build_app_cmd
  app = self

  # Timeouts: a negative number (e.g., -1) is stored as nil.
  to_timeout = lambda do |raw|
    secs = raw.to_f
    secs < 0.0 ? nil : secs
  end

  @app_cmd = Cri::Command.define do
    name    NAME
    usage   "#{NAME} [OPTIONS] [COMMAND]..."
    summary 'NHK News Web (Easy) scraper for Japanese language learners.'

    description <<-EOD
      Scrapes NHK News Web (Easy) to create a list of each word and its
      frequency (how many times it was used) for Japanese language learners.

      This is similar to a core word/vocabulary list.
    EOD

    flag :c,:'classic-fx',<<-EOD do |_value,_cmd|
      use classic spinner/progress special effects (in case of no Unicode support) when running long tasks
    EOD
      app.progress_bar = :classic
      app.spinner = CLASSIC_SPINNER
    end

    flag :n,:'dry-run',<<-EOD
      do a dry run without making changes; do not write to files, create directories, etc.
    EOD

    # Big F because dangerous.
    flag :F,:force,"force overwriting files, creating directories, etc. (don't prompt); dangerous!"

    flag :h,:help,'show this help' do |_value,cmd|
      puts cmd.help
      exit
    end

    option :m,:'max-retry',<<-EOD,argument: :required,default: 3 do |value,_cmd|
      maximum number of times to retry URLs (-1 or integer >= 0)
    EOD
      retries = value.to_i

      # A negative number (e.g., -1) is stored as nil.
      app.scraper_kargs[:max_retries] = (retries < 0) ? nil : retries
    end

    flag :C,:'no-color','disable color output' do |_value,_cmd|
      app.disable_color
    end

    flag :X,:'no-fx','disable spinner/progress special effects when running long tasks' do |_value,_cmd|
      app.progress_bar = :no
      app.spinner = NO_SPINNER
    end

    option :o,:'open-timeout',<<-EOD,argument: :required do |value,_cmd|
      seconds for URL open timeouts (-1 or decimal >= 0)
    EOD
      app.scraper_kargs[:open_timeout] = to_timeout.call(value)
    end

    option :r,:'read-timeout',<<-EOD,argument: :required do |value,_cmd|
      seconds for URL read timeouts (-1 or decimal >= 0)
    EOD
      app.scraper_kargs[:read_timeout] = to_timeout.call(value)
    end

    option :z,:sleep,<<-EOD,argument: :required,default: DEFAULT_SLEEP_TIME do |value,_cmd|
      seconds to sleep per scrape (i.e., per page/article) so don't get banned (i.e., fake being human)
    EOD
      secs = value.to_f

      # Negative sleep times are clamped to zero.
      app.sleep_time = (secs < 0.0) ? 0.0 : secs
    end

    option :t,:'timeout',<<-EOD,argument: :required do |value,_cmd|
      seconds for all URL timeouts: [open, read] (-1 or decimal >= 0)
    EOD
      secs = to_timeout.call(value)

      app.scraper_kargs[:open_timeout] = secs
      app.scraper_kargs[:read_timeout] = secs
    end

    # Big V, not small.
    flag :V,:version,'show the version and exit' do |_value,_cmd|
      app.show_version
      exit
    end

    run do |_opts,_args,cmd|
      puts cmd.help
    end
  end
end
246
+
247
# Normalizes the directory option stored at +@cmd_opts[opt_key]+.
#
# The option (and the default) are run through +Util.strip_web_str+, the
# default is used if the option is empty, and the result is expanded to an
# absolute path. The result is written back to +@cmd_opts+.
#
# @param opt_key [Symbol] key of the option in +@cmd_opts+
# @param default_dir [String] directory to use if the option is empty
# @return [String] the value stored in +@cmd_opts[opt_key]+
def build_dir(opt_key,default_dir: '.')
  # Protect against fat-fingering.
  fallback = Util.strip_web_str(default_dir)
  path = Util.strip_web_str(@cmd_opts[opt_key].to_s)

  path = fallback if path.empty?

  # '~' will expand to home, etc.
  path = File.expand_path(path) unless path.nil?

  @cmd_opts[opt_key] = path
end
259
+
260
# Normalizes the file option stored at +@cmd_opts[opt_key]+.
#
# - Empty option: the default dir + default filename, or +nil+ if there is
#   no default filename.
# - Directory (an existing one, or one per +Util.dir_str?+): the default
#   filename is appended to it.
# - Bare filename (per +Util.filename_str?+): it is placed in the default dir.
# - Anything else is taken as 'directory/file' and used as given.
#
# A non-nil result is expanded to an absolute path. The result is written
# back to +@cmd_opts+.
#
# @param opt_key [Symbol] key of the option in +@cmd_opts+
# @param default_dir [String] directory to use if the option has none
# @param default_filename [String] filename to use if the option has none
# @return [String,nil] the value stored in +@cmd_opts[opt_key]+
def build_file(opt_key,default_dir: '.',default_filename: '')
  # Protect against fat-fingering.
  dir_fallback = Util.strip_web_str(default_dir)
  name_fallback = Util.strip_web_str(default_filename)
  given = Util.strip_web_str(@cmd_opts[opt_key].to_s)

  path =
    if given.empty?
      # Do not check dir_fallback.empty?().
      # nil is very important for BingScraper.init()!
      name_fallback.empty? ? nil : File.join(dir_fallback,name_fallback)
    elsif File.directory?(given) || Util.dir_str?(given)
      # Directory?
      File.join(given,name_fallback)
    elsif Util.filename_str?(given)
      # File name only? (no directory)
      File.join(dir_fallback,given)
    else
      # Passed in both: 'directory/file'
      given
    end

  # '~' will expand to home, etc.
  path = File.expand_path(path) unless path.nil?

  @cmd_opts[opt_key] = path
end
289
+
290
# Input-directory flavor of {#build_dir}; all arguments are passed through.
#
# @param opt_key [Symbol] key of the option in +@cmd_opts+
# @param kargs [Hash] keyword arguments for {#build_dir}
# @return whatever {#build_dir} returns
def build_in_dir(opt_key,**kargs)
  build_dir(opt_key,**kargs)
end
293
+
294
# Input-file flavor of {#build_file}; all arguments are passed through.
#
# @param opt_key [Symbol] key of the option in +@cmd_opts+
# @param kargs [Hash] keyword arguments for {#build_file}
# @return whatever {#build_file} returns
def build_in_file(opt_key,**kargs)
  build_file(opt_key,**kargs)
end
297
+
298
# Output-directory flavor of {#build_dir}; all arguments are passed through.
#
# @param opt_key [Symbol] key of the option in +@cmd_opts+
# @param kargs [Hash] keyword arguments for {#build_dir}
# @return whatever {#build_dir} returns
def build_out_dir(opt_key,**kargs)
  build_dir(opt_key,**kargs)
end
301
+
302
# Output-file flavor of {#build_file}; all arguments are passed through.
#
# @param opt_key [Symbol] key of the option in +@cmd_opts+
# @param kargs [Hash] keyword arguments for {#build_file}
# @return whatever {#build_file} returns
def build_out_file(opt_key,**kargs)
  build_file(opt_key,**kargs)
end
305
+
306
# Creates a progress bar of the requested type.
#
# +:default+ and +:classic+ produce a +TTY::ProgressBar+ (the default one
# with custom bar characters); any other type (i.e., +:no+) produces a
# plain-text {NoProgressBar}.
#
# @param title [String] text shown in front of the bar
# @param download [true,false] if true, also show the byte rate and set the
#   bar's interval to 1
# @param total [Integer] the total passed on to the bar
# @param type [Symbol] +:default+, +:classic+, or +:no+; defaults to the
#   +progress_bar+ setting
# @param width [Integer] width passed on to +TTY::ProgressBar+
# @param kargs [Hash] extra keyword arguments passed on to the bar
# @return [TTY::ProgressBar,NoProgressBar]
def build_progress_bar(title,download: false,total: 100,type: @progress_bar,width: 33,**kargs)
  # :no
  return NoProgressBar.new(title,total: total,**kargs) unless %i[default classic].include?(type)

  # Unary plus gives a mutable String to append to.
  bar_format = +"#{title} [:bar] :percent :eta"
  bar_format << ' :byte_rate/s' if download

  TTY::ProgressBar.new(bar_format,total: total,width: width,**kargs) do |config|
    if type == :default
      config.incomplete = '.'
      config.complete = '/'
      config.head = 'o'
    end

    #config.frequency = 5 # For a big download, set this
    config.interval = 1 if download
  end
end
327
+
328
# Builds the 'version' sub command (alias: 'v') under the root command and
# stores it in +@version_cmd+. Running it prints the version.
def build_version_cmd
  app = self

  @version_cmd = @app_cmd.define_command do
    name    'version'
    usage   'version [OPTIONS] [COMMAND]...'
    aliases :v
    summary "Show the version and exit (aliases: #{app.color_alias('v')})"

    run do |_opts,_args,_cmd|
      app.show_version
    end
  end
end
342
+
343
# Makes sure that an option's value is not empty.
#
# @param key [Symbol,String] name of the option (only used in the error message)
# @param value [String,nil] the value to check
# @return [String] the value after +Util.strip_web_str+
# @raise [CLIError] if the value is nil or empty after stripping
def check_empty_opt(key,value)
  value = Util.strip_web_str(value) unless value.nil?

  return value unless value.nil? || value.empty?

  raise CLIError,"option[#{key}] cannot be empty[#{value}]"
end
352
+
353
# Validates the input file stored at +@cmd_opts[opt_key]+.
#
# @param opt_key [Symbol] key of the option in +@cmd_opts+
# @param empty_ok [true,false] if true, an empty path is allowed and the
#   option is reset to +nil+
# @return [true] if the check passes
# @raise [CLIError] if the path is empty (and +empty_ok+ is false), does
#   not exist, or is a directory
def check_in_file(opt_key,empty_ok: false)
  path = @cmd_opts[opt_key]

  if Util.empty_web_str?(path)
    raise CLIError,"empty input path name[#{path}] in option[#{opt_key}]" unless empty_ok

    @cmd_opts[opt_key] = nil # nil is very important for BingScraper.init()!

    return true
  end

  path = Util.strip_web_str(path)

  raise CLIError,"input file[#{path}] does not exist for option[#{opt_key}]" unless File.exist?(path)
  raise CLIError,"input file[#{path}] cannot be a directory for option[#{opt_key}]" if File.directory?(path)

  true
end
378
+
379
# Validates the output directory stored at +@cmd_opts[opt_key]+ and
# creates it if needed.
#
# On a dry run, the directory is only printed and nothing is created.
# Unless the +force+ option is set, the user is asked before using an
# existing directory and before creating a missing one.
#
# @param opt_key [Symbol] key of the option in +@cmd_opts+
# @return [true,false] false if the user answered no to a prompt
# @raise [CLIError] if the path is empty or is an existing file
def check_out_dir(opt_key)
  dir = @cmd_opts[opt_key]

  raise CLIError,"empty output directory[#{dir}] in option[#{opt_key}]" if Util.empty_web_str?(dir)

  dir = Util.strip_web_str(dir)

  raise CLIError,"output directory[#{dir}] cannot be a file for option[#{opt_key}]" if File.file?(dir)

  if @cmd_opts[:dry_run]
    puts 'No changes written (dry run).',"> #{dir}",''

    return true
  end

  ask = !@cmd_opts[:force]

  if ask && Dir.exist?(dir)
    puts 'Warning: output directory already exists!',
         '       : Files inside of this directory may be overwritten!',
         "> '#{dir}'"

    return false unless @high.agree('Is this okay (yes/no)? ')
    puts
  end

  unless Dir.exist?(dir)
    if ask
      puts 'Output directory does not exist.',"> '#{dir}'"

      return false unless @high.agree('Create this directory (yes/no)? ')
    end

    FileUtils.mkdir_p(dir,verbose: true)
    puts
  end

  true
end
425
+
426
# Validates the output file stored at +@cmd_opts[opt_key]+ and creates its
# parent directory if needed.
#
# On a dry run, the file is only printed and nothing is created.
# Unless the +force+ option is set, the user is asked before overwriting
# an existing file and before creating a missing parent directory.
#
# @param opt_key [Symbol] key of the option in +@cmd_opts+
# @return [true,false] false if the user answered no to a prompt
# @raise [CLIError] if the path is empty or is an existing directory
def check_out_file(opt_key)
  file = @cmd_opts[opt_key]

  raise CLIError,"empty output path name[#{file}] in option[#{opt_key}]" if Util.empty_web_str?(file)

  file = Util.strip_web_str(file)

  raise CLIError,"output file[#{file}] cannot be a directory for option[#{opt_key}]" if File.directory?(file)

  if @cmd_opts[:dry_run]
    puts 'No changes written (dry run).',"> #{file}",''

    return true
  end

  ask = !@cmd_opts[:force]
  parent = File.dirname(file)

  if ask && File.exist?(file)
    puts 'Warning: output file already exists!',"> '#{file}'"

    return false unless @high.agree('Overwrite this file (yes/no)? ')
    puts
  end

  unless Dir.exist?(parent)
    if ask
      puts 'Output directory does not exist.',"> '#{parent}'"

      return false unless @high.agree('Create this directory (yes/no)? ')
    end

    FileUtils.mkdir_p(parent,verbose: true)
    puts
  end

  true
end
472
+
473
# Wraps a string with this App's Rainbow instance.
#
# @param str [String] the text to wrap
# @return the result of +@rainbow.wrap+
def color(str)
  @rainbow.wrap(str)
end
476
+
477
# Styles a command alias for help text: the string is wrapped via {#color}
# and then made green.
#
# @param str [String] the alias
# @return the green-styled result
def color_alias(str)
  color(str).green
end
480
+
481
# Turns off color output: prepends {CriStringFormatterExt} to
# +Cri::StringFormatter+ and disables this App's Rainbow instance.
def disable_color
  Cri::StringFormatter.prepend(CriStringFormatterExt)

  @rainbow.enabled = false
end
485
+
486
# Stores the state of the command that is about to run.
#
# Option keys have their dashes changed to underscores, so that you don't
# have to type +@cmd_opts[:'dry-run']+ all the time.
#
# @param opts [Hash] the parsed options
# @param args the command's arguments; stored in +@cmd_args+
# @param cmd the command; stored in +@cmd+
# @return [self]
def refresh_cmd(opts,args,cmd)
  snake_opts = opts.each_with_object({}) do |(key,value),result|
    result[key.to_s.tr('-','_').to_sym] = value
  end

  @cmd = cmd
  @cmd_args = args
  @cmd_opts = snake_opts

  self
end
505
+
506
# Runs the root command with the arguments given to {#initialize}.
#
# @return the result of the root command's +run+
def run
  @app_cmd.run(@args)
end
509
+
510
# Prints the program name and version (e.g., 'nhkore v0.2.0') to standard output.
def show_version
  puts format('%s v%s',NAME,VERSION)
end
513
+
514
# Pauses for +sleep_time+ seconds; per the --sleep option, this is done
# per scrape so that sites don't ban us.
#
# @return [Integer] the result of +Kernel#sleep+
def sleep_scraper
  Kernel.sleep(@sleep_time)
end
517
+
518
# Starts showing the status of a long task.
#
# If the spinner is a Hash (i.e., +NO_SPINNER+), the title and detail are
# stored in it and one plain line is printed. Otherwise, the real
# spinner's tokens are updated and it starts spinning automatically.
#
# @param title [String] what is being done
# @param detail [String] extra text shown right after the title
def start_spin(title,detail: '')
  if @spinner.is_a?(Hash)
    @spinner.merge!(detail: detail,title: title)

    puts format(NO_SPINNER_MSG,@spinner)
  else
    @spinner.update(title: title,detail: detail)
    @spinner.auto_spin
  end
end
529
+
530
# Marks the current long task as done.
#
# If the spinner is a Hash (i.e., +NO_SPINNER+), the plain status line is
# printed again with ' done!' appended. Otherwise, the real spinner is
# reset and then stopped with the message 'done!'.
def stop_spin
  if @spinner.is_a?(Hash)
    puts "#{NO_SPINNER_MSG % @spinner} done!"
  else
    @spinner.reset
    @spinner.stop('done!')
  end
end
538
+
539
# Changes the detail text of the current long task.
#
# If the spinner is a Hash (i.e., +NO_SPINNER+), the detail is stored in
# it and the plain status line is printed again. Otherwise, only the real
# spinner's +:detail+ token is changed.
#
# @param detail [String] the new detail text
def update_spin_detail(detail)
  if @spinner.is_a?(Hash)
    @spinner.store(:detail,detail)

    puts format(NO_SPINNER_MSG,@spinner)
  else
    @spinner.tokens[:detail] = detail
  end
end
548
+ end
549
+
550
###
# Plain-text stand-in for a progress bar; {App#build_progress_bar} returns
# one when the progress bar type is neither +:default+ nor +:classic+.
#
# Instead of drawing a bar, it prints lines like 'Title... 48%', and only
# at certain percent intervals, so as not to flood the output.
#
# @author Jonathan Bradley Whited (@esotericpig)
# @since  0.2.0
###
class NoProgressBar
  MSG = '%{title}... %{percent}%%'
  PUT_INTERVAL = 100.0 / 6.25 # Percent between outputs (16.0)
  MAX_PUT_INTERVAL = 100.0 + PUT_INTERVAL + 1.0 # Upper limit when searching for the next interval

  # @param title [String] text shown in front of the percent
  # @param total [Numeric] the amount of progress that equals 100%
  # @param tokens [Hash] extra tokens; merged last, so they can override
  #   the initial ones
  def initialize(title,total:,**tokens)
    super()

    @tokens = {title: title,total: total}

    reset

    @tokens.merge!(tokens)
  end

  # Sets the progress, the percent, and the last-output percent back to 0.
  def reset
    @tokens[:advance] = 0
    @tokens[:percent] = 0
    @tokens[:progress] = 0
  end

  # Adds to the progress (capped at the total) and prints the status line
  # if the percent is at least 99 or has reached the next multiple of
  # +PUT_INTERVAL+ since the last output.
  #
  # @param progress [Numeric] the amount to add
  def advance(progress=1)
    total = @tokens[:total]
    progress = @tokens[:progress] + progress
    progress = total if progress > total

    # A total of 0 would make the division below NaN and Float#round raise
    # FloatDomainError; there is nothing left to do then, so it's 100%.
    percent = (total > 0) ? (progress.to_f / total.to_f * 100.0).round : 100

    @tokens[:percent] = percent
    @tokens[:progress] = progress

    if percent < 99.0
      # Only output at certain intervals: find the first interval after the
      # percent of the last output, and wait until it has been reached.
      last_put = @tokens[:advance]
      next_put = 0.0.step(MAX_PUT_INTERVAL,PUT_INTERVAL).find { |interval| last_put < interval }

      return if next_put && percent < next_put # Don't output
    end

    @tokens[:advance] = percent

    puts to_s
  end

  # Advances by the whole total, which caps the progress at the total.
  def finish
    advance(@tokens[:total])
  end

  # Prints the current status line.
  def start
    puts to_s
  end

  # @return [String] the status line (i.e., +MSG+ filled in with the tokens)
  def to_s
    MSG % @tokens
  end
end
616
+ end