nhkore 0.3.3 → 0.3.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +97 -2
- data/Gemfile +0 -18
- data/Gemfile.lock +89 -0
- data/README.md +58 -30
- data/Rakefile +68 -42
- data/bin/nhkore +4 -15
- data/lib/nhkore.rb +8 -20
- data/lib/nhkore/app.rb +231 -236
- data/lib/nhkore/article.rb +56 -53
- data/lib/nhkore/article_scraper.rb +308 -289
- data/lib/nhkore/cleaner.rb +20 -32
- data/lib/nhkore/cli/fx_cmd.rb +41 -53
- data/lib/nhkore/cli/get_cmd.rb +59 -70
- data/lib/nhkore/cli/news_cmd.rb +145 -154
- data/lib/nhkore/cli/search_cmd.rb +110 -120
- data/lib/nhkore/cli/sift_cmd.rb +111 -227
- data/lib/nhkore/datetime_parser.rb +328 -0
- data/lib/nhkore/defn.rb +48 -55
- data/lib/nhkore/dict.rb +26 -38
- data/lib/nhkore/dict_scraper.rb +31 -40
- data/lib/nhkore/entry.rb +43 -55
- data/lib/nhkore/error.rb +16 -21
- data/lib/nhkore/fileable.rb +10 -21
- data/lib/nhkore/lib.rb +6 -17
- data/lib/nhkore/missingno.rb +21 -33
- data/lib/nhkore/news.rb +61 -66
- data/lib/nhkore/polisher.rb +22 -34
- data/lib/nhkore/scraper.rb +75 -82
- data/lib/nhkore/search_link.rb +85 -78
- data/lib/nhkore/search_scraper.rb +89 -92
- data/lib/nhkore/sifter.rb +157 -171
- data/lib/nhkore/splitter.rb +19 -31
- data/lib/nhkore/user_agents.rb +28 -32
- data/lib/nhkore/util.rb +72 -101
- data/lib/nhkore/variator.rb +20 -32
- data/lib/nhkore/version.rb +4 -16
- data/lib/nhkore/word.rb +105 -99
- data/nhkore.gemspec +58 -65
- data/samples/looper.rb +71 -0
- data/test/nhkore/test_helper.rb +3 -15
- data/test/nhkore_test.rb +6 -18
- metadata +53 -30
data/Rakefile
CHANGED
@@ -1,24 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
|
-
#--
|
5
|
-
# This file is part of NHKore.
|
6
|
-
# Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
|
7
|
-
#
|
8
|
-
# NHKore is free software: you can redistribute it and/or modify
|
9
|
-
# it under the terms of the GNU Lesser General Public License as published by
|
10
|
-
# the Free Software Foundation, either version 3 of the License, or
|
11
|
-
# (at your option) any later version.
|
12
|
-
#
|
13
|
-
# NHKore is distributed in the hope that it will be useful,
|
14
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
-
# GNU Lesser General Public License for more details.
|
17
|
-
#
|
18
|
-
# You should have received a copy of the GNU Lesser General Public License
|
19
|
-
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
20
|
-
#++
|
21
|
-
|
22
4
|
|
23
5
|
require 'bundler/gem_tasks'
|
24
6
|
|
@@ -35,66 +17,110 @@ require 'nhkore/version'
|
|
35
17
|
|
36
18
|
PKG_DIR = 'pkg'
|
37
19
|
|
38
|
-
CLEAN.exclude('.git
|
20
|
+
CLEAN.exclude('{.git,core,stock}/**/*')
|
39
21
|
CLOBBER.include('doc/',File.join(PKG_DIR,''))
|
40
22
|
|
41
23
|
|
42
24
|
task default: [:test]
|
43
25
|
|
44
26
|
desc 'Generate documentation (YARDoc)'
|
45
|
-
task :
|
27
|
+
task doc: %i[yard yard_gfm_fix] do |task|
|
46
28
|
end
|
47
29
|
|
48
30
|
desc "Package '#{File.join(NHKore::Util::CORE_DIR,'')}' data as a Zip file into '#{File.join(PKG_DIR,'')}'"
|
49
31
|
task :pkg_core do |task|
|
50
32
|
mkdir_p PKG_DIR
|
51
|
-
|
33
|
+
|
52
34
|
pattern = File.join(NHKore::Util::CORE_DIR,'*.{csv,html,json,yml}')
|
53
35
|
zip_file = File.join(PKG_DIR,'nhkore-core.zip')
|
54
|
-
|
55
|
-
sh 'zip','-9rv',zip_file,*Dir.glob(pattern).sort
|
36
|
+
|
37
|
+
sh 'zip','-9rv',zip_file,*Dir.glob(pattern).sort
|
56
38
|
end
|
57
39
|
|
58
|
-
Rake::TestTask.new
|
40
|
+
Rake::TestTask.new do |task|
|
59
41
|
task.libs = ['lib','test']
|
60
42
|
task.pattern = File.join('test','**','*_test.rb')
|
61
43
|
task.description += ": '#{task.pattern}'"
|
62
|
-
task.verbose =
|
44
|
+
task.verbose = false
|
63
45
|
task.warning = true
|
64
46
|
end
|
65
47
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
48
|
+
# If you need to run a part after the 1st part,
|
49
|
+
# just type 'n' to not overwrite the file and then 'y' for continue.
|
50
|
+
desc "Update '#{File.join(NHKore::Util::CORE_DIR,'')}' files for release"
|
51
|
+
task :update_core do |task|
|
52
|
+
require 'highline'
|
53
|
+
|
54
|
+
continue_msg = "\nContinue (y/n)? "
|
55
|
+
|
56
|
+
cmd = ['ruby','-w','./lib/nhkore.rb','-t','300','-m','10']
|
57
|
+
hl = HighLine.new
|
58
|
+
|
59
|
+
next unless sh(*cmd,'se','ez','bing')
|
60
|
+
next unless hl.agree(continue_msg)
|
61
|
+
puts
|
62
|
+
|
63
|
+
next unless sh(*cmd,'news','-s','500','ez')
|
64
|
+
next unless hl.agree(continue_msg)
|
65
|
+
puts
|
66
|
+
|
67
|
+
next unless sh(*cmd,'sift','-e','csv' ,'ez')
|
68
|
+
puts
|
69
|
+
next unless sh(*cmd,'sift','-e','html','ez')
|
70
|
+
puts
|
71
|
+
next unless sh(*cmd,'sift','-e','json','ez')
|
72
|
+
puts
|
73
|
+
next unless sh(*cmd,'sift','-e','yml' ,'ez')
|
74
|
+
puts
|
75
|
+
end
|
76
|
+
|
77
|
+
# @since 0.3.6
|
78
|
+
desc 'Update showcase file for release'
|
79
|
+
task :update_showcase do |task|
|
80
|
+
require 'highline'
|
81
|
+
|
82
|
+
showcase_file = File.join('.','nhkore-ez.html')
|
83
|
+
|
84
|
+
hl = HighLine.new
|
85
|
+
|
86
|
+
next unless sh('ruby','-w','./lib/nhkore.rb',
|
87
|
+
'sift','ez','--no-eng',
|
88
|
+
'--out',showcase_file,
|
89
|
+
)
|
90
|
+
|
91
|
+
next unless hl.agree("\nMove the file (y/n)? ")
|
92
|
+
puts
|
93
|
+
next unless sh('mv','-iv',showcase_file,
|
94
|
+
File.join('..','esotericpig.github.io','showcase',''),
|
95
|
+
)
|
96
|
+
end
|
97
|
+
|
98
|
+
YARD::Rake::YardocTask.new do |task|
|
73
99
|
task.options += ['--template-path',File.join('yard','templates')]
|
74
100
|
task.options += ['--title',"NHKore v#{NHKore::VERSION} Doc"]
|
75
101
|
end
|
76
102
|
|
77
103
|
# Execute "rake yard_gfm_fix" for production.
|
78
104
|
# Execute "rake yard_gfm_fix[true]" for testing locally.
|
79
|
-
YardGhurt::GFMFixTask.new
|
105
|
+
YardGhurt::GFMFixTask.new do |task|
|
80
106
|
task.arg_names = [:dev]
|
81
107
|
task.dry_run = false
|
82
108
|
task.fix_code_langs = true
|
83
109
|
task.md_files = ['index.html']
|
84
|
-
|
85
|
-
task.before =
|
110
|
+
|
111
|
+
task.before = proc do |t,args|
|
86
112
|
# Delete this file as it's never used (index.html is an exact copy).
|
87
|
-
YardGhurt::Util.rm_exist(File.join(
|
88
|
-
|
113
|
+
YardGhurt::Util.rm_exist(File.join(t.doc_dir,'file.README.html'))
|
114
|
+
|
89
115
|
# Root dir of my GitHub Page for CSS/JS.
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
116
|
+
ghp_root = YardGhurt::Util.to_bool(args.dev) ? '../../esotericpig.github.io' : '../../..'
|
117
|
+
|
118
|
+
t.css_styles << %Q(<link rel="stylesheet" type="text/css" href="#{ghp_root}/css/prism.css" />)
|
119
|
+
t.js_scripts << %Q(<script src="#{ghp_root}/js/prism.js"></script>)
|
94
120
|
end
|
95
121
|
end
|
96
122
|
|
97
123
|
# Probably not useful for others.
|
98
|
-
YardGhurt::GHPSyncTask.new
|
124
|
+
YardGhurt::GHPSyncTask.new do |task|
|
99
125
|
task.ghp_dir = '../esotericpig.github.io/docs/nhkore/yardoc'
|
100
126
|
end
|
data/bin/nhkore
CHANGED
@@ -4,24 +4,13 @@
|
|
4
4
|
|
5
5
|
#--
|
6
6
|
# This file is part of NHKore.
|
7
|
-
# Copyright (c) 2020 Jonathan Bradley Whited
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# NHKore is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU Lesser General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU Lesser General Public License
|
20
|
-
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
7
|
+
# Copyright (c) 2020-2021 Jonathan Bradley Whited
|
8
|
+
#
|
9
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later
|
21
10
|
#++
|
22
11
|
|
23
12
|
|
24
13
|
require 'nhkore'
|
25
14
|
|
26
15
|
|
27
|
-
NHKore.run
|
16
|
+
NHKore.run
|
data/lib/nhkore.rb
CHANGED
@@ -1,27 +1,15 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of NHKore.
|
7
|
-
# Copyright (c) 2020 Jonathan Bradley Whited
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# NHKore is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU Lesser General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU Lesser General Public License
|
20
|
-
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2020-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
|
-
TESTING = ($
|
12
|
+
TESTING = ($PROGRAM_NAME == __FILE__)
|
25
13
|
|
26
14
|
if TESTING
|
27
15
|
require 'rubygems'
|
@@ -39,16 +27,16 @@ require 'nhkore/cli/sift_cmd'
|
|
39
27
|
|
40
28
|
|
41
29
|
###
|
42
|
-
# @author Jonathan Bradley Whited
|
30
|
+
# @author Jonathan Bradley Whited
|
43
31
|
# @since 0.1.0
|
44
32
|
###
|
45
33
|
module NHKore
|
46
34
|
# @since 0.2.0
|
47
35
|
def self.run(args=ARGV)
|
48
36
|
app = App.new(args)
|
49
|
-
|
37
|
+
|
50
38
|
begin
|
51
|
-
app.run
|
39
|
+
app.run
|
52
40
|
rescue CLIError => e
|
53
41
|
puts "Error: #{e}"
|
54
42
|
exit 1
|
@@ -56,4 +44,4 @@ module NHKore
|
|
56
44
|
end
|
57
45
|
end
|
58
46
|
|
59
|
-
NHKore.run
|
47
|
+
NHKore.run if TESTING
|
data/lib/nhkore/app.rb
CHANGED
@@ -1,23 +1,11 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of NHKore.
|
7
|
-
# Copyright (c) 2020 Jonathan Bradley Whited
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# NHKore is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU Lesser General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU Lesser General Public License
|
20
|
-
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2020-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
@@ -40,32 +28,32 @@ require 'nhkore/cli/sift_cmd'
|
|
40
28
|
|
41
29
|
module NHKore
|
42
30
|
###
|
43
|
-
# @author Jonathan Bradley Whited
|
31
|
+
# @author Jonathan Bradley Whited
|
44
32
|
# @since 0.2.0
|
45
33
|
###
|
46
34
|
module CLI
|
47
35
|
end
|
48
|
-
|
36
|
+
|
49
37
|
###
|
50
38
|
# For disabling/enabling color output.
|
51
|
-
#
|
52
|
-
# @author Jonathan Bradley Whited
|
39
|
+
#
|
40
|
+
# @author Jonathan Bradley Whited
|
53
41
|
# @since 0.2.1
|
54
42
|
###
|
55
43
|
module CriColorExt
|
56
|
-
|
57
|
-
|
44
|
+
@color = true
|
45
|
+
|
58
46
|
def color=(color)
|
59
|
-
|
47
|
+
@color = color
|
60
48
|
end
|
61
|
-
|
49
|
+
|
62
50
|
def color?(io)
|
63
|
-
return
|
51
|
+
return @color
|
64
52
|
end
|
65
53
|
end
|
66
|
-
|
54
|
+
|
67
55
|
###
|
68
|
-
# @author Jonathan Bradley Whited
|
56
|
+
# @author Jonathan Bradley Whited
|
69
57
|
# @since 0.2.0
|
70
58
|
###
|
71
59
|
class App
|
@@ -74,21 +62,20 @@ module NHKore
|
|
74
62
|
include CLI::NewsCmd
|
75
63
|
include CLI::SearchCmd
|
76
64
|
include CLI::SiftCmd
|
77
|
-
|
65
|
+
|
78
66
|
NAME = 'nhkore'
|
79
|
-
|
67
|
+
|
80
68
|
DEFAULT_SLEEP_TIME = 0.1 # So that sites don't ban us (i.e., think we are human)
|
81
|
-
|
82
|
-
COLOR_OPTS = [
|
83
|
-
NO_COLOR_OPTS = [
|
84
|
-
|
69
|
+
|
70
|
+
COLOR_OPTS = %i[c color].freeze
|
71
|
+
NO_COLOR_OPTS = %i[C no-color].freeze
|
72
|
+
|
85
73
|
SPINNER_MSG = '[:spinner] :title:detail...'
|
86
74
|
CLASSIC_SPINNER = TTY::Spinner.new(SPINNER_MSG,format: :classic)
|
87
75
|
DEFAULT_SPINNER = TTY::Spinner.new(SPINNER_MSG,interval: 5,
|
88
76
|
frames: ['〜〜〜','日〜〜','日本〜','日本語'])
|
89
|
-
NO_SPINNER = {} # Still outputs status & stores tokens
|
90
77
|
NO_SPINNER_MSG = '%{title}%{detail}...'
|
91
|
-
|
78
|
+
|
92
79
|
attr_reader :cmd
|
93
80
|
attr_reader :cmd_args
|
94
81
|
attr_reader :cmd_opts
|
@@ -96,107 +83,108 @@ module NHKore
|
|
96
83
|
attr_accessor :scraper_kargs
|
97
84
|
attr_accessor :sleep_time
|
98
85
|
attr_accessor :spinner
|
99
|
-
|
86
|
+
|
100
87
|
def initialize(args=ARGV)
|
101
88
|
super()
|
102
|
-
|
89
|
+
|
103
90
|
@args = args
|
104
91
|
@cmd = nil
|
105
92
|
@cmd_args = nil
|
106
93
|
@cmd_opts = nil
|
107
|
-
@high = HighLine.new
|
108
|
-
@rainbow = Rainbow.new
|
94
|
+
@high = HighLine.new
|
95
|
+
@rainbow = Rainbow.new
|
109
96
|
@progress_bar = :default # [:default, :classic, :no]
|
110
97
|
@scraper_kargs = {}
|
111
98
|
@sleep_time = DEFAULT_SLEEP_TIME
|
112
99
|
@spinner = DEFAULT_SPINNER
|
113
|
-
|
114
|
-
autodetect_color
|
115
|
-
|
116
|
-
build_app_cmd
|
117
|
-
|
118
|
-
build_fx_cmd
|
119
|
-
build_get_cmd
|
120
|
-
build_news_cmd
|
121
|
-
build_search_cmd
|
122
|
-
build_sift_cmd
|
123
|
-
build_version_cmd
|
124
|
-
|
125
|
-
@app_cmd.add_command Cri::Command.new_basic_help
|
126
|
-
end
|
127
|
-
|
128
|
-
def autodetect_color
|
100
|
+
|
101
|
+
autodetect_color
|
102
|
+
|
103
|
+
build_app_cmd
|
104
|
+
|
105
|
+
build_fx_cmd
|
106
|
+
build_get_cmd
|
107
|
+
build_news_cmd
|
108
|
+
build_search_cmd
|
109
|
+
build_sift_cmd
|
110
|
+
build_version_cmd
|
111
|
+
|
112
|
+
@app_cmd.add_command Cri::Command.new_basic_help
|
113
|
+
end
|
114
|
+
|
115
|
+
def autodetect_color
|
129
116
|
Cri::Platform.singleton_class.prepend(CriColorExt)
|
130
|
-
|
117
|
+
|
131
118
|
color = nil # Must be nil, not true/false
|
132
|
-
|
133
|
-
if !@args.empty?
|
119
|
+
|
120
|
+
if !@args.empty?
|
134
121
|
# Kind of hacky, but necessary for Rainbow.
|
135
|
-
|
122
|
+
|
136
123
|
color_opts = opts_to_set(COLOR_OPTS)
|
137
124
|
no_color_opts = opts_to_set(NO_COLOR_OPTS)
|
138
|
-
|
139
|
-
@args.each
|
125
|
+
|
126
|
+
@args.each do |arg|
|
140
127
|
if color_opts.include?(arg)
|
141
128
|
color = true
|
142
129
|
break
|
143
130
|
end
|
144
|
-
|
131
|
+
|
145
132
|
if no_color_opts.include?(arg)
|
146
133
|
color = false
|
147
134
|
break
|
148
135
|
end
|
149
|
-
|
136
|
+
|
150
137
|
break if arg == '--'
|
151
138
|
end
|
152
139
|
end
|
153
|
-
|
154
|
-
if color.nil?
|
155
|
-
|
140
|
+
|
141
|
+
if color.nil?
|
142
|
+
# - https://no-color.org/
|
143
|
+
color = ($stdout.tty? && ENV['TERM'] != 'dumb' && !ENV.key?('NO_COLOR'))
|
156
144
|
end
|
157
|
-
|
145
|
+
|
158
146
|
enable_color(color)
|
159
147
|
end
|
160
|
-
|
161
|
-
def build_app_cmd
|
148
|
+
|
149
|
+
def build_app_cmd
|
162
150
|
app = self
|
163
|
-
|
164
|
-
@app_cmd = Cri::Command.define
|
151
|
+
|
152
|
+
@app_cmd = Cri::Command.define do
|
165
153
|
name NAME
|
166
154
|
usage "#{NAME} [OPTIONS] [COMMAND]..."
|
167
155
|
summary 'NHK News Web (Easy) scraper for Japanese language learners.'
|
168
|
-
|
169
|
-
description <<-
|
156
|
+
|
157
|
+
description <<-DESC
|
170
158
|
Scrapes NHK News Web (Easy) to create a list of each word and its
|
171
159
|
frequency (how many times it was used) for Japanese language learners.
|
172
|
-
|
160
|
+
|
173
161
|
This is similar to a core word/vocabulary list.
|
174
|
-
|
175
|
-
|
176
|
-
flag :s,:'classic-fx',<<-
|
162
|
+
DESC
|
163
|
+
|
164
|
+
flag :s,:'classic-fx',<<-DESC do |value,cmd|
|
177
165
|
use classic spinner/progress special effects (in case of no Unicode support) when running long tasks
|
178
|
-
|
166
|
+
DESC
|
179
167
|
app.progress_bar = :classic
|
180
168
|
app.spinner = CLASSIC_SPINNER
|
181
169
|
end
|
182
|
-
flag COLOR_OPTS[0],COLOR_OPTS[1]
|
170
|
+
flag COLOR_OPTS[0],COLOR_OPTS[1],"force color output (for commands like '| less -R')" do |value,cmd|
|
183
171
|
app.enable_color(true)
|
184
172
|
end
|
185
|
-
flag :n,:'dry-run',<<-
|
173
|
+
flag :n,:'dry-run',<<-DESC
|
186
174
|
do a dry run without making changes; do not write to files, create directories, etc.
|
187
|
-
|
175
|
+
DESC
|
188
176
|
# Big F because dangerous.
|
189
177
|
flag :F,:force,"force overwriting files, creating directories, etc. (don't prompt); dangerous!"
|
190
178
|
flag :h,:help,'show this help' do |value,cmd|
|
191
179
|
puts cmd.help
|
192
180
|
exit
|
193
181
|
end
|
194
|
-
option :m,:'max-retry',<<-
|
182
|
+
option :m,:'max-retry',<<-DESC,argument: :required,default: 3 do |value,cmd|
|
195
183
|
maximum number of times to retry URLs (-1 or integer >= 0)
|
196
|
-
|
197
|
-
value = value.to_i
|
184
|
+
DESC
|
185
|
+
value = value.to_i
|
198
186
|
value = nil if value < 0
|
199
|
-
|
187
|
+
|
200
188
|
app.scraper_kargs[:max_retries] = value
|
201
189
|
end
|
202
190
|
flag NO_COLOR_OPTS[0],NO_COLOR_OPTS[1],'disable color output' do |value,cmd|
|
@@ -204,81 +192,80 @@ module NHKore
|
|
204
192
|
end
|
205
193
|
flag :X,:'no-fx','disable spinner/progress special effects when running long tasks' do |value,cmd|
|
206
194
|
app.progress_bar = :no
|
207
|
-
app.spinner =
|
195
|
+
app.spinner = {} # Still outputs status & stores tokens
|
208
196
|
end
|
209
|
-
option :o,:'open-timeout',<<-
|
197
|
+
option :o,:'open-timeout',<<-DESC,argument: :required do |value,cmd|
|
210
198
|
seconds for URL open timeouts (-1 or decimal >= 0)
|
211
|
-
|
212
|
-
value = value.to_f
|
199
|
+
DESC
|
200
|
+
value = value.to_f
|
213
201
|
value = nil if value < 0.0
|
214
|
-
|
202
|
+
|
215
203
|
app.scraper_kargs[:open_timeout] = value
|
216
204
|
end
|
217
|
-
option :r,:'read-timeout',<<-
|
205
|
+
option :r,:'read-timeout',<<-DESC,argument: :required do |value,cmd|
|
218
206
|
seconds for URL read timeouts (-1 or decimal >= 0)
|
219
|
-
|
220
|
-
value = value.to_f
|
207
|
+
DESC
|
208
|
+
value = value.to_f
|
221
209
|
value = nil if value < 0.0
|
222
|
-
|
210
|
+
|
223
211
|
app.scraper_kargs[:read_timeout] = value
|
224
212
|
end
|
225
|
-
option :z,:sleep,<<-
|
213
|
+
option :z,:sleep,<<-DESC,argument: :required,default: DEFAULT_SLEEP_TIME do |value,cmd|
|
226
214
|
seconds to sleep per scrape (i.e., per page/article) so don't get banned (i.e., fake being human)
|
227
|
-
|
228
|
-
app.sleep_time = value.to_f
|
215
|
+
DESC
|
216
|
+
app.sleep_time = value.to_f
|
229
217
|
app.sleep_time = 0.0 if app.sleep_time < 0.0
|
230
218
|
end
|
231
|
-
option :t,:timeout,<<-
|
219
|
+
option :t,:timeout,<<-DESC,argument: :required do |value,cmd|
|
232
220
|
seconds for all URL timeouts: [open, read] (-1 or decimal >= 0)
|
233
|
-
|
234
|
-
value = value.to_f
|
221
|
+
DESC
|
222
|
+
value = value.to_f
|
235
223
|
value = nil if value < 0.0
|
236
|
-
|
224
|
+
|
237
225
|
app.scraper_kargs[:open_timeout] = value
|
238
226
|
app.scraper_kargs[:read_timeout] = value
|
239
227
|
end
|
240
|
-
option :u,:'user-agent',<<-
|
228
|
+
option :u,:'user-agent',<<-DESC,argument: :required do |value,cmd|
|
241
229
|
HTTP header field 'User-Agent' to use instead of a random one
|
242
|
-
|
230
|
+
DESC
|
243
231
|
value = app.check_empty_opt(:'user-agent',value)
|
244
|
-
|
232
|
+
|
245
233
|
app.scraper_kargs[:header] ||= {}
|
246
234
|
app.scraper_kargs[:header]['user-agent'] = value
|
247
235
|
end
|
248
|
-
|
249
|
-
|
250
|
-
app.show_version()
|
236
|
+
flag :v,:version,'show the version and exit' do |value,cmd|
|
237
|
+
app.show_version
|
251
238
|
exit
|
252
239
|
end
|
253
|
-
|
240
|
+
|
254
241
|
run do |opts,args,cmd|
|
255
242
|
puts cmd.help
|
256
243
|
end
|
257
244
|
end
|
258
245
|
end
|
259
|
-
|
246
|
+
|
260
247
|
def build_dir(opt_key,default_dir: '.')
|
261
248
|
# Protect against fat-fingering.
|
262
249
|
default_dir = Util.strip_web_str(default_dir)
|
263
|
-
dir = Util.strip_web_str(@cmd_opts[opt_key].to_s
|
264
|
-
|
265
|
-
dir = default_dir if dir.empty?
|
266
|
-
|
250
|
+
dir = Util.strip_web_str(@cmd_opts[opt_key].to_s)
|
251
|
+
|
252
|
+
dir = default_dir if dir.empty?
|
253
|
+
|
267
254
|
# '~' will expand to home, etc.
|
268
|
-
dir = File.expand_path(dir) unless dir.nil?
|
269
|
-
|
255
|
+
dir = File.expand_path(dir) unless dir.nil?
|
256
|
+
|
270
257
|
return (@cmd_opts[opt_key] = dir)
|
271
258
|
end
|
272
|
-
|
259
|
+
|
273
260
|
def build_file(opt_key,default_dir: '.',default_filename: '')
|
274
261
|
# Protect against fat-fingering.
|
275
262
|
default_dir = Util.strip_web_str(default_dir)
|
276
263
|
default_filename = Util.strip_web_str(default_filename)
|
277
|
-
file = Util.strip_web_str(@cmd_opts[opt_key].to_s
|
278
|
-
|
279
|
-
if file.empty?
|
264
|
+
file = Util.strip_web_str(@cmd_opts[opt_key].to_s)
|
265
|
+
|
266
|
+
if file.empty?
|
280
267
|
# Do not check default_dir.empty?().
|
281
|
-
if default_filename.empty?
|
268
|
+
if default_filename.empty?
|
282
269
|
file = nil # nil is very important for BingScraper.init()!
|
283
270
|
else
|
284
271
|
file = File.join(default_dir,default_filename)
|
@@ -293,347 +280,355 @@ module NHKore
|
|
293
280
|
end
|
294
281
|
# Else, passed in both: 'directory/file'
|
295
282
|
end
|
296
|
-
|
283
|
+
|
297
284
|
# '~' will expand to home, etc.
|
298
|
-
file = File.expand_path(file) unless file.nil?
|
299
|
-
|
285
|
+
file = File.expand_path(file) unless file.nil?
|
286
|
+
|
300
287
|
return (@cmd_opts[opt_key] = file)
|
301
288
|
end
|
302
|
-
|
289
|
+
|
303
290
|
def build_in_dir(opt_key,**kargs)
|
304
291
|
return build_dir(opt_key,**kargs)
|
305
292
|
end
|
306
|
-
|
293
|
+
|
307
294
|
def build_in_file(opt_key,**kargs)
|
308
295
|
return build_file(opt_key,**kargs)
|
309
296
|
end
|
310
|
-
|
297
|
+
|
311
298
|
def build_out_dir(opt_key,**kargs)
|
312
299
|
return build_dir(opt_key,**kargs)
|
313
300
|
end
|
314
|
-
|
301
|
+
|
315
302
|
def build_out_file(opt_key,**kargs)
|
316
303
|
return build_file(opt_key,**kargs)
|
317
304
|
end
|
318
|
-
|
305
|
+
|
319
306
|
def build_progress_bar(title,download: false,total: 100,type: @progress_bar,width: 33,**kargs)
|
320
307
|
case type
|
321
308
|
when :default,:classic
|
322
309
|
require 'tty-progressbar'
|
323
|
-
|
324
|
-
msg = "#{title} [:bar] :percent :eta".dup
|
310
|
+
|
311
|
+
msg = "#{title} [:bar] :percent :eta".dup
|
325
312
|
msg << ' :byte_rate/s' if download
|
326
|
-
|
313
|
+
|
327
314
|
return TTY::ProgressBar.new(msg,total: total,width: width,**kargs) do |config|
|
328
315
|
if type == :default
|
329
316
|
config.incomplete = '.'
|
330
317
|
config.complete = '/'
|
331
318
|
config.head = 'o'
|
332
319
|
end
|
333
|
-
|
320
|
+
|
334
321
|
#config.frequency = 5 # For a big download, set this
|
335
322
|
config.interval = 1 if download
|
336
323
|
end
|
337
324
|
end
|
338
|
-
|
325
|
+
|
339
326
|
# :no
|
340
327
|
return NoProgressBar.new(title,total: total,**kargs)
|
341
328
|
end
|
342
|
-
|
343
|
-
def build_version_cmd
|
329
|
+
|
330
|
+
def build_version_cmd
|
344
331
|
app = self
|
345
|
-
|
346
|
-
@version_cmd = @app_cmd.define_command
|
332
|
+
|
333
|
+
@version_cmd = @app_cmd.define_command do
|
347
334
|
name 'version'
|
348
335
|
usage 'version [OPTIONS] [COMMAND]...'
|
349
336
|
aliases :v
|
350
337
|
summary "Show the version and exit (aliases: #{app.color_alias('v')})"
|
351
|
-
|
338
|
+
|
352
339
|
run do |opts,args,cmd|
|
353
|
-
app.show_version
|
340
|
+
app.show_version
|
354
341
|
end
|
355
342
|
end
|
356
343
|
end
|
357
|
-
|
344
|
+
|
358
345
|
def check_empty_opt(key,value)
|
359
|
-
value = Util.strip_web_str(value) unless value.nil?
|
360
|
-
|
361
|
-
if value.nil?
|
346
|
+
value = Util.strip_web_str(value) unless value.nil?
|
347
|
+
|
348
|
+
if value.nil? || value.empty?
|
362
349
|
raise CLIError,"option[#{key}] cannot be empty[#{value}]"
|
363
350
|
end
|
364
|
-
|
351
|
+
|
365
352
|
return value
|
366
353
|
end
|
367
|
-
|
354
|
+
|
368
355
|
def check_in_file(opt_key,empty_ok: false)
|
369
356
|
in_file = @cmd_opts[opt_key]
|
370
|
-
|
357
|
+
|
371
358
|
if Util.empty_web_str?(in_file)
|
372
359
|
if !empty_ok
|
373
360
|
raise CLIError,"empty input path name[#{in_file}] in option[#{opt_key}]"
|
374
361
|
end
|
375
|
-
|
362
|
+
|
376
363
|
@cmd_opts[opt_key] = nil # nil is very important for BingScraper.init()!
|
377
|
-
|
364
|
+
|
378
365
|
return true
|
379
366
|
end
|
380
|
-
|
367
|
+
|
381
368
|
in_file = Util.strip_web_str(in_file)
|
382
|
-
|
369
|
+
|
383
370
|
if !File.exist?(in_file)
|
384
371
|
raise CLIError,"input file[#{in_file}] does not exist for option[#{opt_key}]"
|
385
372
|
end
|
386
|
-
|
373
|
+
|
387
374
|
if File.directory?(in_file)
|
388
375
|
raise CLIError,"input file[#{in_file}] cannot be a directory for option[#{opt_key}]"
|
389
376
|
end
|
390
|
-
|
377
|
+
|
391
378
|
return true
|
392
379
|
end
|
393
|
-
|
380
|
+
|
394
381
|
def check_out_dir(opt_key)
|
395
382
|
out_dir = @cmd_opts[opt_key]
|
396
|
-
|
383
|
+
|
397
384
|
if Util.empty_web_str?(out_dir)
|
398
385
|
raise CLIError,"empty output directory[#{out_dir}] in option[#{opt_key}]"
|
399
386
|
end
|
400
|
-
|
387
|
+
|
401
388
|
out_dir = Util.strip_web_str(out_dir)
|
402
|
-
|
389
|
+
|
403
390
|
if File.file?(out_dir)
|
404
391
|
raise CLIError,"output directory[#{out_dir}] cannot be a file for option[#{opt_key}]"
|
405
392
|
end
|
406
|
-
|
393
|
+
|
407
394
|
if @cmd_opts[:dry_run]
|
408
395
|
puts 'No changes written (dry run).'
|
409
396
|
puts "> #{out_dir}"
|
410
397
|
puts
|
411
|
-
|
398
|
+
|
412
399
|
return true
|
413
400
|
end
|
414
|
-
|
401
|
+
|
415
402
|
force = @cmd_opts[:force]
|
416
|
-
|
403
|
+
|
417
404
|
if !force && Dir.exist?(out_dir) && !Dir.empty?(out_dir)
|
418
405
|
puts 'Warning: output directory already exists with files!'
|
419
406
|
puts ' : Files inside of this directory may be overwritten!'
|
420
407
|
puts "> '#{out_dir}'"
|
421
|
-
|
408
|
+
|
422
409
|
return false unless @high.agree('Is this okay (yes/no)? ')
|
423
410
|
puts
|
424
411
|
end
|
425
|
-
|
412
|
+
|
426
413
|
if !Dir.exist?(out_dir)
|
427
414
|
if !force
|
428
415
|
puts 'Output directory does not exist.'
|
429
416
|
puts "> '#{out_dir}'"
|
430
|
-
|
417
|
+
|
431
418
|
return false unless @high.agree('Create this directory (yes/no)? ')
|
432
419
|
end
|
433
|
-
|
420
|
+
|
434
421
|
FileUtils.mkdir_p(out_dir,verbose: true)
|
435
422
|
puts
|
436
423
|
end
|
437
|
-
|
424
|
+
|
438
425
|
return true
|
439
426
|
end
|
440
|
-
|
427
|
+
|
441
428
|
def check_out_file(opt_key)
|
442
429
|
out_file = @cmd_opts[opt_key]
|
443
|
-
|
430
|
+
|
444
431
|
if Util.empty_web_str?(out_file)
|
445
432
|
raise CLIError,"empty output path name[#{out_file}] in option[#{opt_key}]"
|
446
433
|
end
|
447
|
-
|
434
|
+
|
448
435
|
out_file = Util.strip_web_str(out_file)
|
449
|
-
|
436
|
+
|
450
437
|
if File.directory?(out_file)
|
451
438
|
raise CLIError,"output file[#{out_file}] cannot be a directory for option[#{opt_key}]"
|
452
439
|
end
|
453
|
-
|
440
|
+
|
454
441
|
if @cmd_opts[:dry_run]
|
455
442
|
puts 'No changes written (dry run).'
|
456
443
|
puts "> #{out_file}"
|
457
444
|
puts
|
458
|
-
|
445
|
+
|
459
446
|
return true
|
460
447
|
end
|
461
|
-
|
448
|
+
|
462
449
|
force = @cmd_opts[:force]
|
463
450
|
out_dir = File.dirname(out_file)
|
464
|
-
|
451
|
+
|
465
452
|
if !force && File.exist?(out_file)
|
466
453
|
puts 'Warning: output file already exists!'
|
467
454
|
puts "> '#{out_file}'"
|
468
|
-
|
455
|
+
|
469
456
|
return false unless @high.agree('Overwrite this file (yes/no)? ')
|
470
457
|
puts
|
471
458
|
end
|
472
|
-
|
459
|
+
|
473
460
|
if !Dir.exist?(out_dir)
|
474
461
|
if !force
|
475
462
|
puts 'Output directory does not exist.'
|
476
463
|
puts "> '#{out_dir}'"
|
477
|
-
|
464
|
+
|
478
465
|
return false unless @high.agree('Create this directory (yes/no)? ')
|
479
466
|
end
|
480
|
-
|
467
|
+
|
481
468
|
FileUtils.mkdir_p(out_dir,verbose: true)
|
482
469
|
puts
|
483
470
|
end
|
484
|
-
|
471
|
+
|
485
472
|
return true
|
486
473
|
end
|
487
|
-
|
474
|
+
|
488
475
|
def color(str)
|
489
476
|
return @rainbow.wrap(str)
|
490
477
|
end
|
491
|
-
|
478
|
+
|
492
479
|
def color_alias(str)
|
493
480
|
return color(str).green
|
494
481
|
end
|
495
|
-
|
482
|
+
|
496
483
|
def enable_color(enabled)
|
497
484
|
Cri::Platform.color = enabled
|
498
485
|
@rainbow.enabled = enabled
|
499
486
|
end
|
500
|
-
|
487
|
+
|
501
488
|
def opts_to_set(ary)
|
502
|
-
set = Set.new
|
503
|
-
|
504
|
-
set.add("-#{ary[0]
|
505
|
-
set.add("--#{ary[1]
|
506
|
-
|
489
|
+
set = Set.new
|
490
|
+
|
491
|
+
set.add("-#{ary[0]}") unless ary[0].nil?
|
492
|
+
set.add("--#{ary[1]}") unless ary[1].nil?
|
493
|
+
|
507
494
|
return set
|
508
495
|
end
|
509
|
-
|
496
|
+
|
510
497
|
def refresh_cmd(opts,args,cmd)
|
511
498
|
new_opts = {}
|
512
|
-
|
499
|
+
|
513
500
|
# Change symbols with dashes to underscores,
|
514
501
|
# so don't have to type @cmd_opts[:'dry-run'] all the time.
|
515
|
-
opts.each
|
516
|
-
|
517
|
-
key = key.gsub('-','_')
|
518
|
-
|
519
|
-
|
502
|
+
opts.each do |key,value|
|
503
|
+
# %s(max-retry) => :max_retry
|
504
|
+
key = key.to_s.gsub('-','_').to_sym
|
505
|
+
|
520
506
|
new_opts[key] = value
|
521
507
|
end
|
522
|
-
|
508
|
+
|
509
|
+
# Cri has a default proc for default values
|
510
|
+
# that doesn't store the keys.
|
511
|
+
new_opts.default_proc = proc do |hash,key|
|
512
|
+
# :max_retry => %s(max-retry)
|
513
|
+
key = key.to_s.gsub('_','-').to_sym
|
514
|
+
|
515
|
+
opts.default_proc.call(hash,key)
|
516
|
+
end
|
517
|
+
|
523
518
|
@cmd = cmd
|
524
519
|
@cmd_args = args
|
525
520
|
@cmd_opts = new_opts
|
526
|
-
|
521
|
+
|
527
522
|
return self
|
528
523
|
end
|
529
|
-
|
530
|
-
def run
|
524
|
+
|
525
|
+
def run
|
531
526
|
@app_cmd.run(@args)
|
532
527
|
end
|
533
|
-
|
534
|
-
def show_version
|
528
|
+
|
529
|
+
def show_version
|
535
530
|
puts "#{NAME} v#{VERSION}"
|
536
531
|
end
|
537
|
-
|
538
|
-
def sleep_scraper
|
532
|
+
|
533
|
+
def sleep_scraper
|
539
534
|
sleep(@sleep_time)
|
540
535
|
end
|
541
|
-
|
536
|
+
|
542
537
|
def start_spin(title,detail: '')
|
543
538
|
if @spinner.is_a?(Hash)
|
544
539
|
@spinner[:detail] = detail
|
545
540
|
@spinner[:title] = title
|
546
|
-
|
547
|
-
puts
|
541
|
+
|
542
|
+
puts(NO_SPINNER_MSG % @spinner)
|
548
543
|
else
|
549
544
|
@spinner.update(title: title,detail: detail)
|
550
|
-
@spinner.auto_spin
|
545
|
+
@spinner.auto_spin
|
551
546
|
end
|
552
547
|
end
|
553
|
-
|
554
|
-
def stop_spin
|
548
|
+
|
549
|
+
def stop_spin
|
555
550
|
if @spinner.is_a?(Hash)
|
556
551
|
puts (NO_SPINNER_MSG % @spinner) + ' done!'
|
557
552
|
else
|
558
|
-
@spinner.reset
|
553
|
+
@spinner.reset
|
559
554
|
@spinner.stop('done!')
|
560
555
|
end
|
561
556
|
end
|
562
|
-
|
557
|
+
|
563
558
|
def update_spin_detail(detail)
|
564
559
|
if @spinner.is_a?(Hash)
|
565
560
|
@spinner[:detail] = detail
|
566
|
-
|
567
|
-
puts
|
561
|
+
|
562
|
+
puts(NO_SPINNER_MSG % @spinner)
|
568
563
|
else
|
569
564
|
@spinner.tokens[:detail] = detail
|
570
565
|
end
|
571
566
|
end
|
572
567
|
end
|
573
|
-
|
568
|
+
|
574
569
|
###
|
575
|
-
# @author Jonathan Bradley Whited
|
570
|
+
# @author Jonathan Bradley Whited
|
576
571
|
# @since 0.2.0
|
577
572
|
###
|
578
573
|
class NoProgressBar
|
579
574
|
MSG = '%{title}... %{percent}%%'
|
580
575
|
PUT_INTERVAL = 100.0 / 6.25
|
581
576
|
MAX_PUT_INTERVAL = 100.0 + PUT_INTERVAL + 1.0
|
582
|
-
|
577
|
+
|
583
578
|
def initialize(title,total:,**tokens)
|
584
579
|
super()
|
585
|
-
|
580
|
+
|
586
581
|
@tokens = {title: title,total: total}
|
587
|
-
|
588
|
-
reset
|
589
|
-
|
582
|
+
|
583
|
+
reset
|
584
|
+
|
590
585
|
@tokens.merge!(tokens)
|
591
586
|
end
|
592
|
-
|
593
|
-
def reset
|
587
|
+
|
588
|
+
def reset
|
594
589
|
@tokens[:advance] = 0
|
595
590
|
@tokens[:percent] = 0
|
596
591
|
@tokens[:progress] = 0
|
597
592
|
end
|
598
|
-
|
593
|
+
|
599
594
|
def advance(progress=1)
|
600
595
|
total = @tokens[:total]
|
601
596
|
progress = @tokens[:progress] + progress
|
602
597
|
progress = total if progress > total
|
603
|
-
percent = (progress.to_f
|
604
|
-
|
598
|
+
percent = (progress.to_f / total.to_f * 100.0).round
|
599
|
+
|
605
600
|
@tokens[:percent] = percent
|
606
601
|
@tokens[:progress] = progress
|
607
|
-
|
602
|
+
|
608
603
|
if percent < 99.0
|
609
604
|
# Only output at certain intervals.
|
610
605
|
advance = @tokens[:advance]
|
611
606
|
i = 0.0
|
612
|
-
|
607
|
+
|
613
608
|
while i <= MAX_PUT_INTERVAL
|
614
609
|
if advance < i
|
615
610
|
break if percent >= i # Output
|
616
611
|
return # Don't output
|
617
612
|
end
|
618
|
-
|
613
|
+
|
619
614
|
i += PUT_INTERVAL
|
620
615
|
end
|
621
616
|
end
|
622
|
-
|
617
|
+
|
623
618
|
@tokens[:advance] = percent
|
624
|
-
|
625
|
-
puts to_s
|
619
|
+
|
620
|
+
puts to_s
|
626
621
|
end
|
627
|
-
|
628
|
-
def finish
|
622
|
+
|
623
|
+
def finish
|
629
624
|
advance(@tokens[:total])
|
630
625
|
end
|
631
|
-
|
632
|
-
def start
|
633
|
-
puts to_s
|
626
|
+
|
627
|
+
def start
|
628
|
+
puts to_s
|
634
629
|
end
|
635
|
-
|
636
|
-
def to_s
|
630
|
+
|
631
|
+
def to_s
|
637
632
|
return MSG % @tokens
|
638
633
|
end
|
639
634
|
end
|