nhkore 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -1
- data/README.md +18 -6
- data/Rakefile +11 -16
- data/bin/nhkore +1 -3
- data/lib/nhkore/app.rb +616 -0
- data/lib/nhkore/article.rb +130 -0
- data/lib/nhkore/article_scraper.rb +653 -0
- data/lib/nhkore/cleaner.rb +91 -0
- data/lib/nhkore/cli/bing_cmd.rb +220 -0
- data/lib/nhkore/cli/fx_cmd.rb +116 -0
- data/lib/nhkore/cli/get_cmd.rb +153 -0
- data/lib/nhkore/cli/news_cmd.rb +375 -0
- data/lib/nhkore/cli/sift_cmd.rb +382 -0
- data/lib/nhkore/defn.rb +104 -0
- data/lib/nhkore/dict.rb +80 -0
- data/lib/nhkore/dict_scraper.rb +76 -0
- data/lib/nhkore/entry.rb +104 -0
- data/lib/nhkore/error.rb +35 -0
- data/lib/nhkore/fileable.rb +48 -0
- data/lib/nhkore/missingno.rb +92 -0
- data/lib/nhkore/news.rb +176 -0
- data/lib/nhkore/polisher.rb +93 -0
- data/lib/nhkore/scraper.rb +137 -0
- data/lib/nhkore/search_link.rb +188 -0
- data/lib/nhkore/search_scraper.rb +152 -0
- data/lib/nhkore/sifter.rb +339 -0
- data/lib/nhkore/splitter.rb +90 -0
- data/lib/nhkore/util.rb +190 -0
- data/lib/nhkore/variator.rb +87 -0
- data/lib/nhkore/version.rb +1 -1
- data/lib/nhkore/word.rb +134 -17
- data/lib/nhkore.rb +39 -40
- data/nhkore.gemspec +23 -8
- data/test/{nhkore_tester.rb → nhkore/test_helper.rb} +3 -1
- data/test/nhkore_test.rb +8 -6
- metadata +204 -11
@@ -0,0 +1,91 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
#--
|
6
|
+
# This file is part of NHKore.
|
7
|
+
# Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
|
8
|
+
#
|
9
|
+
# NHKore is free software: you can redistribute it and/or modify
|
10
|
+
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
12
|
+
# (at your option) any later version.
|
13
|
+
#
|
14
|
+
# NHKore is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU Lesser General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU Lesser General Public License
|
20
|
+
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
21
|
+
#++
|
22
|
+
|
23
|
+
|
24
|
+
require 'nhkore/util'
|
25
|
+
require 'nhkore/word'
|
26
|
+
|
27
|
+
|
28
|
+
module NHKore
|
29
|
+
###
# Base class for string cleaners, written as a small template method:
# {#clean} runs {#begin_clean} and then {#end_clean}.
#
# Both hooks default to returning the string unchanged, so a subclass only
# overrides the hook(s) it needs and the base class itself works as a
# no-op cleaner.
#
# @author Jonathan Bradley Whited (@esotericpig)
# @since 0.2.0
###
class Cleaner
  # First hook run by {#clean}.
  #
  # @param str [String] the text to clean
  # @return [String] +str+ unchanged (default implementation)
  def begin_clean(str)
    return str
  end

  # Runs both hooks in order: {#begin_clean}, then {#end_clean}.
  #
  # @param str [String] the text to clean
  # @return [String] the result of feeding +str+ through both hooks
  def clean(str)
    return end_clean(begin_clean(str))
  end

  # Cleans a String or a Word with every given cleaner, in order.
  #
  # @param obj [String,Word,nil] the value to clean
  # @param cleaners [Cleaner,Array<Cleaner>,nil] one cleaner or a list of them
  # @return [String,Word,nil] +nil+ if +obj+ is nil; +obj+ itself if there are
  #   no cleaners; otherwise the cleaned String, or a new Word whose kana and
  #   kanji were cleaned
  def self.clean_any(obj,cleaners)
    return nil if obj.nil?()

    cleaners = Array(cleaners)

    return obj if cleaners.empty?()

    if obj.is_a?(Word)
      # Clean each reading on its own (a nil reading stays nil) and pass the
      # original along as +word:+.
      # NOTE(review): presumably Word.new copies the remaining attributes
      # from +word:+ -- confirm in word.rb.
      return Word.new(
        kana: clean_any(obj.kana,cleaners),
        kanji: clean_any(obj.kanji,cleaners),
        word: obj
      )
    end

    # Anything that is not a Word is treated as a String and is piped through
    # the cleaners from first to last.
    return cleaners.reduce(obj) { |str,cleaner| cleaner.clean(str) }
  end

  # Last hook run by {#clean}.
  #
  # Defined here (as an identity function, like {#begin_clean}) so that
  # {#clean} also works on the base class and on subclasses that only
  # override {#begin_clean}; without it {#clean} raised NoMethodError.
  #
  # @param str [String] the text to clean
  # @return [String] +str+ unchanged (default implementation)
  def end_clean(str)
    return str
  end
end
|
67
|
+
|
68
|
+
###
# Cleaner whose single step hands the string to Util.unspace_web_str.
#
# @author Jonathan Bradley Whited (@esotericpig)
# @since 0.2.0
###
class BasicCleaner < Cleaner
  # Deliberately minimal. Splitter will split on punctuation, and Polisher
  # will remove the leftover punctuation, digits, etc. Making this stricter
  # causes errors to be raised in ArticleScraper's scrape_dicwin_word() &
  # scrape_ruby_word().
  #
  # @param str [String] the text to clean
  # @return [String] whatever Util.unspace_web_str returns for +str+
  def end_clean(str)
    return Util.unspace_web_str(str) # Who needs space in Japanese?
  end
end
|
84
|
+
|
85
|
+
###
# Currently behaves exactly like BasicCleaner: it is an empty subclass
# that overrides nothing.
#
# @author Jonathan Bradley Whited (@esotericpig)
# @since 0.2.0
###
class BestCleaner < BasicCleaner
end
|
91
|
+
end
|
@@ -0,0 +1,220 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
#--
|
6
|
+
# This file is part of NHKore.
|
7
|
+
# Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
|
8
|
+
#
|
9
|
+
# NHKore is free software: you can redistribute it and/or modify
|
10
|
+
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
12
|
+
# (at your option) any later version.
|
13
|
+
#
|
14
|
+
# NHKore is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU Lesser General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU Lesser General Public License
|
20
|
+
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
21
|
+
#++
|
22
|
+
|
23
|
+
|
24
|
+
require 'nhkore/error'
|
25
|
+
require 'nhkore/search_link'
|
26
|
+
require 'nhkore/search_scraper'
|
27
|
+
require 'nhkore/util'
|
28
|
+
|
29
|
+
|
30
|
+
module NHKore
|
31
|
+
module CLI
|
32
|
+
###
# Mixin with the CLI `bing` command: its definition ({#build_bing_cmd}) and
# its implementation ({#run_bing_cmd}).
#
# The methods rely on state and helpers of the object they are mixed into
# (+@app_cmd+, +@cmd_opts+, +@scraper_kargs+, +start_spin+, ...).
#
# @author Jonathan Bradley Whited (@esotericpig)
# @since 0.2.0
###
module BingCmd
  # Defines the `bing` command on +@app_cmd+, plus its `easy` and `regular`
  # sub-commands, and stores them in +@bing_cmd+, +@bing_easy_cmd+ and
  # +@bing_regular_cmd+.
  def build_bing_cmd
    app = self

    # Option transforms, built up front so the DSL below stays readable.
    check_in = ->(value) { app.check_empty_opt(:in,value) }
    check_out = ->(value) { app.check_empty_opt(:out,value) }
    at_least_one = ->(value) { [value.to_i,1].max } # Never request less than 1 result.

    @bing_cmd = @app_cmd.define_command do
      name 'bing'
      usage 'bing [OPTIONS] [COMMAND]...'
      aliases :b
      summary "Search bing.com for links to NHK News Web (Easy) (aliases: #{app.color_alias('b')})"

      description <<~EOD
        Search bing.com for links to NHK News Web (Easy) &
        save to folder: #{SearchLinks::DEFAULT_DIR}
      EOD

      option :i,:in,<<~EOD,argument: :required,transform: check_in
        HTML file to read instead of URL (for offline testing and/or slow internet;
        see '--show-urls' option)
      EOD
      option :o,:out,<<~EOD,argument: :required,transform: check_out
        'directory/file' to save links to; if you only specify a directory or a file, it will attach the
        appropriate default directory/file name
        (defaults: #{SearchLinks::DEFAULT_BING_YASASHII_FILE}, #{SearchLinks::DEFAULT_BING_FUTSUU_FILE})
      EOD
      option :r,:results,'number of results per page to request from Bing',argument: :required,
        default: SearchScraper::DEFAULT_RESULT_COUNT,transform: at_least_one
      option nil,:'show-count',<<~EOD
        show the number of links scraped and exit;
        useful for manually writing/updating scripts (but not for use in a variable);
        implies '--dry-run' option
      EOD
      option nil,:'show-urls',<<~EOD do |_value,_cmd|
        show the URLs used when scraping and exit; you can download these for offline testing and/or
        slow internet (see '--in' option)
      EOD
        puts "Easy: #{BingScraper.build_url(SearchScraper::YASASHII_SITE)}"
        puts "Regular: #{BingScraper.build_url(SearchScraper::FUTSUU_SITE)}"
        exit
      end

      # `bing` without a sub-command only prints its help.
      run do |_opts,_args,cmd|
        puts cmd.help
      end
    end

    @bing_easy_cmd = @bing_cmd.define_command do
      name 'easy'
      usage 'easy [OPTIONS] [COMMAND]...'
      aliases :e,:ez
      summary "Search for NHK News Web Easy (Yasashii) links (aliases: #{app.color_alias('e ez')})"

      description <<~EOD
        Search for NHK News Web Easy (Yasashii) links &
        save to file: #{SearchLinks::DEFAULT_BING_YASASHII_FILE}
      EOD

      run do |opts,args,cmd|
        app.refresh_cmd(opts,args,cmd)
        app.run_bing_cmd(:yasashii)
      end
    end

    @bing_regular_cmd = @bing_cmd.define_command do
      name 'regular'
      usage 'regular [OPTIONS] [COMMAND]...'
      aliases :r,:reg
      summary "Search for NHK News Web Regular (Futsuu) links (aliases: #{app.color_alias('r reg')})"

      description <<~EOD
        Search for NHK News Web Regular (Futsuu) links &
        save to file: #{SearchLinks::DEFAULT_BING_FUTSUU_FILE}
      EOD

      run do |opts,args,cmd|
        app.refresh_cmd(opts,args,cmd)
        app.run_bing_cmd(:futsuu)
      end
    end
  end

  # Scrapes Bing result pages for links and merges them into the links file.
  #
  # With +:show_count+ set it only prints how many of the already-saved links
  # are scraped and returns. With +:dry_run+ set it prints the newly found
  # links instead of saving the file.
  #
  # @param type [Symbol] +:futsuu+ or +:yasashii+
  # @raise [ArgumentError] if +type+ is any other value
  def run_bing_cmd(type)
    # '--show-count' implies '--dry-run'.
    @cmd_opts[:dry_run] = true if @cmd_opts[:show_count]

    build_in_file(:in)

    default_filename =
      case type
      when :futsuu then SearchLinks::DEFAULT_BING_FUTSUU_FILENAME
      when :yasashii then SearchLinks::DEFAULT_BING_YASASHII_FILENAME
      else raise ArgumentError,"invalid type[#{type}]"
      end

    build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: default_filename)

    return unless check_in_file(:in,empty_ok: true)
    return unless check_out_file(:out)

    dry_run = @cmd_opts[:dry_run]
    in_file = @cmd_opts[:in]
    out_file = @cmd_opts[:out]
    show_count = @cmd_opts[:show_count]

    result_count = @cmd_opts[:results]
    result_count = SearchScraper::DEFAULT_RESULT_COUNT if result_count.nil?

    start_spin('Scraping bing.com') unless show_count

    is_file = !in_file.nil?
    new_links = [] # For --dry-run
    next_page = NextPage.new
    page_count = 0
    page_num = 1
    url = in_file # nil will use default URL, else a file

    # Load previous links for 'scraped?' vars.
    links = File.exist?(out_file) ? SearchLinks.load_file(out_file) : SearchLinks.new
    links_count = links.length

    if show_count
      scraped_count = links.links.values.count { |link| link.scraped? }

      puts "#{scraped_count} of #{links_count} links scraped."

      return
    end

    # Do a range to prevent an infinite loop. Ichiman!
    (0..10_000).each do
      scraper = BingScraper.new(type,count: result_count,is_file: is_file,url: url,**@scraper_kargs)

      next_page = scraper.scrape(links,next_page)

      # Everything past the previous length was added by this page.
      new_links.concat(links.links.values[links_count..-1])
      links_count = links.length
      page_count = next_page.count if next_page.count > 0

      update_spin_detail(
        " (page=#{page_num}, count=#{page_count}, links=#{links.length}, new_links=#{new_links.length})"
      )

      break if next_page.empty?

      page_num += 1
      url = next_page.url

      sleep_scraper
    end

    stop_spin
    puts

    puts 'Last URL scraped:'
    puts "> #{url}"
    puts

    if dry_run
      new_links.each { |link| puts link.to_s(mini: true) }
    else
      links.save_file(out_file)

      puts 'Saved scraped links to file:'
      puts "> #{out_file}"
    end
  end
end
|
219
|
+
end
|
220
|
+
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
#--
|
6
|
+
# This file is part of NHKore.
|
7
|
+
# Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
|
8
|
+
#
|
9
|
+
# NHKore is free software: you can redistribute it and/or modify
|
10
|
+
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
12
|
+
# (at your option) any later version.
|
13
|
+
#
|
14
|
+
# NHKore is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU Lesser General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU Lesser General Public License
|
20
|
+
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
21
|
+
#++
|
22
|
+
|
23
|
+
|
24
|
+
module NHKore
|
25
|
+
module CLI
|
26
|
+
###
# Mixin with the CLI `fx` command, which demos the progress bar and spinner
# so users can check that the special effects work in their terminal.
#
# The methods rely on state and helpers of the object they are mixed into
# (+@app_cmd+, +@cmd_opts+, +@progress_bar+, +@spinner+, +start_spin+, ...).
#
# @author Jonathan Bradley Whited (@esotericpig)
# @since 0.2.0
###
module FXCmd
  # Defines the `fx` command on +@app_cmd+ and stores it in +@fx_cmd+.
  def build_fx_cmd()
    app = self

    @fx_cmd = @app_cmd.define_command() do
      name 'fx'
      usage 'fx [OPTIONS] [COMMAND]...'
      summary 'Test spinner/progress special effects (for running long tasks)'

      description <<~EOD
        Test if the special effects work on your command line:\n
        - #{App::NAME} [-c/-X] fx
      EOD

      flag :a,:all,'test all special effects regardless of global options'

      run do |opts,args,cmd|
        app.refresh_cmd(opts,args,cmd)
        app.run_fx_cmd()
      end
    end
  end

  # Runs the progress bar demo and then the spinner demo.
  def run_fx_cmd()
    test_fx_progress_bar()
    test_fx_spinner()
  end

  # Demos a progress bar by advancing it 100 times with a short pause
  # between steps.
  #
  # With +@cmd_opts[:all]+ set, every bar type (+:default+, +:classic+, +:no+)
  # is shown in turn; otherwise only the type in +@progress_bar+, labelled
  # 'User'.
  def test_fx_progress_bar()
    types = @cmd_opts[:all] ? %i[default classic no] : [@progress_bar]

    types.each() do |type|
      label = (types.length == 1) ? 'User' : type.to_s().capitalize()
      bar = build_progress_bar("Testing #{label} progress",download: false,type: type)

      bar.start()

      100.times() do
        sleep(0.05)
        bar.advance()
      end

      bar.finish()
    end
  end

  # Demos a spinner by running it for 3 steps with a pause before each
  # detail update.
  #
  # With +@cmd_opts[:all]+ set, every App spinner (default, classic, no) is
  # shown in turn; otherwise only the current +@spinner+, labelled 'User'.
  #
  # +@spinner+ is swapped while testing and is always put back afterwards,
  # even if a spinner call raises or the user interrupts the demo.
  def test_fx_spinner()
    app_spinner = @spinner

    spinners =
      if @cmd_opts[:all]
        {
          default: App::DEFAULT_SPINNER,
          classic: App::CLASSIC_SPINNER,
          no: App::NO_SPINNER
        }
      else
        {user: app_spinner}
      end

    begin
      spinners.each() do |name,spinner|
        # start_spin() & co. use @spinner, so swap it in for this run.
        @spinner = spinner

        start_spin("Testing #{name.to_s().capitalize()} spinner")

        1.upto(3) do |i|
          sleep(1.1)
          update_spin_detail(" (#{i}/3)")
        end

        stop_spin()
      end
    ensure
      # Reset back to users'.
      @spinner = app_spinner
    end
  end
end
|
115
|
+
end
|
116
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
#--
|
6
|
+
# This file is part of NHKore.
|
7
|
+
# Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
|
8
|
+
#
|
9
|
+
# NHKore is free software: you can redistribute it and/or modify
|
10
|
+
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
12
|
+
# (at your option) any later version.
|
13
|
+
#
|
14
|
+
# NHKore is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU Lesser General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU Lesser General Public License
|
20
|
+
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
21
|
+
#++
|
22
|
+
|
23
|
+
|
24
|
+
require 'down/net_http'
|
25
|
+
require 'tempfile'
|
26
|
+
require 'zip'
|
27
|
+
|
28
|
+
require 'nhkore/util'
|
29
|
+
|
30
|
+
|
31
|
+
module NHKore
|
32
|
+
module CLI
|
33
|
+
###
# Mixin with the CLI `get` command, which downloads the Zip file of
# pre-scraped files from the latest release and extracts it.
#
# The methods rely on state and helpers of the object they are mixed into
# (+@app_cmd+, +@cmd_opts+, +@scraper_kargs+, +start_spin+, ...).
#
# @author Jonathan Bradley Whited (@esotericpig)
# @since 0.2.0
###
module GetCmd
  # Number of bytes requested per read while downloading.
  DEFAULT_GET_CHUNK_SIZE = 4 * 1024
  # Progress bar total used when the download does not report its size.
  DEFAULT_GET_URL_LENGTH = 5_000_000 # Just a generous estimation used as a fallback; may be outdated
  GET_URL_FILENAME = 'nhkore-core.zip'
  GET_URL = "https://github.com/esotericpig/nhkore/releases/latest/download/#{GET_URL_FILENAME}"

  # Defines the `get` command on +@app_cmd+ and stores it in +@get_cmd+.
  def build_get_cmd()
    app = self

    @get_cmd = @app_cmd.define_command() do
      name 'get'
      usage 'get [OPTIONS] [COMMAND]...'
      aliases :g
      summary "Download NHKore's pre-scraped files from the latest release (aliases: #{app.color_alias('g')})"

      description <<~EOD
        Download NHKore's pre-scraped files from the latest release &
        save to folder: #{Util::CORE_DIR}

        Note: the latest NHK articles may not have been scraped yet.
      EOD

      option :o,:out,'directory to save downloaded files to',argument: :required,default: Util::CORE_DIR,
        transform: -> (value) do
          app.check_empty_opt(:out,value)
        end
      flag nil,:'show-url','show download URL and exit (for downloading manually)' do |value,cmd|
        puts GET_URL
        exit
      end

      run do |opts,args,cmd|
        app.refresh_cmd(opts,args,cmd)
        app.run_get_cmd()
      end
    end
  end

  # Downloads {GET_URL} to a temp file and extracts every Zip entry, by its
  # base name, into the +:out+ directory.
  #
  # - Opening the URL is retried on Down::ConnectionError up to
  #   +@scraper_kargs[:max_retries]+ times (3 if not set); after that the
  #   error is re-raised.
  # - With +:dry_run+ set, the URL is only opened (and closed again).
  # - +:force+ is assigned to +Zip.on_exists_proc+ before extracting.
  #
  # @raise [ZipError] if an entry's name is not safe to extract
  def run_get_cmd()
    build_out_dir(:out,default_dir: Util::CORE_DIR)

    return unless check_out_dir(:out)

    chunk_size = DEFAULT_GET_CHUNK_SIZE
    down = nil
    dry_run = @cmd_opts[:dry_run]
    force = @cmd_opts[:force]
    max_retries = @scraper_kargs[:max_retries]
    max_retries = 3 if max_retries.nil?()
    out_dir = @cmd_opts[:out]

    begin
      start_spin('Opening URL')

      begin
        down = Down::NetHttp.open(GET_URL,rewindable: false,**@scraper_kargs)
      rescue Down::ConnectionError
        # Bounded retry: give up (re-raise) once the budget is used up.
        raise if (max_retries -= 1) < 0
        retry
      end

      stop_spin()

      return if dry_run

      Tempfile.create([App::NAME,'.zip'],binmode: true) do |file|
        puts
        puts 'Downloading to temp file:'
        puts "> #{file.path}"
        puts

        # The size is nil when the length is not known up front, so fall
        # back to an estimate for the progress bar's total.
        len = down.size
        len = DEFAULT_GET_URL_LENGTH if len.nil?()
        bar = build_progress_bar("Downloading #{GET_URL_FILENAME}",download: true,total: len)

        bar.start()

        until down.eof?()
          chunk = down.read(chunk_size)

          break if chunk.nil?()

          file.write(chunk)
          # The last chunk is usually shorter than chunk_size, so advance by
          # what was actually read.
          bar.advance(chunk.bytesize)
        end

        down.close()
        file.close()
        bar.finish()

        start_spin("Extracting #{GET_URL_FILENAME}")

        Zip.on_exists_proc = force # true will force overwriting files on extract()

        Zip::File.open(file) do |zip_file|
          zip_file.each() do |entry|
            if !entry.name_safe?()
              raise ZipError,"unsafe entry name[#{entry.name}] in Zip file"
            end

            # Only the base name is used, so any directories inside the Zip
            # file are flattened into out_dir.
            name = File.basename(entry.name)

            update_spin_detail(" (file=#{name})")

            entry.extract(File.join(out_dir,name))
          end
        end

        stop_spin()
        puts

        puts "Extracted #{GET_URL_FILENAME} to directory:"
        puts "> #{out_dir}"
      end
    ensure
      # Also covers the dry-run return and any error raised above.
      down.close() if !down.nil?() && !down.closed?()
    end
  end
end
|
152
|
+
end
|
153
|
+
end
|