nhkore 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -1
- data/README.md +18 -6
- data/Rakefile +11 -16
- data/bin/nhkore +1 -3
- data/lib/nhkore/app.rb +616 -0
- data/lib/nhkore/article.rb +130 -0
- data/lib/nhkore/article_scraper.rb +653 -0
- data/lib/nhkore/cleaner.rb +91 -0
- data/lib/nhkore/cli/bing_cmd.rb +220 -0
- data/lib/nhkore/cli/fx_cmd.rb +116 -0
- data/lib/nhkore/cli/get_cmd.rb +153 -0
- data/lib/nhkore/cli/news_cmd.rb +375 -0
- data/lib/nhkore/cli/sift_cmd.rb +382 -0
- data/lib/nhkore/defn.rb +104 -0
- data/lib/nhkore/dict.rb +80 -0
- data/lib/nhkore/dict_scraper.rb +76 -0
- data/lib/nhkore/entry.rb +104 -0
- data/lib/nhkore/error.rb +35 -0
- data/lib/nhkore/fileable.rb +48 -0
- data/lib/nhkore/missingno.rb +92 -0
- data/lib/nhkore/news.rb +176 -0
- data/lib/nhkore/polisher.rb +93 -0
- data/lib/nhkore/scraper.rb +137 -0
- data/lib/nhkore/search_link.rb +188 -0
- data/lib/nhkore/search_scraper.rb +152 -0
- data/lib/nhkore/sifter.rb +339 -0
- data/lib/nhkore/splitter.rb +90 -0
- data/lib/nhkore/util.rb +190 -0
- data/lib/nhkore/variator.rb +87 -0
- data/lib/nhkore/version.rb +1 -1
- data/lib/nhkore/word.rb +134 -17
- data/lib/nhkore.rb +39 -40
- data/nhkore.gemspec +23 -8
- data/test/{nhkore_tester.rb → nhkore/test_helper.rb} +3 -1
- data/test/nhkore_test.rb +8 -6
- metadata +204 -11
@@ -0,0 +1,91 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
#--
|
6
|
+
# This file is part of NHKore.
|
7
|
+
# Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
|
8
|
+
#
|
9
|
+
# NHKore is free software: you can redistribute it and/or modify
|
10
|
+
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
12
|
+
# (at your option) any later version.
|
13
|
+
#
|
14
|
+
# NHKore is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU Lesser General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU Lesser General Public License
|
20
|
+
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
21
|
+
#++
|
22
|
+
|
23
|
+
|
24
|
+
require 'nhkore/util'
|
25
|
+
require 'nhkore/word'
|
26
|
+
|
27
|
+
|
28
|
+
module NHKore
|
29
|
+
###
|
30
|
+
# @author Jonathan Bradley Whited (@esotericpig)
|
31
|
+
# @since 0.2.0
|
32
|
+
###
|
33
|
+
class Cleaner
  # First cleaning pass. The base implementation hands +str+ back untouched.
  #
  # @param str [String] text to clean
  # @return [String] the very same +str+
  def begin_clean(str)
    str
  end

  # Runs both cleaning passes in order: {#begin_clean}, then +end_clean+.
  #
  # +end_clean+ is not defined in this class, so a subclass has to supply it.
  #
  # @param str [String] text to clean
  # @return [String] result of +end_clean+
  def clean(str)
    end_clean(begin_clean(str))
  end

  # Applies one or more cleaners to a String or to a Word.
  #
  # @param obj [String,Word,nil] value to clean
  # @param cleaners [Cleaner,Array<Cleaner>,nil] a single cleaner or a list of them
  # @return [String,Word,nil] +nil+ for a +nil+ +obj+; +obj+ itself if there are
  #   no cleaners; a new Word (cleaned kana & kanji, with the original passed as
  #   +word:+) for a Word; else the String after every cleaner has run in order
  def self.clean_any(obj,cleaners)
    return nil if obj.nil?

    cleaners = Array(cleaners)

    return obj if cleaners.empty?

    # Anything that is not a Word is treated as a String and piped through
    # each cleaner in turn.
    unless obj.is_a?(Word)
      return cleaners.reduce(obj) { |str,cleaner| cleaner.clean(str) }
    end

    Word.new(
      kana: clean_any(obj.kana,cleaners),
      kanji: clean_any(obj.kanji,cleaners),
      word: obj
    )
  end
end
|
67
|
+
|
68
|
+
###
|
69
|
+
# @author Jonathan Bradley Whited (@esotericpig)
|
70
|
+
# @since 0.2.0
|
71
|
+
###
|
72
|
+
class BasicCleaner < Cleaner
  # Final cleaning pass: hands +str+ to Util.unspace_web_str and returns the result.
  #
  # @param str [String] text to clean
  # @return [String] whatever Util.unspace_web_str returns for +str+
  def end_clean(str)
    # Kept very simple on purpose: Splitter will split on punctuation, and
    # Polisher will remove the leftover punctuation, digits, etc.
    # If this is stricter, then errors will be raised in ArticleScraper's
    # scrape_dicwin_word() & scrape_ruby_word().
    Util.unspace_web_str(str) # Who needs space in Japanese?
  end
end
|
84
|
+
|
85
|
+
###
|
86
|
+
# @author Jonathan Bradley Whited (@esotericpig)
|
87
|
+
# @since 0.2.0
|
88
|
+
###
|
89
|
+
class BestCleaner < BasicCleaner
  # No overrides here: everything is inherited from BasicCleaner.
end
|
91
|
+
end
|
@@ -0,0 +1,220 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
#--
|
6
|
+
# This file is part of NHKore.
|
7
|
+
# Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
|
8
|
+
#
|
9
|
+
# NHKore is free software: you can redistribute it and/or modify
|
10
|
+
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
12
|
+
# (at your option) any later version.
|
13
|
+
#
|
14
|
+
# NHKore is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU Lesser General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU Lesser General Public License
|
20
|
+
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
21
|
+
#++
|
22
|
+
|
23
|
+
|
24
|
+
require 'nhkore/error'
|
25
|
+
require 'nhkore/search_link'
|
26
|
+
require 'nhkore/search_scraper'
|
27
|
+
require 'nhkore/util'
|
28
|
+
|
29
|
+
|
30
|
+
module NHKore
|
31
|
+
module CLI
|
32
|
+
###
|
33
|
+
# @author Jonathan Bradley Whited (@esotericpig)
|
34
|
+
# @since 0.2.0
|
35
|
+
###
|
36
|
+
module BingCmd
  # Defines the 'bing' command and its 'easy' & 'regular' sub-commands,
  # storing them in @bing_cmd, @bing_easy_cmd, and @bing_regular_cmd.
  def build_bing_cmd
    # Local handle on this object, so the definition blocks below can call its methods.
    app = self

    @bing_cmd = @app_cmd.define_command do
      name    'bing'
      usage   'bing [OPTIONS] [COMMAND]...'
      aliases :b
      summary "Search bing.com for links to NHK News Web (Easy) (aliases: #{app.color_alias('b')})"

      description <<~EOD
        Search bing.com for links to NHK News Web (Easy) &
        save to folder: #{SearchLinks::DEFAULT_DIR}
      EOD

      option :i,:in,<<~EOD,argument: :required,transform: ->(value) { app.check_empty_opt(:in,value) }
        HTML file to read instead of URL (for offline testing and/or slow internet;
        see '--show-urls' option)
      EOD
      option :o,:out,<<~EOD,argument: :required,transform: ->(value) { app.check_empty_opt(:out,value) }
        'directory/file' to save links to; if you only specify a directory or a file, it will attach the
        appropriate default directory/file name
        (defaults: #{SearchLinks::DEFAULT_BING_YASASHII_FILE}, #{SearchLinks::DEFAULT_BING_FUTSUU_FILE})
      EOD
      # Anything below 1 (including non-numeric input, which to_i turns into 0) becomes 1.
      option :r,:results,'number of results per page to request from Bing',argument: :required,
             default: SearchScraper::DEFAULT_RESULT_COUNT,
             transform: ->(value) { [value.to_i,1].max }
      option nil,:'show-count',<<~EOD
        show the number of links scraped and exit;
        useful for manually writing/updating scripts (but not for use in a variable);
        implies '--dry-run' option
      EOD
      option nil,:'show-urls',<<~EOD do |_value,_cmd|
        show the URLs used when scraping and exit; you can download these for offline testing and/or
        slow internet (see '--in' option)
      EOD
        puts "Easy: #{BingScraper.build_url(SearchScraper::YASASHII_SITE)}"
        puts "Regular: #{BingScraper.build_url(SearchScraper::FUTSUU_SITE)}"
        exit
      end

      # Without a sub-command, just print the help.
      run do |_opts,_args,cmd|
        puts cmd.help
      end
    end

    @bing_easy_cmd = @bing_cmd.define_command do
      name    'easy'
      usage   'easy [OPTIONS] [COMMAND]...'
      aliases :e,:ez
      summary "Search for NHK News Web Easy (Yasashii) links (aliases: #{app.color_alias('e ez')})"

      description <<~EOD
        Search for NHK News Web Easy (Yasashii) links &
        save to file: #{SearchLinks::DEFAULT_BING_YASASHII_FILE}
      EOD

      run do |opts,args,cmd|
        app.refresh_cmd(opts,args,cmd)
        app.run_bing_cmd(:yasashii)
      end
    end

    @bing_regular_cmd = @bing_cmd.define_command do
      name    'regular'
      usage   'regular [OPTIONS] [COMMAND]...'
      aliases :r,:reg
      summary "Search for NHK News Web Regular (Futsuu) links (aliases: #{app.color_alias('r reg')})"

      description <<~EOD
        Search for NHK News Web Regular (Futsuu) links &
        save to file: #{SearchLinks::DEFAULT_BING_FUTSUU_FILE}
      EOD

      run do |opts,args,cmd|
        app.refresh_cmd(opts,args,cmd)
        app.run_bing_cmd(:futsuu)
      end
    end
  end

  # Scrapes Bing result pages for links and either prints the new ones
  # (dry run) or saves all links to the out file.
  #
  # @param type [Symbol] +:futsuu+ or +:yasashii+
  # @raise [ArgumentError] if +type+ is anything else
  def run_bing_cmd(type)
    # '--show-count' implies '--dry-run'.
    @cmd_opts[:dry_run] = true if @cmd_opts[:show_count]

    build_in_file(:in)

    default_filename =
      case type
      when :futsuu   then SearchLinks::DEFAULT_BING_FUTSUU_FILENAME
      when :yasashii then SearchLinks::DEFAULT_BING_YASASHII_FILENAME
      else raise ArgumentError,"invalid type[#{type}]"
      end

    build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: default_filename)

    return unless check_in_file(:in,empty_ok: true) && check_out_file(:out)

    dry_run = @cmd_opts[:dry_run]
    in_file = @cmd_opts[:in]
    out_file = @cmd_opts[:out]
    requested_count = @cmd_opts[:results]
    result_count = requested_count.nil? ? SearchScraper::DEFAULT_RESULT_COUNT : requested_count
    show_count = @cmd_opts[:show_count]

    start_spin('Scraping bing.com') unless show_count

    # NOTE(review): is_file is computed once from '--in', while url is replaced
    # by next_page.url after the first page; confirm BingScraper copes with that.
    is_file = !in_file.nil?
    new_links = [] # For --dry-run
    next_page = NextPage.new
    page_count = 0
    page_num = 1
    url = in_file # nil will use default URL, else a file

    # Load previous links for 'scraped?' vars.
    links = File.exist?(out_file) ? SearchLinks.load_file(out_file) : SearchLinks.new
    links_count = links.length

    if show_count
      scraped_count = links.links.values.count { |link| link.scraped? }

      puts "#{scraped_count} of #{links_count} links scraped."

      return
    end

    # A fixed number of rounds (same as 0..10000) to prevent an infinite loop. Ichiman!
    10_001.times do
      scraper = BingScraper.new(type,count: result_count,is_file: is_file,url: url,**@scraper_kargs)

      next_page = scraper.scrape(links,next_page)

      # Everything past the previously known length was added by this page.
      new_links.concat(links.links.values[links_count..-1])
      links_count = links.length
      page_count = next_page.count if next_page.count > 0

      detail = " (page=#{page_num}, count=#{page_count}, links=#{links.length}, "
      detail += "new_links=#{new_links.length})"
      update_spin_detail(detail)

      break if next_page.empty?

      page_num += 1
      url = next_page.url

      sleep_scraper
    end

    stop_spin
    puts

    puts 'Last URL scraped:'
    puts "> #{url}"
    puts

    if dry_run
      new_links.each { |link| puts link.to_s(mini: true) }
    else
      links.save_file(out_file)

      puts 'Saved scraped links to file:'
      puts "> #{out_file}"
    end
  end
end
|
219
|
+
end
|
220
|
+
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
#--
|
6
|
+
# This file is part of NHKore.
|
7
|
+
# Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
|
8
|
+
#
|
9
|
+
# NHKore is free software: you can redistribute it and/or modify
|
10
|
+
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
12
|
+
# (at your option) any later version.
|
13
|
+
#
|
14
|
+
# NHKore is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU Lesser General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU Lesser General Public License
|
20
|
+
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
21
|
+
#++
|
22
|
+
|
23
|
+
|
24
|
+
module NHKore
|
25
|
+
module CLI
|
26
|
+
###
|
27
|
+
# @author Jonathan Bradley Whited (@esotericpig)
|
28
|
+
# @since 0.2.0
|
29
|
+
###
|
30
|
+
module FXCmd
  # Defines the 'fx' command on @app_cmd and stores it in @fx_cmd.
  def build_fx_cmd
    # Local handle on this object, so the run block below can call its methods.
    app = self

    @fx_cmd = @app_cmd.define_command do
      name    'fx'
      usage   'fx [OPTIONS] [COMMAND]...'
      summary 'Test spinner/progress special effects (for running long tasks)'

      description <<~EOD
        Test if the special effects work on your command line:\n
        - #{App::NAME} [-c/-X] fx
      EOD

      flag :a,:all,'test all special effects regardless of global options'

      run do |opts,args,cmd|
        app.refresh_cmd(opts,args,cmd)
        app.run_fx_cmd
      end
    end
  end

  # Runs the progress bar demo and then the spinner demo.
  def run_fx_cmd
    test_fx_progress_bar
    test_fx_spinner
  end

  # Demos either every progress bar type ('--all') or only the one in
  # @progress_bar, advancing each bar 100 times.
  def test_fx_progress_bar
    types = @cmd_opts[:all] ? %i[default classic no] : [@progress_bar]

    types.each do |type|
      label = (types.length == 1) ? 'User' : type.to_s.capitalize
      bar = build_progress_bar("Testing #{label} progress",download: false,type: type)

      bar.start

      100.times do
        sleep(0.05)
        bar.advance
      end

      bar.finish
    end
  end

  # Demos either every spinner ('--all') or only the one in @spinner,
  # updating the spin detail 3 times for each.
  def test_fx_spinner
    user_spinner = @spinner

    spinners =
      if @cmd_opts[:all]
        {
          default: App::DEFAULT_SPINNER,
          classic: App::CLASSIC_SPINNER,
          no: App::NO_SPINNER
        }
      else
        {user: user_spinner}
      end

    spinners.each do |name,spinner|
      # start_spin() is given no spinner, so swap it in through @spinner.
      @spinner = spinner

      start_spin("Testing #{name.to_s.capitalize} spinner")

      (1..3).each do |step|
        sleep(1.1)
        update_spin_detail(" (#{step}/3)")
      end

      stop_spin
    end

    # Reset back to users'.
    @spinner = user_spinner
  end
end
|
115
|
+
end
|
116
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
#--
|
6
|
+
# This file is part of NHKore.
|
7
|
+
# Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
|
8
|
+
#
|
9
|
+
# NHKore is free software: you can redistribute it and/or modify
|
10
|
+
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
12
|
+
# (at your option) any later version.
|
13
|
+
#
|
14
|
+
# NHKore is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU Lesser General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU Lesser General Public License
|
20
|
+
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
21
|
+
#++
|
22
|
+
|
23
|
+
|
24
|
+
require 'down/net_http'
|
25
|
+
require 'tempfile'
|
26
|
+
require 'zip'
|
27
|
+
|
28
|
+
require 'nhkore/util'
|
29
|
+
|
30
|
+
|
31
|
+
module NHKore
|
32
|
+
module CLI
|
33
|
+
###
|
34
|
+
# @author Jonathan Bradley Whited (@esotericpig)
|
35
|
+
# @since 0.2.0
|
36
|
+
###
|
37
|
+
module GetCmd
  # Number of bytes requested per read while downloading.
  DEFAULT_GET_CHUNK_SIZE = 4 * 1024
  DEFAULT_GET_URL_LENGTH = 5_000_000 # Just a generous estimation used as a fallback; may be outdated
  GET_URL_FILENAME = 'nhkore-core.zip'
  GET_URL = "https://github.com/esotericpig/nhkore/releases/latest/download/#{GET_URL_FILENAME}"

  # Defines the 'get' command on @app_cmd and stores it in @get_cmd.
  def build_get_cmd()
    # Local handle on this object, so the definition blocks below can call its methods.
    app = self

    @get_cmd = @app_cmd.define_command() do
      name    'get'
      usage   'get [OPTIONS] [COMMAND]...'
      aliases :g
      summary "Download NHKore's pre-scraped files from the latest release (aliases: #{app.color_alias('g')})"

      description <<~EOD
        Download NHKore's pre-scraped files from the latest release &
        save to folder: #{Util::CORE_DIR}

        Note: the latest NHK articles may not have been scraped yet.
      EOD

      option :o,:out,'directory to save downloaded files to',argument: :required,default: Util::CORE_DIR,
             transform: -> (value) do
               app.check_empty_opt(:out,value)
             end
      flag nil,:'show-url','show download URL and exit (for downloading manually)' do |value,cmd|
        puts GET_URL
        exit
      end

      run do |opts,args,cmd|
        app.refresh_cmd(opts,args,cmd)
        app.run_get_cmd()
      end
    end
  end

  # Downloads GET_URL to a temp file and extracts every Zip entry, flattened
  # to its base name, into the out directory.
  #
  # Opening the URL is retried on Down::ConnectionError until the retry budget
  # (@scraper_kargs[:max_retries], or 3 if that is nil) is used up, after which
  # the error is re-raised. On a dry run, only the URL is opened.
  #
  # @raise [ZipError] if an entry of the Zip file has an unsafe name
  def run_get_cmd()
    build_out_dir(:out,default_dir: Util::CORE_DIR)

    return unless check_out_dir(:out)

    chunk_size = DEFAULT_GET_CHUNK_SIZE
    down = nil
    dry_run = @cmd_opts[:dry_run]
    force = @cmd_opts[:force]
    max_retries = @scraper_kargs[:max_retries]
    max_retries = 3 if max_retries.nil?()
    out_dir = @cmd_opts[:out]

    begin
      start_spin('Opening URL')

      begin
        down = Down::NetHttp.open(GET_URL,rewindable: false,**@scraper_kargs)
      rescue Down::ConnectionError
        raise if (max_retries -= 1) < 0
        retry
      end

      stop_spin()

      return if dry_run

      Tempfile.create([App::NAME,'.zip'],binmode: true) do |file|
        puts
        puts 'Downloading to temp file:'
        puts "> #{file.path}"
        puts

        # The size may be unknown; fall back to the estimate so the bar still
        # has a total. (This used to reference an undefined constant.)
        len = down.size
        len = DEFAULT_GET_URL_LENGTH if len.nil?()
        bar = build_progress_bar("Downloading #{GET_URL_FILENAME}",download: true,total: len)

        bar.start()

        while !down.eof?()
          chunk = down.read(chunk_size)

          break if chunk.nil?()

          file.write(chunk)
          # Advance by what was actually read; the last chunk is usually
          # shorter than chunk_size.
          bar.advance(chunk.bytesize)
        end

        down.close()
        file.close()
        bar.finish()

        start_spin("Extracting #{GET_URL_FILENAME}")

        Zip.on_exists_proc = force # true will force overwriting files on extract()

        Zip::File.open(file) do |zip_file|
          zip_file.each() do |entry|
            if !entry.name_safe?()
              raise ZipError,"unsafe entry name[#{entry.name}] in Zip file"
            end

            # Only the base name is used, so any directories inside the Zip are dropped.
            name = File.basename(entry.name)

            update_spin_detail(" (file=#{name})")

            entry.extract(File.join(out_dir,name))
          end
        end

        stop_spin()
        puts

        puts "Extracted #{GET_URL_FILENAME} to directory:"
        puts "> #{out_dir}"
      end
    ensure
      # Covers the dry-run return and any error raised before the explicit close above.
      down.close() if !down.nil?() && !down.closed?()
    end
  end
end
|
152
|
+
end
|
153
|
+
end
|