nhkore 0.3.3 → 0.3.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,23 +1,11 @@
1
- #!/usr/bin/env ruby
2
1
  # encoding: UTF-8
3
2
  # frozen_string_literal: true
4
3
 
5
4
  #--
6
5
  # This file is part of NHKore.
7
- # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
8
- #
9
- # NHKore is free software: you can redistribute it and/or modify
10
- # it under the terms of the GNU Lesser General Public License as published by
11
- # the Free Software Foundation, either version 3 of the License, or
12
- # (at your option) any later version.
13
- #
14
- # NHKore is distributed in the hope that it will be useful,
15
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- # GNU Lesser General Public License for more details.
18
- #
19
- # You should have received a copy of the GNU Lesser General Public License
20
- # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
6
+ # Copyright (c) 2020-2021 Jonathan Bradley Whited
7
+ #
8
+ # SPDX-License-Identifier: LGPL-3.0-or-later
21
9
  #++
22
10
 
23
11
 
@@ -30,122 +18,122 @@ require 'nhkore/util'
30
18
  module NHKore
31
19
  module CLI
32
20
  ###
33
- # @author Jonathan Bradley Whited (@esotericpig)
21
+ # @author Jonathan Bradley Whited
34
22
  # @since 0.3.0
35
23
  ###
36
24
  module SearchCmd
37
- def build_search_cmd()
25
+ def build_search_cmd
38
26
  app = self
39
-
40
- @search_cmd = @app_cmd.define_command() do
27
+
28
+ @search_cmd = @app_cmd.define_command do
41
29
  name 'search'
42
30
  usage 'search [OPTIONS] [COMMAND]...'
43
31
  aliases :se,:sea
44
32
  summary "Search for links to NHK News Web (Easy) (aliases: #{app.color_alias('se sea')})"
45
-
46
- description <<-EOD
33
+
34
+ description <<-DESC
47
35
  Search for links (using a Search Engine, etc.) to NHK News Web (Easy) &
48
36
  save to folder: #{SearchLinks::DEFAULT_DIR}
49
- EOD
50
-
51
- option :i,:in,<<-EOD,argument: :required,transform: -> (value) do
37
+ DESC
38
+
39
+ option :i,:in,<<-DESC,argument: :required,transform: lambda { |value|
52
40
  file to read instead of URL (for offline testing and/or slow internet;
53
41
  see '--show-*' options)
54
- EOD
42
+ DESC
55
43
  app.check_empty_opt(:in,value)
56
- end
57
- option :o,:out,<<-EOD,argument: :required,transform: -> (value) do
44
+ }
45
+ option :o,:out,<<-DESC,argument: :required,transform: lambda { |value|
58
46
  'directory/file' to save links to; if you only specify a directory or a file, it will attach the
59
47
  appropriate default directory/file name
60
48
  (defaults: #{SearchLinks::DEFAULT_YASASHII_FILE}, #{SearchLinks::DEFAULT_FUTSUU_FILE})
61
- EOD
49
+ DESC
62
50
  app.check_empty_opt(:out,value)
63
- end
51
+ }
64
52
  option :r,:results,'number of results per page to request from search',argument: :required,
65
- default: SearchScraper::DEFAULT_RESULT_COUNT,transform: -> (value) do
66
- value = value.to_i()
67
- value = 1 if value < 1
68
- value
69
- end
70
- option nil,:'show-count',<<-EOD
53
+ default: SearchScraper::DEFAULT_RESULT_COUNT,transform: lambda { |value|
54
+ value = value.to_i
55
+ value = 1 if value < 1
56
+ value
57
+ }
58
+ option nil,:'show-count',<<-DESC
71
59
  show the number of links scraped and exit;
72
60
  useful for manually writing/updating scripts (but not for use in a variable);
73
61
  implies '--dry-run' option
74
- EOD
75
- option nil,:'show-urls',<<-EOD
62
+ DESC
63
+ option nil,:'show-urls',<<-DESC
76
64
  show the URLs -- if any -- used when searching & scraping and exit;
77
65
  you can download these for offline testing and/or slow internet
78
66
  (see '--in' option)
79
- EOD
80
-
67
+ DESC
68
+
81
69
  run do |opts,args,cmd|
82
- opts.each() do |key,value|
83
- key = key.to_s()
84
-
70
+ opts.each do |key,value|
71
+ key = key.to_s
72
+
85
73
  if key.include?('show')
86
- raise CLIError.new("must specify a sub command for option[#{key}]")
74
+ raise CLIError,"must specify a sub command for option[#{key}]"
87
75
  end
88
76
  end
89
-
77
+
90
78
  puts cmd.help
91
79
  end
92
80
  end
93
-
94
- @search_easy_cmd = @search_cmd.define_command() do
81
+
82
+ @search_easy_cmd = @search_cmd.define_command do
95
83
  name 'easy'
96
84
  usage 'easy [OPTIONS] [COMMAND]...'
97
85
  aliases :e,:ez
98
86
  summary "Search for NHK News Web Easy (Yasashii) links (aliases: #{app.color_alias('e ez')})"
99
-
100
- description <<-EOD
87
+
88
+ description <<-DESC
101
89
  Search for NHK News Web Easy (Yasashii) links &
102
90
  save to file: #{SearchLinks::DEFAULT_YASASHII_FILE}
103
- EOD
104
-
91
+ DESC
92
+
105
93
  run do |opts,args,cmd|
106
94
  app.refresh_cmd(opts,args,cmd)
107
- app.run_search_help()
95
+ app.run_search_help
108
96
  end
109
97
  end
110
-
111
- @search_regular_cmd = @search_cmd.define_command() do
98
+
99
+ @search_regular_cmd = @search_cmd.define_command do
112
100
  name 'regular'
113
101
  usage 'regular [OPTIONS] [COMMAND]...'
114
102
  aliases :r,:reg
115
103
  summary "Search for NHK News Web Regular (Futsuu) links (aliases: #{app.color_alias('r reg')})"
116
-
117
- description <<-EOD
104
+
105
+ description <<-DESC
118
106
  Search for NHK News Web Regular (Futsuu) links &
119
107
  save to file: #{SearchLinks::DEFAULT_FUTSUU_FILE}
120
- EOD
121
-
108
+ DESC
109
+
122
110
  run do |opts,args,cmd|
123
111
  app.refresh_cmd(opts,args,cmd)
124
- app.run_search_help()
112
+ app.run_search_help
125
113
  end
126
114
  end
127
-
128
- @search_bing_cmd = Cri::Command.define() do
115
+
116
+ @search_bing_cmd = Cri::Command.define do
129
117
  name 'bing'
130
118
  usage 'bing [OPTIONS] [COMMAND]...'
131
119
  aliases :b
132
120
  summary "Search bing.com for links (aliases: #{app.color_alias('b')})"
133
-
134
- description <<-EOD
121
+
122
+ description <<-DESC
135
123
  Search bing.com for links & save to folder: #{SearchLinks::DEFAULT_DIR}
136
- EOD
137
-
124
+ DESC
125
+
138
126
  run do |opts,args,cmd|
139
127
  app.refresh_cmd(opts,args,cmd)
140
- app.run_search_cmd(cmd.supercommand.name.to_sym(),:bing)
128
+ app.run_search_cmd(cmd.supercommand.name.to_sym,:bing)
141
129
  end
142
130
  end
143
-
131
+
144
132
  # dup()/clone() must be called for `cmd.supercommand` to work appropriately.
145
- @search_easy_cmd.add_command @search_bing_cmd.dup()
146
- @search_regular_cmd.add_command @search_bing_cmd.dup()
133
+ @search_easy_cmd.add_command @search_bing_cmd.dup
134
+ @search_regular_cmd.add_command @search_bing_cmd.dup
147
135
  end
148
-
136
+
149
137
  def run_search_cmd(nhk_type,search_type)
150
138
  case nhk_type
151
139
  when :easy
@@ -153,139 +141,141 @@ module CLI
153
141
  when :regular
154
142
  nhk_type = :futsuu
155
143
  end
156
-
144
+
157
145
  return if show_search_urls(search_type)
158
-
146
+
159
147
  @cmd_opts[:dry_run] = true if @cmd_opts[:show_count]
160
-
148
+
161
149
  build_in_file(:in)
162
-
150
+
163
151
  case nhk_type
164
152
  when :futsuu
165
- build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_FUTSUU_FILENAME)
153
+ build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,
154
+ default_filename: SearchLinks::DEFAULT_FUTSUU_FILENAME)
166
155
  when :yasashii
167
- build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: SearchLinks::DEFAULT_YASASHII_FILENAME)
156
+ build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,
157
+ default_filename: SearchLinks::DEFAULT_YASASHII_FILENAME)
168
158
  else
169
159
  raise ArgumentError,"invalid nhk_type[#{nhk_type}]"
170
160
  end
171
-
161
+
172
162
  return unless check_in_file(:in,empty_ok: true)
173
163
  return unless check_out_file(:out)
174
-
164
+
175
165
  dry_run = @cmd_opts[:dry_run]
176
166
  in_file = @cmd_opts[:in]
177
167
  out_file = @cmd_opts[:out]
178
168
  result_count = @cmd_opts[:results]
179
- result_count = SearchScraper::DEFAULT_RESULT_COUNT if result_count.nil?()
169
+ result_count = SearchScraper::DEFAULT_RESULT_COUNT if result_count.nil?
180
170
  show_count = @cmd_opts[:show_count]
181
-
171
+
182
172
  start_spin("Scraping #{search_type}") unless show_count
183
-
184
- is_file = !in_file.nil?()
173
+
174
+ is_file = !in_file.nil?
185
175
  links = nil
186
176
  new_links = [] # For --dry-run
187
- next_page = NextPage.new()
177
+ next_page = NextPage.new
188
178
  page_count = 0
189
179
  page_num = 1
190
180
  url = in_file # nil will use default URL, else a file
191
-
181
+
192
182
  # Load previous links for 'scraped?' vars.
193
183
  if File.exist?(out_file)
194
184
  links = SearchLinks.load_file(out_file)
195
185
  else
196
- links = SearchLinks.new()
186
+ links = SearchLinks.new
197
187
  end
198
-
188
+
199
189
  links_count = links.length
200
-
190
+
201
191
  if show_count
202
192
  scraped_count = 0
203
-
204
- links.links.values.each() do |link|
205
- scraped_count += 1 if link.scraped?()
193
+
194
+ links.links.each_value do |link|
195
+ scraped_count += 1 if link.scraped?
206
196
  end
207
-
197
+
208
198
  puts "#{scraped_count} of #{links_count} links scraped."
209
-
199
+
210
200
  return
211
201
  end
212
-
213
- range = (0..10000) # Do a range to prevent an infinite loop; ichiman!
214
-
202
+
203
+ range = (0..10_000) # Do a range to prevent an infinite loop; ichiman!
204
+
215
205
  case search_type
216
206
  # Anything that extends SearchScraper.
217
207
  when :bing
218
- range.each() do
208
+ range.each do
219
209
  scraper = nil
220
-
210
+
221
211
  case search_type
222
212
  when :bing
223
213
  scraper = BingScraper.new(nhk_type,count: result_count,is_file: is_file,url: url,**@scraper_kargs)
224
214
  else
225
215
  raise NHKore::Error,"internal code broken; add missing search_type[#{search_type}]"
226
216
  end
227
-
217
+
228
218
  next_page = scraper.scrape(links,next_page)
229
-
219
+
230
220
  new_links.concat(links.links.values[links_count..-1])
231
221
  links_count = links.length
232
222
  page_count = next_page.count if next_page.count > 0
233
-
234
- update_spin_detail(" (page=#{page_num}, count=#{page_count}, links=#{links.length}, " +
235
- "new_links=#{new_links.length})")
236
-
237
- break if next_page.empty?()
238
-
223
+
224
+ update_spin_detail(" (page=#{page_num}, count=#{page_count}, links=#{links.length}," \
225
+ " new_links=#{new_links.length})")
226
+
227
+ break if next_page.empty?
228
+
239
229
  page_num += 1
240
230
  url = next_page.url
241
-
242
- sleep_scraper()
231
+
232
+ sleep_scraper
243
233
  end
244
234
  else
245
235
  raise ArgumentError,"invalid search_type[#{search_type}]"
246
236
  end
247
-
248
- stop_spin()
237
+
238
+ stop_spin
249
239
  puts
250
240
  puts 'Last URL scraped:'
251
241
  puts "> #{url}"
252
242
  puts
253
-
243
+
254
244
  if dry_run
255
- new_links.each() do |link|
245
+ new_links.each do |link|
256
246
  puts link.to_s(mini: true)
257
247
  end
258
248
  else
259
249
  links.save_file(out_file)
260
-
250
+
261
251
  puts 'Saved scraped links to file:'
262
252
  puts "> #{out_file}"
263
253
  end
264
254
  end
265
-
266
- def run_search_help()
255
+
256
+ def run_search_help
267
257
  if @cmd_opts[:show_count] || @cmd_opts[:show_urls]
268
- run_search_cmd(@cmd.name.to_sym(),nil)
258
+ run_search_cmd(@cmd.name.to_sym,nil)
269
259
  else
270
260
  puts @cmd.help
271
261
  end
272
262
  end
273
-
263
+
274
264
  def show_search_urls(search_type)
275
265
  return false unless @cmd_opts[:show_urls]
276
-
266
+
277
267
  count = @cmd_opts[:results]
278
- count = SearchScraper::DEFAULT_RESULT_COUNT if count.nil?()
279
-
268
+ count = SearchScraper::DEFAULT_RESULT_COUNT if count.nil?
269
+
280
270
  case search_type
281
271
  when :bing
282
272
  puts 'Bing:'
283
273
  puts "> Easy: #{BingScraper.build_url(SearchScraper::YASASHII_SITE,count: count)}"
284
274
  puts "> Regular: #{BingScraper.build_url(SearchScraper::FUTSUU_SITE,count: count)}"
285
275
  else
286
- raise CLIError.new('must specify a sub command for option[show-urls]')
276
+ raise CLIError,'must specify a sub command for option[show-urls]'
287
277
  end
288
-
278
+
289
279
  return true
290
280
  end
291
281
  end
@@ -1,29 +1,18 @@
1
- #!/usr/bin/env ruby
2
1
  # encoding: UTF-8
3
2
  # frozen_string_literal: true
4
3
 
5
4
  #--
6
5
  # This file is part of NHKore.
7
- # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
8
- #
9
- # NHKore is free software: you can redistribute it and/or modify
10
- # it under the terms of the GNU Lesser General Public License as published by
11
- # the Free Software Foundation, either version 3 of the License, or
12
- # (at your option) any later version.
13
- #
14
- # NHKore is distributed in the hope that it will be useful,
15
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- # GNU Lesser General Public License for more details.
18
- #
19
- # You should have received a copy of the GNU Lesser General Public License
20
- # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
6
+ # Copyright (c) 2020-2021 Jonathan Bradley Whited
7
+ #
8
+ # SPDX-License-Identifier: LGPL-3.0-or-later
21
9
  #++
22
10
 
23
11
 
24
12
  require 'date'
25
13
  require 'time'
26
14
 
15
+ require 'nhkore/datetime_parser'
27
16
  require 'nhkore/news'
28
17
  require 'nhkore/sifter'
29
18
  require 'nhkore/util'
@@ -32,298 +21,193 @@ require 'nhkore/util'
32
21
  module NHKore
33
22
  module CLI
34
23
  ###
35
- # @author Jonathan Bradley Whited (@esotericpig)
24
+ # @author Jonathan Bradley Whited
36
25
  # @since 0.2.0
37
26
  ###
38
27
  module SiftCmd
39
28
  DEFAULT_SIFT_EXT = :csv
40
29
  DEFAULT_SIFT_FUTSUU_FILE = "#{Sifter::DEFAULT_FUTSUU_FILE}{search.criteria}{file.ext}"
41
30
  DEFAULT_SIFT_YASASHII_FILE = "#{Sifter::DEFAULT_YASASHII_FILE}{search.criteria}{file.ext}"
42
- SIFT_EXTS = [:csv,:htm,:html,:json,:yaml,:yml]
43
-
44
- # Order matters.
45
- SIFT_DATETIME_FMTS = [
46
- '%Y-%m-%d %H:%M',
47
- '%Y-%m-%d %H',
48
- '%Y-%m-%d',
49
- '%m-%d %H:%M',
50
- '%Y-%m %H:%M',
51
- '%m-%d %H',
52
- '%Y-%m %H',
53
- '%m-%d',
54
- '%Y-%m',
55
- '%d %H:%M',
56
- '%y %H:%M',
57
- '%d %H',
58
- '%Y %H',
59
- '%H:%M',
60
- '%d',
61
- '%Y'
62
- ]
63
-
31
+ SIFT_EXTS = %i[csv htm html json yaml yml].freeze
32
+
64
33
  attr_accessor :sift_datetime_text
65
34
  attr_accessor :sift_search_criteria
66
-
67
- def build_sift_cmd()
35
+
36
+ def build_sift_cmd
68
37
  app = self
69
-
38
+
70
39
  @sift_datetime_text = nil
71
40
  @sift_search_criteria = nil
72
-
73
- @sift_cmd = @app_cmd.define_command() do
41
+
42
+ @sift_cmd = @app_cmd.define_command do
74
43
  name 'sift'
75
44
  usage 'sift [OPTIONS] [COMMAND]...'
76
45
  aliases :s
77
- summary "Sift NHK News Web (Easy) articles data for the frequency of words (aliases: #{app.color_alias('s')})"
78
-
79
- description <<-EOD
46
+ summary 'Sift NHK News Web (Easy) articles data for the frequency of words' \
47
+ " (aliases: #{app.color_alias('s')})"
48
+
49
+ description(<<-DESC)
80
50
  Sift NHK News Web (Easy) articles data for the frequency of words &
81
51
  save to folder: #{Sifter::DEFAULT_DIR}
82
- EOD
83
-
84
- option :d,:datetime,<<-EOD,argument: :required,transform: -> (value) do
52
+ DESC
53
+
54
+ option :d,:datetime,<<-DESC,argument: :required,transform: lambda { |value|
85
55
  date time to filter on; examples:
86
56
  '2020-7-1 13:10...2020-7-31 11:11';
87
57
  '2020-12' (2020, December 1st-31st);
88
58
  '7-4...7-9' (July 4th-9th of Current Year);
89
59
  '7-9' (July 9th of Current Year);
90
60
  '9' (9th of Current Year & Month)
91
- EOD
61
+ DESC
92
62
  app.sift_datetime_text = value # Save the original value for the file name
93
- value = app.parse_sift_datetime(value)
63
+
64
+ value = DatetimeParser.parse_range(value)
65
+
66
+ app.check_empty_opt(:datetime,value) if value.nil?
67
+
94
68
  value
95
- end
96
- option :e,:ext,<<-EOD,argument: :required,default: DEFAULT_SIFT_EXT,transform: -> (value) do
69
+ }
70
+ option :e,:ext,<<-DESC,argument: :required,default: DEFAULT_SIFT_EXT,transform: lambda { |value|
97
71
  type of file (extension) to save; valid options: [#{SIFT_EXTS.join(', ')}];
98
72
  not needed if you specify a file extension with the '--out' option: '--out sift.html'
99
- EOD
100
- value = Util.unspace_web_str(value).downcase().to_sym()
101
-
73
+ DESC
74
+ value = Util.unspace_web_str(value).downcase.to_sym
75
+
102
76
  raise CLIError,"invalid ext[#{value}] for option[#{ext}]" unless SIFT_EXTS.include?(value)
103
-
77
+
104
78
  value
105
- end
106
- option :i,:in,<<-EOD,argument: :required,transform: -> (value) do
79
+ }
80
+ option :i,:in,<<-DESC,argument: :required,transform: lambda { |value|
107
81
  file of NHK News Web (Easy) articles data to sift (see '#{App::NAME} news';
108
82
  defaults: #{YasashiiNews::DEFAULT_FILE}, #{FutsuuNews::DEFAULT_FILE})
109
- EOD
83
+ DESC
110
84
  app.check_empty_opt(:in,value)
111
- end
85
+ }
112
86
  flag :D,:'no-defn','do not output the definitions for words (which can be quite long)'
113
87
  flag :E,:'no-eng','do not output the English translations for words'
114
- option :o,:out,<<-EOD,argument: :required,transform: -> (value) do
88
+ option :o,:out,<<-DESC,argument: :required,transform: lambda { |value|
115
89
  'directory/file' to save sifted data to; if you only specify a directory or a file, it will attach
116
90
  the appropriate default directory/file name
117
91
  (defaults: #{DEFAULT_SIFT_YASASHII_FILE}, #{DEFAULT_SIFT_FUTSUU_FILE})
118
- EOD
92
+ DESC
119
93
  app.check_empty_opt(:out,value)
120
- end
121
- flag :H,'no-sha256',<<-EOD
94
+ }
95
+ flag :H,'no-sha256',<<-DESC
122
96
  if you used this option with the 'news' command, then you'll also need this option here
123
97
  to not fail on "duplicate" articles; see '#{App::NAME} news'
124
- EOD
98
+ DESC
125
99
  option :t,:title,'title to filter on, where search text only needs to be somewhere in the title',
126
100
  argument: :required
127
101
  option :u,:url,'URL to filter on, where search text only needs to be somewhere in the URL',
128
102
  argument: :required
129
-
103
+
130
104
  run do |opts,args,cmd|
131
105
  puts cmd.help
132
106
  end
133
107
  end
134
-
135
- @sift_easy_cmd = @sift_cmd.define_command() do
108
+
109
+ @sift_easy_cmd = @sift_cmd.define_command do
136
110
  name 'easy'
137
111
  usage 'easy [OPTIONS] [COMMAND]...'
138
112
  aliases :e,:ez
139
113
  summary "Sift NHK News Web Easy (Yasashii) articles data (aliases: #{app.color_alias('e ez')})"
140
-
141
- description <<-EOD
114
+
115
+ description <<-DESC
142
116
  Sift NHK News Web Easy (Yasashii) articles data for the frequency of words &
143
117
  save to file: #{DEFAULT_SIFT_YASASHII_FILE}
144
- EOD
145
-
118
+ DESC
119
+
146
120
  run do |opts,args,cmd|
147
121
  app.refresh_cmd(opts,args,cmd)
148
122
  app.run_sift_cmd(:yasashii)
149
123
  end
150
124
  end
151
-
152
- @sift_regular_cmd = @sift_cmd.define_command() do
125
+
126
+ @sift_regular_cmd = @sift_cmd.define_command do
153
127
  name 'regular'
154
128
  usage 'regular [OPTIONS] [COMMAND]...'
155
129
  aliases :r,:reg
156
130
  summary "Sift NHK News Web Regular (Futsuu) articles data (aliases: #{app.color_alias('r reg')})"
157
-
158
- description <<-EOD
131
+
132
+ description(<<-DESC)
159
133
  Sift NHK News Web Regular (Futsuu) articles data for the frequency of words &
160
134
  save to file: #{DEFAULT_SIFT_FUTSUU_FILE}
161
- EOD
162
-
135
+ DESC
136
+
163
137
  run do |opts,args,cmd|
164
138
  app.refresh_cmd(opts,args,cmd)
165
139
  app.run_sift_cmd(:futsuu)
166
140
  end
167
141
  end
168
142
  end
169
-
143
+
170
144
  def build_sift_filename(filename)
171
145
  @sift_search_criteria = []
172
-
173
- @sift_search_criteria << Util.strip_web_str(@sift_datetime_text.to_s())
174
- @sift_search_criteria << Util.strip_web_str(@cmd_opts[:title].to_s())
175
- @sift_search_criteria << Util.strip_web_str(@cmd_opts[:url].to_s())
176
- @sift_search_criteria.filter!() {|sc| !sc.empty?()}
177
-
178
- clean_regex = /[^[[:alnum:]]\-_\.]+/
179
- clean_search_criteria = ''.dup()
180
-
181
- @sift_search_criteria.each() do |sc|
146
+
147
+ @sift_search_criteria << Util.strip_web_str(@sift_datetime_text.to_s)
148
+ @sift_search_criteria << Util.strip_web_str(@cmd_opts[:title].to_s)
149
+ @sift_search_criteria << Util.strip_web_str(@cmd_opts[:url].to_s)
150
+ @sift_search_criteria.filter! { |sc| !sc.empty? }
151
+
152
+ clean_regex = /[^[[:alnum:]]\-_.]+/
153
+ clean_search_criteria = ''.dup
154
+
155
+ @sift_search_criteria.each do |sc|
182
156
  clean_search_criteria << sc.gsub(clean_regex,'')
183
157
  end
184
-
185
- @sift_search_criteria = @sift_search_criteria.empty?() ? nil : @sift_search_criteria.join(', ')
186
-
158
+
159
+ @sift_search_criteria = @sift_search_criteria.empty? ? nil : @sift_search_criteria.join(', ')
160
+
187
161
  # Limit the file name length.
188
162
  # If length is smaller, [..] still works appropriately.
189
163
  clean_search_criteria = clean_search_criteria[0..32]
190
-
191
- clean_search_criteria.prepend('_') unless clean_search_criteria.empty?()
192
-
164
+
165
+ clean_search_criteria.prepend('_') unless clean_search_criteria.empty?
166
+
193
167
  file_ext = @cmd_opts[:ext]
194
-
195
- if file_ext.nil?()
168
+
169
+ if file_ext.nil?
196
170
  # Try to get from '--out' if it exists.
197
- if !@cmd_opts[:out].nil?()
198
- file_ext = Util.unspace_web_str(File.extname(@cmd_opts[:out])).downcase()
171
+ if !@cmd_opts[:out].nil?
172
+ file_ext = Util.unspace_web_str(File.extname(@cmd_opts[:out])).downcase
199
173
  file_ext = file_ext.sub(/\A\./,'') # Remove '.'; can't be nil for to_sym()
200
- file_ext = file_ext.to_sym()
201
-
174
+ file_ext = file_ext.to_sym
175
+
202
176
  file_ext = nil unless SIFT_EXTS.include?(file_ext)
203
177
  end
204
-
205
- file_ext = DEFAULT_SIFT_EXT if file_ext.nil?()
178
+
179
+ file_ext = DEFAULT_SIFT_EXT if file_ext.nil?
206
180
  @cmd_opts[:ext] = file_ext
207
181
  end
208
-
182
+
209
183
  filename = "#{filename}#{clean_search_criteria}.#{file_ext}"
210
-
184
+
211
185
  return filename
212
186
  end
213
-
214
- # TODO: This should probably be moved into its own class, into Util, or into Sifter?
215
- def parse_sift_datetime(value)
216
- value = Util.reduce_space(value).strip() # Don't use unspace_web_str(), want spaces for formats
217
- value = value.split('...',2)
218
-
219
- check_empty_opt(:datetime,nil) if value.empty?() # For ''
220
-
221
- # Make a "to" and a "from" date time range.
222
- value << value[0].dup() if value.length == 1
223
-
224
- to_day = nil
225
- to_hour = 23
226
- to_minute = 59
227
- to_month = 12
228
- to_year = Util::MAX_SANE_YEAR
229
-
230
- value.each_with_index() do |v,i|
231
- v = check_empty_opt(:datetime,v) # For '...', '12-25...', or '...12-25'
232
-
233
- has_day = false
234
- has_hour = false
235
- has_minute = false
236
- has_month = false
237
- has_year = false
238
-
239
- SIFT_DATETIME_FMTS.each_with_index() do |fmt,i|
240
- begin
241
- # If don't do this, "%d" values will be parsed using "%d %H".
242
- # It seems as though strptime() ignores space.
243
- raise ArgumentError if !v.include?(' ') && fmt.include?(' ')
244
-
245
- # If don't do this, "%y" values will be parsed using "%d".
246
- raise ArgumentError if fmt == '%d' && v.length > 2
247
-
248
- v = Time.strptime(v,fmt,&Util.method(:guess_year))
249
-
250
- has_day = fmt.include?('%d')
251
- has_hour = fmt.include?('%H')
252
- has_minute = fmt.include?('%M')
253
- has_month = fmt.include?('%m')
254
- has_year = fmt.include?('%Y')
255
-
256
- break # No problem; this format worked
257
- rescue ArgumentError
258
- # Out of formats.
259
- raise if i >= (SIFT_DATETIME_FMTS.length - 1)
260
- end
261
- end
262
-
263
- # "From" date time.
264
- if i == 0
265
- # Set these so that "2012-7-4...7-9" will use the appropriate year
266
- # of "2012" for "7-9".
267
- to_day = v.day if has_day
268
- to_hour = v.hour if has_hour
269
- to_minute = v.min if has_minute
270
- to_month = v.month if has_month
271
- to_year = v.year if has_year
272
-
273
- v = Time.new(
274
- has_year ? v.year : Util::MIN_SANE_YEAR,
275
- has_month ? v.month : 1,
276
- has_day ? v.day : 1,
277
- has_hour ? v.hour : 0,
278
- has_minute ? v.min : 0
279
- )
280
- # "To" date time.
281
- else
282
- to_hour = v.hour if has_hour
283
- to_minute = v.min if has_minute
284
- to_month = v.month if has_month
285
- to_year = v.year if has_year
286
-
287
- if has_day
288
- to_day = v.day
289
- # Nothing passed from the "from" date time?
290
- elsif to_day.nil?()
291
- # Last day of month.
292
- to_day = Date.new(to_year,to_month,-1).day
293
- end
294
-
295
- v = Time.new(to_year,to_month,to_day,to_hour,to_minute)
296
- end
297
-
298
- value[i] = v
299
- end
300
-
301
- return value
302
- end
303
-
187
+
304
188
  def run_sift_cmd(type)
305
189
  news_name = nil
306
-
190
+
307
191
  case type
308
192
  when :futsuu
309
193
  build_in_file(:in,default_dir: News::DEFAULT_DIR,default_filename: FutsuuNews::DEFAULT_FILENAME)
310
194
  build_out_file(:out,default_dir: Sifter::DEFAULT_DIR,
311
195
  default_filename: build_sift_filename(Sifter::DEFAULT_FUTSUU_FILENAME))
312
-
196
+
313
197
  news_name = 'Regular'
314
198
  when :yasashii
315
199
  build_in_file(:in,default_dir: News::DEFAULT_DIR,default_filename: YasashiiNews::DEFAULT_FILENAME)
316
200
  build_out_file(:out,default_dir: Sifter::DEFAULT_DIR,
317
201
  default_filename: build_sift_filename(Sifter::DEFAULT_YASASHII_FILENAME))
318
-
202
+
319
203
  news_name = 'Easy'
320
204
  else
321
205
  raise ArgumentError,"invalid type[#{type}]"
322
206
  end
323
-
207
+
324
208
  return unless check_in_file(:in,empty_ok: false)
325
209
  return unless check_out_file(:out)
326
-
210
+
327
211
  datetime_filter = @cmd_opts[:datetime]
328
212
  dry_run = @cmd_opts[:dry_run]
329
213
  file_ext = @cmd_opts[:ext]
@@ -334,55 +218,55 @@ module CLI
334
218
  out_file = @cmd_opts[:out]
335
219
  title_filter = @cmd_opts[:title]
336
220
  url_filter = @cmd_opts[:url]
337
-
221
+
338
222
  start_spin("Sifting NHK News Web #{news_name} data")
339
-
223
+
340
224
  news = (type == :yasashii) ?
341
225
  YasashiiNews.load_file(in_file,overwrite: no_sha256) :
342
226
  FutsuuNews.load_file(in_file,overwrite: no_sha256)
343
-
227
+
344
228
  sifter = Sifter.new(news)
345
-
346
- sifter.filter_by_datetime(datetime_filter) unless datetime_filter.nil?()
347
- sifter.filter_by_title(title_filter) unless title_filter.nil?()
348
- sifter.filter_by_url(url_filter) unless url_filter.nil?()
229
+
230
+ sifter.filter_by_datetime(datetime_filter) unless datetime_filter.nil?
231
+ sifter.filter_by_title(title_filter) unless title_filter.nil?
232
+ sifter.filter_by_url(url_filter) unless url_filter.nil?
349
233
  sifter.ignore(:defn) if no_defn
350
234
  sifter.ignore(:eng) if no_eng
351
-
352
- sifter.caption = "NHK News Web #{news_name}".dup()
353
-
354
- if !@sift_search_criteria.nil?()
355
- if [:htm,:html].any?(file_ext)
356
- sifter.caption << " &mdash; #{Util.escape_html(@sift_search_criteria.to_s())}"
235
+
236
+ sifter.caption = "NHK News Web #{news_name}".dup
237
+
238
+ if !@sift_search_criteria.nil?
239
+ if %i[htm html].any?(file_ext)
240
+ sifter.caption << " &mdash; #{Util.escape_html(@sift_search_criteria.to_s)}"
357
241
  else
358
242
  sifter.caption << " -- #{@sift_search_criteria}"
359
243
  end
360
244
  end
361
-
245
+
362
246
  case file_ext
363
247
  when :csv
364
- sifter.put_csv!()
248
+ sifter.put_csv!
365
249
  when :htm,:html
366
- sifter.put_html!()
250
+ sifter.put_html!
367
251
  when :json
368
- sifter.put_json!()
252
+ sifter.put_json!
369
253
  when :yaml,:yml
370
- sifter.put_yaml!()
254
+ sifter.put_yaml!
371
255
  else
372
256
  raise ArgumentError,"invalid file ext[#{file_ext}]"
373
257
  end
374
-
375
- stop_spin()
258
+
259
+ stop_spin
376
260
  puts
377
-
261
+
378
262
  if dry_run
379
- puts sifter.to_s()
263
+ puts sifter.to_s
380
264
  else
381
265
  start_spin('Saving sifted data to file')
382
-
266
+
383
267
  sifter.save_file(out_file)
384
-
385
- stop_spin()
268
+
269
+ stop_spin
386
270
  puts "> #{out_file}"
387
271
  end
388
272
  end