nhkore 0.3.7 → 0.3.11

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,23 +1,11 @@
1
- #!/usr/bin/env ruby
2
1
  # encoding: UTF-8
3
2
  # frozen_string_literal: true
4
3
 
5
4
  #--
6
5
  # This file is part of NHKore.
7
- # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
8
- #
9
- # NHKore is free software: you can redistribute it and/or modify
10
- # it under the terms of the GNU Lesser General Public License as published by
11
- # the Free Software Foundation, either version 3 of the License, or
12
- # (at your option) any later version.
13
- #
14
- # NHKore is distributed in the hope that it will be useful,
15
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- # GNU Lesser General Public License for more details.
18
- #
19
- # You should have received a copy of the GNU Lesser General Public License
20
- # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
6
+ # Copyright (c) 2020-2021 Jonathan Bradley Whited
7
+ #
8
+ # SPDX-License-Identifier: LGPL-3.0-or-later
21
9
  #++
22
10
 
23
11
 
@@ -33,192 +21,193 @@ require 'nhkore/util'
33
21
  module NHKore
34
22
  module CLI
35
23
  ###
36
- # @author Jonathan Bradley Whited (@esotericpig)
24
+ # @author Jonathan Bradley Whited
37
25
  # @since 0.2.0
38
26
  ###
39
27
  module SiftCmd
40
28
  DEFAULT_SIFT_EXT = :csv
41
29
  DEFAULT_SIFT_FUTSUU_FILE = "#{Sifter::DEFAULT_FUTSUU_FILE}{search.criteria}{file.ext}"
42
30
  DEFAULT_SIFT_YASASHII_FILE = "#{Sifter::DEFAULT_YASASHII_FILE}{search.criteria}{file.ext}"
43
- SIFT_EXTS = [:csv,:htm,:html,:json,:yaml,:yml]
44
-
31
+ SIFT_EXTS = %i[csv htm html json yaml yml].freeze
32
+
45
33
  attr_accessor :sift_datetime_text
46
34
  attr_accessor :sift_search_criteria
47
-
48
- def build_sift_cmd()
35
+
36
+ def build_sift_cmd
49
37
  app = self
50
-
38
+
51
39
  @sift_datetime_text = nil
52
40
  @sift_search_criteria = nil
53
-
54
- @sift_cmd = @app_cmd.define_command() do
41
+
42
+ @sift_cmd = @app_cmd.define_command do
55
43
  name 'sift'
56
44
  usage 'sift [OPTIONS] [COMMAND]...'
57
45
  aliases :s
58
- summary "Sift NHK News Web (Easy) articles data for the frequency of words (aliases: #{app.color_alias('s')})"
59
-
60
- description <<-EOD
46
+ summary 'Sift NHK News Web (Easy) articles data for the frequency of words' \
47
+ " (aliases: #{app.color_alias('s')})"
48
+
49
+ description(<<-DESC)
61
50
  Sift NHK News Web (Easy) articles data for the frequency of words &
62
51
  save to folder: #{Sifter::DEFAULT_DIR}
63
- EOD
64
-
65
- option :d,:datetime,<<-EOD,argument: :required,transform: -> (value) do
52
+ DESC
53
+
54
+ option :d,:datetime,<<-DESC,argument: :required,transform: lambda { |value|
66
55
  date time to filter on; examples:
67
56
  '2020-7-1 13:10...2020-7-31 11:11';
68
57
  '2020-12' (2020, December 1st-31st);
69
58
  '7-4...7-9' (July 4th-9th of Current Year);
70
59
  '7-9' (July 9th of Current Year);
71
60
  '9' (9th of Current Year & Month)
72
- EOD
61
+ DESC
73
62
  app.sift_datetime_text = value # Save the original value for the file name
74
-
63
+
75
64
  value = DatetimeParser.parse_range(value)
76
-
77
- app.check_empty_opt(:datetime,value) if value.nil?()
78
-
65
+
66
+ app.check_empty_opt(:datetime,value) if value.nil?
67
+
79
68
  value
80
- end
81
- option :e,:ext,<<-EOD,argument: :required,default: DEFAULT_SIFT_EXT,transform: -> (value) do
69
+ }
70
+ option :e,:ext,<<-DESC,argument: :required,default: DEFAULT_SIFT_EXT,transform: lambda { |value|
82
71
  type of file (extension) to save; valid options: [#{SIFT_EXTS.join(', ')}];
83
72
  not needed if you specify a file extension with the '--out' option: '--out sift.html'
84
- EOD
85
- value = Util.unspace_web_str(value).downcase().to_sym()
86
-
73
+ DESC
74
+ value = Util.unspace_web_str(value).downcase.to_sym
75
+
87
76
  raise CLIError,"invalid ext[#{value}] for option[#{ext}]" unless SIFT_EXTS.include?(value)
88
-
77
+
89
78
  value
90
- end
91
- option :i,:in,<<-EOD,argument: :required,transform: -> (value) do
79
+ }
80
+ option :i,:in,<<-DESC,argument: :required,transform: lambda { |value|
92
81
  file of NHK News Web (Easy) articles data to sift (see '#{App::NAME} news';
93
82
  defaults: #{YasashiiNews::DEFAULT_FILE}, #{FutsuuNews::DEFAULT_FILE})
94
- EOD
83
+ DESC
95
84
  app.check_empty_opt(:in,value)
96
- end
85
+ }
97
86
  flag :D,:'no-defn','do not output the definitions for words (which can be quite long)'
98
87
  flag :E,:'no-eng','do not output the English translations for words'
99
- option :o,:out,<<-EOD,argument: :required,transform: -> (value) do
88
+ option :o,:out,<<-DESC,argument: :required,transform: lambda { |value|
100
89
  'directory/file' to save sifted data to; if you only specify a directory or a file, it will attach
101
90
  the appropriate default directory/file name
102
91
  (defaults: #{DEFAULT_SIFT_YASASHII_FILE}, #{DEFAULT_SIFT_FUTSUU_FILE})
103
- EOD
92
+ DESC
104
93
  app.check_empty_opt(:out,value)
105
- end
106
- flag :H,'no-sha256',<<-EOD
94
+ }
95
+ flag :H,'no-sha256',<<-DESC
107
96
  if you used this option with the 'news' command, then you'll also need this option here
108
97
  to not fail on "duplicate" articles; see '#{App::NAME} news'
109
- EOD
98
+ DESC
110
99
  option :t,:title,'title to filter on, where search text only needs to be somewhere in the title',
111
100
  argument: :required
112
101
  option :u,:url,'URL to filter on, where search text only needs to be somewhere in the URL',
113
102
  argument: :required
114
-
103
+
115
104
  run do |opts,args,cmd|
116
105
  puts cmd.help
117
106
  end
118
107
  end
119
-
120
- @sift_easy_cmd = @sift_cmd.define_command() do
108
+
109
+ @sift_easy_cmd = @sift_cmd.define_command do
121
110
  name 'easy'
122
111
  usage 'easy [OPTIONS] [COMMAND]...'
123
112
  aliases :e,:ez
124
113
  summary "Sift NHK News Web Easy (Yasashii) articles data (aliases: #{app.color_alias('e ez')})"
125
-
126
- description <<-EOD
114
+
115
+ description <<-DESC
127
116
  Sift NHK News Web Easy (Yasashii) articles data for the frequency of words &
128
117
  save to file: #{DEFAULT_SIFT_YASASHII_FILE}
129
- EOD
130
-
118
+ DESC
119
+
131
120
  run do |opts,args,cmd|
132
121
  app.refresh_cmd(opts,args,cmd)
133
122
  app.run_sift_cmd(:yasashii)
134
123
  end
135
124
  end
136
-
137
- @sift_regular_cmd = @sift_cmd.define_command() do
125
+
126
+ @sift_regular_cmd = @sift_cmd.define_command do
138
127
  name 'regular'
139
128
  usage 'regular [OPTIONS] [COMMAND]...'
140
129
  aliases :r,:reg
141
130
  summary "Sift NHK News Web Regular (Futsuu) articles data (aliases: #{app.color_alias('r reg')})"
142
-
143
- description <<-EOD
131
+
132
+ description(<<-DESC)
144
133
  Sift NHK News Web Regular (Futsuu) articles data for the frequency of words &
145
134
  save to file: #{DEFAULT_SIFT_FUTSUU_FILE}
146
- EOD
147
-
135
+ DESC
136
+
148
137
  run do |opts,args,cmd|
149
138
  app.refresh_cmd(opts,args,cmd)
150
139
  app.run_sift_cmd(:futsuu)
151
140
  end
152
141
  end
153
142
  end
154
-
143
+
155
144
  def build_sift_filename(filename)
156
145
  @sift_search_criteria = []
157
-
158
- @sift_search_criteria << Util.strip_web_str(@sift_datetime_text.to_s())
159
- @sift_search_criteria << Util.strip_web_str(@cmd_opts[:title].to_s())
160
- @sift_search_criteria << Util.strip_web_str(@cmd_opts[:url].to_s())
161
- @sift_search_criteria.filter!() {|sc| !sc.empty?()}
162
-
163
- clean_regex = /[^[[:alnum:]]\-_\.]+/
164
- clean_search_criteria = ''.dup()
165
-
166
- @sift_search_criteria.each() do |sc|
146
+
147
+ @sift_search_criteria << Util.strip_web_str(@sift_datetime_text.to_s)
148
+ @sift_search_criteria << Util.strip_web_str(@cmd_opts[:title].to_s)
149
+ @sift_search_criteria << Util.strip_web_str(@cmd_opts[:url].to_s)
150
+ @sift_search_criteria.filter! { |sc| !sc.empty? }
151
+
152
+ clean_regex = /[^[[:alnum:]]\-_.]+/
153
+ clean_search_criteria = ''.dup
154
+
155
+ @sift_search_criteria.each do |sc|
167
156
  clean_search_criteria << sc.gsub(clean_regex,'')
168
157
  end
169
-
170
- @sift_search_criteria = @sift_search_criteria.empty?() ? nil : @sift_search_criteria.join(', ')
171
-
158
+
159
+ @sift_search_criteria = @sift_search_criteria.empty? ? nil : @sift_search_criteria.join(', ')
160
+
172
161
  # Limit the file name length.
173
162
  # If length is smaller, [..] still works appropriately.
174
163
  clean_search_criteria = clean_search_criteria[0..32]
175
-
176
- clean_search_criteria.prepend('_') unless clean_search_criteria.empty?()
177
-
164
+
165
+ clean_search_criteria.prepend('_') unless clean_search_criteria.empty?
166
+
178
167
  file_ext = @cmd_opts[:ext]
179
-
180
- if file_ext.nil?()
168
+
169
+ if file_ext.nil?
181
170
  # Try to get from '--out' if it exists.
182
- if !@cmd_opts[:out].nil?()
183
- file_ext = Util.unspace_web_str(File.extname(@cmd_opts[:out])).downcase()
171
+ if !@cmd_opts[:out].nil?
172
+ file_ext = Util.unspace_web_str(File.extname(@cmd_opts[:out])).downcase
184
173
  file_ext = file_ext.sub(/\A\./,'') # Remove '.'; can't be nil for to_sym()
185
- file_ext = file_ext.to_sym()
186
-
174
+ file_ext = file_ext.to_sym
175
+
187
176
  file_ext = nil unless SIFT_EXTS.include?(file_ext)
188
177
  end
189
-
190
- file_ext = DEFAULT_SIFT_EXT if file_ext.nil?()
178
+
179
+ file_ext = DEFAULT_SIFT_EXT if file_ext.nil?
191
180
  @cmd_opts[:ext] = file_ext
192
181
  end
193
-
182
+
194
183
  filename = "#{filename}#{clean_search_criteria}.#{file_ext}"
195
-
184
+
196
185
  return filename
197
186
  end
198
-
187
+
199
188
  def run_sift_cmd(type)
200
189
  news_name = nil
201
-
190
+
202
191
  case type
203
192
  when :futsuu
204
193
  build_in_file(:in,default_dir: News::DEFAULT_DIR,default_filename: FutsuuNews::DEFAULT_FILENAME)
205
194
  build_out_file(:out,default_dir: Sifter::DEFAULT_DIR,
206
195
  default_filename: build_sift_filename(Sifter::DEFAULT_FUTSUU_FILENAME))
207
-
196
+
208
197
  news_name = 'Regular'
209
198
  when :yasashii
210
199
  build_in_file(:in,default_dir: News::DEFAULT_DIR,default_filename: YasashiiNews::DEFAULT_FILENAME)
211
200
  build_out_file(:out,default_dir: Sifter::DEFAULT_DIR,
212
201
  default_filename: build_sift_filename(Sifter::DEFAULT_YASASHII_FILENAME))
213
-
202
+
214
203
  news_name = 'Easy'
215
204
  else
216
205
  raise ArgumentError,"invalid type[#{type}]"
217
206
  end
218
-
207
+
219
208
  return unless check_in_file(:in,empty_ok: false)
220
209
  return unless check_out_file(:out)
221
-
210
+
222
211
  datetime_filter = @cmd_opts[:datetime]
223
212
  dry_run = @cmd_opts[:dry_run]
224
213
  file_ext = @cmd_opts[:ext]
@@ -229,55 +218,55 @@ module CLI
229
218
  out_file = @cmd_opts[:out]
230
219
  title_filter = @cmd_opts[:title]
231
220
  url_filter = @cmd_opts[:url]
232
-
221
+
233
222
  start_spin("Sifting NHK News Web #{news_name} data")
234
-
223
+
235
224
  news = (type == :yasashii) ?
236
225
  YasashiiNews.load_file(in_file,overwrite: no_sha256) :
237
226
  FutsuuNews.load_file(in_file,overwrite: no_sha256)
238
-
227
+
239
228
  sifter = Sifter.new(news)
240
-
241
- sifter.filter_by_datetime(datetime_filter) unless datetime_filter.nil?()
242
- sifter.filter_by_title(title_filter) unless title_filter.nil?()
243
- sifter.filter_by_url(url_filter) unless url_filter.nil?()
229
+
230
+ sifter.filter_by_datetime(datetime_filter) unless datetime_filter.nil?
231
+ sifter.filter_by_title(title_filter) unless title_filter.nil?
232
+ sifter.filter_by_url(url_filter) unless url_filter.nil?
244
233
  sifter.ignore(:defn) if no_defn
245
234
  sifter.ignore(:eng) if no_eng
246
-
247
- sifter.caption = "NHK News Web #{news_name}".dup()
248
-
249
- if !@sift_search_criteria.nil?()
250
- if [:htm,:html].any?(file_ext)
251
- sifter.caption << " &mdash; #{Util.escape_html(@sift_search_criteria.to_s())}"
235
+
236
+ sifter.caption = "NHK News Web #{news_name}".dup
237
+
238
+ if !@sift_search_criteria.nil?
239
+ if %i[htm html].any?(file_ext)
240
+ sifter.caption << " &mdash; #{Util.escape_html(@sift_search_criteria.to_s)}"
252
241
  else
253
242
  sifter.caption << " -- #{@sift_search_criteria}"
254
243
  end
255
244
  end
256
-
245
+
257
246
  case file_ext
258
247
  when :csv
259
- sifter.put_csv!()
248
+ sifter.put_csv!
260
249
  when :htm,:html
261
- sifter.put_html!()
250
+ sifter.put_html!
262
251
  when :json
263
- sifter.put_json!()
252
+ sifter.put_json!
264
253
  when :yaml,:yml
265
- sifter.put_yaml!()
254
+ sifter.put_yaml!
266
255
  else
267
256
  raise ArgumentError,"invalid file ext[#{file_ext}]"
268
257
  end
269
-
270
- stop_spin()
258
+
259
+ stop_spin
271
260
  puts
272
-
261
+
273
262
  if dry_run
274
- puts sifter.to_s()
263
+ puts sifter.to_s
275
264
  else
276
265
  start_spin('Saving sifted data to file')
277
-
266
+
278
267
  sifter.save_file(out_file)
279
-
280
- stop_spin()
268
+
269
+ stop_spin
281
270
  puts "> #{out_file}"
282
271
  end
283
272
  end