nhkore 0.3.17 → 0.3.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +1 -1
- data/CHANGELOG.md +24 -1
- data/Gemfile +14 -1
- data/Gemfile.lock +29 -29
- data/README.md +2 -7
- data/Rakefile +19 -52
- data/bin/nhkore +1 -3
- data/lib/nhkore/app.rb +41 -46
- data/lib/nhkore/article.rb +9 -11
- data/lib/nhkore/article_scraper.rb +30 -29
- data/lib/nhkore/cleaner.rb +1 -3
- data/lib/nhkore/cli/fx_cmd.rb +17 -22
- data/lib/nhkore/cli/get_cmd.rb +5 -7
- data/lib/nhkore/cli/news_cmd.rb +14 -19
- data/lib/nhkore/cli/search_cmd.rb +11 -14
- data/lib/nhkore/cli/sift_cmd.rb +13 -15
- data/lib/nhkore/datetime_parser.rb +35 -37
- data/lib/nhkore/defn.rb +2 -4
- data/lib/nhkore/dict.rb +1 -3
- data/lib/nhkore/dict_scraper.rb +1 -3
- data/lib/nhkore/entry.rb +1 -3
- data/lib/nhkore/error.rb +1 -2
- data/lib/nhkore/fileable.rb +1 -2
- data/lib/nhkore/lib.rb +5 -12
- data/lib/nhkore/missingno.rb +1 -3
- data/lib/nhkore/news.rb +7 -10
- data/lib/nhkore/polisher.rb +1 -3
- data/lib/nhkore/scraper.rb +23 -13
- data/lib/nhkore/search_link.rb +11 -13
- data/lib/nhkore/search_scraper.rb +26 -15
- data/lib/nhkore/sifter.rb +7 -9
- data/lib/nhkore/splitter.rb +1 -3
- data/lib/nhkore/util.rb +8 -8
- data/lib/nhkore/variator.rb +3 -4
- data/lib/nhkore/version.rb +2 -3
- data/lib/nhkore/word.rb +8 -10
- data/lib/nhkore.rb +3 -11
- data/nhkore.gemspec +41 -47
- data/samples/looper.rb +1 -2
- data/test/nhkore/test_helper.rb +1 -8
- data/test/nhkore_test.rb +5 -9
- metadata +55 -139
- data/lib/nhkore/user_agents.rb +0 -1172
- data/yard/templates/default/layout/html/footer.erb +0 -5
data/lib/nhkore/article_scraper.rb
CHANGED
@@ -3,12 +3,11 @@
|
|
3
3
|
|
4
4
|
#--
|
5
5
|
# This file is part of NHKore.
|
6
|
-
# Copyright (c) 2020
|
6
|
+
# Copyright (c) 2020 Bradley Whited
|
7
7
|
#
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
require 'attr_bool'
|
13
12
|
require 'digest'
|
14
13
|
|
@@ -24,7 +23,6 @@ require 'nhkore/util'
|
|
24
23
|
require 'nhkore/variator'
|
25
24
|
require 'nhkore/word'
|
26
25
|
|
27
|
-
|
28
26
|
module NHKore
|
29
27
|
class ArticleScraper < Scraper
|
30
28
|
extend AttrBool::Ext
|
@@ -47,8 +45,8 @@ module NHKore
|
|
47
45
|
# instead of raising an error
|
48
46
|
# @param strict [true,false]
|
49
47
|
def initialize(url,cleaners: [BestCleaner.new],datetime: nil,dict: :scrape,missingno: nil,
|
50
|
-
|
51
|
-
|
48
|
+
polishers: [BestPolisher.new],splitter: BestSplitter.new,strict: true,
|
49
|
+
variators: [BestVariator.new],year: nil,**kargs)
|
52
50
|
super(url,**kargs)
|
53
51
|
|
54
52
|
@cleaners = Array(cleaners)
|
@@ -179,13 +177,13 @@ module NHKore
|
|
179
177
|
|
180
178
|
def scrape_content(doc,article)
|
181
179
|
tag = doc.css('div#js-article-body')
|
182
|
-
tag = doc.css('div.article-main__body') if tag.
|
183
|
-
tag = doc.css('div.article-body') if tag.
|
180
|
+
tag = doc.css('div.article-main__body') if tag.empty?
|
181
|
+
tag = doc.css('div.article-body') if tag.empty?
|
184
182
|
|
185
183
|
# - https://www3.nhk.or.jp/news/easy/tsunamikeihou/index.html
|
186
|
-
tag = doc.css('div#main') if tag.
|
184
|
+
tag = doc.css('div#main') if tag.empty? && !@strict
|
187
185
|
|
188
|
-
if tag.
|
186
|
+
if !tag.empty?
|
189
187
|
text = Util.unspace_web_str(tag.text.to_s)
|
190
188
|
|
191
189
|
if !text.empty?
|
@@ -202,14 +200,14 @@ module NHKore
|
|
202
200
|
raise ScrapeError,"could not scrape content at URL[#{@url}]"
|
203
201
|
end
|
204
202
|
|
205
|
-
def scrape_datetime(doc,futsuurl=nil)
|
203
|
+
def scrape_datetime(doc,futsuurl = nil)
|
206
204
|
year = scrape_year(doc,futsuurl)
|
207
205
|
|
208
206
|
# First, try with the id.
|
209
207
|
tag_name = 'p#js-article-date'
|
210
208
|
tag = doc.css(tag_name)
|
211
209
|
|
212
|
-
if tag.
|
210
|
+
if !tag.empty?
|
213
211
|
tag_text = tag[0].text
|
214
212
|
|
215
213
|
begin
|
@@ -226,7 +224,7 @@ module NHKore
|
|
226
224
|
tag_name = 'p.article-main__date'
|
227
225
|
tag = doc.css(tag_name)
|
228
226
|
|
229
|
-
if tag.
|
227
|
+
if !tag.empty?
|
230
228
|
tag_text = tag[0].text
|
231
229
|
|
232
230
|
begin
|
@@ -244,10 +242,10 @@ module NHKore
|
|
244
242
|
# - 'news20170331_k10010922481000'
|
245
243
|
tag = doc.css('body')
|
246
244
|
|
247
|
-
if tag.
|
245
|
+
if !tag.empty?
|
248
246
|
tag_id = tag[0]['id'].to_s.split('_',2)
|
249
247
|
|
250
|
-
if tag_id.
|
248
|
+
if !tag_id.empty?
|
251
249
|
tag_id = tag_id[0].gsub(/[^[[:digit:]]]+/,'')
|
252
250
|
|
253
251
|
if tag_id.length == 8
|
@@ -272,8 +270,8 @@ module NHKore
|
|
272
270
|
|
273
271
|
begin
|
274
272
|
scraper = DictScraper.new(dict_url,missingno: @missingno,parse_url: false,**@kargs)
|
275
|
-
rescue
|
276
|
-
if retries == 0
|
273
|
+
rescue Http404Error => e
|
274
|
+
if retries == 0
|
277
275
|
read
|
278
276
|
|
279
277
|
scraper = ArticleScraper.new(@url,str_or_io: @str_or_io,**@kargs)
|
@@ -283,7 +281,10 @@ module NHKore
|
|
283
281
|
|
284
282
|
retry
|
285
283
|
else
|
286
|
-
raise e.exception("
|
284
|
+
# raise e.exception("failed to scrape dictionary URL[#{dict_url}] at URL[#{@url}]: #{e}")
|
285
|
+
Util.warn("failed to scrape dictionary URL[#{dict_url}] at URL[#{@url}]: #{e}")
|
286
|
+
@dict = nil
|
287
|
+
return
|
287
288
|
end
|
288
289
|
end
|
289
290
|
|
@@ -297,7 +298,7 @@ module NHKore
|
|
297
298
|
# - 'news20170331_k10010922481000'
|
298
299
|
tag = doc.css('body')
|
299
300
|
|
300
|
-
if tag.
|
301
|
+
if !tag.empty?
|
301
302
|
tag_id = tag[0]['id'].to_s.split('_',2)
|
302
303
|
|
303
304
|
if tag_id.length == 2
|
@@ -360,7 +361,7 @@ module NHKore
|
|
360
361
|
# First, try with the id.
|
361
362
|
tag = doc.css('div#js-regular-news-wrapper')
|
362
363
|
|
363
|
-
if tag.
|
364
|
+
if !tag.empty?
|
364
365
|
link = scrape_link(tag[0])
|
365
366
|
|
366
367
|
return link unless link.nil?
|
@@ -369,7 +370,7 @@ module NHKore
|
|
369
370
|
# Second, try with the class.
|
370
371
|
tag = doc.css('div.link-to-normal')
|
371
372
|
|
372
|
-
if tag.
|
373
|
+
if !tag.empty?
|
373
374
|
link = scrape_link(tag[0])
|
374
375
|
|
375
376
|
return link unless link.nil?
|
@@ -385,7 +386,7 @@ module NHKore
|
|
385
386
|
def scrape_link(tag)
|
386
387
|
link = tag.css('a')
|
387
388
|
|
388
|
-
return nil if link.
|
389
|
+
return nil if link.empty?
|
389
390
|
|
390
391
|
link = Util.unspace_web_str(link[0]['href'].to_s)
|
391
392
|
|
@@ -493,24 +494,24 @@ module NHKore
|
|
493
494
|
tag = doc.css('h1.article-main__title')
|
494
495
|
tag_name = nil
|
495
496
|
|
496
|
-
if tag.
|
497
|
+
if tag.empty?
|
497
498
|
# - https://www3.nhk.or.jp/news/easy/em2024081312029/em2024081312029.html
|
498
499
|
tag = doc.css('h1.article-title') # No warning.
|
499
500
|
end
|
500
501
|
|
501
|
-
if tag.
|
502
|
+
if tag.empty?
|
502
503
|
# - https://www3.nhk.or.jp/news/easy/article/disaster_earthquake_illust.html
|
503
504
|
tag_name = 'h1.article-eq__title'
|
504
505
|
tag = doc.css(tag_name)
|
505
506
|
end
|
506
|
-
if tag.
|
507
|
+
if tag.empty? && !@strict
|
507
508
|
# This shouldn't be used except for select sites.
|
508
509
|
# - https://www3.nhk.or.jp/news/easy/tsunamikeihou/index.html
|
509
510
|
tag_name = 'div#main h2'
|
510
511
|
tag = doc.css(tag_name)
|
511
512
|
end
|
512
513
|
|
513
|
-
if tag.
|
514
|
+
if !tag.empty?
|
514
515
|
Util.warn("using [#{tag_name}] for title at URL[#{@url}]") unless tag_name.nil?
|
515
516
|
|
516
517
|
result = scrape_and_add_words(tag,article)
|
@@ -548,8 +549,8 @@ module NHKore
|
|
548
549
|
|
549
550
|
if klass == 'dicwin' && !id.nil?
|
550
551
|
if dicwin
|
551
|
-
raise ScrapeError,"invalid dicWin class[#{child}] nested inside another dicWin class at" \
|
552
|
-
|
552
|
+
raise ScrapeError,"invalid dicWin class[#{child}] nested inside another dicWin class at " \
|
553
|
+
"URL[#{@url}]"
|
553
554
|
end
|
554
555
|
|
555
556
|
dicwin_id = id
|
@@ -582,11 +583,11 @@ module NHKore
|
|
582
583
|
return result
|
583
584
|
end
|
584
585
|
|
585
|
-
def scrape_year(doc,futsuurl=nil)
|
586
|
+
def scrape_year(doc,futsuurl = nil)
|
586
587
|
# First, try body's id.
|
587
588
|
tag = doc.css('body')
|
588
589
|
|
589
|
-
if tag.
|
590
|
+
if !tag.empty?
|
590
591
|
tag_id = tag[0]['id'].to_s.gsub(/[^[[:digit:]]]+/,'')
|
591
592
|
|
592
593
|
if tag_id.length >= 4
|
data/lib/nhkore/cleaner.rb
CHANGED
@@ -3,16 +3,14 @@
|
|
3
3
|
|
4
4
|
#--
|
5
5
|
# This file is part of NHKore.
|
6
|
-
# Copyright (c) 2020
|
6
|
+
# Copyright (c) 2020 Bradley Whited
|
7
7
|
#
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
require 'nhkore/util'
|
13
12
|
require 'nhkore/word'
|
14
13
|
|
15
|
-
|
16
14
|
module NHKore
|
17
15
|
class Cleaner
|
18
16
|
def begin_clean(str)
|
data/lib/nhkore/cli/fx_cmd.rb
CHANGED
@@ -3,12 +3,11 @@
|
|
3
3
|
|
4
4
|
#--
|
5
5
|
# This file is part of NHKore.
|
6
|
-
# Copyright (c) 2020
|
6
|
+
# Copyright (c) 2020 Bradley Whited
|
7
7
|
#
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
module NHKore
|
13
12
|
module CLI
|
14
13
|
module FXCmd
|
@@ -40,13 +39,11 @@ module CLI
|
|
40
39
|
end
|
41
40
|
|
42
41
|
def test_fx_progress_bar
|
43
|
-
bars =
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
bars = {user: @progress_bar}
|
49
|
-
end
|
42
|
+
bars = if @cmd_opts[:all]
|
43
|
+
{default: :default,classic: :classic,no: :no}
|
44
|
+
else
|
45
|
+
{user: @progress_bar}
|
46
|
+
end
|
50
47
|
|
51
48
|
bars.each do |name,bar|
|
52
49
|
name = name.to_s.capitalize
|
@@ -65,19 +62,17 @@ module CLI
|
|
65
62
|
|
66
63
|
def test_fx_spinner
|
67
64
|
app_spinner = @spinner
|
68
|
-
spinners =
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
}
|
80
|
-
end
|
65
|
+
spinners = if @cmd_opts[:all]
|
66
|
+
{
|
67
|
+
default: App::DEFAULT_SPINNER,
|
68
|
+
classic: App::CLASSIC_SPINNER,
|
69
|
+
no: {},
|
70
|
+
}
|
71
|
+
else
|
72
|
+
{
|
73
|
+
user: app_spinner
|
74
|
+
}
|
75
|
+
end
|
81
76
|
|
82
77
|
spinners.each do |name,spinner|
|
83
78
|
@spinner = spinner
|
data/lib/nhkore/cli/get_cmd.rb
CHANGED
@@ -3,22 +3,20 @@
|
|
3
3
|
|
4
4
|
#--
|
5
5
|
# This file is part of NHKore.
|
6
|
-
# Copyright (c) 2020
|
6
|
+
# Copyright (c) 2020 Bradley Whited
|
7
7
|
#
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
require 'nhkore/util'
|
13
12
|
|
14
|
-
|
15
13
|
module NHKore
|
16
14
|
module CLI
|
17
15
|
module GetCmd
|
18
16
|
DEFAULT_GET_CHUNK_SIZE = 4 * 1024
|
19
17
|
DEFAULT_GET_URL_LENGTH = 11_000_000 # Just a generous estimation used as a fallback; may be outdated.
|
20
18
|
GET_URL_FILENAME = 'nhkore-core.zip'
|
21
|
-
GET_URL = "https://github.com/esotericpig/nhkore/releases/latest/download/#{GET_URL_FILENAME}"
|
19
|
+
GET_URL = "https://github.com/esotericpig/nhkore/releases/latest/download/#{GET_URL_FILENAME}".freeze
|
22
20
|
|
23
21
|
def build_get_cmd
|
24
22
|
app = self
|
@@ -27,8 +25,8 @@ module CLI
|
|
27
25
|
name 'get'
|
28
26
|
usage 'get [OPTIONS] [COMMAND]...'
|
29
27
|
aliases :g
|
30
|
-
summary "Download NHKore's pre-scraped files from the latest release" \
|
31
|
-
"
|
28
|
+
summary "Download NHKore's pre-scraped files from the latest release " \
|
29
|
+
"(aliases: #{app.color_alias('g')})"
|
32
30
|
|
33
31
|
description(<<-DESC)
|
34
32
|
Download NHKore's pre-scraped files from the latest release &
|
@@ -41,7 +39,7 @@ module CLI
|
|
41
39
|
transform: lambda { |value|
|
42
40
|
app.check_empty_opt(:out,value)
|
43
41
|
}
|
44
|
-
flag nil,:'show-url','show download URL and exit (for downloading manually)' do |
|
42
|
+
flag nil,:'show-url','show download URL and exit (for downloading manually)' do |_value,_cmd|
|
45
43
|
puts GET_URL
|
46
44
|
exit
|
47
45
|
end
|
data/lib/nhkore/cli/news_cmd.rb
CHANGED
@@ -3,12 +3,12 @@
|
|
3
3
|
|
4
4
|
#--
|
5
5
|
# This file is part of NHKore.
|
6
|
-
# Copyright (c) 2020
|
6
|
+
# Copyright (c) 2020 Bradley Whited
|
7
7
|
#
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
11
|
+
require 'fileutils'
|
12
12
|
require 'time'
|
13
13
|
|
14
14
|
require 'nhkore/datetime_parser'
|
@@ -18,7 +18,6 @@ require 'nhkore/news'
|
|
18
18
|
require 'nhkore/search_link'
|
19
19
|
require 'nhkore/util'
|
20
20
|
|
21
|
-
|
22
21
|
module NHKore
|
23
22
|
module CLI
|
24
23
|
module NewsCmd
|
@@ -112,7 +111,7 @@ module CLI
|
|
112
111
|
app.check_empty_opt(:url,value)
|
113
112
|
}
|
114
113
|
|
115
|
-
run do |
|
114
|
+
run do |_opts,_args,cmd|
|
116
115
|
puts cmd.help
|
117
116
|
end
|
118
117
|
end
|
@@ -197,10 +196,8 @@ module CLI
|
|
197
196
|
url = in_file.nil? ? Util.strip_web_str(@cmd_opts[:url].to_s) : in_file
|
198
197
|
url = nil if url.empty?
|
199
198
|
|
200
|
-
|
201
|
-
|
202
|
-
return unless check_in_file(:links,empty_ok: false)
|
203
|
-
end
|
199
|
+
# Then we must have a links file that exists.
|
200
|
+
return if url.nil? && !check_in_file(:links,empty_ok: false)
|
204
201
|
|
205
202
|
start_spin("Scraping NHK News Web #{news_name} articles")
|
206
203
|
|
@@ -208,16 +205,14 @@ module CLI
|
|
208
205
|
link_count = -1
|
209
206
|
links = File.exist?(links_file) ? SearchLinks.load_file(links_file) : SearchLinks.new
|
210
207
|
new_articles = [] # For --dry-run
|
211
|
-
news = nil
|
212
208
|
scrape_count = 0
|
213
209
|
|
214
|
-
if File.exist?(out_file)
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
end
|
210
|
+
news = if File.exist?(out_file)
|
211
|
+
(type == :yasashii) ? YasashiiNews.load_file(out_file,overwrite: no_sha256)
|
212
|
+
: FutsuuNews.load_file(out_file,overwrite: no_sha256)
|
213
|
+
else
|
214
|
+
(type == :yasashii) ? YasashiiNews.new : FutsuuNews.new
|
215
|
+
end
|
221
216
|
|
222
217
|
@news_article_scraper_kargs = @scraper_kargs.merge({
|
223
218
|
datetime: datetime,
|
@@ -302,9 +297,9 @@ module CLI
|
|
302
297
|
if show_dict
|
303
298
|
puts @cmd_opts[:show_dict] # Updated in scrape_news_article()
|
304
299
|
elsif dry_run
|
305
|
-
if new_articles.
|
306
|
-
raise CLIError,"scrape_count[#{scrape_count}] != new_articles[#{new_articles.length}];" \
|
307
|
-
|
300
|
+
if new_articles.empty?
|
301
|
+
raise CLIError,"scrape_count[#{scrape_count}] != new_articles[#{new_articles.length}]; " \
|
302
|
+
'internal code is broken'
|
308
303
|
elsif new_articles.length == 1
|
309
304
|
puts new_articles.first
|
310
305
|
else
|
data/lib/nhkore/cli/search_cmd.rb
CHANGED
@@ -3,18 +3,16 @@
|
|
3
3
|
|
4
4
|
#--
|
5
5
|
# This file is part of NHKore.
|
6
|
-
# Copyright (c) 2020
|
6
|
+
# Copyright (c) 2020 Bradley Whited
|
7
7
|
#
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
require 'nhkore/error'
|
13
12
|
require 'nhkore/search_link'
|
14
13
|
require 'nhkore/search_scraper'
|
15
14
|
require 'nhkore/util'
|
16
15
|
|
17
|
-
|
18
16
|
module NHKore
|
19
17
|
module CLI
|
20
18
|
module SearchCmd
|
@@ -29,7 +27,7 @@ module CLI
|
|
29
27
|
|
30
28
|
description <<-DESC
|
31
29
|
Search for links (using a Search Engine, etc.) to NHK News Web (Easy) &
|
32
|
-
save to folder: #{SearchLinks::DEFAULT_DIR}
|
30
|
+
save to folder: '#{SearchLinks::DEFAULT_DIR}'
|
33
31
|
DESC
|
34
32
|
|
35
33
|
option :i,:in,<<-DESC,argument: :required,transform: lambda { |value|
|
@@ -40,7 +38,7 @@ module CLI
|
|
40
38
|
}
|
41
39
|
option :l,:loop,'number of times to repeat the search to ensure results',argument: :required,
|
42
40
|
transform: lambda { |value|
|
43
|
-
value = value.to_i
|
41
|
+
value = value.to_s.strip.to_i
|
44
42
|
value = 1 if value < 1
|
45
43
|
value
|
46
44
|
}
|
@@ -68,8 +66,8 @@ module CLI
|
|
68
66
|
(see '--in' option)
|
69
67
|
DESC
|
70
68
|
|
71
|
-
run do |opts,
|
72
|
-
opts.each do |key,
|
69
|
+
run do |opts,_args,cmd|
|
70
|
+
opts.each do |key,_value|
|
73
71
|
key = key.to_s
|
74
72
|
|
75
73
|
if key.include?('show')
|
@@ -176,16 +174,15 @@ module CLI
|
|
176
174
|
start_spin("Scraping #{search_type}") unless show_count
|
177
175
|
|
178
176
|
is_file = !in_file.nil?
|
179
|
-
links = nil
|
180
177
|
new_links = [] # For --dry-run
|
181
178
|
url = in_file # nil will use default URL, else a file
|
182
179
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
180
|
+
links = if File.exist?(out_file)
|
181
|
+
# Load previous links for 'scraped?' vars.
|
182
|
+
SearchLinks.load_file(out_file)
|
183
|
+
else
|
184
|
+
SearchLinks.new
|
185
|
+
end
|
189
186
|
|
190
187
|
links_count = links.length
|
191
188
|
|
data/lib/nhkore/cli/sift_cmd.rb
CHANGED
@@ -3,12 +3,11 @@
|
|
3
3
|
|
4
4
|
#--
|
5
5
|
# This file is part of NHKore.
|
6
|
-
# Copyright (c) 2020
|
6
|
+
# Copyright (c) 2020 Bradley Whited
|
7
7
|
#
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
require 'date'
|
13
12
|
require 'time'
|
14
13
|
|
@@ -17,13 +16,12 @@ require 'nhkore/news'
|
|
17
16
|
require 'nhkore/sifter'
|
18
17
|
require 'nhkore/util'
|
19
18
|
|
20
|
-
|
21
19
|
module NHKore
|
22
20
|
module CLI
|
23
21
|
module SiftCmd
|
24
22
|
DEFAULT_SIFT_EXT = :csv
|
25
|
-
DEFAULT_SIFT_FUTSUU_FILE = "#{Sifter::DEFAULT_FUTSUU_FILE}{search.criteria}{file.ext}"
|
26
|
-
DEFAULT_SIFT_YASASHII_FILE = "#{Sifter::DEFAULT_YASASHII_FILE}{search.criteria}{file.ext}"
|
23
|
+
DEFAULT_SIFT_FUTSUU_FILE = "#{Sifter::DEFAULT_FUTSUU_FILE}{search.criteria}{file.ext}".freeze
|
24
|
+
DEFAULT_SIFT_YASASHII_FILE = "#{Sifter::DEFAULT_YASASHII_FILE}{search.criteria}{file.ext}".freeze
|
27
25
|
SIFT_EXTS = %i[csv htm html json yaml yml].freeze
|
28
26
|
|
29
27
|
attr_accessor :sift_datetime_text
|
@@ -39,8 +37,8 @@ module CLI
|
|
39
37
|
name 'sift'
|
40
38
|
usage 'sift [OPTIONS] [COMMAND]...'
|
41
39
|
aliases :s
|
42
|
-
summary 'Sift NHK News Web (Easy) articles data for the frequency of words' \
|
43
|
-
"
|
40
|
+
summary 'Sift NHK News Web (Easy) articles data for the frequency of words ' \
|
41
|
+
"(aliases: #{app.color_alias('s')})"
|
44
42
|
|
45
43
|
description(<<-DESC)
|
46
44
|
Sift NHK News Web (Easy) articles data for the frequency of words &
|
@@ -93,11 +91,11 @@ module CLI
|
|
93
91
|
to not fail on "duplicate" articles; see '#{App::NAME} news'
|
94
92
|
DESC
|
95
93
|
option :t,:title,'title to filter on, where search text only needs to be somewhere in the title',
|
96
|
-
|
94
|
+
argument: :required
|
97
95
|
option :u,:url,'URL to filter on, where search text only needs to be somewhere in the URL',
|
98
|
-
|
96
|
+
argument: :required
|
99
97
|
|
100
|
-
run do |
|
98
|
+
run do |_opts,_args,cmd|
|
101
99
|
puts cmd.help
|
102
100
|
end
|
103
101
|
end
|
@@ -232,11 +230,11 @@ module CLI
|
|
232
230
|
sifter.caption = "NHK News Web #{news_name}".dup
|
233
231
|
|
234
232
|
if !@sift_search_criteria.nil?
|
235
|
-
if %i[htm html].any?(file_ext)
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
233
|
+
sifter.caption << if %i[htm html].any?(file_ext)
|
234
|
+
" — #{Util.escape_html(@sift_search_criteria.to_s)}"
|
235
|
+
else
|
236
|
+
" -- #{@sift_search_criteria}"
|
237
|
+
end
|
240
238
|
end
|
241
239
|
|
242
240
|
case file_ext
|