nhkore 0.3.17 → 0.3.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +1 -1
- data/CHANGELOG.md +14 -1
- data/Gemfile +13 -1
- data/Gemfile.lock +27 -29
- data/Rakefile +19 -52
- data/bin/nhkore +0 -2
- data/lib/nhkore/app.rb +40 -42
- data/lib/nhkore/article.rb +8 -10
- data/lib/nhkore/article_scraper.rb +29 -28
- data/lib/nhkore/cleaner.rb +0 -2
- data/lib/nhkore/cli/fx_cmd.rb +16 -21
- data/lib/nhkore/cli/get_cmd.rb +4 -6
- data/lib/nhkore/cli/news_cmd.rb +12 -18
- data/lib/nhkore/cli/search_cmd.rb +8 -11
- data/lib/nhkore/cli/sift_cmd.rb +12 -14
- data/lib/nhkore/datetime_parser.rb +34 -36
- data/lib/nhkore/defn.rb +1 -3
- data/lib/nhkore/dict.rb +0 -2
- data/lib/nhkore/dict_scraper.rb +0 -2
- data/lib/nhkore/entry.rb +0 -2
- data/lib/nhkore/error.rb +0 -1
- data/lib/nhkore/fileable.rb +0 -1
- data/lib/nhkore/lib.rb +0 -2
- data/lib/nhkore/missingno.rb +0 -2
- data/lib/nhkore/news.rb +6 -9
- data/lib/nhkore/polisher.rb +0 -2
- data/lib/nhkore/scraper.rb +10 -8
- data/lib/nhkore/search_link.rb +10 -12
- data/lib/nhkore/search_scraper.rb +11 -9
- data/lib/nhkore/sifter.rb +6 -8
- data/lib/nhkore/splitter.rb +0 -2
- data/lib/nhkore/user_agents.rb +0 -1
- data/lib/nhkore/util.rb +7 -7
- data/lib/nhkore/variator.rb +2 -3
- data/lib/nhkore/version.rb +1 -2
- data/lib/nhkore/word.rb +7 -9
- data/lib/nhkore.rb +1 -3
- data/nhkore.gemspec +35 -44
- data/samples/looper.rb +0 -1
- data/test/nhkore/test_helper.rb +0 -7
- data/test/nhkore_test.rb +4 -8
- metadata +28 -125
- data/yard/templates/default/layout/html/footer.erb +0 -5
@@ -8,7 +8,6 @@
|
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
require 'attr_bool'
|
13
12
|
require 'digest'
|
14
13
|
|
@@ -24,7 +23,6 @@ require 'nhkore/util'
|
|
24
23
|
require 'nhkore/variator'
|
25
24
|
require 'nhkore/word'
|
26
25
|
|
27
|
-
|
28
26
|
module NHKore
|
29
27
|
class ArticleScraper < Scraper
|
30
28
|
extend AttrBool::Ext
|
@@ -47,8 +45,8 @@ module NHKore
|
|
47
45
|
# instead of raising an error
|
48
46
|
# @param strict [true,false]
|
49
47
|
def initialize(url,cleaners: [BestCleaner.new],datetime: nil,dict: :scrape,missingno: nil,
|
50
|
-
|
51
|
-
|
48
|
+
polishers: [BestPolisher.new],splitter: BestSplitter.new,strict: true,
|
49
|
+
variators: [BestVariator.new],year: nil,**kargs)
|
52
50
|
super(url,**kargs)
|
53
51
|
|
54
52
|
@cleaners = Array(cleaners)
|
@@ -179,13 +177,13 @@ module NHKore
|
|
179
177
|
|
180
178
|
def scrape_content(doc,article)
|
181
179
|
tag = doc.css('div#js-article-body')
|
182
|
-
tag = doc.css('div.article-main__body') if tag.
|
183
|
-
tag = doc.css('div.article-body') if tag.
|
180
|
+
tag = doc.css('div.article-main__body') if tag.empty?
|
181
|
+
tag = doc.css('div.article-body') if tag.empty?
|
184
182
|
|
185
183
|
# - https://www3.nhk.or.jp/news/easy/tsunamikeihou/index.html
|
186
|
-
tag = doc.css('div#main') if tag.
|
184
|
+
tag = doc.css('div#main') if tag.empty? && !@strict
|
187
185
|
|
188
|
-
if tag.
|
186
|
+
if !tag.empty?
|
189
187
|
text = Util.unspace_web_str(tag.text.to_s)
|
190
188
|
|
191
189
|
if !text.empty?
|
@@ -202,14 +200,14 @@ module NHKore
|
|
202
200
|
raise ScrapeError,"could not scrape content at URL[#{@url}]"
|
203
201
|
end
|
204
202
|
|
205
|
-
def scrape_datetime(doc,futsuurl=nil)
|
203
|
+
def scrape_datetime(doc,futsuurl = nil)
|
206
204
|
year = scrape_year(doc,futsuurl)
|
207
205
|
|
208
206
|
# First, try with the id.
|
209
207
|
tag_name = 'p#js-article-date'
|
210
208
|
tag = doc.css(tag_name)
|
211
209
|
|
212
|
-
if tag.
|
210
|
+
if !tag.empty?
|
213
211
|
tag_text = tag[0].text
|
214
212
|
|
215
213
|
begin
|
@@ -226,7 +224,7 @@ module NHKore
|
|
226
224
|
tag_name = 'p.article-main__date'
|
227
225
|
tag = doc.css(tag_name)
|
228
226
|
|
229
|
-
if tag.
|
227
|
+
if !tag.empty?
|
230
228
|
tag_text = tag[0].text
|
231
229
|
|
232
230
|
begin
|
@@ -244,10 +242,10 @@ module NHKore
|
|
244
242
|
# - 'news20170331_k10010922481000'
|
245
243
|
tag = doc.css('body')
|
246
244
|
|
247
|
-
if tag.
|
245
|
+
if !tag.empty?
|
248
246
|
tag_id = tag[0]['id'].to_s.split('_',2)
|
249
247
|
|
250
|
-
if tag_id.
|
248
|
+
if !tag_id.empty?
|
251
249
|
tag_id = tag_id[0].gsub(/[^[[:digit:]]]+/,'')
|
252
250
|
|
253
251
|
if tag_id.length == 8
|
@@ -272,8 +270,8 @@ module NHKore
|
|
272
270
|
|
273
271
|
begin
|
274
272
|
scraper = DictScraper.new(dict_url,missingno: @missingno,parse_url: false,**@kargs)
|
275
|
-
rescue
|
276
|
-
if retries == 0
|
273
|
+
rescue Http404Error => e
|
274
|
+
if retries == 0
|
277
275
|
read
|
278
276
|
|
279
277
|
scraper = ArticleScraper.new(@url,str_or_io: @str_or_io,**@kargs)
|
@@ -283,7 +281,10 @@ module NHKore
|
|
283
281
|
|
284
282
|
retry
|
285
283
|
else
|
286
|
-
raise e.exception("
|
284
|
+
# raise e.exception("failed to scrape dictionary URL[#{dict_url}] at URL[#{@url}]: #{e}")
|
285
|
+
Util.warn("failed to scrape dictionary URL[#{dict_url}] at URL[#{@url}]: #{e}")
|
286
|
+
@dict = nil
|
287
|
+
return
|
287
288
|
end
|
288
289
|
end
|
289
290
|
|
@@ -297,7 +298,7 @@ module NHKore
|
|
297
298
|
# - 'news20170331_k10010922481000'
|
298
299
|
tag = doc.css('body')
|
299
300
|
|
300
|
-
if tag.
|
301
|
+
if !tag.empty?
|
301
302
|
tag_id = tag[0]['id'].to_s.split('_',2)
|
302
303
|
|
303
304
|
if tag_id.length == 2
|
@@ -360,7 +361,7 @@ module NHKore
|
|
360
361
|
# First, try with the id.
|
361
362
|
tag = doc.css('div#js-regular-news-wrapper')
|
362
363
|
|
363
|
-
if tag.
|
364
|
+
if !tag.empty?
|
364
365
|
link = scrape_link(tag[0])
|
365
366
|
|
366
367
|
return link unless link.nil?
|
@@ -369,7 +370,7 @@ module NHKore
|
|
369
370
|
# Second, try with the class.
|
370
371
|
tag = doc.css('div.link-to-normal')
|
371
372
|
|
372
|
-
if tag.
|
373
|
+
if !tag.empty?
|
373
374
|
link = scrape_link(tag[0])
|
374
375
|
|
375
376
|
return link unless link.nil?
|
@@ -385,7 +386,7 @@ module NHKore
|
|
385
386
|
def scrape_link(tag)
|
386
387
|
link = tag.css('a')
|
387
388
|
|
388
|
-
return nil if link.
|
389
|
+
return nil if link.empty?
|
389
390
|
|
390
391
|
link = Util.unspace_web_str(link[0]['href'].to_s)
|
391
392
|
|
@@ -493,24 +494,24 @@ module NHKore
|
|
493
494
|
tag = doc.css('h1.article-main__title')
|
494
495
|
tag_name = nil
|
495
496
|
|
496
|
-
if tag.
|
497
|
+
if tag.empty?
|
497
498
|
# - https://www3.nhk.or.jp/news/easy/em2024081312029/em2024081312029.html
|
498
499
|
tag = doc.css('h1.article-title') # No warning.
|
499
500
|
end
|
500
501
|
|
501
|
-
if tag.
|
502
|
+
if tag.empty?
|
502
503
|
# - https://www3.nhk.or.jp/news/easy/article/disaster_earthquake_illust.html
|
503
504
|
tag_name = 'h1.article-eq__title'
|
504
505
|
tag = doc.css(tag_name)
|
505
506
|
end
|
506
|
-
if tag.
|
507
|
+
if tag.empty? && !@strict
|
507
508
|
# This shouldn't be used except for select sites.
|
508
509
|
# - https://www3.nhk.or.jp/news/easy/tsunamikeihou/index.html
|
509
510
|
tag_name = 'div#main h2'
|
510
511
|
tag = doc.css(tag_name)
|
511
512
|
end
|
512
513
|
|
513
|
-
if tag.
|
514
|
+
if !tag.empty?
|
514
515
|
Util.warn("using [#{tag_name}] for title at URL[#{@url}]") unless tag_name.nil?
|
515
516
|
|
516
517
|
result = scrape_and_add_words(tag,article)
|
@@ -548,8 +549,8 @@ module NHKore
|
|
548
549
|
|
549
550
|
if klass == 'dicwin' && !id.nil?
|
550
551
|
if dicwin
|
551
|
-
raise ScrapeError,"invalid dicWin class[#{child}] nested inside another dicWin class at" \
|
552
|
-
|
552
|
+
raise ScrapeError,"invalid dicWin class[#{child}] nested inside another dicWin class at " \
|
553
|
+
"URL[#{@url}]"
|
553
554
|
end
|
554
555
|
|
555
556
|
dicwin_id = id
|
@@ -582,11 +583,11 @@ module NHKore
|
|
582
583
|
return result
|
583
584
|
end
|
584
585
|
|
585
|
-
def scrape_year(doc,futsuurl=nil)
|
586
|
+
def scrape_year(doc,futsuurl = nil)
|
586
587
|
# First, try body's id.
|
587
588
|
tag = doc.css('body')
|
588
589
|
|
589
|
-
if tag.
|
590
|
+
if !tag.empty?
|
590
591
|
tag_id = tag[0]['id'].to_s.gsub(/[^[[:digit:]]]+/,'')
|
591
592
|
|
592
593
|
if tag_id.length >= 4
|
data/lib/nhkore/cleaner.rb
CHANGED
data/lib/nhkore/cli/fx_cmd.rb
CHANGED
@@ -8,7 +8,6 @@
|
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
module NHKore
|
13
12
|
module CLI
|
14
13
|
module FXCmd
|
@@ -40,13 +39,11 @@ module CLI
|
|
40
39
|
end
|
41
40
|
|
42
41
|
def test_fx_progress_bar
|
43
|
-
bars =
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
bars = {user: @progress_bar}
|
49
|
-
end
|
42
|
+
bars = if @cmd_opts[:all]
|
43
|
+
{default: :default,classic: :classic,no: :no}
|
44
|
+
else
|
45
|
+
{user: @progress_bar}
|
46
|
+
end
|
50
47
|
|
51
48
|
bars.each do |name,bar|
|
52
49
|
name = name.to_s.capitalize
|
@@ -65,19 +62,17 @@ module CLI
|
|
65
62
|
|
66
63
|
def test_fx_spinner
|
67
64
|
app_spinner = @spinner
|
68
|
-
spinners =
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
}
|
80
|
-
end
|
65
|
+
spinners = if @cmd_opts[:all]
|
66
|
+
{
|
67
|
+
default: App::DEFAULT_SPINNER,
|
68
|
+
classic: App::CLASSIC_SPINNER,
|
69
|
+
no: {},
|
70
|
+
}
|
71
|
+
else
|
72
|
+
{
|
73
|
+
user: app_spinner
|
74
|
+
}
|
75
|
+
end
|
81
76
|
|
82
77
|
spinners.each do |name,spinner|
|
83
78
|
@spinner = spinner
|
data/lib/nhkore/cli/get_cmd.rb
CHANGED
@@ -8,17 +8,15 @@
|
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
require 'nhkore/util'
|
13
12
|
|
14
|
-
|
15
13
|
module NHKore
|
16
14
|
module CLI
|
17
15
|
module GetCmd
|
18
16
|
DEFAULT_GET_CHUNK_SIZE = 4 * 1024
|
19
17
|
DEFAULT_GET_URL_LENGTH = 11_000_000 # Just a generous estimation used as a fallback; may be outdated.
|
20
18
|
GET_URL_FILENAME = 'nhkore-core.zip'
|
21
|
-
GET_URL = "https://github.com/esotericpig/nhkore/releases/latest/download/#{GET_URL_FILENAME}"
|
19
|
+
GET_URL = "https://github.com/esotericpig/nhkore/releases/latest/download/#{GET_URL_FILENAME}".freeze
|
22
20
|
|
23
21
|
def build_get_cmd
|
24
22
|
app = self
|
@@ -27,8 +25,8 @@ module CLI
|
|
27
25
|
name 'get'
|
28
26
|
usage 'get [OPTIONS] [COMMAND]...'
|
29
27
|
aliases :g
|
30
|
-
summary "Download NHKore's pre-scraped files from the latest release" \
|
31
|
-
"
|
28
|
+
summary "Download NHKore's pre-scraped files from the latest release " \
|
29
|
+
"(aliases: #{app.color_alias('g')})"
|
32
30
|
|
33
31
|
description(<<-DESC)
|
34
32
|
Download NHKore's pre-scraped files from the latest release &
|
@@ -41,7 +39,7 @@ module CLI
|
|
41
39
|
transform: lambda { |value|
|
42
40
|
app.check_empty_opt(:out,value)
|
43
41
|
}
|
44
|
-
flag nil,:'show-url','show download URL and exit (for downloading manually)' do |
|
42
|
+
flag nil,:'show-url','show download URL and exit (for downloading manually)' do |_value,_cmd|
|
45
43
|
puts GET_URL
|
46
44
|
exit
|
47
45
|
end
|
data/lib/nhkore/cli/news_cmd.rb
CHANGED
@@ -8,7 +8,6 @@
|
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
require 'time'
|
13
12
|
|
14
13
|
require 'nhkore/datetime_parser'
|
@@ -18,7 +17,6 @@ require 'nhkore/news'
|
|
18
17
|
require 'nhkore/search_link'
|
19
18
|
require 'nhkore/util'
|
20
19
|
|
21
|
-
|
22
20
|
module NHKore
|
23
21
|
module CLI
|
24
22
|
module NewsCmd
|
@@ -112,7 +110,7 @@ module CLI
|
|
112
110
|
app.check_empty_opt(:url,value)
|
113
111
|
}
|
114
112
|
|
115
|
-
run do |
|
113
|
+
run do |_opts,_args,cmd|
|
116
114
|
puts cmd.help
|
117
115
|
end
|
118
116
|
end
|
@@ -197,10 +195,8 @@ module CLI
|
|
197
195
|
url = in_file.nil? ? Util.strip_web_str(@cmd_opts[:url].to_s) : in_file
|
198
196
|
url = nil if url.empty?
|
199
197
|
|
200
|
-
|
201
|
-
|
202
|
-
return unless check_in_file(:links,empty_ok: false)
|
203
|
-
end
|
198
|
+
# Then we must have a links file that exists.
|
199
|
+
return if url.nil? && !check_in_file(:links,empty_ok: false)
|
204
200
|
|
205
201
|
start_spin("Scraping NHK News Web #{news_name} articles")
|
206
202
|
|
@@ -208,16 +204,14 @@ module CLI
|
|
208
204
|
link_count = -1
|
209
205
|
links = File.exist?(links_file) ? SearchLinks.load_file(links_file) : SearchLinks.new
|
210
206
|
new_articles = [] # For --dry-run
|
211
|
-
news = nil
|
212
207
|
scrape_count = 0
|
213
208
|
|
214
|
-
if File.exist?(out_file)
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
end
|
209
|
+
news = if File.exist?(out_file)
|
210
|
+
(type == :yasashii) ? YasashiiNews.load_file(out_file,overwrite: no_sha256)
|
211
|
+
: FutsuuNews.load_file(out_file,overwrite: no_sha256)
|
212
|
+
else
|
213
|
+
(type == :yasashii) ? YasashiiNews.new : FutsuuNews.new
|
214
|
+
end
|
221
215
|
|
222
216
|
@news_article_scraper_kargs = @scraper_kargs.merge({
|
223
217
|
datetime: datetime,
|
@@ -302,9 +296,9 @@ module CLI
|
|
302
296
|
if show_dict
|
303
297
|
puts @cmd_opts[:show_dict] # Updated in scrape_news_article()
|
304
298
|
elsif dry_run
|
305
|
-
if new_articles.
|
306
|
-
raise CLIError,"scrape_count[#{scrape_count}] != new_articles[#{new_articles.length}];" \
|
307
|
-
|
299
|
+
if new_articles.empty?
|
300
|
+
raise CLIError,"scrape_count[#{scrape_count}] != new_articles[#{new_articles.length}]; " \
|
301
|
+
'internal code is broken'
|
308
302
|
elsif new_articles.length == 1
|
309
303
|
puts new_articles.first
|
310
304
|
else
|
@@ -8,13 +8,11 @@
|
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
require 'nhkore/error'
|
13
12
|
require 'nhkore/search_link'
|
14
13
|
require 'nhkore/search_scraper'
|
15
14
|
require 'nhkore/util'
|
16
15
|
|
17
|
-
|
18
16
|
module NHKore
|
19
17
|
module CLI
|
20
18
|
module SearchCmd
|
@@ -68,8 +66,8 @@ module CLI
|
|
68
66
|
(see '--in' option)
|
69
67
|
DESC
|
70
68
|
|
71
|
-
run do |opts,
|
72
|
-
opts.each do |key,
|
69
|
+
run do |opts,_args,cmd|
|
70
|
+
opts.each do |key,_value|
|
73
71
|
key = key.to_s
|
74
72
|
|
75
73
|
if key.include?('show')
|
@@ -176,16 +174,15 @@ module CLI
|
|
176
174
|
start_spin("Scraping #{search_type}") unless show_count
|
177
175
|
|
178
176
|
is_file = !in_file.nil?
|
179
|
-
links = nil
|
180
177
|
new_links = [] # For --dry-run
|
181
178
|
url = in_file # nil will use default URL, else a file
|
182
179
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
180
|
+
links = if File.exist?(out_file)
|
181
|
+
# Load previous links for 'scraped?' vars.
|
182
|
+
SearchLinks.load_file(out_file)
|
183
|
+
else
|
184
|
+
SearchLinks.new
|
185
|
+
end
|
189
186
|
|
190
187
|
links_count = links.length
|
191
188
|
|
data/lib/nhkore/cli/sift_cmd.rb
CHANGED
@@ -8,7 +8,6 @@
|
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
require 'date'
|
13
12
|
require 'time'
|
14
13
|
|
@@ -17,13 +16,12 @@ require 'nhkore/news'
|
|
17
16
|
require 'nhkore/sifter'
|
18
17
|
require 'nhkore/util'
|
19
18
|
|
20
|
-
|
21
19
|
module NHKore
|
22
20
|
module CLI
|
23
21
|
module SiftCmd
|
24
22
|
DEFAULT_SIFT_EXT = :csv
|
25
|
-
DEFAULT_SIFT_FUTSUU_FILE = "#{Sifter::DEFAULT_FUTSUU_FILE}{search.criteria}{file.ext}"
|
26
|
-
DEFAULT_SIFT_YASASHII_FILE = "#{Sifter::DEFAULT_YASASHII_FILE}{search.criteria}{file.ext}"
|
23
|
+
DEFAULT_SIFT_FUTSUU_FILE = "#{Sifter::DEFAULT_FUTSUU_FILE}{search.criteria}{file.ext}".freeze
|
24
|
+
DEFAULT_SIFT_YASASHII_FILE = "#{Sifter::DEFAULT_YASASHII_FILE}{search.criteria}{file.ext}".freeze
|
27
25
|
SIFT_EXTS = %i[csv htm html json yaml yml].freeze
|
28
26
|
|
29
27
|
attr_accessor :sift_datetime_text
|
@@ -39,8 +37,8 @@ module CLI
|
|
39
37
|
name 'sift'
|
40
38
|
usage 'sift [OPTIONS] [COMMAND]...'
|
41
39
|
aliases :s
|
42
|
-
summary 'Sift NHK News Web (Easy) articles data for the frequency of words' \
|
43
|
-
"
|
40
|
+
summary 'Sift NHK News Web (Easy) articles data for the frequency of words ' \
|
41
|
+
"(aliases: #{app.color_alias('s')})"
|
44
42
|
|
45
43
|
description(<<-DESC)
|
46
44
|
Sift NHK News Web (Easy) articles data for the frequency of words &
|
@@ -93,11 +91,11 @@ module CLI
|
|
93
91
|
to not fail on "duplicate" articles; see '#{App::NAME} news'
|
94
92
|
DESC
|
95
93
|
option :t,:title,'title to filter on, where search text only needs to be somewhere in the title',
|
96
|
-
|
94
|
+
argument: :required
|
97
95
|
option :u,:url,'URL to filter on, where search text only needs to be somewhere in the URL',
|
98
|
-
|
96
|
+
argument: :required
|
99
97
|
|
100
|
-
run do |
|
98
|
+
run do |_opts,_args,cmd|
|
101
99
|
puts cmd.help
|
102
100
|
end
|
103
101
|
end
|
@@ -232,11 +230,11 @@ module CLI
|
|
232
230
|
sifter.caption = "NHK News Web #{news_name}".dup
|
233
231
|
|
234
232
|
if !@sift_search_criteria.nil?
|
235
|
-
if %i[htm html].any?(file_ext)
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
233
|
+
sifter.caption << if %i[htm html].any?(file_ext)
|
234
|
+
" — #{Util.escape_html(@sift_search_criteria.to_s)}"
|
235
|
+
else
|
236
|
+
" -- #{@sift_search_criteria}"
|
237
|
+
end
|
240
238
|
end
|
241
239
|
|
242
240
|
case file_ext
|
@@ -8,14 +8,12 @@
|
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
require 'attr_bool'
|
13
12
|
require 'date'
|
14
13
|
require 'time'
|
15
14
|
|
16
15
|
require 'nhkore/util'
|
17
16
|
|
18
|
-
|
19
17
|
module NHKore
|
20
18
|
class DatetimeParser
|
21
19
|
extend AttrBool::Ext
|
@@ -55,10 +53,10 @@ module NHKore
|
|
55
53
|
# Assume this millennium.
|
56
54
|
# So if the current year is 2200, and year is 150,
|
57
55
|
# then it will be 2000 + 150 = 2150.
|
58
|
-
|
56
|
+
elsif millennium >= 1000
|
59
57
|
# Assume previous millennium (2000 -> 1000),
|
60
58
|
# so year 999 will become 1999.
|
61
|
-
millennium -= 1000
|
59
|
+
millennium -= 1000
|
62
60
|
end
|
63
61
|
|
64
62
|
year = millennium + year
|
@@ -155,7 +153,7 @@ module NHKore
|
|
155
153
|
|
156
154
|
attr_reader? :min_or_max
|
157
155
|
|
158
|
-
def initialize(year=nil,month=nil,day=nil,hour=nil,min=nil,sec=nil)
|
156
|
+
def initialize(year = nil,month = nil,day = nil,hour = nil,min = nil,sec = nil)
|
159
157
|
super()
|
160
158
|
|
161
159
|
set!(year,month,day,hour,min,sec)
|
@@ -185,66 +183,66 @@ module NHKore
|
|
185
183
|
@sec = other.sec unless @has_sec
|
186
184
|
has_small = true
|
187
185
|
else
|
188
|
-
if has_small
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
186
|
+
@sec = if has_small
|
187
|
+
jst_now.sec
|
188
|
+
else
|
189
|
+
is_from ? 0 : 59
|
190
|
+
end
|
193
191
|
end
|
194
192
|
|
195
193
|
if @has_min || other.has_min?
|
196
194
|
@min = other.min unless @has_min
|
197
195
|
has_small = true
|
198
196
|
else
|
199
|
-
if has_small
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
197
|
+
@min = if has_small
|
198
|
+
jst_now.min
|
199
|
+
else
|
200
|
+
is_from ? 0 : 59
|
201
|
+
end
|
204
202
|
end
|
205
203
|
|
206
204
|
if @has_hour || other.has_hour?
|
207
205
|
@hour = other.hour unless @has_hour
|
208
206
|
has_small = true
|
209
207
|
else
|
210
|
-
if has_small
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
208
|
+
@hour = if has_small
|
209
|
+
jst_now.hour
|
210
|
+
else
|
211
|
+
is_from ? 0 : 23
|
212
|
+
end
|
215
213
|
end
|
216
214
|
|
217
215
|
if @has_day || other.has_day?
|
218
216
|
@day = other.day unless @has_day
|
219
217
|
has_small = true
|
220
218
|
else
|
221
|
-
if has_small
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
219
|
+
@day = if has_small
|
220
|
+
jst_now.day
|
221
|
+
else
|
222
|
+
is_from ? 1 : :last_day
|
223
|
+
end
|
226
224
|
end
|
227
225
|
|
228
226
|
if @has_month || other.has_month?
|
229
227
|
@month = other.month unless @has_month
|
230
228
|
has_small = true
|
231
229
|
else
|
232
|
-
if has_small
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
230
|
+
@month = if has_small
|
231
|
+
jst_now.month
|
232
|
+
else
|
233
|
+
is_from ? 1 : 12
|
234
|
+
end
|
237
235
|
end
|
238
236
|
|
239
237
|
if @has_year || other.has_year?
|
240
238
|
@year = other.year unless @has_year
|
241
239
|
has_small = true # rubocop:disable Lint/UselessAssignment
|
242
240
|
else
|
243
|
-
if has_small
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
241
|
+
@year = if has_small
|
242
|
+
jst_now.year
|
243
|
+
else
|
244
|
+
is_from ? Util::MIN_SANE_YEAR : jst_now.year
|
245
|
+
end
|
248
246
|
end
|
249
247
|
|
250
248
|
# Must be after setting @year & @month.
|
@@ -289,7 +287,7 @@ module NHKore
|
|
289
287
|
return self
|
290
288
|
end
|
291
289
|
|
292
|
-
def set!(year=nil,month=nil,day=nil,hour=nil,min=nil,sec=nil)
|
290
|
+
def set!(year = nil,month = nil,day = nil,hour = nil,min = nil,sec = nil)
|
293
291
|
@year = year
|
294
292
|
@month = month
|
295
293
|
@day = day
|
data/lib/nhkore/defn.rb
CHANGED
@@ -8,13 +8,11 @@
|
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
require 'nokogiri'
|
13
12
|
|
14
13
|
require 'nhkore/util'
|
15
14
|
require 'nhkore/word'
|
16
15
|
|
17
|
-
|
18
16
|
module NHKore
|
19
17
|
class Defn
|
20
18
|
attr_reader :hyoukis
|
@@ -35,7 +33,7 @@ module NHKore
|
|
35
33
|
|
36
34
|
hyoukis = hash['hyouki']
|
37
35
|
|
38
|
-
hyoukis&.each
|
36
|
+
hyoukis&.each do |hyouki|
|
39
37
|
next if hyouki.nil?
|
40
38
|
next if (hyouki = Util.strip_web_str(hyouki)).empty?
|
41
39
|
|
data/lib/nhkore/dict.rb
CHANGED
data/lib/nhkore/dict_scraper.rb
CHANGED