nhkore 0.3.14 → 0.3.16
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/Gemfile.lock +41 -41
- data/README.md +3 -4
- data/Rakefile +3 -1
- data/lib/nhkore/app.rb +2 -17
- data/lib/nhkore/article.rb +5 -9
- data/lib/nhkore/article_scraper.rb +15 -14
- data/lib/nhkore/cleaner.rb +0 -12
- data/lib/nhkore/cli/fx_cmd.rb +0 -4
- data/lib/nhkore/cli/get_cmd.rb +0 -4
- data/lib/nhkore/cli/news_cmd.rb +29 -17
- data/lib/nhkore/cli/search_cmd.rb +0 -4
- data/lib/nhkore/cli/sift_cmd.rb +1 -5
- data/lib/nhkore/datetime_parser.rb +1 -5
- data/lib/nhkore/defn.rb +1 -5
- data/lib/nhkore/dict.rb +2 -5
- data/lib/nhkore/dict_scraper.rb +0 -4
- data/lib/nhkore/entry.rb +3 -9
- data/lib/nhkore/error.rb +1 -11
- data/lib/nhkore/fileable.rb +0 -4
- data/lib/nhkore/lib.rb +0 -3
- data/lib/nhkore/missingno.rb +2 -6
- data/lib/nhkore/news.rb +3 -15
- data/lib/nhkore/polisher.rb +0 -12
- data/lib/nhkore/scraper.rb +8 -5
- data/lib/nhkore/search_link.rb +9 -17
- data/lib/nhkore/search_scraper.rb +34 -24
- data/lib/nhkore/sifter.rb +7 -8
- data/lib/nhkore/splitter.rb +0 -18
- data/lib/nhkore/user_agents.rb +1 -4
- data/lib/nhkore/util.rb +0 -4
- data/lib/nhkore/variator.rb +0 -14
- data/lib/nhkore/version.rb +1 -1
- data/lib/nhkore/word.rb +0 -4
- data/lib/nhkore.rb +0 -5
- data/nhkore.gemspec +10 -10
- data/samples/looper.rb +0 -3
- metadata +24 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f23192b04fc6a0c1cf225db4a029e3226346d27c4fe977ee05f1b522c40708bb
|
4
|
+
data.tar.gz: 3e71d5ef9eb60327cb9ced89a819de77b5cd8910e755332d5764dc95e8263f71
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2cec06bd51e86ddd7b30a052b4078a2932344996bd6d97c4b7d927d0bc8fbcecbd2fb21680957551857d3a152d33409aa7ee8e1dd496a271cb31c63cbe2eb2e9
|
7
|
+
data.tar.gz: 186c42412e35567aebe2afee71636fb58a28510bd6ab3e8b65df6adef5373860f25e87be71e5d8499dcf6b6babb0ee3aae796a01f818732bc53ad682e584e701
|
data/CHANGELOG.md
CHANGED
@@ -5,10 +5,17 @@ All notable changes to this project will be documented in this file.
|
|
5
5
|
Format is based on [Keep a Changelog v1.0.0](https://keepachangelog.com/en/1.0.0),
|
6
6
|
and this project adheres to [Semantic Versioning v2.0.0](https://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
|
-
## [[Unreleased]](https://github.com/esotericpig/nhkore/compare/v0.3.
|
8
|
+
## [[Unreleased]](https://github.com/esotericpig/nhkore/compare/v0.3.16...HEAD)
|
9
9
|
-
|
10
10
|
|
11
11
|
|
12
|
+
## [v0.3.16] - 2024-08-14
|
13
|
+
|
14
|
+
### Fixed
|
15
|
+
- Fixed to work with new NHK pages.
|
16
|
+
- Updated gems.
|
17
|
+
|
18
|
+
|
12
19
|
## [v0.3.14] - 2022-07-24
|
13
20
|
|
14
21
|
### Added
|
data/Gemfile.lock
CHANGED
@@ -1,19 +1,19 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
nhkore (0.3.
|
4
|
+
nhkore (0.3.16)
|
5
5
|
attr_bool (~> 0.2)
|
6
6
|
bimyou_segmenter (~> 1.2)
|
7
7
|
cri (~> 2.15)
|
8
|
-
down (~> 5.
|
9
|
-
highline (~>
|
8
|
+
down (~> 5.4)
|
9
|
+
highline (~> 3.1)
|
10
10
|
http-cookie (~> 1.0)
|
11
11
|
japanese_deinflector (~> 0.0)
|
12
|
-
nokogiri (~> 1.
|
12
|
+
nokogiri (~> 1.16)
|
13
13
|
psychgus (~> 1.3)
|
14
|
-
public_suffix (~>
|
14
|
+
public_suffix (~> 6.0)
|
15
15
|
rainbow (~> 3.1)
|
16
|
-
rss (~> 0.
|
16
|
+
rss (~> 0.3)
|
17
17
|
rubyzip (~> 2.3)
|
18
18
|
tiny_segmenter (~> 0.0)
|
19
19
|
tty-progressbar (~> 0.18)
|
@@ -22,43 +22,48 @@ PATH
|
|
22
22
|
GEM
|
23
23
|
remote: https://rubygems.org/
|
24
24
|
specs:
|
25
|
-
addressable (2.8.
|
26
|
-
public_suffix (>= 2.0.2, <
|
25
|
+
addressable (2.8.7)
|
26
|
+
public_suffix (>= 2.0.2, < 7.0)
|
27
27
|
attr_bool (0.2.2)
|
28
28
|
bimyou_segmenter (1.2.0)
|
29
|
-
cri (2.15.
|
30
|
-
domain_name (0.
|
31
|
-
|
32
|
-
down (5.3.1)
|
29
|
+
cri (2.15.12)
|
30
|
+
domain_name (0.6.20240107)
|
31
|
+
down (5.4.2)
|
33
32
|
addressable (~> 2.8)
|
34
|
-
highline (
|
35
|
-
|
33
|
+
highline (3.1.0)
|
34
|
+
reline
|
35
|
+
http-cookie (1.0.6)
|
36
36
|
domain_name (~> 0.5)
|
37
|
+
io-console (0.7.2)
|
37
38
|
japanese_deinflector (0.0.2)
|
38
|
-
mini_portile2 (2.8.
|
39
|
-
minitest (5.
|
40
|
-
nokogiri (1.
|
41
|
-
mini_portile2 (~> 2.8.
|
39
|
+
mini_portile2 (2.8.7)
|
40
|
+
minitest (5.25.0)
|
41
|
+
nokogiri (1.16.7)
|
42
|
+
mini_portile2 (~> 2.8.2)
|
42
43
|
racc (~> 1.4)
|
43
|
-
psych (
|
44
|
+
psych (5.1.2)
|
44
45
|
stringio
|
45
46
|
psychgus (1.3.4)
|
46
47
|
psych (>= 3.0)
|
47
|
-
public_suffix (
|
48
|
-
racc (1.
|
48
|
+
public_suffix (6.0.1)
|
49
|
+
racc (1.8.1)
|
49
50
|
rainbow (3.1.1)
|
50
|
-
rake (13.
|
51
|
+
rake (13.2.1)
|
51
52
|
raketeer (0.2.13)
|
52
53
|
rake
|
53
|
-
rdoc (6.
|
54
|
+
rdoc (6.7.0)
|
54
55
|
psych (>= 4.0.0)
|
55
|
-
redcarpet (3.
|
56
|
-
|
57
|
-
|
56
|
+
redcarpet (3.6.0)
|
57
|
+
reline (0.5.9)
|
58
|
+
io-console (~> 0.5)
|
59
|
+
rexml (3.3.5)
|
60
|
+
strscan
|
61
|
+
rss (0.3.1)
|
58
62
|
rexml
|
59
63
|
rubyzip (2.3.2)
|
60
|
-
stringio (3.
|
64
|
+
stringio (3.1.1)
|
61
65
|
strings-ansi (0.2.0)
|
66
|
+
strscan (3.1.0)
|
62
67
|
tiny_segmenter (0.0.6)
|
63
68
|
tty-cursor (0.7.1)
|
64
69
|
tty-progressbar (0.18.2)
|
@@ -66,16 +71,11 @@ GEM
|
|
66
71
|
tty-cursor (~> 0.7)
|
67
72
|
tty-screen (~> 0.8)
|
68
73
|
unicode-display_width (>= 1.6, < 3.0)
|
69
|
-
tty-screen (0.8.
|
74
|
+
tty-screen (0.8.2)
|
70
75
|
tty-spinner (0.9.3)
|
71
76
|
tty-cursor (~> 0.7)
|
72
|
-
|
73
|
-
|
74
|
-
unf_ext (0.0.8.2)
|
75
|
-
unicode-display_width (2.2.0)
|
76
|
-
webrick (1.7.0)
|
77
|
-
yard (0.9.28)
|
78
|
-
webrick (~> 1.7.0)
|
77
|
+
unicode-display_width (2.5.0)
|
78
|
+
yard (0.9.36)
|
79
79
|
yard_ghurt (1.2.1)
|
80
80
|
rake
|
81
81
|
yard
|
@@ -84,15 +84,15 @@ PLATFORMS
|
|
84
84
|
ruby
|
85
85
|
|
86
86
|
DEPENDENCIES
|
87
|
-
bundler (~> 2.
|
88
|
-
minitest (~> 5.
|
87
|
+
bundler (~> 2.5)
|
88
|
+
minitest (~> 5.25)
|
89
89
|
nhkore!
|
90
|
-
rake (~> 13.
|
90
|
+
rake (~> 13.2)
|
91
91
|
raketeer (~> 0.2)
|
92
|
-
rdoc (~> 6.
|
93
|
-
redcarpet (~> 3.
|
92
|
+
rdoc (~> 6.7)
|
93
|
+
redcarpet (~> 3.6)
|
94
94
|
yard (~> 0.9)
|
95
95
|
yard_ghurt (~> 1.2)
|
96
96
|
|
97
97
|
BUNDLED WITH
|
98
|
-
2.
|
98
|
+
2.5.17
|
data/README.md
CHANGED
@@ -867,12 +867,11 @@ This will update *core/* for you:
|
|
867
867
|
2. Update *core* package:
|
868
868
|
- `$ bundle exec rake update_core`
|
869
869
|
- `$ bundle exec rake clobber pkg_core`
|
870
|
-
3.
|
870
|
+
3. Commit & Push.
|
871
|
+
4. Create a new tag & release:
|
871
872
|
- Note: make sure to add *pkg/nhkore-core.zip*
|
872
873
|
- `$ gh release create v0 pkg/*.gem pkg/*.zip`
|
873
|
-
- `$ git pull`
|
874
|
-
4. Release to *GitHub Packages*:
|
875
|
-
- With *Raketary*: `$ raketary github_pkg`
|
874
|
+
- `$ git pull && git fetch`
|
876
875
|
5. Release to *RubyGems*:
|
877
876
|
- `$ bundle exec rake release`
|
878
877
|
|
data/Rakefile
CHANGED
@@ -56,6 +56,9 @@ task :update_core do |task|
|
|
56
56
|
cmd = ['ruby','-w','./lib/nhkore.rb','-t','300','-m','10']
|
57
57
|
hl = HighLine.new
|
58
58
|
|
59
|
+
next unless sh(*cmd,'se','--show-count','ez')
|
60
|
+
puts
|
61
|
+
|
59
62
|
next unless sh(*cmd,'se','-l','10','ez','bing')
|
60
63
|
next unless hl.agree(continue_msg)
|
61
64
|
puts
|
@@ -74,7 +77,6 @@ task :update_core do |task|
|
|
74
77
|
puts
|
75
78
|
end
|
76
79
|
|
77
|
-
# @since 0.3.6
|
78
80
|
desc 'Update showcase file for release'
|
79
81
|
task :update_showcase do |task|
|
80
82
|
require 'highline'
|
data/lib/nhkore/app.rb
CHANGED
@@ -27,18 +27,11 @@ require 'nhkore/cli/sift_cmd'
|
|
27
27
|
|
28
28
|
|
29
29
|
module NHKore
|
30
|
-
###
|
31
|
-
# @author Jonathan Bradley Whited
|
32
|
-
# @since 0.2.0
|
33
|
-
###
|
34
30
|
module CLI
|
35
31
|
end
|
36
32
|
|
37
33
|
###
|
38
34
|
# For disabling/enabling color output.
|
39
|
-
#
|
40
|
-
# @author Jonathan Bradley Whited
|
41
|
-
# @since 0.2.1
|
42
35
|
###
|
43
36
|
module CriColorExt
|
44
37
|
@color = true
|
@@ -52,10 +45,6 @@ module NHKore
|
|
52
45
|
end
|
53
46
|
end
|
54
47
|
|
55
|
-
###
|
56
|
-
# @author Jonathan Bradley Whited
|
57
|
-
# @since 0.2.0
|
58
|
-
###
|
59
48
|
class App
|
60
49
|
include CLI::FXCmd
|
61
50
|
include CLI::GetCmd
|
@@ -576,10 +565,6 @@ module NHKore
|
|
576
565
|
end
|
577
566
|
end
|
578
567
|
|
579
|
-
###
|
580
|
-
# @author Jonathan Bradley Whited
|
581
|
-
# @since 0.2.0
|
582
|
-
###
|
583
568
|
class NoProgressBar
|
584
569
|
MSG = '%{title}... %{percent}%%'
|
585
570
|
PUT_INTERVAL = 100.0 / 6.25
|
@@ -627,7 +612,7 @@ module NHKore
|
|
627
612
|
|
628
613
|
@tokens[:advance] = percent
|
629
614
|
|
630
|
-
puts
|
615
|
+
puts self
|
631
616
|
end
|
632
617
|
|
633
618
|
def finish
|
@@ -635,7 +620,7 @@ module NHKore
|
|
635
620
|
end
|
636
621
|
|
637
622
|
def start
|
638
|
-
puts
|
623
|
+
puts self
|
639
624
|
end
|
640
625
|
|
641
626
|
def to_s
|
data/lib/nhkore/article.rb
CHANGED
@@ -16,10 +16,6 @@ require 'nhkore/word'
|
|
16
16
|
|
17
17
|
|
18
18
|
module NHKore
|
19
|
-
###
|
20
|
-
# @author Jonathan Bradley Whited
|
21
|
-
# @since 0.2.0
|
22
|
-
###
|
23
19
|
class Article
|
24
20
|
attr_reader :datetime
|
25
21
|
attr_reader :futsuurl
|
@@ -29,7 +25,7 @@ module NHKore
|
|
29
25
|
attr_reader :words
|
30
26
|
|
31
27
|
def initialize
|
32
|
-
super
|
28
|
+
super
|
33
29
|
|
34
30
|
@datetime = nil
|
35
31
|
@futsuurl = nil
|
@@ -101,13 +97,13 @@ module NHKore
|
|
101
97
|
end
|
102
98
|
|
103
99
|
def futsuurl=(value)
|
104
|
-
# Don't store URI, store String.
|
105
|
-
@futsuurl = value
|
100
|
+
# Don't store URI, store String or nil.
|
101
|
+
@futsuurl = value&.to_s
|
106
102
|
end
|
107
103
|
|
108
104
|
def url=(value)
|
109
|
-
# Don't store URI, store String.
|
110
|
-
@url = value
|
105
|
+
# Don't store URI, store String or nil.
|
106
|
+
@url = value&.to_s
|
111
107
|
end
|
112
108
|
|
113
109
|
def to_s(mini: false)
|
@@ -26,10 +26,6 @@ require 'nhkore/word'
|
|
26
26
|
|
27
27
|
|
28
28
|
module NHKore
|
29
|
-
###
|
30
|
-
# @author Jonathan Bradley Whited
|
31
|
-
# @since 0.2.0
|
32
|
-
###
|
33
29
|
class ArticleScraper < Scraper
|
34
30
|
extend AttrBool::Ext
|
35
31
|
|
@@ -139,7 +135,13 @@ module NHKore
|
|
139
135
|
end
|
140
136
|
|
141
137
|
def parse_dicwin_id(str)
|
142
|
-
str = str.
|
138
|
+
str = str.to_s.strip.downcase
|
139
|
+
|
140
|
+
if str.start_with?('id-') # 'id-0000'
|
141
|
+
str = str.gsub(/\D+/,'')
|
142
|
+
else # 'RSHOK-K-003806'
|
143
|
+
# Same.
|
144
|
+
end
|
143
145
|
|
144
146
|
return nil if str.empty?
|
145
147
|
return str
|
@@ -235,8 +237,6 @@ module NHKore
|
|
235
237
|
# Ignore; try again below.
|
236
238
|
Util.warn("could not parse date time[#{tag_text}] from tag[#{tag_name}] at URL[#{@url}]: #{e}")
|
237
239
|
end
|
238
|
-
|
239
|
-
return datetime
|
240
240
|
end
|
241
241
|
|
242
242
|
# Third, try body's id.
|
@@ -393,7 +393,6 @@ module NHKore
|
|
393
393
|
return link
|
394
394
|
end
|
395
395
|
|
396
|
-
# @since 0.3.8
|
397
396
|
# @see https://www3.nhk.or.jp/news/easy/k10012759201000/k10012759201000.html
|
398
397
|
def scrape_ruby_words(tag,result: ScrapeWordsResult.new)
|
399
398
|
words = Word.scrape_ruby_tag(tag,missingno: @missingno,url: @url)
|
@@ -489,15 +488,21 @@ module NHKore
|
|
489
488
|
end
|
490
489
|
|
491
490
|
def scrape_title(doc,article)
|
491
|
+
# Not grabbing `<head><title>` because it doesn't have `<ruby>` tags.
|
492
|
+
|
492
493
|
tag = doc.css('h1.article-main__title')
|
493
494
|
tag_name = nil
|
494
495
|
|
496
|
+
if tag.length < 1
|
497
|
+
# - https://www3.nhk.or.jp/news/easy/em2024081312029/em2024081312029.html
|
498
|
+
tag = doc.css('h1.article-title') # No warning.
|
499
|
+
end
|
500
|
+
|
495
501
|
if tag.length < 1
|
496
502
|
# - https://www3.nhk.or.jp/news/easy/article/disaster_earthquake_illust.html
|
497
503
|
tag_name = 'h1.article-eq__title'
|
498
504
|
tag = doc.css(tag_name)
|
499
505
|
end
|
500
|
-
|
501
506
|
if tag.length < 1 && !@strict
|
502
507
|
# This shouldn't be used except for select sites.
|
503
508
|
# - https://www3.nhk.or.jp/news/easy/tsunamikeihou/index.html
|
@@ -632,16 +637,12 @@ module NHKore
|
|
632
637
|
end
|
633
638
|
end
|
634
639
|
|
635
|
-
###
|
636
|
-
# @author Jonathan Bradley Whited
|
637
|
-
# @since 0.2.0
|
638
|
-
###
|
639
640
|
class ScrapeWordsResult
|
640
641
|
attr_reader :text
|
641
642
|
attr_reader :words
|
642
643
|
|
643
644
|
def initialize
|
644
|
-
super
|
645
|
+
super
|
645
646
|
|
646
647
|
@text = ''.dup
|
647
648
|
@words = []
|
data/lib/nhkore/cleaner.rb
CHANGED
@@ -14,10 +14,6 @@ require 'nhkore/word'
|
|
14
14
|
|
15
15
|
|
16
16
|
module NHKore
|
17
|
-
###
|
18
|
-
# @author Jonathan Bradley Whited
|
19
|
-
# @since 0.2.0
|
20
|
-
###
|
21
17
|
class Cleaner
|
22
18
|
def begin_clean(str)
|
23
19
|
return str
|
@@ -53,10 +49,6 @@ module NHKore
|
|
53
49
|
end
|
54
50
|
end
|
55
51
|
|
56
|
-
###
|
57
|
-
# @author Jonathan Bradley Whited
|
58
|
-
# @since 0.2.0
|
59
|
-
###
|
60
52
|
class BasicCleaner < Cleaner
|
61
53
|
def end_clean(str)
|
62
54
|
# This is very simple, as Splitter will split on punctuation,
|
@@ -70,10 +62,6 @@ module NHKore
|
|
70
62
|
end
|
71
63
|
end
|
72
64
|
|
73
|
-
###
|
74
|
-
# @author Jonathan Bradley Whited
|
75
|
-
# @since 0.2.0
|
76
|
-
###
|
77
65
|
class BestCleaner < BasicCleaner
|
78
66
|
end
|
79
67
|
end
|
data/lib/nhkore/cli/fx_cmd.rb
CHANGED
data/lib/nhkore/cli/get_cmd.rb
CHANGED
@@ -14,10 +14,6 @@ require 'nhkore/util'
|
|
14
14
|
|
15
15
|
module NHKore
|
16
16
|
module CLI
|
17
|
-
###
|
18
|
-
# @author Jonathan Bradley Whited
|
19
|
-
# @since 0.2.0
|
20
|
-
###
|
21
17
|
module GetCmd
|
22
18
|
DEFAULT_GET_CHUNK_SIZE = 4 * 1024
|
23
19
|
DEFAULT_GET_URL_LENGTH = 11_000_000 # Just a generous estimation used as a fallback; may be outdated.
|
data/lib/nhkore/cli/news_cmd.rb
CHANGED
@@ -21,10 +21,6 @@ require 'nhkore/util'
|
|
21
21
|
|
22
22
|
module NHKore
|
23
23
|
module CLI
|
24
|
-
###
|
25
|
-
# @author Jonathan Bradley Whited
|
26
|
-
# @since 0.2.0
|
27
|
-
###
|
28
24
|
module NewsCmd
|
29
25
|
DEFAULT_NEWS_SCRAPE = 1
|
30
26
|
|
@@ -255,16 +251,20 @@ module CLI
|
|
255
251
|
next if !redo_scrapes && scraped_news_article?(news,link)
|
256
252
|
|
257
253
|
url = link.url
|
254
|
+
result = scrape_news_article(url,link: link,new_articles: new_articles,news: news)
|
258
255
|
|
259
|
-
if
|
256
|
+
if result == :scraped
|
257
|
+
scrape_count += 1
|
258
|
+
elsif result == :unscraped
|
259
|
+
next
|
260
|
+
else
|
260
261
|
# --show-dict
|
261
|
-
url =
|
262
|
-
scrape_count = max_scrapes
|
262
|
+
url = result
|
263
|
+
scrape_count = max_scrapes # Break on next iteration for update_spin_detail().
|
263
264
|
end
|
264
265
|
|
265
266
|
# Break on next iteration for update_spin_detail().
|
266
|
-
next if
|
267
|
-
|
267
|
+
next if scrape_count >= max_scrapes
|
268
268
|
sleep_scraper
|
269
269
|
end
|
270
270
|
else
|
@@ -275,9 +275,8 @@ module CLI
|
|
275
275
|
links.add_link(link)
|
276
276
|
end
|
277
277
|
|
278
|
-
scrape_news_article(url,link: link,new_articles: new_articles,news: news)
|
279
|
-
|
280
|
-
scrape_count += 1
|
278
|
+
result = scrape_news_article(url,link: link,new_articles: new_articles,news: news)
|
279
|
+
scrape_count += 1 if result != :unscraped
|
281
280
|
end
|
282
281
|
|
283
282
|
stop_spin
|
@@ -338,9 +337,17 @@ module CLI
|
|
338
337
|
return scraper.url
|
339
338
|
end
|
340
339
|
|
341
|
-
scraper =
|
342
|
-
|
340
|
+
scraper = nil
|
341
|
+
|
342
|
+
begin
|
343
|
+
scraper = ArticleScraper.new(url,**@news_article_scraper_kargs)
|
344
|
+
rescue Http404Error
|
345
|
+
# - https://www3.nhk.or.jp/news/easy/k10014157491000/k10014157491000.html
|
346
|
+
Util.warn("Ignoring URL with 404 error: #{url}.")
|
347
|
+
return :unscraped
|
348
|
+
end
|
343
349
|
|
350
|
+
article = scraper.scrape
|
344
351
|
# run_news_cmd() handles overwriting with --redo or not
|
345
352
|
# using scraped_news_article?().
|
346
353
|
news.add_article(article,overwrite: true)
|
@@ -350,7 +357,7 @@ module CLI
|
|
350
357
|
|
351
358
|
new_articles << article
|
352
359
|
|
353
|
-
return
|
360
|
+
return :scraped # No --show-dict
|
354
361
|
end
|
355
362
|
|
356
363
|
def scraped_news_article?(news,link)
|
@@ -366,10 +373,15 @@ module CLI
|
|
366
373
|
end
|
367
374
|
|
368
375
|
if article.nil?
|
369
|
-
scraper =
|
376
|
+
scraper = nil
|
370
377
|
|
371
|
-
|
378
|
+
begin
|
379
|
+
scraper = ArticleScraper.new(link.url,**@news_article_scraper_kargs)
|
380
|
+
rescue Http404Error
|
381
|
+
return false
|
382
|
+
end
|
372
383
|
|
384
|
+
sha256 = scraper.scrape_sha256_only
|
373
385
|
article = news.article_with_sha256(sha256) if news.sha256?(sha256)
|
374
386
|
end
|
375
387
|
end
|
data/lib/nhkore/cli/sift_cmd.rb
CHANGED
@@ -20,10 +20,6 @@ require 'nhkore/util'
|
|
20
20
|
|
21
21
|
module NHKore
|
22
22
|
module CLI
|
23
|
-
###
|
24
|
-
# @author Jonathan Bradley Whited
|
25
|
-
# @since 0.2.0
|
26
|
-
###
|
27
23
|
module SiftCmd
|
28
24
|
DEFAULT_SIFT_EXT = :csv
|
29
25
|
DEFAULT_SIFT_FUTSUU_FILE = "#{Sifter::DEFAULT_FUTSUU_FILE}{search.criteria}{file.ext}"
|
@@ -260,7 +256,7 @@ module CLI
|
|
260
256
|
puts
|
261
257
|
|
262
258
|
if dry_run
|
263
|
-
puts sifter
|
259
|
+
puts sifter
|
264
260
|
else
|
265
261
|
start_spin('Saving sifted data to file')
|
266
262
|
|
@@ -17,10 +17,6 @@ require 'nhkore/util'
|
|
17
17
|
|
18
18
|
|
19
19
|
module NHKore
|
20
|
-
###
|
21
|
-
# @author Jonathan Bradley Whited
|
22
|
-
# @since 0.3.4
|
23
|
-
###
|
24
20
|
class DatetimeParser
|
25
21
|
extend AttrBool::Ext
|
26
22
|
|
@@ -181,7 +177,7 @@ module NHKore
|
|
181
177
|
return self if @min_or_max
|
182
178
|
|
183
179
|
has_small = false
|
184
|
-
jst_now = Util.jst_now
|
180
|
+
jst_now = Util.jst_now
|
185
181
|
|
186
182
|
# Must be from smallest to biggest.
|
187
183
|
|
data/lib/nhkore/defn.rb
CHANGED
@@ -16,17 +16,13 @@ require 'nhkore/word'
|
|
16
16
|
|
17
17
|
|
18
18
|
module NHKore
|
19
|
-
###
|
20
|
-
# @author Jonathan Bradley Whited
|
21
|
-
# @since 0.2.0
|
22
|
-
###
|
23
19
|
class Defn
|
24
20
|
attr_reader :hyoukis
|
25
21
|
attr_accessor :text
|
26
22
|
attr_reader :words
|
27
23
|
|
28
24
|
def initialize
|
29
|
-
super
|
25
|
+
super
|
30
26
|
|
31
27
|
@hyoukis = []
|
32
28
|
@text = ''.dup
|
data/lib/nhkore/dict.rb
CHANGED
@@ -14,15 +14,11 @@ require 'nhkore/error'
|
|
14
14
|
|
15
15
|
|
16
16
|
module NHKore
|
17
|
-
###
|
18
|
-
# @author Jonathan Bradley Whited
|
19
|
-
# @since 0.2.0
|
20
|
-
###
|
21
17
|
class Dict
|
22
18
|
attr_reader :entries
|
23
19
|
|
24
20
|
def initialize
|
25
|
-
super
|
21
|
+
super
|
26
22
|
|
27
23
|
@entries = {}
|
28
24
|
end
|
@@ -39,6 +35,7 @@ module NHKore
|
|
39
35
|
dict = Dict.new
|
40
36
|
|
41
37
|
hash.each do |id,array|
|
38
|
+
id = id.to_s.strip.downcase # 'RSHOK-K-003806', '0000'
|
42
39
|
entry = Entry.scrape(id,array,missingno: missingno,url: url)
|
43
40
|
|
44
41
|
next if entry.nil?
|
data/lib/nhkore/dict_scraper.rb
CHANGED
data/lib/nhkore/entry.rb
CHANGED
@@ -14,10 +14,6 @@ require 'nhkore/util'
|
|
14
14
|
|
15
15
|
|
16
16
|
module NHKore
|
17
|
-
###
|
18
|
-
# @author Jonathan Bradley Whited
|
19
|
-
# @since 0.2.0
|
20
|
-
###
|
21
17
|
class Entry
|
22
18
|
HYOUKI_SEP = '・'
|
23
19
|
|
@@ -25,18 +21,16 @@ module NHKore
|
|
25
21
|
attr_accessor :id
|
26
22
|
|
27
23
|
def initialize
|
28
|
-
super
|
24
|
+
super
|
29
25
|
|
30
26
|
@defns = []
|
31
27
|
@id = nil
|
32
28
|
end
|
33
29
|
|
34
30
|
def build_defn
|
35
|
-
defns = []
|
36
31
|
i = 0
|
37
|
-
|
38
|
-
|
39
|
-
defns << "#{i += 1})#{defn}" # Japanese parenthesis
|
32
|
+
defns = @defns.map do |defn|
|
33
|
+
"#{i += 1})#{defn}" # Japanese parenthesis
|
40
34
|
end
|
41
35
|
|
42
36
|
return defns.join("\n")
|