nhkore 0.3.7 → 0.3.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +53 -2
- data/Gemfile +0 -18
- data/Gemfile.lock +36 -33
- data/README.md +36 -30
- data/Rakefile +38 -52
- data/bin/nhkore +4 -15
- data/lib/nhkore/app.rb +235 -234
- data/lib/nhkore/article.rb +39 -53
- data/lib/nhkore/article_scraper.rb +293 -285
- data/lib/nhkore/cleaner.rb +20 -32
- data/lib/nhkore/cli/fx_cmd.rb +41 -53
- data/lib/nhkore/cli/get_cmd.rb +59 -70
- data/lib/nhkore/cli/news_cmd.rb +143 -153
- data/lib/nhkore/cli/search_cmd.rb +108 -118
- data/lib/nhkore/cli/sift_cmd.rb +109 -120
- data/lib/nhkore/datetime_parser.rb +88 -104
- data/lib/nhkore/defn.rb +48 -55
- data/lib/nhkore/dict.rb +26 -38
- data/lib/nhkore/dict_scraper.rb +31 -40
- data/lib/nhkore/entry.rb +43 -55
- data/lib/nhkore/error.rb +16 -21
- data/lib/nhkore/fileable.rb +10 -21
- data/lib/nhkore/lib.rb +5 -17
- data/lib/nhkore/missingno.rb +21 -33
- data/lib/nhkore/news.rb +58 -72
- data/lib/nhkore/polisher.rb +22 -34
- data/lib/nhkore/scraper.rb +74 -83
- data/lib/nhkore/search_link.rb +62 -76
- data/lib/nhkore/search_scraper.rb +81 -92
- data/lib/nhkore/sifter.rb +157 -171
- data/lib/nhkore/splitter.rb +19 -31
- data/lib/nhkore/user_agents.rb +28 -32
- data/lib/nhkore/util.rb +72 -84
- data/lib/nhkore/variator.rb +20 -32
- data/lib/nhkore/version.rb +4 -16
- data/lib/nhkore/word.rb +99 -97
- data/lib/nhkore.rb +8 -20
- data/nhkore.gemspec +30 -51
- data/samples/looper.rb +18 -29
- data/test/nhkore/test_helper.rb +3 -15
- data/test/nhkore_test.rb +6 -18
- metadata +33 -24
data/lib/nhkore/article.rb
CHANGED
@@ -1,23 +1,11 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of NHKore.
|
7
|
-
# Copyright (c) 2020 Jonathan Bradley Whited
|
8
|
-
#
|
9
|
-
# NHKore is free software: you can redistribute it and/or modify
|
10
|
-
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# NHKore is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU Lesser General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU Lesser General Public License
|
20
|
-
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2020-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
@@ -29,7 +17,7 @@ require 'nhkore/word'
|
|
29
17
|
|
30
18
|
module NHKore
|
31
19
|
###
|
32
|
-
# @author Jonathan Bradley Whited
|
20
|
+
# @author Jonathan Bradley Whited
|
33
21
|
# @since 0.2.0
|
34
22
|
###
|
35
23
|
class Article
|
@@ -39,10 +27,10 @@ module NHKore
|
|
39
27
|
attr_accessor :title
|
40
28
|
attr_reader :url
|
41
29
|
attr_reader :words
|
42
|
-
|
43
|
-
def initialize
|
30
|
+
|
31
|
+
def initialize
|
44
32
|
super()
|
45
|
-
|
33
|
+
|
46
34
|
@datetime = nil
|
47
35
|
@futsuurl = nil
|
48
36
|
@sha256 = nil
|
@@ -50,7 +38,7 @@ module NHKore
|
|
50
38
|
@url = nil
|
51
39
|
@words = {}
|
52
40
|
end
|
53
|
-
|
41
|
+
|
54
42
|
# Why does this not look up the kanji/kana only and then update the other
|
55
43
|
# kana/kanji part appropriately?
|
56
44
|
# - There are some words like +行って+. Without the kana, it's difficult to
|
@@ -60,52 +48,50 @@ module NHKore
|
|
60
48
|
# try to populate the other value.
|
61
49
|
def add_word(word,use_freq: false)
|
62
50
|
curr_word = words[word.key]
|
63
|
-
|
64
|
-
if curr_word.nil?
|
51
|
+
|
52
|
+
if curr_word.nil?
|
65
53
|
words[word.key] = word
|
66
54
|
curr_word = word
|
67
55
|
else
|
68
56
|
curr_word.freq += (use_freq ? word.freq : 1)
|
69
|
-
|
70
|
-
curr_word.defn = word.defn if word.defn.to_s
|
71
|
-
curr_word.eng = word.eng if word.eng.to_s
|
57
|
+
|
58
|
+
curr_word.defn = word.defn if word.defn.to_s.length > curr_word.defn.to_s.length
|
59
|
+
curr_word.eng = word.eng if word.eng.to_s.length > curr_word.eng.to_s.length
|
72
60
|
end
|
73
|
-
|
61
|
+
|
74
62
|
return curr_word
|
75
63
|
end
|
76
|
-
|
64
|
+
|
77
65
|
def encode_with(coder)
|
78
66
|
# Order matters.
|
79
|
-
|
80
|
-
coder[:datetime] = @datetime.nil?
|
67
|
+
|
68
|
+
coder[:datetime] = @datetime.nil? ? @datetime : @datetime.iso8601
|
81
69
|
coder[:title] = @title
|
82
|
-
coder[:url] = @url.nil?
|
83
|
-
coder[:futsuurl] = @futsuurl.nil?
|
70
|
+
coder[:url] = @url.nil? ? nil : @url.to_s
|
71
|
+
coder[:futsuurl] = @futsuurl.nil? ? nil : @futsuurl.to_s
|
84
72
|
coder[:sha256] = @sha256
|
85
73
|
coder[:words] = @words
|
86
74
|
end
|
87
|
-
|
75
|
+
|
88
76
|
def self.load_data(key,hash)
|
89
77
|
words = hash[:words]
|
90
|
-
|
91
|
-
article = Article.new
|
92
|
-
|
78
|
+
|
79
|
+
article = Article.new
|
80
|
+
|
93
81
|
article.datetime = hash[:datetime]
|
94
82
|
article.futsuurl = hash[:futsuurl]
|
95
83
|
article.sha256 = hash[:sha256]
|
96
84
|
article.title = hash[:title]
|
97
85
|
article.url = hash[:url]
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
article.words[k] = Word.load_data(k,h)
|
103
|
-
end
|
86
|
+
|
87
|
+
words&.each() do |k,h|
|
88
|
+
k = k.to_s # Change from a symbol
|
89
|
+
article.words[k] = Word.load_data(k,h)
|
104
90
|
end
|
105
|
-
|
91
|
+
|
106
92
|
return article
|
107
93
|
end
|
108
|
-
|
94
|
+
|
109
95
|
def datetime=(value)
|
110
96
|
if value.is_a?(Time)
|
111
97
|
@datetime = value
|
@@ -113,34 +99,34 @@ module NHKore
|
|
113
99
|
@datetime = Util.empty_web_str?(value) ? nil : Time.iso8601(value)
|
114
100
|
end
|
115
101
|
end
|
116
|
-
|
102
|
+
|
117
103
|
def futsuurl=(value)
|
118
104
|
# Don't store URI, store String.
|
119
|
-
@futsuurl = value.nil?
|
105
|
+
@futsuurl = value.nil? ? nil : value.to_s
|
120
106
|
end
|
121
|
-
|
107
|
+
|
122
108
|
def url=(value)
|
123
109
|
# Don't store URI, store String.
|
124
|
-
@url = value.nil?
|
110
|
+
@url = value.nil? ? nil : value.to_s
|
125
111
|
end
|
126
|
-
|
112
|
+
|
127
113
|
def to_s(mini: false)
|
128
|
-
s = ''.dup
|
129
|
-
|
114
|
+
s = ''.dup
|
115
|
+
|
130
116
|
s << "'#{@url}':"
|
131
117
|
s << "\n datetime: '#{@datetime}'"
|
132
118
|
s << "\n title: '#{@title}'"
|
133
119
|
s << "\n url: '#{@url}'"
|
134
120
|
s << "\n futsuurl: '#{@futsuurl}'"
|
135
121
|
s << "\n sha256: '#{@sha256}'"
|
136
|
-
|
122
|
+
|
137
123
|
if !mini
|
138
124
|
s << "\n words:"
|
139
|
-
@words.each
|
125
|
+
@words.each do |key,word|
|
140
126
|
s << "\n #{word}"
|
141
127
|
end
|
142
128
|
end
|
143
|
-
|
129
|
+
|
144
130
|
return s
|
145
131
|
end
|
146
132
|
end
|