nhkore 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -1
- data/README.md +18 -6
- data/Rakefile +11 -16
- data/bin/nhkore +1 -3
- data/lib/nhkore/app.rb +616 -0
- data/lib/nhkore/article.rb +130 -0
- data/lib/nhkore/article_scraper.rb +653 -0
- data/lib/nhkore/cleaner.rb +91 -0
- data/lib/nhkore/cli/bing_cmd.rb +220 -0
- data/lib/nhkore/cli/fx_cmd.rb +116 -0
- data/lib/nhkore/cli/get_cmd.rb +153 -0
- data/lib/nhkore/cli/news_cmd.rb +375 -0
- data/lib/nhkore/cli/sift_cmd.rb +382 -0
- data/lib/nhkore/defn.rb +104 -0
- data/lib/nhkore/dict.rb +80 -0
- data/lib/nhkore/dict_scraper.rb +76 -0
- data/lib/nhkore/entry.rb +104 -0
- data/lib/nhkore/error.rb +35 -0
- data/lib/nhkore/fileable.rb +48 -0
- data/lib/nhkore/missingno.rb +92 -0
- data/lib/nhkore/news.rb +176 -0
- data/lib/nhkore/polisher.rb +93 -0
- data/lib/nhkore/scraper.rb +137 -0
- data/lib/nhkore/search_link.rb +188 -0
- data/lib/nhkore/search_scraper.rb +152 -0
- data/lib/nhkore/sifter.rb +339 -0
- data/lib/nhkore/splitter.rb +90 -0
- data/lib/nhkore/util.rb +190 -0
- data/lib/nhkore/variator.rb +87 -0
- data/lib/nhkore/version.rb +1 -1
- data/lib/nhkore/word.rb +134 -17
- data/lib/nhkore.rb +39 -40
- data/nhkore.gemspec +23 -8
- data/test/{nhkore_tester.rb → nhkore/test_helper.rb} +3 -1
- data/test/nhkore_test.rb +8 -6
- metadata +204 -11
@@ -0,0 +1,130 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
#--
|
6
|
+
# This file is part of NHKore.
|
7
|
+
# Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
|
8
|
+
#
|
9
|
+
# NHKore is free software: you can redistribute it and/or modify
|
10
|
+
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
12
|
+
# (at your option) any later version.
|
13
|
+
#
|
14
|
+
# NHKore is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU Lesser General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU Lesser General Public License
|
20
|
+
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
21
|
+
#++
|
22
|
+
|
23
|
+
|
24
|
+
require 'time'
|
25
|
+
|
26
|
+
require 'nhkore/util'
|
27
|
+
require 'nhkore/word'
|
28
|
+
|
29
|
+
|
30
|
+
module NHKore
  ###
  # Holds one article's metadata (datetime, title, url, futsuurl, sha256)
  # together with a Hash of its words, keyed by each word's +key+.
  #
  # @author Jonathan Bradley Whited (@esotericpig)
  # @since  0.2.0
  ###
  class Article
    attr_accessor :datetime
    attr_accessor :futsuurl
    attr_accessor :sha256
    attr_accessor :title
    attr_accessor :url
    attr_reader :words

    # Creates an article with every attribute unset and no words.
    def initialize()
      super()

      @datetime = nil
      @futsuurl = nil
      @sha256 = nil
      @title = nil
      @url = nil
      @words = {}
    end

    # Adds +word+ to this article, or merges it into the word already stored
    # under the same +word.key+.
    #
    # On a merge, the stored word's +freq+ grows by +word.freq+ (if
    # +use_freq+) or by 1, and its +defn+/+eng+ are replaced only when the
    # incoming value is a longer string than the stored one.
    #
    # Why does this not look up the kanji/kana only and then update the other
    # kana/kanji part appropriately?
    # - There are some words like +行って+. Without the kana, it's difficult to
    #   determine what kana it should be. Should it be +いって+ or +おこなって+?
    # - Similarly, if we just have +いって+, should this be +行って+ or +言って+?
    # - Therefore, if we only have the kanji or only have the kana, we don't
    #   try to populate the other value.
    #
    # @param word [Word] the word to add; it is stored as-is (not copied) when
    #                    its key is new
    # @param use_freq [true,false] add +word.freq+ instead of 1 when merging
    #
    # @return [Word] the word stored in this article under +word.key+
    def add_word(word,use_freq: false)
      curr_word = words[word.key]

      # First time this key is seen: keep the given word untouched.
      if curr_word.nil?()
        words[word.key] = word

        return word
      end

      curr_word.freq += (use_freq ? word.freq : 1)

      # Prefer whichever definition/English text is longer.
      curr_word.defn = word.defn if word.defn.to_s().length > curr_word.defn.to_s().length
      curr_word.eng = word.eng if word.eng.to_s().length > curr_word.eng.to_s().length

      return curr_word
    end

    # Writes this article's fields into +coder+ using symbol keys.
    #
    # The datetime is stored as an ISO 8601 string, or +nil+ if unset.
    #
    # @param coder [#[]=] the target that receives the key/value pairs
    def encode_with(coder)
      # Order matters.

      coder[:datetime] = @datetime&.iso8601()
      coder[:title] = @title
      coder[:url] = @url
      coder[:futsuurl] = @futsuurl
      coder[:sha256] = @sha256
      coder[:words] = @words
    end

    # Builds an article from a Hash that uses the same symbol keys that
    # {#encode_with} writes.
    #
    # @param key  [Object] not used by this method
    # @param hash [Hash] the article's data; +:words+ may be absent/+nil+
    #
    # @return [Article] the new article
    def self.load_data(key,hash)
      datetime = hash[:datetime]
      words = hash[:words]

      article = Article.new()

      # The datetime stays nil when Util.empty_web_str?() reports it as empty;
      # otherwise it must be a string that Time.iso8601() can parse.
      article.datetime = Util.empty_web_str?(datetime) ? nil : Time.iso8601(datetime)
      article.futsuurl = hash[:futsuurl]
      article.sha256 = hash[:sha256]
      article.title = hash[:title]
      article.url = hash[:url]

      unless words.nil?()
        words.each() do |k,h|
          k = k.to_s() # Change from a symbol
          article.words[k] = Word.load_data(k,h)
        end
      end

      return article
    end

    # Renders this article as multi-line text, headed by its URL.
    #
    # NOTE(review): the word lines use the same one-space indent as the field
    # lines; confirm this is intended and not whitespace lost in transit.
    #
    # @param mini [true,false] if true, leaves out the list of words
    #
    # @return [String] a new, unfrozen string
    def to_s(mini: false)
      s = ''.dup()

      s << "'#{@url}':"
      s << "\n datetime: '#{@datetime}'"
      s << "\n title: '#{@title}'"
      s << "\n url: '#{@url}'"
      s << "\n futsuurl: '#{@futsuurl}'"
      s << "\n sha256: '#{@sha256}'"

      unless mini
        s << "\n words:"
        @words.each_value() do |word|
          s << "\n #{word}"
        end
      end

      return s
    end
  end
end
|