nhkore 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,130 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of NHKore.
7
+ # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # NHKore is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU Lesser General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # NHKore is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU Lesser General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU Lesser General Public License
20
+ # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'time'
25
+
26
+ require 'nhkore/util'
27
+ require 'nhkore/word'
28
+
29
+
30
module NHKore
  ###
  # An article: a handful of metadata attributes (+datetime+, +title+, +url+,
  # +futsuurl+, +sha256+) plus a Hash of the words collected for it, keyed by
  # each word's +key+.
  #
  # @author Jonathan Bradley Whited (@esotericpig)
  # @since  0.2.0
  ###
  class Article
    attr_accessor :datetime
    attr_accessor :futsuurl
    attr_accessor :sha256
    attr_accessor :title
    attr_accessor :url
    attr_reader :words

    # Creates an article with every attribute set to +nil+ and no words.
    def initialize
      super()

      @datetime = nil
      @futsuurl = nil
      @sha256 = nil
      @title = nil
      @url = nil
      @words = {}
    end

    # Adds +word+ to this article, or merges it into the word already stored
    # under the same key.
    #
    # Why does this not look up the kanji/kana only and then update the other
    # kana/kanji part appropriately?
    # - There are some words like +行って+. Without the kana, it's difficult to
    #   determine what kana it should be. Should it be +いって+ or +おこなって+?
    # - Similarly, if we just have +いって+, should this be +行って+ or +言って+?
    # - Therefore, if we only have the kanji or only have the kana, we don't
    #   try to populate the other value.
    #
    # @param word [Word] the word to add; must respond to +key+, +freq+,
    #   +defn+ and +eng+
    # @param use_freq [true,false] when merging, add +word.freq+ to the stored
    #   word's frequency instead of adding 1; has no effect the first time a
    #   key is seen, because the word is then stored as-is
    # @return [Word] the word now stored under +word.key+ (+word+ itself on
    #   first insertion, otherwise the previously stored word)
    def add_word(word, use_freq: false)
      curr_word = words[word.key]

      if curr_word.nil?
        words[word.key] = word
        curr_word = word
      else
        curr_word.freq += (use_freq ? word.freq : 1)

        # Keep whichever definition/English text is longer; +to_s+ makes a nil
        # value count as length 0.
        curr_word.defn = word.defn if word.defn.to_s.length > curr_word.defn.to_s.length
        curr_word.eng = word.eng if word.eng.to_s.length > curr_word.eng.to_s.length
      end

      curr_word
    end

    # Copies this article's attributes into +coder+ under Symbol keys.
    # Presumably this is the hook a YAML serializer (Psych) calls -- confirm
    # against the callers.
    #
    # @param coder [#[]=] the target that receives the key/value pairs
    def encode_with(coder)
      # Order matters.

      # The date/time is stored as an ISO 8601 string, or nil when unset.
      coder[:datetime] = @datetime&.iso8601
      coder[:title] = @title
      coder[:url] = @url
      coder[:futsuurl] = @futsuurl
      coder[:sha256] = @sha256
      coder[:words] = @words
    end

    # Builds an article from a Hash that uses the same Symbol keys as
    # {#encode_with}.
    #
    # @param key [Object] not used by this method
    # @param hash [Hash] the attribute values; +:datetime+ is parsed with
    #   +Time.iso8601+ unless +Util.empty_web_str?+ reports it as empty, and
    #   each entry of +:words+ is passed to +Word.load_data+
    # @return [Article] the newly built article
    def self.load_data(key, hash)
      datetime = hash[:datetime]
      words = hash[:words]

      article = Article.new

      article.datetime = Util.empty_web_str?(datetime) ? nil : Time.iso8601(datetime)
      article.futsuurl = hash[:futsuurl]
      article.sha256 = hash[:sha256]
      article.title = hash[:title]
      article.url = hash[:url]

      # A missing (nil) :words entry simply leaves the article without words.
      words&.each do |k, h|
        k = k.to_s # Change from a symbol
        article.words[k] = Word.load_data(k, h)
      end

      article
    end

    # Renders the article as multi-line text: the URL first, then one line
    # per attribute and, unless +mini+ is set, one line per word.
    #
    # NOTE(review): the spacing inside these literals is reproduced exactly as
    # found; any original column alignment may have been lost -- confirm
    # against the upstream file.
    #
    # @param mini [true,false] when true, the words are left out
    # @return [String] the rendered text
    def to_s(mini: false)
      s = ''.dup # Unfrozen buffer; string literals are frozen in this file.

      s << "'#{@url}':"
      s << "\n datetime: '#{@datetime}'"
      s << "\n title: '#{@title}'"
      s << "\n url: '#{@url}'"
      s << "\n futsuurl: '#{@futsuurl}'"
      s << "\n sha256: '#{@sha256}'"

      unless mini
        s << "\n words:"
        @words.each_value do |word|
          s << "\n #{word}"
        end
      end

      s
    end
  end
end