nhkore 0.3.3 → 0.3.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,328 @@
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+
4
+ #--
5
+ # This file is part of NHKore.
6
+ # Copyright (c) 2020-2021 Jonathan Bradley Whited
7
+ #
8
+ # SPDX-License-Identifier: LGPL-3.0-or-later
9
+ #++
10
+
11
+
12
+ require 'attr_bool'
13
+ require 'date'
14
+ require 'time'
15
+
16
+ require 'nhkore/util'
17
+
18
+
19
+ module NHKore
20
+ ###
21
+ # @author Jonathan Bradley Whited
22
+ # @since 0.3.4
23
+ ###
24
+ class DatetimeParser
25
+ extend AttrBool::Ext
26
+
27
# Formats tried, in this order, against each side of a date time range.
#
# Order matters! More specific formats come before less specific ones, and
# every "%d..." format comes directly before its "%Y..." twin, so that a
# 1-2 character leading value is read as a day and a longer one as a year.
#
# All year formats use "%Y" (not "%y"): parse!() only recognizes "%Y" as a
# year, and "%y" reads at most 2 digits, so a value like "2020 12:30" would
# never match it and would lose its minutes to the later "%Y %H" format.
FMTS = [
  '%Y-%m-%d %H:%M',
  '%Y-%m-%d %H',
  '%Y-%m-%d',
  '%m-%d %H:%M',
  '%Y-%m %H:%M',
  '%m-%d %H',
  '%Y-%m %H',
  '%m-%d',
  '%Y-%m',
  '%d %H:%M',
  '%Y %H:%M',
  '%d %H',
  '%Y %H',
  '%H:%M',
  '%d',
  '%Y',
].freeze
46
+
47
# Expands an abbreviated year (anything below 1000) into a full year,
# relative to Util::JST_YEAR. Years of 1000 or more are returned as is.
#
# - A value no bigger than next year's last 2 digits is placed in the
#   current century (22 -> 2022 if the current year is 2022).
# - A 3-digit value is placed in the current millennium if that is not
#   past next year, else in the previous millennium (999 -> 1999).
# - Any other value is placed in the previous century (99 -> 1999).
#
# @param year [Integer] the (possibly abbreviated) year
# @return [Integer] the guessed full year
def self.guess_year(year)
  return year if year >= 1000

  current = Util::JST_YEAR

  # If year <= 23 (2022 -> 23), assume this century.
  return ((current / 100) * 100) + year if year <= ((current % 100) + 1)

  if year >= 100
    base = (current / 1000) * 1000 # 2120 -> 2000

    # If 2000 + 150 is in the future, assume the previous millennium
    # (2000 -> 1000), so year 999 will become 1999.
    base -= 1000 if (base + year) > (current + 1) && base >= 1000
  else
    base = (current / 100) * 100 # 2120 -> 2100

    # Assume previous century (2000 -> 1900).
    base -= 100 if base >= 100
  end

  base + year
end
78
+
79
# Parses a date time range string into a "from" & a "to" time.
#
# Accepts a single date time ('2020-12'), a range ('2020-12-01...2020-12-05'),
# or an open-ended range ('2020...' or '...2020'). Each side is tried against
# FMTS in order, and then any parts missing from one side are autofilled
# using the other side.
#
# @param value [String] the range to parse
# @return [Array,nil] +[from.jst_time,to.jst_time]+, or nil if +value+ is
#   blank or is only '...'
# @raise [ArgumentError] if a side of the range doesn't match any of FMTS
def self.parse_range(value)
  # Do not use unspace_web_str(), want spaces for formats.
  value = Util.strip_web_str(Util.reduce_space(value))
  values = value.split('...',2)

  # For '' (split gives []) or '...' (split with a limit gives ['','']).
  return nil if values.all? { |v| Util.empty_web_str?(v) }

  # For '2020...' or '...2020'.
  # (values.length is always 2 here because of the 2 in split() above.)
  if value.include?('...')
    if Util.empty_web_str?(values[1])
      values[1] = :infinity # For '2020...'.
    elsif Util.empty_web_str?(values[0])
      values[0] = :infinity # For '...2020'.
    end
  end

  datetimes = [
    DatetimeParser.new, # "From" date time
    DatetimeParser.new, # "To" date time
  ]

  values.each_with_index do |v,i|
    dt = datetimes[i]

    # Minimum/Maximum date time for '...2020' or '2020...'.
    if v == :infinity
      i == 0 ? dt.min! : dt.max!
      next
    end

    v = Util.strip_web_str(v)
    first_len = v.split(' ')[0].to_s.length

    parsed = FMTS.any? do |fmt|
      # If don't do this, "%d" values will be parsed using "%d %H".
      # It seems as though strptime() ignores space.
      next false if fmt.include?(' ') && !v.include?(' ')

      # If don't do this, "%Y..." values will be parsed using "%d...".
      next false if fmt.start_with?('%d') && first_len > 2

      begin
        dt.parse!(v,fmt)
        true # No problem; this format worked
      rescue ArgumentError
        false # Try the next format
      end
    end

    # Out of formats.
    raise ArgumentError,"invalid date time[#{v}]" unless parsed
  end

  from = datetimes[0]
  to = datetimes[1]

  from.autofill!(:from,to)
  to.autofill!(:to,from)

  return [from.jst_time,to.jst_time]
end
145
+
146
# The individual parts of the date time; nil until set by set!(),
# parse!(), or autofill!().
attr_accessor :year,:month,:day
attr_accessor :hour,:min,:sec

# Whether each part was explicitly given in the value parsed by parse!()
# (as opposed to being autofilled).
attr_accessor? :has_year,:has_month,:has_day
attr_accessor? :has_hour,:has_min,:has_sec

# True once min!() or max!() has been called; autofill!() then does nothing.
attr_reader? :min_or_max
161
+
162
# Creates a parser holding the given parts, with every "has" flag and
# the min/max flag turned off.
#
# @param year [Integer,nil]
# @param month [Integer,nil]
# @param day [Integer,nil]
# @param hour [Integer,nil]
# @param min [Integer,nil]
# @param sec [Integer,nil]
def initialize(year=nil,month=nil,day=nil,hour=nil,min=nil,sec=nil)
  super()

  @min_or_max = false
  self.has = false

  set!(year,month,day,hour,min,sec)
end
170
+
171
# Fills in every part that wasn't explicitly parsed.
#
# Going from the smallest part (sec) to the biggest (year), each part is:
# 1. kept, if this date time has it;
# 2. copied from +other+, if only +other+ has it;
# 3. taken from the current JST time, if a smaller part was present on
#    either side (so '12:30' means today at 12:30);
# 4. else set to the smallest value for +:from+ or the biggest for +:to+.
#
# Does nothing if min!() or max!() was called on this date time.
#
# @param type [Symbol] +:from+ or +:to+
# @param other [DatetimeParser] the other side of the range
# @return [self]
# @raise [ArgumentError] if +type+ is not +:from+ or +:to+
def autofill!(type,other)
  is_from =
    case type
    when :from then true
    when :to then false
    else raise ArgumentError,"invalid type[#{type}]"
    end

  return self if @min_or_max

  jst_now = Util.jst_now
  has_small = false

  # [part, fill for "from", fill for "to"]
  # Must be from smallest to biggest.
  [
    [:sec,0,59],
    [:min,0,59],
    [:hour,0,23],
    [:day,1,:last_day],
    [:month,1,12],
    [:year,Util::MIN_SANE_YEAR,jst_now.year],
  ].each do |part,from_fill,to_fill|
    has_part = instance_variable_get("@has_#{part}")

    if has_part || other.public_send("has_#{part}?")
      instance_variable_set("@#{part}",other.public_send(part)) unless has_part
      has_small = true
    elsif has_small
      instance_variable_set("@#{part}",jst_now.public_send(part))
    else
      instance_variable_set("@#{part}",is_from ? from_fill : to_fill)
    end
  end

  # Must be after setting @year & @month.
  @day = Date.new(@year,@month,-1).day if @day == :last_day

  self
end
261
+
262
# Sets this date time to the last second of Util::JST_YEAR
# (ex: 2020-12-31 23:59:59) and flags it so autofill!() leaves it alone.
#
# @return [self] the result of set!()
def max!
  @min_or_max = true

  set!(Util::JST_YEAR,12,31,23,59,59)
end
268
+
269
# Sets this date time to the first second of Util::MIN_SANE_YEAR
# (ex: 1924-01-01 00:00:00) and flags it so autofill!() leaves it alone.
#
# @return [self] the result of set!()
def min!
  @min_or_max = true

  set!(Util::MIN_SANE_YEAR,1,1,0,0,0)
end
275
+
276
# Parses +value+ using the strptime() format +fmt+ and stores only the
# parts that +fmt+ contains, flagging each of them as present.
#
# Parts not in +fmt+ keep their current values, with their flags turned
# off. The parsed year is passed through guess_year(). Both "%Y" and "%y"
# count as a year, so a year parsed with "%y" is no longer thrown away.
#
# @param value [String] the date time to parse
# @param fmt [String] the strptime() format to parse +value+ with
# @return [self]
# @raise [ArgumentError] if +value+ doesn't match +fmt+ (from Time.strptime())
def parse!(value,fmt)
  value = Time.strptime(value,fmt,&self.class.method(:guess_year))

  @has_day = fmt.include?('%d')
  @has_hour = fmt.include?('%H')
  @has_min = fmt.include?('%M')
  @has_month = fmt.include?('%m')
  @has_sec = fmt.include?('%S')
  @has_year = fmt.include?('%Y') || fmt.include?('%y')

  @day = value.day if @has_day
  @hour = value.hour if @has_hour
  @min = value.min if @has_min
  @month = value.month if @has_month
  @sec = value.sec if @has_sec
  @year = value.year if @has_year

  return self
end
295
+
296
# Overwrites every part of this date time; omitted parts become nil.
# The "has" flags are not touched.
#
# @return [self]
def set!(year=nil,month=nil,day=nil,hour=nil,min=nil,sec=nil)
  @year,@month,@day = year,month,day
  @hour,@min,@sec = hour,min,sec

  self
end
306
+
307
# Sets all of the "has" flags (year, month, day, hour, min, sec) at once.
#
# @param value [Boolean] the value to give every flag
def has=(value)
  @has_day = @has_hour = @has_min = @has_month = @has_sec = @has_year = value
end
315
+
316
# @return the result of passing time() through Util.jst_time()
def jst_time
  Util.jst_time(time)
end
319
+
320
# Builds a Time from the current parts (no time zone is passed to
# Time.new()).
#
# @return [Time]
def time
  Time.new(@year,@month,@day,@hour,@min,@sec)
end
323
+
324
+ def to_s
325
+ return "#{@year}-#{@month}-#{@day} #{@hour}:#{@min}:#{@sec}"
326
+ end
327
+ end
328
+ end
data/lib/nhkore/defn.rb CHANGED
@@ -1,23 +1,11 @@
1
- #!/usr/bin/env ruby
2
1
  # encoding: UTF-8
3
2
  # frozen_string_literal: true
4
3
 
5
4
  #--
6
5
  # This file is part of NHKore.
7
- # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
8
- #
9
- # NHKore is free software: you can redistribute it and/or modify
10
- # it under the terms of the GNU Lesser General Public License as published by
11
- # the Free Software Foundation, either version 3 of the License, or
12
- # (at your option) any later version.
13
- #
14
- # NHKore is distributed in the hope that it will be useful,
15
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- # GNU Lesser General Public License for more details.
18
- #
19
- # You should have received a copy of the GNU Lesser General Public License
20
- # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
6
+ # Copyright (c) 2020-2021 Jonathan Bradley Whited
7
+ #
8
+ # SPDX-License-Identifier: LGPL-3.0-or-later
21
9
  #++
22
10
 
23
11
 
@@ -29,75 +17,80 @@ require 'nhkore/word'
29
17
 
30
18
  module NHKore
31
19
  ###
32
- # @author Jonathan Bradley Whited (@esotericpig)
20
+ # @author Jonathan Bradley Whited
33
21
  # @since 0.2.0
34
22
  ###
35
23
  class Defn
36
24
  attr_reader :hyoukis
37
25
  attr_accessor :text
38
26
  attr_reader :words
39
-
40
- def initialize()
27
+
28
# Creates an empty definition: no hyoukis, no words, and blank text.
def initialize
  super()

  @hyoukis = []
  @words = []
  @text = +'' # Unfrozen, because scrape() appends to it.
end
47
-
35
+
48
36
  # If no data, don't raise errors; don't care if have a definition or not.
49
37
  def self.scrape(hash,missingno: nil,url: nil)
50
- defn = Defn.new()
51
-
38
+ defn = Defn.new
39
+
52
40
  hyoukis = hash['hyouki']
53
-
54
- if !hyoukis.nil?()
55
- hyoukis.each() do |hyouki|
56
- next if hyouki.nil?()
57
- next if (hyouki = Util.strip_web_str(hyouki)).empty?()
58
-
59
- defn.hyoukis << hyouki
60
- end
41
+
42
+ hyoukis&.each() do |hyouki|
43
+ next if hyouki.nil?
44
+ next if (hyouki = Util.strip_web_str(hyouki)).empty?
45
+
46
+ defn.hyoukis << hyouki
61
47
  end
62
-
48
+
63
49
  def_str = hash['def']
64
-
50
+
65
51
  if Util.empty_web_str?(def_str)
66
- return defn.hyoukis.empty?() ? nil : defn
52
+ return defn.hyoukis.empty? ? nil : defn
67
53
  end
68
-
54
+
69
55
  doc = Nokogiri::HTML(def_str)
70
- doc = doc.css('body') # Auto-added by Nokogiri
71
-
72
- doc.children.each() do |child|
73
- name = Util.unspace_web_str(child.name).downcase() if child.respond_to?(:name)
74
-
56
+ doc = doc.css('body') # Auto-added by Nokogiri.
57
+
58
+ doc.children.each do |child|
59
+ name = Util.unspace_web_str(child.name).downcase if child.respond_to?(:name)
60
+
75
61
  is_text = false
76
- word = nil
77
-
62
+ words = []
63
+
78
64
  if name == 'ruby'
79
- word = Word.scrape_ruby_tag(child,missingno: missingno,url: url)
80
- elsif child.respond_to?(:text) # Don't do child.text?(), just want content
81
- word = Word.scrape_text_node(child,url: url)
65
+ # Returns an array.
66
+ words = Word.scrape_ruby_tag(child,missingno: missingno,url: url)
67
+ elsif child.respond_to?(:text) # Don't do child.text?(), just want content.
68
+ words << Word.scrape_text_node(child,url: url)
82
69
  is_text = true
83
70
  end
84
-
85
- if word.nil?()
71
+
72
+ # All word-scraping methods can return nil,
73
+ # so remove all nils for empty?() check.
74
+ words = words&.compact
75
+
76
+ if words.nil? || words.empty?
86
77
  defn.text << Util.reduce_jpn_space(child.text) if is_text
87
78
  else
88
- defn.text << Util.reduce_jpn_space(word.word)
89
- defn.words << word unless Util.empty_web_str?(word.word)
79
+ words.each do |word|
80
+ defn.text << Util.reduce_jpn_space(word.word)
81
+ defn.words << word unless Util.empty_web_str?(word.word)
82
+ end
90
83
  end
91
84
  end
92
-
93
- return nil if defn.hyoukis.empty?() && defn.words.empty?()
94
-
85
+
86
+ return nil if defn.hyoukis.empty? && defn.words.empty?
87
+
95
88
  defn.text = Util.strip_web_str(defn.text)
96
-
89
+
97
90
  return defn
98
91
  end
99
-
100
- def to_s()
92
+
93
+ def to_s
101
94
  return @text
102
95
  end
103
96
  end