nhkore 0.3.1 → 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,342 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of NHKore.
7
+ # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # NHKore is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU Lesser General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # NHKore is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU Lesser General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU Lesser General Public License
20
+ # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'attr_bool'
25
+ require 'date'
26
+ require 'time'
27
+
28
+ require 'nhkore/util'
29
+
30
+
31
+ module NHKore
32
+ ###
33
+ # @author Jonathan Bradley Whited (@esotericpig)
34
+ # @since 0.3.4
35
+ ###
36
+ class DatetimeParser
37
# Date time formats that parse_range() tries against each value, from first
# to last; the first format that parses wins, so order matters!
#
# Each "%d ..." format is directly followed by its "%Y ..." twin, because
# parse_range() skips the "%d ..." formats when the first token is longer
# than 2 characters (i.e., a year instead of a day).
#
# Frozen so that this shared list cannot be mutated at runtime.
FMTS = [
  '%Y-%m-%d %H:%M',
  '%Y-%m-%d %H',
  '%Y-%m-%d',
  '%m-%d %H:%M',
  '%Y-%m %H:%M',
  '%m-%d %H',
  '%Y-%m %H',
  '%m-%d',
  '%Y-%m',
  '%d %H:%M',
  # Must be '%Y' (not '%y'): parse!() only records a year for '%Y',
  # and '%y' reads 2 digits at most, so '2020 12:30' could never match
  # and would fall through to '%Y %H', losing the minutes.
  '%Y %H:%M',
  '%d %H',
  '%Y %H',
  '%H:%M',
  '%d',
  '%Y',
].freeze
56
+
57
# Expands a short year (less than 1000) into a full year, relative to
# Util::JST_YEAR. Years of 1000 or more are returned untouched.
#
# @param year [Integer] the (possibly abbreviated) year
# @return [Integer] the guessed full year
def self.guess_year(year)
  return year unless year < 1000

  now_year = Util::JST_YEAR

  # Up to next year's last 2 digits (2022 -> 23): assume this century.
  return (now_year / 100 * 100) + year if year <= ((now_year % 100) + 1)

  if year < 100
    # Otherwise, assume the previous century (2000 -> 1900).
    base = now_year / 100 * 100 # 2120 -> 2100
    base -= 100 if base >= 100

    return base + year
  end

  # 3-digit year: stay in this millennium only if the result is not later
  # than next year (current year 2200, year 150 -> 2000 + 150 = 2150);
  # else, fall back to the previous millennium (999 -> 1999).
  base = now_year / 1000 * 1000 # 2120 -> 2000

  if (base + year) > (now_year + 1)
    base -= 1000 if base >= 1000
  end

  return base + year
end
88
+
89
# Parses a date time range, such as '2020-01...2020-06', '2020...',
# '...2020', or a single value like '2020-01-15'.
#
# Each side is tried against FMTS in order. A missing side of '...'
# becomes the minimum ("from") or maximum ("to") date time, and then both
# sides fill in their unspecified parts from each other via autofill!().
#
# @param value [String] the range (or single date time) to parse
# @return [Array,nil] +[from,to]+ as returned by jst_time(), or +nil+
#   if +value+ is blank or only '...'
# @raise [ArgumentError] if a side matches none of the formats
def self.parse_range(value)
  # Do not use unspace_web_str(), want spaces for formats.
  value = Util.strip_web_str(Util.reduce_space(value))
  values = value.split('...',2)

  return nil if values.empty?() # For ''

  # For '2020...' or '...2020'.
  if value.include?('...')
    # values.length is always 2 because of 2 in split() above,
    # so '...' gives ['',''] (not an empty array).
    no_from = Util.empty_web_str?(values[0])
    no_to = Util.empty_web_str?(values[1])

    return nil if no_from && no_to # For '...'

    values[0] = :infinity if no_from # For '...2020'
    values[1] = :infinity if no_to # For '2020...'
  end

  datetimes = [
    DatetimeParser.new(), # "From" date time
    DatetimeParser.new(), # "To" date time
  ]

  values.each_with_index() do |v,side|
    dt = datetimes[side]

    # Minimum/Maximum date time for '...2020' or '2020...'.
    if v == :infinity
      if side == 0
        dt.min!()
      else
        dt.max!()
      end

      next
    end

    v = Util.strip_web_str(v)

    has_space = v.include?(' ')
    first_len = v.split(' ').first.to_s().length
    last_error = nil

    parsed = FMTS.any?() do |fmt|
      # If don't do this, "%d" values will be parsed using "%d %H".
      # It seems as though strptime() ignores space.
      next false if fmt.include?(' ') && !has_space

      # If don't do this, "%Y..." values will be parsed using "%d...".
      next false if fmt.start_with?('%d') && first_len > 2

      begin
        dt.parse!(v,fmt)

        true # No problem; this format worked
      rescue ArgumentError => e
        last_error = e

        false # Try the next format
      end
    end

    next if parsed

    # Out of formats.
    raise last_error unless last_error.nil?()
    raise ArgumentError,"invalid date time[#{v}]"
  end

  from,to = datetimes

  from.autofill!(:from,to)
  to.autofill!(:to,from)

  return [from.jst_time(),to.jst_time()]
end
157
+
158
+ attr_accessor :day
159
+ attr_accessor :hour
160
+ attr_accessor :min
161
+ attr_accessor :month
162
+ attr_accessor :sec
163
+ attr_accessor :year
164
+
165
+ attr_accessor? :has_day
166
+ attr_accessor? :has_hour
167
+ attr_accessor? :has_min
168
+ attr_accessor? :has_month
169
+ attr_accessor? :has_sec
170
+ attr_accessor? :has_year
171
+
172
+ attr_reader? :min_or_max
173
+
174
# Creates a parser holding the given date time parts (all optional).
#
# Every "has" flag starts as false, and this is not yet flagged as a
# minimum/maximum date time.
def initialize(year=nil,month=nil,day=nil,hour=nil,min=nil,sec=nil)
  super()

  @min_or_max = false
  self.has = false

  set!(year,month,day,hour,min,sec)
end
182
+
183
# Fills in every date time part that was not parsed for this side of a
# range, going from the smallest unit (second) to the biggest (year).
#
# For each unit:
# - if this side has it, keep it;
# - else if +other+ has it, copy it from +other+;
# - else if a smaller unit was specified (by either side), use the
#   current JST value, so that '12:30' means today at 12:30;
# - else use the edge of the range: the lowest value for +:from+,
#   the highest value for +:to+.
#
# Does nothing if this is a minimum/maximum date time (min!()/max!()).
#
# @param type [Symbol] +:from+ or +:to+; which side of the range this is
# @param other [DatetimeParser] the other side of the range
# @return [self]
# @raise [ArgumentError] if +type+ is not +:from+ or +:to+
def autofill!(type,other)
  case type
  when :from
    is_from = true
  when :to
    is_from = false
  else
    raise ArgumentError,"invalid type[#{type}]"
  end

  return self if @min_or_max

  has_small = false
  jst_now = Util.jst_now()

  # One rule for all units; remembers (in has_small) once any unit was
  # specified, so calls must be made from smallest to biggest.
  fill = lambda do |has_own,own,has_other,theirs,now,edge|
    if has_own || has_other
      has_small = true

      has_own ? own : theirs
    elsif has_small
      now
    else
      edge
    end
  end

  # Must be from smallest to biggest.
  @sec = fill.call(@has_sec,@sec,other.has_sec?(),other.sec,
    jst_now.sec,is_from ? 0 : 59)
  @min = fill.call(@has_min,@min,other.has_min?(),other.min,
    jst_now.min,is_from ? 0 : 59)
  @hour = fill.call(@has_hour,@hour,other.has_hour?(),other.hour,
    jst_now.hour,is_from ? 0 : 23)
  @day = fill.call(@has_day,@day,other.has_day?(),other.day,
    jst_now.day,is_from ? 1 : :last_day)
  @month = fill.call(@has_month,@month,other.has_month?(),other.month,
    jst_now.month,is_from ? 1 : 12)
  @year = fill.call(@has_year,@year,other.has_year?(),other.year,
    jst_now.year,is_from ? Util::MIN_SANE_YEAR : jst_now.year)

  # Must be after setting @year & @month;
  # a day of -1 gives the last day of that month.
  if @day == :last_day
    @day = Date.new(@year,@month,-1).day
  end

  return self
end
273
+
274
# Flags this as a minimum/maximum date time and sets it to the last
# second of the year Util::JST_YEAR.
#
# @return the result of set!()
def max!()
  @min_or_max = true

  last_moment = [Util::JST_YEAR,12,31,23,59,59] # Ex: 2020-12-31 23:59:59

  return set!(*last_moment)
end
280
+
281
# Flags this as a minimum/maximum date time and sets it to the first
# second of the year Util::MIN_SANE_YEAR.
#
# @return the result of set!()
def min!()
  @min_or_max = true

  first_moment = [Util::MIN_SANE_YEAR,1,1,0,0,0] # Ex: 1924-01-01 00:00:00

  return set!(*first_moment)
end
287
+
288
# Parses +value+ using the single strptime() format +fmt+, and stores
# only the parts that +fmt+ actually contains, flagging each as "has".
#
# Years are passed through the class's guess_year(), via the block of
# Time.strptime(). Nothing is changed if parsing raises.
#
# @param value [String] the date time text to parse
# @param fmt [String] the strptime() format
# @return [self]
def parse!(value,fmt)
  parsed = Time.strptime(value,fmt,&self.class.method(:guess_year))

  @has_year = fmt.include?('%Y')
  @year = parsed.year if @has_year

  @has_month = fmt.include?('%m')
  @month = parsed.month if @has_month

  @has_day = fmt.include?('%d')
  @day = parsed.day if @has_day

  @has_hour = fmt.include?('%H')
  @hour = parsed.hour if @has_hour

  @has_min = fmt.include?('%M')
  @min = parsed.min if @has_min

  @has_sec = fmt.include?('%S')
  @sec = parsed.sec if @has_sec

  return self
end
307
+
308
# Overwrites all date time parts at once; omitted parts become nil.
# The "has" flags are not touched.
#
# @return [self]
def set!(year=nil,month=nil,day=nil,hour=nil,min=nil,sec=nil)
  @year,@month,@day = year,month,day
  @hour,@min,@sec = hour,min,sec

  return self
end
318
+
319
# Sets every "has" flag (day, hour, min, month, sec, year) to +value+.
#
# @param value the flag value to store in all of them
# @return [self] only visible when called via send(); plain assignment
#   syntax evaluates to +value+
def has=(value)
  @has_year = @has_month = @has_day = value
  @has_hour = @has_min = @has_sec = value

  return self
end
329
+
330
# Builds time() and passes it through Util.jst_time().
#
# @return the result of Util.jst_time()
def jst_time()
  local_time = time()

  return Util.jst_time(local_time)
end
333
+
334
# Builds a Time from the stored parts, via Time.new() without a zone
# argument.
#
# @return [Time]
def time()
  parts = [@year,@month,@day,@hour,@min,@sec]

  return Time.new(*parts)
end
337
+
338
# Formats the stored parts as "year-month-day hour:min:sec", as is
# (no zero padding; unset parts are shown as empty).
#
# @return [String]
def to_s()
  date_part = [@year,@month,@day].join('-')
  time_part = [@hour,@min,@sec].join(':')

  return "#{date_part} #{time_part}"
end
341
+ end
342
+ end
@@ -44,7 +44,7 @@ module NHKore
44
44
  end
45
45
 
46
46
  def self.parse_url(url,basename: nil)
47
- url = Util.strip_web_str(url)
47
+ url = Util.strip_web_str(url.to_s())
48
48
 
49
49
  raise ParseError,"cannot parse dictionary URL from URL[#{url}]" if url.empty?()
50
50
 
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of NHKore.
7
+ # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # NHKore is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU Lesser General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # NHKore is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU Lesser General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU Lesser General Public License
20
+ # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'nhkore/article'
25
+ require 'nhkore/article_scraper'
26
+ require 'nhkore/cleaner'
27
+ require 'nhkore/datetime_parser'
28
+ require 'nhkore/defn'
29
+ require 'nhkore/dict'
30
+ require 'nhkore/dict_scraper'
31
+ require 'nhkore/entry'
32
+ require 'nhkore/error'
33
+ require 'nhkore/fileable'
34
+ require 'nhkore/missingno'
35
+ require 'nhkore/news'
36
+ require 'nhkore/polisher'
37
+ require 'nhkore/scraper'
38
+ require 'nhkore/search_link'
39
+ require 'nhkore/search_scraper'
40
+ require 'nhkore/sifter'
41
+ require 'nhkore/splitter'
42
+ require 'nhkore/user_agents'
43
+ require 'nhkore/util'
44
+ require 'nhkore/variator'
45
+ require 'nhkore/version'
46
+ require 'nhkore/word'
47
+
48
+
49
+ module NHKore
50
+ ###
51
+ # Include this file to only require the files needed to use this
52
+ # Gem as a library (i.e., don't include CLI-related files).
53
+ #
54
+ # @author Jonathan Bradley Whited (@esotericpig)
55
+ # @since 0.3.2
56
+ ###
57
+ module Lib
58
+ end
59
+ end
@@ -49,7 +49,10 @@ module NHKore
49
49
  end
50
50
 
51
51
  def add_article(article,key: nil,overwrite: false)
52
- key = article.url if key.nil?()
52
+ url = article.url
53
+ url = url.to_s() unless url.nil?()
54
+
55
+ key = key.nil?() ? url : key.to_s()
53
56
 
54
57
  if !overwrite
55
58
  raise ArgumentError,"duplicate article[#{key}] in articles" if @articles.key?(key)
@@ -57,7 +60,7 @@ module NHKore
57
60
  end
58
61
 
59
62
  @articles[key] = article
60
- @sha256s[article.sha256] = article.url
63
+ @sha256s[article.sha256] = url
61
64
 
62
65
  return self
63
66
  end
@@ -91,16 +94,20 @@ module NHKore
91
94
  end
92
95
 
93
96
  def update_article(article,url)
97
+ url = url.to_s() unless url.nil?()
98
+
94
99
  # Favor https.
95
- return if article.url =~ FAVORED_URL
100
+ return if article.url.to_s() =~ FAVORED_URL
96
101
  return if url !~ FAVORED_URL
97
102
 
98
- @articles.delete(article.url)
103
+ @articles.delete(article.url) # Probably no to_s() here
99
104
  @articles[url] = article
100
105
  article.url = url
101
106
  end
102
107
 
103
108
  def article(key)
109
+ key = key.to_s() unless key.nil?()
110
+
104
111
  return @articles[key]
105
112
  end
106
113
 
@@ -119,6 +126,8 @@ module NHKore
119
126
  end
120
127
 
121
128
  def article?(key)
129
+ key = key.to_s() unless key.nil?()
130
+
122
131
  return @articles.key?(key)
123
132
  end
124
133