nhkore 0.3.1 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,342 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of NHKore.
7
+ # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # NHKore is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU Lesser General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # NHKore is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU Lesser General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU Lesser General Public License
20
+ # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'attr_bool'
25
+ require 'date'
26
+ require 'time'
27
+
28
+ require 'nhkore/util'
29
+
30
+
31
+ module NHKore
32
+ ###
33
+ # @author Jonathan Bradley Whited (@esotericpig)
34
+ # @since 0.3.4
35
+ ###
36
+ class DatetimeParser
37
# Date/time formats that parse_range() tries, one by one and in this order,
# until one of them parses the value.
#
# Order matters! More specific formats come first, and every "%d..." format
# sits right before its year counterpart: parse_range() skips a "%d..."
# format when the first token of the value is longer than 2 characters,
# so that the value falls through to the year format that follows.
FMTS = [
  '%Y-%m-%d %H:%M',
  '%Y-%m-%d %H',
  '%Y-%m-%d',
  '%m-%d %H:%M',
  '%Y-%m %H:%M',
  '%m-%d %H',
  '%Y-%m %H',
  '%m-%d',
  '%Y-%m',
  '%d %H:%M',
  '%Y %H:%M', # 4-digit year, like every other year format in this list.
  '%d %H',
  '%Y %H',
  '%H:%M',
  '%d',
  '%Y',
].freeze
56
+
57
# Expands an abbreviated year (anything below 1000) into a full year,
# relative to Util::JST_YEAR. Years of 1000 or more are returned as is.
#
# @param year [Integer] the (possibly abbreviated) year
# @return [Integer] the guessed full year
def self.guess_year(year)
  return year if year >= 1000

  current = Util::JST_YEAR

  # No later than next year within this century (2022 -> up to 23):
  # assume this century.
  if year <= (current % 100) + 1
    return (current / 100 * 100) + year # 2120 -> 2100
  end

  if year < 100
    # Assume previous century (2000 -> 1900).
    base = current / 100 * 100
    base -= 100 if base >= 100

    return base + year
  end

  # 3-digit year: assume this millennium (2120 -> 2000) unless that would
  # be later than next year, in which case assume the previous millennium
  # (2000 -> 1000), so year 999 will become 1999.
  base = current / 1000 * 1000
  base -= 1000 if (base + year) > (current + 1) && base >= 1000

  base + year
end
88
+
89
# Parses a date/time range into a "from" and a "to" time.
#
# Accepted shapes (each side is parsed with the first matching format
# in FMTS):
# - 'from...to'
# - 'from...' ("to" is open-ended, so max!() is used for it)
# - '...to'   ("from" is open-ended, so min!() is used for it)
# - 'value'   (no '...'; the "to" side is autofilled from the "from" side)
#
# @param value [String] the range to parse
# @return [Array,nil] +[from,to]+ as returned by jst_time(), or +nil+ if
#   there is no date/time on either side (e.g., '' or '...')
# @raise [ArgumentError] if a side cannot be parsed with any format in FMTS
def self.parse_range(value)
  # Do not use unspace_web_str(), want spaces for formats.
  value = Util.strip_web_str(Util.reduce_space(value))
  values = value.split('...',2)

  # For '' (split gives []) or '...' (split with a limit gives ['','']).
  # Without the 2nd case, the empty "from" side would reach the format loop
  # below and blow up on its (missing) first token.
  return nil if values.all? { |v| Util.empty_web_str?(v) }

  # For '2020...' or '...2020'.
  if value.include?('...')
    # values.length is always 2 because of 2 in split() above.

    if Util.empty_web_str?(values[1])
      values[1] = :infinity # For '2020...'.
    elsif Util.empty_web_str?(values[0])
      values[0] = :infinity # For '...2020'.
    end
  end

  datetimes = [
    DatetimeParser.new, # "From" date time
    DatetimeParser.new, # "To" date time
  ]

  values.each_with_index do |v,i|
    dt = datetimes[i]

    # Minimum/Maximum date time for '...2020' or '2020...'.
    if v == :infinity
      i == 0 ? dt.min! : dt.max!
      next
    end

    v = Util.strip_web_str(v)
    first_token = v.split(' ').first.to_s

    parsed = FMTS.any? do |fmt|
      # If don't do this, "%d" values will be parsed using "%d %H".
      # It seems as though strptime() ignores space.
      next false if fmt.include?(' ') && !v.include?(' ')

      # If don't do this, year values will be parsed using "%d...".
      next false if fmt.start_with?('%d') && first_token.length > 2

      begin
        dt.parse!(v,fmt)
        true # No problem; this format worked
      rescue ArgumentError
        false # Try the next format
      end
    end

    # Out of formats.
    raise ArgumentError,"invalid date time[#{v}]" unless parsed
  end

  from,to = datetimes

  from.autofill!(:from,to)
  to.autofill!(:to,from)

  return [from.jst_time,to.jst_time]
end
157
+
158
# The individual date/time parts; set by set!(), parse!(), and autofill!().
attr_accessor :day,:hour,:min,:month,:sec,:year

# Whether the matching part above came from a parsed value: parse!() sets
# each flag according to the format used, and has=() sets them all at once.
attr_accessor? :has_day
attr_accessor? :has_hour
attr_accessor? :has_min
attr_accessor? :has_month
attr_accessor? :has_sec
attr_accessor? :has_year

# True once min!() or max!() has been called; autofill!() then leaves
# this date/time untouched.
attr_reader? :min_or_max
173
+
174
# Creates a date/time from the given parts (all optional), with every
# "has" flag and the min/max flag turned off.
#
# @param year [Integer,nil]
# @param month [Integer,nil]
# @param day [Integer,nil]
# @param hour [Integer,nil]
# @param min [Integer,nil]
# @param sec [Integer,nil]
def initialize(year=nil,month=nil,day=nil,hour=nil,min=nil,sec=nil)
  super()

  @min_or_max = false
  self.has = false

  set!(year,month,day,hour,min,sec)
end
182
+
183
# Fills in every part that was not parsed, using the +other+ side of the
# range, the current JST time, or a default.
#
# For each part, from smallest (sec) to biggest (year):
# - if this side or +other+ has the part, keep this side's value or else
#   copy +other+'s value;
# - else if a smaller part was present on either side, use the matching
#   part of the current JST time;
# - else use the "from" default (start of the period) or the "to" default
#   (end of the period).
#
# Does nothing if min!() or max!() was called on this date/time.
#
# @param type [Symbol] +:from+ or +:to+; which side of the range this is
# @param other [DatetimeParser] the other side of the range
# @return [self]
# @raise [ArgumentError] if +type+ is neither +:from+ nor +:to+
def autofill!(type,other)
  is_from =
    case type
    when :from then true
    when :to then false
    else raise ArgumentError,"invalid type[#{type}]"
    end

  return self if @min_or_max

  jst_now = Util.jst_now
  has_small = false

  # [part, "from" default, "to" default]
  # Must be from smallest to biggest.
  defaults = [
    [:sec,0,59],
    [:min,0,59],
    [:hour,0,23],
    [:day,1,:last_day],
    [:month,1,12],
    [:year,Util::MIN_SANE_YEAR,jst_now.year],
  ]

  defaults.each do |part,from_default,to_default|
    has_part = instance_variable_get(:"@has_#{part}")

    if has_part || other.public_send(:"has_#{part}?")
      instance_variable_set(:"@#{part}",other.public_send(part)) unless has_part
      has_small = true
    elsif has_small
      instance_variable_set(:"@#{part}",jst_now.public_send(part))
    else
      instance_variable_set(:"@#{part}",is_from ? from_default : to_default)
    end
  end

  # Must be after setting @year & @month.
  @day = Date.new(@year,@month,-1).day if @day == :last_day

  self
end
273
+
274
# Sets this date/time to the last second of Util::JST_YEAR and flags it as
# a min/max value.
#
# @return [self]
def max!
  @min_or_max = true

  # Ex: 2020-12-31 23:59:59
  set!(Util::JST_YEAR,12,31,23,59,59)
end
280
+
281
# Sets this date/time to the first second of Util::MIN_SANE_YEAR and flags
# it as a min/max value.
#
# @return [self]
def min!
  @min_or_max = true

  # Ex: 1924-01-01 00:00:00
  set!(Util::MIN_SANE_YEAR,1,1,0,0,0)
end
287
+
288
# Parses +value+ using the strptime format +fmt+ and stores only the parts
# that the format actually contains; the other parts are left as they were.
# The matching "has" flags are set for the parts in the format and cleared
# for all of the others.
#
# @param value [String] the date/time text to parse
# @param fmt [String] the Time.strptime() format to parse +value+ with
# @return [self]
# @raise [ArgumentError] from Time.strptime() if +value+ does not match +fmt+
def parse!(value,fmt)
  # The block lets guess_year() adjust the parsed year.
  value = Time.strptime(value,fmt,&self.class.method(:guess_year))

  @has_day = fmt.include?('%d')
  @has_hour = fmt.include?('%H')
  @has_min = fmt.include?('%M')
  @has_month = fmt.include?('%m')
  @has_sec = fmt.include?('%S')
  # '%y' (2-digit year) is a year too; otherwise, the parsed year would be
  # thrown away and later replaced by autofill!().
  @has_year = fmt.include?('%Y') || fmt.include?('%y')

  @day = value.day if @has_day
  @hour = value.hour if @has_hour
  @min = value.min if @has_min
  @month = value.month if @has_month
  @sec = value.sec if @has_sec
  @year = value.year if @has_year

  return self
end
307
+
308
# Overwrites every date/time part at once; omitted parts become +nil+.
# The "has" flags are not touched.
#
# @return [self]
def set!(year=nil,month=nil,day=nil,hour=nil,min=nil,sec=nil)
  @year,@month,@day = year,month,day
  @hour,@min,@sec = hour,min,sec

  self
end
318
+
319
# Sets every "has" flag (day, hour, min, month, sec, year) to +value+.
#
# @param value [Boolean] the new value for all of the flags
# @return [self] (only visible when called via send(); a plain assignment
#   evaluates to +value+)
def has=(value)
  %i[@has_day @has_hour @has_min @has_month @has_sec @has_year].each do |flag|
    instance_variable_set(flag,value)
  end

  self
end
329
+
330
# Builds this date/time with time() and passes it through Util.jst_time().
#
# @return whatever Util.jst_time() returns for time()
def jst_time
  local_time = time

  Util.jst_time(local_time)
end
333
+
334
# Builds a Time from the stored parts, in year-to-second order.
#
# @return [Time]
def time
  parts = [@year,@month,@day,@hour,@min,@sec]

  Time.new(*parts)
end
337
+
338
# Formats the stored parts as "year-month-day hour:min:sec", without any
# zero-padding (a +nil+ part shows up as an empty string).
#
# @return [String]
def to_s
  date = [@year,@month,@day].join('-')
  clock = [@hour,@min,@sec].join(':')

  "#{date} #{clock}"
end
341
+ end
342
+ end
@@ -44,7 +44,7 @@ module NHKore
44
44
  end
45
45
 
46
46
  def self.parse_url(url,basename: nil)
47
- url = Util.strip_web_str(url)
47
+ url = Util.strip_web_str(url.to_s())
48
48
 
49
49
  raise ParseError,"cannot parse dictionary URL from URL[#{url}]" if url.empty?()
50
50
 
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of NHKore.
7
+ # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # NHKore is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU Lesser General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # NHKore is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU Lesser General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU Lesser General Public License
20
+ # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'nhkore/article'
25
+ require 'nhkore/article_scraper'
26
+ require 'nhkore/cleaner'
27
+ require 'nhkore/datetime_parser'
28
+ require 'nhkore/defn'
29
+ require 'nhkore/dict'
30
+ require 'nhkore/dict_scraper'
31
+ require 'nhkore/entry'
32
+ require 'nhkore/error'
33
+ require 'nhkore/fileable'
34
+ require 'nhkore/missingno'
35
+ require 'nhkore/news'
36
+ require 'nhkore/polisher'
37
+ require 'nhkore/scraper'
38
+ require 'nhkore/search_link'
39
+ require 'nhkore/search_scraper'
40
+ require 'nhkore/sifter'
41
+ require 'nhkore/splitter'
42
+ require 'nhkore/user_agents'
43
+ require 'nhkore/util'
44
+ require 'nhkore/variator'
45
+ require 'nhkore/version'
46
+ require 'nhkore/word'
47
+
48
+
49
module NHKore
  ###
  # Empty namespace for library-only usage of this Gem.
  #
  # Require this file to load only the files needed to use NHKore as a
  # library (i.e., none of the CLI-related files are required here).
  #
  # @author Jonathan Bradley Whited (@esotericpig)
  # @since 0.3.2
  ###
  module Lib
  end
end
@@ -49,7 +49,10 @@ module NHKore
49
49
  end
50
50
 
51
51
  def add_article(article,key: nil,overwrite: false)
52
- key = article.url if key.nil?()
52
+ url = article.url
53
+ url = url.to_s() unless url.nil?()
54
+
55
+ key = key.nil?() ? url : key.to_s()
53
56
 
54
57
  if !overwrite
55
58
  raise ArgumentError,"duplicate article[#{key}] in articles" if @articles.key?(key)
@@ -57,7 +60,7 @@ module NHKore
57
60
  end
58
61
 
59
62
  @articles[key] = article
60
- @sha256s[article.sha256] = article.url
63
+ @sha256s[article.sha256] = url
61
64
 
62
65
  return self
63
66
  end
@@ -91,16 +94,20 @@ module NHKore
91
94
  end
92
95
 
93
96
  def update_article(article,url)
97
+ url = url.to_s() unless url.nil?()
98
+
94
99
  # Favor https.
95
- return if article.url =~ FAVORED_URL
100
+ return if article.url.to_s() =~ FAVORED_URL
96
101
  return if url !~ FAVORED_URL
97
102
 
98
- @articles.delete(article.url)
103
+ @articles.delete(article.url) # Probably no to_s() here
99
104
  @articles[url] = article
100
105
  article.url = url
101
106
  end
102
107
 
103
108
  def article(key)
109
+ key = key.to_s() unless key.nil?()
110
+
104
111
  return @articles[key]
105
112
  end
106
113
 
@@ -119,6 +126,8 @@ module NHKore
119
126
  end
120
127
 
121
128
  def article?(key)
129
+ key = key.to_s() unless key.nil?()
130
+
122
131
  return @articles.key?(key)
123
132
  end
124
133