date_extractor 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/date_extractor.gemspec +1 -1
- data/lib/date_extractor/version.rb +1 -1
- data/lib/date_extractor.rb +145 -141
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2b729b3c4a7d4f759cb83eb4f3caa8b28290556e
|
4
|
+
data.tar.gz: 0a3921dc386b53084a8839ad6132dabb5eeaacf4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 436e6fbffd2ffbec1c5693fc2761efcf004e0bb089c9e92e5f48f3bddb0b154029f0f9d9e2e0a86f23ba9c29971a8b1916ad6b1dd58a5b726c3247da270b12b0
|
7
|
+
data.tar.gz: e6b70609de5f70b98570cc12d00688e86c408957ef51de3ae33915a5a1b0eb8c2bc7357e6c46de36fb5dc5313f433b5029efaf016dc60e8f9465b689a7584904
|
data/Gemfile.lock
CHANGED
data/date_extractor.gemspec
CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
|
|
11
11
|
|
12
12
|
spec.summary = %q{Extract dates from a text.}
|
13
13
|
spec.description = %q{Extract dates from a text.}
|
14
|
-
spec.homepage = "https://github.com/south37"
|
14
|
+
spec.homepage = "https://github.com/south37/date_extractor"
|
15
15
|
spec.license = "MIT"
|
16
16
|
|
17
17
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
data/lib/date_extractor.rb
CHANGED
@@ -111,195 +111,199 @@ module DateExtractor
|
|
111
111
|
|
112
112
|
RE = /(?:#{DAY_RE})|(?:#{ONLY_DAY_RE})/x
|
113
113
|
|
114
|
-
|
115
|
-
|
116
|
-
|
114
|
+
class << self
|
115
|
+
# @param [String] body
|
116
|
+
# @param [Integer | NilClass] fallback_month
|
117
|
+
# @param [Integer | NilClass] fallback_year
|
118
|
+
# @param [Boolean] debug
|
119
|
+
# @return [[String], [[Date, DateTime | NilClass, DateTime | NilClass]] matched strings and dates
|
120
|
+
def extract(body, fallback_month: nil, fallback_year: nil, debug: false)
|
121
|
+
today = Date.today
|
122
|
+
fallback_month ||= Date.today.month
|
123
|
+
fallback_year ||= Date.today.year
|
124
|
+
|
125
|
+
day_matches = get_match_and_positions(body, RE) # [[MatchData, start, end], [...], ...]
|
126
|
+
|
127
|
+
day_with_hours = days_from_matches(day_matches.map(&:first), fallback_month, fallback_year, debug: debug) # [[MatchData, Date, DateTime, DateTime], [MatchData, Date, DateTime, nil]...]
|
128
|
+
day_with_hours_size = day_matches.size
|
129
|
+
|
130
|
+
timeslots_container = Array.new(day_with_hours_size) { Array.new } # contains timeslots in each day
|
131
|
+
|
132
|
+
timeslot_matches = get_match_and_positions(body, TIMESLOT_RE) # [[MatchData, start, end], [...], ...]
|
133
|
+
timeslot_matches.each do |(timeslot_match, start_pos, end_pos)|
|
134
|
+
i = 0 # index of left_day
|
135
|
+
|
136
|
+
while i < day_with_hours_size
|
137
|
+
left_day = day_with_hours[i]
|
138
|
+
if left_day[1].nil? # If failed to `Date.new(~)`, nil is set to left_day[1] which is `Date`
|
139
|
+
i += 1
|
140
|
+
next end
|
141
|
+
|
142
|
+
right_day = day_with_hours[i+1]
|
143
|
+
if !right_day.nil? && right_day[1].nil? # When failed to `Date.new(~)`
|
144
|
+
right_day = day_with_hours[i+2]
|
145
|
+
end
|
117
146
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
147
|
+
if right_day.nil? # left_day is on the last
|
148
|
+
# Check if timeslot is on the right of left_day
|
149
|
+
if left_day[0].end(0) <= start_pos
|
150
|
+
timeslots_container[i].push timeslot_match
|
151
|
+
end
|
152
|
+
else
|
153
|
+
# Check if timeslot is between left_day and right_day
|
154
|
+
if left_day[0].end(0) <= start_pos && (end_pos - 1) < right_day[0].begin(0)
|
155
|
+
timeslots_container[i].push timeslot_match
|
156
|
+
end
|
157
|
+
end
|
124
158
|
|
125
|
-
|
126
|
-
|
127
|
-
rescue
|
128
|
-
if match.names.include?('start_half_hour_unit') && match[:start_half_hour_unit] == '半'
|
129
|
-
start_min = 30
|
130
|
-
else
|
131
|
-
start_min = nil
|
159
|
+
i += 1
|
160
|
+
end
|
132
161
|
end
|
133
|
-
end
|
134
162
|
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
end
|
163
|
+
days_from_timeslots = days_from_timeslot_matches(timeslots_container, day_with_hours) # days contains day whidh has same index with timeslots_container
|
164
|
+
|
165
|
+
result_datetimes = days_from_timeslots.map { |(match, day, start_t, end_t)| [day, start_t, end_t] }
|
166
|
+
result_strs = days_from_timeslots.map { |(match, _, _, _)| match&.[](0) }
|
140
167
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
168
|
+
if !debug # Reject nil dates
|
169
|
+
exists = result_datetimes.map { |arr| !arr[0].nil? }
|
170
|
+
result_strs = result_strs.select.with_index { |str, i| exists[i] }
|
171
|
+
result_datetimes = result_datetimes.select.with_index { |arr, i| exists[i] }
|
172
|
+
[result_strs, result_datetimes]
|
146
173
|
else
|
147
|
-
|
174
|
+
[result_strs, result_datetimes]
|
148
175
|
end
|
149
176
|
end
|
150
177
|
|
151
|
-
|
152
|
-
|
178
|
+
private
|
179
|
+
|
180
|
+
def get_match_and_positions(body, re)
|
181
|
+
body.to_enum(:scan, re).map { [Regexp.last_match, Regexp.last_match.begin(0), Regexp.last_match.end(0)] }
|
182
|
+
end
|
153
183
|
|
154
|
-
|
155
|
-
if !hour.nil?
|
184
|
+
def get_hour_from_timeslot_match(match)
|
156
185
|
begin
|
157
|
-
|
186
|
+
start_hour = to_downer_letter(match[:start_hour])
|
158
187
|
rescue
|
159
|
-
|
188
|
+
start_hour = nil
|
160
189
|
end
|
161
|
-
else
|
162
|
-
result = nil
|
163
|
-
end
|
164
|
-
result
|
165
|
-
end
|
166
190
|
|
167
|
-
# @return [[MatchData, Date | NilClass, DateTime | NilClass, DateTime | NilClass]]
|
168
|
-
# If month is not specified, fallback_month is used as month. This value is
|
169
|
-
# updated by discovering other month specification. Same for fallback_year.
|
170
|
-
def self.days_from_matches(matches, fallback_month, fallback_year, debug: false)
|
171
|
-
matches.map do |match|
|
172
191
|
begin
|
173
|
-
|
174
|
-
fallback_year = year
|
192
|
+
start_min = to_downer_letter(match[:start_min])
|
175
193
|
rescue
|
176
|
-
|
194
|
+
if match.names.include?('start_half_hour_unit') && match[:start_half_hour_unit] == '半'
|
195
|
+
start_min = 30
|
196
|
+
else
|
197
|
+
start_min = nil
|
198
|
+
end
|
177
199
|
end
|
178
200
|
|
179
|
-
# When ONLY_DAY_RE is used, month is nil
|
180
201
|
begin
|
181
|
-
|
182
|
-
fallback_month = month
|
202
|
+
end_hour = to_downer_letter(match[:end_hour])
|
183
203
|
rescue
|
184
|
-
|
204
|
+
end_hour = nil
|
185
205
|
end
|
186
206
|
|
187
|
-
day = to_downer_letter(match[:day]).to_i
|
188
|
-
|
189
|
-
start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(match)
|
190
|
-
|
191
207
|
begin
|
192
|
-
|
208
|
+
end_min = to_downer_letter(match[:end_min])
|
193
209
|
rescue
|
194
|
-
|
210
|
+
if match.names.include?('end_half_hour_unit') && match[:end_half_hour_unit] == '半'
|
211
|
+
end_min = 30
|
212
|
+
else
|
213
|
+
end_min = nil
|
214
|
+
end
|
195
215
|
end
|
196
216
|
|
197
|
-
|
198
|
-
|
217
|
+
[start_hour, start_min, end_hour, end_min]
|
218
|
+
end
|
199
219
|
|
200
|
-
|
201
|
-
|
220
|
+
def create_datetime_if_exists(year, month, day, hour, min)
|
221
|
+
if !hour.nil?
|
222
|
+
begin
|
223
|
+
result = DateTime.new(year, month, day, hour.to_i, min.to_i)
|
224
|
+
rescue
|
225
|
+
result = nil
|
226
|
+
end
|
202
227
|
else
|
203
|
-
|
228
|
+
result = nil
|
204
229
|
end
|
230
|
+
result
|
205
231
|
end
|
206
|
-
end
|
207
232
|
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
233
|
+
# @return [[MatchData, Date | NilClass, DateTime | NilClass, DateTime | NilClass]]
|
234
|
+
# If month is not specified, fallback_month is used as month. This value is
|
235
|
+
# updated by discovering other month specification. Same for fallback_year.
|
236
|
+
def days_from_matches(matches, fallback_month, fallback_year, debug: false)
|
237
|
+
matches.map do |match|
|
238
|
+
begin
|
239
|
+
year = to_downer_letter(match[:year])
|
240
|
+
fallback_year = year
|
241
|
+
rescue
|
242
|
+
year = fallback_year
|
243
|
+
end
|
216
244
|
|
217
|
-
|
218
|
-
|
245
|
+
# When ONLY_DAY_RE is used, month is nil
|
246
|
+
begin
|
247
|
+
month = to_downer_letter(match[:month]).to_i
|
248
|
+
fallback_month = month
|
249
|
+
rescue
|
250
|
+
month = fallback_month
|
251
|
+
end
|
219
252
|
|
220
|
-
|
221
|
-
start_hour, start_min, end_hour, end_min = self.get_hour_from_timeslot_match(timeslot_match)
|
253
|
+
day = to_downer_letter(match[:day]).to_i
|
222
254
|
|
223
|
-
|
224
|
-
end_t = self.create_datetime_if_exists(day.year, day.month, day.day, end_hour, end_min)
|
255
|
+
start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(match)
|
225
256
|
|
226
|
-
|
227
|
-
|
228
|
-
|
257
|
+
begin
|
258
|
+
date = Date.new(year, month, day)
|
259
|
+
rescue
|
260
|
+
date = nil
|
261
|
+
end
|
229
262
|
|
230
|
-
|
231
|
-
|
263
|
+
start_t = create_datetime_if_exists(year, month, day, start_hour, start_min)
|
264
|
+
end_t = create_datetime_if_exists(year, month, day, end_hour, end_min)
|
232
265
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
c
|
266
|
+
if !date.nil?
|
267
|
+
[match, date, start_t, end_t]
|
268
|
+
else
|
269
|
+
[match, nil, nil, nil]
|
270
|
+
end
|
239
271
|
end
|
240
|
-
end
|
241
|
-
end
|
242
|
-
|
243
|
-
# @param [String] body
|
244
|
-
# @param [Integer | NilClass] fallback_month
|
245
|
-
# @param [Integer | NilClass] fallback_year
|
246
|
-
# @param [Boolean] debug
|
247
|
-
# @return [[String], [[Date, DateTime | NilClass, DateTime | NilClass]] matched strings and dates
|
248
|
-
def self.extract(body, fallback_month: nil, fallback_year: nil, debug: false)
|
249
|
-
today = Date.today
|
250
|
-
fallback_month ||= Date.today.month
|
251
|
-
fallback_year ||= Date.today.year
|
252
|
-
|
253
|
-
day_matches = get_match_and_positions(body, RE) # [[MatchData, start, end], [...], ...]
|
272
|
+
end
|
254
273
|
|
255
|
-
|
256
|
-
|
274
|
+
# days contains day whidh has same index with timeslots_container
|
275
|
+
def days_from_timeslot_matches(timeslots_container, day_with_hours)
|
276
|
+
result = []
|
257
277
|
|
258
|
-
|
278
|
+
day_with_hours.each_with_index do |day_with_hour, i|
|
279
|
+
result.push(day_with_hour)
|
280
|
+
_, day, _, _ = day_with_hour #
|
281
|
+
next if day.nil?
|
259
282
|
|
260
|
-
|
261
|
-
|
262
|
-
i = 0 # index of left_day
|
283
|
+
timeslot_matches = timeslots_container[i]
|
284
|
+
next if (timeslot_matches.size == 0)
|
263
285
|
|
264
|
-
|
265
|
-
|
266
|
-
if left_day[1].nil? # If failed to `Date.new(~)`, nil is set to left_day[1] which is `Date`
|
267
|
-
i += 1
|
268
|
-
next end
|
286
|
+
timeslot_matches.each do |timeslot_match|
|
287
|
+
start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(timeslot_match)
|
269
288
|
|
270
|
-
|
271
|
-
|
272
|
-
right_day = day_with_hours[i+2]
|
273
|
-
end
|
289
|
+
start_t = create_datetime_if_exists(day.year, day.month, day.day, start_hour, start_min)
|
290
|
+
end_t = create_datetime_if_exists(day.year, day.month, day.day, end_hour, end_min)
|
274
291
|
|
275
|
-
|
276
|
-
# Check if timeslot is on the right of left_day
|
277
|
-
if left_day[0].end(0) <= start_pos
|
278
|
-
timeslots_container[i].push timeslot_match
|
279
|
-
end
|
280
|
-
else
|
281
|
-
# Check if timeslot is between left_day and right_day
|
282
|
-
if left_day[0].end(0) <= start_pos && (end_pos - 1) < right_day[0].begin(0)
|
283
|
-
timeslots_container[i].push timeslot_match
|
284
|
-
end
|
292
|
+
result.push([timeslot_match, day, start_t, end_t])
|
285
293
|
end
|
286
|
-
|
287
|
-
i += 1
|
288
294
|
end
|
289
|
-
end
|
290
|
-
|
291
|
-
days_from_timeslots = days_from_timeslot_matches(timeslots_container, day_with_hours) # days contains day whidh has same index with timeslots_container
|
292
295
|
|
293
|
-
|
294
|
-
|
296
|
+
result
|
297
|
+
end
|
295
298
|
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
299
|
+
def to_downer_letter(upper_or_downer_letter)
|
300
|
+
upper_or_downer_letter.split('').map do |c|
|
301
|
+
if /[0-9]/.match(c)
|
302
|
+
(c.ord - "0".ord).to_s
|
303
|
+
else
|
304
|
+
c
|
305
|
+
end
|
306
|
+
end.join
|
303
307
|
end
|
304
308
|
end
|
305
309
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: date_extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nao Minami
|
@@ -85,7 +85,7 @@ files:
|
|
85
85
|
- date_extractor.gemspec
|
86
86
|
- lib/date_extractor.rb
|
87
87
|
- lib/date_extractor/version.rb
|
88
|
-
homepage: https://github.com/south37
|
88
|
+
homepage: https://github.com/south37/date_extractor
|
89
89
|
licenses:
|
90
90
|
- MIT
|
91
91
|
metadata: {}
|