date_extractor 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/date_extractor.gemspec +1 -1
- data/lib/date_extractor/version.rb +1 -1
- data/lib/date_extractor.rb +145 -141
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2b729b3c4a7d4f759cb83eb4f3caa8b28290556e
|
4
|
+
data.tar.gz: 0a3921dc386b53084a8839ad6132dabb5eeaacf4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 436e6fbffd2ffbec1c5693fc2761efcf004e0bb089c9e92e5f48f3bddb0b154029f0f9d9e2e0a86f23ba9c29971a8b1916ad6b1dd58a5b726c3247da270b12b0
|
7
|
+
data.tar.gz: e6b70609de5f70b98570cc12d00688e86c408957ef51de3ae33915a5a1b0eb8c2bc7357e6c46de36fb5dc5313f433b5029efaf016dc60e8f9465b689a7584904
|
data/Gemfile.lock
CHANGED
data/date_extractor.gemspec
CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
|
|
11
11
|
|
12
12
|
spec.summary = %q{Extract dates from a text.}
|
13
13
|
spec.description = %q{Extract dates from a text.}
|
14
|
-
spec.homepage = "https://github.com/south37"
|
14
|
+
spec.homepage = "https://github.com/south37/date_extractor"
|
15
15
|
spec.license = "MIT"
|
16
16
|
|
17
17
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
data/lib/date_extractor.rb
CHANGED
@@ -111,195 +111,199 @@ module DateExtractor
|
|
111
111
|
|
112
112
|
RE = /(?:#{DAY_RE})|(?:#{ONLY_DAY_RE})/x
|
113
113
|
|
114
|
-
|
115
|
-
|
116
|
-
|
114
|
+
class << self
|
115
|
+
# @param [String] body
|
116
|
+
# @param [Integer | NilClass] fallback_month
|
117
|
+
# @param [Integer | NilClass] fallback_year
|
118
|
+
# @param [Boolean] debug
|
119
|
+
# @return [[String], [[Date, DateTime | NilClass, DateTime | NilClass]] matched strings and dates
|
120
|
+
def extract(body, fallback_month: nil, fallback_year: nil, debug: false)
|
121
|
+
today = Date.today
|
122
|
+
fallback_month ||= Date.today.month
|
123
|
+
fallback_year ||= Date.today.year
|
124
|
+
|
125
|
+
day_matches = get_match_and_positions(body, RE) # [[MatchData, start, end], [...], ...]
|
126
|
+
|
127
|
+
day_with_hours = days_from_matches(day_matches.map(&:first), fallback_month, fallback_year, debug: debug) # [[MatchData, Date, DateTime, DateTime], [MatchData, Date, DateTime, nil]...]
|
128
|
+
day_with_hours_size = day_matches.size
|
129
|
+
|
130
|
+
timeslots_container = Array.new(day_with_hours_size) { Array.new } # contains timeslots in each day
|
131
|
+
|
132
|
+
timeslot_matches = get_match_and_positions(body, TIMESLOT_RE) # [[MatchData, start, end], [...], ...]
|
133
|
+
timeslot_matches.each do |(timeslot_match, start_pos, end_pos)|
|
134
|
+
i = 0 # index of left_day
|
135
|
+
|
136
|
+
while i < day_with_hours_size
|
137
|
+
left_day = day_with_hours[i]
|
138
|
+
if left_day[1].nil? # If failed to `Date.new(~)`, nil is set to left_day[1] which is `Date`
|
139
|
+
i += 1
|
140
|
+
next end
|
141
|
+
|
142
|
+
right_day = day_with_hours[i+1]
|
143
|
+
if !right_day.nil? && right_day[1].nil? # When failed to `Date.new(~)`
|
144
|
+
right_day = day_with_hours[i+2]
|
145
|
+
end
|
117
146
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
147
|
+
if right_day.nil? # left_day is on the last
|
148
|
+
# Check if timeslot is on the right of left_day
|
149
|
+
if left_day[0].end(0) <= start_pos
|
150
|
+
timeslots_container[i].push timeslot_match
|
151
|
+
end
|
152
|
+
else
|
153
|
+
# Check if timeslot is between left_day and right_day
|
154
|
+
if left_day[0].end(0) <= start_pos && (end_pos - 1) < right_day[0].begin(0)
|
155
|
+
timeslots_container[i].push timeslot_match
|
156
|
+
end
|
157
|
+
end
|
124
158
|
|
125
|
-
|
126
|
-
|
127
|
-
rescue
|
128
|
-
if match.names.include?('start_half_hour_unit') && match[:start_half_hour_unit] == '半'
|
129
|
-
start_min = 30
|
130
|
-
else
|
131
|
-
start_min = nil
|
159
|
+
i += 1
|
160
|
+
end
|
132
161
|
end
|
133
|
-
end
|
134
162
|
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
end
|
163
|
+
days_from_timeslots = days_from_timeslot_matches(timeslots_container, day_with_hours) # days contains day whidh has same index with timeslots_container
|
164
|
+
|
165
|
+
result_datetimes = days_from_timeslots.map { |(match, day, start_t, end_t)| [day, start_t, end_t] }
|
166
|
+
result_strs = days_from_timeslots.map { |(match, _, _, _)| match&.[](0) }
|
140
167
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
168
|
+
if !debug # Reject nil dates
|
169
|
+
exists = result_datetimes.map { |arr| !arr[0].nil? }
|
170
|
+
result_strs = result_strs.select.with_index { |str, i| exists[i] }
|
171
|
+
result_datetimes = result_datetimes.select.with_index { |arr, i| exists[i] }
|
172
|
+
[result_strs, result_datetimes]
|
146
173
|
else
|
147
|
-
|
174
|
+
[result_strs, result_datetimes]
|
148
175
|
end
|
149
176
|
end
|
150
177
|
|
151
|
-
|
152
|
-
|
178
|
+
private
|
179
|
+
|
180
|
+
def get_match_and_positions(body, re)
|
181
|
+
body.to_enum(:scan, re).map { [Regexp.last_match, Regexp.last_match.begin(0), Regexp.last_match.end(0)] }
|
182
|
+
end
|
153
183
|
|
154
|
-
|
155
|
-
if !hour.nil?
|
184
|
+
def get_hour_from_timeslot_match(match)
|
156
185
|
begin
|
157
|
-
|
186
|
+
start_hour = to_downer_letter(match[:start_hour])
|
158
187
|
rescue
|
159
|
-
|
188
|
+
start_hour = nil
|
160
189
|
end
|
161
|
-
else
|
162
|
-
result = nil
|
163
|
-
end
|
164
|
-
result
|
165
|
-
end
|
166
190
|
|
167
|
-
# @return [[MatchData, Date | NilClass, DateTime | NilClass, DateTime | NilClass]]
|
168
|
-
# If month is not specified, fallback_month is used as month. This value is
|
169
|
-
# updated by discovering other month specification. Same for fallback_year.
|
170
|
-
def self.days_from_matches(matches, fallback_month, fallback_year, debug: false)
|
171
|
-
matches.map do |match|
|
172
191
|
begin
|
173
|
-
|
174
|
-
fallback_year = year
|
192
|
+
start_min = to_downer_letter(match[:start_min])
|
175
193
|
rescue
|
176
|
-
|
194
|
+
if match.names.include?('start_half_hour_unit') && match[:start_half_hour_unit] == '半'
|
195
|
+
start_min = 30
|
196
|
+
else
|
197
|
+
start_min = nil
|
198
|
+
end
|
177
199
|
end
|
178
200
|
|
179
|
-
# When ONLY_DAY_RE is used, month is nil
|
180
201
|
begin
|
181
|
-
|
182
|
-
fallback_month = month
|
202
|
+
end_hour = to_downer_letter(match[:end_hour])
|
183
203
|
rescue
|
184
|
-
|
204
|
+
end_hour = nil
|
185
205
|
end
|
186
206
|
|
187
|
-
day = to_downer_letter(match[:day]).to_i
|
188
|
-
|
189
|
-
start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(match)
|
190
|
-
|
191
207
|
begin
|
192
|
-
|
208
|
+
end_min = to_downer_letter(match[:end_min])
|
193
209
|
rescue
|
194
|
-
|
210
|
+
if match.names.include?('end_half_hour_unit') && match[:end_half_hour_unit] == '半'
|
211
|
+
end_min = 30
|
212
|
+
else
|
213
|
+
end_min = nil
|
214
|
+
end
|
195
215
|
end
|
196
216
|
|
197
|
-
|
198
|
-
|
217
|
+
[start_hour, start_min, end_hour, end_min]
|
218
|
+
end
|
199
219
|
|
200
|
-
|
201
|
-
|
220
|
+
def create_datetime_if_exists(year, month, day, hour, min)
|
221
|
+
if !hour.nil?
|
222
|
+
begin
|
223
|
+
result = DateTime.new(year, month, day, hour.to_i, min.to_i)
|
224
|
+
rescue
|
225
|
+
result = nil
|
226
|
+
end
|
202
227
|
else
|
203
|
-
|
228
|
+
result = nil
|
204
229
|
end
|
230
|
+
result
|
205
231
|
end
|
206
|
-
end
|
207
232
|
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
233
|
+
# @return [[MatchData, Date | NilClass, DateTime | NilClass, DateTime | NilClass]]
|
234
|
+
# If month is not specified, fallback_month is used as month. This value is
|
235
|
+
# updated by discovering other month specification. Same for fallback_year.
|
236
|
+
def days_from_matches(matches, fallback_month, fallback_year, debug: false)
|
237
|
+
matches.map do |match|
|
238
|
+
begin
|
239
|
+
year = to_downer_letter(match[:year])
|
240
|
+
fallback_year = year
|
241
|
+
rescue
|
242
|
+
year = fallback_year
|
243
|
+
end
|
216
244
|
|
217
|
-
|
218
|
-
|
245
|
+
# When ONLY_DAY_RE is used, month is nil
|
246
|
+
begin
|
247
|
+
month = to_downer_letter(match[:month]).to_i
|
248
|
+
fallback_month = month
|
249
|
+
rescue
|
250
|
+
month = fallback_month
|
251
|
+
end
|
219
252
|
|
220
|
-
|
221
|
-
start_hour, start_min, end_hour, end_min = self.get_hour_from_timeslot_match(timeslot_match)
|
253
|
+
day = to_downer_letter(match[:day]).to_i
|
222
254
|
|
223
|
-
|
224
|
-
end_t = self.create_datetime_if_exists(day.year, day.month, day.day, end_hour, end_min)
|
255
|
+
start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(match)
|
225
256
|
|
226
|
-
|
227
|
-
|
228
|
-
|
257
|
+
begin
|
258
|
+
date = Date.new(year, month, day)
|
259
|
+
rescue
|
260
|
+
date = nil
|
261
|
+
end
|
229
262
|
|
230
|
-
|
231
|
-
|
263
|
+
start_t = create_datetime_if_exists(year, month, day, start_hour, start_min)
|
264
|
+
end_t = create_datetime_if_exists(year, month, day, end_hour, end_min)
|
232
265
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
c
|
266
|
+
if !date.nil?
|
267
|
+
[match, date, start_t, end_t]
|
268
|
+
else
|
269
|
+
[match, nil, nil, nil]
|
270
|
+
end
|
239
271
|
end
|
240
|
-
end
|
241
|
-
end
|
242
|
-
|
243
|
-
# @param [String] body
|
244
|
-
# @param [Integer | NilClass] fallback_month
|
245
|
-
# @param [Integer | NilClass] fallback_year
|
246
|
-
# @param [Boolean] debug
|
247
|
-
# @return [[String], [[Date, DateTime | NilClass, DateTime | NilClass]] matched strings and dates
|
248
|
-
def self.extract(body, fallback_month: nil, fallback_year: nil, debug: false)
|
249
|
-
today = Date.today
|
250
|
-
fallback_month ||= Date.today.month
|
251
|
-
fallback_year ||= Date.today.year
|
252
|
-
|
253
|
-
day_matches = get_match_and_positions(body, RE) # [[MatchData, start, end], [...], ...]
|
272
|
+
end
|
254
273
|
|
255
|
-
|
256
|
-
|
274
|
+
# days contains day whidh has same index with timeslots_container
|
275
|
+
def days_from_timeslot_matches(timeslots_container, day_with_hours)
|
276
|
+
result = []
|
257
277
|
|
258
|
-
|
278
|
+
day_with_hours.each_with_index do |day_with_hour, i|
|
279
|
+
result.push(day_with_hour)
|
280
|
+
_, day, _, _ = day_with_hour #
|
281
|
+
next if day.nil?
|
259
282
|
|
260
|
-
|
261
|
-
|
262
|
-
i = 0 # index of left_day
|
283
|
+
timeslot_matches = timeslots_container[i]
|
284
|
+
next if (timeslot_matches.size == 0)
|
263
285
|
|
264
|
-
|
265
|
-
|
266
|
-
if left_day[1].nil? # If failed to `Date.new(~)`, nil is set to left_day[1] which is `Date`
|
267
|
-
i += 1
|
268
|
-
next end
|
286
|
+
timeslot_matches.each do |timeslot_match|
|
287
|
+
start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(timeslot_match)
|
269
288
|
|
270
|
-
|
271
|
-
|
272
|
-
right_day = day_with_hours[i+2]
|
273
|
-
end
|
289
|
+
start_t = create_datetime_if_exists(day.year, day.month, day.day, start_hour, start_min)
|
290
|
+
end_t = create_datetime_if_exists(day.year, day.month, day.day, end_hour, end_min)
|
274
291
|
|
275
|
-
|
276
|
-
# Check if timeslot is on the right of left_day
|
277
|
-
if left_day[0].end(0) <= start_pos
|
278
|
-
timeslots_container[i].push timeslot_match
|
279
|
-
end
|
280
|
-
else
|
281
|
-
# Check if timeslot is between left_day and right_day
|
282
|
-
if left_day[0].end(0) <= start_pos && (end_pos - 1) < right_day[0].begin(0)
|
283
|
-
timeslots_container[i].push timeslot_match
|
284
|
-
end
|
292
|
+
result.push([timeslot_match, day, start_t, end_t])
|
285
293
|
end
|
286
|
-
|
287
|
-
i += 1
|
288
294
|
end
|
289
|
-
end
|
290
|
-
|
291
|
-
days_from_timeslots = days_from_timeslot_matches(timeslots_container, day_with_hours) # days contains day whidh has same index with timeslots_container
|
292
295
|
|
293
|
-
|
294
|
-
|
296
|
+
result
|
297
|
+
end
|
295
298
|
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
299
|
+
def to_downer_letter(upper_or_downer_letter)
|
300
|
+
upper_or_downer_letter.split('').map do |c|
|
301
|
+
if /[0-9]/.match(c)
|
302
|
+
(c.ord - "0".ord).to_s
|
303
|
+
else
|
304
|
+
c
|
305
|
+
end
|
306
|
+
end.join
|
303
307
|
end
|
304
308
|
end
|
305
309
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: date_extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nao Minami
|
@@ -85,7 +85,7 @@ files:
|
|
85
85
|
- date_extractor.gemspec
|
86
86
|
- lib/date_extractor.rb
|
87
87
|
- lib/date_extractor/version.rb
|
88
|
-
homepage: https://github.com/south37
|
88
|
+
homepage: https://github.com/south37/date_extractor
|
89
89
|
licenses:
|
90
90
|
- MIT
|
91
91
|
metadata: {}
|