date_extractor 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6ccaf1114d7c608b8f2cccb38d902db374839807
4
- data.tar.gz: f101665c1df1a41c6c4b7eeda73de833e5ff6ebf
3
+ metadata.gz: 2b729b3c4a7d4f759cb83eb4f3caa8b28290556e
4
+ data.tar.gz: 0a3921dc386b53084a8839ad6132dabb5eeaacf4
5
5
  SHA512:
6
- metadata.gz: 46b468a50ed84fbf06888fff0447acd7ad602aaf2919e89e9d686cd098240ed28b4be8d740553370a28d2d70a9299479e1d5912f1aba4246b4a3327eb2f1b8a1
7
- data.tar.gz: 58f2827dc897ee64acde531cd6dbd77a5fbb144abcf7a6c24a5e19aa188d57cad7d7557fffc92e60f2058fcf7500141a997bb55616249bfaeda54b7e4f109868
6
+ metadata.gz: 436e6fbffd2ffbec1c5693fc2761efcf004e0bb089c9e92e5f48f3bddb0b154029f0f9d9e2e0a86f23ba9c29971a8b1916ad6b1dd58a5b726c3247da270b12b0
7
+ data.tar.gz: e6b70609de5f70b98570cc12d00688e86c408957ef51de3ae33915a5a1b0eb8c2bc7357e6c46de36fb5dc5313f433b5029efaf016dc60e8f9465b689a7584904
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- date_extractor (0.1.0)
4
+ date_extractor (0.1.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
11
11
 
12
12
  spec.summary = %q{Extract dates from a text.}
13
13
  spec.description = %q{Extract dates from a text.}
14
- spec.homepage = "https://github.com/south37"
14
+ spec.homepage = "https://github.com/south37/date_extractor"
15
15
  spec.license = "MIT"
16
16
 
17
17
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
@@ -1,3 +1,3 @@
1
1
  module DateExtractor
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -111,195 +111,199 @@ module DateExtractor
111
111
 
112
112
  RE = /(?:#{DAY_RE})|(?:#{ONLY_DAY_RE})/x
113
113
 
114
- def self.get_match_and_positions(body, re)
115
- body.to_enum(:scan, re).map { [Regexp.last_match, Regexp.last_match.begin(0), Regexp.last_match.end(0)] }
116
- end
114
+ class << self
115
+ # @param [String] body
116
+ # @param [Integer | NilClass] fallback_month
117
+ # @param [Integer | NilClass] fallback_year
118
+ # @param [Boolean] debug
119
+ # @return [[String], [[Date, DateTime | NilClass, DateTime | NilClass]]] matched strings and dates
120
+ def extract(body, fallback_month: nil, fallback_year: nil, debug: false)
121
+ today = Date.today
122
+ fallback_month ||= Date.today.month
123
+ fallback_year ||= Date.today.year
124
+
125
+ day_matches = get_match_and_positions(body, RE) # [[MatchData, start, end], [...], ...]
126
+
127
+ day_with_hours = days_from_matches(day_matches.map(&:first), fallback_month, fallback_year, debug: debug) # [[MatchData, Date, DateTime, DateTime], [MatchData, Date, DateTime, nil]...]
128
+ day_with_hours_size = day_matches.size
129
+
130
+ timeslots_container = Array.new(day_with_hours_size) { Array.new } # contains timeslots in each day
131
+
132
+ timeslot_matches = get_match_and_positions(body, TIMESLOT_RE) # [[MatchData, start, end], [...], ...]
133
+ timeslot_matches.each do |(timeslot_match, start_pos, end_pos)|
134
+ i = 0 # index of left_day
135
+
136
+ while i < day_with_hours_size
137
+ left_day = day_with_hours[i]
138
+ if left_day[1].nil? # If failed to `Date.new(~)`, nil is set to left_day[1] which is `Date`
139
+ i += 1
140
+ next end
141
+
142
+ right_day = day_with_hours[i+1]
143
+ if !right_day.nil? && right_day[1].nil? # When failed to `Date.new(~)`
144
+ right_day = day_with_hours[i+2]
145
+ end
117
146
 
118
- def self.get_hour_from_timeslot_match(match)
119
- begin
120
- start_hour = to_downer_letter(match[:start_hour])
121
- rescue
122
- start_hour = nil
123
- end
147
+ if right_day.nil? # left_day is on the last
148
+ # Check if timeslot is on the right of left_day
149
+ if left_day[0].end(0) <= start_pos
150
+ timeslots_container[i].push timeslot_match
151
+ end
152
+ else
153
+ # Check if timeslot is between left_day and right_day
154
+ if left_day[0].end(0) <= start_pos && (end_pos - 1) < right_day[0].begin(0)
155
+ timeslots_container[i].push timeslot_match
156
+ end
157
+ end
124
158
 
125
- begin
126
- start_min = to_downer_letter(match[:start_min])
127
- rescue
128
- if match.names.include?('start_half_hour_unit') && match[:start_half_hour_unit] == '半'
129
- start_min = 30
130
- else
131
- start_min = nil
159
+ i += 1
160
+ end
132
161
  end
133
- end
134
162
 
135
- begin
136
- end_hour = to_downer_letter(match[:end_hour])
137
- rescue
138
- end_hour = nil
139
- end
163
+ days_from_timeslots = days_from_timeslot_matches(timeslots_container, day_with_hours) # days contains day whidh has same index with timeslots_container
164
+
165
+ result_datetimes = days_from_timeslots.map { |(match, day, start_t, end_t)| [day, start_t, end_t] }
166
+ result_strs = days_from_timeslots.map { |(match, _, _, _)| match&.[](0) }
140
167
 
141
- begin
142
- end_min = to_downer_letter(match[:end_min])
143
- rescue
144
- if match.names.include?('end_half_hour_unit') && match[:end_half_hour_unit] == '半'
145
- end_min = 30
168
+ if !debug # Reject nil dates
169
+ exists = result_datetimes.map { |arr| !arr[0].nil? }
170
+ result_strs = result_strs.select.with_index { |str, i| exists[i] }
171
+ result_datetimes = result_datetimes.select.with_index { |arr, i| exists[i] }
172
+ [result_strs, result_datetimes]
146
173
  else
147
- end_min = nil
174
+ [result_strs, result_datetimes]
148
175
  end
149
176
  end
150
177
 
151
- [start_hour, start_min, end_hour, end_min]
152
- end
178
+ private
179
+
180
+ def get_match_and_positions(body, re)
181
+ body.to_enum(:scan, re).map { [Regexp.last_match, Regexp.last_match.begin(0), Regexp.last_match.end(0)] }
182
+ end
153
183
 
154
- def self.create_datetime_if_exists(year, month, day, hour, min)
155
- if !hour.nil?
184
+ def get_hour_from_timeslot_match(match)
156
185
  begin
157
- result = DateTime.new(year, month, day, hour.to_i, min.to_i)
186
+ start_hour = to_downer_letter(match[:start_hour])
158
187
  rescue
159
- result = nil
188
+ start_hour = nil
160
189
  end
161
- else
162
- result = nil
163
- end
164
- result
165
- end
166
190
 
167
- # @return [[MatchData, Date | NilClass, DateTime | NilClass, DateTime | NilClass]]
168
- # If month is not specified, fallback_month is used as month. This value is
169
- # updated by discovering other month specification. Same for fallback_year.
170
- def self.days_from_matches(matches, fallback_month, fallback_year, debug: false)
171
- matches.map do |match|
172
191
  begin
173
- year = to_downer_letter(match[:year])
174
- fallback_year = year
192
+ start_min = to_downer_letter(match[:start_min])
175
193
  rescue
176
- year = fallback_year
194
+ if match.names.include?('start_half_hour_unit') && match[:start_half_hour_unit] == '半'
195
+ start_min = 30
196
+ else
197
+ start_min = nil
198
+ end
177
199
  end
178
200
 
179
- # When ONLY_DAY_RE is used, month is nil
180
201
  begin
181
- month = to_downer_letter(match[:month]).to_i
182
- fallback_month = month
202
+ end_hour = to_downer_letter(match[:end_hour])
183
203
  rescue
184
- month = fallback_month
204
+ end_hour = nil
185
205
  end
186
206
 
187
- day = to_downer_letter(match[:day]).to_i
188
-
189
- start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(match)
190
-
191
207
  begin
192
- date = Date.new(year, month, day)
208
+ end_min = to_downer_letter(match[:end_min])
193
209
  rescue
194
- date = nil
210
+ if match.names.include?('end_half_hour_unit') && match[:end_half_hour_unit] == '半'
211
+ end_min = 30
212
+ else
213
+ end_min = nil
214
+ end
195
215
  end
196
216
 
197
- start_t = self.create_datetime_if_exists(year, month, day, start_hour, start_min)
198
- end_t = self.create_datetime_if_exists(year, month, day, end_hour, end_min)
217
+ [start_hour, start_min, end_hour, end_min]
218
+ end
199
219
 
200
- if !date.nil?
201
- [match, date, start_t, end_t]
220
+ def create_datetime_if_exists(year, month, day, hour, min)
221
+ if !hour.nil?
222
+ begin
223
+ result = DateTime.new(year, month, day, hour.to_i, min.to_i)
224
+ rescue
225
+ result = nil
226
+ end
202
227
  else
203
- [match, nil, nil, nil]
228
+ result = nil
204
229
  end
230
+ result
205
231
  end
206
- end
207
232
 
208
- # days contains day whidh has same index with timeslots_container
209
- def self.days_from_timeslot_matches(timeslots_container, day_with_hours)
210
- result = []
211
-
212
- day_with_hours.each_with_index do |day_with_hour, i|
213
- result.push(day_with_hour)
214
- _, day, _, _ = day_with_hour #
215
- next if day.nil?
233
+ # @return [[MatchData, Date | NilClass, DateTime | NilClass, DateTime | NilClass]]
234
+ # If month is not specified, fallback_month is used as month. This value is
235
+ # updated by discovering other month specification. Same for fallback_year.
236
+ def days_from_matches(matches, fallback_month, fallback_year, debug: false)
237
+ matches.map do |match|
238
+ begin
239
+ year = to_downer_letter(match[:year])
240
+ fallback_year = year
241
+ rescue
242
+ year = fallback_year
243
+ end
216
244
 
217
- timeslot_matches = timeslots_container[i]
218
- next if (timeslot_matches.size == 0)
245
+ # When ONLY_DAY_RE is used, month is nil
246
+ begin
247
+ month = to_downer_letter(match[:month]).to_i
248
+ fallback_month = month
249
+ rescue
250
+ month = fallback_month
251
+ end
219
252
 
220
- timeslot_matches.each do |timeslot_match|
221
- start_hour, start_min, end_hour, end_min = self.get_hour_from_timeslot_match(timeslot_match)
253
+ day = to_downer_letter(match[:day]).to_i
222
254
 
223
- start_t = self.create_datetime_if_exists(day.year, day.month, day.day, start_hour, start_min)
224
- end_t = self.create_datetime_if_exists(day.year, day.month, day.day, end_hour, end_min)
255
+ start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(match)
225
256
 
226
- result.push([timeslot_match, day, start_t, end_t])
227
- end
228
- end
257
+ begin
258
+ date = Date.new(year, month, day)
259
+ rescue
260
+ date = nil
261
+ end
229
262
 
230
- result
231
- end
263
+ start_t = create_datetime_if_exists(year, month, day, start_hour, start_min)
264
+ end_t = create_datetime_if_exists(year, month, day, end_hour, end_min)
232
265
 
233
- def self.to_downer_letter(upper_or_downer_letter)
234
- upper_or_downer_letter.split('').map do |c|
235
- if /[0-9]/.match(c)
236
- (c.ord - "0".ord).to_s
237
- else
238
- c
266
+ if !date.nil?
267
+ [match, date, start_t, end_t]
268
+ else
269
+ [match, nil, nil, nil]
270
+ end
239
271
  end
240
- end.join
241
- end
242
-
243
- # @param [String] body
244
- # @param [Integer | NilClass] fallback_month
245
- # @param [Integer | NilClass] fallback_year
246
- # @param [Boolean] debug
247
- # @return [[String], [[Date, DateTime | NilClass, DateTime | NilClass]] matched strings and dates
248
- def self.extract(body, fallback_month: nil, fallback_year: nil, debug: false)
249
- today = Date.today
250
- fallback_month ||= Date.today.month
251
- fallback_year ||= Date.today.year
252
-
253
- day_matches = get_match_and_positions(body, RE) # [[MatchData, start, end], [...], ...]
272
+ end
254
273
 
255
- day_with_hours = days_from_matches(day_matches.map(&:first), fallback_month, fallback_year, debug: debug) # [[MatchData, Date, DateTime, DateTime], [MatchData, Date, DateTime, nil]...]
256
- day_with_hours_size = day_matches.size
274
+ # days contains the day which has the same index in timeslots_container
275
+ def days_from_timeslot_matches(timeslots_container, day_with_hours)
276
+ result = []
257
277
 
258
- timeslots_container = Array.new(day_with_hours_size) { Array.new } # contains timeslots in each day
278
+ day_with_hours.each_with_index do |day_with_hour, i|
279
+ result.push(day_with_hour)
280
+ _, day, _, _ = day_with_hour #
281
+ next if day.nil?
259
282
 
260
- timeslot_matches = get_match_and_positions(body, TIMESLOT_RE) # [[MatchData, start, end], [...], ...]
261
- timeslot_matches.each do |(timeslot_match, start_pos, end_pos)|
262
- i = 0 # index of left_day
283
+ timeslot_matches = timeslots_container[i]
284
+ next if (timeslot_matches.size == 0)
263
285
 
264
- while i < day_with_hours_size
265
- left_day = day_with_hours[i]
266
- if left_day[1].nil? # If failed to `Date.new(~)`, nil is set to left_day[1] which is `Date`
267
- i += 1
268
- next end
286
+ timeslot_matches.each do |timeslot_match|
287
+ start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(timeslot_match)
269
288
 
270
- right_day = day_with_hours[i+1]
271
- if !right_day.nil? && right_day[1].nil? # When failed to `Date.new(~)`
272
- right_day = day_with_hours[i+2]
273
- end
289
+ start_t = create_datetime_if_exists(day.year, day.month, day.day, start_hour, start_min)
290
+ end_t = create_datetime_if_exists(day.year, day.month, day.day, end_hour, end_min)
274
291
 
275
- if right_day.nil? # left_day is on the last
276
- # Check if timeslot is on the right of left_day
277
- if left_day[0].end(0) <= start_pos
278
- timeslots_container[i].push timeslot_match
279
- end
280
- else
281
- # Check if timeslot is between left_day and right_day
282
- if left_day[0].end(0) <= start_pos && (end_pos - 1) < right_day[0].begin(0)
283
- timeslots_container[i].push timeslot_match
284
- end
292
+ result.push([timeslot_match, day, start_t, end_t])
285
293
  end
286
-
287
- i += 1
288
294
  end
289
- end
290
-
291
- days_from_timeslots = days_from_timeslot_matches(timeslots_container, day_with_hours) # days contains day whidh has same index with timeslots_container
292
295
 
293
- result_datetimes = days_from_timeslots.map { |(match, day, start_t, end_t)| [day, start_t, end_t] }
294
- result_strs = days_from_timeslots.map { |(match, _, _, _)| match&.[](0) }
296
+ result
297
+ end
295
298
 
296
- if !debug # Reject nil dates
297
- exists = result_datetimes.map { |arr| !arr[0].nil? }
298
- result_strs = result_strs.select.with_index { |str, i| exists[i] }
299
- result_datetimes = result_datetimes.select.with_index { |arr, i| exists[i] }
300
- [result_strs, result_datetimes]
301
- else
302
- [result_strs, result_datetimes]
299
+ def to_downer_letter(upper_or_downer_letter)
300
+ upper_or_downer_letter.split('').map do |c|
301
+ if /[0-9]/.match(c)
302
+ (c.ord - "0".ord).to_s
303
+ else
304
+ c
305
+ end
306
+ end.join
303
307
  end
304
308
  end
305
309
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: date_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nao Minami
@@ -85,7 +85,7 @@ files:
85
85
  - date_extractor.gemspec
86
86
  - lib/date_extractor.rb
87
87
  - lib/date_extractor/version.rb
88
- homepage: https://github.com/south37
88
+ homepage: https://github.com/south37/date_extractor
89
89
  licenses:
90
90
  - MIT
91
91
  metadata: {}