date_extractor 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6ccaf1114d7c608b8f2cccb38d902db374839807
4
- data.tar.gz: f101665c1df1a41c6c4b7eeda73de833e5ff6ebf
3
+ metadata.gz: 2b729b3c4a7d4f759cb83eb4f3caa8b28290556e
4
+ data.tar.gz: 0a3921dc386b53084a8839ad6132dabb5eeaacf4
5
5
  SHA512:
6
- metadata.gz: 46b468a50ed84fbf06888fff0447acd7ad602aaf2919e89e9d686cd098240ed28b4be8d740553370a28d2d70a9299479e1d5912f1aba4246b4a3327eb2f1b8a1
7
- data.tar.gz: 58f2827dc897ee64acde531cd6dbd77a5fbb144abcf7a6c24a5e19aa188d57cad7d7557fffc92e60f2058fcf7500141a997bb55616249bfaeda54b7e4f109868
6
+ metadata.gz: 436e6fbffd2ffbec1c5693fc2761efcf004e0bb089c9e92e5f48f3bddb0b154029f0f9d9e2e0a86f23ba9c29971a8b1916ad6b1dd58a5b726c3247da270b12b0
7
+ data.tar.gz: e6b70609de5f70b98570cc12d00688e86c408957ef51de3ae33915a5a1b0eb8c2bc7357e6c46de36fb5dc5313f433b5029efaf016dc60e8f9465b689a7584904
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- date_extractor (0.1.0)
4
+ date_extractor (0.1.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
11
11
 
12
12
  spec.summary = %q{Extract dates from a text.}
13
13
  spec.description = %q{Extract dates from a text.}
14
- spec.homepage = "https://github.com/south37"
14
+ spec.homepage = "https://github.com/south37/date_extractor"
15
15
  spec.license = "MIT"
16
16
 
17
17
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
@@ -1,3 +1,3 @@
1
1
  module DateExtractor
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -111,195 +111,199 @@ module DateExtractor
111
111
 
112
112
  RE = /(?:#{DAY_RE})|(?:#{ONLY_DAY_RE})/x
113
113
 
114
- def self.get_match_and_positions(body, re)
115
- body.to_enum(:scan, re).map { [Regexp.last_match, Regexp.last_match.begin(0), Regexp.last_match.end(0)] }
116
- end
114
+ class << self
115
+ # @param [String] body
116
+ # @param [Integer | NilClass] fallback_month
117
+ # @param [Integer | NilClass] fallback_year
118
+ # @param [Boolean] debug
119
+ # @return [[String], [[Date, DateTime | NilClass, DateTime | NilClass]] matched strings and dates
120
+ def extract(body, fallback_month: nil, fallback_year: nil, debug: false)
121
+ today = Date.today
122
+ fallback_month ||= Date.today.month
123
+ fallback_year ||= Date.today.year
124
+
125
+ day_matches = get_match_and_positions(body, RE) # [[MatchData, start, end], [...], ...]
126
+
127
+ day_with_hours = days_from_matches(day_matches.map(&:first), fallback_month, fallback_year, debug: debug) # [[MatchData, Date, DateTime, DateTime], [MatchData, Date, DateTime, nil]...]
128
+ day_with_hours_size = day_matches.size
129
+
130
+ timeslots_container = Array.new(day_with_hours_size) { Array.new } # contains timeslots in each day
131
+
132
+ timeslot_matches = get_match_and_positions(body, TIMESLOT_RE) # [[MatchData, start, end], [...], ...]
133
+ timeslot_matches.each do |(timeslot_match, start_pos, end_pos)|
134
+ i = 0 # index of left_day
135
+
136
+ while i < day_with_hours_size
137
+ left_day = day_with_hours[i]
138
+ if left_day[1].nil? # If failed to `Date.new(~)`, nil is set to left_day[1] which is `Date`
139
+ i += 1
140
+ next end
141
+
142
+ right_day = day_with_hours[i+1]
143
+ if !right_day.nil? && right_day[1].nil? # When failed to `Date.new(~)`
144
+ right_day = day_with_hours[i+2]
145
+ end
117
146
 
118
- def self.get_hour_from_timeslot_match(match)
119
- begin
120
- start_hour = to_downer_letter(match[:start_hour])
121
- rescue
122
- start_hour = nil
123
- end
147
+ if right_day.nil? # left_day is on the last
148
+ # Check if timeslot is on the right of left_day
149
+ if left_day[0].end(0) <= start_pos
150
+ timeslots_container[i].push timeslot_match
151
+ end
152
+ else
153
+ # Check if timeslot is between left_day and right_day
154
+ if left_day[0].end(0) <= start_pos && (end_pos - 1) < right_day[0].begin(0)
155
+ timeslots_container[i].push timeslot_match
156
+ end
157
+ end
124
158
 
125
- begin
126
- start_min = to_downer_letter(match[:start_min])
127
- rescue
128
- if match.names.include?('start_half_hour_unit') && match[:start_half_hour_unit] == '半'
129
- start_min = 30
130
- else
131
- start_min = nil
159
+ i += 1
160
+ end
132
161
  end
133
- end
134
162
 
135
- begin
136
- end_hour = to_downer_letter(match[:end_hour])
137
- rescue
138
- end_hour = nil
139
- end
163
+ days_from_timeslots = days_from_timeslot_matches(timeslots_container, day_with_hours) # days contains day whidh has same index with timeslots_container
164
+
165
+ result_datetimes = days_from_timeslots.map { |(match, day, start_t, end_t)| [day, start_t, end_t] }
166
+ result_strs = days_from_timeslots.map { |(match, _, _, _)| match&.[](0) }
140
167
 
141
- begin
142
- end_min = to_downer_letter(match[:end_min])
143
- rescue
144
- if match.names.include?('end_half_hour_unit') && match[:end_half_hour_unit] == '半'
145
- end_min = 30
168
+ if !debug # Reject nil dates
169
+ exists = result_datetimes.map { |arr| !arr[0].nil? }
170
+ result_strs = result_strs.select.with_index { |str, i| exists[i] }
171
+ result_datetimes = result_datetimes.select.with_index { |arr, i| exists[i] }
172
+ [result_strs, result_datetimes]
146
173
  else
147
- end_min = nil
174
+ [result_strs, result_datetimes]
148
175
  end
149
176
  end
150
177
 
151
- [start_hour, start_min, end_hour, end_min]
152
- end
178
+ private
179
+
180
+ def get_match_and_positions(body, re)
181
+ body.to_enum(:scan, re).map { [Regexp.last_match, Regexp.last_match.begin(0), Regexp.last_match.end(0)] }
182
+ end
153
183
 
154
- def self.create_datetime_if_exists(year, month, day, hour, min)
155
- if !hour.nil?
184
+ def get_hour_from_timeslot_match(match)
156
185
  begin
157
- result = DateTime.new(year, month, day, hour.to_i, min.to_i)
186
+ start_hour = to_downer_letter(match[:start_hour])
158
187
  rescue
159
- result = nil
188
+ start_hour = nil
160
189
  end
161
- else
162
- result = nil
163
- end
164
- result
165
- end
166
190
 
167
- # @return [[MatchData, Date | NilClass, DateTime | NilClass, DateTime | NilClass]]
168
- # If month is not specified, fallback_month is used as month. This value is
169
- # updated by discovering other month specification. Same for fallback_year.
170
- def self.days_from_matches(matches, fallback_month, fallback_year, debug: false)
171
- matches.map do |match|
172
191
  begin
173
- year = to_downer_letter(match[:year])
174
- fallback_year = year
192
+ start_min = to_downer_letter(match[:start_min])
175
193
  rescue
176
- year = fallback_year
194
+ if match.names.include?('start_half_hour_unit') && match[:start_half_hour_unit] == '半'
195
+ start_min = 30
196
+ else
197
+ start_min = nil
198
+ end
177
199
  end
178
200
 
179
- # When ONLY_DAY_RE is used, month is nil
180
201
  begin
181
- month = to_downer_letter(match[:month]).to_i
182
- fallback_month = month
202
+ end_hour = to_downer_letter(match[:end_hour])
183
203
  rescue
184
- month = fallback_month
204
+ end_hour = nil
185
205
  end
186
206
 
187
- day = to_downer_letter(match[:day]).to_i
188
-
189
- start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(match)
190
-
191
207
  begin
192
- date = Date.new(year, month, day)
208
+ end_min = to_downer_letter(match[:end_min])
193
209
  rescue
194
- date = nil
210
+ if match.names.include?('end_half_hour_unit') && match[:end_half_hour_unit] == '半'
211
+ end_min = 30
212
+ else
213
+ end_min = nil
214
+ end
195
215
  end
196
216
 
197
- start_t = self.create_datetime_if_exists(year, month, day, start_hour, start_min)
198
- end_t = self.create_datetime_if_exists(year, month, day, end_hour, end_min)
217
+ [start_hour, start_min, end_hour, end_min]
218
+ end
199
219
 
200
- if !date.nil?
201
- [match, date, start_t, end_t]
220
+ def create_datetime_if_exists(year, month, day, hour, min)
221
+ if !hour.nil?
222
+ begin
223
+ result = DateTime.new(year, month, day, hour.to_i, min.to_i)
224
+ rescue
225
+ result = nil
226
+ end
202
227
  else
203
- [match, nil, nil, nil]
228
+ result = nil
204
229
  end
230
+ result
205
231
  end
206
- end
207
232
 
208
- # days contains day whidh has same index with timeslots_container
209
- def self.days_from_timeslot_matches(timeslots_container, day_with_hours)
210
- result = []
211
-
212
- day_with_hours.each_with_index do |day_with_hour, i|
213
- result.push(day_with_hour)
214
- _, day, _, _ = day_with_hour #
215
- next if day.nil?
233
+ # @return [[MatchData, Date | NilClass, DateTime | NilClass, DateTime | NilClass]]
234
+ # If month is not specified, fallback_month is used as month. This value is
235
+ # updated by discovering other month specification. Same for fallback_year.
236
+ def days_from_matches(matches, fallback_month, fallback_year, debug: false)
237
+ matches.map do |match|
238
+ begin
239
+ year = to_downer_letter(match[:year])
240
+ fallback_year = year
241
+ rescue
242
+ year = fallback_year
243
+ end
216
244
 
217
- timeslot_matches = timeslots_container[i]
218
- next if (timeslot_matches.size == 0)
245
+ # When ONLY_DAY_RE is used, month is nil
246
+ begin
247
+ month = to_downer_letter(match[:month]).to_i
248
+ fallback_month = month
249
+ rescue
250
+ month = fallback_month
251
+ end
219
252
 
220
- timeslot_matches.each do |timeslot_match|
221
- start_hour, start_min, end_hour, end_min = self.get_hour_from_timeslot_match(timeslot_match)
253
+ day = to_downer_letter(match[:day]).to_i
222
254
 
223
- start_t = self.create_datetime_if_exists(day.year, day.month, day.day, start_hour, start_min)
224
- end_t = self.create_datetime_if_exists(day.year, day.month, day.day, end_hour, end_min)
255
+ start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(match)
225
256
 
226
- result.push([timeslot_match, day, start_t, end_t])
227
- end
228
- end
257
+ begin
258
+ date = Date.new(year, month, day)
259
+ rescue
260
+ date = nil
261
+ end
229
262
 
230
- result
231
- end
263
+ start_t = create_datetime_if_exists(year, month, day, start_hour, start_min)
264
+ end_t = create_datetime_if_exists(year, month, day, end_hour, end_min)
232
265
 
233
- def self.to_downer_letter(upper_or_downer_letter)
234
- upper_or_downer_letter.split('').map do |c|
235
- if /[0-9]/.match(c)
236
- (c.ord - "0".ord).to_s
237
- else
238
- c
266
+ if !date.nil?
267
+ [match, date, start_t, end_t]
268
+ else
269
+ [match, nil, nil, nil]
270
+ end
239
271
  end
240
- end.join
241
- end
242
-
243
- # @param [String] body
244
- # @param [Integer | NilClass] fallback_month
245
- # @param [Integer | NilClass] fallback_year
246
- # @param [Boolean] debug
247
- # @return [[String], [[Date, DateTime | NilClass, DateTime | NilClass]] matched strings and dates
248
- def self.extract(body, fallback_month: nil, fallback_year: nil, debug: false)
249
- today = Date.today
250
- fallback_month ||= Date.today.month
251
- fallback_year ||= Date.today.year
252
-
253
- day_matches = get_match_and_positions(body, RE) # [[MatchData, start, end], [...], ...]
272
+ end
254
273
 
255
- day_with_hours = days_from_matches(day_matches.map(&:first), fallback_month, fallback_year, debug: debug) # [[MatchData, Date, DateTime, DateTime], [MatchData, Date, DateTime, nil]...]
256
- day_with_hours_size = day_matches.size
274
+ # days contains day whidh has same index with timeslots_container
275
+ def days_from_timeslot_matches(timeslots_container, day_with_hours)
276
+ result = []
257
277
 
258
- timeslots_container = Array.new(day_with_hours_size) { Array.new } # contains timeslots in each day
278
+ day_with_hours.each_with_index do |day_with_hour, i|
279
+ result.push(day_with_hour)
280
+ _, day, _, _ = day_with_hour #
281
+ next if day.nil?
259
282
 
260
- timeslot_matches = get_match_and_positions(body, TIMESLOT_RE) # [[MatchData, start, end], [...], ...]
261
- timeslot_matches.each do |(timeslot_match, start_pos, end_pos)|
262
- i = 0 # index of left_day
283
+ timeslot_matches = timeslots_container[i]
284
+ next if (timeslot_matches.size == 0)
263
285
 
264
- while i < day_with_hours_size
265
- left_day = day_with_hours[i]
266
- if left_day[1].nil? # If failed to `Date.new(~)`, nil is set to left_day[1] which is `Date`
267
- i += 1
268
- next end
286
+ timeslot_matches.each do |timeslot_match|
287
+ start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(timeslot_match)
269
288
 
270
- right_day = day_with_hours[i+1]
271
- if !right_day.nil? && right_day[1].nil? # When failed to `Date.new(~)`
272
- right_day = day_with_hours[i+2]
273
- end
289
+ start_t = create_datetime_if_exists(day.year, day.month, day.day, start_hour, start_min)
290
+ end_t = create_datetime_if_exists(day.year, day.month, day.day, end_hour, end_min)
274
291
 
275
- if right_day.nil? # left_day is on the last
276
- # Check if timeslot is on the right of left_day
277
- if left_day[0].end(0) <= start_pos
278
- timeslots_container[i].push timeslot_match
279
- end
280
- else
281
- # Check if timeslot is between left_day and right_day
282
- if left_day[0].end(0) <= start_pos && (end_pos - 1) < right_day[0].begin(0)
283
- timeslots_container[i].push timeslot_match
284
- end
292
+ result.push([timeslot_match, day, start_t, end_t])
285
293
  end
286
-
287
- i += 1
288
294
  end
289
- end
290
-
291
- days_from_timeslots = days_from_timeslot_matches(timeslots_container, day_with_hours) # days contains day whidh has same index with timeslots_container
292
295
 
293
- result_datetimes = days_from_timeslots.map { |(match, day, start_t, end_t)| [day, start_t, end_t] }
294
- result_strs = days_from_timeslots.map { |(match, _, _, _)| match&.[](0) }
296
+ result
297
+ end
295
298
 
296
- if !debug # Reject nil dates
297
- exists = result_datetimes.map { |arr| !arr[0].nil? }
298
- result_strs = result_strs.select.with_index { |str, i| exists[i] }
299
- result_datetimes = result_datetimes.select.with_index { |arr, i| exists[i] }
300
- [result_strs, result_datetimes]
301
- else
302
- [result_strs, result_datetimes]
299
+ def to_downer_letter(upper_or_downer_letter)
300
+ upper_or_downer_letter.split('').map do |c|
301
+ if /[0-9]/.match(c)
302
+ (c.ord - "0".ord).to_s
303
+ else
304
+ c
305
+ end
306
+ end.join
303
307
  end
304
308
  end
305
309
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: date_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nao Minami
@@ -85,7 +85,7 @@ files:
85
85
  - date_extractor.gemspec
86
86
  - lib/date_extractor.rb
87
87
  - lib/date_extractor/version.rb
88
- homepage: https://github.com/south37
88
+ homepage: https://github.com/south37/date_extractor
89
89
  licenses:
90
90
  - MIT
91
91
  metadata: {}