hizuke 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/hizuke/parser.rb CHANGED
@@ -1,437 +1,109 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "date"
3
+ require_relative 'constants'
4
+ require_relative 'date_calculator'
5
+ require_relative 'pattern_matcher'
6
+ require 'date'
4
7
 
5
8
  module Hizuke
6
- # Result object containing the clean text and extracted date
7
- class Result
8
- attr_reader :text, :date
9
-
10
- def initialize(text, date)
11
- @text = text
12
- @date = date
13
- end
14
- end
15
-
16
- # Parser class responsible for extracting dates from text
17
- class Parser
18
- # Mapping of day names to their wday values (0-6, Sunday is 0)
19
- DAYS_OF_WEEK = {
20
- "monday" => 1,
21
- "tuesday" => 2,
22
- "wednesday" => 3,
23
- "thursday" => 4,
24
- "friday" => 5,
25
- "saturday" => 6,
26
- "sunday" => 0
27
- }.freeze
28
-
29
- # Date keywords mapping
30
- DATE_KEYWORDS = {
31
- "yesterday" => -1,
32
- "today" => 0,
33
- "tomorrow" => 1,
34
- "dayaftertomorrow" => 2,
35
- "day after tomorrow" => 2,
36
- "daybeforeyesterday" => -2,
37
- "day before yesterday" => -2,
38
- "nextweek" => :next_week,
39
- "next week" => :next_week,
40
- "lastweek" => :last_week,
41
- "last week" => :last_week,
42
- "nextmonth" => :next_month,
43
- "next month" => :next_month,
44
- "lastmonth" => :last_month,
45
- "last month" => :last_month,
46
- "nextyear" => :next_year,
47
- "next year" => :next_year,
48
- "lastyear" => :last_year,
49
- "last year" => :last_year,
50
- "nextquarter" => :next_quarter,
51
- "next quarter" => :next_quarter,
52
- "lastquarter" => :last_quarter,
53
- "last quarter" => :last_quarter,
54
- "thisweekend" => :this_weekend,
55
- "this weekend" => :this_weekend,
56
- "endofweek" => :end_of_week,
57
- "end of week" => :end_of_week,
58
- "endofmonth" => :end_of_month,
59
- "end of month" => :end_of_month,
60
- "endofyear" => :end_of_year,
61
- "end of year" => :end_of_year,
62
- "midweek" => :mid_week,
63
- "mid week" => :mid_week,
64
- "midmonth" => :mid_month,
65
- "mid month" => :mid_month
66
- }.freeze
67
-
68
- # Regex patterns for dynamic date references
69
- IN_X_DAYS_PATTERN = /in (\d+) days?/i
70
- X_DAYS_AGO_PATTERN = /(\d+) days? ago/i
71
- IN_X_WEEKS_PATTERN = /in (\d+) weeks?/i
72
- X_WEEKS_AGO_PATTERN = /(\d+) weeks? ago/i
73
- IN_X_MONTHS_PATTERN = /in (\d+) months?/i
74
- X_MONTHS_AGO_PATTERN = /(\d+) months? ago/i
75
- IN_X_YEARS_PATTERN = /in (\d+) years?/i
76
- X_YEARS_AGO_PATTERN = /(\d+) years? ago/i
77
-
78
- # Regex patterns for specific days of the week
79
- THIS_DAY_PATTERN = /this (monday|tuesday|wednesday|thursday|friday|saturday|sunday)/i
80
- NEXT_DAY_PATTERN = /next (monday|tuesday|wednesday|thursday|friday|saturday|sunday)/i
81
- LAST_DAY_PATTERN = /last (monday|tuesday|wednesday|thursday|friday|saturday|sunday)/i
82
-
83
- # Parse text containing time references and extract both
84
- # the clean text and the date.
85
- #
86
- # @param text [String] the text to parse
87
- # @return [Hizuke::Result] the parsing result containing text and date
88
- # @raise [Hizuke::ParseError] if no valid date reference is found
89
- def self.parse(text)
90
- new.parse(text)
9
+ # Time of day representation with hour, minute and second
10
+ class TimeOfDay
11
+ attr_reader :hour, :min, :sec
12
+
13
+ def initialize(hour, min, sec)
14
+ @hour = hour
15
+ @min = min
16
+ @sec = sec
91
17
  end
92
18
 
93
- # Instance method to parse text
94
- #
95
- # @param text [String] the text to parse
96
- # @return [Hizuke::Result] the parsing result containing text and date
97
- # @raise [Hizuke::ParseError] if no valid date reference is found
98
- def parse(text)
99
- # Check if text is nil or empty
100
- raise ParseError, "Input text cannot be nil or empty" if text.nil? || text.empty?
101
-
102
- # Check for dynamic patterns first (in X days, X days ago)
103
- result = check_dynamic_patterns(text)
104
- return result if result
105
-
106
- # Check for day of week patterns (this Monday, next Tuesday, etc.)
107
- result = check_day_of_week_patterns(text)
108
- return result if result
109
-
110
- # Try to find compound date expressions (like "next week")
111
- compound_matches = {}
112
-
113
- DATE_KEYWORDS.keys.select { |k| k.include?(" ") }.each do |compound_key|
114
- if text.downcase.include?(compound_key)
115
- start_idx = text.downcase.index(compound_key)
116
- end_idx = start_idx + compound_key.length - 1
117
- compound_matches[compound_key] = [start_idx, end_idx]
118
- end
119
- end
120
-
121
- # If we found compound matches, handle them specially
122
- unless compound_matches.empty?
123
- # Use the first match (in case there are multiple)
124
- match_key, indices = compound_matches.min_by { |_, v| v[0] }
125
-
126
- # Calculate date based on the keyword
127
- date_value = DATE_KEYWORDS[match_key]
128
- date = calculate_date(date_value)
129
-
130
- # Remove the date expression from the text
131
- clean_text = text.dup
132
- clean_text.slice!(indices[0]..indices[1])
133
- clean_text = clean_text.strip
134
-
135
- return Result.new(clean_text, date)
136
- end
137
-
138
- # Split the text into words (for single-word date references)
139
- words = text.split
140
-
141
- # Find the first date keyword
142
- date_word_index = nil
143
- date_value = nil
144
-
145
- words.each_with_index do |word, index|
146
- clean_word = word.downcase.gsub(/[^a-z]/, '')
147
- if DATE_KEYWORDS.key?(clean_word)
148
- date_word_index = index
149
- date_value = DATE_KEYWORDS[clean_word]
150
- break
151
- end
19
+ def to_s
20
+ # Include seconds in the format if they are not zero
21
+ if sec.zero?
22
+ format('%<hour>02d:%<min>02d', hour: hour, min: min)
23
+ else
24
+ format('%<hour>02d:%<min>02d:%<sec>02d', hour: hour, min: min, sec: sec)
152
25
  end
153
-
154
- if date_word_index.nil?
155
- raise ParseError, "No valid date reference found in '#{text}'"
156
- end
157
-
158
- # Calculate the date based on the keyword
159
- date = calculate_date(date_value)
160
-
161
- # Create the clean text by removing the date keyword
162
- clean_words = words.dup
163
- clean_words.delete_at(date_word_index)
164
- clean_text = clean_words.join(" ").strip
165
-
166
- Result.new(clean_text, date)
167
26
  end
27
+ end
168
28
 
169
- private
170
-
171
- # Check for day of week patterns (this Monday, next Tuesday, last Friday, etc.)
172
- def check_day_of_week_patterns(text)
173
- # Check for "this [day]" pattern
174
- if (match = text.match(THIS_DAY_PATTERN))
175
- day_name = match[1].downcase
176
- day_value = DAYS_OF_WEEK[day_name]
177
- date = calculate_this_day(day_value)
178
- clean_text = text.gsub(match[0], "").strip
179
- return Result.new(clean_text, date)
180
- end
181
-
182
- # Check for "next [day]" pattern
183
- if (match = text.match(NEXT_DAY_PATTERN))
184
- day_name = match[1].downcase
185
- day_value = DAYS_OF_WEEK[day_name]
186
- date = calculate_next_day(day_value)
187
- clean_text = text.gsub(match[0], "").strip
188
- return Result.new(clean_text, date)
189
- end
190
-
191
- # Check for "last [day]" pattern
192
- if (match = text.match(LAST_DAY_PATTERN))
193
- day_name = match[1].downcase
194
- day_value = DAYS_OF_WEEK[day_name]
195
- date = calculate_last_day(day_value)
196
- clean_text = text.gsub(match[0], "").strip
197
- return Result.new(clean_text, date)
198
- end
29
+ # Result represents a parsing result with clean text and parsed date
30
+ class Result
31
+ attr_reader :clean_text, :date, :time
199
32
 
200
- nil
33
+ def initialize(clean_text, date, time = nil)
34
+ @clean_text = clean_text
35
+ @date = date
36
+ @time = time
201
37
  end
202
38
 
203
- # Check for dynamic date patterns like "in X days" or "X days ago"
204
- def check_dynamic_patterns(text)
205
- # Check for "in X days" pattern
206
- if (match = text.match(IN_X_DAYS_PATTERN))
207
- days = match[1].to_i
208
- date = Date.today + days
209
- clean_text = text.gsub(match[0], "").strip
210
- return Result.new(clean_text, date)
211
- end
212
-
213
- # Check for "X days ago" pattern
214
- if (match = text.match(X_DAYS_AGO_PATTERN))
215
- days = match[1].to_i
216
- date = Date.today - days
217
- clean_text = text.gsub(match[0], "").strip
218
- return Result.new(clean_text, date)
219
- end
220
-
221
- # Check for "in X weeks" pattern
222
- if (match = text.match(IN_X_WEEKS_PATTERN))
223
- weeks = match[1].to_i
224
- date = Date.today + (weeks * 7)
225
- clean_text = text.gsub(match[0], "").strip
226
- return Result.new(clean_text, date)
227
- end
228
-
229
- # Check for "X weeks ago" pattern
230
- if (match = text.match(X_WEEKS_AGO_PATTERN))
231
- weeks = match[1].to_i
232
- date = Date.today - (weeks * 7)
233
- clean_text = text.gsub(match[0], "").strip
234
- return Result.new(clean_text, date)
235
- end
236
-
237
- # Check for "in X months" pattern
238
- if (match = text.match(IN_X_MONTHS_PATTERN))
239
- months = match[1].to_i
240
- date = Date.today >> months
241
- clean_text = text.gsub(match[0], "").strip
242
- return Result.new(clean_text, date)
243
- end
39
+ # Alias for clean_text for backward compatibility
40
+ alias text clean_text
244
41
 
245
- # Check for "X months ago" pattern
246
- if (match = text.match(X_MONTHS_AGO_PATTERN))
247
- months = match[1].to_i
248
- date = Date.today << months
249
- clean_text = text.gsub(match[0], "").strip
250
- return Result.new(clean_text, date)
251
- end
42
+ # Returns a DateTime object if time is available, otherwise nil
43
+ def datetime
44
+ return nil unless @time && @date
252
45
 
253
- # Check for "in X years" pattern
254
- if (match = text.match(IN_X_YEARS_PATTERN))
255
- years = match[1].to_i
256
- date = Date.new(Date.today.year + years, Date.today.month, Date.today.day)
257
- clean_text = text.gsub(match[0], "").strip
258
- return Result.new(clean_text, date)
259
- end
46
+ DateTime.new(@date.year, @date.month, @date.day, @time.hour, @time.min, @time.sec)
47
+ end
48
+ end
260
49
 
261
- # Check for "X years ago" pattern
262
- if (match = text.match(X_YEARS_AGO_PATTERN))
263
- years = match[1].to_i
264
- date = Date.new(Date.today.year - years, Date.today.month, Date.today.day)
265
- clean_text = text.gsub(match[0], "").strip
266
- return Result.new(clean_text, date)
267
- end
50
+ # Error raised when parsing fails
51
+ class ParseError < StandardError; end
268
52
 
269
- nil
53
+ # Main parser class for the Hizuke library
54
+ class Parser
55
+ include Constants
56
+ include DateCalculator
57
+ include WeekCalculator
58
+ include MonthCalculator
59
+ include YearCalculator
60
+ include QuarterCalculator
61
+ include PeriodCalculator
62
+ include PatternMatcher
63
+
64
+ # Parse a date from text - class method
65
+ # @param text [String] the text to parse
66
+ # @return [Hizuke::Result] the parsing result with clean text and date
67
+ def self.parse(text)
68
+ new.parse(text)
270
69
  end
271
70
 
272
- # Calculate date for "this [day]" - the current/upcoming day in this week
273
- def calculate_this_day(target_wday)
274
- today = Date.today
275
- today_wday = today.wday
276
-
277
- # Calculate days until the target day in this week
278
- days_diff = (target_wday - today_wday) % 7
279
-
280
- # If it's the same day, return today's date
281
- if days_diff == 0
282
- return today
283
- end
284
-
285
- # Return the date of the next occurrence in this week
286
- today + days_diff
71
+ # Parse a date with result details from text - class method
72
+ # @param text [String] the text to parse
73
+ # @return [Hizuke::Result] the parsing result with clean text and date
74
+ def self.parse_with_result(text)
75
+ new.parse_with_result(text)
287
76
  end
288
77
 
289
- # Calculate date for "next [day]" - the day in next week
290
- def calculate_next_day(target_wday)
291
- today = Date.today
292
- today_wday = today.wday
293
-
294
- # Calculate days until the next occurrence
295
- days_until_target = (target_wday - today_wday) % 7
296
-
297
- # If today is the target day or the target day is earlier in the week,
298
- # we want the day next week, so add 7 days
299
- if days_until_target == 0 || target_wday < today_wday
300
- days_until_target += 7
301
- end
302
-
303
- today + days_until_target
304
- end
78
+ # Parse a date from text
79
+ # @param text [String] the text to parse
80
+ # @return [Hizuke::Result] the parsing result with clean text and date
81
+ def parse(text)
82
+ raise ParseError, 'Cannot parse nil input' if text.nil?
83
+ raise ParseError, 'Cannot parse empty input' if text.empty?
305
84
 
306
- # Calculate date for "last [day]" - the day in previous week
307
- def calculate_last_day(target_wday)
308
- today = Date.today
309
- today_wday = today.wday
310
-
311
- # Calculate days since the last occurrence
312
- days_since_target = (today_wday - target_wday) % 7
313
-
314
- # If today is the target day or the target day is later in the week,
315
- # we want the day last week, so add 7 days
316
- if days_since_target == 0 || target_wday > today_wday
317
- days_since_target += 7
318
- end
319
-
320
- today - days_since_target
85
+ parse_with_result(text)
321
86
  end
322
87
 
323
- # Calculate the date based on the keyword value
324
- def calculate_date(date_value)
325
- if date_value.is_a?(Integer)
326
- Date.today + date_value
327
- elsif date_value == :next_week
328
- # Find next Monday
329
- days_until_monday = (1 - Date.today.wday) % 7
330
- # If today is Monday, we want next Monday, not today
331
- days_until_monday = 7 if days_until_monday == 0
332
- Date.today + days_until_monday
333
- elsif date_value == :last_week
334
- # Find last Monday
335
- days_since_monday = (Date.today.wday - 1) % 7
336
- # If today is Monday, we want last Monday, not today
337
- days_since_monday = 7 if days_since_monday == 0
338
- Date.today - days_since_monday - 7
339
- elsif date_value == :next_month
340
- # Return the first day of the next month
341
- next_month = Date.today >> 1
342
- Date.new(next_month.year, next_month.month, 1)
343
- elsif date_value == :last_month
344
- # Return the first day of the previous month
345
- prev_month = Date.today << 1
346
- Date.new(prev_month.year, prev_month.month, 1)
347
- elsif date_value == :next_year
348
- # Return the first day of the next year
349
- next_year = Date.today.year + 1
350
- Date.new(next_year, 1, 1)
351
- elsif date_value == :last_year
352
- # Return the first day of the last year
353
- last_year = Date.today.year - 1
354
- Date.new(last_year, 1, 1)
355
- elsif date_value == :next_quarter
356
- # Return the first day of the next quarter
357
- today = Date.today
358
- current_month = today.month
359
-
360
- # Determine the start month of the next quarter
361
- next_quarter_month = case
362
- when current_month <= 3
363
- 4 # Q2 starts in April
364
- when current_month <= 6
365
- 7 # Q3 starts in July
366
- when current_month <= 9
367
- 10 # Q4 starts in October
368
- else
369
- 1 # Q1 of next year starts in January
370
- end
371
-
372
- # If the next quarter is in the next year, increment the year
373
- next_quarter_year = today.year
374
- next_quarter_year += 1 if current_month > 9
375
-
376
- Date.new(next_quarter_year, next_quarter_month, 1)
377
- elsif date_value == :last_quarter
378
- # Return the first day of the last quarter
379
- today = Date.today
380
- current_month = today.month
381
-
382
- # Determine the start month of the last quarter
383
- last_quarter_month = case
384
- when current_month <= 3
385
- 10 # Q4 of last year starts in October
386
- when current_month <= 6
387
- 1 # Q1 starts in January
388
- when current_month <= 9
389
- 4 # Q2 starts in April
390
- else
391
- 7 # Q3 starts in July
392
- end
393
-
394
- # If the last quarter is in the previous year, decrement the year
395
- last_quarter_year = today.year
396
- last_quarter_year -= 1 if current_month <= 3
397
-
398
- Date.new(last_quarter_year, last_quarter_month, 1)
399
- elsif date_value == :this_weekend
400
- # Calculate days until Saturday
401
- days_until_saturday = (6 - Date.today.wday) % 7
402
- # If today is Saturday or Sunday, we're already on the weekend
403
- days_until_saturday = 0 if days_until_saturday == 0 || days_until_saturday == 6
404
- Date.today + days_until_saturday
405
- elsif date_value == :end_of_week
406
- # Calculate days until Sunday (end of week)
407
- days_until_sunday = (0 - Date.today.wday) % 7
408
- # If today is Sunday, we're already at the end of the week
409
- days_until_sunday = 0 if days_until_sunday == 0
410
- Date.today + days_until_sunday
411
- elsif date_value == :end_of_month
412
- # Return the last day of the current month
413
- # Get the first day of next month
414
- next_month = Date.today >> 1
415
- first_day_next_month = Date.new(next_month.year, next_month.month, 1)
416
- # Subtract one day to get the last day of current month
417
- first_day_next_month - 1
418
- elsif date_value == :end_of_year
419
- # Return the last day of the current year (December 31)
420
- Date.new(Date.today.year, 12, 31)
421
- elsif date_value == :mid_week
422
- # Return Wednesday of the current week
423
- # Calculate days until/since Wednesday (3)
424
- today_wday = Date.today.wday
425
- target_wday = 3 # Wednesday
426
- days_diff = (target_wday - today_wday) % 7
427
- # If the difference is more than 3, then Wednesday has passed this week
428
- # So we need to go back to Wednesday
429
- days_diff = days_diff - 7 if days_diff > 3
430
- Date.today + days_diff
431
- elsif date_value == :mid_month
432
- # Return the 15th day of the current month
433
- Date.new(Date.today.year, Date.today.month, 15)
88
+ # Parse a date with result details from text
89
+ # @param text [String] the text to parse
90
+ # @return [Hizuke::Result] the parsing result with clean text and date
91
+ def parse_with_result(text)
92
+ raise ParseError, 'Cannot parse nil input' if text.nil?
93
+ raise ParseError, 'Cannot parse empty input' if text.empty?
94
+
95
+ # First we'll try to find a date
96
+ result = try_parsing_strategies(text)
97
+
98
+ if result
99
+ # If we found a date, extract time references
100
+ time, clean_text = extract_time_references(result.clean_text)
101
+ Result.new(clean_text, result.date, time)
102
+ else
103
+ # If we didn't find a date, extract only time references
104
+ time, clean_text = extract_time_references(text)
105
+ Result.new(clean_text, nil, time)
434
106
  end
435
107
  end
436
108
  end
437
- end
109
+ end