hizuke 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/hizuke/parser.rb CHANGED
@@ -1,542 +1,109 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "date"
4
- require "time"
3
+ require_relative 'constants'
4
+ require_relative 'date_calculator'
5
+ require_relative 'pattern_matcher'
6
+ require 'date'
5
7
 
6
8
  module Hizuke
7
- # Simple class to represent a time of day without a date
9
+ # Time of day representation with hour, minute and second
8
10
  class TimeOfDay
9
11
  attr_reader :hour, :min, :sec
10
-
11
- def initialize(hour, min = 0, sec = 0)
12
+
13
+ def initialize(hour, min, sec)
12
14
  @hour = hour
13
15
  @min = min
14
16
  @sec = sec
15
17
  end
16
-
18
+
17
19
  def to_s
18
- if sec == 0
19
- format("%02d:%02d", hour, min)
20
+ # Include seconds in the format if they are not zero
21
+ if sec.zero?
22
+ format('%<hour>02d:%<min>02d', hour: hour, min: min)
20
23
  else
21
- format("%02d:%02d:%02d", hour, min, sec)
24
+ format('%<hour>02d:%<min>02d:%<sec>02d', hour: hour, min: min, sec: sec)
22
25
  end
23
26
  end
24
-
25
- def inspect
26
- to_s
27
- end
28
27
  end
29
-
30
- # Result object containing the clean text and extracted date/time
28
+
29
+ # Result represents a parsing result with clean text and parsed date
31
30
  class Result
32
- attr_reader :text, :date, :time
31
+ attr_reader :clean_text, :date, :time
33
32
 
34
- def initialize(text, date, time = nil)
35
- @text = text
33
+ def initialize(clean_text, date, time = nil)
34
+ @clean_text = clean_text
36
35
  @date = date
37
36
  @time = time
38
37
  end
39
38
 
39
+ # Alias for clean_text for backward compatibility
40
+ alias text clean_text
41
+
42
+ # Returns a DateTime object if both date and time are available, otherwise nil
40
43
  def datetime
41
- return nil unless @time
42
-
43
- # Combine date and time into a Time object
44
- Time.new(@date.year, @date.month, @date.day,
45
- @time.hour, @time.min, @time.sec)
46
- end
47
- end
48
-
49
- # Configuration class for Hizuke
50
- class Configuration
51
- attr_accessor :morning_time, :evening_time
52
-
53
- def initialize
54
- @morning_time = { hour: 8, min: 0 }
55
- @evening_time = { hour: 20, min: 0 }
44
+ return nil unless @time && @date
45
+
46
+ DateTime.new(@date.year, @date.month, @date.day, @time.hour, @time.min, @time.sec)
56
47
  end
57
48
  end
58
-
59
- # Allows configuration of Hizuke
60
- def self.configure
61
- @configuration ||= Configuration.new
62
- yield(@configuration) if block_given?
63
- end
64
-
65
- # Returns the configuration
66
- def self.configuration
67
- @configuration ||= Configuration.new
68
- end
69
49
 
70
- # Parser class responsible for extracting dates from text
71
- class Parser
72
- # Mapping of day names to their wday values (0-6, Sunday is 0)
73
- DAYS_OF_WEEK = {
74
- "monday" => 1,
75
- "tuesday" => 2,
76
- "wednesday" => 3,
77
- "thursday" => 4,
78
- "friday" => 5,
79
- "saturday" => 6,
80
- "sunday" => 0
81
- }.freeze
82
-
83
- # Date keywords mapping
84
- DATE_KEYWORDS = {
85
- "yesterday" => -1,
86
- "today" => 0,
87
- "tomorrow" => 1,
88
- "dayaftertomorrow" => 2,
89
- "day after tomorrow" => 2,
90
- "daybeforeyesterday" => -2,
91
- "day before yesterday" => -2,
92
- "nextweek" => :next_week,
93
- "next week" => :next_week,
94
- "lastweek" => :last_week,
95
- "last week" => :last_week,
96
- "nextmonth" => :next_month,
97
- "next month" => :next_month,
98
- "lastmonth" => :last_month,
99
- "last month" => :last_month,
100
- "nextyear" => :next_year,
101
- "next year" => :next_year,
102
- "lastyear" => :last_year,
103
- "last year" => :last_year,
104
- "nextquarter" => :next_quarter,
105
- "next quarter" => :next_quarter,
106
- "lastquarter" => :last_quarter,
107
- "last quarter" => :last_quarter,
108
- "thisweekend" => :this_weekend,
109
- "this weekend" => :this_weekend,
110
- "endofweek" => :end_of_week,
111
- "end of week" => :end_of_week,
112
- "endofmonth" => :end_of_month,
113
- "end of month" => :end_of_month,
114
- "endofyear" => :end_of_year,
115
- "end of year" => :end_of_year,
116
- "midweek" => :mid_week,
117
- "mid week" => :mid_week,
118
- "midmonth" => :mid_month,
119
- "mid month" => :mid_month
120
- }.freeze
121
-
122
- # Regex patterns for dynamic date references
123
- IN_X_DAYS_PATTERN = /in (\d+) days?/i
124
- X_DAYS_AGO_PATTERN = /(\d+) days? ago/i
125
- IN_X_WEEKS_PATTERN = /in (\d+) weeks?/i
126
- X_WEEKS_AGO_PATTERN = /(\d+) weeks? ago/i
127
- IN_X_MONTHS_PATTERN = /in (\d+) months?/i
128
- X_MONTHS_AGO_PATTERN = /(\d+) months? ago/i
129
- IN_X_YEARS_PATTERN = /in (\d+) years?/i
130
- X_YEARS_AGO_PATTERN = /(\d+) years? ago/i
131
-
132
- # Regex patterns for specific days of the week
133
- THIS_DAY_PATTERN = /this (monday|tuesday|wednesday|thursday|friday|saturday|sunday)/i
134
- NEXT_DAY_PATTERN = /next (monday|tuesday|wednesday|thursday|friday|saturday|sunday)/i
135
- LAST_DAY_PATTERN = /last (monday|tuesday|wednesday|thursday|friday|saturday|sunday)/i
50
+ # Error raised when parsing fails
51
+ class ParseError < StandardError; end
136
52
 
137
- # Regex patterns for time references
138
- TIME_PATTERN = /(?:at|@)\s*(\d{1,2})(?::(\d{1,2}))?(?::(\d{1,2}))?\s*(am|pm)?/i
139
-
140
- # Regex patterns for word-based time references
141
- NOON_PATTERN = /at\s+noon/i
142
- MIDNIGHT_PATTERN = /at\s+midnight/i
143
- MORNING_PATTERN = /in\s+the\s+morning/i
144
- EVENING_PATTERN = /in\s+the\s+evening/i
145
-
146
- # Parse text containing time references and extract both
147
- # the clean text and the date.
148
- #
53
+ # Main parser class for the Hizuke library
54
+ class Parser
55
+ include Constants
56
+ include DateCalculator
57
+ include WeekCalculator
58
+ include MonthCalculator
59
+ include YearCalculator
60
+ include QuarterCalculator
61
+ include PeriodCalculator
62
+ include PatternMatcher
63
+
64
+ # Parse a date from text - class method
149
65
  # @param text [String] the text to parse
150
- # @return [Hizuke::Result] the parsing result containing text and date
151
- # @raise [Hizuke::ParseError] if no valid date reference is found
66
+ # @return [Hizuke::Result] the parsing result with clean text and date
152
67
  def self.parse(text)
153
68
  new.parse(text)
154
69
  end
155
70
 
156
- # Instance method to parse text
157
- #
71
+ # Parse a date with result details from text - class method
158
72
  # @param text [String] the text to parse
159
- # @return [Hizuke::Result] the parsing result containing text and date
160
- # @raise [Hizuke::ParseError] if no valid date reference is found
161
- def parse(text)
162
- # Check if text is nil or empty
163
- raise ParseError, "Input text cannot be nil or empty" if text.nil? || text.empty?
164
-
165
- # Extract time if present
166
- extracted_time = nil
167
- clean_text = text
168
-
169
- # Try to match word-based time patterns first
170
- if match = clean_text.match(NOON_PATTERN)
171
- extracted_time = TimeOfDay.new(12, 0, 0)
172
- clean_text = clean_text.gsub(match[0], "").strip
173
- elsif match = clean_text.match(MIDNIGHT_PATTERN)
174
- extracted_time = TimeOfDay.new(0, 0, 0)
175
- clean_text = clean_text.gsub(match[0], "").strip
176
- elsif match = clean_text.match(MORNING_PATTERN)
177
- config = Hizuke.configuration
178
- extracted_time = TimeOfDay.new(config.morning_time[:hour], config.morning_time[:min], 0)
179
- clean_text = clean_text.gsub(match[0], "").strip
180
- elsif match = clean_text.match(EVENING_PATTERN)
181
- config = Hizuke.configuration
182
- extracted_time = TimeOfDay.new(config.evening_time[:hour], config.evening_time[:min], 0)
183
- clean_text = clean_text.gsub(match[0], "").strip
184
- # Then try the numeric time pattern
185
- elsif time_match = clean_text.match(TIME_PATTERN)
186
- hour = time_match[1].to_i
187
- min = time_match[2] ? time_match[2].to_i : 0
188
- sec = time_match[3] ? time_match[3].to_i : 0
189
-
190
- # Adjust for AM/PM
191
- if time_match[4]&.downcase == "pm" && hour < 12
192
- hour += 12
193
- elsif time_match[4]&.downcase == "am" && hour == 12
194
- hour = 0
195
- end
196
-
197
- extracted_time = TimeOfDay.new(hour, min, sec)
198
-
199
- # Remove the time expression from the text
200
- clean_text = clean_text.gsub(time_match[0], "").strip
201
- end
202
-
203
- # Check for dynamic patterns first (in X days, X days ago)
204
- result = check_dynamic_patterns(clean_text)
205
- if result
206
- return Result.new(result.text, result.date, extracted_time)
207
- end
208
-
209
- # Check for day of week patterns (this Monday, next Tuesday, etc.)
210
- result = check_day_of_week_patterns(clean_text)
211
- if result
212
- return Result.new(result.text, result.date, extracted_time)
213
- end
214
-
215
- # Try to find compound date expressions (like "next week")
216
- compound_matches = {}
217
-
218
- DATE_KEYWORDS.keys.select { |k| k.include?(" ") }.each do |compound_key|
219
- if clean_text.downcase.include?(compound_key)
220
- start_idx = clean_text.downcase.index(compound_key)
221
- end_idx = start_idx + compound_key.length - 1
222
- compound_matches[compound_key] = [start_idx, end_idx]
223
- end
224
- end
225
-
226
- # If we found compound matches, handle them specially
227
- unless compound_matches.empty?
228
- # Use the first match (in case there are multiple)
229
- match_key, indices = compound_matches.min_by { |_, v| v[0] }
230
-
231
- # Calculate date based on the keyword
232
- date_value = DATE_KEYWORDS[match_key]
233
- date = calculate_date(date_value)
234
-
235
- # Remove the date expression from the text
236
- final_text = clean_text.dup
237
- final_text.slice!(indices[0]..indices[1])
238
- final_text = final_text.strip
239
-
240
- return Result.new(final_text, date, extracted_time)
241
- end
242
-
243
- # Split the text into words (for single-word date references)
244
- words = clean_text.split
245
-
246
- # Find the first date keyword
247
- date_word_index = nil
248
- date_value = nil
249
-
250
- words.each_with_index do |word, index|
251
- clean_word = word.downcase.gsub(/[^a-z]/, '')
252
- if DATE_KEYWORDS.key?(clean_word)
253
- date_word_index = index
254
- date_value = DATE_KEYWORDS[clean_word]
255
- break
256
- end
257
- end
258
-
259
- if date_word_index.nil?
260
- raise ParseError, "No valid date reference found in '#{clean_text}'"
261
- end
262
-
263
- # Calculate the date based on the keyword
264
- date = calculate_date(date_value)
265
-
266
- # Create the clean text by removing the date keyword
267
- clean_words = words.dup
268
- clean_words.delete_at(date_word_index)
269
- final_text = clean_words.join(" ").strip
270
-
271
- Result.new(final_text, date, extracted_time)
73
+ # @return [Hizuke::Result] the parsing result with clean text and date
74
+ def self.parse_with_result(text)
75
+ new.parse_with_result(text)
272
76
  end
273
77
 
274
- private
275
-
276
- # Check for day of week patterns (this Monday, next Tuesday, last Friday, etc.)
277
- def check_day_of_week_patterns(text)
278
- # Check for "this [day]" pattern
279
- if (match = text.match(THIS_DAY_PATTERN))
280
- day_name = match[1].downcase
281
- day_value = DAYS_OF_WEEK[day_name]
282
- date = calculate_this_day(day_value)
283
- clean_text = text.gsub(match[0], "").strip
284
- return Result.new(clean_text, date)
285
- end
286
-
287
- # Check for "next [day]" pattern
288
- if (match = text.match(NEXT_DAY_PATTERN))
289
- day_name = match[1].downcase
290
- day_value = DAYS_OF_WEEK[day_name]
291
- date = calculate_next_day(day_value)
292
- clean_text = text.gsub(match[0], "").strip
293
- return Result.new(clean_text, date)
294
- end
295
-
296
- # Check for "last [day]" pattern
297
- if (match = text.match(LAST_DAY_PATTERN))
298
- day_name = match[1].downcase
299
- day_value = DAYS_OF_WEEK[day_name]
300
- date = calculate_last_day(day_value)
301
- clean_text = text.gsub(match[0], "").strip
302
- return Result.new(clean_text, date)
303
- end
304
-
305
- nil
306
- end
307
-
308
- # Check for dynamic date patterns like "in X days" or "X days ago"
309
- def check_dynamic_patterns(text)
310
- # Check for "in X days" pattern
311
- if (match = text.match(IN_X_DAYS_PATTERN))
312
- days = match[1].to_i
313
- date = Date.today + days
314
- clean_text = text.gsub(match[0], "").strip
315
- return Result.new(clean_text, date)
316
- end
317
-
318
- # Check for "X days ago" pattern
319
- if (match = text.match(X_DAYS_AGO_PATTERN))
320
- days = match[1].to_i
321
- date = Date.today - days
322
- clean_text = text.gsub(match[0], "").strip
323
- return Result.new(clean_text, date)
324
- end
325
-
326
- # Check for "in X weeks" pattern
327
- if (match = text.match(IN_X_WEEKS_PATTERN))
328
- weeks = match[1].to_i
329
- date = Date.today + (weeks * 7)
330
- clean_text = text.gsub(match[0], "").strip
331
- return Result.new(clean_text, date)
332
- end
333
-
334
- # Check for "X weeks ago" pattern
335
- if (match = text.match(X_WEEKS_AGO_PATTERN))
336
- weeks = match[1].to_i
337
- date = Date.today - (weeks * 7)
338
- clean_text = text.gsub(match[0], "").strip
339
- return Result.new(clean_text, date)
340
- end
341
-
342
- # Check for "in X months" pattern
343
- if (match = text.match(IN_X_MONTHS_PATTERN))
344
- months = match[1].to_i
345
- date = Date.today >> months
346
- clean_text = text.gsub(match[0], "").strip
347
- return Result.new(clean_text, date)
348
- end
349
-
350
- # Check for "X months ago" pattern
351
- if (match = text.match(X_MONTHS_AGO_PATTERN))
352
- months = match[1].to_i
353
- date = Date.today << months
354
- clean_text = text.gsub(match[0], "").strip
355
- return Result.new(clean_text, date)
356
- end
357
-
358
- # Check for "in X years" pattern
359
- if (match = text.match(IN_X_YEARS_PATTERN))
360
- years = match[1].to_i
361
- date = Date.new(Date.today.year + years, Date.today.month, Date.today.day)
362
- clean_text = text.gsub(match[0], "").strip
363
- return Result.new(clean_text, date)
364
- end
365
-
366
- # Check for "X years ago" pattern
367
- if (match = text.match(X_YEARS_AGO_PATTERN))
368
- years = match[1].to_i
369
- date = Date.new(Date.today.year - years, Date.today.month, Date.today.day)
370
- clean_text = text.gsub(match[0], "").strip
371
- return Result.new(clean_text, date)
372
- end
373
-
374
- nil
375
- end
78
+ # Parse a date from text
79
+ # @param text [String] the text to parse
80
+ # @return [Hizuke::Result] the parsing result with clean text and date
81
+ def parse(text)
82
+ raise ParseError, 'Cannot parse nil input' if text.nil?
83
+ raise ParseError, 'Cannot parse empty input' if text.empty?
376
84
 
377
- # Calculate date for "this [day]" - the current/upcoming day in this week
378
- def calculate_this_day(target_wday)
379
- today = Date.today
380
- today_wday = today.wday
381
-
382
- # Calculate days until the target day in this week
383
- days_diff = (target_wday - today_wday) % 7
384
-
385
- # If it's the same day, return today's date
386
- if days_diff == 0
387
- return today
388
- end
389
-
390
- # Return the date of the next occurrence in this week
391
- today + days_diff
85
+ parse_with_result(text)
392
86
  end
393
87
 
394
- # Calculate date for "next [day]" - the day in next week
395
- def calculate_next_day(target_wday)
396
- today = Date.today
397
- today_wday = today.wday
398
-
399
- # Calculate days until the next occurrence
400
- days_until_target = (target_wday - today_wday) % 7
401
-
402
- # If today is the target day or the target day is earlier in the week,
403
- # we want the day next week, so add 7 days
404
- if days_until_target == 0 || target_wday < today_wday
405
- days_until_target += 7
406
- end
407
-
408
- today + days_until_target
409
- end
88
+ # Parse a date with result details from text
89
+ # @param text [String] the text to parse
90
+ # @return [Hizuke::Result] the parsing result with clean text and date
91
+ def parse_with_result(text)
92
+ raise ParseError, 'Cannot parse nil input' if text.nil?
93
+ raise ParseError, 'Cannot parse empty input' if text.empty?
410
94
 
411
- # Calculate date for "last [day]" - the day in previous week
412
- def calculate_last_day(target_wday)
413
- today = Date.today
414
- today_wday = today.wday
415
-
416
- # Calculate days since the last occurrence
417
- days_since_target = (today_wday - target_wday) % 7
418
-
419
- # If today is the target day or the target day is later in the week,
420
- # we want the day last week, so add 7 days
421
- if days_since_target == 0 || target_wday > today_wday
422
- days_since_target += 7
423
- end
424
-
425
- today - days_since_target
426
- end
95
+ # First we'll try to find a date
96
+ result = try_parsing_strategies(text)
427
97
 
428
- # Calculate the date based on the keyword value
429
- def calculate_date(date_value)
430
- if date_value.is_a?(Integer)
431
- Date.today + date_value
432
- elsif date_value == :next_week
433
- # Find next Monday
434
- days_until_monday = (1 - Date.today.wday) % 7
435
- # If today is Monday, we want next Monday, not today
436
- days_until_monday = 7 if days_until_monday == 0
437
- Date.today + days_until_monday
438
- elsif date_value == :last_week
439
- # Find last Monday
440
- days_since_monday = (Date.today.wday - 1) % 7
441
- # If today is Monday, we want last Monday, not today
442
- days_since_monday = 7 if days_since_monday == 0
443
- Date.today - days_since_monday - 7
444
- elsif date_value == :next_month
445
- # Return the first day of the next month
446
- next_month = Date.today >> 1
447
- Date.new(next_month.year, next_month.month, 1)
448
- elsif date_value == :last_month
449
- # Return the first day of the previous month
450
- prev_month = Date.today << 1
451
- Date.new(prev_month.year, prev_month.month, 1)
452
- elsif date_value == :next_year
453
- # Return the first day of the next year
454
- next_year = Date.today.year + 1
455
- Date.new(next_year, 1, 1)
456
- elsif date_value == :last_year
457
- # Return the first day of the last year
458
- last_year = Date.today.year - 1
459
- Date.new(last_year, 1, 1)
460
- elsif date_value == :next_quarter
461
- # Return the first day of the next quarter
462
- today = Date.today
463
- current_month = today.month
464
-
465
- # Determine the start month of the next quarter
466
- next_quarter_month = case
467
- when current_month <= 3
468
- 4 # Q2 starts in April
469
- when current_month <= 6
470
- 7 # Q3 starts in July
471
- when current_month <= 9
472
- 10 # Q4 starts in October
473
- else
474
- 1 # Q1 of next year starts in January
475
- end
476
-
477
- # If the next quarter is in the next year, increment the year
478
- next_quarter_year = today.year
479
- next_quarter_year += 1 if current_month > 9
480
-
481
- Date.new(next_quarter_year, next_quarter_month, 1)
482
- elsif date_value == :last_quarter
483
- # Return the first day of the last quarter
484
- today = Date.today
485
- current_month = today.month
486
-
487
- # Determine the start month of the last quarter
488
- last_quarter_month = case
489
- when current_month <= 3
490
- 10 # Q4 of last year starts in October
491
- when current_month <= 6
492
- 1 # Q1 starts in January
493
- when current_month <= 9
494
- 4 # Q2 starts in April
495
- else
496
- 7 # Q3 starts in July
497
- end
498
-
499
- # If the last quarter is in the previous year, decrement the year
500
- last_quarter_year = today.year
501
- last_quarter_year -= 1 if current_month <= 3
502
-
503
- Date.new(last_quarter_year, last_quarter_month, 1)
504
- elsif date_value == :this_weekend
505
- # Calculate days until Saturday
506
- days_until_saturday = (6 - Date.today.wday) % 7
507
- # If today is Saturday or Sunday, we're already on the weekend
508
- days_until_saturday = 0 if days_until_saturday == 0 || days_until_saturday == 6
509
- Date.today + days_until_saturday
510
- elsif date_value == :end_of_week
511
- # Calculate days until Sunday (end of week)
512
- days_until_sunday = (0 - Date.today.wday) % 7
513
- # If today is Sunday, we're already at the end of the week
514
- days_until_sunday = 0 if days_until_sunday == 0
515
- Date.today + days_until_sunday
516
- elsif date_value == :end_of_month
517
- # Return the last day of the current month
518
- # Get the first day of next month
519
- next_month = Date.today >> 1
520
- first_day_next_month = Date.new(next_month.year, next_month.month, 1)
521
- # Subtract one day to get the last day of current month
522
- first_day_next_month - 1
523
- elsif date_value == :end_of_year
524
- # Return the last day of the current year (December 31)
525
- Date.new(Date.today.year, 12, 31)
526
- elsif date_value == :mid_week
527
- # Return Wednesday of the current week
528
- # Calculate days until/since Wednesday (3)
529
- today_wday = Date.today.wday
530
- target_wday = 3 # Wednesday
531
- days_diff = (target_wday - today_wday) % 7
532
- # If the difference is more than 3, then Wednesday has passed this week
533
- # So we need to go back to Wednesday
534
- days_diff = days_diff - 7 if days_diff > 3
535
- Date.today + days_diff
536
- elsif date_value == :mid_month
537
- # Return the 15th day of the current month
538
- Date.new(Date.today.year, Date.today.month, 15)
98
+ if result
99
+ # If we found a date, extract time references
100
+ time, clean_text = extract_time_references(result.clean_text)
101
+ Result.new(clean_text, result.date, time)
102
+ else
103
+ # If we didn't find a date, extract only time references
104
+ time, clean_text = extract_time_references(text)
105
+ Result.new(clean_text, nil, time)
539
106
  end
540
107
  end
541
108
  end
542
- end
109
+ end