hizuke 0.0.3 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -1
- data/Gemfile +2 -2
- data/Gemfile.lock +1 -1
- data/README.md +91 -2
- data/Rakefile +6 -6
- data/hizuke.gemspec +39 -0
- data/lib/hizuke/constants.rb +86 -0
- data/lib/hizuke/date_calculator.rb +458 -0
- data/lib/hizuke/parser.rb +83 -411
- data/lib/hizuke/pattern_matcher.rb +495 -0
- data/lib/hizuke/version.rb +2 -2
- data/lib/hizuke.rb +31 -4
- metadata +6 -2
data/lib/hizuke/parser.rb
CHANGED
@@ -1,437 +1,109 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
require_relative 'constants'
|
4
|
+
require_relative 'date_calculator'
|
5
|
+
require_relative 'pattern_matcher'
|
6
|
+
require 'date'
|
4
7
|
|
5
8
|
module Hizuke
|
6
|
-
#
|
7
|
-
class
|
8
|
-
attr_reader :
|
9
|
-
|
10
|
-
def initialize(
|
11
|
-
@
|
12
|
-
@
|
13
|
-
|
14
|
-
end
|
15
|
-
|
16
|
-
# Parser class responsible for extracting dates from text
|
17
|
-
class Parser
|
18
|
-
# Mapping of day names to their wday values (0-6, Sunday is 0)
|
19
|
-
DAYS_OF_WEEK = {
|
20
|
-
"monday" => 1,
|
21
|
-
"tuesday" => 2,
|
22
|
-
"wednesday" => 3,
|
23
|
-
"thursday" => 4,
|
24
|
-
"friday" => 5,
|
25
|
-
"saturday" => 6,
|
26
|
-
"sunday" => 0
|
27
|
-
}.freeze
|
28
|
-
|
29
|
-
# Date keywords mapping
|
30
|
-
DATE_KEYWORDS = {
|
31
|
-
"yesterday" => -1,
|
32
|
-
"today" => 0,
|
33
|
-
"tomorrow" => 1,
|
34
|
-
"dayaftertomorrow" => 2,
|
35
|
-
"day after tomorrow" => 2,
|
36
|
-
"daybeforeyesterday" => -2,
|
37
|
-
"day before yesterday" => -2,
|
38
|
-
"nextweek" => :next_week,
|
39
|
-
"next week" => :next_week,
|
40
|
-
"lastweek" => :last_week,
|
41
|
-
"last week" => :last_week,
|
42
|
-
"nextmonth" => :next_month,
|
43
|
-
"next month" => :next_month,
|
44
|
-
"lastmonth" => :last_month,
|
45
|
-
"last month" => :last_month,
|
46
|
-
"nextyear" => :next_year,
|
47
|
-
"next year" => :next_year,
|
48
|
-
"lastyear" => :last_year,
|
49
|
-
"last year" => :last_year,
|
50
|
-
"nextquarter" => :next_quarter,
|
51
|
-
"next quarter" => :next_quarter,
|
52
|
-
"lastquarter" => :last_quarter,
|
53
|
-
"last quarter" => :last_quarter,
|
54
|
-
"thisweekend" => :this_weekend,
|
55
|
-
"this weekend" => :this_weekend,
|
56
|
-
"endofweek" => :end_of_week,
|
57
|
-
"end of week" => :end_of_week,
|
58
|
-
"endofmonth" => :end_of_month,
|
59
|
-
"end of month" => :end_of_month,
|
60
|
-
"endofyear" => :end_of_year,
|
61
|
-
"end of year" => :end_of_year,
|
62
|
-
"midweek" => :mid_week,
|
63
|
-
"mid week" => :mid_week,
|
64
|
-
"midmonth" => :mid_month,
|
65
|
-
"mid month" => :mid_month
|
66
|
-
}.freeze
|
67
|
-
|
68
|
-
# Regex patterns for dynamic date references
|
69
|
-
IN_X_DAYS_PATTERN = /in (\d+) days?/i
|
70
|
-
X_DAYS_AGO_PATTERN = /(\d+) days? ago/i
|
71
|
-
IN_X_WEEKS_PATTERN = /in (\d+) weeks?/i
|
72
|
-
X_WEEKS_AGO_PATTERN = /(\d+) weeks? ago/i
|
73
|
-
IN_X_MONTHS_PATTERN = /in (\d+) months?/i
|
74
|
-
X_MONTHS_AGO_PATTERN = /(\d+) months? ago/i
|
75
|
-
IN_X_YEARS_PATTERN = /in (\d+) years?/i
|
76
|
-
X_YEARS_AGO_PATTERN = /(\d+) years? ago/i
|
77
|
-
|
78
|
-
# Regex patterns for specific days of the week
|
79
|
-
THIS_DAY_PATTERN = /this (monday|tuesday|wednesday|thursday|friday|saturday|sunday)/i
|
80
|
-
NEXT_DAY_PATTERN = /next (monday|tuesday|wednesday|thursday|friday|saturday|sunday)/i
|
81
|
-
LAST_DAY_PATTERN = /last (monday|tuesday|wednesday|thursday|friday|saturday|sunday)/i
|
82
|
-
|
83
|
-
# Parse text containing time references and extract both
|
84
|
-
# the clean text and the date.
|
85
|
-
#
|
86
|
-
# @param text [String] the text to parse
|
87
|
-
# @return [Hizuke::Result] the parsing result containing text and date
|
88
|
-
# @raise [Hizuke::ParseError] if no valid date reference is found
|
89
|
-
def self.parse(text)
|
90
|
-
new.parse(text)
|
9
|
+
# Time of day representation with hour, minute and second
|
10
|
+
class TimeOfDay
|
11
|
+
attr_reader :hour, :min, :sec
|
12
|
+
|
13
|
+
def initialize(hour, min, sec)
|
14
|
+
@hour = hour
|
15
|
+
@min = min
|
16
|
+
@sec = sec
|
91
17
|
end
|
92
18
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
# Check if text is nil or empty
|
100
|
-
raise ParseError, "Input text cannot be nil or empty" if text.nil? || text.empty?
|
101
|
-
|
102
|
-
# Check for dynamic patterns first (in X days, X days ago)
|
103
|
-
result = check_dynamic_patterns(text)
|
104
|
-
return result if result
|
105
|
-
|
106
|
-
# Check for day of week patterns (this Monday, next Tuesday, etc.)
|
107
|
-
result = check_day_of_week_patterns(text)
|
108
|
-
return result if result
|
109
|
-
|
110
|
-
# Try to find compound date expressions (like "next week")
|
111
|
-
compound_matches = {}
|
112
|
-
|
113
|
-
DATE_KEYWORDS.keys.select { |k| k.include?(" ") }.each do |compound_key|
|
114
|
-
if text.downcase.include?(compound_key)
|
115
|
-
start_idx = text.downcase.index(compound_key)
|
116
|
-
end_idx = start_idx + compound_key.length - 1
|
117
|
-
compound_matches[compound_key] = [start_idx, end_idx]
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
# If we found compound matches, handle them specially
|
122
|
-
unless compound_matches.empty?
|
123
|
-
# Use the first match (in case there are multiple)
|
124
|
-
match_key, indices = compound_matches.min_by { |_, v| v[0] }
|
125
|
-
|
126
|
-
# Calculate date based on the keyword
|
127
|
-
date_value = DATE_KEYWORDS[match_key]
|
128
|
-
date = calculate_date(date_value)
|
129
|
-
|
130
|
-
# Remove the date expression from the text
|
131
|
-
clean_text = text.dup
|
132
|
-
clean_text.slice!(indices[0]..indices[1])
|
133
|
-
clean_text = clean_text.strip
|
134
|
-
|
135
|
-
return Result.new(clean_text, date)
|
136
|
-
end
|
137
|
-
|
138
|
-
# Split the text into words (for single-word date references)
|
139
|
-
words = text.split
|
140
|
-
|
141
|
-
# Find the first date keyword
|
142
|
-
date_word_index = nil
|
143
|
-
date_value = nil
|
144
|
-
|
145
|
-
words.each_with_index do |word, index|
|
146
|
-
clean_word = word.downcase.gsub(/[^a-z]/, '')
|
147
|
-
if DATE_KEYWORDS.key?(clean_word)
|
148
|
-
date_word_index = index
|
149
|
-
date_value = DATE_KEYWORDS[clean_word]
|
150
|
-
break
|
151
|
-
end
|
19
|
+
def to_s
|
20
|
+
# Include seconds in the format if they are not zero
|
21
|
+
if sec.zero?
|
22
|
+
format('%<hour>02d:%<min>02d', hour: hour, min: min)
|
23
|
+
else
|
24
|
+
format('%<hour>02d:%<min>02d:%<sec>02d', hour: hour, min: min, sec: sec)
|
152
25
|
end
|
153
|
-
|
154
|
-
if date_word_index.nil?
|
155
|
-
raise ParseError, "No valid date reference found in '#{text}'"
|
156
|
-
end
|
157
|
-
|
158
|
-
# Calculate the date based on the keyword
|
159
|
-
date = calculate_date(date_value)
|
160
|
-
|
161
|
-
# Create the clean text by removing the date keyword
|
162
|
-
clean_words = words.dup
|
163
|
-
clean_words.delete_at(date_word_index)
|
164
|
-
clean_text = clean_words.join(" ").strip
|
165
|
-
|
166
|
-
Result.new(clean_text, date)
|
167
26
|
end
|
27
|
+
end
|
168
28
|
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
def check_day_of_week_patterns(text)
|
173
|
-
# Check for "this [day]" pattern
|
174
|
-
if (match = text.match(THIS_DAY_PATTERN))
|
175
|
-
day_name = match[1].downcase
|
176
|
-
day_value = DAYS_OF_WEEK[day_name]
|
177
|
-
date = calculate_this_day(day_value)
|
178
|
-
clean_text = text.gsub(match[0], "").strip
|
179
|
-
return Result.new(clean_text, date)
|
180
|
-
end
|
181
|
-
|
182
|
-
# Check for "next [day]" pattern
|
183
|
-
if (match = text.match(NEXT_DAY_PATTERN))
|
184
|
-
day_name = match[1].downcase
|
185
|
-
day_value = DAYS_OF_WEEK[day_name]
|
186
|
-
date = calculate_next_day(day_value)
|
187
|
-
clean_text = text.gsub(match[0], "").strip
|
188
|
-
return Result.new(clean_text, date)
|
189
|
-
end
|
190
|
-
|
191
|
-
# Check for "last [day]" pattern
|
192
|
-
if (match = text.match(LAST_DAY_PATTERN))
|
193
|
-
day_name = match[1].downcase
|
194
|
-
day_value = DAYS_OF_WEEK[day_name]
|
195
|
-
date = calculate_last_day(day_value)
|
196
|
-
clean_text = text.gsub(match[0], "").strip
|
197
|
-
return Result.new(clean_text, date)
|
198
|
-
end
|
29
|
+
# Result represents a parsing result with clean text, parsed date and optional time
|
30
|
+
class Result
|
31
|
+
attr_reader :clean_text, :date, :time
|
199
32
|
|
200
|
-
|
33
|
+
def initialize(clean_text, date, time = nil)
|
34
|
+
@clean_text = clean_text
|
35
|
+
@date = date
|
36
|
+
@time = time
|
201
37
|
end
|
202
38
|
|
203
|
-
#
|
204
|
-
|
205
|
-
# Check for "in X days" pattern
|
206
|
-
if (match = text.match(IN_X_DAYS_PATTERN))
|
207
|
-
days = match[1].to_i
|
208
|
-
date = Date.today + days
|
209
|
-
clean_text = text.gsub(match[0], "").strip
|
210
|
-
return Result.new(clean_text, date)
|
211
|
-
end
|
212
|
-
|
213
|
-
# Check for "X days ago" pattern
|
214
|
-
if (match = text.match(X_DAYS_AGO_PATTERN))
|
215
|
-
days = match[1].to_i
|
216
|
-
date = Date.today - days
|
217
|
-
clean_text = text.gsub(match[0], "").strip
|
218
|
-
return Result.new(clean_text, date)
|
219
|
-
end
|
220
|
-
|
221
|
-
# Check for "in X weeks" pattern
|
222
|
-
if (match = text.match(IN_X_WEEKS_PATTERN))
|
223
|
-
weeks = match[1].to_i
|
224
|
-
date = Date.today + (weeks * 7)
|
225
|
-
clean_text = text.gsub(match[0], "").strip
|
226
|
-
return Result.new(clean_text, date)
|
227
|
-
end
|
228
|
-
|
229
|
-
# Check for "X weeks ago" pattern
|
230
|
-
if (match = text.match(X_WEEKS_AGO_PATTERN))
|
231
|
-
weeks = match[1].to_i
|
232
|
-
date = Date.today - (weeks * 7)
|
233
|
-
clean_text = text.gsub(match[0], "").strip
|
234
|
-
return Result.new(clean_text, date)
|
235
|
-
end
|
236
|
-
|
237
|
-
# Check for "in X months" pattern
|
238
|
-
if (match = text.match(IN_X_MONTHS_PATTERN))
|
239
|
-
months = match[1].to_i
|
240
|
-
date = Date.today >> months
|
241
|
-
clean_text = text.gsub(match[0], "").strip
|
242
|
-
return Result.new(clean_text, date)
|
243
|
-
end
|
39
|
+
# Alias for clean_text for backward compatibility
|
40
|
+
alias text clean_text
|
244
41
|
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
date = Date.today << months
|
249
|
-
clean_text = text.gsub(match[0], "").strip
|
250
|
-
return Result.new(clean_text, date)
|
251
|
-
end
|
42
|
+
# Returns a DateTime object if both date and time are available, otherwise nil
|
43
|
+
def datetime
|
44
|
+
return nil unless @time && @date
|
252
45
|
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
date = Date.new(Date.today.year + years, Date.today.month, Date.today.day)
|
257
|
-
clean_text = text.gsub(match[0], "").strip
|
258
|
-
return Result.new(clean_text, date)
|
259
|
-
end
|
46
|
+
DateTime.new(@date.year, @date.month, @date.day, @time.hour, @time.min, @time.sec)
|
47
|
+
end
|
48
|
+
end
|
260
49
|
|
261
|
-
|
262
|
-
|
263
|
-
years = match[1].to_i
|
264
|
-
date = Date.new(Date.today.year - years, Date.today.month, Date.today.day)
|
265
|
-
clean_text = text.gsub(match[0], "").strip
|
266
|
-
return Result.new(clean_text, date)
|
267
|
-
end
|
50
|
+
# Error raised when parsing fails
|
51
|
+
class ParseError < StandardError; end
|
268
52
|
|
269
|
-
|
53
|
+
# Main parser class for the Hizuke library
|
54
|
+
class Parser
|
55
|
+
include Constants
|
56
|
+
include DateCalculator
|
57
|
+
include WeekCalculator
|
58
|
+
include MonthCalculator
|
59
|
+
include YearCalculator
|
60
|
+
include QuarterCalculator
|
61
|
+
include PeriodCalculator
|
62
|
+
include PatternMatcher
|
63
|
+
|
64
|
+
# Parse a date from text - class method
|
65
|
+
# @param text [String] the text to parse
|
66
|
+
# @return [Hizuke::Result] the parsing result with clean text and date
|
67
|
+
def self.parse(text)
|
68
|
+
new.parse(text)
|
270
69
|
end
|
271
70
|
|
272
|
-
#
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
# Calculate days until the target day in this week
|
278
|
-
days_diff = (target_wday - today_wday) % 7
|
279
|
-
|
280
|
-
# If it's the same day, return today's date
|
281
|
-
if days_diff == 0
|
282
|
-
return today
|
283
|
-
end
|
284
|
-
|
285
|
-
# Return the date of the next occurrence in this week
|
286
|
-
today + days_diff
|
71
|
+
# Parse a date with result details from text - class method
|
72
|
+
# @param text [String] the text to parse
|
73
|
+
# @return [Hizuke::Result] the parsing result with clean text, date and time (date or time may be nil)
|
74
|
+
def self.parse_with_result(text)
|
75
|
+
new.parse_with_result(text)
|
287
76
|
end
|
288
77
|
|
289
|
-
#
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
days_until_target = (target_wday - today_wday) % 7
|
296
|
-
|
297
|
-
# If today is the target day or the target day is earlier in the week,
|
298
|
-
# we want the day next week, so add 7 days
|
299
|
-
if days_until_target == 0 || target_wday < today_wday
|
300
|
-
days_until_target += 7
|
301
|
-
end
|
302
|
-
|
303
|
-
today + days_until_target
|
304
|
-
end
|
78
|
+
# Parse a date from text
|
79
|
+
# @param text [String] the text to parse
|
80
|
+
# @return [Hizuke::Result] the parsing result with clean text and date
|
81
|
+
def parse(text)
|
82
|
+
raise ParseError, 'Cannot parse nil input' if text.nil?
|
83
|
+
raise ParseError, 'Cannot parse empty input' if text.empty?
|
305
84
|
|
306
|
-
|
307
|
-
def calculate_last_day(target_wday)
|
308
|
-
today = Date.today
|
309
|
-
today_wday = today.wday
|
310
|
-
|
311
|
-
# Calculate days since the last occurrence
|
312
|
-
days_since_target = (today_wday - target_wday) % 7
|
313
|
-
|
314
|
-
# If today is the target day or the target day is later in the week,
|
315
|
-
# we want the day last week, so add 7 days
|
316
|
-
if days_since_target == 0 || target_wday > today_wday
|
317
|
-
days_since_target += 7
|
318
|
-
end
|
319
|
-
|
320
|
-
today - days_since_target
|
85
|
+
parse_with_result(text)
|
321
86
|
end
|
322
87
|
|
323
|
-
#
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
#
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
next_month = Date.today >> 1
|
342
|
-
Date.new(next_month.year, next_month.month, 1)
|
343
|
-
elsif date_value == :last_month
|
344
|
-
# Return the first day of the previous month
|
345
|
-
prev_month = Date.today << 1
|
346
|
-
Date.new(prev_month.year, prev_month.month, 1)
|
347
|
-
elsif date_value == :next_year
|
348
|
-
# Return the first day of the next year
|
349
|
-
next_year = Date.today.year + 1
|
350
|
-
Date.new(next_year, 1, 1)
|
351
|
-
elsif date_value == :last_year
|
352
|
-
# Return the first day of the last year
|
353
|
-
last_year = Date.today.year - 1
|
354
|
-
Date.new(last_year, 1, 1)
|
355
|
-
elsif date_value == :next_quarter
|
356
|
-
# Return the first day of the next quarter
|
357
|
-
today = Date.today
|
358
|
-
current_month = today.month
|
359
|
-
|
360
|
-
# Determine the start month of the next quarter
|
361
|
-
next_quarter_month = case
|
362
|
-
when current_month <= 3
|
363
|
-
4 # Q2 starts in April
|
364
|
-
when current_month <= 6
|
365
|
-
7 # Q3 starts in July
|
366
|
-
when current_month <= 9
|
367
|
-
10 # Q4 starts in October
|
368
|
-
else
|
369
|
-
1 # Q1 of next year starts in January
|
370
|
-
end
|
371
|
-
|
372
|
-
# If the next quarter is in the next year, increment the year
|
373
|
-
next_quarter_year = today.year
|
374
|
-
next_quarter_year += 1 if current_month > 9
|
375
|
-
|
376
|
-
Date.new(next_quarter_year, next_quarter_month, 1)
|
377
|
-
elsif date_value == :last_quarter
|
378
|
-
# Return the first day of the last quarter
|
379
|
-
today = Date.today
|
380
|
-
current_month = today.month
|
381
|
-
|
382
|
-
# Determine the start month of the last quarter
|
383
|
-
last_quarter_month = case
|
384
|
-
when current_month <= 3
|
385
|
-
10 # Q4 of last year starts in October
|
386
|
-
when current_month <= 6
|
387
|
-
1 # Q1 starts in January
|
388
|
-
when current_month <= 9
|
389
|
-
4 # Q2 starts in April
|
390
|
-
else
|
391
|
-
7 # Q3 starts in July
|
392
|
-
end
|
393
|
-
|
394
|
-
# If the last quarter is in the previous year, decrement the year
|
395
|
-
last_quarter_year = today.year
|
396
|
-
last_quarter_year -= 1 if current_month <= 3
|
397
|
-
|
398
|
-
Date.new(last_quarter_year, last_quarter_month, 1)
|
399
|
-
elsif date_value == :this_weekend
|
400
|
-
# Calculate days until Saturday
|
401
|
-
days_until_saturday = (6 - Date.today.wday) % 7
|
402
|
-
# If today is Saturday or Sunday, we're already on the weekend
|
403
|
-
days_until_saturday = 0 if days_until_saturday == 0 || days_until_saturday == 6
|
404
|
-
Date.today + days_until_saturday
|
405
|
-
elsif date_value == :end_of_week
|
406
|
-
# Calculate days until Sunday (end of week)
|
407
|
-
days_until_sunday = (0 - Date.today.wday) % 7
|
408
|
-
# If today is Sunday, we're already at the end of the week
|
409
|
-
days_until_sunday = 0 if days_until_sunday == 0
|
410
|
-
Date.today + days_until_sunday
|
411
|
-
elsif date_value == :end_of_month
|
412
|
-
# Return the last day of the current month
|
413
|
-
# Get the first day of next month
|
414
|
-
next_month = Date.today >> 1
|
415
|
-
first_day_next_month = Date.new(next_month.year, next_month.month, 1)
|
416
|
-
# Subtract one day to get the last day of current month
|
417
|
-
first_day_next_month - 1
|
418
|
-
elsif date_value == :end_of_year
|
419
|
-
# Return the last day of the current year (December 31)
|
420
|
-
Date.new(Date.today.year, 12, 31)
|
421
|
-
elsif date_value == :mid_week
|
422
|
-
# Return Wednesday of the current week
|
423
|
-
# Calculate days until/since Wednesday (3)
|
424
|
-
today_wday = Date.today.wday
|
425
|
-
target_wday = 3 # Wednesday
|
426
|
-
days_diff = (target_wday - today_wday) % 7
|
427
|
-
# If the difference is more than 3, then Wednesday has passed this week
|
428
|
-
# So we need to go back to Wednesday
|
429
|
-
days_diff = days_diff - 7 if days_diff > 3
|
430
|
-
Date.today + days_diff
|
431
|
-
elsif date_value == :mid_month
|
432
|
-
# Return the 15th day of the current month
|
433
|
-
Date.new(Date.today.year, Date.today.month, 15)
|
88
|
+
# Parse a date with result details from text
|
89
|
+
# @param text [String] the text to parse
|
90
|
+
# @return [Hizuke::Result] the parsing result with clean text, date (nil when no date was found) and extracted time
|
91
|
+
def parse_with_result(text)
|
92
|
+
raise ParseError, 'Cannot parse nil input' if text.nil?
|
93
|
+
raise ParseError, 'Cannot parse empty input' if text.empty?
|
94
|
+
|
95
|
+
# First we'll try to find a date
|
96
|
+
result = try_parsing_strategies(text)
|
97
|
+
|
98
|
+
if result
|
99
|
+
# If we found a date, extract time references
|
100
|
+
time, clean_text = extract_time_references(result.clean_text)
|
101
|
+
Result.new(clean_text, result.date, time)
|
102
|
+
else
|
103
|
+
# If we didn't find a date, extract only time references
|
104
|
+
time, clean_text = extract_time_references(text)
|
105
|
+
Result.new(clean_text, nil, time)
|
434
106
|
end
|
435
107
|
end
|
436
108
|
end
|
437
|
-
end
|
109
|
+
end
|