date_parser 0.1.32 → 0.1.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0ddb095af53cceacb9a7b225fe35e7e0394eab5a
4
- data.tar.gz: 3dad721f0e8209cd16bb2428b5c7f69548b6c843
3
+ metadata.gz: 592cec3a9d927234909d05075ffc420aad8215e9
4
+ data.tar.gz: ca673826410f557feedea6ac8d44ed5518cf76c0
5
5
  SHA512:
6
- metadata.gz: 04001838b9215d7ddf43d24d53f2b07eb39cc472a5a54d9959144056dd93cd063e4d7adcde1e0720bdbd5aa09159852fb4af804295fb7725e867fad8a20b8b60
7
- data.tar.gz: 08996a4436fa3371c948b280d061107fd8d705dd4f43d1e90fa9e65c462d8ee158c53f088d52e3574e07e19ae0c5146a6c0bc653b63568f5ac9e59451e837431
6
+ metadata.gz: 01f07e1867c72a4c81afa994f4c43391182410b4a61c24f4d2c1a4884d18764d10495d1dccd48af3abd84092ca779cbe355d4abe70d48c14023c0fc011dad048
7
+ data.tar.gz: 1174abd562af7bc04eb9f1b4041fa9b1096841ff07cf7b0de63a65943c59cde210f8bb18b4236c42b3137627137fba6a45da9757f1d420d0cc879125d0853bb3
data/NEWS.md CHANGED
@@ -1,3 +1,9 @@
1
+ # DateParser 0.1.4
2
+ * Improved namespacing.
3
+ + NaturalDateParsing and Utils now part of the DateParser namespace.
4
+ * Removed an unused utils file.
5
+ * Some documentation expansion.
6
+
1
7
  # DateParser 0.1.3
2
8
  * New internal checks to avoid ambiguous behavior.
3
9
  + Notably: creation_date is enforced to be a descendent of the Date class.
data/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ [![Gem Version](https://badge.fury.io/rb/date_parser.svg)](https://badge.fury.io/rb/date_parser)
2
+
1
3
  # DateParser
2
4
 
3
5
  DateParser is a simple, fast, effective way of parsing dates from natural language
@@ -1,476 +1,484 @@
1
- require_relative 'date_utils'
2
1
  require_relative "utils"
3
2
 
4
- # Handles the mechanics of natural language processing.
5
- #
6
- # == Methods
7
- #
8
- # <b>interpret_date(txt, creation_date, parse_single_years)</b>:
9
- # Return an array of dates from the set of parameters.
10
- #
11
- # We parse in order of decreasing
12
- # strictness. I.e., a very specific phrase like "January 1st, 2013" will be parsed
13
- # before "January 1st," which will be parsed before just "2013". Whenever we
14
- # determine a phrase is part of a date, we remove the phrase after parsing.
15
- # So in the example "January 1st, 2013" we'll return only one date.
16
- #
17
- # If no dates are found, returns an empty array.
18
- #
19
- # <b>parse_one_word(word, creation_date, parse_single_years)</b>: Given a single word,
20
- # a string, tries to return a Date object.
21
- #
22
-
23
- module NaturalDateParsing
24
-
25
- ###############################################
26
- ##
27
- ## Constants
28
- ##
29
-
30
- # Names of days as well as common shortened versions.
31
- SINGLE_DAYS = [
32
- 'mon', 'tue', 'wed', 'thur', 'fri', 'sat', 'sun',
33
- 'monday', 'tuesday', 'wednesday', 'thursday', 'friday',
34
- 'saturday', 'sunday', 'tues'
35
- ]
36
-
37
- # Phrases that denote a date relative to today (here often
38
- # called the creation_date)
39
- RELATIVE_DAYS = ['today', 'tomorrow', 'tonight', 'yesterday']
40
-
41
- # Names of months as well as common shortened versions
42
- MONTH = [
43
- 'jan', 'feb', 'mar', 'may', 'june', 'july', 'aug', 'sept', 'oct',
44
- 'nov', 'dec',
45
- 'january', 'february', 'march', 'april', 'august', 'september',
46
- 'october', 'november', 'december'
47
- ]
48
-
49
- # A list of numbers from [1, 31]
50
- NUMERIC_DAY = ('1'..'31').to_a
51
-
52
- # Numbers from [1, 31] as well as the common suffixes (such as 1st, 2nd, e.t.c.)
53
- SUFFIXED_NUMERIC_DAY = [
54
- '1st', '2nd', '3rd', '4th', '5th', '6th', '7th',
55
- '8th', '9th', '10th', '11th', '12th', '13th', '14th',
56
- '15th', '16th', '17th', '18th', '19th', '20th',
57
- '21st', '22nd', '23rd','24th', '25th',
58
- '26th', '27th', '28th', '29th', '30th', '31st'
59
- ]
3
+ module DateParser
60
4
 
61
- ###############################################
62
- ##
63
- ## Main Parsing/Processing Function
64
- ##
65
-
66
- # Processes a given text and returns an array of probable dates contained within.
67
- #
68
- # ==== Description
69
- #
70
- # Tries to interpret dates from the given text, in order from strictest
71
- # interpretation to looser interpretations. No word can be part of two
72
- # different dates.
73
- #
74
- # Works by calling parse_three_words, parse_two_words, and parse_one_word
75
- # on the text.
76
- #
77
- # ==== Attributes
78
- #
79
- # * +txt+ - The text to parse.
5
+ # Handles the mechanics of natural language processing.
80
6
  #
81
- # * +creation_date+ - A Date object of when the text was created or released.
82
- # Defaults to nil, but if provided can make returned dates more accurate.
7
+ # == Methods
83
8
  #
84
- # * +parse_single_years+ - A boolean. If true, we interpret single numbers as
85
- # years. This is a very broad assumption, and so defaults to false.
9
+ # <b>interpret_date(txt, creation_date, parse_single_years)</b>:
10
+ # Return an array of dates from the set of parameters.
86
11
  #
87
- # * +parse_ambiguous_dates+ - Some phrases are not necessarily dates depending
88
- # on context. For example "1st" may not refer to
89
- # the 1st of a month. This option toggles whether or not those
90
- # phrases are considered dates. Defaults to true.
12
+ # We parse in order of decreasing
13
+ # strictness. I.e., a very specific phrase like "January 1st, 2013" will be parsed
14
+ # before "January 1st," which will be parsed before just "2013". Whenever we
15
+ # determine a phrase is part of a date, we remove the phrase after parsing.
16
+ # So in the example "January 1st, 2013" we'll return only one date.
91
17
  #
92
- # ==== Examples
18
+ # If no dates are found, returns an empty array.
93
19
  #
94
- # text = "Henry and Hanke created a calendar that causes each day to fall " +
95
- # "on the same day of the week every year. They recommend its " +
96
- # "implementation on January 1, 2018, a Monday."
97
- # creation_date = Date.parse("July 6, 2016")
20
+ # <b>parse_one_word(word, creation_date, parse_single_years)</b>: Given a single word,
21
+ # a string, tries to return a Date object.
98
22
  #
99
- # NaturalDateParsing.interpret_date(text, creation_date)
100
- # #=> [#<Date: 2018-01-01 ((2458120j,0s,0n),+0s,2299161j)>,
101
- # #<Date: 2016-07-11 ((2457581j,0s,0n),+0s,2299161j)>]
23
+ # <b>parse_two_words(words, creation_date = nil)</b>: Attempts to return a Date object
24
+ # given a string containing two words.
102
25
  #
103
- # NaturalDateParsing.interpret_date("No dates here!")
104
- # #=> []
26
+ # <b>parse_three_words(words, creation_date = nil)</b>: Given three words,
27
+ # attempts to return a Date object.
105
28
  #
106
- # NaturalDateParsing.interpret_date("2012", nil, true)
107
- # #=> [#<Date: 2012-01-01 ((2455928j,0s,0n),+0s,2299161j)>]
108
- #
109
- def NaturalDateParsing.interpret_date(
110
- txt,
111
- creation_date = nil,
112
- parse_single_years = false,
113
- parse_ambiguous_dates = true
114
- )
115
- possible_dates = []
116
- txt = Utils::clean_str(txt)
117
- words = txt.split(" ").map{|x| x.strip}
29
+
30
+ module NaturalDateParsing
31
+
32
+ ###############################################
33
+ ##
34
+ ## Constants
35
+ ##
36
+
37
+ # Names of days as well as common shortened versions.
38
+ SINGLE_DAYS = [
39
+ 'mon', 'tue', 'wed', 'thur', 'fri', 'sat', 'sun',
40
+ 'monday', 'tuesday', 'wednesday', 'thursday', 'friday',
41
+ 'saturday', 'sunday', 'tues'
42
+ ]
43
+
44
+ # Phrases that denote a date relative to today (here often
45
+ # called the creation_date)
46
+ RELATIVE_DAYS = ['today', 'tomorrow', 'tonight', 'yesterday']
47
+
48
+ # Names of months as well as common shortened versions
49
+ MONTH = [
50
+ 'jan', 'feb', 'mar', 'may', 'june', 'july', 'aug', 'sept', 'oct',
51
+ 'nov', 'dec',
52
+ 'january', 'february', 'march', 'april', 'august', 'september',
53
+ 'october', 'november', 'december'
54
+ ]
55
+
56
+ # A list of numbers from [1, 31]
57
+ NUMERIC_DAY = ('1'..'31').to_a
58
+
59
+ # Numbers from [1, 31] as well as the common suffixes (such as 1st, 2nd, e.t.c.)
60
+ SUFFIXED_NUMERIC_DAY = [
61
+ '1st', '2nd', '3rd', '4th', '5th', '6th', '7th',
62
+ '8th', '9th', '10th', '11th', '12th', '13th', '14th',
63
+ '15th', '16th', '17th', '18th', '19th', '20th',
64
+ '21st', '22nd', '23rd','24th', '25th',
65
+ '26th', '27th', '28th', '29th', '30th', '31st'
66
+ ]
118
67
 
119
- # We use the while loop, as apparently there are cases where we try to subset
120
- # words despite the value of i being >= words.length - 3
121
- # TODO: Figure out why the above happens. Preferably return to for loop.
122
- # TODO: Cleaner way of structuring the below? I could break up the loops
123
- # into single functions. Consider.
124
- i = 0
68
+ ###############################################
69
+ ##
70
+ ## Main Parsing/Processing Function
71
+ ##
125
72
 
126
- while (i <= words.length - 3) do
127
- subset_words = words[i..(i+2)]
73
+ # Processes a given text and returns an array of probable dates contained within.
74
+ #
75
+ # ==== Description
76
+ #
77
+ # Tries to interpret dates from the given text, in order from strictest
78
+ # interpretation to looser interpretations. No word can be part of two
79
+ # different dates.
80
+ #
81
+ # Works by calling parse_three_words, parse_two_words, and parse_one_word
82
+ # on the text.
83
+ #
84
+ # ==== Attributes
85
+ #
86
+ # * +txt+ - The text to parse.
87
+ #
88
+ # * +creation_date+ - A Date object of when the text was created or released.
89
+ # Defaults to nil, but if provided can make returned dates more accurate.
90
+ #
91
+ # * +parse_single_years+ - A boolean. If true, we interpret single numbers as
92
+ # years. This is a very broad assumption, and so defaults to false.
93
+ #
94
+ # * +parse_ambiguous_dates+ - Some phrases are not necessarily dates depending
95
+ # on context. For example "1st" may not refer to
96
+ # the 1st of a month. This option toggles whether or not those
97
+ # phrases are considered dates. Defaults to true.
98
+ #
99
+ # ==== Examples
100
+ #
101
+ # text = "Henry and Hanke created a calendar that causes each day to fall " +
102
+ # "on the same day of the week every year. They recommend its " +
103
+ # "implementation on January 1, 2018, a Monday."
104
+ # creation_date = Date.parse("July 6, 2016")
105
+ #
106
+ # NaturalDateParsing.interpret_date(text, creation_date)
107
+ # #=> [#<Date: 2018-01-01 ((2458120j,0s,0n),+0s,2299161j)>,
108
+ # #<Date: 2016-07-11 ((2457581j,0s,0n),+0s,2299161j)>]
109
+ #
110
+ # NaturalDateParsing.interpret_date("No dates here!")
111
+ # #=> []
112
+ #
113
+ # NaturalDateParsing.interpret_date("2012", nil, true)
114
+ # #=> [#<Date: 2012-01-01 ((2455928j,0s,0n),+0s,2299161j)>]
115
+ #
116
+ def NaturalDateParsing.interpret_date(
117
+ txt,
118
+ creation_date = nil,
119
+ parse_single_years = false,
120
+ parse_ambiguous_dates = true
121
+ )
122
+ possible_dates = []
123
+ txt = Utils::clean_str(txt)
124
+ words = txt.split(" ").map{|x| x.strip}
128
125
 
129
- proposed_date = parse_three_words(subset_words, creation_date)
126
+ # We use the while loop, as apparently there are cases where we try to subset
127
+ # words despite the value of i being >= words.length - 3
128
+ # TODO: Figure out why the above happens. Preferably return to for loop.
129
+ # TODO: Cleaner way of structuring the below? I could break up the loops
130
+ # into single functions. Consider.
131
+ i = 0
130
132
 
131
- if(! proposed_date.nil?)
132
- possible_dates << proposed_date
133
- words = Utils::delete_at_indices(words, i..(i+2))
134
- i -= 1
133
+ while (i <= words.length - 3) do
134
+ subset_words = words[i..(i+2)]
135
+
136
+ proposed_date = parse_three_words(subset_words, creation_date)
137
+
138
+ if(! proposed_date.nil?)
139
+ possible_dates << proposed_date
140
+ words = Utils::delete_at_indices(words, i..(i+2))
141
+ i -= 1
142
+ end
143
+
144
+ i += 1
135
145
  end
136
146
 
137
- i += 1
138
- end
139
-
140
- i = 0
141
-
142
- while (i <= words.length - 2) do
143
- subset_words = words[i..(i+1)]
144
- proposed_date = parse_two_words(subset_words, creation_date)
147
+ i = 0
145
148
 
146
- if(! proposed_date.nil?)
147
- possible_dates << proposed_date
148
- words = Utils::delete_at_indices(words, i..(i+1))
149
- i -= 1
149
+ while (i <= words.length - 2) do
150
+ subset_words = words[i..(i+1)]
151
+ proposed_date = parse_two_words(subset_words, creation_date)
152
+
153
+ if(! proposed_date.nil?)
154
+ possible_dates << proposed_date
155
+ words = Utils::delete_at_indices(words, i..(i+1))
156
+ i -= 1
157
+ end
158
+
159
+ i += 1
150
160
  end
151
161
 
152
- i += 1
153
- end
154
-
155
- i = 0
156
-
157
- while (i <= words.length - 1) do
158
- subset_words = words[i]
162
+ i = 0
159
163
 
160
- proposed_date = parse_one_word(subset_words,
161
- creation_date,
162
- parse_single_years,
163
- parse_ambiguous_dates)
164
-
165
- if(! proposed_date.nil?)
166
- possible_dates << proposed_date
167
- words.delete_at(i)
168
- i -= 1
164
+ while (i <= words.length - 1) do
165
+ subset_words = words[i]
166
+
167
+ proposed_date = parse_one_word(subset_words,
168
+ creation_date,
169
+ parse_single_years,
170
+ parse_ambiguous_dates)
171
+
172
+ if(! proposed_date.nil?)
173
+ possible_dates << proposed_date
174
+ words.delete_at(i)
175
+ i -= 1
176
+ end
177
+
178
+ i += 1
169
179
  end
170
180
 
171
- i += 1
181
+ return possible_dates
172
182
  end
173
183
 
174
- return possible_dates
175
- end
176
-
177
-
178
- ###############################################
179
- ##
180
- ## Parse Cases (1 word, 2 words, 3 words)
181
- ##
182
-
183
- # Takes a single word and tries to return a date.
184
- #
185
- # If no date can be interpreted from the word, returns nil. We consider these
186
- # cases:
187
- # * DAY (mon, tuesday, e.t.c.)
188
- # * A relative day (today, tomorrow, tonight, yesterday)
189
- # * Dates of the form MM/DD
190
- # * Numbers such as [1st, 31st]
191
- # * MONTH (jan, february, e.t.c.)
192
- # * YYYY (2012, 102. Must be enabled.)
193
- # * YYYY-MM-DD, DD-MM-YYYY, MM-DD-YYYY
194
- #
195
- # ==== Attributes
196
- #
197
- # * +word+ - A String, preferably consisting of a single word.
198
- #
199
- # * +creation_date+ - A Date object of when the text was created or released.
200
- # Defaults to nil, but if provided can make returned dates more accurate.
201
- #
202
- # * +parse_single_years+ - A boolean. If true, we interpret single numbers as
203
- # years. This is a very broad assumption, and so defaults to false.
204
- #
205
- # * +parse_ambiguous_dates+ - Some phrases are not necessarily dates depending
206
- # on context. For example "1st" may not refer to
207
- # the 1st of a month. This option toggles whether or not those
208
- # phrases are considered dates. Defaults to true.
209
- #
210
- def NaturalDateParsing.parse_one_word(
211
- word,
212
- creation_date = nil,
213
- parse_single_years = false,
214
- parse_ambiguous_dates = true
215
- )
216
184
 
217
- if SINGLE_DAYS.include? word
218
- proposed_date = Date.parse(word)
185
+ ###############################################
186
+ ##
187
+ ## Parse Cases (1 word, 2 words, 3 words)
188
+ ##
189
+
190
+ # Takes a single word and tries to return a date.
191
+ #
192
+ # If no date can be interpreted from the word, returns nil. We consider these
193
+ # cases:
194
+ # * DAY (mon, tuesday, e.t.c.)
195
+ # * A relative day (today, tomorrow, tonight, yesterday)
196
+ # * Dates of the form MM/DD
197
+ # * Numbers such as [1st, 31st]
198
+ # * MONTH (jan, february, e.t.c.)
199
+ # * YYYY (2012, 102. Must be enabled.)
200
+ # * YYYY-MM-DD, DD-MM-YYYY, MM-DD-YYYY
201
+ #
202
+ # ==== Attributes
203
+ #
204
+ # * +word+ - A String, preferably consisting of a single word.
205
+ #
206
+ # * +creation_date+ - A Date object of when the text was created or released.
207
+ # Defaults to nil, but if provided can make returned dates more accurate.
208
+ #
209
+ # * +parse_single_years+ - A boolean. If true, we interpret single numbers as
210
+ # years. This is a very broad assumption, and so defaults to false.
211
+ #
212
+ # * +parse_ambiguous_dates+ - Some phrases are not necessarily dates depending
213
+ # on context. For example "1st" may not refer to
214
+ # the 1st of a month. This option toggles whether or not those
215
+ # phrases are considered dates. Defaults to true.
216
+ #
217
+ def NaturalDateParsing.parse_one_word(
218
+ word,
219
+ creation_date = nil,
220
+ parse_single_years = false,
221
+ parse_ambiguous_dates = true
222
+ )
219
223
 
220
- # If we have the creation_date date, we can try to be a little smarter
221
- if(! creation_date.nil?)
222
- weeks_to_shift = difference_in_weeks(Date.today, creation_date)
223
-
224
- proposed_date = proposed_date - (weeks_to_shift * 7)
224
+ if SINGLE_DAYS.include? word
225
+ proposed_date = Date.parse(word)
225
226
 
226
- # Right now though, it should be within 1 week of accuracy, and either one
227
- # week ahead or one week behind.
228
- # The solution is pretty simple. If the proposed date
229
- # is more than a week ahead of the creation date, then go back one week.
230
- if proposed_date - creation_date > 7
231
- proposed_date = proposed_date - 7
232
- elsif proposed_date - creation_date < 0
233
- proposed_date = proposed_date + 7
227
+ # If we have the creation_date date, we can try to be a little smarter
228
+ if(! creation_date.nil?)
229
+ weeks_to_shift = difference_in_weeks(Date.today, creation_date)
230
+
231
+ proposed_date = proposed_date - (weeks_to_shift * 7)
232
+
233
+ # Right now though, it should be within 1 week of accuracy, and either one
234
+ # week ahead or one week behind.
235
+ # The solution is pretty simple. If the proposed date
236
+ # is more than a week ahead of the creation date, then go back one week.
237
+ if proposed_date - creation_date > 7
238
+ proposed_date = proposed_date - 7
239
+ elsif proposed_date - creation_date < 0
240
+ proposed_date = proposed_date + 7
241
+ end
234
242
  end
243
+
244
+ return proposed_date
235
245
  end
236
246
 
237
- return proposed_date
238
- end
239
-
240
- # Parsing phrases like "yesterday", "today", "tonight"
241
- if RELATIVE_DAYS.include? word
242
- if word == 'today' || word == 'tonight'
243
- if creation_date.nil?
244
- return Date.today
245
- else
246
- return creation_date
247
+ # Parsing phrases like "yesterday", "today", "tonight"
248
+ if RELATIVE_DAYS.include? word
249
+ if word == 'today' || word == 'tonight'
250
+ if creation_date.nil?
251
+ return Date.today
252
+ else
253
+ return creation_date
254
+ end
255
+ elsif word == 'yesterday'
256
+ if creation_date.nil?
257
+ return Date.today - 1
258
+ else
259
+ return creation_date - 1
260
+ end
261
+ elsif word == "tomorrow"
262
+ return creation_date + 1
247
263
  end
248
- elsif word == 'yesterday'
249
- if creation_date.nil?
250
- return Date.today - 1
251
- else
252
- return creation_date - 1
253
- end
254
- elsif word == "tomorrow"
255
- return creation_date + 1
256
264
  end
265
+
266
+ # Parsing strings of the form XX/XX
267
+ if word.include? '/'
268
+ return slash_date(word, creation_date)
269
+ end
270
+
271
+ # Parsing strings like "23rd"
272
+ if (SUFFIXED_NUMERIC_DAY.include? word) && parse_ambiguous_dates
273
+ return numeric_single_day(word, creation_date)
274
+ end
275
+
276
+ # Parsing month names
277
+ if MONTH.include? word
278
+ return default_month(word, creation_date)
279
+ end
280
+
281
+ # In this case, we assume it's a year!
282
+ if parse_single_years && (Utils::is_int? word)
283
+ return default_year(word)
284
+ end
285
+
286
+ # Parsing XX-XX-XXXX or XXXX-XX-XX
287
+ if full_numeric_date?(word)
288
+ return full_numeric_date(word)
289
+ end
290
+
257
291
  end
258
292
 
259
- # Parsing strings of the form XX/XX
260
- if word.include? '/'
261
- return slash_date(word, creation_date)
262
- end
263
293
 
264
- # Parsing strings like "23rd"
265
- if (SUFFIXED_NUMERIC_DAY.include? word) && parse_ambiguous_dates
266
- return numeric_single_day(word, creation_date)
294
+ # Takes two words and tries to return a date.
295
+ #
296
+ # If no date can be interpreted from the word, returns nil. In this case,
297
+ # we look for dates of this form:
298
+ # * MONTH DAY
299
+ #
300
+ # ==== Attributes
301
+ #
302
+ # * +words+ - An array of two words, downcased and stripped.
303
+ #
304
+ # * +creation_date+ - A Date object of when the text was created or released.
305
+ # Defaults to nil, but if provided can make returned dates more accurate.
306
+ #
307
+ def NaturalDateParsing.parse_two_words(words, creation_date = nil)
308
+
309
+ if MONTH.include?(words[0]) && _weak_day?(words[1])
310
+ return month_day(words, creation_date)
311
+ end
312
+
267
313
  end
268
314
 
269
- # Parsing month names
270
- if MONTH.include? word
271
- return default_month(word, creation_date)
272
- end
273
315
 
274
- # In this case, we assume it's a year!
275
- if parse_single_years && (Utils::is_int? word)
276
- return default_year(word)
316
+ # Takes three words and tries to return a date.
317
+ #
318
+ # If no date can be interpreted from the word, returns nil. In this case,
319
+ # assumes the word can take these forms:
320
+ # * MONTH DAY YEAR
321
+ #
322
+ # ==== Attributes
323
+ #
324
+ # * +words+ - An array of three words, downcased and stripped.
325
+ #
326
+ # * +creation_date+ - A Date object of when the text was created or released.
327
+ # Defaults to nil, but if provided can make returned dates more accurate.
328
+ #
329
+ def NaturalDateParsing.parse_three_words(words, creation_date = nil)
330
+
331
+ if MONTH.include?(words[0]) && _weak_day?(words[1]) && Utils::is_int?(words[2])
332
+ return Date.parse(words.join(" "))
333
+ end
334
+
277
335
  end
278
336
 
279
- # Parsing XX-XX-XXXX or XXXX-XX-XX
280
- if full_numeric_date?(word)
281
- return full_numeric_date(word)
282
- end
337
+ ###############################################
338
+ ##
339
+ ## Parse Functions
340
+ ##
283
341
 
284
- end
285
-
286
-
287
- # Takes two words and tries to return a date.
288
- #
289
- # If no date can be interpreted from the word, returns nil. In this case,
290
- # we look for dates of this form:
291
- # * MONTH DAY
292
- #
293
- # ==== Attributes
294
- #
295
- # * +words+ - An array of two words, downcased and stripped.
296
- #
297
- # * +creation_date+ - A Date object of when the text was created or released.
298
- # Defaults to nil, but if provided can make returned dates more accurate.
299
- #
300
- def NaturalDateParsing.parse_two_words(words, creation_date = nil)
301
-
302
- if MONTH.include?(words[0]) && _weak_day?(words[1])
303
- return month_day(words, creation_date)
342
+ # Given a single word, assumes the word is of the form XX/XX and returns
343
+ # the appropriate Date object. If not possible, returns nil.
344
+ def NaturalDateParsing.slash_date(word, creation_date = nil)
345
+ samp = word.split('/')
346
+ month = samp[0].to_i
347
+ day = samp[1].to_i
348
+
349
+ if month > 0 && month <= 12 && day > 0 && day <= 31
350
+ # TODO: IMPROVE EXCEPTION HANDLING.
351
+ begin
352
+ proposed_date = Date.parse(word)
353
+ if(! creation_date.nil?) ## We're sensitive to only years here.
354
+ years_diff = Date.today.year - creation_date.year
355
+ proposed_date = proposed_date << (12 * years_diff)
356
+ end
357
+ return proposed_date
358
+ rescue ArgumentError
359
+ return nil
360
+ end
361
+ end
304
362
  end
305
363
 
306
- end
307
-
308
-
309
- # Takes three words and tries to return a date.
310
- #
311
- # If no date can be interpreted from the word, returns nil. In this case,
312
- # assumes the word can take these forms:
313
- # * MONTH DAY YEAR
314
- #
315
- # ==== Attributes
316
- #
317
- # * +words+ - An array of three words, downcased and stripped.
318
- #
319
- # * +creation_date+ - A Date object of when the text was created or released.
320
- # Defaults to nil, but if provided can make returned dates more accurate.
321
- #
322
- def NaturalDateParsing.parse_three_words(words, creation_date = nil)
323
-
324
- if MONTH.include?(words[0]) && _weak_day?(words[1]) && Utils::is_int?(words[2])
325
- return Date.parse(words.join(" "))
364
+ # Parses an array containing two elements (single words) on the assumption
365
+ # that the array is of the form ["MONTH", "DAY"]
366
+ def NaturalDateParsing.month_day(words, creation_date = nil)
367
+ begin
368
+ proposed_date = Date.parse(words.join(" "))
369
+
370
+ diff_in_years = creation_date.nil? ? 0 : (creation_date.year - Date.today.year)
371
+
372
+ return proposed_date >> diff_in_years * 12
373
+ rescue ArgumentError
374
+ return nil
375
+ end
326
376
  end
327
377
 
328
- end
329
-
330
- ###############################################
331
- ##
332
- ## Parse Functions
333
- ##
334
-
335
- # Given a single word, assumes the word is of the form XX/XX and returns
336
- # the appropriate Date object. If not possible, returns nil.
337
- def NaturalDateParsing.slash_date(word, creation_date = nil)
338
- samp = word.split('/')
339
- month = samp[0].to_i
340
- day = samp[1].to_i
341
-
342
- if month > 0 && month <= 12 && day > 0 && day <= 31
343
- # TODO: IMPROVE EXCEPTION HANDLING.
378
+ # Parses a single numeric date (1st, 2nd, 3rd, e.t.c.).
379
+ def NaturalDateParsing.numeric_single_day(word, creation_date = nil)
380
+ diff_in_months = creation_date.nil? ? 0 : (creation_date.year * 12 + creation_date.month) -
381
+ (Date.today.year * 12 + Date.today.month)
382
+
344
383
  begin
345
- proposed_date = Date.parse(word)
346
- if(! creation_date.nil?) ## We're sensitive to only years here.
347
- years_diff = Date.today.year - creation_date.year
348
- proposed_date = proposed_date << (12 * years_diff)
349
- end
350
- return proposed_date
384
+ return Date.parse(word) >> diff_in_months
351
385
  rescue ArgumentError
386
+ ## If an ArgumentError arises, Date is not convinced it's a date.
352
387
  return nil
353
388
  end
354
389
  end
355
- end
356
-
357
- # Parses an array containing two elements (single words) on the assumption
358
- # that the array is of the form ["MONTH", "DAY"]
359
- def NaturalDateParsing.month_day(words, creation_date = nil)
360
- begin
361
- proposed_date = Date.parse(words.join(" "))
390
+
391
+ # Parses a single word of the form XXXX-XX-XX, DD-MM-YYYY or MM-DD-YYYY
392
+ def NaturalDateParsing.full_numeric_date(word)
393
+ subparts = word.split("-")
394
+
395
+ # This is a weak check to see where the year is
396
+ year_index = (subparts[0].to_i).abs > 31 ? 0 : 2
362
397
 
363
- diff_in_years = creation_date.nil? ? 0 : (creation_date.year - Date.today.year)
398
+ # Then we assume it's of the form YYYY-MM-DD
399
+ if year_index == 0
400
+ return Date.parse(word)
401
+ else
402
+ # We check the subparts to try to see which part is DD.
403
+ # If we can't determine it, we assume it's International Standard Format,
404
+ # or DD-MM-YY
405
+
406
+ if subparts[1].to_i > 12
407
+ # American Standard (MM-DD-YYYY)
408
+ subparts[0] = numeric_month_to_string(subparts[0].to_i)
409
+ return Date.parse(subparts.join(" "))
410
+
411
+ else
412
+ # International Standard (DD-MM-YYYY)
413
+ return Date.parse(word)
414
+ end
415
+ end
364
416
 
365
- return proposed_date >> diff_in_years * 12
366
- rescue ArgumentError
367
- return nil
417
+ return Date.parse(word)
368
418
  end
369
- end
370
-
371
- # Parses a single numeric date (1st, 2nd, 3rd, e.t.c.).
372
- def NaturalDateParsing.numeric_single_day(word, creation_date = nil)
373
- diff_in_months = creation_date.nil? ? 0 : (creation_date.year * 12 + creation_date.month) -
374
- (Date.today.year * 12 + Date.today.month)
375
419
 
376
- begin
377
- return Date.parse(word) >> diff_in_months
378
- rescue ArgumentError
379
- ## If an ArgumentError arises, Date is not convinced it's a date.
380
- return nil
420
+
421
+ private
422
+
423
+ ##############################################
424
+ ##
425
+ ## Private Functions
426
+ ##
427
+
428
+ def NaturalDateParsing._weak_day?(word)
429
+ return (NUMERIC_DAY.include? word) || (SUFFIXED_NUMERIC_DAY.include? word)
381
430
  end
382
- end
383
-
384
- # Parses a single word of the form XXXX-XX-XX, DD-MM-YYYY or MM-DD-YYYY
385
- def NaturalDateParsing.full_numeric_date(word)
386
- subparts = word.split("-")
387
431
 
388
- # This is a weak check to see where the year is
389
- year_index = (subparts[0].to_i).abs > 31 ? 0 : 2
432
+ def NaturalDateParsing.default_year(year)
433
+ return Date.parse("Jan 1 " + year)
434
+ end
390
435
 
391
- # Then we assume it's of the form YYYY-MM-DD
392
- if year_index == 0
393
- return Date.parse(word)
394
- else
395
- # We check the subparts to try to see which part is DD.
396
- # If we can't determine it, we assume it's International Standard Format,
397
- # or DD-MM-YY
436
+ ## TODO. NOT SENSITIVE TO YEAR.
437
+ def NaturalDateParsing.default_month(month, released = nil)
438
+ this_year = released.nil? ? Date.today.year : released.year
439
+ return Date.parse(month + " " + this_year.to_s)
440
+ end
441
+
442
+ def NaturalDateParsing.suffix(number)
443
+ int = number.to_i
398
444
 
399
- if subparts[1].to_i > 12
400
- # American Standard (MM-DD-YYYY)
401
- subparts[0] = numeric_month_to_string(subparts[0].to_i)
402
- return Date.parse(subparts.join(" "))
403
-
445
+ ## Check to see if the least significant digit is 1.
446
+ if int & 1 == 1
447
+ return int.to_s + "st"
404
448
  else
405
- # International Standard (DD-MM-YYYY)
406
- return Date.parse(word)
449
+ return int.to_s + "th"
407
450
  end
408
451
  end
409
452
 
410
- return Date.parse(word)
411
- end
412
-
413
-
414
- private
415
-
416
- ##############################################
417
- ##
418
- ## Private Functions
419
- ##
420
-
421
- def NaturalDateParsing._weak_day?(word)
422
- return (NUMERIC_DAY.include? word) || (SUFFIXED_NUMERIC_DAY.include? word)
423
- end
424
-
425
- def NaturalDateParsing.default_year(year)
426
- return Date.parse("Jan 1 " + year)
427
- end
428
-
429
- ## TODO. NOT SENSITIVE TO YEAR.
430
- def NaturalDateParsing.default_month(month, released = nil)
431
- this_year = released.nil? ? Date.today.year : released.year
432
- return Date.parse(month + " " + this_year.to_s)
433
- end
434
-
435
- def NaturalDateParsing.suffix(number)
436
- int = number.to_i
437
-
438
- ## Check to see if the least significant digit is 1.
439
- if int & 1 == 1
440
- return int.to_s + "st"
441
- else
442
- return int.to_s + "th"
453
+ ## Be careful with this.
454
+ ## date1 is the later date.
455
+ def NaturalDateParsing.difference_in_weeks(date1, date2)
456
+ return ((date1 - date2) / 7).to_i
443
457
  end
444
- end
445
-
446
- ## Be careful with this.
447
- ## date1 is the later date.
448
- def NaturalDateParsing.difference_in_weeks(date1, date2)
449
- return ((date1 - date2) / 7).to_i
450
- end
451
-
452
- # Is it of the form XXXX-XX-XX?
453
- def NaturalDateParsing.full_numeric_date?(word)
454
- output = true
455
458
 
456
- if word.include? "-"
457
- substrings = word.split("-")
458
- for substring in substrings do
459
- output = output && Utils.is_int?(substring)
459
+ # Is it of the form XXXX-XX-XX?
460
+ def NaturalDateParsing.full_numeric_date?(word)
461
+ output = true
462
+
463
+ if word.include? "-"
464
+ substrings = word.split("-")
465
+ for substring in substrings do
466
+ output = output && Utils.is_int?(substring)
467
+ end
468
+ else
469
+ output = false
460
470
  end
461
- else
462
- output = false
471
+
472
+ return output
463
473
  end
464
474
 
465
- return output
466
- end
467
-
468
- def NaturalDateParsing.numeric_month_to_string(numeric)
469
- months = ["january", "february", "march", "april", "may", "june",
470
- "july", "august", "september", "october", "november",
471
- "december"]
475
+ def NaturalDateParsing.numeric_month_to_string(numeric)
476
+ months = ["january", "february", "march", "april", "may", "june",
477
+ "july", "august", "september", "october", "november",
478
+ "december"]
479
+
480
+ return months[numeric - 1]
481
+ end
472
482
 
473
- return months[numeric - 1]
474
483
  end
475
-
476
484
  end
@@ -1,51 +1,55 @@
1
- # Extra utility functions for broader use in the gem.
2
-
3
- module Utils
4
-
5
- # Determine whether or not a String is a base 10 integer.
6
- def Utils.is_int?(str)
7
- str.to_i.to_s == str || strong_is_int?(str)
8
- end
1
+ module DateParser
9
2
 
10
- # A more rigorous check to see if the String is an int.
11
- def Utils.strong_is_int?(str)
12
- nums = ("0".."9").to_a
3
+ # Extra utility functions for broader use in the gem.
4
+ #
5
+
6
+ module Utils
13
7
 
14
- for char in str.each_char do
15
- if ! nums.include? char
16
- return false
8
+ # Determine whether or not a String is a base 10 integer.
9
+ def Utils.is_int?(str)
10
+ str.to_i.to_s == str || strong_is_int?(str)
11
+ end
12
+
13
+ # A more rigorous check to see if the String is an int.
14
+ def Utils.strong_is_int?(str)
15
+ nums = ("0".."9").to_a
16
+
17
+ for char in str.each_char do
18
+ if ! nums.include? char
19
+ return false
20
+ end
17
21
  end
22
+
23
+ return true
18
24
  end
19
25
 
20
- return true
21
- end
22
-
23
- # Removes punctuation.
24
- def Utils.clean_out_punctuation(str)
25
- str.gsub(/[^a-z0-9\s\/-]/i, '')
26
- end
27
-
28
- # Removes punctuation and downcases the str.
29
- def Utils.clean_str(str)
30
- clean_out_punctuation(str).downcase
31
- end
32
-
33
- # Performs delete_at for a range of integers
34
- #
35
- # Assumes that the integers in range are contiguous, and sorted in ascending
36
- # order.
37
- def Utils.delete_at_indices(array, range)
38
- first_val = range.first
39
- for _ in range do
40
- array.delete_at(first_val)
26
+ # Removes punctuation.
27
+ def Utils.clean_out_punctuation(str)
28
+ str.gsub(/[^a-z0-9\s\/-]/i, '')
41
29
  end
42
30
 
43
- return array
44
- end
45
-
46
- # Checks to see if an object is descended from an ancestor (or is the ancestor)
47
- # nil_accepted is a flag that checks
48
- def Utils.descended_from?(obj, ancestor, nil_accepted = true)
49
- return obj.nil? ? nil_accepted : obj.class.ancestors.include?(ancestor)
31
+ # Removes punctuation and downcases the str.
32
+ def Utils.clean_str(str)
33
+ clean_out_punctuation(str).downcase
34
+ end
35
+
36
+ # Performs delete_at for a range of integers
37
+ #
38
+ # Assumes that the integers in range are contiguous, and sorted in ascending
39
+ # order.
40
+ def Utils.delete_at_indices(array, range)
41
+ first_val = range.first
42
+ for _ in range do
43
+ array.delete_at(first_val)
44
+ end
45
+
46
+ return array
47
+ end
48
+
49
+ # Checks to see if an object is descended from an ancestor (or is the ancestor)
50
+ # nil_accepted is a flag that checks
51
+ def Utils.descended_from?(obj, ancestor, nil_accepted = true)
52
+ return obj.nil? ? nil_accepted : obj.class.ancestors.include?(ancestor)
53
+ end
50
54
  end
51
55
  end
@@ -1,6 +1,6 @@
1
1
  require_relative "../date_parser"
2
2
 
3
- describe NaturalDateParsing do
3
+ describe DateParser::NaturalDateParsing do
4
4
 
5
5
  #########################################################
6
6
  ##
@@ -25,26 +25,26 @@ describe NaturalDateParsing do
25
25
  describe ".interpret_date" do
26
26
  context "given 'April 6th, 2014'" do
27
27
  it "returns Sun, 06 Apr 2014 as a date object" do
28
- expect(NaturalDateParsing::interpret_date(@date)).to eql(@parsed_date)
28
+ expect(DateParser::NaturalDateParsing::interpret_date(@date)).to eql(@parsed_date)
29
29
  end
30
30
  end
31
31
 
32
32
  context "given a sentence containing April 6th, 2014" do
33
33
  it "returns Sun, 06 Apr 2014 as a date object" do
34
- expect(NaturalDateParsing::interpret_date(@text)).to eql(@parsed_date)
34
+ expect(DateParser::NaturalDateParsing::interpret_date(@text)).to eql(@parsed_date)
35
35
  end
36
36
  end
37
37
 
38
38
  context "given a paragraph containing several dates" do
39
39
  it "returns a list of all dates mentioned in the paragraph" do
40
- expect(NaturalDateParsing::interpret_date(@paragraph)).to eql(@parsed_date_paragraph)
40
+ expect(DateParser::NaturalDateParsing::interpret_date(@paragraph)).to eql(@parsed_date_paragraph)
41
41
  end
42
42
  end
43
43
  end
44
44
  end
45
45
 
46
46
 
47
- describe NaturalDateParsing do
47
+ describe DateParser::NaturalDateParsing do
48
48
 
49
49
  #########################################################
50
50
  ##
@@ -62,7 +62,7 @@ describe NaturalDateParsing do
62
62
  answer = [Date.parse("February 27, 1844")]
63
63
 
64
64
  it "captures the single date" do
65
- expect(NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
65
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
66
66
  end
67
67
  end
68
68
 
@@ -72,7 +72,7 @@ describe NaturalDateParsing do
72
72
  answer = [Date.parse("July 4th, 2016")]
73
73
 
74
74
  it "correctly uses the creation_date parameter" do
75
- expect(NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
75
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
76
76
  end
77
77
  end
78
78
 
@@ -82,7 +82,7 @@ describe NaturalDateParsing do
82
82
  answer = [Date.parse("February 12, 1994")]
83
83
 
84
84
  it "correctly grabs the date" do
85
- expect(NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
85
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
86
86
  end
87
87
  end
88
88
 
@@ -92,7 +92,7 @@ describe NaturalDateParsing do
92
92
  answer = [Date.today - 1]
93
93
 
94
94
  it "correctly grabs the date" do
95
- expect(NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
95
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
96
96
  end
97
97
  end
98
98
 
@@ -102,7 +102,7 @@ describe NaturalDateParsing do
102
102
  answer = [Date.parse("January 11, 1994")]
103
103
 
104
104
  it "correctly grabs the date" do
105
- expect(NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
105
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
106
106
  end
107
107
  end
108
108
 
@@ -113,7 +113,7 @@ describe NaturalDateParsing do
113
113
  parse_single_years = true
114
114
 
115
115
  it "correctly grabs the date" do
116
- expect(NaturalDateParsing::interpret_date(text, creation_date, parse_single_years)).to eql(answer)
116
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date, parse_single_years)).to eql(answer)
117
117
  end
118
118
  end
119
119
 
@@ -123,7 +123,7 @@ describe NaturalDateParsing do
123
123
  answer = [Date.parse("March 4, 2004")]
124
124
 
125
125
  it "correctly grabs the date" do
126
- expect(NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
126
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
127
127
  end
128
128
  end
129
129
 
@@ -131,7 +131,7 @@ describe NaturalDateParsing do
131
131
  end
132
132
 
133
133
 
134
- describe NaturalDateParsing do
134
+ describe DateParser::NaturalDateParsing do
135
135
 
136
136
  #########################################################
137
137
  ##
@@ -148,7 +148,7 @@ describe NaturalDateParsing do
148
148
  parse_single_years = true
149
149
 
150
150
  it "correctly grabs the date" do
151
- expect(NaturalDateParsing::interpret_date(text, creation_date, parse_single_years)).to eql(answer)
151
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date, parse_single_years)).to eql(answer)
152
152
  end
153
153
  end
154
154
 
@@ -172,7 +172,7 @@ describe NaturalDateParsing do
172
172
  answer = [Date.parse("August 25, 2013")]
173
173
 
174
174
  it "correctly grabs the date" do
175
- expect(NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
175
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
176
176
  end
177
177
  end
178
178
 
@@ -184,7 +184,7 @@ describe NaturalDateParsing do
184
184
  answer = [Date.parse("March 14, 1933")]
185
185
 
186
186
  it "correctly grabs the date" do
187
- expect(NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
187
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
188
188
  end
189
189
  end
190
190
 
@@ -195,7 +195,7 @@ describe NaturalDateParsing do
195
195
  answer = [Date.parse("July 4, 2016")]
196
196
 
197
197
  it "correctly grabs the date" do
198
- expect(NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
198
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
199
199
  end
200
200
  end
201
201
 
@@ -205,7 +205,7 @@ describe NaturalDateParsing do
205
205
  answer = [Date.parse("October 3, 2016")]
206
206
 
207
207
  it "correctly grabs the date" do
208
- expect(NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
208
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
209
209
  end
210
210
  end
211
211
 
@@ -218,7 +218,7 @@ describe NaturalDateParsing do
218
218
  Date.parse("July 11, 2016")] # Reconsider
219
219
 
220
220
  it "correctly grabs the date" do
221
- expect(NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
221
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
222
222
  end
223
223
  end
224
224
 
@@ -228,7 +228,7 @@ describe NaturalDateParsing do
228
228
  answer = [Date.parse("September 4, 1981")]
229
229
 
230
230
  it "correctly grabs the date" do
231
- expect(NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
231
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
232
232
  end
233
233
  end
234
234
 
@@ -240,7 +240,7 @@ describe NaturalDateParsing do
240
240
  answer = [Date.parse("January 1st, 2016")]
241
241
 
242
242
  it "correctly grabs the date" do
243
- expect(NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
243
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
244
244
  end
245
245
  end
246
246
 
@@ -252,7 +252,7 @@ describe NaturalDateParsing do
252
252
  answer = [Date.parse("June 8, 2016")]
253
253
 
254
254
  it "correctly grabs the date" do
255
- expect(NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
255
+ expect(DateParser::NaturalDateParsing::interpret_date(text, creation_date)).to eql(answer)
256
256
  end
257
257
  end
258
258
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: date_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.32
4
+ version: 0.1.41
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Kwon
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-09-25 00:00:00.000000000 Z
11
+ date: 2016-10-10 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: DateParser is a simple, fast, and effective way to parse dates from natural
14
14
  language text.
@@ -22,7 +22,6 @@ files:
22
22
  - NEWS.md
23
23
  - README.md
24
24
  - lib/date_parser.rb
25
- - lib/date_parser/date_utils.rb
26
25
  - lib/date_parser/natural_date_parsing.rb
27
26
  - lib/date_parser/utils.rb
28
27
  - lib/spec/date_parser_spec.rb
File without changes