timetwister 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YWVhN2M0NjcyMTMwNGE2ZjQ0OTRkZjM5NTE5YzE1YjM4MmQxYzVmZQ==
5
+ data.tar.gz: !binary |-
6
+ NDkwNjYyOGZkYzViMDE2MWE4NzM1ZTQwZmE2OGI4MDg3YTk2M2Q5Ng==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ NmFlOWEwMDk4YjRiM2JlMDZlOWVmYjVjOGI5YjJiZTRmNTQ1NzYxMmU3YmVk
10
+ YTRmMDVjZGY1NWVlOTFjMzIxYTQ3ZjdmYWQ5NzlkNWExZWI5Mjc2MjJjMDMx
11
+ M2M4YjY4ODhiYjUyMWM0ZDA4NGVhMmIwZmFhMGJjOTVhMTc1OTM=
12
+ data.tar.gz: !binary |-
13
+ YmRkOTYzNTc2YjdkZTkwNzFjZTFkNWQ4ZDU4MTFlNWYyMzg5NTMyZmZmNTNh
14
+ MjY1ZjMwZDI0Y2RlYTIwZTYxZTU5ZTJmZmU0ZDRlNDk2YmU3MzA1Yjg2OTE3
15
+ OTk4MmU0NTU4Mjc5NDc4YjBkNTgwMGRlNmFiOWIyYjQ5NDNjMzI=
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in timetwister.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Alex Duryee
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # Timetwister
2
+
3
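+ Timetwister parses free-text date expressions (for example `1969-72`, `July 4, 1776` or `mid-1980s`) into a hash of normalized date values.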
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'timetwister'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install timetwister
20
+
21
+ ## Usage
22
+
23
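+ Call `Timetwister.parse(string)`; it returns a hash with the keys `:original_string`, `:index_dates`, `:date_start`, `:date_end`, `:date_start_full`, `:date_end_full`, `:inclusive_range` and `:certainty`. Pass `:force_8601 => true` as the second argument to treat `2001-02` as February 2001 rather than 2001-2002.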
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/[my-github-username]/timetwister/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,20 @@
1
+ require "timetwister/version"
2
+ require "timetwister/parser"
3
+
4
# Public entry point of the gem.
module Timetwister

  # Parses a free-text date expression.
  #
  # str     - the date string; nil is tolerated and yields the empty result
  # options - hash handed through unchanged to Parser.string_to_dates
  #
  # Returns a hash with the keys :original_string, :index_dates, :date_start,
  # :date_end, :date_start_full, :date_end_full, :inclusive_range, :certainty.
  def self.parse(str, options={})

    dates = { :original_string => str, :index_dates => [], :date_start => nil, :date_end => nil,
      :date_start_full => nil, :date_end_full => nil, :inclusive_range => nil, :certainty => nil }

    # defensive checks - there is nothing to parse in a missing string, and
    # we don't want to try to parse certain malformed strings
    # (otherwise dates get flipped and types get wacky)
    return dates if str.nil? || str.include?('??')

    Parser.string_to_dates(str, dates, options)
  end
end
@@ -0,0 +1,1085 @@
1
+ require 'chronic'
2
+
3
+ class Parser
4
+
5
# Runs the whole parsing pipeline for one date string.
#
# str     - raw date expression
# hash    - result hash to fill in (as built by Timetwister.parse)
# options - parser options, stored for the pattern builders
#
# Returns the filled-in hash, or an all-empty hash when any parsed year
# lies in the future.
def self.string_to_dates(str, hash, options)
  @string = str
  @dates = hash
  @options = options

  @regex_tokens = regex_tokens

  # perform this here, before the string gets purged of certainty indicators
  @dates[:certainty] = return_certainty(@string)

  @string = clean_string(@string)
  match_replace

  # if there are any future dates, return an empty hash.
  # More than one pattern cluster may append to :index_dates, so the list is
  # not necessarily sorted - compare against its maximum, not its last entry.
  index_dates = @dates[:index_dates]
  if !index_dates.empty? && index_dates.max > Time.now.year
    # report the caller's string here, not the cleaned working copy
    return { :original_string => @dates.fetch(:original_string, str), :index_dates => [], :keydate => nil, :keydate_z => nil, :date_start => nil, :date_end => nil,
      :date_start_full => nil, :date_end_full => nil, :inclusive_range => nil, :certainty => nil }
  end

  # ids 150/160 are the open-ended forms ("1969-", "-1969"); every other
  # single date doubles as its own end date
  open_ended = [150, 160].include?(@dates[:test_data])
  if @dates[:date_start] && !@dates[:date_end] && !open_ended
    @dates[:date_end] = @dates[:date_start]
  end

  stringify_values
  add_full_dates

  @dates
end
33
+
34
# Tries every pattern cluster against the working string. For each cluster
# the first matching pattern wins: the cluster id is recorded in
# @dates[:test_data] and the cluster's handler (if any) is run. Later
# clusters are still tried after an earlier one has matched.
def self.match_replace
  match_replace_clusters.each do |cluster|
    candidates = cluster[:match].is_a?(Array) ? cluster[:match] : [cluster[:match]]

    matched = candidates.find do |pattern|
      @string.match(@regex_tokens[:anchor_start] + pattern + @regex_tokens[:anchor_end])
    end
    next unless matched

    @dates[:test_data] = cluster[:id]
    handler = cluster[:proc]
    # hand the handler a copy so its in-place edits never touch @string
    handler.call(@string.clone, cluster[:arg]) if handler
  end
end
51
+
52
+
53
# Builds the ordered list of pattern clusters used by match_replace.
#
# Each cluster is a hash with :match (one pattern string or an array of
# them), :proc (handler, or nil), optional :arg (second handler argument)
# and :id. Patterns are plain strings; match_replace anchors them at both
# ends of the test string. As a side effect the composite :date_* tokens
# are added to @regex_tokens.
def self.match_replace_clusters
  r = @regex_tokens

  # building blocks shared by most patterns
  circa  = "(#{r[:circa]})?"
  year   = "#{r[:year]}"
  month  = "#{r[:named_month]}"
  day    = "#{r[:day_of_month]}"
  comma  = "#{r[:optional_comma]}"
  to     = "#{r[:range_delimiter]}"
  decade = "#{r[:decade_s]}"
  short  = "#{r[:year_range_short]}"

  # extend regex_tokens for common complex formats
  r[:date_month_day_year] = circa + month + day + comma + year    # July 4, 1776
  r[:date_month_year]     = circa + month + comma + year          # July 1776
  r[:date_year_month_day] = circa + year + comma + month + day    # 1776 July 4
  r[:date_year_day_month] = circa + year + comma + day + month    # 1776 4 July
  r[:date_day_month_year] = circa + day + month + comma + year    # 4 July 1776
  r[:date_year_month]     = circa + year + comma + month          # 1776 July

  mdy = r[:date_month_day_year]
  dmy = r[:date_day_month_year]
  ymd = r[:date_year_month_day]
  ydm = r[:date_year_day_month]
  my  = r[:date_month_year]
  ym  = r[:date_year_month]

  year_or_short = "((#{year})|(#{short}))"

  clusters = []

  # options[:force_8601] == true will force '2001-02' to be treated as February 2001 rather than 2001-2002
  # and will handle ISO8601 ranges, e.g. 2001-02/2001-12
  if @options[:force_8601]
    clusters << { :match => "#{r[:iso8601_full]}\\/#{r[:iso8601_full]}", :proc => proc_8601_range, :id => 10 }
    clusters << { :match => "#{r[:iso8601_month]}", :proc => proc_month_year_single, :id => 20 }
  end

  clusters.concat([
    # ISO 8601 (full dates only)
    { :match => "#{r[:iso8601]}", :proc => proc_full_date_single, :id => 30 },

    # ISO 8601 ranges (full dates only)
    { :match => "#{r[:iso8601]}\\/#{r[:iso8601]}", :proc => proc_8601_range, :id => 40 },

    # any number of 4-digit years separated by a single range or list delimiter
    {
      :match => "#{year_or_short}(#{r[:range_or_list_delimiter]}#{year_or_short})+",
      :proc => proc_year_range_list_combo,
      :id => 60
    },

    # 1969, [1969], c1969
    # anti-matches the range delimiter as to not override id 150/160
    {
      :match => [
        "#{circa}[^#{to}]#{year}([\\,\\;\\s(and)]{1,3}#{r[:nd]})?",
        "^#{year}$"
      ],
      :proc => proc_single_year,
      :id => 70
    },

    # "July 4 1976 - Oct 1 1981", "4 July 1976 - 1 Oct 1981",
    # "1976 July 4 - 1981 Oct 1", "1976 4 July - 1981 1 Oct"
    {
      :match => [
        "#{mdy}#{to}#{mdy}",
        "#{dmy}#{to}#{dmy}",
        "#{ymd}#{to}#{ymd}",
        "#{ydm}#{to}#{ydm}"
      ],
      :proc => proc_full_date_single_range,
      :id => 80
    },

    # "1976 July - 1981 Oct", "July 1976 - Oct 1981"
    {
      :match => [
        "#{circa}#{ym}#{to}#{circa}#{ym}",
        "#{circa}#{my}#{to}#{circa}#{my}"
      ],
      :proc => proc_full_date_single_range,
      :arg => 'month',
      :id => 100
    },

    # 1969-1977
    { :match => "#{circa}#{year}#{to}#{circa}#{year}", :proc => proc_year_range, :id => 120 },

    # 1960-1980s
    { :match => "#{circa}#{year}#{to}#{decade}", :proc => proc_range_year_to_decade, :id => 130 },

    # 1960s-1981
    { :match => "#{circa}\\s?#{decade}#{to}#{circa}\\s?#{year}", :proc => proc_year_range, :id => 140 },

    # 1969-72
    { :match => "#{circa}#{short}", :proc => proc_year_range_short, :id => 145 },

    # 1969- (e.g. after 1969)
    { :match => "#{circa}\\s?#{year}#{to}", :proc => proc_single_year, :arg => 'start', :id => 150 },

    # -1969 (e.g. before 1969) - treat as single
    { :match => "#{to}#{circa}\\s?#{year}", :proc => proc_single_year, :arg => 'end', :id => 160 },

    # nd, n.d., undated, Undated... (no handler is attached to this cluster)
    { :match => "#{r[:nd]}", :proc => nil, :id => 170 },

    # 1970's, 1970s
    { :match => "#{circa}#{decade}", :proc => proc_decade_s, :id => 180 },

    # 1970s - 1980s, etc.
    { :match => "#{circa}\\s?#{decade}#{to}#{circa}#{decade}", :proc => proc_decade_s_range, :id => 190 },

    # July 4 1976 / 4 July 1976 / 1976 July 4 / 1976 4 July
    # (with or without optional commas)
    {
      :match => ["#{mdy}", "#{dmy}", "#{ymd}", "#{ydm}"],
      :proc => proc_full_date_single,
      :id => 200
    },

    # December 1941 / 1941 December (with or without optional commas)
    {
      :match => ["#{circa}#{my}", "#{circa}#{ym}"],
      :proc => proc_month_year_single,
      :id => 220
    },

    # Jun-July 1969 / 1969 Jun-July
    {
      :match => [
        "#{circa}#{month}#{to}#{circa}#{month}#{comma}#{year}",
        "#{circa}#{year}#{comma}#{month}#{to}#{circa}#{month}"
      ],
      :proc => proc_single_year_month_range,
      :id => 230
    },

    # Feb. 1-20, 1980 / 1980 Feb. 1-20 / 1980 1-20 Feb.
    {
      :match => [
        "#{circa}#{month}#{day}#{to}#{day}#{comma}#{year}",
        "#{circa}#{year}#{comma}#{month}#{day}#{to}#{day}",
        "#{circa}#{year}#{comma}#{day}#{to}#{day}#{month}",
        "#{circa}#{day}#{to}#{day}#{month}#{comma}#{year}"
      ],
      :proc => proc_single_month_date_range,
      :id => 240
    },

    # Early 1960's, mid-1980s, late 1950's, etc.
    { :match => "#{circa}#{r[:decade_qualifier]}\\s?#{decade}", :proc => proc_decade_s_qualified, :id => 250 },

    # 19--, 18--, 18--?, etc.
    { :match => "#{circa}[1-2][0-9]\-{2}", :proc => proc_century_with_placeholders, :id => 290 },

    # Jan 2-Dec 31 1865 / 1865 Jan 2-Dec 31
    {
      :match => [
        "#{circa}#{month}#{day}#{to}#{circa}#{month}#{day}#{comma}#{year}",
        "#{circa}#{year}#{comma}#{month}#{day}#{to}#{circa}#{month}#{day}",
        "#{circa}#{day}#{month}#{to}#{circa}#{day}#{month}#{comma}#{year}"
      ],
      :proc => proc_year_with_dates,
      :id => 310
    },

    # 1863 Aug 7-1866 Dec
    {
      :match => [
        "#{circa}#{year}#{month}#{day}#{to}#{circa}#{year}#{month}",
        "#{circa}#{day}#{month}#{year}#{to}#{circa}#{month}#{year}",
        "#{circa}#{month}#{day}#{to}#{circa}#{month}#{year}",
        "#{circa}#{month}#{year}#{to}#{circa}#{month}#{day}#{comma}#{year}",
        "#{circa}#{year}#{month}#{to}#{circa}#{year}#{month}#{day}"
      ],
      :proc => proc_full_with_year_month,
      :id => 330
    },

    # 1942 November-1943 / 1943-1944 November /
    # November 1942-1943 / 1942-November 1943
    {
      :match => [
        "#{circa}#{year}#{month}#{to}#{circa}#{year}",
        "#{circa}#{year}#{to}#{circa}#{year}#{month}",
        "#{circa}#{month}#{year}#{to}#{circa}#{year}",
        "#{circa}#{year}#{to}#{circa}#{month}#{year}"
      ],
      :proc => proc_year_range_single_date,
      :id => 340
    },

    # 01/31/1999
    { :match => "#{circa}[0-1]?[0-9]/[0-3]?[0-9]/#{year}", :proc => proc_date_with_slashes, :id => 350 }
  ])

  clusters
end
342
+
343
+
344
# Handler for a lone year ("1969", "[1969]", "1969-", "-1969").
#
# The returned Proc takes the working string and an optional open_range
# marker: 'start' fills only :date_start, 'end' fills only :date_end,
# anything else fills both. The year is also appended to :index_dates.
def self.proc_single_year
  Proc.new do |string, open_range|
    digits = string.delete('^0-9')
    @dates[:index_dates] << digits.to_i
    @dates[:date_start] = digits unless open_range == 'end'
    @dates[:date_end] = digits unless open_range == 'start'
  end
end
359
+
360
+
361
# Handler for "1969-1977" style ranges. The two bounds come from
# year_range; nothing is recorded unless the end lies after the start.
def self.proc_year_range
  Proc.new do |string|
    # Only supports years from 1000
    bounds = year_range(string)
    next unless bounds.length > 0

    first_year, last_year = bounds
    next unless last_year > first_year

    @dates[:index_dates].concat((first_year..last_year).to_a)
    @dates[:inclusive_range] = true
    process_year_range
  end
end
377
+
378
+
379
+
380
+
381
# Handler for "1960-1980s": a range from a year to the last year of a
# decade. The bounds come from year_range; the decade is widened by nine
# years before the range is recorded.
def self.proc_range_year_to_decade
  Proc.new do |string|
    first_year, closing_decade = year_range(string)
    next unless first_year && closing_decade
    next unless closing_decade > first_year

    last_year = closing_decade + 9
    @dates[:index_dates].concat((first_year..last_year).to_a)
    @dates[:inclusive_range] = true
    process_year_range
  end
end
396
+
397
+
398
# Handler for abbreviated ranges such as "1969-72": the end year borrows
# the first two digits of the start year. Nothing is recorded unless the
# completed end year lies after the start year.
def self.proc_year_range_short
  Proc.new do |string|
    first_part, second_part = string.split('-').map { |part| part.gsub(/[^0-9]*/, '') }
    century_digits = first_part[/^[0-9]{2}/].to_s
    last_year = (century_digits + second_part).to_i
    first_year = first_part.to_i

    if last_year > first_year
      @dates[:index_dates].concat((first_year..last_year).to_a)
      @dates[:inclusive_range] = true
      process_year_range
    end
  end
end
414
+
415
# Handler for mixed lists and ranges of years, e.g. "1969-1972, 1975",
# "1969 to 1971; 1980" or "1969-72, 1975".
#
# The string is read as a stream of year and delimiter tokens, so that
# "to" ranges and two-digit range ends (which the matching pattern allows)
# stay aligned with the years around them. :index_dates is replaced with
# the sorted years, :inclusive_range is set to false.
def self.proc_year_range_list_combo
  Proc.new do |string|
    spans = []             # [first_year, last_year]; a lone year has first == last
    range_pending = false  # a range delimiter was seen since the last year
    span_open = false      # the newest span is a lone year that may still become a range

    string.scan(/[0-2][0-9]{3}|[0-9]{2}|to|[\-;,]/).each do |token|
      case token
      when '-', 'to'
        range_pending = true
      when ',', ';'
        range_pending = false
      else
        closes_range = range_pending && span_open
        if token.length == 2
          # a two-digit year is only meaningful as the end of a range;
          # it borrows the century of the range's first year
          if closes_range
            spans.last[1] = (spans.last[0] / 100) * 100 + token.to_i
            span_open = false
          end
        elsif closes_range
          spans.last[1] = token.to_i
          span_open = false
        else
          spans << [token.to_i, token.to_i]
          span_open = true
        end
        range_pending = false
      end
    end

    # a reversed range expands to nothing, as before
    @dates[:index_dates] = spans.flat_map { |first_year, last_year| (first_year..last_year).to_a }.sort
    @dates[:inclusive_range] = false
    process_year_range
  end
end
448
+
449
+
450
# Handler for a whole decade ("1970s", "1970's"): replaces :index_dates
# with the ten years of the decade.
def self.proc_decade_s
  Proc.new do |string|
    first_year = string[/[0-9]{3}0/].to_s.to_i
    @dates[:index_dates] = (first_year..(first_year + 9)).to_a
    @dates[:inclusive_range] = true
    process_year_range
  end
end
460
+
461
+
462
# Handler for century placeholders ("19--", "18--?"): replaces
# :index_dates with the hundred years starting at the given century.
def self.proc_century_with_placeholders
  Proc.new do |string|
    first_year = (string[/[0-9]{2}/].to_s + '00').to_i
    @dates[:index_dates] = (first_year..(first_year + 99)).to_a
    @dates[:inclusive_range] = true
    process_year_range
  end
end
473
+
474
+
475
# Handler for qualified decades ("Early 1960's", "mid-1980s", "late 1950s").
#
# early => first six years of the decade, mid => years 3 to 8,
# late => last five years. When the string carries no decade or no
# recognised qualifier nothing is recorded (previously this either raised
# on a nil range or produced years counted from 0).
def self.proc_decade_s_qualified
  Proc.new do |string|
    decade = string[/[0-9]{3}0/]
    offsets =
      if string.match(/[Ee]arly/)
        [0, 5]
      elsif string.match(/[Mm]id(dle)?/)
        [3, 8]
      elsif string.match(/[Ll]ate/)
        [5, 9]
      end
    next unless decade && offsets

    decade_start = decade.to_i
    @dates[:index_dates] = ((decade_start + offsets[0])..(decade_start + offsets[1])).to_a
    @dates[:inclusive_range] = true
    process_year_range
  end
end
494
+
495
+
496
# Handler for decade-to-decade ranges ("1970s - 1980s"): spans from the
# first year of the first decade to the last year of the second. Strings
# that do not contain exactly two decades are ignored.
def self.proc_decade_s_range
  Proc.new do |string|
    found = string.scan(/[0-9]{3}0/)
    next unless found.length == 2

    first_year = found.first.to_i
    last_year = found.last.to_i + 9
    @dates[:index_dates] = (first_year..last_year).to_a
    @dates[:inclusive_range] = true
    process_year_range
  end
end
508
+
509
+
510
# Handler for one complete date ("July 4 1976", "1976-07-04"). When the
# string parses, the day-precision start date is stored through
# full_date_single_keydates and the year is appended to :index_dates.
def self.proc_full_date_single
  Proc.new do |string|
    parsed = full_date_single_to_datetime(string)
    next unless parsed

    full_date_single_keydates(string, parsed, '%Y-%m-%d')
    @dates[:index_dates] << parsed.strftime('%Y').to_i
  end
end
519
+
520
+
521
# Handler for month-precision dates ("December 1941", "1941 December",
# and ISO "2001-02" when :force_8601 is set).
#
# Stores the month-precision start date through full_date_single_keydates
# and appends the year to :index_dates.
def self.proc_month_year_single
  Proc.new do |string|
    string.gsub!(/\?/,'')

    # ISO year-month has no space to swap around; the year-first rewrite
    # below would turn it into "2001-02 2001-02", so build the time directly
    iso = string.match(/\A\D*([0-9]{4})\-([0-9]{2})\D*\z/)
    if iso && (1..12).cover?(iso[2].to_i)
      datetime = Time.local(iso[1].to_i, iso[2].to_i, 1)
    else
      # Chronic can't parse year-month strings properly
      # so we need to change them to month-year before
      # parsing them.
      if string.match(/^[0-9]/)
        tmpyear = string.split(' ')[0]
        string.gsub!(/^.+? /,'')
        string << " "
        string << tmpyear
      end

      datetime = Chronic.parse(string)
    end

    if datetime
      full_date_single_keydates(string, datetime, '%Y-%m')
      @dates[:index_dates] << datetime.strftime('%Y').to_i
    end
  end
end
543
+
544
+
545
+
546
+
547
+
548
+ # "1976 July 4 - 1981 October 1", etc.
549
+ # call with second argument 'month' if no day value is present
550
# Handler for ranges between two complete dates
# ("1976 July 4 - 1981 October 1"), or between two months when the second
# argument is 'month' ("1976 July - 1981 Oct").
#
# In month mode the range is widened to the first day of the start month
# and the last day of the end month. Sides that fail to parse are skipped
# instead of raising; :inclusive_range is still set for any two-part string.
def self.proc_full_date_single_range
  Proc.new do |string, specificity|
    dates = []
    if string.match(/\-/)
      dates = string.split('-')
    elsif string.match(/\sto\s/)
      dates = string.split(' to ')
    end

    dates.each { |d| d.strip! }

    if dates.length == 2
      datetime_start = full_date_single_to_datetime(dates[0])
      datetime_end = full_date_single_to_datetime(dates[1])

      # if month-specific, modify datetimes to include all days of each month
      if specificity == 'month' && datetime_start && datetime_end
        datetime_start = Chronic.parse(datetime_start.strftime('%Y-%m') + '-01')

        month_date_end = datetime_end.strftime('%Y-%m')
        end_year, end_month = month_date_end.split('-')
        last_day = days_in_month(end_month, end_year).to_s
        datetime_end = Chronic.parse(month_date_end + "-#{last_day}")
      end

      if datetime_start && datetime_end
        process_date_range(datetime_start, datetime_end, specificity)
      end
      @dates[:inclusive_range] = true
    end
  end
end
586
+
587
+
588
+ # Feb. 1-20, 1980
589
+ # 1980 Feb. 1-20
590
+ # 1980 1-20 Feb.
591
# Handler for a day range inside one month
# ("Feb. 1-20, 1980", "1980 Feb. 1 - 20", "1980 1 to 20 Feb.").
#
# The year is pulled out with extract_year, the day range is cut out of
# the string, and what remains is taken as the month. The day range may be
# written with "-" or "to", with or without surrounding spaces, matching
# what the range delimiter token accepts.
def self.proc_single_month_date_range
  day_delimiter = /\s*(?:\-|to)\s*/
  Proc.new do |string|
    year = extract_year(string)
    day_range = string[/\d{1,2}\s*(?:\-|to)\s*\d{1,2}/].to_s
    string.gsub!(day_range, '') unless day_range.empty?
    month = string.strip
    days = day_range.split(day_delimiter)
    if days.length == 2
      dates = days.map { |d| "#{month} #{d.strip} #{year}" }
      datetime_start = full_date_single_to_datetime(dates[0])
      datetime_end = full_date_single_to_datetime(dates[1])
      if datetime_start && datetime_end
        process_date_range(datetime_start, datetime_end)
      end
    end
    @dates[:inclusive_range] = true
  end
end
613
+
614
+
615
# Handler for ISO 8601 ranges ("2001-02-03/2003-04-05").
#
# Both sides are parsed with iso8601_datetime; a range given end-first is
# swapped. The output format of each bound follows the precision of the
# string found in the same position of the input.
def self.proc_8601_range
  Proc.new do |string|
    first_raw, second_raw = string.split('/').map(&:strip)

    range_start = iso8601_datetime(first_raw)
    range_end = iso8601_datetime(second_raw)
    next unless range_start && range_end

    # this range is reversed in error
    if datetime_comparitor(range_end) < datetime_comparitor(range_start)
      range_start, range_end = range_end, range_start
    end

    first_year = range_start.strftime('%Y').to_i
    last_year = range_end.strftime('%Y').to_i

    @dates[:index_dates] += (first_year..last_year).to_a
    @dates[:date_start] = range_start.strftime(is8601_string_format(first_raw))
    @dates[:date_end] = range_end.strftime(is8601_string_format(second_raw))
    @dates[:inclusive_range] = true
  end
end
643
+
644
+
645
+ # "1981 Oct-Dec", "Oct-Dec 1981", etc.
646
# Handler for a month range inside one year
# ("1981 Oct-Dec", "Oct.-Dec. 1981", "Jun-July, 1969").
#
# Stores month-precision :date_start / :date_end when both months parse,
# and always appends the year to :index_dates.
def self.proc_single_year_month_range
  Proc.new do |string|
    year = string.match(/[0-9]{4}/).to_s
    string.gsub!(year,'')
    # drop the whitespace and commas left behind by the removed year, so the
    # end-of-string month match below is not defeated by "Jun-July,"
    string.gsub!(/\A[\s,]+|[\s,]+\z/, '')

    first_month = string.match(@regex_tokens[:named_month]).to_s
    last_month = string.match(@regex_tokens[:named_month] + '$').to_s

    unless last_month.empty?
      # chronic is fiddly about short months with periods
      # (e.g. "may.") so we remove them - from both months
      datetime_first = Chronic.parse(first_month.delete('.') + ' 1,' + year)
      datetime_last = Chronic.parse(last_month.delete('.') + ' ' + year)

      if datetime_first && datetime_last
        @dates[:date_start] = datetime_first.strftime('%Y-%m')
        @dates[:date_end] = datetime_last.strftime('%Y-%m')
      end
    end
    @dates[:inclusive_range] = true
    @dates[:index_dates] << year.to_i
  end
end
668
+
669
+
670
+ # 1942 November-1943
671
+ # 1943-1944 November
672
+ # November 1942-1943
673
+ # 1942-November 1943
674
# Handler for ranges where only one side names a month
# ("1942 November-1943", "1943-1944 November", "November 1942-1943",
# "1942-November 1943").
#
# A side without a month is widened to the start / end of its year; a
# month on the end side is widened to its last day. The range is reported
# with month precision. Sides that fail to parse are skipped instead of
# raising.
def self.proc_year_range_single_date
  Proc.new do |string|
    dates = []
    if string.match(/\-/)
      dates = string.split('-')
    elsif string.match(/\sto\s/)
      dates = string.split(' to ')
    end

    dates.each { |d| d.strip! }

    if dates.length == 2
      if dates[0].match(/[A-Za-z]/)
        datetime_start = full_date_single_to_datetime(dates[0] + "-01")
        datetime_end = full_date_single_to_datetime(dates[1] + "-12-31")
      else
        datetime_start = full_date_single_to_datetime(dates[0] + "-01-01")
        # day 28 exists in every month; it is only used to learn which
        # month and year the end side falls in
        datetime_end_tmp = full_date_single_to_datetime(dates[1] + "-28")
        datetime_end = nil
        if datetime_end_tmp
          last_day = days_in_month(datetime_end_tmp.month, datetime_end_tmp.year).to_s
          datetime_end = full_date_single_to_datetime(dates[1] + "-" + last_day)
        end
      end

      if datetime_start && datetime_end
        process_date_range(datetime_start, datetime_end, "month")
      end
      @dates[:inclusive_range] = true
    end
  end
end
703
+
704
+ # Jan 2-Dec 31 1865
705
+ # 1865 Jan 2-Dec 31
706
# Handler for two month-day dates sharing one year
# ("Jan 2-Dec 31 1865", "1865 Jan 2-Dec 31").
#
# The year is appended to whichever side lacks it, both sides are parsed
# with Chronic and handed to process_date_range. A string without a range
# delimiter is ignored (previously it raised on a nil list).
def self.proc_year_with_dates
  Proc.new do |string|
    # extract year for later
    year = string.match(/[0-9]{4}/).to_s

    # instead of dealing with punctuation, we'll scorch the earth
    string.gsub!(/[\,\?]/,'')

    # split the string into two different dates
    dates = []
    if string.match(/\-/)
      dates = string.split('-')
    elsif string.match(/\sto\s/)
      dates = string.split(' to ')
    end

    # if everything's as expected, append the year to the shorter date
    if dates.length == 2
      dates.each do |d|
        d.strip!

        if d.match(year).nil?
          d << " "
          d << year
        end

        # Chronic seemed to choke with YYYY-MM-DD dates
        # so we'll flip it to MM-DD-YYYY
        if d.match("^" + year)
          d.gsub!(year + " ","")
          d << " "
          d << year
        end
      end

      # change our strings to datetime objects
      # and send them to be processed elsewhere
      datetime_start = Chronic.parse(dates[0])
      datetime_end = Chronic.parse(dates[1])
      process_date_range(datetime_start, datetime_end)
      @dates[:inclusive_range] = true
    end
  end
end
747
+
748
+ # 1863 Aug 7-1866 Dec
749
# Handler for ranges that mix a complete date with a month-year
# ("1863 Aug 7-1866 Dec").
#
# When the start side carries a day, the end month is widened to its last
# day; otherwise the start month is taken from its first day. Sides that
# fail to parse are skipped instead of raising.
def self.proc_full_with_year_month
  Proc.new do |string|
    dates = []
    if string.match(/\-/)
      dates = string.split('-')
    elsif string.match(/\sto\s/)
      dates = string.split(' to ')
    end

    dates.each { |d| d.strip! }

    if dates.length == 2
      datetime_end = full_date_single_to_datetime(dates[1])

      # digits separated by non-digits: the start side has a day as well as a year
      if !dates[0].match(/[0-9]\D+[0-9]/).nil?
        datetime_start = full_date_single_to_datetime(dates[0])
        if datetime_end
          month_date_end = datetime_end.strftime('%Y-%m')
          end_year, end_month = month_date_end.split('-')
          last_day = days_in_month(end_month, end_year).to_s
          datetime_end = Chronic.parse(month_date_end + "-#{last_day}")
        end
      else
        datetime_start = full_date_single_to_datetime(dates[0] + "-01")
      end

      if datetime_start && datetime_end
        process_date_range(datetime_start, datetime_end)
      end
      @dates[:inclusive_range] = true
    end
  end
end
789
+
790
+ # we assume that all matching dates are mm/dd/yyyy
791
+ # if they're dd/mm/yyyy, this may get jumbled, but that's rare enough to be okay
792
# Handler for slash-separated dates ("01/31/1999"), read as mm/dd/yyyy.
#
# Only the digits are used, so circa markers, brackets and punctuation
# that the matching pattern tolerates do not leak into the ISO-style
# string handed to proc_full_date_single.
def self.proc_date_with_slashes
  Proc.new do |string|
    parts = string.match(/([0-9]{1,2})\s*\/\s*([0-9]{1,2})\s*\/\D*([0-9]{4})/)
    next unless parts

    # convert to ISO style numbers
    month = parts[1].rjust(2, '0')
    day = parts[2].rjust(2, '0')
    proc_full_date_single.call("#{parts[3]}-#{month}-#{day}")
  end
end
808
+
809
# Returns a fresh hash of regular-expression fragments (as strings) from
# which the date patterns are assembled.
def self.regex_tokens
  return {
    # 1969, [1969], c1969
    :year => '[\[\sc\(]{0,3}[0-2][0-9]{3}[\]\s\.\,;\?\)]{0,3}',
    # - or 'to'
    :range_delimiter => '\s*((\-)|(to))\s*',
    # , or ;
    :list_delimiter => '\s*[\,\;]\s*',
    # any of: , ; - 'to'
    :range_or_list_delimiter => '\s*([\,\;]|((\-)|(to)))\s*',
    # n.d., undated, etc.
    # NOTE(review): the alternation here is not grouped either, so each half
    # is only anchored on one side - left as is, confirm before changing
    :nd => '[\[\s]{0,2}\b([Uu]+ndated\.?)|([nN]o?\.?\s*[dD](ate)?\.?)\b[\s\]\.]{0,3}',
    # 1960s, 1960's
    :decade_s => '[\[\s]{0,2}[0-9]{3}0\'?s[\]\s]{0,2}',

    # 1970-75
    :year_range_short => '\s*[0-9]{4}\s?\-\s*(([2-9][0-9])|(1[3-9]))\s*',

    # 196-
    :decade_aacr => '[0-9]{3}\-',
    # named months, including abbreviations (case insensitive)
    :named_month => '\s*(?i)\b((jan(uary)?)|(feb(ruary)?)|(mar(ch)?)|(apr(il)?)|(may)|(jun(e)?)|(jul(y)?)|(aug(ust)?)|(sep(t|tember)?)|(oct(ober)?)|(nov(ember)?)|(dec(ember)?))\b\.?\s*',
    # circa, ca. - also matches 'c.', which is actually 'copyright', but is still not something we need to deal with
    :circa => '\s*[Cc](irc)?a?\.?\s*',
    # early, late, mid-, middle
    # the alternatives are grouped so the token can be embedded in a larger
    # pattern; ungrouped, the alternation split the whole surrounding pattern
    # and any string containing "mid" matched
    :decade_qualifier => '(([Ee]arly)|([Mm]id(dle)?)|([Ll]ate))\-?',
    # 06-16-1972, 6-16-1972 (the month/day prefix is optional)
    :numeric_date_us => '(((0?[1-9])|(1[0-2]))[\-\/](([0-2]?[0-9])|(3[01]))[\-\/])?[12][0-9]{3}',
    # 1972-06-16
    :iso8601 => '[0-9]{4}\-[0-9]{2}\-[0-9]{2}',
    :iso8601_full => '[0-9]{4}((\-[0-9]{2})(\-[0-9]{2})?)?',
    :iso8601_month => '[0-9]{4}\-[0-9]{2}',
    :anchor_start => '^[^\w\d]*',
    :anchor_end => '[^\w\d]*$',
    :optional_comma => '[\s\,]*',
    :day_of_month => '\s*(([0-2]?[0-9])|(3[0-1]))\s*'
  }
end
847
+
848
+
849
# Turns one date string into whatever Chronic.parse returns for it.
#
# Strings containing a YYYY-MM-DD date go to Chronic untouched. Anything
# else is rearranged to "<month> <day>, <year>" (or "<month> <year>" when
# no day is present) first. The argument itself is never modified.
def self.full_date_single_to_datetime(string)
  working = string.clone
  return Chronic.parse(working) if working.match(/\d{4}\-\d{2}\-\d{2}/)

  # peel off the year, then the day; what is left is the month
  year = working[/[0-9]{4}/].to_s
  working.gsub!(Regexp.new(year), '')

  day = working[/[0-9]{1,2}/]
  working.gsub!(Regexp.new(day), '') if day

  working.gsub!(/[\.\,\s]+/, '')

  tail = day ? " #{day}, #{year}" : " #{year}"
  Chronic.parse(working + tail)
end
871
+
872
+
873
# Records a start/end pair in @dates.
#
# datetime_start / datetime_end must respond to strftime (and to_s, which
# datetime_comparitor uses). Nothing is recorded when either is missing or
# when the end does not compare strictly later than the start.
#
# specificity 'month' stores :date_start / :date_end as YYYY-MM; anything
# else stores them as YYYY-MM-DD. The *_full keys are always YYYY-MM-DD, and
# every year from start to end is appended to :index_dates.
def self.process_date_range(datetime_start, datetime_end, specificity = nil)
  return unless datetime_start && datetime_end

  key_format = specificity == 'month' ? '%Y-%m' : '%Y-%m-%d'
  first_year = datetime_start.strftime('%Y').to_i
  last_year = datetime_end.strftime('%Y').to_i

  return unless datetime_comparitor(datetime_end) > datetime_comparitor(datetime_start)

  @dates[:index_dates] = @dates[:index_dates] + [*first_year..last_year]

  @dates[:date_start] = datetime_start.strftime(key_format)
  @dates[:date_end] = datetime_end.strftime(key_format)

  @dates[:date_start_full] = datetime_start.strftime('%Y-%m-%d')
  @dates[:date_end_full] = datetime_end.strftime('%Y-%m-%d')
end
895
+
896
+
897
# Stores a single date as @dates[:date_start], rendered with the given
# strftime format. The string argument is accepted but not used here.
# Returns the formatted date.
def self.full_date_single_keydates(string, datetime, format)
  formatted = datetime.strftime(format)
  @dates[:date_start] = formatted
end
900
+
901
+
902
# Sorts and de-duplicates @dates[:index_dates] in place, then uses its
# smallest and largest entries as :date_start and :date_end.
def self.process_year_range
  years = @dates[:index_dates]
  years.sort!
  years.uniq!

  @dates[:date_start] = years.first
  @dates[:date_end] = years.last
end
908
+
909
+
910
# Chooses the strftime pattern that matches the precision of an ISO 8601
# style string: YYYY-MM-DD => '%Y-%m-%d', YYYY-MM => '%Y-%m', and '%Y' for
# anything else (including a bare year).
def self.is8601_string_format(iso_8601_date)
  return '%Y-%m-%d' if iso_8601_date.match(/^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/)
  return '%Y-%m' if iso_8601_date.match(/^[0-9]{4}\-[0-9]{2}$/)

  '%Y'
end
919
+
920
+
921
# Hands an ISO 8601 style string to Chronic.parse, first padding partial
# dates out to a full day: YYYY-MM gets '-01' appended, and anything that is
# neither YYYY-MM-DD nor YYYY-MM gets '-01-01' appended.
# Returns whatever Chronic.parse returns.
def self.iso8601_datetime(iso_8601_date)
  padding =
    if iso_8601_date.match(/^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/)
      nil
    elsif iso_8601_date.match(/^[0-9]{4}\-[0-9]{2}$/)
      '-01'
    else
      '-01-01'
    end

  Chronic.parse(padding ? iso_8601_date + padding : iso_8601_date)
end
930
+
931
+
932
# Removes the first 4-digit number found in the string and returns it.
#
# The string is modified in place. Only the first run of four digits is cut
# out; a later repeat of the same year (e.g. the second "1950" in
# "May 1950 - June 1950") is left for a subsequent call.
#
# Returns the removed digits as a String, or "" (leaving the string
# untouched) when it contains no 4-digit run.
def self.extract_year(string)
  # slice! deletes just the matched portion and returns it (nil if no match)
  string.slice!(/\d{4}/).to_s
end
938
+
939
+
940
# removes sub-strings that do not contain parsable data
#
# Works on the string in place and also returns it. In order:
#   1. season names are replaced with fixed month/day spans
#   2. day-of-week and time-of-day words are dropped
#   3. a question mark directly after a digit (and before another
#      character) is dropped
#   4. the patterns in `junk` below are deleted
#   5. leading/trailing whitespace is stripped
def self.clean_string(string)
  circa = regex_tokens[:circa]

  # fragments deleted outright in step 4
  junk = [
    /\[n\.?y\.?\]/,            # [n.y.] and variants
    /[\[\]\(\)]/,              # brackets and parentheses
    /[\.\,\)\;\:]*$/,          # trailing punctuation
    /\?/,                      # any remaining question marks
    /approx\.?(imately)?/i,
    /\s#{circa}\s/,            # circa token between whitespace
    /^#{circa}\s/,             # circa token at the start of a line
    Regexp.new("([\,\;\s(and)]{0,4}#{regex_tokens[:nd]})?$")   # trailing "n.d." / "undated"
  ]

  # transform seasons to months
  seasons = [
    [/[Ww]inter/, " January 1 - March 20 "],
    [/[Ss]pring/, " March 20 - June 21 "],
    [/[Ss]ummer/, " June 21 - September 23 "],
    [/[Aa]utumn/, " September 23 - December 22 "],
    [/[Ff]all/, " September 23 - December 22 "]
  ]
  seasons.each { |name, span| string.gsub!(name, span) }

  # remove days of the week, then times of day
  filler = [
    /[Ss]unday,?\s+/, /[Mm]onday,?\s+/, /[Tt]uesday,?\s+/, /[Ww]ednesday,?\s+/,
    /[Tt]hursday,?\s+/, /[Ff]riday,?\s+/, /[Ss]aturday,?\s+/,
    /[Mm]orning,?\s+/, /[Aa]fternoon,?\s+/, /[Ee]vening,?\s+/, /[Nn]ight,?\s+/
  ]
  filler.each { |word| string.gsub!(word, '') }

  # remove single question marks
  string.gsub!(/([0-9])\?([^\?])/,'\1\2')

  junk.each { |pattern| string.gsub!(pattern, '') }
  string.strip!
  string
end
977
+
978
# Finds every match of the :year token pattern (from @regex_tokens) in the
# string and returns them as an Array of Integers, with any non-digit
# characters in each match discarded first.
def self.year_range(string)
  year_pattern = Regexp.new(@regex_tokens[:year])
  string.scan(year_pattern).map { |token| token.gsub(/[^0-9]*/, '').to_i }
end
983
+
984
+
985
# Reduces a date/time value to a single Integer for ordering: the value's
# to_s output with everything except the digits 0-9 removed (so a Time whose
# to_s is "2015-01-02 03:04:05 UTC" becomes 20150102030405).
#
# The argument is never modified. String#to_s returns the receiver itself,
# so stripping characters in place would edit a caller's String (and raise
# on a frozen one); String#delete builds a new string instead.
#
# Returns 0 when the text contains no digits.
def self.datetime_comparitor(datetime)
  datetime.to_s.delete('^0-9').to_i
end
990
+
991
+
992
# Gregorian leap-year test. Accepts an Integer or a String of digits
# (Strings are converted with to_i).
# Divisible by 400 => leap; otherwise divisible by 100 => not leap;
# otherwise leap exactly when divisible by 4.
def self.leap_year?(year)
  year = year.to_i if year.kind_of?(String)

  return true if year % 400 == 0
  return false if year % 100 == 0

  year % 4 == 0
end
1004
+
1005
+
1006
# month and year must be numeric (Integers, or Strings that to_i can read).
# Returns the number of days in that month, taking leap years into account
# for February; returns nil when month is not one of 1..12.
def self.days_in_month(month,year)
  month = month.to_i if month.kind_of?(String)
  year = year.to_i if year.kind_of?(String)

  february = leap_year?(year) ? 29 : 28
  lengths = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

  # month number => day count
  Hash[(1..12).zip(lengths)][month]
end
1026
+
1027
+
1028
# Converts every Integer value in @dates to its String form, leaving all
# other values (Strings, Arrays, nil, ...) as they are.
#
# Checks against Integer rather than Fixnum: Fixnum was folded into Integer
# in Ruby 2.4 and the constant was removed in 3.2, where referencing it
# raises NameError. On older Rubies Integer still covers Fixnum values.
#
# Returns @dates.
def self.stringify_values
  @dates.each do |key, value|
    # replacing the value of an existing key is safe while iterating
    @dates[key] = value.to_s if value.is_a?(Integer)
  end
end
1035
+
1036
+
1037
# Fills in @dates[:date_start_full] / @dates[:date_end_full] (YYYY-MM-DD)
# from :date_start / :date_end when the full form is not already set.
#
# Start dates are padded to the beginning of the period (YYYY-MM => day 01,
# YYYY => January 1). End dates are padded to the end of the period
# (YYYY-MM => last day of that month via days_in_month, YYYY => December 31).
# Values that already contain a YYYY-MM-DD date are copied across as-is.
def self.add_full_dates
  start = @dates[:date_start]
  if start && !@dates[:date_start_full]
    padded_start =
      if start.match(/\d{4}\-\d{2}\-\d{2}/)
        start
      elsif start.match(/\d{4}\-\d{2}/)
        start + "-01"
      elsif start.match(/\d{4}/)
        start + "-01-01"
      end
    @dates[:date_start_full] = padded_start if padded_start
  end

  finish = @dates[:date_end]
  if finish && !@dates[:date_end_full]
    if finish.match(/\d{4}\-\d{2}\-\d{2}/)
      @dates[:date_end_full] = finish
    elsif finish.match(/\d{4}\-\d{2}/)
      # year is the leading four digits, month the trailing two
      year = finish.match(/^\d{4}/).to_s
      month = finish.match(/\d{2}$/).to_s
      @dates[:date_end_full] = "#{finish}-#{days_in_month(month, year)}"
    elsif finish.match(/\d{4}/)
      @dates[:date_end_full] = finish + "-12-31"
    end
  end
end
1062
+
1063
# Classifies how certain a date string is, based on markers in the text.
#
# Checked in this order (the first hit wins):
#   1) questionable - the string contains '?'
#   2) approximate  - the lower-cased string contains 'ca' or 'approx'
#   3) inferred     - the string contains '[' or ']'
# Returns nil when none of the markers is present.
def self.return_certainty(str)
  return 'questionable' if str.include?('?')

  lowered = str.downcase
  return 'approximate' if %w[ca approx].any? { |marker| lowered.include?(marker) }

  return 'inferred' if str.include?('[') || str.include?(']')

  nil
end
1085
+ end
@@ -0,0 +1,3 @@
1
module Timetwister
  # Gem version string, read by the gemspec. Frozen so the shared constant
  # cannot be modified in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,22 @@
1
# coding: utf-8
# Gem specification for timetwister.

# Make lib/ loadable so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) if !$LOAD_PATH.include?(lib)
require 'timetwister/version'

Gem::Specification.new do |gem|
  gem.name          = "timetwister"
  gem.version       = Timetwister::VERSION
  gem.authors       = ["Alex Duryee"]
  gem.email         = ["alexanderduryee@nypl.org"]
  gem.summary       = "Chronic wrapper to handle messy date data"
  gem.homepage      = ""
  gem.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # picked out of that list by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency "bundler", "~> 1.7"
  gem.add_development_dependency "rake", "~> 10.0"
end
metadata ADDED
@@ -0,0 +1,81 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: timetwister
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Alex Duryee
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-12-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description:
42
+ email:
43
+ - alexanderduryee@nypl.org
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - Gemfile
50
+ - LICENSE.txt
51
+ - README.md
52
+ - Rakefile
53
+ - lib/timetwister.rb
54
+ - lib/timetwister/parser.rb
55
+ - lib/timetwister/version.rb
56
+ - timetwister.gemspec
57
+ homepage: ''
58
+ licenses:
59
+ - MIT
60
+ metadata: {}
61
+ post_install_message:
62
+ rdoc_options: []
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ! '>='
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ requirements: []
76
+ rubyforge_project:
77
+ rubygems_version: 2.4.5
78
+ signing_key:
79
+ specification_version: 4
80
+ summary: Chronic wrapper to handle messy date data
81
+ test_files: []