timetwister 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +31 -0
- data/Rakefile +2 -0
- data/lib/timetwister.rb +20 -0
- data/lib/timetwister/parser.rb +1085 -0
- data/lib/timetwister/version.rb +3 -0
- data/timetwister.gemspec +22 -0
- metadata +81 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
YWVhN2M0NjcyMTMwNGE2ZjQ0OTRkZjM5NTE5YzE1YjM4MmQxYzVmZQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NDkwNjYyOGZkYzViMDE2MWE4NzM1ZTQwZmE2OGI4MDg3YTk2M2Q5Ng==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
NmFlOWEwMDk4YjRiM2JlMDZlOWVmYjVjOGI5YjJiZTRmNTQ1NzYxMmU3YmVk
|
10
|
+
YTRmMDVjZGY1NWVlOTFjMzIxYTQ3ZjdmYWQ5NzlkNWExZWI5Mjc2MjJjMDMx
|
11
|
+
M2M4YjY4ODhiYjUyMWM0ZDA4NGVhMmIwZmFhMGJjOTVhMTc1OTM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
YmRkOTYzNTc2YjdkZTkwNzFjZTFkNWQ4ZDU4MTFlNWYyMzg5NTMyZmZmNTNh
|
14
|
+
MjY1ZjMwZDI0Y2RlYTIwZTYxZTU5ZTJmZmU0ZDRlNDk2YmU3MzA1Yjg2OTE3
|
15
|
+
OTk4MmU0NTU4Mjc5NDc4YjBkNTgwMGRlNmFiOWIyYjQ5NDNjMzI=
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Alex Duryee
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Timetwister
|
2
|
+
|
3
|
+
TODO: Write a gem description
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'timetwister'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install timetwister
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
TODO: Write usage instructions here
|
24
|
+
|
25
|
+
## Contributing
|
26
|
+
|
27
|
+
1. Fork it ( https://github.com/[my-github-username]/timetwister/fork )
|
28
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
29
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
30
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
31
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/lib/timetwister.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require "timetwister/version"
|
2
|
+
require "timetwister/parser"
|
3
|
+
|
4
|
+
# Public entry point for the timetwister gem.
module Timetwister

  # Parses a free-text date expression into a hash of date components.
  #
  # str     - the date string to parse; nil is treated as unparseable
  # options - hash of parser options, handed unchanged to Parser.string_to_dates
  #
  # Returns a hash with the keys :original_string, :index_dates, :date_start,
  # :date_end, :date_start_full, :date_end_full, :inclusive_range and
  # :certainty. When the input is nil or contains '??' the hash is returned
  # with every value except :original_string left at its empty default.
  def self.parse(str, options={})

    dates = { :original_string => str, :index_dates => [], :date_start => nil, :date_end => nil,
              :date_start_full => nil, :date_end_full => nil, :inclusive_range => nil, :certainty => nil }

    # defensive check - we don't want to try to parse certain malformed strings
    # (otherwise dates get flipped and types get wacky); nil input is rejected
    # here as well instead of raising NoMethodError on the include? call
    return dates if str.nil? || str.include?('??')

    Parser.string_to_dates(str, dates, options)
  end
end
|
@@ -0,0 +1,1085 @@
|
|
1
|
+
require 'chronic'
|
2
|
+
|
3
|
+
class Parser
|
4
|
+
|
5
|
+
def self.string_to_dates(str, hash, options)
|
6
|
+
@string = str
|
7
|
+
@dates = hash
|
8
|
+
@options = options
|
9
|
+
|
10
|
+
@regex_tokens = regex_tokens
|
11
|
+
|
12
|
+
# perform this here, before the string gets purged of certainty indicators
|
13
|
+
@dates[:certainty] = return_certainty(@string)
|
14
|
+
|
15
|
+
@string = clean_string(@string)
|
16
|
+
self.match_replace
|
17
|
+
|
18
|
+
# if there are any future dates, return an empty hash
|
19
|
+
if @dates[:index_dates] != [] && @dates[:index_dates].last > Time.now.year
|
20
|
+
return { :original_string => @string, :index_dates => [], :keydate => nil, :keydate_z => nil, :date_start => nil, :date_end => nil,
|
21
|
+
:date_start_full => nil, :date_end_full => nil, :inclusive_range => nil, :certainty => nil }
|
22
|
+
end
|
23
|
+
|
24
|
+
if @dates[:date_start] && !@dates[:date_end] && !(@dates[:test_data] == 150 || @dates[:test_data] == 160)
|
25
|
+
@dates[:date_end] = @dates[:date_start]
|
26
|
+
end
|
27
|
+
|
28
|
+
stringify_values
|
29
|
+
add_full_dates
|
30
|
+
|
31
|
+
return @dates
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.match_replace
|
35
|
+
match_replace_clusters.each do |c|
|
36
|
+
match_patterns = (c[:match].kind_of? Array) ? c[:match] : [c[:match]]
|
37
|
+
match_patterns.each do |p|
|
38
|
+
match_test = @regex_tokens[:anchor_start] + p + @regex_tokens[:anchor_end]
|
39
|
+
if @string.match(match_test)
|
40
|
+
@dates[:test_data] = c[:id]
|
41
|
+
if c[:proc]
|
42
|
+
# clone string to avoid changing it via in-place methods used in Procs
|
43
|
+
work_string = @string.clone
|
44
|
+
c[:proc].call(work_string, c[:arg])
|
45
|
+
end
|
46
|
+
break
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
|
53
|
+
def self.match_replace_clusters
|
54
|
+
r = @regex_tokens
|
55
|
+
|
56
|
+
# extend regex_tokens for common complex formats
|
57
|
+
|
58
|
+
# July 4, 1776
|
59
|
+
r[:date_month_day_year] = "(#{r[:circa]})?#{r[:named_month]}#{r[:day_of_month]}#{r[:optional_comma]}#{r[:year]}"
|
60
|
+
# July 1776
|
61
|
+
r[:date_month_year] = "(#{r[:circa]})?#{r[:named_month]}#{r[:optional_comma]}#{r[:year]}"
|
62
|
+
# 1776 July 4
|
63
|
+
r[:date_year_month_day] = "(#{r[:circa]})?#{r[:year]}#{r[:optional_comma]}#{r[:named_month]}#{r[:day_of_month]}"
|
64
|
+
# 1776 4 July
|
65
|
+
r[:date_year_day_month] = "(#{r[:circa]})?#{r[:year]}#{r[:optional_comma]}#{r[:day_of_month]}#{r[:named_month]}"
|
66
|
+
# 4 July 1776
|
67
|
+
r[:date_day_month_year] = "(#{r[:circa]})?#{r[:day_of_month]}#{r[:named_month]}#{r[:optional_comma]}#{r[:year]}"
|
68
|
+
# 1776 July
|
69
|
+
r[:date_year_month] = "(#{r[:circa]})?#{r[:year]}#{r[:optional_comma]}#{r[:named_month]}"
|
70
|
+
|
71
|
+
match_replace = []
|
72
|
+
|
73
|
+
# NOTE: :match values will be converted to regular expressions
|
74
|
+
# and anchored at the beginning and end of test string.
|
75
|
+
# Leading and trailing punctuation will be ignored
|
76
|
+
|
77
|
+
# options[:force_8601] == true will force '2001-02' to be treated as February 2001 rather than 2001-2002
|
78
|
+
# and will handle ISO8601 ranges, e.g. 2001-02/2001-12
|
79
|
+
if @options[:force_8601]
|
80
|
+
match_replace << {
|
81
|
+
:match => "#{r[:iso8601_full]}\\/#{r[:iso8601_full]}",
|
82
|
+
:proc => proc_8601_range,
|
83
|
+
:id => 10
|
84
|
+
}
|
85
|
+
match_replace << {
|
86
|
+
:match => "#{r[:iso8601_month]}",
|
87
|
+
:proc => proc_month_year_single,
|
88
|
+
:id => 20
|
89
|
+
}
|
90
|
+
end
|
91
|
+
|
92
|
+
# ISO 8601 (full dates only - see note on options[:force_8601] above)
|
93
|
+
match_replace << {
|
94
|
+
:match => "#{r[:iso8601]}",
|
95
|
+
:proc => proc_full_date_single,
|
96
|
+
:id => 30
|
97
|
+
}
|
98
|
+
|
99
|
+
# ISO 8601 ranges (full dates only - see note on options[:force_8601] above)
|
100
|
+
match_replace << {
|
101
|
+
:match => "#{r[:iso8601]}\\/#{r[:iso8601]}",
|
102
|
+
:proc => proc_8601_range,
|
103
|
+
:id => 40
|
104
|
+
}
|
105
|
+
|
106
|
+
# matches any number of 4-digit years separated by a single range or list delimiter
|
107
|
+
match_replace << {
|
108
|
+
:match => "((#{r[:year]})|(#{r[:year_range_short]}))(#{r[:range_or_list_delimiter]}((#{r[:year]})|(#{r[:year_range_short]})))+",
|
109
|
+
:proc => proc_year_range_list_combo,
|
110
|
+
:id => 60
|
111
|
+
}
|
112
|
+
|
113
|
+
# 1969, [1969], c1969
|
114
|
+
# excludes the range delimiter so as not to override ids 150/160
|
115
|
+
match_replace << {
|
116
|
+
:match => [
|
117
|
+
"(#{r[:circa]})?[^#{r[:range_delimiter]}]#{r[:year]}([\\,\\;\\s(and)]{1,3}#{r[:nd]})?",
|
118
|
+
"^#{r[:year]}$"],
|
119
|
+
:proc => proc_single_year,
|
120
|
+
:id => 70
|
121
|
+
}
|
122
|
+
|
123
|
+
# "July 4 1976 - Oct 1 1981"
|
124
|
+
# "4 July 1976 - 1 Oct 1981"
|
125
|
+
# "1976 July 4 - 1981 Oct 1"
|
126
|
+
# "1976 4 July - 1981 1 Oct"
|
127
|
+
match_replace << {
|
128
|
+
:match => [
|
129
|
+
"#{r[:date_month_day_year]}#{r[:range_delimiter]}#{r[:date_month_day_year]}",
|
130
|
+
"#{r[:date_day_month_year]}#{r[:range_delimiter]}#{r[:date_day_month_year]}",
|
131
|
+
"#{r[:date_year_month_day]}#{r[:range_delimiter]}#{r[:date_year_month_day]}",
|
132
|
+
"#{r[:date_year_day_month]}#{r[:range_delimiter]}#{r[:date_year_day_month]}",
|
133
|
+
],
|
134
|
+
:proc => proc_full_date_single_range,
|
135
|
+
:id => 80
|
136
|
+
}
|
137
|
+
|
138
|
+
# "1976 July - 1981 Oct"
|
139
|
+
# "July 1976 - Oct 1981"
|
140
|
+
match_replace << {
|
141
|
+
:match => [
|
142
|
+
"(#{r[:circa]})?#{r[:date_year_month]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:date_year_month]}",
|
143
|
+
"(#{r[:circa]})?#{r[:date_month_year]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:date_month_year]}"
|
144
|
+
],
|
145
|
+
:proc => proc_full_date_single_range,
|
146
|
+
:arg => 'month',
|
147
|
+
:id => 100
|
148
|
+
}
|
149
|
+
|
150
|
+
|
151
|
+
|
152
|
+
# 1969-1977
|
153
|
+
match_replace << {
|
154
|
+
:match => "(#{r[:circa]})?#{r[:year]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:year]}",
|
155
|
+
:proc => proc_year_range,
|
156
|
+
:id => 120
|
157
|
+
}
|
158
|
+
|
159
|
+
# 1960-1980s
|
160
|
+
match_replace << {
|
161
|
+
:match => "(#{r[:circa]})?#{r[:year]}#{r[:range_delimiter]}#{r[:decade_s]}",
|
162
|
+
:proc => proc_range_year_to_decade,
|
163
|
+
:id => 130
|
164
|
+
}
|
165
|
+
|
166
|
+
# 1960s-1981
|
167
|
+
match_replace << {
|
168
|
+
:match => "(#{r[:circa]})?\\s?#{r[:decade_s]}#{r[:range_delimiter]}(#{r[:circa]})?\\s?#{r[:year]}",
|
169
|
+
:proc => proc_year_range,
|
170
|
+
:id => 140
|
171
|
+
}
|
172
|
+
|
173
|
+
# 1969-72
|
174
|
+
match_replace << {
|
175
|
+
:match => "(#{r[:circa]})?#{r[:year_range_short]}",
|
176
|
+
:proc => proc_year_range_short,
|
177
|
+
:id => 145
|
178
|
+
}
|
179
|
+
|
180
|
+
# 1969- (e.g. after 1969)
|
181
|
+
match_replace << {
|
182
|
+
:match => "(#{r[:circa]})?\\s?#{r[:year]}#{r[:range_delimiter]}",
|
183
|
+
:proc => proc_single_year,
|
184
|
+
:arg => 'start',
|
185
|
+
:id => 150
|
186
|
+
}
|
187
|
+
|
188
|
+
# -1969 (e.g. before 1969) - treat as single
|
189
|
+
match_replace << {
|
190
|
+
:match => "#{r[:range_delimiter]}(#{r[:circa]})?\\s?#{r[:year]}",
|
191
|
+
:proc => proc_single_year,
|
192
|
+
:arg => 'end',
|
193
|
+
:id => 160
|
194
|
+
}
|
195
|
+
|
196
|
+
# nd, n.d., undated, Undated...
|
197
|
+
# note that :id never manifests anywhere (no hash to put it into)
|
198
|
+
# so the :test_data for undated is nil
|
199
|
+
match_replace << {
|
200
|
+
:match => "#{r[:nd]}",
|
201
|
+
:proc => nil,
|
202
|
+
:id => 170
|
203
|
+
}
|
204
|
+
|
205
|
+
# 1970's, 1970s
|
206
|
+
match_replace << {
|
207
|
+
:match => "(#{r[:circa]})?#{r[:decade_s]}",
|
208
|
+
:proc => proc_decade_s,
|
209
|
+
:id => 180
|
210
|
+
}
|
211
|
+
|
212
|
+
# 1970s - 1980s, etc.
|
213
|
+
match_replace << {
|
214
|
+
:match => "(#{r[:circa]})?\\s?#{r[:decade_s]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:decade_s]}",
|
215
|
+
:proc => proc_decade_s_range,
|
216
|
+
:id => 190
|
217
|
+
}
|
218
|
+
|
219
|
+
# July 4 1976
|
220
|
+
# 4 July 1976
|
221
|
+
# 1976 July 4
|
222
|
+
# 1976 4 July
|
223
|
+
# (with or without optional commas)
|
224
|
+
match_replace << {
|
225
|
+
:match => [
|
226
|
+
"#{r[:date_month_day_year]}",
|
227
|
+
"#{r[:date_day_month_year]}",
|
228
|
+
"#{r[:date_year_month_day]}",
|
229
|
+
"#{r[:date_year_day_month]}"
|
230
|
+
],
|
231
|
+
:proc => proc_full_date_single,
|
232
|
+
:id => 200
|
233
|
+
}
|
234
|
+
|
235
|
+
|
236
|
+
# December 1941
|
237
|
+
# 1941 December
|
238
|
+
# (with or without optional commas)
|
239
|
+
match_replace << {
|
240
|
+
:match => [
|
241
|
+
"(#{r[:circa]})?#{r[:date_month_year]}",
|
242
|
+
"(#{r[:circa]})?#{r[:date_year_month]}"
|
243
|
+
],
|
244
|
+
:proc => proc_month_year_single,
|
245
|
+
:id => 220
|
246
|
+
}
|
247
|
+
|
248
|
+
|
249
|
+
# Jun-July 1969
|
250
|
+
# 1969 Jun-July
|
251
|
+
match_replace << {
|
252
|
+
:match => [
|
253
|
+
"(#{r[:circa]})?#{r[:named_month]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:named_month]}#{r[:optional_comma]}#{r[:year]}",
|
254
|
+
"(#{r[:circa]})?#{r[:year]}#{r[:optional_comma]}#{r[:named_month]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:named_month]}",
|
255
|
+
],
|
256
|
+
:proc => proc_single_year_month_range,
|
257
|
+
:id => 230
|
258
|
+
}
|
259
|
+
|
260
|
+
|
261
|
+
# Feb. 1-20, 1980
|
262
|
+
# 1980 Feb. 1-20
|
263
|
+
# 1980 1-20 Feb.
|
264
|
+
match_replace << {
|
265
|
+
:match => [
|
266
|
+
"(#{r[:circa]})?#{r[:named_month]}#{r[:day_of_month]}#{r[:range_delimiter]}#{r[:day_of_month]}#{r[:optional_comma]}#{r[:year]}",
|
267
|
+
"(#{r[:circa]})?#{r[:year]}#{r[:optional_comma]}#{r[:named_month]}#{r[:day_of_month]}#{r[:range_delimiter]}#{r[:day_of_month]}",
|
268
|
+
"(#{r[:circa]})?#{r[:year]}#{r[:optional_comma]}#{r[:day_of_month]}#{r[:range_delimiter]}#{r[:day_of_month]}#{r[:named_month]}",
|
269
|
+
"(#{r[:circa]})?#{r[:day_of_month]}#{r[:range_delimiter]}#{r[:day_of_month]}#{r[:named_month]}#{r[:optional_comma]}#{r[:year]}"
|
270
|
+
],
|
271
|
+
:proc => proc_single_month_date_range,
|
272
|
+
:id => 240
|
273
|
+
}
|
274
|
+
|
275
|
+
|
276
|
+
# Early 1960's, mid-1980s, late 1950's, etc.
|
277
|
+
match_replace << {
|
278
|
+
:match => "(#{r[:circa]})?#{r[:decade_qualifier]}\\s?#{r[:decade_s]}",
|
279
|
+
:proc => proc_decade_s_qualified,
|
280
|
+
:id => 250
|
281
|
+
}
|
282
|
+
|
283
|
+
|
284
|
+
|
285
|
+
|
286
|
+
# 19--, 18--, 18--?, etc.
|
287
|
+
match_replace << {
|
288
|
+
:match => "(#{r[:circa]})?[1-2][0-9]\-{2}",
|
289
|
+
:proc => proc_century_with_placeholders,
|
290
|
+
:id => 290
|
291
|
+
}
|
292
|
+
|
293
|
+
# Jan 2-Dec 31 1865
|
294
|
+
# 1865 Jan 2-Dec 31
|
295
|
+
match_replace << {
|
296
|
+
:match => [
|
297
|
+
"(#{r[:circa]})?#{r[:named_month]}#{r[:day_of_month]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:named_month]}#{r[:day_of_month]}#{r[:optional_comma]}#{r[:year]}",
|
298
|
+
"(#{r[:circa]})?#{r[:year]}#{r[:optional_comma]}#{r[:named_month]}#{r[:day_of_month]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:named_month]}#{r[:day_of_month]}",
|
299
|
+
"(#{r[:circa]})?#{r[:day_of_month]}#{r[:named_month]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:day_of_month]}#{r[:named_month]}#{r[:optional_comma]}#{r[:year]}"
|
300
|
+
],
|
301
|
+
:proc => proc_year_with_dates,
|
302
|
+
:id => 310
|
303
|
+
}
|
304
|
+
|
305
|
+
# 1863 Aug 7-1866 Dec
|
306
|
+
match_replace << {
|
307
|
+
:match => [
|
308
|
+
"(#{r[:circa]})?#{r[:year]}#{r[:named_month]}#{r[:day_of_month]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:year]}#{r[:named_month]}",
|
309
|
+
"(#{r[:circa]})?#{r[:day_of_month]}#{r[:named_month]}#{r[:year]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:named_month]}#{r[:year]}",
|
310
|
+
"(#{r[:circa]})?#{r[:named_month]}#{r[:day_of_month]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:named_month]}#{r[:year]}",
|
311
|
+
"(#{r[:circa]})?#{r[:named_month]}#{r[:year]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:named_month]}#{r[:day_of_month]}#{r[:optional_comma]}#{r[:year]}",
|
312
|
+
"(#{r[:circa]})?#{r[:year]}#{r[:named_month]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:year]}#{r[:named_month]}#{r[:day_of_month]}"
|
313
|
+
],
|
314
|
+
:proc => proc_full_with_year_month,
|
315
|
+
:id => 330
|
316
|
+
}
|
317
|
+
|
318
|
+
# 1942 November-1943
|
319
|
+
# 1943-1944 November
|
320
|
+
# November 1942-1943
|
321
|
+
# 1942-November 1943
|
322
|
+
match_replace << {
|
323
|
+
:match => [
|
324
|
+
"(#{r[:circa]})?#{r[:year]}#{r[:named_month]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:year]}",
|
325
|
+
"(#{r[:circa]})?#{r[:year]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:year]}#{r[:named_month]}",
|
326
|
+
"(#{r[:circa]})?#{r[:named_month]}#{r[:year]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:year]}",
|
327
|
+
"(#{r[:circa]})?#{r[:year]}#{r[:range_delimiter]}(#{r[:circa]})?#{r[:named_month]}#{r[:year]}"
|
328
|
+
],
|
329
|
+
:proc => proc_year_range_single_date,
|
330
|
+
:id => 340
|
331
|
+
}
|
332
|
+
|
333
|
+
# 01/31/1999
|
334
|
+
match_replace << {
|
335
|
+
:match => "(#{r[:circa]})?[0-1]?[0-9]/[0-3]?[0-9]/#{r[:year]}",
|
336
|
+
:proc => proc_date_with_slashes,
|
337
|
+
:id => 350
|
338
|
+
}
|
339
|
+
|
340
|
+
match_replace
|
341
|
+
end
|
342
|
+
|
343
|
+
|
344
|
+
def self.proc_single_year
|
345
|
+
proc = Proc.new do |string, open_range|
|
346
|
+
year = string.gsub(/[^0-9]*/,'')
|
347
|
+
@dates[:index_dates] << year.to_i
|
348
|
+
case open_range
|
349
|
+
when 'start'
|
350
|
+
@dates[:date_start] = year
|
351
|
+
when 'end'
|
352
|
+
@dates[:date_end] = year
|
353
|
+
else
|
354
|
+
@dates[:date_start] = year
|
355
|
+
@dates[:date_end] = year
|
356
|
+
end
|
357
|
+
end
|
358
|
+
end
|
359
|
+
|
360
|
+
|
361
|
+
def self.proc_year_range
|
362
|
+
proc = Proc.new do |string|
|
363
|
+
# Only supports years from 1000
|
364
|
+
range = year_range(string)
|
365
|
+
if range.length > 0
|
366
|
+
range_start, range_end = range
|
367
|
+
if range_end > range_start
|
368
|
+
|
369
|
+
(range_start..range_end).to_a.each { |d| @dates[:index_dates] << d }
|
370
|
+
|
371
|
+
@dates[:inclusive_range] = true
|
372
|
+
process_year_range()
|
373
|
+
end
|
374
|
+
end
|
375
|
+
end
|
376
|
+
end
|
377
|
+
|
378
|
+
|
379
|
+
|
380
|
+
|
381
|
+
def self.proc_range_year_to_decade
|
382
|
+
proc = Proc.new do |string|
|
383
|
+
range = year_range(string)
|
384
|
+
range_start, range_end_decade = range
|
385
|
+
|
386
|
+
if range_start && range_end_decade
|
387
|
+
if range_end_decade > range_start
|
388
|
+
range_end = range_end_decade + 9
|
389
|
+
(range_start..range_end).to_a.each { |d| @dates[:index_dates] << d }
|
390
|
+
@dates[:inclusive_range] = true
|
391
|
+
process_year_range()
|
392
|
+
end
|
393
|
+
end
|
394
|
+
end
|
395
|
+
end
|
396
|
+
|
397
|
+
|
398
|
+
def self.proc_year_range_short
|
399
|
+
proc = Proc.new do |string|
|
400
|
+
range = string.split('-')
|
401
|
+
range.each { |d| d.gsub!(/[^0-9]*/,'') }
|
402
|
+
decade_string = range[0].match(/^[0-9]{2}/).to_s
|
403
|
+
range[1] = decade_string + range[1]
|
404
|
+
range_start = range[0].to_i
|
405
|
+
range_end = range[1].to_i
|
406
|
+
|
407
|
+
if range_end > range_start
|
408
|
+
(range_start..range_end).to_a.each { |d| @dates[:index_dates] << d }
|
409
|
+
@dates[:inclusive_range] = true
|
410
|
+
process_year_range()
|
411
|
+
end
|
412
|
+
end
|
413
|
+
end
|
414
|
+
|
415
|
+
# Builds the Proc that handles mixed lists and ranges of years, e.g.
# "1969-1972, 1975", "1969 to 1971; 1980" or "1969-72, 1975".
#
# The Proc takes the matched string, replaces @dates[:index_dates] with the
# sorted list of every year covered, sets @dates[:inclusive_range] to false
# and then calls process_year_range.
def self.proc_year_range_list_combo
  Proc.new do |string|
    index_dates = []

    # list delimiters (',' and ';') separate independent entries
    string.split(/[;,]/).each do |entry|
      # within an entry, '-' or 'to' separates the endpoints of a range;
      # keep only the digits of each endpoint (drops brackets, 'c', '?', ...)
      endpoints = entry.split(/\-|to/).map { |part| part[/[0-9]{4}|[0-9]{2}/] }.compact

      endpoints.each_slice(2) do |first, last|
        # a two-digit value is only meaningful as the end of a short range
        next unless first.length == 4

        if last.nil?
          index_dates << first.to_i
        else
          # short range (1969-72): borrow the century from the start year
          last = first[0, 2] + last if last.length == 2
          # a reversed range expands to nothing, as before
          index_dates.concat((first.to_i..last.to_i).to_a)
        end
      end
    end

    @dates[:index_dates] = index_dates.sort
    @dates[:inclusive_range] = false
    process_year_range()
  end
end
|
448
|
+
|
449
|
+
|
450
|
+
def self.proc_decade_s
|
451
|
+
proc = Proc.new do |string|
|
452
|
+
decade = string.match(/[0-9]{3}0/).to_s
|
453
|
+
decade_start = decade.to_i
|
454
|
+
decade_end = (decade_start + 9)
|
455
|
+
@dates[:index_dates] = (decade_start..decade_end).to_a
|
456
|
+
@dates[:inclusive_range] = true
|
457
|
+
process_year_range()
|
458
|
+
end
|
459
|
+
end
|
460
|
+
|
461
|
+
|
462
|
+
def self.proc_century_with_placeholders
|
463
|
+
proc = Proc.new do |string|
|
464
|
+
century = string.match(/[0-9]{2}/).to_s
|
465
|
+
century += '00'
|
466
|
+
century_start = century.to_i
|
467
|
+
century_end = (century_start + 99)
|
468
|
+
@dates[:index_dates] = (century_start..century_end).to_a
|
469
|
+
@dates[:inclusive_range] = true
|
470
|
+
process_year_range()
|
471
|
+
end
|
472
|
+
end
|
473
|
+
|
474
|
+
|
475
|
+
# Builds the Proc that handles qualified decades such as "early 1960s",
# "mid-1980s" or "late 1950's".
#
# The qualifier selects a slice of the decade:
#   early -> decade .. decade + 5
#   mid   -> decade + 3 .. decade + 8
#   late  -> decade + 5 .. decade + 9
#
# The Proc replaces @dates[:index_dates] with that slice, sets
# @dates[:inclusive_range] to true and calls process_year_range. If the
# string has no decade or no recognised qualifier, @dates is left untouched.
def self.proc_decade_s_qualified
  Proc.new do |string|
    decade = string[/[0-9]{3}0/]
    # without a decade there is nothing to index (previously produced years 0-9)
    next if decade.nil?

    offsets =
      if string.match(/[Ee]arly/)
        [0, 5]
      elsif string.match(/[Mm]id(dle)?/)
        [3, 8]
      elsif string.match(/[Ll]ate/)
        [5, 9]
      end
    # without a qualifier the range is undefined (previously raised on nil..nil)
    next if offsets.nil?

    decade_start = decade.to_i
    range_start = decade_start + offsets[0]
    range_end = decade_start + offsets[1]

    @dates[:index_dates] = (range_start..range_end).to_a
    @dates[:inclusive_range] = true
    process_year_range()
  end
end
|
494
|
+
|
495
|
+
|
496
|
+
def self.proc_decade_s_range
|
497
|
+
proc = Proc.new do |string|
|
498
|
+
decades = string.scan(/[0-9]{3}0/)
|
499
|
+
if decades.length == 2
|
500
|
+
range_start = decades[0].to_i
|
501
|
+
range_end = decades[1].to_i + 9
|
502
|
+
@dates[:index_dates] = (range_start..range_end).to_a
|
503
|
+
@dates[:inclusive_range] = true
|
504
|
+
process_year_range()
|
505
|
+
end
|
506
|
+
end
|
507
|
+
end
|
508
|
+
|
509
|
+
|
510
|
+
def self.proc_full_date_single
|
511
|
+
proc = Proc.new do |string|
|
512
|
+
datetime = full_date_single_to_datetime(string)
|
513
|
+
if datetime
|
514
|
+
full_date_single_keydates(string,datetime,'%Y-%m-%d')
|
515
|
+
@dates[:index_dates] << datetime.strftime('%Y').to_i
|
516
|
+
end
|
517
|
+
end
|
518
|
+
end
|
519
|
+
|
520
|
+
|
521
|
+
def self.proc_month_year_single
|
522
|
+
proc = Proc.new do |string|
|
523
|
+
string.gsub!(/\?/,'')
|
524
|
+
|
525
|
+
# Chronic can't parse year-month strings properly
|
526
|
+
# so we need to change them to month-year before
|
527
|
+
# parsing them.
|
528
|
+
|
529
|
+
if string.match(/^[0-9]/)
|
530
|
+
tmpyear = string.split(' ')[0]
|
531
|
+
string.gsub!(/^.+? /,'')
|
532
|
+
string << " "
|
533
|
+
string << tmpyear
|
534
|
+
end
|
535
|
+
|
536
|
+
datetime = Chronic.parse(string)
|
537
|
+
if datetime
|
538
|
+
full_date_single_keydates(string,datetime, '%Y-%m')
|
539
|
+
@dates[:index_dates] << datetime.strftime('%Y').to_i
|
540
|
+
end
|
541
|
+
end
|
542
|
+
end
|
543
|
+
|
544
|
+
|
545
|
+
|
546
|
+
|
547
|
+
|
548
|
+
# "1976 July 4 - 1981 October 1", etc.
|
549
|
+
# call with second argument 'month' if no day value is present
|
550
|
+
# Builds the Proc that handles a range between two complete dates, e.g.
# "1976 July 4 - 1981 October 1" or, with specificity 'month',
# "July 1976 - Oct 1981".
#
# The Proc takes (string, specificity). The string is split on '-' or
# ' to '; each half is converted with full_date_single_to_datetime. When
# specificity is 'month' the start is moved to the first day of its month
# and the end to the last day of its month (via days_in_month). If both
# datetimes are available they are passed to process_date_range.
# @dates[:inclusive_range] is set to true whenever the string splits into
# exactly two parts, even if those parts could not be parsed.
def self.proc_full_date_single_range
  Proc.new do |string, specificity|
    dates = []
    if string.match(/\-/)
      dates = string.split('-')
    elsif string.match(/\sto\s/)
      dates = string.split(' to ')
    end

    dates.each { |d| d.strip! }

    if dates.length == 2
      datetime_start = full_date_single_to_datetime(dates[0])
      datetime_end = full_date_single_to_datetime(dates[1])

      # only widen to whole months when both halves parsed; calling strftime
      # on a failed (nil) parse used to raise NoMethodError here
      if specificity == 'month' && datetime_start && datetime_end
        datetime_start = Chronic.parse(datetime_start.strftime('%Y-%m') + '-01')

        month_date_end = datetime_end.strftime('%Y-%m')
        end_year, end_month = month_date_end.split('-')
        last_day = days_in_month(end_month, end_year).to_s

        datetime_end = Chronic.parse(month_date_end + "-#{last_day}")
      end

      if datetime_start && datetime_end
        process_date_range(datetime_start, datetime_end, specificity)
      end
      @dates[:inclusive_range] = true
    end
  end
end
|
586
|
+
|
587
|
+
|
588
|
+
# Feb. 1-20, 1980
|
589
|
+
# 1980 Feb. 1-20
|
590
|
+
# 1980 1-20 Feb.
|
591
|
+
def self.proc_single_month_date_range
|
592
|
+
proc = Proc.new do |string|
|
593
|
+
year = extract_year(string)
|
594
|
+
day_range = string.match(/\d{1,2}\-\d{1,2}/).to_s
|
595
|
+
string.gsub!(Regexp.new(day_range),'')
|
596
|
+
month = string.strip
|
597
|
+
days = day_range.split('-')
|
598
|
+
dates = []
|
599
|
+
if days.length == 2
|
600
|
+
days.each do |d|
|
601
|
+
d.strip!
|
602
|
+
dates << "#{month} #{d} #{year}"
|
603
|
+
end
|
604
|
+
datetime_start = full_date_single_to_datetime(dates[0])
|
605
|
+
datetime_end = full_date_single_to_datetime(dates[1])
|
606
|
+
if datetime_start && datetime_end
|
607
|
+
process_date_range(datetime_start,datetime_end)
|
608
|
+
end
|
609
|
+
end
|
610
|
+
@dates[:inclusive_range] = true
|
611
|
+
end
|
612
|
+
end
|
613
|
+
|
614
|
+
|
615
|
+
# Builds the Proc that handles ISO 8601 ranges written as "start/end",
# e.g. "2001-02-03/2001-12-31".
#
# Each half is converted with iso8601_datetime. If both conversions succeed
# the Proc appends every year from start to end to @dates[:index_dates],
# stores the formatted start and end in @dates[:date_start] and
# @dates[:date_end] (the format for each comes from is8601_string_format
# applied to that half's source string) and sets @dates[:inclusive_range]
# to true. A range given in reverse order is swapped first.
def self.proc_8601_range
  Proc.new do |string|
    dates = string.split('/')
    dates.each { |d| d.strip! }

    datetime_start = iso8601_datetime(dates[0])
    datetime_end = iso8601_datetime(dates[1])

    if datetime_start && datetime_end
      if datetime_comparitor(datetime_end) < datetime_comparitor(datetime_start)
        # this range is reversed in error: swap the datetimes AND their
        # source strings, so each value keeps the precision it was written in
        datetime_start, datetime_end = datetime_end, datetime_start
        dates[0], dates[1] = dates[1], dates[0]
      end

      year_start = datetime_start.strftime('%Y').to_i
      year_end = datetime_end.strftime('%Y').to_i

      @dates[:index_dates] += (year_start..year_end).to_a
      @dates[:date_start] = datetime_start.strftime(is8601_string_format(dates[0]))
      @dates[:date_end] = datetime_end.strftime(is8601_string_format(dates[1]))
      @dates[:inclusive_range] = true
    end
  end
end
|
643
|
+
|
644
|
+
|
645
|
+
# "1981 Oct-Dec", "Oct-Dec 1981", etc.
|
646
|
+
def self.proc_single_year_month_range
|
647
|
+
proc = Proc.new do |string|
|
648
|
+
year = string.match(/[0-9]{4}/).to_s
|
649
|
+
string.gsub!(year,'')
|
650
|
+
string.strip!
|
651
|
+
first_month = string.match(@regex_tokens[:named_month]).to_s
|
652
|
+
last_month = string.match(@regex_tokens[:named_month] + '$').to_s
|
653
|
+
|
654
|
+
# chronic is fiddly about short months with periods
|
655
|
+
# (e.g. "may.") so we remove them
|
656
|
+
date_string_first = first_month.delete('.') + ' 1,' + year
|
657
|
+
datetime_first = Chronic.parse(date_string_first)
|
658
|
+
if !last_month.empty?
|
659
|
+
@dates[:date_start] = datetime_first.strftime('%Y-%m')
|
660
|
+
date_string_last = last_month + ' ' + year
|
661
|
+
datetime_last = Chronic.parse(date_string_last)
|
662
|
+
@dates[:date_end] = datetime_last.strftime('%Y-%m')
|
663
|
+
end
|
664
|
+
@dates[:inclusive_range] = true
|
665
|
+
@dates[:index_dates] << year.to_i
|
666
|
+
end
|
667
|
+
end
|
668
|
+
|
669
|
+
|
670
|
+
# 1942 November-1943
|
671
|
+
# 1943-1944 November
|
672
|
+
# November 1942-1943
|
673
|
+
# 1942-November 1943
|
674
|
+
# Builds the Proc that handles a range where one side is a bare year and the
# other a month plus year, e.g. "1942 November-1943" or "1942-November 1943".
#
# The string is split on '-' or ' to '. If the first half contains letters
# it is taken as the month side: the start becomes day 01 of that month and
# the end becomes December 31 of the second half's year. Otherwise the start
# becomes January 1 of the first half's year and the end becomes the last
# day (via days_in_month) of the second half's month. When both datetimes
# are available they are passed to process_date_range with 'month'
# specificity. @dates[:inclusive_range] is set to true whenever the string
# splits into exactly two parts.
def self.proc_year_range_single_date
  Proc.new do |string|
    dates = []
    if string.match(/\-/)
      dates = string.split('-')
    elsif string.match(/\sto\s/)
      dates = string.split(' to ')
    end

    dates.each { |d| d.strip! }

    if dates.length == 2
      if dates[0].match(/[A-Za-z]/)
        datetime_start = full_date_single_to_datetime(dates[0] + "-01")
        datetime_end = full_date_single_to_datetime(dates[1] + "-12-31")
      else
        datetime_start = full_date_single_to_datetime(dates[0] + "-01-01")
        # day 28 exists in every month, so it is used to discover month/year
        datetime_end_tmp = full_date_single_to_datetime(dates[1] + "-28")
        # guard: a failed parse used to raise NoMethodError on nil.month
        datetime_end =
          if datetime_end_tmp
            last_day = days_in_month(datetime_end_tmp.month, datetime_end_tmp.year).to_s
            full_date_single_to_datetime(dates[1] + "-" + last_day)
          end
      end

      if datetime_start && datetime_end
        process_date_range(datetime_start, datetime_end, "month")
      end
      @dates[:inclusive_range] = true
    end
  end
end
|
703
|
+
|
704
|
+
# Jan 2-Dec 31 1865
|
705
|
+
# 1865 Jan 2-Dec 31
|
706
|
+
# Builds the Proc that handles a month/day range sharing a single year,
# e.g. "Jan 2-Dec 31 1865" or "1865 Jan 2-Dec 31".
#
# The Proc strips commas and question marks from the string, splits it on
# '-' or ' to ', appends the year to whichever half lacks it, moves a
# leading year to the end of its half, parses both halves with Chronic and,
# if both parsed, passes them to process_date_range.
# @dates[:inclusive_range] is set to true whenever the string splits into
# exactly two parts. A string without a delimiter is ignored.
def self.proc_year_with_dates
  Proc.new do |string|
    # extract year for later
    year = string.match(/[0-9]{4}/).to_s

    # instead of dealing with punctuation, we'll scorch the earth
    string.gsub!(/[\,\?]/,'')

    # split the string into two different dates; default to an empty list so
    # a string without a delimiter is skipped instead of raising on nil.length
    dates = []
    if string.match(/\-/)
      dates = string.split('-')
    elsif string.match(/\sto\s/)
      dates = string.split(' to ')
    end

    # if everything's as expected, append the year to the shorter date
    if dates.length == 2
      dates.each do |d|
        if d.match(year).nil?
          d << " "
          d << year
        end

        # Chronic seemed to choke with YYYY-MM-DD dates
        # so we'll flip it to MM-DD-YYYY
        if d.match("^" + year)
          d.gsub!(year + " ","")
          d << " "
          d << year
        end
      end

      # change our strings to datetime objects
      # and send them to be processed elsewhere
      datetime_start = Chronic.parse(dates[0])
      datetime_end = Chronic.parse(dates[1])
      if datetime_start && datetime_end
        process_date_range(datetime_start, datetime_end)
      end
      @dates[:inclusive_range] = true
    end
  end
end
|
747
|
+
|
748
|
+
# 1863 Aug 7-1866 Dec
|
749
|
+
# Builds the Proc that handles a range where one side has a day and the
# other only a month and year, e.g. "1863 Aug 7-1866 Dec".
#
# The string is split on '-' or ' to '. If the first half contains two
# numbers separated by non-digits it is parsed as-is, and the end is
# widened to the last day of its month (via days_in_month). Otherwise the
# first half gets "-01" appended before parsing and the end is used as
# parsed. When both datetimes are available they are passed to
# process_date_range. @dates[:inclusive_range] is set to true whenever the
# string splits into exactly two parts.
def self.proc_full_with_year_month
  Proc.new do |string|
    dates = []
    if string.match(/\-/)
      dates = string.split('-')
    elsif string.match(/\sto\s/)
      dates = string.split(' to ')
    end

    dates.each { |d| d.strip! }

    if dates.length == 2
      datetime_end = full_date_single_to_datetime(dates[1])

      if !dates[0].match(/[0-9]\D+[0-9]/).nil?
        datetime_start = full_date_single_to_datetime(dates[0])

        # widen the end to the whole month; skipped when the end failed to
        # parse (calling strftime on nil used to raise NoMethodError)
        if datetime_end
          month_date_end = datetime_end.strftime('%Y-%m')
          end_year, end_month = month_date_end.split('-')
          last_day = days_in_month(end_month, end_year).to_s

          datetime_end = Chronic.parse(month_date_end + "-#{last_day}")
        end
      else
        datetime_start = full_date_single_to_datetime(dates[0] + "-01")
      end

      if datetime_start && datetime_end
        process_date_range(datetime_start, datetime_end)
      end
      @dates[:inclusive_range] = true
    end
  end
end
|
789
|
+
|
790
|
+
# we assume that all matching dates are mm/dd/yyyy
|
791
|
+
# if they're dd/mm/yyyy, this may get jumbled, but that's rare enough to be okay
|
792
|
+
# Builds the handler for slash-separated dates such as "6/16/1972".
#
# The returned Proc splits the string on "/", trims each part, left-pads
# single-character parts with "0", reorders the first three parts from
# month/day/year to "year-month-day" and passes that string to the Proc
# returned by proc_full_date_single.
def self.proc_date_with_slashes
  Proc.new do |string|
    month, day, year = string.split('/').map do |part|
      trimmed = part.strip
      trimmed.length == 1 ? "0#{trimmed}" : trimmed
    end
    proc_full_date_single.call("#{year}-#{month}-#{day}")
  end
end
|
808
|
+
|
809
|
+
# Returns a Hash mapping token names to regular-expression source strings.
# The values are plain Strings meant to be concatenated or interpolated into
# larger patterns and then compiled with Regexp.new. A fresh Hash is built on
# every call.
#
# NOTE(review): :nd and :decade_qualifier contain top-level alternation, so
# when embedded in a longer pattern the alternatives bind to the surrounding
# pieces rather than to the token alone. Left unchanged here because the
# callers are not visible from this section.
def self.regex_tokens
  {
    # 1969, [1969], c1969
    :year => '[\[\sc\(]{0,3}[0-2][0-9]{3}[\]\s\.\,;\?\)]{0,3}',
    # - or 'to'
    :range_delimiter => '\s*((\-)|(to))\s*',
    # , or ;
    :list_delimiter => '\s*[\,\;]\s*',
    # , or ; or - or 'to'
    :range_or_list_delimiter => '\s*([\,\;]|((\-)|(to)))\s*',
    # n.d., undated, etc.
    :nd => '[\[\s]{0,2}\b([Uu]+ndated\.?)|([nN]o?\.?\s*[dD](ate)?\.?)\b[\s\]\.]{0,3}',
    # 1960s, 1960's
    :decade_s => '[\[\s]{0,2}[0-9]{3}0\'?s[\]\s]{0,2}',

    # 1970-75
    :year_range_short => '\s*[0-9]{4}\s?\-\s*(([2-9][0-9])|(1[3-9]))\s*',

    # 196-
    :decade_aacr => '[0-9]{3}\-',
    # named months, including abbreviations (case insensitive)
    :named_month => '\s*(?i)\b((jan(uary)?)|(feb(ruary)?)|(mar(ch)?)|(apr(il)?)|(may)|(jun(e)?)|(jul(y)?)|(aug(ust)?)|(sep(t|tember)?)|(oct(ober)?)|(nov(ember)?)|(dec(ember)?))\b\.?\s*',
    # circa, ca. - also matches 'c.', which is actually 'copyright', but is still not something we need to deal with
    :circa => '\s*[Cc](irc)?a?\.?\s*',
    # early, late, mid-
    :decade_qualifier => '([Ee]arly)|([Mm]id)|([Ll]ate)\-?',
    # 06-16-1972, 6-16-1972, 6/16/1972, or a bare year.
    # The month alternatives are grouped and the parentheses balanced so the
    # optional "month-day-" prefix compiles and applies as a unit.
    :numeric_date_us => '(((0?[1-9])|(1[0-2]))[\-\/](([0-2]?[0-9])|(3[01]))[\-\/])?[12][0-9]{3}',
    # 1972-06-16
    :iso8601 => '[0-9]{4}\-[0-9]{2}\-[0-9]{2}',
    # 1972, 1972-06 or 1972-06-16
    :iso8601_full => '[0-9]{4}((\-[0-9]{2})(\-[0-9]{2})?)?',
    # 1972-06
    :iso8601_month => '[0-9]{4}\-[0-9]{2}',
    # optional non-word characters at the start / end of a line
    :anchor_start => '^[^\w\d]*',
    :anchor_end => '[^\w\d]*$',
    :optional_comma => '[\s\,]*',
    # 0-29 with optional leading digit, 30, 31
    :day_of_month => '\s*(([0-2]?[0-9])|(3[0-1]))\s*'
  }
end
|
847
|
+
|
848
|
+
|
849
|
+
# Turns a single textual date into whatever Chronic.parse returns for it.
#
# A string containing a YYYY-MM-DD date is handed to Chronic as is.
# Otherwise the first 4-digit run is taken as the year and the first 1-2
# digit run left after that as the day. Both are removed from the text, along
# with dots, commas and whitespace, and what remains is treated as the month.
# Chronic is then given "<month> <day>, <year>" or "<month> <year>".
#
# The argument itself is not modified; all edits happen on a copy.
def self.full_date_single_to_datetime(string)
  working = string.clone
  return Chronic.parse(working) if working.match(/\d{4}\-\d{2}\-\d{2}/)

  year = working.match(/[0-9]{4}/).to_s
  working.gsub!(Regexp.new(year), '')

  day_match = working.match(/[0-9]{1,2}/)
  day = day_match ? day_match.to_s : nil
  working.gsub!(Regexp.new(day), '') if day

  # whatever is left once punctuation and spaces are dropped is the month
  working.gsub!(/[\.\,\s]+/,'')

  suffix = day ? " #{day}, #{year}" : " #{year}"
  Chronic.parse(working + suffix)
end
|
871
|
+
|
872
|
+
|
873
|
+
# Records a start/end pair in @dates.
#
# datetime_start / datetime_end - objects responding to strftime; if either
#                                 is nil or false nothing happens.
# specificity                   - 'month' stores :date_start / :date_end as
#                                 YYYY-MM; anything else stores YYYY-MM-DD.
#
# Nothing is recorded unless the end compares strictly greater than the
# start (per datetime_comparitor). When it does, every year from start to end
# is appended to @dates[:index_dates] and the four start/end keys are set.
def self.process_date_range(datetime_start, datetime_end, specificity = nil)
  return unless datetime_start && datetime_end

  date_format = specificity == 'month' ? '%Y-%m' : '%Y-%m-%d'
  first_year = datetime_start.strftime('%Y').to_i
  last_year = datetime_end.strftime('%Y').to_i

  return unless datetime_comparitor(datetime_end) > datetime_comparitor(datetime_start)

  @dates[:index_dates] += (first_year..last_year).to_a

  @dates[:date_start] = datetime_start.strftime(date_format)
  @dates[:date_end] = datetime_end.strftime(date_format)

  @dates[:date_start_full] = datetime_start.strftime('%Y-%m-%d')
  @dates[:date_end_full] = datetime_end.strftime('%Y-%m-%d')
end
|
895
|
+
|
896
|
+
|
897
|
+
# Stores a single date as the start date.
#
# string   - accepted but not used by this method.
# datetime - object responding to strftime.
# format   - strftime format used for @dates[:date_start].
#
# Returns the formatted string that was stored.
def self.full_date_single_keydates(string, datetime, format)
  formatted = datetime.strftime(format)
  @dates[:date_start] = formatted
end
|
900
|
+
|
901
|
+
|
902
|
+
# Normalises @dates[:index_dates] in place (sorted, duplicates removed) and
# copies its first and last entries to :date_start and :date_end.
def self.process_year_range
  years = @dates[:index_dates]
  years.sort!
  years.uniq!
  @dates[:date_start] = years.first
  @dates[:date_end] = years.last
end
|
908
|
+
|
909
|
+
|
910
|
+
# Picks the strftime format matching the precision of an ISO 8601 string:
# '%Y-%m-%d' for YYYY-MM-DD, '%Y-%m' for YYYY-MM, and '%Y' for anything else.
def self.is8601_string_format(iso_8601_date)
  return '%Y-%m-%d' if iso_8601_date.match(/^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/)
  return '%Y-%m' if iso_8601_date.match(/^[0-9]{4}\-[0-9]{2}$/)

  '%Y'
end
|
919
|
+
|
920
|
+
|
921
|
+
# Parses an ISO 8601 date string with Chronic, first padding partial dates to
# a full day: YYYY-MM gets "-01" appended and anything that is neither
# YYYY-MM-DD nor YYYY-MM gets "-01-01". Returns whatever Chronic.parse returns.
def self.iso8601_datetime(iso_8601_date)
  return Chronic.parse(iso_8601_date) if iso_8601_date.match(/^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/)

  padding = iso_8601_date.match(/^[0-9]{4}\-[0-9]{2}$/) ? '-01' : '-01-01'
  Chronic.parse(iso_8601_date + padding)
end
|
930
|
+
|
931
|
+
|
932
|
+
# Returns the first 4-digit number found in the string and removes every occurrence of it from the string (in place)
|
933
|
+
# Finds the first run of four digits in +string+, deletes every occurrence
# of that run from +string+ (the argument is modified in place) and returns
# the run. Returns "" and leaves the string untouched when there is none.
def self.extract_year(string)
  found = string[/\d{4}/].to_s
  string.gsub!(/#{found}/, '')
  found
end
|
938
|
+
|
939
|
+
|
940
|
+
# removes sub-strings that do not contain parsable data
|
941
|
+
# Strips everything from +string+ that carries no parsable date information
# and rewrites season names as date ranges.
#
# The argument is modified in place and the same object is returned.
#
# Steps, in order:
#   1. season names become "<month day> - <month day>" ranges
#   2. weekday names and times of day (with trailing comma/space) are dropped
#   3. a "?" directly after a digit is dropped
#   4. bracket characters, trailing punctuation, "approx(imately)", circa
#      markers and a trailing "undated"/"n.d." marker are dropped
#   5. surrounding whitespace is trimmed
#
# Season, weekday and time-of-day names are only matched at word boundaries,
# so words that merely contain them ("Springfield", "Falls", "Midnight") are
# left alone.
def self.clean_string(string)
  removable = [
    # [n.y.] and variants
    /\[n\.?y\.?\]/,
    /[\[\]\(\)]/,
    /[\.\,\)\;\:]*$/,
    /\?/,
    /approx\.?(imately)?/i,
    /\s#{regex_tokens[:circa]}\s/,
    /^#{regex_tokens[:circa]}\s/,
    # NOTE: inside this double-quoted string Ruby reads "\s" as a literal
    # space, so the leading class is [,; (and)] rather than "any whitespace"
    Regexp.new("([\,\;\s(and)]{0,4}#{regex_tokens[:nd]})?$")
  ]

  # transform seasons to months
  seasons = [
    [/\b[Ww]inter\b/, " January 1 - March 20 "],
    [/\b[Ss]pring\b/, " March 20 - June 21 "],
    [/\b[Ss]ummer\b/, " June 21 - September 23 "],
    [/\b[Aa]utumn\b/, " September 23 - December 22 "],
    [/\b[Ff]all\b/, " September 23 - December 22 "]
  ]
  seasons.each { |pattern, range| string.gsub!(pattern, range) }

  # remove days of the week
  weekdays = [
    /\b[Ss]unday,?\s+/, /\b[Mm]onday,?\s+/, /\b[Tt]uesday,?\s+/,
    /\b[Ww]ednesday,?\s+/, /\b[Tt]hursday,?\s+/, /\b[Ff]riday,?\s+/,
    /\b[Ss]aturday,?\s+/
  ]
  weekdays.each { |pattern| string.gsub!(pattern, '') }

  # remove times of day
  times_of_day = [
    /\b[Mm]orning,?\s+/, /\b[Aa]fternoon,?\s+/,
    /\b[Ee]vening,?\s+/, /\b[Nn]ight,?\s+/
  ]
  times_of_day.each { |pattern| string.gsub!(pattern, '') }

  # remove single question marks
  string.gsub!(/([0-9])\?([^\?])/,'\1\2')

  removable.each { |pattern| string.gsub!(pattern, '') }
  string.strip!
  string
end
|
977
|
+
|
978
|
+
# Scans +string+ for every match of the :year token from @regex_tokens and
# returns the matches as Integers, with all non-digit characters (brackets,
# "c", punctuation, spaces) stripped first.
def self.year_range(string)
  year_pattern = Regexp.new(@regex_tokens[:year])
  string.scan(year_pattern).map { |token| token.gsub(/[^0-9]*/,'').to_i }
end
|
983
|
+
|
984
|
+
|
985
|
+
# Reduces a date-like value to an Integer for ordering: the value is turned
# into a String, every non-digit character is dropped and the remaining
# digits are read as one number.
#
# The digits are removed on a copy, so a String argument (frozen or not) is
# never modified.
#
# NOTE(review): all digits of the string form take part in the number,
# including any time-zone offset digits a Time prints; confirm that is
# acceptable for the values callers pass in.
def self.datetime_comparitor(datetime)
  datetime.to_s.gsub(/[^\d]/,'').to_i
end
|
990
|
+
|
991
|
+
|
992
|
+
# True when +year+ is a Gregorian leap year: divisible by 400, or divisible
# by 4 but not by 100. A String year is converted with to_i first.
def self.leap_year?(year)
  numeric_year = year.kind_of?(String) ? year.to_i : year
  return true if numeric_year % 400 == 0
  return false if numeric_year % 100 == 0

  numeric_year % 4 == 0
end
|
1004
|
+
|
1005
|
+
|
1006
|
+
# month and year may be Integers or numeric Strings (Strings are converted with to_i)
|
1007
|
+
# Number of days in the given month of the given year.
#
# month - 1..12, as an Integer or a String (converted with to_i).
# year  - Integer or String; only used to decide February's length.
#
# Returns nil when the month is not one of 1..12.
def self.days_in_month(month, year)
  month_number = month.kind_of?(String) ? month.to_i : month
  year_number = year.kind_of?(String) ? year.to_i : year

  february = leap_year?(year_number) ? 29 : 28
  lengths = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

  # keyed lookup so an out-of-range month yields nil
  Hash[(1..12).zip(lengths)][month_number]
end
|
1026
|
+
|
1027
|
+
|
1028
|
+
# Replaces every top-level Integer value in @dates with its String form.
# Other values (Strings, Arrays, booleans, nil) are left as they are.
# Returns @dates.
#
# Integer is checked rather than Fixnum: Fixnum was deprecated in Ruby 2.4
# and later removed, while Integer matches the same values on every version.
def self.stringify_values
  @dates.each do |key, value|
    @dates[key] = value.to_s if value.is_a?(Integer)
  end
end
|
1035
|
+
|
1036
|
+
|
1037
|
+
# Fills in @dates[:date_start_full] and @dates[:date_end_full] when the short
# form is present and the full form is not yet set.
#
# Start dates are completed towards the beginning of the period
# (YYYY-MM -> "-01", YYYY -> "-01-01"); end dates towards its end
# (YYYY-MM -> last day of that month via days_in_month, YYYY -> "-12-31").
# A value already containing YYYY-MM-DD is copied over unchanged.
def self.add_full_dates
  start_date = @dates[:date_start]
  if start_date && !@dates[:date_start_full]
    full_start =
      if start_date.match(/\d{4}\-\d{2}\-\d{2}/)
        start_date
      elsif start_date.match(/\d{4}\-\d{2}/)
        start_date + "-01"
      elsif start_date.match(/\d{4}/)
        start_date + "-01-01"
      end
    @dates[:date_start_full] = full_start if full_start
  end

  end_date = @dates[:date_end]
  if end_date && !@dates[:date_end_full]
    full_end =
      if end_date.match(/\d{4}\-\d{2}\-\d{2}/)
        end_date
      elsif end_date.match(/\d{4}\-\d{2}/)
        year = end_date.match(/^\d{4}/).to_s
        month = end_date.match(/\d{2}$/).to_s
        end_date + "-#{days_in_month(month, year)}"
      elsif end_date.match(/\d{4}/)
        end_date + "-12-31"
      end
    @dates[:date_end_full] = full_end if full_end
  end
end
|
1062
|
+
|
1063
|
+
# Classifies how certain a date string is, based on markers in its text.
#
# Checked in this order, first hit wins (least to most certain):
#   1) 'questionable' - the string contains "?"
#   2) 'approximate'  - it contains "approx", or "ca" / "circa" as a
#                       standalone token (any letter case)
#   3) 'inferred'     - it contains "[" or "]"
# Returns nil when none of the markers is present.
#
# "ca"/"circa" must not be directly preceded or followed by another letter,
# so ordinary words that merely contain "ca" ("Decade", "Vacation") do not
# make a date approximate. A following digit or "." is fine ("ca1950", "ca.").
def self.return_certainty(str)
  return 'questionable' if str.include?('?')

  lowered = str.downcase
  if lowered.include?('approx') || lowered.match(/(?<![a-z])c(irc)?a(?![a-z])/)
    return 'approximate'
  end

  return 'inferred' if str.include?('[') || str.include?(']')

  nil
end
|
1085
|
+
end
|
data/timetwister.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for timetwister.

# Put this checkout's lib/ on the load path so the version file can be
# required before the gem is installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'timetwister/version'

Gem::Specification.new do |spec|
  spec.name          = "timetwister"
  spec.version       = Timetwister::VERSION
  spec.authors       = ["Alex Duryee"]
  spec.email         = ["alexanderduryee@nypl.org"]
  spec.summary       = "Chronic wrapper to handle messy date data"
  # NOTE(review): homepage is empty; fill in the project URL before release.
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package every file tracked by git; executables and tests are picked out
  # of that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # The parser calls Chronic.parse, so chronic must be installed with the gem.
  # NOTE(review): the version constraint is a suggestion; confirm it against
  # the Chronic release the parser was developed with.
  spec.add_runtime_dependency "chronic", "~> 0.10"

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
end
|
metadata
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: timetwister
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Alex Duryee
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-12-10 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
description:
|
42
|
+
email:
|
43
|
+
- alexanderduryee@nypl.org
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- .gitignore
|
49
|
+
- Gemfile
|
50
|
+
- LICENSE.txt
|
51
|
+
- README.md
|
52
|
+
- Rakefile
|
53
|
+
- lib/timetwister.rb
|
54
|
+
- lib/timetwister/parser.rb
|
55
|
+
- lib/timetwister/version.rb
|
56
|
+
- timetwister.gemspec
|
57
|
+
homepage: ''
|
58
|
+
licenses:
|
59
|
+
- MIT
|
60
|
+
metadata: {}
|
61
|
+
post_install_message:
|
62
|
+
rdoc_options: []
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ! '>='
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
requirements: []
|
76
|
+
rubyforge_project:
|
77
|
+
rubygems_version: 2.4.5
|
78
|
+
signing_key:
|
79
|
+
specification_version: 4
|
80
|
+
summary: Chronic wrapper to handle messy date data
|
81
|
+
test_files: []
|