edtf 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/.rspec +3 -0
- data/Gemfile +7 -0
- data/LICENSE +26 -0
- data/README.md +81 -0
- data/Rakefile +21 -0
- data/edtf.gemspec +34 -0
- data/features/parser/date_times.feature +35 -0
- data/features/parser/dates.feature +53 -0
- data/features/parser/intervals.feature +19 -0
- data/features/parser/precision.feature +14 -0
- data/features/parser/unspecified.feature +19 -0
- data/features/step_definitions/edtf_steps.rb +80 -0
- data/features/support/env.rb +1 -0
- data/lib/edtf.rb +35 -0
- data/lib/edtf/date.rb +77 -0
- data/lib/edtf/extensions.rb +4 -0
- data/lib/edtf/interval.rb +66 -0
- data/lib/edtf/parser.y +313 -0
- data/lib/edtf/seasons.rb +36 -0
- data/lib/edtf/uncertainty.rb +58 -0
- data/lib/edtf/version.rb +3 -0
- data/spec/edtf/extensions_spec.rb +44 -0
- data/spec/edtf/seasons_spec.rb +59 -0
- data/spec/edtf/uncertainty_spec.rb +114 -0
- data/spec/spec_helper.rb +8 -0
- metadata +148 -0
data/lib/edtf/date.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
module EDTF

  # Mixin adding EDTF qualifiers (uncertain, approximate, unspecified) and a
  # calendar attribute to the including class. Including the module also
  # extends the host class with ClassMethods (see .included).
  module ExtendedDate

    extend Forwardable

    include Seasons

    attr_accessor :calendar

    # Hook run on include: adds the ClassMethods (e.g. .edtf) to the host class.
    def self.included(base)
      base.extend(ClassMethods)
    end

    # Lazily created Uncertainty object tracking the "uncertain" flags.
    def uncertain
      @uncertain ||= Uncertainty.new
    end

    # Lazily created Uncertainty object tracking the "approximate" flags.
    def approximate
      @approximate ||= Uncertainty.new
    end

    # Lazily created Unspecified object tracking unspecified parts.
    def unspecified
      @unspecified ||= Unspecified.new
    end

    def_delegators :uncertain, :uncertain?, :certain?

    # Forwards to uncertain.certain! and returns self so calls can be chained.
    def certain!(*arguments)
      uncertain.certain!(*arguments)
      self
    end

    # Forwards to uncertain.uncertain! and returns self so calls can be chained.
    def uncertain!(*arguments)
      uncertain.uncertain!(*arguments)
      self
    end

    # Approximation is stored in an Uncertainty object, so "approximate?" is
    # answered by that object's uncertain? predicate.
    def approximate?(*arguments)
      approximate.uncertain?(*arguments)
    end

    # Forwards to approximate.uncertain! and returns self.
    def approximate!(*arguments)
      approximate.uncertain!(*arguments)
      self
    end

    # Inverse of approximate!: forwards to approximate.certain! and returns self.
    def precise!(*arguments)
      approximate.certain!(*arguments)
      self
    end

    # The third predicate is spelled :unspecific? to match the unspecific!
    # alias below.
    # NOTE(review): assumes Unspecified responds to unspecific? — confirm.
    def_delegators :unspecified, :unspecified?, :specified?, :unspecific?, :specific?

    # Forwards to unspecified.unspecified! and returns self.
    def unspecified!(*arguments)
      unspecified.unspecified!(*arguments)
      self
    end

    alias unspecific! unspecified!

    # Forwards to unspecified.specified! and returns self.
    def specified!(*arguments)
      unspecified.specified!(*arguments)
      self
    end

    alias specific! specified!

    module ClassMethods
      # Parses +input+ with a new ::EDTF::Parser configured with +options+
      # and returns whatever the parser's #parse returns.
      def edtf(input, options = {})
        ::EDTF::Parser.new(options).parse(input)
      end
    end

  end

end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module EDTF

  # An interval between two endpoints. Each endpoint is either a date-like
  # value or one of the marker symbols :open / :unknown.
  class Interval

    extend Forwardable

    include Enumerable

    # Range-specific methods (those not already supplied by Enumerable or
    # Object) are forwarded to the Range built by #to_range.
    def_delegators :to_range, *(Range.instance_methods - Enumerable.instance_methods - Object.instance_methods)

    attr_reader :from, :to

    # Both endpoints default to :open. The values are routed through the
    # writers so that a nil endpoint is normalised to :open, exactly as it is
    # when assigned later via from= / to=.
    def initialize(from = :open, to = :open)
      self.from = from
      self.to = to
    end

    # Sets the start; nil (or false) becomes :open.
    def from=(from)
      @from = from || :open
    end

    # Sets the end; nil (or false) becomes :open.
    def to=(to)
      @to = to || :open
    end

    # Generates, for both :open and :unknown:
    #   open?      / unknown?      - true if either endpoint is the marker
    #   open!      / unknown!      - sets the END of the interval to the marker
    #   open_end!  / unknown_end!  - aliases of the bang methods above
    #   open_end?  / unknown_end?  - true if the end is the marker
    [:open, :unknown].each do |method_name|

      define_method("#{method_name}?") do
        @to == method_name || @from == method_name
      end

      define_method("#{method_name}!") do
        @to = method_name
      end

      alias_method("#{method_name}_end!", "#{method_name}!")

      define_method("#{method_name}_end?") do
        @to == method_name
      end

    end

    # True if the start of the interval is :unknown.
    def unknown_start?
      @from == :unknown
    end

    # Marks the start of the interval as :unknown.
    def unknown_start!
      @from = :unknown
    end

    # TODO how to handle +/- Infinity for Dates?

    # Returns a Range from #from to #to, or nil when either endpoint is :open
    # or the end is :unknown. An :unknown start is replaced by Date.new.
    def to_range
      return nil if open? || unknown_end?

      Range.new(unknown_start? ? Date.new : from, to)
    end

  end

end
|
data/lib/edtf/parser.y
ADDED
@@ -0,0 +1,313 @@
|
|
1
|
+
# -*- racc -*-
|
2
|
+
|
3
|
+
class EDTF::Parser
|
4
|
+
|
5
|
+
token T Z E X PLUS MINUS COLON SLASH D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 LP RP
|
6
|
+
UNCERTAIN APPROXIMATE UNSPECIFIED UNKNOWN OPEN LONGYEAR CARET UNMATCHED
|
7
|
+
|
8
|
+
expect 0
|
9
|
+
|
10
|
+
rule
|
11
|
+
|
12
|
+
edtf : level_0_expression
|
13
|
+
| level_1_expression
|
14
|
+
| level_2_expression
|
15
|
+
# | { result = Date.today }
|
16
|
+
|
17
|
+
|
18
|
+
# ---- Level 0 / ISO 8601 Rules ----
|
19
|
+
|
20
|
+
level_0_expression : date
|
21
|
+
| date_time
|
22
|
+
# | level_0_interval # --> level_1_interval
|
23
|
+
|
24
|
+
date : positive_date
|
25
|
+
| negative_date
|
26
|
+
|
27
|
+
positive_date : year { result = Date.new(val[0]) }
|
28
|
+
| year_month { result = Date.new(*val.flatten) }
|
29
|
+
| year_month_day { result = Date.new(*val.flatten) }
|
30
|
+
|
31
|
+
negative_date : MINUS positive_date { result = Date.new(-1 * val[1].year, val[1].month, val[1].day) }
|
32
|
+
|
33
|
+
|
34
|
+
date_time : date T time { result = DateTime.new(val[0].year, val[0].month, val[0].day, *val[2]) }
|
35
|
+
|
36
|
+
time : base_time
|
37
|
+
| base_time zone_offset { result = val.flatten }
|
38
|
+
|
39
|
+
base_time : hour COLON minute COLON second { result = [val[0], val[2], val[4]] }
|
40
|
+
| midnight
|
41
|
+
|
42
|
+
midnight : D2 D4 COLON D0 D0 COLON D0 D0 { result = [24, 0, 0] }
|
43
|
+
|
44
|
+
zone_offset : Z { result = 0 }
|
45
|
+
| MINUS zone_offset_hour { result = -1 * val[1] }
|
46
|
+
| PLUS positive_zone_offset { result = val[1] }
|
47
|
+
|
48
|
+
positive_zone_offset : zone_offset_hour
|
49
|
+
| D0 D0 COLON D0 D0 { result = 0 }
|
50
|
+
|
51
|
+
|
52
|
+
zone_offset_hour : d01_13 COLON minute { result = Rational(val[0] * 60 + val[2], 1440) }
|
53
|
+
| D1 D4 COLON D0 D0 { result = Rational(840, 1440) }
|
54
|
+
| D0 D0 COLON d01_59 { result = Rational(val[3], 1440) }
|
55
|
+
|
56
|
+
year : digit digit digit digit { result = val.zip([1000,100,10,1]).reduce(0) { |s,(a,b)| s += a * b } }
|
57
|
+
|
58
|
+
month : d01_12
|
59
|
+
|
60
|
+
year_month : year MINUS month { result = [val[0], val[2]] }
|
61
|
+
|
62
|
+
# We raise an exception if there are too many days for the month, but
|
63
|
+
# do not consider leap years, as the EDTF BNF did not either.
|
64
|
+
# NB: an exception will be raised regardless, because the Ruby Date
|
65
|
+
# implementation calculates leap years.
|
66
|
+
year_month_day : year_month MINUS d01_31 { result = val[0] << val[2]; raise ArgumentError, "invalid date (invalid days #{result[2]} for month #{result[1]})" if result[2] > 31 || (result[2] > 30 && [2,4,6,9,11].include?(result[1])) || (result[2] > 29 && result[1] == 2) }
|
67
|
+
|
68
|
+
hour : d00_23
|
69
|
+
|
70
|
+
minute : d00_59
|
71
|
+
|
72
|
+
second : d00_59
|
73
|
+
|
74
|
+
# covered by level_1_interval
|
75
|
+
# level_0_interval : date SLASH date { result = Interval.new(val[0], val[1]) }
|
76
|
+
|
77
|
+
# ---- Level 1 Extension Rules ----
|
78
|
+
|
79
|
+
level_1_expression : uncertain_or_approximate_date
|
80
|
+
| unspecified
|
81
|
+
| level_1_interval
|
82
|
+
| long_year_simple
|
83
|
+
| season
|
84
|
+
|
85
|
+
|
86
|
+
uncertain_or_approximate_date : date uncertain_or_approximate { result = val[0]; val[1].each { |m| result.send(m) } }
|
87
|
+
|
88
|
+
uncertain_or_approximate : UNCERTAIN { result = [:uncertain!] }
|
89
|
+
| APPROXIMATE { result = [:approximate!] }
|
90
|
+
| UNCERTAIN APPROXIMATE { result = [:uncertain!, :approximate!] }
|
91
|
+
|
92
|
+
|
93
|
+
unspecified : unspecified_year
|
94
|
+
| unspecified_month
|
95
|
+
| unspecified_day
|
96
|
+
| unspecified_day_and_month
|
97
|
+
|
98
|
+
unspecified_year : digit digit digit UNSPECIFIED { result = Date.new(val[0,3].zip([1000,100,10]).reduce(0) { |s,(a,b)| s += a * b }); result.unspecified.year[3] = true }
|
99
|
+
| digit digit UNSPECIFIED UNSPECIFIED { result = Date.new(val[0,2].zip([1000,100]).reduce(0) { |s,(a,b)| s += a * b }); result.unspecified.year[2,2] = [true, true] }
|
100
|
+
|
101
|
+
unspecified_month : year MINUS UNSPECIFIED UNSPECIFIED { result = Date.new(val[0]).unspecified!(:month) }
|
102
|
+
|
103
|
+
unspecified_day : year_month MINUS UNSPECIFIED UNSPECIFIED { result = Date.new(*val[0]).unspecified!(:day) }
|
104
|
+
|
105
|
+
unspecified_day_and_month : year MINUS UNSPECIFIED UNSPECIFIED MINUS UNSPECIFIED UNSPECIFIED { result = Date.new(val[0]).unspecified!([:day,:month]) }
|
106
|
+
|
107
|
+
|
108
|
+
level_1_interval : level_1_start SLASH level_1_end { result = Interval.new(val[0], val[2]) }
|
109
|
+
|
110
|
+
level_1_start : date
|
111
|
+
| uncertain_or_approximate_date
|
112
|
+
| UNKNOWN { result = :unknown }
|
113
|
+
|
114
|
+
level_1_end : level_1_start
|
115
|
+
| OPEN { result = :open }
|
116
|
+
|
117
|
+
|
118
|
+
long_year_simple : LONGYEAR long_year { result = Date.new(val[1]) }
|
119
|
+
| LONGYEAR MINUS long_year { result = Date.new(-1 * val[2]) }
|
120
|
+
|
121
|
+
long_year : positive_digit digit digit digit digit { result = val.zip([10000,1000,100,10,1]).reduce(0) { |s,(a,b)| s += a * b } }
|
122
|
+
| long_year digit { result = 10 * val[0] + val[1] }
|
123
|
+
|
124
|
+
|
125
|
+
season : year MINUS season_number { result = Date.new(val[0]); result.season = val[2] }
|
126
|
+
|
127
|
+
season_number : D2 D1 { result = 21 }
|
128
|
+
| D2 D2 { result = 22 }
|
129
|
+
| D2 D3 { result = 23 }
|
130
|
+
| D2 D4 { result = 24 }
|
131
|
+
|
132
|
+
|
133
|
+
# ---- Level 2 Extension Rules ----
|
134
|
+
|
135
|
+
level_2_expression : season_qualified
|
136
|
+
# | internal_uncertain_or_approximate
|
137
|
+
# | internal_unspecified
|
138
|
+
# | choice_list
|
139
|
+
# | inclusive_list
|
140
|
+
| masked_precision
|
141
|
+
# | level_2_interval
|
142
|
+
| date_and_calendar
|
143
|
+
| long_year_scientific
|
144
|
+
|
145
|
+
|
146
|
+
season_qualified : season CARET { result = val[0]; result.qualifier = val[1] }
|
147
|
+
|
148
|
+
|
149
|
+
long_year_scientific : long_year_simple E integer { result = Date.new(val[0].year * 10 ** val[2]) }
|
150
|
+
| LONGYEAR int1_4 E integer { result = Date.new(val[1] * 10 ** val[3]) }
|
151
|
+
| LONGYEAR MINUS int1_4 E integer { result = Date.new(-1 * val[2] * 10 ** val[4]) }
|
152
|
+
|
153
|
+
|
154
|
+
date_and_calendar : date CARET { result = val[0]; result.calendar = val[1] }
|
155
|
+
|
156
|
+
|
157
|
+
masked_precision : digit digit digit X { d = val[0,3].zip([1000,100,10]).reduce(0) { |s,(a,b)| s += a * b }; result = Date.new(d) ... Date.new(d+10) }
|
158
|
+
| digit digit X X { d = val[0,2].zip([1000,100]).reduce(0) { |s,(a,b)| s += a * b }; result = Date.new(d) ... Date.new(d+100) }
|
159
|
+
|
160
|
+
# ---- Auxiliary Rules ----
|
161
|
+
|
162
|
+
digit : D0 { result = 0 }
|
163
|
+
| positive_digit
|
164
|
+
|
165
|
+
positive_digit : D1 { result = 1 }
|
166
|
+
| D2 { result = 2 }
|
167
|
+
| D3 { result = 3 }
|
168
|
+
| D4 { result = 4 }
|
169
|
+
| D5 { result = 5 }
|
170
|
+
| D6 { result = 6 }
|
171
|
+
| D7 { result = 7 }
|
172
|
+
| D8 { result = 8 }
|
173
|
+
| D9 { result = 9 }
|
174
|
+
|
175
|
+
d01_12 : D0 positive_digit { result = val[1] }
|
176
|
+
| D1 D0 { result = 10 }
|
177
|
+
| D1 D1 { result = 11 }
|
178
|
+
| D1 D2 { result = 12 }
|
179
|
+
|
180
|
+
d01_13 : d01_12
|
181
|
+
| D1 D3 { result = 13 }
|
182
|
+
|
183
|
+
d01_23 : D0 positive_digit { result = val[1] }
|
184
|
+
| D1 digit { result = 10 + val[1] }
|
185
|
+
| D2 D0 { result = 20 }
|
186
|
+
| D2 D1 { result = 21 }
|
187
|
+
| D2 D2 { result = 22 }
|
188
|
+
| D2 D3 { result = 23 }
|
189
|
+
|
190
|
+
d00_23 : D0 D0 { result = 0 }
|
191
|
+
| d01_23
|
192
|
+
|
193
|
+
d01_29 : d01_23
|
194
|
+
| D2 D4 { result = 24 }
|
195
|
+
| D2 D5 { result = 25 }
|
196
|
+
| D2 D6 { result = 26 }
|
197
|
+
| D2 D7 { result = 27 }
|
198
|
+
| D2 D8 { result = 28 }
|
199
|
+
| D2 D9 { result = 29 }
|
200
|
+
|
201
|
+
d01_30 : d01_29
|
202
|
+
| D3 D0 { result = 30 }
|
203
|
+
|
204
|
+
d01_31 : d01_30
|
205
|
+
| D3 D1 { result = 31 }
|
206
|
+
|
207
|
+
d01_59 : d01_29
|
208
|
+
| D3 digit { result = 30 + val[1] }
|
209
|
+
| D4 digit { result = 40 + val[1] }
|
210
|
+
| D5 digit { result = 50 + val[1] }
|
211
|
+
|
212
|
+
d00_59 : D0 D0 { result = 0 }
|
213
|
+
| d01_59
|
214
|
+
|
215
|
+
int1_4 : positive_digit { result = val[0] }
|
216
|
+
| positive_digit digit { result = 10 * val[0] + val[1] }
|
217
|
+
| positive_digit digit digit { result = val.zip([100,10,1]).reduce(0) { |s,(a,b)| s += a * b } }
|
218
|
+
| positive_digit digit digit digit { result = val.zip([1000,100,10,1]).reduce(0) { |s,(a,b)| s += a * b } }
|
219
|
+
|
220
|
+
integer : positive_digit { result = val[0] }
|
221
|
+
| integer digit { result = 10 * val[0] + val[1] }
|
222
|
+
|
223
|
+
|
224
|
+
|
225
|
+
---- header
|
226
|
+
require 'strscan'
|
227
|
+
|
228
|
+
---- inner
|
229
|
+
|
230
|
+
# Default parser options, stored on the class and exposed through the
# class-level reader defined below. Parser#initialize merges caller options
# over these values.
@defaults = {
|
231
|
+
:level => 2,
|
232
|
+
:debug => false
|
233
|
+
}
|
234
|
+
|
235
|
+
# Class-level reader for the defaults hash above.
class << self; attr_reader :defaults; end
|
236
|
+
|
237
|
+
# Per-instance options (the defaults merged with the caller's overrides).
attr_reader :options
|
238
|
+
|
239
|
+
# Creates a parser whose options are Parser.defaults with the given +options+
# hash merged over them (caller-supplied keys win).
def initialize(options = {})
|
240
|
+
@options = Parser.defaults.merge(options)
|
241
|
+
end
|
242
|
+
|
243
|
+
# Tokenizes +input+ and runs the parser over the resulting tokens, returning
# whatever do_parse returns. Debug output is enabled when the :debug option
# or the DEBUG environment variable is set.
# NOTE(review): do_parse and @yydebug are presumably provided/read by the racc
# runtime — they are not defined in this file.
def parse(input)
|
244
|
+
@yydebug = @options[:debug] || ENV['DEBUG']
|
245
|
+
scan(input)
|
246
|
+
do_parse
|
247
|
+
end
|
248
|
+
|
249
|
+
# Removes and returns the first [token, value] pair from @stack (the array
# filled by #scan); returns nil once the array is empty.
def next_token
|
250
|
+
@stack.shift
|
251
|
+
end
|
252
|
+
|
253
|
+
# Error callback: emits a warning describing the offending value, its token
# name (or '?' when token_to_str returns nothing) and the value stack. It
# does not raise.
# NOTE(review): presumably invoked by the racc runtime on a syntax error —
# confirm against the racc documentation.
def on_error(tid, val, vstack)
|
254
|
+
warn "failed to parse extended date time %s (%s) %s" % [val.inspect, token_to_str(tid) || '?', vstack.inspect]
|
255
|
+
end
|
256
|
+
|
257
|
+
# Wraps +input+ in a StringScanner, resets the token stack and tokenizes the
# input. Returns the result of #tokenize.
def scan(input)
|
258
|
+
@src = StringScanner.new(input)
|
259
|
+
@stack = []
|
260
|
+
tokenize
|
261
|
+
end
|
262
|
+
|
263
|
+
private
|
264
|
+
|
265
|
+
# Consumes @src (a StringScanner) from its current position to the end and
# appends one [token_symbol, value] pair per lexeme to @stack, which is
# returned.
#
# The order of the branches matters: the multi-character keywords "open" and
# "unknown" must be tried before the single-letter tokens ("u", "e", ...).
#
# Any character that matches no rule is consumed and recorded as an
# :UNMATCHED token whose value is the input remaining after that character.
def tokenize
  until @src.eos?
    case
    when @src.scan(/\(/)
      @stack << [:LP, @src.matched]
    when @src.scan(/\)/)
      @stack << [:RP, @src.matched]
    when @src.scan(/T/)
      @stack << [:T, @src.matched]
    when @src.scan(/Z/)
      @stack << [:Z, @src.matched]
    when @src.scan(/\?/)
      @stack << [:UNCERTAIN, @src.matched]
    when @src.scan(/~/)
      @stack << [:APPROXIMATE, @src.matched]
    when @src.scan(/open/i)
      @stack << [:OPEN, @src.matched]
    when @src.scan(/unkn?own/i) # matches 'unkown' typo too
      @stack << [:UNKNOWN, @src.matched]
    when @src.scan(/u/)
      @stack << [:UNSPECIFIED, @src.matched]
    when @src.scan(/x/i)
      @stack << [:X, @src.matched]
    when @src.scan(/y/)
      @stack << [:LONGYEAR, @src.matched]
    when @src.scan(/e/)
      @stack << [:E, @src.matched]
    when @src.scan(/\+/)
      @stack << [:PLUS, @src.matched]
    when @src.scan(/-/)
      @stack << [:MINUS, @src.matched]
    when @src.scan(/:/)
      @stack << [:COLON, @src.matched]
    when @src.scan(/\//)
      @stack << [:SLASH, @src.matched]
    when @src.scan(/\^\w+/)
      # Drop the leading caret; only the qualifier/calendar name is kept.
      @stack << [:CARET, @src.matched[1..-1]]
    when @src.scan(/\d/)
      # Each digit gets its own token type: :D0 .. :D9.
      @stack << [:"D#{@src.matched}", @src.matched]
    else
      # /m lets '.' match a newline as well. Without it the scanner cannot
      # advance past "\n", eos? never becomes true and this loop never ends.
      @src.scan(/./m)
      @stack << [:UNMATCHED, @src.rest]
    end
  end

  @stack
end
|
311
|
+
|
312
|
+
|
313
|
+
# -*- racc -*-
|