edtf 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/.rspec +3 -0
- data/Gemfile +7 -0
- data/LICENSE +26 -0
- data/README.md +81 -0
- data/Rakefile +21 -0
- data/edtf.gemspec +34 -0
- data/features/parser/date_times.feature +35 -0
- data/features/parser/dates.feature +53 -0
- data/features/parser/intervals.feature +19 -0
- data/features/parser/precision.feature +14 -0
- data/features/parser/unspecified.feature +19 -0
- data/features/step_definitions/edtf_steps.rb +80 -0
- data/features/support/env.rb +1 -0
- data/lib/edtf.rb +35 -0
- data/lib/edtf/date.rb +77 -0
- data/lib/edtf/extensions.rb +4 -0
- data/lib/edtf/interval.rb +66 -0
- data/lib/edtf/parser.y +313 -0
- data/lib/edtf/seasons.rb +36 -0
- data/lib/edtf/uncertainty.rb +58 -0
- data/lib/edtf/version.rb +3 -0
- data/spec/edtf/extensions_spec.rb +44 -0
- data/spec/edtf/seasons_spec.rb +59 -0
- data/spec/edtf/uncertainty_spec.rb +114 -0
- data/spec/spec_helper.rb +8 -0
- metadata +148 -0
data/lib/edtf/date.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
module EDTF

  # Mixin that attaches EDTF qualifiers to the including object: an
  # "uncertain" flag set, an "approximate" flag set, an "unspecified" flag
  # set and a +calendar+ attribute. Including the module also extends the
  # including class with ClassMethods (see .included).
  module ExtendedDate

    extend Forwardable

    include Seasons

    # Calendar name associated with the date (the parser assigns the text
    # that follows a "^" in the input).
    attr_accessor :calendar

    # Hook run on +include+; adds the class-level +edtf+ method to +base+.
    def self.included(base)
      base.extend(ClassMethods)
    end

    # Returns the Uncertainty object tracking which parts are uncertain,
    # creating it on first access.
    def uncertain
      @uncertain ||= Uncertainty.new
    end

    # Returns the Uncertainty object tracking which parts are approximate,
    # creating it on first access. It is a separate instance from the one
    # returned by #uncertain.
    def approximate
      @approximate ||= Uncertainty.new
    end

    # Returns the Unspecified object tracking which parts are unspecified,
    # creating it on first access.
    def unspecified
      @unspecified ||= Unspecified.new
    end

    def_delegators :uncertain, :uncertain?, :certain?

    # Forwards to uncertain.certain! and returns self so calls can be chained.
    def certain!(*arguments)
      uncertain.certain!(*arguments)
      self
    end

    # Forwards to uncertain.uncertain! and returns self so calls can be chained.
    def uncertain!(*arguments)
      uncertain.uncertain!(*arguments)
      self
    end

    # Approximation is stored in its own Uncertainty object, so the query
    # is answered by that object's uncertain? method.
    def approximate?(*arguments)
      approximate.uncertain?(*arguments)
    end

    # Forwards to approximate.uncertain! and returns self.
    def approximate!(*arguments)
      approximate.uncertain!(*arguments)
      self
    end

    # Counterpart of #approximate!: forwards to approximate.certain! and
    # returns self.
    def precise!(*arguments)
      approximate.certain!(*arguments)
      self
    end

    def_delegators :unspecified, :unspecified?, :specified?, :specific?

    # Query counterpart of the unspecific! alias defined further down.
    # Defined as a local alias of the delegated unspecified? so it does not
    # depend on the delegate object offering a method of the same name.
    alias unspecific? unspecified?

    # Forwards to unspecified.unspecified! and returns self.
    def unspecified!(*arguments)
      unspecified.unspecified!(*arguments)
      self
    end

    alias unspecific! unspecified!

    # Forwards to unspecified.specified! and returns self.
    def specified!(*arguments)
      unspecified.specified!(*arguments)
      self
    end

    alias specific! specified!

    # Methods added to every class that includes ExtendedDate.
    module ClassMethods
      # Parses +input+ with a new EDTF::Parser configured with +options+
      # and returns whatever the parser returns.
      def edtf(input, options = {})
        ::EDTF::Parser.new(options).parse(input)
      end
    end

  end

end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module EDTF

  # An interval with a start (+from+) and an end (+to+). Either endpoint
  # may be a value usable as a Range boundary or one of the marker symbols
  # :open / :unknown.
  class Interval

    extend Forwardable

    include Enumerable

    # Range methods that neither Enumerable nor Object already provide
    # (this includes #each) are forwarded to the Range built by #to_range.
    # When #to_range returns nil the forwarded call is made on nil.
    def_delegators :to_range, *(Range.instance_methods - Enumerable.instance_methods - Object.instance_methods)

    attr_reader :from, :to

    # Both endpoints default to :open. The values go through the writers
    # so that an explicit nil is normalised to :open here exactly as it is
    # by #from= and #to=.
    def initialize(from = :open, to = :open)
      self.from = from
      self.to = to
    end

    # Sets the start; nil (or false) is stored as :open.
    def from=(from)
      @from = from || :open
    end

    # Sets the end; nil (or false) is stored as :open.
    def to=(to)
      @to = to || :open
    end

    # Generates, for each of :open and :unknown:
    #   open?     / unknown?      - true if either endpoint is the marker
    #   open!     / unknown!      - sets the END to the marker
    #   open_end! / unknown_end!  - aliases of the bang methods above
    #   open_end? / unknown_end?  - true if the end is the marker
    [:open, :unknown].each do |method_name|

      define_method("#{method_name}?") do
        @to == method_name || @from == method_name
      end

      define_method("#{method_name}!") do
        @to = method_name
      end

      alias_method("#{method_name}_end!", "#{method_name}!")

      define_method("#{method_name}_end?") do
        @to == method_name
      end

    end

    # True if the start of the interval is :unknown.
    def unknown_start?
      @from == :unknown
    end

    # Marks the start of the interval as :unknown.
    def unknown_start!
      @from = :unknown
    end

    # TODO how to handle +/- Infinity for Dates?

    # Converts the interval to a Range. Returns nil when either endpoint is
    # :open or when the end is :unknown. An :unknown start is replaced by
    # Date.new (the Date class's default date).
    def to_range
      case
      when open?
        nil
      when unknown_end?
        nil
      else
        Range.new(unknown_start? ? Date.new : from, to)
      end
    end

  end

end
|
data/lib/edtf/parser.y
ADDED
@@ -0,0 +1,313 @@
|
|
1
|
+
# -*- racc -*-
|
2
|
+
|
3
|
+
class EDTF::Parser
|
4
|
+
|
5
|
+
token T Z E X PLUS MINUS COLON SLASH D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 LP RP
|
6
|
+
UNCERTAIN APPROXIMATE UNSPECIFIED UNKNOWN OPEN LONGYEAR CARET UNMATCHED
|
7
|
+
|
8
|
+
expect 0
|
9
|
+
|
10
|
+
rule
|
11
|
+
|
12
|
+
edtf : level_0_expression
|
13
|
+
| level_1_expression
|
14
|
+
| level_2_expression
|
15
|
+
# | { result = Date.today }
|
16
|
+
|
17
|
+
|
18
|
+
# ---- Level 0 / ISO 8601 Rules ----
|
19
|
+
|
20
|
+
level_0_expression : date
|
21
|
+
| date_time
|
22
|
+
# | level_0_interval # --> level_1_interval
|
23
|
+
|
24
|
+
date : positive_date
|
25
|
+
| negative_date
|
26
|
+
|
27
|
+
positive_date : year { result = Date.new(val[0]) }
|
28
|
+
| year_month { result = Date.new(*val.flatten) }
|
29
|
+
| year_month_day { result = Date.new(*val.flatten) }
|
30
|
+
|
31
|
+
negative_date : MINUS positive_date { result = Date.new(-1 * val[1].year, val[1].month, val[1].day) }
|
32
|
+
|
33
|
+
|
34
|
+
date_time : date T time { result = DateTime.new(val[0].year, val[0].month, val[0].day, *val[2]) }
|
35
|
+
|
36
|
+
time : base_time
|
37
|
+
| base_time zone_offset { result = val.flatten }
|
38
|
+
|
39
|
+
base_time : hour COLON minute COLON second { result = [val[0], val[2], val[4]] }
|
40
|
+
| midnight
|
41
|
+
|
42
|
+
midnight : D2 D4 COLON D0 D0 COLON D0 D0 { result = [24, 0, 0] }
|
43
|
+
|
44
|
+
zone_offset : Z { result = 0 }
|
45
|
+
| MINUS zone_offset_hour { result = -1 * val[1] }
|
46
|
+
| PLUS positive_zone_offset { result = val[1] }
|
47
|
+
|
48
|
+
positive_zone_offset : zone_offset_hour
|
49
|
+
| D0 D0 COLON D0 D0 { result = 0 }
|
50
|
+
|
51
|
+
|
52
|
+
zone_offset_hour : d01_13 COLON minute { result = Rational(val[0] * 60 + val[2], 1440) }
|
53
|
+
| D1 D4 COLON D0 D0 { result = Rational(840, 1440) }
|
54
|
+
| D0 D0 COLON d01_59 { result = Rational(val[3], 1440) }
|
55
|
+
|
56
|
+
year : digit digit digit digit { result = val.zip([1000,100,10,1]).reduce(0) { |s,(a,b)| s += a * b } }
|
57
|
+
|
58
|
+
month : d01_12
|
59
|
+
|
60
|
+
year_month : year MINUS month { result = [val[0], val[2]] }
|
61
|
+
|
62
|
+
# We raise an exception if there are too many days for the month, but
|
63
|
+
# do not consider leap years, as the EDTF BNF did not either.
|
64
|
+
# NB: an exception will be raised regardless, because the Ruby Date
|
65
|
+
# implementation calculates leap years.
|
66
|
+
year_month_day : year_month MINUS d01_31 { result = val[0] << val[2]; raise ArgumentError, "invalid date (invalid days #{result[2]} for month #{result[1]})" if result[2] > 31 || (result[2] > 30 && [2,4,6,9,11].include?(result[1])) || (result[2] > 29 && result[1] == 2) }
|
67
|
+
|
68
|
+
hour : d00_23
|
69
|
+
|
70
|
+
minute : d00_59
|
71
|
+
|
72
|
+
second : d00_59
|
73
|
+
|
74
|
+
# covered by level_1_interval
|
75
|
+
# level_0_interval : date SLASH date { result = Interval.new(val[0], val[1]) }
|
76
|
+
|
77
|
+
# ---- Level 1 Extension Rules ----
|
78
|
+
|
79
|
+
level_1_expression : uncertain_or_approximate_date
|
80
|
+
| unspecified
|
81
|
+
| level_1_interval
|
82
|
+
| long_year_simple
|
83
|
+
| season
|
84
|
+
|
85
|
+
|
86
|
+
uncertain_or_approximate_date : date uncertain_or_approximate { result = val[0]; val[1].each { |m| result.send(m) } }
|
87
|
+
|
88
|
+
uncertain_or_approximate : UNCERTAIN { result = [:uncertain!] }
|
89
|
+
| APPROXIMATE { result = [:approximate!] }
|
90
|
+
| UNCERTAIN APPROXIMATE { result = [:uncertain!, :approximate!] }
|
91
|
+
|
92
|
+
|
93
|
+
unspecified : unspecified_year
|
94
|
+
| unspecified_month
|
95
|
+
| unspecified_day
|
96
|
+
| unspecified_day_and_month
|
97
|
+
|
98
|
+
unspecified_year : digit digit digit UNSPECIFIED { result = Date.new(val[0,3].zip([1000,100,10]).reduce(0) { |s,(a,b)| s += a * b }); result.unspecified.year[3] = true }
|
99
|
+
| digit digit UNSPECIFIED UNSPECIFIED { result = Date.new(val[0,2].zip([1000,100]).reduce(0) { |s,(a,b)| s += a * b }); result.unspecified.year[2,2] = [true, true] }
|
100
|
+
|
101
|
+
unspecified_month : year MINUS UNSPECIFIED UNSPECIFIED { result = Date.new(val[0]).unspecified!(:month) }
|
102
|
+
|
103
|
+
unspecified_day : year_month MINUS UNSPECIFIED UNSPECIFIED { result = Date.new(*val[0]).unspecified!(:day) }
|
104
|
+
|
105
|
+
unspecified_day_and_month : year MINUS UNSPECIFIED UNSPECIFIED MINUS UNSPECIFIED UNSPECIFIED { result = Date.new(val[0]).unspecified!([:day,:month]) }
|
106
|
+
|
107
|
+
|
108
|
+
level_1_interval : level_1_start SLASH level_1_end { result = Interval.new(val[0], val[2]) }
|
109
|
+
|
110
|
+
level_1_start : date
|
111
|
+
| uncertain_or_approximate_date
|
112
|
+
| UNKNOWN { result = :unknown }
|
113
|
+
|
114
|
+
level_1_end : level_1_start
|
115
|
+
| OPEN { result = :open }
|
116
|
+
|
117
|
+
|
118
|
+
long_year_simple : LONGYEAR long_year { result = Date.new(val[1]) }
|
119
|
+
| LONGYEAR MINUS long_year { result = Date.new(-1 * val[2]) }
|
120
|
+
|
121
|
+
long_year : positive_digit digit digit digit digit { result = val.zip([10000,1000,100,10,1]).reduce(0) { |s,(a,b)| s += a * b } }
|
122
|
+
| long_year digit { result = 10 * val[0] + val[1] }
|
123
|
+
|
124
|
+
|
125
|
+
season : year MINUS season_number { result = Date.new(val[0]); result.season = val[2] }
|
126
|
+
|
127
|
+
season_number : D2 D1 { result = 21 }
|
128
|
+
| D2 D2 { result = 22 }
|
129
|
+
| D2 D3 { result = 23 }
|
130
|
+
| D2 D4 { result = 24 }
|
131
|
+
|
132
|
+
|
133
|
+
# ---- Level 2 Extension Rules ----
|
134
|
+
|
135
|
+
level_2_expression : season_qualified
|
136
|
+
# | internal_uncertain_or_approximate
|
137
|
+
# | internal_unspecified
|
138
|
+
# | choice_list
|
139
|
+
# | inclusive_list
|
140
|
+
| masked_precision
|
141
|
+
# | level_2_interval
|
142
|
+
| date_and_calendar
|
143
|
+
| long_year_scientific
|
144
|
+
|
145
|
+
|
146
|
+
season_qualified : season CARET { result = val[0]; result.qualifier = val[1] }
|
147
|
+
|
148
|
+
|
149
|
+
long_year_scientific : long_year_simple E integer { result = Date.new(val[0].year * 10 ** val[2]) }
|
150
|
+
| LONGYEAR int1_4 E integer { result = Date.new(val[1] * 10 ** val[3]) }
|
151
|
+
| LONGYEAR MINUS int1_4 E integer { result = Date.new(-1 * val[2] * 10 ** val[4]) }
|
152
|
+
|
153
|
+
|
154
|
+
date_and_calendar : date CARET { result = val[0]; result.calendar = val[1] }
|
155
|
+
|
156
|
+
|
157
|
+
masked_precision : digit digit digit X { d = val[0,3].zip([1000,100,10]).reduce(0) { |s,(a,b)| s += a * b }; result = Date.new(d) ... Date.new(d+10) }
|
158
|
+
| digit digit X X { d = val[0,2].zip([1000,100]).reduce(0) { |s,(a,b)| s += a * b }; result = Date.new(d) ... Date.new(d+100) }
|
159
|
+
|
160
|
+
# ---- Auxiliary Rules ----
|
161
|
+
|
162
|
+
digit : D0 { result = 0 }
|
163
|
+
| positive_digit
|
164
|
+
|
165
|
+
positive_digit : D1 { result = 1 }
|
166
|
+
| D2 { result = 2 }
|
167
|
+
| D3 { result = 3 }
|
168
|
+
| D4 { result = 4 }
|
169
|
+
| D5 { result = 5 }
|
170
|
+
| D6 { result = 6 }
|
171
|
+
| D7 { result = 7 }
|
172
|
+
| D8 { result = 8 }
|
173
|
+
| D9 { result = 9 }
|
174
|
+
|
175
|
+
d01_12 : D0 positive_digit { result = val[1] }
|
176
|
+
| D1 D0 { result = 10 }
|
177
|
+
| D1 D1 { result = 11 }
|
178
|
+
| D1 D2 { result = 12 }
|
179
|
+
|
180
|
+
d01_13 : d01_12
|
181
|
+
| D1 D3 { result = 13 }
|
182
|
+
|
183
|
+
d01_23 : D0 positive_digit { result = val[1] }
|
184
|
+
| D1 digit { result = 10 + val[1] }
|
185
|
+
| D2 D0 { result = 20 }
|
186
|
+
| D2 D1 { result = 21 }
|
187
|
+
| D2 D2 { result = 22 }
|
188
|
+
| D2 D3 { result = 23 }
|
189
|
+
|
190
|
+
d00_23 : D0 D0 { result = 0 }
|
191
|
+
| d01_23
|
192
|
+
|
193
|
+
d01_29 : d01_23
|
194
|
+
| D2 D4 { result = 24 }
|
195
|
+
| D2 D5 { result = 25 }
|
196
|
+
| D2 D6 { result = 26 }
|
197
|
+
| D2 D7 { result = 27 }
|
198
|
+
| D2 D8 { result = 28 }
|
199
|
+
| D2 D9 { result = 29 }
|
200
|
+
|
201
|
+
d01_30 : d01_29
|
202
|
+
| D3 D0 { result = 30 }
|
203
|
+
|
204
|
+
d01_31 : d01_30
|
205
|
+
| D3 D1 { result = 31 }
|
206
|
+
|
207
|
+
d01_59 : d01_29
|
208
|
+
| D3 digit { result = 30 + val[1] }
|
209
|
+
| D4 digit { result = 40 + val[1] }
|
210
|
+
| D5 digit { result = 50 + val[1] }
|
211
|
+
|
212
|
+
d00_59 : D0 D0 { result = 0 }
|
213
|
+
| d01_59
|
214
|
+
|
215
|
+
int1_4 : positive_digit { result = val[0] }
|
216
|
+
| positive_digit digit { result = 10 * val[0] + val[1] }
|
217
|
+
| positive_digit digit digit { result = val.zip([100,10,1]).reduce(0) { |s,(a,b)| s += a * b } }
|
218
|
+
| positive_digit digit digit digit { result = val.zip([1000,100,10,1]).reduce(0) { |s,(a,b)| s += a * b } }
|
219
|
+
|
220
|
+
integer : positive_digit { result = val[0] }
|
221
|
+
| integer digit { result = 10 * val[0] + val[1] }
|
222
|
+
|
223
|
+
|
224
|
+
|
225
|
+
---- header
|
226
|
+
require 'strscan'
|
227
|
+
|
228
|
+
---- inner
|
229
|
+
|
230
|
+
# Class-level default options.
#   :level - defaults to 2
#            (NOTE(review): not read anywhere in the visible code;
#            presumably the EDTF feature level - confirm)
#   :debug - defaults to false
@defaults = { :level => 2, :debug => false }

# Expose the hash above through a class-level reader named +defaults+.
class << self
  attr_reader :defaults
end
|
236
|
+
|
237
|
+
attr_reader :options
|
238
|
+
|
239
|
+
# Creates a parser. The entries of +options+ are merged over
# Parser.defaults, and the resulting hash is stored in @options.
def initialize(options = {})
  base_options = Parser.defaults
  @options = base_options.merge(options)
end
|
242
|
+
|
243
|
+
# Tokenizes +input+ via #scan, then runs the generated parser and returns
# the value of do_parse.
#
# @yydebug is set from the :debug option, falling back to the DEBUG
# environment variable when the option is false or nil.
def parse(input)
  debug_option = @options[:debug]
  @yydebug = debug_option || ENV['DEBUG']

  scan(input)
  do_parse
end
|
248
|
+
|
249
|
+
# Removes the oldest entry from @stack and returns it; returns nil once
# @stack is empty.
def next_token
  upcoming = @stack.shift
  upcoming
end
|
252
|
+
|
253
|
+
# Error callback: emits a warning describing the offending value, the
# token name (or '?' when token_to_str gives none) and the value stack.
# It does not raise.
def on_error(tid, val, vstack)
  token_name = token_to_str(tid) || '?'
  details = [val.inspect, token_name, vstack.inspect]

  warn "failed to parse extended date time %s (%s) %s" % details
end
|
256
|
+
|
257
|
+
# Resets the scanner state for +input+ (a fresh StringScanner in @src and
# an empty token list in @stack) and returns the result of #tokenize.
def scan(input)
  @src, @stack = StringScanner.new(input), []
  tokenize
end
|
262
|
+
|
263
|
+
private
|
264
|
+
|
265
|
+
# Splits the remaining contents of @src into [token_type, value] pairs,
# appends them to @stack and returns @stack.
#
# Branches are tried top to bottom, so the "unknown" keyword must stay
# ahead of the single-letter /u/ rule. Each digit becomes its own token
# (:D0 .. :D9). A "^" followed by word characters becomes a :CARET token
# whose value is the text after the caret.
#
# Any character no rule recognises yields an :UNMATCHED token whose value
# is the input remaining after that character.
def tokenize
  until @src.eos?
    case
    when @src.scan(/\(/)
      @stack << [:LP, @src.matched]
    when @src.scan(/\)/)
      @stack << [:RP, @src.matched]
    when @src.scan(/T/)
      @stack << [:T, @src.matched]
    when @src.scan(/Z/)
      @stack << [:Z, @src.matched]
    when @src.scan(/\?/)
      @stack << [:UNCERTAIN, @src.matched]
    when @src.scan(/~/)
      @stack << [:APPROXIMATE, @src.matched]
    when @src.scan(/open/i)
      @stack << [:OPEN, @src.matched]
    when @src.scan(/unkn?own/i) # matches 'unkown' typo too
      @stack << [:UNKNOWN, @src.matched]
    when @src.scan(/u/)
      @stack << [:UNSPECIFIED, @src.matched]
    when @src.scan(/x/i)
      @stack << [:X, @src.matched]
    when @src.scan(/y/)
      @stack << [:LONGYEAR, @src.matched]
    when @src.scan(/e/)
      @stack << [:E, @src.matched]
    when @src.scan(/\+/)
      @stack << [:PLUS, @src.matched]
    when @src.scan(/-/)
      @stack << [:MINUS, @src.matched]
    when @src.scan(/:/)
      @stack << [:COLON, @src.matched]
    when @src.scan(/\//)
      @stack << [:SLASH, @src.matched]
    when @src.scan(/\^\w+/)
      @stack << [:CARET, @src.matched[1..-1]]
    when @src.scan(/\d/)
      @stack << [['D', @src.matched].join.intern, @src.matched]
    else
      # getch always consumes one character. scan(/./) does not match a
      # newline, so it would leave the scan position unchanged and this
      # loop would never reach eos?.
      @src.getch
      @stack << [:UNMATCHED, @src.rest]
    end
  end

  @stack
end
|
311
|
+
|
312
|
+
|
313
|
+
# -*- racc -*-
|