kdl 0.1.0 → 1.0.0.rc2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/kdl/tokenizer.rb CHANGED
@@ -1,12 +1,41 @@
1
+ require 'bigdecimal'
2
+
1
3
  module KDL
2
4
  class Tokenizer
3
- class Error < StandardError; end
5
+ class Error < StandardError
6
+ def initialize(message, line, column)
7
+ super("#{message} (#{line}:#{column})")
8
+ end
9
+ end
10
+
11
+ class Token
12
+ attr_reader :type, :value, :line, :column, :meta
13
+
14
+ def initialize(type, value, line, column, meta = {})
15
+ @type = type
16
+ @value = value
17
+ @line = line
18
+ @column = column
19
+ @meta = meta
20
+ end
21
+
22
+ def ==(other)
23
+ return false unless other.is_a?(Token)
24
+
25
+ type == other.type && value == other.value && line == other.line && column == other.column
26
+ end
27
+
28
+ def to_s
29
+ "#{value.inspect} (#{line}:#{column})"
30
+ end
31
+ alias inspect to_s
32
+ end
4
33
 
5
34
  attr_reader :index
6
35
 
7
36
  SYMBOLS = {
8
- '{' => :LPAREN,
9
- '}' => :RPAREN,
37
+ '{' => :LBRACE,
38
+ '}' => :RBRACE,
10
39
  '=' => :EQUALS,
11
40
  '＝' => :EQUALS,
12
41
  ';' => :SEMICOLON
@@ -20,10 +49,13 @@ module KDL
20
49
 
21
50
  NEWLINES = ["\u000A", "\u0085", "\u000C", "\u2028", "\u2029"]
22
51
 
23
- NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join('')}\\<>[]\","
52
+ NON_IDENTIFIER_CHARS = Regexp.escape "#{SYMBOLS.keys.join('')}()/\\<>[]\","
24
53
  IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}\x0-\x20]/
25
54
  INITIAL_IDENTIFIER_CHARS = /[^#{NON_IDENTIFIER_CHARS}0-9\x0-\x20]/
26
55
 
56
+ ALLOWED_IN_TYPE = [:ident, :string, :rawstring]
57
+ NOT_ALLOWED_AFTER_TYPE = [:single_line_comment, :multi_line_comment]
58
+
27
59
  def initialize(str, start = 0)
28
60
  @str = str
29
61
  @context = nil
@@ -32,11 +64,17 @@ module KDL
32
64
  @buffer = ""
33
65
  @done = false
34
66
  @previous_context = nil
67
+ @line = 1
68
+ @column = 1
69
+ @type_context = false
70
+ @last_token = nil
35
71
  end
36
72
 
37
73
  def next_token
38
74
  @context = nil
39
75
  @previous_context = nil
76
+ @line_at_start = @line
77
+ @column_at_start = @column
40
78
  loop do
41
79
  c = @str[@index]
42
80
  case @context
@@ -45,11 +83,11 @@ module KDL
45
83
  when '"'
46
84
  self.context = :string
47
85
  @buffer = ''
48
- @index += 1
86
+ traverse(1)
49
87
  when 'r'
50
88
  if @str[@index + 1] == '"'
51
89
  self.context = :rawstring
52
- @index += 2
90
+ traverse(2)
53
91
  @rawstring_hashes = 0
54
92
  @buffer = ''
55
93
  next
@@ -69,20 +107,20 @@ module KDL
69
107
  end
70
108
  self.context = :ident
71
109
  @buffer = c
72
- @index += 1
110
+ traverse(1)
73
111
  when /[0-9\-+]/
74
112
  n = @str[@index + 1]
75
113
  if c == '0' && n =~ /[box]/
76
- @index += 2
114
+ traverse(2)
77
115
  @buffer = ''
78
116
  self.context = case n
79
- when 'b' then :binary
80
- when 'o' then :octal
81
- when 'x' then :hexadecimal
82
- end
117
+ when 'b' then :binary
118
+ when 'o' then :octal
119
+ when 'x' then :hexadecimal
120
+ end
83
121
  else
84
122
  self.context = :decimal
85
- @index += 1
123
+ traverse(1)
86
124
  @buffer = c
87
125
  end
88
126
  when '\\'
@@ -90,68 +128,80 @@ module KDL
90
128
  la = t.next_token[0]
91
129
  if la == :NEWLINE
92
130
  @index = t.index
131
+ new_line
93
132
  elsif la == :WS && (lan = t.next_token[0]) == :NEWLINE
94
133
  @index = t.index
134
+ new_line
95
135
  else
96
- raise Error, "Unexpected '\\'"
136
+ raise_error "Unexpected '\\'"
97
137
  end
98
138
  when *SYMBOLS.keys
99
- @index += 1
100
- return [SYMBOLS[c], c]
139
+ return token(SYMBOLS[c], c).tap { traverse(1) }
101
140
  when "\r"
102
141
  n = @str[@index + 1]
103
142
  if n == "\n"
104
- @index += 2
105
- return [:NEWLINE, "#{c}#{n}"]
143
+ return token(:NEWLINE, "#{c}#{n}").tap do
144
+ traverse(2)
145
+ new_line
146
+ end
106
147
  else
107
- @index += 1
108
- return [:NEWLINE, c]
148
+ return token(:NEWLINE, c).tap do
149
+ traverse(1)
150
+ new_line
151
+ end
109
152
  end
110
153
  when *NEWLINES
111
- @index += 1
112
- return [:NEWLINE, c]
154
+ return token(:NEWLINE, c).tap do
155
+ traverse(1)
156
+ new_line
157
+ end
113
158
  when "/"
114
159
  if @str[@index + 1] == '/'
115
160
  self.context = :single_line_comment
116
- @index += 2
161
+ traverse(2)
117
162
  elsif @str[@index + 1] == '*'
118
163
  self.context = :multi_line_comment
119
164
  @comment_nesting = 1
120
- @index += 2
165
+ traverse(2)
121
166
  elsif @str[@index + 1] == '-'
122
- @index += 2
123
- return [:SLASHDASH, '/-']
167
+ return token(:SLASHDASH, '/-').tap { traverse(2) }
124
168
  else
125
169
  self.context = :ident
126
170
  @buffer = c
127
- @index += 1
171
+ traverse(1)
128
172
  end
129
173
  when *WHITEPACE
130
174
  self.context = :whitespace
131
175
  @buffer = c
132
- @index += 1
176
+ traverse(1)
133
177
  when nil
134
- return [false, false] if @done
178
+ return [false, token(:EOF, :EOF)[1]] if @done
135
179
  @done = true
136
- return [:EOF, '']
180
+ return token(:EOF, :EOF)
137
181
  when INITIAL_IDENTIFIER_CHARS
138
182
  self.context = :ident
139
183
  @buffer = c
140
- @index += 1
184
+ traverse(1)
185
+ when '('
186
+ @type_context = true
187
+ return token(:LPAREN, c).tap { traverse(1) }
188
+ when ')'
189
+ @type_context = false
190
+ return token(:RPAREN, c).tap { traverse(1) }
141
191
  else
142
- raise Error, "Unexpected character #{c.inspect}"
192
+ raise_error "Unexpected character #{c.inspect}"
143
193
  end
144
194
  when :ident
145
195
  case c
146
196
  when IDENTIFIER_CHARS
147
- @index += 1
197
+ traverse(1)
148
198
  @buffer += c
149
199
  else
150
200
  case @buffer
151
- when 'true' then return [:TRUE, true]
152
- when 'false' then return [:FALSE, false]
153
- when 'null' then return [:NULL, nil]
154
- else return [:IDENT, @buffer]
201
+ when 'true' then return token(:TRUE, true)
202
+ when 'false' then return token(:FALSE, false)
203
+ when 'null' then return token(:NULL, nil)
204
+ else return token(:IDENT, @buffer)
155
205
  end
156
206
  end
157
207
  when :string
@@ -159,18 +209,17 @@ module KDL
159
209
  when '\\'
160
210
  @buffer += c
161
211
  @buffer += @str[@index + 1]
162
- @index += 2
212
+ traverse(2)
163
213
  when '"'
164
- @index += 1
165
- return [:STRING, convert_escapes(@buffer)]
214
+ return token(:STRING, convert_escapes(@buffer)).tap { traverse(1) }
166
215
  when nil
167
- raise Error, "Unterminated string literal"
216
+ raise_error "Unterminated string literal"
168
217
  else
169
218
  @buffer += c
170
- @index += 1
219
+ traverse(1)
171
220
  end
172
221
  when :rawstring
173
- raise Error, "Unterminated rawstring literal" if c.nil?
222
+ raise_error "Unterminated rawstring literal" if c.nil?
174
223
 
175
224
  if c == '"'
176
225
  h = 0
@@ -178,17 +227,16 @@ module KDL
178
227
  h += 1
179
228
  end
180
229
  if h == @rawstring_hashes
181
- @index += 1 + h
182
- return [:RAWSTRING, @buffer]
230
+ return token(:RAWSTRING, @buffer).tap { traverse(1 + h) }
183
231
  end
184
232
  end
185
233
 
186
234
  @buffer += c
187
- @index += 1
235
+ traverse(1)
188
236
  when :decimal
189
237
  case c
190
238
  when /[0-9.\-+_eE]/
191
- @index += 1
239
+ traverse(1)
192
240
  @buffer += c
193
241
  else
194
242
  return parse_decimal(@buffer)
@@ -196,7 +244,7 @@ module KDL
196
244
  when :hexadecimal
197
245
  case c
198
246
  when /[0-9a-fA-F_]/
199
- @index += 1
247
+ traverse(1)
200
248
  @buffer += c
201
249
  else
202
250
  return parse_hexadecimal(@buffer)
@@ -204,7 +252,7 @@ module KDL
204
252
  when :octal
205
253
  case c
206
254
  when /[0-7_]/
207
- @index += 1
255
+ traverse(1)
208
256
  @buffer += c
209
257
  else
210
258
  return parse_octal(@buffer)
@@ -212,7 +260,7 @@ module KDL
212
260
  when :binary
213
261
  case c
214
262
  when /[01_]/
215
- @index += 1
263
+ traverse(1)
216
264
  @buffer += c
217
265
  else
218
266
  return parse_binary(@buffer)
@@ -220,52 +268,80 @@ module KDL
220
268
  when :single_line_comment
221
269
  if NEWLINES.include?(c) || c == "\r"
222
270
  self.context = nil
271
+ @column_at_start = @column
223
272
  next
224
273
  elsif c.nil?
225
274
  @done = true
226
- return [:EOF, '']
275
+ return token(:EOF, :EOF)
227
276
  else
228
- @index += 1
277
+ traverse(1)
229
278
  end
230
279
  when :multi_line_comment
231
280
  if c == '/' && @str[@index + 1] == '*'
232
281
  @comment_nesting += 1
233
- @index += 2
282
+ traverse(2)
234
283
  elsif c == '*' && @str[@index + 1] == '/'
235
284
  @comment_nesting -= 1
236
- @index += 2
285
+ traverse(2)
237
286
  if @comment_nesting == 0
238
287
  revert_context
239
288
  end
240
289
  else
241
- @index += 1
290
+ traverse(1)
242
291
  end
243
292
  when :whitespace
244
293
  if WHITEPACE.include?(c)
245
- @index += 1
294
+ traverse(1)
246
295
  @buffer += c
247
296
  elsif c == "\\"
248
297
  t = Tokenizer.new(@str, @index + 1)
249
298
  la = t.next_token[0]
250
299
  if la == :NEWLINE
251
300
  @index = t.index
301
+ new_line
252
302
  elsif (la == :WS && (lan = t.next_token[0]) == :NEWLINE)
253
303
  @index = t.index
304
+ new_line
254
305
  else
255
- raise Error, "Unexpected '\\'"
306
+ raise_error "Unexpected '\\'"
256
307
  end
257
308
  elsif c == "/" && @str[@index + 1] == '*'
258
309
  self.context = :multi_line_comment
259
310
  @comment_nesting = 1
260
- @index += 2
311
+ traverse(2)
261
312
  else
262
- return [:WS, @buffer]
313
+ return token(:WS, @buffer)
263
314
  end
264
315
  end
265
316
  end
266
317
  end
267
318
 
319
+ private
320
+
321
+ def token(type, value, **meta)
322
+ @last_token = [type, Token.new(type, value, @line_at_start, @column_at_start, meta)]
323
+ end
324
+
325
+ def traverse(n = 1)
326
+ @column += n
327
+ @index += n
328
+ end
329
+
330
+ def raise_error(message)
331
+ raise Error.new(message, @line, @column)
332
+ end
333
+
334
+ def new_line
335
+ @column = 1
336
+ @line += 1
337
+ end
338
+
268
339
  def context=(val)
340
+ if @type_context && !ALLOWED_IN_TYPE.include?(val)
341
+ raise_error "#{val} context not allowed in type declaration"
342
+ elsif @last_token && @last_token[0] == :RPAREN && NOT_ALLOWED_AFTER_TYPE.include?(val)
343
+ raise_error 'Comments are not allowed after a type declaration'
344
+ end
269
345
  @previous_context = @context
270
346
  @context = val
271
347
  end
@@ -275,23 +351,38 @@ module KDL
275
351
  @previous_context = nil
276
352
  end
277
353
 
278
- private
279
-
280
354
  def parse_decimal(s)
281
- return [:FLOAT, Float(munch_underscores(s))] if s =~ /[.eE]/
282
- [:INTEGER, Integer(munch_underscores(s), 10)]
355
+ return parse_float(s) if s =~ /[.E]/i
356
+
357
+ token(:INTEGER, Integer(munch_underscores(s), 10), format: '%d')
283
358
  end
284
-
359
+
360
+ def parse_float(s)
361
+ match, _, fraction, exponent = *s.match(/^([-+]?[\d_]+)(?:\.(\d+))?(?:[eE]([-+]?[\d_]+))?$/)
362
+ raise_error "Invalid floating point value #{s}" if match.nil?
363
+
364
+ s = munch_underscores(s)
365
+
366
+ decimals = fraction.nil? ? 0 : fraction.size
367
+ value = Float(s)
368
+ scientific = value.abs >= 100 || (exponent && exponent.to_i.abs >= 2)
369
+ if value.infinite? || (value.zero? && exponent.to_i < 0)
370
+ token(:FLOAT, BigDecimal(s))
371
+ else
372
+ token(:FLOAT, value, format: scientific ? "%.#{decimals}E" : nil)
373
+ end
374
+ end
375
+
285
376
  def parse_hexadecimal(s)
286
- [:INTEGER, Integer(munch_underscores(s), 16)]
377
+ token(:INTEGER, Integer(munch_underscores(s), 16))
287
378
  end
288
-
379
+
289
380
  def parse_octal(s)
290
- [:INTEGER, Integer(munch_underscores(s), 8)]
381
+ token(:INTEGER, Integer(munch_underscores(s), 8))
291
382
  end
292
-
383
+
293
384
  def parse_binary(s)
294
- [:INTEGER, Integer(munch_underscores(s), 2)]
385
+ token(:INTEGER, Integer(munch_underscores(s), 2))
295
386
  end
296
387
 
297
388
  def munch_underscores(s)
@@ -308,12 +399,13 @@ module KDL
308
399
  when '\"' then "\""
309
400
  when '\b' then "\b"
310
401
  when '\f' then "\f"
311
- else raise Error, "Unexpected escape #{m.inspect}"
402
+ when '\/' then "/"
403
+ else raise_error "Unexpected escape #{m.inspect}"
312
404
  end
313
405
  end.gsub(/\\u\{[0-9a-fA-F]{0,6}\}/) do |m|
314
406
  i = Integer(m[3..-2], 16)
315
407
  if i < 0 || i > 0x10FFFF
316
- raise Error, "Invalid code point #{u}"
408
+ raise_error "Invalid code point #{u}"
317
409
  end
318
410
  i.chr(Encoding::UTF_8)
319
411
  end
@@ -0,0 +1,15 @@
1
+ require 'base64'
2
+
3
+ module KDL
4
+ module Types
5
+ class Base64 < Value
6
+ def self.call(value, type = 'base64')
7
+ return nil unless value.is_a? ::KDL::Value::String
8
+
9
+ data = ::Base64.decode64(value.value)
10
+ new(data, type: type)
11
+ end
12
+ end
13
+ MAPPING['base64'] = Base64
14
+ end
15
+ end