RbYAML 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. data/README +1 -1
  2. data/lib/rbyaml/composer.rb +28 -25
  3. data/lib/rbyaml/composer.rb.~1.2.~ +109 -0
  4. data/lib/rbyaml/constructor.rb +94 -84
  5. data/lib/rbyaml/constructor.rb.~1.2.~ +381 -0
  6. data/lib/rbyaml/dumper.rb +10 -17
  7. data/lib/rbyaml/dumper.rb.~1.2.~ +43 -0
  8. data/lib/rbyaml/emitter.rb +13 -26
  9. data/lib/rbyaml/emitter.rb.~1.2.~ +1116 -0
  10. data/lib/rbyaml/error.rb +15 -21
  11. data/lib/rbyaml/events.rb +29 -5
  12. data/lib/rbyaml/events.rb.~1.2.~ +93 -0
  13. data/lib/rbyaml/loader.rb +11 -23
  14. data/lib/rbyaml/loader.rb.~1.2.~ +52 -0
  15. data/lib/rbyaml/nodes.rb +13 -9
  16. data/lib/rbyaml/nodes.rb.~1.2.~ +52 -0
  17. data/lib/rbyaml/parser.rb +481 -343
  18. data/lib/rbyaml/parser.rb.old +531 -0
  19. data/lib/rbyaml/parser.rb.~1.2.~ +494 -0
  20. data/lib/rbyaml/reader.rb.~1.1.1.1.~ +127 -0
  21. data/lib/rbyaml/representer.rb +26 -17
  22. data/lib/rbyaml/representer.rb.~1.2.~ +239 -0
  23. data/lib/rbyaml/resolver.rb +15 -15
  24. data/lib/rbyaml/resolver.rb.~1.1.~ +163 -0
  25. data/lib/rbyaml/scanner.rb +457 -366
  26. data/lib/rbyaml/scanner.rb.~1.2.~ +1259 -0
  27. data/lib/rbyaml/serializer.rb +19 -17
  28. data/lib/rbyaml/serializer.rb.~1.2.~ +115 -0
  29. data/lib/rbyaml/tokens.rb +44 -4
  30. data/lib/rbyaml/tokens.rb.~1.2.~ +164 -0
  31. data/lib/rbyaml/util.rb +28 -0
  32. data/lib/rbyaml/yaml.rb +12 -12
  33. data/lib/rbyaml/yaml.rb.~1.2.~ +136 -0
  34. data/test/test_bm.rb +28 -0
  35. data/test/test_bm_syck.rb +28 -0
  36. data/test/test_invoke.rb +31 -0
  37. data/test/test_one.rb +5 -0
  38. data/test/test_profile.rb +32 -0
  39. data/test/test_rbyaml.rb +2 -1
  40. data/test/test_rbyaml.rb.~1.2.~ +31 -0
  41. data/test/test_time.rb +13 -8
  42. data/test/test_time.rb.~1.1.~ +29 -0
  43. data/test/yamlx.rb +3563 -0
  44. metadata +27 -2
@@ -0,0 +1,1259 @@
1
+ # Scanner produces tokens of the following types:
2
+ # STREAM-START
3
+ # STREAM-END
4
+ # DIRECTIVE(name, value)
5
+ # DOCUMENT-START
6
+ # DOCUMENT-END
7
+ # BLOCK-SEQUENCE-START
8
+ # BLOCK-MAPPING-START
9
+ # BLOCK-END
10
+ # FLOW-SEQUENCE-START
11
+ # FLOW-MAPPING-START
12
+ # FLOW-SEQUENCE-END
13
+ # FLOW-MAPPING-END
14
+ # BLOCK-ENTRY
15
+ # FLOW-ENTRY
16
+ # KEY
17
+ # VALUE
18
+ # ALIAS(value)
19
+ # ANCHOR(value)
20
+ # TAG(value)
21
+ # SCALAR(value, plain)
22
+ #
23
+ # Read comments in the Scanner code for more details.
24
+ #
25
+
26
+ require 'rbyaml/error'
27
+ require 'rbyaml/tokens'
28
+
29
+ module RbYAML
30
+ class ScannerError < MarkedYAMLError
31
+ end
32
+
33
+ class SimpleKey
34
+ attr_reader :token_number, :required, :index, :line, :column, :mark
35
+
36
+ def initialize(token_number,required,index,line,column,mark)
37
+ @token_number = token_number
38
+ @required = required
39
+ @index = index
40
+ @line = line
41
+ @column = column
42
+ @mark = mark
43
+ end
44
+ end
45
+
46
+ module Scanner
47
+ def initialize_scanner
48
+ # It is assumed that Scanner and Reader will mixin to the same point.
49
+ # Reader do the dirty work of checking for BOM. It also adds NUL to the end.
50
+ #
51
+ # Reader supports the following methods
52
+ # self.peek(i=0) # peek the next i-th character
53
+ # self.prefix(l=1) # peek the next l characters
54
+ # self.forward(l=1) # read the next l characters and move the pointer.
55
+
56
+ # Had we reached the end of the stream?
57
+ @done = false
58
+
59
+ # The number of unclosed '{' and '['. `flow_level == 0` means block
60
+ # context.
61
+ @flow_level = 0
62
+
63
+ # List of processed tokens that are not yet emitted.
64
+ @tokens = []
65
+
66
+ # Add the STREAM-START token.
67
+ fetch_stream_start
68
+
69
+ # Number of tokens that were emitted through the `get_token` method.
70
+ @tokens_taken = 0
71
+
72
+ # The current indentation level.
73
+ @indent = -1
74
+
75
+ # Past indentation levels.
76
+ @indents = []
77
+
78
+ # Variables related to simple keys treatment.
79
+
80
+ # A simple key is a key that is not denoted by the '?' indicator.
81
+ # Example of simple keys:
82
+ # ---
83
+ # block simple key: value
84
+ # ? not a simple key:
85
+ # : { flow simple key: value }
86
+ # We emit the KEY token before all keys, so when we find a potential
87
+ # simple key, we try to locate the corresponding ':' indicator.
88
+ # Simple keys should be limited to a single line and 1024 characters.
89
+
90
+ # Can a simple key start at the current position? A simple key may
91
+ # start:
92
+ # - at the beginning of the line, not counting indentation spaces
93
+ # (in block context),
94
+ # - after '{', '[', ',' (in the flow context),
95
+ # - after '?', ':', '-' (in the block context).
96
+ # In the block context, this flag also signifies if a block collection
97
+ # may start at the current position.
98
+ @allow_simple_key = true
99
+
100
+ # Keep track of possible simple keys. This is a dictionary. The key
101
+ # is `flow_level`; there can be no more that one possible simple key
102
+ # for each level. The value is a SimpleKey record:
103
+ # (token_number, required, index, line, column, mark)
104
+ # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
105
+ # '[', or '{' tokens.
106
+ @possible_simple_keys = {}
107
+ end
108
+
109
+ def check_token(*choices)
110
+ # Check if the next token is one of the given types.
111
+ fetch_more_tokens while need_more_tokens
112
+ unless @tokens.empty?
113
+ return true if choices.empty?
114
+ for choice in choices
115
+ return true if choice === @tokens[0]
116
+ end
117
+ end
118
+ return false
119
+ end
120
+
121
+ def peek_token
122
+ # Return the next token, but do not delete if from the queue.
123
+ fetch_more_tokens while need_more_tokens
124
+ return @tokens[0] unless @tokens.empty?
125
+ end
126
+
127
+ def get_token
128
+ # Return the next token.
129
+ fetch_more_tokens while need_more_tokens
130
+ unless @tokens.empty?
131
+ @tokens_taken += 1
132
+ @tokens.shift
133
+ end
134
+ end
135
+
136
+ def each_token
137
+ fetch_more_tokens while need_more_tokens
138
+ while !@tokens.empty?
139
+ @tokens_taken += 1
140
+ yield @tokens.shift
141
+ fetch_more_tokens while need_more_tokens
142
+ end
143
+ end
144
+
145
+ def need_more_tokens
146
+ return false if @done
147
+ return true if @tokens.empty?
148
+ # The current token may be a potential simple key, so we
149
+ # need to look further.
150
+ stale_possible_simple_keys
151
+ return true if next_possible_simple_key == @tokens_taken
152
+ end
153
+
154
+ def fetch_more_tokens
155
+ # Eat whitespaces and comments until we reach the next token.
156
+ scan_to_next_token
157
+
158
+ # Remove obsolete possible simple keys.
159
+ stale_possible_simple_keys
160
+
161
+ # Compare the current indentation and column. It may add some tokens
162
+ # and decrease the current indentation level.
163
+ unwind_indent(@column)
164
+
165
+ # Peek the next character.
166
+ ch = peek
167
+
168
+ return case
169
+ # Is it the end of stream?
170
+ when ch == ?\0: fetch_stream_end
171
+ # Is it a directive?
172
+ when ch == ?% && check_directive: fetch_directive
173
+ # Is it the document start?
174
+ when ch == ?- && check_document_start: fetch_document_start
175
+ # Is it the document end?
176
+ when ch == ?. && check_document_end: fetch_document_end
177
+ # Is it the flow sequence start indicator?
178
+ when ch == ?[: fetch_flow_sequence_start
179
+ # Is it the flow mapping start indicator?
180
+ when ch == ?{: fetch_flow_mapping_start
181
+ # Is it the flow sequence end indicator?
182
+ when ch == ?]: fetch_flow_sequence_end
183
+ # Is it the flow mapping end indicator?
184
+ when ch == ?}: fetch_flow_mapping_end
185
+ # Is it the flow entry indicator?
186
+ when ch == ?,: fetch_flow_entry
187
+ # Is it the block entry indicator?
188
+ when ch == ?- && check_block_entry: fetch_block_entry
189
+ # Is it the key indicator?
190
+ when ch == ?? && check_key: fetch_key
191
+ # Is it the value indicator?
192
+ when ch == ?: && check_value: fetch_value
193
+ # Is it an alias?
194
+ when ch == ?*: fetch_alias
195
+ # Is it an anchor?
196
+ when ch == ?&: fetch_anchor
197
+ # Is it a tag?
198
+ when ch == ?!: fetch_tag
199
+ # Is it a literal scalar?
200
+ when ch == ?| && @flow_level==0: fetch_literal
201
+ # Is it a folded scalar?
202
+ when ch == ?> && @flow_level==0: fetch_folded
203
+ # Is it a single quoted scalar?
204
+ when ch == ?': fetch_single
205
+ # Is it a double quoted scalar?
206
+ when ch == ?": fetch_double
207
+ # It must be a plain scalar then.
208
+ when check_plain: fetch_plain
209
+ else raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
210
+ end
211
+ end
212
+
213
+ # Simple keys treatment.
214
+
215
+ def next_possible_simple_key
216
+ # Return the number of the nearest possible simple key. Actually we
217
+ # don't need to loop through the whole dictionary.
218
+ min_token_number = nil
219
+ for level in @possible_simple_keys.keys
220
+ key = @possible_simple_keys[level]
221
+ if min_token_number.nil? || key.token_number < min_token_number
222
+ min_token_number = key.token_number
223
+ end
224
+ end
225
+ min_token_number
226
+ end
227
+
228
+ def stale_possible_simple_keys
229
+ # Remove entries that are no longer possible simple keys. According to
230
+ # the YAML specification, simple keys
231
+ # - should be limited to a single line,
232
+ # - should be no longer than 1024 characters.
233
+ # Disabling this procedure will allow simple keys of any length and
234
+ # height (may cause problems if indentation is broken though).
235
+ @possible_simple_keys.delete_if {|level,key|
236
+ if key.line != @line || @index-key.index > 1024
237
+ raise ScannerError.new("while scanning a simple key", key.mark, "could not found expected ':'",get_mark) if key.required
238
+ return true
239
+ end
240
+ return false
241
+ }
242
+ end
243
+
244
+ def save_possible_simple_key
245
+ # The next token may start a simple key. We check if it's possible
246
+ # and save its position. This function is called for
247
+ # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
248
+
249
+ # Check if a simple key is required at the current position.
250
+ required = @flow_level==0 && @indent == @column
251
+
252
+ # The next token might be a simple key. Let's save it's number and
253
+ # position.
254
+ if @allow_simple_key
255
+ remove_possible_simple_key
256
+ token_number = @tokens_taken+@tokens.length
257
+ key = SimpleKey.new(token_number, required,@index,@line,@column,get_mark)
258
+ @possible_simple_keys[@flow_level] = key
259
+ end
260
+ end
261
+
262
+ def remove_possible_simple_key
263
+ # Remove the saved possible key position at the current flow level.
264
+ key = @possible_simple_keys[@flow_level] if @possible_simple_keys.member?(@flow_level)
265
+ end
266
+
267
+ # Indentation functions.
268
+
269
+ def unwind_indent(column)
270
+ ## In flow context, tokens should respect indentation.
271
+ ## Actually the condition should be `@indent >= column` according to
272
+ ## the spec. But this condition will prohibit intuitively correct
273
+ ## constructions such as
274
+ ## key : {
275
+ ## }
276
+ #if @flow_level and @indent > column
277
+ # raise ScannerError(nil, nil,
278
+ # "invalid intendation or unclosed '[' or '{'",
279
+ # get_mark)
280
+
281
+ # In the flow context, indentation is ignored. We make the scanner less
282
+ # restrictive then specification requires.
283
+ return nil if @flow_level != 0
284
+ # In block context, we may need to issue the BLOCK-END tokens.
285
+ while @indent > column
286
+ mark = get_mark
287
+ @indent = @indents.pop()
288
+ @tokens << BlockEndToken.new(mark, mark)
289
+ end
290
+ end
291
+
292
+ def add_indent(column)
293
+ # Check if we need to increase indentation.
294
+ if @indent < column
295
+ @indents << @indent
296
+ @indent = column
297
+ return true
298
+ end
299
+ return false
300
+ end
301
+
302
+ # Fetchers.
303
+
304
+ def fetch_stream_start
305
+ # We always add STREAM-START as the first token and STREAM-END as the
306
+ # last token.
307
+ # Read the token.
308
+ mark = get_mark
309
+ # Add STREAM-START.
310
+ @tokens << StreamStartToken.new(mark, mark, @encoding)
311
+ end
312
+
313
+
314
+ def fetch_stream_end
315
+ # Set the current intendation to -1.
316
+ unwind_indent(-1)
317
+ # Reset everything (not really needed).
318
+ @allow_simple_key = false
319
+ @possible_simple_keys = {}
320
+ # Read the token.
321
+ mark = get_mark
322
+ # Add STREAM-END.
323
+ @tokens << StreamEndToken.new(mark, mark)
324
+ # The stream is finished.
325
+ @done = true
326
+ end
327
+
328
+ def fetch_directive
329
+ # Set the current intendation to -1.
330
+ unwind_indent(-1)
331
+ # Reset simple keys.
332
+ remove_possible_simple_key
333
+ @allow_simple_key = false
334
+ # Scan and add DIRECTIVE.
335
+ @tokens << scan_directive
336
+ end
337
+
338
+ def fetch_document_start
339
+ fetch_document_indicator(DocumentStartToken)
340
+ end
341
+
342
+ def fetch_document_end
343
+ fetch_document_indicator(DocumentEndToken)
344
+ end
345
+
346
+ def fetch_document_indicator(token)
347
+ # Set the current intendation to -1.
348
+ unwind_indent(-1)
349
+ # Reset simple keys. Note that there could not be a block collection
350
+ # after '---'.
351
+ remove_possible_simple_key
352
+ @allow_simple_key = false
353
+ # Add DOCUMENT-START or DOCUMENT-END.
354
+ start_mark = get_mark
355
+ forward(3)
356
+ end_mark = get_mark
357
+ @tokens << token.new(start_mark, end_mark)
358
+ end
359
+
360
+ def fetch_flow_sequence_start
361
+ fetch_flow_collection_start(FlowSequenceStartToken)
362
+ end
363
+
364
+ def fetch_flow_mapping_start
365
+ fetch_flow_collection_start(FlowMappingStartToken)
366
+ end
367
+
368
+ def fetch_flow_collection_start(token)
369
+ # '[' and '{' may start a simple key.
370
+ save_possible_simple_key
371
+ # Increase the flow level.
372
+ @flow_level += 1
373
+ # Simple keys are allowed after '[' and '{'.
374
+ @allow_simple_key = true
375
+ # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
376
+ start_mark = get_mark
377
+ forward
378
+ end_mark = get_mark
379
+ @tokens << token.new(start_mark, end_mark)
380
+ end
381
+
382
+ def fetch_flow_sequence_end
383
+ fetch_flow_collection_end(FlowSequenceEndToken)
384
+ end
385
+
386
+ def fetch_flow_mapping_end
387
+ fetch_flow_collection_end(FlowMappingEndToken)
388
+ end
389
+
390
+ def fetch_flow_collection_end(token)
391
+ # Reset possible simple key on the current level.
392
+ remove_possible_simple_key
393
+ # Decrease the flow level.
394
+ @flow_level -= 1
395
+ # No simple keys after ']' or '}'.
396
+ @allow_simple_key = false
397
+ # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
398
+ start_mark = get_mark
399
+ forward
400
+ end_mark = get_mark
401
+ @tokens << token.new(start_mark, end_mark)
402
+ end
403
+
404
+ def fetch_flow_entry
405
+ # Simple keys are allowed after ','.
406
+ @allow_simple_key = true
407
+ # Reset possible simple key on the current level.
408
+ remove_possible_simple_key
409
+ # Add FLOW-ENTRY.
410
+ start_mark = get_mark
411
+ forward
412
+ end_mark = get_mark
413
+ @tokens << FlowEntryToken.new(start_mark, end_mark)
414
+ end
415
+
416
+ def fetch_block_entry
417
+ # Block context needs additional checks.
418
+ if @flow_level==0
419
+ raise ScannerError.new(nil,nil,"sequence entries are not allowed here",get_mark) if !@allow_simple_key
420
+ # We may need to add BLOCK-SEQUENCE-START.
421
+ if add_indent(@column)
422
+ mark = get_mark
423
+ @tokens << BlockSequenceStartToken.new(mark, mark)
424
+ end
425
+ # It's an error for the block entry to occur in the flow context,
426
+ # but we let the parser detect this.
427
+ end
428
+ # Simple keys are allowed after '-'.
429
+ @allow_simple_key = true
430
+ # Reset possible simple key on the current level.
431
+ remove_possible_simple_key
432
+ # Add BLOCK-ENTRY.
433
+ start_mark = get_mark
434
+ forward
435
+ end_mark = get_mark
436
+ @tokens << BlockEntryToken.new(start_mark, end_mark)
437
+ end
438
+
439
+ def fetch_key
440
+ # Block context needs additional checks.
441
+ if @flow_level==0
442
+ # Are we allowed to start a key (not nessesary a simple)?
443
+ raise ScannerError.new(nil,nil,"mapping keys are not allowed here",get_mark) if !@allow_simple_key
444
+ # We may need to add BLOCK-MAPPING-START.
445
+ if add_indent(@column)
446
+ mark = get_mark
447
+ @tokens << BlockMappingStartToken.new(mark, mark)
448
+ end
449
+ end
450
+ # Simple keys are allowed after '?' in the block context.
451
+ @allow_simple_key = @flow_level==0
452
+ # Reset possible simple key on the current level.
453
+ remove_possible_simple_key
454
+ # Add KEY.
455
+ start_mark = get_mark
456
+ forward
457
+ end_mark = get_mark
458
+ @tokens << KeyToken.new(start_mark, end_mark)
459
+ end
460
+
461
+ def fetch_value
462
+ # Do we determine a simple key?
463
+ if @possible_simple_keys.include?(@flow_level)
464
+ # Add KEY.
465
+ key = @possible_simple_keys[@flow_level]
466
+ @possible_simple_keys.delete(@flow_level)
467
+ @tokens.insert(key.token_number-@tokens_taken,KeyToken.new(key.mark, key.mark))
468
+ # If this key starts a new block mapping, we need to add
469
+ # BLOCK-MAPPING-START.
470
+ @tokens.insert(key.token_number-@tokens_taken,BlockMappingStartToken.new(key.mark, key.mark)) if @flow_level==0 && add_indent(key.column)
471
+ # There cannot be two simple keys one after another.
472
+ @allow_simple_key = false
473
+ # It must be a part of a complex key.
474
+ else
475
+ # Block context needs additional checks.
476
+ # (Do we really need them? They will be catched by the parser
477
+ # anyway.)
478
+ if @flow_level==0
479
+ # We are allowed to start a complex value if and only if
480
+ # we can start a simple key.
481
+ raise ScannerError.new(nil,nil,"mapping values are not allowed here",get_mark) if !@allow_simple_key
482
+ # Simple keys are allowed after ':' in the block context.
483
+ @allow_simple_key = @flow_level==0
484
+ # Reset possible simple key on the current level.
485
+ remove_possible_simple_key
486
+ end
487
+ end
488
+ # Add VALUE.
489
+ start_mark = get_mark
490
+ forward
491
+ end_mark = get_mark
492
+ @tokens << ValueToken.new(start_mark, end_mark)
493
+ end
494
+
495
+ def fetch_alias
496
+ # ALIAS could be a simple key.
497
+ save_possible_simple_key
498
+ # No simple keys after ALIAS.
499
+ @allow_simple_key = false
500
+ # Scan and add ALIAS.
501
+ @tokens << scan_anchor(AliasToken)
502
+ end
503
+
504
+ def fetch_anchor
505
+ # ANCHOR could start a simple key.
506
+ save_possible_simple_key
507
+ # No simple keys after ANCHOR.
508
+ @allow_simple_key = false
509
+ # Scan and add ANCHOR.
510
+ @tokens << scan_anchor(AnchorToken)
511
+ end
512
+
513
+ def fetch_tag
514
+ # TAG could start a simple key.
515
+ save_possible_simple_key
516
+ # No simple keys after TAG.
517
+ @allow_simple_key = false
518
+ # Scan and add TAG.
519
+ @tokens << scan_tag
520
+ end
521
+
522
+ def fetch_literal
523
+ fetch_block_scalar(?|)
524
+ end
525
+
526
+ def fetch_folded
527
+ fetch_block_scalar(?>)
528
+ end
529
+
530
+ def fetch_block_scalar(style)
531
+ # A simple key may follow a block scalar.
532
+ @allow_simple_key = true
533
+ # Reset possible simple key on the current level.
534
+ remove_possible_simple_key
535
+ # Scan and add SCALAR.
536
+ @tokens << scan_block_scalar(style)
537
+ end
538
+
539
+ def fetch_single
540
+ fetch_flow_scalar(?')
541
+ end
542
+
543
+ def fetch_double
544
+ fetch_flow_scalar(?")
545
+ end
546
+
547
+ def fetch_flow_scalar(style)
548
+ # A flow scalar could be a simple key.
549
+ save_possible_simple_key
550
+ # No simple keys after flow scalars.
551
+ @allow_simple_key = false
552
+ # Scan and add SCALAR.
553
+ @tokens << scan_flow_scalar(style)
554
+ end
555
+
556
+ def fetch_plain
557
+ # A plain scalar could be a simple key.
558
+ save_possible_simple_key
559
+ # No simple keys after plain scalars. But note that `scan_plain` will
560
+ # change this flag if the scan is finished at the beginning of the
561
+ # line.
562
+ @allow_simple_key = false
563
+ # Scan and add SCALAR. May change `allow_simple_key`.
564
+ @tokens << scan_plain
565
+ end
566
+
567
+ # Checkers.
568
+
569
+ def check_directive
570
+ # DIRECTIVE: ^ '%' ...
571
+ # The '%' indicator is already checked.
572
+ @column == 0
573
+ end
574
+
575
+ def check_document_start
576
+ # DOCUMENT-START: ^ '---' (' '|'\n')
577
+ @column == 0 && prefix(3) == "---" && "\0 \t\r\n\x85".include?(peek(3))
578
+ end
579
+
580
+ def check_document_end
581
+ # DOCUMENT-END: ^ '...' (' '|'\n')
582
+ @column == 0 && prefix(3) == "..." && "\0 \t\r\n\x85".include?(peek(3))
583
+ end
584
+
585
+ def check_block_entry
586
+ # BLOCK-ENTRY: '-' (' '|'\n')
587
+ "\0 \t\r\n\x85".include?(peek(1))
588
+ end
589
+
590
+ def check_key
591
+ # KEY(flow context): '?'
592
+ # KEY(block context): '?' (' '|'\n')
593
+ @flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
594
+ end
595
+
596
+ def check_value
597
+ # VALUE(flow context): ':'
598
+ # VALUE(block context): ':' (' '|'\n')
599
+ @flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
600
+ end
601
+
602
+ def check_plain
603
+ # A plain scalar may start with any non-space character except:
604
+ # '-', '?', ':', ',', '[', ']', '{', '}',
605
+ # '#', '&', '*', '!', '|', '>', '\'', '\"',
606
+ # '%', '@', '`'.
607
+ #
608
+ # It may also start with
609
+ # '-', '?', ':'
610
+ # if it is followed by a non-space character.
611
+ #
612
+ # Note that we limit the last rule to the block context (except the
613
+ # '-' character) because we want the flow context to be space
614
+ # independent.
615
+ ch = peek
616
+ !("\0 \t\r\n\x85-?:,[]{}#&*!|>'\"%@`".include?(ch)) || (!("\0 \t\r\n\x85".include?(peek(1)) && (ch == ?- || (@flow_level==0 && "?:".include?(ch)))))
617
+ end
618
+
619
+
620
+
621
+
622
+
623
+
624
+ # Scanners.
625
+
626
+ def scan_to_next_token
627
+ # We ignore spaces, line breaks and comments.
628
+ # If we find a line break in the block context, we set the flag
629
+ # `allow_simple_key` on.
630
+ #
631
+ # TODO: We need to make tab handling rules more sane. A good rule is
632
+ # Tabs cannot precede tokens
633
+ # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
634
+ # KEY(block), VALUE(block), BLOCK-ENTRY
635
+ # So the checking code is
636
+ # if <TAB>:
637
+ # @allow_simple_keys = false
638
+ # We also need to add the check for `allow_simple_keys == true` to
639
+ # `unwind_indent` before issuing BLOCK-END.
640
+ # Scanners for block, flow, and plain scalars need to be modified.
641
+ found = false
642
+ while !found
643
+ while peek == 32
644
+ forward
645
+ end
646
+ if peek == ?#
647
+ forward while !"\0\r\n\x85".include?(peek)
648
+ end
649
+ if !scan_line_break.empty?
650
+ @allow_simple_key = true if @flow_level==0
651
+ else
652
+ found = true
653
+ end
654
+ end
655
+ end
656
+
657
+ def scan_directive
658
+ # See the specification for details.
659
+ start_mark = get_mark
660
+ forward
661
+ name = scan_directive_name(start_mark)
662
+ value = nil
663
+ if name == "YAML"
664
+ value = scan_yaml_directive_value(start_mark)
665
+ end_mark = get_mark
666
+ elsif name == "TAG"
667
+ value = scan_tag_directive_value(start_mark)
668
+ end_mark = get_mark
669
+ else
670
+ end_mark = get_mark
671
+ forward while !"\0\r\n\x85".include?(peek)
672
+ end
673
+ scan_directive_ignored_line(start_mark)
674
+ DirectiveToken.new(name, value, start_mark, end_mark)
675
+ end
676
+
677
+ def scan_directive_name(start_mark)
678
+ # See the specification for details.
679
+ length = 0
680
+ ch = peek(length)
681
+ while /[-0-9A-Za-z_]/ =~ ch.chr
682
+ length += 1
683
+ ch = peek(length)
684
+ end
685
+ raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if length==0
686
+ value = prefix(length)
687
+ forward(length)
688
+ ch = peek()
689
+ raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
690
+ value
691
+ end
692
+
693
+ def scan_yaml_directive_value(start_mark)
694
+ # See the specification for details.
695
+ forward while peek == 32
696
+ major = scan_yaml_directive_number(start_mark)
697
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark) if peek != ?.
698
+ forward
699
+ minor = scan_yaml_directive_number(start_mark)
700
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark) if !"\0 \r\n\x85".include?(peek)
701
+ [major, minor]
702
+ end
703
+
704
+ def scan_yaml_directive_number(start_mark)
705
+ # See the specification for details.
706
+ ch = peek
707
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !((?0..?9) === ch)
708
+ length = 0
709
+ length += 1 while ((?0..?9) === peek(length))
710
+ value = prefix(length)
711
+ forward(length)
712
+ value
713
+ end
714
+
715
+ def scan_tag_directive_value(start_mark)
716
+ # See the specification for details.
717
+ forward while peek == 32
718
+ handle = scan_tag_directive_handle(start_mark)
719
+ forward while peek == 32
720
+ prefix = scan_tag_directive_prefix(start_mark)
721
+ [handle, prefix]
722
+ end
723
+
724
+ def scan_tag_directive_handle(start_mark)
725
+ # See the specification for details.
726
+ value = scan_tag_handle("directive", start_mark)
727
+ ch = peek
728
+ raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if ch != 32
729
+ value
730
+ end
731
+
732
+ def scan_tag_directive_prefix(start_mark)
733
+ # See the specification for details.
734
+ value = scan_tag_uri("directive", start_mark)
735
+ ch = peek
736
+ raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if !"\0 \r\n\x85".include?(ch)
737
+ value
738
+ end
739
+
740
+ def scan_directive_ignored_line(start_mark)
741
+ # See the specification for details.
742
+ forward while peek == 32
743
+ if peek == ?#
744
+ forward while !"\0\r\n\x85".include?(peek)
745
+ end
746
+ ch = peek
747
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark()) if !"\0\r\n\x85".include?(ch)
748
+ scan_line_break
749
+ end
750
+
751
+ def scan_anchor(token)
752
+ # The specification does not restrict characters for anchors and
753
+ # aliases. This may lead to problems, for instance, the document:
754
+ # [ *alias, value ]
755
+ # can be interpteted in two ways, as
756
+ # [ "value" ]
757
+ # and
758
+ # [ *alias , "value" ]
759
+ # Therefore we restrict aliases to numbers and ASCII letters.
760
+ start_mark = get_mark
761
+ indicator = peek
762
+ name = (indicator == ?*) ? "alias":"anchor"
763
+ forward
764
+ length = 0
765
+ ch = peek(length)
766
+ while /[-0-9A-Za-z_]/ =~ ch.chr
767
+ length += 1
768
+ ch = peek(length)
769
+ end
770
+ raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark) if length==0
771
+ value = prefix(length)
772
+ forward(length)
773
+ ch = peek
774
+ if !"\0 \t\r\n\x85?:,]}%@`".include?(ch)
775
+ raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark)
776
+ end
777
+ end_mark = get_mark
778
+ token.new(value, start_mark, end_mark)
779
+ end
780
+
781
+
782
+ def scan_tag
783
+ # See the specification for details.
784
+ start_mark = get_mark
785
+ ch = peek(1)
786
+ if ch == ?<
787
+ handle = nil
788
+ forward(2)
789
+ suffix = scan_tag_uri("tag", start_mark)
790
+ raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if peek != ?>
791
+ forward
792
+ elsif "\0 \t\r\n\x85".include?(ch)
793
+ handle = nil
794
+ suffix = "!"
795
+ forward
796
+ else
797
+ length = 1
798
+ use_handle = false
799
+ while !"\0 \t\r\n\x85".include?(ch)
800
+ if ch == ?!
801
+ use_handle = true
802
+ break
803
+ end
804
+ length += 1
805
+ ch = peek(length)
806
+ end
807
+ handle = "!"
808
+ if use_handle
809
+ handle = scan_tag_handle("tag", start_mark)
810
+ else
811
+ handle = "!"
812
+ forward
813
+ end
814
+ suffix = scan_tag_uri("tag", start_mark)
815
+ end
816
+ ch = peek
817
+ raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{ch}",get_mark) if !"\0 \r\n\x85".include?(ch)
818
+ value = [handle, suffix]
819
+ end_mark = get_mark
820
+ TagToken.new(value, start_mark, end_mark)
821
+ end
822
+
823
+ def scan_block_scalar(style)
824
+ # See the specification for details.
825
+ folded = style== ?>
826
+ chunks = []
827
+ start_mark = get_mark
828
+ # Scan the header.
829
+ forward
830
+ chomping, increment = scan_block_scalar_indicators(start_mark)
831
+ scan_block_scalar_ignored_line(start_mark)
832
+ # Determine the indentation level and go to the first non-empty line.
833
+ min_indent = @indent+1
834
+ min_indent = 1 if min_indent < 1
835
+ if increment.nil?
836
+ breaks, max_indent, end_mark = scan_block_scalar_indentation
837
+ indent = [min_indent, max_indent].max
838
+ else
839
+ indent = min_indent+increment-1
840
+ breaks, end_mark = scan_block_scalar_breaks(indent)
841
+ end
842
+ line_break = ''
843
+ # Scan the inner part of the block scalar.
844
+ while @column == indent and peek != ?\0
845
+ chunks += breaks
846
+ leading_non_space = !" \t".include?(peek)
847
+ length = 0
848
+ length += 1 while !"\0\r\n\x85".include?(peek(length))
849
+ chunks << prefix(length)
850
+ forward(length)
851
+ line_break = scan_line_break
852
+ breaks, end_mark = scan_block_scalar_breaks(indent)
853
+ if @column == indent && peek != 0
854
+ # Unfortunately, folding rules are ambiguous.
855
+ #
856
+ # This is the folding according to the specification:
857
+ if folded && line_break == ?\n && leading_non_space && !" \t".include?(peek())
858
+ chunks << ' ' if breaks.empty?
859
+ else
860
+ chunks << line_break
861
+ end
862
+ # This is Clark Evans's interpretation (also in the spec
863
+ # examples):
864
+ #
865
+ #if folded and line_break == u'\n':
866
+ # if not breaks:
867
+ # if self.peek() not in ' \t':
868
+ # chunks.append(u' ')
869
+ # else:
870
+ # chunks.append(line_break)
871
+ #else:
872
+ # chunks.append(line_break)
873
+ else
874
+ break
875
+ end
876
+ end
877
+
878
+ # Chomp the tail.
879
+ if chomping
880
+ chunks << line_break
881
+ chunks += breaks
882
+ end
883
+
884
+ # We are done.
885
+ ScalarToken.new(chunks.join(''), false, start_mark, end_mark,style)
886
+ end
887
+
888
+ def scan_block_scalar_indicators(start_mark)
889
+ # See the specification for details.
890
+ chomping = nil
891
+ increment = nil
892
+ ch = peek
893
+ if /[+-]/ =~ ch.chr
894
+ chomping = ch == ?+
895
+ forward
896
+ ch = peek
897
+ if (?0..?9) === ch
898
+ increment = ch.to_i
899
+ raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
900
+ forward
901
+ end
902
+ elsif (?0..?9) === ch
903
+ increment = ch
904
+ raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
905
+ forward
906
+ ch = peek
907
+ if /[+-]/ =~ ch.chr
908
+ chomping = ch == ?+
909
+ forward
910
+ end
911
+ end
912
+ ch = peek
913
+ raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
914
+ [chomping, increment]
915
+ end
916
+
917
+ def scan_block_scalar_ignored_line(start_mark)
918
+ # See the specification for details.
919
+ forward while peek == 32
920
+ if peek == ?#
921
+ forward while !"\0\r\n\x85".include?(peek)
922
+ end
923
+ ch = peek
924
+
925
+ raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark) if !"\0\r\n\x85".include?(ch)
926
+ scan_line_break
927
+ end
928
+
929
+ def scan_block_scalar_indentation
930
+ # See the specification for details.
931
+ chunks = []
932
+ max_indent = 0
933
+ end_mark = get_mark
934
+ while " \r\n\x85".include?(peek)
935
+ if peek != 32
936
+ chunks << scan_line_break
937
+ end_mark = get_mark
938
+ else
939
+ forward
940
+ max_indent = @column if @column > max_indent
941
+ end
942
+ end
943
+ [chunks, max_indent, end_mark]
944
+ end
945
+
946
+ def scan_block_scalar_breaks(indent)
947
+ # See the specification for details.
948
+ chunks = []
949
+ end_mark = get_mark
950
+ forward while @column < indent && peek == 32
951
+ while "\r\n\x85".include?(peek)
952
+ chunks << scan_line_break
953
+ end_mark = get_mark
954
+ forward while @column < indent && peek == 32
955
+ end
956
+ [chunks, end_mark]
957
+ end
958
+
959
+ def scan_flow_scalar(style)
960
+ # See the specification for details.
961
+ # Note that we loose indentation rules for quoted scalars. Quoted
962
+ # scalars don't need to adhere indentation because " and ' clearly
963
+ # mark the beginning and the end of them. Therefore we are less
964
+ # restrictive then the specification requires. We only need to check
965
+ # that document separators are not included in scalars.
966
+ double = style == ?"
967
+ chunks = []
968
+ start_mark = get_mark
969
+ quote = peek
970
+ forward
971
+ chunks += scan_flow_scalar_non_spaces(double, start_mark)
972
+ while peek != quote
973
+ chunks += scan_flow_scalar_spaces(double, start_mark)
974
+ chunks += scan_flow_scalar_non_spaces(double, start_mark)
975
+ end
976
+ forward
977
+ end_mark = get_mark
978
+ ScalarToken.new(chunks.join(''), false, start_mark, end_mark,style)
979
+ end
980
+
981
    # Single-character escape sequences recognized in double-quoted scalars:
    # maps the character following the backslash to its replacement text
    # (e.g. "n" => newline, "N" => NEL, "_" => non-breaking space).
    # Multi-digit numeric escapes are handled separately via ESCAPE_CODES.
    ESCAPE_REPLACEMENTS = {
      "0" => "\0",
      "a" => "\x07",
      "b" => "\x08",
      "t" => "\x09",
      "\t" => "\x09",
      "n" => "\x0A",
      "v" => "\x0B",
      "f" => "\x0C",
      "r" => "\x0D",
      "e" => "\x1B",
      " " => "\x20",
      '"' => '"',
      "\\" => "\\",
      "N" => "\x85",
      "_" => "\xA0"
    }
998
+
999
    # Escape characters that introduce a fixed-length hexadecimal code:
    # \xNN consumes 2 hex digits. NOTE(review): the YAML spec (and PyYAML)
    # also define 'u' => 4 and 'U' => 8 digit escapes, which are absent
    # here -- confirm whether \u/\U support was intentionally omitted.
    ESCAPE_CODES = {
      'x' => 2
    }
1002
+
1003
+ def scan_flow_scalar_non_spaces(double, start_mark)
1004
+ # See the specification for details.
1005
+ chunks = []
1006
+ while true
1007
+ length = 0
1008
+ length += 1 while !"'\"\\\0 \t\r\n\x85".include?(peek(length))
1009
+ if length!=0
1010
+ chunks << prefix(length)
1011
+ forward(length)
1012
+ end
1013
+ ch = peek
1014
+ if !double && ch == ?' && peek(1) == ?'
1015
+ chunks << ?'
1016
+ forward(2)
1017
+ elsif (double && ch == ?') || (!double && "\"\\".include?(ch))
1018
+ chunks << ch
1019
+ forward
1020
+ elsif double && ch == ?\\
1021
+ forward
1022
+ ch = peek
1023
+ if ESCAPE_REPLACEMENTS.member?(ch.chr)
1024
+ chunks << ESCAPE_REPLACEMENTS[ch.chr]
1025
+ forward
1026
+ elsif ESCAPE_CODES.member?(ch.chr)
1027
+ length = ESCAPE_CODES[ch.chr]
1028
+ forward
1029
+ length.times do |k|
1030
+ if /[0-9A-Fa-f]/ !~ peek(k).chr
1031
+ raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
1032
+ "expected escape sequence of #{length} hexdecimal numbers, but found #{peek(k)}",get_mark)
1033
+ end
1034
+ end
1035
+ code = prefix(length).to_i.to_s(16)
1036
+ chunks << code
1037
+ forward(length)
1038
+ elsif "\r\n\x85".include?(ch)
1039
+ scan_line_break
1040
+ chunks += scan_flow_scalar_breaks(double, start_mark)
1041
+ else
1042
+ raise ScannerError.new("while scanning a double-quoted scalar", start_mark,"found unknown escape character #{ch}",get_mark)
1043
+ end
1044
+ else
1045
+ return chunks
1046
+ end
1047
+ end
1048
+ end
1049
+
1050
+ def scan_flow_scalar_spaces(double, start_mark)
1051
+ # See the specification for details.
1052
+ chunks = []
1053
+ length = 0
1054
+ length += 1 while /[ \t]/ =~ peek(length).chr
1055
+ whitespaces = prefix(length)
1056
+ forward(length)
1057
+ ch = peek
1058
+ if ch == ?\0
1059
+ raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected end of stream",get_mark)
1060
+ elsif "\r\n\x85".include?(ch)
1061
+ line_break = scan_line_break
1062
+ breaks = scan_flow_scalar_breaks(double, start_mark)
1063
+ if line_break != ?\n
1064
+ chunks << line_break
1065
+ elsif breaks.empty?
1066
+ chunks << ' '
1067
+ end
1068
+ chunks += breaks
1069
+ else
1070
+ chunks << whitespaces
1071
+ end
1072
+ chunks
1073
+ end
1074
+
1075
+ def scan_flow_scalar_breaks(double, start_mark)
1076
+ # See the specification for details.
1077
+ chunks = []
1078
+ while true
1079
+ # Instead of checking indentation, we check for document
1080
+ # separators.
1081
+ prefix = prefix(3)
1082
+ if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1083
+ raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected document separator", get_mark)
1084
+ end
1085
+ forward while /[ \t]/ =~ peek.chr
1086
+ if "\r\n\x85".include?(peek)
1087
+ chunks << scan_line_break
1088
+ else
1089
+ return chunks
1090
+ end
1091
+ end
1092
+ end
1093
+
1094
+ def scan_plain
1095
+ # See the specification for details.
1096
+ # We add an additional restriction for the flow context:
1097
+ # plain scalars in the flow context cannot contain ',', ':' and '?'.
1098
+ # We also keep track of the `allow_simple_key` flag here.
1099
+ # Indentation rules are loosed for the flow context.
1100
+ chunks = []
1101
+ start_mark = get_mark
1102
+ end_mark = start_mark
1103
+ indent = @indent+1
1104
+ # We allow zero indentation for scalars, but then we need to check for
1105
+ # document separators at the beginning of the line.
1106
+ #if indent == 0
1107
+ # indent = 1
1108
+ spaces = []
1109
+ while true
1110
+ length = 0
1111
+ break if peek == ?#
1112
+ while true
1113
+ ch = peek(length)
1114
+ if "\0 \t\r\n\x85".include?(ch) || (@flow_level==0 && ch == ?: && "\0 \t\r\n\x28".include?(peek(length+1))) || (@flow_level!=0 && ",:?[]{}".include?(ch))
1115
+ break
1116
+ end
1117
+ length += 1
1118
+ end
1119
+ if @flow_level != 0 && ch == ?: && !"\0 \t\r\n\x28[]{}".include?(peek(length+1))
1120
+ forward(length)
1121
+ raise ScannerError.new("while scanning a plain scalar",start_mark,"found unexpected ':'",get_mark,"Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.")
1122
+ end
1123
+ break if length == 0
1124
+ @allow_simple_key = false
1125
+ chunks += spaces
1126
+ chunks << prefix(length)
1127
+ forward(length)
1128
+ end_mark = get_mark
1129
+ spaces = scan_plain_spaces(indent, start_mark)
1130
+ break if spaces.nil? || spaces.empty? || peek == ?# || (@flow_level==0 && @column < indent)
1131
+ end
1132
+ return ScalarToken.new(chunks.join(''), true, start_mark, end_mark)
1133
+ end
1134
+
1135
+ def scan_plain_spaces(indent, start_mark)
1136
+ # See the specification for details.
1137
+ # The specification is really confusing about tabs in plain scalars.
1138
+ # We just forbid them completely. Do not use tabs in YAML!
1139
+ chunks = []
1140
+ length = 0
1141
+ length += 1 while peek(length) == 32
1142
+ whitespaces = prefix(length)
1143
+ forward(length)
1144
+ ch = peek
1145
+ if "\r\n\x85".include?(ch)
1146
+ line_break = scan_line_break
1147
+ @allow_simple_key = true
1148
+ prefix = prefix(3)
1149
+ return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1150
+ breaks = []
1151
+ while " \r\n\x85".include?(peek)
1152
+ if peek == 32
1153
+ forward
1154
+ else
1155
+ breaks << scan_line_break
1156
+ prefix = prefix(3)
1157
+ return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1158
+ end
1159
+ end
1160
+ if line_break != '\n'
1161
+ chunks << line_break
1162
+ elsif breaks.empty?
1163
+ chunks << ' '
1164
+ end
1165
+ chunks += breaks
1166
+ elsif !whitespaces.empty?
1167
+ chunks << whitespaces
1168
+ end
1169
+ chunks
1170
+ end
1171
+
1172
+ def scan_tag_handle(name, start_mark)
1173
+ # See the specification for details.
1174
+ # For some strange reasons, the specification does not allow '_' in
1175
+ # tag handles. I have allowed it anyway.
1176
+ ch = peek
1177
+ raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark) if ch != ?!
1178
+ length = 1
1179
+ ch = peek(length)
1180
+ if ch != 32
1181
+ while /[-_0-9A-Za-z]/ =~ ch.chr
1182
+ length += 1
1183
+ ch = peek(length)
1184
+ end
1185
+ if ch != ?!
1186
+ forward(length)
1187
+ raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark)
1188
+ end
1189
+ length += 1
1190
+ end
1191
+ value = prefix(length)
1192
+ forward(length)
1193
+ value
1194
+ end
1195
+
1196
+ def scan_tag_uri(name, start_mark)
1197
+ # See the specification for details.
1198
+ # Note: we do not check if URI is well-formed.
1199
+ chunks = []
1200
+ length = 0
1201
+ ch = peek(length)
1202
+ while /[\]\[\-';\/?:@&=+$,.!~*()%\w]/ =~ ch.chr
1203
+ if ch == ?%
1204
+ chunks << prefix(length)
1205
+ forward(length)
1206
+ length = 0
1207
+ chunks << scan_uri_escapes(name, start_mark)
1208
+ else
1209
+ length += 1
1210
+ end
1211
+ ch = peek(length)
1212
+ end
1213
+ if length!=0
1214
+ chunks << prefix(length)
1215
+ forward(length)
1216
+ length = 0
1217
+ end
1218
+
1219
+ raise ScannerError.new("while parsing a #{name}", start_mark,"expected URI, but found #{ch}",get_mark) if chunks.empty?
1220
+ chunks.join('')
1221
+ end
1222
+
1223
+ def scan_uri_escapes(name, start_mark)
1224
+ # See the specification for details.
1225
+ bytes = []
1226
+ mark = get_mark
1227
+ while peek == ?%
1228
+ forward
1229
+ 2.times do |k|
1230
+ raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek(k)}",
1231
+ get_mark) if /[0-9A-Fa-f]/ !~ peek(k).chr
1232
+ end
1233
+ bytes << prefix(2).to_i.to_s(16)
1234
+ forward(2)
1235
+ end
1236
+ bytes.join('')
1237
+ end
1238
+
1239
+ def scan_line_break
1240
+ # Transforms:
1241
+ # '\r\n' : '\n'
1242
+ # '\r' : '\n'
1243
+ # '\n' : '\n'
1244
+ # '\x85' : '\n'
1245
+ # default : ''
1246
+ ch = peek
1247
+ if "\r\n\x85".include?(ch)
1248
+ if prefix(2) == "\r\n"
1249
+ forward(2)
1250
+ else
1251
+ forward
1252
+ end
1253
+ return "\n"
1254
+ end
1255
+ ""
1256
+ end
1257
+ end
1258
+ end
1259
+