RbYAML 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/README +1 -1
  2. data/lib/rbyaml/composer.rb +28 -25
  3. data/lib/rbyaml/composer.rb.~1.2.~ +109 -0
  4. data/lib/rbyaml/constructor.rb +94 -84
  5. data/lib/rbyaml/constructor.rb.~1.2.~ +381 -0
  6. data/lib/rbyaml/dumper.rb +10 -17
  7. data/lib/rbyaml/dumper.rb.~1.2.~ +43 -0
  8. data/lib/rbyaml/emitter.rb +13 -26
  9. data/lib/rbyaml/emitter.rb.~1.2.~ +1116 -0
  10. data/lib/rbyaml/error.rb +15 -21
  11. data/lib/rbyaml/events.rb +29 -5
  12. data/lib/rbyaml/events.rb.~1.2.~ +93 -0
  13. data/lib/rbyaml/loader.rb +11 -23
  14. data/lib/rbyaml/loader.rb.~1.2.~ +52 -0
  15. data/lib/rbyaml/nodes.rb +13 -9
  16. data/lib/rbyaml/nodes.rb.~1.2.~ +52 -0
  17. data/lib/rbyaml/parser.rb +481 -343
  18. data/lib/rbyaml/parser.rb.old +531 -0
  19. data/lib/rbyaml/parser.rb.~1.2.~ +494 -0
  20. data/lib/rbyaml/reader.rb.~1.1.1.1.~ +127 -0
  21. data/lib/rbyaml/representer.rb +26 -17
  22. data/lib/rbyaml/representer.rb.~1.2.~ +239 -0
  23. data/lib/rbyaml/resolver.rb +15 -15
  24. data/lib/rbyaml/resolver.rb.~1.1.~ +163 -0
  25. data/lib/rbyaml/scanner.rb +457 -366
  26. data/lib/rbyaml/scanner.rb.~1.2.~ +1259 -0
  27. data/lib/rbyaml/serializer.rb +19 -17
  28. data/lib/rbyaml/serializer.rb.~1.2.~ +115 -0
  29. data/lib/rbyaml/tokens.rb +44 -4
  30. data/lib/rbyaml/tokens.rb.~1.2.~ +164 -0
  31. data/lib/rbyaml/util.rb +28 -0
  32. data/lib/rbyaml/yaml.rb +12 -12
  33. data/lib/rbyaml/yaml.rb.~1.2.~ +136 -0
  34. data/test/test_bm.rb +28 -0
  35. data/test/test_bm_syck.rb +28 -0
  36. data/test/test_invoke.rb +31 -0
  37. data/test/test_one.rb +5 -0
  38. data/test/test_profile.rb +32 -0
  39. data/test/test_rbyaml.rb +2 -1
  40. data/test/test_rbyaml.rb.~1.2.~ +31 -0
  41. data/test/test_time.rb +13 -8
  42. data/test/test_time.rb.~1.1.~ +29 -0
  43. data/test/yamlx.rb +3563 -0
  44. metadata +27 -2
@@ -0,0 +1,1259 @@
1
+ # Scanner produces tokens of the following types:
2
+ # STREAM-START
3
+ # STREAM-END
4
+ # DIRECTIVE(name, value)
5
+ # DOCUMENT-START
6
+ # DOCUMENT-END
7
+ # BLOCK-SEQUENCE-START
8
+ # BLOCK-MAPPING-START
9
+ # BLOCK-END
10
+ # FLOW-SEQUENCE-START
11
+ # FLOW-MAPPING-START
12
+ # FLOW-SEQUENCE-END
13
+ # FLOW-MAPPING-END
14
+ # BLOCK-ENTRY
15
+ # FLOW-ENTRY
16
+ # KEY
17
+ # VALUE
18
+ # ALIAS(value)
19
+ # ANCHOR(value)
20
+ # TAG(value)
21
+ # SCALAR(value, plain)
22
+ #
23
+ # Read comments in the Scanner code for more details.
24
+ #
25
+
26
+ require 'rbyaml/error'
27
+ require 'rbyaml/tokens'
28
+
29
+ module RbYAML
30
# Raised (with position marks) for any lexical problem found while scanning.
class ScannerError < MarkedYAMLError
end
32
+
33
# Value object recording a position where a simple key (a key that is not
# introduced by the '?' indicator) may begin.
class SimpleKey
  attr_reader :token_number, :required, :index, :line, :column, :mark

  def initialize(token_number, required, index, line, column, mark)
    @token_number, @required, @index, @line, @column, @mark =
      token_number, required, index, line, column, mark
  end
end
45
+
46
+ module Scanner
47
# Set up scanner state.  Scanner and Reader mix into the same object;
# Reader supplies peek(i=0), prefix(l=1) and forward(l=1) and handles the
# BOM plus the trailing NUL.
def initialize_scanner
  @done = false             # reached the end of the stream?
  @flow_level = 0           # number of unclosed '[' and '{'; 0 == block context
  @tokens = []              # scanned tokens not yet handed out
  fetch_stream_start        # STREAM-START is always the first token
  @tokens_taken = 0         # tokens emitted through get_token
  @indent = -1              # current indentation level
  @indents = []             # past indentation levels
  # Simple-key bookkeeping.  A simple key is a key without the '?'
  # indicator; it must fit on one line within 1024 characters.  We emit
  # KEY before all keys, so a candidate is remembered until its ':' is
  # found.  @allow_simple_key: may one start at the current position
  # (start of line in block context, after '{'/'['/',' in flow, after
  # '?'/':'/'-' in block)?  In block context it also signals that a block
  # collection may start here.
  @allow_simple_key = true
  # Map of flow_level -> SimpleKey candidate (at most one per level).
  # Candidates start with ALIAS, ANCHOR, TAG, flow SCALAR, '[' or '{'.
  @possible_simple_keys = {}
end
108
+
109
# True when the next token matches one of +choices+ (any token at all when
# no choices are given); false when the stream is exhausted.
def check_token(*choices)
  fetch_more_tokens while need_more_tokens
  return false if @tokens.empty?
  return true if choices.empty?
  choices.any? { |choice| choice === @tokens[0] }
end
120
+
121
# Return the next token without removing it from the queue (nil at EOS).
def peek_token
  fetch_more_tokens while need_more_tokens
  @tokens.first
end
126
+
127
# Remove and return the next token (nil at EOS), counting it as taken.
def get_token
  fetch_more_tokens while need_more_tokens
  return nil if @tokens.empty?
  @tokens_taken += 1
  @tokens.shift
end
135
+
136
# Yield every remaining token in order, fetching more as needed.
def each_token
  fetch_more_tokens while need_more_tokens
  until @tokens.empty?
    @tokens_taken += 1
    yield @tokens.shift
    fetch_more_tokens while need_more_tokens
  end
end
144
+
145
# More tokens must be fetched when the queue is empty, or when its head
# might still be part of an unresolved simple key.
def need_more_tokens
  return false if @done
  return true if @tokens.empty?
  # Prune stale simple-key candidates before inspecting the nearest one.
  stale_possible_simple_keys
  true if next_possible_simple_key == @tokens_taken
end
153
+
154
# Skip insignificant characters, then dispatch on the next significant one
# and fetch the token(s) it introduces.
# NOTE: the original used the `when cond: expr` colon separator, which is a
# syntax error from Ruby 1.9 on; `then` is equivalent and valid in 1.8 too.
def fetch_more_tokens
  # Eat whitespace and comments up to the next token.
  scan_to_next_token
  # Remove obsolete possible simple keys.
  stale_possible_simple_keys
  # May pop indentation levels and emit BLOCK-END tokens.
  unwind_indent(@column)
  ch = peek
  return case
         when ch == ?\0 then fetch_stream_end
         when ch == ?% && check_directive then fetch_directive
         when ch == ?- && check_document_start then fetch_document_start
         when ch == ?. && check_document_end then fetch_document_end
         when ch == ?[ then fetch_flow_sequence_start
         when ch == ?{ then fetch_flow_mapping_start
         when ch == ?] then fetch_flow_sequence_end
         when ch == ?} then fetch_flow_mapping_end
         when ch == ?, then fetch_flow_entry
         when ch == ?- && check_block_entry then fetch_block_entry
         when ch == ?? && check_key then fetch_key
         when ch == ?: && check_value then fetch_value
         when ch == ?* then fetch_alias
         when ch == ?& then fetch_anchor
         when ch == ?! then fetch_tag
         # Block scalars ('|' literal, '>' folded) exist only in block context.
         when ch == ?| && @flow_level==0 then fetch_literal
         when ch == ?> && @flow_level==0 then fetch_folded
         when ch == ?' then fetch_single
         when ch == ?" then fetch_double
         when check_plain then fetch_plain
         else raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
         end
end
212
+
213
+ # Simple keys treatment.
214
+
215
# Token number of the nearest possible simple key, or nil when none is
# pending.
def next_possible_simple_key
  numbers = @possible_simple_keys.values.map { |key| key.token_number }
  numbers.min
end
227
+
228
# Drop simple-key candidates that can no longer apply: per the YAML spec a
# simple key must stay on one line and within 1024 characters.  Raises
# ScannerError when a *required* key goes stale.
# BUG FIX: the original used `return true`/`return false` inside the
# delete_if block; `return` in a block exits the enclosing *method*, so the
# method bailed out on the first entry and never pruned anything.  The
# block must simply evaluate to the boolean.  (Also fixes the "could not
# found" typo in the error message.)
def stale_possible_simple_keys
  @possible_simple_keys.delete_if do |level, key|
    stale = key.line != @line || @index - key.index > 1024
    if stale && key.required
      raise ScannerError.new("while scanning a simple key", key.mark, "could not find expected ':'", get_mark)
    end
    stale
  end
end
243
+
244
# Record that the next token may start a simple key.  Called for ALIAS,
# ANCHOR, TAG, flow SCALAR, '[' and '{'.
def save_possible_simple_key
  # A simple key is *required* when we sit exactly at the current
  # indentation level in block context.
  required = @flow_level == 0 && @indent == @column
  return unless @allow_simple_key
  remove_possible_simple_key
  token_number = @tokens_taken + @tokens.length
  @possible_simple_keys[@flow_level] =
    SimpleKey.new(token_number, required, @index, @line, @column, get_mark)
end
261
+
262
# Drop the saved possible simple key for the current flow level, returning
# it (or nil when none was saved).
# BUG FIX: the original merely read the entry into a local and never
# deleted it, making this method a no-op despite its name and despite the
# reference implementation (PyYAML) deleting the entry here.
def remove_possible_simple_key
  @possible_simple_keys.delete(@flow_level)
end
266
+
267
+ # Indentation functions.
268
+
269
# Pop indentation levels deeper than +column+, emitting a BLOCK-END token
# for each.  In flow context indentation is ignored entirely — looser than
# the spec requires, which permits constructs like
#   key : {
#   }
def unwind_indent(column)
  return nil if @flow_level != 0
  while @indent > column
    mark = get_mark
    @indent = @indents.pop
    @tokens << BlockEndToken.new(mark, mark)
  end
end
291
+
292
# Adopt +column+ as the new indentation level when it is deeper than the
# current one, pushing the old level; returns whether it changed.
def add_indent(column)
  return false unless @indent < column
  @indents << @indent
  @indent = column
  true
end
301
+
302
+ # Fetchers.
303
+
304
# STREAM-START is always the very first token (STREAM-END the last).
def fetch_stream_start
  mark = get_mark
  @tokens << StreamStartToken.new(mark, mark, @encoding)
end
312
+
313
+
314
# Emit the final STREAM-END token and mark the stream finished.
def fetch_stream_end
  # Unwind all remaining indentation.
  unwind_indent(-1)
  # Reset simple-key state (not strictly needed at EOS).
  @allow_simple_key = false
  @possible_simple_keys = {}
  mark = get_mark
  @tokens << StreamEndToken.new(mark, mark)
  @done = true
end
327
+
328
# Scan and queue a DIRECTIVE token ('%...').
def fetch_directive
  unwind_indent(-1)
  # No simple key may span a directive line.
  remove_possible_simple_key
  @allow_simple_key = false
  @tokens << scan_directive
end
337
+
338
# '---' starts a document, '...' ends one.
def fetch_document_start
  fetch_document_indicator(DocumentStartToken)
end

def fetch_document_end
  fetch_document_indicator(DocumentEndToken)
end
345
+
346
# Shared body for DOCUMENT-START/DOCUMENT-END: consume the three indicator
# characters and queue the given token class.
def fetch_document_indicator(token)
  unwind_indent(-1)
  # No block collection (and no simple key) may follow '---'/'...'.
  remove_possible_simple_key
  @allow_simple_key = false
  start_mark = get_mark
  forward(3)
  end_mark = get_mark
  @tokens << token.new(start_mark, end_mark)
end
359
+
360
# '[' and '{' open flow collections.
def fetch_flow_sequence_start
  fetch_flow_collection_start(FlowSequenceStartToken)
end

def fetch_flow_mapping_start
  fetch_flow_collection_start(FlowMappingStartToken)
end
367
+
368
# Shared body for FLOW-SEQUENCE-START / FLOW-MAPPING-START.
def fetch_flow_collection_start(token)
  # '[' / '{' may themselves act as a simple key.
  save_possible_simple_key
  @flow_level += 1
  # Simple keys are allowed right after '[' and '{'.
  @allow_simple_key = true
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << token.new(start_mark, end_mark)
end
381
+
382
# ']' and '}' close flow collections.
def fetch_flow_sequence_end
  fetch_flow_collection_end(FlowSequenceEndToken)
end

def fetch_flow_mapping_end
  fetch_flow_collection_end(FlowMappingEndToken)
end
389
+
390
# Shared body for FLOW-SEQUENCE-END / FLOW-MAPPING-END.
def fetch_flow_collection_end(token)
  remove_possible_simple_key
  @flow_level -= 1
  # No simple key may directly follow ']' or '}'.
  @allow_simple_key = false
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << token.new(start_mark, end_mark)
end
403
+
404
# ',' separates flow entries and re-enables simple keys.
def fetch_flow_entry
  @allow_simple_key = true
  remove_possible_simple_key
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << FlowEntryToken.new(start_mark, end_mark)
end
415
+
416
# '-' introduces a block sequence entry.
def fetch_block_entry
  if @flow_level==0
    # '-' may only begin an entry where a simple key could start.
    raise ScannerError.new(nil,nil,"sequence entries are not allowed here",get_mark) unless @allow_simple_key
    # Open a BLOCK-SEQUENCE if this is a deeper indentation level.
    if add_indent(@column)
      mark = get_mark
      @tokens << BlockSequenceStartToken.new(mark, mark)
    end
  end
  # A block entry in flow context is an error, but the parser reports it.
  # Simple keys are allowed after '-'.
  @allow_simple_key = true
  remove_possible_simple_key
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << BlockEntryToken.new(start_mark, end_mark)
end
438
+
439
# '?' introduces an explicit (complex) mapping key.
def fetch_key
  if @flow_level==0
    # A key (not necessarily simple) may only start where a simple key could.
    raise ScannerError.new(nil,nil,"mapping keys are not allowed here",get_mark) unless @allow_simple_key
    # Open a BLOCK-MAPPING if this is a deeper indentation level.
    if add_indent(@column)
      mark = get_mark
      @tokens << BlockMappingStartToken.new(mark, mark)
    end
  end
  # After '?' a simple key may follow only in block context.
  @allow_simple_key = @flow_level==0
  remove_possible_simple_key
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << KeyToken.new(start_mark, end_mark)
end
460
+
461
# ':' — emit VALUE, retroactively inserting KEY (and possibly
# BLOCK-MAPPING-START) when a saved simple-key candidate resolves here.
def fetch_value
  # Do we determine a simple key?
  if @possible_simple_keys.include?(@flow_level)
    # Add KEY at the position remembered for the candidate.
    key = @possible_simple_keys[@flow_level]
    @possible_simple_keys.delete(@flow_level)
    @tokens.insert(key.token_number-@tokens_taken,KeyToken.new(key.mark, key.mark))
    # If this key starts a new block mapping, we need to add
    # BLOCK-MAPPING-START.  Inserting at the same offset places it
    # immediately *before* the KEY token just inserted.
    @tokens.insert(key.token_number-@tokens_taken,BlockMappingStartToken.new(key.mark, key.mark)) if @flow_level==0 && add_indent(key.column)
    # There cannot be two simple keys one after another.
    @allow_simple_key = false
    # It must be a part of a complex key.
  else
    # Block context needs additional checks.
    # (Arguably redundant: the parser would catch these anyway.)
    if @flow_level==0
      # We are allowed to start a complex value if and only if
      # we can start a simple key.
      raise ScannerError.new(nil,nil,"mapping values are not allowed here",get_mark) if !@allow_simple_key
      # Simple keys are allowed after ':' in the block context.
      @allow_simple_key = @flow_level==0
      # Reset possible simple key on the current level.
      remove_possible_simple_key
    end
  end
  # Add VALUE.
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << ValueToken.new(start_mark, end_mark)
end
494
+
495
# '*' — an alias, which may itself serve as a simple key.
def fetch_alias
  save_possible_simple_key
  # Nothing may start a simple key right after an alias.
  @allow_simple_key = false
  @tokens << scan_anchor(AliasToken)
end
503
+
504
# '&' — an anchor, which may start a simple key.
def fetch_anchor
  save_possible_simple_key
  # No simple keys directly after an anchor.
  @allow_simple_key = false
  @tokens << scan_anchor(AnchorToken)
end
512
+
513
# '!' — a tag, which may start a simple key.
def fetch_tag
  save_possible_simple_key
  # No simple keys directly after a tag.
  @allow_simple_key = false
  @tokens << scan_tag
end
521
+
522
# '|' introduces a literal block scalar, '>' a folded one.
def fetch_literal
  fetch_block_scalar(?|)
end

def fetch_folded
  fetch_block_scalar(?>)
end
529
+
530
# Shared body for literal/folded block scalars.
def fetch_block_scalar(style)
  # A simple key may follow a block scalar.
  @allow_simple_key = true
  remove_possible_simple_key
  @tokens << scan_block_scalar(style)
end
538
+
539
# Quoted flow scalars: ' for single, " for double.
def fetch_single
  fetch_flow_scalar(?')
end

def fetch_double
  fetch_flow_scalar(?")
end
546
+
547
# Shared body for quoted scalars; a quoted scalar may act as a simple key.
def fetch_flow_scalar(style)
  save_possible_simple_key
  # No simple keys directly after a flow scalar.
  @allow_simple_key = false
  @tokens << scan_flow_scalar(style)
end
555
+
556
# A plain scalar — it may itself be a simple key.
def fetch_plain
  save_possible_simple_key
  # No simple keys after plain scalars; scan_plain may flip this back on
  # when the scalar ends at the beginning of a line.
  @allow_simple_key = false
  @tokens << scan_plain
end
566
+
567
+ # Checkers.
568
+
569
# A '%' starts a DIRECTIVE only at the beginning of a line.
def check_directive
  @column == 0
end
574
+
575
# DOCUMENT-START: '---' at column 0 followed by space/break/EOF.
def check_document_start
  return false unless @column == 0
  prefix(3) == "---" && "\0 \t\r\n\x85".include?(peek(3))
end
579
+
580
# DOCUMENT-END: '...' at column 0 followed by space/break/EOF.
def check_document_end
  return false unless @column == 0
  prefix(3) == "..." && "\0 \t\r\n\x85".include?(peek(3))
end
584
+
585
# '-' is a BLOCK-ENTRY only when followed by whitespace, a break, or EOF.
def check_block_entry
  following = peek(1)
  "\0 \t\r\n\x85".include?(following)
end
589
+
590
# '?' is always KEY in flow context; in block context it must be followed
# by whitespace, a break, or EOF.
def check_key
  return true if @flow_level != 0
  "\0 \t\r\n\x85".include?(peek(1))
end
595
+
596
# ':' is always VALUE in flow context; in block context it must be
# followed by whitespace, a break, or EOF.
def check_value
  return true if @flow_level != 0
  "\0 \t\r\n\x85".include?(peek(1))
end
601
+
602
# May a plain scalar start here?  Any non-space character except the
# indicators '-?:,[]{}#&*!|>'"%@`' qualifies; '-' (and, in block context,
# '?' and ':') also qualify when followed by a non-space character.
# BUG FIX: the original negated the whole second conjunction —
# `!(break.include?(peek(1)) && (...))` — which made the disjunct true for
# almost every character, so indicators like '#' passed as plain-scalar
# starts.  The negation belongs on the lookahead test only (cf. PyYAML:
# `peek(1) not in breaks and (ch == '-' or ...)`).
def check_plain
  ch = peek
  !("\0 \t\r\n\x85-?:,[]{}#&*!|>'\"%@`".include?(ch)) ||
    (!"\0 \t\r\n\x85".include?(peek(1)) &&
     (ch == ?- || (@flow_level==0 && "?:".include?(ch))))
end
618
+
619
+
620
+
621
+
622
+
623
+
624
+ # Scanners.
625
+
626
# Skip spaces, comments and line breaks until the next token.  A line
# break in block context re-enables simple keys.
# TODO (carried over): tab handling — tabs before block-structure tokens
# should clear @allow_simple_key, with a matching check in unwind_indent.
def scan_to_next_token
  found = false
  until found
    forward while peek == 32
    if peek == ?#
      forward until "\0\r\n\x85".include?(peek)
    end
    if scan_line_break.empty?
      found = true
    else
      @allow_simple_key = true if @flow_level==0
    end
  end
end
656
+
657
# Scan a DIRECTIVE token: '%' name [value] [comment] line-break.
def scan_directive
  start_mark = get_mark
  forward
  name = scan_directive_name(start_mark)
  value = nil
  case name
  when "YAML"
    value = scan_yaml_directive_value(start_mark)
    end_mark = get_mark
  when "TAG"
    value = scan_tag_directive_value(start_mark)
    end_mark = get_mark
  else
    # Unknown directive: skip the rest of the line.
    end_mark = get_mark
    forward until "\0\r\n\x85".include?(peek)
  end
  scan_directive_ignored_line(start_mark)
  DirectiveToken.new(name, value, start_mark, end_mark)
end
676
+
677
# A directive name is a non-empty run of [-0-9A-Za-z_], terminated by
# space/break/EOF.
def scan_directive_name(start_mark)
  length = 0
  ch = peek(length)
  while /[-0-9A-Za-z_]/ =~ ch.chr
    length += 1
    ch = peek(length)
  end
  if length == 0
    raise ScannerError.new("while scanning a directive", start_mark, "expected alphabetic or numeric character, but found #{ch.to_s}", get_mark)
  end
  value = prefix(length)
  forward(length)
  ch = peek
  unless "\0 \r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a directive", start_mark, "expected alphabetic or numeric character, but found #{ch.to_s}", get_mark)
  end
  value
end
692
+
693
# YAML directive value: MAJOR '.' MINOR.
def scan_yaml_directive_value(start_mark)
  forward while peek == 32
  major = scan_yaml_directive_number(start_mark)
  if peek != ?.
    raise ScannerError.new("while scanning a directive", start_mark, "expected a digit or '.', but found #{peek.to_s}", get_mark)
  end
  forward
  minor = scan_yaml_directive_number(start_mark)
  unless "\0 \r\n\x85".include?(peek)
    raise ScannerError.new("while scanning a directive", start_mark, "expected a digit or ' ', but found #{peek.to_s}", get_mark)
  end
  [major, minor]
end
703
+
704
# A non-empty run of ASCII digits; returns the matched substring.
def scan_yaml_directive_number(start_mark)
  ch = peek
  unless (?0..?9) === ch
    raise ScannerError.new("while scanning a directive", start_mark, "expected a digit, but found #{ch.to_s}", get_mark)
  end
  length = 0
  length += 1 while (?0..?9) === peek(length)
  digits = prefix(length)
  forward(length)
  digits
end
714
+
715
# TAG directive value: HANDLE PREFIX.
def scan_tag_directive_value(start_mark)
  forward while peek == 32
  handle = scan_tag_directive_handle(start_mark)
  forward while peek == 32
  [handle, scan_tag_directive_prefix(start_mark)]
end
723
+
724
# Scan a tag handle; it must be followed by a space (character code 32).
def scan_tag_directive_handle(start_mark)
  value = scan_tag_handle("directive", start_mark)
  ch = peek
  if ch != 32
    raise ScannerError.new("while scanning a directive", start_mark, "expected ' ', but found #{ch}", get_mark())
  end
  value
end
731
+
732
# Scan a tag prefix URI; it must be followed by space/break/EOF.
def scan_tag_directive_prefix(start_mark)
  value = scan_tag_uri("directive", start_mark)
  ch = peek
  unless "\0 \r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a directive", start_mark, "expected ' ', but found #{ch}", get_mark())
  end
  value
end
739
+
740
# Consume trailing spaces and an optional comment, then the line break.
def scan_directive_ignored_line(start_mark)
  forward while peek == 32
  if peek == ?#
    forward until "\0\r\n\x85".include?(peek)
  end
  ch = peek
  unless "\0\r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a directive", start_mark, "expected a comment or a line break, but found #{ch.to_s}", get_mark())
  end
  scan_line_break
end
750
+
751
# Scan an ALIAS ('*name') or ANCHOR ('&name') token.  Names are limited
# to [-0-9A-Za-z_] so that, e.g., "[ *alias, value ]" is unambiguous (the
# spec itself does not restrict anchor characters).
def scan_anchor(token)
  start_mark = get_mark
  name = peek == ?* ? "alias" : "anchor"
  forward
  length = 0
  length += 1 while /[-0-9A-Za-z_]/ =~ peek(length).chr
  if length == 0
    raise ScannerError.new("while scanning an #{name}", start_mark, "expected alphabetic or numeric character, but found #{peek(length)}", get_mark)
  end
  value = prefix(length)
  forward(length)
  ch = peek
  unless "\0 \t\r\n\x85?:,]}%@`".include?(ch)
    raise ScannerError.new("while scanning an #{name}", start_mark, "expected alphabetic or numeric character, but found #{ch}", get_mark)
  end
  end_mark = get_mark
  token.new(value, start_mark, end_mark)
end
780
+
781
+
782
# Scan a TAG token: verbatim '!<uri>', the bare '!', or handle+suffix.
# Returns a TagToken whose value is the pair [handle, suffix].
def scan_tag
  # See the specification for details.
  start_mark = get_mark
  ch = peek(1)
  if ch == ?<
    # Verbatim tag: '!<' URI '>'.
    handle = nil
    forward(2)
    suffix = scan_tag_uri("tag", start_mark)
    raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if peek != ?>
    forward
  elsif "\0 \t\r\n\x85".include?(ch)
    # A lone '!' is the non-specific tag.
    handle = nil
    suffix = "!"
    forward
  else
    # Distinguish '!suffix' from '!handle!suffix': look for a second '!'
    # before the next whitespace/break.
    length = 1
    use_handle = false
    while !"\0 \t\r\n\x85".include?(ch)
      if ch == ?!
        use_handle = true
        break
      end
      length += 1
      ch = peek(length)
    end
    handle = "!"
    if use_handle
      handle = scan_tag_handle("tag", start_mark)
    else
      handle = "!"
      forward
    end
    suffix = scan_tag_uri("tag", start_mark)
  end
  # A tag must be followed by space/break/EOF.
  ch = peek
  raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{ch}",get_mark) if !"\0 \r\n\x85".include?(ch)
  value = [handle, suffix]
  end_mark = get_mark
  TagToken.new(value, start_mark, end_mark)
end
822
+
823
# Scan a literal ('|') or folded ('>') block scalar into a ScalarToken.
# BUG FIXES relative to the original:
#  * `line_break == ?\n` compared a String to a Fixnum under Ruby 1.8, so
#    the folding branch could never trigger — compare with the string "\n".
#  * one loop guard used `peek != 0` while the other used `peek != ?\0`;
#    normalized to `?\0`.
#  * the chomping tail treated clip (nil) like strip (false); per the
#    reference implementation (PyYAML), clip keeps the final line break and
#    only keep ('+') also retains the trailing empty lines.
def scan_block_scalar(style)
  folded = style == ?>
  chunks = []
  start_mark = get_mark
  # Scan the header: indicators plus the rest of the line.
  forward
  chomping, increment = scan_block_scalar_indicators(start_mark)
  scan_block_scalar_ignored_line(start_mark)
  # Determine the indentation level and go to the first non-empty line.
  min_indent = @indent+1
  min_indent = 1 if min_indent < 1
  if increment.nil?
    breaks, max_indent, end_mark = scan_block_scalar_indentation
    indent = [min_indent, max_indent].max
  else
    indent = min_indent+increment-1
    breaks, end_mark = scan_block_scalar_breaks(indent)
  end
  line_break = ''
  # Scan the inner part of the block scalar.
  while @column == indent && peek != ?\0
    chunks += breaks
    leading_non_space = !" \t".include?(peek)
    length = 0
    length += 1 while !"\0\r\n\x85".include?(peek(length))
    chunks << prefix(length)
    forward(length)
    line_break = scan_line_break
    breaks, end_mark = scan_block_scalar_breaks(indent)
    if @column == indent && peek != ?\0
      # Folding per the specification: join with a single space only when
      # both surrounding lines are "normal" (more-indented/blank lines
      # keep their breaks).
      if folded && line_break == "\n" && leading_non_space && !" \t".include?(peek)
        chunks << ' ' if breaks.empty?
      else
        chunks << line_break
      end
    else
      break
    end
  end
  # Chomp the tail: clip (nil) keeps the final break, keep (true) also
  # keeps trailing empty lines, strip (false) drops both.
  chunks << line_break if chomping != false
  chunks += breaks if chomping
  ScalarToken.new(chunks.join(''), false, start_mark, end_mark, style)
end
887
+
888
# Parse the optional chomping ('+'/'-') and indentation (1-9) indicators of
# a block-scalar header, in either order.  Returns [chomping, increment]
# where chomping is true (keep), false (strip) or nil (clip), and increment
# is an Integer 1-9 or nil.
# BUG FIX: the original stored `ch.to_i` (resp. the raw `ch`), which under
# Ruby 1.8 is the *character code* (e.g. ?3 -> 51), not the digit value;
# `ch.chr.to_i` yields the digit under both 1.8 (Fixnum#chr) and modern
# Ruby (String#chr).
def scan_block_scalar_indicators(start_mark)
  chomping = nil
  increment = nil
  ch = peek
  if /[+-]/ =~ ch.chr
    chomping = ch == ?+
    forward
    ch = peek
    if (?0..?9) === ch
      increment = ch.chr.to_i
      raise ScannerError.new("while scanning a block scalar", start_mark, "expected indentation indicator in the range 1-9, but found 0", get_mark) if increment == 0
      forward
    end
  elsif (?0..?9) === ch
    increment = ch.chr.to_i
    raise ScannerError.new("while scanning a block scalar", start_mark, "expected indentation indicator in the range 1-9, but found 0", get_mark) if increment == 0
    forward
    ch = peek
    if /[+-]/ =~ ch.chr
      chomping = ch == ?+
      forward
    end
  end
  # The indicators must be followed by space/break/EOF.
  ch = peek
  raise ScannerError.new("while scanning a block scalar", start_mark, "expected chomping or indentation indicators, but found #{ch.to_s}", get_mark) if !"\0 \r\n\x85".include?(ch)
  [chomping, increment]
end
916
+
917
# After the block-scalar header only spaces and a comment may precede the
# line break.
def scan_block_scalar_ignored_line(start_mark)
  forward while peek == 32
  if peek == ?#
    forward until "\0\r\n\x85".include?(peek)
  end
  ch = peek
  unless "\0\r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a block scalar", start_mark, "expected a comment or a line break, but found #{ch.to_s}", get_mark)
  end
  scan_line_break
end
928
+
929
def scan_block_scalar_indentation
  # Determines the indentation of a block scalar with no explicit indent
  # indicator: collects leading line breaks and tracks the deepest column
  # of spaces seen. Returns [breaks, max column, mark after the scan].
  breaks = []
  deepest = 0
  mark = get_mark
  while " \r\n\x85".include?(peek)
    if peek == 32
      forward
      deepest = @column if @column > deepest
    else
      breaks << scan_line_break
      mark = get_mark
    end
  end
  [breaks, deepest, mark]
end
945
+
946
def scan_block_scalar_breaks(indent)
  # Consumes blank lines inside a block scalar, skipping indentation up to
  # `indent` columns on each line. Returns [line break strings, end mark].
  breaks = []
  mark = get_mark
  while @column < indent && peek == 32
    forward
  end
  while "\r\n\x85".include?(peek)
    breaks << scan_line_break
    mark = get_mark
    while @column < indent && peek == 32
      forward
    end
  end
  [breaks, mark]
end
958
+
959
def scan_flow_scalar(style)
  # Scans a quoted (flow) scalar. Indentation rules are deliberately loose
  # here: the quotes themselves delimit the scalar, so we are less
  # restrictive than the specification requires. Document separators
  # inside the scalar are still rejected (in the helpers).
  double = style == ?"
  pieces = []
  start_mark = get_mark
  quote = peek
  forward
  pieces.concat(scan_flow_scalar_non_spaces(double, start_mark))
  until peek == quote
    pieces.concat(scan_flow_scalar_spaces(double, start_mark))
    pieces.concat(scan_flow_scalar_non_spaces(double, start_mark))
  end
  forward
  end_mark = get_mark
  ScalarToken.new(pieces.join(''), false, start_mark, end_mark, style)
end
980
+
981
# Single-character escape sequences recognised in double-quoted scalars,
# mapped to the character each one denotes (the YAML 1.1 set; "N" is NEL,
# "_" is a non-breaking space).
ESCAPE_REPLACEMENTS = {
  "0"  => "\0",
  "a"  => "\a",
  "b"  => "\b",
  "t"  => "\t",
  "\t" => "\t",
  "n"  => "\n",
  "v"  => "\v",
  "f"  => "\f",
  "r"  => "\r",
  "e"  => "\e",
  " "  => " ",
  '"'  => '"',
  "\\" => "\\",
  "N"  => "\x85",
  "_"  => "\xA0"
}
998
+
999
# Escape introducers that are followed by a fixed number of hex digits
# (e.g. "\xXX"); the value is how many digits to read.
ESCAPE_CODES = { "x" => 2 }
1002
+
1003
def scan_flow_scalar_non_spaces(double, start_mark)
  # Scans the non-blank portions of a flow scalar: ordinary character runs,
  # the doubled quote ('') in single-quoted scalars, and backslash escapes
  # in double-quoted scalars. Returns the collected string chunks.
  chunks = []
  while true
    # Take the longest run of ordinary characters in a single slice.
    length = 0
    length += 1 while !"'\"\\\0 \t\r\n\x85".include?(peek(length))
    if length!=0
      chunks << prefix(length)
      forward(length)
    end
    ch = peek
    if !double && ch == ?' && peek(1) == ?'
      # BUG FIX: `chunks << ?'` pushed the Fixnum character code, which
      # Array#join renders as "39"; push the character itself.
      chunks << "'"
      forward(2)
    elsif (double && ch == ?') || (!double && "\"\\".include?(ch))
      # BUG FIX: same Fixnum-in-join problem as above.
      chunks << ch.chr
      forward
    elsif double && ch == ?\\
      forward
      ch = peek
      if ESCAPE_REPLACEMENTS.member?(ch.chr)
        chunks << ESCAPE_REPLACEMENTS[ch.chr]
        forward
      elsif ESCAPE_CODES.member?(ch.chr)
        length = ESCAPE_CODES[ch.chr]
        forward
        length.times do |k|
          if /[0-9A-Fa-f]/ !~ peek(k).chr
            raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
              "expected escape sequence of #{length} hexdecimal numbers, but found #{peek(k)}",get_mark)
          end
        end
        # BUG FIX: the digits are hexadecimal, so parse with radix 16 and
        # append the character they denote. The old code parsed them as
        # decimal and appended the hex *string* of that number.
        chunks << prefix(length).to_i(16).chr
        forward(length)
      elsif "\r\n\x85".include?(ch)
        # An escaped line break is removed along with following breaks.
        scan_line_break
        chunks += scan_flow_scalar_breaks(double, start_mark)
      else
        raise ScannerError.new("while scanning a double-quoted scalar", start_mark,"found unknown escape character #{ch}",get_mark)
      end
    else
      return chunks
    end
  end
end
1049
+
1050
def scan_flow_scalar_spaces(double, start_mark)
  # Scans blanks and line breaks inside a flow scalar and applies folding:
  # a single "\n" folds to a space, a non-"\n" break (never produced by
  # scan_line_break today) is kept, extra breaks are kept; plain whitespace
  # with no break is kept verbatim.
  chunks = []
  length = 0
  length += 1 while /[ \t]/ =~ peek(length).chr
  whitespaces = prefix(length)
  forward(length)
  ch = peek
  if ch == ?\0
    raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected end of stream",get_mark)
  elsif "\r\n\x85".include?(ch)
    line_break = scan_line_break
    breaks = scan_flow_scalar_breaks(double, start_mark)
    # BUG FIX: scan_line_break returns a String, so comparing it against
    # the character code ?\n was always true and a lone break was never
    # folded into a space. Compare against the newline string instead.
    if line_break != "\n"
      chunks << line_break
    elsif breaks.empty?
      chunks << ' '
    end
    chunks += breaks
  else
    chunks << whitespaces
  end
  chunks
end
1074
+
1075
def scan_flow_scalar_breaks(double, start_mark)
  # Collects consecutive line breaks inside a flow scalar. Instead of
  # checking indentation we only reject document separators ("---"/"...")
  # at the start of a line.
  collected = []
  loop do
    head = prefix(3)
    if (head == "---" || head == "...") && "\0 \t\r\n\x85".include?(peek(3))
      raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected document separator", get_mark)
    end
    while /[ \t]/ =~ peek.chr
      forward
    end
    return collected unless "\r\n\x85".include?(peek)
    collected << scan_line_break
  end
end
1093
+
1094
def scan_plain
  # Scans a plain (unquoted) scalar. Additional restriction for the flow
  # context: plain scalars there cannot contain ',', ':' and '?'. Also
  # maintains the `allow_simple_key` flag. Indentation rules are loosened
  # for the flow context.
  chunks = []
  start_mark = get_mark
  end_mark = start_mark
  indent = @indent+1
  # We allow zero indentation for scalars, but then we need to check for
  # document separators at the beginning of the line.
  #if indent == 0
  #  indent = 1
  spaces = []
  while true
    length = 0
    break if peek == ?#
    while true
      ch = peek(length)
      # BUG FIX: the character class after ':' must contain NEL ("\x85"),
      # not "\x28" ('('), which wrongly treated '(' as a break character.
      if "\0 \t\r\n\x85".include?(ch) || (@flow_level==0 && ch == ?: && "\0 \t\r\n\x85".include?(peek(length+1))) || (@flow_level!=0 && ",:?[]{}".include?(ch))
        break
      end
      length += 1
    end
    # A ':' not followed by a break/blank is ambiguous in flow context.
    if @flow_level != 0 && ch == ?: && !"\0 \t\r\n\x85[]{}".include?(peek(length+1))
      forward(length)
      raise ScannerError.new("while scanning a plain scalar",start_mark,"found unexpected ':'",get_mark,"Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.")
    end
    break if length == 0
    @allow_simple_key = false
    chunks += spaces
    chunks << prefix(length)
    forward(length)
    end_mark = get_mark
    spaces = scan_plain_spaces(indent, start_mark)
    # scan_plain_spaces returns nil at a document separator; a comment or
    # a dedent (block context only) also ends the scalar.
    break if spaces.nil? || spaces.empty? || peek == ?# || (@flow_level==0 && @column < indent)
  end
  return ScalarToken.new(chunks.join(''), true, start_mark, end_mark)
end
1134
+
1135
def scan_plain_spaces(indent, start_mark)
  # Scans blanks and line breaks between chunks of a plain scalar, applying
  # folding. Tabs are forbidden entirely (the spec is confusing here — do
  # not use tabs in YAML!). Returns nil when a document separator
  # ("---"/"...") is found, which terminates the scalar.
  chunks = []
  length = 0
  length += 1 while peek(length) == 32
  whitespaces = prefix(length)
  forward(length)
  ch = peek
  if "\r\n\x85".include?(ch)
    line_break = scan_line_break
    @allow_simple_key = true
    prefix = prefix(3)
    return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
    breaks = []
    while " \r\n\x85".include?(peek)
      if peek == 32
        forward
      else
        breaks << scan_line_break
        prefix = prefix(3)
        return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
      end
    end
    # BUG FIX: '\n' in single quotes is the two-character string
    # backslash+n, so the comparison was always true and a lone line break
    # was never folded into a space. Compare against the real newline.
    if line_break != "\n"
      chunks << line_break
    elsif breaks.empty?
      chunks << ' '
    end
    chunks += breaks
  elsif !whitespaces.empty?
    chunks << whitespaces
  end
  chunks
end
1171
+
1172
def scan_tag_handle(name, start_mark)
  # Scans a tag handle: a leading '!', optionally followed by word
  # characters and a closing '!'. For some strange reason the specification
  # does not allow '_' in tag handles; we allow it anyway. Returns the
  # handle text including the '!' delimiters.
  ch = peek
  unless ch == ?!
    raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark)
  end
  length = 1
  ch = peek(length)
  if ch != 32
    while /[-_0-9A-Za-z]/ =~ ch.chr
      length += 1
      ch = peek(length)
    end
    unless ch == ?!
      forward(length)
      raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark)
    end
    length += 1
  end
  handle = prefix(length)
  forward(length)
  handle
end
1195
+
1196
def scan_tag_uri(name, start_mark)
  # Scans a tag URI, decoding %-escape runs as it goes. Note: the URI is
  # not checked for well-formedness.
  parts = []
  length = 0
  ch = peek(length)
  while /[\]\[\-';\/?:@&=+$,.!~*()%\w]/ =~ ch.chr
    if ch == ?%
      # Flush the plain run collected so far, then decode the escapes.
      parts << prefix(length)
      forward(length)
      length = 0
      parts << scan_uri_escapes(name, start_mark)
    else
      length += 1
    end
    ch = peek(length)
  end
  unless length == 0
    parts << prefix(length)
    forward(length)
    length = 0
  end
  if parts.empty?
    raise ScannerError.new("while parsing a #{name}", start_mark,"expected URI, but found #{ch}",get_mark)
  end
  parts.join('')
end
1222
+
1223
def scan_uri_escapes(name, start_mark)
  # Decodes a run of %XX escapes in a tag URI into the bytes they denote.
  bytes = []
  mark = get_mark
  while peek == ?%
    forward
    2.times do |k|
      raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek(k)}",
        get_mark) if /[0-9A-Fa-f]/ !~ peek(k).chr
    end
    # BUG FIX: the two digits are hexadecimal; parse with radix 16 and emit
    # the byte itself. The old code parsed them as decimal and emitted the
    # hex *string* of that number (e.g. "%41" became "29", not "A").
    bytes << prefix(2).to_i(16).chr
    forward(2)
  end
  bytes.join('')
end
1238
+
1239
def scan_line_break
  # Normalizes a line break at the current position:
  #   "\r\n", "\r", "\n", "\x85" all become "\n";
  # anything else yields "" and consumes nothing.
  ch = peek
  return "" unless "\r\n\x85".include?(ch)
  if prefix(2) == "\r\n"
    forward(2)
  else
    forward
  end
  "\n"
end
1257
+ end
1258
+ end
1259
+