marshal-md 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1074 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "base64"
4
+ require "time"
5
+
6
+ module MarshalMd
7
+ class Loader
8
+ Line = Struct.new(:indent, :text, :lineno)
9
+
10
+ def initialize(md)
11
+ @lines = tokenize(md)
12
+ @pos = 0
13
+ @registry = ObjectRegistry.new
14
+ end
15
+
16
+ def load
17
+ parse_value(0)
18
+ end
19
+
20
+ private
21
+
22
+ def tokenize(md)
23
+ lines = []
24
+ md.each_line.with_index do |raw, i|
25
+ next if raw.strip.empty?
26
+ stripped = raw.rstrip
27
+ spaces = stripped.length - stripped.lstrip.length
28
+ indent = spaces / 2
29
+ lines << Line.new(indent, stripped.lstrip, i + 1)
30
+ end
31
+ lines
32
+ end
33
+
34
+ def current_line
35
+ @lines[@pos]
36
+ end
37
+
38
+ def advance
39
+ @pos += 1
40
+ end
41
+
42
+ def eof?
43
+ @pos >= @lines.length
44
+ end
45
+
46
+ def parse_value(expected_indent)
47
+ return nil if eof?
48
+
49
+ line = current_line
50
+ text = line.text
51
+
52
+ # Reference
53
+ if text =~ /^\*(\w+) \(ref\)$/
54
+ advance
55
+ return @registry.resolve($1)
56
+ end
57
+
58
+ # Anchor prefix
59
+ if text =~ /^&(\w+) (.+)$/
60
+ anchor = $1
61
+ rest = $2
62
+ return parse_anchored(anchor, rest, line.indent)
63
+ end
64
+
65
+ parse_unanchored(text, line)
66
+ end
67
+
68
+ def parse_unanchored(text, line)
69
+ # nil
70
+ if text == "nil (NilClass)"
71
+ advance
72
+ return nil
73
+ end
74
+
75
+ # Boolean
76
+ if text == "true (Boolean)"
77
+ advance
78
+ return true
79
+ end
80
+ if text == "false (Boolean)"
81
+ advance
82
+ return false
83
+ end
84
+
85
+ # Integer
86
+ if text =~ /^(-?\d+) \(Integer\)$/
87
+ advance
88
+ return $1.to_i
89
+ end
90
+
91
+ # Float
92
+ if text =~ /^(-?(?:Infinity|NaN|0\.0|[\d.]+(?:e[+-]?\d+)?)) \(Float\)$/i
93
+ advance
94
+ return parse_float_value($1)
95
+ end
96
+
97
+ # Symbol
98
+ if text =~ /^:(.+) \(Symbol\)$/
99
+ advance
100
+ return $1.to_sym
101
+ end
102
+
103
+ # Encoding
104
+ if text =~ /^(.+) \(Encoding\)$/
105
+ advance
106
+ return Encoding.find($1)
107
+ end
108
+
109
+ # Complex
110
+ if text =~ /^\((-?[\d.]+)\+(-?[\d.]+)i\) \(Complex\)$/
111
+ advance
112
+ return Complex($1.include?('.') ? $1.to_f : $1.to_i, $2.include?('.') ? $2.to_f : $2.to_i)
113
+ end
114
+
115
+ # Rational
116
+ if text =~ /^(-?\d+)\/(-?\d+) \(Rational\)$/
117
+ advance
118
+ return Rational($1.to_i, $2.to_i)
119
+ end
120
+
121
+ # Class
122
+ if text =~ /^(.+) \(Class\)$/
123
+ advance
124
+ return resolve_class($1)
125
+ end
126
+
127
+ # Module
128
+ if text =~ /^(.+) \(Module\)$/
129
+ advance
130
+ return resolve_class($1)
131
+ end
132
+
133
+ # Base64 string
134
+ if text =~ /^base64:(\S+) \(String, (.+?), (\d+) bytes\)$/
135
+ advance
136
+ return Base64.strict_decode64($1)
137
+ end
138
+
139
+ # String with encoding
140
+ if text =~ /^"(.*)" \(String, (.+)\)$/
141
+ advance
142
+ str = unescape($1)
143
+ return str.encode($2)
144
+ end
145
+
146
+ # String (UTF-8)
147
+ if text =~ /^"(.*)" \(String\)$/
148
+ advance
149
+ return unescape($1)
150
+ end
151
+
152
+ # Inline Range
153
+ if text =~ /^(.+?)(\.\.\.?)(.+) \(Range\)$/
154
+ advance
155
+ range_begin = parse_inline_value($1)
156
+ range_end = parse_inline_value($3)
157
+ exclusive = $2 == "..."
158
+ return Range.new(range_begin, range_end, exclusive)
159
+ end
160
+
161
+ # Multi-line Range
162
+ if text == "(Range)"
163
+ advance
164
+ return parse_multiline_range(line.indent)
165
+ end
166
+
167
+ # Regexp
168
+ if text =~ /^\/(.*)\/([imx]*) \(Regexp\)$/
169
+ advance
170
+ return build_regexp($1, $2)
171
+ end
172
+
173
+ # Time with usec
174
+ if text =~ /^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\.(\d+) ([+-]\d{4}) \(Time\)$/
175
+ advance
176
+ return parse_time_with_usec($1, $2, $3)
177
+ end
178
+
179
+ # Time without usec
180
+ if text =~ /^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} [+-]\d{4}) \(Time\)$/
181
+ advance
182
+ return Time.parse($1)
183
+ end
184
+
185
+ # Inline Array
186
+ if text =~ /^\[.*\] \(Array\)$/
187
+ advance
188
+ return parse_inline_array(text)
189
+ end
190
+
191
+ # Empty array
192
+ if text == "[] (Array)"
193
+ advance
194
+ return []
195
+ end
196
+
197
+ # Multi-line Array
198
+ if text == "(Array)"
199
+ advance
200
+ return parse_multiline_array(line.indent)
201
+ end
202
+
203
+ # Inline Hash
204
+ if text =~ /^\{.*\} \(Hash\)$/
205
+ advance
206
+ return parse_inline_hash(text)
207
+ end
208
+
209
+ # Empty hash
210
+ if text == "{} (Hash)"
211
+ advance
212
+ return {}
213
+ end
214
+
215
+ # Multi-line Hash
216
+ if text == "(Hash)"
217
+ advance
218
+ return parse_multiline_hash(line.indent)
219
+ end
220
+
221
+ # Struct
222
+ if text =~ /^#<(.+?)> \((.+?), Struct\)$/
223
+ klass_name = $2
224
+ advance
225
+ return parse_struct(klass_name, line.indent)
226
+ end
227
+
228
+ # Custom object with marshal_dump
229
+ if text =~ /^#<(.+?)> \((.+?), marshal_dump\)$/
230
+ klass_name = $2
231
+ advance
232
+ return parse_marshal_dump_object(klass_name, line.indent)
233
+ end
234
+
235
+ # Custom object with _dump
236
+ if text =~ /^\((.+?), _dump\)$/
237
+ klass_name = $1
238
+ advance
239
+ return parse_dump_object(klass_name, line.indent)
240
+ end
241
+
242
+ # Custom object (including subclassed built-ins)
243
+ if text =~ /^#<(.+?)> \((.+?)\)$/
244
+ klass_name = $2
245
+ advance
246
+ return parse_custom_object(klass_name, line.indent)
247
+ end
248
+
249
+ raise "Unexpected format at line #{line.lineno}: #{text}"
250
+ end
251
+
252
+ def parse_anchored(anchor, rest, indent)
253
+ # Reference
254
+ if rest =~ /^\*(\w+) \(ref\)$/
255
+ advance
256
+ return @registry.resolve($1)
257
+ end
258
+
259
+ # Determine what the rest describes and handle allocation+registration before parsing children
260
+
261
+ # Inline Array
262
+ if rest =~ /^\[.*\] \(Array\)$/
263
+ advance
264
+ arr = parse_inline_array(rest)
265
+ @registry.store(anchor, arr)
266
+ return arr
267
+ end
268
+
269
+ if rest == "[] (Array)"
270
+ advance
271
+ arr = []
272
+ @registry.store(anchor, arr)
273
+ return arr
274
+ end
275
+
276
+ # Multi-line Array
277
+ if rest == "(Array)"
278
+ advance
279
+ arr = []
280
+ @registry.store(anchor, arr)
281
+ parse_multiline_array_into(arr, indent)
282
+ return arr
283
+ end
284
+
285
+ # Inline Hash
286
+ if rest =~ /^\{.*\} \(Hash\)$/
287
+ advance
288
+ hash = parse_inline_hash(rest)
289
+ @registry.store(anchor, hash)
290
+ return hash
291
+ end
292
+
293
+ if rest == "{} (Hash)"
294
+ advance
295
+ hash = {}
296
+ @registry.store(anchor, hash)
297
+ return hash
298
+ end
299
+
300
+ # Multi-line Hash
301
+ if rest == "(Hash)"
302
+ advance
303
+ hash = {}
304
+ @registry.store(anchor, hash)
305
+ parse_multiline_hash_into(hash, indent)
306
+ return hash
307
+ end
308
+
309
+ # Struct
310
+ if rest =~ /^#<(.+?)> \((.+?), Struct\)$/
311
+ klass_name = $2
312
+ advance
313
+ klass = resolve_class(klass_name)
314
+ # Allocate and register before parsing members
315
+ obj = klass.allocate
316
+ @registry.store(anchor, obj)
317
+ parse_struct_members_into(obj, klass, indent)
318
+ return obj
319
+ end
320
+
321
+ # Custom object with marshal_dump
322
+ if rest =~ /^#<(.+?)> \((.+?), marshal_dump\)$/
323
+ klass_name = $2
324
+ advance
325
+ klass = resolve_class(klass_name)
326
+ obj = klass.allocate
327
+ @registry.store(anchor, obj)
328
+ data = parse_value(indent + 1)
329
+ obj.send(:marshal_load, data)
330
+ return obj
331
+ end
332
+
333
+ # Custom object with _dump
334
+ if rest =~ /^\((.+?), _dump\)$/
335
+ klass_name = $1
336
+ advance
337
+ klass = resolve_class(klass_name)
338
+ data = parse_child_string(indent)
339
+ obj = klass._load(data)
340
+ @registry.store(anchor, obj)
341
+ return obj
342
+ end
343
+
344
+ # Custom object (including subclassed built-ins)
345
+ if rest =~ /^#<(.+?)> \((.+?)\)$/
346
+ klass_name = $2
347
+ advance
348
+ klass = resolve_class(klass_name)
349
+ obj = allocate_for(klass)
350
+ @registry.store(anchor, obj)
351
+ obj = parse_custom_object_body(obj, klass, indent)
352
+ @registry.store(anchor, obj) # update in case of replacement
353
+ return obj
354
+ end
355
+
356
+ # String
357
+ if rest =~ /^"(.*)" \(String(?:, (.+))?\)$/
358
+ advance
359
+ str = unescape($1)
360
+ str = str.encode($2) if $2
361
+ @registry.store(anchor, str)
362
+ return str
363
+ end
364
+
365
+ if rest =~ /^base64:(\S+) \(String, (.+?), (\d+) bytes\)$/
366
+ advance
367
+ str = Base64.strict_decode64($1)
368
+ @registry.store(anchor, str)
369
+ return str
370
+ end
371
+
372
+ # Float
373
+ if rest =~ /^(-?(?:Infinity|NaN|0\.0|[\d.]+(?:e[+-]?\d+)?)) \(Float\)$/i
374
+ advance
375
+ val = parse_float_value($1)
376
+ @registry.store(anchor, val)
377
+ return val
378
+ end
379
+
380
+ # Inline Range
381
+ if rest =~ /^(.+?)(\.\.\.?)(.+) \(Range\)$/
382
+ advance
383
+ range_begin = parse_inline_value($1)
384
+ range_end = parse_inline_value($3)
385
+ exclusive = $2 == "..."
386
+ val = Range.new(range_begin, range_end, exclusive)
387
+ @registry.store(anchor, val)
388
+ return val
389
+ end
390
+
391
+ # Multi-line Range
392
+ if rest == "(Range)"
393
+ advance
394
+ val = parse_multiline_range(indent)
395
+ @registry.store(anchor, val)
396
+ return val
397
+ end
398
+
399
+ # Regexp
400
+ if rest =~ /^\/(.*)\/([imx]*) \(Regexp\)$/
401
+ advance
402
+ val = build_regexp($1, $2)
403
+ @registry.store(anchor, val)
404
+ return val
405
+ end
406
+
407
+ # Time with usec
408
+ if rest =~ /^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\.(\d+) ([+-]\d{4}) \(Time\)$/
409
+ advance
410
+ val = parse_time_with_usec($1, $2, $3)
411
+ @registry.store(anchor, val)
412
+ return val
413
+ end
414
+
415
+ # Time without usec
416
+ if rest =~ /^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} [+-]\d{4}) \(Time\)$/
417
+ advance
418
+ val = Time.parse($1)
419
+ @registry.store(anchor, val)
420
+ return val
421
+ end
422
+
423
+ raise "Unexpected anchored format: &#{anchor} #{rest}"
424
+ end
425
+
426
+ # --- Type-specific parsers ---
427
+
428
+ def parse_float_value(str)
429
+ case str
430
+ when "Infinity" then Float::INFINITY
431
+ when "-Infinity" then -Float::INFINITY
432
+ when "NaN" then Float::NAN
433
+ when "-0.0" then -0.0
434
+ else str.to_f
435
+ end
436
+ end
437
+
438
+ def parse_time_with_usec(datetime, usec_str, zone)
439
+ tz = zone
440
+ if tz =~ /^([+-])(\d{2})(\d{2})$/
441
+ tz = "#{$1}#{$2}:#{$3}"
442
+ end
443
+ t = Time.parse("#{datetime} #{zone}")
444
+ usec = usec_str.to_i
445
+ # Use Rational for exact usec precision
446
+ sec_with_usec = t.sec + Rational(usec, 1_000_000)
447
+ Time.new(t.year, t.month, t.day, t.hour, t.min, sec_with_usec, tz)
448
+ end
449
+
450
+ def build_regexp(source, flags_str)
451
+ flags = 0
452
+ flags |= Regexp::IGNORECASE if flags_str.include?("i")
453
+ flags |= Regexp::EXTENDED if flags_str.include?("x")
454
+ flags |= Regexp::MULTILINE if flags_str.include?("m")
455
+ Regexp.new(source, flags)
456
+ end
457
+
458
+ def parse_multiline_range(parent_indent)
459
+ rb = nil
460
+ re = nil
461
+ excl = false
462
+
463
+ while !eof? && current_line.indent > parent_indent
464
+ text = current_line.text
465
+ if text == "__begin__:"
466
+ advance
467
+ rb = parse_value(current_line&.indent || parent_indent + 2)
468
+ elsif text == "__end__:"
469
+ advance
470
+ re = parse_value(current_line&.indent || parent_indent + 2)
471
+ elsif text =~ /^__exclude_end__: (.+)$/
472
+ excl = $1 == "true"
473
+ advance
474
+ else
475
+ break
476
+ end
477
+ end
478
+
479
+ Range.new(rb, re, excl)
480
+ end
481
+
482
+ def parse_struct(klass_name, parent_indent)
483
+ klass = resolve_class(klass_name)
484
+ obj = klass.allocate
485
+ parse_struct_members_into(obj, klass, parent_indent)
486
+ obj
487
+ end
488
+
489
+ def parse_struct_members_into(obj, klass, parent_indent)
490
+ members = klass.members
491
+ while !eof? && current_line.indent > parent_indent
492
+ text = current_line.text
493
+
494
+ # __extend__ directive
495
+ if text =~ /^__extend__: (.+)$/
496
+ mod = resolve_class($1)
497
+ obj.extend(mod)
498
+ advance
499
+ next
500
+ end
501
+
502
+ # __prepend__ directive
503
+ if text =~ /^__prepend__: (.+)$/
504
+ mod = resolve_class($1)
505
+ obj.singleton_class.prepend(mod)
506
+ advance
507
+ next
508
+ end
509
+
510
+ if text =~ /^(\w+): (.+)$/
511
+ name = $1
512
+ value_text = $2
513
+ advance
514
+ if name.start_with?("@")
515
+ val = parse_inline_value(value_text)
516
+ obj.instance_variable_set(name, val)
517
+ elsif members.include?(name.to_sym)
518
+ val = parse_inline_value(value_text)
519
+ obj[name.to_sym] = val
520
+ end
521
+ elsif text =~ /^(\w+):$/
522
+ name = $1
523
+ advance
524
+ val = parse_value(current_line&.indent || parent_indent + 2)
525
+ if name.start_with?("@")
526
+ obj.instance_variable_set(name, val)
527
+ elsif members.include?(name.to_sym)
528
+ obj[name.to_sym] = val
529
+ end
530
+ elsif text =~ /^(@\w+): (.+)$/
531
+ ivar = $1
532
+ advance
533
+ val = parse_inline_value($2)
534
+ obj.instance_variable_set(ivar, val)
535
+ elsif text =~ /^(@\w+):$/
536
+ ivar = $1
537
+ advance
538
+ val = parse_value(current_line&.indent || parent_indent + 2)
539
+ obj.instance_variable_set(ivar, val)
540
+ else
541
+ break
542
+ end
543
+ end
544
+ end
545
+
546
+ def parse_marshal_dump_object(klass_name, parent_indent)
547
+ klass = resolve_class(klass_name)
548
+ obj = klass.allocate
549
+ data = parse_value(parent_indent + 1)
550
+ obj.send(:marshal_load, data)
551
+ obj
552
+ end
553
+
554
+ def parse_dump_object(klass_name, parent_indent)
555
+ klass = resolve_class(klass_name)
556
+ data = parse_child_string(parent_indent)
557
+ klass._load(data)
558
+ end
559
+
560
+ def parse_child_string(parent_indent)
561
+ if !eof? && current_line.indent > parent_indent
562
+ text = current_line.text
563
+ if text =~ /^"(.*)" \(String(?:, (.+))?\)$/
564
+ advance
565
+ str = unescape($1)
566
+ str = str.encode($2) if $2
567
+ return str
568
+ elsif text =~ /^base64:(\S+) \(String, (.+?), (\d+) bytes\)$/
569
+ advance
570
+ return Base64.strict_decode64($1)
571
+ else
572
+ return parse_value(parent_indent + 1)
573
+ end
574
+ end
575
+ nil
576
+ end
577
+
578
+ def allocate_for(klass)
579
+ if klass <= Array
580
+ klass == Array ? [] : klass.allocate
581
+ elsif klass <= Hash
582
+ klass == Hash ? {} : klass.allocate
583
+ elsif klass <= String
584
+ klass.new("")
585
+ elsif klass <= Range
586
+ klass.allocate
587
+ elsif klass <= Regexp
588
+ klass.allocate
589
+ elsif klass <= Time
590
+ klass.allocate
591
+ else
592
+ klass.allocate
593
+ end
594
+ end
595
+
596
+ def parse_custom_object(klass_name, parent_indent)
597
+ klass = resolve_class(klass_name)
598
+ obj = allocate_for(klass)
599
+ parse_custom_object_body(obj, klass, parent_indent)
600
+ end
601
+
602
+ def parse_custom_object_body(obj, klass, parent_indent)
603
+ @_pending_time_replacement = nil
604
+ @_pending_regexp_replacement = nil
605
+ @_pending_range_begin = nil
606
+ @_pending_range_end = nil
607
+ @_pending_range_excl = false
608
+
609
+ saved_ivars = []
610
+ parse_custom_body_into(obj, klass, parent_indent)
611
+
612
+ # Reconstruct Time subclass - collect ivars from allocated obj first
613
+ if @_pending_time_replacement && klass <= Time
614
+ t = @_pending_time_replacement
615
+ ivars = obj.instance_variables.map { |iv| [iv, obj.instance_variable_get(iv)] }
616
+ obj = klass.at(t.to_r)
617
+ obj = obj.getlocal(t.utc_offset)
618
+ ivars.each { |iv, val| obj.instance_variable_set(iv, val) }
619
+ end
620
+
621
+ # Reconstruct Regexp subclass
622
+ if @_pending_regexp_replacement && klass <= Regexp
623
+ r = @_pending_regexp_replacement
624
+ ivars = obj.instance_variables.map { |iv| [iv, obj.instance_variable_get(iv)] }
625
+ obj = klass.allocate
626
+ # Use Regexp#initialize from the base class
627
+ Regexp.instance_method(:initialize).bind(obj).call(r.source, r.options)
628
+ ivars.each { |iv, val| obj.instance_variable_set(iv, val) }
629
+ end
630
+
631
+ # Reconstruct Range subclass
632
+ if @_pending_range_begin || @_pending_range_end
633
+ if klass <= Range
634
+ # Initialize the existing object in-place to preserve identity (for circular refs)
635
+ Range.instance_method(:initialize).bind(obj).call(@_pending_range_begin, @_pending_range_end, @_pending_range_excl)
636
+ end
637
+ end
638
+
639
+ obj
640
+ end
641
+
642
+ def parse_custom_body_into(obj, klass, parent_indent)
643
+ while !eof? && current_line.indent > parent_indent
644
+ text = current_line.text
645
+
646
+ # __extend__ directive
647
+ if text =~ /^__extend__: (.+)$/
648
+ mod = resolve_class($1)
649
+ obj.extend(mod)
650
+ advance
651
+ next
652
+ end
653
+
654
+ # __prepend__ directive
655
+ if text =~ /^__prepend__: (.+)$/
656
+ mod = resolve_class($1)
657
+ obj.singleton_class.prepend(mod)
658
+ advance
659
+ next
660
+ end
661
+
662
+ # __message__ for exceptions
663
+ if text =~ /^__message__: (.+)$/
664
+ advance
665
+ msg = parse_inline_value($1)
666
+ if obj.is_a?(Exception)
667
+ # Use Exception's initialize to set message without triggering subclass initialize
668
+ Exception.instance_method(:initialize).bind(obj).call(msg)
669
+ end
670
+ next
671
+ end
672
+
673
+ # __backtrace__ for exceptions
674
+ if text == "__backtrace__:"
675
+ advance
676
+ if !eof? && current_line.indent > parent_indent + 1
677
+ bt = parse_value(current_line.indent)
678
+ obj.set_backtrace(bt) if obj.is_a?(Exception) && bt
679
+ end
680
+ next
681
+ end
682
+
683
+ # __value__ for subclassed strings
684
+ if text =~ /^__value__: (.+)$/
685
+ advance
686
+ str_val = parse_inline_value($1)
687
+ if obj.is_a?(String)
688
+ obj.replace(str_val)
689
+ end
690
+ next
691
+ end
692
+
693
+ # __elements__ for subclassed arrays
694
+ if text == "__elements__:"
695
+ advance
696
+ if !eof? && current_line.indent > parent_indent + 1
697
+ ct = current_line.text
698
+ if ct =~ /^\[.*\] \(Array\)$/ || ct == "[] (Array)"
699
+ elements = parse_value(current_line.indent)
700
+ elsif ct == "(Array)"
701
+ advance
702
+ elements = []
703
+ while !eof? && current_line.indent > parent_indent + 2
704
+ elements << parse_value(current_line.indent)
705
+ end
706
+ else
707
+ elements = []
708
+ end
709
+ if obj.is_a?(Array)
710
+ elements.each { |el| obj << el }
711
+ end
712
+ end
713
+ next
714
+ end
715
+
716
+ # __entries__ for subclassed hashes
717
+ if text == "__entries__:"
718
+ advance
719
+ if !eof? && current_line.indent > parent_indent + 1
720
+ ct = current_line.text
721
+ if ct =~ /^\{.*\} \(Hash\)$/ || ct == "{} (Hash)"
722
+ entries = parse_value(current_line.indent)
723
+ entries.each { |k, v| obj[k] = v } if obj.is_a?(Hash)
724
+ elsif ct == "(Hash)"
725
+ advance
726
+ # Parse directly into obj to preserve compare_by_identity
727
+ if obj.is_a?(Hash)
728
+ parse_multiline_hash_into(obj, current_line ? current_line.indent - 1 : parent_indent + 2)
729
+ else
730
+ entries = {}
731
+ parse_multiline_hash_into(entries, current_line ? current_line.indent - 1 : parent_indent + 2)
732
+ end
733
+ end
734
+ end
735
+ next
736
+ end
737
+
738
+ # __compare_by_identity__ for hashes
739
+ if text == "__compare_by_identity__: true"
740
+ obj.compare_by_identity if obj.is_a?(Hash)
741
+ advance
742
+ next
743
+ end
744
+
745
+ # __default__ for hashes
746
+ if text =~ /^__default__: (.+)$/
747
+ advance
748
+ val = parse_inline_value($1)
749
+ obj.default = val if obj.is_a?(Hash)
750
+ next
751
+ end
752
+ if text == "__default__:"
753
+ advance
754
+ val = parse_value(current_line&.indent || parent_indent + 2)
755
+ obj.default = val if obj.is_a?(Hash)
756
+ next
757
+ end
758
+
759
+ # __time__ for subclassed Time
760
+ if text =~ /^__time__: (.+) \(Time\)$/
761
+ time_str = $1
762
+ advance
763
+ if time_str =~ /^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\.(\d+) ([+-]\d{4})$/
764
+ t = parse_time_with_usec($1, $2, $3)
765
+ else
766
+ t = Time.parse(time_str)
767
+ end
768
+ # For Time subclass: replace the allocated object with a proper one
769
+ if obj.is_a?(Time)
770
+ # We can't modify an allocated Time in place, so we need
771
+ # to re-create it. The caller should use the returned object.
772
+ @_pending_time_replacement = t
773
+ end
774
+ next
775
+ end
776
+
777
+ # __pattern__ for subclassed Regexp
778
+ if text =~ /^__pattern__: \/(.*)\/([imx]*) \(Regexp\)$/
779
+ source = $1
780
+ flags_str = $2
781
+ advance
782
+ r = build_regexp(source, flags_str)
783
+ @_pending_regexp_replacement = r
784
+ next
785
+ end
786
+
787
+ # __begin__, __end__, __exclude_end__ for subclassed Range
788
+ if text == "__begin__:"
789
+ advance
790
+ @_pending_range_begin = parse_value(parent_indent + 2)
791
+ next
792
+ end
793
+ if text == "__end__:"
794
+ advance
795
+ @_pending_range_end = parse_value(parent_indent + 2)
796
+ next
797
+ end
798
+ if text =~ /^__exclude_end__: (.+)$/
799
+ @_pending_range_excl = ($1 == "true")
800
+ advance
801
+ next
802
+ end
803
+
804
+ # Instance variable
805
+ if text =~ /^(@\w+): (.+)$/
806
+ ivar = $1
807
+ value_text = $2
808
+ advance
809
+ val = parse_inline_value(value_text)
810
+ obj.instance_variable_set(ivar, val)
811
+ elsif text =~ /^(@\w+):$/
812
+ ivar = $1
813
+ advance
814
+ val = parse_value(current_line&.indent || parent_indent + 2)
815
+ obj.instance_variable_set(ivar, val)
816
+ else
817
+ break
818
+ end
819
+ end
820
+ end
821
+
822
+ # --- Inline parsing ---
823
+
824
+ def parse_inline_value(text)
825
+ text = text.strip
826
+
827
+ # Bare scalars
828
+ return nil if text == "nil"
829
+ return true if text == "true"
830
+ return false if text == "false"
831
+
832
+ # Reference
833
+ if text =~ /^\*(\w+)( \(ref\))?$/
834
+ return @registry.resolve($1)
835
+ end
836
+
837
+ # Base64 string
838
+ if text =~ /^base64:(\S+) \(String, (.+?), (\d+) bytes\)$/
839
+ return Base64.strict_decode64($1)
840
+ end
841
+
842
+ # String with encoding
843
+ if text =~ /^"(.*)" \(String, (.+)\)$/
844
+ return unescape($1).encode($2)
845
+ end
846
+
847
+ # String (annotated)
848
+ if text =~ /^"(.*)" \(String\)$/
849
+ return unescape($1)
850
+ end
851
+
852
+ # Bare string
853
+ if text =~ /^"(.*)"$/
854
+ return unescape($1)
855
+ end
856
+
857
+ # Bare symbol
858
+ if text =~ /^:(.+)$/
859
+ return $1.to_sym
860
+ end
861
+
862
+ # Inline Array
863
+ if text =~ /^\[.*\] \(Array\)$/
864
+ return parse_inline_array(text)
865
+ end
866
+
867
+ # Inline Hash
868
+ if text =~ /^\{.*\} \(Hash\)$/
869
+ return parse_inline_hash(text)
870
+ end
871
+
872
+ # Range (annotated)
873
+ if text =~ /^(.+?)(\.\.\.?)(.+) \(Range\)$/
874
+ range_begin = parse_inline_value($1)
875
+ range_end = parse_inline_value($3)
876
+ return Range.new(range_begin, range_end, $2 == "...")
877
+ end
878
+
879
+ # Regexp
880
+ if text =~ /^\/(.*)\/([imx]*) \(Regexp\)$/
881
+ return build_regexp($1, $2)
882
+ end
883
+
884
+ # Time with usec
885
+ if text =~ /^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\.(\d+) ([+-]\d{4}) \(Time\)$/
886
+ return parse_time_with_usec($1, $2, $3)
887
+ end
888
+
889
+ # Time
890
+ if text =~ /^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} [+-]\d{4}) \(Time\)$/
891
+ return Time.parse($1)
892
+ end
893
+
894
+ # Float special
895
+ return Float::INFINITY if text == "Infinity"
896
+ return -Float::INFINITY if text == "-Infinity"
897
+ return Float::NAN if text == "NaN"
898
+ return -0.0 if text == "-0.0"
899
+
900
+ # Float
901
+ if text =~ /^-?\d+\.\d+$/ || text =~ /^-?\d+(\.\d+)?e[+-]?\d+$/i
902
+ return text.to_f
903
+ end
904
+
905
+ # Integer
906
+ if text =~ /^-?\d+$/
907
+ return text.to_i
908
+ end
909
+
910
+ raise "Cannot parse inline value: #{text}"
911
+ end
912
+
913
+ def parse_inline_array(text)
914
+ inner = text.sub(/^\[/, "").sub(/\] \(Array\)$/, "")
915
+ return [] if inner.strip.empty?
916
+ elements = split_inline(inner)
917
+ elements.map { |el| parse_inline_value(el) }
918
+ end
919
+
920
+ def parse_inline_hash(text)
921
+ inner = text.sub(/^\{/, "").sub(/\} \(Hash\)$/, "")
922
+ return {} if inner.strip.empty?
923
+
924
+ hash = {}
925
+ pairs = split_inline(inner)
926
+ pairs.each do |pair|
927
+ if pair =~ /^(\w+): (.+)$/
928
+ key = $1.to_sym
929
+ value = parse_inline_value($2)
930
+ hash[key] = value
931
+ elsif pair =~ /^(.+?) => (.+)$/
932
+ key = parse_inline_value($1)
933
+ value = parse_inline_value($2)
934
+ hash[key] = value
935
+ end
936
+ end
937
+ hash
938
+ end
939
+
940
+ def split_inline(str)
941
+ elements = []
942
+ current = ""
943
+ depth = 0
944
+ in_string = false
945
+ escape_next = false
946
+
947
+ str.each_char do |ch|
948
+ if escape_next
949
+ current += ch
950
+ escape_next = false
951
+ next
952
+ end
953
+
954
+ if ch == "\\"
955
+ current += ch
956
+ escape_next = true
957
+ next
958
+ end
959
+
960
+ if ch == '"'
961
+ in_string = !in_string
962
+ current += ch
963
+ next
964
+ end
965
+
966
+ if !in_string
967
+ if ch == "[" || ch == "{" || ch == "("
968
+ depth += 1
969
+ current += ch
970
+ elsif ch == "]" || ch == "}" || ch == ")"
971
+ depth -= 1
972
+ current += ch
973
+ elsif ch == "," && depth == 0
974
+ elements << current.strip
975
+ current = ""
976
+ else
977
+ current += ch
978
+ end
979
+ else
980
+ current += ch
981
+ end
982
+ end
983
+
984
+ elements << current.strip unless current.strip.empty?
985
+ elements
986
+ end
987
+
988
+ def parse_multiline_array(parent_indent)
989
+ arr = []
990
+ parse_multiline_array_into(arr, parent_indent)
991
+ arr
992
+ end
993
+
994
+ def parse_multiline_array_into(arr, parent_indent)
995
+ while !eof? && current_line.indent > parent_indent
996
+ arr << parse_value(current_line.indent)
997
+ end
998
+ end
999
+
1000
+ def parse_multiline_hash(parent_indent)
1001
+ hash = {}
1002
+ parse_multiline_hash_into(hash, parent_indent)
1003
+ hash
1004
+ end
1005
+
1006
+ def parse_multiline_hash_into(hash, parent_indent)
1007
+ while !eof? && current_line.indent > parent_indent
1008
+ line = current_line
1009
+ text = line.text
1010
+
1011
+ if text =~ /^(.+?) => (.+)$/
1012
+ key = parse_inline_value($1)
1013
+ value = parse_inline_value($2)
1014
+ advance
1015
+ hash[key] = value
1016
+ elsif text =~ /^(.+?) =>$/
1017
+ key = parse_inline_value($1)
1018
+ advance
1019
+ value = parse_value(line.indent + 1)
1020
+ hash[key] = value
1021
+ elsif text == "(entry)"
1022
+ # Complex key: (entry) followed by key value, then =>, then value
1023
+ advance
1024
+ key = parse_value(line.indent + 1)
1025
+ # Skip the => line
1026
+ if !eof? && current_line.text == "=>"
1027
+ advance
1028
+ end
1029
+ value = parse_value(line.indent + 1)
1030
+ hash[key] = value
1031
+ else
1032
+ break
1033
+ end
1034
+ end
1035
+ end
1036
+
1037
+ def unescape(str)
1038
+ result = +""
1039
+ i = 0
1040
+ while i < str.length
1041
+ if str[i] == "\\" && i + 1 < str.length
1042
+ case str[i + 1]
1043
+ when "n" then result << "\n"; i += 2
1044
+ when "r" then result << "\r"; i += 2
1045
+ when "t" then result << "\t"; i += 2
1046
+ when "0" then result << "\0"; i += 2
1047
+ when "\\" then result << "\\"; i += 2
1048
+ when '"' then result << '"'; i += 2
1049
+ when "x"
1050
+ # \xHH hex escape
1051
+ if i + 3 < str.length
1052
+ hex = str[i + 2, 2]
1053
+ result << hex.to_i(16).chr
1054
+ i += 4
1055
+ else
1056
+ result << str[i]; i += 1
1057
+ end
1058
+ else result << str[i]; i += 1
1059
+ end
1060
+ else
1061
+ result << str[i]
1062
+ i += 1
1063
+ end
1064
+ end
1065
+ result
1066
+ end
1067
+
1068
+ def resolve_class(name)
1069
+ name.split("::").reduce(Object) { |mod, const| mod.const_get(const) }
1070
+ rescue NameError
1071
+ raise ArgumentError, "undefined class/module #{name}"
1072
+ end
1073
+ end
1074
+ end