psych-pure 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d126f1c02cd1f7eba6b74a19f76dc938a6c828dc8dfea96a5663b3b4c8ac5bac
4
- data.tar.gz: 6df58931a63090a6248a44d94986057fa60faa5c6ec2015b832da7a4d1fefe7a
3
+ metadata.gz: 62e40e29dab03fe116feadefc2a62967ec840deb52bee18bff89ae1318bf4d02
4
+ data.tar.gz: 3f5612a277682bbef3a698e03cc3bde6e9da02dc3f3024d9978aff34af8a624c
5
5
  SHA512:
6
- metadata.gz: 00f44a5e4889ac4674d844f5dbfbff8a39fb1126f40b7135a33165539a3e6d27d5d5303b0289259471ba3d8c3dfd7e58e75f2fa8c5339258239a89b58e314c2d
7
- data.tar.gz: feaacc73c47c767edf1f454de1ecf4db4c812557f10578d88f715508b1975c524edc9acf5c21b40f77c40e1fc9eb25e7e79ee6496d8322cfb49e0030d457be3c
6
+ metadata.gz: 5be228176e34e4e908e1ea6af1cebbf41b59fa311de3e4a5abec21aefd9e6028f87473714b9e2c0ccf554bec1a379986b9323744c94ebdf9144cdc08d9116991
7
+ data.tar.gz: 4ba02fec03109e76b40883918d1d46125bdd8c1afb9d486a5af2decb868ebc6c967229dda3758e6281b5df709a89b2e6f704d747bad7f307d748861aba580b98
data/CHANGELOG.md CHANGED
@@ -6,6 +6,19 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.1.3] - 2025-10-24
10
+
11
+ - Fix up roundtripping when using `<<` inside mappings.
12
+ - Fix up roundtripping when using duplicate keys inside mappings.
13
+ - Fix up comment handling when using duplicate keys inside mappings.
14
+
15
+ ## [0.1.2] - 2025-03-04
16
+
17
+ - Fix up comment dumping to not drift around objects.
18
+ - Rely on source formatting when possible for scalar values.
19
+ - Fix up multi-line literals with the "keep" flag (|+).
20
+ - Fix up aliasing hashes that have comments loaded.
21
+
9
22
  ## [0.1.1] - 2025-02-13
10
23
 
11
24
  - Fix up comment handling to preserve within hashes.
@@ -18,6 +31,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
18
31
 
19
32
  - 🎉 Initial release. 🎉
20
33
 
21
- [unreleased]: https://github.com/kddnewton/psych-pure/compare/v0.1.1...HEAD
34
+ [unreleased]: https://github.com/kddnewton/psych-pure/compare/v0.1.3...HEAD
35
+ [0.1.3]: https://github.com/kddnewton/psych-pure/compare/v0.1.2...v0.1.3
36
+ [0.1.2]: https://github.com/kddnewton/psych-pure/compare/v0.1.1...v0.1.2
22
37
  [0.1.1]: https://github.com/kddnewton/psych-pure/compare/v0.1.0...v0.1.1
23
38
  [0.1.0]: https://github.com/kddnewton/psych-pure/compare/24de62...v0.1.0
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Psych
4
4
  module Pure
5
- VERSION = "0.1.1"
5
+ VERSION = "0.1.3"
6
6
  end
7
7
  end
data/lib/psych/pure.rb CHANGED
@@ -7,6 +7,14 @@ require "strscan"
7
7
  require "stringio"
8
8
 
9
9
  module Psych
10
+ module Nodes
11
+ class Scalar
12
+ # The source of the scalar, as it was found in the input. This may be set
13
+ # in order to be reused when dumping the object.
14
+ attr_accessor :source
15
+ end
16
+ end
17
+
10
18
  # A YAML parser written in Ruby.
11
19
  module Pure
12
20
  # An internal exception is an exception that should not have occurred. It is
@@ -17,7 +25,7 @@ module Psych
17
25
  end
18
26
  end
19
27
 
20
- # A source is wraps the input string and provides methods to access line and
28
+ # A source wraps the input string and provides methods to access line and
21
29
  # column information from a byte offset.
22
30
  class Source
23
31
  def initialize(string)
@@ -27,7 +35,13 @@ module Psych
27
35
  offset = 0
28
36
  string.each_line do |line|
29
37
  @line_offsets << offset
30
- @trimmable_lines << line.match?(/\A(?: *#.*)?\n\z/)
38
+ @trimmable_lines <<
39
+ case line
40
+ when /\A *#.*\n\z/ then :comment
41
+ when /\A *\n\z/ then :blank
42
+ else false
43
+ end
44
+
31
45
  offset += line.bytesize
32
46
  end
33
47
 
@@ -43,6 +57,14 @@ module Psych
43
57
  offset
44
58
  end
45
59
 
60
+ def trim_comments(offset)
61
+ while (l = line(offset)) != 0 && (offset == @line_offsets[l]) && @trimmable_lines[l - 1] == :comment
62
+ offset = @line_offsets[l - 1]
63
+ end
64
+
65
+ offset
66
+ end
67
+
46
68
  def line(offset)
47
69
  index = @line_offsets.bsearch_index { |line_offset| line_offset > offset }
48
70
  return @line_offsets.size - 1 if index.nil?
@@ -64,6 +86,10 @@ module Psych
64
86
  @pos_end = pos_end
65
87
  end
66
88
 
89
+ def range
90
+ @pos_start...@pos_end
91
+ end
92
+
67
93
  def start_line
68
94
  @source.line(@pos_start)
69
95
  end
@@ -89,6 +115,11 @@ module Psych
89
115
  Location.new(@source, @pos_start, @source.trim(@pos_end))
90
116
  end
91
117
 
118
+ # Trim trailing comments from this location.
119
+ def trim_comments
120
+ Location.new(@source, @pos_start, @source.trim_comments(@pos_end))
121
+ end
122
+
92
123
  def to_a
93
124
  [start_line, start_column, end_line, end_column]
94
125
  end
@@ -147,48 +178,89 @@ module Psych
147
178
  end
148
179
  end
149
180
 
150
- # Wraps a Ruby object with its comments from the source input.
151
- class CommentsObject < SimpleDelegator
152
- attr_reader :psych_comments
181
+ # Wraps a Ruby object with its node from the source input.
182
+ class LoadedObject < SimpleDelegator
183
+ # The node associated with the object.
184
+ attr_reader :psych_node
185
+
186
+ # Whether or not this object has been modified. If it has, then we cannot
187
+ # rely on the source formatting, and need to format it ourselves.
188
+ attr_reader :dirty
153
189
 
154
- def initialize(object, psych_comments)
155
- @psych_comments = psych_comments
190
+ def initialize(object, psych_node, dirty = false)
156
191
  super(object)
192
+ @psych_node = psych_node
193
+ @dirty = dirty
194
+ end
195
+
196
+ def replace(psych_node)
197
+ @psych_node = psych_node
198
+ @dirty = true
157
199
  end
158
200
  end
159
201
 
160
- # Wraps a Ruby hash with its comments from the source input.
161
- class CommentsHash < SimpleDelegator
162
- attr_reader :psych_comments, :psych_key_comments
202
+ # Wraps a Ruby hash with its node from the source input.
203
+ class LoadedHash < SimpleDelegator
204
+ class PsychKey
205
+ attr_reader :key_node, :value_node
163
206
 
164
- def initialize(object, psych_comments, psych_key_comments = {})
165
- @psych_comments = psych_comments
166
- @psych_key_comments = psych_key_comments
167
- commentless = {}
207
+ def initialize(key_node, value_node)
208
+ @key_node = key_node
209
+ @value_node = value_node
210
+ end
168
211
 
169
- object.each do |key, value|
170
- if key.is_a?(CommentsObject)
171
- @psych_key_comments[key.__getobj__] = key.psych_comments
172
- commentless[key.__getobj__] = value
173
- else
174
- commentless[key] = value
175
- end
212
+ def replace(value_node)
213
+ @value_node = value_node
176
214
  end
215
+ end
177
216
 
178
- super(commentless)
217
+ # The node associated with the hash.
218
+ attr_reader :psych_node
219
+
220
+ def initialize(object, psych_node)
221
+ super(object)
222
+ @psych_node = psych_node
223
+ @psych_keys = []
224
+ end
225
+
226
+ def psych_keys
227
+ @psych_keys.map do |psych_key|
228
+ [psych_key.key_node, psych_key.value_node]
229
+ end
179
230
  end
180
231
 
181
232
  def []=(key, value)
182
- if (previous = self[key])
183
- if previous.is_a?(CommentsObject)
184
- value = CommentsObject.new(value, previous.psych_comments)
185
- elsif previous.is_a?(CommentsHash)
186
- value = CommentsHash.new(value, previous.psych_comments, previous.psych_key_comments)
233
+ if begin
234
+ @psych_keys.none? do |psych_key|
235
+ key_node = psych_key.key_node
236
+ key_node_inner =
237
+ if key_node.is_a?(LoadedHash) || key_node.is_a?(LoadedObject)
238
+ key_node.__getobj__
239
+ else
240
+ key_node
241
+ end
242
+
243
+ if key_node_inner.eql?(key)
244
+ psych_key.replace(value)
245
+ true
246
+ end
187
247
  end
248
+ end then
249
+ @psych_keys << PsychKey.new(key, value)
188
250
  end
189
251
 
190
252
  super(key, value)
191
253
  end
254
+
255
+ def set!(key_node, value_node)
256
+ @psych_keys << PsychKey.new(key_node, value_node)
257
+ __getobj__[key_node.__getobj__] = value_node
258
+ end
259
+
260
+ def join!(key_node, value_node)
261
+ @psych_keys << PsychKey.new(key_node, value_node)
262
+ merge!(value_node)
263
+ end
192
264
  end
193
265
 
194
266
  # This module contains all of the extensions to Psych that we need in order
@@ -304,9 +376,18 @@ module Psych
304
376
  # Extend the document stream to be able to attach comments to the
305
377
  # document.
306
378
  module DocumentStream
379
+ def start_document(version, tag_directives, implicit)
380
+ node = Nodes::Document.new(version, tag_directives, implicit)
381
+ set_start_location(node)
382
+ push(node)
383
+ end
384
+
307
385
  def end_document(implicit_end = !streaming?)
308
386
  @last.implicit_end = implicit_end
309
- @block.call(attach_comments(pop))
387
+ node = pop
388
+ set_end_location(node)
389
+ attach_comments(node)
390
+ @block.call(node)
310
391
  end
311
392
  end
312
393
 
@@ -347,22 +428,77 @@ module Psych
347
428
  result = super
348
429
 
349
430
  if @comments
350
- if result.is_a?(Hash)
351
- result = CommentsHash.new(result, node.comments? ? node.comments : nil)
352
- elsif node.comments?
353
- result = CommentsObject.new(result, node.comments)
431
+ case result
432
+ when LoadedObject, LoadedHash
433
+ # skip
434
+ else
435
+ result = LoadedObject.new(result, node)
354
436
  end
355
437
  end
356
438
 
357
439
  result
358
440
  end
441
+
442
+ private
443
+
444
+ def revive_hash(hash, node, tagged = false)
445
+ return super unless @comments
446
+
447
+ revived = LoadedHash.new(hash, node)
448
+ node.children.each_slice(2) do |key_node, value_node|
449
+ key = accept(key_node)
450
+ value = accept(value_node)
451
+
452
+ if key == "<<" && key_node.tag != "tag:yaml.org,2002:str"
453
+ case value_node
454
+ when Nodes::Alias, Nodes::Mapping
455
+ begin
456
+ # h1:
457
+ # <<: *h2
458
+ # <<: { k: v }
459
+ revived.join!(key, value)
460
+ rescue TypeError
461
+ # a: &a [1, 2, 3]
462
+ # h: { <<: *a }
463
+ revived.set!(key, value)
464
+ end
465
+ when Nodes::Sequence
466
+ # h1:
467
+ # <<: [*h2, *h3]
468
+ begin
469
+ temporary = {}
470
+ value.reverse_each { |value| temporary.merge!(value) }
471
+ rescue TypeError
472
+ revived.set!(key, value)
473
+ else
474
+ value_node.children.zip(value).reverse_each do |(child_value_node, child_value)|
475
+ revived.join!(key, child_value)
476
+ end
477
+ end
478
+ else
479
+ # k: v
480
+ revived.set!(key, value)
481
+ end
482
+ else
483
+ if !tagged && @symbolize_names && key.is_a?(String)
484
+ key = key.to_sym
485
+ elsif !@freeze
486
+ key = deduplicate(key)
487
+ end
488
+
489
+ revived.set!(key, value)
490
+ end
491
+ end
492
+
493
+ revived
494
+ end
359
495
  end
360
496
 
361
497
  # Extend the ToRuby singleton to be able to pass the comments option.
362
498
  module ToRubySingleton
363
499
  def create(symbolize_names: false, freeze: false, strict_integer: false, comments: false)
364
500
  class_loader = ClassLoader.new
365
- scanner = ScalarScanner.new(class_loader, strict_integer: strict_integer)
501
+ scanner = ScalarScanner.new(class_loader, strict_integer: strict_integer)
366
502
  new(scanner, class_loader, symbolize_names: symbolize_names, freeze: freeze, comments: comments)
367
503
  end
368
504
  end
@@ -463,11 +599,12 @@ module Psych
463
599
  # A scalar event represents a single value in the YAML document. It can be
464
600
  # many different types.
465
601
  class Scalar
466
- attr_reader :location, :value, :style
602
+ attr_reader :location, :source, :value, :style
467
603
  attr_accessor :anchor, :tag
468
604
 
469
- def initialize(location, value, style)
605
+ def initialize(location, source, value, style)
470
606
  @location = location
607
+ @source = source
471
608
  @value = value
472
609
  @anchor = nil
473
610
  @tag = nil
@@ -475,15 +612,20 @@ module Psych
475
612
  end
476
613
 
477
614
  def accept(handler)
478
- handler.event_location(*@location)
479
- handler.scalar(
480
- @value,
481
- @anchor,
482
- @tag,
483
- (!@tag || @tag == "!") && (@style == Nodes::Scalar::PLAIN),
484
- (!@tag || @tag == "!") && (@style != Nodes::Scalar::PLAIN),
485
- @style
486
- )
615
+ handler.event_location(*@location.trim)
616
+
617
+ event =
618
+ handler.scalar(
619
+ @value,
620
+ @anchor,
621
+ @tag,
622
+ (!@tag || @tag == "!") && (@style == Nodes::Scalar::PLAIN),
623
+ (!@tag || @tag == "!") && (@style != Nodes::Scalar::PLAIN),
624
+ @style
625
+ )
626
+
627
+ event.source = source if event.is_a?(Nodes::Scalar)
628
+ event
487
629
  end
488
630
  end
489
631
 
@@ -1423,7 +1565,7 @@ module Psych
1423
1565
  # e-scalar ::=
1424
1566
  # <empty>
1425
1567
  def parse_e_scalar
1426
- events_push_flush_properties(Scalar.new(Location.point(@source, @scanner.pos), "", Nodes::Scalar::PLAIN))
1568
+ events_push_flush_properties(Scalar.new(Location.point(@source, @scanner.pos), "", "", Nodes::Scalar::PLAIN))
1427
1569
  true
1428
1570
  end
1429
1571
 
@@ -1527,7 +1669,7 @@ module Psych
1527
1669
  end
1528
1670
  end
1529
1671
 
1530
- events_push_flush_properties(Scalar.new(Location.new(@source, pos_start, @scanner.pos), value, Nodes::Scalar::DOUBLE_QUOTED))
1672
+ events_push_flush_properties(Scalar.new(Location.new(@source, pos_start, @scanner.pos), from(pos_start), value, Nodes::Scalar::DOUBLE_QUOTED))
1531
1673
  true
1532
1674
  end
1533
1675
  end
@@ -1671,7 +1813,7 @@ module Psych
1671
1813
  value.gsub!(/(?:[\ \t]*\r?\n[\ \t]*)/, "\n")
1672
1814
  value.gsub!(/\n(\n*)/) { $1.empty? ? " " : $1 }
1673
1815
  value.gsub!("''", "'")
1674
- events_push_flush_properties(Scalar.new(Location.new(@source, pos_start, @scanner.pos), value, Nodes::Scalar::SINGLE_QUOTED))
1816
+ events_push_flush_properties(Scalar.new(Location.new(@source, pos_start, @scanner.pos), from(pos_start), value, Nodes::Scalar::SINGLE_QUOTED))
1675
1817
  true
1676
1818
  end
1677
1819
  end
@@ -2211,10 +2353,13 @@ module Psych
2211
2353
  end
2212
2354
 
2213
2355
  if result
2214
- value = from(pos_start)
2356
+ source = from(pos_start)
2357
+
2358
+ value = source.dup
2215
2359
  value.gsub!(/(?:[\ \t]*\r?\n[\ \t]*)/, "\n")
2216
2360
  value.gsub!(/\n(\n*)/) { $1.empty? ? " " : $1 }
2217
- events_push_flush_properties(Scalar.new(Location.new(@source, pos_start, @scanner.pos), value, Nodes::Scalar::PLAIN))
2361
+
2362
+ events_push_flush_properties(Scalar.new(Location.new(@source, pos_start, @scanner.pos), source, value, Nodes::Scalar::PLAIN))
2218
2363
  end
2219
2364
 
2220
2365
  result
@@ -2440,9 +2585,7 @@ module Psych
2440
2585
  parse_l_literal_content(n + m, t)
2441
2586
  } then
2442
2587
  @in_scalar = false
2443
- lines = events_cache_pop
2444
- lines.pop if lines.length > 0 && lines.last.empty?
2445
- value = lines.map { |line| "#{line}\n" }.join
2588
+ value = events_cache_pop.map { |line| "#{line}\n" }.join
2446
2589
 
2447
2590
  case t
2448
2591
  when :clip
@@ -2450,12 +2593,13 @@ module Psych
2450
2593
  when :strip
2451
2594
  value.sub!(/\n+\z/, "")
2452
2595
  when :keep
2453
- value.sub!(/\n(\n+)\z/) { $1 } if !value.match?(/\S/)
2596
+ # nothing
2454
2597
  else
2455
2598
  raise InternalException, t.inspect
2456
2599
  end
2457
2600
 
2458
- events_push_flush_properties(Scalar.new(Location.new(@source, pos_start, @scanner.pos), value, Nodes::Scalar::LITERAL))
2601
+ location = Location.new(@source, pos_start, @scanner.pos).trim_comments
2602
+ events_push_flush_properties(Scalar.new(location, @scanner.string.byteslice(location.range).chomp, value, Nodes::Scalar::LITERAL))
2459
2603
  true
2460
2604
  else
2461
2605
  @in_scalar = false
@@ -2469,6 +2613,8 @@ module Psych
2469
2613
  # l-empty(n,block-in)*
2470
2614
  # s-indent(n) nb-char+
2471
2615
  def parse_l_nb_literal_text(n)
2616
+ events_cache_size = @events_cache[-1].size
2617
+
2472
2618
  try do
2473
2619
  if star { parse_l_empty(n, :block_in) } && parse_s_indent(n)
2474
2620
  pos_start = @scanner.pos
@@ -2477,6 +2623,12 @@ module Psych
2477
2623
  events_push(from(pos_start))
2478
2624
  true
2479
2625
  end
2626
+ else
2627
+ # When parsing all of the l_empty calls, we may have added a bunch
2628
+ # of empty lines to the events cache. We need to clear those out
2629
+ # here.
2630
+ @events_cache[-1].slice!(events_cache_size..-1)
2631
+ false
2480
2632
  end
2481
2633
  end
2482
2634
  end
@@ -2545,7 +2697,8 @@ module Psych
2545
2697
  raise InternalException, t.inspect
2546
2698
  end
2547
2699
 
2548
- events_push_flush_properties(Scalar.new(Location.new(@source, pos_start, @scanner.pos), value, Nodes::Scalar::FOLDED))
2700
+ location = Location.new(@source, pos_start, @scanner.pos).trim_comments
2701
+ events_push_flush_properties(Scalar.new(location, @scanner.string.byteslice(location.range).chomp, value, Nodes::Scalar::FOLDED))
2549
2702
  true
2550
2703
  else
2551
2704
  @in_scalar = false
@@ -2739,7 +2892,7 @@ module Psych
2739
2892
 
2740
2893
  if try { plus { try { parse_s_indent(n + m) && parse_ns_l_block_map_entry(n + m) } } }
2741
2894
  events_cache_flush
2742
- events_push_flush_properties(MappingEnd.new(Location.point(@source, @scanner.pos))) # TODO
2895
+ events_push_flush_properties(MappingEnd.new(Location.point(@source, @scanner.pos)))
2743
2896
  true
2744
2897
  else
2745
2898
  events_cache_pop
@@ -2855,7 +3008,7 @@ module Psych
2855
3008
  star { try { parse_s_indent(n) && parse_ns_l_block_map_entry(n) } }
2856
3009
  } then
2857
3010
  events_cache_flush
2858
- events_push_flush_properties(MappingEnd.new(Location.point(@source, @scanner.pos))) # TODO
3011
+ events_push_flush_properties(MappingEnd.new(Location.point(@source, @scanner.pos)))
2859
3012
  true
2860
3013
  else
2861
3014
  events_cache_pop
@@ -3042,12 +3195,12 @@ module Psych
3042
3195
  def parse_l_yaml_stream
3043
3196
  events_push_flush_properties(StreamStart.new(Location.point(@source, @scanner.pos)))
3044
3197
 
3045
- @document_start_event = DocumentStart.new(Location.point(@source, @scanner.pos))
3046
- @tag_directives = @document_start_event.tag_directives
3047
- @document_end_event = nil
3048
-
3049
3198
  if try {
3050
3199
  if parse_l_document_prefix
3200
+ @document_start_event = DocumentStart.new(Location.point(@source, @scanner.pos))
3201
+ @tag_directives = @document_start_event.tag_directives
3202
+ @document_end_event = nil
3203
+
3051
3204
  parse_l_any_document
3052
3205
  star do
3053
3206
  try do
@@ -3105,11 +3258,11 @@ module Psych
3105
3258
  # aliases, since we may find that we need to add an anchor after the
3106
3259
  # object has already been flushed.
3107
3260
  class Node
3108
- attr_reader :value, :comments
3261
+ attr_reader :value, :psych_node
3109
3262
 
3110
- def initialize(value, comments)
3263
+ def initialize(value, psych_node)
3111
3264
  @value = value
3112
- @comments = comments
3265
+ @psych_node = psych_node
3113
3266
  @anchor = nil
3114
3267
  end
3115
3268
 
@@ -3127,7 +3280,7 @@ module Psych
3127
3280
 
3128
3281
  # Represents an array of nodes.
3129
3282
  class ArrayNode < Node
3130
- attr_accessor :anchor
3283
+ attr_accessor :anchor, :tag
3131
3284
 
3132
3285
  def accept(visitor)
3133
3286
  visitor.visit_array(self)
@@ -3136,7 +3289,7 @@ module Psych
3136
3289
 
3137
3290
  # Represents a hash of nodes.
3138
3291
  class HashNode < Node
3139
- attr_accessor :anchor
3292
+ attr_accessor :anchor, :tag
3140
3293
 
3141
3294
  def accept(visitor)
3142
3295
  visitor.visit_hash(self)
@@ -3150,6 +3303,14 @@ module Psych
3150
3303
  # Represents a generic object that is not matched by any of the other node
3151
3304
  # types.
3152
3305
  class ObjectNode < Node
3306
+ # The explicit tag associated with the object.
3307
+ attr_accessor :tag
3308
+
3309
+ # Whether or not this object was modified after being loaded. In this
3310
+ # case we cannot rely on the source formatting, and need to instead
3311
+ # format the value ourselves.
3312
+ attr_accessor :dirty
3313
+
3153
3314
  def accept(visitor)
3154
3315
  visitor.visit_object(self)
3155
3316
  end
@@ -3175,6 +3336,14 @@ module Psych
3175
3336
 
3176
3337
  # Represents a string object.
3177
3338
  class StringNode < Node
3339
+ # The explicit tag associated with the object.
3340
+ attr_accessor :tag
3341
+
3342
+ # Whether or not this object was modified after being loaded. In this
3343
+ # case we cannot rely on the source formatting, and need to instead
3344
+ # format the value ourselves.
3345
+ attr_accessor :dirty
3346
+
3178
3347
  def accept(visitor)
3179
3348
  visitor.visit_string(self)
3180
3349
  end
@@ -3195,14 +3364,10 @@ module Psych
3195
3364
  # Visit an ArrayNode.
3196
3365
  def visit_array(node)
3197
3366
  with_comments(node) do |value|
3198
- if (anchor = node.anchor)
3199
- @q.text("&#{anchor} ")
3200
- end
3201
-
3202
- if value.empty?
3203
- @q.text("[]")
3367
+ if value.empty? || ((psych_node = node.psych_node).is_a?(Nodes::Sequence) && psych_node.style == Nodes::Sequence::FLOW)
3368
+ visit_array_contents_flow(node.anchor, node.tag, value)
3204
3369
  else
3205
- visit_array_contents(value)
3370
+ visit_array_contents_block(node.anchor, node.tag, value)
3206
3371
  end
3207
3372
  end
3208
3373
  end
@@ -3210,16 +3375,10 @@ module Psych
3210
3375
  # Visit a HashNode.
3211
3376
  def visit_hash(node)
3212
3377
  with_comments(node) do |value|
3213
- if (anchor = node.anchor)
3214
- @q.text("&#{anchor}")
3215
- end
3216
-
3217
- if value.empty?
3218
- @q.text(" ") if anchor
3219
- @q.text("{}")
3378
+ if value.empty? || ((psych_node = node.psych_node).is_a?(Nodes::Mapping) && psych_node.style == Nodes::Mapping::FLOW)
3379
+ visit_hash_contents_flow(node.anchor, node.tag, value)
3220
3380
  else
3221
- @q.breakable if anchor
3222
- visit_hash_contents(value)
3381
+ visit_hash_contents_block(node.anchor, node.tag, value)
3223
3382
  end
3224
3383
  end
3225
3384
  end
@@ -3227,50 +3386,88 @@ module Psych
3227
3386
  # Visit an ObjectNode.
3228
3387
  def visit_object(node)
3229
3388
  with_comments(node) do |value|
3230
- @q.text(Psych.dump(value, indentation: @q.indent)[/\A--- (.+)\n\z/m, 1]) # TODO
3389
+ if !node.dirty && (psych_node = node.psych_node)
3390
+ if (tag = node.tag)
3391
+ @q.text("#{tag} ")
3392
+ end
3393
+
3394
+ @q.text(psych_node.source || psych_node.value)
3395
+ else
3396
+ if (tag = node.tag) && tag != "tag:yaml.org,2002:binary"
3397
+ @q.text("#{tag} ")
3398
+ end
3399
+
3400
+ @q.text(dump_object(value))
3401
+ end
3231
3402
  end
3232
3403
  end
3233
3404
 
3234
3405
  # Visit an OmapNode.
3235
3406
  def visit_omap(node)
3236
3407
  with_comments(node) do |value|
3237
- if (anchor = node.anchor)
3238
- @q.text("&#{anchor} ")
3239
- end
3240
-
3241
- @q.text("!!omap")
3242
- @q.breakable
3243
-
3244
- visit_array_contents(value)
3408
+ visit_array_contents_block(node.anchor, "!!omap", value)
3245
3409
  end
3246
3410
  end
3247
3411
 
3248
3412
  # Visit a SetNode.
3249
3413
  def visit_set(node)
3250
3414
  with_comments(node) do |value|
3251
- if (anchor = node.anchor)
3252
- @q.text("&#{anchor} ")
3253
- end
3254
-
3255
- @q.text("!set")
3256
- @q.breakable
3257
-
3258
- visit_hash_contents(node.value)
3415
+ visit_hash_contents_block(node.anchor, "!set", value)
3259
3416
  end
3260
3417
  end
3261
3418
 
3262
3419
  # Visit a StringNode.
3263
- alias visit_string visit_object
3420
+ def visit_string(node)
3421
+ with_comments(node) do |value|
3422
+ if !node.dirty && (psych_node = node.psych_node)
3423
+ if (tag = node.tag)
3424
+ @q.text("#{tag} ")
3425
+ end
3426
+
3427
+ @q.text(psych_node.source || psych_node.value)
3428
+ else
3429
+ if (tag = node.tag) && tag != "tag:yaml.org,2002:binary"
3430
+ @q.text("#{tag} ")
3431
+ end
3432
+
3433
+ @q.text(dump_object(value))
3434
+ end
3435
+ end
3436
+ end
3264
3437
 
3265
3438
  private
3266
3439
 
3440
+ # TODO: Certain objects require special formatting. Usually this
3441
+ # involves scanning the object itself and determining what kind of YAML
3442
+ # object it is, then dumping it back out. We rely on Psych itself to do
3443
+ # this formatting for us.
3444
+ #
3445
+ # Note this is the one place where we indirectly rely on libyaml,
3446
+ # because Psych delegates to libyaml to dump the object. This is less
3447
+ # than ideal, because it means in some circumstances we have an indirect
3448
+ # dependency. Ideally this would all be removed in favor of our own
3449
+ # formatting.
3450
+ def dump_object(value)
3451
+ Psych.dump(value, indentation: @q.indent)[/\A--- (.+?)(?:\n\.\.\.)?\n\z/m, 1]
3452
+ end
3453
+
3267
3454
  # Shortcut to visit a node by passing this visitor to the accept method.
3268
3455
  def visit(node)
3269
3456
  node.accept(self)
3270
3457
  end
3271
3458
 
3272
- # Visit the elements within an array.
3273
- def visit_array_contents(contents)
3459
+ # Visit the elements within an array in the block format.
3460
+ def visit_array_contents_block(anchor, tag, contents)
3461
+ if anchor
3462
+ @q.text("&#{anchor}")
3463
+ tag ? @q.text(" ") : @q.breakable
3464
+ end
3465
+
3466
+ if tag
3467
+ @q.text(tag)
3468
+ @q.breakable
3469
+ end
3470
+
3274
3471
  @q.seplist(contents, -> { @q.breakable }) do |element|
3275
3472
  @q.text("-")
3276
3473
  next if element.is_a?(NilNode)
@@ -3278,79 +3475,164 @@ module Psych
3278
3475
  @q.text(" ")
3279
3476
  @q.nest(2) { visit(element) }
3280
3477
  end
3478
+
3479
+ @q.current_group.break
3281
3480
  end
3282
3481
 
3283
- # Visit the key/value pairs within a hash.
3284
- def visit_hash_contents(contents)
3285
- @q.seplist(contents, -> { @q.breakable }) do |key, value|
3286
- inlined = false
3482
+ # Visit the elements within an array in the flow format.
3483
+ def visit_array_contents_flow(anchor, tag, contents)
3484
+ @q.group do
3485
+ @q.text("&#{anchor} ") if anchor
3486
+ @q.text("#{tag} ") if tag
3487
+ @q.text("[")
3287
3488
 
3288
- case key
3289
- when NilNode
3290
- @q.text("! ''")
3291
- when ArrayNode, HashNode, OmapNode, SetNode
3292
- if key.anchor.nil?
3293
- @q.text("? ")
3294
- @q.nest(2) { visit(key) }
3295
- @q.breakable
3296
- inlined = true
3297
- else
3298
- visit(key)
3489
+ unless contents.empty?
3490
+ @q.nest(2) do
3491
+ @q.breakable("")
3492
+ @q.seplist(contents, -> { @q.comma_breakable }) { |element| visit(element) }
3299
3493
  end
3300
- when AliasNode, ObjectNode
3494
+ @q.breakable("")
3495
+ end
3496
+
3497
+ @q.text("]")
3498
+ end
3499
+ end
3500
+
3501
+ # Visit a key value pair within a hash.
3502
+ def visit_hash_key_value(key, value)
3503
+ inlined = false
3504
+
3505
+ case key
3506
+ when NilNode
3507
+ @q.text("! ''")
3508
+ when ArrayNode, HashNode, OmapNode, SetNode
3509
+ if key.anchor.nil?
3510
+ @q.text("? ")
3511
+ @q.nest(2) { visit(key) }
3512
+ @q.breakable
3513
+ inlined = true
3514
+ else
3301
3515
  visit(key)
3302
- when StringNode
3303
- if key.value.include?("\n")
3304
- @q.text("? ")
3305
- visit(key)
3306
- @q.breakable
3307
- inlined = true
3308
- else
3309
- visit(key)
3310
- end
3311
3516
  end
3517
+ when AliasNode, ObjectNode
3518
+ visit(key)
3519
+ when StringNode
3520
+ if key.value.include?("\n")
3521
+ @q.text("? ")
3522
+ visit(key)
3523
+ @q.breakable
3524
+ inlined = true
3525
+ else
3526
+ visit(key)
3527
+ end
3528
+ else
3529
+ raise InternalException
3530
+ end
3312
3531
 
3313
- @q.text(":")
3532
+ @q.text(":")
3314
3533
 
3315
- case value
3316
- when NilNode
3317
- # skip
3318
- when OmapNode, SetNode
3534
+ case value
3535
+ when NilNode
3536
+ # skip
3537
+ when OmapNode, SetNode
3538
+ @q.text(" ")
3539
+ @q.nest(2) { visit(value) }
3540
+ when ArrayNode
3541
+ if ((psych_node = value.psych_node).is_a?(Nodes::Sequence) && psych_node.style == Nodes::Sequence::FLOW) || value.value.empty?
3542
+ @q.text(" ")
3543
+ visit(value)
3544
+ elsif inlined || value.anchor || value.tag || value.value.empty?
3319
3545
  @q.text(" ")
3320
3546
  @q.nest(2) { visit(value) }
3321
- when ArrayNode
3322
- if value.value.empty?
3323
- @q.text(" []")
3324
- elsif inlined || value.anchor
3325
- @q.text(" ")
3326
- @q.nest(2) { visit(value) }
3327
- else
3547
+ else
3548
+ @q.breakable
3549
+ visit(value)
3550
+ end
3551
+ when HashNode
3552
+ if ((psych_node = value.psych_node).is_a?(Nodes::Mapping) && psych_node.style == Nodes::Mapping::FLOW) || value.value.empty?
3553
+ @q.text(" ")
3554
+ visit(value)
3555
+ elsif inlined || value.anchor || value.tag
3556
+ @q.text(" ")
3557
+ @q.nest(2) { visit(value) }
3558
+ else
3559
+ @q.nest(2) do
3328
3560
  @q.breakable
3329
3561
  visit(value)
3330
3562
  end
3331
- when HashNode
3332
- if value.value.empty?
3333
- @q.text(" {}")
3334
- elsif inlined || value.anchor
3335
- @q.text(" ")
3336
- @q.nest(2) { visit(value) }
3337
- else
3338
- @q.nest(2) do
3339
- @q.breakable
3340
- visit(value)
3563
+ end
3564
+ when AliasNode, ObjectNode, StringNode
3565
+ @q.text(" ")
3566
+ @q.nest(2) { visit(value) }
3567
+ else
3568
+ raise InternalException
3569
+ end
3570
+ end
3571
+
3572
+ # Visit the key/value pairs within a hash in the block format.
3573
+ def visit_hash_contents_block(anchor, tag, children)
3574
+ if anchor
3575
+ @q.text("&#{anchor}")
3576
+ tag ? @q.text(" ") : @q.breakable
3577
+ end
3578
+
3579
+ if tag
3580
+ @q.text(tag)
3581
+ @q.breakable
3582
+ end
3583
+
3584
+ current_line = nil
3585
+ ((0...children.length) % 2).each do |index|
3586
+ key = children[index]
3587
+ value = children[index + 1]
3588
+
3589
+ if index > 0
3590
+ @q.breakable
3591
+
3592
+ if current_line && (psych_node = key.psych_node)
3593
+ start_line = psych_node.start_line
3594
+ if (leading = key.psych_node.comments.leading).any?
3595
+ start_line = leading.first.start_line
3341
3596
  end
3597
+
3598
+ @q.breakable if start_line - current_line >= 2
3342
3599
  end
3343
- when AliasNode, ObjectNode, StringNode
3344
- @q.text(" ")
3345
- @q.nest(2) { visit(value) }
3346
3600
  end
3601
+
3602
+ current_line = (psych_node = value.psych_node) ? psych_node.end_line : nil
3603
+ visit_hash_key_value(key, value)
3604
+ end
3605
+
3606
+ @q.current_group.break
3607
+ end
3608
+
3609
+ # Visit the key/value pairs within a hash in the flow format.
3610
+ def visit_hash_contents_flow(anchor, tag, children)
3611
+ @q.group do
3612
+ @q.text("&#{anchor} ") if anchor
3613
+ @q.text("#{tag} ") if tag
3614
+ @q.text("{")
3615
+
3616
+ unless children.empty?
3617
+ @q.nest(2) do
3618
+ @q.breakable
3619
+
3620
+ ((0...children.length) % 2).each do |index|
3621
+ @q.comma_breakable if index != 0
3622
+ visit_hash_key_value(children[index], children[index + 1])
3623
+ end
3624
+ end
3625
+ @q.breakable
3626
+ end
3627
+
3628
+ @q.text("}")
3347
3629
  end
3348
3630
  end
3349
3631
 
3350
3632
  # Print out the leading and trailing comments of a node, as well as
3351
3633
  # yielding the value of the node to the block.
3352
3634
  def with_comments(node)
3353
- if (comments = node.comments) && (leading = comments.leading).any?
3635
+ if (comments = node.psych_node&.comments) && (leading = comments.leading).any?
3354
3636
  line = nil
3355
3637
 
3356
3638
  leading.each do |comment|
@@ -3435,14 +3717,45 @@ module Psych
3435
3717
  # This is the main entrypoint into this object. It is responsible for
3436
3718
  # pushing a new object onto the emitter, which is then represented as a
3437
3719
  # YAML document.
3438
- def <<(object)
3720
+ def emit(object)
3439
3721
  if @started
3440
- @io << "...\n---"
3722
+ @io << "...\n"
3441
3723
  else
3442
- @io << "---"
3443
3724
  @started = true
3444
3725
  end
3445
3726
 
3727
+ # Very rare circumstance here that there are leading comments attached
3728
+ # to the root object of a document that occur before the --- marker. In
3729
+ # this case we want to output them first here, then dump the object.
3730
+ reload_comments = nil
3731
+ if (object.is_a?(LoadedObject) || object.is_a?(LoadedHash)) && (psych_node = object.psych_node).comments? && (leading = psych_node.comments.leading).any?
3732
+ leading = [*leading]
3733
+ line = psych_node.start_line - 1
3734
+
3735
+ while leading.any? && leading.last.start_line == line
3736
+ leading.pop
3737
+ line -= 1
3738
+ end
3739
+
3740
+ psych_node.comments.leading.slice!(0, leading.length)
3741
+ line = nil
3742
+
3743
+ leading.each do |comment|
3744
+ if line && (line < comment.start_line)
3745
+ @io << "\n" * (comment.start_line - line - 1)
3746
+ end
3747
+
3748
+ @io << comment.value
3749
+ @io << "\n"
3750
+
3751
+ line = comment.start_line
3752
+ end
3753
+
3754
+ reload_comments = leading.concat(psych_node.comments.leading)
3755
+ end
3756
+
3757
+ @io << "---"
3758
+
3446
3759
  if (node = dump(object)).is_a?(NilNode)
3447
3760
  @io << "\n"
3448
3761
  else
@@ -3461,44 +3774,87 @@ module Psych
3461
3774
 
3462
3775
  @io << q.output
3463
3776
  end
3777
+
3778
+ # If we initially split up the leading comments, then we need to reload
3779
+ # them back to their original state here.
3780
+ unless reload_comments.nil?
3781
+ object.psych_node.comments.leading.replace(reload_comments)
3782
+ end
3464
3783
  end
3465
3784
 
3466
3785
  private
3467
3786
 
3468
- # Walk through the given object and convert it into a tree of nodes.
3469
- def dump(base_object, comments = nil)
3470
- object = base_object
3471
-
3472
- if base_object.is_a?(CommentsObject) || base_object.is_a?(CommentsHash)
3473
- object = base_object.__getobj__
3474
- comments = base_object.psych_comments
3787
+ # Dump the tag value for a given node.
3788
+ def dump_tag(value)
3789
+ case value
3790
+ when /\Atag:yaml.org,2002:(.+)\z/
3791
+ "!!#{$1}"
3792
+ else
3793
+ value
3475
3794
  end
3795
+ end
3476
3796
 
3477
- if object.nil?
3478
- NilNode.new(object, comments)
3479
- elsif @object_nodes.key?(object)
3480
- AliasNode.new(@object_nodes[object].anchor = (@object_anchors[object] ||= (@object_anchor += 1)), comments)
3797
+ # Walk through the given object and convert it into a tree of nodes.
3798
+ def dump(base_object)
3799
+ if base_object.nil?
3800
+ NilNode.new(nil, nil)
3481
3801
  else
3482
- case object
3483
- when Psych::Omap
3484
- @object_nodes[object] = OmapNode.new(object.map { |(key, value)| HashNode.new({ dump(key) => dump(value) }, nil) }, comments)
3485
- when Psych::Set
3486
- @object_nodes[object] = SetNode.new(object.to_h { |key, value| [dump(key), dump(value)] }, comments)
3487
- when Array
3488
- @object_nodes[object] = ArrayNode.new(object.map { |element| dump(element) }, comments)
3489
- when Hash
3490
- dumped =
3491
- if base_object.is_a?(CommentsHash)
3492
- object.to_h { |key, value| [dump(key, base_object.psych_key_comments[key]), dump(value)] }
3802
+ object = base_object
3803
+ psych_node = nil
3804
+ dirty = false
3805
+
3806
+ if base_object.is_a?(LoadedObject)
3807
+ object = base_object.__getobj__
3808
+ psych_node = base_object.psych_node
3809
+ dirty = base_object.dirty
3810
+ elsif base_object.is_a?(LoadedHash)
3811
+ object = base_object.__getobj__
3812
+ psych_node = base_object.psych_node
3813
+ end
3814
+
3815
+ if @object_nodes.key?(object)
3816
+ @object_anchors[object] ||=
3817
+ if psych_node.is_a?(Nodes::Alias)
3818
+ psych_node.anchor
3493
3819
  else
3494
- object.to_h { |key, value| [dump(key), dump(value)] }
3820
+ @object_anchor += 1
3495
3821
  end
3496
3822
 
3497
- @object_nodes[object] = HashNode.new(dumped, comments)
3498
- when String
3499
- StringNode.new(object, comments)
3823
+ AliasNode.new(@object_nodes[object].anchor = @object_anchors[object], psych_node)
3500
3824
  else
3501
- ObjectNode.new(object, comments)
3825
+ case object
3826
+ when Psych::Omap
3827
+ @object_nodes[object] = OmapNode.new(object.map { |(key, value)| HashNode.new([dump(key), dump(value)], nil) }, psych_node)
3828
+ when Psych::Set
3829
+ @object_nodes[object] = SetNode.new(object.flat_map { |key, value| [dump(key), dump(value)] }, psych_node)
3830
+ when Array
3831
+ dumped = ArrayNode.new(object.map { |element| dump(element) }, psych_node)
3832
+ dumped.tag = dump_tag(psych_node&.tag)
3833
+
3834
+ @object_nodes[object] = dumped
3835
+ when Hash
3836
+ contents =
3837
+ if base_object.is_a?(LoadedHash)
3838
+ base_object.psych_keys.flat_map { |(key, value)| [dump(key), dump(value)] }
3839
+ else
3840
+ object.flat_map { |key, value| [dump(key), dump(value)] }
3841
+ end
3842
+
3843
+ dumped = HashNode.new(contents, psych_node)
3844
+ dumped.tag = dump_tag(psych_node&.tag)
3845
+
3846
+ @object_nodes[object] = dumped
3847
+ when String
3848
+ dumped = StringNode.new(object, psych_node)
3849
+ dumped.tag = dump_tag(psych_node&.tag)
3850
+ dumped.dirty = dirty
3851
+ dumped
3852
+ else
3853
+ dumped = ObjectNode.new(object, psych_node)
3854
+ dumped.tag = dump_tag(psych_node&.tag)
3855
+ dumped.dirty = dirty
3856
+ dumped
3857
+ end
3502
3858
  end
3503
3859
  end
3504
3860
  end
@@ -3538,10 +3894,10 @@ module Psych
3538
3894
  private
3539
3895
 
3540
3896
  # Dump the given object, ensuring that it is a permitted object.
3541
- def dump(base_object, comments = nil)
3897
+ def dump(base_object)
3542
3898
  object = base_object
3543
3899
 
3544
- if base_object.is_a?(CommentsObject) || base_object.is_a?(CommentsHash)
3900
+ if base_object.is_a?(LoadedObject) || base_object.is_a?(LoadedHash)
3545
3901
  object = base_object.__getobj__
3546
3902
  end
3547
3903
 
@@ -3677,7 +4033,7 @@ module Psych
3677
4033
 
3678
4034
  real_io = io || StringIO.new
3679
4035
  emitter = Emitter.new(real_io, options)
3680
- emitter << o
4036
+ emitter.emit(o)
3681
4037
  io || real_io.string
3682
4038
  end
3683
4039
 
@@ -3691,7 +4047,7 @@ module Psych
3691
4047
 
3692
4048
  real_io = io || StringIO.new
3693
4049
  emitter = SafeEmitter.new(real_io, options)
3694
- emitter << o
4050
+ emitter.emit(o)
3695
4051
  io || real_io.string
3696
4052
  end
3697
4053
 
@@ -3699,7 +4055,7 @@ module Psych
3699
4055
  def self.dump_stream(*objects)
3700
4056
  real_io = io || StringIO.new
3701
4057
  emitter = Emitter.new(real_io, {})
3702
- objects.each { |object| emitter << object }
4058
+ objects.each { |object| emitter.emit(object) }
3703
4059
  io || real_io.string
3704
4060
  end
3705
4061
  end
data/psych-pure.gemspec CHANGED
@@ -40,4 +40,5 @@ Gem::Specification.new do |spec|
40
40
  spec.add_development_dependency "bundler"
41
41
  spec.add_development_dependency "minitest"
42
42
  spec.add_development_dependency "rake"
43
+ spec.add_development_dependency "simplecov"
43
44
  end
metadata CHANGED
@@ -1,13 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: psych-pure
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin Newton
8
+ autorequire:
8
9
  bindir: bin
9
10
  cert_chain: []
10
- date: 2025-02-13 00:00:00.000000000 Z
11
+ date: 2025-10-24 00:00:00.000000000 Z
11
12
  dependencies:
12
13
  - !ruby/object:Gem::Dependency
13
14
  name: psych
@@ -79,6 +80,21 @@ dependencies:
79
80
  - - ">="
80
81
  - !ruby/object:Gem::Version
81
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: simplecov
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description:
82
98
  email:
83
99
  - kddnewton@gmail.com
84
100
  executables: []
@@ -97,9 +113,10 @@ licenses:
97
113
  - MIT
98
114
  metadata:
99
115
  bug_tracker_uri: https://github.com/kddnewton/psych-pure/issues
100
- changelog_uri: https://github.com/kddnewton/psych-pure/blob/v0.1.1/CHANGELOG.md
116
+ changelog_uri: https://github.com/kddnewton/psych-pure/blob/v0.1.3/CHANGELOG.md
101
117
  source_code_uri: https://github.com/kddnewton/psych-pure
102
118
  rubygems_mfa_required: 'true'
119
+ post_install_message:
103
120
  rdoc_options: []
104
121
  require_paths:
105
122
  - lib
@@ -114,7 +131,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
114
131
  - !ruby/object:Gem::Version
115
132
  version: '0'
116
133
  requirements: []
117
- rubygems_version: 3.6.2
134
+ rubygems_version: 3.5.16
135
+ signing_key:
118
136
  specification_version: 4
119
137
  summary: A YAML parser written in Ruby
120
138
  test_files: []