yaml-janitor 20260406.1 → 20260406.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: abcc9adc3f21f808aab705842175693a36722392d5da69757579bfaaef0c0f1a
4
- data.tar.gz: 31249485c56fda0ed92b0fed092b0b864fcb82cccd643e71a0ffbec382777cf2
3
+ metadata.gz: fb223767d0b5e017b8970002eb29b3ebc595d920d7dc09e9e872b1aa606eef5f
4
+ data.tar.gz: 214424d3a8d3544d23e00af3a4a5d666cbc641409681aabec7293082d9240a74
5
5
  SHA512:
6
- metadata.gz: 8d8fabe90ec71b5c5921078194ab313dfb458e7f84ca64ac58ac13720209c92008c443fda49093bc2bef934c8f7ef387b8215cf730776769995498413df7ee86
7
- data.tar.gz: d397efb992b5ab7c7569bf76c0b02aceb320c46a5f788ccf6898c2ddd23ca689c3cfb0bd6d19bb634787e3728f14ea84e31a53a235c8dc898bb01b8e714d8c54
6
+ metadata.gz: 9eb25e96054a887664b893c5a4395d81385bb931ee04dfe79b2438d5a7099bcad05d22637673e056556cb4392a515de45eb686c99ef8e083f18d8a3afa0548d1
7
+ data.tar.gz: 118d8f81448af12f8fc0a852ec55e52798cbae4935ae08c9a13ee9e21a42fce1dcadbd4ac3fc273e58f1010a286f53e99140b5048a8a366e1ecefd1cfb356255
@@ -5,44 +5,60 @@ module YamlJanitor
5
5
  # according to configuration rules. Unlike Psych::Pure.dump, we have
6
6
  # complete control over formatting choices.
7
7
  class Emitter
8
- def initialize(node, config)
8
+ def initialize(node, config, ast: nil)
9
9
  @node = node
10
10
  @config = config
11
11
  @output = []
12
+ @ast = ast
13
+ @ast_root = extract_ast_root(ast)
12
14
  end
13
15
 
14
16
  def emit
15
17
  # Emit any leading comments on the root document
16
18
  emit_comments(get_comments(@node, :leading), 0)
17
19
 
18
- emit_document(@node)
20
+ emit_document(@node, ast_node: @ast_root)
19
21
  @output.join("\n") + "\n"
20
22
  end
21
23
 
22
24
  private
23
25
 
24
- def emit_document(node, indent: 0)
26
+ def emit_document(node, indent: 0, ast_node: nil)
25
27
  case node
26
28
  when Psych::Pure::LoadedHash
27
- emit_mapping(node, indent)
29
+ emit_mapping(node, indent, ast_node: ast_node)
28
30
  when Hash
29
- emit_mapping(node, indent)
31
+ emit_mapping(node, indent, ast_node: ast_node)
30
32
  when Psych::Pure::LoadedObject
31
33
  # Check if it wraps an array
32
34
  inner = node.__getobj__
33
35
  if inner.is_a?(Array)
34
- emit_sequence(inner, indent, loaded_object: node)
36
+ emit_sequence(inner, indent, loaded_object: node, ast_node: ast_node)
35
37
  else
36
- emit_node(inner, indent)
38
+ emit_node(inner, indent, ast_node: ast_node)
37
39
  end
38
40
  when Array
39
- emit_sequence(node, indent)
41
+ emit_sequence(node, indent, ast_node: ast_node)
40
42
  else
41
43
  emit_scalar(node, indent)
42
44
  end
43
45
  end
44
46
 
45
- def emit_mapping(hash, indent)
47
+ def emit_mapping(hash, indent, ast_node: nil)
48
+ ast_pairs = ast_mapping_pairs(ast_node)
49
+
50
+ # If the AST has anchors or aliases, use AST-driven emission
51
+ # because the loaded hash may have already expanded the aliases
52
+ has_anchors_or_aliases = ast_pairs.any? do |_, v|
53
+ v.is_a?(Psych::Nodes::Alias) ||
54
+ (v.respond_to?(:anchor) && v.anchor) ||
55
+ (v.is_a?(Psych::Nodes::Mapping) && v.children.each_slice(2).any? { |_, cv| cv.is_a?(Psych::Nodes::Alias) })
56
+ end
57
+ if has_anchors_or_aliases
58
+ emit_mapping_from_ast(ast_pairs, indent)
59
+ return
60
+ end
61
+
46
62
  # Use psych_keys if available (LoadedHash), otherwise fall back to regular iteration
47
63
  entries = if hash.respond_to?(:psych_keys)
48
64
  hash.psych_keys.map { |pk| [pk.key_node, pk.value_node] }
@@ -51,6 +67,8 @@ module YamlJanitor
51
67
  end
52
68
 
53
69
  entries.each_with_index do |(key, value), index|
70
+ _ast_key, ast_value = ast_pairs[index] || [nil, nil]
71
+
54
72
  # Add blank line between top-level keys if configured
55
73
  actual_value = value.is_a?(Psych::Pure::LoadedObject) ? value.__getobj__ : value
56
74
  @output << "" if index > 0 && indent == 0 && should_add_blank_line?(actual_value)
@@ -61,13 +79,26 @@ module YamlJanitor
61
79
  # Emit the key-value pair
62
80
  key_str = scalar_to_string(key.is_a?(Psych::Pure::LoadedObject) ? key.__getobj__ : key)
63
81
 
82
+ # Check if this value is an alias in the AST
83
+ if ast_value&.is_a?(Psych::Nodes::Alias)
84
+ @output << "#{' ' * indent}#{key_str}: *#{ast_value.anchor}"
85
+ emit_comments(get_comments(key, :trailing), indent)
86
+ next
87
+ end
88
+
89
+ # Check if the value has an anchor
90
+ anchor_suffix = ""
91
+ if ast_value&.respond_to?(:anchor) && ast_value.anchor
92
+ anchor_suffix = " &#{ast_value.anchor}"
93
+ end
94
+
64
95
  # Unwrap LoadedObject to check the actual type
65
96
  actual_value = value.is_a?(Psych::Pure::LoadedObject) ? value.__getobj__ : value
66
97
 
67
98
  case actual_value
68
99
  when Hash, Psych::Pure::LoadedHash, Array
69
100
  # Complex value - put on next line
70
- line = "#{' ' * indent}#{key_str}:"
101
+ line = "#{' ' * indent}#{key_str}:#{anchor_suffix}"
71
102
 
72
103
  # Check for inline comment on the value
73
104
  if (trailing = get_comments(value, :trailing))
@@ -79,14 +110,14 @@ module YamlJanitor
79
110
  end
80
111
 
81
112
  @output << line
82
- emit_node(value, indent + indentation)
113
+ emit_node(value, indent + indentation, ast_node: ast_value)
83
114
 
84
115
  # Emit any non-inline trailing comments
85
116
  emit_comments(trailing, indent) if trailing&.any?
86
117
  else
87
118
  # Simple value - same line
88
119
  value_str = scalar_to_string(actual_value)
89
- line = "#{' ' * indent}#{key_str}: #{value_str}"
120
+ line = "#{' ' * indent}#{key_str}:#{anchor_suffix} #{value_str}"
90
121
 
91
122
  # Check for inline comment on the value
92
123
  if (trailing = get_comments(value, :trailing))
@@ -102,7 +133,7 @@ module YamlJanitor
102
133
  end
103
134
  end
104
135
 
105
- def emit_sequence(array, indent, loaded_object: nil)
136
+ def emit_sequence(array, indent, loaded_object: nil, ast_node: nil)
106
137
  array.each_with_index do |item, index|
107
138
  # Emit any leading comments (check both the item and the LoadedObject wrapper)
108
139
  comments = get_comments(item, :leading) || (loaded_object ? get_comments(loaded_object, :leading) : nil)
@@ -188,14 +219,14 @@ module YamlJanitor
188
219
  end
189
220
  end
190
221
 
191
- def emit_node(node, indent)
222
+ def emit_node(node, indent, ast_node: nil)
192
223
  case node
193
224
  when Psych::Pure::LoadedHash, Hash
194
- emit_mapping(node, indent)
225
+ emit_mapping(node, indent, ast_node: ast_node)
195
226
  when Psych::Pure::LoadedObject
196
- emit_node(node.__getobj__, indent)
227
+ emit_node(node.__getobj__, indent, ast_node: ast_node)
197
228
  when Array
198
- emit_sequence(node, indent)
229
+ emit_sequence(node, indent, ast_node: ast_node)
199
230
  else
200
231
  @output << "#{' ' * indent}#{scalar_to_string(node)}"
201
232
  end
@@ -282,8 +313,125 @@ module YamlJanitor
282
313
  value.is_a?(Hash) || value.is_a?(Array)
283
314
  end
284
315
 
316
+ # Emit a mapping by walking the AST directly.
317
+ # Used when the mapping contains aliases that got expanded in the loaded data.
318
+ def emit_mapping_from_ast(ast_pairs, indent)
319
+ ast_pairs.each_with_index do |(ast_key, ast_value), index|
320
+ key_str = ast_key.value rescue ast_key.to_s
321
+
322
+ if ast_value.is_a?(Psych::Nodes::Alias)
323
+ # Emit alias reference
324
+ @output << "#{' ' * indent}#{key_str}: *#{ast_value.anchor}"
325
+ elsif ast_value.is_a?(Psych::Nodes::Mapping)
326
+ anchor_suffix = ast_value.anchor ? " &#{ast_value.anchor}" : ""
327
+ @output << "#{' ' * indent}#{key_str}:#{anchor_suffix}"
328
+ # Recurse into the mapping's AST children
329
+ child_pairs = ast_value.children.each_slice(2).to_a
330
+ if child_pairs.any? { |_, v| v.is_a?(Psych::Nodes::Alias) }
331
+ emit_mapping_from_ast(child_pairs, indent + indentation)
332
+ else
333
+ child_pairs.each do |ck, cv|
334
+ ck_str = ck.value rescue ck.to_s
335
+ if cv.is_a?(Psych::Nodes::Scalar)
336
+ @output << "#{' ' * (indent + indentation)}#{ck_str}: #{format_ast_scalar(cv)}"
337
+ elsif cv.is_a?(Psych::Nodes::Alias)
338
+ @output << "#{' ' * (indent + indentation)}#{ck_str}: *#{cv.anchor}"
339
+ elsif cv.is_a?(Psych::Nodes::Mapping)
340
+ anchor_suffix = cv.anchor ? " &#{cv.anchor}" : ""
341
+ @output << "#{' ' * (indent + indentation)}#{ck_str}:#{anchor_suffix}"
342
+ emit_mapping_from_ast(cv.children.each_slice(2).to_a, indent + indentation * 2)
343
+ elsif cv.is_a?(Psych::Nodes::Sequence)
344
+ anchor_suffix = cv.anchor ? " &#{cv.anchor}" : ""
345
+ @output << "#{' ' * (indent + indentation)}#{ck_str}:#{anchor_suffix}"
346
+ emit_sequence_from_ast(cv.children, indent + indentation * 2)
347
+ end
348
+ end
349
+ end
350
+ elsif ast_value.is_a?(Psych::Nodes::Scalar)
351
+ anchor_suffix = ast_value.anchor ? " &#{ast_value.anchor}" : ""
352
+ @output << "#{' ' * indent}#{key_str}:#{anchor_suffix} #{format_ast_scalar(ast_value)}"
353
+ elsif ast_value.is_a?(Psych::Nodes::Sequence)
354
+ anchor_suffix = ast_value.anchor ? " &#{ast_value.anchor}" : ""
355
+ @output << "#{' ' * indent}#{key_str}:#{anchor_suffix}"
356
+ emit_sequence_from_ast(ast_value.children, indent + indentation)
357
+ end
358
+ end
359
+ end
360
+
361
+ # Emit a sequence from AST nodes
362
+ def emit_sequence_from_ast(children, indent)
363
+ children.each do |child|
364
+ if child.is_a?(Psych::Nodes::Scalar)
365
+ @output << "#{' ' * indent}- #{format_ast_scalar(child)}"
366
+ elsif child.is_a?(Psych::Nodes::Alias)
367
+ @output << "#{' ' * indent}- *#{child.anchor}"
368
+ elsif child.is_a?(Psych::Nodes::Mapping)
369
+ pairs = child.children.each_slice(2).to_a
370
+ pairs.each_with_index do |(k, v), i|
371
+ prefix = i == 0 ? "#{' ' * indent}- " : "#{' ' * (indent + indentation)}"
372
+ k_str = k.value rescue k.to_s
373
+ if v.is_a?(Psych::Nodes::Scalar)
374
+ @output << "#{prefix}#{k_str}: #{format_ast_scalar(v)}"
375
+ elsif v.is_a?(Psych::Nodes::Alias)
376
+ @output << "#{prefix}#{k_str}: *#{v.anchor}"
377
+ end
378
+ end
379
+ end
380
+ end
381
+ end
382
+
383
+ # Format an AST scalar value, preserving its original type.
384
+ # The AST node knows its tag (int, bool, null, etc.) so we
385
+ # can emit without spurious quoting.
386
+ def format_ast_scalar(node)
387
+ return "null" if node.tag == "tag:yaml.org,2002:null" || node.value.nil?
388
+
389
+ case node.tag
390
+ when "tag:yaml.org,2002:int", "tag:yaml.org,2002:float"
391
+ node.value
392
+ when "tag:yaml.org,2002:bool"
393
+ node.value
394
+ else
395
+ # For plain scalars, use the raw value if it was unquoted in the original
396
+ if node.plain
397
+ node.value
398
+ else
399
+ scalar_to_string(node.value)
400
+ end
401
+ end
402
+ end
403
+
285
404
  def indentation
286
405
  @config.indentation
287
406
  end
407
+
408
+ # Extract the root mapping node from a parsed AST
409
+ def extract_ast_root(ast)
410
+ return nil unless ast
411
+ return nil unless ast.respond_to?(:children)
412
+
413
+ case ast
414
+ when Psych::Nodes::Document
415
+ # Document -> Mapping
416
+ ast.children&.each do |child|
417
+ return child if child.is_a?(Psych::Nodes::Mapping)
418
+ end
419
+ when Psych::Nodes::Stream
420
+ # Stream -> Document -> Mapping
421
+ ast.children&.each do |doc|
422
+ next unless doc.is_a?(Psych::Nodes::Document)
423
+ doc.children&.each do |child|
424
+ return child if child.is_a?(Psych::Nodes::Mapping)
425
+ end
426
+ end
427
+ end
428
+ nil
429
+ end
430
+
431
+ # Get key/value AST node pairs from a mapping node
432
+ def ast_mapping_pairs(ast_node)
433
+ return [] unless ast_node.is_a?(Psych::Nodes::Mapping)
434
+ ast_node.children.each_slice(2).to_a
435
+ end
288
436
  end
289
437
  end
@@ -22,11 +22,14 @@ module YamlJanitor
22
22
  def lint(yaml_content, fix: false, file: nil)
23
23
  violations = []
24
24
 
25
+ # Parse AST for anchor/alias tracking
26
+ ast = Psych::Pure.parse(yaml_content)
27
+
25
28
  # Load with comments
26
29
  loaded = Psych::Pure.load(yaml_content, comments: true, permitted_classes: [Date, Time, DateTime, Symbol, Regexp], aliases: true)
27
30
 
28
- # Format using our custom emitter
29
- formatted = Emitter.new(loaded, @config).emit
31
+ # Format using our custom emitter (pass AST for anchor/alias preservation)
32
+ formatted = Emitter.new(loaded, @config, ast: ast).emit
30
33
 
31
34
  # Check if formatting would change the file
32
35
  if yaml_content != formatted
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module YamlJanitor
4
- VERSION = "20260406.1"
4
+ VERSION = "20260406.3"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yaml-janitor
3
3
  version: !ruby/object:Gem::Version
4
- version: '20260406.1'
4
+ version: '20260406.3'
5
5
  platform: ruby
6
6
  authors:
7
7
  - ducks