canon 0.2.11 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +12 -22
  3. data/Rakefile +5 -2
  4. data/lib/canon/cache.rb +3 -1
  5. data/lib/canon/cli.rb +0 -3
  6. data/lib/canon/commands/diff_command.rb +0 -6
  7. data/lib/canon/commands/format_command.rb +0 -4
  8. data/lib/canon/commands.rb +9 -0
  9. data/lib/canon/comparison/child_realignment.rb +0 -2
  10. data/lib/canon/comparison/compare_profile.rb +30 -36
  11. data/lib/canon/comparison/comparison_result.rb +0 -2
  12. data/lib/canon/comparison/diff_node_builder.rb +353 -0
  13. data/lib/canon/comparison/dimensions/dimension.rb +51 -0
  14. data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
  15. data/lib/canon/comparison/dimensions/registry.rb +101 -60
  16. data/lib/canon/comparison/dimensions.rb +15 -46
  17. data/lib/canon/comparison/html_comparator.rb +18 -141
  18. data/lib/canon/comparison/html_compare_profile.rb +15 -18
  19. data/lib/canon/comparison/json_comparator.rb +4 -165
  20. data/lib/canon/comparison/json_parser.rb +0 -2
  21. data/lib/canon/comparison/markup_comparator.rb +14 -210
  22. data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
  23. data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
  24. data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
  25. data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
  26. data/lib/canon/comparison/match_options.rb +13 -88
  27. data/lib/canon/comparison/pipeline.rb +269 -0
  28. data/lib/canon/comparison/profile_definition.rb +0 -2
  29. data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
  30. data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
  31. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
  32. data/lib/canon/comparison/strategies.rb +16 -0
  33. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +0 -3
  34. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
  35. data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
  36. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
  37. data/lib/canon/comparison/xml_comparator/node_parser.rb +0 -4
  38. data/lib/canon/comparison/xml_comparator.rb +4 -492
  39. data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
  40. data/lib/canon/comparison/xml_node_comparison.rb +4 -119
  41. data/lib/canon/comparison/yaml_comparator.rb +0 -3
  42. data/lib/canon/comparison.rb +143 -266
  43. data/lib/canon/config/config_dsl.rb +159 -0
  44. data/lib/canon/config/env_provider.rb +0 -3
  45. data/lib/canon/config/env_schema.rb +48 -58
  46. data/lib/canon/config/profile_loader.rb +0 -1
  47. data/lib/canon/config.rb +116 -468
  48. data/lib/canon/diff/diff_block_builder.rb +0 -2
  49. data/lib/canon/diff/diff_classifier.rb +0 -5
  50. data/lib/canon/diff/diff_context.rb +0 -2
  51. data/lib/canon/diff/diff_context_builder.rb +0 -2
  52. data/lib/canon/diff/diff_line_builder.rb +0 -3
  53. data/lib/canon/diff/diff_node_enricher.rb +0 -4
  54. data/lib/canon/diff/diff_node_mapper.rb +0 -4
  55. data/lib/canon/diff/diff_report_builder.rb +0 -4
  56. data/lib/canon/diff/formatting_detector.rb +0 -1
  57. data/lib/canon/diff/node_serializer.rb +0 -7
  58. data/lib/canon/diff.rb +39 -0
  59. data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
  60. data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
  61. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
  62. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
  63. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
  64. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
  65. data/lib/canon/diff_formatter/by_object/base_formatter.rb +8 -15
  66. data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
  67. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +0 -2
  68. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
  69. data/lib/canon/diff_formatter/debug_output.rb +0 -2
  70. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +24 -58
  71. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +0 -2
  72. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
  73. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
  74. data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
  75. data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
  76. data/lib/canon/diff_formatter.rb +11 -9
  77. data/lib/canon/formatters/html4_formatter.rb +0 -2
  78. data/lib/canon/formatters/html5_formatter.rb +0 -2
  79. data/lib/canon/formatters/html_formatter.rb +0 -3
  80. data/lib/canon/formatters/json_formatter.rb +0 -1
  81. data/lib/canon/formatters/xml_formatter.rb +0 -4
  82. data/lib/canon/formatters/yaml_formatter.rb +0 -1
  83. data/lib/canon/formatters.rb +16 -0
  84. data/lib/canon/html/data_model.rb +0 -10
  85. data/lib/canon/html.rb +4 -3
  86. data/lib/canon/options/cli_generator.rb +0 -2
  87. data/lib/canon/options/registry.rb +0 -2
  88. data/lib/canon/options.rb +9 -0
  89. data/lib/canon/pretty_printer/html.rb +0 -1
  90. data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
  91. data/lib/canon/pretty_printer.rb +12 -0
  92. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  93. data/lib/canon/tree_diff/adapters.rb +14 -0
  94. data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
  95. data/lib/canon/tree_diff/core/node_signature.rb +1 -1
  96. data/lib/canon/tree_diff/core/tree_node.rb +12 -5
  97. data/lib/canon/tree_diff/core.rb +17 -0
  98. data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
  99. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
  100. data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
  101. data/lib/canon/tree_diff/matchers.rb +15 -0
  102. data/lib/canon/tree_diff/operation_converter.rb +0 -8
  103. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
  104. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
  105. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
  106. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
  107. data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
  108. data/lib/canon/tree_diff/operations/operation_detector.rb +2 -5
  109. data/lib/canon/tree_diff/operations.rb +13 -0
  110. data/lib/canon/tree_diff.rb +26 -27
  111. data/lib/canon/validators/base_validator.rb +0 -2
  112. data/lib/canon/validators/html_validator.rb +0 -1
  113. data/lib/canon/validators/json_validator.rb +0 -1
  114. data/lib/canon/validators/xml_validator.rb +0 -1
  115. data/lib/canon/validators/yaml_validator.rb +0 -1
  116. data/lib/canon/validators.rb +12 -0
  117. data/lib/canon/version.rb +1 -1
  118. data/lib/canon/xml/c14n.rb +0 -4
  119. data/lib/canon/xml/data_model.rb +0 -10
  120. data/lib/canon/xml/line_range_mapper.rb +0 -2
  121. data/lib/canon/xml/nodes/attribute_node.rb +0 -2
  122. data/lib/canon/xml/nodes/comment_node.rb +0 -2
  123. data/lib/canon/xml/nodes/element_node.rb +0 -2
  124. data/lib/canon/xml/nodes/namespace_node.rb +0 -2
  125. data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
  126. data/lib/canon/xml/nodes/root_node.rb +0 -2
  127. data/lib/canon/xml/nodes/text_node.rb +0 -2
  128. data/lib/canon/xml/nodes.rb +19 -0
  129. data/lib/canon/xml/processor.rb +0 -5
  130. data/lib/canon/xml/sax_builder.rb +0 -7
  131. data/lib/canon/xml.rb +33 -0
  132. data/lib/canon/xml_backend.rb +50 -14
  133. data/lib/canon/xml_parsing.rb +4 -2
  134. data/lib/canon.rb +25 -15
  135. data/lib/tasks/performance.rake +0 -58
  136. data/lib/tasks/performance_comparator.rb +132 -65
  137. data/lib/tasks/performance_helpers.rb +4 -249
  138. data/lib/tasks/performance_report.rb +309 -0
  139. metadata +24 -11
  140. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
  141. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
  142. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
  143. data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
  144. data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
  145. data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
  146. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
  147. data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
  148. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -300
@@ -0,0 +1,353 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
+ module Canon
6
+ module Comparison
7
# Single factory for DiffNode creation in the DOM comparison path.
#
# Centralises reason building, metadata enrichment (path, serialization,
# attributes), and whitespace visualization — previously duplicated
# across MarkupComparator and XmlComparator.
#
# All methods are class methods; the class holds no per-instance state.
# The only cached state is the lazily loaded character visualization map.
class DiffNodeBuilder
  # Build an enriched DiffNode.
  #
  # @param node1 [Object, nil] node from the first document
  # @param node2 [Object, nil] node from the second document
  # @param diff1 [Object] comparison code for the first side
  # @param diff2 [Object] comparison code for the second side
  # @param dimension [Symbol] dimension the difference belongs to
  # @param _opts [Hash] extra keywords are accepted and ignored
  # @return [Canon::Diff::DiffNode]
  # @raise [ArgumentError] when +dimension+ is nil
  def self.build(node1:, node2:, diff1:, diff2:, dimension:, **_opts)
    raise ArgumentError, "dimension required for DiffNode" if dimension.nil?

    reason = build_reason(node1, node2, diff1, diff2, dimension)
    metadata = enrich_metadata(node1, node2)

    Canon::Diff::DiffNode.new(
      node1: node1,
      node2: node2,
      dimension: dimension,
      reason: reason,
      **metadata,
    )
  end

  # --- Reason building ---------------------------------------------------

  # Produce the human-readable reason string for a difference.
  #
  # A text-content difference where one side is missing and the surviving
  # node is an XML node gets a dedicated "element '<name>'" message that
  # includes the namespace when there is one. Everything else is routed
  # to the dimension-specific builder, with +fallback_reason+ as default.
  #
  # @return [String]
  def self.build_reason(node1, node2, diff1, diff2, dimension)
    # Nil-node text content with namespace info
    if dimension == :text_content && (node1.nil? || node2.nil?)
      node = node1 || node2
      if node.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node)
        ns = Canon::XmlParsing.namespace_uri(node)
        ns_info = ns.nil? || ns.empty? ? "" : " (namespace: #{ns})"
        label = Canon::Comparison.code_pair_label(diff1, diff2)
        return "element '#{node.name}'#{ns_info}: #{label}"
      end
    end

    case dimension
    when :attribute_presence
      build_attribute_difference_reason(
        extract_attributes(node1), extract_attributes(node2)
      )
    when :attribute_values
      build_attribute_values_reason(node1, node2)
    when :text_content
      build_text_difference_reason(
        extract_text_content(node1), extract_text_content(node2)
      )
    when :attribute_order
      build_attribute_order_reason(node1, node2)
    when :comments
      # The comment builder returns nil when neither node is a comment.
      build_comment_difference_reason(node1, node2) ||
        fallback_reason(diff1, diff2, dimension, node1, node2)
    when :whitespace_adjacency
      build_whitespace_adjacency_reason(node1, node2)
    else
      fallback_reason(diff1, diff2, dimension, node1, node2)
    end
  end

  # --- Metadata enrichment -----------------------------------------------

  # Collect the keyword arguments that +build+ splats into DiffNode.new.
  #
  # The path is built from whichever node is present (node1 preferred).
  #
  # @return [Hash{Symbol => Object}]
  def self.enrich_metadata(node1, node2)
    {
      path: Canon::Diff::PathBuilder.build(node1 || node2,
                                           format: :document),
      serialized_before: serialize(node1),
      serialized_after: serialize(node2),
      attributes_before: extract_attributes(node1),
      attributes_after: extract_attributes(node2),
    }
  end

  # --- Node queries (delegate to NodeSerializer) -------------------------

  # Serialize a node via NodeSerializer; nil stays nil.
  def self.serialize(node)
    return nil if node.nil?

    Canon::Diff::NodeSerializer.serialize(node)
  end

  # Extract a node's attributes via NodeSerializer; nil stays nil.
  def self.extract_attributes(node)
    return nil if node.nil?

    Canon::Diff::NodeSerializer.extract_attributes(node)
  end

  # --- Attribute reason builders -----------------------------------------

  # Describe how two attribute hashes differ.
  #
  # When either hash is missing only the attribute counts are reported.
  # Otherwise the names present on one side only and the shared names
  # whose values differ are listed, each group sorted.
  #
  # @param attrs1 [Hash, nil]
  # @param attrs2 [Hash, nil]
  # @return [String]
  def self.build_attribute_difference_reason(attrs1, attrs2)
    unless attrs1 && attrs2
      return "#{attrs1&.keys&.size || 0} vs #{attrs2&.keys&.size || 0} attributes"
    end

    keys1 = attrs1.keys.to_set
    keys2 = attrs2.keys.to_set

    only_in_first = keys1 - keys2
    only_in_second = keys2 - keys1
    different_values = (keys1 & keys2).reject { |k| attrs1[k] == attrs2[k] }

    parts = []
    parts << "only in first: #{only_in_first.to_a.sort.join(', ')}" if only_in_first.any?
    parts << "only in second: #{only_in_second.to_a.sort.join(', ')}" if only_in_second.any?
    parts << "different values: #{different_values.sort.join(', ')}" if different_values.any?

    parts.empty? ? "#{keys1.size} vs #{keys2.size} attributes (same names)" : parts.join("; ")
  end

  # List every attribute whose value differs between the two nodes.
  #
  # Values are looked up with the stringified key.
  # NOTE(review): this presumes NodeSerializer.extract_attributes returns
  # string-keyed hashes — confirm against NodeSerializer.
  #
  # @return [String]
  def self.build_attribute_values_reason(node1, node2)
    attrs1 = extract_attributes(node1) || {}
    attrs2 = extract_attributes(node2) || {}

    differing = (attrs1.keys | attrs2.keys).sort.reject do |k|
      attrs1[k.to_s] == attrs2[k.to_s]
    end

    changed = differing.map do |k|
      "Changed: #{k}=\"#{attrs1[k.to_s]}\" → \"#{attrs2[k.to_s]}\""
    end

    changed.empty? ? "attributes differ" : "Attributes differ (#{changed.join('; ')})"
  end

  # Show the attribute names of both nodes in their extracted order.
  #
  # @return [String]
  def self.build_attribute_order_reason(node1, node2)
    keys1 = extract_attributes(node1)&.keys || []
    keys2 = extract_attributes(node2)&.keys || []
    "Attribute order changed: [#{keys1.join(', ')}] → [#{keys2.join(', ')}]"
  end

  # --- Text content extraction -------------------------------------------

  # Best-effort text extraction across the supported node types.
  #
  # Any StandardError raised while reading the node is swallowed and nil
  # is returned, so reason building never fails on an unusual node.
  #
  # @return [String, nil]
  def self.extract_text_content(node)
    return nil if node.nil?

    case node
    when Canon::Xml::Nodes::TextNode
      node.value
    when Canon::Xml::Node
      node.text_content
    else
      if Canon::XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::Node)
        node.content.to_s
      elsif Canon::XmlParsing.xml_node?(node)
        Canon::XmlParsing.text_content(node)
      else
        node.to_s
      end
    end
  rescue StandardError
    nil
  end

  # --- Text diff reason --------------------------------------------------

  # Describe a text difference.
  #
  # Missing sides are reported explicitly. When both texts are
  # whitespace-only their composition is described; otherwise both texts
  # are shown with whitespace made visible.
  #
  # @param text1 [String, nil]
  # @param text2 [String, nil]
  # @return [String]
  def self.build_text_difference_reason(text1, text2)
    return "missing vs '#{truncate(text2)}'" if text1.nil? && text2
    return "'#{truncate(text1)}' vs missing" if text1 && text2.nil?
    return "both missing" if text1.nil? && text2.nil?

    if whitespace_only?(text1) && whitespace_only?(text2)
      return "whitespace: #{describe_whitespace(text1)} vs #{describe_whitespace(text2)}"
    end

    "Text: \"#{visualize_whitespace(text1)}\" vs \"#{visualize_whitespace(text2)}\""
  end

  # --- Comment reason ----------------------------------------------------

  # Describe a comment difference. node1 is labelled EXPECTED and node2
  # ACTUAL in the message.
  #
  # @return [String, nil] nil when neither node is a comment node
  def self.build_comment_difference_reason(node1, node2)
    cm1 = node1 && NodeInspector.comment_node?(node1)
    cm2 = node2 && NodeInspector.comment_node?(node2)

    return nil unless cm1 || cm2

    if cm1 && !cm2
      "Comment present on EXPECTED only: <!--#{truncate(comment_text(node1))}-->"
    elsif cm2 && !cm1
      "Comment present on ACTUAL only: <!--#{truncate(comment_text(node2))}-->"
    else
      t1 = truncate(comment_text(node1))
      t2 = truncate(comment_text(node2))
      "Comment text differs: <!--#{t1}--> vs <!--#{t2}-->"
    end
  end

  # Text of a comment node, coerced to a String.
  def self.comment_text(node)
    NodeInspector.text_content(node).to_s
  end

  # --- Whitespace adjacency reason (#137) --------------------------------

  # Describe a difference where exactly one side is a whitespace-only
  # text node. When that is not the case the plain text-difference reason
  # is used instead.
  #
  # @return [String]
  def self.build_whitespace_adjacency_reason(node1, node2)
    text1 = extract_text_content(node1)
    text2 = extract_text_content(node2)

    # Evaluate each predicate once instead of twice per node.
    first_is_ws = NodeInspector.whitespace_only_text?(node1)
    second_is_ws = NodeInspector.whitespace_only_text?(node2)

    ws_on_first = first_is_ws && !second_is_ws
    ws_on_second = second_is_ws && !first_is_ws

    unless ws_on_first || ws_on_second
      return build_text_difference_reason(text1, text2)
    end

    if ws_on_first
      build_adjacency_side(text1, text2, node1, "EXPECTED", "ACTUAL")
    else
      build_adjacency_side(text2, text1, node2, "ACTUAL", "EXPECTED")
    end
  end

  # --- Whitespace visualization ------------------------------------------

  # Replace each character that has an entry in the visualization map
  # with its visible stand-in; other characters pass through unchanged.
  #
  # @param text [String, nil]
  # @return [String] empty string for nil input
  def self.visualize_whitespace(text)
    return "" if text.nil?

    viz_map = character_visualization_map
    text.chars.map { |char| viz_map[char] || char }.join
  end

  # Summarise a string as a character count plus a breakdown of the
  # newlines, spaces and tabs it contains.
  #
  # The parenthesised breakdown is omitted when the text contains none of
  # those three characters (e.g. only "\r" or "\f"), instead of emitting
  # an empty "()".
  #
  # @param text [String, nil]
  # @return [String]
  def self.describe_whitespace(text)
    return "0 chars" if text.nil? || text.empty?

    char_count = text.length
    parts = []
    parts << "#{text.count("\n")} newlines" if text.include?("\n")
    parts << "#{text.count(' ')} spaces" if text.include?(" ")
    parts << "#{text.count("\t")} tabs" if text.include?("\t")

    return "#{char_count} chars" if parts.empty?

    "#{char_count} chars (#{parts.join(', ')})"
  end

  # Whether the text is empty after stripping. nil is not whitespace-only.
  #
  # @return [Boolean]
  def self.whitespace_only?(text)
    return false if text.nil?

    text.to_s.strip.empty?
  end

  # Shorten text to +max_length+ characters, appending "..." when cut.
  #
  # @param text [Object, nil] converted with +to_s+
  # @param max_length [Integer]
  # @return [String] empty string for nil input
  def self.truncate(text, max_length = 40)
    return "" if text.nil?

    text = text.to_s
    return text if text.length <= max_length

    "#{text[0...max_length]}..."
  end

  # --- Private helpers ---------------------------------------------------

  # Default reason when no dimension-specific handler matched.
  #
  # Two MISSING_NODE codes are reported as a structure mismatch. An
  # element-structure difference between two XML nodes with different
  # names reports both names. Anything else uses the generic label for
  # the code pair.
  def self.fallback_reason(diff1, diff2, dimension, node1, node2)
    if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
      "element structure mismatch (children differ)"
    elsif dimension == :element_structure &&
        diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
        diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
        (node1.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node1)) &&
        (node2.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node2)) &&
        node1.name && node2.name && node1.name != node2.name
      "different element name (<#{node1.name}> vs <#{node2.name}>)"
    else
      Canon::Comparison.code_pair_label(diff1, diff2)
    end
  end
  private_class_method :fallback_reason

  # Build one side of a whitespace-adjacency reason.
  #
  # With no meaningful partner text the message names the parent element;
  # otherwise it says where the whitespace sits relative to that text.
  def self.build_adjacency_side(ws_text, content_text, ws_node,
                                present_side, absent_side)
    ws_vis = visualize_whitespace(ws_text)

    if content_text.nil? || content_text.strip.empty?
      parent_label = whitespace_adjacency_parent_label(ws_node)
      "Whitespace inside #{parent_label}: " \
        "present on #{present_side} (\"#{ws_vis}\"), absent on #{absent_side}"
    else
      direction = whitespace_partner_direction(ws_node)
      content_vis = visualize_whitespace(truncate(content_text))
      "Whitespace #{direction} \"#{content_vis}\": " \
        "present on #{present_side} (\"#{ws_vis}\"), absent on #{absent_side}"
    end
  end
  private_class_method :build_adjacency_side

  # "<name>" of the whitespace node's parent, or "(unknown parent)" when
  # there is no parent or it has no name.
  def self.whitespace_adjacency_parent_label(ws_node)
    parent = NodeInspector.parent(ws_node)
    return "(unknown parent)" unless parent

    name = parent.name
    name && !name.empty? ? "<#{name}>" : "(unknown parent)"
  end
  private_class_method :whitespace_adjacency_parent_label

  # Direction of the partner content relative to the whitespace node.
  #
  # "before" when a non-whitespace sibling follows the node, "after" when
  # one precedes it, and "adjacent to" when neither can be determined.
  def self.whitespace_partner_direction(ws_node)
    parent = NodeInspector.parent(ws_node)
    return "adjacent to" unless parent

    siblings = parent.children
    idx = siblings.index(ws_node)
    return "adjacent to" unless idx

    if non_ws_sibling_exists?(siblings, idx, 1) then "before"
    elsif non_ws_sibling_exists?(siblings, idx, -1) then "after"
    else "adjacent to"
    end
  end
  private_class_method :whitespace_partner_direction

  # Walk from +idx+ in +direction+ (+1 or -1) and report whether any
  # sibling other than a whitespace-only text node is found.
  def self.non_ws_sibling_exists?(siblings, idx, direction)
    i = idx + direction
    while i >= 0 && i < siblings.length
      s = siblings[i]
      # to_s guards against a nil text, matching comment_text.
      is_ws_text = NodeInspector.text_node?(s) &&
        NodeInspector.text_content(s).to_s.strip.empty?
      return true unless is_ws_text

      i += direction
    end
    false
  end
  private_class_method :non_ws_sibling_exists?

  # Lazy-loaded character visualization map from YAML.
  #
  # Reads canon/diff_formatter/character_map.yml relative to the lib
  # root. Each entry under "characters" maps either a hex "unicode" code
  # point or a literal "character" to its "visualization". The result is
  # memoized on the class.
  def self.character_visualization_map
    @character_visualization_map ||= begin
      require "yaml" # deliberate lazy load; only needed for visualization
      lib_root = File.expand_path("../..", __dir__)
      yaml_path = File.join(lib_root,
                            "canon/diff_formatter/character_map.yml")
      data = YAML.load_file(yaml_path)

      data["characters"].each_with_object({}) do |char_data, map|
        char = if char_data["unicode"]
                 [char_data["unicode"].to_i(16)].pack("U")
               else
                 char_data["character"]
               end
        map[char] = char_data["visualization"]
      end
    end
  end
  private_class_method :character_visualization_map
end
352
+ end
353
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Comparison
5
+ module Dimensions
6
# Immutable value object representing a single comparison dimension.
#
# A dimension is an aspect of a document that can be compared with
# different behaviors (e.g., :strict, :normalize, :ignore). Each
# dimension knows its own classification rules — whether a difference
# is normative (affects equivalence) for a given behavior, and whether
# formatting detection should apply.
class Dimension
  attr_reader :name, :valid_behaviors

  # @param name [Symbol] Dimension identifier (e.g., :text_content)
  # @param valid_behaviors [Array<Symbol>] Allowed behaviors
  # @param normative_rule [Symbol] :behavior_not_ignore or :strict_only
  # @param formatting_detection [Boolean] Whether FormattingDetector applies
  def initialize(name:, valid_behaviors:, normative_rule: :behavior_not_ignore,
                 formatting_detection: false)
    @name = name
    # Freeze a private copy: freezing the argument itself would freeze
    # the caller's array, and keeping a reference to it would let later
    # caller-side mutation leak into this value object.
    @valid_behaviors = valid_behaviors.dup.freeze
    @normative_rule = normative_rule
    @formatting_detection = formatting_detection
    freeze
  end

  # Whether a difference in this dimension with the given behavior is
  # normative (affects equivalence).
  #
  # Under :strict_only only the :strict behavior is normative. Any other
  # rule value is treated as :behavior_not_ignore, where every behavior
  # except :ignore is normative.
  #
  # @param behavior [Symbol]
  # @return [Boolean]
  def normative?(behavior)
    case @normative_rule
    when :strict_only then behavior == :strict
    else behavior != :ignore
    end
  end

  # Whether the given behavior is valid for this dimension.
  #
  # @param behavior [Symbol]
  # @return [Boolean]
  def valid_behavior?(behavior)
    @valid_behaviors.include?(behavior)
  end

  # Whether formatting detection should apply to differences in this
  # dimension.
  #
  # @return [Boolean] the +formatting_detection+ value given at construction
  def supports_formatting_detection?
    @formatting_detection
  end
end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Comparison
5
+ module Dimensions
6
# Frozen, name-indexed collection of the dimensions belonging to one
# format.
#
# Every format (XML, JSON, YAML) gets its own DimensionSet holding the
# comparison aspects that apply to it. The set supports lookup by name,
# listing of names, and membership checks.
class DimensionSet
  attr_reader :format

  # @param format [Symbol] Format identifier (e.g., :xml, :json, :yaml)
  # @param dimensions [Array<Dimension>] Dimensions for this format
  def initialize(format, dimensions)
    @format = format
    # Index by name, keeping the order in which dimensions were given.
    by_name = dimensions.each_with_object({}) do |dimension, index|
      index[dimension.name] = dimension
    end
    @dimensions = by_name.freeze
    freeze
  end

  # Fetch the dimension registered under +name+.
  #
  # @param name [Symbol]
  # @return [Dimension, nil] nil when no such dimension exists
  def [](name)
    @dimensions.fetch(name, nil)
  end

  # Names of all dimensions in this set, in definition order.
  #
  # @return [Array<Symbol>]
  def names
    @dimensions.each_key.to_a
  end

  # Whether a dimension called +name+ belongs to this set.
  #
  # @param name [Symbol]
  # @return [Boolean]
  def dimension?(name)
    @dimensions.include?(name)
  end
end
47
+ end
48
+ end
49
+ end
@@ -1,75 +1,116 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "base_dimension"
4
- require_relative "text_content_dimension"
5
- require_relative "comments_dimension"
6
- require_relative "attribute_values_dimension"
7
- require_relative "attribute_presence_dimension"
8
- require_relative "attribute_order_dimension"
9
- require_relative "element_position_dimension"
10
- require_relative "structural_whitespace_dimension"
11
-
12
3
  module Canon
13
4
  module Comparison
14
5
  module Dimensions
15
- # Registry for comparison dimensions
6
+ # Pre-built dimension sets with format lookup.
16
7
  #
17
- # Provides a central access point for all dimension classes
18
- # and maps dimension symbols to their implementations.
8
+ # XML/HTML share 7 dimensions. JSON has 3. YAML has 4.
9
+ # Format aliases (html, html4, html5) resolve to the XML set.
19
10
  module Registry
20
- # Dimension class mappings
21
- DIMENSION_CLASSES = {
22
- text_content: TextContentDimension,
23
- comments: CommentsDimension,
24
- attribute_values: AttributeValuesDimension,
25
- attribute_presence: AttributePresenceDimension,
26
- attribute_order: AttributeOrderDimension,
27
- element_position: ElementPositionDimension,
28
- structural_whitespace: StructuralWhitespaceDimension,
29
- }.freeze
30
-
31
- # Get a dimension instance by name
32
- #
33
- # @param dimension_name [Symbol] Dimension name
34
- # @return [BaseDimension] Dimension instance
35
- # @raise [Canon::Error] if dimension is unknown
36
- def self.get(dimension_name)
37
- dimension_class = DIMENSION_CLASSES[dimension_name]
11
+ SETS = {
12
+ xml: DimensionSet.new(:xml, [
13
+ Dimension.new(
14
+ name: :text_content,
15
+ valid_behaviors: %i[strict normalize
16
+ ignore],
17
+ formatting_detection: true,
18
+ ),
19
+ Dimension.new(
20
+ name: :structural_whitespace,
21
+ valid_behaviors: %i[strict normalize
22
+ ignore],
23
+ normative_rule: :strict_only,
24
+ formatting_detection: true,
25
+ ),
26
+ Dimension.new(
27
+ name: :attribute_presence,
28
+ valid_behaviors: %i[strict ignore],
29
+ ),
30
+ Dimension.new(
31
+ name: :attribute_order,
32
+ valid_behaviors: %i[strict ignore],
33
+ ),
34
+ Dimension.new(
35
+ name: :attribute_values,
36
+ valid_behaviors: %i[strict strip compact
37
+ normalize ignore],
38
+ ),
39
+ Dimension.new(
40
+ name: :element_position,
41
+ valid_behaviors: %i[strict ignore],
42
+ ),
43
+ Dimension.new(
44
+ name: :comments,
45
+ valid_behaviors: %i[strict ignore],
46
+ ),
47
+ ]),
38
48
 
39
- unless dimension_class
40
- raise Canon::Error,
41
- "Unknown dimension: #{dimension_name}. " \
42
- "Valid dimensions: #{DIMENSION_CLASSES.keys.join(', ')}"
43
- end
49
+ json: DimensionSet.new(:json, [
50
+ Dimension.new(
51
+ name: :text_content,
52
+ valid_behaviors: %i[strict normalize
53
+ ignore],
54
+ ),
55
+ Dimension.new(
56
+ name: :structural_whitespace,
57
+ valid_behaviors: %i[strict normalize
58
+ ignore],
59
+ normative_rule: :strict_only,
60
+ ),
61
+ Dimension.new(
62
+ name: :key_order,
63
+ valid_behaviors: %i[strict ignore],
64
+ ),
65
+ ]),
44
66
 
45
- dimension_class.new
46
- end
67
+ yaml: DimensionSet.new(:yaml, [
68
+ Dimension.new(
69
+ name: :text_content,
70
+ valid_behaviors: %i[strict normalize
71
+ ignore],
72
+ ),
73
+ Dimension.new(
74
+ name: :structural_whitespace,
75
+ valid_behaviors: %i[strict normalize
76
+ ignore],
77
+ normative_rule: :strict_only,
78
+ ),
79
+ Dimension.new(
80
+ name: :key_order,
81
+ valid_behaviors: %i[strict ignore],
82
+ ),
83
+ Dimension.new(
84
+ name: :comments,
85
+ valid_behaviors: %i[strict ignore],
86
+ ),
87
+ ]),
88
+ }.freeze
47
89
 
48
- # Get all available dimension names
49
- #
50
- # @return [Array<Symbol>] Available dimension names
51
- def self.available_dimensions
52
- DIMENSION_CLASSES.keys
53
- end
90
+ FORMAT_ALIASES = {
91
+ html: :xml,
92
+ html4: :xml,
93
+ html5: :xml,
94
+ }.freeze
54
95
 
55
- # Check if a dimension is available
56
- #
57
- # @param dimension_name [Symbol] Dimension name
58
- # @return [Boolean] true if dimension is available
59
- def self.dimension_exists?(dimension_name)
60
- DIMENSION_CLASSES.key?(dimension_name)
61
- end
96
+ class << self
97
+ # Look up the DimensionSet for a format.
98
+ # Format aliases (html, html4, html5) resolve to the :xml set.
99
+ # Unknown formats fall back to :xml.
100
+ #
101
+ # @param format [Symbol]
102
+ # @return [DimensionSet]
103
+ def for(format)
104
+ key = FORMAT_ALIASES[format] || format
105
+ SETS[key] || SETS[:xml]
106
+ end
62
107
 
63
- # Compare two nodes for a specific dimension
64
- #
65
- # @param dimension_name [Symbol] Dimension name
66
- # @param node1 [Object] First node
67
- # @param node2 [Object] Second node
68
- # @param behavior [Symbol] Comparison behavior
69
- # @return [Boolean] true if nodes match for this dimension
70
- def self.compare(dimension_name, node1, node2, behavior) # rubocop:disable Naming/PredicateMethod
71
- dimension = get(dimension_name)
72
- dimension.equivalent?(node1, node2, behavior)
108
+ # All format names with explicit sets (excluding aliases).
109
+ #
110
+ # @return [Array<Symbol>]
111
+ def format_names
112
+ SETS.keys
113
+ end
73
114
  end
74
115
  end
75
116
  end