canon 0.2.11 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +12 -22
  3. data/Rakefile +5 -2
  4. data/lib/canon/cache.rb +3 -1
  5. data/lib/canon/cli.rb +0 -3
  6. data/lib/canon/commands/diff_command.rb +0 -6
  7. data/lib/canon/commands/format_command.rb +0 -4
  8. data/lib/canon/commands.rb +9 -0
  9. data/lib/canon/comparison/child_realignment.rb +0 -2
  10. data/lib/canon/comparison/compare_profile.rb +30 -36
  11. data/lib/canon/comparison/comparison_result.rb +0 -2
  12. data/lib/canon/comparison/diff_node_builder.rb +353 -0
  13. data/lib/canon/comparison/dimensions/dimension.rb +51 -0
  14. data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
  15. data/lib/canon/comparison/dimensions/registry.rb +101 -60
  16. data/lib/canon/comparison/dimensions.rb +15 -46
  17. data/lib/canon/comparison/html_comparator.rb +18 -141
  18. data/lib/canon/comparison/html_compare_profile.rb +15 -18
  19. data/lib/canon/comparison/json_comparator.rb +4 -165
  20. data/lib/canon/comparison/json_parser.rb +0 -2
  21. data/lib/canon/comparison/markup_comparator.rb +14 -210
  22. data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
  23. data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
  24. data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
  25. data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
  26. data/lib/canon/comparison/match_options.rb +13 -88
  27. data/lib/canon/comparison/pipeline.rb +269 -0
  28. data/lib/canon/comparison/profile_definition.rb +0 -2
  29. data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
  30. data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
  31. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
  32. data/lib/canon/comparison/strategies.rb +16 -0
  33. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +0 -3
  34. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
  35. data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
  36. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
  37. data/lib/canon/comparison/xml_comparator/node_parser.rb +0 -4
  38. data/lib/canon/comparison/xml_comparator.rb +4 -492
  39. data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
  40. data/lib/canon/comparison/xml_node_comparison.rb +4 -119
  41. data/lib/canon/comparison/yaml_comparator.rb +0 -3
  42. data/lib/canon/comparison.rb +143 -266
  43. data/lib/canon/config/config_dsl.rb +159 -0
  44. data/lib/canon/config/env_provider.rb +0 -3
  45. data/lib/canon/config/env_schema.rb +48 -58
  46. data/lib/canon/config/profile_loader.rb +0 -1
  47. data/lib/canon/config.rb +116 -468
  48. data/lib/canon/diff/diff_block_builder.rb +0 -2
  49. data/lib/canon/diff/diff_classifier.rb +0 -5
  50. data/lib/canon/diff/diff_context.rb +0 -2
  51. data/lib/canon/diff/diff_context_builder.rb +0 -2
  52. data/lib/canon/diff/diff_line_builder.rb +0 -3
  53. data/lib/canon/diff/diff_node_enricher.rb +0 -4
  54. data/lib/canon/diff/diff_node_mapper.rb +0 -4
  55. data/lib/canon/diff/diff_report_builder.rb +0 -4
  56. data/lib/canon/diff/formatting_detector.rb +0 -1
  57. data/lib/canon/diff/node_serializer.rb +0 -7
  58. data/lib/canon/diff.rb +39 -0
  59. data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
  60. data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
  61. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
  62. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
  63. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
  64. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
  65. data/lib/canon/diff_formatter/by_object/base_formatter.rb +8 -15
  66. data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
  67. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +0 -2
  68. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
  69. data/lib/canon/diff_formatter/debug_output.rb +0 -2
  70. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +24 -58
  71. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +0 -2
  72. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
  73. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
  74. data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
  75. data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
  76. data/lib/canon/diff_formatter.rb +11 -9
  77. data/lib/canon/formatters/html4_formatter.rb +0 -2
  78. data/lib/canon/formatters/html5_formatter.rb +0 -2
  79. data/lib/canon/formatters/html_formatter.rb +0 -3
  80. data/lib/canon/formatters/json_formatter.rb +0 -1
  81. data/lib/canon/formatters/xml_formatter.rb +0 -4
  82. data/lib/canon/formatters/yaml_formatter.rb +0 -1
  83. data/lib/canon/formatters.rb +16 -0
  84. data/lib/canon/html/data_model.rb +0 -10
  85. data/lib/canon/html.rb +4 -3
  86. data/lib/canon/options/cli_generator.rb +0 -2
  87. data/lib/canon/options/registry.rb +0 -2
  88. data/lib/canon/options.rb +9 -0
  89. data/lib/canon/pretty_printer/html.rb +0 -1
  90. data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
  91. data/lib/canon/pretty_printer.rb +12 -0
  92. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  93. data/lib/canon/tree_diff/adapters.rb +14 -0
  94. data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
  95. data/lib/canon/tree_diff/core/node_signature.rb +1 -1
  96. data/lib/canon/tree_diff/core/tree_node.rb +12 -5
  97. data/lib/canon/tree_diff/core.rb +17 -0
  98. data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
  99. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
  100. data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
  101. data/lib/canon/tree_diff/matchers.rb +15 -0
  102. data/lib/canon/tree_diff/operation_converter.rb +0 -8
  103. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
  104. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
  105. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
  106. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
  107. data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
  108. data/lib/canon/tree_diff/operations/operation_detector.rb +2 -5
  109. data/lib/canon/tree_diff/operations.rb +13 -0
  110. data/lib/canon/tree_diff.rb +26 -27
  111. data/lib/canon/validators/base_validator.rb +0 -2
  112. data/lib/canon/validators/html_validator.rb +0 -1
  113. data/lib/canon/validators/json_validator.rb +0 -1
  114. data/lib/canon/validators/xml_validator.rb +0 -1
  115. data/lib/canon/validators/yaml_validator.rb +0 -1
  116. data/lib/canon/validators.rb +12 -0
  117. data/lib/canon/version.rb +1 -1
  118. data/lib/canon/xml/c14n.rb +0 -4
  119. data/lib/canon/xml/data_model.rb +0 -10
  120. data/lib/canon/xml/line_range_mapper.rb +0 -2
  121. data/lib/canon/xml/nodes/attribute_node.rb +0 -2
  122. data/lib/canon/xml/nodes/comment_node.rb +0 -2
  123. data/lib/canon/xml/nodes/element_node.rb +0 -2
  124. data/lib/canon/xml/nodes/namespace_node.rb +0 -2
  125. data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
  126. data/lib/canon/xml/nodes/root_node.rb +0 -2
  127. data/lib/canon/xml/nodes/text_node.rb +0 -2
  128. data/lib/canon/xml/nodes.rb +19 -0
  129. data/lib/canon/xml/processor.rb +0 -5
  130. data/lib/canon/xml/sax_builder.rb +0 -7
  131. data/lib/canon/xml.rb +33 -0
  132. data/lib/canon/xml_backend.rb +50 -14
  133. data/lib/canon/xml_parsing.rb +4 -2
  134. data/lib/canon.rb +25 -15
  135. data/lib/tasks/performance.rake +0 -58
  136. data/lib/tasks/performance_comparator.rb +132 -65
  137. data/lib/tasks/performance_helpers.rb +4 -249
  138. data/lib/tasks/performance_report.rb +309 -0
  139. metadata +24 -11
  140. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
  141. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
  142. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
  143. data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
  144. data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
  145. data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
  146. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
  147. data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
  148. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -300
@@ -1,300 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "set"
4
- require_relative "../../diff/diff_node"
5
- require_relative "../../diff/path_builder"
6
- require_relative "../../diff/node_serializer"
7
- require_relative "../node_inspector"
8
-
9
- module Canon
10
- module Comparison
11
- # Builder for creating enriched DiffNode objects
12
- # Handles path building, serialization, and attribute extraction
13
- class DiffNodeBuilder
14
- # Build an enriched DiffNode
15
- #
16
- # @param node1 [Object, nil] First node
17
- # @param node2 [Object, nil] Second node
18
- # @param diff1 [String] Difference type for node1
19
- # @param diff2 [String] Difference type for node2
20
- # @param dimension [Symbol] The match dimension causing this difference
21
- # @return [DiffNode, nil] Enriched DiffNode or nil if dimension is nil
22
- def self.build(node1:, node2:, diff1:, diff2:, dimension:, **_opts)
23
- # Validate dimension is required
24
- if dimension.nil?
25
- raise ArgumentError,
26
- "dimension required for DiffNode"
27
- end
28
-
29
- # Build informative reason message
30
- reason = build_reason(node1, node2, diff1, diff2, dimension)
31
-
32
- # Enrich with path, serialized content, and attributes for Stage 4 rendering
33
- metadata = enrich_metadata(node1, node2)
34
-
35
- Canon::Diff::DiffNode.new(
36
- node1: node1,
37
- node2: node2,
38
- dimension: dimension,
39
- reason: reason,
40
- **metadata,
41
- )
42
- end
43
-
44
- # Build a human-readable reason for a difference
45
- #
46
- # @param node1 [Object] First node
47
- # @param node2 [Object] Second node
48
- # @param diff1 [String] Difference type for node1
49
- # @param diff2 [String] Difference type for node2
50
- # @param dimension [Symbol] The dimension of the difference
51
- # @return [String] Human-readable reason
52
- def self.build_reason(node1, node2, diff1, diff2, dimension)
53
- # For deleted/inserted nodes, include namespace information if available
54
- if dimension == :text_content && (node1.nil? || node2.nil?)
55
- node = node1 || node2
56
- if node.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node)
57
- ns = node.namespace_uri
58
- ns_info = if ns.nil? || ns.empty?
59
- ""
60
- else
61
- " (namespace: #{ns})"
62
- end
63
- label = Canon::Comparison.code_pair_label(diff1, diff2)
64
- return "element '#{node.name}'#{ns_info}: #{label}"
65
- end
66
- end
67
-
68
- # For attribute presence differences, show what attributes differ
69
- if dimension == :attribute_presence
70
- attrs1 = extract_attributes(node1)
71
- attrs2 = extract_attributes(node2)
72
- return build_attribute_difference_reason(attrs1, attrs2)
73
- end
74
-
75
- # For attribute value differences, show which attributes changed
76
- if dimension == :attribute_values
77
- return build_attribute_values_reason(node1, node2)
78
- end
79
-
80
- # For text content differences, show the actual text (truncated if needed)
81
- if dimension == :text_content
82
- text1 = extract_text_content(node1)
83
- text2 = extract_text_content(node2)
84
- return build_text_difference_reason(text1, text2)
85
- end
86
-
87
- # For attribute order differences, show the actual attribute names
88
- if dimension == :attribute_order
89
- attrs1 = extract_attributes(node1)&.keys || []
90
- attrs2 = extract_attributes(node2)&.keys || []
91
- return "Attribute order changed: [#{attrs1.join(', ')}] → [#{attrs2.join(', ')}]"
92
- end
93
-
94
- # For asymmetric comment nodes (#144), name the side that carries
95
- # the comment and surface the comment text rather than reusing
96
- # the generic "element structure mismatch" wording.
97
- if dimension == :comments
98
- comment_reason = build_comment_difference_reason(node1, node2)
99
- return comment_reason if comment_reason
100
- end
101
-
102
- # Default reason
103
- if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
104
- "element structure mismatch (children differ)"
105
- elsif dimension == :element_structure &&
106
- diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
107
- diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
108
- (node1.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node1)) &&
109
- (node2.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node2)) &&
110
- node1.name && node2.name && node1.name != node2.name
111
- "different element name (<#{node1.name}> vs <#{node2.name}>)"
112
- else
113
- Canon::Comparison.code_pair_label(diff1, diff2)
114
- end
115
- end
116
-
117
- # Enrich DiffNode with canonical path, serialized content, and attributes
118
- # This extracts presentation-ready metadata from nodes for Stage 4 rendering
119
- #
120
- # @param node1 [Object, nil] First node
121
- # @param node2 [Object, nil] Second node
122
- # @return [Hash] Enriched metadata hash
123
- def self.enrich_metadata(node1, node2)
124
- {
125
- path: build_path(node1 || node2),
126
- serialized_before: serialize(node1),
127
- serialized_after: serialize(node2),
128
- attributes_before: extract_attributes(node1),
129
- attributes_after: extract_attributes(node2),
130
- }
131
- end
132
-
133
- # Build canonical path for a node
134
- #
135
- # @param node [Object] Node to build path for
136
- # @return [String, nil] Canonical path with ordinal indices
137
- def self.build_path(node)
138
- return nil if node.nil?
139
-
140
- Canon::Diff::PathBuilder.build(node, format: :document)
141
- end
142
-
143
- # Serialize a node to string for display
144
- #
145
- # @param node [Object, nil] Node to serialize
146
- # @return [String, nil] Serialized content
147
- def self.serialize(node)
148
- return nil if node.nil?
149
-
150
- Canon::Diff::NodeSerializer.serialize(node)
151
- end
152
-
153
- # Extract attributes from a node as a normalized hash
154
- #
155
- # @param node [Object, nil] Node to extract attributes from
156
- # @return [Hash, nil] Normalized attributes hash
157
- def self.extract_attributes(node)
158
- return nil if node.nil?
159
-
160
- Canon::Diff::NodeSerializer.extract_attributes(node)
161
- end
162
-
163
- # Build a clear reason message for attribute presence differences
164
- # Shows which attributes are only in node1, only in node2, or different values
165
- #
166
- # @param attrs1 [Hash, nil] First node's attributes
167
- # @param attrs2 [Hash, nil] Second node's attributes
168
- # @return [String] Clear explanation of the attribute difference
169
- def self.build_attribute_difference_reason(attrs1, attrs2)
170
- return "#{attrs1&.keys&.size || 0} vs #{attrs2&.keys&.size || 0} attributes" unless attrs1 && attrs2
171
-
172
- keys1 = attrs1.keys.to_set
173
- keys2 = attrs2.keys.to_set
174
-
175
- only_in_1 = keys1 - keys2
176
- only_in_2 = keys2 - keys1
177
- common = keys1 & keys2
178
-
179
- # Check if values differ for common keys
180
- different_values = common.reject { |k| attrs1[k] == attrs2[k] }
181
-
182
- parts = []
183
- parts << "only in first: #{only_in_1.to_a.sort.join(', ')}" if only_in_1.any?
184
- parts << "only in second: #{only_in_2.to_a.sort.join(', ')}" if only_in_2.any?
185
- parts << "different values: #{different_values.sort.join(', ')}" if different_values.any?
186
-
187
- if parts.empty?
188
- "#{keys1.size} vs #{keys2.size} attributes (same names)"
189
- else
190
- parts.join("; ")
191
- end
192
- end
193
-
194
- # Build a reason message for attribute value differences
195
- # Shows each changed attribute with its before/after values
196
- #
197
- # @param node1 [Object, nil] First node
198
- # @param node2 [Object, nil] Second node
199
- # @return [String] Clear explanation of the attribute value differences
200
- def self.build_attribute_values_reason(node1, node2)
201
- attrs1 = extract_attributes(node1) || {}
202
- attrs2 = extract_attributes(node2) || {}
203
-
204
- differing = (attrs1.keys | attrs2.keys).sort.reject do |k|
205
- attrs1[k.to_s] == attrs2[k.to_s]
206
- end
207
-
208
- changed_parts = differing.map do |k|
209
- "Changed: #{k}=\"#{attrs1[k.to_s]}\" → \"#{attrs2[k.to_s]}\""
210
- end
211
-
212
- if changed_parts.empty?
213
- "attributes differ"
214
- else
215
- "Attributes differ (#{changed_parts.join('; ')})"
216
- end
217
- end
218
-
219
- # Extract text content from a node
220
- #
221
- # @param node [Object, nil] Node to extract text from
222
- # @return [String, nil] Text content or nil
223
- def self.extract_text_content(node)
224
- return nil if node.nil?
225
-
226
- case node
227
- when Canon::Xml::Nodes::TextNode
228
- node.value
229
- when Canon::Xml::Node
230
- node.text_content
231
- else
232
- if Canon::XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::Node)
233
- node.content.to_s
234
- elsif Canon::XmlParsing.xml_node?(node)
235
- Canon::XmlParsing.text_content(node)
236
- else
237
- node.to_s
238
- end
239
- end
240
- rescue StandardError
241
- nil
242
- end
243
-
244
- # Build a clear reason message for text content differences
245
- # Shows the actual text content (truncated if too long)
246
- #
247
- # @param text1 [String, nil] First text content
248
- # @param text2 [String, nil] Second text content
249
- # @return [String] Clear explanation of the text difference
250
- def self.build_text_difference_reason(text1, text2)
251
- # Handle nil cases
252
- return "missing vs '#{truncate(text2)}'" if text1.nil? && text2
253
- return "'#{truncate(text1)}' vs missing" if text1 && text2.nil?
254
- return "both missing" if text1.nil? && text2.nil?
255
-
256
- # Both have content - show truncated versions
257
- "'#{truncate(text1)}' vs '#{truncate(text2)}'"
258
- end
259
-
260
- # Build a Reason line for a +:comments+ diff. Returns +nil+ when
261
- # neither side carries a comment (caller falls back to default).
262
- def self.build_comment_difference_reason(node1, node2)
263
- cm1 = node1 && Canon::Comparison::NodeInspector.comment_node?(node1)
264
- cm2 = node2 && Canon::Comparison::NodeInspector.comment_node?(node2)
265
-
266
- return nil unless cm1 || cm2
267
-
268
- if cm1 && !cm2
269
- "Comment present on EXPECTED only: " \
270
- "<!--#{truncate(comment_text(node1))}-->"
271
- elsif cm2 && !cm1
272
- "Comment present on ACTUAL only: " \
273
- "<!--#{truncate(comment_text(node2))}-->"
274
- else
275
- t1 = truncate(comment_text(node1))
276
- t2 = truncate(comment_text(node2))
277
- "Comment text differs: <!--#{t1}--> vs <!--#{t2}-->"
278
- end
279
- end
280
-
281
- def self.comment_text(node)
282
- Canon::Comparison::NodeInspector.text_content(node).to_s
283
- end
284
-
285
- # Truncate text for display in reason messages
286
- #
287
- # @param text [String] Text to truncate
288
- # @param max_length [Integer] Maximum length
289
- # @return [String] Truncated text
290
- def self.truncate(text, max_length = 40)
291
- return "" if text.nil?
292
-
293
- text = text.to_s
294
- return text if text.length <= max_length
295
-
296
- "#{text[0...max_length]}..."
297
- end
298
- end
299
- end
300
- end