canon 0.2.9 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +21 -22
  3. data/Rakefile +25 -2
  4. data/lib/canon/cache.rb +18 -27
  5. data/lib/canon/cli.rb +0 -3
  6. data/lib/canon/commands/diff_command.rb +0 -6
  7. data/lib/canon/commands/format_command.rb +0 -4
  8. data/lib/canon/commands.rb +9 -0
  9. data/lib/canon/comparison/child_realignment.rb +0 -2
  10. data/lib/canon/comparison/compare_profile.rb +30 -36
  11. data/lib/canon/comparison/comparison_result.rb +0 -2
  12. data/lib/canon/comparison/diff_node_builder.rb +353 -0
  13. data/lib/canon/comparison/dimensions/dimension.rb +51 -0
  14. data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
  15. data/lib/canon/comparison/dimensions/registry.rb +101 -60
  16. data/lib/canon/comparison/dimensions.rb +15 -46
  17. data/lib/canon/comparison/html_comparator.rb +20 -141
  18. data/lib/canon/comparison/html_compare_profile.rb +15 -18
  19. data/lib/canon/comparison/json_comparator.rb +4 -165
  20. data/lib/canon/comparison/json_parser.rb +0 -2
  21. data/lib/canon/comparison/markup_comparator.rb +14 -210
  22. data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
  23. data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
  24. data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
  25. data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
  26. data/lib/canon/comparison/match_options.rb +13 -88
  27. data/lib/canon/comparison/node_inspector.rb +13 -48
  28. data/lib/canon/comparison/pipeline.rb +269 -0
  29. data/lib/canon/comparison/profile_definition.rb +0 -2
  30. data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
  31. data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
  32. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
  33. data/lib/canon/comparison/strategies.rb +16 -0
  34. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +19 -5
  35. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
  36. data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
  37. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
  38. data/lib/canon/comparison/xml_comparator/node_parser.rb +2 -6
  39. data/lib/canon/comparison/xml_comparator.rb +4 -492
  40. data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
  41. data/lib/canon/comparison/xml_node_comparison.rb +4 -119
  42. data/lib/canon/comparison/yaml_comparator.rb +0 -3
  43. data/lib/canon/comparison.rb +144 -267
  44. data/lib/canon/config/config_dsl.rb +159 -0
  45. data/lib/canon/config/env_provider.rb +0 -3
  46. data/lib/canon/config/env_schema.rb +48 -58
  47. data/lib/canon/config/profile_loader.rb +0 -1
  48. data/lib/canon/config.rb +116 -468
  49. data/lib/canon/diff/diff_block_builder.rb +0 -2
  50. data/lib/canon/diff/diff_classifier.rb +0 -5
  51. data/lib/canon/diff/diff_context.rb +0 -2
  52. data/lib/canon/diff/diff_context_builder.rb +0 -2
  53. data/lib/canon/diff/diff_line_builder.rb +2 -3
  54. data/lib/canon/diff/diff_node_enricher.rb +0 -4
  55. data/lib/canon/diff/diff_node_mapper.rb +10 -12
  56. data/lib/canon/diff/diff_report_builder.rb +0 -4
  57. data/lib/canon/diff/formatting_detector.rb +3 -3
  58. data/lib/canon/diff/node_serializer.rb +0 -7
  59. data/lib/canon/diff/xml_serialization_formatter.rb +0 -3
  60. data/lib/canon/diff.rb +39 -0
  61. data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
  62. data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
  63. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
  64. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
  65. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
  66. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
  67. data/lib/canon/diff_formatter/by_object/base_formatter.rb +20 -17
  68. data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
  69. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +119 -3
  70. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
  71. data/lib/canon/diff_formatter/by_object_formatter.rb +1 -5
  72. data/lib/canon/diff_formatter/debug_output.rb +0 -2
  73. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +27 -61
  74. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +26 -29
  75. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
  76. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
  77. data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
  78. data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
  79. data/lib/canon/diff_formatter.rb +26 -20
  80. data/lib/canon/formatters/html4_formatter.rb +0 -2
  81. data/lib/canon/formatters/html5_formatter.rb +0 -2
  82. data/lib/canon/formatters/html_formatter.rb +0 -3
  83. data/lib/canon/formatters/json_formatter.rb +0 -1
  84. data/lib/canon/formatters/xml_formatter.rb +0 -4
  85. data/lib/canon/formatters/yaml_formatter.rb +0 -1
  86. data/lib/canon/formatters.rb +16 -0
  87. data/lib/canon/html/data_model.rb +1 -11
  88. data/lib/canon/html.rb +4 -3
  89. data/lib/canon/options/cli_generator.rb +0 -2
  90. data/lib/canon/options/registry.rb +0 -2
  91. data/lib/canon/options.rb +9 -0
  92. data/lib/canon/pretty_printer/html.rb +0 -1
  93. data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
  94. data/lib/canon/pretty_printer.rb +12 -0
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  96. data/lib/canon/tree_diff/adapters.rb +14 -0
  97. data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
  98. data/lib/canon/tree_diff/core/node_signature.rb +1 -1
  99. data/lib/canon/tree_diff/core/tree_node.rb +12 -5
  100. data/lib/canon/tree_diff/core.rb +17 -0
  101. data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
  102. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
  103. data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
  104. data/lib/canon/tree_diff/matchers.rb +15 -0
  105. data/lib/canon/tree_diff/operation_converter.rb +7 -15
  106. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
  107. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
  108. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
  109. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
  110. data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
  111. data/lib/canon/tree_diff/operations/operation_detector.rb +6 -5
  112. data/lib/canon/tree_diff/operations.rb +13 -0
  113. data/lib/canon/tree_diff.rb +26 -27
  114. data/lib/canon/validators/base_validator.rb +5 -10
  115. data/lib/canon/validators/html_validator.rb +2 -8
  116. data/lib/canon/validators/json_validator.rb +0 -1
  117. data/lib/canon/validators/xml_validator.rb +2 -8
  118. data/lib/canon/validators/yaml_validator.rb +0 -1
  119. data/lib/canon/validators.rb +12 -0
  120. data/lib/canon/version.rb +1 -1
  121. data/lib/canon/xml/c14n.rb +0 -4
  122. data/lib/canon/xml/data_model.rb +5 -15
  123. data/lib/canon/xml/line_range_mapper.rb +0 -2
  124. data/lib/canon/xml/nodes/attribute_node.rb +0 -2
  125. data/lib/canon/xml/nodes/comment_node.rb +0 -2
  126. data/lib/canon/xml/nodes/element_node.rb +0 -2
  127. data/lib/canon/xml/nodes/namespace_node.rb +0 -2
  128. data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
  129. data/lib/canon/xml/nodes/root_node.rb +0 -2
  130. data/lib/canon/xml/nodes/text_node.rb +0 -2
  131. data/lib/canon/xml/nodes.rb +19 -0
  132. data/lib/canon/xml/processor.rb +0 -5
  133. data/lib/canon/xml/sax_builder.rb +1 -8
  134. data/lib/canon/xml/whitespace_normalizer.rb +2 -2
  135. data/lib/canon/xml.rb +33 -0
  136. data/lib/canon/xml_backend.rb +50 -14
  137. data/lib/canon/xml_parsing.rb +32 -18
  138. data/lib/canon.rb +25 -15
  139. data/lib/tasks/performance.rake +0 -58
  140. data/lib/tasks/performance_comparator.rb +132 -65
  141. data/lib/tasks/performance_helpers.rb +4 -249
  142. data/lib/tasks/performance_report.rb +309 -0
  143. metadata +28 -15
  144. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
  145. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
  146. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
  147. data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
  148. data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
  149. data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
  150. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
  151. data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
  152. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -270
@@ -1,270 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "set"
4
- require_relative "../../diff/diff_node"
5
- require_relative "../../diff/path_builder"
6
- require_relative "../../diff/node_serializer"
7
- require_relative "../node_inspector"
8
-
9
- module Canon
10
- module Comparison
11
- # Builder for creating enriched DiffNode objects
12
- # Handles path building, serialization, and attribute extraction
13
- class DiffNodeBuilder
14
- # Build an enriched DiffNode
15
- #
16
- # @param node1 [Object, nil] First node
17
- # @param node2 [Object, nil] Second node
18
- # @param diff1 [String] Difference type for node1
19
- # @param diff2 [String] Difference type for node2
20
- # @param dimension [Symbol] The match dimension causing this difference
21
- # @return [DiffNode, nil] Enriched DiffNode or nil if dimension is nil
22
- def self.build(node1:, node2:, diff1:, diff2:, dimension:, **_opts)
23
- # Validate dimension is required
24
- if dimension.nil?
25
- raise ArgumentError,
26
- "dimension required for DiffNode"
27
- end
28
-
29
- # Build informative reason message
30
- reason = build_reason(node1, node2, diff1, diff2, dimension)
31
-
32
- # Enrich with path, serialized content, and attributes for Stage 4 rendering
33
- metadata = enrich_metadata(node1, node2)
34
-
35
- Canon::Diff::DiffNode.new(
36
- node1: node1,
37
- node2: node2,
38
- dimension: dimension,
39
- reason: reason,
40
- **metadata,
41
- )
42
- end
43
-
44
- # Build a human-readable reason for a difference
45
- #
46
- # @param node1 [Object] First node
47
- # @param node2 [Object] Second node
48
- # @param diff1 [String] Difference type for node1
49
- # @param diff2 [String] Difference type for node2
50
- # @param dimension [Symbol] The dimension of the difference
51
- # @return [String] Human-readable reason
52
- def self.build_reason(node1, node2, diff1, diff2, dimension)
53
- # For deleted/inserted nodes, include namespace information if available
54
- if dimension == :text_content && (node1.nil? || node2.nil?)
55
- node = node1 || node2
56
- if node.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node)
57
- ns = node.namespace_uri
58
- ns_info = if ns.nil? || ns.empty?
59
- ""
60
- else
61
- " (namespace: #{ns})"
62
- end
63
- label = Canon::Comparison.code_pair_label(diff1, diff2)
64
- return "element '#{node.name}'#{ns_info}: #{label}"
65
- end
66
- end
67
-
68
- # For attribute presence differences, show what attributes differ
69
- if dimension == :attribute_presence
70
- attrs1 = extract_attributes(node1)
71
- attrs2 = extract_attributes(node2)
72
- return build_attribute_difference_reason(attrs1, attrs2)
73
- end
74
-
75
- # For text content differences, show the actual text (truncated if needed)
76
- if dimension == :text_content
77
- text1 = extract_text_content(node1)
78
- text2 = extract_text_content(node2)
79
- return build_text_difference_reason(text1, text2)
80
- end
81
-
82
- # For attribute order differences, show the actual attribute names
83
- if dimension == :attribute_order
84
- attrs1 = extract_attributes(node1)&.keys || []
85
- attrs2 = extract_attributes(node2)&.keys || []
86
- return "Attribute order changed: [#{attrs1.join(', ')}] → [#{attrs2.join(', ')}]"
87
- end
88
-
89
- # For asymmetric comment nodes (#144), name the side that carries
90
- # the comment and surface the comment text rather than reusing
91
- # the generic "element structure mismatch" wording.
92
- if dimension == :comments
93
- comment_reason = build_comment_difference_reason(node1, node2)
94
- return comment_reason if comment_reason
95
- end
96
-
97
- # Default reason
98
- if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
99
- "element structure mismatch (children differ)"
100
- elsif dimension == :element_structure &&
101
- diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
102
- diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
103
- (node1.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node1)) &&
104
- (node2.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node2)) &&
105
- node1.name && node2.name && node1.name != node2.name
106
- "different element name (<#{node1.name}> vs <#{node2.name}>)"
107
- else
108
- Canon::Comparison.code_pair_label(diff1, diff2)
109
- end
110
- end
111
-
112
- # Enrich DiffNode with canonical path, serialized content, and attributes
113
- # This extracts presentation-ready metadata from nodes for Stage 4 rendering
114
- #
115
- # @param node1 [Object, nil] First node
116
- # @param node2 [Object, nil] Second node
117
- # @return [Hash] Enriched metadata hash
118
- def self.enrich_metadata(node1, node2)
119
- {
120
- path: build_path(node1 || node2),
121
- serialized_before: serialize(node1),
122
- serialized_after: serialize(node2),
123
- attributes_before: extract_attributes(node1),
124
- attributes_after: extract_attributes(node2),
125
- }
126
- end
127
-
128
- # Build canonical path for a node
129
- #
130
- # @param node [Object] Node to build path for
131
- # @return [String, nil] Canonical path with ordinal indices
132
- def self.build_path(node)
133
- return nil if node.nil?
134
-
135
- Canon::Diff::PathBuilder.build(node, format: :document)
136
- end
137
-
138
- # Serialize a node to string for display
139
- #
140
- # @param node [Object, nil] Node to serialize
141
- # @return [String, nil] Serialized content
142
- def self.serialize(node)
143
- return nil if node.nil?
144
-
145
- Canon::Diff::NodeSerializer.serialize(node)
146
- end
147
-
148
- # Extract attributes from a node as a normalized hash
149
- #
150
- # @param node [Object, nil] Node to extract attributes from
151
- # @return [Hash, nil] Normalized attributes hash
152
- def self.extract_attributes(node)
153
- return nil if node.nil?
154
-
155
- Canon::Diff::NodeSerializer.extract_attributes(node)
156
- end
157
-
158
- # Build a clear reason message for attribute presence differences
159
- # Shows which attributes are only in node1, only in node2, or different values
160
- #
161
- # @param attrs1 [Hash, nil] First node's attributes
162
- # @param attrs2 [Hash, nil] Second node's attributes
163
- # @return [String] Clear explanation of the attribute difference
164
- def self.build_attribute_difference_reason(attrs1, attrs2)
165
- return "#{attrs1&.keys&.size || 0} vs #{attrs2&.keys&.size || 0} attributes" unless attrs1 && attrs2
166
-
167
- keys1 = attrs1.keys.to_set
168
- keys2 = attrs2.keys.to_set
169
-
170
- only_in_1 = keys1 - keys2
171
- only_in_2 = keys2 - keys1
172
- common = keys1 & keys2
173
-
174
- # Check if values differ for common keys
175
- different_values = common.reject { |k| attrs1[k] == attrs2[k] }
176
-
177
- parts = []
178
- parts << "only in first: #{only_in_1.to_a.sort.join(', ')}" if only_in_1.any?
179
- parts << "only in second: #{only_in_2.to_a.sort.join(', ')}" if only_in_2.any?
180
- parts << "different values: #{different_values.sort.join(', ')}" if different_values.any?
181
-
182
- if parts.empty?
183
- "#{keys1.size} vs #{keys2.size} attributes (same names)"
184
- else
185
- parts.join("; ")
186
- end
187
- end
188
-
189
- # Extract text content from a node
190
- #
191
- # @param node [Object, nil] Node to extract text from
192
- # @return [String, nil] Text content or nil
193
- def self.extract_text_content(node)
194
- return nil if node.nil?
195
-
196
- case node
197
- when Canon::Xml::Nodes::TextNode
198
- node.value
199
- when Canon::Xml::Node
200
- node.text_content
201
- else
202
- if Canon::XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::Node)
203
- node.content.to_s
204
- elsif Canon::XmlParsing.xml_node?(node)
205
- Canon::XmlParsing.text_content(node)
206
- else
207
- node.to_s
208
- end
209
- end
210
- rescue StandardError
211
- nil
212
- end
213
-
214
- # Build a clear reason message for text content differences
215
- # Shows the actual text content (truncated if too long)
216
- #
217
- # @param text1 [String, nil] First text content
218
- # @param text2 [String, nil] Second text content
219
- # @return [String] Clear explanation of the text difference
220
- def self.build_text_difference_reason(text1, text2)
221
- # Handle nil cases
222
- return "missing vs '#{truncate(text2)}'" if text1.nil? && text2
223
- return "'#{truncate(text1)}' vs missing" if text1 && text2.nil?
224
- return "both missing" if text1.nil? && text2.nil?
225
-
226
- # Both have content - show truncated versions
227
- "'#{truncate(text1)}' vs '#{truncate(text2)}'"
228
- end
229
-
230
- # Build a Reason line for a +:comments+ diff. Returns +nil+ when
231
- # neither side carries a comment (caller falls back to default).
232
- def self.build_comment_difference_reason(node1, node2)
233
- cm1 = node1 && Canon::Comparison::NodeInspector.comment_node?(node1)
234
- cm2 = node2 && Canon::Comparison::NodeInspector.comment_node?(node2)
235
-
236
- return nil unless cm1 || cm2
237
-
238
- if cm1 && !cm2
239
- "Comment present on EXPECTED only: " \
240
- "<!--#{truncate(comment_text(node1))}-->"
241
- elsif cm2 && !cm1
242
- "Comment present on ACTUAL only: " \
243
- "<!--#{truncate(comment_text(node2))}-->"
244
- else
245
- t1 = truncate(comment_text(node1))
246
- t2 = truncate(comment_text(node2))
247
- "Comment text differs: <!--#{t1}--> vs <!--#{t2}-->"
248
- end
249
- end
250
-
251
- def self.comment_text(node)
252
- Canon::Comparison::NodeInspector.text_content(node).to_s
253
- end
254
-
255
- # Truncate text for display in reason messages
256
- #
257
- # @param text [String] Text to truncate
258
- # @param max_length [Integer] Maximum length
259
- # @return [String] Truncated text
260
- def self.truncate(text, max_length = 40)
261
- return "" if text.nil?
262
-
263
- text = text.to_s
264
- return text if text.length <= max_length
265
-
266
- "#{text[0...max_length]}..."
267
- end
268
- end
269
- end
270
- end