canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
@@ -0,0 +1,410 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "nokogiri"
4
+ require_relative "xml_comparator"
5
+ require_relative "match_options"
6
+ require_relative "comparison_result"
7
+ require_relative "../diff/diff_node"
8
+ require_relative "../diff/diff_classifier"
9
+
10
+ module Canon
11
+ module Comparison
12
+ # HTML comparison class
13
+ # Handles comparison of HTML nodes with various options
14
+ class HtmlComparator
# Baseline options for HTML comparison. Caller-supplied options are merged
# over this hash at the top of `equivalent?`.
DEFAULT_OPTS = {
  # -- structural filtering --
  ignore_children: false,
  ignore_text_nodes: false,
  ignore_attr_content: [],
  ignore_attrs: [],
  ignore_attrs_by_name: [],
  ignore_nodes: [],
}.merge(
  # -- output --
  verbose: false,
  diff_children: false,
).merge(
  # -- match system (nil means "let MatchOptions resolve it") --
  match_profile: nil,
  match: nil,
  preprocessing: nil,
  global_profile: nil,
  global_options: nil,
).merge(
  # -- diff display --
  diff: nil,
).freeze
39
+
40
+ class << self
# Compare two HTML inputs for equivalence.
#
# Options are merged over DEFAULT_OPTS, match options are resolved for the
# :html format, both inputs go through `parse_node`, and the node
# comparison itself is delegated to XmlComparator's `compare_nodes`.
#
# @param html1 [String, Nokogiri::HTML::Document] First HTML
# @param html2 [String, Nokogiri::HTML::Document] Second HTML
# @param opts [Hash] Comparison options (see DEFAULT_OPTS)
# @param child_opts [Hash] Overrides merged over `opts` for child comparison
# @return [Boolean, ComparisonResult] a boolean normally; a
#   ComparisonResult when opts[:verbose] is truthy
def equivalent?(html1, html2, opts = {}, child_opts = {})
  opts = DEFAULT_OPTS.merge(opts)

  # Plain hash of resolved match options (format-specific defaults applied).
  resolved = MatchOptions::Xml.resolve(
    format: :html,
    **opts.slice(:match_profile, :match, :preprocessing,
                 :global_profile, :global_options),
  )

  # DiffClassifier takes the wrapped form; the comparison logic reads the
  # plain hash through opts[:match_opts].
  wrapped = Canon::Comparison::ResolvedMatchOptions.new(resolved,
                                                        format: :html)
  opts[:match_opts] = resolved
  child_opts = opts.merge(child_opts)

  preprocessing = resolved[:preprocessing]
  node1 = parse_node(html1, preprocessing, resolved)
  node2 = parse_node(html2, preprocessing, resolved)

  # Serialize the already-preprocessed nodes so the displayed diff shows
  # what was actually compared.
  display1 = serialize_for_display(node1)
  display2 = serialize_for_display(node2)

  differences = []
  diff_children = opts[:diff_children] || false

  both_fragments = node1.is_a?(Nokogiri::HTML4::DocumentFragment) &&
    node2.is_a?(Nokogiri::HTML4::DocumentFragment)

  result =
    if both_fragments
      # Fragments are compared child-by-child rather than as nodes.
      kids1 = node1.children.to_a
      kids2 = node2.children.to_a

      if kids1.length == kids2.length
        outcome = Comparison::EQUIVALENT
        kids1.zip(kids2).each do |left, right|
          outcome = XmlComparator.send(:compare_nodes, left, right, opts,
                                       child_opts, diff_children,
                                       differences)
          # Stop at the first non-equivalent pair and report its code.
          break unless outcome == Comparison::EQUIVALENT
        end
        outcome
      else
        # NOTE(review): nothing is appended to `differences` on this path,
        # so a verbose result carries no entry for the mismatch — confirm
        # how ComparisonResult treats an empty differences list.
        Comparison::UNEQUAL_ELEMENTS
      end
    else
      XmlComparator.send(:compare_nodes, node1, node2, opts, child_opts,
                         diff_children, differences)
    end

  return result == Comparison::EQUIVALENT unless opts[:verbose]

  # Only DiffNode entries are classified as normative/informative.
  unless differences.empty?
    classifier = Canon::Diff::DiffClassifier.new(wrapped)
    classifier.classify_all(
      differences.select { |entry| entry.is_a?(Canon::Diff::DiffNode) },
    )
  end

  ComparisonResult.new(
    differences: differences,
    preprocessed_strings: [display1, display2],
    format: :html,
    html_version: detect_html_version_from_node(node1),
    match_options: resolved,
  )
end
137
+
138
+ private
139
+
# Turn an input into a Nokogiri node ready for comparison.
#
# Pre-parsed nodes are returned as the same object; with :rendered
# preprocessing they are normalized in place first, so the caller's node is
# mutated. Strings are preprocessed as requested and then parsed as HTML.
#
# @param node [String, Nokogiri::XML::Node] Raw HTML or an already-parsed node
# @param preprocessing [Symbol] :rendered, :normalize, :c14n or :format;
#   any other value leaves a string untouched before parsing
# @param match_opts [Hash] Resolved match options, forwarded to
#   normalize_rendered_whitespace
# @return [Nokogiri::XML::Node] Parsed (and possibly normalized) node
# @raise [Canon::CompareFormatMismatchError] if the input is detected as XML
def parse_node(node, preprocessing = :none, match_opts = {})
  unless node.is_a?(String)
    # Only non-string inputs are rejected on node type; strings get the
    # textual check further down.
    if is_xml_document?(node)
      raise Canon::CompareFormatMismatchError.new(:xml, :html)
    end

    # :rendered normalizes pre-parsed nodes too (fragments and documents
    # alike), without re-parsing. Other modes return the node untouched.
    if preprocessing == :rendered
      normalize_html_style_script_comments(node)
      normalize_rendered_whitespace(node, match_opts)
    end

    return node
  end

  has_html_tag = node.match?(/<html[\s>]/i)

  # An XML declaration without any <html> tag is treated as pure XML.
  # With an <html> tag it is accepted as HTML carrying an XML declaration
  # (common output from Nokogiri::HTML4#to_s).
  if node.strip.start_with?("<?xml") && !has_html_tag
    raise Canon::CompareFormatMismatchError.new(:xml, :html)
  end

  # :rendered is handled on the parsed tree to avoid double-parsing: full
  # documents are parsed as documents, everything else as a fragment.
  if preprocessing == :rendered
    parsed = if has_html_tag
               Nokogiri::HTML(node, &:noblanks)
             else
               Nokogiri::HTML4.fragment(node)
             end
    normalize_html_style_script_comments(parsed)
    normalize_rendered_whitespace(parsed, match_opts)
    remove_whitespace_only_text_nodes(parsed)
    return parsed
  end

  # String-level preprocessing applied before parsing.
  html_string = case preprocessing
                when :normalize
                  # Strip each line and drop lines that end up empty
                  node.lines.map(&:strip).reject(&:empty?).join("\n")
                when :c14n
                  # Canonicalize via the XML canonicalizer, without comments
                  Canon::Xml::C14n.canonicalize(node, with_comments: false)
                when :format
                  # Pretty-format the HTML
                  Canon.format(node, :html)
                else
                  # :none or unrecognized - use as-is
                  node
                end

  # noblanks keeps Nokogiri from adding structural whitespace
  doc = Nokogiri::HTML(html_string, &:noblanks)
  normalize_html_style_script_comments(doc)
  doc
end
229
+
# Guess the HTML version from raw markup by looking at its doctype.
#
# @param content [String] HTML content
# @return [Symbol] :html4 when a `<!DOCTYPE HTML PUBLIC ...>` doctype is
#   present (and no bare `<!DOCTYPE html>` is); :html5 in every other case
def detect_html_version(content)
  # Bare HTML5 doctype, matched case-insensitively
  return :html5 if content.match?(/<!DOCTYPE\s+html>/i)

  # HTML4-style doctype with a PUBLIC identifier
  return :html4 if content.match?(/<!DOCTYPE\s+HTML\s+PUBLIC/i)

  # No recognizable doctype: default to HTML5
  :html5
end
246
+
# Derive the HTML version from the class of a parsed node.
#
# @param node [Nokogiri::XML::Node] Nokogiri HTML node
# @return [Symbol] :html5 for Nokogiri::HTML5 documents/fragments,
#   :html4 for everything else
def detect_html_version_from_node(node)
  case node
  when Nokogiri::HTML5::Document, Nokogiri::HTML5::DocumentFragment
    :html5
  when Nokogiri::HTML4::Document, Nokogiri::HTML4::DocumentFragment
    :html4
  else
    # Unknown node types default to HTML4 for compatibility
    :html4
  end
end
264
+
# Produce the string shown in diffs for an already-parsed node.
# Serializing the parsed node (rather than the raw input) keeps the
# displayed diff in step with what was compared.
#
# @param node [Nokogiri::HTML::Document] Parsed HTML node
# @return [String] Result of the node's `to_html`
def serialize_for_display(node)
  markup = node.to_html
  markup
end
275
+
# Strip HTML comments from the content of every <style> and <script>
# element, in place.
#
# The regex runs on the text content of already-parsed elements, not on
# raw input. Elements whose content is empty after stripping lose all
# their children (including whitespace-only CDATA nodes); otherwise the
# content is replaced by the stripped, trimmed text.
#
# @param doc [Nokogiri::XML::Node] Document or fragment to normalize
def normalize_html_style_script_comments(doc)
  doc.css("style, script").each do |tag|
    stripped = tag.content.gsub(/<!--.*?-->/m, "").strip

    # Nothing left: drop every child node instead of writing an empty string
    next tag.children.remove if stripped.empty?

    tag.content = stripped
  end
end
296
+
# Collapse whitespace in text nodes, in place, the way the :rendered
# preprocessing mode expects: every run of whitespace becomes one space,
# and text at the edges of its parent is additionally trimmed.
#
# Text under pre, code, textarea, script or style (at any depth) is left
# untouched. When match_opts[:text_content] is :strict nothing is
# normalized at all.
#
# @param doc [Nokogiri::HTML::Document] Document to normalize
# @param match_opts [Hash] Match options to respect during normalization
def normalize_rendered_whitespace(doc, match_opts = {})
  # Strict text matching was requested explicitly: leave every node alone
  return if match_opts[:text_content] == :strict

  # Fixed list of whitespace-significant elements; not a match option
  preserve_whitespace = %w[pre code textarea script style]

  doc.xpath(".//text()").each do |text_node|
    # All ancestors are checked, not just the immediate parent
    if ancestor_preserves_whitespace?(text_node.parent, preserve_whitespace)
      next
    end

    collapsed = text_node.content.gsub(/\s+/, " ")
    text_node.content =
      should_trim_text_node?(text_node) ? collapsed.strip : collapsed
  end
end
331
+
# Walk from `node` up through its parents and report whether any of them
# (including `node` itself) has a name in `preserve_list`.
#
# Names are compared lower-cased. The walk ends at a document node or at
# the first object that does not respond to `name`.
def ancestor_preserves_whitespace?(node, preserve_list)
  cursor = node
  loop do
    return false unless cursor.respond_to?(:name)
    return true if preserve_list.include?(cursor.name.downcase)

    # Documents are the root; there is nothing above them to inspect
    return false if cursor.is_a?(Nokogiri::XML::Document)

    cursor = cursor.parent
  end
end
345
+
# Decide whether a text node's leading/trailing whitespace should be
# trimmed. That is the case when the node is its parent's only child, or
# its first or last child.
def should_trim_text_node?(text_node)
  kids = text_node.parent.children

  # Sole child, or sitting at either edge of the parent
  kids.length == 1 || text_node == kids.first || text_node == kids.last
end
359
+
# Remove every text node under `doc` whose content is empty once stripped
# (e.g. whitespace between block elements).
def remove_whitespace_only_text_nodes(doc)
  doc.xpath(".//text()").each do |text_node|
    text_node.remove if text_node.content.strip.empty?
  end
end
371
+
# Report whether a parsed node should be treated as XML rather than HTML.
#
# HTML4/HTML5 documents and fragments are never XML. Any other node counts
# as XML only when one of its direct children is a processing instruction
# named "xml"; being a Nokogiri::XML::Document alone is deliberately not
# enough.
#
# NOTE(review): parsers commonly keep the `<?xml ...?>` declaration as
# document metadata rather than as a child node — confirm this check can
# actually fire for documents parsed by Nokogiri.
def is_xml_document?(node)
  case node
  when Nokogiri::HTML4::Document, Nokogiri::HTML5::Document,
       Nokogiri::HTML4::DocumentFragment, Nokogiri::HTML5::DocumentFragment
    return false
  end

  # Covers XML documents and fragments alike: anything exposing children
  return false unless node.respond_to?(:children)

  node.children.any? do |child|
    child.is_a?(Nokogiri::XML::ProcessingInstruction) && child.name == "xml"
  end
end
407
+ end
408
+ end
409
+ end
410
+ end
@@ -0,0 +1,212 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require_relative "match_options"
5
+ require_relative "comparison_result"
6
+
7
+ module Canon
8
+ module Comparison
9
+ # JSON comparison class
10
+ # Handles comparison of JSON objects with various options
11
+ class JsonComparator
# Baseline options for JSON comparison. Caller-supplied options are merged
# over this hash at the top of `equivalent?`.
DEFAULT_OPTS = {
  # -- output --
  verbose: false,
}.merge(
  # -- match system (nil means "let MatchOptions resolve it") --
  match_profile: nil,
  match: nil,
  preprocessing: nil,
  global_profile: nil,
  global_options: nil,
).merge(
  # -- diff display --
  diff: nil,
).freeze
27
+
28
+ class << self
# Compare two JSON values for equivalence.
#
# Strings are parsed with JSON.parse; anything else is compared as the
# Ruby object it already is.
#
# @param json1 [String, Hash, Array] First JSON
# @param json2 [String, Hash, Array] Second JSON
# @param opts [Hash] Comparison options (see DEFAULT_OPTS)
# @return [Boolean, ComparisonResult] a boolean normally; a
#   ComparisonResult when opts[:verbose] is truthy
def equivalent?(json1, json2, opts = {})
  opts = DEFAULT_OPTS.merge(opts)

  # Plain hash of resolved match options (format-specific defaults applied).
  resolved = MatchOptions::Json.resolve(
    format: :json,
    **opts.slice(:match_profile, :match, :preprocessing,
                 :global_profile, :global_options),
  )

  # NOTE(review): the wrapper is built for parity with XML/HTML but its
  # value is never used here — confirm whether construction is needed.
  Canon::Comparison::ResolvedMatchOptions.new(resolved, format: :json)

  # NOTE(review): a resolved :preprocessing value is not applied anywhere
  # in this method.
  opts[:match_opts] = resolved

  obj1 = parse_json(json1)
  obj2 = parse_json(json2)

  differences = []
  result = compare_ruby_objects(obj1, obj2, opts, differences, "")

  return result == Comparison::EQUIVALENT unless opts[:verbose]

  # Pretty-print for display; values that are still strings are shown as-is.
  display = [obj1, obj2].map do |value|
    value.is_a?(String) ? value : JSON.pretty_generate(value)
  end

  ComparisonResult.new(
    differences: differences,
    preprocessed_strings: display,
    format: :json,
    match_options: resolved,
  )
end
79
+
80
+ private
81
+
# Parse a JSON string; any non-string input is handed back unchanged.
#
# @param obj [String, Object] JSON text or an already-parsed value
# @return [Object] Parsed value, or `obj` itself when it is not a String
def parse_json(obj)
  obj.is_a?(String) ? JSON.parse(obj) : obj
end
88
+
# Recursively compare two parsed values (Hash, Array or scalar), as used
# for JSON/YAML.
#
# The two values must be of exactly the same class; otherwise the pair is
# reported as UNEQUAL_TYPES without looking any deeper.
#
# @param obj1 [Object] Left-hand value
# @param obj2 [Object] Right-hand value
# @param opts [Hash] Comparison options
# @param differences [Array] Collector that difference entries are added to
# @param path [String] Path of the current value within the document
# @return [Object] One of the Comparison result codes
def compare_ruby_objects(obj1, obj2, opts, differences, path)
  unless obj1.instance_of?(obj2.class)
    add_ruby_difference(path, obj1, obj2, Comparison::UNEQUAL_TYPES, opts,
                        differences)
    return Comparison::UNEQUAL_TYPES
  end

  case obj1
  when Hash  then compare_hashes(obj1, obj2, opts, differences, path)
  when Array then compare_arrays(obj1, obj2, opts, differences, path)
  else
    # Scalars and any other object type are compared with plain `==`
    compare_primitives(obj1, obj2, opts, differences, path)
  end
end
117
+
# Compare two hashes key by key.
#
# Keys present on only one side are reported as MISSING_HASH_KEY; values
# under shared keys are compared recursively. Keys are matched as sets, so
# sorting (done unless key_order is :strict) only affects the order in
# which differences are reported.
#
# NOTE(review): with key_order :strict the relative order of keys is still
# never compared — confirm whether that is intended.
#
# @return [Object] UNEQUAL_HASH_VALUES if any shared key differs, else
#   MISSING_HASH_KEY if a key is missing on either side, else EQUIVALENT
def compare_hashes(hash1, hash2, opts, differences, path)
  keys1 = hash1.keys
  keys2 = hash2.keys

  unless opts[:match_opts][:key_order] == :strict
    keys1 = keys1.sort_by(&:to_s)
    keys2 = keys2.sort_by(&:to_s)
  end

  # Dotted path of a key below the current position
  path_for = ->(key) { path.empty? ? key.to_s : "#{path}.#{key}" }

  only_in_1 = keys1 - keys2
  only_in_2 = keys2 - keys1

  only_in_1.each do |key|
    add_ruby_difference(path_for.call(key), hash1[key], nil,
                        Comparison::MISSING_HASH_KEY, opts, differences)
  end
  only_in_2.each do |key|
    add_ruby_difference(path_for.call(key), nil, hash2[key],
                        Comparison::MISSING_HASH_KEY, opts, differences)
  end

  # Every shared key is visited (no short-circuit) so that all differences
  # are collected.
  outcomes = (keys1 & keys2).map do |key|
    compare_ruby_objects(hash1[key], hash2[key], opts, differences,
                         path_for.call(key))
  end
  values_match = outcomes.all? { |code| code == Comparison::EQUIVALENT }

  return Comparison::UNEQUAL_HASH_VALUES unless values_match

  if only_in_1.empty? && only_in_2.empty?
    Comparison::EQUIVALENT
  else
    Comparison::MISSING_HASH_KEY
  end
end
164
+
# Compare two arrays position by position.
#
# Arrays of different length are reported once as UNEQUAL_ARRAY_LENGTHS
# and their elements are not compared.
#
# @return [Object] UNEQUAL_ARRAY_LENGTHS, UNEQUAL_ARRAY_ELEMENTS or
#   EQUIVALENT
def compare_arrays(arr1, arr2, opts, differences, path)
  if arr1.length != arr2.length
    add_ruby_difference(path, arr1, arr2,
                        Comparison::UNEQUAL_ARRAY_LENGTHS, opts,
                        differences)
    return Comparison::UNEQUAL_ARRAY_LENGTHS
  end

  # Every position is visited (no short-circuit) so that all differences
  # are collected.
  outcomes = arr1.zip(arr2).each_with_index.map do |(left, right), position|
    compare_ruby_objects(left, right, opts, differences,
                         "#{path}[#{position}]")
  end

  if outcomes.all? { |code| code == Comparison::EQUIVALENT }
    Comparison::EQUIVALENT
  else
    Comparison::UNEQUAL_ARRAY_ELEMENTS
  end
end
185
+
# Compare two scalar values with `==`.
#
# @return [Object] EQUIVALENT when equal; otherwise UNEQUAL_PRIMITIVES,
#   after reporting the pair through add_ruby_difference
def compare_primitives(val1, val2, opts, differences, path)
  return Comparison::EQUIVALENT if val1 == val2

  add_ruby_difference(path, val1, val2, Comparison::UNEQUAL_PRIMITIVES,
                      opts, differences)
  Comparison::UNEQUAL_PRIMITIVES
end
197
+
# Append a difference entry to `differences`, but only in verbose mode.
#
# @param path [String] Path of the differing value
# @param obj1 [Object] Value on the first side
# @param obj2 [Object] Value on the second side
# @param diff_code [Object] Comparison result code describing the difference
# @param opts [Hash] Comparison options; only :verbose is read
# @param differences [Array] Collector to append to
# @return [Array, nil] `differences` when an entry was added, nil otherwise
def add_ruby_difference(path, obj1, obj2, diff_code, opts, differences)
  entry = { path: path, value1: obj1, value2: obj2, diff_code: diff_code }
  differences << entry if opts[:verbose]
end
209
+ end
210
+ end
211
+ end
212
+ end