canon 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +83 -22
  3. data/docs/Gemfile +1 -0
  4. data/docs/_config.yml +90 -1
  5. data/docs/advanced/diff-classification.adoc +196 -24
  6. data/docs/features/match-options/index.adoc +239 -1
  7. data/lib/canon/comparison/format_detector.rb +2 -1
  8. data/lib/canon/comparison/html_comparator.rb +19 -8
  9. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  10. data/lib/canon/comparison/markup_comparator.rb +109 -2
  11. data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
  12. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  13. data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
  14. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
  15. data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
  16. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
  17. data/lib/canon/comparison/xml_comparator.rb +240 -23
  18. data/lib/canon/comparison/xml_node_comparison.rb +25 -3
  19. data/lib/canon/diff/diff_classifier.rb +119 -5
  20. data/lib/canon/diff/formatting_detector.rb +1 -1
  21. data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
  22. data/lib/canon/rspec_matchers.rb +37 -8
  23. data/lib/canon/version.rb +1 -1
  24. data/lib/canon/xml/data_model.rb +24 -13
  25. metadata +4 -78
  26. data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
  27. data/false_positive_analysis.txt +0 -0
  28. data/file1.html +0 -1
  29. data/file2.html +0 -1
  30. data/old-docs/ADVANCED_TOPICS.adoc +0 -20
  31. data/old-docs/BASIC_USAGE.adoc +0 -16
  32. data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
  33. data/old-docs/CLI.adoc +0 -497
  34. data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  35. data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
  36. data/old-docs/DIFF_FORMATTING.adoc +0 -540
  37. data/old-docs/DIFF_PARAMETERS.adoc +0 -261
  38. data/old-docs/DOM_DIFF.adoc +0 -1017
  39. data/old-docs/ENV_CONFIG.adoc +0 -876
  40. data/old-docs/FORMATS.adoc +0 -867
  41. data/old-docs/INPUT_VALIDATION.adoc +0 -477
  42. data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
  43. data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
  44. data/old-docs/MATCH_OPTIONS.adoc +0 -912
  45. data/old-docs/MODES.adoc +0 -432
  46. data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  47. data/old-docs/OPTIONS.adoc +0 -1387
  48. data/old-docs/PREPROCESSING.adoc +0 -491
  49. data/old-docs/README.old.adoc +0 -2831
  50. data/old-docs/RSPEC.adoc +0 -814
  51. data/old-docs/RUBY_API.adoc +0 -485
  52. data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
  53. data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
  54. data/old-docs/STRING_COMPARE.adoc +0 -345
  55. data/old-docs/TMP.adoc +0 -3384
  56. data/old-docs/TREE_DIFF.adoc +0 -1080
  57. data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
  58. data/old-docs/VERBOSE.adoc +0 -482
  59. data/old-docs/VISUALIZATION_MAP.adoc +0 -625
  60. data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
  61. data/scripts/analyze_current_state.rb +0 -85
  62. data/scripts/analyze_false_positives.rb +0 -114
  63. data/scripts/analyze_remaining_failures.rb +0 -105
  64. data/scripts/compare_current_failures.rb +0 -95
  65. data/scripts/compare_dom_tree_diff.rb +0 -158
  66. data/scripts/compare_failures.rb +0 -151
  67. data/scripts/debug_attribute_extraction.rb +0 -66
  68. data/scripts/debug_blocks_839.rb +0 -115
  69. data/scripts/debug_meta_matching.rb +0 -52
  70. data/scripts/debug_p_matching.rb +0 -192
  71. data/scripts/debug_signature_matching.rb +0 -118
  72. data/scripts/debug_sourcecode_124.rb +0 -32
  73. data/scripts/debug_whitespace_sensitive.rb +0 -192
  74. data/scripts/extract_false_positives.rb +0 -138
  75. data/scripts/find_actual_false_positives.rb +0 -125
  76. data/scripts/investigate_all_false_positives.rb +0 -161
  77. data/scripts/investigate_batch1.rb +0 -127
  78. data/scripts/investigate_classification.rb +0 -150
  79. data/scripts/investigate_classification_detailed.rb +0 -190
  80. data/scripts/investigate_common_failures.rb +0 -342
  81. data/scripts/investigate_false_negative.rb +0 -80
  82. data/scripts/investigate_false_positive.rb +0 -83
  83. data/scripts/investigate_false_positives.rb +0 -227
  84. data/scripts/investigate_false_positives_batch.rb +0 -163
  85. data/scripts/investigate_mixed_content.rb +0 -125
  86. data/scripts/investigate_remaining_16.rb +0 -214
  87. data/scripts/run_single_test.rb +0 -29
  88. data/scripts/test_all_false_positives.rb +0 -95
  89. data/scripts/test_attribute_details.rb +0 -61
  90. data/scripts/test_both_algorithms.rb +0 -49
  91. data/scripts/test_both_simple.rb +0 -49
  92. data/scripts/test_enhanced_semantic_output.rb +0 -125
  93. data/scripts/test_readme_examples.rb +0 -131
  94. data/scripts/test_semantic_tree_diff.rb +0 -99
  95. data/scripts/test_semantic_ux_improvements.rb +0 -135
  96. data/scripts/test_single_false_positive.rb +0 -119
  97. data/scripts/test_size_limits.rb +0 -99
  98. data/test_html_1.html +0 -21
  99. data/test_html_2.html +0 -21
  100. data/test_nokogiri.rb +0 -33
  101. data/test_normalize.rb +0 -45
@@ -1,227 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Script to systematically investigate false positives
5
- # where semantic algorithm fails but DOM algorithm passes
6
-
7
- require "fileutils"
8
- require "json"
9
-
10
- # False positives to investigate
11
- FALSE_POSITIVES = [
12
- { file: "blocks_spec.rb", line: 352 },
13
- { file: "footnotes_spec.rb", line: 740 },
14
- { file: "inline_spec.rb", line: 1012 },
15
- { file: "inline_spec.rb", line: 1251 },
16
- { file: "postproc_spec.rb", line: 948 },
17
- { file: "postproc_word_spec.rb", line: 372 },
18
- { file: "postproc_word_spec.rb", line: 576 },
19
- { file: "presentation_xml_numbers_override_spec.rb", line: 2095 },
20
- { file: "presentation_xml_spec.rb", line: 1288 },
21
- { file: "presentation_xml_spec.rb", line: 1500 },
22
- { file: "ref_spec.rb", line: 906 },
23
- { file: "sourcecode_spec.rb", line: 124 },
24
- { file: "sourcecode_spec.rb", line: 610 },
25
- { file: "terms_spec.rb", line: 1445 },
26
- { file: "xref_format_spec.rb", line: 628 },
27
- { file: "xref_spec.rb", line: 315 },
28
- ].freeze
29
-
30
- ISODOC_SPEC_DIR = File.expand_path("../../../mn/isodoc/spec/isodoc", __dir__)
31
-
32
- class FalsePositiveInvestigator
33
- attr_reader :results
34
-
35
- def initialize
36
- @results = []
37
- end
38
-
39
- def investigate_all
40
- puts "=" * 80
41
- puts "INVESTIGATING 16 FALSE POSITIVES"
42
- puts "=" * 80
43
- puts
44
-
45
- FALSE_POSITIVES.each_with_index do |test, idx|
46
- puts "\n#{idx + 1}/#{FALSE_POSITIVES.size}: #{test[:file]}:#{test[:line]}"
47
- puts "-" * 80
48
-
49
- result = investigate_test(test)
50
- @results << result
51
-
52
- display_result(result)
53
- end
54
-
55
- summarize_results
56
- end
57
-
58
- def investigate_test(test)
59
- file_path = File.join(ISODOC_SPEC_DIR, test[:file])
60
-
61
- unless File.exist?(file_path)
62
- return {
63
- test: test,
64
- error: "File not found: #{file_path}",
65
- dom_passes: nil,
66
- semantic_passes: nil,
67
- }
68
- end
69
-
70
- result = {
71
- test: test,
72
- file_path: file_path,
73
- dom_passes: nil,
74
- semantic_passes: nil,
75
- semantic_output: nil,
76
- error: nil,
77
- }
78
-
79
- # Test with DOM algorithm
80
- puts " Testing with DOM algorithm..."
81
- dom_output = run_test(file_path, test[:line], "dom")
82
- result[:dom_passes] = dom_output[:success]
83
- result[:dom_output] = dom_output[:output]
84
-
85
- # Test with semantic algorithm
86
- puts " Testing with semantic algorithm..."
87
- semantic_output = run_test(file_path, test[:line], "semantic")
88
- result[:semantic_passes] = semantic_output[:success]
89
- result[:semantic_output] = semantic_output[:output]
90
-
91
- result
92
- rescue StandardError => e
93
- {
94
- test: test,
95
- error: "Exception: #{e.message}",
96
- dom_passes: nil,
97
- semantic_passes: nil,
98
- }
99
- end
100
-
101
- def run_test(file_path, line, algorithm)
102
- cmd = "cd #{ISODOC_SPEC_DIR}/.. && CANON_ALGORITHM=#{algorithm} bundle exec rspec #{file_path}:#{line} 2>&1"
103
- output = `#{cmd}`
104
- success = $?.success?
105
-
106
- {
107
- success: success,
108
- output: output,
109
- exit_code: $?.exitstatus,
110
- }
111
- end
112
-
113
- def display_result(result)
114
- if result[:error]
115
- puts " ❌ ERROR: #{result[:error]}"
116
- return
117
- end
118
-
119
- dom_status = result[:dom_passes] ? "✅ PASS" : "❌ FAIL"
120
- sem_status = result[:semantic_passes] ? "✅ PASS" : "❌ FAIL"
121
-
122
- puts " DOM: #{dom_status}"
123
- puts " Semantic: #{sem_status}"
124
-
125
- if result[:dom_passes] && !result[:semantic_passes]
126
- puts " ⚠️ CONFIRMED FALSE POSITIVE"
127
- analyze_failure(result)
128
- elsif !result[:dom_passes] && result[:semantic_passes]
129
- puts " ⚠️ UNEXPECTED: DOM fails but semantic passes!"
130
- elsif !result[:dom_passes] && !result[:semantic_passes]
131
- puts " ℹ️ Both algorithms fail (not a false positive)"
132
- else
133
- puts " ✅ Both algorithms pass (false positive may be fixed)"
134
- end
135
- end
136
-
137
- def analyze_failure(result)
138
- output = result[:semantic_output]
139
-
140
- # Look for diff patterns
141
- if output.include?("Expected XML to be equivalent")
142
- puts " 📋 Failure type: XML equivalence check"
143
- elsif output.include?("Expected HTML to be equivalent")
144
- puts " 📋 Failure type: HTML equivalence check"
145
- end
146
-
147
- # Extract key diff lines
148
- diff_lines = output.lines.select { |l| l.match?(/^\s*[+-]/) }.take(10)
149
- if diff_lines.any?
150
- puts " 📊 Sample diff:"
151
- diff_lines.each { |l| puts " #{l.strip}" }
152
- end
153
-
154
- # Look for specific patterns
155
- if output.include?("whitespace")
156
- puts " 🔍 Involves: whitespace differences"
157
- end
158
- if output.include?("attribute")
159
- puts " 🔍 Involves: attribute differences"
160
- end
161
- if output.include?("text content")
162
- puts " 🔍 Involves: text content differences"
163
- end
164
- end
165
-
166
- def summarize_results
167
- puts "\n#{'=' * 80}"
168
- puts "SUMMARY"
169
- puts "=" * 80
170
-
171
- confirmed_fps = @results.count do |r|
172
- r[:dom_passes] && !r[:semantic_passes]
173
- end
174
- fixed = @results.count { |r| r[:dom_passes] && r[:semantic_passes] }
175
- errors = @results.count { |r| r[:error] }
176
- both_fail = @results.count { |r| !r[:dom_passes] && !r[:semantic_passes] }
177
-
178
- puts "Confirmed false positives: #{confirmed_fps}/16"
179
- puts "Already fixed: #{fixed}/16"
180
- puts "Both fail (not FP): #{both_fail}/16"
181
- puts "Errors: #{errors}/16"
182
- puts
183
-
184
- if confirmed_fps.positive?
185
- puts "FALSE POSITIVES TO FIX:"
186
- @results.each do |r|
187
- next unless r[:dom_passes] && !r[:semantic_passes]
188
-
189
- puts " - #{r[:test][:file]}:#{r[:test][:line]}"
190
- end
191
- end
192
-
193
- puts "\n#{'=' * 80}"
194
- end
195
-
196
- def save_detailed_output(output_dir = "tmp/false_positive_investigation")
197
- FileUtils.mkdir_p(output_dir)
198
-
199
- @results.each_with_index do |result, idx|
200
- next if result[:error]
201
-
202
- test = result[:test]
203
- filename = "#{idx + 1}_#{test[:file].gsub('.rb', '')}_#{test[:line]}.txt"
204
- filepath = File.join(output_dir, filename)
205
-
206
- File.write(filepath, <<~OUTPUT)
207
- Test: #{test[:file]}:#{test[:line]}
208
- DOM passes: #{result[:dom_passes]}
209
- Semantic passes: #{result[:semantic_passes]}
210
-
211
- ========================================
212
- SEMANTIC OUTPUT:
213
- ========================================
214
- #{result[:semantic_output]}
215
- OUTPUT
216
- end
217
-
218
- puts "\nDetailed output saved to: #{output_dir}/"
219
- end
220
- end
221
-
222
- # Run investigation
223
- investigator = FalsePositiveInvestigator.new
224
- investigator.investigate_all
225
- investigator.save_detailed_output
226
-
227
- puts "\nInvestigation complete!"
@@ -1,163 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Systematically investigate false positive failures
5
- # Usage: ruby scripts/investigate_false_positives_batch.rb <spec_file:line> [<spec_file:line> ...]
6
-
7
- require "bundler/setup"
8
- require "fileutils"
9
-
10
- # False positive test cases from XMLNS_FIX_VALIDATION.md
11
- FALSE_POSITIVES = [
12
- "blocks_spec.rb:352",
13
- "footnotes_spec.rb:740",
14
- "inline_spec.rb:1012",
15
- "inline_spec.rb:1251",
16
- "postproc_spec.rb:948",
17
- "postproc_word_spec.rb:372",
18
- "postproc_word_spec.rb:576",
19
- "presentation_xml_numbers_override_spec.rb:2095",
20
- "presentation_xml_spec.rb:1288",
21
- "presentation_xml_spec.rb:1500",
22
- "ref_spec.rb:906",
23
- "sourcecode_spec.rb:124",
24
- "sourcecode_spec.rb:610",
25
- "terms_spec.rb:1445",
26
- "xref_format_spec.rb:628",
27
- "xref_spec.rb:315",
28
- ].freeze
29
-
30
- def run_test(spec_file, line, algorithm)
31
- spec_path = File.expand_path("../../../mn/isodoc/spec/isodoc/#{spec_file}",
32
- __dir__)
33
-
34
- unless File.exist?(spec_path)
35
- puts " ❌ File not found: #{spec_path}"
36
- return nil
37
- end
38
-
39
- # Run with specific algorithm
40
- { "CANON_ALGORITHM" => algorithm }
41
- cmd = "cd #{File.dirname(spec_path)} && bundle exec rspec #{spec_path}:#{line} 2>&1"
42
-
43
- output = `#{cmd}`
44
- success = $?.success?
45
-
46
- { success: success, output: output }
47
- end
48
-
49
- def analyze_test(test_case)
50
- spec_file, line = test_case.split(":")
51
-
52
- puts "\n#{'=' * 80}"
53
- puts "ANALYZING: #{test_case}"
54
- puts "=" * 80
55
-
56
- # Run with DOM algorithm
57
- puts "\n1. Testing with DOM algorithm..."
58
- dom_result = run_test(spec_file, line, "dom")
59
- return unless dom_result
60
-
61
- dom_pass = dom_result[:success]
62
- puts " Result: #{dom_pass ? '✅ PASS' : '❌ FAIL'}"
63
-
64
- # Run with Semantic algorithm
65
- puts "\n2. Testing with Semantic algorithm..."
66
- semantic_result = run_test(spec_file, line, "semantic")
67
- return unless semantic_result
68
-
69
- semantic_pass = semantic_result[:success]
70
- puts " Result: #{semantic_pass ? '✅ PASS' : '❌ FAIL'}"
71
-
72
- # Analysis
73
- puts "\n#{'-' * 80}"
74
- puts "ANALYSIS:"
75
- puts "-" * 80
76
-
77
- if dom_pass && !semantic_pass
78
- puts "✅ CONFIRMED FALSE POSITIVE: DOM passes, Semantic fails"
79
- puts "\nThis test should be investigated to understand why semantic is too strict."
80
-
81
- # Extract failure details from semantic output
82
- if semantic_result[:output] =~ /Failure\/Error:(.+?)(?=\n\n|\z)/m
83
- failure_section = $1
84
- puts "\nFailure details:"
85
- puts failure_section.lines.take(20).join
86
- end
87
-
88
- :false_positive
89
- elsif !dom_pass && semantic_pass
90
- puts "⚠️ UNEXPECTED: This was listed as false positive but DOM fails, Semantic passes"
91
- puts "This is actually a false NEGATIVE, not a false positive!"
92
- :false_negative
93
- elsif dom_pass && semantic_pass
94
- puts "✅ BOTH PASS: This is no longer a false positive!"
95
- :fixed
96
- else
97
- puts "❌ BOTH FAIL: This is a common failure, not a false positive"
98
- :common_failure
99
- end
100
- end
101
-
102
- def main
103
- # Get test cases from arguments or use all false positives
104
- test_cases = if ARGV.empty?
105
- FALSE_POSITIVES
106
- else
107
- ARGV
108
- end
109
-
110
- puts "Investigating #{test_cases.size} false positive test cases..."
111
-
112
- results = {
113
- false_positive: [],
114
- false_negative: [],
115
- fixed: [],
116
- common_failure: [],
117
- error: [],
118
- }
119
-
120
- test_cases.each do |test_case|
121
- result = analyze_test(test_case)
122
- results[result || :error] << test_case
123
- end
124
-
125
- # Summary
126
- puts "\n#{'=' * 80}"
127
- puts "SUMMARY"
128
- puts "=" * 80
129
-
130
- puts "\n✅ Confirmed False Positives (need fixing): #{results[:false_positive].size}"
131
- results[:false_positive].each { |tc| puts " - #{tc}" }
132
-
133
- puts "\n🎉 Already Fixed: #{results[:fixed].size}"
134
- results[:fixed].each { |tc| puts " - #{tc}" }
135
-
136
- puts "\n⚠️ Misclassified (actually false negatives): #{results[:false_negative].size}"
137
- results[:false_negative].each { |tc| puts " - #{tc}" }
138
-
139
- puts "\n❌ Common Failures: #{results[:common_failure].size}"
140
- results[:common_failure].each { |tc| puts " - #{tc}" }
141
-
142
- puts "\n💥 Errors: #{results[:error].size}"
143
- results[:error].each { |tc| puts " - #{tc}" }
144
-
145
- # Save detailed results
146
- output_file = "/tmp/false_positive_investigation.txt"
147
- File.open(output_file, "w") do |f|
148
- f.puts "FALSE POSITIVE INVESTIGATION RESULTS"
149
- f.puts "=" * 80
150
- f.puts "\nConfirmed False Positives (#{results[:false_positive].size}):"
151
- results[:false_positive].each { |tc| f.puts tc }
152
- f.puts "\nAlready Fixed (#{results[:fixed].size}):"
153
- results[:fixed].each { |tc| f.puts tc }
154
- f.puts "\nMisclassified (#{results[:false_negative].size}):"
155
- results[:false_negative].each { |tc| f.puts tc }
156
- f.puts "\nCommon Failures (#{results[:common_failure].size}):"
157
- results[:common_failure].each { |tc| f.puts tc }
158
- end
159
-
160
- puts "\nDetailed results saved to: #{output_file}"
161
- end
162
-
163
- main if __FILE__ == $PROGRAM_NAME
@@ -1,125 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require "bundler/setup"
5
- require "canon"
6
- require "nokogiri"
7
-
8
- # Test mixed content element extraction
9
- def test_mixed_content_extraction
10
- puts "=" * 80
11
- puts "Testing Mixed Content Text Extraction"
12
- puts "=" * 80
13
-
14
- # Create test XML with mixed content
15
- xml_str = <<~XML
16
- <root>
17
- <formattedAddress>123 Main St<br/>Springfield, IL<br/>62701</formattedAddress>
18
- <normalText>Just plain text</normalText>
19
- <withSpaces> Text with spaces </withSpaces>
20
- <withNewlines>Text
21
- with
22
- newlines</withNewlines>
23
- </root>
24
- XML
25
-
26
- doc = Nokogiri::XML(xml_str)
27
-
28
- # Test each element
29
- doc.root.element_children.each do |elem|
30
- puts "\nElement: <#{elem.name}>"
31
- puts " Content: #{elem.content.inspect}"
32
-
33
- # Extract text nodes
34
- text_nodes = elem.children.select(&:text?)
35
- puts " Text nodes count: #{text_nodes.size}"
36
- text_nodes.each_with_index do |node, i|
37
- puts " [#{i}]: #{node.text.inspect}"
38
- end
39
-
40
- # Join text
41
- joined = text_nodes.map(&:text).join
42
- puts " Joined text: #{joined.inspect}"
43
-
44
- # Show normalization
45
- normalized = joined.gsub(/\s+/, " ").strip
46
- puts " Normalized: #{normalized.inspect}"
47
-
48
- # Element children
49
- elem_children = elem.element_children
50
- puts " Element children: #{elem_children.map(&:name).inspect}"
51
- end
52
- end
53
-
54
- # Test with Canon adapter
55
- def test_with_adapter
56
- puts "\n#{'=' * 80}"
57
- puts "Testing with Canon XML Adapter"
58
- puts "=" * 80
59
-
60
- xml_str = <<~XML
61
- <root>
62
- <formattedAddress>123 Main St<br/>Springfield, IL<br/>62701</formattedAddress>
63
- </root>
64
- XML
65
-
66
- doc = Nokogiri::XML(xml_str)
67
-
68
- adapter = Canon::TreeDiff::Adapters::XMLAdapter.new
69
- tree = adapter.to_tree(doc)
70
-
71
- # Find the formattedAddress node
72
- address_node = tree.children.first
73
-
74
- puts "\nTreeNode for formattedAddress:"
75
- puts " Label: #{address_node.label}"
76
- puts " Value: #{address_node.value.inspect}"
77
- puts " Children count: #{address_node.children.size}"
78
- address_node.children.each do |child|
79
- puts " Child: #{child.label} = #{child.value.inspect}"
80
- end
81
- end
82
-
83
- # Test normalization in operation detector
84
- def test_normalization_comparison
85
- puts "\n#{'=' * 80}"
86
- puts "Testing Normalization in Comparison"
87
- puts "=" * 80
88
-
89
- # Two versions with different whitespace in mixed content
90
- xml1 = <<~XML
91
- <root>
92
- <address>123 Main St<br/>Springfield, IL<br/>62701</address>
93
- </root>
94
- XML
95
-
96
- xml2 = <<~XML
97
- <root>
98
- <address>123 Main St<br/>Springfield, IL<br/>62701</address>
99
- </root>
100
- XML
101
-
102
- # Compare with whitespace_sensitive: false
103
- result = Canon.semantic_tree_diff(xml1, xml2,
104
- whitespace_sensitive: false,
105
- verbose: true)
106
-
107
- puts "\nComparison result:"
108
- puts " Identical: #{result.identical?}"
109
- puts " Normative differences: #{result.normative_differences?}"
110
- puts " Informative differences: #{result.informative_differences?}"
111
-
112
- if result.operations.any?
113
- puts "\nOperations:"
114
- result.operations.each do |op|
115
- puts " #{op.type}: #{op.path} - #{op.classification}"
116
- puts " Old: #{op.old_value.inspect}" if op.old_value
117
- puts " New: #{op.new_value.inspect}" if op.new_value
118
- end
119
- end
120
- end
121
-
122
- # Run all tests
123
- test_mixed_content_extraction
124
- test_with_adapter
125
- test_normalization_comparison