canon 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +112 -25
  3. data/docs/Gemfile +1 -0
  4. data/docs/_config.yml +90 -1
  5. data/docs/advanced/diff-classification.adoc +82 -2
  6. data/docs/features/match-options/index.adoc +239 -1
  7. data/lib/canon/comparison/format_detector.rb +2 -1
  8. data/lib/canon/comparison/html_comparator.rb +19 -8
  9. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  10. data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
  11. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  12. data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
  13. data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
  14. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
  15. data/lib/canon/comparison/xml_comparator.rb +48 -23
  16. data/lib/canon/comparison/xml_node_comparison.rb +25 -3
  17. data/lib/canon/diff/diff_classifier.rb +101 -2
  18. data/lib/canon/diff/formatting_detector.rb +1 -1
  19. data/lib/canon/rspec_matchers.rb +37 -8
  20. data/lib/canon/version.rb +1 -1
  21. data/lib/canon/xml/data_model.rb +24 -13
  22. metadata +3 -78
  23. data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
  24. data/false_positive_analysis.txt +0 -0
  25. data/file1.html +0 -1
  26. data/file2.html +0 -1
  27. data/old-docs/ADVANCED_TOPICS.adoc +0 -20
  28. data/old-docs/BASIC_USAGE.adoc +0 -16
  29. data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
  30. data/old-docs/CLI.adoc +0 -497
  31. data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  32. data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
  33. data/old-docs/DIFF_FORMATTING.adoc +0 -540
  34. data/old-docs/DIFF_PARAMETERS.adoc +0 -261
  35. data/old-docs/DOM_DIFF.adoc +0 -1017
  36. data/old-docs/ENV_CONFIG.adoc +0 -876
  37. data/old-docs/FORMATS.adoc +0 -867
  38. data/old-docs/INPUT_VALIDATION.adoc +0 -477
  39. data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
  40. data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
  41. data/old-docs/MATCH_OPTIONS.adoc +0 -912
  42. data/old-docs/MODES.adoc +0 -432
  43. data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  44. data/old-docs/OPTIONS.adoc +0 -1387
  45. data/old-docs/PREPROCESSING.adoc +0 -491
  46. data/old-docs/README.old.adoc +0 -2831
  47. data/old-docs/RSPEC.adoc +0 -814
  48. data/old-docs/RUBY_API.adoc +0 -485
  49. data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
  50. data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
  51. data/old-docs/STRING_COMPARE.adoc +0 -345
  52. data/old-docs/TMP.adoc +0 -3384
  53. data/old-docs/TREE_DIFF.adoc +0 -1080
  54. data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
  55. data/old-docs/VERBOSE.adoc +0 -482
  56. data/old-docs/VISUALIZATION_MAP.adoc +0 -625
  57. data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
  58. data/scripts/analyze_current_state.rb +0 -85
  59. data/scripts/analyze_false_positives.rb +0 -114
  60. data/scripts/analyze_remaining_failures.rb +0 -105
  61. data/scripts/compare_current_failures.rb +0 -95
  62. data/scripts/compare_dom_tree_diff.rb +0 -158
  63. data/scripts/compare_failures.rb +0 -151
  64. data/scripts/debug_attribute_extraction.rb +0 -66
  65. data/scripts/debug_blocks_839.rb +0 -115
  66. data/scripts/debug_meta_matching.rb +0 -52
  67. data/scripts/debug_p_matching.rb +0 -192
  68. data/scripts/debug_signature_matching.rb +0 -118
  69. data/scripts/debug_sourcecode_124.rb +0 -32
  70. data/scripts/debug_whitespace_sensitive.rb +0 -192
  71. data/scripts/extract_false_positives.rb +0 -138
  72. data/scripts/find_actual_false_positives.rb +0 -125
  73. data/scripts/investigate_all_false_positives.rb +0 -161
  74. data/scripts/investigate_batch1.rb +0 -127
  75. data/scripts/investigate_classification.rb +0 -150
  76. data/scripts/investigate_classification_detailed.rb +0 -190
  77. data/scripts/investigate_common_failures.rb +0 -342
  78. data/scripts/investigate_false_negative.rb +0 -80
  79. data/scripts/investigate_false_positive.rb +0 -83
  80. data/scripts/investigate_false_positives.rb +0 -227
  81. data/scripts/investigate_false_positives_batch.rb +0 -163
  82. data/scripts/investigate_mixed_content.rb +0 -125
  83. data/scripts/investigate_remaining_16.rb +0 -214
  84. data/scripts/run_single_test.rb +0 -29
  85. data/scripts/test_all_false_positives.rb +0 -95
  86. data/scripts/test_attribute_details.rb +0 -61
  87. data/scripts/test_both_algorithms.rb +0 -49
  88. data/scripts/test_both_simple.rb +0 -49
  89. data/scripts/test_enhanced_semantic_output.rb +0 -125
  90. data/scripts/test_readme_examples.rb +0 -131
  91. data/scripts/test_semantic_tree_diff.rb +0 -99
  92. data/scripts/test_semantic_ux_improvements.rb +0 -135
  93. data/scripts/test_single_false_positive.rb +0 -119
  94. data/scripts/test_size_limits.rb +0 -99
  95. data/test_html_1.html +0 -21
  96. data/test_html_2.html +0 -21
  97. data/test_nokogiri.rb +0 -33
  98. data/test_normalize.rb +0 -45
@@ -1,85 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
# Extract the locations of failing examples from an RSpec log file.
#
# Every line is first stripped of ANSI SGR colour sequences and then matched
# against the "rspec ./spec/<path>:<line>" format of RSpec's failure summary.
#
# @param log_file [String] path of the log file to scan
# @return [Array<String>] the matched "./spec/<path>:<line>" locations in file
#   order (duplicates are kept)
def parse_failures(log_file)
  File.foreach(log_file).each_with_object([]) do |line, failures|
    # An SGR sequence carries zero or more ";"-separated numeric parameters
    # ("\e[0m", "\e[1;31m", "\e[m"); strip every form, otherwise a leftover
    # escape in front of "rspec" defeats the line-start anchor below.
    clean_line = line.gsub(/\e\[[\d;]*m/, "")

    match = clean_line.match(%r{^rspec (\./spec/\S+:\d+)})
    failures << match[1] if match
  end
end
21
-
22
# Main: summarise the semantic-tree failures and, when the DOM baseline log is
# present, split the two failure lists into false positives, false negatives
# and common failures.
semantic_log = "/tmp/semantic_after_mixed_content_fix.log"
dom_log = "/tmp/dom_baseline.log"

heavy_rule = "=" * 80
light_rule = "-" * 80

puts heavy_rule, "Current Semantic Tree Algorithm Status", heavy_rule

if File.exist?(semantic_log)
  semantic_failures = parse_failures(semantic_log)
  puts "\nSemantic failures: #{semantic_failures.size}"

  if File.exist?(dom_log)
    dom_failures = parse_failures(dom_log)
    puts "DOM failures: #{dom_failures.size}"

    # Set-style arithmetic on the two location lists
    only_semantic = semantic_failures - dom_failures
    only_dom = dom_failures - semantic_failures
    shared = semantic_failures & dom_failures

    puts "\n#{heavy_rule}", "Comparison with DOM Baseline", heavy_rule
    puts "False Positives (semantic fails, DOM passes): #{only_semantic.size}"
    puts "False Negatives (semantic passes, DOM fails): #{only_dom.size}"
    puts "Common failures (both fail): #{shared.size}"

    # Each listing is printed only when it has at least one entry
    [
      ["FALSE POSITIVES (Need to fix - #{only_semantic.size}):", only_semantic],
      ["FALSE NEGATIVES (Investigate - #{only_dom.size}):", only_dom],
    ].each do |heading, locations|
      next if locations.empty?

      puts "\n#{light_rule}", heading, light_rule
      locations.sort.each { |location| puts " #{location}" }
    end

    # Progress tracking against the hard-coded earlier snapshots
    puts "\n#{heavy_rule}", "Progress Tracking", heavy_rule
    puts "Initial state: 62 failures (29 FP, 5 FN, 33 common)"
    puts "After metadata: 56 failures (23 FP, 5 FN, 33 common)"
    puts "After mixed content: #{semantic_failures.size} failures (#{only_semantic.size} FP, #{only_dom.size} FN, #{shared.size} common)"
    puts "Target (DOM parity): #{dom_failures.size} failures (0 FP, 0 FN, #{dom_failures.size} common)"

    puts "\nImprovement: #{56 - semantic_failures.size} tests fixed"
    puts "Remaining gap: #{semantic_failures.size - dom_failures.size} tests"

  else
    puts "\nWarning: DOM baseline not found at #{dom_log}"
    puts "Semantic failures:"
    semantic_failures.sort.each { |location| puts " #{location}" }
  end
else
  puts "Error: Semantic log not found at #{semantic_log}"
end
@@ -1,114 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Analyze false positive patterns to identify systematic issues
5
- # Usage: ruby scripts/analyze_false_positives.rb (takes no arguments; input is read from /tmp/failure_comparison.txt)
6
-
7
- require "json"
8
-
9
# Parse false positives from comparison results.
#
# The report is scanned for the section that starts at a line containing
# "FALSE POSITIVES" and ends at the first line containing "FALSE NEGATIVES".
# Inside that section every line shaped like "blocks_spec.rb:352" (optionally
# indented) yields one entry; all other lines are ignored.
#
# @param file [String] path of the comparison report
# @return [Array<Hash>] one `{ file: String, line: Integer }` hash per entry,
#   in report order
def parse_false_positives(file = "/tmp/failure_comparison.txt")
  # Digits are allowed in the file name so that specs such as
  # "i18n_spec.rb" are not silently dropped.
  entry_pattern = /^\s*([a-z0-9_]+_spec\.rb):(\d+)/
  false_positives = []
  in_section = false

  File.foreach(file) do |line|
    if line.include?("FALSE POSITIVES")
      in_section = true
      next
    end
    break if line.include?("FALSE NEGATIVES")
    next unless in_section

    match = entry_pattern.match(line)
    false_positives << { file: match[1], line: match[2].to_i } if match
  end

  false_positives
end
34
-
35
# Run one spec example through the shell and capture what it printed.
#
# The command changes into a fixed isodoc checkout, sets
# CANON_ALGORITHM=semantic and redirects stderr into stdout, so everything the
# run prints ends up in the returned :output.
#
# @param spec_file [String] spec file name under ./spec/isodoc/
# @param line_num [Integer] line number of the example to run
# @return [Hash] :spec ("file:line"), :output (captured text), :has_diff
#   (whether the output contains "expected") and :diff_preview
def run_test(spec_file, line_num)
  target = "#{spec_file}:#{line_num}"
  command = "cd /Users/mulgogi/src/mn/isodoc && CANON_ALGORITHM=semantic bundle exec rspec ./spec/isodoc/#{target} 2>&1"
  captured = %x(#{command})

  {
    spec: target,
    output: captured,
    has_diff: captured.include?("expected"),
    diff_preview: extract_diff_preview(captured),
  }
end
47
-
48
# Cut a short preview out of a test run's output.
#
# The preview starts at the first line containing "expected" or "Diff:" and
# spans at most `max_lines` lines, the marker line included. A preview longer
# than `max_chars` characters is cut to exactly `max_chars` characters and
# suffixed with "...".
#
# @param output [String] complete output of a test run
# @param max_lines [Integer] maximum number of lines in the preview
# @param max_chars [Integer] maximum preview length before truncation
# @return [String, nil] the preview, or nil when no marker line is present
def extract_diff_preview(output, max_lines: 10, max_chars: 500)
  lines = output.lines
  diff_start = lines.index { |l| l.include?("expected") || l.include?("Diff:") }
  return nil unless diff_start

  preview = lines[diff_start, max_lines].join
  # Slice by (start, length): an inclusive 0..max_chars range would keep one
  # character more than the limit being enforced.
  preview.length > max_chars ? "#{preview[0, max_chars]}..." : preview
end
57
-
58
# Entry point: print a per-file breakdown of the parsed false positives,
# re-run a sample of them (the first two entries of each of the five largest
# groups) and write the collected previews to
# /tmp/false_positive_analysis.json.
#
# @return [void]
def main
  rule = "=" * 80

  puts "Analyzing false positive patterns..."
  puts rule

  false_positives = parse_false_positives
  puts "\nFound #{false_positives.size} false positives to analyze"

  # Group by spec file, largest group first. Hash#sort_by returns an Array of
  # [file, items] pairs, so the ranking must be walked with #each, not with
  # Hash-only iterators such as #each_value.
  by_file = false_positives.group_by { |fp| fp[:file] }
  ranked = by_file.sort_by { |_file, items| -items.size }

  puts "\nBreakdown by file:"
  ranked.each do |file, items|
    puts " #{file}: #{items.size} failures"
  end

  # Sample a few from each top category
  puts "\n#{rule}"
  puts "SAMPLING TOP FAILURES FOR PATTERN ANALYSIS"
  puts rule

  samples = []

  ranked.take(5).each do |_file, items|
    items.take(2).each do |item|
      puts "\n#{'-' * 80}"
      puts "Testing: #{item[:file]}:#{item[:line]}"
      puts "-" * 80

      result = run_test(item[:file], item[:line])
      samples << result

      if result[:has_diff]
        puts "\nDiff Preview:"
        puts result[:diff_preview]
      else
        puts "\nNo diff found in output"
      end
    end
  end

  # Save results
  output = {
    total_false_positives: false_positives.size,
    by_file: by_file.transform_values(&:size),
    samples: samples.map { |s| { spec: s[:spec], diff_preview: s[:diff_preview] } },
  }

  File.write("/tmp/false_positive_analysis.json", JSON.pretty_generate(output))
  puts "\n#{rule}"
  puts "Analysis saved to /tmp/false_positive_analysis.json"
  puts rule
end

main if __FILE__ == $PROGRAM_NAME
@@ -1,105 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Compare semantic tree failures vs DOM diff failures
5
- # to identify remaining false positives
6
-
7
# Set and Enumerable#to_set are only available without an explicit require
# from Ruby 3.2 onwards; load the library so older interpreters work too.
require "set"

# DOM diff failures (39 - the baseline/correct)
dom_failures = %w[
  blocks_notes_spec.rb:494
  blocks_provisions_spec.rb:4
  blocks_spec.rb:4
  blocks_spec.rb:1062
  cleanup_spec.rb:180
  cleanup_spec.rb:347
  figures_spec.rb:5
  figures_spec.rb:1662
  figures_spec.rb:1764
  figures_spec.rb:1815
  footnotes_spec.rb:5
  i18n_spec.rb:1644
  inline_spec.rb:610
  inline_spec.rb:726
  inline_spec.rb:2114
  lists_spec.rb:4
  lists_spec.rb:817
  postproc_spec.rb:1010
  postproc_spec.rb:1084
  postproc_word_spec.rb:89
  presentation_xml_metadata_spec.rb:75
  presentation_xml_spec.rb:454
  ref_identifier_spec.rb:446
  ref_identifier_spec.rb:602
  ref_identifier_spec.rb:671
  ref_identifier_spec.rb:872
  ref_spec.rb:4
  ref_spec.rb:1511
  ref_spec.rb:1705
  section_spec.rb:4
  section_spec.rb:736
  section_title_spec.rb:4
  sourcecode_spec.rb:4
  sourcecode_spec.rb:838
  table_debug_spec.rb:4
  table_spec.rb:4
  table_spec.rb:811
  table_spec.rb:1683
  table_spec.rb:1906
].to_set

# Read semantic failures from file: keep the "<spec file>:<line>" part that
# sits between "rspec ./spec/isodoc/" and the "# " introducing the description.
semantic_failures =
  File.foreach("/tmp/semantic_failures.txt").each_with_object(Set.new) do |line, failures|
    match = line.match(%r{rspec \./spec/isodoc/(.+?)# })
    failures << match[1].strip if match
  end
57
-
58
heavy_rule = "=" * 80
light_rule = "-" * 80

# Partition the two failure sets up front; the report below only reads them.
false_positives = semantic_failures - dom_failures # in semantic but not in DOM
false_negatives = dom_failures - semantic_failures # in DOM but not in semantic
real_failures = dom_failures & semantic_failures   # both agree

# Prints a heading, a rule, the sorted failures as bullets and a blank line.
print_listing = lambda do |heading, failures|
  puts heading
  puts light_rule
  failures.sort.each { |failure| puts " • #{failure}" }
  puts
end

puts heavy_rule
puts "SEMANTIC TREE ALGORITHM - ANALYSIS AFTER FIX"
puts heavy_rule
puts
puts "Statistics:"
puts " DOM diff failures (baseline): #{dom_failures.size}"
puts " Semantic tree failures (current): #{semantic_failures.size}"
puts " False positives (semantic only): #{false_positives.size}"
puts " False negatives (DOM only): #{false_negatives.size}"
puts

# The two mismatch listings are skipped when empty; real failures always print.
unless false_positives.empty?
  print_listing.call("FALSE POSITIVES (#{false_positives.size} remaining):", false_positives)
end

unless false_negatives.empty?
  print_listing.call("FALSE NEGATIVES (#{false_negatives.size} tests):", false_negatives)
end

print_listing.call("REAL FAILURES (#{real_failures.size} tests - both algorithms agree):", real_failures)

puts heavy_rule
puts "SUMMARY:"
puts " ✅ Fixed false positives: #{46 - false_positives.size} tests"
puts " ⚠️ Remaining false positives: #{false_positives.size} tests"
puts " ⚠️ False negatives: #{false_negatives.size} tests"
puts heavy_rule
@@ -1,95 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Direct comparison of current semantic vs DOM failures
5
- # Usage: ruby scripts/compare_current_failures.rb
6
-
7
- require "set"
8
-
9
# Collect the failing examples listed in an RSpec failure summary.
#
# Recognised lines look like
#   "rspec ./spec/isodoc/blocks_notes_spec.rb:494 # ..."
# and contribute the entry "blocks_notes_spec.rb:494".
#
# @param file [String] path of the file holding the summary lines
# @return [Set<String>] unique "<spec file>:<line>" entries
def parse_failures(file)
  summary_line = %r{rspec \./spec/isodoc/([a-z_0-9]+_spec\.rb):(\d+)}

  File.readlines(file).each_with_object(Set.new) do |line, failures|
    found = summary_line.match(line)
    failures << "#{found[1]}:#{found[2]}" if found
  end
end
19
-
20
semantic_file = "/tmp/semantic_fresh.txt"
dom_file = "/tmp/dom_fresh.txt"

heavy_rule = "=" * 80
light_rule = "-" * 80

puts "Parsing failures..."
semantic = parse_failures(semantic_file)
dom = parse_failures(dom_file)

puts "\n#{heavy_rule}", "CURRENT FAILURE STATE", heavy_rule

gap = (semantic.size - dom.size).abs

puts "\nTotal failures:"
puts " Semantic: #{semantic.size}"
puts " DOM: #{dom.size}"
puts " Gap: #{gap}"

false_positives = semantic - dom # Semantic fails, DOM passes
false_negatives = dom - semantic # DOM fails, Semantic passes
common = semantic & dom          # Both fail

# The two mismatch sets are listed in full on the console
{
  "FALSE POSITIVES (Semantic fails, DOM passes)" => false_positives,
  "FALSE NEGATIVES (DOM fails, Semantic passes)" => false_negatives,
}.each do |title, failures|
  puts "\n#{light_rule}", "#{title}: #{failures.size}", light_rule
  failures.sort.each { |failure| puts " #{failure}" }
end

puts "\n#{light_rule}", "COMMON FAILURES (Both fail): #{common.size}", light_rule
puts "(Not listing #{common.size} common failures for brevity)"

puts "\n#{heavy_rule}", "ANALYSIS", heavy_rule

fp_count = false_positives.size
fn_count = false_negatives.size

if fp_count.zero? && fn_count == 1
  puts "\n✅ PERFECT PARITY ACHIEVED!"
  puts " - No false positives (semantic not too strict)"
  puts " - Only 1 false negative (acceptable difference)"
  puts " - This is the target state!"
elsif fp_count.zero? && fn_count.zero?
  puts "\n🎉 EXACT PARITY ACHIEVED!"
  puts " - Both algorithms have identical failures"
  puts " - #{common.size} common failures"
elsif fp_count == 1 && fn_count.zero?
  puts "\n⚠️ ONE FALSE POSITIVE AWAY FROM PARITY"
  puts " - Need to fix 1 case where semantic is too strict"
  puts " - Target: #{dom.size} failures for both algorithms"
else
  puts "\n📊 Current Status:"
  puts " - #{fp_count} false positives to fix (semantic too strict)"
  puts " - #{fn_count} false negatives to address (semantic too lenient)"
  puts " - Gap from DOM: #{gap} failures"
end

# Save detailed results (all three sets, each sorted, one entry per line)
output_file = "/tmp/current_failure_analysis.txt"
File.open(output_file, "w") do |report|
  report.puts "CURRENT FAILURE ANALYSIS"
  report.puts heavy_rule
  report.puts "\nSemantic: #{semantic.size} failures"
  report.puts "DOM: #{dom.size} failures"
  report.puts "Gap: #{gap}"

  [
    ["FALSE POSITIVES", false_positives],
    ["FALSE NEGATIVES", false_negatives],
    ["COMMON FAILURES", common],
  ].each do |label, failures|
    report.puts "\n#{label} (#{failures.size}):"
    failures.sort.each { |failure| report.puts failure }
  end
end

puts "\nDetailed results saved to: #{output_file}"
@@ -1,158 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require_relative "../lib/canon"
5
-
6
# Two sample IsoDoc-style presentation XML documents used as comparison input.
# xml1 is the first document of the pair.
xml1 = <<~XML
  <iso-standard xmlns="http://riboseinc.com/isoxml" type="presentation">
  <preface>
  <clause type="toc" id="_" displayorder="1">
  <fmt-title id="_" depth="1">Table of contents</fmt-title>
  </clause>
  <foreword id="fwd" displayorder="2">
  <title id="_">Foreword</title>
  <fmt-title id="_" depth="1">
  <semx element="title" source="_">Foreword</semx>
  </fmt-title>
  <figure id="F" autonum="1">
  <fmt-name id="_">
  <span class="fmt-caption-label">
  <span class="fmt-element-name">Figure</span>
  <semx element="autonum" source="F">1</semx>
  </span>
  </fmt-name>
  <note id="FB" autonum="">
  <fmt-name id="_">
  <span class="fmt-caption-label">
  <span class="fmt-element-name">NOTE</span>
  </span>
  </fmt-name>
  <p>XYZ</p>
  </note>
  </figure>
  </foreword>
  </preface>
  </iso-standard>
XML

# xml2 differs from xml1 in the foreword title text ("Introduction"), the
# figure number (2), the note paragraph ("ABC") and one extra paragraph
# (id "new-para") at the end of the foreword.
xml2 = <<~XML
  <iso-standard xmlns="http://riboseinc.com/isoxml" type="presentation">
  <preface>
  <clause type="toc" id="_" displayorder="1">
  <fmt-title id="_" depth="1">Table of contents</fmt-title>
  </clause>
  <foreword id="fwd" displayorder="2">
  <title id="_">Introduction</title>
  <fmt-title id="_" depth="1">
  <semx element="title" source="_">Introduction</semx>
  </fmt-title>
  <figure id="F" autonum="2">
  <fmt-name id="_">
  <span class="fmt-caption-label">
  <span class="fmt-element-name">Figure</span>
  <semx element="autonum" source="F">2</semx>
  </span>
  </fmt-name>
  <note id="FB" autonum="">
  <fmt-name id="_">
  <span class="fmt-caption-label">
  <span class="fmt-element-name">NOTE</span>
  </span>
  </fmt-name>
  <p>ABC</p>
  </note>
  </figure>
  <p id="new-para">This is a new paragraph.</p>
  </foreword>
  </preface>
  </iso-standard>
XML
71
-
72
rule = "=" * 80

# Prints the "Sample differences:" heading followed by the first five entries
# of result.differences. The expected/actual lines appear only for entries
# that respond to value1/value2 and return a truthy value; values are cut to
# their first 101 characters.
show_sample_differences = lambda do |result|
  puts "Sample differences:"
  result.differences.first(5).each.with_index(1) do |diff, position|
    puts " #{position}. Dimension: #{diff.dimension}"
    puts " Expected: #{diff.value1.to_s[0..100]}" if diff.respond_to?(:value1) && diff.value1
    puts " Actual: #{diff.value2.to_s[0..100]}" if diff.respond_to?(:value2) && diff.value2
    puts
  end
end

# --- DOM algorithm ----------------------------------------------------------
puts rule
puts "DOM DIFF ALGORITHM COMPARISON"
puts rule
puts

dom_result = Canon::Comparison.equivalent?(
  xml1, xml2,
  diff_algorithm: :dom,
  verbose: true
)

puts "Algorithm: DOM DIFF"
puts "Differences count: #{dom_result.differences.length}"
puts "Operations count: #{dom_result.operations.length}"
puts
show_sample_differences.call(dom_result)

# --- Semantic tree algorithm ------------------------------------------------
puts "\n#{rule}"
puts "TREE (SEMANTIC) DIFF ALGORITHM COMPARISON"
puts rule
puts

tree_result = Canon::Comparison.equivalent?(
  xml1, xml2,
  diff_algorithm: :semantic,
  verbose: true
)

puts "Algorithm: SEMANTIC TREE DIFF"
puts "Differences count: #{tree_result.differences.length}"
puts "Operations count: #{tree_result.operations.length}"
puts

# Statistics are printed only when the result carries them
if (stats = tree_result.match_options[:tree_diff_statistics])
  puts "Tree Statistics:"
  puts " - Tree1 nodes: #{stats[:tree1_nodes]}"
  puts " - Tree2 nodes: #{stats[:tree2_nodes]}"
  puts " - Total matches: #{stats[:total_matches]}"
  puts " - Match ratio (tree1): #{(stats[:match_ratio_tree1] * 100).round(1)}%"
  puts " - Match ratio (tree2): #{(stats[:match_ratio_tree2] * 100).round(1)}%"
  puts
end

show_sample_differences.call(tree_result)

# --- First ten operations reported by the semantic run ----------------------
puts "\n#{rule}"
puts "TREE DIFF OPERATIONS"
puts rule
puts

tree_result.operations.first(10).each.with_index(1) do |op, position|
  puts " #{position}. #{op.type.to_s.upcase}"
  puts " Node: #{op.node.label}" if op.node
  puts " Details: #{op.inspect[0..150]}"
  puts
end

# --- Side-by-side summary ---------------------------------------------------
puts "\n#{rule}"
puts "COMPARISON SUMMARY"
puts rule
puts

puts "DOM Diff:"
puts " - Differences count: #{dom_result.differences.length}"
puts " - Operations: #{dom_result.operations.length}"
puts " - Has detailed diff information: #{!dom_result.differences.empty?}"
puts

puts "Tree Diff:"
puts " - Differences count: #{tree_result.differences.length}"
puts " - Operations: #{tree_result.operations.length}"
puts " - Has detailed diff information: #{!tree_result.differences.empty?}"
puts " - Has tree diff operations: #{!tree_result.operations.empty?}"
puts " - Has tree statistics: #{!tree_result.match_options[:tree_diff_statistics].nil?}"