canon 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +83 -22
  3. data/docs/Gemfile +1 -0
  4. data/docs/_config.yml +90 -1
  5. data/docs/advanced/diff-classification.adoc +196 -24
  6. data/docs/features/match-options/index.adoc +239 -1
  7. data/lib/canon/comparison/format_detector.rb +2 -1
  8. data/lib/canon/comparison/html_comparator.rb +19 -8
  9. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  10. data/lib/canon/comparison/markup_comparator.rb +109 -2
  11. data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
  12. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  13. data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
  14. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
  15. data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
  16. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
  17. data/lib/canon/comparison/xml_comparator.rb +240 -23
  18. data/lib/canon/comparison/xml_node_comparison.rb +25 -3
  19. data/lib/canon/diff/diff_classifier.rb +119 -5
  20. data/lib/canon/diff/formatting_detector.rb +1 -1
  21. data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
  22. data/lib/canon/rspec_matchers.rb +37 -8
  23. data/lib/canon/version.rb +1 -1
  24. data/lib/canon/xml/data_model.rb +24 -13
  25. metadata +4 -78
  26. data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
  27. data/false_positive_analysis.txt +0 -0
  28. data/file1.html +0 -1
  29. data/file2.html +0 -1
  30. data/old-docs/ADVANCED_TOPICS.adoc +0 -20
  31. data/old-docs/BASIC_USAGE.adoc +0 -16
  32. data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
  33. data/old-docs/CLI.adoc +0 -497
  34. data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  35. data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
  36. data/old-docs/DIFF_FORMATTING.adoc +0 -540
  37. data/old-docs/DIFF_PARAMETERS.adoc +0 -261
  38. data/old-docs/DOM_DIFF.adoc +0 -1017
  39. data/old-docs/ENV_CONFIG.adoc +0 -876
  40. data/old-docs/FORMATS.adoc +0 -867
  41. data/old-docs/INPUT_VALIDATION.adoc +0 -477
  42. data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
  43. data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
  44. data/old-docs/MATCH_OPTIONS.adoc +0 -912
  45. data/old-docs/MODES.adoc +0 -432
  46. data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  47. data/old-docs/OPTIONS.adoc +0 -1387
  48. data/old-docs/PREPROCESSING.adoc +0 -491
  49. data/old-docs/README.old.adoc +0 -2831
  50. data/old-docs/RSPEC.adoc +0 -814
  51. data/old-docs/RUBY_API.adoc +0 -485
  52. data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
  53. data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
  54. data/old-docs/STRING_COMPARE.adoc +0 -345
  55. data/old-docs/TMP.adoc +0 -3384
  56. data/old-docs/TREE_DIFF.adoc +0 -1080
  57. data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
  58. data/old-docs/VERBOSE.adoc +0 -482
  59. data/old-docs/VISUALIZATION_MAP.adoc +0 -625
  60. data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
  61. data/scripts/analyze_current_state.rb +0 -85
  62. data/scripts/analyze_false_positives.rb +0 -114
  63. data/scripts/analyze_remaining_failures.rb +0 -105
  64. data/scripts/compare_current_failures.rb +0 -95
  65. data/scripts/compare_dom_tree_diff.rb +0 -158
  66. data/scripts/compare_failures.rb +0 -151
  67. data/scripts/debug_attribute_extraction.rb +0 -66
  68. data/scripts/debug_blocks_839.rb +0 -115
  69. data/scripts/debug_meta_matching.rb +0 -52
  70. data/scripts/debug_p_matching.rb +0 -192
  71. data/scripts/debug_signature_matching.rb +0 -118
  72. data/scripts/debug_sourcecode_124.rb +0 -32
  73. data/scripts/debug_whitespace_sensitive.rb +0 -192
  74. data/scripts/extract_false_positives.rb +0 -138
  75. data/scripts/find_actual_false_positives.rb +0 -125
  76. data/scripts/investigate_all_false_positives.rb +0 -161
  77. data/scripts/investigate_batch1.rb +0 -127
  78. data/scripts/investigate_classification.rb +0 -150
  79. data/scripts/investigate_classification_detailed.rb +0 -190
  80. data/scripts/investigate_common_failures.rb +0 -342
  81. data/scripts/investigate_false_negative.rb +0 -80
  82. data/scripts/investigate_false_positive.rb +0 -83
  83. data/scripts/investigate_false_positives.rb +0 -227
  84. data/scripts/investigate_false_positives_batch.rb +0 -163
  85. data/scripts/investigate_mixed_content.rb +0 -125
  86. data/scripts/investigate_remaining_16.rb +0 -214
  87. data/scripts/run_single_test.rb +0 -29
  88. data/scripts/test_all_false_positives.rb +0 -95
  89. data/scripts/test_attribute_details.rb +0 -61
  90. data/scripts/test_both_algorithms.rb +0 -49
  91. data/scripts/test_both_simple.rb +0 -49
  92. data/scripts/test_enhanced_semantic_output.rb +0 -125
  93. data/scripts/test_readme_examples.rb +0 -131
  94. data/scripts/test_semantic_tree_diff.rb +0 -99
  95. data/scripts/test_semantic_ux_improvements.rb +0 -135
  96. data/scripts/test_single_false_positive.rb +0 -119
  97. data/scripts/test_size_limits.rb +0 -99
  98. data/test_html_1.html +0 -21
  99. data/test_html_2.html +0 -21
  100. data/test_nokogiri.rb +0 -33
  101. data/test_normalize.rb +0 -45
@@ -1,85 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
# Collect the spec locations of failed examples from an RSpec log.
#
# RSpec lists every failed example in its summary on a line such as
#   rspec ./spec/foo_spec.rb:12 # description
# Colour escape sequences are removed before matching so that the
# start-of-line anchor still applies to coloured output.
#
# @param log_file [String] path to a captured RSpec run
# @return [Array<String>] locations such as "./spec/foo_spec.rb:12", in the
#   order they appear in the log
def parse_failures(log_file)
  failures = []
  File.foreach(log_file) do |line|
    # Strip every SGR colour sequence, including multi-parameter forms such
    # as "\e[1;31m" and the bare reset "\e[m"; the previous pattern only
    # handled a single numeric parameter.
    clean_line = line.gsub(/\e\[[\d;]*m/, "")

    # Match rspec failure format
    match = clean_line.match(%r{^rspec (\./spec/\S+:\d+)})
    failures << match[1] if match
  end
  failures
end
21
-
22
# Report driver: compares the failures logged for the semantic algorithm
# with the DOM baseline and prints the differences plus a progress summary.
semantic_log = "/tmp/semantic_after_mixed_content_fix.log"
dom_log = "/tmp/dom_baseline.log"

heavy_rule = "=" * 80
light_rule = "-" * 80

# Prints each location on its own indented line, in sorted order.
list_sorted = lambda do |locations|
  locations.sort.each { |location| puts " #{location}" }
end

puts heavy_rule
puts "Current Semantic Tree Algorithm Status"
puts heavy_rule

if File.exist?(semantic_log)
  semantic_failures = parse_failures(semantic_log)
  puts "\nSemantic failures: #{semantic_failures.size}"

  if File.exist?(dom_log)
    dom_failures = parse_failures(dom_log)
    puts "DOM failures: #{dom_failures.size}"

    # Partition the two failure lists against each other.
    only_semantic = semantic_failures - dom_failures
    only_dom = dom_failures - semantic_failures
    shared = semantic_failures & dom_failures

    puts "\n#{heavy_rule}"
    puts "Comparison with DOM Baseline"
    puts heavy_rule
    puts "False Positives (semantic fails, DOM passes): #{only_semantic.size}"
    puts "False Negatives (semantic passes, DOM fails): #{only_dom.size}"
    puts "Common failures (both fail): #{shared.size}"

    unless only_semantic.empty?
      puts "\n#{light_rule}"
      puts "FALSE POSITIVES (Need to fix - #{only_semantic.size}):"
      puts light_rule
      list_sorted.call(only_semantic)
    end

    unless only_dom.empty?
      puts "\n#{light_rule}"
      puts "FALSE NEGATIVES (Investigate - #{only_dom.size}):"
      puts light_rule
      list_sorted.call(only_dom)
    end

    # Progress tracking; the first two rows are fixed historical figures.
    puts "\n#{heavy_rule}"
    puts "Progress Tracking"
    puts heavy_rule
    puts "Initial state: 62 failures (29 FP, 5 FN, 33 common)"
    puts "After metadata: 56 failures (23 FP, 5 FN, 33 common)"
    puts "After mixed content: #{semantic_failures.size} failures (#{only_semantic.size} FP, #{only_dom.size} FN, #{shared.size} common)"
    puts "Target (DOM parity): #{dom_failures.size} failures (0 FP, 0 FN, #{dom_failures.size} common)"

    # 56 is the failure count of the "After metadata" row above.
    fixed_since_metadata = 56 - semantic_failures.size
    gap_to_dom = semantic_failures.size - dom_failures.size
    puts "\nImprovement: #{fixed_since_metadata} tests fixed"
    puts "Remaining gap: #{gap_to_dom} tests"

  else
    puts "\nWarning: DOM baseline not found at #{dom_log}"
    puts "Semantic failures:"
    list_sorted.call(semantic_failures)
  end
else
  puts "Error: Semantic log not found at #{semantic_log}"
end
@@ -1,114 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Analyze false positive patterns to identify systematic issues
5
- # Usage: ruby scripts/analyze_false_positives.rb /tmp/semantic_failures_final.txt
6
-
7
- require "json"
8
-
9
# Parse the false-positive entries out of a failure comparison report.
#
# Only lines after the first line containing "FALSE POSITIVES" are
# considered, and reading stops at the first line containing
# "FALSE NEGATIVES". Within that section every line that starts (after
# optional indentation) with "<name>_spec.rb:<line>" becomes an entry.
#
# @param file [String] path of the comparison report; defaults to the
#   location this script has always read
# @return [Array<Hash>] entries shaped like { file: String, line: Integer }
def parse_false_positives(file = "/tmp/failure_comparison.txt")
  false_positives = []
  in_section = false

  File.foreach(file) do |line|
    if line.include?("FALSE POSITIVES")
      in_section = true
      next
    elsif line.include?("FALSE NEGATIVES")
      break
    end

    next unless in_section

    # Parse line like "blocks_spec.rb:352". Digits are accepted in the file
    # name so that specs such as "iso8601_spec.rb" are not silently skipped.
    # Blank lines simply fail to match.
    match = line.match(/^\s*([a-z_0-9]+_spec\.rb):(\d+)/)
    false_positives << { file: match[1], line: match[2].to_i } if match
  end

  false_positives
end
34
-
35
# Execute a single isodoc example under the semantic algorithm and
# summarise what it printed.
#
# NOTE(review): the working directory is a machine-specific absolute path,
# so this only works where that isodoc checkout exists.
#
# @param spec_file [String] spec file name, relative to spec/isodoc
# @param line_num [Integer] line of the example to run
# @return [Hash] :spec ("file:line"), :output (combined stdout/stderr),
#   :has_diff (whether the output mentions "expected") and :diff_preview
def run_test(spec_file, line_num)
  location = "#{spec_file}:#{line_num}"
  command = "cd /Users/mulgogi/src/mn/isodoc && CANON_ALGORITHM=semantic bundle exec rspec ./spec/isodoc/#{location} 2>&1"
  captured = %x(#{command})

  summary = { spec: location, output: captured }
  summary[:has_diff] = captured.include?("expected")
  summary[:diff_preview] = extract_diff_preview(captured)
  summary
end
47
-
48
# Build a short preview of the diff section of an RSpec run.
#
# The preview starts at the first line containing "expected" or "Diff:" and
# spans at most 10 lines (the marker line included). Previews longer than
# 500 characters are cut to exactly 500 characters and suffixed with "...".
#
# @param output [String] captured RSpec output
# @return [String, nil] the preview, or nil when no marker line is present
def extract_diff_preview(output)
  lines = output.lines
  diff_start = lines.index { |l| l.include?("expected") || l.include?("Diff:") }
  return nil unless diff_start

  # Marker line plus the nine lines that follow it
  preview = lines[diff_start, 10].join

  # [0, 500] keeps exactly 500 characters; the former [0..500] range kept 501.
  preview.length > 500 ? "#{preview[0, 500]}..." : preview
end
57
-
58
# Entry point: summarise the parsed false positives per spec file, run a
# sample of them, print a diff preview for each and save the findings to
# /tmp/false_positive_analysis.json.
#
# Up to two examples are sampled from each of the five spec files with the
# most false positives.
#
# @return [void]
def main
  rule = "=" * 80

  puts "Analyzing false positive patterns..."
  puts rule

  false_positives = parse_false_positives
  puts "\nFound #{false_positives.size} false positives to analyze"

  # Group by spec file, then rank the files by descending failure count.
  by_file = false_positives.group_by { |fp| fp[:file] }
  ranked = by_file.sort_by { |_, items| -items.size }

  puts "\nBreakdown by file:"
  ranked.each do |file, items|
    puts " #{file}: #{items.size} failures"
  end

  # Sample a few from each top category
  puts "\n#{rule}"
  puts "SAMPLING TOP FAILURES FOR PATTERN ANALYSIS"
  puts rule

  samples = []

  # sort_by returns an Array of [file, items] pairs, so iterate the pairs
  # directly; Array has no each_value and the old call raised NoMethodError.
  ranked.first(5).each do |_file, items|
    items.first(2).each do |item|
      puts "\n#{'-' * 80}"
      puts "Testing: #{item[:file]}:#{item[:line]}"
      puts "-" * 80

      result = run_test(item[:file], item[:line])
      samples << result

      if result[:has_diff]
        puts "\nDiff Preview:"
        puts result[:diff_preview]
      else
        puts "\nNo diff found in output"
      end
    end
  end

  # Save results
  output = {
    total_false_positives: false_positives.size,
    by_file: by_file.transform_values(&:size),
    samples: samples.map do |s|
      { spec: s[:spec], diff_preview: s[:diff_preview] }
    end,
  }

  File.write("/tmp/false_positive_analysis.json", JSON.pretty_generate(output))
  puts "\n#{rule}"
  puts "Analysis saved to /tmp/false_positive_analysis.json"
  puts rule
end
113
-
114
- main if __FILE__ == $PROGRAM_NAME
@@ -1,105 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
# Compare semantic tree failures vs DOM diff failures
# to identify remaining false positives

# Array#to_set comes from the "set" library, which is only loaded
# automatically from Ruby 3.2 on; require it so older interpreters work too.
require "set"

# DOM diff failures (39 - the baseline/correct)
dom_failures = %w[
  blocks_notes_spec.rb:494
  blocks_provisions_spec.rb:4
  blocks_spec.rb:4
  blocks_spec.rb:1062
  cleanup_spec.rb:180
  cleanup_spec.rb:347
  figures_spec.rb:5
  figures_spec.rb:1662
  figures_spec.rb:1764
  figures_spec.rb:1815
  footnotes_spec.rb:5
  i18n_spec.rb:1644
  inline_spec.rb:610
  inline_spec.rb:726
  inline_spec.rb:2114
  lists_spec.rb:4
  lists_spec.rb:817
  postproc_spec.rb:1010
  postproc_spec.rb:1084
  postproc_word_spec.rb:89
  presentation_xml_metadata_spec.rb:75
  presentation_xml_spec.rb:454
  ref_identifier_spec.rb:446
  ref_identifier_spec.rb:602
  ref_identifier_spec.rb:671
  ref_identifier_spec.rb:872
  ref_spec.rb:4
  ref_spec.rb:1511
  ref_spec.rb:1705
  section_spec.rb:4
  section_spec.rb:736
  section_title_spec.rb:4
  sourcecode_spec.rb:4
  sourcecode_spec.rb:838
  table_debug_spec.rb:4
  table_spec.rb:4
  table_spec.rb:811
  table_spec.rb:1683
  table_spec.rb:1906
].to_set

# Read semantic failures from file. Each rspec summary line contributes the
# text between "./spec/isodoc/" and the following "# " (the "file:line"
# part); lines that do not match are dropped.
semantic_failures = File.readlines("/tmp/semantic_failures.txt").map do |line|
  match = line.match(%r{rspec \./spec/isodoc/(.+?)# })
  match && match[1].strip
end.compact.to_set

# False positives: in semantic but not in DOM.
false_positives = semantic_failures - dom_failures
# False negatives: in DOM but not in semantic.
false_negatives = dom_failures - semantic_failures
# Real failures: both algorithms agree.
real_failures = dom_failures & semantic_failures

puts "=" * 80
puts "SEMANTIC TREE ALGORITHM - ANALYSIS AFTER FIX"
puts "=" * 80
puts
puts "Statistics:"
puts " DOM diff failures (baseline): #{dom_failures.size}"
puts " Semantic tree failures (current): #{semantic_failures.size}"
puts " False positives (semantic only): #{false_positives.size}"
puts " False negatives (DOM only): #{false_negatives.size}"
puts

if false_positives.any?
  puts "FALSE POSITIVES (#{false_positives.size} remaining):"
  puts "-" * 80
  false_positives.sort.each do |failure|
    puts " • #{failure}"
  end
  puts
end

if false_negatives.any?
  puts "FALSE NEGATIVES (#{false_negatives.size} tests):"
  puts "-" * 80
  false_negatives.sort.each do |failure|
    puts " • #{failure}"
  end
  puts
end

puts "REAL FAILURES (#{real_failures.size} tests - both algorithms agree):"
puts "-" * 80
real_failures.sort.each do |failure|
  puts " • #{failure}"
end
puts

puts "=" * 80
puts "SUMMARY:"
# NOTE(review): 46 is presumably the false-positive count before the fix;
# confirm against the earlier run.
puts " ✅ Fixed false positives: #{46 - false_positives.size} tests"
puts " ⚠️ Remaining false positives: #{false_positives.size} tests"
puts " ⚠️ False negatives: #{false_negatives.size} tests"
puts "=" * 80
@@ -1,95 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Direct comparison of current semantic vs DOM failures
5
- # Usage: ruby scripts/compare_current_failures.rb
6
-
7
- require "set"
8
-
9
# Gather the distinct failing examples listed in an rspec failure summary.
#
# Recognised lines look like
#   rspec ./spec/isodoc/blocks_notes_spec.rb:494 # ...
# and contribute the entry "blocks_notes_spec.rb:494".
#
# @param file [String] path of the captured rspec output
# @return [Set<String>] "file:line" entries
def parse_failures(file)
  pattern = %r{rspec \./spec/isodoc/([a-z_0-9]+_spec\.rb):(\d+)}

  File.readlines(file).each_with_object(Set.new) do |row, found|
    hit = pattern.match(row)
    found << "#{hit[1]}:#{hit[2]}" if hit
  end
end
19
-
20
# Report driver: loads both failure lists, prints how they differ and
# writes the full breakdown to a text file.
semantic_file = "/tmp/semantic_fresh.txt"
dom_file = "/tmp/dom_fresh.txt"

heavy_rule = "=" * 80
light_rule = "-" * 80

puts "Parsing failures..."
semantic = parse_failures(semantic_file)
dom = parse_failures(dom_file)

# Absolute difference between the two failure counts.
gap = (semantic.size - dom.size).abs

puts "\n#{heavy_rule}"
puts "CURRENT FAILURE STATE"
puts heavy_rule

puts "\nTotal failures:"
puts " Semantic: #{semantic.size}"
puts " DOM: #{dom.size}"
puts " Gap: #{gap}"

false_positives = semantic - dom # Semantic fails, DOM passes
false_negatives = dom - semantic # DOM fails, Semantic passes
common = semantic & dom # Both fail

puts "\n#{light_rule}"
puts "FALSE POSITIVES (Semantic fails, DOM passes): #{false_positives.size}"
puts light_rule
false_positives.sort.each { |entry| puts " #{entry}" }

puts "\n#{light_rule}"
puts "FALSE NEGATIVES (DOM fails, Semantic passes): #{false_negatives.size}"
puts light_rule
false_negatives.sort.each { |entry| puts " #{entry}" }

puts "\n#{light_rule}"
puts "COMMON FAILURES (Both fail): #{common.size}"
puts light_rule
puts "(Not listing #{common.size} common failures for brevity)"

puts "\n#{heavy_rule}"
puts "ANALYSIS"
puts heavy_rule

# The verdict depends only on the pair (false positives, false negatives).
case [false_positives.size, false_negatives.size]
when [0, 1]
  puts "\n✅ PERFECT PARITY ACHIEVED!"
  puts " - No false positives (semantic not too strict)"
  puts " - Only 1 false negative (acceptable difference)"
  puts " - This is the target state!"
when [0, 0]
  puts "\n🎉 EXACT PARITY ACHIEVED!"
  puts " - Both algorithms have identical failures"
  puts " - #{common.size} common failures"
when [1, 0]
  puts "\n⚠️ ONE FALSE POSITIVE AWAY FROM PARITY"
  puts " - Need to fix 1 case where semantic is too strict"
  puts " - Target: #{dom.size} failures for both algorithms"
else
  puts "\n📊 Current Status:"
  puts " - #{false_positives.size} false positives to fix (semantic too strict)"
  puts " - #{false_negatives.size} false negatives to address (semantic too lenient)"
  puts " - Gap from DOM: #{gap} failures"
end

# Save detailed results
output_file = "/tmp/current_failure_analysis.txt"
File.open(output_file, "w") do |report|
  report.puts "CURRENT FAILURE ANALYSIS"
  report.puts heavy_rule
  report.puts "\nSemantic: #{semantic.size} failures"
  report.puts "DOM: #{dom.size} failures"
  report.puts "Gap: #{gap}"

  report.puts "\nFALSE POSITIVES (#{false_positives.size}):"
  false_positives.sort.each { |entry| report.puts entry }

  report.puts "\nFALSE NEGATIVES (#{false_negatives.size}):"
  false_negatives.sort.each { |entry| report.puts entry }

  report.puts "\nCOMMON FAILURES (#{common.size}):"
  common.sort.each { |entry| report.puts entry }
end

puts "\nDetailed results saved to: #{output_file}"
@@ -1,158 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require_relative "../lib/canon"
5
-
6
# Two IsoDoc-style presentation documents. The second differs from the
# first in the foreword title, the figure number, the note text and one
# extra paragraph.
xml1 = <<~XML
  <iso-standard xmlns="http://riboseinc.com/isoxml" type="presentation">
  <preface>
  <clause type="toc" id="_" displayorder="1">
  <fmt-title id="_" depth="1">Table of contents</fmt-title>
  </clause>
  <foreword id="fwd" displayorder="2">
  <title id="_">Foreword</title>
  <fmt-title id="_" depth="1">
  <semx element="title" source="_">Foreword</semx>
  </fmt-title>
  <figure id="F" autonum="1">
  <fmt-name id="_">
  <span class="fmt-caption-label">
  <span class="fmt-element-name">Figure</span>
  <semx element="autonum" source="F">1</semx>
  </span>
  </fmt-name>
  <note id="FB" autonum="">
  <fmt-name id="_">
  <span class="fmt-caption-label">
  <span class="fmt-element-name">NOTE</span>
  </span>
  </fmt-name>
  <p>XYZ</p>
  </note>
  </figure>
  </foreword>
  </preface>
  </iso-standard>
XML

xml2 = <<~XML
  <iso-standard xmlns="http://riboseinc.com/isoxml" type="presentation">
  <preface>
  <clause type="toc" id="_" displayorder="1">
  <fmt-title id="_" depth="1">Table of contents</fmt-title>
  </clause>
  <foreword id="fwd" displayorder="2">
  <title id="_">Introduction</title>
  <fmt-title id="_" depth="1">
  <semx element="title" source="_">Introduction</semx>
  </fmt-title>
  <figure id="F" autonum="2">
  <fmt-name id="_">
  <span class="fmt-caption-label">
  <span class="fmt-element-name">Figure</span>
  <semx element="autonum" source="F">2</semx>
  </span>
  </fmt-name>
  <note id="FB" autonum="">
  <fmt-name id="_">
  <span class="fmt-caption-label">
  <span class="fmt-element-name">NOTE</span>
  </span>
  </fmt-name>
  <p>ABC</p>
  </note>
  </figure>
  <p id="new-para">This is a new paragraph.</p>
  </foreword>
  </preface>
  </iso-standard>
XML

heavy_rule = "=" * 80

# Prints a framed section title followed by a blank line. Every section
# except the first is preceded by an empty line.
section = lambda do |title, first = false|
  puts(first ? heavy_rule : "\n#{heavy_rule}")
  puts title
  puts heavy_rule
  puts
end

# Prints the algorithm label and the size of both result collections.
overview = lambda do |label, result|
  puts "Algorithm: #{label}"
  puts "Differences count: #{result.differences.length}"
  puts "Operations count: #{result.operations.length}"
  puts
end

# Prints up to five differences; values are shown only when the difference
# object exposes them and they are set.
sample_differences = lambda do |result|
  puts "Sample differences:"
  result.differences.first(5).each_with_index do |diff, i|
    puts " #{i + 1}. Dimension: #{diff.dimension}"
    puts " Expected: #{diff.value1.to_s[0..100]}" if diff.respond_to?(:value1) && diff.value1
    puts " Actual: #{diff.value2.to_s[0..100]}" if diff.respond_to?(:value2) && diff.value2
    puts
  end
end

# Prints the summary lines shared by both algorithms.
summary = lambda do |label, result|
  puts label
  puts " - Differences count: #{result.differences.length}"
  puts " - Operations: #{result.operations.length}"
  puts " - Has detailed diff information: #{!result.differences.empty?}"
end

section.call("DOM DIFF ALGORITHM COMPARISON", true)

dom_result = Canon::Comparison.equivalent?(
  xml1, xml2,
  diff_algorithm: :dom,
  verbose: true
)

overview.call("DOM DIFF", dom_result)
sample_differences.call(dom_result)

section.call("TREE (SEMANTIC) DIFF ALGORITHM COMPARISON")

tree_result = Canon::Comparison.equivalent?(
  xml1, xml2,
  diff_algorithm: :semantic,
  verbose: true
)

overview.call("SEMANTIC TREE DIFF", tree_result)

stats = tree_result.match_options[:tree_diff_statistics]
if stats
  puts "Tree Statistics:"
  puts " - Tree1 nodes: #{stats[:tree1_nodes]}"
  puts " - Tree2 nodes: #{stats[:tree2_nodes]}"
  puts " - Total matches: #{stats[:total_matches]}"
  puts " - Match ratio (tree1): #{(stats[:match_ratio_tree1] * 100).round(1)}%"
  puts " - Match ratio (tree2): #{(stats[:match_ratio_tree2] * 100).round(1)}%"
  puts
end

sample_differences.call(tree_result)

section.call("TREE DIFF OPERATIONS")

tree_result.operations.first(10).each_with_index do |op, i|
  puts " #{i + 1}. #{op.type.to_s.upcase}"
  puts " Node: #{op.node.label}" if op.node
  puts " Details: #{op.inspect[0..150]}"
  puts
end

section.call("COMPARISON SUMMARY")

summary.call("DOM Diff:", dom_result)
puts

summary.call("Tree Diff:", tree_result)
puts " - Has tree diff operations: #{!tree_result.operations.empty?}"
puts " - Has tree statistics: #{!tree_result.match_options[:tree_diff_statistics].nil?}"