canon 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +83 -22
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +196 -24
- data/docs/features/match-options/index.adoc +239 -1
- data/lib/canon/comparison/format_detector.rb +2 -1
- data/lib/canon/comparison/html_comparator.rb +19 -8
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/markup_comparator.rb +109 -2
- data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
- data/lib/canon/comparison/xml_comparator.rb +240 -23
- data/lib/canon/comparison/xml_node_comparison.rb +25 -3
- data/lib/canon/diff/diff_classifier.rb +119 -5
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
- data/lib/canon/rspec_matchers.rb +37 -8
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +4 -78
- data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
- data/false_positive_analysis.txt +0 -0
- data/file1.html +0 -1
- data/file2.html +0 -1
- data/old-docs/ADVANCED_TOPICS.adoc +0 -20
- data/old-docs/BASIC_USAGE.adoc +0 -16
- data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
- data/old-docs/CLI.adoc +0 -497
- data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/old-docs/DIFF_FORMATTING.adoc +0 -540
- data/old-docs/DIFF_PARAMETERS.adoc +0 -261
- data/old-docs/DOM_DIFF.adoc +0 -1017
- data/old-docs/ENV_CONFIG.adoc +0 -876
- data/old-docs/FORMATS.adoc +0 -867
- data/old-docs/INPUT_VALIDATION.adoc +0 -477
- data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
- data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/old-docs/MATCH_OPTIONS.adoc +0 -912
- data/old-docs/MODES.adoc +0 -432
- data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/old-docs/OPTIONS.adoc +0 -1387
- data/old-docs/PREPROCESSING.adoc +0 -491
- data/old-docs/README.old.adoc +0 -2831
- data/old-docs/RSPEC.adoc +0 -814
- data/old-docs/RUBY_API.adoc +0 -485
- data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
- data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
- data/old-docs/STRING_COMPARE.adoc +0 -345
- data/old-docs/TMP.adoc +0 -3384
- data/old-docs/TREE_DIFF.adoc +0 -1080
- data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
- data/old-docs/VERBOSE.adoc +0 -482
- data/old-docs/VISUALIZATION_MAP.adoc +0 -625
- data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
- data/scripts/analyze_current_state.rb +0 -85
- data/scripts/analyze_false_positives.rb +0 -114
- data/scripts/analyze_remaining_failures.rb +0 -105
- data/scripts/compare_current_failures.rb +0 -95
- data/scripts/compare_dom_tree_diff.rb +0 -158
- data/scripts/compare_failures.rb +0 -151
- data/scripts/debug_attribute_extraction.rb +0 -66
- data/scripts/debug_blocks_839.rb +0 -115
- data/scripts/debug_meta_matching.rb +0 -52
- data/scripts/debug_p_matching.rb +0 -192
- data/scripts/debug_signature_matching.rb +0 -118
- data/scripts/debug_sourcecode_124.rb +0 -32
- data/scripts/debug_whitespace_sensitive.rb +0 -192
- data/scripts/extract_false_positives.rb +0 -138
- data/scripts/find_actual_false_positives.rb +0 -125
- data/scripts/investigate_all_false_positives.rb +0 -161
- data/scripts/investigate_batch1.rb +0 -127
- data/scripts/investigate_classification.rb +0 -150
- data/scripts/investigate_classification_detailed.rb +0 -190
- data/scripts/investigate_common_failures.rb +0 -342
- data/scripts/investigate_false_negative.rb +0 -80
- data/scripts/investigate_false_positive.rb +0 -83
- data/scripts/investigate_false_positives.rb +0 -227
- data/scripts/investigate_false_positives_batch.rb +0 -163
- data/scripts/investigate_mixed_content.rb +0 -125
- data/scripts/investigate_remaining_16.rb +0 -214
- data/scripts/run_single_test.rb +0 -29
- data/scripts/test_all_false_positives.rb +0 -95
- data/scripts/test_attribute_details.rb +0 -61
- data/scripts/test_both_algorithms.rb +0 -49
- data/scripts/test_both_simple.rb +0 -49
- data/scripts/test_enhanced_semantic_output.rb +0 -125
- data/scripts/test_readme_examples.rb +0 -131
- data/scripts/test_semantic_tree_diff.rb +0 -99
- data/scripts/test_semantic_ux_improvements.rb +0 -135
- data/scripts/test_single_false_positive.rb +0 -119
- data/scripts/test_size_limits.rb +0 -99
- data/test_html_1.html +0 -21
- data/test_html_2.html +0 -21
- data/test_nokogiri.rb +0 -33
- data/test_normalize.rb +0 -45
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Parse test results from a log file
|
|
5
|
-
# Collect the locations of failed examples from an RSpec log file.
#
# Every line has its ANSI SGR colour sequences removed and is then matched
# against the "rspec ./spec/<path>:<line>" form. The "./spec/<path>:<line>"
# part of each matching line is returned, in file order.
#
# @param log_file [String] path of the log file to read
# @return [Array<String>] failure locations such as "./spec/foo_spec.rb:12"
def parse_failures(log_file)
  located = File.readlines(log_file).map do |line|
    # SGR sequences may carry several ";"-separated parameters ("\e[1;31m")
    # or none at all ("\e[m"), so accept any run of digits and semicolons
    # instead of exactly one number.
    clean_line = line.gsub(/\e\[[\d;]*m/, "")

    # String#[] with a capture index yields nil for non-matching lines.
    clean_line[%r{^rspec (\./spec/\S+:\d+)}, 1]
  end

  located.compact
end
|
|
21
|
-
|
|
22
|
-
# Main
|
|
23
|
-
# Compare the semantic algorithm's failure list with the DOM baseline and
# print a status report. Both lists are read from fixed log files.
semantic_log = "/tmp/semantic_after_mixed_content_fix.log"
dom_log = "/tmp/dom_baseline.log"

heavy_rule = "=" * 80
light_rule = "-" * 80

# Prints a titled, sorted list of specs; prints nothing for an empty list.
list_specs = lambda do |title, specs|
  if specs.any?
    puts "\n#{light_rule}", title, light_rule
    specs.sort.each { |spec| puts " #{spec}" }
  end
end

puts heavy_rule, "Current Semantic Tree Algorithm Status", heavy_rule

if File.exist?(semantic_log)
  semantic_failures = parse_failures(semantic_log)
  puts "\nSemantic failures: #{semantic_failures.size}"

  if File.exist?(dom_log)
    dom_failures = parse_failures(dom_log)
    puts "DOM failures: #{dom_failures.size}"

    # Array difference/intersection: what only one side reports, and what both do.
    false_positives = semantic_failures - dom_failures
    false_negatives = dom_failures - semantic_failures
    common = semantic_failures & dom_failures

    puts "\n#{heavy_rule}", "Comparison with DOM Baseline", heavy_rule
    puts "False Positives (semantic fails, DOM passes): #{false_positives.size}"
    puts "False Negatives (semantic passes, DOM fails): #{false_negatives.size}"
    puts "Common failures (both fail): #{common.size}"

    list_specs.call("FALSE POSITIVES (Need to fix - #{false_positives.size}):", false_positives)
    list_specs.call("FALSE NEGATIVES (Investigate - #{false_negatives.size}):", false_negatives)

    # The first two rows are fixed historical figures; only the last two are computed.
    puts "\n#{heavy_rule}", "Progress Tracking", heavy_rule
    puts "Initial state: 62 failures (29 FP, 5 FN, 33 common)"
    puts "After metadata: 56 failures (23 FP, 5 FN, 33 common)"
    puts "After mixed content: #{semantic_failures.size} failures (#{false_positives.size} FP, #{false_negatives.size} FN, #{common.size} common)"
    puts "Target (DOM parity): #{dom_failures.size} failures (0 FP, 0 FN, #{dom_failures.size} common)"

    # 56 is the failure count printed in the "After metadata" row.
    puts "\nImprovement: #{56 - semantic_failures.size} tests fixed"
    puts "Remaining gap: #{semantic_failures.size - dom_failures.size} tests"

  else
    puts "\nWarning: DOM baseline not found at #{dom_log}"
    puts "Semantic failures:"
    semantic_failures.sort.each { |spec| puts " #{spec}" }
  end
else
  puts "Error: Semantic log not found at #{semantic_log}"
end
|
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Analyze false positive patterns to identify systematic issues
|
|
5
|
-
# Usage: ruby scripts/analyze_false_positives.rb /tmp/semantic_failures_final.txt
|
|
6
|
-
|
|
7
|
-
require "json"
|
|
8
|
-
|
|
9
|
-
# Parse false positives from comparison results
|
|
10
|
-
# Read the FALSE POSITIVES section of a failure-comparison report.
#
# Collection starts after the first line containing "FALSE POSITIVES" and
# stops at the first line containing "FALSE NEGATIVES". Inside that section
# every line shaped like "blocks_spec.rb:352" (optionally indented) yields
# one entry; all other lines are ignored.
#
# @param file [String] report to read; defaults to the fixed path used so far
# @return [Array<Hash>] entries of the form { file: "blocks_spec.rb", line: 352 }
def parse_false_positives(file = "/tmp/failure_comparison.txt")
  false_positives = []
  in_section = false

  File.foreach(file) do |line|
    if line.include?("FALSE POSITIVES")
      in_section = true
      next
    end
    break if line.include?("FALSE NEGATIVES")
    next unless in_section

    # Digits are allowed in the spec name so files such as "i18n_spec.rb"
    # are not silently dropped. Blank lines simply fail to match.
    entry = line.match(/^\s*([a-z0-9_]+_spec\.rb):(\d+)/)
    false_positives << { file: entry[1], line: entry[2].to_i } if entry
  end

  false_positives
end
|
|
34
|
-
|
|
35
|
-
# Run a specific test to capture its output
|
|
36
|
-
# Run a single isodoc example under the semantic algorithm and summarise it.
#
# Shells out through backticks to `bundle exec rspec` inside a fixed isodoc
# checkout with CANON_ALGORITHM=semantic, merging stderr into stdout.
#
# @param spec_file [String] spec file name below ./spec/isodoc/
# @param line_num [Integer] line of the example to run
# @return [Hash] :spec ("file:line"), :output (raw command output),
#   :has_diff (whether the output contains "expected") and :diff_preview
def run_test(spec_file, line_num)
  spec = "#{spec_file}:#{line_num}"
  command = "cd /Users/mulgogi/src/mn/isodoc && " \
            "CANON_ALGORITHM=semantic bundle exec rspec ./spec/isodoc/#{spec} 2>&1"
  captured = `#{command}`

  summary = { spec: spec, output: captured }
  summary[:has_diff] = captured.include?("expected")
  summary[:diff_preview] = extract_diff_preview(captured)
  summary
end
|
|
47
|
-
|
|
48
|
-
# Extract a short preview of the diff contained in a test run's output.
#
# The preview begins at the first line containing "expected" or "Diff:" and
# spans at most 10 lines, the marker line included. Previews longer than 500
# characters are cut to exactly 500 characters and suffixed with "...".
#
# @param output [String] captured output of a test run
# @return [String, nil] the preview, or nil when no marker line is present
def extract_diff_preview(output)
  lines = output.lines
  diff_start = lines.index { |l| l.include?("expected") || l.include?("Diff:") }
  return nil unless diff_start

  preview = lines[diff_start, 10].join
  # Slice by start/length so the cut matches the 500-character limit tested
  # above (the inclusive range 0..500 would keep 501 characters).
  preview.length > 500 ? "#{preview[0, 500]}..." : preview
end
|
|
57
|
-
|
|
58
|
-
# Analyse the recorded false positives and save a summary.
#
# Prints a per-file breakdown, re-runs up to two examples from each of the
# five spec files with the most false positives, prints a diff preview for
# each, and writes totals, per-file counts and the sampled previews as JSON
# to /tmp/false_positive_analysis.json.
#
# @return [void]
def main
  heavy_rule = "=" * 80
  light_rule = "-" * 80

  puts "Analyzing false positive patterns..."
  puts heavy_rule

  false_positives = parse_false_positives
  puts "\nFound #{false_positives.size} false positives to analyze"

  # Group by spec file, then rank files by descending number of failures.
  by_file = false_positives.group_by { |fp| fp[:file] }
  ranked = by_file.sort_by { |_, items| -items.size }

  puts "\nBreakdown by file:"
  ranked.each do |file, items|
    puts " #{file}: #{items.size} failures"
  end

  puts "\n#{heavy_rule}"
  puts "SAMPLING TOP FAILURES FOR PATTERN ANALYSIS"
  puts heavy_rule

  samples = []

  # Hash#sort_by returns an Array of [file, items] pairs, so iterate the
  # pairs directly; Array has no #each_value.
  ranked.take(5).each do |_file, items|
    items.take(2).each do |item|
      puts "\n#{light_rule}"
      puts "Testing: #{item[:file]}:#{item[:line]}"
      puts light_rule

      result = run_test(item[:file], item[:line])
      samples << result

      if result[:has_diff]
        puts "\nDiff Preview:"
        puts result[:diff_preview]
      else
        puts "\nNo diff found in output"
      end
    end
  end

  # Only the spec id and preview are saved; the raw output is left out.
  output = {
    total_false_positives: false_positives.size,
    by_file: by_file.transform_values(&:size),
    samples: samples.map { |s| { spec: s[:spec], diff_preview: s[:diff_preview] } },
  }

  File.write("/tmp/false_positive_analysis.json", JSON.pretty_generate(output))
  puts "\n#{heavy_rule}"
  puts "Analysis saved to /tmp/false_positive_analysis.json"
  puts heavy_rule
end
|
|
113
|
-
|
|
114
|
-
main if __FILE__ == $PROGRAM_NAME
|
|
@@ -1,105 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Compare semantic tree failures vs DOM diff failures
|
|
5
|
-
# to identify remaining false positives
|
|
6
|
-
|
|
7
|
-
# DOM diff failures (39 - the baseline/correct)
|
|
8
|
-
# Array#to_set is only available without this require on Ruby 3.2 and later.
require "set"

# Spec locations failing under the DOM diff algorithm, used as the baseline.
dom_failures = %w[
  blocks_notes_spec.rb:494
  blocks_provisions_spec.rb:4
  blocks_spec.rb:4
  blocks_spec.rb:1062
  cleanup_spec.rb:180
  cleanup_spec.rb:347
  figures_spec.rb:5
  figures_spec.rb:1662
  figures_spec.rb:1764
  figures_spec.rb:1815
  footnotes_spec.rb:5
  i18n_spec.rb:1644
  inline_spec.rb:610
  inline_spec.rb:726
  inline_spec.rb:2114
  lists_spec.rb:4
  lists_spec.rb:817
  postproc_spec.rb:1010
  postproc_spec.rb:1084
  postproc_word_spec.rb:89
  presentation_xml_metadata_spec.rb:75
  presentation_xml_spec.rb:454
  ref_identifier_spec.rb:446
  ref_identifier_spec.rb:602
  ref_identifier_spec.rb:671
  ref_identifier_spec.rb:872
  ref_spec.rb:4
  ref_spec.rb:1511
  ref_spec.rb:1705
  section_spec.rb:4
  section_spec.rb:736
  section_title_spec.rb:4
  sourcecode_spec.rb:4
  sourcecode_spec.rb:838
  table_debug_spec.rb:4
  table_spec.rb:4
  table_spec.rb:811
  table_spec.rb:1683
  table_spec.rb:1906
].to_set

# Read semantic failures from file: keep the "<spec file>:<line>" text that
# sits between "rspec ./spec/isodoc/" and the "# " that follows it.
semantic_lines = File.readlines("/tmp/semantic_failures.txt")
semantic_failures = semantic_lines.map do |line|
  line[%r{rspec \./spec/isodoc/(.+?)# }, 1]&.strip
end.compact.to_set

false_positives = semantic_failures - dom_failures # in semantic but not in DOM
false_negatives = dom_failures - semantic_failures # in DOM but not in semantic
real_failures = dom_failures & semantic_failures   # both algorithms agree

heavy_rule = "=" * 80
light_rule = "-" * 80

puts heavy_rule
puts "SEMANTIC TREE ALGORITHM - ANALYSIS AFTER FIX"
puts heavy_rule
puts
puts "Statistics:"
puts " DOM diff failures (baseline): #{dom_failures.size}"
puts " Semantic tree failures (current): #{semantic_failures.size}"
puts " False positives (semantic only): #{false_positives.size}"
puts " False negatives (DOM only): #{false_negatives.size}"
puts

if false_positives.any?
  puts "FALSE POSITIVES (#{false_positives.size} remaining):"
  puts light_rule
  false_positives.sort.each { |failure| puts " • #{failure}" }
  puts
end

if false_negatives.any?
  puts "FALSE NEGATIVES (#{false_negatives.size} tests):"
  puts light_rule
  false_negatives.sort.each { |failure| puts " • #{failure}" }
  puts
end

puts "REAL FAILURES (#{real_failures.size} tests - both algorithms agree):"
puts light_rule
real_failures.sort.each { |failure| puts " • #{failure}" }
puts

puts heavy_rule
puts "SUMMARY:"
# 46 is a fixed reference count that the remaining false positives are subtracted from.
puts " ✅ Fixed false positives: #{46 - false_positives.size} tests"
puts " ⚠️ Remaining false positives: #{false_positives.size} tests"
puts " ⚠️ False negatives: #{false_negatives.size} tests"
puts heavy_rule
|
|
@@ -1,95 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Direct comparison of current semantic vs DOM failures
|
|
5
|
-
# Usage: ruby scripts/compare_current_failures.rb
|
|
6
|
-
|
|
7
|
-
require "set"
|
|
8
|
-
|
|
9
|
-
# Collect the failing isodoc spec locations listed in an RSpec output file.
#
# Lines are expected to contain text of the form
# "rspec ./spec/isodoc/blocks_notes_spec.rb:494 # ..."; each match adds
# "<spec file>:<line>" to the result, and other lines are skipped.
#
# @param file [String] path of the file to read
# @return [Set<String>] unique locations such as "blocks_notes_spec.rb:494"
def parse_failures(file)
  location = %r{rspec \./spec/isodoc/([a-z_0-9]+_spec\.rb):(\d+)}

  File.readlines(file).each_with_object(Set.new) do |line, found|
    hit = location.match(line)
    found << "#{hit[1]}:#{hit[2]}" if hit
  end
end
|
|
19
|
-
|
|
20
|
-
# Compare the current semantic and DOM failure sets, print a report and save
# the full listing to a text file.
semantic_file = "/tmp/semantic_fresh.txt"
dom_file = "/tmp/dom_fresh.txt"

heavy_rule = "=" * 80
light_rule = "-" * 80

puts "Parsing failures..."
semantic = parse_failures(semantic_file)
dom = parse_failures(dom_file)
gap = (semantic.size - dom.size).abs

puts "\n#{heavy_rule}", "CURRENT FAILURE STATE", heavy_rule

puts "\nTotal failures:"
puts " Semantic: #{semantic.size}"
puts " DOM: #{dom.size}"
puts " Gap: #{gap}"

false_positives = semantic - dom # Semantic fails, DOM passes
false_negatives = dom - semantic # DOM fails, Semantic passes
common = semantic & dom          # Both fail

puts "\n#{light_rule}"
puts "FALSE POSITIVES (Semantic fails, DOM passes): #{false_positives.size}"
puts light_rule
false_positives.sort.each { |spec| puts " #{spec}" }

puts "\n#{light_rule}"
puts "FALSE NEGATIVES (DOM fails, Semantic passes): #{false_negatives.size}"
puts light_rule
false_negatives.sort.each { |spec| puts " #{spec}" }

puts "\n#{light_rule}"
puts "COMMON FAILURES (Both fail): #{common.size}"
puts light_rule
puts "(Not listing #{common.size} common failures for brevity)"

puts "\n#{heavy_rule}", "ANALYSIS", heavy_rule

# Dispatch on the [false positive, false negative] counts. The three named
# states are checked in the same order as before; anything else is the
# general status.
case [false_positives.size, false_negatives.size]
when [0, 1]
  puts "\n✅ PERFECT PARITY ACHIEVED!"
  puts " - No false positives (semantic not too strict)"
  puts " - Only 1 false negative (acceptable difference)"
  puts " - This is the target state!"
when [0, 0]
  puts "\n🎉 EXACT PARITY ACHIEVED!"
  puts " - Both algorithms have identical failures"
  puts " - #{common.size} common failures"
when [1, 0]
  puts "\n⚠️ ONE FALSE POSITIVE AWAY FROM PARITY"
  puts " - Need to fix 1 case where semantic is too strict"
  puts " - Target: #{dom.size} failures for both algorithms"
else
  puts "\n📊 Current Status:"
  puts " - #{false_positives.size} false positives to fix (semantic too strict)"
  puts " - #{false_negatives.size} false negatives to address (semantic too lenient)"
  puts " - Gap from DOM: #{gap} failures"
end

# Save detailed results: one entry per line, every line newline-terminated.
output_file = "/tmp/current_failure_analysis.txt"
report_lines = [
  "CURRENT FAILURE ANALYSIS",
  heavy_rule,
  "\nSemantic: #{semantic.size} failures",
  "DOM: #{dom.size} failures",
  "Gap: #{gap}",
  "\nFALSE POSITIVES (#{false_positives.size}):",
  *false_positives.sort,
  "\nFALSE NEGATIVES (#{false_negatives.size}):",
  *false_negatives.sort,
  "\nCOMMON FAILURES (#{common.size}):",
  *common.sort,
]
File.write(output_file, report_lines.map { |entry| "#{entry}\n" }.join)

puts "\nDetailed results saved to: #{output_file}"
|
|
@@ -1,158 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
require_relative "../lib/canon"
|
|
5
|
-
|
|
6
|
-
# Sample IsoDoc-style XML with differences
|
|
7
|
-
# Sample IsoDoc-style presentation XML, first version.
xml1 = <<~XML
  <iso-standard xmlns="http://riboseinc.com/isoxml" type="presentation">
  <preface>
  <clause type="toc" id="_" displayorder="1">
  <fmt-title id="_" depth="1">Table of contents</fmt-title>
  </clause>
  <foreword id="fwd" displayorder="2">
  <title id="_">Foreword</title>
  <fmt-title id="_" depth="1">
  <semx element="title" source="_">Foreword</semx>
  </fmt-title>
  <figure id="F" autonum="1">
  <fmt-name id="_">
  <span class="fmt-caption-label">
  <span class="fmt-element-name">Figure</span>
  <semx element="autonum" source="F">1</semx>
  </span>
  </fmt-name>
  <note id="FB" autonum="">
  <fmt-name id="_">
  <span class="fmt-caption-label">
  <span class="fmt-element-name">NOTE</span>
  </span>
  </fmt-name>
  <p>XYZ</p>
  </note>
  </figure>
  </foreword>
  </preface>
  </iso-standard>
XML

# Second version: different title text, figure number and note text, plus an
# extra paragraph at the end of the foreword.
xml2 = <<~XML
  <iso-standard xmlns="http://riboseinc.com/isoxml" type="presentation">
  <preface>
  <clause type="toc" id="_" displayorder="1">
  <fmt-title id="_" depth="1">Table of contents</fmt-title>
  </clause>
  <foreword id="fwd" displayorder="2">
  <title id="_">Introduction</title>
  <fmt-title id="_" depth="1">
  <semx element="title" source="_">Introduction</semx>
  </fmt-title>
  <figure id="F" autonum="2">
  <fmt-name id="_">
  <span class="fmt-caption-label">
  <span class="fmt-element-name">Figure</span>
  <semx element="autonum" source="F">2</semx>
  </span>
  </fmt-name>
  <note id="FB" autonum="">
  <fmt-name id="_">
  <span class="fmt-caption-label">
  <span class="fmt-element-name">NOTE</span>
  </span>
  </fmt-name>
  <p>ABC</p>
  </note>
  </figure>
  <p id="new-para">This is a new paragraph.</p>
  </foreword>
  </preface>
  </iso-standard>
XML

heavy_rule = "=" * 80

# Runs the comparison of the two samples with the given diff algorithm.
compare_with = lambda do |algorithm|
  Canon::Comparison.equivalent?(
    xml1, xml2,
    diff_algorithm: algorithm,
    verbose: true
  )
end

# Prints the algorithm label followed by its difference and operation counts.
print_counts = lambda do |label, result|
  puts "Algorithm: #{label}"
  puts "Differences count: #{result.differences.length}"
  puts "Operations count: #{result.operations.length}"
  puts
end

# Prints up to five differences; values are shown only when the difference
# object exposes them and they are non-nil, cut to their first 101 characters.
print_samples = lambda do |result|
  puts "Sample differences:"
  result.differences.first(5).each_with_index do |diff, i|
    puts " #{i + 1}. Dimension: #{diff.dimension}"
    puts " Expected: #{diff.value1.to_s[0..100]}" if diff.respond_to?(:value1) && diff.value1
    puts " Actual: #{diff.value2.to_s[0..100]}" if diff.respond_to?(:value2) && diff.value2
    puts
  end
end

# Prints the summary lines shared by both algorithms.
print_summary = lambda do |result|
  puts " - Differences count: #{result.differences.length}"
  puts " - Operations: #{result.operations.length}"
  puts " - Has detailed diff information: #{!result.differences.empty?}"
end

puts heavy_rule, "DOM DIFF ALGORITHM COMPARISON", heavy_rule
puts

dom_result = compare_with.call(:dom)
print_counts.call("DOM DIFF", dom_result)
print_samples.call(dom_result)

puts "\n#{heavy_rule}", "TREE (SEMANTIC) DIFF ALGORITHM COMPARISON", heavy_rule
puts

tree_result = compare_with.call(:semantic)
print_counts.call("SEMANTIC TREE DIFF", tree_result)

# Statistics are printed only when the result carries them.
stats = tree_result.match_options[:tree_diff_statistics]
if stats
  puts "Tree Statistics:"
  puts " - Tree1 nodes: #{stats[:tree1_nodes]}"
  puts " - Tree2 nodes: #{stats[:tree2_nodes]}"
  puts " - Total matches: #{stats[:total_matches]}"
  puts " - Match ratio (tree1): #{(stats[:match_ratio_tree1] * 100).round(1)}%"
  puts " - Match ratio (tree2): #{(stats[:match_ratio_tree2] * 100).round(1)}%"
  puts
end

print_samples.call(tree_result)

puts "\n#{heavy_rule}", "TREE DIFF OPERATIONS", heavy_rule
puts

tree_result.operations.first(10).each_with_index do |op, i|
  puts " #{i + 1}. #{op.type.to_s.upcase}"
  puts " Node: #{op.node.label}" if op.node
  puts " Details: #{op.inspect[0..150]}"
  puts
end

puts "\n#{heavy_rule}", "COMPARISON SUMMARY", heavy_rule
puts

puts "DOM Diff:"
print_summary.call(dom_result)
puts

puts "Tree Diff:"
print_summary.call(tree_result)
puts " - Has tree diff operations: #{!tree_result.operations.empty?}"
puts " - Has tree statistics: #{!tree_result.match_options[:tree_diff_statistics].nil?}"
|