canon 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +83 -22
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +196 -24
- data/docs/features/match-options/index.adoc +239 -1
- data/lib/canon/comparison/format_detector.rb +2 -1
- data/lib/canon/comparison/html_comparator.rb +19 -8
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/markup_comparator.rb +109 -2
- data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
- data/lib/canon/comparison/xml_comparator.rb +240 -23
- data/lib/canon/comparison/xml_node_comparison.rb +25 -3
- data/lib/canon/diff/diff_classifier.rb +119 -5
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
- data/lib/canon/rspec_matchers.rb +37 -8
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +4 -78
- data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
- data/false_positive_analysis.txt +0 -0
- data/file1.html +0 -1
- data/file2.html +0 -1
- data/old-docs/ADVANCED_TOPICS.adoc +0 -20
- data/old-docs/BASIC_USAGE.adoc +0 -16
- data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
- data/old-docs/CLI.adoc +0 -497
- data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/old-docs/DIFF_FORMATTING.adoc +0 -540
- data/old-docs/DIFF_PARAMETERS.adoc +0 -261
- data/old-docs/DOM_DIFF.adoc +0 -1017
- data/old-docs/ENV_CONFIG.adoc +0 -876
- data/old-docs/FORMATS.adoc +0 -867
- data/old-docs/INPUT_VALIDATION.adoc +0 -477
- data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
- data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/old-docs/MATCH_OPTIONS.adoc +0 -912
- data/old-docs/MODES.adoc +0 -432
- data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/old-docs/OPTIONS.adoc +0 -1387
- data/old-docs/PREPROCESSING.adoc +0 -491
- data/old-docs/README.old.adoc +0 -2831
- data/old-docs/RSPEC.adoc +0 -814
- data/old-docs/RUBY_API.adoc +0 -485
- data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
- data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
- data/old-docs/STRING_COMPARE.adoc +0 -345
- data/old-docs/TMP.adoc +0 -3384
- data/old-docs/TREE_DIFF.adoc +0 -1080
- data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
- data/old-docs/VERBOSE.adoc +0 -482
- data/old-docs/VISUALIZATION_MAP.adoc +0 -625
- data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
- data/scripts/analyze_current_state.rb +0 -85
- data/scripts/analyze_false_positives.rb +0 -114
- data/scripts/analyze_remaining_failures.rb +0 -105
- data/scripts/compare_current_failures.rb +0 -95
- data/scripts/compare_dom_tree_diff.rb +0 -158
- data/scripts/compare_failures.rb +0 -151
- data/scripts/debug_attribute_extraction.rb +0 -66
- data/scripts/debug_blocks_839.rb +0 -115
- data/scripts/debug_meta_matching.rb +0 -52
- data/scripts/debug_p_matching.rb +0 -192
- data/scripts/debug_signature_matching.rb +0 -118
- data/scripts/debug_sourcecode_124.rb +0 -32
- data/scripts/debug_whitespace_sensitive.rb +0 -192
- data/scripts/extract_false_positives.rb +0 -138
- data/scripts/find_actual_false_positives.rb +0 -125
- data/scripts/investigate_all_false_positives.rb +0 -161
- data/scripts/investigate_batch1.rb +0 -127
- data/scripts/investigate_classification.rb +0 -150
- data/scripts/investigate_classification_detailed.rb +0 -190
- data/scripts/investigate_common_failures.rb +0 -342
- data/scripts/investigate_false_negative.rb +0 -80
- data/scripts/investigate_false_positive.rb +0 -83
- data/scripts/investigate_false_positives.rb +0 -227
- data/scripts/investigate_false_positives_batch.rb +0 -163
- data/scripts/investigate_mixed_content.rb +0 -125
- data/scripts/investigate_remaining_16.rb +0 -214
- data/scripts/run_single_test.rb +0 -29
- data/scripts/test_all_false_positives.rb +0 -95
- data/scripts/test_attribute_details.rb +0 -61
- data/scripts/test_both_algorithms.rb +0 -49
- data/scripts/test_both_simple.rb +0 -49
- data/scripts/test_enhanced_semantic_output.rb +0 -125
- data/scripts/test_readme_examples.rb +0 -131
- data/scripts/test_semantic_tree_diff.rb +0 -99
- data/scripts/test_semantic_ux_improvements.rb +0 -135
- data/scripts/test_single_false_positive.rb +0 -119
- data/scripts/test_size_limits.rb +0 -99
- data/test_html_1.html +0 -21
- data/test_html_2.html +0 -21
- data/test_nokogiri.rb +0 -33
- data/test_normalize.rb +0 -45
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
require_relative "../lib/canon"
|
|
5
|
-
require_relative "../lib/canon/comparison"
|
|
6
|
-
require_relative "../lib/canon/pretty_printer/xml"
|
|
7
|
-
require_relative "../lib/canon/xml/c14n"
|
|
8
|
-
require_relative "../lib/canon/config"
|
|
9
|
-
|
|
10
|
-
puts "Testing corrected README.adoc examples..."
|
|
11
|
-
puts "=" * 60
|
|
12
|
-
|
|
13
|
-
# Test 1: Canon.format (default pretty-print)
|
|
14
|
-
puts "\n1. Testing Canon.format with XML (default)..."
|
|
15
|
-
begin
|
|
16
|
-
result = Canon.format("<root><b>2</b><a>1</a></root>", :xml)
|
|
17
|
-
if result.include?("<?xml") && result.include?("<root>")
|
|
18
|
-
puts "✓ Canon.format works (returns pretty-printed XML)"
|
|
19
|
-
else
|
|
20
|
-
puts "✗ Canon.format unexpected output"
|
|
21
|
-
end
|
|
22
|
-
rescue StandardError => e
|
|
23
|
-
puts "✗ Canon.format failed: #{e.message}"
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
# Test 2: Canonical form (compact)
|
|
27
|
-
puts "\n2. Testing Canon::Xml::C14n.canonicalize..."
|
|
28
|
-
begin
|
|
29
|
-
result = Canon::Xml::C14n.canonicalize("<root><b>2</b><a>1</a></root>",
|
|
30
|
-
with_comments: false)
|
|
31
|
-
expected = "<root><b>2</b><a>1</a></root>"
|
|
32
|
-
if result == expected
|
|
33
|
-
puts "✓ Canon::Xml::C14n.canonicalize works"
|
|
34
|
-
puts " Result: #{result}"
|
|
35
|
-
else
|
|
36
|
-
puts "✗ Unexpected result: #{result}"
|
|
37
|
-
end
|
|
38
|
-
rescue StandardError => e
|
|
39
|
-
puts "✗ Canon::Xml::C14n.canonicalize failed: #{e.message}"
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
# Test 3: Pretty printer with variable
|
|
43
|
-
puts "\n3. Testing Canon::PrettyPrinter::Xml with defined variable..."
|
|
44
|
-
begin
|
|
45
|
-
xml_input = "<root><b>2</b><a>1</a></root>"
|
|
46
|
-
result = Canon::PrettyPrinter::Xml.new(indent: 2).format(xml_input)
|
|
47
|
-
if result.include?("<?xml") && result.include?("<root>")
|
|
48
|
-
puts "✓ Canon::PrettyPrinter::Xml works with defined variable"
|
|
49
|
-
else
|
|
50
|
-
puts "✗ Unexpected output"
|
|
51
|
-
end
|
|
52
|
-
rescue StandardError => e
|
|
53
|
-
puts "✗ Canon::PrettyPrinter::Xml failed: #{e.message}"
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
# Test 4: Basic comparison
|
|
57
|
-
puts "\n4. Testing Canon::Comparison.equivalent?..."
|
|
58
|
-
begin
|
|
59
|
-
xml1 = "<root><a>1</a><b>2</b></root>"
|
|
60
|
-
xml2 = "<root> <b>2</b> <a>1</a> </root>"
|
|
61
|
-
result = Canon::Comparison.equivalent?(xml1, xml2)
|
|
62
|
-
puts "✓ Canon::Comparison.equivalent? works"
|
|
63
|
-
puts " Result: #{result}"
|
|
64
|
-
rescue StandardError => e
|
|
65
|
-
puts "✗ Canon::Comparison.equivalent? failed: #{e.message}"
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
# Test 5: Semantic tree diff
|
|
69
|
-
puts "\n5. Testing semantic tree diff with operations..."
|
|
70
|
-
begin
|
|
71
|
-
xml1 = "<root><a>1</a><b>2</b></root>"
|
|
72
|
-
xml2 = "<root> <b>2</b> <a>1</a> </root>"
|
|
73
|
-
result = Canon::Comparison.equivalent?(xml1, xml2,
|
|
74
|
-
verbose: true,
|
|
75
|
-
diff_algorithm: :semantic)
|
|
76
|
-
if result.respond_to?(:operations)
|
|
77
|
-
puts "✓ Semantic tree diff works"
|
|
78
|
-
puts " Result class: #{result.class}"
|
|
79
|
-
puts " Operations available: #{result.operations.class}"
|
|
80
|
-
else
|
|
81
|
-
puts "✗ Result doesn't have operations method"
|
|
82
|
-
end
|
|
83
|
-
rescue StandardError => e
|
|
84
|
-
puts "✗ Semantic tree diff failed: #{e.message}"
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
# Test 6: RSpec configuration (using Canon::Config)
|
|
88
|
-
puts "\n6. Testing Canon::Config.configure..."
|
|
89
|
-
begin
|
|
90
|
-
Canon::Config.configure do |config|
|
|
91
|
-
config.xml.match.profile = :spec_friendly
|
|
92
|
-
config.xml.diff.use_color = true
|
|
93
|
-
end
|
|
94
|
-
profile = Canon::Config.instance.xml.match.profile
|
|
95
|
-
use_color = Canon::Config.instance.xml.diff.use_color
|
|
96
|
-
if profile == :spec_friendly && use_color == true
|
|
97
|
-
puts "✓ Canon::Config.configure works correctly"
|
|
98
|
-
puts " Profile: #{profile}"
|
|
99
|
-
puts " Use color: #{use_color}"
|
|
100
|
-
else
|
|
101
|
-
puts "✗ Configuration values not set correctly"
|
|
102
|
-
end
|
|
103
|
-
rescue StandardError => e
|
|
104
|
-
puts "✗ Canon::Config.configure failed: #{e.message}"
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
# Test 7: Comparison with custom options
|
|
108
|
-
puts "\n7. Testing Canon::Comparison with match options..."
|
|
109
|
-
begin
|
|
110
|
-
doc1 = "<root><a> text </a></root>"
|
|
111
|
-
doc2 = "<root><a>text</a></root>"
|
|
112
|
-
result = Canon::Comparison.equivalent?(doc1, doc2,
|
|
113
|
-
match: {
|
|
114
|
-
text_content: :normalize,
|
|
115
|
-
structural_whitespace: :ignore,
|
|
116
|
-
comments: :ignore,
|
|
117
|
-
},
|
|
118
|
-
verbose: true)
|
|
119
|
-
if result.respond_to?(:equivalent?)
|
|
120
|
-
puts "✓ Canon::Comparison with match options works"
|
|
121
|
-
puts " Result equivalent: #{result.equivalent?}"
|
|
122
|
-
else
|
|
123
|
-
puts "✗ Result doesn't have expected methods"
|
|
124
|
-
end
|
|
125
|
-
rescue StandardError => e
|
|
126
|
-
puts "✗ Canon::Comparison with match options failed: #{e.message}"
|
|
127
|
-
end
|
|
128
|
-
|
|
129
|
-
puts "\n#{'=' * 60}"
|
|
130
|
-
puts "All tests completed successfully! ✓"
|
|
131
|
-
puts "=" * 60
|
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
require "bundler/setup"
|
|
5
|
-
require "canon"
|
|
6
|
-
require "canon/diff_formatter"
|
|
7
|
-
|
|
8
|
-
# Test XML with differences
|
|
9
|
-
xml1 = <<~XML
|
|
10
|
-
<root>
|
|
11
|
-
<section id="1">
|
|
12
|
-
<title>First Section</title>
|
|
13
|
-
<paragraph>Original text</paragraph>
|
|
14
|
-
</section>
|
|
15
|
-
<section id="2">
|
|
16
|
-
<title>Second Section</title>
|
|
17
|
-
</section>
|
|
18
|
-
</root>
|
|
19
|
-
XML
|
|
20
|
-
|
|
21
|
-
xml2 = <<~XML
|
|
22
|
-
<root>
|
|
23
|
-
<section id="1">
|
|
24
|
-
<title>First Section Modified</title>
|
|
25
|
-
<paragraph>Changed text</paragraph>
|
|
26
|
-
</section>
|
|
27
|
-
<section id="3">
|
|
28
|
-
<title>Third Section</title>
|
|
29
|
-
</section>
|
|
30
|
-
</root>
|
|
31
|
-
XML
|
|
32
|
-
|
|
33
|
-
puts "=" * 80
|
|
34
|
-
puts "Testing Semantic Tree Diff vs DOM Diff"
|
|
35
|
-
puts "=" * 80
|
|
36
|
-
puts
|
|
37
|
-
|
|
38
|
-
# Test with show_compare enabled
|
|
39
|
-
puts "Running with show_compare=true (verbose mode)..."
|
|
40
|
-
puts "-" * 80
|
|
41
|
-
|
|
42
|
-
result = Canon::Comparison.equivalent?(
|
|
43
|
-
xml1,
|
|
44
|
-
xml2,
|
|
45
|
-
verbose: true,
|
|
46
|
-
show_compare: true,
|
|
47
|
-
)
|
|
48
|
-
|
|
49
|
-
if result.is_a?(Canon::Comparison::CombinedComparisonResult)
|
|
50
|
-
puts "\n✓ Got CombinedComparisonResult"
|
|
51
|
-
puts " Decision Algorithm: #{result.decision_algorithm}"
|
|
52
|
-
puts " Equivalent? #{result.equivalent?}"
|
|
53
|
-
puts
|
|
54
|
-
|
|
55
|
-
puts "DOM Diff Algorithm:"
|
|
56
|
-
puts "-" * 40
|
|
57
|
-
dom_result = result.dom_result
|
|
58
|
-
if dom_result.respond_to?(:differences)
|
|
59
|
-
puts " Number of differences: #{dom_result.differences.length}"
|
|
60
|
-
dom_result.differences.first(3).each_with_index do |diff, i|
|
|
61
|
-
puts " #{i + 1}. #{diff.inspect[0..200]}"
|
|
62
|
-
end
|
|
63
|
-
else
|
|
64
|
-
puts " Result: #{dom_result.inspect[0..200]}"
|
|
65
|
-
end
|
|
66
|
-
puts
|
|
67
|
-
|
|
68
|
-
puts "Tree Diff Algorithm:"
|
|
69
|
-
puts "-" * 40
|
|
70
|
-
tree_result = result.tree_result
|
|
71
|
-
if tree_result.respond_to?(:differences)
|
|
72
|
-
puts " Number of differences: #{tree_result.differences.length}"
|
|
73
|
-
tree_result.differences.first(3).each_with_index do |diff, i|
|
|
74
|
-
puts " #{i + 1}. #{diff.inspect[0..200]}"
|
|
75
|
-
end
|
|
76
|
-
else
|
|
77
|
-
puts " Result: #{tree_result.inspect[0..200]}"
|
|
78
|
-
end
|
|
79
|
-
puts
|
|
80
|
-
|
|
81
|
-
# Test formatting
|
|
82
|
-
puts "Formatted Output:"
|
|
83
|
-
puts "-" * 40
|
|
84
|
-
begin
|
|
85
|
-
formatted = Canon::DiffFormatter.format(result, mode: :by_line)
|
|
86
|
-
puts formatted[0..500]
|
|
87
|
-
rescue StandardError => e
|
|
88
|
-
puts " Error formatting: #{e.message}"
|
|
89
|
-
puts " #{e.backtrace.first(3).join("\n ")}"
|
|
90
|
-
end
|
|
91
|
-
else
|
|
92
|
-
puts "✗ Did not get CombinedComparisonResult, got: #{result.class}"
|
|
93
|
-
puts " Result: #{result.inspect[0..200]}"
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
puts
|
|
97
|
-
puts "=" * 80
|
|
98
|
-
puts "Test Complete"
|
|
99
|
-
puts "=" * 80
|
|
@@ -1,135 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Test semantic tree diff UX improvements
|
|
5
|
-
# Verifies that:
|
|
6
|
-
# 1. XPath includes position numbers
|
|
7
|
-
# 2. Element content previews are shown
|
|
8
|
-
# 3. Specific error categories match DOM clarity
|
|
9
|
-
|
|
10
|
-
require_relative "../lib/canon"
|
|
11
|
-
|
|
12
|
-
# Test case: HTML with multiple paragraphs to verify position numbers
|
|
13
|
-
html1 = <<~HTML
|
|
14
|
-
<html>
|
|
15
|
-
<body>
|
|
16
|
-
<div>
|
|
17
|
-
<p>First paragraph</p>
|
|
18
|
-
<p>Second paragraph</p>
|
|
19
|
-
<p>Third paragraph</p>
|
|
20
|
-
</div>
|
|
21
|
-
</body>
|
|
22
|
-
</html>
|
|
23
|
-
HTML
|
|
24
|
-
|
|
25
|
-
html2 = <<~HTML
|
|
26
|
-
<html>
|
|
27
|
-
<body>
|
|
28
|
-
<div>
|
|
29
|
-
<p>First paragraph</p>
|
|
30
|
-
<p id="modified">Second paragraph with changes</p>
|
|
31
|
-
<p>Third paragraph</p>
|
|
32
|
-
<p>Fourth paragraph added</p>
|
|
33
|
-
</div>
|
|
34
|
-
</body>
|
|
35
|
-
</html>
|
|
36
|
-
HTML
|
|
37
|
-
|
|
38
|
-
puts "=" * 80
|
|
39
|
-
puts "Testing Semantic Tree Diff UX Improvements"
|
|
40
|
-
puts "=" * 80
|
|
41
|
-
puts
|
|
42
|
-
|
|
43
|
-
# Test with semantic algorithm
|
|
44
|
-
result = Canon::Comparison.equivalent?(
|
|
45
|
-
html1,
|
|
46
|
-
html2,
|
|
47
|
-
diff_algorithm: :semantic,
|
|
48
|
-
format: :html,
|
|
49
|
-
verbose: true,
|
|
50
|
-
)
|
|
51
|
-
|
|
52
|
-
puts "Equivalent: #{result.equivalent?}"
|
|
53
|
-
puts
|
|
54
|
-
puts "Differences found: #{result.differences.length}"
|
|
55
|
-
puts
|
|
56
|
-
|
|
57
|
-
# Check each difference for UX improvements
|
|
58
|
-
result.differences.each_with_index do |diff, i|
|
|
59
|
-
puts "─" * 80
|
|
60
|
-
puts "Difference ##{i + 1}:"
|
|
61
|
-
puts " Dimension: #{diff.dimension}"
|
|
62
|
-
puts " Reason: #{diff.reason}"
|
|
63
|
-
|
|
64
|
-
# Check XPath has position numbers
|
|
65
|
-
node = diff.node1 || diff.node2
|
|
66
|
-
if node
|
|
67
|
-
xpath = if node.respond_to?(:path)
|
|
68
|
-
node.path
|
|
69
|
-
else
|
|
70
|
-
"(no xpath)"
|
|
71
|
-
end
|
|
72
|
-
puts " XPath: #{xpath}"
|
|
73
|
-
|
|
74
|
-
# Verify position numbers are included
|
|
75
|
-
if xpath.include?("[") && xpath.include?("]")
|
|
76
|
-
puts " ✓ XPath includes position numbers"
|
|
77
|
-
else
|
|
78
|
-
puts " ✗ WARNING: XPath missing position numbers"
|
|
79
|
-
end
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
# Check if reason is specific and actionable
|
|
83
|
-
if diff.reason
|
|
84
|
-
if diff.reason.include?("Missing") || diff.reason.include?("Extra") ||
|
|
85
|
-
diff.reason.include?("changed:") || diff.reason.include?("→")
|
|
86
|
-
puts " ✓ Reason is specific and actionable"
|
|
87
|
-
else
|
|
88
|
-
puts " ⚠ Reason could be more specific: #{diff.reason}"
|
|
89
|
-
end
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
puts
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
puts "=" * 80
|
|
96
|
-
puts "Formatted output:"
|
|
97
|
-
puts "=" * 80
|
|
98
|
-
puts result
|
|
99
|
-
|
|
100
|
-
# Test attribute differences
|
|
101
|
-
puts "\n\n"
|
|
102
|
-
puts "=" * 80
|
|
103
|
-
puts "Testing Attribute Difference Details"
|
|
104
|
-
puts "=" * 80
|
|
105
|
-
|
|
106
|
-
attr_html1 = '<div class="old" id="test" data-value="1">Content</div>'
|
|
107
|
-
attr_html2 = '<div class="new" id="test" data-value="2" data-extra="added">Content</div>'
|
|
108
|
-
|
|
109
|
-
attr_result = Canon::Comparison.equivalent?(
|
|
110
|
-
attr_html1,
|
|
111
|
-
attr_html2,
|
|
112
|
-
diff_algorithm: :semantic,
|
|
113
|
-
format: :html,
|
|
114
|
-
verbose: true,
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
puts "Differences:"
|
|
118
|
-
attr_result.differences.each do |diff|
|
|
119
|
-
puts " Dimension: #{diff.dimension}"
|
|
120
|
-
puts " Reason: #{diff.reason}"
|
|
121
|
-
|
|
122
|
-
# Check for specific attribute details
|
|
123
|
-
if diff.reason.include?("Missing:") || diff.reason.include?("Extra:") || diff.reason.include?("Changed:")
|
|
124
|
-
puts " ✓ Shows specific attribute changes"
|
|
125
|
-
end
|
|
126
|
-
puts
|
|
127
|
-
end
|
|
128
|
-
|
|
129
|
-
puts "Formatted output:"
|
|
130
|
-
puts attr_result
|
|
131
|
-
|
|
132
|
-
puts "\n"
|
|
133
|
-
puts "=" * 80
|
|
134
|
-
puts "Test complete!"
|
|
135
|
-
puts "=" * 80
|
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Test a single false positive case to understand the pattern
|
|
5
|
-
# Usage: ruby scripts/test_single_false_positive.rb
|
|
6
|
-
|
|
7
|
-
require "bundler/setup"
|
|
8
|
-
require "canon"
|
|
9
|
-
|
|
10
|
-
# Based on MIXED_CONTENT_FIX_RESULTS.md, one false positive is in sourcecode_spec.rb:124
|
|
11
|
-
# Let's test a simple sourcecode case with whitespace
|
|
12
|
-
|
|
13
|
-
# This represents a typical sourcecode/pre element case
|
|
14
|
-
expected = <<~HTML
|
|
15
|
-
<div class="example">
|
|
16
|
-
<pre class="sourcecode" id="X">
|
|
17
|
-
Line 1
|
|
18
|
-
Line 2
|
|
19
|
-
</pre>
|
|
20
|
-
</div>
|
|
21
|
-
HTML
|
|
22
|
-
|
|
23
|
-
actual = <<~HTML
|
|
24
|
-
<div class="example">
|
|
25
|
-
<pre class="sourcecode" id="X">Line 1
|
|
26
|
-
Line 2</pre>
|
|
27
|
-
</div>
|
|
28
|
-
HTML
|
|
29
|
-
|
|
30
|
-
puts "=" * 80
|
|
31
|
-
puts "TEST: Sourcecode whitespace handling (False Positive Pattern)"
|
|
32
|
-
puts "=" * 80
|
|
33
|
-
|
|
34
|
-
# Test DOM algorithm
|
|
35
|
-
puts "\n1. DOM ALGORITHM (Baseline):"
|
|
36
|
-
puts "-" * 40
|
|
37
|
-
dom_result = Canon::Comparison.equivalent?(expected, actual,
|
|
38
|
-
format: :html,
|
|
39
|
-
diff_algorithm: :dom,
|
|
40
|
-
verbose: false)
|
|
41
|
-
puts "Result: #{dom_result ? '✅ PASS (no difference)' : '❌ FAIL (has difference)'}"
|
|
42
|
-
|
|
43
|
-
# Test Semantic algorithm
|
|
44
|
-
puts "\n2. SEMANTIC ALGORITHM (Under Test):"
|
|
45
|
-
puts "-" * 40
|
|
46
|
-
semantic_result = Canon::Comparison.equivalent?(expected, actual,
|
|
47
|
-
format: :html,
|
|
48
|
-
diff_algorithm: :semantic,
|
|
49
|
-
verbose: false)
|
|
50
|
-
puts "Result: #{semantic_result ? '✅ PASS (no difference)' : '❌ FAIL (has difference)'}"
|
|
51
|
-
|
|
52
|
-
# Analysis
|
|
53
|
-
puts "\n#{'=' * 80}"
|
|
54
|
-
if dom_result && !semantic_result
|
|
55
|
-
puts "❌ FALSE POSITIVE DETECTED"
|
|
56
|
-
puts " DOM says: equivalent"
|
|
57
|
-
puts " Semantic says: different"
|
|
58
|
-
puts "\nThis is the pattern we need to fix!"
|
|
59
|
-
|
|
60
|
-
# Get detailed diff
|
|
61
|
-
puts "\nDetailed semantic diff:"
|
|
62
|
-
Canon::Comparison.equivalent?(expected, actual,
|
|
63
|
-
format: :html,
|
|
64
|
-
diff_algorithm: :semantic,
|
|
65
|
-
verbose: true)
|
|
66
|
-
elsif !dom_result && semantic_result
|
|
67
|
-
puts "❌ FALSE NEGATIVE DETECTED"
|
|
68
|
-
puts " DOM says: different"
|
|
69
|
-
puts " Semantic says: equivalent"
|
|
70
|
-
elsif dom_result && semantic_result
|
|
71
|
-
puts "✅ BOTH AGREE: No difference (correct match)"
|
|
72
|
-
else
|
|
73
|
-
puts "✅ BOTH AGREE: Has difference (correct non-match)"
|
|
74
|
-
end
|
|
75
|
-
|
|
76
|
-
# Now test with a metadata element case
|
|
77
|
-
puts "\n\n#{'=' * 80}"
|
|
78
|
-
puts "TEST: Metadata element handling (Another False Positive Pattern)"
|
|
79
|
-
puts "=" * 80
|
|
80
|
-
|
|
81
|
-
expected2 = <<~HTML
|
|
82
|
-
<p id="X">
|
|
83
|
-
<span class="fmt-xref-label">Clause 1</span>
|
|
84
|
-
<a name="X">Content here</a>
|
|
85
|
-
</p>
|
|
86
|
-
HTML
|
|
87
|
-
|
|
88
|
-
actual2 = <<~HTML
|
|
89
|
-
<p id="X">
|
|
90
|
-
<span class="fmt-xref-label">Clause 1</span>
|
|
91
|
-
<bookmark id="X"/>
|
|
92
|
-
Content here
|
|
93
|
-
</p>
|
|
94
|
-
HTML
|
|
95
|
-
|
|
96
|
-
puts "\n1. DOM ALGORITHM:"
|
|
97
|
-
dom2 = Canon::Comparison.equivalent?(expected2, actual2,
|
|
98
|
-
format: :html,
|
|
99
|
-
diff_algorithm: :dom,
|
|
100
|
-
verbose: false)
|
|
101
|
-
puts "Result: #{dom2 ? '✅ PASS' : '❌ FAIL'}"
|
|
102
|
-
|
|
103
|
-
puts "\n2. SEMANTIC ALGORITHM:"
|
|
104
|
-
semantic2 = Canon::Comparison.equivalent?(expected2, actual2,
|
|
105
|
-
format: :html,
|
|
106
|
-
diff_algorithm: :semantic,
|
|
107
|
-
verbose: false)
|
|
108
|
-
puts "Result: #{semantic2 ? '✅ PASS' : '❌ FAIL'}"
|
|
109
|
-
|
|
110
|
-
puts "\n#{'=' * 80}"
|
|
111
|
-
if dom2 && !semantic2
|
|
112
|
-
puts "❌ FALSE POSITIVE in metadata handling"
|
|
113
|
-
elsif !dom2 && semantic2
|
|
114
|
-
puts "❌ FALSE NEGATIVE in metadata handling"
|
|
115
|
-
elsif dom2 && semantic2
|
|
116
|
-
puts "✅ BOTH AGREE: equivalent"
|
|
117
|
-
else
|
|
118
|
-
puts "✅ BOTH AGREE: different"
|
|
119
|
-
end
|
data/scripts/test_size_limits.rb
DELETED
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Test script to verify size limit functionality
|
|
5
|
-
|
|
6
|
-
require_relative "../lib/canon"
|
|
7
|
-
require_relative "../lib/canon/config"
|
|
8
|
-
require_relative "../lib/canon/commands/diff_command"
|
|
9
|
-
|
|
10
|
-
puts "Testing Canon Size Limits"
|
|
11
|
-
puts "=" * 60
|
|
12
|
-
|
|
13
|
-
# Test 1: File size limit configuration
|
|
14
|
-
puts "\n1. Testing file size limit configuration:"
|
|
15
|
-
config = Canon::Config.instance
|
|
16
|
-
puts " Default max_file_size: #{config.xml.diff.max_file_size}"
|
|
17
|
-
puts " Expected: 5242880 (5MB)"
|
|
18
|
-
|
|
19
|
-
# Test 2: Node count limit configuration
|
|
20
|
-
puts "\n2. Testing node count limit configuration:"
|
|
21
|
-
puts " Default max_node_count: #{config.xml.diff.max_node_count}"
|
|
22
|
-
puts " Expected: 10000"
|
|
23
|
-
|
|
24
|
-
# Test 3: Diff lines limit configuration
|
|
25
|
-
puts "\n3. Testing diff lines limit configuration:"
|
|
26
|
-
puts " Default max_diff_lines: #{config.xml.diff.max_diff_lines}"
|
|
27
|
-
puts " Expected: 10000"
|
|
28
|
-
|
|
29
|
-
# Test 4: ENV variable override
|
|
30
|
-
puts "\n4. Testing ENV variable override:"
|
|
31
|
-
ENV["CANON_MAX_FILE_SIZE"] = "1000000"
|
|
32
|
-
ENV["CANON_MAX_NODE_COUNT"] = "5000"
|
|
33
|
-
ENV["CANON_MAX_DIFF_LINES"] = "2000"
|
|
34
|
-
|
|
35
|
-
# Reset config to pick up ENV vars
|
|
36
|
-
Canon::Config.reset!
|
|
37
|
-
config = Canon::Config.instance
|
|
38
|
-
|
|
39
|
-
puts " After setting ENV vars:"
|
|
40
|
-
puts " max_file_size: #{config.xml.diff.max_file_size} (expected: 1000000)"
|
|
41
|
-
puts " max_node_count: #{config.xml.diff.max_node_count} (expected: 5000)"
|
|
42
|
-
puts " max_diff_lines: #{config.xml.diff.max_diff_lines} (expected: 2000)"
|
|
43
|
-
|
|
44
|
-
# Test 5: SizeLimitExceededError
|
|
45
|
-
puts "\n5. Testing SizeLimitExceededError:"
|
|
46
|
-
begin
|
|
47
|
-
error = Canon::SizeLimitExceededError.new(:file_size, 10_000_000, 5_242_880)
|
|
48
|
-
puts " Created error: #{error.class}"
|
|
49
|
-
puts " Message: #{error.message}"
|
|
50
|
-
rescue StandardError => e
|
|
51
|
-
puts " ERROR: #{e.class}: #{e.message}"
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
# Test 6: File size checking
|
|
55
|
-
puts "\n6. Testing file size checking in diff command:"
|
|
56
|
-
require "tempfile"
|
|
57
|
-
|
|
58
|
-
# Create a small test file
|
|
59
|
-
Tempfile.create(["test", ".xml"]) do |f1|
|
|
60
|
-
f1.write("<root><child>content</child></root>")
|
|
61
|
-
f1.flush
|
|
62
|
-
|
|
63
|
-
Tempfile.create(["test2", ".xml"]) do |f2|
|
|
64
|
-
f2.write("<root><child>different</child></root>")
|
|
65
|
-
f2.flush
|
|
66
|
-
|
|
67
|
-
# Set a very low limit to trigger error
|
|
68
|
-
ENV["CANON_MAX_FILE_SIZE"] = "10" # 10 bytes
|
|
69
|
-
Canon::Config.reset!
|
|
70
|
-
|
|
71
|
-
begin
|
|
72
|
-
cmd = Canon::Commands::DiffCommand.new(
|
|
73
|
-
format: :xml,
|
|
74
|
-
verbose: true,
|
|
75
|
-
color: false,
|
|
76
|
-
)
|
|
77
|
-
cmd.run(f1.path, f2.path)
|
|
78
|
-
puts " ✗ Should have raised SizeLimitExceededError"
|
|
79
|
-
rescue Canon::SizeLimitExceededError => e
|
|
80
|
-
puts " ✓ Correctly raised SizeLimitExceededError"
|
|
81
|
-
puts " Message: #{e.message.lines.first.strip}"
|
|
82
|
-
rescue SystemExit => e
|
|
83
|
-
# abort() in diff_command causes SystemExit
|
|
84
|
-
# Check if it was due to size limit by checking stderr
|
|
85
|
-
puts " ✓ File size check triggered (command aborted as expected)"
|
|
86
|
-
rescue StandardError => e
|
|
87
|
-
puts " ✗ Unexpected error: #{e.class}: #{e.message}"
|
|
88
|
-
end
|
|
89
|
-
end
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
# Clean up ENV
|
|
93
|
-
ENV.delete("CANON_MAX_FILE_SIZE")
|
|
94
|
-
ENV.delete("CANON_MAX_NODE_COUNT")
|
|
95
|
-
ENV.delete("CANON_MAX_DIFF_LINES")
|
|
96
|
-
Canon::Config.reset!
|
|
97
|
-
|
|
98
|
-
puts "\n#{'=' * 60}"
|
|
99
|
-
puts "Size limits test completed!"
|
data/test_html_1.html
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
<!DOCTYPE html>
|
|
2
|
-
<html>
|
|
3
|
-
<head>
|
|
4
|
-
<title>Test Page</title>
|
|
5
|
-
<!-- Version 1.0 stylesheet -->
|
|
6
|
-
</head>
|
|
7
|
-
<body>
|
|
8
|
-
<div class="container">
|
|
9
|
-
<!-- TODO: Add navigation menu -->
|
|
10
|
-
<h1>Welcome</h1>
|
|
11
|
-
<p>This is a test page.</p>
|
|
12
|
-
<!-- End of header section -->
|
|
13
|
-
|
|
14
|
-
<section id="content">
|
|
15
|
-
<h2>Main Content</h2>
|
|
16
|
-
<!-- Important: This section is critical -->
|
|
17
|
-
<p>Lorem ipsum dolor sit amet.</p>
|
|
18
|
-
</section>
|
|
19
|
-
</div>
|
|
20
|
-
</body>
|
|
21
|
-
</html>
|
data/test_html_2.html
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
<!DOCTYPE html>
|
|
2
|
-
<html>
|
|
3
|
-
<head>
|
|
4
|
-
<title>Test Page</title>
|
|
5
|
-
<!-- Version 2.0 updated styles -->
|
|
6
|
-
</head>
|
|
7
|
-
<body>
|
|
8
|
-
<div class="container">
|
|
9
|
-
<!-- DONE: Navigation menu added -->
|
|
10
|
-
<h1>Welcome</h1>
|
|
11
|
-
<p>This is a test page.</p>
|
|
12
|
-
<!-- End of navigation section -->
|
|
13
|
-
|
|
14
|
-
<section id="content">
|
|
15
|
-
<h2>Main Content</h2>
|
|
16
|
-
<!-- Note: Section has been reviewed -->
|
|
17
|
-
<p>Lorem ipsum dolor sit amet.</p>
|
|
18
|
-
</section>
|
|
19
|
-
</div>
|
|
20
|
-
</body>
|
|
21
|
-
</html>
|
data/test_nokogiri.rb
DELETED
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
require "bundler/setup"
|
|
2
|
-
require "nokogiri"
|
|
3
|
-
|
|
4
|
-
html1 = "<html><body><p>Test</p></body></html>"
|
|
5
|
-
html2 = "<html>\n\n<body>\n\n<p>Test</p>\n\n</body>\n\n</html>"
|
|
6
|
-
|
|
7
|
-
puts "=== Nokogiri HTML5.fragment ==="
|
|
8
|
-
frag1 = Nokogiri::HTML5.fragment(html1)
|
|
9
|
-
frag2 = Nokogiri::HTML5.fragment(html2)
|
|
10
|
-
|
|
11
|
-
puts "html1 children count: #{frag1.children.count}"
|
|
12
|
-
frag1.children.each_with_index do |child, i|
|
|
13
|
-
puts " Child #{i}: #{child.class} - #{child.name}"
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
puts "\nhtml2 children count: #{frag2.children.count}"
|
|
17
|
-
frag2.children.each_with_index do |child, i|
|
|
18
|
-
puts " Child #{i}: #{child.class} - #{child.name}"
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
puts "\n=== Nokogiri::XML.fragment ==="
|
|
22
|
-
frag3 = Nokogiri::XML.fragment(html1)
|
|
23
|
-
frag4 = Nokogiri::XML.fragment(html2)
|
|
24
|
-
|
|
25
|
-
puts "html1 children count: #{frag3.children.count}"
|
|
26
|
-
frag3.children.each_with_index do |child, i|
|
|
27
|
-
puts " Child #{i}: #{child.class} - #{child.name}"
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
puts "\nhtml2 children count: #{frag4.children.count}"
|
|
31
|
-
frag4.children.each_with_index do |child, i|
|
|
32
|
-
puts " Child #{i}: #{child.class} - #{child.name}"
|
|
33
|
-
end
|