canon 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +112 -25
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +82 -2
- data/docs/features/match-options/index.adoc +239 -1
- data/lib/canon/comparison/format_detector.rb +2 -1
- data/lib/canon/comparison/html_comparator.rb +19 -8
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
- data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
- data/lib/canon/comparison/xml_comparator.rb +48 -23
- data/lib/canon/comparison/xml_node_comparison.rb +25 -3
- data/lib/canon/diff/diff_classifier.rb +101 -2
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/rspec_matchers.rb +37 -8
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +3 -78
- data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
- data/false_positive_analysis.txt +0 -0
- data/file1.html +0 -1
- data/file2.html +0 -1
- data/old-docs/ADVANCED_TOPICS.adoc +0 -20
- data/old-docs/BASIC_USAGE.adoc +0 -16
- data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
- data/old-docs/CLI.adoc +0 -497
- data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/old-docs/DIFF_FORMATTING.adoc +0 -540
- data/old-docs/DIFF_PARAMETERS.adoc +0 -261
- data/old-docs/DOM_DIFF.adoc +0 -1017
- data/old-docs/ENV_CONFIG.adoc +0 -876
- data/old-docs/FORMATS.adoc +0 -867
- data/old-docs/INPUT_VALIDATION.adoc +0 -477
- data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
- data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/old-docs/MATCH_OPTIONS.adoc +0 -912
- data/old-docs/MODES.adoc +0 -432
- data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/old-docs/OPTIONS.adoc +0 -1387
- data/old-docs/PREPROCESSING.adoc +0 -491
- data/old-docs/README.old.adoc +0 -2831
- data/old-docs/RSPEC.adoc +0 -814
- data/old-docs/RUBY_API.adoc +0 -485
- data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
- data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
- data/old-docs/STRING_COMPARE.adoc +0 -345
- data/old-docs/TMP.adoc +0 -3384
- data/old-docs/TREE_DIFF.adoc +0 -1080
- data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
- data/old-docs/VERBOSE.adoc +0 -482
- data/old-docs/VISUALIZATION_MAP.adoc +0 -625
- data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
- data/scripts/analyze_current_state.rb +0 -85
- data/scripts/analyze_false_positives.rb +0 -114
- data/scripts/analyze_remaining_failures.rb +0 -105
- data/scripts/compare_current_failures.rb +0 -95
- data/scripts/compare_dom_tree_diff.rb +0 -158
- data/scripts/compare_failures.rb +0 -151
- data/scripts/debug_attribute_extraction.rb +0 -66
- data/scripts/debug_blocks_839.rb +0 -115
- data/scripts/debug_meta_matching.rb +0 -52
- data/scripts/debug_p_matching.rb +0 -192
- data/scripts/debug_signature_matching.rb +0 -118
- data/scripts/debug_sourcecode_124.rb +0 -32
- data/scripts/debug_whitespace_sensitive.rb +0 -192
- data/scripts/extract_false_positives.rb +0 -138
- data/scripts/find_actual_false_positives.rb +0 -125
- data/scripts/investigate_all_false_positives.rb +0 -161
- data/scripts/investigate_batch1.rb +0 -127
- data/scripts/investigate_classification.rb +0 -150
- data/scripts/investigate_classification_detailed.rb +0 -190
- data/scripts/investigate_common_failures.rb +0 -342
- data/scripts/investigate_false_negative.rb +0 -80
- data/scripts/investigate_false_positive.rb +0 -83
- data/scripts/investigate_false_positives.rb +0 -227
- data/scripts/investigate_false_positives_batch.rb +0 -163
- data/scripts/investigate_mixed_content.rb +0 -125
- data/scripts/investigate_remaining_16.rb +0 -214
- data/scripts/run_single_test.rb +0 -29
- data/scripts/test_all_false_positives.rb +0 -95
- data/scripts/test_attribute_details.rb +0 -61
- data/scripts/test_both_algorithms.rb +0 -49
- data/scripts/test_both_simple.rb +0 -49
- data/scripts/test_enhanced_semantic_output.rb +0 -125
- data/scripts/test_readme_examples.rb +0 -131
- data/scripts/test_semantic_tree_diff.rb +0 -99
- data/scripts/test_semantic_ux_improvements.rb +0 -135
- data/scripts/test_single_false_positive.rb +0 -119
- data/scripts/test_size_limits.rb +0 -99
- data/test_html_1.html +0 -21
- data/test_html_2.html +0 -21
- data/test_nokogiri.rb +0 -33
- data/test_normalize.rb +0 -45
|
@@ -1,214 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Investigation script for the remaining 16 semantic vs DOM differences
|
|
5
|
-
# Goal: Achieve DOM parity (43 → 39 failures)
|
|
6
|
-
|
|
7
|
-
require "bundler/setup"
|
|
8
|
-
require "canon"
|
|
9
|
-
|
|
10
|
-
# The 16 test cases that fail with semantic but pass with DOM
|
|
11
|
-
REMAINING_FAILURES = [
|
|
12
|
-
{ file: "blocks_spec.rb", line: 352, category: "Whitespace in <pre>" },
|
|
13
|
-
{ file: "footnotes_spec.rb", line: 740, category: "Element matching" },
|
|
14
|
-
{ file: "inline_spec.rb", line: 1012, category: "Element matching" },
|
|
15
|
-
{ file: "inline_spec.rb", line: 1251, category: "Element matching" },
|
|
16
|
-
{ file: "postproc_spec.rb", line: 948, category: "HTML escapes" },
|
|
17
|
-
{ file: "postproc_word_spec.rb", line: 372, category: "Word processing" },
|
|
18
|
-
{ file: "postproc_word_spec.rb", line: 576, category: "Word processing" },
|
|
19
|
-
{ file: "presentation_xml_numbers_override_spec.rb", line: 2095,
|
|
20
|
-
category: "Number formatting" },
|
|
21
|
-
{ file: "presentation_xml_spec.rb", line: 1288, category: "Presentation" },
|
|
22
|
-
{ file: "presentation_xml_spec.rb", line: 1500, category: "Presentation" },
|
|
23
|
-
{ file: "ref_spec.rb", line: 906, category: "References" },
|
|
24
|
-
{ file: "sourcecode_spec.rb", line: 124, category: "Sourcecode" },
|
|
25
|
-
{ file: "sourcecode_spec.rb", line: 610, category: "Sourcecode" },
|
|
26
|
-
{ file: "terms_spec.rb", line: 1445, category: "Terms" },
|
|
27
|
-
{ file: "xref_format_spec.rb", line: 628, category: "Cross-references" },
|
|
28
|
-
{ file: "xref_spec.rb", line: 315, category: "Cross-references" },
|
|
29
|
-
].freeze
|
|
30
|
-
|
|
31
|
-
class FailureInvestigator
|
|
32
|
-
def initialize(test_case)
|
|
33
|
-
@test_case = test_case
|
|
34
|
-
@spec_file = File.join(ENV["HOME"], "mn/isodoc/spec", test_case[:file])
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
def investigate
|
|
38
|
-
puts "\n#{'=' * 80}"
|
|
39
|
-
puts "Investigating: #{@test_case[:file]}:#{@test_case[:line]}"
|
|
40
|
-
puts "Category: #{@test_case[:category]}"
|
|
41
|
-
puts "=" * 80
|
|
42
|
-
|
|
43
|
-
extract_test_context
|
|
44
|
-
analyze_failure_mode
|
|
45
|
-
suggest_fix
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
private
|
|
49
|
-
|
|
50
|
-
def extract_test_context
|
|
51
|
-
puts "\n--- Test Context ---"
|
|
52
|
-
|
|
53
|
-
unless File.exist?(@spec_file)
|
|
54
|
-
puts "ERROR: Spec file not found: #{@spec_file}"
|
|
55
|
-
return
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
lines = File.readlines(@spec_file)
|
|
59
|
-
target_line = @test_case[:line] - 1
|
|
60
|
-
|
|
61
|
-
# Find the test block
|
|
62
|
-
start_line = target_line
|
|
63
|
-
while start_line.positive? && !lines[start_line].strip.start_with?("it ",
|
|
64
|
-
"specify ")
|
|
65
|
-
start_line -= 1
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
# Extract test name
|
|
69
|
-
test_name = lines[start_line].strip if start_line >= 0
|
|
70
|
-
puts "Test: #{test_name}"
|
|
71
|
-
|
|
72
|
-
# Show context around the assertion
|
|
73
|
-
context_start = [target_line - 5, 0].max
|
|
74
|
-
context_end = [target_line + 5, lines.length - 1].min
|
|
75
|
-
|
|
76
|
-
puts "\nContext (lines #{context_start + 1}-#{context_end + 1}):"
|
|
77
|
-
(context_start..context_end).each do |i|
|
|
78
|
-
marker = i == target_line ? ">>>" : " "
|
|
79
|
-
puts "#{marker} #{i + 1}: #{lines[i].rstrip}"
|
|
80
|
-
end
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
def analyze_failure_mode
|
|
84
|
-
puts "\n--- Failure Analysis ---"
|
|
85
|
-
|
|
86
|
-
case @test_case[:category]
|
|
87
|
-
when "Whitespace in <pre>"
|
|
88
|
-
puts "Pattern: Whitespace handling in <pre> elements"
|
|
89
|
-
puts "Known issue: Semantic correctly detects whitespace differences"
|
|
90
|
-
puts "DOM behavior: May normalize whitespace incorrectly"
|
|
91
|
-
puts "Action needed: Verify if DOM is wrong or test expectations need adjustment"
|
|
92
|
-
|
|
93
|
-
when "Element matching"
|
|
94
|
-
puts "Pattern: Element matching/comparison issues"
|
|
95
|
-
puts "Possible causes:"
|
|
96
|
-
puts " - Attribute ordering differences"
|
|
97
|
-
puts " - Namespace handling"
|
|
98
|
-
puts " - Element signature computation"
|
|
99
|
-
puts "Action needed: Check signature and matching logic"
|
|
100
|
-
|
|
101
|
-
when "HTML escapes"
|
|
102
|
-
puts "Pattern: HTML entity/escape handling"
|
|
103
|
-
puts "Possible causes:"
|
|
104
|
-
puts " - Entity normalization differences"
|
|
105
|
-
puts " - Character reference handling"
|
|
106
|
-
puts "Action needed: Check entity handling in adapters"
|
|
107
|
-
|
|
108
|
-
when "Word processing"
|
|
109
|
-
puts "Pattern: Word-specific HTML processing"
|
|
110
|
-
puts "Possible causes:"
|
|
111
|
-
puts " - Word-specific markup handling"
|
|
112
|
-
puts " - Style attribute differences"
|
|
113
|
-
puts "Action needed: Check Word HTML adapter behavior"
|
|
114
|
-
|
|
115
|
-
when "Number formatting", "Presentation"
|
|
116
|
-
puts "Pattern: XML presentation/formatting"
|
|
117
|
-
puts "Possible causes:"
|
|
118
|
-
puts " - Number/formatting element handling"
|
|
119
|
-
puts " - Presentation attribute differences"
|
|
120
|
-
puts "Action needed: Check XML adapter presentation logic"
|
|
121
|
-
|
|
122
|
-
when "References", "Cross-references"
|
|
123
|
-
puts "Pattern: Reference/cross-reference handling"
|
|
124
|
-
puts "Possible causes:"
|
|
125
|
-
puts " - Reference ID matching"
|
|
126
|
-
puts " - Link element comparison"
|
|
127
|
-
puts "Action needed: Check reference element signatures"
|
|
128
|
-
|
|
129
|
-
when "Sourcecode"
|
|
130
|
-
puts "Pattern: Source code block handling"
|
|
131
|
-
puts "Possible causes:"
|
|
132
|
-
puts " - Whitespace in code blocks (like <pre>)"
|
|
133
|
-
puts " - Code element attribute matching"
|
|
134
|
-
puts "Action needed: Check sourcecode element handling"
|
|
135
|
-
|
|
136
|
-
when "Terms"
|
|
137
|
-
puts "Pattern: Term definition handling"
|
|
138
|
-
puts "Possible causes:"
|
|
139
|
-
puts " - Term element matching"
|
|
140
|
-
puts " - Definition structure differences"
|
|
141
|
-
puts "Action needed: Check term element signatures"
|
|
142
|
-
end
|
|
143
|
-
end
|
|
144
|
-
|
|
145
|
-
def suggest_fix
|
|
146
|
-
puts "\n--- Suggested Investigation Steps ---"
|
|
147
|
-
puts "1. Run the specific test with both DOM and semantic:"
|
|
148
|
-
puts " cd ~/mn/isodoc"
|
|
149
|
-
puts " CANON_DIFF_MODE=dom bundle exec rspec #{@test_case[:file]}:#{@test_case[:line]}"
|
|
150
|
-
puts " CANON_DIFF_MODE=semantic_tree bundle exec rspec #{@test_case[:file]}:#{@test_case[:line]}"
|
|
151
|
-
|
|
152
|
-
puts "\n2. If test fails with semantic, extract actual vs expected:"
|
|
153
|
-
puts " - Look for be_equivalent_to matcher"
|
|
154
|
-
puts " - Save actual and expected to temp files"
|
|
155
|
-
puts " - Compare manually"
|
|
156
|
-
|
|
157
|
-
puts "\n3. Determine root cause:"
|
|
158
|
-
puts " - Is semantic detecting a real difference? → Test may need adjustment"
|
|
159
|
-
puts " - Is semantic wrongly flagging? → Bug in semantic algorithm"
|
|
160
|
-
puts " - Is DOM wrongly passing? → DOM has a bug we're fixing"
|
|
161
|
-
|
|
162
|
-
puts "\n4. Check ROOT_CAUSE_ANALYSIS.md for similar patterns"
|
|
163
|
-
end
|
|
164
|
-
end
|
|
165
|
-
|
|
166
|
-
# Main execution
|
|
167
|
-
puts "Investigating 16 Remaining Semantic vs DOM Differences"
|
|
168
|
-
puts "Goal: Achieve DOM parity (43 → 39 failures)"
|
|
169
|
-
puts "\nTotal cases to investigate: #{REMAINING_FAILURES.length}"
|
|
170
|
-
|
|
171
|
-
if ARGV.empty?
|
|
172
|
-
puts "\nUsage:"
|
|
173
|
-
puts " #{$0} # Show all cases"
|
|
174
|
-
puts " #{$0} <number> # Investigate specific case (1-#{REMAINING_FAILURES.length})"
|
|
175
|
-
puts " #{$0} <category> # Investigate all cases in category"
|
|
176
|
-
puts "\nAvailable categories:"
|
|
177
|
-
REMAINING_FAILURES.map do |t|
|
|
178
|
-
t[:category]
|
|
179
|
-
end.uniq.sort.each { |c| puts " - #{c}" }
|
|
180
|
-
|
|
181
|
-
puts "\nAll cases:"
|
|
182
|
-
REMAINING_FAILURES.each_with_index do |test, idx|
|
|
183
|
-
puts " #{idx + 1}. #{test[:file]}:#{test[:line]} - #{test[:category]}"
|
|
184
|
-
end
|
|
185
|
-
else
|
|
186
|
-
arg = ARGV[0]
|
|
187
|
-
|
|
188
|
-
if /^\d+$/.match?(arg)
|
|
189
|
-
# Specific case number
|
|
190
|
-
idx = arg.to_i - 1
|
|
191
|
-
if idx >= 0 && idx < REMAINING_FAILURES.length
|
|
192
|
-
investigator = FailureInvestigator.new(REMAINING_FAILURES[idx])
|
|
193
|
-
investigator.investigate
|
|
194
|
-
else
|
|
195
|
-
puts "ERROR: Invalid case number. Must be 1-#{REMAINING_FAILURES.length}"
|
|
196
|
-
end
|
|
197
|
-
else
|
|
198
|
-
# Category filter
|
|
199
|
-
category = arg
|
|
200
|
-
matching = REMAINING_FAILURES.select do |t|
|
|
201
|
-
t[:category].downcase.include?(category.downcase)
|
|
202
|
-
end
|
|
203
|
-
|
|
204
|
-
if matching.empty?
|
|
205
|
-
puts "ERROR: No cases found for category: #{category}"
|
|
206
|
-
else
|
|
207
|
-
puts "\nInvestigating #{matching.length} cases matching '#{category}':"
|
|
208
|
-
matching.each do |test|
|
|
209
|
-
investigator = FailureInvestigator.new(test)
|
|
210
|
-
investigator.investigate
|
|
211
|
-
end
|
|
212
|
-
end
|
|
213
|
-
end
|
|
214
|
-
end
|
data/scripts/run_single_test.rb
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Run a specific test with both algorithms to see if it's a false positive
|
|
5
|
-
# Usage: ruby scripts/run_single_test.rb FILE:LINE
|
|
6
|
-
|
|
7
|
-
require "bundler/setup"
|
|
8
|
-
require_relative "../../mn/isodoc/spec/spec_helper"
|
|
9
|
-
|
|
10
|
-
# Run tests and capture results
|
|
11
|
-
test_file = ARGV[0] || "../../mn/isodoc/spec/isodoc/sourcecode_spec.rb:124"
|
|
12
|
-
|
|
13
|
-
puts "=" * 80
|
|
14
|
-
puts "Running test: #{test_file}"
|
|
15
|
-
puts "=" * 80
|
|
16
|
-
|
|
17
|
-
# First with DOM
|
|
18
|
-
puts "\n1. With DOM algorithm:"
|
|
19
|
-
puts "-" * 40
|
|
20
|
-
ENV["CANON_HTML_DIFF_ALGORITHM"] = "dom"
|
|
21
|
-
ENV["CANON_XML_DIFF_ALGORITHM"] = "dom"
|
|
22
|
-
system("cd ../../mn/isodoc && bundle exec rspec #{test_file} --format documentation 2>&1 | tail -20")
|
|
23
|
-
|
|
24
|
-
# Then with Semantic
|
|
25
|
-
puts "\n2. With Semantic algorithm:"
|
|
26
|
-
puts "-" * 40
|
|
27
|
-
ENV["CANON_HTML_DIFF_ALGORITHM"] = "semantic"
|
|
28
|
-
ENV["CANON_XML_DIFF_ALGORITHM"] = "semantic"
|
|
29
|
-
system("cd ../../mn/isodoc && bundle exec rspec #{test_file} --format documentation 2>&1 | tail -20")
|
|
@@ -1,95 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Test all 16 false positives with both algorithms
|
|
5
|
-
# Usage: ruby scripts/test_all_false_positives.rb
|
|
6
|
-
|
|
7
|
-
FALSE_POSITIVES = [
|
|
8
|
-
"spec/isodoc/blocks_spec.rb:352",
|
|
9
|
-
"spec/isodoc/footnotes_spec.rb:740",
|
|
10
|
-
"spec/isodoc/inline_spec.rb:1012",
|
|
11
|
-
"spec/isodoc/inline_spec.rb:1251",
|
|
12
|
-
"spec/isodoc/postproc_spec.rb:948",
|
|
13
|
-
"spec/isodoc/postproc_word_spec.rb:372",
|
|
14
|
-
"spec/isodoc/postproc_word_spec.rb:576",
|
|
15
|
-
"spec/isodoc/presentation_xml_numbers_override_spec.rb:2095",
|
|
16
|
-
"spec/isodoc/presentation_xml_spec.rb:1288",
|
|
17
|
-
"spec/isodoc/presentation_xml_spec.rb:1500",
|
|
18
|
-
"spec/isodoc/ref_spec.rb:906",
|
|
19
|
-
"spec/isodoc/sourcecode_spec.rb:124",
|
|
20
|
-
"spec/isodoc/sourcecode_spec.rb:610",
|
|
21
|
-
"spec/isodoc/terms_spec.rb:1445",
|
|
22
|
-
"spec/isodoc/xref_format_spec.rb:628",
|
|
23
|
-
"spec/isodoc/xref_spec.rb:315",
|
|
24
|
-
].freeze
|
|
25
|
-
|
|
26
|
-
ISODOC_DIR = "/Users/mulgogi/src/mn/isodoc"
|
|
27
|
-
|
|
28
|
-
results = {}
|
|
29
|
-
|
|
30
|
-
puts "=" * 80
|
|
31
|
-
puts "TESTING ALL 16 FALSE POSITIVES"
|
|
32
|
-
puts "=" * 80
|
|
33
|
-
puts
|
|
34
|
-
|
|
35
|
-
FALSE_POSITIVES.each_with_index do |test, idx|
|
|
36
|
-
puts "\n#{idx + 1}. Testing: #{test}"
|
|
37
|
-
puts "-" * 60
|
|
38
|
-
|
|
39
|
-
# Test with DOM
|
|
40
|
-
ENV["CANON_HTML_DIFF_ALGORITHM"] = "dom"
|
|
41
|
-
ENV["CANON_XML_DIFF_ALGORITHM"] = "dom"
|
|
42
|
-
`cd #{ISODOC_DIR} && bundle exec rspec #{test} 2>&1`
|
|
43
|
-
dom_pass = $?.success?
|
|
44
|
-
|
|
45
|
-
# Test with Semantic
|
|
46
|
-
ENV["CANON_HTML_DIFF_ALGORITHM"] = "semantic"
|
|
47
|
-
ENV["CANON_XML_DIFF_ALGORITHM"] = "semantic"
|
|
48
|
-
`cd #{ISODOC_DIR} && bundle exec rspec #{test} 2>&1`
|
|
49
|
-
semantic_pass = $?.success?
|
|
50
|
-
|
|
51
|
-
results[test] = {
|
|
52
|
-
dom: dom_pass,
|
|
53
|
-
semantic: semantic_pass,
|
|
54
|
-
false_positive: dom_pass && !semantic_pass,
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
status = if dom_pass && !semantic_pass
|
|
58
|
-
"❌ FALSE POSITIVE (DOM pass, Semantic fail)"
|
|
59
|
-
elsif !dom_pass && semantic_pass
|
|
60
|
-
"⚠️ FALSE NEGATIVE (DOM fail, Semantic pass)"
|
|
61
|
-
elsif dom_pass && semantic_pass
|
|
62
|
-
"✅ BOTH PASS"
|
|
63
|
-
else
|
|
64
|
-
"⏺ BOTH FAIL"
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
puts " #{status}"
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
# Summary
|
|
71
|
-
puts "\n#{'=' * 80}"
|
|
72
|
-
puts "SUMMARY"
|
|
73
|
-
puts "=" * 80
|
|
74
|
-
|
|
75
|
-
false_positives = results.select { |_, r| r[:false_positive] }
|
|
76
|
-
false_negatives = results.select { |_, r| !r[:dom] && r[:semantic] }
|
|
77
|
-
both_pass = results.select { |_, r| r[:dom] && r[:semantic] }
|
|
78
|
-
both_fail = results.select { |_, r| !r[:dom] && !r[:semantic] }
|
|
79
|
-
|
|
80
|
-
puts "\nFalse Positives (DOM pass, Semantic fail): #{false_positives.count}"
|
|
81
|
-
false_positives.each_key { |test| puts " - #{test}" }
|
|
82
|
-
|
|
83
|
-
puts "\nFalse Negatives (DOM fail, Semantic pass): #{false_negatives.count}"
|
|
84
|
-
false_negatives.each_key { |test| puts " - #{test}" }
|
|
85
|
-
|
|
86
|
-
puts "\nBoth Pass: #{both_pass.count}"
|
|
87
|
-
both_pass.each_key { |test| puts " - #{test}" }
|
|
88
|
-
|
|
89
|
-
puts "\nBoth Fail: #{both_fail.count}"
|
|
90
|
-
both_fail.each_key { |test| puts " - #{test}" }
|
|
91
|
-
|
|
92
|
-
puts "\n#{'=' * 80}"
|
|
93
|
-
puts "Current state: #{false_positives.count} false positives remaining"
|
|
94
|
-
puts "Target: 0 false positives (achieve DOM parity)"
|
|
95
|
-
puts "=" * 80
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
require_relative "../lib/canon"
|
|
5
|
-
require_relative "../lib/canon/diff_formatter"
|
|
6
|
-
require_relative "../lib/canon/diff_formatter/diff_detail_formatter"
|
|
7
|
-
|
|
8
|
-
# Test attribute values formatting - same attributes, different values
|
|
9
|
-
html1 = '<table id="T1" class="MsoNormalTable" border="1"></table>'
|
|
10
|
-
html2 = '<table id="T2" class="MsoNormalTable" border="2"></table>'
|
|
11
|
-
|
|
12
|
-
puts "=" * 70
|
|
13
|
-
puts "TEST 1: Multiple Attribute Values Differ (id removed, border changed)"
|
|
14
|
-
puts "=" * 70
|
|
15
|
-
|
|
16
|
-
result = Canon::Comparison.equivalent?(
|
|
17
|
-
html1,
|
|
18
|
-
html2,
|
|
19
|
-
match_algorithm: :semantic_tree,
|
|
20
|
-
ignore_attr_order: true,
|
|
21
|
-
verbose: true,
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
# Print semantic diff report
|
|
25
|
-
if result.differences.any?
|
|
26
|
-
report = Canon::DiffFormatter::DiffDetailFormatter.format_report(
|
|
27
|
-
result.differences,
|
|
28
|
-
use_color: true,
|
|
29
|
-
)
|
|
30
|
-
puts report
|
|
31
|
-
else
|
|
32
|
-
puts "No differences found!"
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
puts "\n\n"
|
|
36
|
-
puts "=" * 70
|
|
37
|
-
puts "TEST 2: Attribute Order Differs"
|
|
38
|
-
puts "=" * 70
|
|
39
|
-
|
|
40
|
-
# Test attribute order formatting
|
|
41
|
-
html3 = '<table id="T1" class="MsoNormalTable" border="1"></table>'
|
|
42
|
-
html4 = '<table border="1" class="MsoNormalTable" id="T1"></table>'
|
|
43
|
-
|
|
44
|
-
result2 = Canon::Comparison.equivalent?(
|
|
45
|
-
html3,
|
|
46
|
-
html4,
|
|
47
|
-
match_algorithm: :semantic_tree,
|
|
48
|
-
ignore_attr_order: false, # Don't ignore order, so we see the difference
|
|
49
|
-
verbose: true,
|
|
50
|
-
)
|
|
51
|
-
|
|
52
|
-
# Print semantic diff report
|
|
53
|
-
if result2.differences.any?
|
|
54
|
-
report2 = Canon::DiffFormatter::DiffDetailFormatter.format_report(
|
|
55
|
-
result2.differences,
|
|
56
|
-
use_color: true,
|
|
57
|
-
)
|
|
58
|
-
puts report2
|
|
59
|
-
else
|
|
60
|
-
puts "No differences found!"
|
|
61
|
-
end
|
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
require_relative "../lib/canon"
|
|
5
|
-
require_relative "../lib/canon/diff_formatter"
|
|
6
|
-
|
|
7
|
-
# Test XML content
|
|
8
|
-
xml1 = <<~XML
|
|
9
|
-
<root>
|
|
10
|
-
<person>
|
|
11
|
-
<name>Alice</name>
|
|
12
|
-
<age>30</age>
|
|
13
|
-
</person>
|
|
14
|
-
</root>
|
|
15
|
-
XML
|
|
16
|
-
|
|
17
|
-
xml2 = <<~XML
|
|
18
|
-
<root>
|
|
19
|
-
<person>
|
|
20
|
-
<name>Bob</name>
|
|
21
|
-
<age>25</age>
|
|
22
|
-
</person>
|
|
23
|
-
</root>
|
|
24
|
-
XML
|
|
25
|
-
|
|
26
|
-
puts "=" * 80
|
|
27
|
-
puts "Testing :both algorithm option"
|
|
28
|
-
puts "=" * 80
|
|
29
|
-
puts
|
|
30
|
-
|
|
31
|
-
# Test with verbose: true and diff_algorithm: :both
|
|
32
|
-
result = Canon::Comparison.equivalent?(
|
|
33
|
-
xml1,
|
|
34
|
-
xml2,
|
|
35
|
-
verbose: true,
|
|
36
|
-
diff_algorithm: :both,
|
|
37
|
-
)
|
|
38
|
-
|
|
39
|
-
puts "Result class: #{result.class}"
|
|
40
|
-
puts "Result equivalent?: #{result.equivalent?}"
|
|
41
|
-
puts
|
|
42
|
-
|
|
43
|
-
# Create formatter
|
|
44
|
-
formatter = Canon::DiffFormatter.new(use_color: true)
|
|
45
|
-
|
|
46
|
-
# Format the comparison result
|
|
47
|
-
output = formatter.format_comparison_result(result, xml1, xml2)
|
|
48
|
-
|
|
49
|
-
puts output
|
data/scripts/test_both_simple.rb
DELETED
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
require_relative "../lib/canon"
|
|
5
|
-
|
|
6
|
-
# Test XML content
|
|
7
|
-
xml1 = <<~XML
|
|
8
|
-
<root>
|
|
9
|
-
<person>
|
|
10
|
-
<name>Alice</name>
|
|
11
|
-
<age>30</age>
|
|
12
|
-
</person>
|
|
13
|
-
</root>
|
|
14
|
-
XML
|
|
15
|
-
|
|
16
|
-
xml2 = <<~XML
|
|
17
|
-
<root>
|
|
18
|
-
<person>
|
|
19
|
-
<name>Bob</name>
|
|
20
|
-
<age>25</age>
|
|
21
|
-
</person>
|
|
22
|
-
</root>
|
|
23
|
-
XML
|
|
24
|
-
|
|
25
|
-
puts "=" * 80
|
|
26
|
-
puts "Testing :both algorithm option (simple test)"
|
|
27
|
-
puts "=" * 80
|
|
28
|
-
puts
|
|
29
|
-
|
|
30
|
-
# Test with verbose: true and diff_algorithm: :both
|
|
31
|
-
result = Canon::Comparison.equivalent?(
|
|
32
|
-
xml1,
|
|
33
|
-
xml2,
|
|
34
|
-
verbose: true,
|
|
35
|
-
diff_algorithm: :both,
|
|
36
|
-
)
|
|
37
|
-
|
|
38
|
-
puts "Result class: #{result.class}"
|
|
39
|
-
puts "Result equivalent?: #{result.equivalent?}"
|
|
40
|
-
puts "DOM result class: #{result.dom_result.class}"
|
|
41
|
-
puts "DOM result equivalent?: #{result.dom_result.equivalent?}"
|
|
42
|
-
puts "DOM result algorithm: #{result.dom_result.algorithm}"
|
|
43
|
-
puts "DOM differences count: #{result.dom_result.differences.count}"
|
|
44
|
-
puts
|
|
45
|
-
puts "Tree result class: #{result.tree_result.class}"
|
|
46
|
-
puts "Tree result equivalent?: #{result.tree_result.equivalent?}"
|
|
47
|
-
puts "Tree result algorithm: #{result.tree_result.algorithm}"
|
|
48
|
-
puts "Tree differences count: #{result.tree_result.differences.count}"
|
|
49
|
-
puts "Tree operations count: #{result.tree_result.operations.count}"
|
|
@@ -1,125 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
require "bundler/setup"
|
|
5
|
-
require "canon"
|
|
6
|
-
|
|
7
|
-
# Test XML with differences
|
|
8
|
-
xml1 = <<~XML
|
|
9
|
-
<root>
|
|
10
|
-
<section id="1">
|
|
11
|
-
<title>First Section</title>
|
|
12
|
-
<paragraph>Original text</paragraph>
|
|
13
|
-
</section>
|
|
14
|
-
<section id="2">
|
|
15
|
-
<title>Second Section</title>
|
|
16
|
-
</section>
|
|
17
|
-
</root>
|
|
18
|
-
XML
|
|
19
|
-
|
|
20
|
-
xml2 = <<~XML
|
|
21
|
-
<root>
|
|
22
|
-
<section id="1">
|
|
23
|
-
<title>First Section Modified</title>
|
|
24
|
-
<paragraph>Changed text</paragraph>
|
|
25
|
-
</section>
|
|
26
|
-
<section id="3">
|
|
27
|
-
<title>Third Section</title>
|
|
28
|
-
</section>
|
|
29
|
-
</root>
|
|
30
|
-
XML
|
|
31
|
-
|
|
32
|
-
puts "=" * 80
|
|
33
|
-
puts "Testing Enhanced Semantic Tree Diff Output"
|
|
34
|
-
puts "=" * 80
|
|
35
|
-
puts
|
|
36
|
-
|
|
37
|
-
# Test with semantic diff algorithm
|
|
38
|
-
result = Canon::Comparison.equivalent?(
|
|
39
|
-
xml1,
|
|
40
|
-
xml2,
|
|
41
|
-
diff_algorithm: :semantic,
|
|
42
|
-
verbose: true,
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
puts "Algorithm: #{result.algorithm}"
|
|
46
|
-
puts "Equivalent: #{result.equivalent?}"
|
|
47
|
-
puts "Number of differences: #{result.differences.length}"
|
|
48
|
-
puts
|
|
49
|
-
puts "Detailed Operations:"
|
|
50
|
-
puts "-" * 80
|
|
51
|
-
|
|
52
|
-
result.differences.each_with_index do |diff, i|
|
|
53
|
-
puts "\n#{i + 1}. #{diff.dimension.to_s.upcase}"
|
|
54
|
-
puts " Reason: #{diff.reason}"
|
|
55
|
-
|
|
56
|
-
# Access the underlying operation metadata
|
|
57
|
-
if diff.respond_to?(:node1) && diff.node1
|
|
58
|
-
puts " Node1: #{diff.node1.inspect}"
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
if diff.respond_to?(:node2) && diff.node2
|
|
62
|
-
puts " Node2: #{diff.node2.inspect}"
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
# Show metadata if available
|
|
66
|
-
if diff.respond_to?(:metadata)
|
|
67
|
-
puts " Metadata: #{diff.metadata.inspect}"
|
|
68
|
-
end
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
puts
|
|
72
|
-
puts "=" * 80
|
|
73
|
-
puts "Testing operation metadata directly"
|
|
74
|
-
puts "=" * 80
|
|
75
|
-
|
|
76
|
-
# Access tree diff operations directly
|
|
77
|
-
if result.respond_to?(:match_options) && result.match_options
|
|
78
|
-
ops = result.match_options[:tree_diff_operations]
|
|
79
|
-
|
|
80
|
-
if ops
|
|
81
|
-
puts "\nDirect Tree Diff Operations:"
|
|
82
|
-
puts "-" * 80
|
|
83
|
-
|
|
84
|
-
ops.each_with_index do |op, i|
|
|
85
|
-
puts "\n#{i + 1}. Operation: #{op.type.to_s.upcase}"
|
|
86
|
-
|
|
87
|
-
# Show path information
|
|
88
|
-
if op[:path]
|
|
89
|
-
puts " Path: #{op[:path]}"
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
if op[:old_path] && op[:new_path]
|
|
93
|
-
puts " Old Path: #{op[:old_path]}"
|
|
94
|
-
puts " New Path: #{op[:new_path]}"
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
# Show content information
|
|
98
|
-
if op[:content]
|
|
99
|
-
puts " Content: #{op[:content]}"
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
if op[:old_content] && op[:new_content]
|
|
103
|
-
puts " Old Content: #{op[:old_content]}"
|
|
104
|
-
puts " New Content: #{op[:new_content]}"
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
# Show changes detail
|
|
108
|
-
if op[:changes]
|
|
109
|
-
puts " Changes:"
|
|
110
|
-
op[:changes].each do |key, change|
|
|
111
|
-
puts " - #{key}: #{change[:old]} => #{change[:new]}"
|
|
112
|
-
end
|
|
113
|
-
end
|
|
114
|
-
end
|
|
115
|
-
else
|
|
116
|
-
puts "No tree_diff_operations found in match_options"
|
|
117
|
-
end
|
|
118
|
-
else
|
|
119
|
-
puts "No match_options available"
|
|
120
|
-
end
|
|
121
|
-
|
|
122
|
-
puts
|
|
123
|
-
puts "=" * 80
|
|
124
|
-
puts "Test Complete"
|
|
125
|
-
puts "=" * 80
|