canon 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +112 -25
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +82 -2
- data/docs/features/match-options/index.adoc +239 -1
- data/lib/canon/comparison/format_detector.rb +2 -1
- data/lib/canon/comparison/html_comparator.rb +19 -8
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
- data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
- data/lib/canon/comparison/xml_comparator.rb +48 -23
- data/lib/canon/comparison/xml_node_comparison.rb +25 -3
- data/lib/canon/diff/diff_classifier.rb +101 -2
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/rspec_matchers.rb +37 -8
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +3 -78
- data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
- data/false_positive_analysis.txt +0 -0
- data/file1.html +0 -1
- data/file2.html +0 -1
- data/old-docs/ADVANCED_TOPICS.adoc +0 -20
- data/old-docs/BASIC_USAGE.adoc +0 -16
- data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
- data/old-docs/CLI.adoc +0 -497
- data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/old-docs/DIFF_FORMATTING.adoc +0 -540
- data/old-docs/DIFF_PARAMETERS.adoc +0 -261
- data/old-docs/DOM_DIFF.adoc +0 -1017
- data/old-docs/ENV_CONFIG.adoc +0 -876
- data/old-docs/FORMATS.adoc +0 -867
- data/old-docs/INPUT_VALIDATION.adoc +0 -477
- data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
- data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/old-docs/MATCH_OPTIONS.adoc +0 -912
- data/old-docs/MODES.adoc +0 -432
- data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/old-docs/OPTIONS.adoc +0 -1387
- data/old-docs/PREPROCESSING.adoc +0 -491
- data/old-docs/README.old.adoc +0 -2831
- data/old-docs/RSPEC.adoc +0 -814
- data/old-docs/RUBY_API.adoc +0 -485
- data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
- data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
- data/old-docs/STRING_COMPARE.adoc +0 -345
- data/old-docs/TMP.adoc +0 -3384
- data/old-docs/TREE_DIFF.adoc +0 -1080
- data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
- data/old-docs/VERBOSE.adoc +0 -482
- data/old-docs/VISUALIZATION_MAP.adoc +0 -625
- data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
- data/scripts/analyze_current_state.rb +0 -85
- data/scripts/analyze_false_positives.rb +0 -114
- data/scripts/analyze_remaining_failures.rb +0 -105
- data/scripts/compare_current_failures.rb +0 -95
- data/scripts/compare_dom_tree_diff.rb +0 -158
- data/scripts/compare_failures.rb +0 -151
- data/scripts/debug_attribute_extraction.rb +0 -66
- data/scripts/debug_blocks_839.rb +0 -115
- data/scripts/debug_meta_matching.rb +0 -52
- data/scripts/debug_p_matching.rb +0 -192
- data/scripts/debug_signature_matching.rb +0 -118
- data/scripts/debug_sourcecode_124.rb +0 -32
- data/scripts/debug_whitespace_sensitive.rb +0 -192
- data/scripts/extract_false_positives.rb +0 -138
- data/scripts/find_actual_false_positives.rb +0 -125
- data/scripts/investigate_all_false_positives.rb +0 -161
- data/scripts/investigate_batch1.rb +0 -127
- data/scripts/investigate_classification.rb +0 -150
- data/scripts/investigate_classification_detailed.rb +0 -190
- data/scripts/investigate_common_failures.rb +0 -342
- data/scripts/investigate_false_negative.rb +0 -80
- data/scripts/investigate_false_positive.rb +0 -83
- data/scripts/investigate_false_positives.rb +0 -227
- data/scripts/investigate_false_positives_batch.rb +0 -163
- data/scripts/investigate_mixed_content.rb +0 -125
- data/scripts/investigate_remaining_16.rb +0 -214
- data/scripts/run_single_test.rb +0 -29
- data/scripts/test_all_false_positives.rb +0 -95
- data/scripts/test_attribute_details.rb +0 -61
- data/scripts/test_both_algorithms.rb +0 -49
- data/scripts/test_both_simple.rb +0 -49
- data/scripts/test_enhanced_semantic_output.rb +0 -125
- data/scripts/test_readme_examples.rb +0 -131
- data/scripts/test_semantic_tree_diff.rb +0 -99
- data/scripts/test_semantic_ux_improvements.rb +0 -135
- data/scripts/test_single_false_positive.rb +0 -119
- data/scripts/test_size_limits.rb +0 -99
- data/test_html_1.html +0 -21
- data/test_html_2.html +0 -21
- data/test_nokogiri.rb +0 -33
- data/test_normalize.rb +0 -45
|
@@ -1,227 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Script to systematically investigate false positives
|
|
5
|
-
# where semantic algorithm fails but DOM algorithm passes
|
|
6
|
-
|
|
7
|
-
require "fileutils"
|
|
8
|
-
require "json"
|
|
9
|
-
|
|
10
|
-
# False positives to investigate
|
|
11
|
-
FALSE_POSITIVES = [
|
|
12
|
-
{ file: "blocks_spec.rb", line: 352 },
|
|
13
|
-
{ file: "footnotes_spec.rb", line: 740 },
|
|
14
|
-
{ file: "inline_spec.rb", line: 1012 },
|
|
15
|
-
{ file: "inline_spec.rb", line: 1251 },
|
|
16
|
-
{ file: "postproc_spec.rb", line: 948 },
|
|
17
|
-
{ file: "postproc_word_spec.rb", line: 372 },
|
|
18
|
-
{ file: "postproc_word_spec.rb", line: 576 },
|
|
19
|
-
{ file: "presentation_xml_numbers_override_spec.rb", line: 2095 },
|
|
20
|
-
{ file: "presentation_xml_spec.rb", line: 1288 },
|
|
21
|
-
{ file: "presentation_xml_spec.rb", line: 1500 },
|
|
22
|
-
{ file: "ref_spec.rb", line: 906 },
|
|
23
|
-
{ file: "sourcecode_spec.rb", line: 124 },
|
|
24
|
-
{ file: "sourcecode_spec.rb", line: 610 },
|
|
25
|
-
{ file: "terms_spec.rb", line: 1445 },
|
|
26
|
-
{ file: "xref_format_spec.rb", line: 628 },
|
|
27
|
-
{ file: "xref_spec.rb", line: 315 },
|
|
28
|
-
].freeze
|
|
29
|
-
|
|
30
|
-
ISODOC_SPEC_DIR = File.expand_path("../../../mn/isodoc/spec/isodoc", __dir__)
|
|
31
|
-
|
|
32
|
-
class FalsePositiveInvestigator
|
|
33
|
-
attr_reader :results
|
|
34
|
-
|
|
35
|
-
def initialize
|
|
36
|
-
@results = []
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
def investigate_all
|
|
40
|
-
puts "=" * 80
|
|
41
|
-
puts "INVESTIGATING 16 FALSE POSITIVES"
|
|
42
|
-
puts "=" * 80
|
|
43
|
-
puts
|
|
44
|
-
|
|
45
|
-
FALSE_POSITIVES.each_with_index do |test, idx|
|
|
46
|
-
puts "\n#{idx + 1}/#{FALSE_POSITIVES.size}: #{test[:file]}:#{test[:line]}"
|
|
47
|
-
puts "-" * 80
|
|
48
|
-
|
|
49
|
-
result = investigate_test(test)
|
|
50
|
-
@results << result
|
|
51
|
-
|
|
52
|
-
display_result(result)
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
summarize_results
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
def investigate_test(test)
|
|
59
|
-
file_path = File.join(ISODOC_SPEC_DIR, test[:file])
|
|
60
|
-
|
|
61
|
-
unless File.exist?(file_path)
|
|
62
|
-
return {
|
|
63
|
-
test: test,
|
|
64
|
-
error: "File not found: #{file_path}",
|
|
65
|
-
dom_passes: nil,
|
|
66
|
-
semantic_passes: nil,
|
|
67
|
-
}
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
result = {
|
|
71
|
-
test: test,
|
|
72
|
-
file_path: file_path,
|
|
73
|
-
dom_passes: nil,
|
|
74
|
-
semantic_passes: nil,
|
|
75
|
-
semantic_output: nil,
|
|
76
|
-
error: nil,
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
# Test with DOM algorithm
|
|
80
|
-
puts " Testing with DOM algorithm..."
|
|
81
|
-
dom_output = run_test(file_path, test[:line], "dom")
|
|
82
|
-
result[:dom_passes] = dom_output[:success]
|
|
83
|
-
result[:dom_output] = dom_output[:output]
|
|
84
|
-
|
|
85
|
-
# Test with semantic algorithm
|
|
86
|
-
puts " Testing with semantic algorithm..."
|
|
87
|
-
semantic_output = run_test(file_path, test[:line], "semantic")
|
|
88
|
-
result[:semantic_passes] = semantic_output[:success]
|
|
89
|
-
result[:semantic_output] = semantic_output[:output]
|
|
90
|
-
|
|
91
|
-
result
|
|
92
|
-
rescue StandardError => e
|
|
93
|
-
{
|
|
94
|
-
test: test,
|
|
95
|
-
error: "Exception: #{e.message}",
|
|
96
|
-
dom_passes: nil,
|
|
97
|
-
semantic_passes: nil,
|
|
98
|
-
}
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
def run_test(file_path, line, algorithm)
|
|
102
|
-
cmd = "cd #{ISODOC_SPEC_DIR}/.. && CANON_ALGORITHM=#{algorithm} bundle exec rspec #{file_path}:#{line} 2>&1"
|
|
103
|
-
output = `#{cmd}`
|
|
104
|
-
success = $?.success?
|
|
105
|
-
|
|
106
|
-
{
|
|
107
|
-
success: success,
|
|
108
|
-
output: output,
|
|
109
|
-
exit_code: $?.exitstatus,
|
|
110
|
-
}
|
|
111
|
-
end
|
|
112
|
-
|
|
113
|
-
def display_result(result)
|
|
114
|
-
if result[:error]
|
|
115
|
-
puts " ❌ ERROR: #{result[:error]}"
|
|
116
|
-
return
|
|
117
|
-
end
|
|
118
|
-
|
|
119
|
-
dom_status = result[:dom_passes] ? "✅ PASS" : "❌ FAIL"
|
|
120
|
-
sem_status = result[:semantic_passes] ? "✅ PASS" : "❌ FAIL"
|
|
121
|
-
|
|
122
|
-
puts " DOM: #{dom_status}"
|
|
123
|
-
puts " Semantic: #{sem_status}"
|
|
124
|
-
|
|
125
|
-
if result[:dom_passes] && !result[:semantic_passes]
|
|
126
|
-
puts " ⚠️ CONFIRMED FALSE POSITIVE"
|
|
127
|
-
analyze_failure(result)
|
|
128
|
-
elsif !result[:dom_passes] && result[:semantic_passes]
|
|
129
|
-
puts " ⚠️ UNEXPECTED: DOM fails but semantic passes!"
|
|
130
|
-
elsif !result[:dom_passes] && !result[:semantic_passes]
|
|
131
|
-
puts " ℹ️ Both algorithms fail (not a false positive)"
|
|
132
|
-
else
|
|
133
|
-
puts " ✅ Both algorithms pass (false positive may be fixed)"
|
|
134
|
-
end
|
|
135
|
-
end
|
|
136
|
-
|
|
137
|
-
def analyze_failure(result)
|
|
138
|
-
output = result[:semantic_output]
|
|
139
|
-
|
|
140
|
-
# Look for diff patterns
|
|
141
|
-
if output.include?("Expected XML to be equivalent")
|
|
142
|
-
puts " š Failure type: XML equivalence check"
|
|
143
|
-
elsif output.include?("Expected HTML to be equivalent")
|
|
144
|
-
puts " š Failure type: HTML equivalence check"
|
|
145
|
-
end
|
|
146
|
-
|
|
147
|
-
# Extract key diff lines
|
|
148
|
-
diff_lines = output.lines.select { |l| l.match?(/^\s*[+-]/) }.take(10)
|
|
149
|
-
if diff_lines.any?
|
|
150
|
-
puts " š Sample diff:"
|
|
151
|
-
diff_lines.each { |l| puts " #{l.strip}" }
|
|
152
|
-
end
|
|
153
|
-
|
|
154
|
-
# Look for specific patterns
|
|
155
|
-
if output.include?("whitespace")
|
|
156
|
-
puts " š Involves: whitespace differences"
|
|
157
|
-
end
|
|
158
|
-
if output.include?("attribute")
|
|
159
|
-
puts " š Involves: attribute differences"
|
|
160
|
-
end
|
|
161
|
-
if output.include?("text content")
|
|
162
|
-
puts " š Involves: text content differences"
|
|
163
|
-
end
|
|
164
|
-
end
|
|
165
|
-
|
|
166
|
-
def summarize_results
|
|
167
|
-
puts "\n#{'=' * 80}"
|
|
168
|
-
puts "SUMMARY"
|
|
169
|
-
puts "=" * 80
|
|
170
|
-
|
|
171
|
-
confirmed_fps = @results.count do |r|
|
|
172
|
-
r[:dom_passes] && !r[:semantic_passes]
|
|
173
|
-
end
|
|
174
|
-
fixed = @results.count { |r| r[:dom_passes] && r[:semantic_passes] }
|
|
175
|
-
errors = @results.count { |r| r[:error] }
|
|
176
|
-
both_fail = @results.count { |r| !r[:dom_passes] && !r[:semantic_passes] }
|
|
177
|
-
|
|
178
|
-
puts "Confirmed false positives: #{confirmed_fps}/16"
|
|
179
|
-
puts "Already fixed: #{fixed}/16"
|
|
180
|
-
puts "Both fail (not FP): #{both_fail}/16"
|
|
181
|
-
puts "Errors: #{errors}/16"
|
|
182
|
-
puts
|
|
183
|
-
|
|
184
|
-
if confirmed_fps.positive?
|
|
185
|
-
puts "FALSE POSITIVES TO FIX:"
|
|
186
|
-
@results.each do |r|
|
|
187
|
-
next unless r[:dom_passes] && !r[:semantic_passes]
|
|
188
|
-
|
|
189
|
-
puts " - #{r[:test][:file]}:#{r[:test][:line]}"
|
|
190
|
-
end
|
|
191
|
-
end
|
|
192
|
-
|
|
193
|
-
puts "\n#{'=' * 80}"
|
|
194
|
-
end
|
|
195
|
-
|
|
196
|
-
def save_detailed_output(output_dir = "tmp/false_positive_investigation")
|
|
197
|
-
FileUtils.mkdir_p(output_dir)
|
|
198
|
-
|
|
199
|
-
@results.each_with_index do |result, idx|
|
|
200
|
-
next if result[:error]
|
|
201
|
-
|
|
202
|
-
test = result[:test]
|
|
203
|
-
filename = "#{idx + 1}_#{test[:file].gsub('.rb', '')}_#{test[:line]}.txt"
|
|
204
|
-
filepath = File.join(output_dir, filename)
|
|
205
|
-
|
|
206
|
-
File.write(filepath, <<~OUTPUT)
|
|
207
|
-
Test: #{test[:file]}:#{test[:line]}
|
|
208
|
-
DOM passes: #{result[:dom_passes]}
|
|
209
|
-
Semantic passes: #{result[:semantic_passes]}
|
|
210
|
-
|
|
211
|
-
========================================
|
|
212
|
-
SEMANTIC OUTPUT:
|
|
213
|
-
========================================
|
|
214
|
-
#{result[:semantic_output]}
|
|
215
|
-
OUTPUT
|
|
216
|
-
end
|
|
217
|
-
|
|
218
|
-
puts "\nDetailed output saved to: #{output_dir}/"
|
|
219
|
-
end
|
|
220
|
-
end
|
|
221
|
-
|
|
222
|
-
# Run investigation
|
|
223
|
-
investigator = FalsePositiveInvestigator.new
|
|
224
|
-
investigator.investigate_all
|
|
225
|
-
investigator.save_detailed_output
|
|
226
|
-
|
|
227
|
-
puts "\nInvestigation complete!"
|
|
@@ -1,163 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Systematically investigate false positive failures
|
|
5
|
-
# Usage: ruby scripts/investigate_false_positives_batch.rb <spec_file:line> [<spec_file:line> ...]
|
|
6
|
-
|
|
7
|
-
require "bundler/setup"
|
|
8
|
-
require "fileutils"
|
|
9
|
-
|
|
10
|
-
# False positive test cases from XMLNS_FIX_VALIDATION.md
|
|
11
|
-
FALSE_POSITIVES = [
|
|
12
|
-
"blocks_spec.rb:352",
|
|
13
|
-
"footnotes_spec.rb:740",
|
|
14
|
-
"inline_spec.rb:1012",
|
|
15
|
-
"inline_spec.rb:1251",
|
|
16
|
-
"postproc_spec.rb:948",
|
|
17
|
-
"postproc_word_spec.rb:372",
|
|
18
|
-
"postproc_word_spec.rb:576",
|
|
19
|
-
"presentation_xml_numbers_override_spec.rb:2095",
|
|
20
|
-
"presentation_xml_spec.rb:1288",
|
|
21
|
-
"presentation_xml_spec.rb:1500",
|
|
22
|
-
"ref_spec.rb:906",
|
|
23
|
-
"sourcecode_spec.rb:124",
|
|
24
|
-
"sourcecode_spec.rb:610",
|
|
25
|
-
"terms_spec.rb:1445",
|
|
26
|
-
"xref_format_spec.rb:628",
|
|
27
|
-
"xref_spec.rb:315",
|
|
28
|
-
].freeze
|
|
29
|
-
|
|
30
|
-
def run_test(spec_file, line, algorithm)
|
|
31
|
-
spec_path = File.expand_path("../../../mn/isodoc/spec/isodoc/#{spec_file}",
|
|
32
|
-
__dir__)
|
|
33
|
-
|
|
34
|
-
unless File.exist?(spec_path)
|
|
35
|
-
puts " ❌ File not found: #{spec_path}"
|
|
36
|
-
return nil
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
# Run with specific algorithm
|
|
40
|
-
{ "CANON_ALGORITHM" => algorithm }
|
|
41
|
-
cmd = "cd #{File.dirname(spec_path)} && bundle exec rspec #{spec_path}:#{line} 2>&1"
|
|
42
|
-
|
|
43
|
-
output = `#{cmd}`
|
|
44
|
-
success = $?.success?
|
|
45
|
-
|
|
46
|
-
{ success: success, output: output }
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def analyze_test(test_case)
|
|
50
|
-
spec_file, line = test_case.split(":")
|
|
51
|
-
|
|
52
|
-
puts "\n#{'=' * 80}"
|
|
53
|
-
puts "ANALYZING: #{test_case}"
|
|
54
|
-
puts "=" * 80
|
|
55
|
-
|
|
56
|
-
# Run with DOM algorithm
|
|
57
|
-
puts "\n1. Testing with DOM algorithm..."
|
|
58
|
-
dom_result = run_test(spec_file, line, "dom")
|
|
59
|
-
return unless dom_result
|
|
60
|
-
|
|
61
|
-
dom_pass = dom_result[:success]
|
|
62
|
-
puts " Result: #{dom_pass ? '✅ PASS' : '❌ FAIL'}"
|
|
63
|
-
|
|
64
|
-
# Run with Semantic algorithm
|
|
65
|
-
puts "\n2. Testing with Semantic algorithm..."
|
|
66
|
-
semantic_result = run_test(spec_file, line, "semantic")
|
|
67
|
-
return unless semantic_result
|
|
68
|
-
|
|
69
|
-
semantic_pass = semantic_result[:success]
|
|
70
|
-
puts " Result: #{semantic_pass ? '✅ PASS' : '❌ FAIL'}"
|
|
71
|
-
|
|
72
|
-
# Analysis
|
|
73
|
-
puts "\n#{'-' * 80}"
|
|
74
|
-
puts "ANALYSIS:"
|
|
75
|
-
puts "-" * 80
|
|
76
|
-
|
|
77
|
-
if dom_pass && !semantic_pass
|
|
78
|
-
puts "✅ CONFIRMED FALSE POSITIVE: DOM passes, Semantic fails"
|
|
79
|
-
puts "\nThis test should be investigated to understand why semantic is too strict."
|
|
80
|
-
|
|
81
|
-
# Extract failure details from semantic output
|
|
82
|
-
if semantic_result[:output] =~ /Failure\/Error:(.+?)(?=\n\n|\z)/m
|
|
83
|
-
failure_section = $1
|
|
84
|
-
puts "\nFailure details:"
|
|
85
|
-
puts failure_section.lines.take(20).join
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
:false_positive
|
|
89
|
-
elsif !dom_pass && semantic_pass
|
|
90
|
-
puts "⚠️ UNEXPECTED: This was listed as false positive but DOM fails, Semantic passes"
|
|
91
|
-
puts "This is actually a false NEGATIVE, not a false positive!"
|
|
92
|
-
:false_negative
|
|
93
|
-
elsif dom_pass && semantic_pass
|
|
94
|
-
puts "✅ BOTH PASS: This is no longer a false positive!"
|
|
95
|
-
:fixed
|
|
96
|
-
else
|
|
97
|
-
puts "❌ BOTH FAIL: This is a common failure, not a false positive"
|
|
98
|
-
:common_failure
|
|
99
|
-
end
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
def main
|
|
103
|
-
# Get test cases from arguments or use all false positives
|
|
104
|
-
test_cases = if ARGV.empty?
|
|
105
|
-
FALSE_POSITIVES
|
|
106
|
-
else
|
|
107
|
-
ARGV
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
puts "Investigating #{test_cases.size} false positive test cases..."
|
|
111
|
-
|
|
112
|
-
results = {
|
|
113
|
-
false_positive: [],
|
|
114
|
-
false_negative: [],
|
|
115
|
-
fixed: [],
|
|
116
|
-
common_failure: [],
|
|
117
|
-
error: [],
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
test_cases.each do |test_case|
|
|
121
|
-
result = analyze_test(test_case)
|
|
122
|
-
results[result || :error] << test_case
|
|
123
|
-
end
|
|
124
|
-
|
|
125
|
-
# Summary
|
|
126
|
-
puts "\n#{'=' * 80}"
|
|
127
|
-
puts "SUMMARY"
|
|
128
|
-
puts "=" * 80
|
|
129
|
-
|
|
130
|
-
puts "\n✅ Confirmed False Positives (need fixing): #{results[:false_positive].size}"
|
|
131
|
-
results[:false_positive].each { |tc| puts " - #{tc}" }
|
|
132
|
-
|
|
133
|
-
puts "\nš Already Fixed: #{results[:fixed].size}"
|
|
134
|
-
results[:fixed].each { |tc| puts " - #{tc}" }
|
|
135
|
-
|
|
136
|
-
puts "\n⚠️ Misclassified (actually false negatives): #{results[:false_negative].size}"
|
|
137
|
-
results[:false_negative].each { |tc| puts " - #{tc}" }
|
|
138
|
-
|
|
139
|
-
puts "\n❌ Common Failures: #{results[:common_failure].size}"
|
|
140
|
-
results[:common_failure].each { |tc| puts " - #{tc}" }
|
|
141
|
-
|
|
142
|
-
puts "\n💥 Errors: #{results[:error].size}"
|
|
143
|
-
results[:error].each { |tc| puts " - #{tc}" }
|
|
144
|
-
|
|
145
|
-
# Save detailed results
|
|
146
|
-
output_file = "/tmp/false_positive_investigation.txt"
|
|
147
|
-
File.open(output_file, "w") do |f|
|
|
148
|
-
f.puts "FALSE POSITIVE INVESTIGATION RESULTS"
|
|
149
|
-
f.puts "=" * 80
|
|
150
|
-
f.puts "\nConfirmed False Positives (#{results[:false_positive].size}):"
|
|
151
|
-
results[:false_positive].each { |tc| f.puts tc }
|
|
152
|
-
f.puts "\nAlready Fixed (#{results[:fixed].size}):"
|
|
153
|
-
results[:fixed].each { |tc| f.puts tc }
|
|
154
|
-
f.puts "\nMisclassified (#{results[:false_negative].size}):"
|
|
155
|
-
results[:false_negative].each { |tc| f.puts tc }
|
|
156
|
-
f.puts "\nCommon Failures (#{results[:common_failure].size}):"
|
|
157
|
-
results[:common_failure].each { |tc| f.puts tc }
|
|
158
|
-
end
|
|
159
|
-
|
|
160
|
-
puts "\nDetailed results saved to: #{output_file}"
|
|
161
|
-
end
|
|
162
|
-
|
|
163
|
-
main if __FILE__ == $PROGRAM_NAME
|
|
@@ -1,125 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
require "bundler/setup"
|
|
5
|
-
require "canon"
|
|
6
|
-
require "nokogiri"
|
|
7
|
-
|
|
8
|
-
# Test mixed content element extraction
|
|
9
|
-
def test_mixed_content_extraction
|
|
10
|
-
puts "=" * 80
|
|
11
|
-
puts "Testing Mixed Content Text Extraction"
|
|
12
|
-
puts "=" * 80
|
|
13
|
-
|
|
14
|
-
# Create test XML with mixed content
|
|
15
|
-
xml_str = <<~XML
|
|
16
|
-
<root>
|
|
17
|
-
<formattedAddress>123 Main St<br/>Springfield, IL<br/>62701</formattedAddress>
|
|
18
|
-
<normalText>Just plain text</normalText>
|
|
19
|
-
<withSpaces> Text with spaces </withSpaces>
|
|
20
|
-
<withNewlines>Text
|
|
21
|
-
with
|
|
22
|
-
newlines</withNewlines>
|
|
23
|
-
</root>
|
|
24
|
-
XML
|
|
25
|
-
|
|
26
|
-
doc = Nokogiri::XML(xml_str)
|
|
27
|
-
|
|
28
|
-
# Test each element
|
|
29
|
-
doc.root.element_children.each do |elem|
|
|
30
|
-
puts "\nElement: <#{elem.name}>"
|
|
31
|
-
puts " Content: #{elem.content.inspect}"
|
|
32
|
-
|
|
33
|
-
# Extract text nodes
|
|
34
|
-
text_nodes = elem.children.select(&:text?)
|
|
35
|
-
puts " Text nodes count: #{text_nodes.size}"
|
|
36
|
-
text_nodes.each_with_index do |node, i|
|
|
37
|
-
puts " [#{i}]: #{node.text.inspect}"
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
# Join text
|
|
41
|
-
joined = text_nodes.map(&:text).join
|
|
42
|
-
puts " Joined text: #{joined.inspect}"
|
|
43
|
-
|
|
44
|
-
# Show normalization
|
|
45
|
-
normalized = joined.gsub(/\s+/, " ").strip
|
|
46
|
-
puts " Normalized: #{normalized.inspect}"
|
|
47
|
-
|
|
48
|
-
# Element children
|
|
49
|
-
elem_children = elem.element_children
|
|
50
|
-
puts " Element children: #{elem_children.map(&:name).inspect}"
|
|
51
|
-
end
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
# Test with Canon adapter
|
|
55
|
-
def test_with_adapter
|
|
56
|
-
puts "\n#{'=' * 80}"
|
|
57
|
-
puts "Testing with Canon XML Adapter"
|
|
58
|
-
puts "=" * 80
|
|
59
|
-
|
|
60
|
-
xml_str = <<~XML
|
|
61
|
-
<root>
|
|
62
|
-
<formattedAddress>123 Main St<br/>Springfield, IL<br/>62701</formattedAddress>
|
|
63
|
-
</root>
|
|
64
|
-
XML
|
|
65
|
-
|
|
66
|
-
doc = Nokogiri::XML(xml_str)
|
|
67
|
-
|
|
68
|
-
adapter = Canon::TreeDiff::Adapters::XMLAdapter.new
|
|
69
|
-
tree = adapter.to_tree(doc)
|
|
70
|
-
|
|
71
|
-
# Find the formattedAddress node
|
|
72
|
-
address_node = tree.children.first
|
|
73
|
-
|
|
74
|
-
puts "\nTreeNode for formattedAddress:"
|
|
75
|
-
puts " Label: #{address_node.label}"
|
|
76
|
-
puts " Value: #{address_node.value.inspect}"
|
|
77
|
-
puts " Children count: #{address_node.children.size}"
|
|
78
|
-
address_node.children.each do |child|
|
|
79
|
-
puts " Child: #{child.label} = #{child.value.inspect}"
|
|
80
|
-
end
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
# Test normalization in operation detector
|
|
84
|
-
def test_normalization_comparison
|
|
85
|
-
puts "\n#{'=' * 80}"
|
|
86
|
-
puts "Testing Normalization in Comparison"
|
|
87
|
-
puts "=" * 80
|
|
88
|
-
|
|
89
|
-
# Two versions with different whitespace in mixed content
|
|
90
|
-
xml1 = <<~XML
|
|
91
|
-
<root>
|
|
92
|
-
<address>123 Main St<br/>Springfield, IL<br/>62701</address>
|
|
93
|
-
</root>
|
|
94
|
-
XML
|
|
95
|
-
|
|
96
|
-
xml2 = <<~XML
|
|
97
|
-
<root>
|
|
98
|
-
<address>123 Main St<br/>Springfield, IL<br/>62701</address>
|
|
99
|
-
</root>
|
|
100
|
-
XML
|
|
101
|
-
|
|
102
|
-
# Compare with whitespace_sensitive: false
|
|
103
|
-
result = Canon.semantic_tree_diff(xml1, xml2,
|
|
104
|
-
whitespace_sensitive: false,
|
|
105
|
-
verbose: true)
|
|
106
|
-
|
|
107
|
-
puts "\nComparison result:"
|
|
108
|
-
puts " Identical: #{result.identical?}"
|
|
109
|
-
puts " Normative differences: #{result.normative_differences?}"
|
|
110
|
-
puts " Informative differences: #{result.informative_differences?}"
|
|
111
|
-
|
|
112
|
-
if result.operations.any?
|
|
113
|
-
puts "\nOperations:"
|
|
114
|
-
result.operations.each do |op|
|
|
115
|
-
puts " #{op.type}: #{op.path} - #{op.classification}"
|
|
116
|
-
puts " Old: #{op.old_value.inspect}" if op.old_value
|
|
117
|
-
puts " New: #{op.new_value.inspect}" if op.new_value
|
|
118
|
-
end
|
|
119
|
-
end
|
|
120
|
-
end
|
|
121
|
-
|
|
122
|
-
# Run all tests
|
|
123
|
-
test_mixed_content_extraction
|
|
124
|
-
test_with_adapter
|
|
125
|
-
test_normalization_comparison
|