canon 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +112 -25
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +82 -2
- data/docs/features/match-options/index.adoc +239 -1
- data/lib/canon/comparison/format_detector.rb +2 -1
- data/lib/canon/comparison/html_comparator.rb +19 -8
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
- data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
- data/lib/canon/comparison/xml_comparator.rb +48 -23
- data/lib/canon/comparison/xml_node_comparison.rb +25 -3
- data/lib/canon/diff/diff_classifier.rb +101 -2
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/rspec_matchers.rb +37 -8
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +3 -78
- data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
- data/false_positive_analysis.txt +0 -0
- data/file1.html +0 -1
- data/file2.html +0 -1
- data/old-docs/ADVANCED_TOPICS.adoc +0 -20
- data/old-docs/BASIC_USAGE.adoc +0 -16
- data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
- data/old-docs/CLI.adoc +0 -497
- data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/old-docs/DIFF_FORMATTING.adoc +0 -540
- data/old-docs/DIFF_PARAMETERS.adoc +0 -261
- data/old-docs/DOM_DIFF.adoc +0 -1017
- data/old-docs/ENV_CONFIG.adoc +0 -876
- data/old-docs/FORMATS.adoc +0 -867
- data/old-docs/INPUT_VALIDATION.adoc +0 -477
- data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
- data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/old-docs/MATCH_OPTIONS.adoc +0 -912
- data/old-docs/MODES.adoc +0 -432
- data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/old-docs/OPTIONS.adoc +0 -1387
- data/old-docs/PREPROCESSING.adoc +0 -491
- data/old-docs/README.old.adoc +0 -2831
- data/old-docs/RSPEC.adoc +0 -814
- data/old-docs/RUBY_API.adoc +0 -485
- data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
- data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
- data/old-docs/STRING_COMPARE.adoc +0 -345
- data/old-docs/TMP.adoc +0 -3384
- data/old-docs/TREE_DIFF.adoc +0 -1080
- data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
- data/old-docs/VERBOSE.adoc +0 -482
- data/old-docs/VISUALIZATION_MAP.adoc +0 -625
- data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
- data/scripts/analyze_current_state.rb +0 -85
- data/scripts/analyze_false_positives.rb +0 -114
- data/scripts/analyze_remaining_failures.rb +0 -105
- data/scripts/compare_current_failures.rb +0 -95
- data/scripts/compare_dom_tree_diff.rb +0 -158
- data/scripts/compare_failures.rb +0 -151
- data/scripts/debug_attribute_extraction.rb +0 -66
- data/scripts/debug_blocks_839.rb +0 -115
- data/scripts/debug_meta_matching.rb +0 -52
- data/scripts/debug_p_matching.rb +0 -192
- data/scripts/debug_signature_matching.rb +0 -118
- data/scripts/debug_sourcecode_124.rb +0 -32
- data/scripts/debug_whitespace_sensitive.rb +0 -192
- data/scripts/extract_false_positives.rb +0 -138
- data/scripts/find_actual_false_positives.rb +0 -125
- data/scripts/investigate_all_false_positives.rb +0 -161
- data/scripts/investigate_batch1.rb +0 -127
- data/scripts/investigate_classification.rb +0 -150
- data/scripts/investigate_classification_detailed.rb +0 -190
- data/scripts/investigate_common_failures.rb +0 -342
- data/scripts/investigate_false_negative.rb +0 -80
- data/scripts/investigate_false_positive.rb +0 -83
- data/scripts/investigate_false_positives.rb +0 -227
- data/scripts/investigate_false_positives_batch.rb +0 -163
- data/scripts/investigate_mixed_content.rb +0 -125
- data/scripts/investigate_remaining_16.rb +0 -214
- data/scripts/run_single_test.rb +0 -29
- data/scripts/test_all_false_positives.rb +0 -95
- data/scripts/test_attribute_details.rb +0 -61
- data/scripts/test_both_algorithms.rb +0 -49
- data/scripts/test_both_simple.rb +0 -49
- data/scripts/test_enhanced_semantic_output.rb +0 -125
- data/scripts/test_readme_examples.rb +0 -131
- data/scripts/test_semantic_tree_diff.rb +0 -99
- data/scripts/test_semantic_ux_improvements.rb +0 -135
- data/scripts/test_single_false_positive.rb +0 -119
- data/scripts/test_size_limits.rb +0 -99
- data/test_html_1.html +0 -21
- data/test_html_2.html +0 -21
- data/test_nokogiri.rb +0 -33
- data/test_normalize.rb +0 -45
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
# frozen_string_literal: true

# Debug sourcecode_spec.rb:124 to understand the false positive pattern.
# Runs the same isodoc example twice, once per Canon diff algorithm, with
# verbose diff output enabled, so the two reports can be compared by eye.
# Usage: ruby scripts/debug_sourcecode_124.rb

require "bundler/setup"
require_relative "../../src/mn/isodoc/spec/spec_helper"

banner = "=" * 80
rspec_command = "cd /Users/mulgogi/src/mn/isodoc && bundle exec rspec spec/isodoc/sourcecode_spec.rb:124 --format documentation 2>&1"

# Each entry: the algorithm name exported to the environment and the heading
# printed before that run.
runs = [
  ["dom", "\n1. Running with DOM algorithm (should PASS):"],
  ["semantic", "\n2. Running with Semantic algorithm (should FAIL):"],
]

puts banner
puts "DEBUGGING: sourcecode_spec.rb:124"
puts banner

runs.each_with_index do |(algorithm, heading), index|
  # A separator is printed between runs, not before the first one.
  puts "\n#{banner}" unless index.zero?

  puts heading
  puts "-" * 60
  ENV["CANON_HTML_DIFF_ALGORITHM"] = algorithm
  ENV["CANON_XML_DIFF_ALGORITHM"] = algorithm
  ENV["CANON_HTML_DIFF_VERBOSE"] = "true"
  ENV["CANON_XML_DIFF_VERBOSE"] = "true"
  system(rspec_command)
end
|
|
@@ -1,192 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
require_relative "../lib/canon"
|
|
5
|
-
require "nokogiri"
|
|
6
|
-
|
|
7
|
-
expected = <<~HTML
|
|
8
|
-
<pre>
|
|
9
|
-
|
|
10
|
-
</pre>
|
|
11
|
-
HTML
|
|
12
|
-
|
|
13
|
-
actual = <<~HTML
|
|
14
|
-
<pre> </pre>
|
|
15
|
-
HTML
|
|
16
|
-
|
|
17
|
-
# Parse and inspect the trees directly
|
|
18
|
-
puts "=== Tree Inspection ==="
|
|
19
|
-
doc1 = Nokogiri::HTML(expected)
|
|
20
|
-
doc2 = Nokogiri::HTML(actual)
|
|
21
|
-
|
|
22
|
-
pre1 = doc1.at_css("pre")
|
|
23
|
-
pre2 = doc2.at_css("pre")
|
|
24
|
-
|
|
25
|
-
puts "Pre1 text: #{pre1.text.inspect}"
|
|
26
|
-
puts "Pre1 text length: #{pre1.text.length}"
|
|
27
|
-
puts "Pre1 text bytes: #{pre1.text.bytes.inspect}"
|
|
28
|
-
|
|
29
|
-
puts "\nPre2 text: #{pre2.text.inspect}"
|
|
30
|
-
puts "Pre2 text length: #{pre2.text.length}"
|
|
31
|
-
puts "Pre2 text bytes: #{pre2.text.bytes.inspect}"
|
|
32
|
-
|
|
33
|
-
# Now build trees using the adapter
|
|
34
|
-
adapter = Canon::TreeDiff::Adapters::HTMLAdapter.new
|
|
35
|
-
tree1 = adapter.to_tree(doc1)
|
|
36
|
-
tree2 = adapter.to_tree(doc2)
|
|
37
|
-
|
|
38
|
-
# Check signatures
|
|
39
|
-
# tree1 structure: html -> body -> pre
|
|
40
|
-
body1 = tree1.children.first
|
|
41
|
-
pre1_node = body1.children.find { |c| c.label == "pre" }
|
|
42
|
-
body2 = tree2.children.first
|
|
43
|
-
pre2_node = body2.children.find { |c| c.label == "pre" }
|
|
44
|
-
|
|
45
|
-
puts "\n=== Signatures ==="
|
|
46
|
-
if pre1_node && pre2_node
|
|
47
|
-
sig1 = Canon::TreeDiff::Core::NodeSignature.for(pre1_node)
|
|
48
|
-
sig2 = Canon::TreeDiff::Core::NodeSignature.for(pre2_node)
|
|
49
|
-
puts "Pre1 label: #{pre1_node.label}, value: #{pre1_node.value.inspect}"
|
|
50
|
-
puts "Pre1 signature: #{sig1}"
|
|
51
|
-
puts "Pre2 label: #{pre2_node.label}, value: #{pre2_node.value.inspect}"
|
|
52
|
-
puts "Pre2 signature: #{sig2}"
|
|
53
|
-
puts "Signatures equal: #{sig1 == sig2}"
|
|
54
|
-
else
|
|
55
|
-
puts "ERROR: Could not find <pre> nodes"
|
|
56
|
-
puts "Body1 children: #{body1.children.map(&:label)}"
|
|
57
|
-
puts "Body2 children: #{body2.children.map(&:label)}"
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
# Recursively prints a tree node and its descendants to stdout.
#
# For each node the label is printed in angle brackets, followed by the
# node's value (only when it is truthy) and its attributes (only when the
# collection is non-empty). Children are printed one indent level deeper.
#
# @param node [#label, #value, #attributes, #children] node to print
# @param indent [Integer] current depth; one space of padding per level
# @return the node's children collection (result of the final +each+)
def print_tree(node, indent = 0)
  pad = " " * indent
  puts "#{pad}<#{node.label}>"

  value = node.value
  puts "#{pad} value: #{value.inspect}" if value

  attrs = node.attributes
  puts "#{pad} attrs: #{attrs}" if !attrs.empty?

  node.children.each do |child|
    print_tree(child, indent + 1)
  end
end
|
|
67
|
-
|
|
68
|
-
puts "\n=== Tree 1 ==="
|
|
69
|
-
print_tree(tree1)
|
|
70
|
-
|
|
71
|
-
puts "\n=== Tree 2 ==="
|
|
72
|
-
print_tree(tree2)
|
|
73
|
-
|
|
74
|
-
# Now test comparison
|
|
75
|
-
puts "\n=== Comparison ==="
|
|
76
|
-
|
|
77
|
-
# Test using TreeDiff directly
|
|
78
|
-
require_relative "../lib/canon/tree_diff/tree_diff_integrator"
|
|
79
|
-
integrator = Canon::TreeDiff::TreeDiffIntegrator.new(
|
|
80
|
-
format: :html,
|
|
81
|
-
options: {},
|
|
82
|
-
)
|
|
83
|
-
|
|
84
|
-
puts "\n=== Direct TreeDiff Test ==="
|
|
85
|
-
diff_result = integrator.diff(doc1, doc2)
|
|
86
|
-
puts "Operations count: #{diff_result[:operations].size}"
|
|
87
|
-
diff_result[:operations].each_with_index do |op, idx|
|
|
88
|
-
puts "\nOperation #{idx + 1}:"
|
|
89
|
-
puts " Type: #{op.type}"
|
|
90
|
-
puts " Node: #{begin
|
|
91
|
-
op[:node]&.label
|
|
92
|
-
rescue StandardError
|
|
93
|
-
'N/A'
|
|
94
|
-
end}"
|
|
95
|
-
if op[:node]
|
|
96
|
-
puts " Value: #{begin
|
|
97
|
-
op[:node]&.value.inspect
|
|
98
|
-
rescue StandardError
|
|
99
|
-
'N/A'
|
|
100
|
-
end}"
|
|
101
|
-
end
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
# Convert operations to DiffNodes
|
|
105
|
-
puts "\n=== Operation Conversion ==="
|
|
106
|
-
converter = Canon::TreeDiff::OperationConverter.new(
|
|
107
|
-
format: :html,
|
|
108
|
-
match_options: {},
|
|
109
|
-
)
|
|
110
|
-
diff_nodes = converter.convert(diff_result[:operations])
|
|
111
|
-
puts "Converted DiffNodes count: #{diff_nodes.size}"
|
|
112
|
-
diff_nodes.each_with_index do |dn, idx|
|
|
113
|
-
puts "\nDiffNode #{idx + 1}:"
|
|
114
|
-
puts " Dimension: #{dn.dimension}"
|
|
115
|
-
puts " Normative: #{dn.normative?}"
|
|
116
|
-
puts " Reason: #{dn.reason}"
|
|
117
|
-
puts " Node1: #{begin
|
|
118
|
-
dn.node1.inspect
|
|
119
|
-
rescue StandardError
|
|
120
|
-
'nil'
|
|
121
|
-
end}"
|
|
122
|
-
puts " Node2: #{begin
|
|
123
|
-
dn.node2.inspect
|
|
124
|
-
rescue StandardError
|
|
125
|
-
'nil'
|
|
126
|
-
end}"
|
|
127
|
-
end
|
|
128
|
-
|
|
129
|
-
# Now test via Canon::Comparison
|
|
130
|
-
puts "\n=== Canon::Comparison Result (with :semantic) ==="
|
|
131
|
-
result = Canon::Comparison.equivalent?(
|
|
132
|
-
expected,
|
|
133
|
-
actual,
|
|
134
|
-
format: :html,
|
|
135
|
-
diff_algorithm: :semantic,
|
|
136
|
-
verbose: true,
|
|
137
|
-
)
|
|
138
|
-
|
|
139
|
-
puts "Result class: #{result.class}"
|
|
140
|
-
puts "Equivalent: #{result.equivalent?}"
|
|
141
|
-
puts "Differences count: #{result.differences.size}"
|
|
142
|
-
puts "Has normative diffs: #{result.has_normative_diffs?}"
|
|
143
|
-
|
|
144
|
-
puts "\n=== Trying with :semantic_tree ==="
|
|
145
|
-
result2 = Canon::Comparison.equivalent?(
|
|
146
|
-
expected,
|
|
147
|
-
actual,
|
|
148
|
-
format: :html,
|
|
149
|
-
diff_algorithm: :semantic_tree,
|
|
150
|
-
verbose: true,
|
|
151
|
-
)
|
|
152
|
-
|
|
153
|
-
puts "Result class: #{result2.class}"
|
|
154
|
-
if result2.is_a?(Canon::Comparison::ComparisonResult)
|
|
155
|
-
puts "Equivalent: #{result2.equivalent?}"
|
|
156
|
-
puts "Differences count: #{result2.differences.size}"
|
|
157
|
-
else
|
|
158
|
-
puts "Result: #{result2.inspect}"
|
|
159
|
-
end
|
|
160
|
-
|
|
161
|
-
result.differences.each_with_index do |diff, idx|
|
|
162
|
-
puts "\n--- Diff #{idx + 1} ---"
|
|
163
|
-
puts "Dimension: #{diff.dimension}"
|
|
164
|
-
puts "Normative: #{diff.normative?}"
|
|
165
|
-
puts "Reason: #{diff.reason}"
|
|
166
|
-
if diff.node1
|
|
167
|
-
puts "Node1 type: #{diff.node1.class}"
|
|
168
|
-
puts "Node1 name: #{begin
|
|
169
|
-
diff.node1.name
|
|
170
|
-
rescue StandardError
|
|
171
|
-
'N/A'
|
|
172
|
-
end}"
|
|
173
|
-
puts "Node1 text: #{begin
|
|
174
|
-
diff.node1.text.inspect
|
|
175
|
-
rescue StandardError
|
|
176
|
-
'N/A'
|
|
177
|
-
end}"
|
|
178
|
-
end
|
|
179
|
-
if diff.node2
|
|
180
|
-
puts "Node2 type: #{diff.node2.class}"
|
|
181
|
-
puts "Node2 name: #{begin
|
|
182
|
-
diff.node2.name
|
|
183
|
-
rescue StandardError
|
|
184
|
-
'N/A'
|
|
185
|
-
end}"
|
|
186
|
-
puts "Node2 text: #{begin
|
|
187
|
-
diff.node2.text.inspect
|
|
188
|
-
rescue StandardError
|
|
189
|
-
'N/A'
|
|
190
|
-
end}"
|
|
191
|
-
end
|
|
192
|
-
end
|
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Script to extract and analyze the 16 false positive test cases
|
|
5
|
-
# Usage: ruby scripts/extract_false_positives.rb
|
|
6
|
-
|
|
7
|
-
require "bundler/setup"
|
|
8
|
-
require "canon"
|
|
9
|
-
|
|
10
|
-
# Map of test files to line numbers
|
|
11
|
-
FALSE_POSITIVES = {
|
|
12
|
-
"blocks_spec.rb" => [352],
|
|
13
|
-
"footnotes_spec.rb" => [740],
|
|
14
|
-
"inline_spec.rb" => [1012, 1251],
|
|
15
|
-
"postproc_spec.rb" => [948],
|
|
16
|
-
"postproc_word_spec.rb" => [372, 576],
|
|
17
|
-
"presentation_xml_numbers_override_spec.rb" => [2095],
|
|
18
|
-
"presentation_xml_spec.rb" => [1288, 1500],
|
|
19
|
-
"ref_spec.rb" => [906],
|
|
20
|
-
"sourcecode_spec.rb" => [124, 610],
|
|
21
|
-
"terms_spec.rb" => [1445],
|
|
22
|
-
"xref_format_spec.rb" => [628],
|
|
23
|
-
"xref_spec.rb" => [315],
|
|
24
|
-
}.freeze
|
|
25
|
-
|
|
26
|
-
ISODOC_SPEC_DIR = File.expand_path("../../mn/isodoc/spec/isodoc", __dir__)
|
|
27
|
-
|
|
28
|
-
# Extracts the source of the RSpec example that surrounds a given line.
#
# Walks backward from +line_number+ to the nearest line that opens an
# example (+it "..."+ or +it '...'+), then walks forward from that opening
# line, counting +do+/+begin+ against +end+, until the block is closed.
#
# @param file_path [String] path of the spec file to read
# @param line_number [Integer] 1-based line on or inside the example
# @return [Hash, nil] a hash with +:file+ (basename), +:line+,
#   +:description+ and +:content+ (the extracted lines joined), or +nil+
#   when the file does not exist or contains no lines
def extract_test_context(file_path, line_number)
  return nil unless File.exist?(file_path)

  lines = File.readlines(file_path)
  return nil if lines.empty?

  # Clamp so a line number past EOF (or below 1) still maps to a real line.
  anchor = (line_number - 1).clamp(0, lines.length - 1)

  # Nearest example opener at or above the anchor. Index 0 is the fallback
  # and is used without being tested.
  start_line = anchor.downto(1).find { |i| /^\s*it\s+["']/.match?(lines[i]) } || 0

  # Nesting depth has to be counted from the example's opening line. Counting
  # from the requested line would start at depth 0 in the middle of the block
  # and stop on the very first line, truncating the extracted content.
  depth = 0
  end_line = start_line
  while end_line < lines.length
    line = lines[end_line]
    depth += 1 if /\b(do|begin)\b/.match?(line)
    depth -= 1 if /\bend\b/.match?(line)
    # Never stop before the requested line, even when the keyword count is
    # thrown off by "do"/"end" appearing inside strings or heredocs.
    break if depth <= 0 && end_line > start_line && end_line >= anchor

    end_line += 1
  end

  description = lines[start_line][/it\s+["'](.+?)["']/, 1] || "Unknown test"

  {
    file: File.basename(file_path),
    line: line_number,
    description: description,
    content: lines[start_line..end_line].join,
  }
end
|
|
63
|
-
|
|
64
|
-
# Tags an extracted test with the names of every markup pattern whose
# regular expression matches the test's source text.
#
# @param test_info [Hash] must respond to +[:content]+ with the test source
# @return [Array<String>] matching pattern names, in the fixed order in
#   which the detectors are declared below
def analyze_test_for_patterns(test_info)
  source_text = test_info[:content]

  # Insertion order of this hash defines the order of the returned names.
  detectors = {
    "whitespace_in_pre" => /<pre[^>]*>.*?<\/pre>/m,
    "sourcecode_element" => /sourcecode/i,
    "metadata_elements" => /<(bookmark|span|meta|a name=)/,
    "mixed_content" => /<[^>]+>[^<]*<[^>]+>/,
    "attribute_order" => /\s+\w+=["'][^"']*["']\s+\w+=["'][^"']*["']/,
    "nested_formatting" => /<(strong|em|i|b|u)[^>]*>.*?<(strong|em|i|b|u)/m,
  }

  detectors.select { |_name, regex| regex.match?(source_text) }.keys
end
|
|
79
|
-
|
|
80
|
-
puts "=" * 80
|
|
81
|
-
puts "EXTRACTING FALSE POSITIVE TEST CASES"
|
|
82
|
-
puts "=" * 80
|
|
83
|
-
|
|
84
|
-
all_tests = []
|
|
85
|
-
pattern_summary = Hash.new(0)
|
|
86
|
-
|
|
87
|
-
FALSE_POSITIVES.each do |file, line_numbers|
|
|
88
|
-
file_path = File.join(ISODOC_SPEC_DIR, file)
|
|
89
|
-
|
|
90
|
-
puts "\n#{file}:"
|
|
91
|
-
|
|
92
|
-
line_numbers.each do |line|
|
|
93
|
-
test_info = extract_test_context(file_path, line)
|
|
94
|
-
|
|
95
|
-
if test_info
|
|
96
|
-
patterns = analyze_test_for_patterns(test_info)
|
|
97
|
-
test_info[:patterns] = patterns
|
|
98
|
-
all_tests << test_info
|
|
99
|
-
|
|
100
|
-
patterns.each { |p| pattern_summary[p] += 1 }
|
|
101
|
-
|
|
102
|
-
puts " Line #{line}: #{test_info[:description]}"
|
|
103
|
-
puts " Patterns: #{patterns.join(', ')}" unless patterns.empty?
|
|
104
|
-
else
|
|
105
|
-
puts " Line #{line}: ⚠️ Could not extract test"
|
|
106
|
-
end
|
|
107
|
-
end
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
puts "\n#{'=' * 80}"
|
|
111
|
-
puts "PATTERN SUMMARY"
|
|
112
|
-
puts "=" * 80
|
|
113
|
-
|
|
114
|
-
pattern_summary.sort_by { |_, count| -count }.each do |pattern, count|
|
|
115
|
-
puts " #{pattern}: #{count} occurrences"
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
puts "\n#{'=' * 80}"
|
|
119
|
-
puts "DETAILED TEST EXTRACTION"
|
|
120
|
-
puts "=" * 80
|
|
121
|
-
|
|
122
|
-
# Save detailed output
|
|
123
|
-
output_file = "false_positive_analysis.txt"
|
|
124
|
-
File.open(output_file, "w") do |f|
|
|
125
|
-
all_tests.each_with_index do |test, i|
|
|
126
|
-
f.puts "\n#{'=' * 80}"
|
|
127
|
-
f.puts "TEST #{i + 1}: #{test[:file]}:#{test[:line]}"
|
|
128
|
-
f.puts "=" * 80
|
|
129
|
-
f.puts "Description: #{test[:description]}"
|
|
130
|
-
f.puts "Patterns: #{test[:patterns].join(', ')}"
|
|
131
|
-
f.puts "\nTest Code:"
|
|
132
|
-
f.puts "-" * 80
|
|
133
|
-
f.puts test[:content]
|
|
134
|
-
end
|
|
135
|
-
end
|
|
136
|
-
|
|
137
|
-
puts "\nDetailed analysis saved to: #{output_file}"
|
|
138
|
-
puts "\nTotal false positives analyzed: #{all_tests.length}"
|
|
@@ -1,125 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Find the actual false positives by comparing DOM vs semantic failures
|
|
5
|
-
|
|
6
|
-
require "json"
|
|
7
|
-
require "set"
|
|
8
|
-
|
|
9
|
-
ISODOC_DIR = File.expand_path("../../../mn/isodoc", __dir__)
|
|
10
|
-
|
|
11
|
-
# Runs the rspec suite inside ISODOC_DIR with CANON_ALGORITHM set to the
# given algorithm, writing a JSON report to a per-process file under /tmp,
# then reads, deletes and parses that report.
#
# @param algorithm [String] value exported as CANON_ALGORITHM
# @return [Object, nil] the parsed JSON report, or +nil+ when the report
#   file was not created or could not be parsed
def run_tests(algorithm)
  puts "Running with CANON_ALGORITHM=#{algorithm}..."
  report_path = "/tmp/rspec_#{algorithm}_#{Process.pid}.json"
  system("cd #{ISODOC_DIR} && CANON_ALGORITHM=#{algorithm} bundle exec rspec --format json --out #{report_path} 2>&1 >/dev/null")

  unless File.exist?(report_path)
    puts "Output file not created for #{algorithm}"
    return nil
  end

  # Read first, then remove the temporary report before parsing it.
  raw = File.read(report_path)
  File.delete(report_path)
  JSON.parse(raw)
rescue JSON::ParserError => e
  puts "Failed to parse JSON for #{algorithm}: #{e.message}"
  puts "First 200 chars: #{raw[0..200]}"
  nil
end
|
|
32
|
-
|
|
33
|
-
# Collects "file:line" identifiers for every failed example in a parsed
# rspec JSON report.
#
# Only examples whose id has the form "./spec/isodoc/<file>[...]" are
# reported; failed examples with any other id are silently dropped.
#
# @param results [Hash, nil] parsed report with an "examples" array
# @return [Array<String>] entries such as "spec/isodoc/blocks_spec.rb:352";
#   empty when +results+ is nil or has no "examples" key
def extract_failures(results)
  examples = results && results["examples"]
  return [] unless examples

  examples.each_with_object([]) do |example, failed|
    next unless example["status"] == "failed"

    # Id format: "./spec/isodoc/blocks_spec.rb[1:1:1]"
    location = %r{\./(spec/isodoc/[^\[]+)\[}.match(example["id"])
    failed << "#{location[1]}:#{example['line_number']}" if location
  end
end
|
|
46
|
-
|
|
47
|
-
puts "=" * 80
|
|
48
|
-
puts "FINDING ACTUAL FALSE POSITIVES"
|
|
49
|
-
puts "=" * 80
|
|
50
|
-
puts
|
|
51
|
-
|
|
52
|
-
# Run with both algorithms
|
|
53
|
-
dom_results = run_tests("dom")
|
|
54
|
-
semantic_results = run_tests("semantic")
|
|
55
|
-
|
|
56
|
-
if dom_results.nil? || semantic_results.nil?
|
|
57
|
-
puts "ERROR: Failed to get test results"
|
|
58
|
-
exit 1
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
# Extract failure lists
|
|
62
|
-
dom_failures = Set.new(extract_failures(dom_results))
|
|
63
|
-
semantic_failures = Set.new(extract_failures(semantic_results))
|
|
64
|
-
|
|
65
|
-
puts "DOM failures: #{dom_failures.size}"
|
|
66
|
-
puts "Semantic failures: #{semantic_failures.size}"
|
|
67
|
-
puts
|
|
68
|
-
|
|
69
|
-
# Find false positives (pass with DOM, fail with semantic)
|
|
70
|
-
false_positives = semantic_failures - dom_failures
|
|
71
|
-
|
|
72
|
-
puts "=" * 80
|
|
73
|
-
puts "FALSE POSITIVES (#{false_positives.size})"
|
|
74
|
-
puts "Tests that PASS with DOM but FAIL with semantic:"
|
|
75
|
-
puts "=" * 80
|
|
76
|
-
|
|
77
|
-
if false_positives.empty?
|
|
78
|
-
puts "✅ NO FALSE POSITIVES FOUND!"
|
|
79
|
-
puts "DOM and semantic algorithms have perfect parity!"
|
|
80
|
-
else
|
|
81
|
-
false_positives.sort.each_with_index do |test, idx|
|
|
82
|
-
puts "#{idx + 1}. #{test}"
|
|
83
|
-
end
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
# Find false negatives (fail with DOM, pass with semantic)
|
|
87
|
-
false_negatives = dom_failures - semantic_failures
|
|
88
|
-
|
|
89
|
-
puts
|
|
90
|
-
puts "=" * 80
|
|
91
|
-
puts "FALSE NEGATIVES (#{false_negatives.size})"
|
|
92
|
-
puts "Tests that FAIL with DOM but PASS with semantic:"
|
|
93
|
-
puts "=" * 80
|
|
94
|
-
|
|
95
|
-
if false_negatives.empty?
|
|
96
|
-
puts "✅ NO FALSE NEGATIVES FOUND!"
|
|
97
|
-
else
|
|
98
|
-
false_negatives.sort.each_with_index do |test, idx|
|
|
99
|
-
puts "#{idx + 1}. #{test}"
|
|
100
|
-
end
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
# Common failures
|
|
104
|
-
common_failures = dom_failures & semantic_failures
|
|
105
|
-
|
|
106
|
-
puts
|
|
107
|
-
puts "=" * 80
|
|
108
|
-
puts "COMMON FAILURES (#{common_failures.size})"
|
|
109
|
-
puts "Tests that FAIL with BOTH algorithms:"
|
|
110
|
-
puts "=" * 80
|
|
111
|
-
puts "#{common_failures.size} tests fail with both algorithms"
|
|
112
|
-
|
|
113
|
-
# Summary
|
|
114
|
-
puts
|
|
115
|
-
puts "=" * 80
|
|
116
|
-
puts "SUMMARY"
|
|
117
|
-
puts "=" * 80
|
|
118
|
-
puts "Total tests: #{dom_results['examples'].size}"
|
|
119
|
-
puts "DOM failures: #{dom_failures.size}"
|
|
120
|
-
puts "Semantic failures: #{semantic_failures.size}"
|
|
121
|
-
puts "Common failures: #{common_failures.size}"
|
|
122
|
-
puts "False positives: #{false_positives.size} (semantic fails, DOM passes)"
|
|
123
|
-
puts "False negatives: #{false_negatives.size} (DOM fails, semantic passes)"
|
|
124
|
-
puts "Gap: #{(semantic_failures.size - dom_failures.size).abs}"
|
|
125
|
-
puts "=" * 80
|
|
@@ -1,161 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
# Systematically investigate all false positive failures
|
|
5
|
-
# to identify patterns in why semantic fails but DOM passes
|
|
6
|
-
|
|
7
|
-
require "fileutils"
|
|
8
|
-
|
|
9
|
-
FALSE_POSITIVES = [
|
|
10
|
-
"blocks_spec.rb:352",
|
|
11
|
-
"footnotes_spec.rb:740",
|
|
12
|
-
"inline_spec.rb:1012",
|
|
13
|
-
"inline_spec.rb:1251",
|
|
14
|
-
"postproc_spec.rb:948",
|
|
15
|
-
"postproc_word_spec.rb:372",
|
|
16
|
-
"postproc_word_spec.rb:576",
|
|
17
|
-
"presentation_xml_numbers_override_spec.rb:2095",
|
|
18
|
-
"presentation_xml_spec.rb:1288",
|
|
19
|
-
"presentation_xml_spec.rb:1500",
|
|
20
|
-
"ref_spec.rb:906",
|
|
21
|
-
"sourcecode_spec.rb:124",
|
|
22
|
-
"sourcecode_spec.rb:610",
|
|
23
|
-
"terms_spec.rb:1445",
|
|
24
|
-
"xref_format_spec.rb:628",
|
|
25
|
-
"xref_spec.rb:315",
|
|
26
|
-
].freeze
|
|
27
|
-
|
|
28
|
-
ISODOC_PATH = "/Users/mulgogi/src/mn/isodoc"
|
|
29
|
-
|
|
30
|
-
# Runs a single isodoc example through rspec with CANON_ALGORITHM set to
# the given algorithm and captures everything it prints (stderr is merged
# into stdout by the command itself).
#
# @param spec_file [String] spec file name under ./spec/isodoc
# @param line [String, Integer] line number of the example to run
# @param algorithm [String] value exported as CANON_ALGORITHM
# @return [Hash] +:passed+ (exit status of the shell command was success)
#   and +:output+ (captured text)
def run_test(spec_file, line, algorithm)
  command = "cd #{ISODOC_PATH} && CANON_ALGORITHM=#{algorithm} bundle exec rspec ./spec/isodoc/#{spec_file}:#{line} 2>&1"
  captured = %x(#{command})
  succeeded = $?.success?

  { passed: succeeded, output: captured }
end
|
|
38
|
-
|
|
39
|
-
# Pulls the first reported dimension, its location, and every change
# description out of a textual diff report.
#
# The location is only recognised when a "Location:" line directly follows
# the "Dimension:" line.
#
# @param output [String] captured report text
# @return [Hash] +:dimension+ (String or nil), +:location+ (String or nil)
#   and +:changes+ (Array<String>, possibly empty)
def extract_diff_type(output)
  # Capture the whole remainder of the Location line. A greedy `[^\n]*`
  # placed directly before the capture group would swallow the line and
  # leave only its final character to be captured.
  location = output[/Dimension:[^\n]*\n[^\n]*Location:[ \t]*([^\n]+)/, 1]&.strip

  dimension = output[/Dimension:\s*([^\n]+)/, 1]&.strip

  # Every "✨ Changes:" line contributes one entry, in report order.
  changes = output.scan(/✨ Changes:\s*([^\n]+)/).map { |(text)| text.strip }

  {
    dimension: dimension,
    location: location,
    changes: changes,
  }
end
|
|
61
|
-
|
|
62
|
-
# Re-runs one suspected false positive under both algorithms, confirms that
# it passes with "dom" and fails with "semantic", prints the diff details
# extracted from the semantic run, and saves that run's full output under
# /tmp/false_positives.
#
# @param fp [String] "<spec file>:<line>" identifier
# @return [Hash, nil] the extracted diff details merged with +:spec+,
#   +:file+ and +:line+, or +nil+ when the case is not a false positive
def analyze_false_positive(fp)
  file, line = fp.split(":")
  rule = "=" * 80
  puts "\n#{rule}"
  puts "Analyzing: #{fp}"
  puts rule

  # Run with both algorithms
  puts "\nRunning with DOM algorithm..."
  dom_result = run_test(file, line, "dom")

  puts "Running with SEMANTIC algorithm..."
  semantic_result = run_test(file, line, "semantic")

  # A false positive means: DOM passes AND semantic fails. Anything else
  # is reported and skipped.
  dom_passed = dom_result[:passed]
  semantic_passed = semantic_result[:passed]
  if !dom_passed || semantic_passed
    puts "⚠️ WARNING: Not a false positive!"
    puts " DOM passed: #{dom_passed}"
    puts " Semantic passed: #{semantic_passed}"
    return nil
  end

  puts "✓ Confirmed false positive (DOM passes, Semantic fails)"

  # Extract diff details from semantic output
  diff_info = extract_diff_type(semantic_result[:output])
  changes = diff_info[:changes]

  puts "\nDiff Details:"
  puts " Dimension: #{diff_info[:dimension] || 'unknown'}"
  puts " Location: #{diff_info[:location] || 'unknown'}"
  puts " Changes: #{changes.join(', ')}" unless changes.empty?

  # Save full output for detailed analysis
  output_dir = "/tmp/false_positives"
  FileUtils.mkdir_p(output_dir)

  report_path = "#{output_dir}/#{file.gsub('.rb', '')}_#{line}.txt"
  File.write(report_path, semantic_result[:output])
  puts "\nFull output saved to: #{report_path}"

  diff_info.merge(spec: fp, file: file, line: line)
end
|
|
104
|
-
|
|
105
|
-
# Entry point: analyzes every entry of FALSE_POSITIVES, prints the confirmed
# cases grouped by diff dimension and by change description, and writes the
# same grouping to /tmp/false_positive_patterns.txt.
#
# @return [nil]
def main
  total = FALSE_POSITIVES.size
  puts "Investigating #{total} false positives..."
  puts "This will take several minutes..."

  results = []

  FALSE_POSITIVES.each.with_index(1) do |fp, position|
    puts "\n[#{position}/#{total}]"
    outcome = analyze_false_positive(fp)
    results.push(outcome) if outcome
    sleep 0.5 # Brief pause between tests
  end

  # Summarize patterns
  rule = "=" * 80
  puts "\n#{rule}"
  puts "PATTERN ANALYSIS"
  puts rule

  puts "\nBy Dimension:"
  dimension_groups = results.compact.group_by { |entry| entry[:dimension] }
  dimension_groups.each do |dim, members|
    puts " #{dim}: #{members.size} cases"
    members.each { |entry| puts " - #{entry[:spec]}" }
  end

  puts "\nBy Changes:"
  changes_groups = results.compact.group_by { |entry| entry[:changes].join(", ") }
  changes_groups.each do |change, members|
    puts " #{change}: #{members.size} cases"
    members.each { |entry| puts " - #{entry[:spec]}" }
  end

  # Save summary
  summary_file = "/tmp/false_positive_patterns.txt"
  File.open(summary_file, "w") do |report|
    report.puts "FALSE POSITIVE PATTERN ANALYSIS"
    report.puts rule
    report.puts "\nTotal: #{results.compact.size} false positives analyzed"

    report.puts "\n\nBy Dimension:"
    dimension_groups.each do |dim, members|
      report.puts " #{dim}: #{members.size}"
      members.each { |entry| report.puts " #{entry[:spec]}" }
    end

    report.puts "\n\nBy Changes:"
    changes_groups.each do |change, members|
      report.puts " #{change}: #{members.size}"
      members.each { |entry| report.puts " #{entry[:spec]}" }
    end
  end

  puts "\nSummary saved to: #{summary_file}"
  puts "\nDetailed outputs in: /tmp/false_positives/"
end
|
|
160
|
-
|
|
161
|
-
main if __FILE__ == $PROGRAM_NAME
|