canon 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +83 -22
  3. data/docs/Gemfile +1 -0
  4. data/docs/_config.yml +90 -1
  5. data/docs/advanced/diff-classification.adoc +196 -24
  6. data/docs/features/match-options/index.adoc +239 -1
  7. data/lib/canon/comparison/format_detector.rb +2 -1
  8. data/lib/canon/comparison/html_comparator.rb +19 -8
  9. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  10. data/lib/canon/comparison/markup_comparator.rb +109 -2
  11. data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
  12. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  13. data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
  14. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
  15. data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
  16. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
  17. data/lib/canon/comparison/xml_comparator.rb +240 -23
  18. data/lib/canon/comparison/xml_node_comparison.rb +25 -3
  19. data/lib/canon/diff/diff_classifier.rb +119 -5
  20. data/lib/canon/diff/formatting_detector.rb +1 -1
  21. data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
  22. data/lib/canon/rspec_matchers.rb +37 -8
  23. data/lib/canon/version.rb +1 -1
  24. data/lib/canon/xml/data_model.rb +24 -13
  25. metadata +4 -78
  26. data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
  27. data/false_positive_analysis.txt +0 -0
  28. data/file1.html +0 -1
  29. data/file2.html +0 -1
  30. data/old-docs/ADVANCED_TOPICS.adoc +0 -20
  31. data/old-docs/BASIC_USAGE.adoc +0 -16
  32. data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
  33. data/old-docs/CLI.adoc +0 -497
  34. data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  35. data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
  36. data/old-docs/DIFF_FORMATTING.adoc +0 -540
  37. data/old-docs/DIFF_PARAMETERS.adoc +0 -261
  38. data/old-docs/DOM_DIFF.adoc +0 -1017
  39. data/old-docs/ENV_CONFIG.adoc +0 -876
  40. data/old-docs/FORMATS.adoc +0 -867
  41. data/old-docs/INPUT_VALIDATION.adoc +0 -477
  42. data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
  43. data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
  44. data/old-docs/MATCH_OPTIONS.adoc +0 -912
  45. data/old-docs/MODES.adoc +0 -432
  46. data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  47. data/old-docs/OPTIONS.adoc +0 -1387
  48. data/old-docs/PREPROCESSING.adoc +0 -491
  49. data/old-docs/README.old.adoc +0 -2831
  50. data/old-docs/RSPEC.adoc +0 -814
  51. data/old-docs/RUBY_API.adoc +0 -485
  52. data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
  53. data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
  54. data/old-docs/STRING_COMPARE.adoc +0 -345
  55. data/old-docs/TMP.adoc +0 -3384
  56. data/old-docs/TREE_DIFF.adoc +0 -1080
  57. data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
  58. data/old-docs/VERBOSE.adoc +0 -482
  59. data/old-docs/VISUALIZATION_MAP.adoc +0 -625
  60. data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
  61. data/scripts/analyze_current_state.rb +0 -85
  62. data/scripts/analyze_false_positives.rb +0 -114
  63. data/scripts/analyze_remaining_failures.rb +0 -105
  64. data/scripts/compare_current_failures.rb +0 -95
  65. data/scripts/compare_dom_tree_diff.rb +0 -158
  66. data/scripts/compare_failures.rb +0 -151
  67. data/scripts/debug_attribute_extraction.rb +0 -66
  68. data/scripts/debug_blocks_839.rb +0 -115
  69. data/scripts/debug_meta_matching.rb +0 -52
  70. data/scripts/debug_p_matching.rb +0 -192
  71. data/scripts/debug_signature_matching.rb +0 -118
  72. data/scripts/debug_sourcecode_124.rb +0 -32
  73. data/scripts/debug_whitespace_sensitive.rb +0 -192
  74. data/scripts/extract_false_positives.rb +0 -138
  75. data/scripts/find_actual_false_positives.rb +0 -125
  76. data/scripts/investigate_all_false_positives.rb +0 -161
  77. data/scripts/investigate_batch1.rb +0 -127
  78. data/scripts/investigate_classification.rb +0 -150
  79. data/scripts/investigate_classification_detailed.rb +0 -190
  80. data/scripts/investigate_common_failures.rb +0 -342
  81. data/scripts/investigate_false_negative.rb +0 -80
  82. data/scripts/investigate_false_positive.rb +0 -83
  83. data/scripts/investigate_false_positives.rb +0 -227
  84. data/scripts/investigate_false_positives_batch.rb +0 -163
  85. data/scripts/investigate_mixed_content.rb +0 -125
  86. data/scripts/investigate_remaining_16.rb +0 -214
  87. data/scripts/run_single_test.rb +0 -29
  88. data/scripts/test_all_false_positives.rb +0 -95
  89. data/scripts/test_attribute_details.rb +0 -61
  90. data/scripts/test_both_algorithms.rb +0 -49
  91. data/scripts/test_both_simple.rb +0 -49
  92. data/scripts/test_enhanced_semantic_output.rb +0 -125
  93. data/scripts/test_readme_examples.rb +0 -131
  94. data/scripts/test_semantic_tree_diff.rb +0 -99
  95. data/scripts/test_semantic_ux_improvements.rb +0 -135
  96. data/scripts/test_single_false_positive.rb +0 -119
  97. data/scripts/test_size_limits.rb +0 -99
  98. data/test_html_1.html +0 -21
  99. data/test_html_2.html +0 -21
  100. data/test_nokogiri.rb +0 -33
  101. data/test_normalize.rb +0 -45
@@ -1,127 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Investigate first batch of false positives in detail
5
-
6
- ISODOC_DIR = File.expand_path("../../../mn/isodoc", __dir__)
7
-
8
- BATCH_1 = [
9
- { file: "spec/isodoc/blocks_spec.rb", line: 352 },
10
- { file: "spec/isodoc/footnotes_spec.rb", line: 740 },
11
- { file: "spec/isodoc/inline_spec.rb", line: 1012 },
12
- { file: "spec/isodoc/inline_spec.rb", line: 1251 },
13
- { file: "spec/isodoc/postproc_spec.rb", line: 948 },
14
- ].freeze
15
-
16
- def run_test(test, algorithm)
17
- file = File.join(ISODOC_DIR, test[:file])
18
- cmd = "cd #{ISODOC_DIR} && CANON_ALGORITHM=#{algorithm} bundle exec rspec #{file}:#{test[:line]} 2>&1"
19
- output = `#{cmd}`
20
- success = $?.success?
21
-
22
- {
23
- success: success,
24
- output: output,
25
- }
26
- end
27
-
28
- def extract_diff(output)
29
- lines = output.lines
30
-
31
- # Find the diff section
32
- diff_start = lines.index { |l| l.include?("Diff:") }
33
- return nil unless diff_start
34
-
35
- # Extract lines after "Diff:" until we hit a blank line or end
36
- diff_lines = []
37
- (diff_start + 1...lines.size).each do |i|
38
- line = lines[i]
39
- break if line.strip.empty? && diff_lines.size > 5
40
-
41
- diff_lines << line
42
- end
43
-
44
- diff_lines.join
45
- end
46
-
47
- def analyze_diff(diff)
48
- return {} unless diff
49
-
50
- analysis = {
51
- whitespace: diff.match?(/\s+/) && diff.match?(/^\s*[-+]/),
52
- attributes: diff.match?(/\sattr|attribute/i),
53
- text_content: diff.match?(/text|content/i),
54
- elements: diff.match?(/element|tag|node/i),
55
- line_count: diff.lines.size,
56
- }
57
-
58
- # Sample key differences
59
- added = diff.lines.select do |l|
60
- l.start_with?("+") && !l.start_with?("+++")
61
- end.take(3)
62
- removed = diff.lines.select do |l|
63
- l.start_with?("-") && !l.start_with?("---")
64
- end.take(3)
65
-
66
- analysis[:sample_added] = added
67
- analysis[:sample_removed] = removed
68
- analysis
69
- end
70
-
71
- puts "=" * 80
72
- puts "BATCH 1 INVESTIGATION: 5 False Positives"
73
- puts "=" * 80
74
- puts
75
-
76
- BATCH_1.each_with_index do |test, idx|
77
- puts "\n#{idx + 1}/5: #{test[:file].sub('spec/isodoc/', '')}:#{test[:line]}"
78
- puts "-" * 80
79
-
80
- # Run with semantic (should fail)
81
- sem_result = run_test(test, "semantic")
82
-
83
- if sem_result[:success]
84
- puts "⚠️ UNEXPECTED: Test passes with semantic (may have been fixed)"
85
- next
86
- end
87
-
88
- puts "✓ Confirmed: Fails with semantic as expected"
89
-
90
- # Extract and analyze diff
91
- diff = extract_diff(sem_result[:output])
92
-
93
- if diff
94
- puts "\n📊 Diff Analysis:"
95
- analysis = analyze_diff(diff)
96
-
97
- puts " Diff size: #{analysis[:line_count]} lines"
98
- puts " Involves whitespace: #{analysis[:whitespace]}" if analysis[:whitespace]
99
- puts " Involves attributes: #{analysis[:attributes]}" if analysis[:attributes]
100
- puts " Involves text content: #{analysis[:text_content]}" if analysis[:text_content]
101
- puts " Involves elements: #{analysis[:elements]}" if analysis[:elements]
102
-
103
- if analysis[:sample_removed].any?
104
- puts "\n Sample lines REMOVED (semantic sees but DOM doesn't):"
105
- analysis[:sample_removed].each { |l| puts " #{l.strip}" }
106
- end
107
-
108
- if analysis[:sample_added].any?
109
- puts "\n Sample lines ADDED (semantic missing but DOM has):"
110
- analysis[:sample_added].each { |l| puts " #{l.strip}" }
111
- end
112
-
113
- # Show first 30 lines of actual diff
114
- puts "\n 📋 First 30 lines of diff:"
115
- diff.lines.take(30).each do |line|
116
- puts " #{line.rstrip}"
117
- end
118
- else
119
- puts "\n⚠️ Could not extract diff from output"
120
- puts "\nFirst 50 lines of output:"
121
- sem_result[:output].lines.take(50).each { |l| puts " #{l.rstrip}" }
122
- end
123
- end
124
-
125
- puts "\n#{'=' * 80}"
126
- puts "BATCH 1 INVESTIGATION COMPLETE"
127
- puts "=" * 80
@@ -1,150 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Investigation script to check if Canon is properly classifying
5
- # differences as normative vs informative according to match options
6
-
7
- require "bundler/setup"
8
- require_relative "../lib/canon"
9
-
10
- # Sample tests from the 43 common failures
11
- SAMPLE_TESTS = [
12
- { file: "blocks_notes_spec.rb", line: 494, desc: "blocks with notes" },
13
- { file: "blocks_provisions_spec.rb", line: 4, desc: "block provisions" },
14
- { file: "cleanup_spec.rb", line: 180, desc: "cleanup processing" },
15
- { file: "figures_spec.rb", line: 5, desc: "figure handling" },
16
- { file: "tables_spec.rb", line: 4, desc: "table structure" },
17
- ].freeze
18
-
19
- def run_single_test(test_info)
20
- puts "\n#{'=' * 80}"
21
- puts "Test: #{test_info[:file]}:#{test_info[:line]}"
22
- puts "Description: #{test_info[:desc]}"
23
- puts "=" * 80
24
-
25
- # Run the test with both algorithms to capture output
26
- isodoc_path = File.expand_path("~/src/mn/isodoc")
27
- test_path = File.join(isodoc_path, "spec/isodoc", test_info[:file])
28
-
29
- unless File.exist?(test_path)
30
- puts "⚠️ Test file not found: #{test_path}"
31
- return
32
- end
33
-
34
- %w[dom semantic].each do |algorithm|
35
- puts "\n--- #{algorithm.upcase} Algorithm ---"
36
-
37
- # Run test and capture output
38
- cmd = "cd #{isodoc_path} && " \
39
- "CANON_DIFF_ALGORITHM=#{algorithm} " \
40
- "bundle exec rspec #{test_path}:#{test_info[:line]} 2>&1"
41
-
42
- output = `#{cmd}`
43
-
44
- # Check if test passed or failed
45
- if output.include?("0 failures")
46
- puts "✅ PASSED"
47
- next
48
- elsif output.include?("1 failure")
49
- puts "❌ FAILED"
50
- else
51
- puts "⚠️ Unexpected output"
52
- next
53
- end
54
-
55
- # Extract diff information
56
- extract_diff_info(output)
57
- end
58
- end
59
-
60
- def extract_diff_info(output)
61
- # Look for dimension information in the output
62
- dimensions_found = []
63
-
64
- # Common patterns in Canon output
65
- dimension_patterns = [
66
- /DIFFERENCE.*dimension:\s*(\w+)/i,
67
- /Dimension:\s*(\w+)/i,
68
- /\[(\w+)\]/,
69
- ]
70
-
71
- dimension_patterns.each do |pattern|
72
- output.scan(pattern) do |match|
73
- dimension = match[0].downcase.to_sym
74
- dimensions_found << dimension unless dimensions_found.include?(dimension)
75
- end
76
- end
77
-
78
- if dimensions_found.any?
79
- puts "\n📊 Dimensions detected:"
80
- dimensions_found.each do |dim|
81
- puts " - #{dim}"
82
- end
83
- else
84
- puts "\n⚠️ No dimension information found in output"
85
- end
86
-
87
- # Look for normative/informative classification
88
- if output.match?(/normative/i)
89
- puts "\n📝 Normative differences found"
90
- end
91
- if output.match?(/informative/i)
92
- puts "\n📝 Informative differences found"
93
- end
94
-
95
- # Count differences
96
- diff_count = output.scan(/DIFFERENCE|difference/i).length
97
- puts "\n📈 Approximate difference count: #{diff_count}"
98
- end
99
-
100
- def check_match_options_config
101
- puts "\n#{'=' * 80}"
102
- puts "Match Options Configuration Check"
103
- puts "=" * 80
104
-
105
- # Check HTML match options (most common format in isodoc tests)
106
- puts "\nHTML Default Match Options:"
107
-
108
- html_defaults = Canon::Comparison::MatchOptions::Xml::FORMAT_DEFAULTS[:html]
109
- html_defaults.each do |dimension, behavior|
110
- normative = behavior != :ignore
111
- status = normative ? "NORMATIVE" : "INFORMATIVE"
112
- puts " #{dimension.to_s.ljust(25)} : #{behavior.to_s.ljust(12)} → #{status}"
113
- end
114
-
115
- puts "\nKey classifications:"
116
- puts " - attribute_order: #{html_defaults[:attribute_order]} " \
117
- "→ #{html_defaults[:attribute_order] == :ignore ? 'INFORMATIVE ✓' : 'NORMATIVE ✗'}"
118
- puts " - text_content: #{html_defaults[:text_content]} " \
119
- "→ NORMATIVE (but normalized during matching)"
120
- puts " - structural_whitespace: #{html_defaults[:structural_whitespace]} " \
121
- "→ NORMATIVE (but normalized during matching)"
122
- puts " - comments: #{html_defaults[:comments]} " \
123
- "→ #{html_defaults[:comments] == :ignore ? 'INFORMATIVE ✓' : 'NORMATIVE ✗'}"
124
- end
125
-
126
- def main
127
- puts "Canon Classification Investigation"
128
- puts "Checking if differences are properly classified as normative vs informative"
129
- puts "based on match options in effect"
130
-
131
- # First, show the match options configuration
132
- check_match_options_config
133
-
134
- # Then run sample tests
135
- puts "\n\nRunning sample tests to examine actual behavior..."
136
- SAMPLE_TESTS.each do |test_info|
137
- run_single_test(test_info)
138
- end
139
-
140
- puts "\n#{'=' * 80}"
141
- puts "Investigation complete"
142
- puts "=" * 80
143
- puts "\nKey Questions:"
144
- puts "1. Are both algorithms reporting the same dimensions?"
145
- puts "2. Are dimensions correctly classified per match options?"
146
- puts "3. Are ignored dimensions being treated as informative?"
147
- puts "4. Are normalized dimensions still showing as normative when they differ?"
148
- end
149
-
150
- main if __FILE__ == $PROGRAM_NAME
@@ -1,190 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Detailed investigation of classification in actual failing tests
5
-
6
- require "bundler/setup"
7
-
8
- # Actual failing tests from the 43 common failures
9
- FAILING_TESTS = [
10
- { file: "blocks_spec.rb", line: 352, desc: "examples" },
11
- { file: "cleanup_spec.rb", line: 180, desc: "tables with tfoot" },
12
- { file: "figures_spec.rb", line: 5, desc: "figures" },
13
- { file: "inline_spec.rb", line: 1012, desc: "inline formatting" },
14
- { file: "sourcecode_spec.rb", line: 124, desc: "sourcecode" },
15
- ].freeze
16
-
17
- def run_test_with_verbose(test_info, algorithm)
18
- isodoc_path = File.expand_path("~/src/mn/isodoc")
19
- test_path = "spec/isodoc/#{test_info[:file]}:#{test_info[:line]}"
20
-
21
- puts "\n--- #{algorithm.upcase} Algorithm ---"
22
-
23
- # Run test and capture full output
24
- cmd = "cd #{isodoc_path} && " \
25
- "CANON_DIFF_ALGORITHM=#{algorithm} " \
26
- "CANON_VERBOSE=true " \
27
- "bundle exec rspec #{test_path} 2>&1"
28
-
29
- output = `#{cmd}`
30
-
31
- # Check result
32
- if output.include?("0 failures")
33
- puts "✅ PASSED - No classification to check"
34
- return nil
35
- elsif !output.include?("1 failure")
36
- puts "⚠️ Unexpected result"
37
- return nil
38
- end
39
-
40
- puts "❌ FAILED - Analyzing diff output..."
41
-
42
- # Extract and analyze dimensions
43
- analyze_dimensions(output)
44
-
45
- output
46
- end
47
-
48
- def analyze_dimensions(output)
49
- # Look for dimension mentions in various formats
50
- dimensions = {}
51
-
52
- # Pattern 1: DIFFERENCE blocks
53
- output.scan(/DIFFERENCE.*?dimension:\s*(\w+).*?normative:\s*(\w+)/mi) do |dim, norm|
54
- dim_sym = dim.downcase.to_sym
55
- is_normative = norm.downcase == "true"
56
- dimensions[dim_sym] ||= { normative: 0, informative: 0 }
57
- if is_normative
58
- dimensions[dim_sym][:normative] += 1
59
- else
60
- dimensions[dim_sym][:informative] += 1
61
- end
62
- end
63
-
64
- # Pattern 2: Simple dimension mentions
65
- output.scan(/(?:dimension|Dimension):\s*(\w+)/i) do |match|
66
- dim_sym = match[0].downcase.to_sym
67
- dimensions[dim_sym] ||= { normative: 0, informative: 0, unknown: 0 }
68
- dimensions[dim_sym][:unknown] ||= 0
69
- dimensions[dim_sym][:unknown] += 1
70
- end
71
-
72
- if dimensions.any?
73
- puts "\n📊 Dimensions found:"
74
- dimensions.each do |dim, counts|
75
- puts " #{dim}:"
76
- counts.each do |type, count|
77
- puts " #{type}: #{count}" if count.positive?
78
- end
79
- end
80
- else
81
- puts "\n⚠️ No dimension information extracted"
82
-
83
- # Try to find any diff-related output
84
- if /expected.*to eq/mi.match?(output)
85
- puts " Found RSpec expectation failure"
86
- end
87
- if /differ/i.match?(output)
88
- puts " Found 'differ' mentions: #{output.scan(/differ/i).length}"
89
- end
90
- end
91
-
92
- # Check for specific match option mentions
93
- check_match_options_usage(output)
94
- end
95
-
96
- def check_match_options_usage(output)
97
- puts "\n🔧 Match Options Application:"
98
-
99
- # Check if attribute_order is mentioned
100
- if /attribute.order/i.match?(output)
101
- attr_order_count = output.scan(/attribute.order/i).length
102
- puts " ✓ attribute_order mentioned (#{attr_order_count} times)"
103
- puts " Expected: INFORMATIVE (match option: ignore)"
104
- end
105
-
106
- # Check if text normalization is mentioned
107
- if /text.*normaliz/i.match?(output)
108
- puts " ✓ text normalization mentioned"
109
- puts " Expected: differences after normalization = NORMATIVE"
110
- end
111
-
112
- # Check if whitespace is mentioned
113
- if /whitespace/i.match?(output)
114
- ws_count = output.scan(/whitespace/i).length
115
- puts " ✓ whitespace mentioned (#{ws_count} times)"
116
- puts " Expected: structural_whitespace = NORMATIVE (normalized)"
117
- end
118
-
119
- # Check if comments are mentioned
120
- if /comment/i.match?(output)
121
- comment_count = output.scan(/comment/i).length
122
- puts " ✓ comments mentioned (#{comment_count} times)"
123
- puts " Expected: INFORMATIVE (match option: ignore for HTML)"
124
- end
125
- end
126
-
127
- def compare_algorithms(test_info)
128
- puts "\n#{'=' * 80}"
129
- puts "Test: #{test_info[:file]}:#{test_info[:line]}"
130
- puts "Description: #{test_info[:desc]}"
131
- puts "=" * 80
132
-
133
- dom_output = run_test_with_verbose(test_info, "dom")
134
- semantic_output = run_test_with_verbose(test_info, "semantic")
135
-
136
- if dom_output && semantic_output
137
- puts "\n🔍 Comparing algorithm outputs:"
138
-
139
- # Extract dimension info from both
140
- dom_dims = extract_dimension_list(dom_output)
141
- sem_dims = extract_dimension_list(semantic_output)
142
-
143
- if dom_dims == sem_dims
144
- puts " ✅ Both algorithms report same dimensions: #{dom_dims.sort.join(', ')}"
145
- else
146
- puts " ⚠️ Algorithms report different dimensions:"
147
- puts " DOM: #{dom_dims.sort.join(', ')}"
148
- puts " Semantic: #{sem_dims.sort.join(', ')}"
149
- puts " Only in DOM: #{(dom_dims - sem_dims).sort.join(', ')}" if (dom_dims - sem_dims).any?
150
- puts " Only in Semantic: #{(sem_dims - dom_dims).sort.join(', ')}" if (sem_dims - dom_dims).any?
151
- end
152
- end
153
- end
154
-
155
- def extract_dimension_list(output)
156
- dimensions = []
157
- output.scan(/(?:dimension|Dimension):\s*(\w+)/i) do |match|
158
- dim = match[0].downcase.to_sym
159
- dimensions << dim unless dimensions.include?(dim)
160
- end
161
- dimensions
162
- end
163
-
164
- def main
165
- puts "Detailed Canon Classification Investigation"
166
- puts "Examining actual failing tests to verify correct classification"
167
- puts "\nMatch Options for HTML (default isodoc format):"
168
- puts " - attribute_order: ignore → INFORMATIVE ✓"
169
- puts " - text_content: normalize → NORMATIVE (after normalization)"
170
- puts " - structural_whitespace: normalize → NORMATIVE (after normalization)"
171
- puts " - comments: ignore → INFORMATIVE ✓"
172
- puts " - attribute_values: strict → NORMATIVE ✓"
173
- puts " - attribute_presence: strict → NORMATIVE ✓"
174
-
175
- FAILING_TESTS.each do |test_info|
176
- compare_algorithms(test_info)
177
- puts "\n"
178
- end
179
-
180
- puts "=" * 80
181
- puts "Investigation Complete"
182
- puts "=" * 80
183
- puts "\nKey Findings to Check:"
184
- puts "1. Do both algorithms classify the same dimensions?"
185
- puts "2. Are 'ignore' dimensions (attribute_order, comments) INFORMATIVE?"
186
- puts "3. Are 'normalize' dimensions NORMATIVE when differences persist?"
187
- puts "4. Are 'strict' dimensions always NORMATIVE?"
188
- end
189
-
190
- main if __FILE__ == $PROGRAM_NAME