canon 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +83 -22
  3. data/docs/Gemfile +1 -0
  4. data/docs/_config.yml +90 -1
  5. data/docs/advanced/diff-classification.adoc +196 -24
  6. data/docs/features/match-options/index.adoc +239 -1
  7. data/lib/canon/comparison/format_detector.rb +2 -1
  8. data/lib/canon/comparison/html_comparator.rb +19 -8
  9. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  10. data/lib/canon/comparison/markup_comparator.rb +109 -2
  11. data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
  12. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  13. data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
  14. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
  15. data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
  16. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
  17. data/lib/canon/comparison/xml_comparator.rb +240 -23
  18. data/lib/canon/comparison/xml_node_comparison.rb +25 -3
  19. data/lib/canon/diff/diff_classifier.rb +119 -5
  20. data/lib/canon/diff/formatting_detector.rb +1 -1
  21. data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
  22. data/lib/canon/rspec_matchers.rb +37 -8
  23. data/lib/canon/version.rb +1 -1
  24. data/lib/canon/xml/data_model.rb +24 -13
  25. metadata +4 -78
  26. data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
  27. data/false_positive_analysis.txt +0 -0
  28. data/file1.html +0 -1
  29. data/file2.html +0 -1
  30. data/old-docs/ADVANCED_TOPICS.adoc +0 -20
  31. data/old-docs/BASIC_USAGE.adoc +0 -16
  32. data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
  33. data/old-docs/CLI.adoc +0 -497
  34. data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  35. data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
  36. data/old-docs/DIFF_FORMATTING.adoc +0 -540
  37. data/old-docs/DIFF_PARAMETERS.adoc +0 -261
  38. data/old-docs/DOM_DIFF.adoc +0 -1017
  39. data/old-docs/ENV_CONFIG.adoc +0 -876
  40. data/old-docs/FORMATS.adoc +0 -867
  41. data/old-docs/INPUT_VALIDATION.adoc +0 -477
  42. data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
  43. data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
  44. data/old-docs/MATCH_OPTIONS.adoc +0 -912
  45. data/old-docs/MODES.adoc +0 -432
  46. data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  47. data/old-docs/OPTIONS.adoc +0 -1387
  48. data/old-docs/PREPROCESSING.adoc +0 -491
  49. data/old-docs/README.old.adoc +0 -2831
  50. data/old-docs/RSPEC.adoc +0 -814
  51. data/old-docs/RUBY_API.adoc +0 -485
  52. data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
  53. data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
  54. data/old-docs/STRING_COMPARE.adoc +0 -345
  55. data/old-docs/TMP.adoc +0 -3384
  56. data/old-docs/TREE_DIFF.adoc +0 -1080
  57. data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
  58. data/old-docs/VERBOSE.adoc +0 -482
  59. data/old-docs/VISUALIZATION_MAP.adoc +0 -625
  60. data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
  61. data/scripts/analyze_current_state.rb +0 -85
  62. data/scripts/analyze_false_positives.rb +0 -114
  63. data/scripts/analyze_remaining_failures.rb +0 -105
  64. data/scripts/compare_current_failures.rb +0 -95
  65. data/scripts/compare_dom_tree_diff.rb +0 -158
  66. data/scripts/compare_failures.rb +0 -151
  67. data/scripts/debug_attribute_extraction.rb +0 -66
  68. data/scripts/debug_blocks_839.rb +0 -115
  69. data/scripts/debug_meta_matching.rb +0 -52
  70. data/scripts/debug_p_matching.rb +0 -192
  71. data/scripts/debug_signature_matching.rb +0 -118
  72. data/scripts/debug_sourcecode_124.rb +0 -32
  73. data/scripts/debug_whitespace_sensitive.rb +0 -192
  74. data/scripts/extract_false_positives.rb +0 -138
  75. data/scripts/find_actual_false_positives.rb +0 -125
  76. data/scripts/investigate_all_false_positives.rb +0 -161
  77. data/scripts/investigate_batch1.rb +0 -127
  78. data/scripts/investigate_classification.rb +0 -150
  79. data/scripts/investigate_classification_detailed.rb +0 -190
  80. data/scripts/investigate_common_failures.rb +0 -342
  81. data/scripts/investigate_false_negative.rb +0 -80
  82. data/scripts/investigate_false_positive.rb +0 -83
  83. data/scripts/investigate_false_positives.rb +0 -227
  84. data/scripts/investigate_false_positives_batch.rb +0 -163
  85. data/scripts/investigate_mixed_content.rb +0 -125
  86. data/scripts/investigate_remaining_16.rb +0 -214
  87. data/scripts/run_single_test.rb +0 -29
  88. data/scripts/test_all_false_positives.rb +0 -95
  89. data/scripts/test_attribute_details.rb +0 -61
  90. data/scripts/test_both_algorithms.rb +0 -49
  91. data/scripts/test_both_simple.rb +0 -49
  92. data/scripts/test_enhanced_semantic_output.rb +0 -125
  93. data/scripts/test_readme_examples.rb +0 -131
  94. data/scripts/test_semantic_tree_diff.rb +0 -99
  95. data/scripts/test_semantic_ux_improvements.rb +0 -135
  96. data/scripts/test_single_false_positive.rb +0 -119
  97. data/scripts/test_size_limits.rb +0 -99
  98. data/test_html_1.html +0 -21
  99. data/test_html_2.html +0 -21
  100. data/test_nokogiri.rb +0 -33
  101. data/test_normalize.rb +0 -45
@@ -1,131 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require_relative "../lib/canon"
5
- require_relative "../lib/canon/comparison"
6
- require_relative "../lib/canon/pretty_printer/xml"
7
- require_relative "../lib/canon/xml/c14n"
8
- require_relative "../lib/canon/config"
9
-
10
- puts "Testing corrected README.adoc examples..."
11
- puts "=" * 60
12
-
13
- # Test 1: Canon.format (default pretty-print)
14
- puts "\n1. Testing Canon.format with XML (default)..."
15
- begin
16
- result = Canon.format("<root><b>2</b><a>1</a></root>", :xml)
17
- if result.include?("<?xml") && result.include?("<root>")
18
- puts "✓ Canon.format works (returns pretty-printed XML)"
19
- else
20
- puts "✗ Canon.format unexpected output"
21
- end
22
- rescue StandardError => e
23
- puts "✗ Canon.format failed: #{e.message}"
24
- end
25
-
26
- # Test 2: Canonical form (compact)
27
- puts "\n2. Testing Canon::Xml::C14n.canonicalize..."
28
- begin
29
- result = Canon::Xml::C14n.canonicalize("<root><b>2</b><a>1</a></root>",
30
- with_comments: false)
31
- expected = "<root><b>2</b><a>1</a></root>"
32
- if result == expected
33
- puts "✓ Canon::Xml::C14n.canonicalize works"
34
- puts " Result: #{result}"
35
- else
36
- puts "✗ Unexpected result: #{result}"
37
- end
38
- rescue StandardError => e
39
- puts "✗ Canon::Xml::C14n.canonicalize failed: #{e.message}"
40
- end
41
-
42
- # Test 3: Pretty printer with variable
43
- puts "\n3. Testing Canon::PrettyPrinter::Xml with defined variable..."
44
- begin
45
- xml_input = "<root><b>2</b><a>1</a></root>"
46
- result = Canon::PrettyPrinter::Xml.new(indent: 2).format(xml_input)
47
- if result.include?("<?xml") && result.include?("<root>")
48
- puts "✓ Canon::PrettyPrinter::Xml works with defined variable"
49
- else
50
- puts "✗ Unexpected output"
51
- end
52
- rescue StandardError => e
53
- puts "✗ Canon::PrettyPrinter::Xml failed: #{e.message}"
54
- end
55
-
56
- # Test 4: Basic comparison
57
- puts "\n4. Testing Canon::Comparison.equivalent?..."
58
- begin
59
- xml1 = "<root><a>1</a><b>2</b></root>"
60
- xml2 = "<root> <b>2</b> <a>1</a> </root>"
61
- result = Canon::Comparison.equivalent?(xml1, xml2)
62
- puts "✓ Canon::Comparison.equivalent? works"
63
- puts " Result: #{result}"
64
- rescue StandardError => e
65
- puts "✗ Canon::Comparison.equivalent? failed: #{e.message}"
66
- end
67
-
68
- # Test 5: Semantic tree diff
69
- puts "\n5. Testing semantic tree diff with operations..."
70
- begin
71
- xml1 = "<root><a>1</a><b>2</b></root>"
72
- xml2 = "<root> <b>2</b> <a>1</a> </root>"
73
- result = Canon::Comparison.equivalent?(xml1, xml2,
74
- verbose: true,
75
- diff_algorithm: :semantic)
76
- if result.respond_to?(:operations)
77
- puts "✓ Semantic tree diff works"
78
- puts " Result class: #{result.class}"
79
- puts " Operations available: #{result.operations.class}"
80
- else
81
- puts "✗ Result doesn't have operations method"
82
- end
83
- rescue StandardError => e
84
- puts "✗ Semantic tree diff failed: #{e.message}"
85
- end
86
-
87
- # Test 6: RSpec configuration (using Canon::Config)
88
- puts "\n6. Testing Canon::Config.configure..."
89
- begin
90
- Canon::Config.configure do |config|
91
- config.xml.match.profile = :spec_friendly
92
- config.xml.diff.use_color = true
93
- end
94
- profile = Canon::Config.instance.xml.match.profile
95
- use_color = Canon::Config.instance.xml.diff.use_color
96
- if profile == :spec_friendly && use_color == true
97
- puts "✓ Canon::Config.configure works correctly"
98
- puts " Profile: #{profile}"
99
- puts " Use color: #{use_color}"
100
- else
101
- puts "✗ Configuration values not set correctly"
102
- end
103
- rescue StandardError => e
104
- puts "✗ Canon::Config.configure failed: #{e.message}"
105
- end
106
-
107
- # Test 7: Comparison with custom options
108
- puts "\n7. Testing Canon::Comparison with match options..."
109
- begin
110
- doc1 = "<root><a> text </a></root>"
111
- doc2 = "<root><a>text</a></root>"
112
- result = Canon::Comparison.equivalent?(doc1, doc2,
113
- match: {
114
- text_content: :normalize,
115
- structural_whitespace: :ignore,
116
- comments: :ignore,
117
- },
118
- verbose: true)
119
- if result.respond_to?(:equivalent?)
120
- puts "✓ Canon::Comparison with match options works"
121
- puts " Result equivalent: #{result.equivalent?}"
122
- else
123
- puts "✗ Result doesn't have expected methods"
124
- end
125
- rescue StandardError => e
126
- puts "✗ Canon::Comparison with match options failed: #{e.message}"
127
- end
128
-
129
- puts "\n#{'=' * 60}"
130
- puts "All tests completed successfully! ✓"
131
- puts "=" * 60
@@ -1,99 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require "bundler/setup"
5
- require "canon"
6
- require "canon/diff_formatter"
7
-
8
- # Test XML with differences
9
- xml1 = <<~XML
10
- <root>
11
- <section id="1">
12
- <title>First Section</title>
13
- <paragraph>Original text</paragraph>
14
- </section>
15
- <section id="2">
16
- <title>Second Section</title>
17
- </section>
18
- </root>
19
- XML
20
-
21
- xml2 = <<~XML
22
- <root>
23
- <section id="1">
24
- <title>First Section Modified</title>
25
- <paragraph>Changed text</paragraph>
26
- </section>
27
- <section id="3">
28
- <title>Third Section</title>
29
- </section>
30
- </root>
31
- XML
32
-
33
- puts "=" * 80
34
- puts "Testing Semantic Tree Diff vs DOM Diff"
35
- puts "=" * 80
36
- puts
37
-
38
- # Test with show_compare enabled
39
- puts "Running with show_compare=true (verbose mode)..."
40
- puts "-" * 80
41
-
42
- result = Canon::Comparison.equivalent?(
43
- xml1,
44
- xml2,
45
- verbose: true,
46
- show_compare: true,
47
- )
48
-
49
- if result.is_a?(Canon::Comparison::CombinedComparisonResult)
50
- puts "\n✓ Got CombinedComparisonResult"
51
- puts " Decision Algorithm: #{result.decision_algorithm}"
52
- puts " Equivalent? #{result.equivalent?}"
53
- puts
54
-
55
- puts "DOM Diff Algorithm:"
56
- puts "-" * 40
57
- dom_result = result.dom_result
58
- if dom_result.respond_to?(:differences)
59
- puts " Number of differences: #{dom_result.differences.length}"
60
- dom_result.differences.first(3).each_with_index do |diff, i|
61
- puts " #{i + 1}. #{diff.inspect[0..200]}"
62
- end
63
- else
64
- puts " Result: #{dom_result.inspect[0..200]}"
65
- end
66
- puts
67
-
68
- puts "Tree Diff Algorithm:"
69
- puts "-" * 40
70
- tree_result = result.tree_result
71
- if tree_result.respond_to?(:differences)
72
- puts " Number of differences: #{tree_result.differences.length}"
73
- tree_result.differences.first(3).each_with_index do |diff, i|
74
- puts " #{i + 1}. #{diff.inspect[0..200]}"
75
- end
76
- else
77
- puts " Result: #{tree_result.inspect[0..200]}"
78
- end
79
- puts
80
-
81
- # Test formatting
82
- puts "Formatted Output:"
83
- puts "-" * 40
84
- begin
85
- formatted = Canon::DiffFormatter.format(result, mode: :by_line)
86
- puts formatted[0..500]
87
- rescue StandardError => e
88
- puts " Error formatting: #{e.message}"
89
- puts " #{e.backtrace.first(3).join("\n ")}"
90
- end
91
- else
92
- puts "✗ Did not get CombinedComparisonResult, got: #{result.class}"
93
- puts " Result: #{result.inspect[0..200]}"
94
- end
95
-
96
- puts
97
- puts "=" * 80
98
- puts "Test Complete"
99
- puts "=" * 80
@@ -1,135 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Test semantic tree diff UX improvements
5
- # Verifies that:
6
- # 1. XPath includes position numbers
7
- # 2. Element content previews are shown
8
- # 3. Specific error categories match DOM clarity
9
-
10
- require_relative "../lib/canon"
11
-
12
- # Test case: HTML with multiple paragraphs to verify position numbers
13
- html1 = <<~HTML
14
- <html>
15
- <body>
16
- <div>
17
- <p>First paragraph</p>
18
- <p>Second paragraph</p>
19
- <p>Third paragraph</p>
20
- </div>
21
- </body>
22
- </html>
23
- HTML
24
-
25
- html2 = <<~HTML
26
- <html>
27
- <body>
28
- <div>
29
- <p>First paragraph</p>
30
- <p id="modified">Second paragraph with changes</p>
31
- <p>Third paragraph</p>
32
- <p>Fourth paragraph added</p>
33
- </div>
34
- </body>
35
- </html>
36
- HTML
37
-
38
- puts "=" * 80
39
- puts "Testing Semantic Tree Diff UX Improvements"
40
- puts "=" * 80
41
- puts
42
-
43
- # Test with semantic algorithm
44
- result = Canon::Comparison.equivalent?(
45
- html1,
46
- html2,
47
- diff_algorithm: :semantic,
48
- format: :html,
49
- verbose: true,
50
- )
51
-
52
- puts "Equivalent: #{result.equivalent?}"
53
- puts
54
- puts "Differences found: #{result.differences.length}"
55
- puts
56
-
57
- # Check each difference for UX improvements
58
- result.differences.each_with_index do |diff, i|
59
- puts "─" * 80
60
- puts "Difference ##{i + 1}:"
61
- puts " Dimension: #{diff.dimension}"
62
- puts " Reason: #{diff.reason}"
63
-
64
- # Check XPath has position numbers
65
- node = diff.node1 || diff.node2
66
- if node
67
- xpath = if node.respond_to?(:path)
68
- node.path
69
- else
70
- "(no xpath)"
71
- end
72
- puts " XPath: #{xpath}"
73
-
74
- # Verify position numbers are included
75
- if xpath.include?("[") && xpath.include?("]")
76
- puts " ✓ XPath includes position numbers"
77
- else
78
- puts " ✗ WARNING: XPath missing position numbers"
79
- end
80
- end
81
-
82
- # Check if reason is specific and actionable
83
- if diff.reason
84
- if diff.reason.include?("Missing") || diff.reason.include?("Extra") ||
85
- diff.reason.include?("changed:") || diff.reason.include?("→")
86
- puts " ✓ Reason is specific and actionable"
87
- else
88
- puts " ⚠ Reason could be more specific: #{diff.reason}"
89
- end
90
- end
91
-
92
- puts
93
- end
94
-
95
- puts "=" * 80
96
- puts "Formatted output:"
97
- puts "=" * 80
98
- puts result
99
-
100
- # Test attribute differences
101
- puts "\n\n"
102
- puts "=" * 80
103
- puts "Testing Attribute Difference Details"
104
- puts "=" * 80
105
-
106
- attr_html1 = '<div class="old" id="test" data-value="1">Content</div>'
107
- attr_html2 = '<div class="new" id="test" data-value="2" data-extra="added">Content</div>'
108
-
109
- attr_result = Canon::Comparison.equivalent?(
110
- attr_html1,
111
- attr_html2,
112
- diff_algorithm: :semantic,
113
- format: :html,
114
- verbose: true,
115
- )
116
-
117
- puts "Differences:"
118
- attr_result.differences.each do |diff|
119
- puts " Dimension: #{diff.dimension}"
120
- puts " Reason: #{diff.reason}"
121
-
122
- # Check for specific attribute details
123
- if diff.reason.include?("Missing:") || diff.reason.include?("Extra:") || diff.reason.include?("Changed:")
124
- puts " ✓ Shows specific attribute changes"
125
- end
126
- puts
127
- end
128
-
129
- puts "Formatted output:"
130
- puts attr_result
131
-
132
- puts "\n"
133
- puts "=" * 80
134
- puts "Test complete!"
135
- puts "=" * 80
@@ -1,119 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Test a single false positive case to understand the pattern
5
- # Usage: ruby scripts/test_single_false_positive.rb
6
-
7
- require "bundler/setup"
8
- require "canon"
9
-
10
- # Based on MIXED_CONTENT_FIX_RESULTS.md, one false positive is in sourcecode_spec.rb:124
11
- # Let's test a simple sourcecode case with whitespace
12
-
13
- # This represents a typical sourcecode/pre element case
14
- expected = <<~HTML
15
- <div class="example">
16
- <pre class="sourcecode" id="X">
17
- Line 1
18
- Line 2
19
- </pre>
20
- </div>
21
- HTML
22
-
23
- actual = <<~HTML
24
- <div class="example">
25
- <pre class="sourcecode" id="X">Line 1
26
- Line 2</pre>
27
- </div>
28
- HTML
29
-
30
- puts "=" * 80
31
- puts "TEST: Sourcecode whitespace handling (False Positive Pattern)"
32
- puts "=" * 80
33
-
34
- # Test DOM algorithm
35
- puts "\n1. DOM ALGORITHM (Baseline):"
36
- puts "-" * 40
37
- dom_result = Canon::Comparison.equivalent?(expected, actual,
38
- format: :html,
39
- diff_algorithm: :dom,
40
- verbose: false)
41
- puts "Result: #{dom_result ? '✅ PASS (no difference)' : '❌ FAIL (has difference)'}"
42
-
43
- # Test Semantic algorithm
44
- puts "\n2. SEMANTIC ALGORITHM (Under Test):"
45
- puts "-" * 40
46
- semantic_result = Canon::Comparison.equivalent?(expected, actual,
47
- format: :html,
48
- diff_algorithm: :semantic,
49
- verbose: false)
50
- puts "Result: #{semantic_result ? '✅ PASS (no difference)' : '❌ FAIL (has difference)'}"
51
-
52
- # Analysis
53
- puts "\n#{'=' * 80}"
54
- if dom_result && !semantic_result
55
- puts "❌ FALSE POSITIVE DETECTED"
56
- puts " DOM says: equivalent"
57
- puts " Semantic says: different"
58
- puts "\nThis is the pattern we need to fix!"
59
-
60
- # Get detailed diff
61
- puts "\nDetailed semantic diff:"
62
- Canon::Comparison.equivalent?(expected, actual,
63
- format: :html,
64
- diff_algorithm: :semantic,
65
- verbose: true)
66
- elsif !dom_result && semantic_result
67
- puts "❌ FALSE NEGATIVE DETECTED"
68
- puts " DOM says: different"
69
- puts " Semantic says: equivalent"
70
- elsif dom_result && semantic_result
71
- puts "✅ BOTH AGREE: No difference (correct match)"
72
- else
73
- puts "✅ BOTH AGREE: Has difference (correct non-match)"
74
- end
75
-
76
- # Now test with a metadata element case
77
- puts "\n\n#{'=' * 80}"
78
- puts "TEST: Metadata element handling (Another False Positive Pattern)"
79
- puts "=" * 80
80
-
81
- expected2 = <<~HTML
82
- <p id="X">
83
- <span class="fmt-xref-label">Clause 1</span>
84
- <a name="X">Content here</a>
85
- </p>
86
- HTML
87
-
88
- actual2 = <<~HTML
89
- <p id="X">
90
- <span class="fmt-xref-label">Clause 1</span>
91
- <bookmark id="X"/>
92
- Content here
93
- </p>
94
- HTML
95
-
96
- puts "\n1. DOM ALGORITHM:"
97
- dom2 = Canon::Comparison.equivalent?(expected2, actual2,
98
- format: :html,
99
- diff_algorithm: :dom,
100
- verbose: false)
101
- puts "Result: #{dom2 ? '✅ PASS' : '❌ FAIL'}"
102
-
103
- puts "\n2. SEMANTIC ALGORITHM:"
104
- semantic2 = Canon::Comparison.equivalent?(expected2, actual2,
105
- format: :html,
106
- diff_algorithm: :semantic,
107
- verbose: false)
108
- puts "Result: #{semantic2 ? '✅ PASS' : '❌ FAIL'}"
109
-
110
- puts "\n#{'=' * 80}"
111
- if dom2 && !semantic2
112
- puts "❌ FALSE POSITIVE in metadata handling"
113
- elsif !dom2 && semantic2
114
- puts "❌ FALSE NEGATIVE in metadata handling"
115
- elsif dom2 && semantic2
116
- puts "✅ BOTH AGREE: equivalent"
117
- else
118
- puts "✅ BOTH AGREE: different"
119
- end
@@ -1,99 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Test script to verify size limit functionality
5
-
6
- require_relative "../lib/canon"
7
- require_relative "../lib/canon/config"
8
- require_relative "../lib/canon/commands/diff_command"
9
-
10
- puts "Testing Canon Size Limits"
11
- puts "=" * 60
12
-
13
- # Test 1: File size limit configuration
14
- puts "\n1. Testing file size limit configuration:"
15
- config = Canon::Config.instance
16
- puts " Default max_file_size: #{config.xml.diff.max_file_size}"
17
- puts " Expected: 5242880 (5MB)"
18
-
19
- # Test 2: Node count limit configuration
20
- puts "\n2. Testing node count limit configuration:"
21
- puts " Default max_node_count: #{config.xml.diff.max_node_count}"
22
- puts " Expected: 10000"
23
-
24
- # Test 3: Diff lines limit configuration
25
- puts "\n3. Testing diff lines limit configuration:"
26
- puts " Default max_diff_lines: #{config.xml.diff.max_diff_lines}"
27
- puts " Expected: 10000"
28
-
29
- # Test 4: ENV variable override
30
- puts "\n4. Testing ENV variable override:"
31
- ENV["CANON_MAX_FILE_SIZE"] = "1000000"
32
- ENV["CANON_MAX_NODE_COUNT"] = "5000"
33
- ENV["CANON_MAX_DIFF_LINES"] = "2000"
34
-
35
- # Reset config to pick up ENV vars
36
- Canon::Config.reset!
37
- config = Canon::Config.instance
38
-
39
- puts " After setting ENV vars:"
40
- puts " max_file_size: #{config.xml.diff.max_file_size} (expected: 1000000)"
41
- puts " max_node_count: #{config.xml.diff.max_node_count} (expected: 5000)"
42
- puts " max_diff_lines: #{config.xml.diff.max_diff_lines} (expected: 2000)"
43
-
44
- # Test 5: SizeLimitExceededError
45
- puts "\n5. Testing SizeLimitExceededError:"
46
- begin
47
- error = Canon::SizeLimitExceededError.new(:file_size, 10_000_000, 5_242_880)
48
- puts " Created error: #{error.class}"
49
- puts " Message: #{error.message}"
50
- rescue StandardError => e
51
- puts " ERROR: #{e.class}: #{e.message}"
52
- end
53
-
54
- # Test 6: File size checking
55
- puts "\n6. Testing file size checking in diff command:"
56
- require "tempfile"
57
-
58
- # Create a small test file
59
- Tempfile.create(["test", ".xml"]) do |f1|
60
- f1.write("<root><child>content</child></root>")
61
- f1.flush
62
-
63
- Tempfile.create(["test2", ".xml"]) do |f2|
64
- f2.write("<root><child>different</child></root>")
65
- f2.flush
66
-
67
- # Set a very low limit to trigger error
68
- ENV["CANON_MAX_FILE_SIZE"] = "10" # 10 bytes
69
- Canon::Config.reset!
70
-
71
- begin
72
- cmd = Canon::Commands::DiffCommand.new(
73
- format: :xml,
74
- verbose: true,
75
- color: false,
76
- )
77
- cmd.run(f1.path, f2.path)
78
- puts " ✗ Should have raised SizeLimitExceededError"
79
- rescue Canon::SizeLimitExceededError => e
80
- puts " ✓ Correctly raised SizeLimitExceededError"
81
- puts " Message: #{e.message.lines.first.strip}"
82
- rescue SystemExit => e
83
- # abort() in diff_command causes SystemExit
84
- # Check if it was due to size limit by checking stderr
85
- puts " ✓ File size check triggered (command aborted as expected)"
86
- rescue StandardError => e
87
- puts " ✗ Unexpected error: #{e.class}: #{e.message}"
88
- end
89
- end
90
- end
91
-
92
- # Clean up ENV
93
- ENV.delete("CANON_MAX_FILE_SIZE")
94
- ENV.delete("CANON_MAX_NODE_COUNT")
95
- ENV.delete("CANON_MAX_DIFF_LINES")
96
- Canon::Config.reset!
97
-
98
- puts "\n#{'=' * 60}"
99
- puts "Size limits test completed!"
data/test_html_1.html DELETED
@@ -1,21 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <title>Test Page</title>
5
- <!-- Version 1.0 stylesheet -->
6
- </head>
7
- <body>
8
- <div class="container">
9
- <!-- TODO: Add navigation menu -->
10
- <h1>Welcome</h1>
11
- <p>This is a test page.</p>
12
- <!-- End of header section -->
13
-
14
- <section id="content">
15
- <h2>Main Content</h2>
16
- <!-- Important: This section is critical -->
17
- <p>Lorem ipsum dolor sit amet.</p>
18
- </section>
19
- </div>
20
- </body>
21
- </html>
data/test_html_2.html DELETED
@@ -1,21 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <title>Test Page</title>
5
- <!-- Version 2.0 updated styles -->
6
- </head>
7
- <body>
8
- <div class="container">
9
- <!-- DONE: Navigation menu added -->
10
- <h1>Welcome</h1>
11
- <p>This is a test page.</p>
12
- <!-- End of navigation section -->
13
-
14
- <section id="content">
15
- <h2>Main Content</h2>
16
- <!-- Note: Section has been reviewed -->
17
- <p>Lorem ipsum dolor sit amet.</p>
18
- </section>
19
- </div>
20
- </body>
21
- </html>
data/test_nokogiri.rb DELETED
@@ -1,33 +0,0 @@
1
- require "bundler/setup"
2
- require "nokogiri"
3
-
4
- html1 = "<html><body><p>Test</p></body></html>"
5
- html2 = "<html>\n\n<body>\n\n<p>Test</p>\n\n</body>\n\n</html>"
6
-
7
- puts "=== Nokogiri HTML5.fragment ==="
8
- frag1 = Nokogiri::HTML5.fragment(html1)
9
- frag2 = Nokogiri::HTML5.fragment(html2)
10
-
11
- puts "html1 children count: #{frag1.children.count}"
12
- frag1.children.each_with_index do |child, i|
13
- puts " Child #{i}: #{child.class} - #{child.name}"
14
- end
15
-
16
- puts "\nhtml2 children count: #{frag2.children.count}"
17
- frag2.children.each_with_index do |child, i|
18
- puts " Child #{i}: #{child.class} - #{child.name}"
19
- end
20
-
21
- puts "\n=== Nokogiri::XML.fragment ==="
22
- frag3 = Nokogiri::XML.fragment(html1)
23
- frag4 = Nokogiri::XML.fragment(html2)
24
-
25
- puts "html1 children count: #{frag3.children.count}"
26
- frag3.children.each_with_index do |child, i|
27
- puts " Child #{i}: #{child.class} - #{child.name}"
28
- end
29
-
30
- puts "\nhtml2 children count: #{frag4.children.count}"
31
- frag4.children.each_with_index do |child, i|
32
- puts " Child #{i}: #{child.class} - #{child.name}"
33
- end