canon 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +112 -25
  3. data/docs/Gemfile +1 -0
  4. data/docs/_config.yml +90 -1
  5. data/docs/advanced/diff-classification.adoc +82 -2
  6. data/docs/features/match-options/index.adoc +239 -1
  7. data/lib/canon/comparison/format_detector.rb +2 -1
  8. data/lib/canon/comparison/html_comparator.rb +19 -8
  9. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  10. data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
  11. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  12. data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
  13. data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
  14. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
  15. data/lib/canon/comparison/xml_comparator.rb +48 -23
  16. data/lib/canon/comparison/xml_node_comparison.rb +25 -3
  17. data/lib/canon/diff/diff_classifier.rb +101 -2
  18. data/lib/canon/diff/formatting_detector.rb +1 -1
  19. data/lib/canon/rspec_matchers.rb +37 -8
  20. data/lib/canon/version.rb +1 -1
  21. data/lib/canon/xml/data_model.rb +24 -13
  22. metadata +3 -78
  23. data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
  24. data/false_positive_analysis.txt +0 -0
  25. data/file1.html +0 -1
  26. data/file2.html +0 -1
  27. data/old-docs/ADVANCED_TOPICS.adoc +0 -20
  28. data/old-docs/BASIC_USAGE.adoc +0 -16
  29. data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
  30. data/old-docs/CLI.adoc +0 -497
  31. data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  32. data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
  33. data/old-docs/DIFF_FORMATTING.adoc +0 -540
  34. data/old-docs/DIFF_PARAMETERS.adoc +0 -261
  35. data/old-docs/DOM_DIFF.adoc +0 -1017
  36. data/old-docs/ENV_CONFIG.adoc +0 -876
  37. data/old-docs/FORMATS.adoc +0 -867
  38. data/old-docs/INPUT_VALIDATION.adoc +0 -477
  39. data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
  40. data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
  41. data/old-docs/MATCH_OPTIONS.adoc +0 -912
  42. data/old-docs/MODES.adoc +0 -432
  43. data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  44. data/old-docs/OPTIONS.adoc +0 -1387
  45. data/old-docs/PREPROCESSING.adoc +0 -491
  46. data/old-docs/README.old.adoc +0 -2831
  47. data/old-docs/RSPEC.adoc +0 -814
  48. data/old-docs/RUBY_API.adoc +0 -485
  49. data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
  50. data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
  51. data/old-docs/STRING_COMPARE.adoc +0 -345
  52. data/old-docs/TMP.adoc +0 -3384
  53. data/old-docs/TREE_DIFF.adoc +0 -1080
  54. data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
  55. data/old-docs/VERBOSE.adoc +0 -482
  56. data/old-docs/VISUALIZATION_MAP.adoc +0 -625
  57. data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
  58. data/scripts/analyze_current_state.rb +0 -85
  59. data/scripts/analyze_false_positives.rb +0 -114
  60. data/scripts/analyze_remaining_failures.rb +0 -105
  61. data/scripts/compare_current_failures.rb +0 -95
  62. data/scripts/compare_dom_tree_diff.rb +0 -158
  63. data/scripts/compare_failures.rb +0 -151
  64. data/scripts/debug_attribute_extraction.rb +0 -66
  65. data/scripts/debug_blocks_839.rb +0 -115
  66. data/scripts/debug_meta_matching.rb +0 -52
  67. data/scripts/debug_p_matching.rb +0 -192
  68. data/scripts/debug_signature_matching.rb +0 -118
  69. data/scripts/debug_sourcecode_124.rb +0 -32
  70. data/scripts/debug_whitespace_sensitive.rb +0 -192
  71. data/scripts/extract_false_positives.rb +0 -138
  72. data/scripts/find_actual_false_positives.rb +0 -125
  73. data/scripts/investigate_all_false_positives.rb +0 -161
  74. data/scripts/investigate_batch1.rb +0 -127
  75. data/scripts/investigate_classification.rb +0 -150
  76. data/scripts/investigate_classification_detailed.rb +0 -190
  77. data/scripts/investigate_common_failures.rb +0 -342
  78. data/scripts/investigate_false_negative.rb +0 -80
  79. data/scripts/investigate_false_positive.rb +0 -83
  80. data/scripts/investigate_false_positives.rb +0 -227
  81. data/scripts/investigate_false_positives_batch.rb +0 -163
  82. data/scripts/investigate_mixed_content.rb +0 -125
  83. data/scripts/investigate_remaining_16.rb +0 -214
  84. data/scripts/run_single_test.rb +0 -29
  85. data/scripts/test_all_false_positives.rb +0 -95
  86. data/scripts/test_attribute_details.rb +0 -61
  87. data/scripts/test_both_algorithms.rb +0 -49
  88. data/scripts/test_both_simple.rb +0 -49
  89. data/scripts/test_enhanced_semantic_output.rb +0 -125
  90. data/scripts/test_readme_examples.rb +0 -131
  91. data/scripts/test_semantic_tree_diff.rb +0 -99
  92. data/scripts/test_semantic_ux_improvements.rb +0 -135
  93. data/scripts/test_single_false_positive.rb +0 -119
  94. data/scripts/test_size_limits.rb +0 -99
  95. data/test_html_1.html +0 -21
  96. data/test_html_2.html +0 -21
  97. data/test_nokogiri.rb +0 -33
  98. data/test_normalize.rb +0 -45
data/test_normalize.rb DELETED
@@ -1,45 +0,0 @@
1
- require "bundler/setup"
2
- require "canon/html/data_model"
3
-
4
- html1 = "<html><body><p>Test</p></body></html>"
5
- html2 = "<html>\n\n<body>\n\n<p>Test</p>\n\n</body>\n\n</html>"
6
-
7
- # Parse both without preprocessing
8
- node1 = Canon::Html::DataModel.from_html(html1)
9
- node2 = Canon::Html::DataModel.from_html(html2)
10
-
11
- puts "=== Without preprocessing ==="
12
- puts "node1 root children count: #{node1.children.count}"
13
- node1.children.each_with_index do |child, i|
14
- puts " Child #{i}: #{child.class}"
15
- if child.is_a?(Canon::Xml::Nodes::ElementNode)
16
- puts " Name: #{child.name}"
17
- puts " Children count: #{child.children.count}"
18
- end
19
- end
20
-
21
- puts "\nnode2 root children count: #{node2.children.count}"
22
- node2.children.each_with_index do |child, i|
23
- puts " Child #{i}: #{child.class}"
24
- if child.is_a?(Canon::Xml::Nodes::ElementNode)
25
- puts " Name: #{child.name}"
26
- puts " Children count: #{child.children.count}"
27
- end
28
- end
29
-
30
- # Now with normalize preprocessing
31
- html2_norm = html2.lines.map(&:strip).reject(&:empty?).join("\n")
32
- node2_norm = Canon::Html::DataModel.from_html(html2_norm)
33
-
34
- puts "\n=== With :normalize preprocessing ==="
35
- puts "html2_norm:"
36
- puts html2_norm
37
- puts ""
38
- puts "node2_norm root children count: #{node2_norm.children.count}"
39
- node2_norm.children.each_with_index do |child, i|
40
- puts " Child #{i}: #{child.class}"
41
- if child.is_a?(Canon::Xml::Nodes::ElementNode)
42
- puts " Name: #{child.name}"
43
- puts " Children count: #{child.children.count}"
44
- end
45
- end