canon 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +83 -22
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +196 -24
- data/docs/features/match-options/index.adoc +239 -1
- data/lib/canon/comparison/format_detector.rb +2 -1
- data/lib/canon/comparison/html_comparator.rb +19 -8
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/markup_comparator.rb +109 -2
- data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
- data/lib/canon/comparison/xml_comparator.rb +240 -23
- data/lib/canon/comparison/xml_node_comparison.rb +25 -3
- data/lib/canon/diff/diff_classifier.rb +119 -5
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
- data/lib/canon/rspec_matchers.rb +37 -8
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +4 -78
- data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
- data/false_positive_analysis.txt +0 -0
- data/file1.html +0 -1
- data/file2.html +0 -1
- data/old-docs/ADVANCED_TOPICS.adoc +0 -20
- data/old-docs/BASIC_USAGE.adoc +0 -16
- data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
- data/old-docs/CLI.adoc +0 -497
- data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/old-docs/DIFF_FORMATTING.adoc +0 -540
- data/old-docs/DIFF_PARAMETERS.adoc +0 -261
- data/old-docs/DOM_DIFF.adoc +0 -1017
- data/old-docs/ENV_CONFIG.adoc +0 -876
- data/old-docs/FORMATS.adoc +0 -867
- data/old-docs/INPUT_VALIDATION.adoc +0 -477
- data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
- data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/old-docs/MATCH_OPTIONS.adoc +0 -912
- data/old-docs/MODES.adoc +0 -432
- data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/old-docs/OPTIONS.adoc +0 -1387
- data/old-docs/PREPROCESSING.adoc +0 -491
- data/old-docs/README.old.adoc +0 -2831
- data/old-docs/RSPEC.adoc +0 -814
- data/old-docs/RUBY_API.adoc +0 -485
- data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
- data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
- data/old-docs/STRING_COMPARE.adoc +0 -345
- data/old-docs/TMP.adoc +0 -3384
- data/old-docs/TREE_DIFF.adoc +0 -1080
- data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
- data/old-docs/VERBOSE.adoc +0 -482
- data/old-docs/VISUALIZATION_MAP.adoc +0 -625
- data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
- data/scripts/analyze_current_state.rb +0 -85
- data/scripts/analyze_false_positives.rb +0 -114
- data/scripts/analyze_remaining_failures.rb +0 -105
- data/scripts/compare_current_failures.rb +0 -95
- data/scripts/compare_dom_tree_diff.rb +0 -158
- data/scripts/compare_failures.rb +0 -151
- data/scripts/debug_attribute_extraction.rb +0 -66
- data/scripts/debug_blocks_839.rb +0 -115
- data/scripts/debug_meta_matching.rb +0 -52
- data/scripts/debug_p_matching.rb +0 -192
- data/scripts/debug_signature_matching.rb +0 -118
- data/scripts/debug_sourcecode_124.rb +0 -32
- data/scripts/debug_whitespace_sensitive.rb +0 -192
- data/scripts/extract_false_positives.rb +0 -138
- data/scripts/find_actual_false_positives.rb +0 -125
- data/scripts/investigate_all_false_positives.rb +0 -161
- data/scripts/investigate_batch1.rb +0 -127
- data/scripts/investigate_classification.rb +0 -150
- data/scripts/investigate_classification_detailed.rb +0 -190
- data/scripts/investigate_common_failures.rb +0 -342
- data/scripts/investigate_false_negative.rb +0 -80
- data/scripts/investigate_false_positive.rb +0 -83
- data/scripts/investigate_false_positives.rb +0 -227
- data/scripts/investigate_false_positives_batch.rb +0 -163
- data/scripts/investigate_mixed_content.rb +0 -125
- data/scripts/investigate_remaining_16.rb +0 -214
- data/scripts/run_single_test.rb +0 -29
- data/scripts/test_all_false_positives.rb +0 -95
- data/scripts/test_attribute_details.rb +0 -61
- data/scripts/test_both_algorithms.rb +0 -49
- data/scripts/test_both_simple.rb +0 -49
- data/scripts/test_enhanced_semantic_output.rb +0 -125
- data/scripts/test_readme_examples.rb +0 -131
- data/scripts/test_semantic_tree_diff.rb +0 -99
- data/scripts/test_semantic_ux_improvements.rb +0 -135
- data/scripts/test_single_false_positive.rb +0 -119
- data/scripts/test_size_limits.rb +0 -99
- data/test_html_1.html +0 -21
- data/test_html_2.html +0 -21
- data/test_nokogiri.rb +0 -33
- data/test_normalize.rb +0 -45
data/test_normalize.rb
DELETED
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
require "bundler/setup"
|
|
2
|
-
require "canon/html/data_model"
|
|
3
|
-
|
|
4
|
-
html1 = "<html><body><p>Test</p></body></html>"
|
|
5
|
-
html2 = "<html>\n\n<body>\n\n<p>Test</p>\n\n</body>\n\n</html>"
|
|
6
|
-
|
|
7
|
-
# Parse both without preprocessing
|
|
8
|
-
node1 = Canon::Html::DataModel.from_html(html1)
|
|
9
|
-
node2 = Canon::Html::DataModel.from_html(html2)
|
|
10
|
-
|
|
11
|
-
puts "=== Without preprocessing ==="
|
|
12
|
-
puts "node1 root children count: #{node1.children.count}"
|
|
13
|
-
node1.children.each_with_index do |child, i|
|
|
14
|
-
puts " Child #{i}: #{child.class}"
|
|
15
|
-
if child.is_a?(Canon::Xml::Nodes::ElementNode)
|
|
16
|
-
puts " Name: #{child.name}"
|
|
17
|
-
puts " Children count: #{child.children.count}"
|
|
18
|
-
end
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
puts "\nnode2 root children count: #{node2.children.count}"
|
|
22
|
-
node2.children.each_with_index do |child, i|
|
|
23
|
-
puts " Child #{i}: #{child.class}"
|
|
24
|
-
if child.is_a?(Canon::Xml::Nodes::ElementNode)
|
|
25
|
-
puts " Name: #{child.name}"
|
|
26
|
-
puts " Children count: #{child.children.count}"
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
# Now with normalize preprocessing
|
|
31
|
-
html2_norm = html2.lines.map(&:strip).reject(&:empty?).join("\n")
|
|
32
|
-
node2_norm = Canon::Html::DataModel.from_html(html2_norm)
|
|
33
|
-
|
|
34
|
-
puts "\n=== With :normalize preprocessing ==="
|
|
35
|
-
puts "html2_norm:"
|
|
36
|
-
puts html2_norm
|
|
37
|
-
puts ""
|
|
38
|
-
puts "node2_norm root children count: #{node2_norm.children.count}"
|
|
39
|
-
node2_norm.children.each_with_index do |child, i|
|
|
40
|
-
puts " Child #{i}: #{child.class}"
|
|
41
|
-
if child.is_a?(Canon::Xml::Nodes::ElementNode)
|
|
42
|
-
puts " Name: #{child.name}"
|
|
43
|
-
puts " Children count: #{child.children.count}"
|
|
44
|
-
end
|
|
45
|
-
end
|