canon 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +25 -135
  3. data/README.adoc +13 -13
  4. data/docs/.lycheeignore +69 -0
  5. data/docs/advanced/extending-canon.adoc +193 -0
  6. data/docs/internals/diffnode-enrichment.adoc +611 -0
  7. data/docs/internals/index.adoc +251 -0
  8. data/docs/lychee.toml +13 -6
  9. data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +250 -0
  10. data/docs/understanding/architecture.adoc +749 -33
  11. data/docs/understanding/comparison-pipeline.adoc +122 -0
  12. data/false_positive_analysis.txt +0 -0
  13. data/file1.html +1 -0
  14. data/file2.html +1 -0
  15. data/lib/canon/cache.rb +129 -0
  16. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
  17. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
  18. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
  19. data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
  20. data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
  21. data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
  22. data/lib/canon/comparison/dimensions/registry.rb +77 -0
  23. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
  24. data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
  25. data/lib/canon/comparison/dimensions.rb +54 -0
  26. data/lib/canon/comparison/format_detector.rb +86 -0
  27. data/lib/canon/comparison/html_comparator.rb +51 -18
  28. data/lib/canon/comparison/html_parser.rb +80 -0
  29. data/lib/canon/comparison/json_comparator.rb +12 -0
  30. data/lib/canon/comparison/json_parser.rb +19 -0
  31. data/lib/canon/comparison/markup_comparator.rb +293 -0
  32. data/lib/canon/comparison/match_options/base_resolver.rb +143 -0
  33. data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
  34. data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
  35. data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
  36. data/lib/canon/comparison/match_options.rb +68 -463
  37. data/lib/canon/comparison/profile_definition.rb +149 -0
  38. data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
  39. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
  40. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
  41. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
  42. data/lib/canon/comparison/xml_comparator/child_comparison.rb +189 -0
  43. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
  44. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
  45. data/lib/canon/comparison/xml_comparator/node_parser.rb +74 -0
  46. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +95 -0
  47. data/lib/canon/comparison/xml_comparator.rb +52 -664
  48. data/lib/canon/comparison/xml_node_comparison.rb +297 -0
  49. data/lib/canon/comparison/xml_parser.rb +19 -0
  50. data/lib/canon/comparison/yaml_comparator.rb +3 -3
  51. data/lib/canon/comparison.rb +265 -110
  52. data/lib/canon/diff/diff_node.rb +32 -2
  53. data/lib/canon/diff/node_serializer.rb +191 -0
  54. data/lib/canon/diff/path_builder.rb +143 -0
  55. data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
  56. data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
  57. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
  58. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
  59. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
  60. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
  61. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
  63. data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
  64. data/lib/canon/diff_formatter.rb +1 -1
  65. data/lib/canon/rspec_matchers.rb +1 -1
  66. data/lib/canon/tree_diff/operation_converter.rb +92 -338
  67. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
  68. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
  69. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
  70. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
  71. data/lib/canon/version.rb +1 -1
  72. data/old-docs/ADVANCED_TOPICS.adoc +20 -0
  73. data/old-docs/BASIC_USAGE.adoc +16 -0
  74. data/old-docs/CHARACTER_VISUALIZATION.adoc +567 -0
  75. data/old-docs/CLI.adoc +497 -0
  76. data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  77. data/old-docs/DIFF_ARCHITECTURE.adoc +435 -0
  78. data/old-docs/DIFF_FORMATTING.adoc +540 -0
  79. data/old-docs/DIFF_PARAMETERS.adoc +261 -0
  80. data/old-docs/DOM_DIFF.adoc +1017 -0
  81. data/old-docs/ENV_CONFIG.adoc +876 -0
  82. data/old-docs/FORMATS.adoc +867 -0
  83. data/old-docs/INPUT_VALIDATION.adoc +477 -0
  84. data/old-docs/MATCHER_BEHAVIOR.adoc +90 -0
  85. data/old-docs/MATCH_ARCHITECTURE.adoc +463 -0
  86. data/old-docs/MATCH_OPTIONS.adoc +912 -0
  87. data/old-docs/MODES.adoc +432 -0
  88. data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  89. data/old-docs/OPTIONS.adoc +1387 -0
  90. data/old-docs/PREPROCESSING.adoc +491 -0
  91. data/old-docs/README.old.adoc +2831 -0
  92. data/old-docs/RSPEC.adoc +814 -0
  93. data/old-docs/RUBY_API.adoc +485 -0
  94. data/old-docs/SEMANTIC_DIFF_REPORT.adoc +646 -0
  95. data/old-docs/SEMANTIC_TREE_DIFF.adoc +765 -0
  96. data/old-docs/STRING_COMPARE.adoc +345 -0
  97. data/old-docs/TMP.adoc +3384 -0
  98. data/old-docs/TREE_DIFF.adoc +1080 -0
  99. data/old-docs/UNDERSTANDING_CANON.adoc +17 -0
  100. data/old-docs/VERBOSE.adoc +482 -0
  101. data/old-docs/VISUALIZATION_MAP.adoc +625 -0
  102. data/old-docs/WHITESPACE_TREATMENT.adoc +1155 -0
  103. data/scripts/analyze_current_state.rb +85 -0
  104. data/scripts/analyze_false_positives.rb +114 -0
  105. data/scripts/analyze_remaining_failures.rb +105 -0
  106. data/scripts/compare_current_failures.rb +95 -0
  107. data/scripts/compare_dom_tree_diff.rb +158 -0
  108. data/scripts/compare_failures.rb +151 -0
  109. data/scripts/debug_attribute_extraction.rb +66 -0
  110. data/scripts/debug_blocks_839.rb +115 -0
  111. data/scripts/debug_meta_matching.rb +52 -0
  112. data/scripts/debug_p_matching.rb +192 -0
  113. data/scripts/debug_signature_matching.rb +118 -0
  114. data/scripts/debug_sourcecode_124.rb +32 -0
  115. data/scripts/debug_whitespace_sensitive.rb +192 -0
  116. data/scripts/extract_false_positives.rb +138 -0
  117. data/scripts/find_actual_false_positives.rb +125 -0
  118. data/scripts/investigate_all_false_positives.rb +161 -0
  119. data/scripts/investigate_batch1.rb +127 -0
  120. data/scripts/investigate_classification.rb +150 -0
  121. data/scripts/investigate_classification_detailed.rb +190 -0
  122. data/scripts/investigate_common_failures.rb +342 -0
  123. data/scripts/investigate_false_negative.rb +80 -0
  124. data/scripts/investigate_false_positive.rb +83 -0
  125. data/scripts/investigate_false_positives.rb +227 -0
  126. data/scripts/investigate_false_positives_batch.rb +163 -0
  127. data/scripts/investigate_mixed_content.rb +125 -0
  128. data/scripts/investigate_remaining_16.rb +214 -0
  129. data/scripts/run_single_test.rb +29 -0
  130. data/scripts/test_all_false_positives.rb +95 -0
  131. data/scripts/test_attribute_details.rb +61 -0
  132. data/scripts/test_both_algorithms.rb +49 -0
  133. data/scripts/test_both_simple.rb +49 -0
  134. data/scripts/test_enhanced_semantic_output.rb +125 -0
  135. data/scripts/test_readme_examples.rb +131 -0
  136. data/scripts/test_semantic_tree_diff.rb +99 -0
  137. data/scripts/test_semantic_ux_improvements.rb +135 -0
  138. data/scripts/test_single_false_positive.rb +119 -0
  139. data/scripts/test_size_limits.rb +99 -0
  140. data/test_html_1.html +21 -0
  141. data/test_html_2.html +21 -0
  142. data/test_nokogiri.rb +33 -0
  143. data/test_normalize.rb +45 -0
  144. metadata +123 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 74adcfa8d9b6ab7cc95fba2557dcfecbfa0dbd134e90497a30195fb476de6b42
4
- data.tar.gz: dab1ef7a6c5fe852b6935561ddb85e2ffdde6edf4463b36cbe2f9deb5251b4f2
3
+ metadata.gz: 1d94be550a90d23eb695f46579b13fa327434993b89a995b3c95ba658a143fb9
4
+ data.tar.gz: 2ac083712aa9d0153aa2e0898186a7cbf669e775368378c9de8de6c42c52a257
5
5
  SHA512:
6
- metadata.gz: 98b8e102b110e5a85363a3ccceb28ffb87082f9695ad0c39b182a1f9f3bca7fc652c471a2134cc621b84d9acfaa858f10aea5c9ddb6a90f5e098ed5b3cd795c5
7
- data.tar.gz: 1db527549bde5c60b0f45a8d34a59c877ed9b880a9ea80f076b2501319886e1300d036fd5502820d361d06d9a4957a9b648d479aaf98dcf0bc55bb26f9673066
6
+ metadata.gz: 45b4502c83bfd367c5933e66f610de529b87f27d5cfef18c0fe808bcbc91e20c1c3c0e32ba36ed6163a70de37c5ae3a1023c67fed586491e7eea5f7c621e2769
7
+ data.tar.gz: 3713856d8dfdfb4164cfd9e8bdcd4dcbdce7b303619d703b7defe58d09c5468eb3acc5f32435f3184d35f20b3a6f7470c2960d8e5cef21ba720ebd7dc44ccfbf
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2025-12-22 03:34:50 UTC using RuboCop version 1.81.7.
3
+ # on 2026-01-17 14:46:16 UTC using RuboCop version 1.81.7.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -18,96 +18,27 @@ Gemspec/RequiredRubyVersion:
18
18
  # SupportedStyles: with_first_argument, with_fixed_indentation
19
19
  Layout/ArgumentAlignment:
20
20
  Exclude:
21
- - 'spec/canon/comparison/multiple_differences_spec.rb'
22
-
23
- # Offense count: 2
24
- # This cop supports safe autocorrection (--autocorrect).
25
- # Configuration parameters: EnforcedStyleAlignWith.
26
- # SupportedStylesAlignWith: either, start_of_block, start_of_line
27
- Layout/BlockAlignment:
28
- Exclude:
29
- - 'lib/canon/diff_formatter/by_line/html_formatter.rb'
30
- - 'spec/canon/diff/diff_node_mapper_granularity_spec.rb'
21
+ - 'lib/canon/comparison.rb'
31
22
 
32
- # Offense count: 1
33
- # This cop supports safe autocorrection (--autocorrect).
34
- Layout/BlockEndNewline:
35
- Exclude:
36
- - 'spec/canon/diff/diff_node_mapper_granularity_spec.rb'
37
-
38
- # Offense count: 1
39
- # This cop supports safe autocorrection (--autocorrect).
40
- Layout/ElseAlignment:
41
- Exclude:
42
- - 'lib/canon/diff_formatter/by_object/base_formatter.rb'
43
-
44
- # Offense count: 1
45
- # This cop supports safe autocorrection (--autocorrect).
46
- # Configuration parameters: EnforcedStyleAlignWith, Severity.
47
- # SupportedStylesAlignWith: keyword, variable, start_of_line
48
- Layout/EndAlignment:
49
- Exclude:
50
- - 'lib/canon/diff_formatter/by_object/base_formatter.rb'
51
-
52
- # Offense count: 1
53
- # This cop supports safe autocorrection (--autocorrect).
54
- # Configuration parameters: AllowForAlignment, AllowBeforeTrailingComments, ForceEqualSignAlignment.
55
- Layout/ExtraSpacing:
56
- Exclude:
57
- - 'lib/canon/commands/diff_command.rb'
58
-
59
- # Offense count: 3
60
- # This cop supports safe autocorrection (--autocorrect).
61
- # Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
62
- # SupportedHashRocketStyles: key, separator, table
63
- # SupportedColonStyles: key, separator, table
64
- # SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
65
- Layout/HashAlignment:
66
- Exclude:
67
- - 'spec/canon/comparison/multiple_differences_spec.rb'
68
-
69
- # Offense count: 4
70
- # This cop supports safe autocorrection (--autocorrect).
71
- # Configuration parameters: Width, AllowedPatterns.
72
- Layout/IndentationWidth:
73
- Exclude:
74
- - 'lib/canon/diff_formatter/by_object/base_formatter.rb'
75
- - 'spec/canon/diff/diff_node_mapper_granularity_spec.rb'
76
-
77
- # Offense count: 654
23
+ # Offense count: 697
78
24
  # This cop supports safe autocorrection (--autocorrect).
79
25
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, IgnoreCopDirectives, AllowedPatterns, SplitStrings.
80
26
  # URISchemes: http, https
81
27
  Layout/LineLength:
82
28
  Enabled: false
83
29
 
84
- # Offense count: 3
85
- # This cop supports safe autocorrection (--autocorrect).
86
- # Configuration parameters: EnforcedStyle.
87
- # SupportedStyles: final_newline, final_blank_line
88
- Layout/TrailingEmptyLines:
89
- Exclude:
90
- - 'spec/canon/commands/diff_command_spec.rb'
91
- - 'spec/canon/comparison/multiple_differences_spec.rb'
92
- - 'spec/canon/diff/diff_classifier_spec.rb'
93
-
94
- # Offense count: 10
30
+ # Offense count: 1
95
31
  # This cop supports safe autocorrection (--autocorrect).
96
32
  # Configuration parameters: AllowInHeredoc.
97
33
  Layout/TrailingWhitespace:
98
34
  Exclude:
99
- - 'spec/canon/comparison/multiple_differences_spec.rb'
35
+ - 'lib/canon/comparison.rb'
100
36
 
101
- # Offense count: 46
37
+ # Offense count: 48
102
38
  # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
103
39
  Lint/DuplicateBranch:
104
40
  Enabled: false
105
41
 
106
- # Offense count: 2
107
- Lint/DuplicateMethods:
108
- Exclude:
109
- - 'lib/canon/diff_formatter/by_line/html_formatter.rb'
110
-
111
42
  # Offense count: 2
112
43
  # This cop supports safe autocorrection (--autocorrect).
113
44
  # Configuration parameters: AllowComments.
@@ -132,18 +63,18 @@ Lint/UnreachableCode:
132
63
  Exclude:
133
64
  - 'lib/canon/diff_formatter/debug_output.rb'
134
65
 
135
- # Offense count: 7
66
+ # Offense count: 6
136
67
  # This cop supports safe autocorrection (--autocorrect).
137
68
  # Configuration parameters: AllowUnusedKeywordArguments, IgnoreEmptyMethods, IgnoreNotImplementedMethods, NotImplementedExceptions.
138
69
  # NotImplementedExceptions: NotImplementedError
139
70
  Lint/UnusedMethodArgument:
140
71
  Exclude:
141
- - 'lib/canon/comparison/match_options.rb'
72
+ - 'lib/canon/diff/path_builder.rb'
142
73
  - 'lib/canon/diff_formatter/by_line/base_formatter.rb'
143
74
  - 'lib/canon/diff_formatter/by_line/xml_formatter.rb'
144
75
  - 'lib/canon/diff_formatter/by_object/base_formatter.rb'
145
76
 
146
- # Offense count: 219
77
+ # Offense count: 225
147
78
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
148
79
  Metrics/AbcSize:
149
80
  Enabled: false
@@ -154,27 +85,22 @@ Metrics/AbcSize:
154
85
  Metrics/BlockLength:
155
86
  Max: 84
156
87
 
157
- # Offense count: 2
158
- # Configuration parameters: CountBlocks, CountModifierForms.
159
- Metrics/BlockNesting:
160
- Max: 4
161
-
162
- # Offense count: 180
88
+ # Offense count: 178
163
89
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
164
90
  Metrics/CyclomaticComplexity:
165
91
  Enabled: false
166
92
 
167
- # Offense count: 347
93
+ # Offense count: 376
168
94
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
169
95
  Metrics/MethodLength:
170
- Max: 107
96
+ Max: 110
171
97
 
172
- # Offense count: 23
98
+ # Offense count: 39
173
99
  # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
174
100
  Metrics/ParameterLists:
175
101
  Max: 9
176
102
 
177
- # Offense count: 142
103
+ # Offense count: 143
178
104
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
179
105
  Metrics/PerceivedComplexity:
180
106
  Enabled: false
@@ -185,10 +111,11 @@ Metrics/PerceivedComplexity:
185
111
  Naming/MethodParameterName:
186
112
  Exclude:
187
113
  - 'lib/canon/comparison/xml_comparator.rb'
114
+ - 'lib/canon/comparison/xml_comparator/attribute_comparator.rb'
188
115
  - 'lib/canon/xml/namespace_handler.rb'
189
116
  - 'scripts/investigate_all_false_positives.rb'
190
117
 
191
- # Offense count: 2
118
+ # Offense count: 1
192
119
  # Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros, UseSorbetSigs.
193
120
  # NamePrefix: is_, has_, have_, does_
194
121
  # ForbiddenPrefixes: is_, has_, have_, does_
@@ -197,7 +124,6 @@ Naming/MethodParameterName:
197
124
  Naming/PredicatePrefix:
198
125
  Exclude:
199
126
  - 'lib/canon/comparison/html_comparator.rb'
200
- - 'lib/canon/comparison/xml_comparator.rb'
201
127
 
202
128
  # Offense count: 2
203
129
  # Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
@@ -207,21 +133,20 @@ Naming/VariableNumber:
207
133
  Exclude:
208
134
  - 'lib/canon/comparison/json_comparator.rb'
209
135
 
210
- # Offense count: 4
136
+ # Offense count: 2
211
137
  # Configuration parameters: MinSize.
212
138
  Performance/CollectionLiteralInLoop:
213
139
  Exclude:
214
140
  - 'lib/canon/comparison/html_comparator.rb'
215
- - 'lib/canon/diff_formatter/diff_detail_formatter.rb'
216
141
  - 'lib/canon/xml/xml_base_handler.rb'
217
142
 
218
- # Offense count: 58
143
+ # Offense count: 62
219
144
  # Configuration parameters: Prefixes, AllowedPatterns.
220
145
  # Prefixes: when, with, without
221
146
  RSpec/ContextWording:
222
147
  Enabled: false
223
148
 
224
- # Offense count: 24
149
+ # Offense count: 25
225
150
  # Configuration parameters: IgnoredMetadata.
226
151
  RSpec/DescribeClass:
227
152
  Enabled: false
@@ -232,7 +157,7 @@ RSpec/DescribeMethod:
232
157
  - 'spec/canon/comparison/multiple_differences_spec.rb'
233
158
  - 'spec/canon/diff_formatter/character_map_customization_spec.rb'
234
159
 
235
- # Offense count: 592
160
+ # Offense count: 624
236
161
  # Configuration parameters: CountAsOne.
237
162
  RSpec/ExampleLength:
238
163
  Max: 67
@@ -269,12 +194,11 @@ RSpec/InstanceVariable:
269
194
  Exclude:
270
195
  - 'spec/canon/rspec_matchers_spec.rb'
271
196
 
272
- # Offense count: 16
197
+ # Offense count: 15
273
198
  # Configuration parameters: EnforcedStyle.
274
199
  # SupportedStyles: have_received, receive
275
200
  RSpec/MessageSpies:
276
201
  Exclude:
277
- - 'spec/canon/comparison/yaml_comparator_spec.rb'
278
202
  - 'spec/canon/diff/diff_classifier_spec.rb'
279
203
  - 'spec/canon_spec.rb'
280
204
  - 'spec/xml_c14n_spec.rb'
@@ -284,7 +208,7 @@ RSpec/MultipleDescribes:
284
208
  Exclude:
285
209
  - 'spec/canon/comparison/match_options_spec.rb'
286
210
 
287
- # Offense count: 489
211
+ # Offense count: 515
288
212
  RSpec/MultipleExpectations:
289
213
  Max: 15
290
214
 
@@ -302,7 +226,7 @@ RSpec/NamedSubject:
302
226
  - 'spec/canon/pretty_printer/json_spec.rb'
303
227
  - 'spec/canon/pretty_printer/xml_spec.rb'
304
228
 
305
- # Offense count: 29
229
+ # Offense count: 30
306
230
  # Configuration parameters: AllowedGroups.
307
231
  RSpec/NestedGroups:
308
232
  Max: 4
@@ -330,32 +254,14 @@ RSpec/SpecFilePathFormat:
330
254
  - 'spec/canon/yaml/formatter_spec.rb'
331
255
  - 'spec/xml_c14n_spec.rb'
332
256
 
333
- # Offense count: 68
257
+ # Offense count: 94
334
258
  # Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
335
259
  RSpec/VerifiedDoubles:
336
260
  Exclude:
337
261
  - 'spec/canon/diff/diff_classifier_spec.rb'
262
+ - 'spec/canon/diff/path_builder_spec.rb'
338
263
  - 'spec/canon/tree_diff/operation_converter_spec.rb'
339
264
 
340
- # Offense count: 1
341
- # This cop supports safe autocorrection (--autocorrect).
342
- # Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
343
- # SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
344
- # ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
345
- # FunctionalMethods: let, let!, subject, watch
346
- # AllowedMethods: lambda, proc, it
347
- Style/BlockDelimiters:
348
- Exclude:
349
- - 'spec/canon/diff/diff_node_mapper_granularity_spec.rb'
350
-
351
- # Offense count: 1
352
- # This cop supports safe autocorrection (--autocorrect).
353
- # Configuration parameters: EnforcedStyle, AllowComments.
354
- # SupportedStyles: empty, nil, both
355
- Style/EmptyElse:
356
- Exclude:
357
- - 'lib/canon/diff_formatter/diff_detail_formatter.rb'
358
-
359
265
  # Offense count: 3
360
266
  # Configuration parameters: MinBranchesCount.
361
267
  Style/HashLikeCase:
@@ -376,19 +282,3 @@ Style/IdenticalConditionalBranches:
376
282
  Style/OptionalBooleanParameter:
377
283
  Exclude:
378
284
  - 'lib/canon/diff_formatter/debug_output.rb'
379
-
380
- # Offense count: 1
381
- # This cop supports unsafe autocorrection (--autocorrect-all).
382
- # Configuration parameters: Mode.
383
- Style/StringConcatenation:
384
- Exclude:
385
- - 'lib/canon/diff_formatter/diff_detail_formatter.rb'
386
-
387
- # Offense count: 6
388
- # This cop supports safe autocorrection (--autocorrect).
389
- # Configuration parameters: EnforcedStyleForMultiline.
390
- # SupportedStylesForMultiline: comma, consistent_comma, diff_comma, no_comma
391
- Style/TrailingCommaInArguments:
392
- Exclude:
393
- - 'lib/canon/commands/diff_command.rb'
394
- - 'spec/canon/commands/diff_command_spec.rb'
data/README.adoc CHANGED
@@ -69,7 +69,7 @@ Canon::PrettyPrinter::Xml.new(indent: 2).format(xml_input)
69
69
  require 'canon/comparison'
70
70
 
71
71
  xml1 = '<root><a>1</a><b>2</b></root>'
72
- xml2 = '<root> <b>2</b> <a>1</a> </root>'
72
+ xml2 = '<root> <a>1</a> <b>2</b> </root>'
73
73
 
74
74
  Canon::Comparison.equivalent?(xml1, xml2)
75
75
  # => true (semantically equivalent despite formatting differences)
@@ -534,14 +534,14 @@ Each dimension can have one of three behaviors:
534
534
  ====
535
535
  ----
536
536
  # Default (strict mode): whitespace differences are normative
537
- xml1 = '<root><p>Hello world</p></root>'
538
- xml2 = '<root><p>Hello\nworld</p></root>'
537
+ xml1 = "<root><p>Hello world</p></root>"
538
+ xml2 = "<root><p>Hello \nworld</p></root>"
539
539
  Canon::Comparison.equivalent?(xml1, xml2) # => false
540
540
 
541
541
  # Normalize mode: whitespace-only differences are formatting-only
542
542
  Canon::Comparison.equivalent?(xml1, xml2,
543
- match: { text_content: :normalize, structural_whitespace: :normalize }
544
- ) # => true
543
+ match: { text_content: :normalize }
544
+ ) # => true
545
545
  ----
546
546
  ====
547
547
 
@@ -620,11 +620,11 @@ html1 = '<div><!-- comment --><p>Text</p></div>'
620
620
  html2 = '<div><p>Text</p></div>'
621
621
 
622
622
  # HTML defaults: comments are ignored (presentational)
623
- result = Canon::Comparison.equivalent?(html1, html2)
623
+ result = Canon::Comparison.equivalent?(html1, html2, format: :html)
624
624
  # => true (comments don't affect HTML equivalence by default)
625
625
 
626
626
  # Explicit strict matching
627
- result = Canon::Comparison.equivalent?(html1, html2,
627
+ result = Canon::Comparison.equivalent?(html1, html2, format: :html,
628
628
  match: { comments: :strict }
629
629
  )
630
630
  # => false (comments now affect equivalence)
@@ -638,17 +638,17 @@ don't affect the semantic meaning unless explicitly configured to `:strict`.
638
638
  ====
639
639
  [source,ruby]
640
640
  ----
641
- html1 = '<pre>Line 1\n Line 2</pre>'
642
- html2 = '<pre>Line 1\nLine 2</pre>'
641
+ html1 = "<pre>Line 1\n Line 2</pre>"
642
+ html2 = "<pre>Line 1\nLine 2</pre>"
643
643
 
644
644
  # Whitespace is preserved in <pre> elements
645
- result = Canon::Comparison.equivalent?(html1, html2)
645
+ result = Canon::Comparison::HtmlComparator.equivalent?(html1, html2)
646
646
  # => false (whitespace differs in pre element)
647
647
 
648
648
  # But normalized in other elements
649
- html3 = '<div>Text with spaces</div>'
650
- html4 = '<div>Text with spaces</div>'
651
- result = Canon::Comparison.equivalent?(html3, html4)
649
+ html3 = "<div>Text with\n spaces</div>"
650
+ html4 = "<div>Text with\n spaces</div>"
651
+ result = Canon::Comparison::HtmlComparator.equivalent?(html3, html4)
652
652
  # => true (whitespace normalized in regular elements)
653
653
  ----
654
654
 
@@ -0,0 +1,69 @@
1
+ # Lychee ignore patterns
2
+ # These link types are valid but lychee can't verify them
3
+
4
+ # Ignore all .adoc files (source files that don't exist in built site)
5
+ \.adoc$
6
+
7
+ # Ignore all links ending in .html (their paths are inconsistent)
8
+ \.html$
9
+
10
+ # Ignore all file:// URLs (link checker has inconsistent base URL handling for local files)
11
+ file://
12
+
13
+ # Ignore assets directory (CSS/JS files)
14
+ file://.*/assets/.*
15
+
16
+ # Ignore directory links with trailing slashes (web servers handle these with index.html)
17
+ file://.*/canon/$
18
+ file://.*/canon/.*/$
19
+ file://.*/docs/$
20
+ file://.*/docs/.*/$
21
+
22
+ # Ignore all /canon/* and /docs/* paths (may have inconsistent base URLs during link checking)
23
+ file://.*/canon/.*
24
+ file://.*/docs/.*
25
+
26
+ # Ignore asciidoc-style directory links without .html extension
27
+ file://.*/extending-canon$
28
+ file://.*/verbose-mode-architecture$
29
+ file://.*/cli/diff-command$
30
+ file://.*/cli/format-command$
31
+ file://.*/rspec/matchers$
32
+ file://.*/ruby-api/comparison$
33
+ file://.*/ruby-api/formatting$
34
+ file://.*/core-concepts$
35
+ file://.*/installation$
36
+ file://.*/quick-start$
37
+ file://.*/profiles$
38
+ file://.*/comparison-pipeline$
39
+ file://.*/migrating-from-xmldiff$
40
+ file://.*/performance-optimization$
41
+ file://.*/testing-xml-generation$
42
+ file://.*/debugging-test-failures$
43
+ file://.*/choosing-configuration$
44
+ file://.*/guides/choosing-configuration$
45
+ file://.*/environment-configuration/size-limits$
46
+ file://.*/environment-configuration/override-system$
47
+ file://.*/environment-configuration/input-validation$
48
+ file://.*/architecture$
49
+
50
+ # Ignore example domain links (these don't exist)
51
+ http://a\.com/.*
52
+ http://b\.com/.*
53
+ http://c\.com/.*
54
+ http://example\.com/.*
55
+ http://ns\.com/.*
56
+ http://shop\.com/.*
57
+ http://z\.com/.*
58
+
59
+ # Ignore GitHub-specific links
60
+ https://github\.com/[^/]+/[^/]+/edit/.*
61
+ https://github\.com/[^/]+/[^/]+/blob/.*
62
+
63
+ # Ignore mailto links
64
+ mailto:.*
65
+
66
+ # Ignore localhost links for development
67
+ http://localhost:.*
68
+ http://127\.0\.0\.1:.*
69
+ http://0\.0\.0\.0:.*
@@ -0,0 +1,193 @@
1
+ ---
2
+ title: Extending Canon
3
+ parent: Advanced
4
+ nav_order: 8
5
+ ---
6
+ = Extending Canon
7
+
8
+ == Purpose
9
+
10
+ This document explains how to extend Canon with custom functionality, including creating custom comparators, formatters, and adapters for different document formats.
11
+
12
+ == Overview
13
+
14
+ Canon is designed to be extensible at multiple layers:
15
+
16
+ * **Layer 1**: Custom preprocessing/normalization
17
+ * **Layer 2**: Custom comparison algorithms
18
+ * **Layer 3**: Custom match options and dimensions
19
+ * **Layer 4**: Custom diff formatters and renderers
20
+
21
+ == Adapter Pattern
22
+
23
+ Canon uses an adapter pattern to work with different parsing libraries (Nokogiri, Moxml, etc.).
24
+
25
+ === Adapter Structure
26
+
27
+ [source,ruby]
28
+ ----
29
+ module Canon
30
+ module Adapters
31
+ class NokogiriAdapter
32
+ def parse(input)
33
+ # Parse with Nokogiri
34
+ Nokogiri::XML(input)
35
+ end
36
+
37
+ def serialize(node)
38
+ # Serialize with Nokogiri
39
+ node.to_xml
40
+ end
41
+ end
42
+ end
43
+ end
44
+ ----
45
+
46
+ === Creating a Custom Adapter
47
+
48
+ To add support for a new parsing library:
49
+
50
+ 1. Create an adapter class that implements `parse` and `serialize` methods
51
+ 2. Register the adapter with the format system
52
+ 3. Add tests for the new adapter
53
+
54
+ == Custom Comparators
55
+
56
+ === Creating a Custom Comparison Algorithm
57
+
58
+ [source,ruby]
59
+ ----
60
+ module Canon
61
+ module Comparison
62
+ class CustomComparator < BaseComparator
63
+ def compare(node1, node2, opts)
64
+ # Your comparison logic here
65
+ differences = []
66
+
67
+ # Your algorithm implementation
68
+
69
+ differences
70
+ end
71
+ end
72
+ end
73
+ end
74
+ ----
75
+
76
+ === Registering Your Algorithm
77
+
78
+ [source,ruby]
79
+ ----
80
+ Canon::Comparison.register_algorithm(:custom, CustomComparator)
81
+ ----
82
+
83
+ Then use it:
84
+
85
+ [source,ruby]
86
+ ----
87
+ Canon::Comparison.equivalent?(doc1, doc2, diff_algorithm: :custom)
88
+ ----
89
+
90
+ == Custom Formatters
91
+
92
+ === Creating a Custom Diff Formatter
93
+
94
+ [source,ruby]
95
+ ----
96
+ module Canon
97
+ class DiffFormatter
98
+ class CustomFormatter
99
+ def format(differences, opts)
100
+ # Your formatting logic here
101
+ formatted_output = ""
102
+
103
+ differences.each do |diff|
104
+ formatted_output += format_difference(diff, opts)
105
+ end
106
+
107
+ formatted_output
108
+ end
109
+ end
110
+ end
111
+ end
112
+ ----
113
+
114
+ === Using Your Formatter
115
+
116
+ [source,ruby]
117
+ ----
118
+ result = Canon::Comparison.equivalent?(doc1, doc2, verbose: true)
119
+
120
+ formatter = Canon::DiffFormatter::CustomFormatter.new
121
+ output = formatter.format(result.differences, use_color: true)
122
+ puts output
123
+ ----
124
+
125
+ == Custom Match Options
126
+
127
+ === Defining Custom Dimensions
128
+
129
+ [source,ruby]
130
+ ----
131
+ module Canon
132
+ module Comparison
133
+ class CustomDimension
134
+ def self.key
135
+ :custom_dimension
136
+ end
137
+
138
+ def self.compare(node1, node2, behavior, opts)
139
+ # Your comparison logic for this dimension
140
+ case behavior
141
+ when :strict
142
+ node1 == node2
143
+ when :normalize
144
+ normalize(node1) == normalize(node2)
145
+ when :ignore
146
+ true
147
+ end
148
+ end
149
+ end
150
+ end
151
+ end
152
+ ----
153
+
154
+ Register your dimension:
155
+
156
+ [source,ruby]
157
+ ----
158
+ Canon::Comparison.register_dimension(CustomDimension)
159
+ ----
160
+
161
+ == Best Practices
162
+
163
+ === Testing Your Extensions
164
+
165
+ 1. Write comprehensive tests for your extensions
166
+ 2. Use the existing test helpers and fixtures
167
+ 3. Test edge cases and error conditions
168
+
169
+ === Performance Considerations
170
+
171
+ 1. Cache expensive computations
172
+ 2. Use lazy evaluation where appropriate
173
+ 3. Avoid unnecessary node cloning
174
+
175
+ === Error Handling
176
+
177
+ 1. Provide clear error messages
178
+ 2. Use Canon's error classes consistently
179
+ 3. Document error conditions
180
+
181
+ == Examples
182
+
183
+ See the source code for examples of:
184
+
185
+ * link:xml-comparator[DOM Comparator implementation]
186
+ * link:semantic-tree-diff-internals[Semantic Diff implementation]
187
+ * link:diff-formatting/[Diff Formatter implementations]
188
+
189
+ == See Also
190
+
191
+ * link:../understanding/architecture.adoc[Architecture] - 4-layer architecture overview
192
+ * link:../features/diff-formatting/[Diff Formatting] - Layer 4 rendering options
193
+ * link:diff-pipeline[Comparison Pipeline] - Technical pipeline details