canon 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +163 -67
  3. data/README.adoc +400 -7
  4. data/docs/Gemfile +9 -0
  5. data/docs/INDEX.adoc +99 -182
  6. data/docs/_config.yml +100 -0
  7. data/docs/advanced/diff-classification.adoc +547 -0
  8. data/docs/advanced/diff-pipeline.adoc +358 -0
  9. data/docs/advanced/index.adoc +214 -0
  10. data/docs/advanced/semantic-diff-report.adoc +390 -0
  11. data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
  12. data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
  13. data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
  14. data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
  15. data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
  16. data/docs/features/diff-formatting/display-filtering.adoc +472 -0
  17. data/docs/features/diff-formatting/index.adoc +140 -0
  18. data/docs/features/environment-configuration/index.adoc +327 -0
  19. data/docs/features/environment-configuration/override-system.adoc +436 -0
  20. data/docs/features/environment-configuration/size-limits.adoc +273 -0
  21. data/docs/features/index.adoc +173 -0
  22. data/docs/features/input-validation/index.adoc +521 -0
  23. data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
  24. data/docs/features/match-options/html-policies.adoc +312 -0
  25. data/docs/features/match-options/index.adoc +621 -0
  26. data/docs/getting-started/index.adoc +83 -0
  27. data/docs/getting-started/quick-start.adoc +76 -0
  28. data/docs/guides/choosing-configuration.adoc +689 -0
  29. data/docs/guides/index.adoc +181 -0
  30. data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
  31. data/docs/interfaces/index.adoc +101 -0
  32. data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
  33. data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
  34. data/docs/lychee.toml +65 -0
  35. data/docs/reference/cli-options.adoc +418 -0
  36. data/docs/reference/environment-variables.adoc +375 -0
  37. data/docs/reference/index.adoc +204 -0
  38. data/docs/reference/options-across-interfaces.adoc +417 -0
  39. data/docs/understanding/algorithms/dom-diff.adoc +389 -0
  40. data/docs/understanding/algorithms/index.adoc +314 -0
  41. data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
  42. data/docs/understanding/architecture.adoc +447 -0
  43. data/docs/understanding/comparison-pipeline.adoc +317 -0
  44. data/docs/understanding/formats/html.adoc +380 -0
  45. data/docs/understanding/formats/index.adoc +261 -0
  46. data/docs/understanding/formats/json.adoc +390 -0
  47. data/docs/understanding/formats/xml.adoc +366 -0
  48. data/docs/understanding/formats/yaml.adoc +504 -0
  49. data/docs/understanding/index.adoc +130 -0
  50. data/lib/canon/cli.rb +42 -1
  51. data/lib/canon/commands/diff_command.rb +108 -23
  52. data/lib/canon/comparison/compare_profile.rb +101 -0
  53. data/lib/canon/comparison/comparison_result.rb +41 -2
  54. data/lib/canon/comparison/html_comparator.rb +292 -71
  55. data/lib/canon/comparison/html_compare_profile.rb +117 -0
  56. data/lib/canon/comparison/match_options.rb +42 -4
  57. data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
  58. data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
  59. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
  60. data/lib/canon/comparison/xml_comparator.rb +695 -91
  61. data/lib/canon/comparison.rb +207 -2
  62. data/lib/canon/config/env_provider.rb +71 -0
  63. data/lib/canon/config/env_schema.rb +58 -0
  64. data/lib/canon/config/override_resolver.rb +55 -0
  65. data/lib/canon/config/type_converter.rb +59 -0
  66. data/lib/canon/config.rb +158 -29
  67. data/lib/canon/data_model.rb +29 -0
  68. data/lib/canon/diff/diff_classifier.rb +74 -14
  69. data/lib/canon/diff/diff_context_builder.rb +41 -0
  70. data/lib/canon/diff/diff_line.rb +18 -2
  71. data/lib/canon/diff/diff_node.rb +18 -3
  72. data/lib/canon/diff/diff_node_mapper.rb +71 -12
  73. data/lib/canon/diff/formatting_detector.rb +53 -0
  74. data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
  75. data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
  76. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
  77. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
  78. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
  79. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
  80. data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
  81. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
  82. data/lib/canon/diff_formatter/debug_output.rb +7 -1
  83. data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
  84. data/lib/canon/diff_formatter/legend.rb +42 -0
  85. data/lib/canon/diff_formatter.rb +78 -9
  86. data/lib/canon/errors.rb +56 -0
  87. data/lib/canon/formatters/html_formatter_base.rb +35 -1
  88. data/lib/canon/formatters/json_formatter.rb +3 -0
  89. data/lib/canon/formatters/yaml_formatter.rb +3 -0
  90. data/lib/canon/html/data_model.rb +229 -0
  91. data/lib/canon/html.rb +9 -0
  92. data/lib/canon/options/cli_generator.rb +70 -0
  93. data/lib/canon/options/registry.rb +234 -0
  94. data/lib/canon/rspec_matchers.rb +34 -13
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
  96. data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
  97. data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
  98. data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
  99. data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
  100. data/lib/canon/tree_diff/core/matching.rb +241 -0
  101. data/lib/canon/tree_diff/core/node_signature.rb +164 -0
  102. data/lib/canon/tree_diff/core/node_weight.rb +135 -0
  103. data/lib/canon/tree_diff/core/tree_node.rb +450 -0
  104. data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
  105. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
  106. data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
  107. data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
  108. data/lib/canon/tree_diff/operation_converter.rb +631 -0
  109. data/lib/canon/tree_diff/operations/operation.rb +92 -0
  110. data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
  111. data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
  112. data/lib/canon/tree_diff.rb +33 -0
  113. data/lib/canon/validators/json_validator.rb +3 -1
  114. data/lib/canon/validators/yaml_validator.rb +3 -1
  115. data/lib/canon/version.rb +1 -1
  116. data/lib/canon/xml/data_model.rb +22 -23
  117. data/lib/canon/xml/element_matcher.rb +128 -20
  118. data/lib/canon/xml/namespace_helper.rb +110 -0
  119. data/lib/canon.rb +3 -0
  120. metadata +81 -23
  121. data/_config.yml +0 -116
  122. data/docs/ADVANCED_TOPICS.adoc +0 -20
  123. data/docs/BASIC_USAGE.adoc +0 -16
  124. data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  125. data/docs/DIFF_ARCHITECTURE.adoc +0 -435
  126. data/docs/DIFF_FORMATTING.adoc +0 -540
  127. data/docs/FORMATS.adoc +0 -447
  128. data/docs/INPUT_VALIDATION.adoc +0 -477
  129. data/docs/MATCH_ARCHITECTURE.adoc +0 -463
  130. data/docs/MATCH_OPTIONS.adoc +0 -719
  131. data/docs/MODES.adoc +0 -432
  132. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  133. data/docs/OPTIONS.adoc +0 -1387
  134. data/docs/PREPROCESSING.adoc +0 -491
  135. data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
  136. data/docs/UNDERSTANDING_CANON.adoc +0 -17
@@ -20,6 +20,10 @@ module Canon
20
20
  format1 = @options[:format1] || @options[:format] || detect_format(file1)
21
21
  format2 = @options[:format2] || @options[:format] || detect_format(file2)
22
22
 
23
+ # Check file sizes before reading
24
+ check_file_size(file1, format1)
25
+ check_file_size(file2, format2)
26
+
23
27
  # Read raw content for potential by-line diff
24
28
  content1 = File.read(file1)
25
29
  content2 = File.read(file2)
@@ -48,25 +52,36 @@ module Canon
48
52
  mode: mode,
49
53
  context_lines: @options.fetch(:context_lines, 3),
50
54
  diff_grouping_lines: @options[:diff_grouping_lines],
55
+ show_diffs: @options[:show_diffs]&.to_sym || :all,
51
56
  )
52
- if comp_opts[:verbose]
53
- # result is always a ComparisonResult object
54
- output = formatter.format(
57
+
58
+ # Show configuration in verbose mode using shared DebugOutput
59
+ if @options[:verbose]
60
+ require_relative "../diff_formatter/debug_output"
61
+ config_output = Canon::DiffFormatter::DebugOutput.verbose_tables_only(
55
62
  result,
56
- format1,
57
- doc1: formatted1,
58
- doc2: formatted2,
63
+ {
64
+ use_color: @options[:color],
65
+ mode: mode,
66
+ context_lines: @options.fetch(:context_lines, 3),
67
+ diff_grouping_lines: @options[:diff_grouping_lines],
68
+ show_diffs: @options[:show_diffs]&.to_sym || :all,
69
+ verbose_diff: true, # Enable verbose table output
70
+ },
59
71
  )
60
- puts output
61
- exit result.equivalent? ? 0 : 1
62
- elsif result
63
- # result is a boolean
64
- puts formatter.send(:success_message)
65
- exit 0
66
- else
67
- puts "Files are semantically different"
68
- exit 1
72
+ puts config_output unless config_output.empty?
69
73
  end
74
+
75
+ # Always show diff when files are not equivalent
76
+ # result is always a ComparisonResult object when verbose: true
77
+ output = formatter.format(
78
+ result,
79
+ format1,
80
+ doc1: formatted1,
81
+ doc2: formatted2,
82
+ )
83
+ puts output
84
+ exit result.equivalent? ? 0 : 1
70
85
  rescue Errno::ENOENT => e
71
86
  abort "Error: #{e.message}"
72
87
  rescue JSON::ParserError => e
@@ -90,7 +105,12 @@ module Canon
90
105
 
91
106
  opts[:match] = match_opts unless match_opts.empty?
92
107
  opts[:ignore_attr_order] = @options.fetch(:ignore_attr_order, true)
93
- opts[:verbose] = @options.fetch(:verbose, false)
108
+ # Always request verbose comparison to get ComparisonResult with differences
109
+ # The CLI --verbose flag only affects output formatting, not comparison detail
110
+ opts[:verbose] = true
111
+
112
+ add_algorithm_option(opts)
113
+ add_show_diffs_option(opts)
94
114
 
95
115
  opts
96
116
  end
@@ -113,7 +133,7 @@ module Canon
113
133
  def build_match_dimension_options
114
134
  dimensions = %i[
115
135
  text_content structural_whitespace attribute_whitespace
116
- comments key_order
136
+ attribute_order attribute_values comments key_order
117
137
  ]
118
138
 
119
139
  dimensions.each_with_object({}) do |dim, opts|
@@ -121,16 +141,46 @@ module Canon
121
141
  end
122
142
  end
123
143
 
144
+ # Add show_diffs option to comparison options
145
+ # @param opts [Hash] Options hash to modify
146
+ def add_show_diffs_option(opts)
147
+ return unless @options[:show_diffs]
148
+
149
+ opts[:show_diffs] = @options[:show_diffs].to_sym
150
+ end
151
+
152
+ # Add diff_algorithm option to comparison options
153
+ # @param opts [Hash] Options hash to modify
154
+ def add_algorithm_option(opts)
155
+ opts[:diff_algorithm] = determine_algorithm
156
+ end
157
+
124
158
  # Determine diff mode based on format and options
125
159
  def determine_mode(format)
126
- # HTML always uses by-line mode
127
- return :by_line if format == :html
160
+ # Check for explicit --diff-mode flag (new approach)
161
+ if @options[:diff_mode]
162
+ return @options[:diff_mode].to_sym
163
+ end
128
164
 
129
- # Check for explicit --by-line flag for XML, JSON, YAML
130
- return :by_line if @options[:by_line]
165
+ # Backward compatibility: check --by-line flag (deprecated)
166
+ if @options[:by_line]
167
+ warn "WARNING: --by-line is deprecated. Use --diff-mode by_line instead."
168
+ return :by_line
169
+ end
170
+
171
+ # Format-specific defaults
172
+ case format
173
+ when :html
174
+ :by_line
175
+ else
176
+ :by_object
177
+ end
178
+ end
131
179
 
132
- # Default: by-object mode for JSON and YAML, by-object for XML
133
- :by_object
180
+ # Determine diff algorithm based on options
181
+ def determine_algorithm
182
+ algo = @options[:diff_algorithm] || "dom"
183
+ algo.to_sym
134
184
  end
135
185
 
136
186
  # Parse document content based on its format
@@ -190,6 +240,41 @@ module Canon
190
240
  "Please specify --format (xml, html, json, or yaml)"
191
241
  end
192
242
  end
243
+
244
+ # Check if file size exceeds configured limit
245
+ #
246
+ # @param filename [String] Path to file
247
+ # @param format [Symbol] File format
248
+ # @raise [Canon::SizeLimitExceededError] if file exceeds limit
249
+ def check_file_size(filename, format)
250
+ file_size = File.size(filename)
251
+ max_size = get_max_file_size(format)
252
+
253
+ return unless max_size&.positive?
254
+ return if file_size <= max_size
255
+
256
+ raise Canon::SizeLimitExceededError.new(:file_size, file_size, max_size)
257
+ end
258
+
259
+ # Get max file size limit for format
260
+ #
261
+ # @param format [Symbol] File format
262
+ # @return [Integer, nil] Max file size in bytes
263
+ def get_max_file_size(format)
264
+ config = Canon::Config.instance
265
+ case format
266
+ when :xml
267
+ config.xml.diff.max_file_size
268
+ when :html
269
+ config.html.diff.max_file_size
270
+ when :json
271
+ config.json.diff.max_file_size
272
+ when :yaml
273
+ config.yaml.diff.max_file_size
274
+ else
275
+ 5_242_880 # Default 5MB
276
+ end
277
+ end
193
278
  end
194
279
  end
195
280
  end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Comparison
5
+ # CompareProfile encapsulates the policy decisions about how differences
6
+ # in various dimensions should be handled during comparison
7
+ #
8
+ # This class provides separation of concerns:
9
+ # - CompareProfile: Policy decisions (what to track, what affects equivalence)
10
+ # - Comparator: Comparison logic (detect differences)
11
+ # - DiffClassifier: Classification logic (normative vs informative vs formatting)
12
+ class CompareProfile
13
+ attr_reader :match_options
14
+
15
+ # @param match_options [ResolvedMatchOptions, Hash] The match options to use
16
+ def initialize(match_options)
17
+ @match_options = match_options
18
+ end
19
+
20
+ # Should DiffNodes be created for differences in this dimension?
21
+ #
22
+ # In verbose mode, we want to track ALL differences for reporting.
23
+ # In non-verbose mode, we only need to track normative differences.
24
+ #
25
+ # @param dimension [Symbol] The match dimension to check
26
+ # @return [Boolean] true if differences should be tracked
27
+ def track_dimension?(_dimension)
28
+ # Always track dimensions that affect equivalence
29
+ # In verbose mode, also track informative dimensions
30
+ true
31
+ end
32
+
33
+ # Should differences in this dimension affect equivalence?
34
+ #
35
+ # This determines the return value of the comparison:
36
+ # - true: differences make documents non-equivalent
37
+ # - false: differences are informative only
38
+ #
39
+ # @param dimension [Symbol] The match dimension to check
40
+ # @return [Boolean] true if differences affect equivalence
41
+ def affects_equivalence?(dimension)
42
+ behavior = behavior_for(dimension)
43
+
44
+ # :strict → affects equivalence
45
+ # :normalize → might affect (if normalization fails)
46
+ # :ignore → does NOT affect equivalence
47
+ behavior != :ignore
48
+ end
49
+
50
+ # Is a difference in this dimension normative (affects equivalence)?
51
+ #
52
+ # This is used by DiffClassifier to determine the normative flag.
53
+ #
54
+ # @param dimension [Symbol] The match dimension to check
55
+ # @return [Boolean] true if normative, false if informative
56
+ def normative_dimension?(dimension)
57
+ # Element structure changes are ALWAYS normative
58
+ return true if dimension == :element_structure
59
+
60
+ # Structural whitespace with :normalize or :ignore behavior is INFORMATIVE
61
+ # Only :strict mode makes whitespace normative
62
+ if dimension == :structural_whitespace
63
+ behavior = behavior_for(dimension)
64
+ return behavior == :strict
65
+ end
66
+
67
+ # For other dimensions, if behavior affects equivalence, it's normative
68
+ affects_equivalence?(dimension)
69
+ end
70
+
71
+ # Can a difference in this dimension be formatting-only?
72
+ #
73
+ # This determines whether FormattingDetector should be applied.
74
+ # Only text/content dimensions can have formatting-only differences.
75
+ #
76
+ # @param dimension [Symbol] The match dimension to check
77
+ # @return [Boolean] true if formatting detection should apply
78
+ def supports_formatting_detection?(dimension)
79
+ # Only text_content and structural_whitespace can have formatting-only diffs
80
+ # Comments are policy-based (strict/ignore), not formatting-based
81
+ %i[text_content structural_whitespace].include?(dimension)
82
+ end
83
+
84
+ private
85
+
86
+ # Get the behavior setting for a dimension
87
+ # @param dimension [Symbol] The match dimension
88
+ # @return [Symbol] The behavior (:strict, :normalize, :ignore)
89
+ def behavior_for(dimension)
90
+ # Handle both ResolvedMatchOptions and Hash
91
+ if match_options.respond_to?(:behavior_for)
92
+ match_options.behavior_for(dimension)
93
+ elsif match_options.is_a?(Hash)
94
+ match_options[dimension] || :strict
95
+ else
96
+ :strict
97
+ end
98
+ end
99
+ end
100
+ end
101
+ end
@@ -6,20 +6,24 @@ module Canon
6
6
  # Provides methods to query equivalence based on normative diffs
7
7
  class ComparisonResult
8
8
  attr_reader :differences, :preprocessed_strings, :format, :html_version,
9
- :match_options
9
+ :match_options, :algorithm, :original_strings
10
10
 
11
11
  # @param differences [Array<DiffNode>] Array of difference nodes
12
12
  # @param preprocessed_strings [Array<String, String>] Pre-processed content for display
13
13
  # @param format [Symbol] Format type (:xml, :html, :json, :yaml)
14
14
  # @param html_version [Symbol, nil] HTML version (:html4 or :html5) for HTML format only
15
15
  # @param match_options [Hash, nil] Resolved match options used for comparison
16
+ # @param algorithm [Symbol] Diff algorithm used (:dom or :semantic)
17
+ # @param original_strings [Array<String, String>, nil] Original unprocessed content for line diff
16
18
  def initialize(differences:, preprocessed_strings:, format:,
17
- html_version: nil, match_options: nil)
19
+ html_version: nil, match_options: nil, algorithm: :dom, original_strings: nil)
18
20
  @differences = differences
19
21
  @preprocessed_strings = preprocessed_strings
22
+ @original_strings = original_strings || preprocessed_strings
20
23
  @format = format
21
24
  @html_version = html_version
22
25
  @match_options = match_options
26
+ @algorithm = algorithm
23
27
  end
24
28
 
25
29
  # Check if documents are semantically equivalent (no normative diffs)
@@ -74,6 +78,41 @@ html_version: nil, match_options: nil)
74
78
  diff.is_a?(Canon::Diff::DiffNode) && diff.informative?
75
79
  end
76
80
  end
81
+
82
+ # Get tree diff operations (only available when diff_algorithm: :semantic)
83
+ #
84
+ # @return [Array<Operation>] Array of tree diff operations
85
+ def operations
86
+ @match_options&.[](:tree_diff_operations) || []
87
+ end
88
+
89
+ # Generate formatted diff output
90
+ #
91
+ # @param use_color [Boolean] Whether to use ANSI color codes
92
+ # @param context_lines [Integer] Number of context lines to show
93
+ # @param diff_grouping_lines [Integer] Maximum gap for grouping diffs
94
+ # @param show_diffs [Symbol] Which diffs to show (:all, :normative, :informative)
95
+ # @return [String] Formatted diff output
96
+ def diff(use_color: true, context_lines: 3, diff_grouping_lines: nil,
97
+ show_diffs: :all)
98
+ require_relative "../diff_formatter"
99
+
100
+ formatter = Canon::DiffFormatter.new(
101
+ use_color: use_color,
102
+ mode: :by_line,
103
+ context_lines: context_lines,
104
+ diff_grouping_lines: diff_grouping_lines,
105
+ show_diffs: show_diffs,
106
+ )
107
+
108
+ formatter.format(
109
+ @differences,
110
+ @format,
111
+ doc1: @original_strings[0],
112
+ doc2: @original_strings[1],
113
+ html_version: @html_version,
114
+ )
115
+ end
77
116
  end
78
117
  end
79
118
  end