canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
@@ -0,0 +1,239 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "moxml"
4
+ require "nokogiri"
5
+ require_relative "xml/whitespace_normalizer"
6
+ require_relative "comparison/xml_comparator"
7
+ require_relative "comparison/html_comparator"
8
+ require_relative "comparison/json_comparator"
9
+ require_relative "comparison/yaml_comparator"
10
+ require_relative "diff/diff_node_mapper"
11
+ require_relative "diff/diff_line"
12
+ require_relative "diff/diff_block_builder"
13
+ require_relative "diff/diff_context_builder"
14
+ require_relative "diff/diff_report_builder"
15
+
16
module Canon
  # Comparison module for XML, HTML, JSON, and YAML documents
  #
  # This module provides a unified comparison API for multiple serialization
  # formats. It auto-detects the format and delegates to specialized
  # comparators while maintaining a CompareXML-compatible API.
  #
  # == Supported Formats
  #
  # - **XML**: Uses Moxml for parsing, supports namespaces
  # - **HTML**: Uses Nokogiri, handles HTML4/HTML5 differences
  # - **JSON**: Direct Ruby object comparison with deep equality
  # - **YAML**: Parses to Ruby objects, compares semantically
  #
  # == Format Detection
  #
  # The module automatically detects format from:
  # - Object type (Moxml::Node, Nokogiri::HTML::Document, Hash, Array)
  # - String content (DOCTYPE, opening tags, YAML/JSON syntax)
  #
  # == Comparison Options
  #
  # Common options across all formats:
  # - collapse_whitespace: Normalize whitespace in text (default: true)
  # - ignore_attr_order: Ignore attribute/key ordering (default: true)
  # - ignore_comments: Skip comment nodes (default: true)
  # - ignore_text_nodes: Skip all text content (default: false)
  # - ignore_children: Skip child nodes (default: false)
  # - verbose: Return detailed diff array (default: false)
  #
  # == Usage Examples
  #
  #   # XML comparison
  #   Canon::Comparison.equivalent?(xml1, xml2)
  #   Canon::Comparison.equivalent?(xml1, xml2, verbose: true)
  #
  #   # HTML comparison
  #   Canon::Comparison.equivalent?(html1, html2, ignore_comments: true)
  #
  #   # JSON comparison
  #   Canon::Comparison.equivalent?(json1, json2)
  #   Canon::Comparison.equivalent?(hash1, hash2) # Pre-parsed objects
  #
  #   # With detailed output
  #   diffs = Canon::Comparison.equivalent?(doc1, doc2, verbose: true)
  #   diffs.each { |diff| puts diff.inspect }
  #
  # == Return Values
  #
  # - When verbose: false (default) → Boolean (true if equivalent)
  # - When verbose: true → Array of difference hashes with details
  #
  # == Difference Hash Format
  #
  # Each difference contains:
  # - node1, node2: The nodes being compared (XML/HTML)
  # - diff1, diff2: Comparison result codes
  # - OR for JSON/YAML:
  #   - path: String path to the difference (e.g., "user.address.city")
  #   - value1, value2: The differing values
  #   - diff_code: Type of difference
  #
  module Comparison
    # Comparison result constants
    EQUIVALENT = 1
    MISSING_ATTRIBUTE = 2
    MISSING_NODE = 3
    UNEQUAL_ATTRIBUTES = 4
    UNEQUAL_COMMENTS = 5
    UNEQUAL_DOCUMENTS = 6
    UNEQUAL_ELEMENTS = 7
    UNEQUAL_NODES_TYPES = 8
    UNEQUAL_TEXT_CONTENTS = 9
    MISSING_HASH_KEY = 10
    UNEQUAL_HASH_VALUES = 11
    UNEQUAL_ARRAY_LENGTHS = 12
    UNEQUAL_ARRAY_ELEMENTS = 13
    UNEQUAL_TYPES = 14
    UNEQUAL_PRIMITIVES = 15

    class << self
      # Auto-detect format and compare two objects
      #
      # @param obj1 [Object] First object to compare
      # @param obj2 [Object] Second object to compare
      # @param opts [Hash] Comparison options
      #   - :format - Format hint (:xml, :html, :html4, :html5, :json, :yaml, :string)
      # @return [Boolean, Array] true if equivalent, or array of diffs if verbose
      # @raise [Canon::CompareFormatMismatchError] when the two inputs resolve
      #   to formats that cannot be compared with each other
      # @raise [Canon::Error] when auto-detection meets an unsupported object type
      def equivalent?(obj1, obj2, opts = {})
        # Use format hint if provided
        if opts[:format]
          format1 = format2 = opts[:format]
          # Parse HTML strings if format is html/html4/html5
          if %i[html html4 html5].include?(opts[:format])
            obj1 = parse_html(obj1, opts[:format]) if obj1.is_a?(String)
            obj2 = parse_html(obj2, opts[:format]) if obj2.is_a?(String)
            # Normalize html4/html5 to html for comparison
            format1 = format2 = :html
          end
        else
          format1 = detect_format(obj1)
          format2 = detect_format(obj2)
        end

        # Handle string format (plain text comparison). Only the first
        # operand's format decides whether this path is taken.
        if format1 == :string
          same = obj1.to_s == obj2.to_s
          return same unless opts[:verbose]

          return same ? [] : [:different]
        end

        # Allow comparing json/yaml strings with ruby objects
        # since they parse to the same structure
        formats_compatible = format1 == format2 ||
          (%i[json ruby_object].include?(format1) &&
           %i[json ruby_object].include?(format2)) ||
          (%i[yaml ruby_object].include?(format1) &&
           %i[yaml ruby_object].include?(format2))

        unless formats_compatible
          raise Canon::CompareFormatMismatchError.new(format1, format2)
        end

        # Normalize format for comparison
        comparison_format = case format1
                            when :ruby_object
                              # If comparing ruby_object with json/yaml, use that format
                              %i[json yaml].include?(format2) ? format2 : :json
                            else
                              format1
                            end

        case comparison_format
        when :xml
          XmlComparator.equivalent?(obj1, obj2, opts)
        when :html
          HtmlComparator.equivalent?(obj1, obj2, opts)
        when :json
          JsonComparator.equivalent?(obj1, obj2, opts)
        when :yaml
          YamlComparator.equivalent?(obj1, obj2, opts)
        end
      end

      private

      # Pass HTML input through to the comparator.
      #
      # This method performs no parsing: every input, String or not, is
      # returned unchanged. The original comment states that parsing (with
      # preprocessing) is left to HtmlComparator; that class is not visible
      # here, so treat this as the author's stated intent.
      #
      # @param content [String, Object] Content to hand to the comparator
      # @param _format [Symbol] HTML format (:html, :html4, :html5); unused
      # @return [String, Object] +content+, unchanged
      def parse_html(content, _format)
        content
      end

      # Detect the format of an object
      #
      # @param obj [Object] Object to detect format of
      # @return [Symbol] Format type
      # @raise [Canon::Error] when the object's class is not recognised
      def detect_format(obj)
        case obj
        when Moxml::Node, Moxml::Document
          :xml
        when Nokogiri::HTML::DocumentFragment, Nokogiri::HTML5::DocumentFragment
          # HTML DocumentFragments
          :html
        when Nokogiri::XML::DocumentFragment
          # XML DocumentFragments - check if it's actually HTML
          obj.document&.html? ? :html : :xml
        when Nokogiri::XML::Document, Nokogiri::XML::Node
          # Check if it's HTML by looking at the document type
          obj.html? ? :html : :xml
        when Nokogiri::HTML::Document, Nokogiri::HTML5::Document
          :html
        when String
          detect_string_format(obj)
        when Hash, Array
          # Raw Ruby objects (from parsed JSON/YAML)
          :ruby_object
        else
          raise Canon::Error, "Unknown format for object: #{obj.class}"
        end
      end

      # Detect the format of a string
      #
      # Markup is recognised before the YAML "key: value" heuristic. That
      # heuristic matches at the start of ANY line, so running it first would
      # classify an XML/HTML document as YAML whenever one of its text lines
      # looked like "Note: ...".
      #
      # @param str [String] String to detect format of
      # @return [Symbol] :yaml, :json, :html, :xml or :string
      def detect_string_format(str)
        trimmed = str.strip

        # An explicit YAML document marker always wins
        return :yaml if trimmed.start_with?("---")

        if trimmed.start_with?("<")
          # HTML indicators
          return :html if trimmed.start_with?("<!DOCTYPE html", "<html", "<HTML")

          # XML indicators - must start with < and end with >
          return :xml if trimmed.end_with?(">")
          # Otherwise fall through: e.g. a YAML merge key ("<<: *base")
          # starts with "<" but is not markup.
        end

        # YAML indicator: some line begins with a bare "key: "
        return :yaml if trimmed.match?(/^[a-zA-Z_]\w*:\s/)

        # JSON indicators
        return :json if trimmed.start_with?("{", "[")

        # Default to plain string for everything else
        :string
      end
    end
  end
end
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  # Global configuration for Canon
  # Provides unified configuration across CLI, Ruby API, and RSpec interfaces
  #
  # A single shared instance is created lazily; any message the class itself
  # does not understand is forwarded to that instance, so
  # +Canon::Config.xml+ is shorthand for +Canon::Config.instance.xml+.
  class Config
    class << self
      # @return [Canon::Config] the shared configuration, created on first use
      def instance
        @instance ||= new
      end

      # Yield the shared configuration for modification.
      #
      # @yieldparam config [Canon::Config] the shared configuration
      # @return [Canon::Config] the shared configuration
      def configure
        yield instance if block_given?
        instance
      end

      # Replace the shared configuration with a fresh one.
      #
      # @return [Canon::Config] the new shared configuration
      def reset!
        @instance = new
      end

      # Delegate to instance
      #
      # Goes through +instance+ rather than reading @instance directly, so
      # delegation also works before the shared instance has been created.
      def method_missing(method, ...)
        return super unless instance.respond_to?(method)

        instance.public_send(method, ...)
      end

      def respond_to_missing?(method, include_private = false)
        instance.respond_to?(method) || super
      end
    end

    attr_reader :xml, :html, :json, :yaml, :string

    def initialize
      @xml = FormatConfig.new(:xml)
      @html = FormatConfig.new(:html)
      @json = FormatConfig.new(:json)
      @yaml = FormatConfig.new(:yaml)
      @string = FormatConfig.new(:string)
    end

    # Restore every per-format configuration to its defaults, in place.
    def reset!
      [@xml, @html, @json, @yaml, @string].each(&:reset!)
    end

    # Backward compatibility methods for top-level diff configuration
    # These delegate to XML diff config for backward compatibility
    def diff_mode
      @xml.diff.mode
    end

    def diff_mode=(value)
      @xml.diff.mode = value
    end

    def use_color
      @xml.diff.use_color
    end

    def use_color=(value)
      @xml.diff.use_color = value
    end

    # Backward compatibility methods for match profile configuration
    def xml_match_profile
      @xml.match.profile
    end

    def xml_match_profile=(value)
      @xml.match.profile = value
    end

    def html_match_profile
      @html.match.profile
    end

    def html_match_profile=(value)
      @html.match.profile = value
    end

    # Format-specific configuration
    # Config creates one instance per format (XML, HTML, JSON, YAML, string)
    class FormatConfig
      attr_reader :format, :match, :diff
      attr_accessor :preprocessing

      # @param format [Symbol] the format this configuration belongs to
      def initialize(format)
        @format = format
        @match = MatchConfig.new
        @diff = DiffConfig.new
        @preprocessing = nil
      end

      # Restore match, diff and preprocessing settings to their defaults.
      def reset!
        @match.reset!
        @diff.reset!
        @preprocessing = nil
      end
    end

    # Match configuration for comparison behavior
    class MatchConfig
      attr_accessor :profile
      attr_reader :options

      def initialize
        reset!
      end

      # @param value [Hash, nil] match options; nil is stored as an empty hash
      def options=(value)
        @options = value || {}
      end

      # Clear the profile and all match options.
      def reset!
        @profile = nil
        @options = {}
      end

      # Build match options from profile and options
      #
      # @return [Hash] contains :match_profile and/or :match only when set
      def to_h
        result = {}
        result[:match_profile] = @profile if @profile
        result[:match] = @options if @options && !@options.empty?
        result
      end
    end

    # Diff configuration for output formatting
    class DiffConfig
      attr_accessor :mode, :use_color, :context_lines, :grouping_lines,
                    :show_diffs, :verbose_diff

      def initialize
        reset!
      end

      # Restore every diff setting to its default value.
      def reset!
        @mode = :by_line
        @use_color = true
        @context_lines = 3
        @grouping_lines = 10
        @show_diffs = :all
        @verbose_diff = false
      end

      # Build diff options
      #
      # @return [Hash] the current settings; note that the mode is exposed
      #   under the :diff key
      def to_h
        {
          diff: @mode,
          use_color: @use_color,
          context_lines: @context_lines,
          grouping_lines: @grouping_lines,
          show_diffs: @show_diffs,
          verbose_diff: @verbose_diff,
        }
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  module Diff
    # Represents a contiguous block of changes in a diff
    # A diff block is a run of consecutive change lines (-, +, !)
    class DiffBlock
      attr_reader :start_idx, :end_idx, :types, :diff_lines, :diff_node
      attr_accessor :normative

      # @param start_idx [Integer] index of the first line in the block
      # @param end_idx [Integer] index of the last line in the block (inclusive)
      # @param types [Array] change markers present in the block
      # @param diff_lines [Array, nil] the line objects that make up the block
      # @param diff_node [Object, nil] optional node associated with the block
      def initialize(start_idx:, end_idx:, types: [], diff_lines: [],
                     diff_node: nil)
        @start_idx = start_idx
        @end_idx = end_idx
        @types = types
        @diff_lines = diff_lines
        @diff_node = diff_node
        @normative = nil
      end

      # Number of lines in this block
      def size
        end_idx - start_idx + 1
      end

      # An explicitly assigned +normative+ value takes precedence; otherwise
      # the status is derived from the diff node, then from the diff lines.
      #
      # @return [Boolean] true if this block represents a normative difference
      def normative?
        return @normative unless @normative.nil?

        # If we have a diff_node, use its normative status
        return diff_node.normative? if diff_node

        # If we have diff_lines, check if any are normative
        return diff_lines.any?(&:normative?) if diff_lines&.any?

        # Default to true (treat as normative if we can't determine)
        true
      end

      # @return [Boolean] true if this block represents an informative-only difference
      def informative?
        !normative?
      end

      # Check if this block contains a specific type of change
      def includes_type?(type)
        types.include?(type)
      end

      # @return [Hash] plain-hash representation of the block; a nil
      #   +diff_lines+ is rendered as an empty array, matching the nil
      #   tolerance of #normative?
      def to_h
        {
          start_idx: start_idx,
          end_idx: end_idx,
          types: types,
          diff_lines: (diff_lines || []).map(&:to_h),
          diff_node: diff_node&.to_h,
          normative: normative?,
        }
      end

      # Two blocks are equal when their position, types, lines and node all
      # match; the +normative+ flag is not part of the comparison.
      def ==(other)
        other.is_a?(DiffBlock) &&
          start_idx == other.start_idx &&
          end_idx == other.end_idx &&
          types == other.types &&
          diff_lines == other.diff_lines &&
          diff_node == other.diff_node
      end
    end
  end
end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "diff_block"
4
+
5
module Canon
  module Diff
    # Turns a flat list of DiffLines into DiffBlocks: each run of consecutive
    # changed lines becomes one block, and the resulting blocks can be
    # narrowed to normative-only or informative-only ones.
    class DiffBlockBuilder
      # Marker string recorded on a block for each recognised line type.
      TYPE_MARKERS = { added: "+", removed: "-", changed: "!" }.freeze
      private_constant :TYPE_MARKERS

      # Convenience entry point: build and filter in one call.
      #
      # @param diff_lines [Array<DiffLine>] The diff lines to process
      # @param show_diffs [Symbol] Filter setting (:normative, :informative, :all)
      # @return [Array<DiffBlock>] Filtered diff blocks
      def self.build_blocks(diff_lines, show_diffs: :all)
        new(diff_lines, show_diffs).build
      end

      # @param diff_lines [Array<DiffLine>] The diff lines to process
      # @param show_diffs [Symbol] Filter setting (:normative, :informative, :all)
      def initialize(diff_lines, show_diffs)
        @diff_lines = diff_lines
        @show_diffs = show_diffs
      end

      # @return [Array<DiffBlock>] blocks of contiguous changes, filtered
      #   according to the show_diffs setting
      def build
        filter_blocks(group_into_blocks)
      end

      private

      # Split the lines into runs of consecutive changed lines and wrap each
      # run in a DiffBlock. Unchanged lines act purely as separators.
      def group_into_blocks
        indexed_runs = @diff_lines.each_with_index.chunk do |line, _position|
          # A nil key makes chunk drop the element and end the current run
          !line.unchanged? || nil
        end

        indexed_runs.map do |_changed, run|
          first_position = run.first.last
          last_position = run.last.last
          create_block(first_position, last_position, run.map(&:first))
        end
      end

      # Wrap one run of changed lines in a DiffBlock.
      def create_block(start_idx, end_idx, diff_lines)
        # Distinct line types, in order of first appearance, as markers;
        # unrecognised types contribute nothing
        markers = diff_lines.map(&:type).uniq.filter_map do |kind|
          TYPE_MARKERS[kind]
        end

        DiffBlock.new(
          start_idx: start_idx,
          end_idx: end_idx,
          types: markers,
          diff_lines: diff_lines,
        ).tap do |block|
          # One normative line is enough to make the whole block normative
          block.normative = diff_lines.any?(&:normative?)
        end
      end

      # Apply the show_diffs setting; anything other than :normative or
      # :informative keeps every block.
      def filter_blocks(blocks)
        return blocks.select(&:normative?) if @show_diffs == :normative
        return blocks.select(&:informative?) if @show_diffs == :informative

        blocks
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  module Diff
    # Marks DiffNodes as normative (the difference affects equivalence) or
    # informative (it does not), according to the match options in effect.
    class DiffClassifier
      attr_reader :match_options

      # @param match_options [Canon::Comparison::ResolvedMatchOptions] The match options
      def initialize(match_options)
        @match_options = match_options
      end

      # Set the normative flag on one DiffNode from its dimension.
      #
      # @param diff_node [DiffNode] The diff node to classify
      # @return [DiffNode] The same diff node with normative attribute set
      def classify(diff_node)
        diff_node.tap do |node|
          node.normative = normative_for_dimension?(node.dimension)
        end
      end

      # Set the normative flag on every DiffNode in the collection.
      #
      # @param diff_nodes [Array<DiffNode>] The diff nodes to classify
      # @return [Array<DiffNode>] The same diff nodes with normative attributes set
      def classify_all(diff_nodes)
        diff_nodes.each(&method(:classify))
      end

      private

      # Decide whether a difference in the given dimension is normative.
      #
      # Only the :ignore behavior makes a difference informative. Per the
      # original author's note, a DiffNode for a :strict or :normalize
      # dimension means the comparison failed even after normalization, so
      # it is treated as a real (normative) difference.
      #
      # @param dimension [Symbol] The match dimension
      # @return [Boolean] true if differences in this dimension are normative
      def normative_for_dimension?(dimension)
        match_options.behavior_for(dimension) != :ignore
      end
    end
  end
end