canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
@@ -0,0 +1,195 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../comparison"
4
+ require_relative "../diff_formatter"
5
+ require "json"
6
+ require "yaml"
7
+
8
module Canon
  module Commands
    # Command for semantic diffing of two files.
    #
    # Options are read from a hash with symbol keys: :format, :format1,
    # :format2, :color, :context_lines, :diff_grouping_lines, :by_line,
    # :verbose, :ignore_attr_order, :match_profile, :preprocessing and the
    # match dimensions listed in MATCH_DIMENSIONS.
    class DiffCommand
      # File extensions recognised by #detect_format.
      EXTENSION_FORMATS = {
        ".xml" => :xml,
        ".html" => :html,
        ".htm" => :html,
        ".json" => :json,
        ".yaml" => :yaml,
        ".yml" => :yaml,
      }.freeze

      # Match dimensions that may be overridden individually via options.
      MATCH_DIMENSIONS = %i[
        text_content structural_whitespace attribute_whitespace
        comments key_order
      ].freeze

      # @param options [Hash] command options (symbol keys)
      def initialize(options = {})
        @options = options
      end

      # rubocop:disable Metrics/MethodLength
      # rubocop:disable Metrics/AbcSize

      # Compare two files, print the outcome and terminate the process.
      #
      # Exits with status 0 when the documents are equivalent and 1 when
      # they differ. Missing files, parse errors and any other StandardError
      # are reported through +abort+.
      #
      # @param file1 [String] path of the first document
      # @param file2 [String] path of the second document
      # @return [void] every path ends in +exit+ or +abort+
      def run(file1, file2)
        # Formats are resolved before reading so that an undetectable
        # extension is reported before any file is opened.
        format1 = resolve_format(file1, :format1)
        format2 = resolve_format(file2, :format2)

        # Raw content is kept for the by-line diff
        content1 = File.read(file1)
        content2 = File.read(file2)

        doc1 = parse_document_content(content1, format1)
        doc2 = parse_document_content(content2, format2)

        comp_opts = build_comparison_options
        result = Canon::Comparison.equivalent?(doc1, doc2, comp_opts)

        # Display mode and pretty-printing follow the first file's format
        mode = determine_mode(format1)
        formatted1, formatted2 = prepare_formatted_content(
          content1, content2, format1, mode
        )

        formatter = build_formatter(mode)
        if comp_opts[:verbose]
          report_verbose(formatter, result, format1, formatted1, formatted2)
        else
          report_summary(formatter, result)
        end
      rescue Errno::ENOENT => e
        abort "Error: #{e.message}"
      rescue JSON::ParserError => e
        abort "Error parsing JSON: #{e.message}"
      rescue Psych::SyntaxError => e
        abort "Error parsing YAML: #{e.message}"
      rescue Canon::Error => e
        abort "Error: #{e.message}"
      rescue StandardError => e
        abort "Error processing files: #{e.message}"
      end
      # rubocop:enable Metrics/AbcSize
      # rubocop:enable Metrics/MethodLength

      private

      # Resolve the format of one input file.
      #
      # Precedence: the per-file option, then the shared :format option,
      # then the file extension. Explicit values are symbolized so that a
      # string such as "json" selects the same branches as :json.
      #
      # @param file [String] path used for extension detection
      # @param option_key [Symbol] :format1 or :format2
      # @return [Symbol] resolved format
      def resolve_format(file, option_key)
        explicit = @options[option_key] || @options[:format]
        return detect_format(file) unless explicit

        explicit.to_sym
      end

      # Build the diff formatter from the display-related options.
      #
      # @param mode [Symbol] :by_line or :by_object
      # @return [Canon::DiffFormatter]
      def build_formatter(mode)
        Canon::DiffFormatter.new(
          use_color: @options[:color],
          mode: mode,
          context_lines: @options.fetch(:context_lines, 3),
          diff_grouping_lines: @options[:diff_grouping_lines],
        )
      end

      # Print the full formatted diff and exit 0/1 on result.equivalent?.
      # NOTE(review): relies on the comparison returning an object that
      # responds to #equivalent? when verbose is set - confirm against
      # Canon::Comparison.
      def report_verbose(formatter, result, format, formatted1, formatted2)
        output = formatter.format(
          result,
          format,
          doc1: formatted1,
          doc2: formatted2,
        )
        puts output
        exit result.equivalent? ? 0 : 1
      end

      # Print a one-line verdict for a truthy/falsy result and exit 0/1.
      def report_summary(formatter, equivalent)
        if equivalent
          puts formatter.send(:success_message)
          exit 0
        else
          puts "Files are semantically different"
          exit 1
        end
      end

      # Build comparison options from CLI options
      #
      # @return [Hash] options passed to Canon::Comparison.equivalent?
      def build_comparison_options
        opts = build_profile_and_preprocessing_options
        match_opts = build_match_dimension_options

        opts[:match] = match_opts unless match_opts.empty?
        opts[:ignore_attr_order] = @options.fetch(:ignore_attr_order, true)
        opts[:verbose] = @options.fetch(:verbose, false)

        opts
      end

      # Copy :match_profile and :preprocessing, symbolized, when present.
      #
      # @return [Hash]
      def build_profile_and_preprocessing_options
        %i[match_profile preprocessing].each_with_object({}) do |key, opts|
          opts[key] = @options[key].to_sym if @options[key]
        end
      end

      # Collect the explicitly set match dimensions, symbolizing values.
      #
      # @return [Hash{Symbol => Symbol}] empty when none were given
      def build_match_dimension_options
        MATCH_DIMENSIONS.each_with_object({}) do |dim, opts|
          opts[dim] = @options[dim].to_sym if @options[dim]
        end
      end

      # Determine diff mode based on format and options
      #
      # @param format [Symbol]
      # @return [Symbol] :by_line or :by_object
      def determine_mode(format)
        # HTML always uses by-line mode
        return :by_line if format == :html

        # Explicit --by-line flag for XML, JSON, YAML
        return :by_line if @options[:by_line]

        # Default for XML, JSON and YAML
        :by_object
      end

      # Parse document content based on its format
      #
      # @param content [String] raw file content
      # @param format [Symbol] :xml, :html, :json or :yaml
      # @return [String, Object] the raw string for XML/HTML, the parsed
      #   Ruby object for JSON/YAML
      def parse_document_content(content, format)
        case format
        when :xml, :html
          # Return string for Canon::Comparison to parse
          content
        when :json
          JSON.parse(content)
        when :yaml
          YAML.safe_load(content)
        else
          abort "Error: Unsupported format '#{format}'"
        end
      end

      # Prepare formatted content for by-line diff
      #
      # XML and HTML are pretty-printed with a two-space indent; every other
      # format, and every non by-line mode, gets the content back unchanged.
      #
      # @return [Array(String, String)]
      def prepare_formatted_content(content1, content2, format, mode)
        return [content1, content2] unless mode == :by_line

        printer_class = pretty_printer_class(format)
        return [content1, content2] unless printer_class

        [content1, content2].map do |content|
          printer_class.new(indent: 2).format(content)
        end
      end

      # Pretty printer used for by-line diffs, loaded on first use.
      #
      # @return [Class, nil] nil when the format has no pretty printer here
      def pretty_printer_class(format)
        case format
        when :xml
          require_relative "../pretty_printer/xml"
          Canon::PrettyPrinter::Xml
        when :html
          require_relative "../pretty_printer/html"
          Canon::PrettyPrinter::Html
        end
      end

      # Detect format from file extension
      #
      # @param filename [String]
      # @return [Symbol] aborts when the extension is not recognised
      def detect_format(filename)
        ext = File.extname(filename).downcase
        EXTENSION_FORMATS.fetch(ext) do
          abort "Error: Cannot detect format from extension '#{ext}'. " \
                "Please specify --format (xml, html, json, or yaml)"
        end
      end
    end
  end
end
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../pretty_printer/xml"
4
+ require_relative "../pretty_printer/json"
5
+ require_relative "../pretty_printer/html"
6
+
7
module Canon
  module Commands
    # Command for canonicalizing files
    #
    # Options are read from a hash with symbol keys: :format, :mode
    # ("c14n" by default, or "pretty"), :indent, :indent_type, :output and
    # :with_comments.
    class FormatCommand
      # @param options [Hash] command options (symbol keys)
      def initialize(options = {})
        @options = options
      end

      # Format one file and print the result or write it to :output.
      #
      # @param input_file [String] path of the document to format
      # @return [void] errors are reported through +abort+
      def run(input_file)
        content = File.read(input_file)
        format = detect_format(input_file)
        result = format_content(content, format)
        write_output(result, format)
      rescue Errno::ENOENT => e
        # ENOENT is also raised when :output points into a missing
        # directory; only blame the input file when it is really absent.
        abort "Error: File '#{input_file}' not found" unless File.exist?(input_file)

        abort "Error: #{e.message}"
      rescue Canon::Error => e
        abort "Error: #{e.message}"
      rescue StandardError => e
        abort "Error processing file: #{e.message}"
      end

      private

      # Print the result, or write it to :output and print a confirmation.
      def write_output(result, format)
        output_path = @options[:output]
        unless output_path
          puts result
          return
        end

        File.write(output_path, result)
        mode_name = @options[:mode] == "pretty" ? "Pretty-printed" : "Canonicalized"
        puts "#{mode_name} #{format.upcase} written to #{output_path}"
      end

      # Dispatch on the :mode option ("c14n" when unset).
      #
      # @return [String] formatted content; aborts on an unknown mode
      def format_content(content, format)
        mode = @options[:mode] || "c14n"

        case mode
        when "pretty"
          format_pretty(content, format)
        when "c14n"
          format_canonical(content, format)
        else
          abort "Error: Invalid mode '#{mode}'. Use 'c14n' or 'pretty'"
        end
      end

      # Pretty-print content using :indent (default 2) and :indent_type
      # (default "space").
      #
      # @return [String] aborts when the format has no pretty printer
      def format_pretty(content, format)
        # YAML formatter already pretty-prints
        return Canon.format(content, format) if format == :yaml

        pretty_printer_class(format).new(
          indent: (@options[:indent] || 2).to_i,
          indent_type: @options[:indent_type] || "space",
        ).format(content)
      end

      # Pretty printer class for a format. An unsupported format aborts
      # instead of silently producing empty output.
      def pretty_printer_class(format)
        case format
        when :xml then Canon::PrettyPrinter::Xml
        when :json then Canon::PrettyPrinter::Json
        when :html then Canon::PrettyPrinter::Html
        else abort "Error: Unsupported format '#{format}'"
        end
      end

      # Canonicalize content; XML with :with_comments set goes through
      # Canon::Xml::C14n, everything else through Canon.format.
      def format_canonical(content, format)
        if format == :xml && @options[:with_comments]
          Canon::Xml::C14n.canonicalize(content, with_comments: true)
        else
          Canon.format(content, format)
        end
      end

      # Use the :format option when given, otherwise the file extension.
      #
      # @param filename [String]
      # @return [Symbol] aborts when the extension is not recognised
      def detect_format(filename)
        return @options[:format].to_sym if @options[:format]

        ext = File.extname(filename).downcase
        case ext
        when ".xml" then :xml
        when ".json" then :json
        when ".yaml", ".yml" then :yaml
        when ".html", ".htm" then :html
        else
          abort "Error: Cannot detect format from extension '#{ext}'. " \
                "Please specify --format (xml, json, yaml, or html)"
        end
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  module Comparison
    # Base module for comparators providing common patterns
    # Each comparator should mix in this module and implement:
    # - serialize_for_display(content, match_opts)
    module BaseComparator
      # Build verbose result hash with preprocessed strings
      #
      # @param differences [Array] Array of difference hashes
      # @param content1 [Object] First content to compare
      # @param content2 [Object] Second content to compare
      # @param match_opts [Hash] Match options used during comparison
      # @return [Hash] Hash with :differences and :preprocessed keys;
      #   :preprocessed holds the two serialized contents in argument order
      def build_verbose_result(differences, content1, content2, match_opts)
        preprocessed = [content1, content2].map do |content|
          serialize_for_display(content, match_opts)
        end

        { differences: differences, preprocessed: preprocessed }
      end

      # Serialize content for display in diffs
      # This method must be implemented by each comparator
      #
      # @param content [Object] Content to serialize
      # @param match_opts [Hash] Match options that were applied during comparison
      # @return [String] Serialized content reflecting match options
      # @raise [NotImplementedError] if not implemented by the comparator
      def serialize_for_display(content, match_opts)
        # When the module is mixed in with `extend`, self is the comparator
        # class itself; self.class would then name "Class" in the message.
        owner = is_a?(Module) ? self : self.class
        raise NotImplementedError,
              "#{owner.name || owner.inspect} must implement serialize_for_display"
      end
    end
  end
end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  module Comparison
    # Outcome of a single comparison.
    # Holds the collected differences together with the display strings and
    # answers equivalence questions based on which differences are normative.
    class ComparisonResult
      attr_reader :differences, :preprocessed_strings, :format, :html_version,
                  :match_options

      # @param differences [Array<DiffNode>] Array of difference nodes
      # @param preprocessed_strings [Array<String, String>] Pre-processed content for display
      # @param format [Symbol] Format type (:xml, :html, :json, :yaml)
      # @param html_version [Symbol, nil] HTML version (:html4 or :html5) for HTML format only
      # @param match_options [Hash, nil] Resolved match options used for comparison
      def initialize(differences:, preprocessed_strings:, format:,
                     html_version: nil, match_options: nil)
        @differences = differences
        @preprocessed_strings = preprocessed_strings
        @format = format
        @html_version = html_version
        @match_options = match_options
      end

      # Documents are equivalent exactly when no normative diff is present.
      #
      # @return [Boolean] true if no normative differences present
      def equivalent?
        !has_normative_diffs?
      end

      # Whether at least one difference counts as normative (semantic).
      # A DiffNode counts when it reports normative?; a legacy Hash entry
      # always counts; any other object never counts.
      #
      # @return [Boolean] true if at least one normative diff exists
      def has_normative_diffs?
        @differences.any? do |entry|
          case entry
          when Canon::Diff::DiffNode then entry.normative?
          when Hash then true
          else false
          end
        end
      end

      # Whether at least one DiffNode reports informative?.
      #
      # @return [Boolean] true if at least one informative diff exists
      def has_informative_diffs?
        @differences.any? { |entry| informative_node?(entry) }
      end

      # DiffNode entries that report normative?.
      # Legacy Hash entries are not included here, even though
      # has_normative_diffs? counts them.
      #
      # @return [Array<DiffNode>] Normative differences only
      def normative_differences
        @differences.select { |entry| normative_node?(entry) }
      end

      # DiffNode entries that report informative?.
      #
      # @return [Array<DiffNode>] Informative differences only
      def informative_differences
        @differences.select { |entry| informative_node?(entry) }
      end

      private

      # True-ish only for a DiffNode flagged normative.
      def normative_node?(entry)
        entry.is_a?(Canon::Diff::DiffNode) && entry.normative?
      end

      # True-ish only for a DiffNode flagged informative.
      def informative_node?(entry)
        entry.is_a?(Canon::Diff::DiffNode) && entry.informative?
      end
    end
  end
end
+ end