canon 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -1
- data/.rubocop_todo.yml +276 -7
- data/README.adoc +203 -138
- data/_config.yml +116 -0
- data/docs/ADVANCED_TOPICS.adoc +20 -0
- data/docs/BASIC_USAGE.adoc +16 -0
- data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
- data/docs/CLI.adoc +493 -0
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
- data/docs/DIFF_ARCHITECTURE.adoc +435 -0
- data/docs/DIFF_FORMATTING.adoc +540 -0
- data/docs/FORMATS.adoc +447 -0
- data/docs/INDEX.adoc +222 -0
- data/docs/INPUT_VALIDATION.adoc +477 -0
- data/docs/MATCH_ARCHITECTURE.adoc +463 -0
- data/docs/MATCH_OPTIONS.adoc +719 -0
- data/docs/MODES.adoc +432 -0
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
- data/docs/OPTIONS.adoc +1387 -0
- data/docs/PREPROCESSING.adoc +491 -0
- data/docs/RSPEC.adoc +605 -0
- data/docs/RUBY_API.adoc +478 -0
- data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
- data/docs/UNDERSTANDING_CANON.adoc +17 -0
- data/docs/VERBOSE.adoc +482 -0
- data/exe/canon +7 -0
- data/lib/canon/cli.rb +179 -0
- data/lib/canon/commands/diff_command.rb +195 -0
- data/lib/canon/commands/format_command.rb +113 -0
- data/lib/canon/comparison/base_comparator.rb +39 -0
- data/lib/canon/comparison/comparison_result.rb +79 -0
- data/lib/canon/comparison/html_comparator.rb +410 -0
- data/lib/canon/comparison/json_comparator.rb +212 -0
- data/lib/canon/comparison/match_options.rb +616 -0
- data/lib/canon/comparison/xml_comparator.rb +566 -0
- data/lib/canon/comparison/yaml_comparator.rb +93 -0
- data/lib/canon/comparison.rb +239 -0
- data/lib/canon/config.rb +172 -0
- data/lib/canon/diff/diff_block.rb +71 -0
- data/lib/canon/diff/diff_block_builder.rb +105 -0
- data/lib/canon/diff/diff_classifier.rb +46 -0
- data/lib/canon/diff/diff_context.rb +85 -0
- data/lib/canon/diff/diff_context_builder.rb +107 -0
- data/lib/canon/diff/diff_line.rb +77 -0
- data/lib/canon/diff/diff_node.rb +56 -0
- data/lib/canon/diff/diff_node_mapper.rb +148 -0
- data/lib/canon/diff/diff_report.rb +133 -0
- data/lib/canon/diff/diff_report_builder.rb +62 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
- data/lib/canon/diff_formatter/character_map.yml +197 -0
- data/lib/canon/diff_formatter/debug_output.rb +431 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
- data/lib/canon/diff_formatter/legend.rb +141 -0
- data/lib/canon/diff_formatter.rb +520 -0
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html4_formatter.rb +17 -0
- data/lib/canon/formatters/html5_formatter.rb +17 -0
- data/lib/canon/formatters/html_formatter.rb +37 -0
- data/lib/canon/formatters/html_formatter_base.rb +163 -0
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/xml_formatter.rb +20 -55
- data/lib/canon/formatters/yaml_formatter.rb +4 -1
- data/lib/canon/pretty_printer/html.rb +57 -0
- data/lib/canon/pretty_printer/json.rb +25 -0
- data/lib/canon/pretty_printer/xml.rb +29 -0
- data/lib/canon/rspec_matchers.rb +222 -80
- data/lib/canon/validators/base_validator.rb +49 -0
- data/lib/canon/validators/html_validator.rb +138 -0
- data/lib/canon/validators/json_validator.rb +89 -0
- data/lib/canon/validators/xml_validator.rb +53 -0
- data/lib/canon/validators/yaml_validator.rb +73 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/attribute_handler.rb +80 -0
- data/lib/canon/xml/c14n.rb +36 -0
- data/lib/canon/xml/character_encoder.rb +38 -0
- data/lib/canon/xml/data_model.rb +225 -0
- data/lib/canon/xml/element_matcher.rb +196 -0
- data/lib/canon/xml/line_range_mapper.rb +158 -0
- data/lib/canon/xml/namespace_handler.rb +86 -0
- data/lib/canon/xml/node.rb +32 -0
- data/lib/canon/xml/nodes/attribute_node.rb +54 -0
- data/lib/canon/xml/nodes/comment_node.rb +23 -0
- data/lib/canon/xml/nodes/element_node.rb +56 -0
- data/lib/canon/xml/nodes/namespace_node.rb +38 -0
- data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
- data/lib/canon/xml/nodes/root_node.rb +16 -0
- data/lib/canon/xml/nodes/text_node.rb +23 -0
- data/lib/canon/xml/processor.rb +151 -0
- data/lib/canon/xml/whitespace_normalizer.rb +72 -0
- data/lib/canon/xml/xml_base_handler.rb +188 -0
- data/lib/canon.rb +14 -3
- metadata +116 -21
|
@@ -0,0 +1,520 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "paint"
|
|
4
|
+
require "yaml"
|
|
5
|
+
require_relative "comparison"
|
|
6
|
+
require_relative "diff/diff_block"
|
|
7
|
+
require_relative "diff/diff_context"
|
|
8
|
+
require_relative "diff/diff_report"
|
|
9
|
+
require_relative "diff_formatter/debug_output"
|
|
10
|
+
|
|
11
|
+
module Canon
|
|
12
|
+
# Formatter for displaying semantic differences with color support
|
|
13
|
+
#
|
|
14
|
+
# This is a pure orchestrator class that delegates formatting to mode-specific
|
|
15
|
+
# and format-specific formatters. It provides a unified interface for generating
|
|
16
|
+
# both by-line and by-object diffs across multiple formats (XML, HTML, JSON, YAML).
|
|
17
|
+
#
|
|
18
|
+
# == Architecture
|
|
19
|
+
#
|
|
20
|
+
# DiffFormatter follows the orchestrator pattern with MECE (Mutually Exclusive,
|
|
21
|
+
# Collectively Exhaustive) delegation:
|
|
22
|
+
#
|
|
23
|
+
# 1. **Mode Selection**: Chooses by-line or by-object visualization
|
|
24
|
+
# 2. **Format Delegation**: Dispatches to format-specific formatter
|
|
25
|
+
# 3. **Customization**: Applies color, context, and visualization options
|
|
26
|
+
#
|
|
27
|
+
# == Diff Modes
|
|
28
|
+
#
|
|
29
|
+
# **By-Object Mode** (default for XML/JSON/YAML):
|
|
30
|
+
# - Tree-based semantic diff
|
|
31
|
+
# - Shows only what changed in the structure
|
|
32
|
+
# - Visual tree with box-drawing characters
|
|
33
|
+
# - Best for configuration files and structured data
|
|
34
|
+
#
|
|
35
|
+
# **By-Line Mode** (default for HTML):
|
|
36
|
+
# - Traditional line-by-line diff
|
|
37
|
+
# - Shows changes in document order with context
|
|
38
|
+
# - Syntax-aware token highlighting
|
|
39
|
+
# - Best for markup and when line context matters
|
|
40
|
+
#
|
|
41
|
+
# == Visualization Features
|
|
42
|
+
#
|
|
43
|
+
# - **Color support**: Red (deletions), green (additions), yellow (structure), cyan (informative)
|
|
44
|
+
# - **Whitespace visualization**: Makes invisible characters visible
|
|
45
|
+
# - **Context lines**: Shows unchanged lines around changes
|
|
46
|
+
# - **Diff grouping**: Groups nearby changes into blocks
|
|
47
|
+
# - **Character map customization**: CJK-safe Unicode symbols
|
|
48
|
+
#
|
|
49
|
+
# == Usage
|
|
50
|
+
#
|
|
51
|
+
# # Basic usage
|
|
52
|
+
# formatter = Canon::DiffFormatter.new(use_color: true, mode: :by_object)
|
|
53
|
+
# output = formatter.format(differences, :xml, doc1: xml1, doc2: xml2)
|
|
54
|
+
#
|
|
55
|
+
# # With options
|
|
56
|
+
# formatter = Canon::DiffFormatter.new(
|
|
57
|
+
# use_color: true,
|
|
58
|
+
# mode: :by_line,
|
|
59
|
+
# context_lines: 5,
|
|
60
|
+
# diff_grouping_lines: 10,
|
|
61
|
+
# show_diffs: :normative
|
|
62
|
+
# )
|
|
63
|
+
#
|
|
64
|
+
class DiffFormatter
|
|
65
|
+
# Namespace for by-object mode formatters
|
|
66
|
+
module ByObject
|
|
67
|
+
autoload :BaseFormatter, "canon/diff_formatter/by_object/base_formatter"
|
|
68
|
+
autoload :XmlFormatter, "canon/diff_formatter/by_object/xml_formatter"
|
|
69
|
+
autoload :JsonFormatter, "canon/diff_formatter/by_object/json_formatter"
|
|
70
|
+
autoload :YamlFormatter, "canon/diff_formatter/by_object/yaml_formatter"
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
# Namespace for by-line mode formatters
|
|
74
|
+
module ByLine
|
|
75
|
+
autoload :BaseFormatter, "canon/diff_formatter/by_line/base_formatter"
|
|
76
|
+
autoload :SimpleFormatter, "canon/diff_formatter/by_line/simple_formatter"
|
|
77
|
+
autoload :XmlFormatter, "canon/diff_formatter/by_line/xml_formatter"
|
|
78
|
+
autoload :JsonFormatter, "canon/diff_formatter/by_line/json_formatter"
|
|
79
|
+
autoload :YamlFormatter, "canon/diff_formatter/by_line/yaml_formatter"
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# Load character map from YAML file
|
|
83
|
+
#
|
|
84
|
+
# @return [Hash] Hash with :visualization_map, :category_map, :category_names, :character_metadata
|
|
85
|
+
def self.load_character_map
|
|
86
|
+
yaml_path = File.join(__dir__, "diff_formatter", "character_map.yml")
|
|
87
|
+
data = YAML.load_file(yaml_path)
|
|
88
|
+
|
|
89
|
+
visualization_map = {}
|
|
90
|
+
category_map = {}
|
|
91
|
+
character_metadata = {}
|
|
92
|
+
|
|
93
|
+
data["characters"].each do |char_data|
|
|
94
|
+
# Get character from either unicode code point or character field
|
|
95
|
+
char = if char_data["unicode"]
|
|
96
|
+
# Convert hex string to character
|
|
97
|
+
[char_data["unicode"].to_i(16)].pack("U")
|
|
98
|
+
else
|
|
99
|
+
# Use character field directly (handles \n, \r, \t, etc.)
|
|
100
|
+
char_data["character"]
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
vis = char_data["visualization"]
|
|
104
|
+
category = char_data["category"].to_sym
|
|
105
|
+
name = char_data["name"]
|
|
106
|
+
|
|
107
|
+
visualization_map[char] = vis
|
|
108
|
+
category_map[char] = category
|
|
109
|
+
character_metadata[char] = {
|
|
110
|
+
visualization: vis,
|
|
111
|
+
category: category,
|
|
112
|
+
name: name,
|
|
113
|
+
}
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
category_names = {}
|
|
117
|
+
data["category_names"].each do |key, value|
|
|
118
|
+
category_names[key.to_sym] = value
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
{
|
|
122
|
+
visualization_map: visualization_map,
|
|
123
|
+
category_map: category_map,
|
|
124
|
+
category_names: category_names,
|
|
125
|
+
character_metadata: character_metadata,
|
|
126
|
+
}
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
# Lazily load and cache character map data
|
|
130
|
+
def self.character_map_data
|
|
131
|
+
@character_map_data ||= load_character_map
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
# Default character visualization map (loaded from YAML)
|
|
135
|
+
DEFAULT_VISUALIZATION_MAP = character_map_data[:visualization_map].freeze
|
|
136
|
+
|
|
137
|
+
# Character category map (loaded from YAML)
|
|
138
|
+
CHARACTER_CATEGORY_MAP = character_map_data[:category_map].freeze
|
|
139
|
+
|
|
140
|
+
# Category display names (loaded from YAML)
|
|
141
|
+
CHARACTER_CATEGORY_NAMES = character_map_data[:category_names].freeze
|
|
142
|
+
|
|
143
|
+
# Character metadata including names (loaded from YAML)
|
|
144
|
+
CHARACTER_METADATA = character_map_data[:character_metadata].freeze
|
|
145
|
+
|
|
146
|
+
# Map difference codes to human-readable descriptions
|
|
147
|
+
DIFF_DESCRIPTIONS = {
|
|
148
|
+
Comparison::EQUIVALENT => "Equivalent",
|
|
149
|
+
Comparison::MISSING_ATTRIBUTE => "Missing attribute",
|
|
150
|
+
Comparison::MISSING_NODE => "Missing node",
|
|
151
|
+
Comparison::UNEQUAL_ATTRIBUTES => "Unequal attributes",
|
|
152
|
+
Comparison::UNEQUAL_COMMENTS => "Unequal comments",
|
|
153
|
+
Comparison::UNEQUAL_DOCUMENTS => "Unequal documents",
|
|
154
|
+
Comparison::UNEQUAL_ELEMENTS => "Unequal elements",
|
|
155
|
+
Comparison::UNEQUAL_NODES_TYPES => "Unequal node types",
|
|
156
|
+
Comparison::UNEQUAL_TEXT_CONTENTS => "Unequal text contents",
|
|
157
|
+
Comparison::MISSING_HASH_KEY => "Missing hash key",
|
|
158
|
+
Comparison::UNEQUAL_HASH_VALUES => "Unequal hash values",
|
|
159
|
+
Comparison::UNEQUAL_ARRAY_LENGTHS => "Unequal array lengths",
|
|
160
|
+
Comparison::UNEQUAL_ARRAY_ELEMENTS => "Unequal array elements",
|
|
161
|
+
Comparison::UNEQUAL_TYPES => "Unequal types",
|
|
162
|
+
Comparison::UNEQUAL_PRIMITIVES => "Unequal primitive values",
|
|
163
|
+
}.freeze
|
|
164
|
+
|
|
165
|
+
def initialize(use_color: true, mode: :by_object, context_lines: 3,
|
|
166
|
+
diff_grouping_lines: nil, visualization_map: nil,
|
|
167
|
+
character_map_file: nil, character_definitions: nil,
|
|
168
|
+
show_diffs: :all, verbose_diff: false)
|
|
169
|
+
@use_color = use_color
|
|
170
|
+
@mode = mode
|
|
171
|
+
@context_lines = context_lines
|
|
172
|
+
@diff_grouping_lines = diff_grouping_lines
|
|
173
|
+
@show_diffs = show_diffs
|
|
174
|
+
@verbose_diff = verbose_diff
|
|
175
|
+
@visualization_map = build_visualization_map(
|
|
176
|
+
visualization_map: visualization_map,
|
|
177
|
+
character_map_file: character_map_file,
|
|
178
|
+
character_definitions: character_definitions,
|
|
179
|
+
)
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
# Merge custom character visualization map with defaults
|
|
183
|
+
#
|
|
184
|
+
# @param custom_map [Hash, nil] Custom character mappings
|
|
185
|
+
# @return [Hash] Merged character visualization map
|
|
186
|
+
def self.merge_visualization_map(custom_map)
|
|
187
|
+
DEFAULT_VISUALIZATION_MAP.merge(custom_map || {})
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
# Load character map from custom YAML file
|
|
191
|
+
#
|
|
192
|
+
# @param file_path [String] Path to YAML file with character definitions
|
|
193
|
+
# @return [Hash] Character visualization map
|
|
194
|
+
def self.load_custom_character_map(file_path)
|
|
195
|
+
data = YAML.load_file(file_path)
|
|
196
|
+
visualization_map = {}
|
|
197
|
+
|
|
198
|
+
data["characters"].each do |char_data|
|
|
199
|
+
# Get character from either unicode code point or character field
|
|
200
|
+
char = if char_data["unicode"]
|
|
201
|
+
[char_data["unicode"].to_i(16)].pack("U")
|
|
202
|
+
else
|
|
203
|
+
char_data["character"]
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
visualization_map[char] = char_data["visualization"]
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
visualization_map
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
# Build a single-entry visualization map from a character definition hash
#
# @param definition [Hash] Character definition with symbol keys (same
#   fields as the YAML character map):
#   - :character or :unicode (required; :unicode is a hexadecimal code
#     point string such as "00A0")
#   - :visualization (required)
#   - :category (required)
#   - :name (required)
# @return [Hash] Single-entry visualization map ({ character => visualization })
# @raise [ArgumentError] if a required field is missing, or if :unicode is
#   not a valid hexadecimal number
def self.build_character_definition(definition)
  char = if definition[:unicode]
           # Integer() rejects malformed hex (e.g. "U+00A0" or ""), whereas
           # String#to_i(16) would quietly yield 0 and map the NUL character.
           [Integer(definition[:unicode], 16)].pack("U")
         elsif definition[:character]
           definition[:character]
         else
           raise ArgumentError,
                 "Character definition must include :character or :unicode"
         end

  # :category and :name are only checked for presence; the returned map
  # carries just the visualization.
  %i[visualization category name].each do |field|
    next if definition[field]

    raise ArgumentError, "Character definition must include :#{field}"
  end

  { char => definition[:visualization] }
end
|
|
245
|
+
|
|
246
|
+
# Format differences array for display
|
|
247
|
+
#
|
|
248
|
+
# @param differences [ComparisonResult, Array] Comparison result, or a legacy array of difference hashes
|
|
249
|
+
# @param format [Symbol] Format type (:xml, :html, :json, :yaml)
|
|
250
|
+
# @param doc1 [String, nil] First document content (for by-line mode)
|
|
251
|
+
# @param doc2 [String, nil] Second document content (for by-line mode)
|
|
252
|
+
# @param html_version [Symbol, nil] HTML version (:html4 or :html5)
|
|
253
|
+
# @return [String] Formatted output
|
|
254
|
+
def format(differences, format, doc1: nil, doc2: nil, html_version: nil)
|
|
255
|
+
# In by-line mode with doc1/doc2, always perform diff regardless of differences
|
|
256
|
+
if @mode == :by_line && doc1 && doc2
|
|
257
|
+
return by_line_diff(doc1, doc2, format: format,
|
|
258
|
+
html_version: html_version,
|
|
259
|
+
differences: differences)
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
# Check if no differences (handle both ComparisonResult and legacy Array)
|
|
263
|
+
no_diffs = if differences.respond_to?(:equivalent?)
|
|
264
|
+
# ComparisonResult object (production path)
|
|
265
|
+
differences.equivalent?
|
|
266
|
+
else
|
|
267
|
+
# Legacy Array (for low-level tests)
|
|
268
|
+
differences.empty?
|
|
269
|
+
end
|
|
270
|
+
return success_message if no_diffs
|
|
271
|
+
|
|
272
|
+
case @mode
|
|
273
|
+
when :by_line
|
|
274
|
+
by_line_diff(doc1, doc2, format: format, html_version: html_version,
|
|
275
|
+
differences: differences)
|
|
276
|
+
else
|
|
277
|
+
by_object_diff(differences, format)
|
|
278
|
+
end
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
# Format comparison result from Canon::Comparison.equivalent?
|
|
282
|
+
# This is the single entry point for generating diffs from comparison results
|
|
283
|
+
#
|
|
284
|
+
# @param comparison_result [ComparisonResult, Hash, Array, Boolean] Result from Canon::Comparison.equivalent?
|
|
285
|
+
# @param expected [Object] Expected value
|
|
286
|
+
# @param actual [Object] Actual value
|
|
287
|
+
# @return [String] Formatted diff output
|
|
288
|
+
def format_comparison_result(comparison_result, expected, actual)
|
|
289
|
+
# Detect format from expected content
|
|
290
|
+
format = Canon::Comparison.send(:detect_format, expected)
|
|
291
|
+
|
|
292
|
+
formatter_options = {
|
|
293
|
+
use_color: @use_color,
|
|
294
|
+
mode: @mode,
|
|
295
|
+
context_lines: @context_lines,
|
|
296
|
+
diff_grouping_lines: @diff_grouping_lines,
|
|
297
|
+
show_diffs: @show_diffs,
|
|
298
|
+
verbose_diff: @verbose_diff,
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
output = []
|
|
302
|
+
|
|
303
|
+
# 1. CANON VERBOSE tables (ONLY if CANON_VERBOSE=1)
|
|
304
|
+
verbose_tables = DebugOutput.verbose_tables_only(
|
|
305
|
+
comparison_result,
|
|
306
|
+
formatter_options,
|
|
307
|
+
)
|
|
308
|
+
output << verbose_tables unless verbose_tables.empty?
|
|
309
|
+
|
|
310
|
+
# 2. Semantic Diff Report (only for a ComparisonResult that has differences)
|
|
311
|
+
if comparison_result.is_a?(Canon::Comparison::ComparisonResult) &&
|
|
312
|
+
comparison_result.differences.any?
|
|
313
|
+
require_relative "diff_formatter/diff_detail_formatter"
|
|
314
|
+
output << DiffDetailFormatter.format_report(
|
|
315
|
+
comparison_result.differences,
|
|
316
|
+
use_color: @use_color,
|
|
317
|
+
)
|
|
318
|
+
end
|
|
319
|
+
|
|
320
|
+
# 3. Main diff output (by-line or by-object) - ALWAYS
|
|
321
|
+
|
|
322
|
+
# Check if comparison result is a ComparisonResult object
|
|
323
|
+
if comparison_result.is_a?(Canon::Comparison::ComparisonResult)
|
|
324
|
+
# Use preprocessed strings from comparison - avoids re-preprocessing
|
|
325
|
+
doc1, doc2 = comparison_result.preprocessed_strings
|
|
326
|
+
differences = comparison_result.differences
|
|
327
|
+
html_version = comparison_result.html_version
|
|
328
|
+
elsif comparison_result.is_a?(Hash) && comparison_result[:preprocessed]
|
|
329
|
+
# Legacy Hash format - Use preprocessed strings from comparison
|
|
330
|
+
doc1, doc2 = comparison_result[:preprocessed]
|
|
331
|
+
differences = comparison_result[:differences]
|
|
332
|
+
html_version = comparison_result[:html_version]
|
|
333
|
+
else
|
|
334
|
+
# Legacy path: normalize content for display
|
|
335
|
+
doc1, doc2 = normalize_content_for_display(expected, actual, format)
|
|
336
|
+
# comparison_result is an array of differences when verbose: true
|
|
337
|
+
differences = comparison_result.is_a?(Array) ? comparison_result : []
|
|
338
|
+
html_version = nil
|
|
339
|
+
end
|
|
340
|
+
|
|
341
|
+
# Generate diff using existing format method
|
|
342
|
+
output << format(differences, format, doc1: doc1, doc2: doc2,
|
|
343
|
+
html_version: html_version)
|
|
344
|
+
|
|
345
|
+
output.compact.join("\n")
|
|
346
|
+
end
|
|
347
|
+
|
|
348
|
+
private
|
|
349
|
+
|
|
350
|
+
# Normalize content for display in diffs
|
|
351
|
+
#
|
|
352
|
+
# @param expected [Object] Expected value
|
|
353
|
+
# @param actual [Object] Actual value
|
|
354
|
+
# @param format [Symbol] Detected format
|
|
355
|
+
# @return [Array<String, String>] Normalized [expected, actual] strings
|
|
356
|
+
def normalize_content_for_display(expected, actual, format)
|
|
357
|
+
case format
|
|
358
|
+
when :xml
|
|
359
|
+
[
|
|
360
|
+
Canon::Xml::C14n.canonicalize(expected, with_comments: false).gsub(
|
|
361
|
+
/></, ">\n<"
|
|
362
|
+
),
|
|
363
|
+
Canon::Xml::C14n.canonicalize(actual, with_comments: false).gsub(
|
|
364
|
+
/></, ">\n<"
|
|
365
|
+
),
|
|
366
|
+
]
|
|
367
|
+
when :html
|
|
368
|
+
require "nokogiri"
|
|
369
|
+
[
|
|
370
|
+
parse_and_format_html(expected),
|
|
371
|
+
parse_and_format_html(actual),
|
|
372
|
+
]
|
|
373
|
+
when :json
|
|
374
|
+
[
|
|
375
|
+
Canon.format(expected, :json),
|
|
376
|
+
Canon.format(actual, :json),
|
|
377
|
+
]
|
|
378
|
+
when :yaml
|
|
379
|
+
[
|
|
380
|
+
Canon.format(expected, :yaml),
|
|
381
|
+
Canon.format(actual, :yaml),
|
|
382
|
+
]
|
|
383
|
+
when :ruby_object
|
|
384
|
+
# For Ruby objects, format as JSON for display
|
|
385
|
+
require "json"
|
|
386
|
+
[
|
|
387
|
+
JSON.pretty_generate(expected),
|
|
388
|
+
JSON.pretty_generate(actual),
|
|
389
|
+
]
|
|
390
|
+
else
|
|
391
|
+
# Default case including :string format
|
|
392
|
+
[expected.to_s, actual.to_s]
|
|
393
|
+
end
|
|
394
|
+
end
|
|
395
|
+
|
|
396
|
+
# Parse and format HTML for display
#
# Best-effort: if Nokogiri cannot be loaded or parsing fails, the plain
# string form of the input is returned instead.
#
# @param html [Object] HTML content (a string or an already parsed
#   Nokogiri HTML document)
# @return [String] Formatted HTML, or html.to_s when formatting fails
def parse_and_format_html(html)
  # Load Nokogiri before its constants are referenced. With the require
  # placed after the type check, an unloaded Nokogiri made the check raise
  # NameError and the rescue below silently skipped formatting.
  require "nokogiri"

  return html.to_html if html.is_a?(Nokogiri::HTML::Document) ||
    html.is_a?(Nokogiri::HTML5::Document)

  Nokogiri::HTML(html).to_html
rescue StandardError, LoadError
  # LoadError is not a StandardError; rescue it explicitly so a missing
  # gem keeps the same fallback instead of crashing.
  html.to_s
end
|
|
409
|
+
|
|
410
|
+
# Build the final visualization map from various customization options
|
|
411
|
+
#
|
|
412
|
+
# @param visualization_map [Hash, nil] Complete custom visualization map
|
|
413
|
+
# @param character_map_file [String, nil] Path to custom YAML file
|
|
414
|
+
# @param character_definitions [Array<Hash>, nil] Individual character definitions
|
|
415
|
+
# @return [Hash] Final visualization map
|
|
416
|
+
def build_visualization_map(visualization_map: nil, character_map_file: nil,
|
|
417
|
+
character_definitions: nil)
|
|
418
|
+
# Priority order:
|
|
419
|
+
# 1. If visualization_map is provided, use it as complete replacement
|
|
420
|
+
# 2. Otherwise, start with defaults and apply customizations
|
|
421
|
+
|
|
422
|
+
return visualization_map if visualization_map
|
|
423
|
+
|
|
424
|
+
# Start with defaults
|
|
425
|
+
result = DEFAULT_VISUALIZATION_MAP.dup
|
|
426
|
+
|
|
427
|
+
# Apply custom file if provided
|
|
428
|
+
if character_map_file
|
|
429
|
+
custom_map = self.class.load_custom_character_map(character_map_file)
|
|
430
|
+
result.merge!(custom_map)
|
|
431
|
+
end
|
|
432
|
+
|
|
433
|
+
# Apply individual character definitions if provided
|
|
434
|
+
character_definitions&.each do |definition|
|
|
435
|
+
char_map = self.class.build_character_definition(definition)
|
|
436
|
+
result.merge!(char_map)
|
|
437
|
+
end
|
|
438
|
+
|
|
439
|
+
result
|
|
440
|
+
end
|
|
441
|
+
|
|
442
|
+
# Generate success message based on mode
|
|
443
|
+
def success_message
|
|
444
|
+
emoji = @use_color ? "✅ " : ""
|
|
445
|
+
message = case @mode
|
|
446
|
+
when :by_line
|
|
447
|
+
"Files are identical"
|
|
448
|
+
else
|
|
449
|
+
"Files are semantically equivalent"
|
|
450
|
+
end
|
|
451
|
+
|
|
452
|
+
colorize("#{emoji}#{message}\n", :green, :bold)
|
|
453
|
+
end
|
|
454
|
+
|
|
455
|
+
# Generate by-object diff with tree visualization
|
|
456
|
+
# Delegates to format-specific by-object formatters
|
|
457
|
+
def by_object_diff(differences, format)
|
|
458
|
+
require_relative "diff_formatter/by_object/base_formatter"
|
|
459
|
+
|
|
460
|
+
output = []
|
|
461
|
+
output << colorize("Visual Diff:", :cyan, :bold)
|
|
462
|
+
|
|
463
|
+
# Delegate to format-specific formatter
|
|
464
|
+
formatter = ByObject::BaseFormatter.for_format(
|
|
465
|
+
format,
|
|
466
|
+
use_color: @use_color,
|
|
467
|
+
visualization_map: @visualization_map,
|
|
468
|
+
)
|
|
469
|
+
|
|
470
|
+
output << formatter.format(differences, format)
|
|
471
|
+
|
|
472
|
+
output.join("\n")
|
|
473
|
+
end
|
|
474
|
+
|
|
475
|
+
# Generate by-line diff
|
|
476
|
+
# Delegates to format-specific by-line formatters
|
|
477
|
+
def by_line_diff(doc1, doc2, format: :xml, html_version: nil,
|
|
478
|
+
differences: [])
|
|
479
|
+
require_relative "diff_formatter/by_line/base_formatter"
|
|
480
|
+
|
|
481
|
+
# For HTML format, switch to the specific html_version (:html4 or :html5) when provided; otherwise format stays :html
|
|
482
|
+
if format == :html && html_version
|
|
483
|
+
format = html_version # Use :html4 or :html5
|
|
484
|
+
end
|
|
485
|
+
|
|
486
|
+
# Format display name for header
|
|
487
|
+
format_name = format.to_s.upcase
|
|
488
|
+
|
|
489
|
+
output = []
|
|
490
|
+
output << colorize("Line-by-line diff (#{format_name} mode):", :cyan,
|
|
491
|
+
:bold)
|
|
492
|
+
|
|
493
|
+
return output.join("\n") if doc1.nil? || doc2.nil?
|
|
494
|
+
|
|
495
|
+
# Delegate to format-specific formatter
|
|
496
|
+
formatter = ByLine::BaseFormatter.for_format(
|
|
497
|
+
format,
|
|
498
|
+
use_color: @use_color,
|
|
499
|
+
context_lines: @context_lines,
|
|
500
|
+
diff_grouping_lines: @diff_grouping_lines,
|
|
501
|
+
visualization_map: @visualization_map,
|
|
502
|
+
show_diffs: @show_diffs,
|
|
503
|
+
differences: differences,
|
|
504
|
+
)
|
|
505
|
+
|
|
506
|
+
output << formatter.format(doc1, doc2)
|
|
507
|
+
|
|
508
|
+
output.join("\n")
|
|
509
|
+
end
|
|
510
|
+
|
|
511
|
+
# Colorize text if color is enabled
|
|
512
|
+
# RSpec-aware: resets any existing ANSI codes before applying new colors
|
|
513
|
+
def colorize(text, *colors)
|
|
514
|
+
return text unless @use_color
|
|
515
|
+
|
|
516
|
+
# Reset ANSI codes first to prevent RSpec's initial red from interfering
|
|
517
|
+
"\e[0m#{Paint[text, *colors]}"
|
|
518
|
+
end
|
|
519
|
+
end
|
|
520
|
+
end
|
data/lib/canon/errors.rb
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
# Base error class for Canon gem
|
|
5
|
+
class Error < StandardError; end
|
|
6
|
+
|
|
7
|
+
# Error raised when attempting to compare different formats
|
|
8
|
+
class CompareFormatMismatchError < Error
|
|
9
|
+
# Initialize a new CompareFormatMismatchError
|
|
10
|
+
#
|
|
11
|
+
# @param format1 [Symbol] The first format
|
|
12
|
+
# @param format2 [Symbol] The second format
|
|
13
|
+
def initialize(format1, format2)
|
|
14
|
+
super("Cannot compare different formats: #{format1} vs #{format2}")
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# Error raised when input validation fails
|
|
19
|
+
#
|
|
20
|
+
# This error is raised when input (XML, HTML, JSON, YAML) is malformed
|
|
21
|
+
# or fails validation checks. It includes detailed information about
|
|
22
|
+
# the error location and nature.
|
|
23
|
+
class ValidationError < Error
|
|
24
|
+
attr_reader :format, :line, :column, :details
|
|
25
|
+
|
|
26
|
+
# Initialize a new ValidationError
|
|
27
|
+
#
|
|
28
|
+
# @param message [String] The error message
|
|
29
|
+
# @param format [Symbol] The format being validated (:xml, :html, :json,
|
|
30
|
+
# :yaml)
|
|
31
|
+
# @param line [Integer, nil] The line number where the error occurred
|
|
32
|
+
# @param column [Integer, nil] The column number where the error occurred
|
|
33
|
+
# @param details [String, nil] Additional details about the error
|
|
34
|
+
def initialize(message, format:, line: nil, column: nil, details: nil)
|
|
35
|
+
@format = format
|
|
36
|
+
@line = line
|
|
37
|
+
@column = column
|
|
38
|
+
@details = details
|
|
39
|
+
super(build_message(message))
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
private
|
|
43
|
+
|
|
44
|
+
# Build a detailed error message with location information
|
|
45
|
+
#
|
|
46
|
+
# @param msg [String] The base error message
|
|
47
|
+
# @return [String] The formatted error message
|
|
48
|
+
def build_message(msg)
|
|
49
|
+
parts = ["#{format.to_s.upcase} Validation Error: #{msg}"]
|
|
50
|
+
parts << " Line: #{line}" if line
|
|
51
|
+
parts << " Column: #{column}" if column
|
|
52
|
+
parts << " Details: #{details}" if details
|
|
53
|
+
parts.join("\n")
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "html_formatter_base"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Formatters
|
|
7
|
+
# HTML4 formatter using Nokogiri::HTML parser
|
|
8
|
+
class Html4Formatter < HtmlFormatterBase
|
|
9
|
+
# Parse HTML4 document
|
|
10
|
+
# @param html [String] HTML document to parse
|
|
11
|
+
# @return [Nokogiri::HTML::Document] Parsed HTML4 document
|
|
12
|
+
def self.parse(html)
|
|
13
|
+
Nokogiri::HTML(html)
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "html_formatter_base"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Formatters
|
|
7
|
+
# HTML5 formatter using Nokogiri::HTML5 parser
|
|
8
|
+
class Html5Formatter < HtmlFormatterBase
|
|
9
|
+
# Parse HTML5 document
|
|
10
|
+
# @param html [String] HTML document to parse
|
|
11
|
+
# @return [Nokogiri::HTML5::Document] Parsed HTML5 document
|
|
12
|
+
def self.parse(html)
|
|
13
|
+
Nokogiri::HTML5(html)
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
require_relative "html_formatter_base"
|
|
5
|
+
require_relative "../pretty_printer/html"
|
|
6
|
+
require_relative "../validators/html_validator"
|
|
7
|
+
|
|
8
|
+
module Canon
|
|
9
|
+
module Formatters
|
|
10
|
+
# HTML formatter for HTML 4/5 and XHTML
|
|
11
|
+
class HtmlFormatter < HtmlFormatterBase
|
|
12
|
+
# Parse HTML into a Nokogiri document
|
|
13
|
+
# @param html [String] HTML document to parse
|
|
14
|
+
# @return [Nokogiri::HTML::Document, Nokogiri::XML::Document]
|
|
15
|
+
# Parsed HTML or XML document
|
|
16
|
+
def self.parse(html)
|
|
17
|
+
# Validate before parsing
|
|
18
|
+
Canon::Validators::HtmlValidator.validate!(html)
|
|
19
|
+
|
|
20
|
+
if xhtml?(html)
|
|
21
|
+
Nokogiri::XML(html)
|
|
22
|
+
else
|
|
23
|
+
Nokogiri::HTML5(html)
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# Check if HTML is XHTML
|
|
28
|
+
def self.xhtml?(html)
|
|
29
|
+
html.include?("XHTML") ||
|
|
30
|
+
html.include?('xmlns="http://www.w3.org/1999/xhtml"') ||
|
|
31
|
+
html.match?(/xmlns:\w+/)
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
private_class_method :xhtml?
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|