canon 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +163 -67
- data/README.adoc +400 -7
- data/docs/Gemfile +9 -0
- data/docs/INDEX.adoc +99 -182
- data/docs/_config.yml +100 -0
- data/docs/advanced/diff-classification.adoc +547 -0
- data/docs/advanced/diff-pipeline.adoc +358 -0
- data/docs/advanced/index.adoc +214 -0
- data/docs/advanced/semantic-diff-report.adoc +390 -0
- data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
- data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
- data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
- data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
- data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
- data/docs/features/diff-formatting/display-filtering.adoc +472 -0
- data/docs/features/diff-formatting/index.adoc +140 -0
- data/docs/features/environment-configuration/index.adoc +327 -0
- data/docs/features/environment-configuration/override-system.adoc +436 -0
- data/docs/features/environment-configuration/size-limits.adoc +273 -0
- data/docs/features/index.adoc +173 -0
- data/docs/features/input-validation/index.adoc +521 -0
- data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
- data/docs/features/match-options/html-policies.adoc +312 -0
- data/docs/features/match-options/index.adoc +621 -0
- data/docs/getting-started/index.adoc +83 -0
- data/docs/getting-started/quick-start.adoc +76 -0
- data/docs/guides/choosing-configuration.adoc +689 -0
- data/docs/guides/index.adoc +181 -0
- data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
- data/docs/interfaces/index.adoc +101 -0
- data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
- data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
- data/docs/lychee.toml +65 -0
- data/docs/reference/cli-options.adoc +418 -0
- data/docs/reference/environment-variables.adoc +375 -0
- data/docs/reference/index.adoc +204 -0
- data/docs/reference/options-across-interfaces.adoc +417 -0
- data/docs/understanding/algorithms/dom-diff.adoc +389 -0
- data/docs/understanding/algorithms/index.adoc +314 -0
- data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
- data/docs/understanding/architecture.adoc +447 -0
- data/docs/understanding/comparison-pipeline.adoc +317 -0
- data/docs/understanding/formats/html.adoc +380 -0
- data/docs/understanding/formats/index.adoc +261 -0
- data/docs/understanding/formats/json.adoc +390 -0
- data/docs/understanding/formats/xml.adoc +366 -0
- data/docs/understanding/formats/yaml.adoc +504 -0
- data/docs/understanding/index.adoc +130 -0
- data/lib/canon/cli.rb +42 -1
- data/lib/canon/commands/diff_command.rb +108 -23
- data/lib/canon/comparison/compare_profile.rb +101 -0
- data/lib/canon/comparison/comparison_result.rb +41 -2
- data/lib/canon/comparison/html_comparator.rb +292 -71
- data/lib/canon/comparison/html_compare_profile.rb +117 -0
- data/lib/canon/comparison/match_options.rb +42 -4
- data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
- data/lib/canon/comparison/xml_comparator.rb +695 -91
- data/lib/canon/comparison.rb +207 -2
- data/lib/canon/config/env_provider.rb +71 -0
- data/lib/canon/config/env_schema.rb +58 -0
- data/lib/canon/config/override_resolver.rb +55 -0
- data/lib/canon/config/type_converter.rb +59 -0
- data/lib/canon/config.rb +158 -29
- data/lib/canon/data_model.rb +29 -0
- data/lib/canon/diff/diff_classifier.rb +74 -14
- data/lib/canon/diff/diff_context_builder.rb +41 -0
- data/lib/canon/diff/diff_line.rb +18 -2
- data/lib/canon/diff/diff_node.rb +18 -3
- data/lib/canon/diff/diff_node_mapper.rb +71 -12
- data/lib/canon/diff/formatting_detector.rb +53 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
- data/lib/canon/diff_formatter/debug_output.rb +7 -1
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
- data/lib/canon/diff_formatter/legend.rb +42 -0
- data/lib/canon/diff_formatter.rb +78 -9
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html_formatter_base.rb +35 -1
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/yaml_formatter.rb +3 -0
- data/lib/canon/html/data_model.rb +229 -0
- data/lib/canon/html.rb +9 -0
- data/lib/canon/options/cli_generator.rb +70 -0
- data/lib/canon/options/registry.rb +234 -0
- data/lib/canon/rspec_matchers.rb +34 -13
- data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
- data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
- data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
- data/lib/canon/tree_diff/core/matching.rb +241 -0
- data/lib/canon/tree_diff/core/node_signature.rb +164 -0
- data/lib/canon/tree_diff/core/node_weight.rb +135 -0
- data/lib/canon/tree_diff/core/tree_node.rb +450 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
- data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
- data/lib/canon/tree_diff/operation_converter.rb +631 -0
- data/lib/canon/tree_diff/operations/operation.rb +92 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
- data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
- data/lib/canon/tree_diff.rb +33 -0
- data/lib/canon/validators/json_validator.rb +3 -1
- data/lib/canon/validators/yaml_validator.rb +3 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +22 -23
- data/lib/canon/xml/element_matcher.rb +128 -20
- data/lib/canon/xml/namespace_helper.rb +110 -0
- data/lib/canon.rb +3 -0
- metadata +81 -23
- data/_config.yml +0 -116
- data/docs/ADVANCED_TOPICS.adoc +0 -20
- data/docs/BASIC_USAGE.adoc +0 -16
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/docs/DIFF_FORMATTING.adoc +0 -540
- data/docs/FORMATS.adoc +0 -447
- data/docs/INPUT_VALIDATION.adoc +0 -477
- data/docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/docs/MATCH_OPTIONS.adoc +0 -719
- data/docs/MODES.adoc +0 -432
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/docs/OPTIONS.adoc +0 -1387
- data/docs/PREPROCESSING.adoc +0 -491
- data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
- data/docs/UNDERSTANDING_CANON.adoc +0 -17
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
# Abstract base class for format-specific data models
|
|
5
|
+
# Provides common interface for parsing and serializing documents
|
|
6
|
+
class DataModel
|
|
7
|
+
class << self
|
|
8
|
+
# Parse input into data model
|
|
9
|
+
# Must be implemented by subclasses
|
|
10
|
+
#
|
|
11
|
+
# @param input [String] Input content to parse
|
|
12
|
+
# @return [Object] Parsed data model representation
|
|
13
|
+
# @raise [NotImplementedError] if not implemented by subclass
|
|
14
|
+
def parse(input)
|
|
15
|
+
raise NotImplementedError, "#{self} must implement #parse"
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# Serialize data model node
|
|
19
|
+
# Must be implemented by subclasses
|
|
20
|
+
#
|
|
21
|
+
# @param node [Object] Node to serialize
|
|
22
|
+
# @return [String] Serialized representation
|
|
23
|
+
# @raise [NotImplementedError] if not implemented by subclass
|
|
24
|
+
def serialize(node)
|
|
25
|
+
raise NotImplementedError, "#{self} must implement #serialize"
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -1,22 +1,54 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "formatting_detector"
|
|
4
|
+
require_relative "../comparison/compare_profile"
|
|
5
|
+
|
|
3
6
|
module Canon
|
|
4
7
|
module Diff
|
|
5
8
|
# Classifies DiffNodes as normative (affects equivalence) or informative (doesn't affect equivalence)
|
|
6
9
|
# based on the match options in effect
|
|
7
10
|
class DiffClassifier
|
|
8
|
-
attr_reader :match_options
|
|
11
|
+
attr_reader :match_options, :profile
|
|
9
12
|
|
|
10
13
|
# @param match_options [Canon::Comparison::ResolvedMatchOptions] The match options
|
|
11
14
|
def initialize(match_options)
|
|
12
15
|
@match_options = match_options
|
|
16
|
+
# Use the compare_profile from ResolvedMatchOptions if available (e.g., HtmlCompareProfile)
|
|
17
|
+
# Otherwise create a base CompareProfile
|
|
18
|
+
@profile = if match_options.respond_to?(:compare_profile) && match_options.compare_profile
|
|
19
|
+
match_options.compare_profile
|
|
20
|
+
else
|
|
21
|
+
Canon::Comparison::CompareProfile.new(match_options)
|
|
22
|
+
end
|
|
13
23
|
end
|
|
14
24
|
|
|
15
25
|
# Classify a single DiffNode as normative or informative
|
|
26
|
+
# Hierarchy: formatting-only < informative < normative
|
|
27
|
+
# CompareProfile determines base classification, FormattingDetector refines informative differences
|
|
16
28
|
# @param diff_node [DiffNode] The diff node to classify
|
|
17
|
-
# @return [DiffNode] The same diff node with normative
|
|
29
|
+
# @return [DiffNode] The same diff node with normative/formatting attributes set
|
|
18
30
|
def classify(diff_node)
|
|
19
|
-
|
|
31
|
+
# FIRST: Determine if this dimension is normative based on CompareProfile
|
|
32
|
+
# This respects the policy settings (strict/normalize/ignore)
|
|
33
|
+
is_normative = profile.normative_dimension?(diff_node.dimension)
|
|
34
|
+
|
|
35
|
+
# SECOND: Check if FormattingDetector should be consulted
|
|
36
|
+
# Only check for formatting-only when dimension is NOT normative
|
|
37
|
+
# This ensures strict mode differences remain normative
|
|
38
|
+
should_check_formatting = !is_normative &&
|
|
39
|
+
profile.supports_formatting_detection?(diff_node.dimension)
|
|
40
|
+
|
|
41
|
+
# If we should check formatting, see if it's formatting-only
|
|
42
|
+
if should_check_formatting && formatting_only_diff?(diff_node)
|
|
43
|
+
diff_node.formatting = true
|
|
44
|
+
diff_node.normative = false
|
|
45
|
+
return diff_node
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# Otherwise, use the normative determination from CompareProfile
|
|
49
|
+
diff_node.formatting = false
|
|
50
|
+
diff_node.normative = is_normative
|
|
51
|
+
|
|
20
52
|
diff_node
|
|
21
53
|
end
|
|
22
54
|
|
|
@@ -29,17 +61,45 @@ module Canon
|
|
|
29
61
|
|
|
30
62
|
private
|
|
31
63
|
|
|
32
|
-
#
|
|
33
|
-
# @param
|
|
34
|
-
# @return [Boolean] true if
|
|
35
|
-
def
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
64
|
+
# Check if a DiffNode represents a formatting-only difference
|
|
65
|
+
# @param diff_node [DiffNode] The diff node to check
|
|
66
|
+
# @return [Boolean] true if formatting-only
|
|
67
|
+
def formatting_only_diff?(diff_node)
|
|
68
|
+
text1 = extract_text_content(diff_node.node1)
|
|
69
|
+
text2 = extract_text_content(diff_node.node2)
|
|
70
|
+
|
|
71
|
+
FormattingDetector.formatting_only?(text1, text2)
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# Extract text content from a node for formatting comparison
|
|
75
|
+
# @param node [Object] The node to extract text from
|
|
76
|
+
# @return [String, nil] The text content or nil
|
|
77
|
+
def extract_text_content(node)
|
|
78
|
+
return nil if node.nil?
|
|
79
|
+
|
|
80
|
+
# For TextNode with value attribute (Canon::Xml::Nodes::TextNode)
|
|
81
|
+
return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)
|
|
82
|
+
|
|
83
|
+
# For XML/HTML nodes with text_content method
|
|
84
|
+
return node.text_content if node.respond_to?(:text_content)
|
|
85
|
+
|
|
86
|
+
# For nodes with text method
|
|
87
|
+
return node.text if node.respond_to?(:text)
|
|
88
|
+
|
|
89
|
+
# For nodes with content method
|
|
90
|
+
return node.content if node.respond_to?(:content)
|
|
91
|
+
|
|
92
|
+
# For nodes with value method (other types)
|
|
93
|
+
return node.value if node.respond_to?(:value)
|
|
94
|
+
|
|
95
|
+
# For simple text nodes or strings
|
|
96
|
+
return node.to_s if node.is_a?(String)
|
|
97
|
+
|
|
98
|
+
# For other node types, try to_s
|
|
99
|
+
node.to_s
|
|
100
|
+
rescue StandardError
|
|
101
|
+
# If extraction fails, return nil (not formatting-only)
|
|
102
|
+
nil
|
|
43
103
|
end
|
|
44
104
|
end
|
|
45
105
|
end
|
|
@@ -41,6 +41,9 @@ grouping_lines: nil)
|
|
|
41
41
|
create_context_for_blocks(block_group)
|
|
42
42
|
end
|
|
43
43
|
|
|
44
|
+
# Merge overlapping contexts to avoid duplicate line display
|
|
45
|
+
contexts = merge_overlapping_contexts(contexts)
|
|
46
|
+
|
|
44
47
|
# Filter out all-informative contexts if show_diffs was :normative
|
|
45
48
|
# Note: The filtering based on show_diffs happens at the block level
|
|
46
49
|
# in DiffBlockBuilder, so we don't need to re-filter here.
|
|
@@ -51,6 +54,44 @@ grouping_lines: nil)
|
|
|
51
54
|
|
|
52
55
|
private
|
|
53
56
|
|
|
57
|
+
# Merge overlapping contexts into single contexts
|
|
58
|
+
# When contexts have overlapping line ranges, combine them
|
|
59
|
+
def merge_overlapping_contexts(contexts)
|
|
60
|
+
return contexts if contexts.empty?
|
|
61
|
+
|
|
62
|
+
# Sort by start_idx
|
|
63
|
+
sorted = contexts.sort_by(&:start_idx)
|
|
64
|
+
merged = [sorted.first]
|
|
65
|
+
|
|
66
|
+
sorted[1..].each do |context|
|
|
67
|
+
last = merged.last
|
|
68
|
+
|
|
69
|
+
# Check if contexts overlap (including touching contexts)
|
|
70
|
+
if context.start_idx <= last.end_idx + 1
|
|
71
|
+
# Merge: extend the range and combine blocks
|
|
72
|
+
new_end = [last.end_idx, context.end_idx].max
|
|
73
|
+
combined_blocks = (last.blocks + context.blocks).uniq
|
|
74
|
+
|
|
75
|
+
# Extract combined lines
|
|
76
|
+
combined_lines = @all_lines[last.start_idx..new_end]
|
|
77
|
+
|
|
78
|
+
# Replace last context with merged one
|
|
79
|
+
merged[-1] = DiffContext.new(
|
|
80
|
+
start_line: last.start_idx,
|
|
81
|
+
end_line: new_end,
|
|
82
|
+
blocks: combined_blocks,
|
|
83
|
+
lines: combined_lines,
|
|
84
|
+
normative: last.normative? || context.normative?,
|
|
85
|
+
)
|
|
86
|
+
else
|
|
87
|
+
# No overlap, add as separate context
|
|
88
|
+
merged << context
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
merged
|
|
93
|
+
end
|
|
94
|
+
|
|
54
95
|
# Group blocks that are close together
|
|
55
96
|
def group_nearby_blocks(blocks, max_gap)
|
|
56
97
|
return [] if blocks.empty?
|
data/lib/canon/diff/diff_line.rb
CHANGED
|
@@ -11,17 +11,22 @@ module Canon
|
|
|
11
11
|
# @param content [String] The text content of the line
|
|
12
12
|
# @param type [Symbol] The type of line (:unchanged, :added, :removed, :changed)
|
|
13
13
|
# @param diff_node [DiffNode, nil] The semantic diff node this line belongs to
|
|
14
|
-
|
|
14
|
+
# @param formatting [Boolean] Whether this is a formatting-only difference
|
|
15
|
+
def initialize(line_number:, content:, type:, diff_node: nil,
|
|
16
|
+
formatting: false)
|
|
15
17
|
@line_number = line_number
|
|
16
18
|
@content = content
|
|
17
19
|
@type = type
|
|
18
20
|
@diff_node = diff_node
|
|
21
|
+
@formatting = formatting
|
|
19
22
|
end
|
|
20
23
|
|
|
21
24
|
# @return [Boolean] true if this line represents a normative difference
|
|
22
25
|
# If diff_node is nil (not linked to any semantic difference), the line
|
|
23
26
|
# is considered informative (cosmetic/unchanged)
|
|
27
|
+
# Formatting-only diffs are never normative
|
|
24
28
|
def normative?
|
|
29
|
+
return false if formatting?
|
|
25
30
|
return false if diff_node.nil?
|
|
26
31
|
|
|
27
32
|
diff_node.normative?
|
|
@@ -29,12 +34,20 @@ module Canon
|
|
|
29
34
|
|
|
30
35
|
# @return [Boolean] true if this line represents an informative-only difference
|
|
31
36
|
# If diff_node is nil (not linked), it's not informative either (it's unchanged/cosmetic)
|
|
37
|
+
# Formatting-only diffs are never informative
|
|
32
38
|
def informative?
|
|
39
|
+
return false if formatting?
|
|
33
40
|
return false if diff_node.nil?
|
|
34
41
|
|
|
35
42
|
diff_node.informative?
|
|
36
43
|
end
|
|
37
44
|
|
|
45
|
+
# @return [Boolean] true if this line represents a formatting-only difference
|
|
46
|
+
# Formatting diffs are purely cosmetic (whitespace, line breaks) with no semantic meaning
|
|
47
|
+
def formatting?
|
|
48
|
+
@formatting == true
|
|
49
|
+
end
|
|
50
|
+
|
|
38
51
|
# @return [Boolean] true if this line is unchanged
|
|
39
52
|
def unchanged?
|
|
40
53
|
type == :unchanged
|
|
@@ -62,6 +75,8 @@ module Canon
|
|
|
62
75
|
type: type,
|
|
63
76
|
diff_node: diff_node&.to_h,
|
|
64
77
|
normative: normative?,
|
|
78
|
+
informative: informative?,
|
|
79
|
+
formatting: formatting?,
|
|
65
80
|
}
|
|
66
81
|
end
|
|
67
82
|
|
|
@@ -70,7 +85,8 @@ module Canon
|
|
|
70
85
|
line_number == other.line_number &&
|
|
71
86
|
content == other.content &&
|
|
72
87
|
type == other.type &&
|
|
73
|
-
diff_node == other.diff_node
|
|
88
|
+
diff_node == other.diff_node &&
|
|
89
|
+
@formatting == other.instance_variable_get(:@formatting)
|
|
74
90
|
end
|
|
75
91
|
end
|
|
76
92
|
end
|
data/lib/canon/diff/diff_node.rb
CHANGED
|
@@ -6,8 +6,8 @@ module Canon
|
|
|
6
6
|
# This is created during the Comparison Layer and carries information about
|
|
7
7
|
# which dimension caused the difference and whether it's normative or informative
|
|
8
8
|
class DiffNode
|
|
9
|
-
attr_reader :node1, :node2
|
|
10
|
-
attr_accessor :normative
|
|
9
|
+
attr_reader :node1, :node2
|
|
10
|
+
attr_accessor :dimension, :reason, :normative, :formatting
|
|
11
11
|
|
|
12
12
|
# @param node1 [Object] The first node being compared
|
|
13
13
|
# @param node2 [Object] The second node being compared
|
|
@@ -21,18 +21,31 @@ module Canon
|
|
|
21
21
|
@dimension = dimension
|
|
22
22
|
@reason = reason
|
|
23
23
|
@normative = nil # Will be set by DiffClassifier
|
|
24
|
+
@formatting = nil # Will be set by DiffClassifier
|
|
24
25
|
end
|
|
25
26
|
|
|
26
27
|
# @return [Boolean] true if this diff is normative (affects equivalence)
|
|
28
|
+
# Formatting-only diffs are never normative
|
|
27
29
|
def normative?
|
|
30
|
+
return false if formatting?
|
|
31
|
+
|
|
28
32
|
@normative == true
|
|
29
33
|
end
|
|
30
34
|
|
|
31
35
|
# @return [Boolean] true if this diff is informative only (doesn't affect equivalence)
|
|
36
|
+
# Formatting-only diffs are never informative
|
|
32
37
|
def informative?
|
|
38
|
+
return false if formatting?
|
|
39
|
+
|
|
33
40
|
@normative == false
|
|
34
41
|
end
|
|
35
42
|
|
|
43
|
+
# @return [Boolean] true if this diff is formatting-only (purely cosmetic)
|
|
44
|
+
# Formatting diffs are whitespace/line break differences with no semantic meaning
|
|
45
|
+
def formatting?
|
|
46
|
+
@formatting == true
|
|
47
|
+
end
|
|
48
|
+
|
|
36
49
|
def to_h
|
|
37
50
|
{
|
|
38
51
|
node1: node1,
|
|
@@ -40,6 +53,7 @@ module Canon
|
|
|
40
53
|
dimension: dimension,
|
|
41
54
|
reason: reason,
|
|
42
55
|
normative: normative,
|
|
56
|
+
formatting: formatting,
|
|
43
57
|
}
|
|
44
58
|
end
|
|
45
59
|
|
|
@@ -49,7 +63,8 @@ module Canon
|
|
|
49
63
|
node2 == other.node2 &&
|
|
50
64
|
dimension == other.dimension &&
|
|
51
65
|
reason == other.reason &&
|
|
52
|
-
normative == other.normative
|
|
66
|
+
normative == other.normative &&
|
|
67
|
+
formatting == other.formatting
|
|
53
68
|
end
|
|
54
69
|
end
|
|
55
70
|
end
|
|
@@ -61,31 +61,75 @@ module Canon
|
|
|
61
61
|
diff_node: nil,
|
|
62
62
|
)
|
|
63
63
|
when "-"
|
|
64
|
+
# Find the diff node for this line
|
|
65
|
+
node = shared_informative_node || find_diff_node_for_line(
|
|
66
|
+
line_num, lines1, :removed
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
# Check if this is formatting-only:
|
|
70
|
+
# 1. First check if the DiffNode itself is marked as formatting-only
|
|
71
|
+
# 2. Otherwise, check line-level formatting
|
|
72
|
+
formatting = if node.respond_to?(:formatting?) && node.formatting?
|
|
73
|
+
true
|
|
74
|
+
else
|
|
75
|
+
formatting_only_line?(
|
|
76
|
+
change.old_element, ""
|
|
77
|
+
)
|
|
78
|
+
end
|
|
79
|
+
|
|
64
80
|
DiffLine.new(
|
|
65
81
|
line_number: line_num,
|
|
66
82
|
content: change.old_element,
|
|
67
83
|
type: :removed,
|
|
68
|
-
diff_node:
|
|
69
|
-
|
|
70
|
-
),
|
|
84
|
+
diff_node: node,
|
|
85
|
+
formatting: formatting,
|
|
71
86
|
)
|
|
72
87
|
when "+"
|
|
88
|
+
# Find the diff node for this line
|
|
89
|
+
node = shared_informative_node || find_diff_node_for_line(
|
|
90
|
+
line_num, lines2, :added
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# Check if this is formatting-only:
|
|
94
|
+
# 1. First check if the DiffNode itself is marked as formatting-only
|
|
95
|
+
# 2. Otherwise, check line-level formatting
|
|
96
|
+
formatting = if node.respond_to?(:formatting?) && node.formatting?
|
|
97
|
+
true
|
|
98
|
+
else
|
|
99
|
+
formatting_only_line?("",
|
|
100
|
+
change.new_element)
|
|
101
|
+
end
|
|
102
|
+
|
|
73
103
|
DiffLine.new(
|
|
74
104
|
line_number: line_num,
|
|
75
105
|
content: change.new_element,
|
|
76
106
|
type: :added,
|
|
77
|
-
diff_node:
|
|
78
|
-
|
|
79
|
-
),
|
|
107
|
+
diff_node: node,
|
|
108
|
+
formatting: formatting,
|
|
80
109
|
)
|
|
81
110
|
when "!"
|
|
111
|
+
# Find the diff node for this line
|
|
112
|
+
node = shared_informative_node || find_diff_node_for_line(
|
|
113
|
+
line_num, lines2, :changed
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
# Check if this is formatting-only:
|
|
117
|
+
# 1. First check if the DiffNode itself is marked as formatting-only
|
|
118
|
+
# 2. Otherwise, check line-level formatting
|
|
119
|
+
formatting = if node.respond_to?(:formatting?) && node.formatting?
|
|
120
|
+
true
|
|
121
|
+
else
|
|
122
|
+
formatting_only_line?(
|
|
123
|
+
change.old_element, change.new_element
|
|
124
|
+
)
|
|
125
|
+
end
|
|
126
|
+
|
|
82
127
|
DiffLine.new(
|
|
83
128
|
line_number: line_num,
|
|
84
129
|
content: change.new_element,
|
|
85
130
|
type: :changed,
|
|
86
|
-
diff_node:
|
|
87
|
-
|
|
88
|
-
),
|
|
131
|
+
diff_node: node,
|
|
132
|
+
formatting: formatting,
|
|
89
133
|
)
|
|
90
134
|
end
|
|
91
135
|
|
|
@@ -98,6 +142,15 @@ module Canon
|
|
|
98
142
|
|
|
99
143
|
private
|
|
100
144
|
|
|
145
|
+
# Check if two lines differ only in formatting (whitespace)
|
|
146
|
+
# @param line1 [String] First line
|
|
147
|
+
# @param line2 [String] Second line
|
|
148
|
+
# @return [Boolean] true if formatting-only difference
|
|
149
|
+
def formatting_only_line?(line1, line2)
|
|
150
|
+
require_relative "formatting_detector"
|
|
151
|
+
FormattingDetector.formatting_only?(line1, line2)
|
|
152
|
+
end
|
|
153
|
+
|
|
101
154
|
# Find the DiffNode associated with a line
|
|
102
155
|
# Uses element name matching for precise line-level linking
|
|
103
156
|
def find_diff_node_for_line(line_num, lines, change_type)
|
|
@@ -125,9 +178,15 @@ module Canon
|
|
|
125
178
|
end
|
|
126
179
|
|
|
127
180
|
nodes_to_check.any? do |node|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
181
|
+
# Check if the node itself has the matching name
|
|
182
|
+
if node.respond_to?(:name) && node.name == line_element_name
|
|
183
|
+
true
|
|
184
|
+
# Check if the node's parent has the matching name (for TextNode diffs)
|
|
185
|
+
elsif node.respond_to?(:parent) && node.parent.respond_to?(:name) && node.parent.name == line_element_name
|
|
186
|
+
true
|
|
187
|
+
else
|
|
188
|
+
false
|
|
189
|
+
end
|
|
131
190
|
end
|
|
132
191
|
end
|
|
133
192
|
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Diff
|
|
5
|
+
# Detects if differences between lines are formatting-only
|
|
6
|
+
# (whitespace, line breaks) with no semantic content changes
|
|
7
|
+
class FormattingDetector
|
|
8
|
+
# Detect if two lines differ only in formatting
|
|
9
|
+
#
|
|
10
|
+
# @param line1 [String, nil] First line to compare
|
|
11
|
+
# @param line2 [String, nil] Second line to compare
|
|
12
|
+
# @return [Boolean] true if lines differ only in formatting
|
|
13
|
+
def self.formatting_only?(line1, line2)
|
|
14
|
+
# If both are nil or empty, not a formatting diff
|
|
15
|
+
return false if blank?(line1) && blank?(line2)
|
|
16
|
+
|
|
17
|
+
# If only one is blank, it's not just formatting
|
|
18
|
+
return false if blank?(line1) || blank?(line2)
|
|
19
|
+
|
|
20
|
+
# Compare normalized versions
|
|
21
|
+
normalize_for_comparison(line1) == normalize_for_comparison(line2)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
# Aggressive normalization for formatting comparison
|
|
25
|
+
# Collapses all whitespace to single space and strips
|
|
26
|
+
# Also normalizes whitespace around tag delimiters
|
|
27
|
+
#
|
|
28
|
+
# @param line [String, nil] Line to normalize
|
|
29
|
+
# @return [String] Normalized line
|
|
30
|
+
def self.normalize_for_comparison(line)
|
|
31
|
+
return "" if line.nil?
|
|
32
|
+
|
|
33
|
+
# Collapse all whitespace (spaces, tabs, newlines) to single space
|
|
34
|
+
normalized = line.gsub(/\s+/, " ").strip
|
|
35
|
+
|
|
36
|
+
# Normalize whitespace around tag delimiters
|
|
37
|
+
# Remove spaces before > and after <
|
|
38
|
+
normalized = normalized.gsub(/\s+>/, ">") # "div >" -> "div>"
|
|
39
|
+
normalized.gsub(/<\s+/, "<") # "< div" -> "<div"
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Check if a line is blank (nil or whitespace-only)
|
|
43
|
+
#
|
|
44
|
+
# @param line [String, nil] Line to check
|
|
45
|
+
# @return [Boolean] true if blank
|
|
46
|
+
def self.blank?(line)
|
|
47
|
+
line.nil? || line.strip.empty?
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
private_class_method :normalize_for_comparison, :blank?
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
@@ -68,6 +68,35 @@ module Canon
|
|
|
68
68
|
|
|
69
69
|
protected
|
|
70
70
|
|
|
71
|
+
# Filter differences for display based on show_diffs setting
|
|
72
|
+
#
|
|
73
|
+
# @param differences [Array<Canon::Diff::DiffNode>] Array of differences
|
|
74
|
+
# @return [Array<Canon::Diff::DiffNode>] Filtered differences
|
|
75
|
+
def filter_differences_for_display(differences)
|
|
76
|
+
return differences if @show_diffs.nil? || @show_diffs == :all
|
|
77
|
+
|
|
78
|
+
differences.select do |diff|
|
|
79
|
+
# Handle both DiffNode objects and legacy Hash format
|
|
80
|
+
is_normative = if diff.respond_to?(:normative?)
|
|
81
|
+
diff.normative?
|
|
82
|
+
elsif diff.is_a?(Hash) && diff.key?(:normative)
|
|
83
|
+
diff[:normative]
|
|
84
|
+
else
|
|
85
|
+
# Default to normative if unknown
|
|
86
|
+
true
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
case @show_diffs
|
|
90
|
+
when :normative
|
|
91
|
+
is_normative
|
|
92
|
+
when :informative
|
|
93
|
+
!is_normative
|
|
94
|
+
else
|
|
95
|
+
true # Unknown value, show all
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
71
100
|
# Build hunks from diff with context lines
|
|
72
101
|
#
|
|
73
102
|
# @param diffs [Array] LCS diff array
|
|
@@ -252,6 +281,7 @@ module Canon
|
|
|
252
281
|
# @return [String] Formatted context
|
|
253
282
|
def format_context(context, diffs, base_line1, base_line2)
|
|
254
283
|
output = []
|
|
284
|
+
max_lines = get_max_diff_lines
|
|
255
285
|
|
|
256
286
|
(context.start_idx..context.end_idx).each do |idx|
|
|
257
287
|
change = diffs[idx]
|
|
@@ -275,6 +305,18 @@ module Canon
|
|
|
275
305
|
change.old_element,
|
|
276
306
|
change.new_element)
|
|
277
307
|
end
|
|
308
|
+
|
|
309
|
+
# Check if we've exceeded the line limit
|
|
310
|
+
if max_lines&.positive? && output.size >= max_lines
|
|
311
|
+
output << ""
|
|
312
|
+
output << colorize(
|
|
313
|
+
"... Output truncated at #{max_lines} lines ...", :yellow, :bold
|
|
314
|
+
)
|
|
315
|
+
output << colorize(
|
|
316
|
+
"Increase limit via CANON_MAX_DIFF_LINES or config.diff.max_diff_lines", :yellow
|
|
317
|
+
)
|
|
318
|
+
break
|
|
319
|
+
end
|
|
278
320
|
end
|
|
279
321
|
|
|
280
322
|
output.join("\n")
|
|
@@ -299,19 +341,22 @@ module Canon
|
|
|
299
341
|
#
|
|
300
342
|
# @param old_num [Integer, nil] Line number in old file
|
|
301
343
|
# @param new_num [Integer, nil] Line number in new file
|
|
302
|
-
# @param marker [String] Diff marker (' ', '-', '+', '
|
|
344
|
+
# @param marker [String] Diff marker (' ', '-', '+', '<', '>', '[', ']')
|
|
303
345
|
# @param content [String] Line content
|
|
304
346
|
# @param color [Symbol, nil] Color for diff lines
|
|
305
347
|
# @param informative [Boolean] Whether this is an informative diff
|
|
348
|
+
# @param formatting [Boolean] Whether this is a formatting-only diff
|
|
306
349
|
# @return [String] Formatted line
|
|
307
350
|
def format_unified_line(old_num, new_num, marker, content, color = nil,
|
|
308
|
-
informative: false)
|
|
351
|
+
informative: false, formatting: false)
|
|
309
352
|
old_str = old_num ? "%4d" % old_num : " "
|
|
310
353
|
new_str = new_num ? "%4d" % new_num : " "
|
|
311
|
-
marker_part = "#{marker} "
|
|
312
354
|
|
|
313
|
-
#
|
|
314
|
-
|
|
355
|
+
# Formatting and informative diffs use directional colors already passed in
|
|
356
|
+
# No need to override since callers set the correct color
|
|
357
|
+
effective_color = color
|
|
358
|
+
|
|
359
|
+
marker_part = "#{marker} "
|
|
315
360
|
|
|
316
361
|
visualized_content = if effective_color
|
|
317
362
|
apply_visualization(content, effective_color)
|
|
@@ -401,6 +446,16 @@ module Canon
|
|
|
401
446
|
visual
|
|
402
447
|
end
|
|
403
448
|
end
|
|
449
|
+
|
|
450
|
+
# Get max diff lines limit
|
|
451
|
+
#
|
|
452
|
+
# @return [Integer, nil] Max diff output lines
|
|
453
|
+
def get_max_diff_lines
|
|
454
|
+
# Try to get from config if available
|
|
455
|
+
config = Canon::Config.instance
|
|
456
|
+
# Default to 10,000 if config not available
|
|
457
|
+
config&.xml&.diff&.max_diff_lines || 10_000
|
|
458
|
+
end
|
|
404
459
|
end
|
|
405
460
|
end
|
|
406
461
|
end
|