canon 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec-opal +7 -0
- data/.rubocop_todo.yml +14 -71
- data/Rakefile +17 -0
- data/lib/canon/cli.rb +1 -1
- data/lib/canon/color_detector.rb +3 -5
- data/lib/canon/comparison/compare_profile.rb +1 -4
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
- data/lib/canon/comparison/format_detector.rb +29 -20
- data/lib/canon/comparison/html_comparator.rb +18 -29
- data/lib/canon/comparison/html_compare_profile.rb +3 -10
- data/lib/canon/comparison/html_parser.rb +1 -1
- data/lib/canon/comparison/json_comparator.rb +8 -0
- data/lib/canon/comparison/node_inspector.rb +146 -80
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
- data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +10 -8
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
- data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
- data/lib/canon/comparison/xml_comparator.rb +61 -83
- data/lib/canon/comparison/xml_node_comparison.rb +15 -15
- data/lib/canon/comparison/yaml_comparator.rb +8 -0
- data/lib/canon/comparison.rb +23 -23
- data/lib/canon/config/profile_loader.rb +13 -13
- data/lib/canon/config.rb +29 -5
- data/lib/canon/diff/diff_classifier.rb +7 -41
- data/lib/canon/diff/diff_line.rb +1 -1
- data/lib/canon/diff/diff_node_enricher.rb +22 -24
- data/lib/canon/diff/node_serializer.rb +23 -30
- data/lib/canon/diff/path_builder.rb +24 -37
- data/lib/canon/diff/source_locator.rb +0 -3
- data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
- data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
- data/lib/canon/diff_formatter/debug_output.rb +12 -24
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
- data/lib/canon/diff_formatter/legend.rb +2 -2
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
- data/lib/canon/diff_formatter/theme.rb +4 -4
- data/lib/canon/diff_formatter.rb +2 -2
- data/lib/canon/formatters/html_formatter.rb +1 -1
- data/lib/canon/formatters/html_formatter_base.rb +1 -1
- data/lib/canon/formatters/xml_formatter.rb +7 -32
- data/lib/canon/html/data_model.rb +1 -1
- data/lib/canon/pretty_printer/html.rb +1 -1
- data/lib/canon/pretty_printer/xml.rb +16 -7
- data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
- data/lib/canon/rspec_matchers.rb +2 -2
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +1 -3
- data/lib/canon/validators/html_validator.rb +1 -1
- data/lib/canon/validators/xml_validator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +131 -137
- data/lib/canon/xml/namespace_helper.rb +5 -0
- data/lib/canon/xml/node.rb +2 -1
- data/lib/canon/xml/nodes/root_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +6 -1
- data/lib/canon/xml/sax_builder.rb +4 -6
- data/lib/canon/xml_backend.rb +49 -0
- data/lib/canon/xml_parsing.rb +271 -0
- data/lib/canon.rb +3 -1
- data/lib/tasks/benchmark_runner.rb +1 -1
- data/lib/tasks/performance_helpers.rb +1 -1
- metadata +5 -2
|
@@ -32,6 +32,19 @@ module Canon
|
|
|
32
32
|
@cache = nil
|
|
33
33
|
end
|
|
34
34
|
|
|
35
|
+
# Deep merge two hashes. Arrays are replaced (not concatenated).
|
|
36
|
+
def deep_merge(base, overlay)
|
|
37
|
+
result = base.dup
|
|
38
|
+
overlay.each do |key, value|
|
|
39
|
+
result[key] = if result[key].is_a?(Hash) && value.is_a?(Hash)
|
|
40
|
+
deep_merge(result[key], value)
|
|
41
|
+
else
|
|
42
|
+
value
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
result
|
|
46
|
+
end
|
|
47
|
+
|
|
35
48
|
private
|
|
36
49
|
|
|
37
50
|
def cache
|
|
@@ -116,19 +129,6 @@ module Canon
|
|
|
116
129
|
content = File.read(path)
|
|
117
130
|
YAML.safe_load(content, permitted_classes: [Symbol]) || {}
|
|
118
131
|
end
|
|
119
|
-
|
|
120
|
-
# Deep merge two hashes. Arrays are replaced (not concatenated).
|
|
121
|
-
def deep_merge(base, overlay)
|
|
122
|
-
result = base.dup
|
|
123
|
-
overlay.each do |key, value|
|
|
124
|
-
result[key] = if result[key].is_a?(Hash) && value.is_a?(Hash)
|
|
125
|
-
deep_merge(result[key], value)
|
|
126
|
-
else
|
|
127
|
-
value
|
|
128
|
-
end
|
|
129
|
-
end
|
|
130
|
-
result
|
|
131
|
-
end
|
|
132
132
|
end
|
|
133
133
|
end
|
|
134
134
|
end
|
data/lib/canon/config.rb
CHANGED
|
@@ -25,15 +25,19 @@ module Canon
|
|
|
25
25
|
|
|
26
26
|
# Delegate to instance
|
|
27
27
|
def method_missing(method, ...)
|
|
28
|
-
if
|
|
29
|
-
|
|
28
|
+
if %i[xml html json yaml string profile profile= diff_mode diff_mode=
|
|
29
|
+
use_color use_color= xml_match_profile xml_match_profile=
|
|
30
|
+
html_match_profile html_match_profile= reset!].include?(method)
|
|
31
|
+
@instance.public_send(method, ...)
|
|
30
32
|
else
|
|
31
33
|
super
|
|
32
34
|
end
|
|
33
35
|
end
|
|
34
36
|
|
|
35
37
|
def respond_to_missing?(method, include_private = false)
|
|
36
|
-
|
|
38
|
+
%i[xml html json yaml string profile profile= diff_mode diff_mode=
|
|
39
|
+
use_color use_color= xml_match_profile xml_match_profile=
|
|
40
|
+
html_match_profile html_match_profile= reset!].include?(method) || super
|
|
37
41
|
end
|
|
38
42
|
end
|
|
39
43
|
|
|
@@ -700,6 +704,24 @@ module Canon
|
|
|
700
704
|
@resolver.set_programmatic(:theme, value)
|
|
701
705
|
end
|
|
702
706
|
|
|
707
|
+
# Theme inheritance (custom theme with base + overrides)
|
|
708
|
+
def theme_inheritance
|
|
709
|
+
@resolver.resolve(:theme_inheritance)
|
|
710
|
+
end
|
|
711
|
+
|
|
712
|
+
def theme_inheritance=(value)
|
|
713
|
+
@resolver.set_programmatic(:theme_inheritance, value)
|
|
714
|
+
end
|
|
715
|
+
|
|
716
|
+
# Full custom theme hash
|
|
717
|
+
def custom_theme
|
|
718
|
+
@resolver.resolve(:custom_theme)
|
|
719
|
+
end
|
|
720
|
+
|
|
721
|
+
def custom_theme=(value)
|
|
722
|
+
@resolver.set_programmatic(:custom_theme, value)
|
|
723
|
+
end
|
|
724
|
+
|
|
703
725
|
# File size limit in bytes (default 5MB)
|
|
704
726
|
def max_file_size
|
|
705
727
|
@resolver.resolve(:max_file_size)
|
|
@@ -806,6 +828,8 @@ module Canon
|
|
|
806
828
|
max_node_count: 10_000, # Maximum nodes in tree
|
|
807
829
|
max_diff_lines: 10_000, # Maximum diff output lines
|
|
808
830
|
theme: :dark, # Default theme
|
|
831
|
+
theme_inheritance: nil, # Custom theme with base + overrides
|
|
832
|
+
custom_theme: nil, # Full custom theme hash
|
|
809
833
|
}
|
|
810
834
|
|
|
811
835
|
env = format ? EnvProvider.load_diff_for_format(format) : {}
|
|
@@ -846,8 +870,8 @@ module Canon
|
|
|
846
870
|
formats = data["formats"] || {}
|
|
847
871
|
|
|
848
872
|
format_configs.each do |fmt_key, fmt_cfg|
|
|
849
|
-
fmt_data = ProfileLoader.
|
|
850
|
-
|
|
873
|
+
fmt_data = ProfileLoader.deep_merge(shared,
|
|
874
|
+
formats[fmt_key.to_s] || {})
|
|
851
875
|
fmt_cfg.apply_profile_data(fmt_data)
|
|
852
876
|
end
|
|
853
877
|
end
|
|
@@ -22,7 +22,7 @@ module Canon
|
|
|
22
22
|
@match_options = match_options
|
|
23
23
|
# Use the compare_profile from ResolvedMatchOptions if available (e.g., HtmlCompareProfile)
|
|
24
24
|
# Otherwise create a base CompareProfile
|
|
25
|
-
@profile = if match_options.
|
|
25
|
+
@profile = if match_options.is_a?(Canon::Comparison::ResolvedMatchOptions) && match_options.compare_profile
|
|
26
26
|
match_options.compare_profile
|
|
27
27
|
else
|
|
28
28
|
Canon::Comparison::CompareProfile.new(match_options)
|
|
@@ -65,7 +65,7 @@ module Canon
|
|
|
65
65
|
# (since the dimension affects equivalence), which would prevent formatting
|
|
66
66
|
# detection from being applied.
|
|
67
67
|
if diff_node.dimension == :text_content &&
|
|
68
|
-
profile.
|
|
68
|
+
profile.behavior_for(:text_content) == :normalize &&
|
|
69
69
|
!inside_preserve_element?(diff_node) &&
|
|
70
70
|
formatting_only_diff?(diff_node)
|
|
71
71
|
diff_node.formatting = true
|
|
@@ -184,16 +184,12 @@ module Canon
|
|
|
184
184
|
end
|
|
185
185
|
|
|
186
186
|
# HTML: non-breaking space (U+00A0) is never insignificant
|
|
187
|
-
text =
|
|
188
|
-
node.content
|
|
189
|
-
elsif node.respond_to?(:value)
|
|
190
|
-
node.value
|
|
191
|
-
end
|
|
187
|
+
text = Canon::Comparison::NodeInspector.text_content(node)
|
|
192
188
|
if text && Canon::Comparison::WhitespaceSensitivity.contains_nbsp?(text)
|
|
193
189
|
return true
|
|
194
190
|
end
|
|
195
191
|
|
|
196
|
-
return false unless node.
|
|
192
|
+
return false unless Canon::XmlParsing.element?(node) || node.is_a?(Canon::Xml::Node)
|
|
197
193
|
|
|
198
194
|
parent = node.parent
|
|
199
195
|
return false unless parent
|
|
@@ -223,49 +219,19 @@ module Canon
|
|
|
223
219
|
end
|
|
224
220
|
|
|
225
221
|
# Extract text content from a node for formatting comparison
|
|
226
|
-
# @param node [Object] The node to extract text from
|
|
227
|
-
# @return [String, nil] The text content or nil
|
|
228
222
|
def extract_text_content(node)
|
|
229
223
|
return nil if node.nil?
|
|
230
224
|
|
|
231
|
-
|
|
232
|
-
when Canon::Xml::Nodes::TextNode
|
|
233
|
-
node.value
|
|
234
|
-
when Canon::Xml::Node
|
|
235
|
-
node.text_content
|
|
236
|
-
when Nokogiri::XML::Node
|
|
237
|
-
node.content.to_s
|
|
238
|
-
when Moxml::Node
|
|
239
|
-
node.content.to_s
|
|
240
|
-
when String
|
|
241
|
-
node
|
|
242
|
-
else
|
|
243
|
-
node.to_s
|
|
244
|
-
end
|
|
225
|
+
Canon::Comparison::NodeInspector.text_content(node)
|
|
245
226
|
rescue StandardError
|
|
246
227
|
nil
|
|
247
228
|
end
|
|
248
229
|
|
|
249
|
-
# Check if a node is a text node
|
|
250
|
-
# @param node [Object] The node to check
|
|
251
|
-
# @return [Boolean] true if the node is a text node
|
|
252
230
|
def text_node?(node)
|
|
253
231
|
return false if node.nil?
|
|
232
|
+
return true if node.is_a?(String)
|
|
254
233
|
|
|
255
|
-
|
|
256
|
-
when Canon::Xml::Nodes::TextNode
|
|
257
|
-
true
|
|
258
|
-
when Canon::Xml::Node
|
|
259
|
-
node.node_type == :text
|
|
260
|
-
when Nokogiri::XML::Node
|
|
261
|
-
node.node_type == Nokogiri::XML::Node::TEXT_NODE
|
|
262
|
-
when Moxml::Node
|
|
263
|
-
node.text?
|
|
264
|
-
when String
|
|
265
|
-
true
|
|
266
|
-
else
|
|
267
|
-
false
|
|
268
|
-
end
|
|
234
|
+
Canon::Comparison::NodeInspector.text_node?(node)
|
|
269
235
|
end
|
|
270
236
|
end
|
|
271
237
|
end
|
data/lib/canon/diff/diff_line.rb
CHANGED
|
@@ -1047,8 +1047,8 @@ module Canon
|
|
|
1047
1047
|
end
|
|
1048
1048
|
|
|
1049
1049
|
# search_start now points inside the innermost element
|
|
1050
|
-
line_idx = SourceLocator.
|
|
1051
|
-
|
|
1050
|
+
line_idx = SourceLocator.find_line_for_offset(search_start,
|
|
1051
|
+
line_map)
|
|
1052
1052
|
return nil unless line_idx
|
|
1053
1053
|
|
|
1054
1054
|
col = search_start - line_map[line_idx][:start_offset]
|
|
@@ -1133,8 +1133,8 @@ range_start, range_end)
|
|
|
1133
1133
|
# Walk up ancestors to find one with an "id" attribute
|
|
1134
1134
|
ancestors = []
|
|
1135
1135
|
current = node
|
|
1136
|
-
while current.
|
|
1137
|
-
ancestors << current
|
|
1136
|
+
while current.is_a?(Canon::Xml::Node)
|
|
1137
|
+
ancestors << current
|
|
1138
1138
|
current = current.parent
|
|
1139
1139
|
end
|
|
1140
1140
|
|
|
@@ -1143,14 +1143,14 @@ range_start, range_end)
|
|
|
1143
1143
|
anchor_name = nil
|
|
1144
1144
|
anchor_id = nil
|
|
1145
1145
|
ancestors.each do |anc|
|
|
1146
|
-
next unless anc.
|
|
1146
|
+
next unless anc.attribute_nodes
|
|
1147
1147
|
|
|
1148
1148
|
anc.attribute_nodes.each do |attr|
|
|
1149
|
-
next unless attr.
|
|
1149
|
+
next unless attr.name == "id"
|
|
1150
1150
|
|
|
1151
1151
|
anchor = anc
|
|
1152
1152
|
anchor_name = anc.name
|
|
1153
|
-
anchor_id = attr.
|
|
1153
|
+
anchor_id = attr.value
|
|
1154
1154
|
break
|
|
1155
1155
|
end
|
|
1156
1156
|
break if anchor
|
|
@@ -1219,8 +1219,8 @@ range_start, range_end)
|
|
|
1219
1219
|
# Search for value inside leaf element
|
|
1220
1220
|
value_pos = text.index(value, leaf_tag_end + 1)
|
|
1221
1221
|
if value_pos && value_pos < leaf_close
|
|
1222
|
-
line_idx = SourceLocator.
|
|
1223
|
-
|
|
1222
|
+
line_idx = SourceLocator.find_line_for_offset(value_pos,
|
|
1223
|
+
line_map)
|
|
1224
1224
|
return nil unless line_idx
|
|
1225
1225
|
|
|
1226
1226
|
col = value_pos - line_map[line_idx][:start_offset]
|
|
@@ -1234,8 +1234,8 @@ range_start, range_end)
|
|
|
1234
1234
|
# Direct search: value might be directly in the anchor's content
|
|
1235
1235
|
value_pos = text.index(value, anchor_tag_end + 1)
|
|
1236
1236
|
if value_pos && value_pos < anchor_close
|
|
1237
|
-
line_idx = SourceLocator.
|
|
1238
|
-
|
|
1237
|
+
line_idx = SourceLocator.find_line_for_offset(value_pos,
|
|
1238
|
+
line_map)
|
|
1239
1239
|
return nil unless line_idx
|
|
1240
1240
|
|
|
1241
1241
|
col = value_pos - line_map[line_idx][:start_offset]
|
|
@@ -1255,10 +1255,10 @@ range_start, range_end)
|
|
|
1255
1255
|
# @param line_map [Array<Hash>] pre-built line offset map
|
|
1256
1256
|
# @return [Hash, nil] location hash with :char_offset, :line_number, :col or nil
|
|
1257
1257
|
def locate_textnode_parent(textnode, value, text, line_map)
|
|
1258
|
-
return nil unless textnode.
|
|
1258
|
+
return nil unless textnode.is_a?(Canon::Xml::Node) && textnode.parent
|
|
1259
1259
|
|
|
1260
1260
|
parent = textnode.parent
|
|
1261
|
-
return nil unless parent.
|
|
1261
|
+
return nil unless parent.name
|
|
1262
1262
|
|
|
1263
1263
|
parent_name = parent.name
|
|
1264
1264
|
parent_attrs = element_attribute_signature(parent)
|
|
@@ -1286,8 +1286,8 @@ range_start, range_end)
|
|
|
1286
1286
|
# Search for value within this element
|
|
1287
1287
|
value_pos = text.index(value, anchor_tag_end + 1)
|
|
1288
1288
|
if value_pos && value_pos < anchor_close
|
|
1289
|
-
line_idx = SourceLocator.
|
|
1290
|
-
|
|
1289
|
+
line_idx = SourceLocator.find_line_for_offset(value_pos,
|
|
1290
|
+
line_map)
|
|
1291
1291
|
return nil unless line_idx
|
|
1292
1292
|
|
|
1293
1293
|
col = value_pos - line_map[line_idx][:start_offset]
|
|
@@ -1310,10 +1310,10 @@ range_start, range_end)
|
|
|
1310
1310
|
# @param line_map [Array<Hash>] pre-built line offset map
|
|
1311
1311
|
# @return [Hash, nil] location hash with :char_offset, :line_number, :col or nil
|
|
1312
1312
|
def locate_element_in_text2(textnode, text, line_map)
|
|
1313
|
-
return nil unless textnode.
|
|
1313
|
+
return nil unless textnode.is_a?(Canon::Xml::Node) && textnode.parent
|
|
1314
1314
|
|
|
1315
1315
|
parent = textnode.parent
|
|
1316
|
-
return nil unless parent.
|
|
1316
|
+
return nil unless parent.name
|
|
1317
1317
|
|
|
1318
1318
|
parent_name = parent.name
|
|
1319
1319
|
parent_attrs = element_attribute_signature(parent)
|
|
@@ -1340,8 +1340,8 @@ range_start, range_end)
|
|
|
1340
1340
|
|
|
1341
1341
|
if is_self_closing
|
|
1342
1342
|
# Self-closing element - return position of <
|
|
1343
|
-
line_idx = SourceLocator.
|
|
1344
|
-
|
|
1343
|
+
line_idx = SourceLocator.find_line_for_offset(anchor_pos,
|
|
1344
|
+
line_map)
|
|
1345
1345
|
return nil unless line_idx
|
|
1346
1346
|
|
|
1347
1347
|
col = anchor_pos - line_map[line_idx][:start_offset]
|
|
@@ -1349,8 +1349,8 @@ range_start, range_end)
|
|
|
1349
1349
|
col: col }
|
|
1350
1350
|
else
|
|
1351
1351
|
# Regular element - return position of >
|
|
1352
|
-
line_idx = SourceLocator.
|
|
1353
|
-
|
|
1352
|
+
line_idx = SourceLocator.find_line_for_offset(tag_end_pos,
|
|
1353
|
+
line_map)
|
|
1354
1354
|
return nil unless line_idx
|
|
1355
1355
|
|
|
1356
1356
|
col = tag_end_pos - line_map[line_idx][:start_offset]
|
|
@@ -1368,10 +1368,8 @@ range_start, range_end)
|
|
|
1368
1368
|
# Build a string representation of an element's attributes for matching.
|
|
1369
1369
|
def element_attribute_signature(element)
|
|
1370
1370
|
sig = {}
|
|
1371
|
-
if element.
|
|
1371
|
+
if element.is_a?(Canon::Xml::Node) && element.attribute_nodes
|
|
1372
1372
|
element.attribute_nodes.each do |attr|
|
|
1373
|
-
next unless attr.respond_to?(:name) && attr.respond_to?(:value)
|
|
1374
|
-
|
|
1375
1373
|
sig[attr.name] = attr.value
|
|
1376
1374
|
end
|
|
1377
1375
|
end
|
|
@@ -51,16 +51,16 @@ module Canon
|
|
|
51
51
|
return node.children.map { |child| serialize(child) }.join
|
|
52
52
|
end
|
|
53
53
|
|
|
54
|
-
# Handle Nokogiri nodes
|
|
55
|
-
if
|
|
56
|
-
return node
|
|
54
|
+
# Handle Nokogiri/moxml nodes
|
|
55
|
+
if Canon::XmlParsing.xml_node?(node)
|
|
56
|
+
return Canon::XmlParsing.serialize(node)
|
|
57
57
|
end
|
|
58
58
|
|
|
59
|
-
|
|
60
|
-
|
|
59
|
+
# Handle tree diff nodes and other objects with serialization
|
|
60
|
+
if node.is_a?(Canon::TreeDiff::Core::TreeNode)
|
|
61
|
+
return serialize_treenode(node)
|
|
61
62
|
end
|
|
62
63
|
|
|
63
|
-
# Fallback to string
|
|
64
64
|
node.to_s
|
|
65
65
|
end
|
|
66
66
|
|
|
@@ -105,23 +105,20 @@ module Canon
|
|
|
105
105
|
return attrs
|
|
106
106
|
end
|
|
107
107
|
|
|
108
|
-
# Handle Nokogiri elements
|
|
109
|
-
if
|
|
108
|
+
# Handle Nokogiri/moxml elements via XmlParsing
|
|
109
|
+
if Canon::XmlParsing.element?(node)
|
|
110
110
|
attrs = {}
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
value = if attr.respond_to?(:value)
|
|
114
|
-
attr.value
|
|
115
|
-
elsif attr.is_a?(String)
|
|
116
|
-
attr
|
|
117
|
-
else
|
|
118
|
-
attr.to_s
|
|
119
|
-
end
|
|
120
|
-
attrs[name] = value
|
|
111
|
+
Canon::XmlParsing.attributes(node).each do |attr|
|
|
112
|
+
attrs[attr.name] = attr.value
|
|
121
113
|
end
|
|
122
114
|
return attrs
|
|
123
115
|
end
|
|
124
116
|
|
|
117
|
+
# Handle other elements with attributes method
|
|
118
|
+
if node.is_a?(Canon::Xml::Node)
|
|
119
|
+
return {}
|
|
120
|
+
end
|
|
121
|
+
|
|
125
122
|
# Handle TreeNode attributes (already a hash)
|
|
126
123
|
if node.is_a?(Hash)
|
|
127
124
|
return node
|
|
@@ -143,10 +140,9 @@ module Canon
|
|
|
143
140
|
return node.name
|
|
144
141
|
end
|
|
145
142
|
|
|
146
|
-
# Handle Nokogiri elements
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
end
|
|
143
|
+
# Handle Nokogiri/moxml elements
|
|
144
|
+
name = Canon::XmlParsing.name(node)
|
|
145
|
+
return name.to_s if name
|
|
150
146
|
|
|
151
147
|
""
|
|
152
148
|
end
|
|
@@ -164,16 +160,13 @@ module Canon
|
|
|
164
160
|
return node.value.to_s
|
|
165
161
|
end
|
|
166
162
|
|
|
167
|
-
# Handle
|
|
168
|
-
if node.
|
|
169
|
-
return node.
|
|
163
|
+
# Handle Canon::Xml::Node
|
|
164
|
+
if node.is_a?(Canon::Xml::Node)
|
|
165
|
+
return node.text_content.to_s
|
|
170
166
|
end
|
|
171
167
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
end
|
|
175
|
-
|
|
176
|
-
""
|
|
168
|
+
# Handle Nokogiri/moxml nodes
|
|
169
|
+
Canon::XmlParsing.text_content(node).to_s
|
|
177
170
|
end
|
|
178
171
|
|
|
179
172
|
# Serialize attributes to string format
|
|
@@ -54,10 +54,10 @@ module Canon
|
|
|
54
54
|
while current && depth < max_depth
|
|
55
55
|
segments.unshift(segment_for_node(current))
|
|
56
56
|
|
|
57
|
-
|
|
58
|
-
break unless
|
|
57
|
+
parent = node_parent(current)
|
|
58
|
+
break unless parent
|
|
59
59
|
|
|
60
|
-
current =
|
|
60
|
+
current = parent
|
|
61
61
|
depth += 1
|
|
62
62
|
end
|
|
63
63
|
|
|
@@ -71,27 +71,16 @@ module Canon
|
|
|
71
71
|
# @param tree_node [Object] Node (TreeNode, Canon::Xml::Node, or Nokogiri)
|
|
72
72
|
# @return [String] Path segment with ordinal index
|
|
73
73
|
def self.segment_for_node(tree_node)
|
|
74
|
-
|
|
75
|
-
label = if tree_node.respond_to?(:label)
|
|
76
|
-
tree_node.label
|
|
77
|
-
elsif tree_node.respond_to?(:name)
|
|
78
|
-
tree_node.name
|
|
79
|
-
else
|
|
80
|
-
"unknown"
|
|
81
|
-
end
|
|
74
|
+
label = node_label(tree_node)
|
|
82
75
|
|
|
83
76
|
# Get ordinal index (position among siblings with same label)
|
|
84
77
|
index = ordinal_index(tree_node)
|
|
85
78
|
|
|
86
79
|
# For text nodes, use parent element name for clarity
|
|
87
80
|
# e.g., instead of "/p/#text[0]" use "/p/text()[0]"
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
parent_name =
|
|
91
|
-
tree_node.parent.label
|
|
92
|
-
elsif tree_node.parent.respond_to?(:name)
|
|
93
|
-
tree_node.parent.name
|
|
94
|
-
end
|
|
81
|
+
parent = node_parent(tree_node)
|
|
82
|
+
if ["text", "#text"].include?(label) && parent
|
|
83
|
+
parent_name = node_label(parent)
|
|
95
84
|
if parent_name && parent_name != "#document" && parent_name != "#document-fragment"
|
|
96
85
|
return "#{parent_name}/text()[#{index}]"
|
|
97
86
|
end
|
|
@@ -106,35 +95,21 @@ module Canon
|
|
|
106
95
|
# @param tree_node [Object] Node (TreeNode, Canon::Xml::Node, or Nokogiri)
|
|
107
96
|
# @return [Integer] Zero-based ordinal index
|
|
108
97
|
def self.ordinal_index(tree_node)
|
|
109
|
-
|
|
110
|
-
return 0 unless
|
|
111
|
-
return 0 unless tree_node.parent
|
|
98
|
+
parent = node_parent(tree_node)
|
|
99
|
+
return 0 unless parent
|
|
112
100
|
|
|
113
|
-
|
|
114
|
-
return 0 unless tree_node.parent.respond_to?(:children)
|
|
115
|
-
|
|
116
|
-
siblings = tree_node.parent.children
|
|
101
|
+
siblings = node_children(parent)
|
|
117
102
|
return 0 unless siblings
|
|
118
103
|
|
|
119
104
|
# Convert to array if it's a NodeSet (Nokogiri) or similar
|
|
120
105
|
siblings = siblings.to_a unless siblings.is_a?(Array)
|
|
121
106
|
|
|
122
|
-
|
|
123
|
-
my_label = if tree_node.respond_to?(:label)
|
|
124
|
-
tree_node.label
|
|
125
|
-
elsif tree_node.respond_to?(:name)
|
|
126
|
-
tree_node.name
|
|
127
|
-
end
|
|
128
|
-
|
|
107
|
+
my_label = node_label(tree_node)
|
|
129
108
|
return 0 unless my_label
|
|
130
109
|
|
|
131
110
|
# Count siblings with same label that appear before this node
|
|
132
111
|
same_label_siblings = siblings.select do |s|
|
|
133
|
-
sibling_label =
|
|
134
|
-
s.label
|
|
135
|
-
elsif s.respond_to?(:name)
|
|
136
|
-
s.name
|
|
137
|
-
end
|
|
112
|
+
sibling_label = node_label(s)
|
|
138
113
|
sibling_label == my_label
|
|
139
114
|
end
|
|
140
115
|
|
|
@@ -152,6 +127,18 @@ module Canon
|
|
|
152
127
|
segments = build_segments(tree_node)
|
|
153
128
|
segments.join(" → ")
|
|
154
129
|
end
|
|
130
|
+
|
|
131
|
+
def self.node_label(node)
|
|
132
|
+
Canon::Comparison::NodeInspector.name(node) || "unknown"
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def self.node_parent(node)
|
|
136
|
+
Canon::Comparison::NodeInspector.parent(node)
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
def self.node_children(node)
|
|
140
|
+
Canon::Comparison::NodeInspector.children(node)
|
|
141
|
+
end
|
|
155
142
|
end
|
|
156
143
|
end
|
|
157
144
|
end
|
|
@@ -86,15 +86,12 @@ module Canon
|
|
|
86
86
|
end
|
|
87
87
|
|
|
88
88
|
class << self
|
|
89
|
-
private
|
|
90
|
-
|
|
91
89
|
# Binary search for the line containing a character offset.
|
|
92
90
|
#
|
|
93
91
|
# @param char_offset [Integer] the character offset
|
|
94
92
|
# @param line_map [Array<Hash>] the line offset map
|
|
95
93
|
# @return [Integer, nil] the 0-based line index, or nil
|
|
96
94
|
def find_line_for_offset(char_offset, line_map)
|
|
97
|
-
# Use bsearch for efficiency on large files
|
|
98
95
|
line_map.bsearch_index do |entry|
|
|
99
96
|
entry[:end_offset] > char_offset
|
|
100
97
|
end
|