canon 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/.rspec-opal +7 -0
  3. data/.rubocop_todo.yml +14 -71
  4. data/Rakefile +17 -0
  5. data/lib/canon/cli.rb +1 -1
  6. data/lib/canon/color_detector.rb +3 -5
  7. data/lib/canon/comparison/compare_profile.rb +1 -4
  8. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
  9. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
  10. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
  11. data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
  12. data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
  13. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
  14. data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
  15. data/lib/canon/comparison/format_detector.rb +29 -20
  16. data/lib/canon/comparison/html_comparator.rb +18 -29
  17. data/lib/canon/comparison/html_compare_profile.rb +3 -10
  18. data/lib/canon/comparison/html_parser.rb +1 -1
  19. data/lib/canon/comparison/json_comparator.rb +8 -0
  20. data/lib/canon/comparison/node_inspector.rb +146 -80
  21. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
  22. data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
  23. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
  24. data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
  25. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +10 -8
  26. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
  27. data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
  28. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
  29. data/lib/canon/comparison/xml_comparator.rb +61 -83
  30. data/lib/canon/comparison/xml_node_comparison.rb +15 -15
  31. data/lib/canon/comparison/yaml_comparator.rb +8 -0
  32. data/lib/canon/comparison.rb +23 -23
  33. data/lib/canon/config/profile_loader.rb +13 -13
  34. data/lib/canon/config.rb +29 -5
  35. data/lib/canon/diff/diff_classifier.rb +7 -41
  36. data/lib/canon/diff/diff_line.rb +1 -1
  37. data/lib/canon/diff/diff_node_enricher.rb +22 -24
  38. data/lib/canon/diff/node_serializer.rb +23 -30
  39. data/lib/canon/diff/path_builder.rb +24 -37
  40. data/lib/canon/diff/source_locator.rb +0 -3
  41. data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
  42. data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
  43. data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
  44. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
  45. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
  46. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
  47. data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
  48. data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
  49. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
  50. data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
  51. data/lib/canon/diff_formatter/debug_output.rb +12 -24
  52. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
  53. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
  54. data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
  55. data/lib/canon/diff_formatter/legend.rb +2 -2
  56. data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
  57. data/lib/canon/diff_formatter/theme.rb +4 -4
  58. data/lib/canon/diff_formatter.rb +2 -2
  59. data/lib/canon/formatters/html_formatter.rb +1 -1
  60. data/lib/canon/formatters/html_formatter_base.rb +1 -1
  61. data/lib/canon/formatters/xml_formatter.rb +7 -32
  62. data/lib/canon/html/data_model.rb +1 -1
  63. data/lib/canon/pretty_printer/html.rb +1 -1
  64. data/lib/canon/pretty_printer/xml.rb +16 -7
  65. data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
  66. data/lib/canon/rspec_matchers.rb +2 -2
  67. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  68. data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
  69. data/lib/canon/tree_diff/core/tree_node.rb +1 -3
  70. data/lib/canon/validators/html_validator.rb +1 -1
  71. data/lib/canon/validators/xml_validator.rb +1 -1
  72. data/lib/canon/version.rb +1 -1
  73. data/lib/canon/xml/data_model.rb +131 -137
  74. data/lib/canon/xml/namespace_helper.rb +5 -0
  75. data/lib/canon/xml/node.rb +2 -1
  76. data/lib/canon/xml/nodes/root_node.rb +4 -0
  77. data/lib/canon/xml/nodes/text_node.rb +6 -1
  78. data/lib/canon/xml/sax_builder.rb +4 -6
  79. data/lib/canon/xml_backend.rb +49 -0
  80. data/lib/canon/xml_parsing.rb +271 -0
  81. data/lib/canon.rb +3 -1
  82. data/lib/tasks/benchmark_runner.rb +1 -1
  83. data/lib/tasks/performance_helpers.rb +1 -1
  84. metadata +5 -2
@@ -32,6 +32,19 @@ module Canon
32
32
  @cache = nil
33
33
  end
34
34
 
35
+ # Deep merge two hashes. Arrays are replaced (not concatenated).
36
+ def deep_merge(base, overlay)
37
+ result = base.dup
38
+ overlay.each do |key, value|
39
+ result[key] = if result[key].is_a?(Hash) && value.is_a?(Hash)
40
+ deep_merge(result[key], value)
41
+ else
42
+ value
43
+ end
44
+ end
45
+ result
46
+ end
47
+
35
48
  private
36
49
 
37
50
  def cache
@@ -116,19 +129,6 @@ module Canon
116
129
  content = File.read(path)
117
130
  YAML.safe_load(content, permitted_classes: [Symbol]) || {}
118
131
  end
119
-
120
- # Deep merge two hashes. Arrays are replaced (not concatenated).
121
- def deep_merge(base, overlay)
122
- result = base.dup
123
- overlay.each do |key, value|
124
- result[key] = if result[key].is_a?(Hash) && value.is_a?(Hash)
125
- deep_merge(result[key], value)
126
- else
127
- value
128
- end
129
- end
130
- result
131
- end
132
132
  end
133
133
  end
134
134
  end
data/lib/canon/config.rb CHANGED
@@ -25,15 +25,19 @@ module Canon
25
25
 
26
26
  # Delegate to instance
27
27
  def method_missing(method, ...)
28
- if @instance.respond_to?(method)
29
- @instance.send(method, ...)
28
+ if %i[xml html json yaml string profile profile= diff_mode diff_mode=
29
+ use_color use_color= xml_match_profile xml_match_profile=
30
+ html_match_profile html_match_profile= reset!].include?(method)
31
+ @instance.public_send(method, ...)
30
32
  else
31
33
  super
32
34
  end
33
35
  end
34
36
 
35
37
  def respond_to_missing?(method, include_private = false)
36
- @instance.respond_to?(method) || super
38
+ %i[xml html json yaml string profile profile= diff_mode diff_mode=
39
+ use_color use_color= xml_match_profile xml_match_profile=
40
+ html_match_profile html_match_profile= reset!].include?(method) || super
37
41
  end
38
42
  end
39
43
 
@@ -700,6 +704,24 @@ module Canon
700
704
  @resolver.set_programmatic(:theme, value)
701
705
  end
702
706
 
707
+ # Theme inheritance (custom theme with base + overrides)
708
+ def theme_inheritance
709
+ @resolver.resolve(:theme_inheritance)
710
+ end
711
+
712
+ def theme_inheritance=(value)
713
+ @resolver.set_programmatic(:theme_inheritance, value)
714
+ end
715
+
716
+ # Full custom theme hash
717
+ def custom_theme
718
+ @resolver.resolve(:custom_theme)
719
+ end
720
+
721
+ def custom_theme=(value)
722
+ @resolver.set_programmatic(:custom_theme, value)
723
+ end
724
+
703
725
  # File size limit in bytes (default 5MB)
704
726
  def max_file_size
705
727
  @resolver.resolve(:max_file_size)
@@ -806,6 +828,8 @@ module Canon
806
828
  max_node_count: 10_000, # Maximum nodes in tree
807
829
  max_diff_lines: 10_000, # Maximum diff output lines
808
830
  theme: :dark, # Default theme
831
+ theme_inheritance: nil, # Custom theme with base + overrides
832
+ custom_theme: nil, # Full custom theme hash
809
833
  }
810
834
 
811
835
  env = format ? EnvProvider.load_diff_for_format(format) : {}
@@ -846,8 +870,8 @@ module Canon
846
870
  formats = data["formats"] || {}
847
871
 
848
872
  format_configs.each do |fmt_key, fmt_cfg|
849
- fmt_data = ProfileLoader.send(:deep_merge, shared,
850
- formats[fmt_key.to_s] || {})
873
+ fmt_data = ProfileLoader.deep_merge(shared,
874
+ formats[fmt_key.to_s] || {})
851
875
  fmt_cfg.apply_profile_data(fmt_data)
852
876
  end
853
877
  end
@@ -22,7 +22,7 @@ module Canon
22
22
  @match_options = match_options
23
23
  # Use the compare_profile from ResolvedMatchOptions if available (e.g., HtmlCompareProfile)
24
24
  # Otherwise create a base CompareProfile
25
- @profile = if match_options.respond_to?(:compare_profile) && match_options.compare_profile
25
+ @profile = if match_options.is_a?(Canon::Comparison::ResolvedMatchOptions) && match_options.compare_profile
26
26
  match_options.compare_profile
27
27
  else
28
28
  Canon::Comparison::CompareProfile.new(match_options)
@@ -65,7 +65,7 @@ module Canon
65
65
  # (since the dimension affects equivalence), which would prevent formatting
66
66
  # detection from being applied.
67
67
  if diff_node.dimension == :text_content &&
68
- profile.send(:behavior_for, :text_content) == :normalize &&
68
+ profile.behavior_for(:text_content) == :normalize &&
69
69
  !inside_preserve_element?(diff_node) &&
70
70
  formatting_only_diff?(diff_node)
71
71
  diff_node.formatting = true
@@ -184,16 +184,12 @@ module Canon
184
184
  end
185
185
 
186
186
  # HTML: non-breaking space (U+00A0) is never insignificant
187
- text = if node.respond_to?(:content)
188
- node.content
189
- elsif node.respond_to?(:value)
190
- node.value
191
- end
187
+ text = Canon::Comparison::NodeInspector.text_content(node)
192
188
  if text && Canon::Comparison::WhitespaceSensitivity.contains_nbsp?(text)
193
189
  return true
194
190
  end
195
191
 
196
- return false unless node.respond_to?(:parent)
192
+ return false unless Canon::XmlParsing.element?(node) || node.is_a?(Canon::Xml::Node)
197
193
 
198
194
  parent = node.parent
199
195
  return false unless parent
@@ -223,49 +219,19 @@ module Canon
223
219
  end
224
220
 
225
221
  # Extract text content from a node for formatting comparison
226
- # @param node [Object] The node to extract text from
227
- # @return [String, nil] The text content or nil
228
222
  def extract_text_content(node)
229
223
  return nil if node.nil?
230
224
 
231
- case node
232
- when Canon::Xml::Nodes::TextNode
233
- node.value
234
- when Canon::Xml::Node
235
- node.text_content
236
- when Nokogiri::XML::Node
237
- node.content.to_s
238
- when Moxml::Node
239
- node.content.to_s
240
- when String
241
- node
242
- else
243
- node.to_s
244
- end
225
+ Canon::Comparison::NodeInspector.text_content(node)
245
226
  rescue StandardError
246
227
  nil
247
228
  end
248
229
 
249
- # Check if a node is a text node
250
- # @param node [Object] The node to check
251
- # @return [Boolean] true if the node is a text node
252
230
  def text_node?(node)
253
231
  return false if node.nil?
232
+ return true if node.is_a?(String)
254
233
 
255
- case node
256
- when Canon::Xml::Nodes::TextNode
257
- true
258
- when Canon::Xml::Node
259
- node.node_type == :text
260
- when Nokogiri::XML::Node
261
- node.node_type == Nokogiri::XML::Node::TEXT_NODE
262
- when Moxml::Node
263
- node.text?
264
- when String
265
- true
266
- else
267
- false
268
- end
234
+ Canon::Comparison::NodeInspector.text_node?(node)
269
235
  end
270
236
  end
271
237
  end
@@ -130,7 +130,7 @@ module Canon
130
130
  content == other.content &&
131
131
  type == other.type &&
132
132
  diff_node == other.diff_node &&
133
- @formatting == other.instance_variable_get(:@formatting)
133
+ formatting? == other.formatting?
134
134
  end
135
135
  end
136
136
  end
@@ -1047,8 +1047,8 @@ module Canon
1047
1047
  end
1048
1048
 
1049
1049
  # search_start now points inside the innermost element
1050
- line_idx = SourceLocator.send(:find_line_for_offset, search_start,
1051
- line_map)
1050
+ line_idx = SourceLocator.find_line_for_offset(search_start,
1051
+ line_map)
1052
1052
  return nil unless line_idx
1053
1053
 
1054
1054
  col = search_start - line_map[line_idx][:start_offset]
@@ -1133,8 +1133,8 @@ range_start, range_end)
1133
1133
  # Walk up ancestors to find one with an "id" attribute
1134
1134
  ancestors = []
1135
1135
  current = node
1136
- while current.respond_to?(:parent)
1137
- ancestors << current if current.respond_to?(:name)
1136
+ while current.is_a?(Canon::Xml::Node)
1137
+ ancestors << current
1138
1138
  current = current.parent
1139
1139
  end
1140
1140
 
@@ -1143,14 +1143,14 @@ range_start, range_end)
1143
1143
  anchor_name = nil
1144
1144
  anchor_id = nil
1145
1145
  ancestors.each do |anc|
1146
- next unless anc.respond_to?(:attribute_nodes) && anc.attribute_nodes
1146
+ next unless anc.attribute_nodes
1147
1147
 
1148
1148
  anc.attribute_nodes.each do |attr|
1149
- next unless attr.respond_to?(:name) && attr.name == "id"
1149
+ next unless attr.name == "id"
1150
1150
 
1151
1151
  anchor = anc
1152
1152
  anchor_name = anc.name
1153
- anchor_id = attr.respond_to?(:value) ? attr.value : nil
1153
+ anchor_id = attr.value
1154
1154
  break
1155
1155
  end
1156
1156
  break if anchor
@@ -1219,8 +1219,8 @@ range_start, range_end)
1219
1219
  # Search for value inside leaf element
1220
1220
  value_pos = text.index(value, leaf_tag_end + 1)
1221
1221
  if value_pos && value_pos < leaf_close
1222
- line_idx = SourceLocator.send(:find_line_for_offset, value_pos,
1223
- line_map)
1222
+ line_idx = SourceLocator.find_line_for_offset(value_pos,
1223
+ line_map)
1224
1224
  return nil unless line_idx
1225
1225
 
1226
1226
  col = value_pos - line_map[line_idx][:start_offset]
@@ -1234,8 +1234,8 @@ range_start, range_end)
1234
1234
  # Direct search: value might be directly in the anchor's content
1235
1235
  value_pos = text.index(value, anchor_tag_end + 1)
1236
1236
  if value_pos && value_pos < anchor_close
1237
- line_idx = SourceLocator.send(:find_line_for_offset, value_pos,
1238
- line_map)
1237
+ line_idx = SourceLocator.find_line_for_offset(value_pos,
1238
+ line_map)
1239
1239
  return nil unless line_idx
1240
1240
 
1241
1241
  col = value_pos - line_map[line_idx][:start_offset]
@@ -1255,10 +1255,10 @@ range_start, range_end)
1255
1255
  # @param line_map [Array<Hash>] pre-built line offset map
1256
1256
  # @return [Hash, nil] location hash with :char_offset, :line_number, :col or nil
1257
1257
  def locate_textnode_parent(textnode, value, text, line_map)
1258
- return nil unless textnode.respond_to?(:parent) && textnode.parent
1258
+ return nil unless textnode.is_a?(Canon::Xml::Node) && textnode.parent
1259
1259
 
1260
1260
  parent = textnode.parent
1261
- return nil unless parent.respond_to?(:name) && parent.name
1261
+ return nil unless parent.name
1262
1262
 
1263
1263
  parent_name = parent.name
1264
1264
  parent_attrs = element_attribute_signature(parent)
@@ -1286,8 +1286,8 @@ range_start, range_end)
1286
1286
  # Search for value within this element
1287
1287
  value_pos = text.index(value, anchor_tag_end + 1)
1288
1288
  if value_pos && value_pos < anchor_close
1289
- line_idx = SourceLocator.send(:find_line_for_offset, value_pos,
1290
- line_map)
1289
+ line_idx = SourceLocator.find_line_for_offset(value_pos,
1290
+ line_map)
1291
1291
  return nil unless line_idx
1292
1292
 
1293
1293
  col = value_pos - line_map[line_idx][:start_offset]
@@ -1310,10 +1310,10 @@ range_start, range_end)
1310
1310
  # @param line_map [Array<Hash>] pre-built line offset map
1311
1311
  # @return [Hash, nil] location hash with :char_offset, :line_number, :col or nil
1312
1312
  def locate_element_in_text2(textnode, text, line_map)
1313
- return nil unless textnode.respond_to?(:parent) && textnode.parent
1313
+ return nil unless textnode.is_a?(Canon::Xml::Node) && textnode.parent
1314
1314
 
1315
1315
  parent = textnode.parent
1316
- return nil unless parent.respond_to?(:name) && parent.name
1316
+ return nil unless parent.name
1317
1317
 
1318
1318
  parent_name = parent.name
1319
1319
  parent_attrs = element_attribute_signature(parent)
@@ -1340,8 +1340,8 @@ range_start, range_end)
1340
1340
 
1341
1341
  if is_self_closing
1342
1342
  # Self-closing element - return position of <
1343
- line_idx = SourceLocator.send(:find_line_for_offset, anchor_pos,
1344
- line_map)
1343
+ line_idx = SourceLocator.find_line_for_offset(anchor_pos,
1344
+ line_map)
1345
1345
  return nil unless line_idx
1346
1346
 
1347
1347
  col = anchor_pos - line_map[line_idx][:start_offset]
@@ -1349,8 +1349,8 @@ range_start, range_end)
1349
1349
  col: col }
1350
1350
  else
1351
1351
  # Regular element - return position of >
1352
- line_idx = SourceLocator.send(:find_line_for_offset, tag_end_pos,
1353
- line_map)
1352
+ line_idx = SourceLocator.find_line_for_offset(tag_end_pos,
1353
+ line_map)
1354
1354
  return nil unless line_idx
1355
1355
 
1356
1356
  col = tag_end_pos - line_map[line_idx][:start_offset]
@@ -1368,10 +1368,8 @@ range_start, range_end)
1368
1368
  # Build a string representation of an element's attributes for matching.
1369
1369
  def element_attribute_signature(element)
1370
1370
  sig = {}
1371
- if element.respond_to?(:attribute_nodes) && element.attribute_nodes
1371
+ if element.is_a?(Canon::Xml::Node) && element.attribute_nodes
1372
1372
  element.attribute_nodes.each do |attr|
1373
- next unless attr.respond_to?(:name) && attr.respond_to?(:value)
1374
-
1375
1373
  sig[attr.name] = attr.value
1376
1374
  end
1377
1375
  end
@@ -51,16 +51,16 @@ module Canon
51
51
  return node.children.map { |child| serialize(child) }.join
52
52
  end
53
53
 
54
- # Handle Nokogiri nodes
55
- if node.respond_to?(:to_html)
56
- return node.to_html
54
+ # Handle Nokogiri/moxml nodes
55
+ if Canon::XmlParsing.xml_node?(node)
56
+ return Canon::XmlParsing.serialize(node)
57
57
  end
58
58
 
59
- if node.respond_to?(:to_xml)
60
- return node.to_xml
59
+ # Handle tree diff nodes and other objects with serialization
60
+ if node.is_a?(Canon::TreeDiff::Core::TreeNode)
61
+ return serialize_treenode(node)
61
62
  end
62
63
 
63
- # Fallback to string
64
64
  node.to_s
65
65
  end
66
66
 
@@ -105,23 +105,20 @@ module Canon
105
105
  return attrs
106
106
  end
107
107
 
108
- # Handle Nokogiri elements
109
- if node.respond_to?(:attributes) && node.attributes.is_a?(Hash)
108
+ # Handle Nokogiri/moxml elements via XmlParsing
109
+ if Canon::XmlParsing.element?(node)
110
110
  attrs = {}
111
- node.attributes.each do |name, attr|
112
- # Nokogiri attributes have different structure
113
- value = if attr.respond_to?(:value)
114
- attr.value
115
- elsif attr.is_a?(String)
116
- attr
117
- else
118
- attr.to_s
119
- end
120
- attrs[name] = value
111
+ Canon::XmlParsing.attributes(node).each do |attr|
112
+ attrs[attr.name] = attr.value
121
113
  end
122
114
  return attrs
123
115
  end
124
116
 
117
+ # Handle other elements with attributes method
118
+ if node.is_a?(Canon::Xml::Node)
119
+ return {}
120
+ end
121
+
125
122
  # Handle TreeNode attributes (already a hash)
126
123
  if node.is_a?(Hash)
127
124
  return node
@@ -143,10 +140,9 @@ module Canon
143
140
  return node.name
144
141
  end
145
142
 
146
- # Handle Nokogiri elements
147
- if node.respond_to?(:name)
148
- return node.name.to_s
149
- end
143
+ # Handle Nokogiri/moxml elements
144
+ name = Canon::XmlParsing.name(node)
145
+ return name.to_s if name
150
146
 
151
147
  ""
152
148
  end
@@ -164,16 +160,13 @@ module Canon
164
160
  return node.value.to_s
165
161
  end
166
162
 
167
- # Handle Nokogiri text nodes
168
- if node.respond_to?(:text)
169
- return node.text.to_s
163
+ # Handle Canon::Xml::Node
164
+ if node.is_a?(Canon::Xml::Node)
165
+ return node.text_content.to_s
170
166
  end
171
167
 
172
- if node.respond_to?(:content)
173
- return node.content.to_s
174
- end
175
-
176
- ""
168
+ # Handle Nokogiri/moxml nodes
169
+ Canon::XmlParsing.text_content(node).to_s
177
170
  end
178
171
 
179
172
  # Serialize attributes to string format
@@ -54,10 +54,10 @@ module Canon
54
54
  while current && depth < max_depth
55
55
  segments.unshift(segment_for_node(current))
56
56
 
57
- # Move to parent if available
58
- break unless current.respond_to?(:parent)
57
+ parent = node_parent(current)
58
+ break unless parent
59
59
 
60
- current = current.parent
60
+ current = parent
61
61
  depth += 1
62
62
  end
63
63
 
@@ -71,27 +71,16 @@ module Canon
71
71
  # @param tree_node [Object] Node (TreeNode, Canon::Xml::Node, or Nokogiri)
72
72
  # @return [String] Path segment with ordinal index
73
73
  def self.segment_for_node(tree_node)
74
- # Handle both TreeNodes (with label) and raw nodes (with name)
75
- label = if tree_node.respond_to?(:label)
76
- tree_node.label
77
- elsif tree_node.respond_to?(:name)
78
- tree_node.name
79
- else
80
- "unknown"
81
- end
74
+ label = node_label(tree_node)
82
75
 
83
76
  # Get ordinal index (position among siblings with same label)
84
77
  index = ordinal_index(tree_node)
85
78
 
86
79
  # For text nodes, use parent element name for clarity
87
80
  # e.g., instead of "/p/#text[0]" use "/p/text()[0]"
88
- if ["text",
89
- "#text"].include?(label) && tree_node.respond_to?(:parent) && tree_node.parent
90
- parent_name = if tree_node.parent.respond_to?(:label)
91
- tree_node.parent.label
92
- elsif tree_node.parent.respond_to?(:name)
93
- tree_node.parent.name
94
- end
81
+ parent = node_parent(tree_node)
82
+ if ["text", "#text"].include?(label) && parent
83
+ parent_name = node_label(parent)
95
84
  if parent_name && parent_name != "#document" && parent_name != "#document-fragment"
96
85
  return "#{parent_name}/text()[#{index}]"
97
86
  end
@@ -106,35 +95,21 @@ module Canon
106
95
  # @param tree_node [Object] Node (TreeNode, Canon::Xml::Node, or Nokogiri)
107
96
  # @return [Integer] Zero-based ordinal index
108
97
  def self.ordinal_index(tree_node)
109
- # Defensive: return 0 if no parent or doesn't respond to parent
110
- return 0 unless tree_node.respond_to?(:parent)
111
- return 0 unless tree_node.parent
98
+ parent = node_parent(tree_node)
99
+ return 0 unless parent
112
100
 
113
- # Check if parent has children
114
- return 0 unless tree_node.parent.respond_to?(:children)
115
-
116
- siblings = tree_node.parent.children
101
+ siblings = node_children(parent)
117
102
  return 0 unless siblings
118
103
 
119
104
  # Convert to array if it's a NodeSet (Nokogiri) or similar
120
105
  siblings = siblings.to_a unless siblings.is_a?(Array)
121
106
 
122
- # Get the label/name for comparison
123
- my_label = if tree_node.respond_to?(:label)
124
- tree_node.label
125
- elsif tree_node.respond_to?(:name)
126
- tree_node.name
127
- end
128
-
107
+ my_label = node_label(tree_node)
129
108
  return 0 unless my_label
130
109
 
131
110
  # Count siblings with same label that appear before this node
132
111
  same_label_siblings = siblings.select do |s|
133
- sibling_label = if s.respond_to?(:label)
134
- s.label
135
- elsif s.respond_to?(:name)
136
- s.name
137
- end
112
+ sibling_label = node_label(s)
138
113
  sibling_label == my_label
139
114
  end
140
115
 
@@ -152,6 +127,18 @@ module Canon
152
127
  segments = build_segments(tree_node)
153
128
  segments.join(" → ")
154
129
  end
130
+
131
+ def self.node_label(node)
132
+ Canon::Comparison::NodeInspector.name(node) || "unknown"
133
+ end
134
+
135
+ def self.node_parent(node)
136
+ Canon::Comparison::NodeInspector.parent(node)
137
+ end
138
+
139
+ def self.node_children(node)
140
+ Canon::Comparison::NodeInspector.children(node)
141
+ end
155
142
  end
156
143
  end
157
144
  end
@@ -86,15 +86,12 @@ module Canon
86
86
  end
87
87
 
88
88
  class << self
89
- private
90
-
91
89
  # Binary search for the line containing a character offset.
92
90
  #
93
91
  # @param char_offset [Integer] the character offset
94
92
  # @param line_map [Array<Hash>] the line offset map
95
93
  # @return [Integer, nil] the 0-based line index, or nil
96
94
  def find_line_for_offset(char_offset, line_map)
97
- # Use bsearch for efficiency on large files
98
95
  line_map.bsearch_index do |entry|
99
96
  entry[:end_offset] > char_offset
100
97
  end