moxml 0.1.15 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +31 -0
  3. data/TODO.remaining/1-entity-reference-adapter-support.md +157 -0
  4. data/TODO.remaining/2-entity-restoration-model-driven.md +169 -0
  5. data/TODO.remaining/3-entity-reference-test-coverage.md +170 -0
  6. data/TODO.remaining/4-lenient-entities-mode.md +106 -0
  7. data/TODO.remaining/5-fixture-integrity.md +65 -0
  8. data/TODO.remaining/6-ox-element-ordering-bug.md +36 -0
  9. data/TODO.remaining/7-headed-ox-limitations.md +95 -0
  10. data/TODO.remaining/8-xpath-predicate-gaps.md +68 -0
  11. data/TODO.remaining/9-cleanup-hygiene.md +42 -0
  12. data/TODO.remaining/README.md +54 -0
  13. data/benchmarks/generate_report.rb +1 -1
  14. data/lib/moxml/adapter/base.rb +14 -0
  15. data/lib/moxml/adapter/customized_libxml/declaration.rb +1 -1
  16. data/lib/moxml/adapter/customized_rexml/formatter.rb +42 -20
  17. data/lib/moxml/adapter/headed_ox.rb +28 -11
  18. data/lib/moxml/adapter/libxml.rb +165 -65
  19. data/lib/moxml/adapter/nokogiri.rb +19 -7
  20. data/lib/moxml/adapter/oga.rb +28 -12
  21. data/lib/moxml/adapter/ox.rb +11 -3
  22. data/lib/moxml/adapter/rexml.rb +40 -8
  23. data/lib/moxml/attribute.rb +1 -1
  24. data/lib/moxml/builder.rb +77 -24
  25. data/lib/moxml/config.rb +18 -1
  26. data/lib/moxml/declaration.rb +4 -2
  27. data/lib/moxml/document.rb +5 -2
  28. data/lib/moxml/document_builder.rb +9 -8
  29. data/lib/moxml/element.rb +10 -5
  30. data/lib/moxml/entity_registry.rb +16 -2
  31. data/lib/moxml/native_attachment.rb +65 -0
  32. data/lib/moxml/node.rb +17 -49
  33. data/lib/moxml/node_set.rb +1 -1
  34. data/lib/moxml/version.rb +1 -1
  35. data/lib/moxml/xpath/compiler.rb +4 -1
  36. data/lib/moxml.rb +1 -0
  37. data/scripts/format_xml.rb +16 -0
  38. data/scripts/pretty_format_xml.rb +14 -0
  39. data/spec/consistency/round_trip_spec.rb +3 -30
  40. data/spec/integration/all_adapters_spec.rb +1 -0
  41. data/spec/integration/headed_ox_integration_spec.rb +0 -2
  42. data/spec/integration/shared_examples/edge_cases.rb +4 -4
  43. data/spec/integration/shared_examples/integration_workflows.rb +3 -3
  44. data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +1 -1
  45. data/spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb +224 -0
  46. data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +1 -1
  47. data/spec/moxml/adapter/headed_ox_spec.rb +8 -8
  48. data/spec/moxml/builder_spec.rb +234 -0
  49. data/spec/moxml/xpath/axes_spec.rb +1 -1
  50. data/spec/moxml/xpath/compiler_spec.rb +2 -2
  51. data/spec/moxml/xpath/functions/position_functions_spec.rb +5 -5
  52. data/spec/moxml/xpath/functions/special_functions_spec.rb +1 -1
  53. data/spec/performance/memory_usage_spec.rb +0 -4
  54. metadata +15 -1
@@ -0,0 +1,65 @@
1
+ # TODO 5: Fixture Integrity and CI Validation
2
+
3
+ ## Problem
4
+
5
+ The Metanorma bilingual presentation fixture was previously corrupted (error
6
+ messages appended after valid XML). It was truncated to fix the corruption,
7
+ but the truncated version has not been verified against the upstream source.
8
+
9
+ Additionally, there is no automated validation of XML fixtures in CI — a
10
+ corrupted fixture could be introduced and not caught until round-trip tests
11
+ fail with confusing errors.
12
+
13
+ ## Remaining Tasks
14
+
15
+ ### 1. Verify Bilingual Fixture Against Upstream
16
+
17
+ The file `spec/fixtures/round-trips/metanorma/bilingual.presentation.xml`
18
+ was truncated from 111,606 lines to fix corruption. Need to:
19
+
20
+ - Obtain a clean copy from the Metanorma project
21
+ - Compare with the current truncated version (21,211 lines — different from
22
+ the 55,802 lines mentioned in the original TODO, suggesting further changes)
23
+ - Confirm no data loss occurred in truncation
24
+
25
+ ### 2. Add CI Fixture Validation
26
+
27
+ Add a Rake task or RSpec test that validates all XML fixtures are well-formed
28
+ before running round-trip tests. This prevents silent corruption.
29
+
30
+ **Option A**: Rake task using `xmllint`:
31
+ ```ruby
32
+ # In Rakefile
33
+ namespace :spec do
34
+ task :validate_fixtures do
35
+ errors = []
36
+ Dir.glob("spec/fixtures/**/*.xml").each do |path|
37
+ output = `xmllint --noout "#{path}" 2>&1`
38
+ errors << "#{path}: #{output}" unless $?.success?
39
+ end
40
+ raise "Invalid fixtures:\n#{errors.join("\n")}" unless errors.empty?
41
+ end
42
+ end
43
+ task spec: ["spec:validate_fixtures"]
44
+ ```
45
+
46
+ **Option B**: RSpec test:
47
+ ```ruby
48
+ # spec/integration/fixture_validation_spec.rb
49
+ RSpec.describe "XML fixtures" do
50
+ Dir.glob("spec/fixtures/**/*.xml").each do |path|
51
+ it "#{path} is valid XML" do
52
+ ctx = Moxml.new(:nokogiri)
53
+ expect { ctx.parse(File.read(path)) }.not_to raise_error
54
+ end
55
+ end
56
+ end
57
+ ```
58
+
59
+ Option A is preferred — `xmllint` is stricter and catches issues that
60
+ lenient parsers might silently accept.
61
+
62
+ ## Files to Create/Modify
63
+
64
+ - `Rakefile` — add `spec:validate_fixtures` task
65
+ - Verify/replace `spec/fixtures/round-trips/metanorma/bilingual.presentation.xml`
@@ -0,0 +1,36 @@
1
+ # TODO 6: Ox Adapter Element Ordering Bug
2
+
3
+ ## Problem
4
+
5
+ When round-tripping certain XML fixtures through the Ox adapter, child elements
6
+ are produced in a different order compared to Nokogiri, Oga, and REXML. This
7
+ causes cross-adapter consistency failures for `elements_with_attributes`
8
+ comparisons.
9
+
10
+ The semantic equivalence check (double round-trip) still passes, so the
11
+ document content is correct — only the ordering is wrong.
12
+
13
+ ## Current State
14
+
15
+ Suppressed in `spec/consistency/round_trip_spec.rb:332` via
16
+ `KNOWN_ELEMENT_ORDERING_ISSUES` set. Affected fixture/adapter combinations:
17
+
18
+ ```
19
+ niso-jats/element_citation.xml nokogiri <-> ox
20
+ niso-jats/element_citation.xml ox <-> oga
21
+ niso-jats/element_citation.xml rexml <-> ox
22
+ ```
23
+
24
+ ## Investigation Needed
25
+
26
+ - Determine whether Ox's DOM building reorders nodes or if the issue is in
27
+ Moxml's tree traversal during serialization.
28
+ - Check if Ox's `Ox::Element#nodes` preserves insertion order.
29
+ - Compare Ox's native serialization (`Ox.dump`) with Moxml's custom serializer
30
+ to narrow down where the reorder happens.
31
+
32
+ ## Files
33
+
34
+ - `spec/consistency/round_trip_spec.rb` — suppression set
35
+ - `lib/moxml/adapter/ox.rb` — serialization path
36
+ - `lib/moxml/adapter/customized_ox/` — wrapper classes involved in tree walk
@@ -0,0 +1,95 @@
1
+ # TODO 7: HeadedOx Adapter Limitations (15 Skipped Tests)
2
+
3
+ ## Problem
4
+
5
+ HeadedOx (Ox + pure-Ruby XPath engine) has 15 skipped tests representing 7
6
+ distinct limitation areas. Some require upstream Ox gem enhancements; others
7
+ need investigation or Moxml-side fixes.
8
+
9
+ Full details in `docs/_pages/headed-ox-limitations.adoc`.
10
+
11
+ ## Limitation Areas
12
+
13
+ ### 7a. XPath `@*` Attribute Wildcard (3 tests)
14
+
15
+ The XPath parser does not support the `@*` wildcard on the attribute axis.
16
+
17
+ **Tests:**
18
+ - `spec/moxml/xpath/compiler_spec.rb:156` — descendant-or-self wildcards
19
+ - `spec/moxml/xpath/compiler_spec.rb:192` — attribute axis wildcards
20
+ - `spec/moxml/xpath/axes_spec.rb:225` — attribute + predicate combinations
21
+
22
+ **Workaround:** Use `element.attributes.values` via Ruby enumeration.
23
+
24
+ ### 7b. Namespace Methods (4 tests)
25
+
26
+ Ox does not expose namespace information through its public API. The adapter
27
+ cannot implement `node.namespace`, `node.namespaces`, or namespace inheritance.
28
+
29
+ **Tests:**
30
+ - `spec/integration/shared_examples/edge_cases.rb:93` — default namespace changes
31
+ - `spec/integration/shared_examples/edge_cases.rb:119` — recursive namespace defs
32
+ - `spec/integration/shared_examples/edge_cases.rb:139` — namespace-prefixed attr access
33
+ - `spec/integration/shared_examples/integration_workflows.rb:83` — complex namespaces
34
+
35
+ **Requires:** Ox gem API enhancement (namespace accessors on `Ox::Element`).
36
+
37
+ ### 7c. Text Content from Nested XPath Results (4 tests)
38
+
39
+ Accessing text content from child elements of XPath result nodes returns empty
40
+ strings. Likely a node wrapping or text node handling issue in HeadedOx.
41
+
42
+ **Tests:**
43
+ - `spec/moxml/adapter/headed_ox_spec.rb:74` — string functions in predicates
44
+ - `spec/moxml/adapter/headed_ox_spec.rb:82` — position functions
45
+ - `spec/moxml/adapter/headed_ox_spec.rb:304` — last() function
46
+ - `spec/integration/shared_examples/node_wrappers/node_behavior.rb:113` — XPath text access
47
+
48
+ **Needs:** Investigation — check node wrapping and text node registration.
49
+
50
+ ### 7d. CDATA `]]>` Escaping (2 tests)
51
+
52
+ Ox serializes CDATA sections as-is without splitting on `]]>` sequences, which
53
+ violates the XML spec.
54
+
55
+ **Tests:**
56
+ - `spec/integration/shared_examples/edge_cases.rb:39`
57
+ - `spec/integration/shared_examples/node_wrappers/cdata_behavior.rb:44`
58
+
59
+ **Requires:** Ox gem enhancement or Moxml-side serialization override.
60
+
61
+ ### 7e. Parent Node Setter (1 test)
62
+
63
+ Ox has no native method to change a node's parent after creation.
64
+
65
+ **Test:**
66
+ - `spec/integration/shared_examples/integration_workflows.rb:126`
67
+
68
+ **Requires:** Ox gem reparenting API or workaround via remove + re-add.
69
+
70
+ ### 7f. Namespace-Aware XPath with Predicates (1 test)
71
+
72
+ Queries like `//xmlns:item[@id="123"]` return empty results under HeadedOx.
73
+
74
+ **Test:**
75
+ - `spec/integration/shared_examples/integration_workflows.rb:63`
76
+
77
+ **Needs:** Investigation — check namespace resolution in predicate context.
78
+
79
+ ### 7g. Wildcard Element Counting (1 test)
80
+
81
+ `//*` returns a different count (6) vs Nokogiri (7+), likely due to Ox's DOM
82
+ structure.
83
+
84
+ **Test:**
85
+ - `spec/moxml/xpath/compiler_spec.rb:156` (same test location as listed under 7a)
86
+
87
+ **Impact:** Low — real-world queries typically use specific element names.
88
+
89
+ ## Files
90
+
91
+ - `docs/_pages/headed-ox-limitations.adoc` — full documentation
92
+ - `lib/moxml/adapter/headed_ox.rb`
93
+ - `lib/moxml/adapter/ox.rb`
94
+ - `lib/moxml/xpath/` — pure-Ruby XPath engine
95
+ - All spec files listed above
@@ -0,0 +1,68 @@
1
+ # TODO 8: XPath Engine Predicate Gaps (5 xit Tests)
2
+
3
+ ## Problem
4
+
5
+ The pure-Ruby XPath engine (used by HeadedOx) does not fully support
6
+ `position()` and `last()` inside predicates, or `id()` with a node-set
7
+ argument. Five tests are marked `xit` pending this support.
8
+
9
+ These gaps affect the XPath engine in `lib/moxml/xpath/` — they are not
10
+ adapter-specific.
11
+
12
+ ## Failing Tests
13
+
14
+ ### `position()` in Predicates (2 tests)
15
+
16
+ `spec/moxml/xpath/functions/position_functions_spec.rb`
17
+
18
+ ```ruby
19
+ xit "returns current position in predicate" do
20
+ # /root/item[position() = 2]
21
+ end
22
+
23
+ xit "works with position comparison" do
24
+ # /root/item[position() > 1]
25
+ end
26
+ ```
27
+
28
+ ### `last()` in Predicates (2 tests)
29
+
30
+ `spec/moxml/xpath/functions/position_functions_spec.rb`
31
+
32
+ ```ruby
33
+ xit "returns size of context in predicate" do
34
+ # /root/item[position() = last()]
35
+ end
36
+
37
+ xit "works with last() - 1" do
38
+ # /root/item[position() = last() - 1]
39
+ end
40
+ ```
41
+
42
+ ### `id()` with Nodeset Argument (1 test)
43
+
44
+ `spec/moxml/xpath/functions/special_functions_spec.rb:69`
45
+
46
+ ```ruby
47
+ xit "accepts nodeset argument containing IDs" do
48
+ # id(nodeset) where nodeset is path-evaluated
49
+ end
50
+ ```
51
+
52
+ ## Investigation Needed
53
+
54
+ - The XPath compiler likely needs to pass predicate context (position, size)
55
+ into the evaluation environment when compiling predicate expressions.
56
+ - `position()` and `last()` are defined but raise `InvalidContextError` when
57
+ used inside predicates — the predicate evaluation path doesn't set up the
58
+ context they need.
59
+ - `id()` with a nodeset argument requires evaluating the argument as an XPath
60
+ path first, then extracting ID values from the resulting nodes.
61
+
62
+ ## Files
63
+
64
+ - `lib/moxml/xpath/compiler.rb` — predicate compilation
65
+ - `lib/moxml/xpath/engine.rb` — runtime evaluation context
66
+ - `lib/moxml/xpath/context.rb` — context setup for position/last
67
+ - `spec/moxml/xpath/functions/position_functions_spec.rb`
68
+ - `spec/moxml/xpath/functions/special_functions_spec.rb`
@@ -0,0 +1,42 @@
1
+ # TODO 9: Cleanup and Hygiene
2
+
3
+ Small items that don't affect functionality but should be addressed.
4
+
5
+ ## 9a. Stale Doc Links in Skip Messages
6
+
7
+ 15+ test skip messages reference `docs/HEADED_OX_LIMITATIONS.md` but the
8
+ actual file is at `docs/_pages/headed-ox-limitations.adoc`. The referenced
9
+ path does not exist.
10
+
11
+ **Fix:** Update all skip messages to reference
12
+ `docs/_pages/headed-ox-limitations.adoc` instead.
13
+
14
+ **Affected files:**
15
+ - `spec/integration/headed_ox_integration_spec.rb`
16
+ - `spec/integration/shared_examples/integration_workflows.rb`
17
+ - `spec/integration/shared_examples/node_wrappers/node_behavior.rb`
18
+ - `spec/integration/shared_examples/node_wrappers/cdata_behavior.rb`
19
+ - `spec/integration/shared_examples/edge_cases.rb`
20
+ - `spec/moxml/xpath/axes_spec.rb`
21
+ - `spec/moxml/xpath/compiler_spec.rb`
22
+ - `spec/moxml/adapter/headed_ox_spec.rb`
23
+
24
+ ## 9b. Untracked `scripts/` Directory
25
+
26
+ `scripts/format_xml.rb` and `scripts/pretty_format_xml.rb` exist as untracked
27
+ files. Decide whether to commit them (and document their purpose), ignore them
28
+ via a `.gitignore` pattern, or remove them.
29
+
30
+ ## 9c. Superseded Root TODO Files
31
+
32
+ The following root-level files are marked as superseded in
33
+ `TODO.remaining/README.md` but still exist:
34
+
35
+ - `TODO.entities-work.md`
36
+ - `TODO.entity-handling.md`
37
+ - `TODO.entity-support.md`
38
+ - `TODO.full-entity-support.md`
39
+ - `TODO.full-entity.md`
40
+ - `TODO.mn-bilingual-round-trip.md`
41
+
42
+ Once all work is confirmed tracked in `TODO.remaining/`, these can be deleted.
@@ -0,0 +1,54 @@
1
+ # TODO.remaining — Consolidated Action Items
2
+
3
+ Consolidated from: TODO.entities-work.md, TODO.entity-support.md,
4
+ TODO.full-entity.md, TODO.full-entity-support.md, TODO.entity-handling.md,
5
+ TODO.mn-bilingual-round-trip.md, plus code audit (2026-04-22).
6
+
7
+ Those root files are superseded and can be removed.
8
+
9
+ ## Dependency Order
10
+
11
+ ```
12
+ TODO 1 (Adapter Support)
13
+ |
14
+ v
15
+ TODO 2 (Model-Driven Restoration) ---> TODO 4 (Lenient Entities Mode)
16
+ |
17
+ v
18
+ TODO 3 (Test Coverage)
19
+
20
+ TODO 5 (Fixture Integrity) — independent
21
+ TODO 6 (Ox Element Ordering) — independent
22
+ TODO 7 (HeadedOx Limitations) — independent
23
+ TODO 8 (XPath Predicate Gaps) — independent
24
+ TODO 9 (Cleanup Hygiene) — independent
25
+ ```
26
+
27
+ ## Summary
28
+
29
+ | # | File | Description | Status |
30
+ |---|------|-------------|--------|
31
+ | 1 | `1-entity-reference-adapter-support.md` | EntityReference in Ox, Oga, REXML, LibXML, HeadedOx | Not started |
32
+ | 2 | `2-entity-restoration-model-driven.md` | Use EntityRegistry as source of truth for restoration | Not started |
33
+ | 3 | `3-entity-reference-test-coverage.md` | Tests for EntityReference nodes and round-trips | Not started |
34
+ | 4 | `4-lenient-entities-mode.md` | Strict vs lenient entity restoration mode | Not started |
35
+ | 5 | `5-fixture-integrity.md` | Bilingual fixture verification + CI validation | Not started |
36
+ | 6 | `6-ox-element-ordering-bug.md` | Ox adapter reorders elements in certain fixtures | Not started |
37
+ | 7 | `7-headed-ox-limitations.md` | 15 skipped tests across 7 HeadedOx limitation areas | Not started |
38
+ | 8 | `8-xpath-predicate-gaps.md` | position()/last()/id() not working in XPath predicates | Not started |
39
+ | 9 | `9-cleanup-hygiene.md` | Stale doc links, untracked scripts, superseded files | Not started |
40
+
41
+ ## What's Already Done
42
+
43
+ - EntityReference node class (`lib/moxml/entity_reference.rb`)
44
+ - EntityRegistry with 2125 W3C entities (`lib/moxml/entity_registry.rb`)
45
+ - Node type registry includes `:entity_reference`
46
+ - Base adapter template: `create_entity_reference`, `validate_entity_reference_name`
47
+ - Nokogiri adapter: full native EntityReference support
48
+ - Document factory: `create_entity_reference(name)`
49
+ - DocumentBuilder: `visit_entity_reference` + partial `restore_entities_in_text`
50
+ - Builder DSL: `entity_reference(name)`
51
+ - Config: `restore_entities`, `entity_load_mode`, `entity_provider`, `preload_entity_sets`
52
+ - Context: entity registry integration
53
+ - EntityRegistry tests (24 examples passing)
54
+ - HeadedOx limitations documented in `docs/_pages/headed-ox-limitations.adoc`
@@ -488,7 +488,7 @@ class MoxmlBenchmarkReport
488
488
  f.puts "- Can accept 99.20% pass rate (16 documented Ox limitations)"
489
489
  f.puts ""
490
490
  f.puts "**Note:** HeadedOx = Ox parsing speed + full XPath features."
491
- f.puts "See docs/HEADED_OX_LIMITATIONS.md for complete details."
491
+ f.puts "See docs/_pages/headed-ox-limitations.adoc for complete details."
492
492
  f.puts ""
493
493
  end
494
494
 
@@ -149,6 +149,20 @@ namespace_validation_mode: :strict)
149
149
  node
150
150
  end
151
151
 
152
+ # Check if the native document has an XML declaration
153
+ # @param native_doc the native document object
154
+ # @param wrapper [Moxml::Document] the wrapper with has_xml_declaration flag
155
+ # @return [Boolean]
156
+ def has_declaration?(_native_doc, wrapper)
157
+ wrapper.has_xml_declaration
158
+ end
159
+
160
+ # Return the actual native node after an add_child operation.
161
+ # Override for adapters where node identity may change (e.g., LibXML doc.root=).
162
+ def actual_native(child_native, _parent_native)
163
+ child_native
164
+ end
165
+
152
166
  protected
153
167
 
154
168
  def create_native_element(_name, _owner_doc = nil)
@@ -9,7 +9,7 @@ module Moxml
9
9
  # are read-only after creation. This wrapper allows mutation by
10
10
  # storing values internally and regenerating XML when needed.
11
11
  class Declaration
12
- attr_accessor :version, :encoding
12
+ attr_accessor :version, :encoding, :removed, :parent_doc
13
13
  attr_reader :native
14
14
 
15
15
  def initialize(native_doc, version = nil, encoding = nil,
@@ -7,12 +7,13 @@ module Moxml
7
7
  module CustomizedRexml
8
8
  # Custom REXML formatter that fixes indentation and wrapping issues
9
9
  class Formatter < ::REXML::Formatters::Pretty
10
- def initialize(indentation: 2, self_close_empty: false)
10
+ def initialize(indentation: 2, self_close_empty: false, adapter: nil)
11
11
  @indentation = " " * indentation
12
12
  @level = 0
13
13
  @compact = true
14
14
  @width = -1 # Disable line wrapping
15
15
  @self_close_empty = self_close_empty
16
+ @adapter = adapter
16
17
  end
17
18
 
18
19
  def write(node, output)
@@ -31,7 +32,13 @@ module Moxml
31
32
  output << "<#{node.expanded_name}"
32
33
  write_attributes(node, output)
33
34
 
34
- if node.children.empty? && @self_close_empty
35
+ # Check for entity refs stored in adapter attachments
36
+ entity_refs = @adapter&.attachments&.get(node, :entity_refs)
37
+ child_sequence = @adapter&.attachments&.get(node, :child_sequence)
38
+
39
+ has_no_children = node.children.empty? && !(entity_refs && !entity_refs.empty?)
40
+
41
+ if has_no_children && @self_close_empty
35
42
  output << "/>"
36
43
  return
37
44
  end
@@ -44,26 +51,41 @@ module Moxml
44
51
  mixed = has_text && has_elements
45
52
 
46
53
  # Handle children based on content type
47
- unless node.children.empty?
54
+ all_children_empty = node.children.empty? && !(entity_refs && !entity_refs.empty?)
55
+ unless all_children_empty
48
56
  @level += @indentation.length unless mixed
49
57
 
50
- node.children.each_with_index do |child, _index|
51
- # Skip insignificant whitespace
52
- next if child.is_a?(::REXML::Text) &&
53
- child.to_s.strip.empty? &&
54
- !(child.next_sibling.nil? && child.previous_sibling.nil?)
55
-
56
- # Indent non-text nodes in non-mixed content
57
- # if !mixed && !child.is_a?(::REXML::Text)
58
- # output << ' ' * @level
59
- # end
60
-
61
- write(child, output)
62
-
63
- # Add newlines between elements in non-mixed content
64
- # if !mixed && !child.is_a?(::REXML::Text) && index < node.children.size - 1
65
- # output << "\n"
66
- # end
58
+ if entity_refs && !entity_refs.empty? && child_sequence
59
+ # Interleave native children with entity refs using tracked sequence
60
+ eref_idx = 0
61
+ native_idx = 0
62
+ child_sequence.each do |type|
63
+ case type
64
+ when :native
65
+ if native_idx < node.children.size
66
+ child = node.children[native_idx]
67
+ native_idx += 1
68
+ next if child.is_a?(::REXML::Text) &&
69
+ child.to_s.strip.empty? &&
70
+ !(child.next_sibling.nil? && child.previous_sibling.nil?)
71
+ write(child, output)
72
+ end
73
+ when :eref
74
+ if eref_idx < entity_refs.size
75
+ write(entity_refs[eref_idx], output)
76
+ eref_idx += 1
77
+ end
78
+ end
79
+ end
80
+ else
81
+ node.children.each_with_index do |child, _index|
82
+ # Skip insignificant whitespace
83
+ next if child.is_a?(::REXML::Text) &&
84
+ child.to_s.strip.empty? &&
85
+ !(child.next_sibling.nil? && child.previous_sibling.nil?)
86
+
87
+ write(child, output)
88
+ end
67
89
  end
68
90
 
69
91
  # Reset indentation for closing tag in non-mixed content
@@ -60,10 +60,10 @@ module Moxml
60
60
  #
61
61
  # This overrides the Ox adapter's xpath method which uses locate().
62
62
  #
63
- # @param [Moxml::Node] node Starting node (wrapped Moxml node)
63
+ # @param node Starting node (native or wrapped)
64
64
  # @param [String] expression XPath expression
65
65
  # @param [Hash] namespaces Namespace prefix mappings
66
- # @return [Moxml::NodeSet, Object] Query results
66
+ # @return [Array, Object] Native node array or scalar value
67
67
  def xpath(node, expression, namespaces = {})
68
68
  # If we receive a native node, wrap it first
69
69
  # Document#xpath passes @native, but our compiled XPath needs Moxml nodes
@@ -85,16 +85,33 @@ module Moxml
85
85
  # Execute on the node (now guaranteed to be wrapped Moxml node)
86
86
  result = proc.call(node)
87
87
 
88
- # Wrap Array results in NodeSet, return other types directly
88
+ # Return native arrays for Node#xpath to wrap, scalars directly.
89
+ # The adapter contract: xpath() returns Array<native> | scalar.
89
90
  case result
90
91
  when Array
91
- # Deduplicate by native object identity to handle descendant-or-self
92
- # which may yield the same native node multiple times
93
- nodeset = NodeSet.new(result, node.context)
94
- nodeset.uniq_by_native
92
+ # XPath engine returns wrapped Moxml::Node objects.
93
+ # Extract native nodes and deduplicate by object identity.
94
+ native_nodes = result.map { |n| n.is_a?(Moxml::Node) ? n.native : n }
95
+ seen = {}
96
+ native_nodes.select do |native|
97
+ id = native.object_id
98
+ if seen[id]
99
+ false
100
+ else
101
+ seen[id] = true
102
+ end
103
+ end
95
104
  when NodeSet
96
- # Deduplicate NodeSet results as well
97
- result.uniq_by_native
105
+ # NodeSet from intermediate evaluation - extract natives and deduplicate
106
+ seen = {}
107
+ result.to_a.map(&:native).select do |native|
108
+ id = native.object_id
109
+ if seen[id]
110
+ false
111
+ else
112
+ seen[id] = true
113
+ end
114
+ end
98
115
  else
99
116
  # Scalar values (string, number, boolean) - return as-is
100
117
  result
@@ -113,10 +130,10 @@ module Moxml
113
130
  # @param [Moxml::Node] node Starting node
114
131
  # @param [String] expression XPath expression
115
132
  # @param [Hash] namespaces Namespace prefix mappings
116
- # @return [Moxml::Node, Object, nil] First result or nil
133
+ # @return [Object, nil] First native node or scalar value
117
134
  def at_xpath(node, expression, namespaces = {})
118
135
  result = xpath(node, expression, namespaces)
119
- result.is_a?(NodeSet) ? result.first : result
136
+ result.is_a?(Array) ? result.first : result
120
137
  end
121
138
 
122
139
  # Check if XPath is supported