moxml 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +117 -66
  3. data/Gemfile +1 -0
  4. data/README.adoc +11 -9
  5. data/Rakefile +34 -1
  6. data/TODO.remaining/1-entity-reference-adapter-support.md +157 -0
  7. data/TODO.remaining/2-entity-restoration-model-driven.md +169 -0
  8. data/TODO.remaining/3-entity-reference-test-coverage.md +170 -0
  9. data/TODO.remaining/4-lenient-entities-mode.md +106 -0
  10. data/TODO.remaining/5-fixture-integrity.md +65 -0
  11. data/TODO.remaining/6-ox-element-ordering-bug.md +36 -0
  12. data/TODO.remaining/7-headed-ox-limitations.md +95 -0
  13. data/TODO.remaining/8-xpath-predicate-gaps.md +68 -0
  14. data/TODO.remaining/9-cleanup-hygiene.md +42 -0
  15. data/TODO.remaining/README.md +54 -0
  16. data/benchmarks/generate_report.rb +1 -1
  17. data/docs/_pages/configuration.adoc +22 -19
  18. data/docs/_tutorials/namespace-handling.adoc +5 -5
  19. data/lib/moxml/adapter/base.rb +22 -3
  20. data/lib/moxml/adapter/customized_libxml/declaration.rb +1 -1
  21. data/lib/moxml/adapter/customized_libxml/entity_reference.rb +23 -0
  22. data/lib/moxml/adapter/customized_libxml.rb +18 -0
  23. data/lib/moxml/adapter/customized_oga.rb +10 -0
  24. data/lib/moxml/adapter/customized_ox/entity_reference.rb +25 -0
  25. data/lib/moxml/adapter/customized_ox.rb +12 -0
  26. data/lib/moxml/adapter/customized_rexml/entity_reference.rb +19 -0
  27. data/lib/moxml/adapter/customized_rexml/formatter.rb +44 -20
  28. data/lib/moxml/adapter/customized_rexml.rb +11 -0
  29. data/lib/moxml/adapter/headed_ox.rb +37 -14
  30. data/lib/moxml/adapter/libxml.rb +233 -119
  31. data/lib/moxml/adapter/nokogiri.rb +22 -11
  32. data/lib/moxml/adapter/oga.rb +64 -25
  33. data/lib/moxml/adapter/ox.rb +198 -42
  34. data/lib/moxml/adapter/rexml.rb +64 -13
  35. data/lib/moxml/attribute.rb +3 -0
  36. data/lib/moxml/builder.rb +78 -24
  37. data/lib/moxml/config.rb +24 -7
  38. data/lib/moxml/declaration.rb +4 -2
  39. data/lib/moxml/document.rb +8 -1
  40. data/lib/moxml/document_builder.rb +44 -37
  41. data/lib/moxml/element.rb +18 -5
  42. data/lib/moxml/entity_registry.rb +51 -1
  43. data/lib/moxml/native_attachment.rb +65 -0
  44. data/lib/moxml/node.rb +39 -50
  45. data/lib/moxml/node_set.rb +43 -15
  46. data/lib/moxml/version.rb +1 -1
  47. data/lib/moxml/xml_utils.rb +1 -1
  48. data/lib/moxml/xpath/compiler.rb +4 -1
  49. data/lib/moxml.rb +1 -0
  50. data/scripts/format_xml.rb +16 -0
  51. data/scripts/pretty_format_xml.rb +14 -0
  52. data/spec/consistency/round_trip_spec.rb +3 -30
  53. data/spec/integration/all_adapters_spec.rb +1 -0
  54. data/spec/integration/headed_ox_integration_spec.rb +0 -2
  55. data/spec/integration/shared_examples/edge_cases.rb +7 -4
  56. data/spec/integration/shared_examples/integration_workflows.rb +3 -3
  57. data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +1 -1
  58. data/spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb +224 -0
  59. data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +1 -1
  60. data/spec/moxml/adapter/headed_ox_spec.rb +8 -8
  61. data/spec/moxml/adapter/oga_spec.rb +46 -0
  62. data/spec/moxml/adapter/shared_examples/adapter_contract.rb +1 -12
  63. data/spec/moxml/allocation_benchmark_spec.rb +96 -0
  64. data/spec/moxml/allocation_guard_spec.rb +282 -0
  65. data/spec/moxml/builder_spec.rb +256 -0
  66. data/spec/moxml/config_spec.rb +11 -11
  67. data/spec/moxml/doctype_spec.rb +41 -0
  68. data/spec/moxml/lazy_parse_spec.rb +115 -0
  69. data/spec/moxml/namespace_uri_validation_spec.rb +11 -3
  70. data/spec/moxml/node_cache_spec.rb +110 -0
  71. data/spec/moxml/node_set_cache_spec.rb +90 -0
  72. data/spec/moxml/xml_utils_spec.rb +32 -0
  73. data/spec/moxml/xpath/axes_spec.rb +1 -1
  74. data/spec/moxml/xpath/compiler_spec.rb +2 -2
  75. data/spec/moxml/xpath/functions/position_functions_spec.rb +5 -5
  76. data/spec/moxml/xpath/functions/special_functions_spec.rb +1 -1
  77. data/spec/performance/memory_usage_spec.rb +0 -4
  78. data/spec/support/allocation_helper.rb +165 -0
  79. data/spec/support/w3c_namespace_helpers.rb +2 -1
  80. metadata +29 -2
@@ -0,0 +1,68 @@
1
+ # TODO 8: XPath Engine Predicate Gaps (5 xit Tests)
2
+
3
+ ## Problem
4
+
5
+ The pure-Ruby XPath engine (used by HeadedOx) does not fully support
6
+ `position()` and `last()` inside predicates, or `id()` with a nodeset
7
+ argument. Five tests are marked `xit` pending that support.
8
+
9
+ These gaps affect the XPath engine in `lib/moxml/xpath/` — they are not
10
+ adapter-specific.
11
+
12
+ ## Failing Tests
13
+
14
+ ### `position()` in Predicates (2 tests)
15
+
16
+ `spec/moxml/xpath/functions/position_functions_spec.rb`
17
+
18
+ ```ruby
19
+ xit "returns current position in predicate" do
20
+ # /root/item[position() = 2]
21
+ end
22
+
23
+ xit "works with position comparison" do
24
+ # /root/item[position() > 1]
25
+ end
26
+ ```
27
+
28
+ ### `last()` in Predicates (2 tests)
29
+
30
+ `spec/moxml/xpath/functions/position_functions_spec.rb`
31
+
32
+ ```ruby
33
+ xit "returns size of context in predicate" do
34
+ # /root/item[position() = last()]
35
+ end
36
+
37
+ xit "works with last() - 1" do
38
+ # /root/item[position() = last() - 1]
39
+ end
40
+ ```
41
+
42
+ ### `id()` with Nodeset Argument (1 test)
43
+
44
+ `spec/moxml/xpath/functions/special_functions_spec.rb:69`
45
+
46
+ ```ruby
47
+ xit "accepts nodeset argument containing IDs" do
48
+ # id(nodeset) where nodeset is path-evaluated
49
+ end
50
+ ```
51
+
52
+ ## Investigation Needed
53
+
54
+ - The XPath compiler likely needs to pass predicate context (position, size)
55
+ into the evaluation environment when compiling predicate expressions.
56
+ - `position()` and `last()` are defined but raise `InvalidContextError` when
57
+ used inside predicates — the predicate evaluation path doesn't set up the
58
+ context they need.
59
+ - `id()` with a nodeset argument requires evaluating the argument as an XPath
60
+ path first, then extracting ID values from the resulting nodes.
61
+
62
+ ## Files
63
+
64
+ - `lib/moxml/xpath/compiler.rb` — predicate compilation
65
+ - `lib/moxml/xpath/engine.rb` — runtime evaluation context
66
+ - `lib/moxml/xpath/context.rb` — context setup for position/last
67
+ - `spec/moxml/xpath/functions/position_functions_spec.rb`
68
+ - `spec/moxml/xpath/functions/special_functions_spec.rb`
@@ -0,0 +1,42 @@
1
+ # TODO 9: Cleanup and Hygiene
2
+
3
+ Small items that don't affect functionality but should be addressed.
4
+
5
+ ## 9a. Stale Doc Links in Skip Messages
6
+
7
+ 15+ test skip messages reference `docs/HEADED_OX_LIMITATIONS.md` but the
8
+ actual file is at `docs/_pages/headed-ox-limitations.adoc`. The referenced
9
+ path does not exist.
10
+
11
+ **Fix:** Update all skip messages to reference
12
+ `docs/_pages/headed-ox-limitations.adoc` instead.
13
+
14
+ **Affected files:**
15
+ - `spec/integration/headed_ox_integration_spec.rb`
16
+ - `spec/integration/shared_examples/integration_workflows.rb`
17
+ - `spec/integration/shared_examples/node_wrappers/node_behavior.rb`
18
+ - `spec/integration/shared_examples/node_wrappers/cdata_behavior.rb`
19
+ - `spec/integration/shared_examples/edge_cases.rb`
20
+ - `spec/moxml/xpath/axes_spec.rb`
21
+ - `spec/moxml/xpath/compiler_spec.rb`
22
+ - `spec/moxml/adapter/headed_ox_spec.rb`
23
+
24
+ ## 9b. Untracked `scripts/` Directory
25
+
26
+ `scripts/format_xml.rb` and `scripts/pretty_format_xml.rb` exist as untracked
27
+ files. Decide whether to commit (and document them), add to `.gitignore`,
28
+ or remove.
29
+
30
+ ## 9c. Superseded Root TODO Files
31
+
32
+ The following root-level files are marked as superseded in
33
+ `TODO.remaining/README.md` but still exist:
34
+
35
+ - `TODO.entities-work.md`
36
+ - `TODO.entity-handling.md`
37
+ - `TODO.entity-support.md`
38
+ - `TODO.full-entity-support.md`
39
+ - `TODO.full-entity.md`
40
+ - `TODO.mn-bilingual-round-trip.md`
41
+
42
+ Once all work is confirmed tracked in `TODO.remaining/`, these can be deleted.
@@ -0,0 +1,54 @@
1
+ # TODO.remaining — Consolidated Action Items
2
+
3
+ Consolidated from: TODO.entities-work.md, TODO.entity-support.md,
4
+ TODO.full-entity.md, TODO.full-entity-support.md, TODO.entity-handling.md,
5
+ TODO.mn-bilingual-round-trip.md, plus code audit (2026-04-22).
6
+
7
+ Those root files are superseded and can be removed.
8
+
9
+ ## Dependency Order
10
+
11
+ ```
12
+ TODO 1 (Adapter Support)
13
+ |
14
+ v
15
+ TODO 2 (Model-Driven Restoration) ---> TODO 4 (Lenient Entities Mode)
16
+ |
17
+ v
18
+ TODO 3 (Test Coverage)
19
+
20
+ TODO 5 (Fixture Integrity) — independent
21
+ TODO 6 (Ox Element Ordering) — independent
22
+ TODO 7 (HeadedOx Limitations) — independent
23
+ TODO 8 (XPath Predicate Gaps) — independent
24
+ TODO 9 (Cleanup Hygiene) — independent
25
+ ```
26
+
27
+ ## Summary
28
+
29
+ | # | File | Description | Status |
30
+ |---|------|-------------|--------|
31
+ | 1 | `1-entity-reference-adapter-support.md` | EntityReference in Ox, Oga, REXML, LibXML, HeadedOx | Not started |
32
+ | 2 | `2-entity-restoration-model-driven.md` | Use EntityRegistry as source of truth for restoration | Not started |
33
+ | 3 | `3-entity-reference-test-coverage.md` | Tests for EntityReference nodes and round-trips | Not started |
34
+ | 4 | `4-lenient-entities-mode.md` | Strict vs lenient entity restoration mode | Not started |
35
+ | 5 | `5-fixture-integrity.md` | Bilingual fixture verification + CI validation | Not started |
36
+ | 6 | `6-ox-element-ordering-bug.md` | Ox adapter reorders elements in certain fixtures | Not started |
37
+ | 7 | `7-headed-ox-limitations.md` | 15 skipped tests across 7 HeadedOx limitation areas | Not started |
38
+ | 8 | `8-xpath-predicate-gaps.md` | position()/last()/id() not working in XPath predicates | Not started |
39
+ | 9 | `9-cleanup-hygiene.md` | Stale doc links, untracked scripts, superseded files | Not started |
40
+
41
+ ## What's Already Done
42
+
43
+ - EntityReference node class (`lib/moxml/entity_reference.rb`)
44
+ - EntityRegistry with 2125 W3C entities (`lib/moxml/entity_registry.rb`)
45
+ - Node type registry includes `:entity_reference`
46
+ - Base adapter template: `create_entity_reference`, `validate_entity_reference_name`
47
+ - Nokogiri adapter: full native EntityReference support
48
+ - Document factory: `create_entity_reference(name)`
49
+ - DocumentBuilder: `visit_entity_reference` + partial `restore_entities_in_text`
50
+ - Builder DSL: `entity_reference(name)`
51
+ - Config: `restore_entities`, `entity_load_mode`, `entity_provider`, `preload_entity_sets`
52
+ - Context: entity registry integration
53
+ - EntityRegistry tests (24 examples passing)
54
+ - HeadedOx limitations documented in `docs/_pages/headed-ox-limitations.adoc`
@@ -488,7 +488,7 @@ class MoxmlBenchmarkReport
488
488
  f.puts "- Can accept 99.20% pass rate (16 documented Ox limitations)"
489
489
  f.puts ""
490
490
  f.puts "**Note:** HeadedOx = Ox parsing speed + full XPath features."
491
- f.puts "See docs/HEADED_OX_LIMITATIONS.md for complete details."
491
+ f.puts "See docs/_pages/headed-ox-limitations.adoc for complete details."
492
492
  f.puts ""
493
493
  end
494
494
 
@@ -97,19 +97,21 @@ context.config.default_encoding = 'UTF-16'
97
97
 
98
98
  **Default:** `"UTF-8"`
99
99
 
100
- ==== Namespace URI validation mode
100
+ ==== Namespace validation mode
101
101
 
102
- Control how strictly namespace URIs are validated:
102
+ Control how strictly namespace URIs and prefixes are validated:
103
103
 
104
104
  [source,ruby]
105
105
  ----
106
- # Strict mode (default) — validates namespace URIs against RFC 3986
107
- context.config.namespace_uri_mode = :strict
108
- doc = context.parse(xml) # Raises ValidationError for invalid URIs
109
-
110
- # Lenient mode — accepts any string as a namespace URI
111
- context.config.namespace_uri_mode = :lenient
112
- doc = context.parse(xml) # Accepts non-standard namespace URIs
106
+ # Strict mode (default) — validates namespace URIs against RFC 3986 and
107
+ # prefixes against NCName rules
108
+ context.config.namespace_validation_mode = :strict
109
+ doc = context.parse(xml) # Raises ValidationError for invalid URIs/prefixes
110
+
111
+ # Lenient mode — accepts any URI string and defers prefix validation to the
112
+ # underlying XML parser
113
+ context.config.namespace_validation_mode = :lenient
114
+ doc = context.parse(xml) # Accepts non-standard namespace URIs/prefixes
113
115
  ----
114
116
 
115
117
  **Default:** `:strict`
@@ -117,23 +119,24 @@ doc = context.parse(xml) # Accepts non-standard namespace URIs
117
119
  **Modes:**
118
120
 
119
121
  `:strict`:: Validates namespace URIs against the
120
- https://www.rfc-editor.org/rfc/rfc3986[RFC 3986] URI-reference specification, as
121
- required by https://www.w3.org/TR/xml-names/[Namespaces in XML]. Invalid URIs
122
- raise a `Moxml::ValidationError`. This is the recommended mode for
123
- standards-compliant XML processing.
122
+ https://www.rfc-editor.org/rfc/rfc3986[RFC 3986] URI-reference specification and
123
+ namespace prefixes against NCName rules, as required by
124
+ https://www.w3.org/TR/xml-names/[Namespaces in XML]. Invalid values raise a
125
+ `Moxml::ValidationError`. This is the recommended mode for standards-compliant
126
+ XML processing.
124
127
 
125
- `:lenient`:: Accepts any string as a namespace URI, only rejecting strings
126
- containing XML-invalid control characters (`0x00`-`0x08`, `0x0B`, `0x0C`,
127
- `0x0E`-`0x1F`). Use this mode when processing XML documents that use
128
- non-standard namespace identifiers such as URNs or other non-URI strings.
128
+ `:lenient`:: Accepts any string as a namespace URI (only rejecting control
129
+ characters) and defers prefix validation to the underlying XML parser. Use this
130
+ mode when processing XML documents that use non-standard namespace identifiers
131
+ or prefixes (e.g., `xmlns_1.0`).
129
132
 
130
133
  **Example:**
131
134
 
132
135
  [source,ruby]
133
136
  ----
134
- # Process documents with non-standard namespace URIs
137
+ # Process documents with non-standard namespace URIs and prefixes
135
138
  context = Moxml.new do |config|
136
- config.namespace_uri_mode = :lenient
139
+ config.namespace_validation_mode = :lenient
137
140
  end
138
141
 
139
142
  xml = '<root xmlns:ex="not a valid URI but accepted in lenient mode"/>'
@@ -280,18 +280,18 @@ puts all_children.length # => 2
280
280
 
281
281
  By default, Moxml validates namespace URIs against
282
282
  https://www.rfc-editor.org/rfc/rfc3986[RFC 3986] (strict mode). To accept
283
- non-standard namespace identifiers, use lenient mode:
283
+ non-standard namespace identifiers or prefixes, use lenient mode:
284
284
 
285
285
  [source,ruby]
286
286
  ----
287
- # Strict mode (default) — validates URIs per RFC 3986
287
+ # Strict mode (default) — validates URIs per RFC 3986 and prefixes per NCName
288
288
  context = Moxml.new do |config|
289
- config.namespace_uri_mode = :strict
289
+ config.namespace_validation_mode = :strict
290
290
  end
291
291
 
292
- # Lenient mode — accepts any string as namespace URI
292
+ # Lenient mode — accepts any URI string and defers prefix validation to parser
293
293
  context = Moxml.new do |config|
294
- config.namespace_uri_mode = :lenient
294
+ config.namespace_validation_mode = :lenient
295
295
  end
296
296
  ----
297
297
 
@@ -98,7 +98,8 @@ module Moxml
98
98
  create_native_declaration(version, encoding, standalone)
99
99
  end
100
100
 
101
- def create_namespace(element, prefix, uri, namespace_uri_mode: :strict)
101
+ def create_namespace(element, prefix, uri,
102
+ namespace_validation_mode: :strict)
102
103
  if prefix && uri.to_s.empty?
103
104
  raise NamespaceError.new(
104
105
  "Prefixed namespace declaration cannot have an empty URI",
@@ -106,8 +107,12 @@ module Moxml
106
107
  uri: uri,
107
108
  )
108
109
  end
109
- validate_prefix(prefix) if prefix
110
- validate_uri(uri, mode: namespace_uri_mode)
110
+ if namespace_validation_mode == :strict
111
+ validate_prefix(prefix) if prefix
112
+ validate_uri(uri, mode: :strict)
113
+ else
114
+ validate_uri(uri, mode: :lenient)
115
+ end
111
116
  create_native_namespace(element, prefix, uri)
112
117
  end
113
118
 
@@ -144,6 +149,20 @@ module Moxml
144
149
  node
145
150
  end
146
151
 
152
+ # Check if the native document has an XML declaration
153
+ # @param native_doc the native document object
154
+ # @param wrapper [Moxml::Document] the wrapper with has_xml_declaration flag
155
+ # @return [Boolean]
156
+ def has_declaration?(_native_doc, wrapper)
157
+ wrapper.has_xml_declaration
158
+ end
159
+
160
+ # Return the actual native node after an add_child operation.
161
+ # Override for adapters where node identity may change (e.g., LibXML doc.root=).
162
+ def actual_native(child_native, _parent_native)
163
+ child_native
164
+ end
165
+
147
166
  protected
148
167
 
149
168
  def create_native_element(_name, _owner_doc = nil)
@@ -9,7 +9,7 @@ module Moxml
9
9
  # are read-only after creation. This wrapper allows mutation by
10
10
  # storing values internally and regenerating XML when needed.
11
11
  class Declaration
12
- attr_accessor :version, :encoding
12
+ attr_accessor :version, :encoding, :removed, :parent_doc
13
13
  attr_reader :native
14
14
 
15
15
  def initialize(native_doc, version = nil, encoding = nil,
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module Adapter
5
+ module CustomizedLibxml
6
+ class EntityReference
7
+ attr_reader :name
8
+
9
+ def initialize(name)
10
+ @name = name
11
+ end
12
+
13
+ def to_xml
14
+ "&#{@name};"
15
+ end
16
+
17
+ def ==(other)
18
+ other.is_a?(self.class) && @name == other.name
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module Adapter
5
+ module CustomizedLibxml
6
+ autoload :Cdata, "moxml/adapter/customized_libxml/cdata"
7
+ autoload :Comment, "moxml/adapter/customized_libxml/comment"
8
+ autoload :Declaration, "moxml/adapter/customized_libxml/declaration"
9
+ autoload :Element, "moxml/adapter/customized_libxml/element"
10
+ autoload :EntityReference,
11
+ "moxml/adapter/customized_libxml/entity_reference"
12
+ autoload :Node, "moxml/adapter/customized_libxml/node"
13
+ autoload :ProcessingInstruction,
14
+ "moxml/adapter/customized_libxml/processing_instruction"
15
+ autoload :Text, "moxml/adapter/customized_libxml/text"
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module Adapter
5
+ module CustomizedOga
6
+ autoload :XmlDeclaration, "moxml/adapter/customized_oga/xml_declaration"
7
+ autoload :XmlGenerator, "moxml/adapter/customized_oga/xml_generator"
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module Adapter
5
+ module CustomizedOx
6
+ class EntityReference
7
+ attr_reader :name
8
+ attr_accessor :parent
9
+
10
+ def initialize(name)
11
+ @name = name
12
+ @parent = nil
13
+ end
14
+
15
+ def to_xml
16
+ "&#{@name};"
17
+ end
18
+
19
+ def ==(other)
20
+ other.is_a?(self.class) && @name == other.name
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module Adapter
5
+ module CustomizedOx
6
+ autoload :Attribute, "moxml/adapter/customized_ox/attribute"
7
+ autoload :EntityReference, "moxml/adapter/customized_ox/entity_reference"
8
+ autoload :Namespace, "moxml/adapter/customized_ox/namespace"
9
+ autoload :Text, "moxml/adapter/customized_ox/text"
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module Adapter
5
+ module CustomizedRexml
6
+ class EntityReference
7
+ attr_reader :name
8
+
9
+ def initialize(name)
10
+ @name = name
11
+ end
12
+
13
+ def ==(other)
14
+ other.is_a?(self.class) && @name == other.name
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -7,18 +7,21 @@ module Moxml
7
7
  module CustomizedRexml
8
8
  # Custom REXML formatter that fixes indentation and wrapping issues
9
9
  class Formatter < ::REXML::Formatters::Pretty
10
- def initialize(indentation: 2, self_close_empty: false)
10
+ def initialize(indentation: 2, self_close_empty: false, adapter: nil)
11
11
  @indentation = " " * indentation
12
12
  @level = 0
13
13
  @compact = true
14
14
  @width = -1 # Disable line wrapping
15
15
  @self_close_empty = self_close_empty
16
+ @adapter = adapter
16
17
  end
17
18
 
18
19
  def write(node, output)
19
20
  case node
20
21
  when ::REXML::XMLDecl
21
22
  write_declaration(node, output)
23
+ when ::Moxml::Adapter::CustomizedRexml::EntityReference
24
+ output << "&#{node.name};"
22
25
  else
23
26
  super
24
27
  end
@@ -29,7 +32,13 @@ module Moxml
29
32
  output << "<#{node.expanded_name}"
30
33
  write_attributes(node, output)
31
34
 
32
- if node.children.empty? && @self_close_empty
35
+ # Check for entity refs stored in adapter attachments
36
+ entity_refs = @adapter&.attachments&.get(node, :entity_refs)
37
+ child_sequence = @adapter&.attachments&.get(node, :child_sequence)
38
+
39
+ has_no_children = node.children.empty? && !(entity_refs && !entity_refs.empty?)
40
+
41
+ if has_no_children && @self_close_empty
33
42
  output << "/>"
34
43
  return
35
44
  end
@@ -42,26 +51,41 @@ module Moxml
42
51
  mixed = has_text && has_elements
43
52
 
44
53
  # Handle children based on content type
45
- unless node.children.empty?
54
+ all_children_empty = node.children.empty? && !(entity_refs && !entity_refs.empty?)
55
+ unless all_children_empty
46
56
  @level += @indentation.length unless mixed
47
57
 
48
- node.children.each_with_index do |child, _index|
49
- # Skip insignificant whitespace
50
- next if child.is_a?(::REXML::Text) &&
51
- child.to_s.strip.empty? &&
52
- !(child.next_sibling.nil? && child.previous_sibling.nil?)
53
-
54
- # Indent non-text nodes in non-mixed content
55
- # if !mixed && !child.is_a?(::REXML::Text)
56
- # output << ' ' * @level
57
- # end
58
-
59
- write(child, output)
60
-
61
- # Add newlines between elements in non-mixed content
62
- # if !mixed && !child.is_a?(::REXML::Text) && index < node.children.size - 1
63
- # output << "\n"
64
- # end
58
+ if entity_refs && !entity_refs.empty? && child_sequence
59
+ # Interleave native children with entity refs using tracked sequence
60
+ eref_idx = 0
61
+ native_idx = 0
62
+ child_sequence.each do |type|
63
+ case type
64
+ when :native
65
+ if native_idx < node.children.size
66
+ child = node.children[native_idx]
67
+ native_idx += 1
68
+ next if child.is_a?(::REXML::Text) &&
69
+ child.to_s.strip.empty? &&
70
+ !(child.next_sibling.nil? && child.previous_sibling.nil?)
71
+ write(child, output)
72
+ end
73
+ when :eref
74
+ if eref_idx < entity_refs.size
75
+ write(entity_refs[eref_idx], output)
76
+ eref_idx += 1
77
+ end
78
+ end
79
+ end
80
+ else
81
+ node.children.each_with_index do |child, _index|
82
+ # Skip insignificant whitespace
83
+ next if child.is_a?(::REXML::Text) &&
84
+ child.to_s.strip.empty? &&
85
+ !(child.next_sibling.nil? && child.previous_sibling.nil?)
86
+
87
+ write(child, output)
88
+ end
65
89
  end
66
90
 
67
91
  # Reset indentation for closing tag in non-mixed content
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module Adapter
5
+ module CustomizedRexml
6
+ autoload :EntityReference,
7
+ "moxml/adapter/customized_rexml/entity_reference"
8
+ autoload :Formatter, "moxml/adapter/customized_rexml/formatter"
9
+ end
10
+ end
11
+ end
@@ -25,17 +25,23 @@ module Moxml
25
25
  #
26
26
  class HeadedOx < Ox
27
27
  class << self
28
- # Override parse to use HeadedOx context instead of Ox context
29
- def parse(xml, _options = {}, _context = nil)
28
+ # Override parse to use lazy wrapping like the Ox adapter.
29
+ # Previously used DocumentBuilder (eager tree construction causing
30
+ # ~176K allocations per 100-element parse). Lazy parse defers wrapper
31
+ # creation until nodes are accessed, matching Ox adapter behavior.
32
+ def parse(xml, options = {}, _context = nil)
30
33
  native_doc = begin
31
34
  result = ::Ox.parse(xml)
32
35
 
33
36
  # result can be either Document or Element
34
37
  if result.is_a?(::Ox::Document)
38
+ assign_parents(result)
39
+ validate_single_root(result) if options[:strict]
35
40
  result
36
41
  else
37
42
  doc = ::Ox::Document.new
38
43
  doc << result
44
+ assign_parents(doc)
39
45
  doc
40
46
  end
41
47
  rescue ::Ox::ParseError => e
@@ -47,17 +53,17 @@ module Moxml
47
53
 
48
54
  # Use provided context if available, otherwise create new one
49
55
  ctx = _context || Context.new(:headed_ox)
50
- DocumentBuilder.new(ctx).build(native_doc)
56
+ Document.new(native_doc, ctx)
51
57
  end
52
58
 
53
59
  # Execute XPath query using Moxml's XPath engine
54
60
  #
55
61
  # This overrides the Ox adapter's xpath method which uses locate().
56
62
  #
57
- # @param [Moxml::Node] node Starting node (wrapped Moxml node)
63
+ # @param node Starting node (native or wrapped)
58
64
  # @param [String] expression XPath expression
59
65
  # @param [Hash] namespaces Namespace prefix mappings
60
- # @return [Moxml::NodeSet, Object] Query results
66
+ # @return [Array, Object] Native node array or scalar value
61
67
  def xpath(node, expression, namespaces = {})
62
68
  # If we receive a native node, wrap it first
63
69
  # Document#xpath passes @native, but our compiled XPath needs Moxml nodes
@@ -79,16 +85,33 @@ module Moxml
79
85
  # Execute on the node (now guaranteed to be wrapped Moxml node)
80
86
  result = proc.call(node)
81
87
 
82
- # Wrap Array results in NodeSet, return other types directly
88
+ # Return native arrays for Node#xpath to wrap, scalars directly.
89
+ # The adapter contract: xpath() returns Array<native> | scalar.
83
90
  case result
84
91
  when Array
85
- # Deduplicate by native object identity to handle descendant-or-self
86
- # which may yield the same native node multiple times
87
- nodeset = NodeSet.new(result, node.context)
88
- nodeset.uniq_by_native
92
+ # XPath engine returns wrapped Moxml::Node objects.
93
+ # Extract native nodes and deduplicate by object identity.
94
+ native_nodes = result.map { |n| n.is_a?(Moxml::Node) ? n.native : n }
95
+ seen = {}
96
+ native_nodes.select do |native|
97
+ id = native.object_id
98
+ if seen[id]
99
+ false
100
+ else
101
+ seen[id] = true
102
+ end
103
+ end
89
104
  when NodeSet
90
- # Deduplicate NodeSet results as well
91
- result.uniq_by_native
105
+ # NodeSet from intermediate evaluation - extract natives and deduplicate
106
+ seen = {}
107
+ result.to_a.map(&:native).select do |native|
108
+ id = native.object_id
109
+ if seen[id]
110
+ false
111
+ else
112
+ seen[id] = true
113
+ end
114
+ end
92
115
  else
93
116
  # Scalar values (string, number, boolean) - return as-is
94
117
  result
@@ -107,10 +130,10 @@ module Moxml
107
130
  # @param [Moxml::Node] node Starting node
108
131
  # @param [String] expression XPath expression
109
132
  # @param [Hash] namespaces Namespace prefix mappings
110
- # @return [Moxml::Node, Object, nil] First result or nil
133
+ # @return [Object, nil] First native node or scalar value
111
134
  def at_xpath(node, expression, namespaces = {})
112
135
  result = xpath(node, expression, namespaces)
113
- result.is_a?(NodeSet) ? result.first : result
136
+ result.is_a?(Array) ? result.first : result
114
137
  end
115
138
 
116
139
  # Check if XPath is supported