moxml 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/docs.yml +1 -1
  3. data/.github/workflows/rake.yml +16 -13
  4. data/.github/workflows/release.yml +1 -0
  5. data/.github/workflows/round-trip.yml +74 -0
  6. data/.gitignore +1 -0
  7. data/.rubocop.yml +1 -0
  8. data/.rubocop_todo.yml +160 -38
  9. data/Gemfile +2 -1
  10. data/README.adoc +287 -20
  11. data/Rakefile +11 -0
  12. data/data/w3c_entities.json +2131 -0
  13. data/docs/ENTITY_SUPPORT_FOR_LUTAML_MODEL.md +102 -0
  14. data/docs/_guides/index.adoc +14 -12
  15. data/docs/_guides/node-api-consistency.adoc +572 -0
  16. data/docs/_guides/xml-declaration.adoc +5 -5
  17. data/docs/_pages/adapters/ox.adoc +30 -0
  18. data/docs/_pages/adapters/rexml.adoc +1 -1
  19. data/docs/_pages/configuration.adoc +43 -0
  20. data/docs/_pages/node-api-reference.adoc +128 -3
  21. data/docs/_tutorials/namespace-handling.adoc +21 -0
  22. data/examples/rss_parser/rss_parser.rb +1 -3
  23. data/lib/moxml/adapter/base.rb +26 -2
  24. data/lib/moxml/adapter/headed_ox.rb +5 -4
  25. data/lib/moxml/adapter/libxml.rb +18 -3
  26. data/lib/moxml/adapter/nokogiri.rb +26 -2
  27. data/lib/moxml/adapter/oga.rb +137 -20
  28. data/lib/moxml/adapter/ox.rb +29 -3
  29. data/lib/moxml/adapter/rexml.rb +54 -7
  30. data/lib/moxml/attribute.rb +6 -0
  31. data/lib/moxml/builder.rb +6 -0
  32. data/lib/moxml/config.rb +52 -1
  33. data/lib/moxml/context.rb +21 -2
  34. data/lib/moxml/doctype.rb +33 -0
  35. data/lib/moxml/document.rb +6 -1
  36. data/lib/moxml/document_builder.rb +45 -1
  37. data/lib/moxml/element.rb +10 -3
  38. data/lib/moxml/entity_reference.rb +29 -0
  39. data/lib/moxml/entity_registry.rb +278 -0
  40. data/lib/moxml/error.rb +5 -5
  41. data/lib/moxml/node.rb +22 -8
  42. data/lib/moxml/node_set.rb +10 -6
  43. data/lib/moxml/processing_instruction.rb +6 -0
  44. data/lib/moxml/version.rb +1 -1
  45. data/lib/moxml/xml_utils.rb +25 -2
  46. data/lib/moxml/xpath/errors.rb +1 -1
  47. data/lib/moxml.rb +1 -0
  48. data/spec/consistency/README.md +3 -1
  49. data/spec/consistency/round_trip_spec.rb +479 -0
  50. data/spec/examples/readme_examples_spec.rb +1 -1
  51. data/spec/fixtures/round-trips/metanorma/a.xml +66 -0
  52. data/spec/fixtures/round-trips/metanorma/bilingual-en.xml +7682 -0
  53. data/spec/fixtures/round-trips/metanorma/bilingual-fr.xml +7520 -0
  54. data/spec/fixtures/round-trips/metanorma/bilingual.presentation.xml +21211 -0
  55. data/spec/fixtures/round-trips/metanorma/collection1.xml +313 -0
  56. data/spec/fixtures/round-trips/metanorma/collection1nested.xml +291 -0
  57. data/spec/fixtures/round-trips/metanorma/collection_docinline.xml +544 -0
  58. data/spec/fixtures/round-trips/metanorma/collection_full.xml +1776 -0
  59. data/spec/fixtures/round-trips/metanorma/dummy.1.xml +295 -0
  60. data/spec/fixtures/round-trips/metanorma/dummy.xml +349 -0
  61. data/spec/fixtures/round-trips/metanorma/footnotes.xml +70 -0
  62. data/spec/fixtures/round-trips/metanorma/iho.xml +116 -0
  63. data/spec/fixtures/round-trips/metanorma/rice-amd.final.xml +186 -0
  64. data/spec/fixtures/round-trips/metanorma/rice-amd.final_1.xml +180 -0
  65. data/spec/fixtures/round-trips/metanorma/rice-en.final.norepo.xml +116 -0
  66. data/spec/fixtures/round-trips/metanorma/rice-en.final.xml +149 -0
  67. data/spec/fixtures/round-trips/metanorma/rice-en.final_1.xml +144 -0
  68. data/spec/fixtures/round-trips/metanorma/rice1-en.final.xml +120 -0
  69. data/spec/fixtures/round-trips/metanorma/rice2-en.final.xml +116 -0
  70. data/spec/fixtures/round-trips/metanorma/test_sectionsplit.xml +119 -0
  71. data/spec/fixtures/round-trips/niso-jats/bmj_sample.xml +1068 -0
  72. data/spec/fixtures/round-trips/niso-jats/element_citation.xml +7 -0
  73. data/spec/fixtures/round-trips/niso-jats/pnas_sample.xml +3768 -0
  74. data/spec/fixtures/round-trips/rfcxml/rfc8881.xml +45848 -0
  75. data/spec/fixtures/round-trips/rfcxml/rfc8994.xml +6607 -0
  76. data/spec/fixtures/round-trips/rfcxml/rfc9000.xml +9064 -0
  77. data/spec/fixtures/round-trips/rfcxml/rfc9043.xml +5527 -0
  78. data/spec/fixtures/round-trips/rfcxml/rfc9051.xml +14286 -0
  79. data/spec/fixtures/round-trips/rfcxml/rfc9110.xml +18156 -0
  80. data/spec/fixtures/round-trips/rfcxml/rfc9260.xml +9136 -0
  81. data/spec/fixtures/round-trips/rfcxml/rfc9293.xml +8300 -0
  82. data/spec/fixtures/round-trips/rfcxml/rfc9380.xml +8916 -0
  83. data/spec/fixtures/round-trips/rfcxml/rfc9420.xml +8927 -0
  84. data/spec/fixtures/w3c/namespaces/1.0/001.xml +7 -0
  85. data/spec/fixtures/w3c/namespaces/1.0/002.xml +8 -0
  86. data/spec/fixtures/w3c/namespaces/1.0/003.xml +7 -0
  87. data/spec/fixtures/w3c/namespaces/1.0/004.xml +7 -0
  88. data/spec/fixtures/w3c/namespaces/1.0/005.xml +7 -0
  89. data/spec/fixtures/w3c/namespaces/1.0/006.xml +7 -0
  90. data/spec/fixtures/w3c/namespaces/1.0/007.xml +20 -0
  91. data/spec/fixtures/w3c/namespaces/1.0/008.xml +20 -0
  92. data/spec/fixtures/w3c/namespaces/1.0/009.xml +19 -0
  93. data/spec/fixtures/w3c/namespaces/1.0/010.xml +19 -0
  94. data/spec/fixtures/w3c/namespaces/1.0/011.xml +20 -0
  95. data/spec/fixtures/w3c/namespaces/1.0/012.xml +19 -0
  96. data/spec/fixtures/w3c/namespaces/1.0/013.xml +5 -0
  97. data/spec/fixtures/w3c/namespaces/1.0/014.xml +3 -0
  98. data/spec/fixtures/w3c/namespaces/1.0/015.xml +3 -0
  99. data/spec/fixtures/w3c/namespaces/1.0/016.xml +3 -0
  100. data/spec/fixtures/w3c/namespaces/1.0/017.xml +3 -0
  101. data/spec/fixtures/w3c/namespaces/1.0/018.xml +3 -0
  102. data/spec/fixtures/w3c/namespaces/1.0/019.xml +3 -0
  103. data/spec/fixtures/w3c/namespaces/1.0/020.xml +3 -0
  104. data/spec/fixtures/w3c/namespaces/1.0/021.xml +6 -0
  105. data/spec/fixtures/w3c/namespaces/1.0/022.xml +6 -0
  106. data/spec/fixtures/w3c/namespaces/1.0/023.xml +6 -0
  107. data/spec/fixtures/w3c/namespaces/1.0/024.xml +6 -0
  108. data/spec/fixtures/w3c/namespaces/1.0/025.xml +3 -0
  109. data/spec/fixtures/w3c/namespaces/1.0/026.xml +3 -0
  110. data/spec/fixtures/w3c/namespaces/1.0/027.xml +3 -0
  111. data/spec/fixtures/w3c/namespaces/1.0/028.xml +3 -0
  112. data/spec/fixtures/w3c/namespaces/1.0/029.xml +4 -0
  113. data/spec/fixtures/w3c/namespaces/1.0/030.xml +4 -0
  114. data/spec/fixtures/w3c/namespaces/1.0/031.xml +4 -0
  115. data/spec/fixtures/w3c/namespaces/1.0/032.xml +5 -0
  116. data/spec/fixtures/w3c/namespaces/1.0/033.xml +4 -0
  117. data/spec/fixtures/w3c/namespaces/1.0/034.xml +3 -0
  118. data/spec/fixtures/w3c/namespaces/1.0/035.xml +8 -0
  119. data/spec/fixtures/w3c/namespaces/1.0/036.xml +8 -0
  120. data/spec/fixtures/w3c/namespaces/1.0/037.xml +8 -0
  121. data/spec/fixtures/w3c/namespaces/1.0/038.xml +8 -0
  122. data/spec/fixtures/w3c/namespaces/1.0/039.xml +10 -0
  123. data/spec/fixtures/w3c/namespaces/1.0/040.xml +9 -0
  124. data/spec/fixtures/w3c/namespaces/1.0/041.xml +8 -0
  125. data/spec/fixtures/w3c/namespaces/1.0/042.xml +4 -0
  126. data/spec/fixtures/w3c/namespaces/1.0/043.xml +7 -0
  127. data/spec/fixtures/w3c/namespaces/1.0/044.xml +7 -0
  128. data/spec/fixtures/w3c/namespaces/1.0/045.xml +7 -0
  129. data/spec/fixtures/w3c/namespaces/1.0/046.xml +10 -0
  130. data/spec/fixtures/w3c/namespaces/1.0/047.xml +4 -0
  131. data/spec/fixtures/w3c/namespaces/1.0/048.xml +5 -0
  132. data/spec/fixtures/w3c/namespaces/1.0/LICENSE.md +32 -0
  133. data/spec/fixtures/w3c/namespaces/1.0/README.adoc +42 -0
  134. data/spec/fixtures/w3c/namespaces/1.0/rmt-ns10.xml +156 -0
  135. data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +14 -2
  136. data/spec/integration/shared_examples/w3c_namespace_examples.rb +10 -0
  137. data/spec/integration/w3c_namespace_spec.rb +69 -0
  138. data/spec/moxml/adapter/libxml_spec.rb +7 -1
  139. data/spec/moxml/adapter/oga_spec.rb +92 -0
  140. data/spec/moxml/config_spec.rb +75 -0
  141. data/spec/moxml/doctype_spec.rb +19 -3
  142. data/spec/moxml/entity_registry_spec.rb +184 -0
  143. data/spec/moxml/error_spec.rb +2 -2
  144. data/spec/moxml/namespace_uri_validation_spec.rb +140 -0
  145. data/spec/moxml/xpath/axes_spec.rb +3 -4
  146. data/spec/performance/xpath_benchmark_spec.rb +6 -54
  147. data/spec/support/w3c_namespace_helpers.rb +41 -0
  148. data/spec/unit/rexml_isolated_test.rb +271 -0
  149. metadata +99 -3
  150. data/.ruby-version +0 -1
@@ -48,7 +48,37 @@ doc.xpath("//book").find { |book| book["id"] == "123" }
48
48
  IMPORTANT: For complete XPath 1.0 specification support with zero limitations today, use
49
49
  Nokogiri or Oga adapters.
50
50
 
51
+ ==== Element Ordering in Round-Trip Tests
51
52
 
53
+ [IMPORTANT]
54
+ .Round-trip tests for Ox are experimental
55
+ --
56
+ Ox round-trip tests (`nokogiri-ox` CI job) are **experimental** because Ox lacks full XML
57
+ conformance:
58
+
59
+ * **Namespace support**: Ox does not properly handle namespaced elements in XPath queries
60
+ * **XPath limitations**: Uses `locate()` instead of standard XPath; no attribute value
61
+ predicates, no logical operators, no position predicates
62
+ * **Element ordering**: Ox produces elements in a different order than Nokogiri/Oga for
63
+ certain complex fixtures
64
+
65
+ For production use with complex XML, prefer Nokogiri or Oga adapters.
66
+ --
67
+
68
+ The Ox adapter produces elements in a different order than other adapters for certain
69
+ fixtures with complex nested structures. In round-trip tests, this causes the
70
+ `elements_with_attributes` array length comparison to fail, even though the semantic
71
+ equivalence check (double round-trip) passes.
72
+
73
+ Known affected fixtures:
74
+
75
+ * `niso-jats/element_citation.xml`
76
+ * `niso-jats/pnas_sample.xml`
77
+ * `metanorma/collection1nested.xml`
78
+
79
+ This is a known limitation tracked in the round-trip test suite via
80
+ `KNOWN_ELEMENT_ORDERING_ISSUES`. The `elements_with_attributes` comparison is
81
+ automatically skipped for these Ox adapter pairs.
52
82
 
53
83
  See also:
54
84
 
@@ -283,7 +283,7 @@ end
283
283
  === References
284
284
 
285
285
  * link:https://github.com/ruby/rexml[REXML on GitHub]
286
- * link:https://ruby-doc.org/stdlib/libdoc/rexml/rdoc/REXML.html[REXML documentation]
286
+ * link:https://ruby-doc.org/stdlib/libdoc/rexml/rdoc/REXML[REXML documentation]
287
287
 
288
288
  === See also
289
289
 
@@ -97,6 +97,49 @@ context.config.default_encoding = 'UTF-16'
97
97
 
98
98
  **Default:** `"UTF-8"`
99
99
 
100
+ ==== Namespace URI validation mode
101
+
102
+ Control how strictly namespace URIs are validated:
103
+
104
+ [source,ruby]
105
+ ----
106
+ # Strict mode (default) — validates namespace URIs against RFC 3986
107
+ context.config.namespace_uri_mode = :strict
108
+ doc = context.parse(xml) # Raises ValidationError for invalid URIs
109
+
110
+ # Lenient mode — accepts any string as a namespace URI
111
+ context.config.namespace_uri_mode = :lenient
112
+ doc = context.parse(xml) # Accepts non-standard namespace URIs
113
+ ----
114
+
115
+ **Default:** `:strict`
116
+
117
+ **Modes:**
118
+
119
+ `:strict`:: Validates namespace URIs against the
120
+ https://www.rfc-editor.org/rfc/rfc3986[RFC 3986] URI-reference specification, as
121
+ required by https://www.w3.org/TR/xml-names/[Namespaces in XML]. Invalid URIs
122
+ raise a `Moxml::ValidationError`. This is the recommended mode for
123
+ standards-compliant XML processing.
124
+
125
+ `:lenient`:: Accepts any string as a namespace URI, only rejecting strings
126
+ containing XML-invalid control characters (`0x00`-`0x08`, `0x0B`, `0x0C`,
127
+ `0x0E`-`0x1F`). Use this mode when processing XML documents that use
128
+ non-standard namespace identifiers that are not valid RFC 3986 URI references.
129
+
130
+ **Example:**
131
+
132
+ [source,ruby]
133
+ ----
134
+ # Process documents with non-standard namespace URIs
135
+ context = Moxml.new do |config|
136
+ config.namespace_uri_mode = :lenient
137
+ end
138
+
139
+ xml = '<root xmlns:ex="not a valid URI but accepted in lenient mode"/>'
140
+ doc = context.parse(xml) # Parses successfully
141
+ ----
142
+
100
143
  === Context switching
101
144
 
102
145
  Use different configurations for different tasks:
@@ -1,9 +1,81 @@
1
1
  ---
2
- title: Node API reference
3
- nav_order: 5
2
+ title: Node API Reference
3
+ :toc:
4
+ :toclevels: 3
4
5
  ---
5
6
 
6
- == Node API reference
7
+ == Node API Reference
8
+
9
+ This reference documents the API of all node types in Moxml. For a guide on API consistency and safe coding patterns, see the link:../guides/node-api-consistency[Node API Consistency Guide].
10
+
11
+ == Node Identity: The #identifier Method
12
+
13
+ All node types in Moxml support the `#identifier` method, which returns the primary identifier for a node:
14
+
15
+ [cols="1,2,1"]
16
+ |===
17
+ | Node Type | #identifier Returns | Example
18
+
19
+ | Element
20
+ | The tag name
21
+ | `"book"`, `"title"`
22
+
23
+ | Attribute
24
+ | The attribute name
25
+ | `"id"`, `"class"`
26
+
27
+ | ProcessingInstruction
28
+ | The PI target
29
+ | `"xml-stylesheet"`
30
+
31
+ | Text, Comment, Cdata
32
+ | `nil` (no identifier)
33
+ | `nil`
34
+
35
+ | EntityReference
36
+ | The entity name
37
+ | `"nbsp"`, `"copy"`
38
+
39
+ | Declaration
40
+ | `nil` (no identifier)
41
+ | `nil`
42
+
43
+ | Document
44
+ | `nil` (no identifier)
45
+ | `nil`
46
+ |===
47
+
48
+ **Example usage:**
49
+
50
+ [source,ruby]
51
+ ----
52
+ element = doc.at_xpath("//book")
53
+ puts element.identifier # => "book"
54
+
55
+ attr = element.attribute("id")
56
+ puts attr.identifier # => "id"
57
+
58
+ pi = doc.children.find { |n| n.processing_instruction? }
59
+ puts pi.identifier # => "xml-stylesheet"
60
+
61
+ text = element.children.find { |n| n.text? }
62
+ puts text.identifier # => nil
63
+ ----
64
+
65
+ **Safe iteration over mixed nodes:**
66
+
67
+ [source,ruby]
68
+ ----
69
+ doc.root.children.each do |node|
70
+ if id = node.identifier
71
+ puts "#{node.class.name.split('::').last}: #{id}"
72
+ else
73
+ puts "#{node.class.name.split('::').last}: (no identifier)"
74
+ end
75
+ end
76
+ ----
77
+
78
+ == Common Node Methods
7
79
 
8
80
  == XML objects and their methods
9
81
 
@@ -47,3 +119,56 @@ See also:
47
119
 
48
120
  * link:../guides/working-with-documents[Working with documents guide]
49
121
  * link:../guides/advanced-features[Advanced features guide]
122
+ === Doctype nodes
123
+
124
+ Doctype nodes represent DOCTYPE declarations in XML documents.
125
+
126
+ [source,ruby]
127
+ ----
128
+ doctype = doc.create_doctype("html", "-//W3C//DTD HTML 4.01//EN",
129
+ "http://www.w3.org/TR/html4/strict.dtd")
130
+ doctype.name # => "html"
131
+ doctype.external_id # => "-//W3C//DTD HTML 4.01//EN"
132
+ doctype.system_id # => "http://www.w3.org/TR/html4/strict.dtd"
133
+ doctype.identifier # => "html"
134
+ ----
135
+
136
+ *Available methods:*
137
+
138
+ * `name` - Returns the DOCTYPE name (root element name)
139
+ * `external_id` - Returns the PUBLIC identifier (or nil)
140
+ * `system_id` - Returns the SYSTEM identifier (DTD URI, or nil)
141
+ * `identifier` - Returns the primary identifier (same as `name`)
142
+
143
+ All Doctype accessor methods are fully implemented across all 6 adapters.
144
+
145
+ === EntityReference nodes
146
+
147
+ EntityReference nodes represent XML entity references like `&nbsp;`, `&copy;`, or custom entities declared in the DOCTYPE.
148
+
149
+ [source,ruby]
150
+ ----
151
+ # Create programmatically
152
+ ref = doc.create_entity_reference('nbsp')
153
+ element.add_child(ref)
154
+
155
+ # Or via builder
156
+ doc = Moxml::Builder.new(Moxml.new).build do
157
+ element 'text' do
158
+ entity_reference 'ndash'
159
+ end
160
+ end
161
+ ----
162
+
163
+ *Available methods:*
164
+
165
+ * `name` - Returns the entity name (e.g., `"nbsp"`, `"copy"`)
166
+ * `identifier` - Returns the primary identifier (same as `name`)
167
+ * `text` - Returns an empty string (`""`) because an entity reference has no text content of its own
168
+ * `content` - Returns an empty string (the entity is identified by its name, not by content)
169
+ * `to_xml` - Returns the entity syntax (e.g., `"&nbsp;"`)
170
+
171
+ *Adapter notes:*
172
+
173
+ * *Nokogiri*: Preserves custom declared entities as `EntityReference` nodes
174
+ * *Ox, Oga*: These adapters resolve entities during parsing and do not expose entity reference nodes. Use Nokogiri or LibXML for entity preservation.
@@ -276,6 +276,27 @@ puts all_children.length # => 2
276
276
  * link:../pages/adapters/rexml[REXML] - ⚠️ No namespace XPath
277
277
  * link:../pages/adapters/ox[Ox] - ⚠️ Basic only, no XPath
278
278
 
279
+ === Namespace URI validation
280
+
281
+ By default, Moxml validates namespace URIs against
282
+ https://www.rfc-editor.org/rfc/rfc3986[RFC 3986] (strict mode). To accept
283
+ non-standard namespace identifiers, use lenient mode:
284
+
285
+ [source,ruby]
286
+ ----
287
+ # Strict mode (default) — validates URIs per RFC 3986
288
+ context = Moxml.new do |config|
289
+ config.namespace_uri_mode = :strict
290
+ end
291
+
292
+ # Lenient mode — accepts any string as namespace URI
293
+ context = Moxml.new do |config|
294
+ config.namespace_uri_mode = :lenient
295
+ end
296
+ ----
297
+
298
+ See link:../pages/configuration[Configuration] for details.
299
+
279
300
  === Troubleshooting
280
301
 
281
302
  **Namespace XPath not working:**
@@ -179,9 +179,7 @@ if __FILE__ == $0
179
179
 
180
180
  # Count categories
181
181
  all_categories = articles.flat_map(&:categories)
182
- category_counts = all_categories.each_with_object(Hash.new(0)) do |cat, counts|
183
- counts[cat] += 1
184
- end
182
+ category_counts = all_categories.tally
185
183
  puts "Categories: #{category_counts.map do |cat, count|
186
184
  "#{cat} (#{count})"
187
185
  end.join(', ')}"
@@ -98,12 +98,24 @@ module Moxml
98
98
  create_native_declaration(version, encoding, standalone)
99
99
  end
100
100
 
101
- def create_namespace(element, prefix, uri)
101
+ def create_namespace(element, prefix, uri, namespace_uri_mode: :strict)
102
+ if prefix && uri.to_s.empty?
103
+ raise NamespaceError.new(
104
+ "Prefixed namespace declaration cannot have an empty URI",
105
+ prefix: prefix,
106
+ uri: uri,
107
+ )
108
+ end
102
109
  validate_prefix(prefix) if prefix
103
- validate_uri(uri)
110
+ validate_uri(uri, mode: namespace_uri_mode)
104
111
  create_native_namespace(element, prefix, uri)
105
112
  end
106
113
 
114
+ def create_entity_reference(name)
115
+ validate_entity_reference_name(name)
116
+ create_native_entity_reference(name)
117
+ end
118
+
107
119
  def set_attribute_name(attribute, name)
108
120
  attribute.name = name
109
121
  end
@@ -112,6 +124,10 @@ module Moxml
112
124
  attribute.value = value
113
125
  end
114
126
 
127
+ def entity_reference_name(node)
128
+ node.name
129
+ end
130
+
115
131
  def duplicate_node(node)
116
132
  node.dup
117
133
  end
@@ -193,6 +209,14 @@ module Moxml
193
209
  adapter: name,
194
210
  )
195
211
  end
212
+
213
+ def create_native_entity_reference(_name)
214
+ raise Moxml::NotImplementedError.new(
215
+ "create_native_entity_reference not implemented",
216
+ feature: "create_native_entity_reference",
217
+ adapter: name,
218
+ )
219
+ end
196
220
  end
197
221
  end
198
222
  end
@@ -26,7 +26,7 @@ module Moxml
26
26
  class HeadedOx < Ox
27
27
  class << self
28
28
  # Override parse to use HeadedOx context instead of Ox context
29
- def parse(xml, _options = {})
29
+ def parse(xml, _options = {}, _context = nil)
30
30
  native_doc = begin
31
31
  result = ::Ox.parse(xml)
32
32
 
@@ -45,8 +45,9 @@ module Moxml
45
45
  )
46
46
  end
47
47
 
48
- # Use :headed_ox context instead of :ox
49
- DocumentBuilder.new(Context.new(:headed_ox)).build(native_doc)
48
+ # Use provided context if available, otherwise create new one
49
+ ctx = _context || Context.new(:headed_ox)
50
+ DocumentBuilder.new(ctx).build(native_doc)
50
51
  end
51
52
 
52
53
  # Execute XPath query using Moxml's XPath engine
@@ -66,7 +67,7 @@ module Moxml
66
67
  ctx = Context.new(:headed_ox)
67
68
 
68
69
  # Wrap the native node - don't rebuild the whole document
69
- node = Node.wrap(node, ctx)
70
+ node = Moxml::Node.wrap(node, ctx)
70
71
  end
71
72
 
72
73
  # Parse XPath expression to AST
@@ -48,7 +48,7 @@ module Moxml
48
48
  doc.root = element
49
49
  end
50
50
 
51
- def parse(xml, options = {})
51
+ def parse(xml, options = {}, _context = nil)
52
52
  # LibXML doesn't preserve DOCTYPE during parsing, so we need to extract it manually
53
53
  xml_string = if xml.is_a?(String)
54
54
  xml
@@ -94,7 +94,8 @@ module Moxml
94
94
  native_doc.instance_variable_set(:@moxml_doctype, doctype_wrapper)
95
95
  end
96
96
 
97
- DocumentBuilder.new(Context.new(:libxml)).build(native_doc)
97
+ ctx = _context || Context.new(:libxml)
98
+ DocumentBuilder.new(ctx).build(native_doc)
98
99
  end
99
100
 
100
101
  # SAX parsing implementation for LibXML
@@ -788,6 +789,20 @@ module Moxml
788
789
  end
789
790
  end
790
791
 
792
+ # Doctype accessor methods
793
+ def doctype_name(native)
794
+ # LibXML uses DoctypeWrapper which stores the values
795
+ native.name
796
+ end
797
+
798
+ def doctype_external_id(native)
799
+ native.external_id
800
+ end
801
+
802
+ def doctype_system_id(native)
803
+ native.system_id
804
+ end
805
+
791
806
  def xpath(node, expression, namespaces = nil)
792
807
  native_node = unpatch_node(node)
793
808
  return [] unless native_node
@@ -1149,7 +1164,7 @@ module Moxml
1149
1164
  # Add namespace definitions (only on this element, not ancestors)
1150
1165
  if elem.respond_to?(:namespaces)
1151
1166
  seen_ns = {}
1152
- elem.namespaces.definitions.each do |ns|
1167
+ elem.namespaces.each do |ns|
1153
1168
  prefix = ns.prefix
1154
1169
  uri = ns.href
1155
1170
  next if seen_ns.key?(prefix)
@@ -11,7 +11,7 @@ module Moxml
11
11
  doc.root = element
12
12
  end
13
13
 
14
- def parse(xml, options = {})
14
+ def parse(xml, options = {}, _context = nil)
15
15
  native_doc = begin
16
16
  if options[:fragment]
17
17
  ::Nokogiri::XML::DocumentFragment.parse(xml) do |config|
@@ -29,7 +29,9 @@ module Moxml
29
29
  column: e.column)
30
30
  end
31
31
 
32
- DocumentBuilder.new(Context.new(:nokogiri)).build(native_doc)
32
+ # Use provided context if available, otherwise create new one
33
+ ctx = _context || Context.new(:nokogiri)
34
+ DocumentBuilder.new(ctx).build(native_doc)
33
35
  end
34
36
 
35
37
  # SAX parsing implementation for Nokogiri
@@ -104,6 +106,14 @@ module Moxml
104
106
  )
105
107
  end
106
108
 
109
+ def create_native_entity_reference(name)
110
+ ::Nokogiri::XML::EntityReference.new(create_document, name)
111
+ end
112
+
113
+ def entity_reference_name(node)
114
+ node.name
115
+ end
116
+
107
117
  def declaration_attribute(declaration, attr_name)
108
118
  return nil unless declaration.content
109
119
 
@@ -150,6 +160,7 @@ module Moxml
150
160
  when ::Nokogiri::XML::ProcessingInstruction then :processing_instruction
151
161
  when ::Nokogiri::XML::Document, ::Nokogiri::XML::DocumentFragment then :document
152
162
  when ::Nokogiri::XML::DTD then :doctype
163
+ when ::Nokogiri::XML::EntityReference then :entity_reference
153
164
  else :unknown
154
165
  end
155
166
  end
@@ -321,6 +332,19 @@ module Moxml
321
332
  node.namespace_definitions
322
333
  end
323
334
 
335
+ # Doctype accessor methods
336
+ def doctype_name(native)
337
+ native.name
338
+ end
339
+
340
+ def doctype_external_id(native)
341
+ native.external_id
342
+ end
343
+
344
+ def doctype_system_id(native)
345
+ native.system_id
346
+ end
347
+
324
348
  def xpath(node, expression, namespaces = nil)
325
349
  node.xpath(expression, namespaces).to_a
326
350
  rescue ::Nokogiri::XML::XPath::SyntaxError => e