moxml 0.1.9 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/docs.yml +1 -1
- data/.github/workflows/rake.yml +16 -13
- data/.github/workflows/release.yml +1 -0
- data/.github/workflows/round-trip.yml +74 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +160 -38
- data/Gemfile +2 -1
- data/README.adoc +287 -20
- data/Rakefile +11 -0
- data/data/w3c_entities.json +2131 -0
- data/docs/ENTITY_SUPPORT_FOR_LUTAML_MODEL.md +102 -0
- data/docs/_guides/index.adoc +14 -12
- data/docs/_guides/node-api-consistency.adoc +572 -0
- data/docs/_guides/xml-declaration.adoc +5 -5
- data/docs/_pages/adapters/ox.adoc +30 -0
- data/docs/_pages/adapters/rexml.adoc +1 -1
- data/docs/_pages/configuration.adoc +43 -0
- data/docs/_pages/node-api-reference.adoc +128 -3
- data/docs/_tutorials/namespace-handling.adoc +21 -0
- data/examples/rss_parser/rss_parser.rb +1 -3
- data/lib/moxml/adapter/base.rb +26 -2
- data/lib/moxml/adapter/headed_ox.rb +5 -4
- data/lib/moxml/adapter/libxml.rb +18 -3
- data/lib/moxml/adapter/nokogiri.rb +26 -2
- data/lib/moxml/adapter/oga.rb +137 -20
- data/lib/moxml/adapter/ox.rb +29 -3
- data/lib/moxml/adapter/rexml.rb +54 -7
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +6 -0
- data/lib/moxml/config.rb +52 -1
- data/lib/moxml/context.rb +21 -2
- data/lib/moxml/doctype.rb +33 -0
- data/lib/moxml/document.rb +6 -1
- data/lib/moxml/document_builder.rb +45 -1
- data/lib/moxml/element.rb +10 -3
- data/lib/moxml/entity_reference.rb +29 -0
- data/lib/moxml/entity_registry.rb +278 -0
- data/lib/moxml/error.rb +5 -5
- data/lib/moxml/node.rb +22 -8
- data/lib/moxml/node_set.rb +10 -6
- data/lib/moxml/processing_instruction.rb +6 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils.rb +25 -2
- data/lib/moxml/xpath/errors.rb +1 -1
- data/lib/moxml.rb +1 -0
- data/spec/consistency/README.md +3 -1
- data/spec/consistency/round_trip_spec.rb +479 -0
- data/spec/examples/readme_examples_spec.rb +1 -1
- data/spec/fixtures/round-trips/metanorma/a.xml +66 -0
- data/spec/fixtures/round-trips/metanorma/bilingual-en.xml +7682 -0
- data/spec/fixtures/round-trips/metanorma/bilingual-fr.xml +7520 -0
- data/spec/fixtures/round-trips/metanorma/bilingual.presentation.xml +21211 -0
- data/spec/fixtures/round-trips/metanorma/collection1.xml +313 -0
- data/spec/fixtures/round-trips/metanorma/collection1nested.xml +291 -0
- data/spec/fixtures/round-trips/metanorma/collection_docinline.xml +544 -0
- data/spec/fixtures/round-trips/metanorma/collection_full.xml +1776 -0
- data/spec/fixtures/round-trips/metanorma/dummy.1.xml +295 -0
- data/spec/fixtures/round-trips/metanorma/dummy.xml +349 -0
- data/spec/fixtures/round-trips/metanorma/footnotes.xml +70 -0
- data/spec/fixtures/round-trips/metanorma/iho.xml +116 -0
- data/spec/fixtures/round-trips/metanorma/rice-amd.final.xml +186 -0
- data/spec/fixtures/round-trips/metanorma/rice-amd.final_1.xml +180 -0
- data/spec/fixtures/round-trips/metanorma/rice-en.final.norepo.xml +116 -0
- data/spec/fixtures/round-trips/metanorma/rice-en.final.xml +149 -0
- data/spec/fixtures/round-trips/metanorma/rice-en.final_1.xml +144 -0
- data/spec/fixtures/round-trips/metanorma/rice1-en.final.xml +120 -0
- data/spec/fixtures/round-trips/metanorma/rice2-en.final.xml +116 -0
- data/spec/fixtures/round-trips/metanorma/test_sectionsplit.xml +119 -0
- data/spec/fixtures/round-trips/niso-jats/bmj_sample.xml +1068 -0
- data/spec/fixtures/round-trips/niso-jats/element_citation.xml +7 -0
- data/spec/fixtures/round-trips/niso-jats/pnas_sample.xml +3768 -0
- data/spec/fixtures/round-trips/rfcxml/rfc8881.xml +45848 -0
- data/spec/fixtures/round-trips/rfcxml/rfc8994.xml +6607 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9000.xml +9064 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9043.xml +5527 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9051.xml +14286 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9110.xml +18156 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9260.xml +9136 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9293.xml +8300 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9380.xml +8916 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9420.xml +8927 -0
- data/spec/fixtures/w3c/namespaces/1.0/001.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/002.xml +8 -0
- data/spec/fixtures/w3c/namespaces/1.0/003.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/004.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/005.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/006.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/007.xml +20 -0
- data/spec/fixtures/w3c/namespaces/1.0/008.xml +20 -0
- data/spec/fixtures/w3c/namespaces/1.0/009.xml +19 -0
- data/spec/fixtures/w3c/namespaces/1.0/010.xml +19 -0
- data/spec/fixtures/w3c/namespaces/1.0/011.xml +20 -0
- data/spec/fixtures/w3c/namespaces/1.0/012.xml +19 -0
- data/spec/fixtures/w3c/namespaces/1.0/013.xml +5 -0
- data/spec/fixtures/w3c/namespaces/1.0/014.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/015.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/016.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/017.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/018.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/019.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/020.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/021.xml +6 -0
- data/spec/fixtures/w3c/namespaces/1.0/022.xml +6 -0
- data/spec/fixtures/w3c/namespaces/1.0/023.xml +6 -0
- data/spec/fixtures/w3c/namespaces/1.0/024.xml +6 -0
- data/spec/fixtures/w3c/namespaces/1.0/025.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/026.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/027.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/028.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/029.xml +4 -0
- data/spec/fixtures/w3c/namespaces/1.0/030.xml +4 -0
- data/spec/fixtures/w3c/namespaces/1.0/031.xml +4 -0
- data/spec/fixtures/w3c/namespaces/1.0/032.xml +5 -0
- data/spec/fixtures/w3c/namespaces/1.0/033.xml +4 -0
- data/spec/fixtures/w3c/namespaces/1.0/034.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/035.xml +8 -0
- data/spec/fixtures/w3c/namespaces/1.0/036.xml +8 -0
- data/spec/fixtures/w3c/namespaces/1.0/037.xml +8 -0
- data/spec/fixtures/w3c/namespaces/1.0/038.xml +8 -0
- data/spec/fixtures/w3c/namespaces/1.0/039.xml +10 -0
- data/spec/fixtures/w3c/namespaces/1.0/040.xml +9 -0
- data/spec/fixtures/w3c/namespaces/1.0/041.xml +8 -0
- data/spec/fixtures/w3c/namespaces/1.0/042.xml +4 -0
- data/spec/fixtures/w3c/namespaces/1.0/043.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/044.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/045.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/046.xml +10 -0
- data/spec/fixtures/w3c/namespaces/1.0/047.xml +4 -0
- data/spec/fixtures/w3c/namespaces/1.0/048.xml +5 -0
- data/spec/fixtures/w3c/namespaces/1.0/LICENSE.md +32 -0
- data/spec/fixtures/w3c/namespaces/1.0/README.adoc +42 -0
- data/spec/fixtures/w3c/namespaces/1.0/rmt-ns10.xml +156 -0
- data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +14 -2
- data/spec/integration/shared_examples/w3c_namespace_examples.rb +10 -0
- data/spec/integration/w3c_namespace_spec.rb +69 -0
- data/spec/moxml/adapter/libxml_spec.rb +7 -1
- data/spec/moxml/adapter/oga_spec.rb +92 -0
- data/spec/moxml/config_spec.rb +75 -0
- data/spec/moxml/doctype_spec.rb +19 -3
- data/spec/moxml/entity_registry_spec.rb +184 -0
- data/spec/moxml/error_spec.rb +2 -2
- data/spec/moxml/namespace_uri_validation_spec.rb +140 -0
- data/spec/moxml/xpath/axes_spec.rb +3 -4
- data/spec/performance/xpath_benchmark_spec.rb +6 -54
- data/spec/support/w3c_namespace_helpers.rb +41 -0
- data/spec/unit/rexml_isolated_test.rb +271 -0
- metadata +99 -3
- data/.ruby-version +0 -1
|
@@ -48,7 +48,37 @@ doc.xpath("//book").find { |book| book["id"] == "123" }
|
|
|
48
48
|
IMPORTANT: For complete XPath 1.0 specification support with zero limitations today, use
|
|
49
49
|
Nokogiri or Oga adapters.
|
|
50
50
|
|
|
51
|
+
==== Element Ordering in Round-Trip Tests
|
|
51
52
|
|
|
53
|
+
[IMPORTANT]
|
|
54
|
+
.Round-trip tests for Ox are experimental
|
|
55
|
+
--
|
|
56
|
+
Ox round-trip tests (`nokogiri-ox` CI job) are **experimental** because Ox lacks full XML
|
|
57
|
+
conformance:
|
|
58
|
+
|
|
59
|
+
* **Namespace support**: Ox does not properly handle namespaced elements in XPath queries
|
|
60
|
+
* **XPath limitations**: Uses `locate()` instead of standard XPath; no attribute value
|
|
61
|
+
predicates, no logical operators, no position predicates
|
|
62
|
+
* **Element ordering**: Ox produces elements in a different order than Nokogiri/Oga for
|
|
63
|
+
certain complex fixtures
|
|
64
|
+
|
|
65
|
+
For production use with complex XML, prefer Nokogiri or Oga adapters.
|
|
66
|
+
--
|
|
67
|
+
|
|
68
|
+
The Ox adapter produces elements in a different order than other adapters for certain
|
|
69
|
+
fixtures with complex nested structures. In round-trip tests, this causes the
|
|
70
|
+
`elements_with_attributes` array length comparison to fail, even though the semantic
|
|
71
|
+
equivalence check (double round-trip) passes.
|
|
72
|
+
|
|
73
|
+
Known affected fixtures:
|
|
74
|
+
|
|
75
|
+
* `niso-jats/element_citation.xml`
|
|
76
|
+
* `niso-jats/pnas_sample.xml`
|
|
77
|
+
* `metanorma/collection1nested.xml`
|
|
78
|
+
|
|
79
|
+
This is a known limitation tracked in the round-trip test suite via
|
|
80
|
+
`KNOWN_ELEMENT_ORDERING_ISSUES`. The `elements_with_attributes` comparison is
|
|
81
|
+
automatically skipped for these Ox adapter pairs.
|
|
52
82
|
|
|
53
83
|
See also:
|
|
54
84
|
|
|
@@ -283,7 +283,7 @@ end
|
|
|
283
283
|
=== References
|
|
284
284
|
|
|
285
285
|
* link:https://github.com/ruby/rexml[REXML on GitHub]
|
|
286
|
-
* link:https://ruby-doc.org/stdlib/libdoc/rexml/rdoc/REXML
|
|
286
|
+
* link:https://ruby-doc.org/stdlib/libdoc/rexml/rdoc/REXML[REXML documentation]
|
|
287
287
|
|
|
288
288
|
=== See also
|
|
289
289
|
|
|
@@ -97,6 +97,49 @@ context.config.default_encoding = 'UTF-16'
|
|
|
97
97
|
|
|
98
98
|
**Default:** `"UTF-8"`
|
|
99
99
|
|
|
100
|
+
==== Namespace URI validation mode
|
|
101
|
+
|
|
102
|
+
Control how strictly namespace URIs are validated:
|
|
103
|
+
|
|
104
|
+
[source,ruby]
|
|
105
|
+
----
|
|
106
|
+
# Strict mode (default) — validates namespace URIs against RFC 3986
|
|
107
|
+
context.config.namespace_uri_mode = :strict
|
|
108
|
+
doc = context.parse(xml) # Raises ValidationError for invalid URIs
|
|
109
|
+
|
|
110
|
+
# Lenient mode — accepts any string as a namespace URI
|
|
111
|
+
context.config.namespace_uri_mode = :lenient
|
|
112
|
+
doc = context.parse(xml) # Accepts non-standard namespace URIs
|
|
113
|
+
----
|
|
114
|
+
|
|
115
|
+
**Default:** `:strict`
|
|
116
|
+
|
|
117
|
+
**Modes:**
|
|
118
|
+
|
|
119
|
+
`:strict`:: Validates namespace URIs against the
|
|
120
|
+
https://www.rfc-editor.org/rfc/rfc3986[RFC 3986] URI-reference specification, as
|
|
121
|
+
required by https://www.w3.org/TR/xml-names/[Namespaces in XML]. Invalid URIs
|
|
122
|
+
raise a `Moxml::ValidationError`. This is the recommended mode for
|
|
123
|
+
standards-compliant XML processing.
|
|
124
|
+
|
|
125
|
+
`:lenient`:: Accepts any string as a namespace URI, only rejecting strings
|
|
126
|
+
containing XML-invalid control characters (`0x00`-`0x08`, `0x0B`, `0x0C`,
|
|
127
|
+
`0x0E`-`0x1F`). Use this mode when processing XML documents that use
|
|
128
|
+
non-standard namespace identifiers such as URNs or other non-URI strings.
|
|
129
|
+
|
|
130
|
+
**Example:**
|
|
131
|
+
|
|
132
|
+
[source,ruby]
|
|
133
|
+
----
|
|
134
|
+
# Process documents with non-standard namespace URIs
|
|
135
|
+
context = Moxml.new do |config|
|
|
136
|
+
config.namespace_uri_mode = :lenient
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
xml = '<root xmlns:ex="not a valid URI but accepted in lenient mode"/>'
|
|
140
|
+
doc = context.parse(xml) # Parses successfully
|
|
141
|
+
----
|
|
142
|
+
|
|
100
143
|
=== Context switching
|
|
101
144
|
|
|
102
145
|
Use different configurations for different tasks:
|
|
@@ -1,9 +1,81 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: Node API
|
|
3
|
-
|
|
2
|
+
title: Node API Reference
|
|
3
|
+
:toc:
|
|
4
|
+
:toclevels: 3
|
|
4
5
|
---
|
|
5
6
|
|
|
6
|
-
== Node API
|
|
7
|
+
== Node API Reference
|
|
8
|
+
|
|
9
|
+
This reference documents the API of all node types in Moxml. For a guide on API consistency and safe coding patterns, see the link:../guides/node-api-consistency[Node API Consistency Guide].
|
|
10
|
+
|
|
11
|
+
== Node Identity: The #identifier Method
|
|
12
|
+
|
|
13
|
+
All node types in Moxml support the `#identifier` method, which returns the primary identifier for a node:
|
|
14
|
+
|
|
15
|
+
[cols="1,2,1"]
|
|
16
|
+
|===
|
|
17
|
+
| Node Type | #identifier Returns | Example
|
|
18
|
+
|
|
19
|
+
| Element
|
|
20
|
+
| The tag name
|
|
21
|
+
| `"book"`, `"title"`
|
|
22
|
+
|
|
23
|
+
| Attribute
|
|
24
|
+
| The attribute name
|
|
25
|
+
| `"id"`, `"class"`
|
|
26
|
+
|
|
27
|
+
| ProcessingInstruction
|
|
28
|
+
| The PI target
|
|
29
|
+
| `"xml-stylesheet"`
|
|
30
|
+
|
|
31
|
+
| Text, Comment, Cdata
|
|
32
|
+
| `nil` (no identifier)
|
|
33
|
+
| `nil`
|
|
34
|
+
|
|
35
|
+
| EntityReference
|
|
36
|
+
| The entity name
|
|
37
|
+
| `"nbsp"`, `"copy"`
|
|
38
|
+
|
|
39
|
+
| Declaration
|
|
40
|
+
| `nil` (no identifier)
|
|
41
|
+
| `nil`
|
|
42
|
+
|
|
43
|
+
| Document
|
|
44
|
+
| `nil` (no identifier)
|
|
45
|
+
| `nil`
|
|
46
|
+
|===
|
|
47
|
+
|
|
48
|
+
**Example usage:**
|
|
49
|
+
|
|
50
|
+
[source,ruby]
|
|
51
|
+
----
|
|
52
|
+
element = doc.at_xpath("//book")
|
|
53
|
+
puts element.identifier # => "book"
|
|
54
|
+
|
|
55
|
+
attr = element.attribute("id")
|
|
56
|
+
puts attr.identifier # => "id"
|
|
57
|
+
|
|
58
|
+
pi = doc.children.find { |n| n.processing_instruction? }
|
|
59
|
+
puts pi.identifier # => "xml-stylesheet"
|
|
60
|
+
|
|
61
|
+
text = element.children.find { |n| n.text? }
|
|
62
|
+
puts text.identifier # => nil
|
|
63
|
+
----
|
|
64
|
+
|
|
65
|
+
**Safe iteration over mixed nodes:**
|
|
66
|
+
|
|
67
|
+
[source,ruby]
|
|
68
|
+
----
|
|
69
|
+
doc.root.children.each do |node|
|
|
70
|
+
if id = node.identifier
|
|
71
|
+
puts "#{node.class.name.split('::').last}: #{id}"
|
|
72
|
+
else
|
|
73
|
+
puts "#{node.class.name.split('::').last}: (no identifier)"
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
----
|
|
77
|
+
|
|
78
|
+
== Common Node Methods
|
|
7
79
|
|
|
8
80
|
== XML objects and their methods
|
|
9
81
|
|
|
@@ -47,3 +119,56 @@ See also:
|
|
|
47
119
|
|
|
48
120
|
* link:../guides/working-with-documents[Working with documents guide]
|
|
49
121
|
* link:../guides/advanced-features[Advanced features guide]
|
|
122
|
+
=== Doctype nodes
|
|
123
|
+
|
|
124
|
+
Doctype nodes represent DOCTYPE declarations in XML documents.
|
|
125
|
+
|
|
126
|
+
[source,ruby]
|
|
127
|
+
----
|
|
128
|
+
doctype = doc.create_doctype("html", "-//W3C//DTD HTML 4.01//EN",
|
|
129
|
+
"http://www.w3.org/TR/html4/strict.dtd")
|
|
130
|
+
doctype.name # => "html"
|
|
131
|
+
doctype.external_id # => "-//W3C//DTD HTML 4.01//EN"
|
|
132
|
+
doctype.system_id # => "http://www.w3.org/TR/html4/strict.dtd"
|
|
133
|
+
doctype.identifier # => "html"
|
|
134
|
+
----
|
|
135
|
+
|
|
136
|
+
*Available methods:*
|
|
137
|
+
|
|
138
|
+
* `name` - Returns the DOCTYPE name (root element name)
|
|
139
|
+
* `external_id` - Returns the PUBLIC identifier (or nil)
|
|
140
|
+
* `system_id` - Returns the SYSTEM identifier (DTD URI, or nil)
|
|
141
|
+
* `identifier` - Returns the primary identifier (same as `name`)
|
|
142
|
+
|
|
143
|
+
All Doctype accessor methods are fully implemented across all 6 adapters.
|
|
144
|
+
|
|
145
|
+
=== EntityReference nodes
|
|
146
|
+
|
|
147
|
+
EntityReference nodes represent XML entity references like `&nbsp;`, `&copy;`, or custom entities declared in the DOCTYPE.
|
|
148
|
+
|
|
149
|
+
[source,ruby]
|
|
150
|
+
----
|
|
151
|
+
# Create programmatically
|
|
152
|
+
ref = doc.create_entity_reference('nbsp')
|
|
153
|
+
element.add_child(ref)
|
|
154
|
+
|
|
155
|
+
# Or via builder
|
|
156
|
+
doc = Moxml::Builder.new(Moxml.new).build do
|
|
157
|
+
element 'text' do
|
|
158
|
+
entity_reference 'ndash'
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
----
|
|
162
|
+
|
|
163
|
+
*Available methods:*
|
|
164
|
+
|
|
165
|
+
* `name` - Returns the entity name (e.g., `"nbsp"`, `"copy"`)
|
|
166
|
+
* `identifier` - Returns the primary identifier (same as `name`)
|
|
167
|
+
* `text` - Returns an empty string (`""`) since an entity reference has no text content
|
|
168
|
+
* `content` - Returns empty string (entity content is in the name)
|
|
169
|
+
* `to_xml` - Returns the entity syntax (e.g., `"&nbsp;"`)
|
|
170
|
+
|
|
171
|
+
*Adapter notes:*
|
|
172
|
+
|
|
173
|
+
* *Nokogiri*: Preserves custom declared entities as `EntityReference` nodes
|
|
174
|
+
* *Ox, Oga*: These adapters resolve entities during parsing and do not expose entity reference nodes. Use Nokogiri or LibXML for entity preservation.
|
|
@@ -276,6 +276,27 @@ puts all_children.length # => 2
|
|
|
276
276
|
* link:../pages/adapters/rexml[REXML] - ⚠️ No namespace XPath
|
|
277
277
|
* link:../pages/adapters/ox[Ox] - ⚠️ Basic only, no XPath
|
|
278
278
|
|
|
279
|
+
=== Namespace URI validation
|
|
280
|
+
|
|
281
|
+
By default, Moxml validates namespace URIs against
|
|
282
|
+
https://www.rfc-editor.org/rfc/rfc3986[RFC 3986] (strict mode). To accept
|
|
283
|
+
non-standard namespace identifiers, use lenient mode:
|
|
284
|
+
|
|
285
|
+
[source,ruby]
|
|
286
|
+
----
|
|
287
|
+
# Strict mode (default) — validates URIs per RFC 3986
|
|
288
|
+
context = Moxml.new do |config|
|
|
289
|
+
config.namespace_uri_mode = :strict
|
|
290
|
+
end
|
|
291
|
+
|
|
292
|
+
# Lenient mode — accepts any string as namespace URI
|
|
293
|
+
context = Moxml.new do |config|
|
|
294
|
+
config.namespace_uri_mode = :lenient
|
|
295
|
+
end
|
|
296
|
+
----
|
|
297
|
+
|
|
298
|
+
See link:../pages/configuration[Configuration] for details.
|
|
299
|
+
|
|
279
300
|
=== Troubleshooting
|
|
280
301
|
|
|
281
302
|
**Namespace XPath not working:**
|
|
@@ -179,9 +179,7 @@ if __FILE__ == $0
|
|
|
179
179
|
|
|
180
180
|
# Count categories
|
|
181
181
|
all_categories = articles.flat_map(&:categories)
|
|
182
|
-
category_counts = all_categories.
|
|
183
|
-
counts[cat] += 1
|
|
184
|
-
end
|
|
182
|
+
category_counts = all_categories.tally
|
|
185
183
|
puts "Categories: #{category_counts.map do |cat, count|
|
|
186
184
|
"#{cat} (#{count})"
|
|
187
185
|
end.join(', ')}"
|
data/lib/moxml/adapter/base.rb
CHANGED
|
@@ -98,12 +98,24 @@ module Moxml
|
|
|
98
98
|
create_native_declaration(version, encoding, standalone)
|
|
99
99
|
end
|
|
100
100
|
|
|
101
|
-
def create_namespace(element, prefix, uri)
|
|
101
|
+
def create_namespace(element, prefix, uri, namespace_uri_mode: :strict)
|
|
102
|
+
if prefix && uri.to_s.empty?
|
|
103
|
+
raise NamespaceError.new(
|
|
104
|
+
"Prefixed namespace declaration cannot have an empty URI",
|
|
105
|
+
prefix: prefix,
|
|
106
|
+
uri: uri,
|
|
107
|
+
)
|
|
108
|
+
end
|
|
102
109
|
validate_prefix(prefix) if prefix
|
|
103
|
-
validate_uri(uri)
|
|
110
|
+
validate_uri(uri, mode: namespace_uri_mode)
|
|
104
111
|
create_native_namespace(element, prefix, uri)
|
|
105
112
|
end
|
|
106
113
|
|
|
114
|
+
def create_entity_reference(name)
|
|
115
|
+
validate_entity_reference_name(name)
|
|
116
|
+
create_native_entity_reference(name)
|
|
117
|
+
end
|
|
118
|
+
|
|
107
119
|
def set_attribute_name(attribute, name)
|
|
108
120
|
attribute.name = name
|
|
109
121
|
end
|
|
@@ -112,6 +124,10 @@ module Moxml
|
|
|
112
124
|
attribute.value = value
|
|
113
125
|
end
|
|
114
126
|
|
|
127
|
+
def entity_reference_name(node)
|
|
128
|
+
node.name
|
|
129
|
+
end
|
|
130
|
+
|
|
115
131
|
def duplicate_node(node)
|
|
116
132
|
node.dup
|
|
117
133
|
end
|
|
@@ -193,6 +209,14 @@ module Moxml
|
|
|
193
209
|
adapter: name,
|
|
194
210
|
)
|
|
195
211
|
end
|
|
212
|
+
|
|
213
|
+
def create_native_entity_reference(_name)
|
|
214
|
+
raise Moxml::NotImplementedError.new(
|
|
215
|
+
"create_native_entity_reference not implemented",
|
|
216
|
+
feature: "create_native_entity_reference",
|
|
217
|
+
adapter: name,
|
|
218
|
+
)
|
|
219
|
+
end
|
|
196
220
|
end
|
|
197
221
|
end
|
|
198
222
|
end
|
|
@@ -26,7 +26,7 @@ module Moxml
|
|
|
26
26
|
class HeadedOx < Ox
|
|
27
27
|
class << self
|
|
28
28
|
# Override parse to use HeadedOx context instead of Ox context
|
|
29
|
-
def parse(xml, _options = {})
|
|
29
|
+
def parse(xml, _options = {}, _context = nil)
|
|
30
30
|
native_doc = begin
|
|
31
31
|
result = ::Ox.parse(xml)
|
|
32
32
|
|
|
@@ -45,8 +45,9 @@ module Moxml
|
|
|
45
45
|
)
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
-
# Use
|
|
49
|
-
|
|
48
|
+
# Use provided context if available, otherwise create new one
|
|
49
|
+
ctx = _context || Context.new(:headed_ox)
|
|
50
|
+
DocumentBuilder.new(ctx).build(native_doc)
|
|
50
51
|
end
|
|
51
52
|
|
|
52
53
|
# Execute XPath query using Moxml's XPath engine
|
|
@@ -66,7 +67,7 @@ module Moxml
|
|
|
66
67
|
ctx = Context.new(:headed_ox)
|
|
67
68
|
|
|
68
69
|
# Wrap the native node - don't rebuild the whole document
|
|
69
|
-
node = Node.wrap(node, ctx)
|
|
70
|
+
node = Moxml::Node.wrap(node, ctx)
|
|
70
71
|
end
|
|
71
72
|
|
|
72
73
|
# Parse XPath expression to AST
|
data/lib/moxml/adapter/libxml.rb
CHANGED
|
@@ -48,7 +48,7 @@ module Moxml
|
|
|
48
48
|
doc.root = element
|
|
49
49
|
end
|
|
50
50
|
|
|
51
|
-
def parse(xml, options = {})
|
|
51
|
+
def parse(xml, options = {}, _context = nil)
|
|
52
52
|
# LibXML doesn't preserve DOCTYPE during parsing, so we need to extract it manually
|
|
53
53
|
xml_string = if xml.is_a?(String)
|
|
54
54
|
xml
|
|
@@ -94,7 +94,8 @@ module Moxml
|
|
|
94
94
|
native_doc.instance_variable_set(:@moxml_doctype, doctype_wrapper)
|
|
95
95
|
end
|
|
96
96
|
|
|
97
|
-
|
|
97
|
+
ctx = _context || Context.new(:libxml)
|
|
98
|
+
DocumentBuilder.new(ctx).build(native_doc)
|
|
98
99
|
end
|
|
99
100
|
|
|
100
101
|
# SAX parsing implementation for LibXML
|
|
@@ -788,6 +789,20 @@ module Moxml
|
|
|
788
789
|
end
|
|
789
790
|
end
|
|
790
791
|
|
|
792
|
+
# Doctype accessor methods
|
|
793
|
+
def doctype_name(native)
|
|
794
|
+
# LibXML uses DoctypeWrapper which stores the values
|
|
795
|
+
native.name
|
|
796
|
+
end
|
|
797
|
+
|
|
798
|
+
def doctype_external_id(native)
|
|
799
|
+
native.external_id
|
|
800
|
+
end
|
|
801
|
+
|
|
802
|
+
def doctype_system_id(native)
|
|
803
|
+
native.system_id
|
|
804
|
+
end
|
|
805
|
+
|
|
791
806
|
def xpath(node, expression, namespaces = nil)
|
|
792
807
|
native_node = unpatch_node(node)
|
|
793
808
|
return [] unless native_node
|
|
@@ -1149,7 +1164,7 @@ module Moxml
|
|
|
1149
1164
|
# Add namespace definitions (only on this element, not ancestors)
|
|
1150
1165
|
if elem.respond_to?(:namespaces)
|
|
1151
1166
|
seen_ns = {}
|
|
1152
|
-
elem.namespaces.
|
|
1167
|
+
elem.namespaces.each do |ns|
|
|
1153
1168
|
prefix = ns.prefix
|
|
1154
1169
|
uri = ns.href
|
|
1155
1170
|
next if seen_ns.key?(prefix)
|
|
@@ -11,7 +11,7 @@ module Moxml
|
|
|
11
11
|
doc.root = element
|
|
12
12
|
end
|
|
13
13
|
|
|
14
|
-
def parse(xml, options = {})
|
|
14
|
+
def parse(xml, options = {}, _context = nil)
|
|
15
15
|
native_doc = begin
|
|
16
16
|
if options[:fragment]
|
|
17
17
|
::Nokogiri::XML::DocumentFragment.parse(xml) do |config|
|
|
@@ -29,7 +29,9 @@ module Moxml
|
|
|
29
29
|
column: e.column)
|
|
30
30
|
end
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
# Use provided context if available, otherwise create new one
|
|
33
|
+
ctx = _context || Context.new(:nokogiri)
|
|
34
|
+
DocumentBuilder.new(ctx).build(native_doc)
|
|
33
35
|
end
|
|
34
36
|
|
|
35
37
|
# SAX parsing implementation for Nokogiri
|
|
@@ -104,6 +106,14 @@ module Moxml
|
|
|
104
106
|
)
|
|
105
107
|
end
|
|
106
108
|
|
|
109
|
+
def create_native_entity_reference(name)
|
|
110
|
+
::Nokogiri::XML::EntityReference.new(create_document, name)
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def entity_reference_name(node)
|
|
114
|
+
node.name
|
|
115
|
+
end
|
|
116
|
+
|
|
107
117
|
def declaration_attribute(declaration, attr_name)
|
|
108
118
|
return nil unless declaration.content
|
|
109
119
|
|
|
@@ -150,6 +160,7 @@ module Moxml
|
|
|
150
160
|
when ::Nokogiri::XML::ProcessingInstruction then :processing_instruction
|
|
151
161
|
when ::Nokogiri::XML::Document, ::Nokogiri::XML::DocumentFragment then :document
|
|
152
162
|
when ::Nokogiri::XML::DTD then :doctype
|
|
163
|
+
when ::Nokogiri::XML::EntityReference then :entity_reference
|
|
153
164
|
else :unknown
|
|
154
165
|
end
|
|
155
166
|
end
|
|
@@ -321,6 +332,19 @@ module Moxml
|
|
|
321
332
|
node.namespace_definitions
|
|
322
333
|
end
|
|
323
334
|
|
|
335
|
+
# Doctype accessor methods
|
|
336
|
+
def doctype_name(native)
|
|
337
|
+
native.name
|
|
338
|
+
end
|
|
339
|
+
|
|
340
|
+
def doctype_external_id(native)
|
|
341
|
+
native.external_id
|
|
342
|
+
end
|
|
343
|
+
|
|
344
|
+
def doctype_system_id(native)
|
|
345
|
+
native.system_id
|
|
346
|
+
end
|
|
347
|
+
|
|
324
348
|
def xpath(node, expression, namespaces = nil)
|
|
325
349
|
node.xpath(expression, namespaces).to_a
|
|
326
350
|
rescue ::Nokogiri::XML::XPath::SyntaxError => e
|