moxml 0.1.14 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +117 -66
- data/Gemfile +1 -0
- data/README.adoc +11 -9
- data/Rakefile +34 -1
- data/TODO.remaining/1-entity-reference-adapter-support.md +157 -0
- data/TODO.remaining/2-entity-restoration-model-driven.md +169 -0
- data/TODO.remaining/3-entity-reference-test-coverage.md +170 -0
- data/TODO.remaining/4-lenient-entities-mode.md +106 -0
- data/TODO.remaining/5-fixture-integrity.md +65 -0
- data/TODO.remaining/6-ox-element-ordering-bug.md +36 -0
- data/TODO.remaining/7-headed-ox-limitations.md +95 -0
- data/TODO.remaining/8-xpath-predicate-gaps.md +68 -0
- data/TODO.remaining/9-cleanup-hygiene.md +42 -0
- data/TODO.remaining/README.md +54 -0
- data/benchmarks/generate_report.rb +1 -1
- data/docs/_pages/configuration.adoc +22 -19
- data/docs/_tutorials/namespace-handling.adoc +5 -5
- data/lib/moxml/adapter/base.rb +22 -3
- data/lib/moxml/adapter/customized_libxml/declaration.rb +1 -1
- data/lib/moxml/adapter/customized_libxml/entity_reference.rb +23 -0
- data/lib/moxml/adapter/customized_libxml.rb +18 -0
- data/lib/moxml/adapter/customized_oga.rb +10 -0
- data/lib/moxml/adapter/customized_ox/entity_reference.rb +25 -0
- data/lib/moxml/adapter/customized_ox.rb +12 -0
- data/lib/moxml/adapter/customized_rexml/entity_reference.rb +19 -0
- data/lib/moxml/adapter/customized_rexml/formatter.rb +44 -20
- data/lib/moxml/adapter/customized_rexml.rb +11 -0
- data/lib/moxml/adapter/headed_ox.rb +37 -14
- data/lib/moxml/adapter/libxml.rb +233 -119
- data/lib/moxml/adapter/nokogiri.rb +22 -11
- data/lib/moxml/adapter/oga.rb +64 -25
- data/lib/moxml/adapter/ox.rb +198 -42
- data/lib/moxml/adapter/rexml.rb +64 -13
- data/lib/moxml/attribute.rb +3 -0
- data/lib/moxml/builder.rb +78 -24
- data/lib/moxml/config.rb +24 -7
- data/lib/moxml/declaration.rb +4 -2
- data/lib/moxml/document.rb +8 -1
- data/lib/moxml/document_builder.rb +44 -37
- data/lib/moxml/element.rb +18 -5
- data/lib/moxml/entity_registry.rb +51 -1
- data/lib/moxml/native_attachment.rb +65 -0
- data/lib/moxml/node.rb +39 -50
- data/lib/moxml/node_set.rb +43 -15
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils.rb +1 -1
- data/lib/moxml/xpath/compiler.rb +4 -1
- data/lib/moxml.rb +1 -0
- data/scripts/format_xml.rb +16 -0
- data/scripts/pretty_format_xml.rb +14 -0
- data/spec/consistency/round_trip_spec.rb +3 -30
- data/spec/integration/all_adapters_spec.rb +1 -0
- data/spec/integration/headed_ox_integration_spec.rb +0 -2
- data/spec/integration/shared_examples/edge_cases.rb +7 -4
- data/spec/integration/shared_examples/integration_workflows.rb +3 -3
- data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +1 -1
- data/spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb +224 -0
- data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +1 -1
- data/spec/moxml/adapter/headed_ox_spec.rb +8 -8
- data/spec/moxml/adapter/oga_spec.rb +46 -0
- data/spec/moxml/adapter/shared_examples/adapter_contract.rb +1 -12
- data/spec/moxml/allocation_benchmark_spec.rb +96 -0
- data/spec/moxml/allocation_guard_spec.rb +282 -0
- data/spec/moxml/builder_spec.rb +256 -0
- data/spec/moxml/config_spec.rb +11 -11
- data/spec/moxml/doctype_spec.rb +41 -0
- data/spec/moxml/lazy_parse_spec.rb +115 -0
- data/spec/moxml/namespace_uri_validation_spec.rb +11 -3
- data/spec/moxml/node_cache_spec.rb +110 -0
- data/spec/moxml/node_set_cache_spec.rb +90 -0
- data/spec/moxml/xml_utils_spec.rb +32 -0
- data/spec/moxml/xpath/axes_spec.rb +1 -1
- data/spec/moxml/xpath/compiler_spec.rb +2 -2
- data/spec/moxml/xpath/functions/position_functions_spec.rb +5 -5
- data/spec/moxml/xpath/functions/special_functions_spec.rb +1 -1
- data/spec/performance/memory_usage_spec.rb +0 -4
- data/spec/support/allocation_helper.rb +165 -0
- data/spec/support/w3c_namespace_helpers.rb +2 -1
- metadata +29 -2
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# TODO 8: XPath Engine Predicate Gaps (5 xit Tests)
|
|
2
|
+
|
|
3
|
+
## Problem
|
|
4
|
+
|
|
5
|
+
The pure-Ruby XPath engine (used by HeadedOx) does not fully support
|
|
6
|
+
`position()` and `last()` inside predicates, or `id()` with a nodeset argument. Five tests are marked
|
|
7
|
+
`xit` pending predicate support.
|
|
8
|
+
|
|
9
|
+
These gaps affect the XPath engine in `lib/moxml/xpath/` — they are not
|
|
10
|
+
adapter-specific.
|
|
11
|
+
|
|
12
|
+
## Failing Tests
|
|
13
|
+
|
|
14
|
+
### `position()` in Predicates (2 tests)
|
|
15
|
+
|
|
16
|
+
`spec/moxml/xpath/functions/position_functions_spec.rb`
|
|
17
|
+
|
|
18
|
+
```ruby
|
|
19
|
+
xit "returns current position in predicate" do
|
|
20
|
+
# /root/item[position() = 2]
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
xit "works with position comparison" do
|
|
24
|
+
# /root/item[position() > 1]
|
|
25
|
+
end
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### `last()` in Predicates (2 tests)
|
|
29
|
+
|
|
30
|
+
`spec/moxml/xpath/functions/position_functions_spec.rb`
|
|
31
|
+
|
|
32
|
+
```ruby
|
|
33
|
+
xit "returns size of context in predicate" do
|
|
34
|
+
# /root/item[position() = last()]
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
xit "works with last() - 1" do
|
|
38
|
+
# /root/item[position() = last() - 1]
|
|
39
|
+
end
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### `id()` with Nodeset Argument (1 test)
|
|
43
|
+
|
|
44
|
+
`spec/moxml/xpath/functions/special_functions_spec.rb:69`
|
|
45
|
+
|
|
46
|
+
```ruby
|
|
47
|
+
xit "accepts nodeset argument containing IDs" do
|
|
48
|
+
# id(nodeset) where nodeset is path-evaluated
|
|
49
|
+
end
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Investigation Needed
|
|
53
|
+
|
|
54
|
+
- The XPath compiler likely needs to pass predicate context (position, size)
|
|
55
|
+
into the evaluation environment when compiling predicate expressions.
|
|
56
|
+
- `position()` and `last()` are defined but raise `InvalidContextError` when
|
|
57
|
+
used inside predicates — the predicate evaluation path doesn't set up the
|
|
58
|
+
context they need.
|
|
59
|
+
- `id()` with a nodeset argument requires evaluating the argument as an XPath
|
|
60
|
+
path first, then extracting ID values from the resulting nodes.
|
|
61
|
+
|
|
62
|
+
## Files
|
|
63
|
+
|
|
64
|
+
- `lib/moxml/xpath/compiler.rb` — predicate compilation
|
|
65
|
+
- `lib/moxml/xpath/engine.rb` — runtime evaluation context
|
|
66
|
+
- `lib/moxml/xpath/context.rb` — context setup for position/last
|
|
67
|
+
- `spec/moxml/xpath/functions/position_functions_spec.rb`
|
|
68
|
+
- `spec/moxml/xpath/functions/special_functions_spec.rb`
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# TODO 9: Cleanup and Hygiene
|
|
2
|
+
|
|
3
|
+
Small items that don't affect functionality but should be addressed.
|
|
4
|
+
|
|
5
|
+
## 9a. Stale Doc Links in Skip Messages
|
|
6
|
+
|
|
7
|
+
15+ test skip messages reference `docs/HEADED_OX_LIMITATIONS.md` but the
|
|
8
|
+
actual file is at `docs/_pages/headed-ox-limitations.adoc`. The referenced
|
|
9
|
+
path does not exist.
|
|
10
|
+
|
|
11
|
+
**Fix:** Update all skip messages to reference
|
|
12
|
+
`docs/_pages/headed-ox-limitations.adoc` instead.
|
|
13
|
+
|
|
14
|
+
**Affected files:**
|
|
15
|
+
- `spec/integration/headed_ox_integration_spec.rb`
|
|
16
|
+
- `spec/integration/shared_examples/integration_workflows.rb`
|
|
17
|
+
- `spec/integration/shared_examples/node_wrappers/node_behavior.rb`
|
|
18
|
+
- `spec/integration/shared_examples/node_wrappers/cdata_behavior.rb`
|
|
19
|
+
- `spec/integration/shared_examples/edge_cases.rb`
|
|
20
|
+
- `spec/moxml/xpath/axes_spec.rb`
|
|
21
|
+
- `spec/moxml/xpath/compiler_spec.rb`
|
|
22
|
+
- `spec/moxml/adapter/headed_ox_spec.rb`
|
|
23
|
+
|
|
24
|
+
## 9b. Untracked `scripts/` Directory
|
|
25
|
+
|
|
26
|
+
`scripts/format_xml.rb` and `scripts/pretty_format_xml.rb` exist as untracked
|
|
27
|
+
files. Decide whether to commit them (and document their purpose), ignore them via a `.gitignore` pattern,
|
|
28
|
+
or remove.
|
|
29
|
+
|
|
30
|
+
## 9c. Superseded Root TODO Files
|
|
31
|
+
|
|
32
|
+
The following root-level files are marked as superseded in
|
|
33
|
+
`TODO.remaining/README.md` but still exist:
|
|
34
|
+
|
|
35
|
+
- `TODO.entities-work.md`
|
|
36
|
+
- `TODO.entity-handling.md`
|
|
37
|
+
- `TODO.entity-support.md`
|
|
38
|
+
- `TODO.full-entity-support.md`
|
|
39
|
+
- `TODO.full-entity.md`
|
|
40
|
+
- `TODO.mn-bilingual-round-trip.md`
|
|
41
|
+
|
|
42
|
+
Once all work is confirmed tracked in `TODO.remaining/`, these can be deleted.
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# TODO.remaining — Consolidated Action Items
|
|
2
|
+
|
|
3
|
+
Consolidated from: TODO.entities-work.md, TODO.entity-support.md,
|
|
4
|
+
TODO.full-entity.md, TODO.full-entity-support.md, TODO.entity-handling.md,
|
|
5
|
+
TODO.mn-bilingual-round-trip.md, plus code audit (2026-04-22).
|
|
6
|
+
|
|
7
|
+
Those root files are superseded and can be removed.
|
|
8
|
+
|
|
9
|
+
## Dependency Order
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
TODO 1 (Adapter Support)
|
|
13
|
+
|
|
|
14
|
+
v
|
|
15
|
+
TODO 2 (Model-Driven Restoration) ---> TODO 4 (Lenient Entities Mode)
|
|
16
|
+
|
|
|
17
|
+
v
|
|
18
|
+
TODO 3 (Test Coverage)
|
|
19
|
+
|
|
20
|
+
TODO 5 (Fixture Integrity) — independent
|
|
21
|
+
TODO 6 (Ox Element Ordering) — independent
|
|
22
|
+
TODO 7 (HeadedOx Limitations) — independent
|
|
23
|
+
TODO 8 (XPath Predicate Gaps) — independent
|
|
24
|
+
TODO 9 (Cleanup Hygiene) — independent
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Summary
|
|
28
|
+
|
|
29
|
+
| # | File | Description | Status |
|
|
30
|
+
|---|------|-------------|--------|
|
|
31
|
+
| 1 | `1-entity-reference-adapter-support.md` | EntityReference in Ox, Oga, REXML, LibXML, HeadedOx | Not started |
|
|
32
|
+
| 2 | `2-entity-restoration-model-driven.md` | Use EntityRegistry as source of truth for restoration | Not started |
|
|
33
|
+
| 3 | `3-entity-reference-test-coverage.md` | Tests for EntityReference nodes and round-trips | Not started |
|
|
34
|
+
| 4 | `4-lenient-entities-mode.md` | Strict vs lenient entity restoration mode | Not started |
|
|
35
|
+
| 5 | `5-fixture-integrity.md` | Bilingual fixture verification + CI validation | Not started |
|
|
36
|
+
| 6 | `6-ox-element-ordering-bug.md` | Ox adapter reorders elements in certain fixtures | Not started |
|
|
37
|
+
| 7 | `7-headed-ox-limitations.md` | 15 skipped tests across 7 HeadedOx limitation areas | Not started |
|
|
38
|
+
| 8 | `8-xpath-predicate-gaps.md` | position()/last()/id() not working in XPath predicates | Not started |
|
|
39
|
+
| 9 | `9-cleanup-hygiene.md` | Stale doc links, untracked scripts, superseded files | Not started |
|
|
40
|
+
|
|
41
|
+
## What's Already Done
|
|
42
|
+
|
|
43
|
+
- EntityReference node class (`lib/moxml/entity_reference.rb`)
|
|
44
|
+
- EntityRegistry with 2125 W3C entities (`lib/moxml/entity_registry.rb`)
|
|
45
|
+
- Node type registry includes `:entity_reference`
|
|
46
|
+
- Base adapter template: `create_entity_reference`, `validate_entity_reference_name`
|
|
47
|
+
- Nokogiri adapter: full native EntityReference support
|
|
48
|
+
- Document factory: `create_entity_reference(name)`
|
|
49
|
+
- DocumentBuilder: `visit_entity_reference` + partial `restore_entities_in_text`
|
|
50
|
+
- Builder DSL: `entity_reference(name)`
|
|
51
|
+
- Config: `restore_entities`, `entity_load_mode`, `entity_provider`, `preload_entity_sets`
|
|
52
|
+
- Context: entity registry integration
|
|
53
|
+
- EntityRegistry tests (24 examples passing)
|
|
54
|
+
- HeadedOx limitations documented in `docs/_pages/headed-ox-limitations.adoc`
|
|
@@ -488,7 +488,7 @@ class MoxmlBenchmarkReport
|
|
|
488
488
|
f.puts "- Can accept 99.20% pass rate (16 documented Ox limitations)"
|
|
489
489
|
f.puts ""
|
|
490
490
|
f.puts "**Note:** HeadedOx = Ox parsing speed + full XPath features."
|
|
491
|
-
f.puts "See docs/
|
|
491
|
+
f.puts "See docs/_pages/headed-ox-limitations.adoc for complete details."
|
|
492
492
|
f.puts ""
|
|
493
493
|
end
|
|
494
494
|
|
|
@@ -97,19 +97,21 @@ context.config.default_encoding = 'UTF-16'
|
|
|
97
97
|
|
|
98
98
|
**Default:** `"UTF-8"`
|
|
99
99
|
|
|
100
|
-
==== Namespace
|
|
100
|
+
==== Namespace validation mode
|
|
101
101
|
|
|
102
|
-
Control how strictly namespace URIs are validated:
|
|
102
|
+
Control how strictly namespace URIs and prefixes are validated:
|
|
103
103
|
|
|
104
104
|
[source,ruby]
|
|
105
105
|
----
|
|
106
|
-
# Strict mode (default) — validates namespace URIs against RFC 3986
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
106
|
+
# Strict mode (default) — validates namespace URIs against RFC 3986 and
|
|
107
|
+
# prefixes against NCName rules
|
|
108
|
+
context.config.namespace_validation_mode = :strict
|
|
109
|
+
doc = context.parse(xml) # Raises ValidationError for invalid URIs/prefixes
|
|
110
|
+
|
|
111
|
+
# Lenient mode — accepts any URI string and defers prefix validation to the
|
|
112
|
+
# underlying XML parser
|
|
113
|
+
context.config.namespace_validation_mode = :lenient
|
|
114
|
+
doc = context.parse(xml) # Accepts non-standard namespace URIs/prefixes
|
|
113
115
|
----
|
|
114
116
|
|
|
115
117
|
**Default:** `:strict`
|
|
@@ -117,23 +119,24 @@ doc = context.parse(xml) # Accepts non-standard namespace URIs
|
|
|
117
119
|
**Modes:**
|
|
118
120
|
|
|
119
121
|
`:strict`:: Validates namespace URIs against the
|
|
120
|
-
https://www.rfc-editor.org/rfc/rfc3986[RFC 3986] URI-reference specification
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
standards-compliant
|
|
122
|
+
https://www.rfc-editor.org/rfc/rfc3986[RFC 3986] URI-reference specification and
|
|
123
|
+
namespace prefixes against NCName rules, as required by
|
|
124
|
+
https://www.w3.org/TR/xml-names/[Namespaces in XML]. Invalid values raise a
|
|
125
|
+
`Moxml::ValidationError`. This is the recommended mode for standards-compliant
|
|
126
|
+
XML processing.
|
|
124
127
|
|
|
125
|
-
`:lenient`:: Accepts any string as a namespace URI
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
128
|
+
`:lenient`:: Accepts any string as a namespace URI (only rejecting control
|
|
129
|
+
characters) and defers prefix validation to the underlying XML parser. Use this
|
|
130
|
+
mode when processing XML documents that use non-standard namespace identifiers
|
|
131
|
+
or prefixes (e.g., `xmlns_1.0`).
|
|
129
132
|
|
|
130
133
|
**Example:**
|
|
131
134
|
|
|
132
135
|
[source,ruby]
|
|
133
136
|
----
|
|
134
|
-
# Process documents with non-standard namespace URIs
|
|
137
|
+
# Process documents with non-standard namespace URIs and prefixes
|
|
135
138
|
context = Moxml.new do |config|
|
|
136
|
-
config.
|
|
139
|
+
config.namespace_validation_mode = :lenient
|
|
137
140
|
end
|
|
138
141
|
|
|
139
142
|
xml = '<root xmlns:ex="not a valid URI but accepted in lenient mode"/>'
|
|
@@ -280,18 +280,18 @@ puts all_children.length # => 2
|
|
|
280
280
|
|
|
281
281
|
By default, Moxml validates namespace URIs against
|
|
282
282
|
https://www.rfc-editor.org/rfc/rfc3986[RFC 3986] and namespace prefixes against NCName rules (strict mode). To accept
|
|
283
|
-
non-standard namespace identifiers, use lenient mode:
|
|
283
|
+
non-standard namespace identifiers or prefixes, use lenient mode:
|
|
284
284
|
|
|
285
285
|
[source,ruby]
|
|
286
286
|
----
|
|
287
|
-
# Strict mode (default) — validates URIs per RFC 3986
|
|
287
|
+
# Strict mode (default) — validates URIs per RFC 3986 and prefixes per NCName
|
|
288
288
|
context = Moxml.new do |config|
|
|
289
|
-
config.
|
|
289
|
+
config.namespace_validation_mode = :strict
|
|
290
290
|
end
|
|
291
291
|
|
|
292
|
-
# Lenient mode — accepts any string
|
|
292
|
+
# Lenient mode — accepts any URI string and defers prefix validation to parser
|
|
293
293
|
context = Moxml.new do |config|
|
|
294
|
-
config.
|
|
294
|
+
config.namespace_validation_mode = :lenient
|
|
295
295
|
end
|
|
296
296
|
----
|
|
297
297
|
|
data/lib/moxml/adapter/base.rb
CHANGED
|
@@ -98,7 +98,8 @@ module Moxml
|
|
|
98
98
|
create_native_declaration(version, encoding, standalone)
|
|
99
99
|
end
|
|
100
100
|
|
|
101
|
-
def create_namespace(element, prefix, uri,
|
|
101
|
+
def create_namespace(element, prefix, uri,
|
|
102
|
+
namespace_validation_mode: :strict)
|
|
102
103
|
if prefix && uri.to_s.empty?
|
|
103
104
|
raise NamespaceError.new(
|
|
104
105
|
"Prefixed namespace declaration cannot have an empty URI",
|
|
@@ -106,8 +107,12 @@ module Moxml
|
|
|
106
107
|
uri: uri,
|
|
107
108
|
)
|
|
108
109
|
end
|
|
109
|
-
|
|
110
|
-
|
|
110
|
+
if namespace_validation_mode == :strict
|
|
111
|
+
validate_prefix(prefix) if prefix
|
|
112
|
+
validate_uri(uri, mode: :strict)
|
|
113
|
+
else
|
|
114
|
+
validate_uri(uri, mode: :lenient)
|
|
115
|
+
end
|
|
111
116
|
create_native_namespace(element, prefix, uri)
|
|
112
117
|
end
|
|
113
118
|
|
|
@@ -144,6 +149,20 @@ module Moxml
|
|
|
144
149
|
node
|
|
145
150
|
end
|
|
146
151
|
|
|
152
|
+
# Check if the native document has an XML declaration
|
|
153
|
+
# @param native_doc the native document object
|
|
154
|
+
# @param wrapper [Moxml::Document] the wrapper with has_xml_declaration flag
|
|
155
|
+
# @return [Boolean]
|
|
156
|
+
def has_declaration?(_native_doc, wrapper)
|
|
157
|
+
wrapper.has_xml_declaration
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
# Return the actual native node after an add_child operation.
|
|
161
|
+
# Override for adapters where node identity may change (e.g., LibXML doc.root=).
|
|
162
|
+
def actual_native(child_native, _parent_native)
|
|
163
|
+
child_native
|
|
164
|
+
end
|
|
165
|
+
|
|
147
166
|
protected
|
|
148
167
|
|
|
149
168
|
def create_native_element(_name, _owner_doc = nil)
|
|
@@ -9,7 +9,7 @@ module Moxml
|
|
|
9
9
|
# are read-only after creation. This wrapper allows mutation by
|
|
10
10
|
# storing values internally and regenerating XML when needed.
|
|
11
11
|
class Declaration
|
|
12
|
-
attr_accessor :version, :encoding
|
|
12
|
+
attr_accessor :version, :encoding, :removed, :parent_doc
|
|
13
13
|
attr_reader :native
|
|
14
14
|
|
|
15
15
|
def initialize(native_doc, version = nil, encoding = nil,
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module Adapter
|
|
5
|
+
module CustomizedLibxml
|
|
6
|
+
class EntityReference
|
|
7
|
+
attr_reader :name
|
|
8
|
+
|
|
9
|
+
def initialize(name)
|
|
10
|
+
@name = name
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def to_xml
|
|
14
|
+
"&#{@name};"
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def ==(other)
|
|
18
|
+
other.is_a?(self.class) && @name == other.name
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module Adapter
|
|
5
|
+
module CustomizedLibxml
|
|
6
|
+
autoload :Cdata, "moxml/adapter/customized_libxml/cdata"
|
|
7
|
+
autoload :Comment, "moxml/adapter/customized_libxml/comment"
|
|
8
|
+
autoload :Declaration, "moxml/adapter/customized_libxml/declaration"
|
|
9
|
+
autoload :Element, "moxml/adapter/customized_libxml/element"
|
|
10
|
+
autoload :EntityReference,
|
|
11
|
+
"moxml/adapter/customized_libxml/entity_reference"
|
|
12
|
+
autoload :Node, "moxml/adapter/customized_libxml/node"
|
|
13
|
+
autoload :ProcessingInstruction,
|
|
14
|
+
"moxml/adapter/customized_libxml/processing_instruction"
|
|
15
|
+
autoload :Text, "moxml/adapter/customized_libxml/text"
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module Adapter
|
|
5
|
+
module CustomizedOx
|
|
6
|
+
class EntityReference
|
|
7
|
+
attr_reader :name
|
|
8
|
+
attr_accessor :parent
|
|
9
|
+
|
|
10
|
+
def initialize(name)
|
|
11
|
+
@name = name
|
|
12
|
+
@parent = nil
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def to_xml
|
|
16
|
+
"&#{@name};"
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def ==(other)
|
|
20
|
+
other.is_a?(self.class) && @name == other.name
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module Adapter
|
|
5
|
+
module CustomizedOx
|
|
6
|
+
autoload :Attribute, "moxml/adapter/customized_ox/attribute"
|
|
7
|
+
autoload :EntityReference, "moxml/adapter/customized_ox/entity_reference"
|
|
8
|
+
autoload :Namespace, "moxml/adapter/customized_ox/namespace"
|
|
9
|
+
autoload :Text, "moxml/adapter/customized_ox/text"
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module Adapter
|
|
5
|
+
module CustomizedRexml
|
|
6
|
+
class EntityReference
|
|
7
|
+
attr_reader :name
|
|
8
|
+
|
|
9
|
+
def initialize(name)
|
|
10
|
+
@name = name
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def ==(other)
|
|
14
|
+
other.is_a?(self.class) && @name == other.name
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
@@ -7,18 +7,21 @@ module Moxml
|
|
|
7
7
|
module CustomizedRexml
|
|
8
8
|
# Custom REXML formatter that fixes indentation and wrapping issues
|
|
9
9
|
class Formatter < ::REXML::Formatters::Pretty
|
|
10
|
-
def initialize(indentation: 2, self_close_empty: false)
|
|
10
|
+
def initialize(indentation: 2, self_close_empty: false, adapter: nil)
|
|
11
11
|
@indentation = " " * indentation
|
|
12
12
|
@level = 0
|
|
13
13
|
@compact = true
|
|
14
14
|
@width = -1 # Disable line wrapping
|
|
15
15
|
@self_close_empty = self_close_empty
|
|
16
|
+
@adapter = adapter
|
|
16
17
|
end
|
|
17
18
|
|
|
18
19
|
def write(node, output)
|
|
19
20
|
case node
|
|
20
21
|
when ::REXML::XMLDecl
|
|
21
22
|
write_declaration(node, output)
|
|
23
|
+
when ::Moxml::Adapter::CustomizedRexml::EntityReference
|
|
24
|
+
output << "&#{node.name};"
|
|
22
25
|
else
|
|
23
26
|
super
|
|
24
27
|
end
|
|
@@ -29,7 +32,13 @@ module Moxml
|
|
|
29
32
|
output << "<#{node.expanded_name}"
|
|
30
33
|
write_attributes(node, output)
|
|
31
34
|
|
|
32
|
-
|
|
35
|
+
# Check for entity refs stored in adapter attachments
|
|
36
|
+
entity_refs = @adapter&.attachments&.get(node, :entity_refs)
|
|
37
|
+
child_sequence = @adapter&.attachments&.get(node, :child_sequence)
|
|
38
|
+
|
|
39
|
+
has_no_children = node.children.empty? && !(entity_refs && !entity_refs.empty?)
|
|
40
|
+
|
|
41
|
+
if has_no_children && @self_close_empty
|
|
33
42
|
output << "/>"
|
|
34
43
|
return
|
|
35
44
|
end
|
|
@@ -42,26 +51,41 @@ module Moxml
|
|
|
42
51
|
mixed = has_text && has_elements
|
|
43
52
|
|
|
44
53
|
# Handle children based on content type
|
|
45
|
-
|
|
54
|
+
all_children_empty = node.children.empty? && !(entity_refs && !entity_refs.empty?)
|
|
55
|
+
unless all_children_empty
|
|
46
56
|
@level += @indentation.length unless mixed
|
|
47
57
|
|
|
48
|
-
|
|
49
|
-
#
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
58
|
+
if entity_refs && !entity_refs.empty? && child_sequence
|
|
59
|
+
# Interleave native children with entity refs using tracked sequence
|
|
60
|
+
eref_idx = 0
|
|
61
|
+
native_idx = 0
|
|
62
|
+
child_sequence.each do |type|
|
|
63
|
+
case type
|
|
64
|
+
when :native
|
|
65
|
+
if native_idx < node.children.size
|
|
66
|
+
child = node.children[native_idx]
|
|
67
|
+
native_idx += 1
|
|
68
|
+
next if child.is_a?(::REXML::Text) &&
|
|
69
|
+
child.to_s.strip.empty? &&
|
|
70
|
+
!(child.next_sibling.nil? && child.previous_sibling.nil?)
|
|
71
|
+
write(child, output)
|
|
72
|
+
end
|
|
73
|
+
when :eref
|
|
74
|
+
if eref_idx < entity_refs.size
|
|
75
|
+
write(entity_refs[eref_idx], output)
|
|
76
|
+
eref_idx += 1
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
else
|
|
81
|
+
node.children.each_with_index do |child, _index|
|
|
82
|
+
# Skip insignificant whitespace
|
|
83
|
+
next if child.is_a?(::REXML::Text) &&
|
|
84
|
+
child.to_s.strip.empty? &&
|
|
85
|
+
!(child.next_sibling.nil? && child.previous_sibling.nil?)
|
|
86
|
+
|
|
87
|
+
write(child, output)
|
|
88
|
+
end
|
|
65
89
|
end
|
|
66
90
|
|
|
67
91
|
# Reset indentation for closing tag in non-mixed content
|
|
@@ -25,17 +25,23 @@ module Moxml
|
|
|
25
25
|
#
|
|
26
26
|
class HeadedOx < Ox
|
|
27
27
|
class << self
|
|
28
|
-
# Override parse to use
|
|
29
|
-
|
|
28
|
+
# Override parse to use lazy wrapping like the Ox adapter.
|
|
29
|
+
# Previously used DocumentBuilder (eager tree construction causing
|
|
30
|
+
# ~176K allocations per 100-element parse). Lazy parse defers wrapper
|
|
31
|
+
# creation until nodes are accessed, matching Ox adapter behavior.
|
|
32
|
+
def parse(xml, options = {}, _context = nil)
|
|
30
33
|
native_doc = begin
|
|
31
34
|
result = ::Ox.parse(xml)
|
|
32
35
|
|
|
33
36
|
# result can be either Document or Element
|
|
34
37
|
if result.is_a?(::Ox::Document)
|
|
38
|
+
assign_parents(result)
|
|
39
|
+
validate_single_root(result) if options[:strict]
|
|
35
40
|
result
|
|
36
41
|
else
|
|
37
42
|
doc = ::Ox::Document.new
|
|
38
43
|
doc << result
|
|
44
|
+
assign_parents(doc)
|
|
39
45
|
doc
|
|
40
46
|
end
|
|
41
47
|
rescue ::Ox::ParseError => e
|
|
@@ -47,17 +53,17 @@ module Moxml
|
|
|
47
53
|
|
|
48
54
|
# Use provided context if available, otherwise create new one
|
|
49
55
|
ctx = _context || Context.new(:headed_ox)
|
|
50
|
-
|
|
56
|
+
Document.new(native_doc, ctx)
|
|
51
57
|
end
|
|
52
58
|
|
|
53
59
|
# Execute XPath query using Moxml's XPath engine
|
|
54
60
|
#
|
|
55
61
|
# This overrides the Ox adapter's xpath method which uses locate().
|
|
56
62
|
#
|
|
57
|
-
# @param
|
|
63
|
+
# @param node Starting node (native or wrapped)
|
|
58
64
|
# @param [String] expression XPath expression
|
|
59
65
|
# @param [Hash] namespaces Namespace prefix mappings
|
|
60
|
-
# @return [
|
|
66
|
+
# @return [Array, Object] Native node array or scalar value
|
|
61
67
|
def xpath(node, expression, namespaces = {})
|
|
62
68
|
# If we receive a native node, wrap it first
|
|
63
69
|
# Document#xpath passes @native, but our compiled XPath needs Moxml nodes
|
|
@@ -79,16 +85,33 @@ module Moxml
|
|
|
79
85
|
# Execute on the node (now guaranteed to be wrapped Moxml node)
|
|
80
86
|
result = proc.call(node)
|
|
81
87
|
|
|
82
|
-
#
|
|
88
|
+
# Return native arrays for Node#xpath to wrap, scalars directly.
|
|
89
|
+
# The adapter contract: xpath() returns Array<native> | scalar.
|
|
83
90
|
case result
|
|
84
91
|
when Array
|
|
85
|
-
#
|
|
86
|
-
#
|
|
87
|
-
|
|
88
|
-
|
|
92
|
+
# XPath engine returns wrapped Moxml::Node objects.
|
|
93
|
+
# Extract native nodes and deduplicate by object identity.
|
|
94
|
+
native_nodes = result.map { |n| n.is_a?(Moxml::Node) ? n.native : n }
|
|
95
|
+
seen = {}
|
|
96
|
+
native_nodes.select do |native|
|
|
97
|
+
id = native.object_id
|
|
98
|
+
if seen[id]
|
|
99
|
+
false
|
|
100
|
+
else
|
|
101
|
+
seen[id] = true
|
|
102
|
+
end
|
|
103
|
+
end
|
|
89
104
|
when NodeSet
|
|
90
|
-
#
|
|
91
|
-
|
|
105
|
+
# NodeSet from intermediate evaluation - extract natives and deduplicate
|
|
106
|
+
seen = {}
|
|
107
|
+
result.to_a.map(&:native).select do |native|
|
|
108
|
+
id = native.object_id
|
|
109
|
+
if seen[id]
|
|
110
|
+
false
|
|
111
|
+
else
|
|
112
|
+
seen[id] = true
|
|
113
|
+
end
|
|
114
|
+
end
|
|
92
115
|
else
|
|
93
116
|
# Scalar values (string, number, boolean) - return as-is
|
|
94
117
|
result
|
|
@@ -107,10 +130,10 @@ module Moxml
|
|
|
107
130
|
# @param [Moxml::Node] node Starting node
|
|
108
131
|
# @param [String] expression XPath expression
|
|
109
132
|
# @param [Hash] namespaces Namespace prefix mappings
|
|
110
|
-
# @return [
|
|
133
|
+
# @return [Object, nil] First native node or scalar value
|
|
111
134
|
def at_xpath(node, expression, namespaces = {})
|
|
112
135
|
result = xpath(node, expression, namespaces)
|
|
113
|
-
result.is_a?(
|
|
136
|
+
result.is_a?(Array) ? result.first : result
|
|
114
137
|
end
|
|
115
138
|
|
|
116
139
|
# Check if XPath is supported
|