moxml 0.1.15 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +31 -0
- data/TODO.remaining/1-entity-reference-adapter-support.md +157 -0
- data/TODO.remaining/2-entity-restoration-model-driven.md +169 -0
- data/TODO.remaining/3-entity-reference-test-coverage.md +170 -0
- data/TODO.remaining/4-lenient-entities-mode.md +106 -0
- data/TODO.remaining/5-fixture-integrity.md +65 -0
- data/TODO.remaining/6-ox-element-ordering-bug.md +36 -0
- data/TODO.remaining/7-headed-ox-limitations.md +95 -0
- data/TODO.remaining/8-xpath-predicate-gaps.md +68 -0
- data/TODO.remaining/9-cleanup-hygiene.md +42 -0
- data/TODO.remaining/README.md +54 -0
- data/benchmarks/generate_report.rb +1 -1
- data/lib/moxml/adapter/base.rb +14 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +1 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +42 -20
- data/lib/moxml/adapter/headed_ox.rb +28 -11
- data/lib/moxml/adapter/libxml.rb +165 -65
- data/lib/moxml/adapter/nokogiri.rb +19 -7
- data/lib/moxml/adapter/oga.rb +28 -12
- data/lib/moxml/adapter/ox.rb +11 -3
- data/lib/moxml/adapter/rexml.rb +40 -8
- data/lib/moxml/attribute.rb +1 -1
- data/lib/moxml/builder.rb +77 -24
- data/lib/moxml/config.rb +18 -1
- data/lib/moxml/declaration.rb +4 -2
- data/lib/moxml/document.rb +5 -2
- data/lib/moxml/document_builder.rb +9 -8
- data/lib/moxml/element.rb +10 -5
- data/lib/moxml/entity_registry.rb +16 -2
- data/lib/moxml/native_attachment.rb +65 -0
- data/lib/moxml/node.rb +17 -49
- data/lib/moxml/node_set.rb +1 -1
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xpath/compiler.rb +4 -1
- data/lib/moxml.rb +1 -0
- data/scripts/format_xml.rb +16 -0
- data/scripts/pretty_format_xml.rb +14 -0
- data/spec/consistency/round_trip_spec.rb +3 -30
- data/spec/integration/all_adapters_spec.rb +1 -0
- data/spec/integration/headed_ox_integration_spec.rb +0 -2
- data/spec/integration/shared_examples/edge_cases.rb +4 -4
- data/spec/integration/shared_examples/integration_workflows.rb +3 -3
- data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +1 -1
- data/spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb +224 -0
- data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +1 -1
- data/spec/moxml/adapter/headed_ox_spec.rb +8 -8
- data/spec/moxml/builder_spec.rb +234 -0
- data/spec/moxml/xpath/axes_spec.rb +1 -1
- data/spec/moxml/xpath/compiler_spec.rb +2 -2
- data/spec/moxml/xpath/functions/position_functions_spec.rb +5 -5
- data/spec/moxml/xpath/functions/special_functions_spec.rb +1 -1
- data/spec/performance/memory_usage_spec.rb +0 -4
- metadata +15 -1
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# TODO 5: Fixture Integrity and CI Validation
|
|
2
|
+
|
|
3
|
+
## Problem
|
|
4
|
+
|
|
5
|
+
The Metanorma bilingual presentation fixture was previously corrupted (error
|
|
6
|
+
messages appended after valid XML). It was truncated to fix the corruption,
|
|
7
|
+
but the truncated version has not been verified against the upstream source.
|
|
8
|
+
|
|
9
|
+
Additionally, there is no automated validation of XML fixtures in CI — a
|
|
10
|
+
corrupted fixture could be introduced and not caught until round-trip tests
|
|
11
|
+
fail with confusing errors.
|
|
12
|
+
|
|
13
|
+
## Remaining Tasks
|
|
14
|
+
|
|
15
|
+
### 1. Verify Bilingual Fixture Against Upstream
|
|
16
|
+
|
|
17
|
+
The file `spec/fixtures/round-trips/metanorma/bilingual.presentation.xml`
|
|
18
|
+
was truncated from 111,606 lines to fix corruption. Need to:
|
|
19
|
+
|
|
20
|
+
- Obtain a clean copy from the Metanorma project
|
|
21
|
+
- Compare with the current truncated version (21,211 lines — different from
|
|
22
|
+
the 55,802 lines mentioned in the original TODO, suggesting the fixture changed again after that TODO was written)
|
|
23
|
+
- Confirm no data loss occurred in truncation
|
|
24
|
+
|
|
25
|
+
### 2. Add CI Fixture Validation
|
|
26
|
+
|
|
27
|
+
Add a Rake task or RSpec test that validates all XML fixtures are well-formed
|
|
28
|
+
before running round-trip tests. This prevents silent corruption.
|
|
29
|
+
|
|
30
|
+
**Option A**: Rake task using `xmllint`:
|
|
31
|
+
```ruby
|
|
32
|
+
# In Rakefile
|
|
33
|
+
namespace :spec do
|
|
34
|
+
task :validate_fixtures do
|
|
35
|
+
errors = []
|
|
36
|
+
Dir.glob("spec/fixtures/**/*.xml").each do |path|
|
|
37
|
+
output = `xmllint --noout "#{path}" 2>&1`
|
|
38
|
+
errors << "#{path}: #{output}" unless $?.success?
|
|
39
|
+
end
|
|
40
|
+
raise "Invalid fixtures:\n#{errors.join("\n")}" unless errors.empty?
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
task spec: ["spec:validate_fixtures"]
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**Option B**: RSpec test:
|
|
47
|
+
```ruby
|
|
48
|
+
# spec/integration/fixture_validation_spec.rb
|
|
49
|
+
RSpec.describe "XML fixtures" do
|
|
50
|
+
Dir.glob("spec/fixtures/**/*.xml").each do |path|
|
|
51
|
+
it "#{path} is valid XML" do
|
|
52
|
+
ctx = Moxml.new(:nokogiri)
|
|
53
|
+
expect { ctx.parse(File.read(path)) }.not_to raise_error
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Option A is preferred — `xmllint` is stricter and catches issues that
|
|
60
|
+
lenient parsers might silently accept.
|
|
61
|
+
|
|
62
|
+
## Files to Create/Modify
|
|
63
|
+
|
|
64
|
+
- `Rakefile` — add `spec:validate_fixtures` task
|
|
65
|
+
- Verify/replace `spec/fixtures/round-trips/metanorma/bilingual.presentation.xml`
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# TODO 6: Ox Adapter Element Ordering Bug
|
|
2
|
+
|
|
3
|
+
## Problem
|
|
4
|
+
|
|
5
|
+
When round-tripping certain XML fixtures through the Ox adapter, child elements
|
|
6
|
+
are produced in a different order compared to Nokogiri, Oga, and REXML. This
|
|
7
|
+
causes cross-adapter consistency failures for `elements_with_attributes`
|
|
8
|
+
comparisons.
|
|
9
|
+
|
|
10
|
+
The semantic equivalence check (double round-trip) still passes, so the
|
|
11
|
+
document content is correct — only the ordering is wrong.
|
|
12
|
+
|
|
13
|
+
## Current State
|
|
14
|
+
|
|
15
|
+
Suppressed in `spec/consistency/round_trip_spec.rb:332` via
|
|
16
|
+
`KNOWN_ELEMENT_ORDERING_ISSUES` set. Affected fixture/adapter combinations:
|
|
17
|
+
|
|
18
|
+
```
|
|
19
|
+
niso-jats/element_citation.xml nokogiri <-> ox
|
|
20
|
+
niso-jats/element_citation.xml ox <-> oga
|
|
21
|
+
niso-jats/element_citation.xml rexml <-> ox
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Investigation Needed
|
|
25
|
+
|
|
26
|
+
- Determine whether Ox's DOM building reorders nodes or if the issue is in
|
|
27
|
+
Moxml's tree traversal during serialization.
|
|
28
|
+
- Check if Ox's `Ox::Element#nodes` preserves insertion order.
|
|
29
|
+
- Compare Ox's native serialization (`Ox.dump`) with Moxml's custom serializer
|
|
30
|
+
to narrow down where the reorder happens.
|
|
31
|
+
|
|
32
|
+
## Files
|
|
33
|
+
|
|
34
|
+
- `spec/consistency/round_trip_spec.rb` — suppression set
|
|
35
|
+
- `lib/moxml/adapter/ox.rb` — serialization path
|
|
36
|
+
- `lib/moxml/adapter/customized_ox/` — wrapper classes involved in tree walk
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# TODO 7: HeadedOx Adapter Limitations (15 Skipped Tests)
|
|
2
|
+
|
|
3
|
+
## Problem
|
|
4
|
+
|
|
5
|
+
HeadedOx (Ox + pure-Ruby XPath engine) has 15 skipped tests representing 7
|
|
6
|
+
distinct limitation areas. Some require upstream Ox gem enhancements; others
|
|
7
|
+
need investigation or Moxml-side fixes.
|
|
8
|
+
|
|
9
|
+
Full details in `docs/_pages/headed-ox-limitations.adoc`.
|
|
10
|
+
|
|
11
|
+
## Limitation Areas
|
|
12
|
+
|
|
13
|
+
### 7a. XPath `@*` Attribute Wildcard (3 tests)
|
|
14
|
+
|
|
15
|
+
The XPath parser does not support wildcard in the attribute axis.
|
|
16
|
+
|
|
17
|
+
**Tests:**
|
|
18
|
+
- `spec/moxml/xpath/compiler_spec.rb:156` — descendant-or-self wildcards
|
|
19
|
+
- `spec/moxml/xpath/compiler_spec.rb:192` — attribute axis wildcards
|
|
20
|
+
- `spec/moxml/xpath/axes_spec.rb:225` — attribute + predicate combinations
|
|
21
|
+
|
|
22
|
+
**Workaround:** Use `element.attributes.values` via Ruby enumeration.
|
|
23
|
+
|
|
24
|
+
### 7b. Namespace Methods (4 tests)
|
|
25
|
+
|
|
26
|
+
Ox does not expose namespace information through its public API. The adapter
|
|
27
|
+
cannot implement `node.namespace`, `node.namespaces`, or namespace inheritance.
|
|
28
|
+
|
|
29
|
+
**Tests:**
|
|
30
|
+
- `spec/integration/shared_examples/edge_cases.rb:93` — default namespace changes
|
|
31
|
+
- `spec/integration/shared_examples/edge_cases.rb:119` — recursive namespace defs
|
|
32
|
+
- `spec/integration/shared_examples/edge_cases.rb:139` — namespace-prefixed attr access
|
|
33
|
+
- `spec/integration/shared_examples/integration_workflows.rb:83` — complex namespaces
|
|
34
|
+
|
|
35
|
+
**Requires:** Ox gem API enhancement (namespace accessors on `Ox::Element`).
|
|
36
|
+
|
|
37
|
+
### 7c. Text Content from Nested XPath Results (4 tests)
|
|
38
|
+
|
|
39
|
+
Accessing text content from child elements of XPath result nodes returns empty
|
|
40
|
+
strings. Likely a node wrapping or text node handling issue in HeadedOx.
|
|
41
|
+
|
|
42
|
+
**Tests:**
|
|
43
|
+
- `spec/moxml/adapter/headed_ox_spec.rb:74` — string functions in predicates
|
|
44
|
+
- `spec/moxml/adapter/headed_ox_spec.rb:82` — position functions
|
|
45
|
+
- `spec/moxml/adapter/headed_ox_spec.rb:304` — last() function
|
|
46
|
+
- `spec/integration/shared_examples/node_wrappers/node_behavior.rb:113` — XPath text access
|
|
47
|
+
|
|
48
|
+
**Needs:** Investigation — check node wrapping and text node registration.
|
|
49
|
+
|
|
50
|
+
### 7d. CDATA `]]>` Escaping (2 tests)
|
|
51
|
+
|
|
52
|
+
Ox serializes CDATA sections as-is without splitting on `]]>` sequences, which
|
|
53
|
+
violates the XML spec.
|
|
54
|
+
|
|
55
|
+
**Tests:**
|
|
56
|
+
- `spec/integration/shared_examples/edge_cases.rb:39`
|
|
57
|
+
- `spec/integration/shared_examples/node_wrappers/cdata_behavior.rb:44`
|
|
58
|
+
|
|
59
|
+
**Requires:** Ox gem enhancement or Moxml-side serialization override.
|
|
60
|
+
|
|
61
|
+
### 7e. Parent Node Setter (1 test)
|
|
62
|
+
|
|
63
|
+
Ox has no native method to change a node's parent after creation.
|
|
64
|
+
|
|
65
|
+
**Test:**
|
|
66
|
+
- `spec/integration/shared_examples/integration_workflows.rb:126`
|
|
67
|
+
|
|
68
|
+
**Requires:** Ox gem reparenting API or workaround via remove + re-add.
|
|
69
|
+
|
|
70
|
+
### 7f. Namespace-Aware XPath with Predicates (1 test)
|
|
71
|
+
|
|
72
|
+
Queries like `//xmlns:item[@id="123"]` return empty results under HeadedOx.
|
|
73
|
+
|
|
74
|
+
**Test:**
|
|
75
|
+
- `spec/integration/shared_examples/integration_workflows.rb:63`
|
|
76
|
+
|
|
77
|
+
**Needs:** Investigation — check namespace resolution in predicate context.
|
|
78
|
+
|
|
79
|
+
### 7g. Wildcard Element Counting (1 test)
|
|
80
|
+
|
|
81
|
+
Under HeadedOx, `//*` returns a different element count (6) than Nokogiri (7+), likely due to Ox's DOM
|
|
82
|
+
structure.
|
|
83
|
+
|
|
84
|
+
**Test:**
|
|
85
|
+
- `spec/moxml/xpath/compiler_spec.rb:156` (the same test location that is listed under 7a)
|
|
86
|
+
|
|
87
|
+
**Impact:** Low — real-world queries typically use specific element names.
|
|
88
|
+
|
|
89
|
+
## Files
|
|
90
|
+
|
|
91
|
+
- `docs/_pages/headed-ox-limitations.adoc` — full documentation
|
|
92
|
+
- `lib/moxml/adapter/headed_ox.rb`
|
|
93
|
+
- `lib/moxml/adapter/ox.rb`
|
|
94
|
+
- `lib/moxml/xpath/` — pure-Ruby XPath engine
|
|
95
|
+
- All spec files listed above
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# TODO 8: XPath Engine Predicate Gaps (5 xit Tests)
|
|
2
|
+
|
|
3
|
+
## Problem
|
|
4
|
+
|
|
5
|
+
The pure-Ruby XPath engine (used by HeadedOx) does not fully support
|
|
6
|
+
`position()` and `last()` inside predicates, or `id()` with a nodeset argument. Five tests are marked
|
|
7
|
+
`xit` pending predicate support.
|
|
8
|
+
|
|
9
|
+
These gaps affect the XPath engine in `lib/moxml/xpath/` — they are not
|
|
10
|
+
adapter-specific.
|
|
11
|
+
|
|
12
|
+
## Failing Tests
|
|
13
|
+
|
|
14
|
+
### `position()` in Predicates (2 tests)
|
|
15
|
+
|
|
16
|
+
`spec/moxml/xpath/functions/position_functions_spec.rb`
|
|
17
|
+
|
|
18
|
+
```ruby
|
|
19
|
+
xit "returns current position in predicate" do
|
|
20
|
+
# /root/item[position() = 2]
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
xit "works with position comparison" do
|
|
24
|
+
# /root/item[position() > 1]
|
|
25
|
+
end
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### `last()` in Predicates (2 tests)
|
|
29
|
+
|
|
30
|
+
`spec/moxml/xpath/functions/position_functions_spec.rb`
|
|
31
|
+
|
|
32
|
+
```ruby
|
|
33
|
+
xit "returns size of context in predicate" do
|
|
34
|
+
# /root/item[position() = last()]
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
xit "works with last() - 1" do
|
|
38
|
+
# /root/item[position() = last() - 1]
|
|
39
|
+
end
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### `id()` with Nodeset Argument (1 test)
|
|
43
|
+
|
|
44
|
+
`spec/moxml/xpath/functions/special_functions_spec.rb:69`
|
|
45
|
+
|
|
46
|
+
```ruby
|
|
47
|
+
xit "accepts nodeset argument containing IDs" do
|
|
48
|
+
# id(nodeset) where nodeset is path-evaluated
|
|
49
|
+
end
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Investigation Needed
|
|
53
|
+
|
|
54
|
+
- The XPath compiler likely needs to pass predicate context (position, size)
|
|
55
|
+
into the evaluation environment when compiling predicate expressions.
|
|
56
|
+
- `position()` and `last()` are defined but raise `InvalidContextError` when
|
|
57
|
+
used inside predicates — the predicate evaluation path doesn't set up the
|
|
58
|
+
context they need.
|
|
59
|
+
- `id()` with a nodeset argument requires evaluating the argument as an XPath
|
|
60
|
+
path first, then extracting ID values from the resulting nodes.
|
|
61
|
+
|
|
62
|
+
## Files
|
|
63
|
+
|
|
64
|
+
- `lib/moxml/xpath/compiler.rb` — predicate compilation
|
|
65
|
+
- `lib/moxml/xpath/engine.rb` — runtime evaluation context
|
|
66
|
+
- `lib/moxml/xpath/context.rb` — context setup for position/last
|
|
67
|
+
- `spec/moxml/xpath/functions/position_functions_spec.rb`
|
|
68
|
+
- `spec/moxml/xpath/functions/special_functions_spec.rb`
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# TODO 9: Cleanup and Hygiene
|
|
2
|
+
|
|
3
|
+
Small items that don't affect functionality but should be addressed.
|
|
4
|
+
|
|
5
|
+
## 9a. Stale Doc Links in Skip Messages
|
|
6
|
+
|
|
7
|
+
15+ test skip messages reference `docs/HEADED_OX_LIMITATIONS.md` but the
|
|
8
|
+
actual file is at `docs/_pages/headed-ox-limitations.adoc`. The referenced
|
|
9
|
+
path does not exist.
|
|
10
|
+
|
|
11
|
+
**Fix:** Update all skip messages to reference
|
|
12
|
+
`docs/_pages/headed-ox-limitations.adoc` instead.
|
|
13
|
+
|
|
14
|
+
**Affected files:**
|
|
15
|
+
- `spec/integration/headed_ox_integration_spec.rb`
|
|
16
|
+
- `spec/integration/shared_examples/integration_workflows.rb`
|
|
17
|
+
- `spec/integration/shared_examples/node_wrappers/node_behavior.rb`
|
|
18
|
+
- `spec/integration/shared_examples/node_wrappers/cdata_behavior.rb`
|
|
19
|
+
- `spec/integration/shared_examples/edge_cases.rb`
|
|
20
|
+
- `spec/moxml/xpath/axes_spec.rb`
|
|
21
|
+
- `spec/moxml/xpath/compiler_spec.rb`
|
|
22
|
+
- `spec/moxml/adapter/headed_ox_spec.rb`
|
|
23
|
+
|
|
24
|
+
## 9b. Untracked `scripts/` Directory
|
|
25
|
+
|
|
26
|
+
`scripts/format_xml.rb` and `scripts/pretty_format_xml.rb` exist as untracked
|
|
27
|
+
files. Decide whether to commit them (and document their purpose), ignore them via a `.gitignore` pattern,
|
|
28
|
+
or remove.
|
|
29
|
+
|
|
30
|
+
## 9c. Superseded Root TODO Files
|
|
31
|
+
|
|
32
|
+
The following root-level files are marked as superseded in
|
|
33
|
+
`TODO.remaining/README.md` but still exist:
|
|
34
|
+
|
|
35
|
+
- `TODO.entities-work.md`
|
|
36
|
+
- `TODO.entity-handling.md`
|
|
37
|
+
- `TODO.entity-support.md`
|
|
38
|
+
- `TODO.full-entity-support.md`
|
|
39
|
+
- `TODO.full-entity.md`
|
|
40
|
+
- `TODO.mn-bilingual-round-trip.md`
|
|
41
|
+
|
|
42
|
+
Once all work is confirmed tracked in `TODO.remaining/`, these can be deleted.
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# TODO.remaining — Consolidated Action Items
|
|
2
|
+
|
|
3
|
+
Consolidated from: TODO.entities-work.md, TODO.entity-support.md,
|
|
4
|
+
TODO.full-entity.md, TODO.full-entity-support.md, TODO.entity-handling.md,
|
|
5
|
+
TODO.mn-bilingual-round-trip.md, plus code audit (2026-04-22).
|
|
6
|
+
|
|
7
|
+
Those root files are superseded and can be removed.
|
|
8
|
+
|
|
9
|
+
## Dependency Order
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
TODO 1 (Adapter Support)
|
|
13
|
+
|
|
|
14
|
+
v
|
|
15
|
+
TODO 2 (Model-Driven Restoration) ---> TODO 4 (Lenient Entities Mode)
|
|
16
|
+
|
|
|
17
|
+
v
|
|
18
|
+
TODO 3 (Test Coverage)
|
|
19
|
+
|
|
20
|
+
TODO 5 (Fixture Integrity) — independent
|
|
21
|
+
TODO 6 (Ox Element Ordering) — independent
|
|
22
|
+
TODO 7 (HeadedOx Limitations) — independent
|
|
23
|
+
TODO 8 (XPath Predicate Gaps) — independent
|
|
24
|
+
TODO 9 (Cleanup Hygiene) — independent
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Summary
|
|
28
|
+
|
|
29
|
+
| # | File | Description | Status |
|
|
30
|
+
|---|------|-------------|--------|
|
|
31
|
+
| 1 | `1-entity-reference-adapter-support.md` | EntityReference in Ox, Oga, REXML, LibXML, HeadedOx | Not started |
|
|
32
|
+
| 2 | `2-entity-restoration-model-driven.md` | Use EntityRegistry as source of truth for restoration | Not started |
|
|
33
|
+
| 3 | `3-entity-reference-test-coverage.md` | Tests for EntityReference nodes and round-trips | Not started |
|
|
34
|
+
| 4 | `4-lenient-entities-mode.md` | Strict vs lenient entity restoration mode | Not started |
|
|
35
|
+
| 5 | `5-fixture-integrity.md` | Bilingual fixture verification + CI validation | Not started |
|
|
36
|
+
| 6 | `6-ox-element-ordering-bug.md` | Ox adapter reorders elements in certain fixtures | Not started |
|
|
37
|
+
| 7 | `7-headed-ox-limitations.md` | 15 skipped tests across 7 HeadedOx limitation areas | Not started |
|
|
38
|
+
| 8 | `8-xpath-predicate-gaps.md` | position()/last()/id() not working in XPath predicates | Not started |
|
|
39
|
+
| 9 | `9-cleanup-hygiene.md` | Stale doc links, untracked scripts, superseded files | Not started |
|
|
40
|
+
|
|
41
|
+
## What's Already Done
|
|
42
|
+
|
|
43
|
+
- EntityReference node class (`lib/moxml/entity_reference.rb`)
|
|
44
|
+
- EntityRegistry with 2125 W3C entities (`lib/moxml/entity_registry.rb`)
|
|
45
|
+
- Node type registry includes `:entity_reference`
|
|
46
|
+
- Base adapter template: `create_entity_reference`, `validate_entity_reference_name`
|
|
47
|
+
- Nokogiri adapter: full native EntityReference support
|
|
48
|
+
- Document factory: `create_entity_reference(name)`
|
|
49
|
+
- DocumentBuilder: `visit_entity_reference` + partial `restore_entities_in_text`
|
|
50
|
+
- Builder DSL: `entity_reference(name)`
|
|
51
|
+
- Config: `restore_entities`, `entity_load_mode`, `entity_provider`, `preload_entity_sets`
|
|
52
|
+
- Context: entity registry integration
|
|
53
|
+
- EntityRegistry tests (24 examples passing)
|
|
54
|
+
- HeadedOx limitations documented in `docs/_pages/headed-ox-limitations.adoc`
|
|
@@ -488,7 +488,7 @@ class MoxmlBenchmarkReport
|
|
|
488
488
|
f.puts "- Can accept 99.20% pass rate (16 documented Ox limitations)"
|
|
489
489
|
f.puts ""
|
|
490
490
|
f.puts "**Note:** HeadedOx = Ox parsing speed + full XPath features."
|
|
491
|
-
f.puts "See docs/
|
|
491
|
+
f.puts "See docs/_pages/headed-ox-limitations.adoc for complete details."
|
|
492
492
|
f.puts ""
|
|
493
493
|
end
|
|
494
494
|
|
data/lib/moxml/adapter/base.rb
CHANGED
|
@@ -149,6 +149,20 @@ namespace_validation_mode: :strict)
|
|
|
149
149
|
node
|
|
150
150
|
end
|
|
151
151
|
|
|
152
|
+
# Check if the native document has an XML declaration
|
|
153
|
+
# @param native_doc the native document object
|
|
154
|
+
# @param wrapper [Moxml::Document] the wrapper with has_xml_declaration flag
|
|
155
|
+
# @return [Boolean]
|
|
156
|
+
def has_declaration?(_native_doc, wrapper)
|
|
157
|
+
wrapper.has_xml_declaration
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
# Return the actual native node after an add_child operation.
|
|
161
|
+
# Override for adapters where node identity may change (e.g., LibXML doc.root=).
|
|
162
|
+
def actual_native(child_native, _parent_native)
|
|
163
|
+
child_native
|
|
164
|
+
end
|
|
165
|
+
|
|
152
166
|
protected
|
|
153
167
|
|
|
154
168
|
def create_native_element(_name, _owner_doc = nil)
|
|
@@ -9,7 +9,7 @@ module Moxml
|
|
|
9
9
|
# are read-only after creation. This wrapper allows mutation by
|
|
10
10
|
# storing values internally and regenerating XML when needed.
|
|
11
11
|
class Declaration
|
|
12
|
-
attr_accessor :version, :encoding
|
|
12
|
+
attr_accessor :version, :encoding, :removed, :parent_doc
|
|
13
13
|
attr_reader :native
|
|
14
14
|
|
|
15
15
|
def initialize(native_doc, version = nil, encoding = nil,
|
|
@@ -7,12 +7,13 @@ module Moxml
|
|
|
7
7
|
module CustomizedRexml
|
|
8
8
|
# Custom REXML formatter that fixes indentation and wrapping issues
|
|
9
9
|
class Formatter < ::REXML::Formatters::Pretty
|
|
10
|
-
def initialize(indentation: 2, self_close_empty: false)
|
|
10
|
+
def initialize(indentation: 2, self_close_empty: false, adapter: nil)
|
|
11
11
|
@indentation = " " * indentation
|
|
12
12
|
@level = 0
|
|
13
13
|
@compact = true
|
|
14
14
|
@width = -1 # Disable line wrapping
|
|
15
15
|
@self_close_empty = self_close_empty
|
|
16
|
+
@adapter = adapter
|
|
16
17
|
end
|
|
17
18
|
|
|
18
19
|
def write(node, output)
|
|
@@ -31,7 +32,13 @@ module Moxml
|
|
|
31
32
|
output << "<#{node.expanded_name}"
|
|
32
33
|
write_attributes(node, output)
|
|
33
34
|
|
|
34
|
-
|
|
35
|
+
# Check for entity refs stored in adapter attachments
|
|
36
|
+
entity_refs = @adapter&.attachments&.get(node, :entity_refs)
|
|
37
|
+
child_sequence = @adapter&.attachments&.get(node, :child_sequence)
|
|
38
|
+
|
|
39
|
+
has_no_children = node.children.empty? && !(entity_refs && !entity_refs.empty?)
|
|
40
|
+
|
|
41
|
+
if has_no_children && @self_close_empty
|
|
35
42
|
output << "/>"
|
|
36
43
|
return
|
|
37
44
|
end
|
|
@@ -44,26 +51,41 @@ module Moxml
|
|
|
44
51
|
mixed = has_text && has_elements
|
|
45
52
|
|
|
46
53
|
# Handle children based on content type
|
|
47
|
-
|
|
54
|
+
all_children_empty = node.children.empty? && !(entity_refs && !entity_refs.empty?)
|
|
55
|
+
unless all_children_empty
|
|
48
56
|
@level += @indentation.length unless mixed
|
|
49
57
|
|
|
50
|
-
|
|
51
|
-
#
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
58
|
+
if entity_refs && !entity_refs.empty? && child_sequence
|
|
59
|
+
# Interleave native children with entity refs using tracked sequence
|
|
60
|
+
eref_idx = 0
|
|
61
|
+
native_idx = 0
|
|
62
|
+
child_sequence.each do |type|
|
|
63
|
+
case type
|
|
64
|
+
when :native
|
|
65
|
+
if native_idx < node.children.size
|
|
66
|
+
child = node.children[native_idx]
|
|
67
|
+
native_idx += 1
|
|
68
|
+
next if child.is_a?(::REXML::Text) &&
|
|
69
|
+
child.to_s.strip.empty? &&
|
|
70
|
+
!(child.next_sibling.nil? && child.previous_sibling.nil?)
|
|
71
|
+
write(child, output)
|
|
72
|
+
end
|
|
73
|
+
when :eref
|
|
74
|
+
if eref_idx < entity_refs.size
|
|
75
|
+
write(entity_refs[eref_idx], output)
|
|
76
|
+
eref_idx += 1
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
else
|
|
81
|
+
node.children.each_with_index do |child, _index|
|
|
82
|
+
# Skip insignificant whitespace
|
|
83
|
+
next if child.is_a?(::REXML::Text) &&
|
|
84
|
+
child.to_s.strip.empty? &&
|
|
85
|
+
!(child.next_sibling.nil? && child.previous_sibling.nil?)
|
|
86
|
+
|
|
87
|
+
write(child, output)
|
|
88
|
+
end
|
|
67
89
|
end
|
|
68
90
|
|
|
69
91
|
# Reset indentation for closing tag in non-mixed content
|
|
@@ -60,10 +60,10 @@ module Moxml
|
|
|
60
60
|
#
|
|
61
61
|
# This overrides the Ox adapter's xpath method which uses locate().
|
|
62
62
|
#
|
|
63
|
-
# @param
|
|
63
|
+
# @param node Starting node (native or wrapped)
|
|
64
64
|
# @param [String] expression XPath expression
|
|
65
65
|
# @param [Hash] namespaces Namespace prefix mappings
|
|
66
|
-
# @return [
|
|
66
|
+
# @return [Array, Object] Native node array or scalar value
|
|
67
67
|
def xpath(node, expression, namespaces = {})
|
|
68
68
|
# If we receive a native node, wrap it first
|
|
69
69
|
# Document#xpath passes @native, but our compiled XPath needs Moxml nodes
|
|
@@ -85,16 +85,33 @@ module Moxml
|
|
|
85
85
|
# Execute on the node (now guaranteed to be wrapped Moxml node)
|
|
86
86
|
result = proc.call(node)
|
|
87
87
|
|
|
88
|
-
#
|
|
88
|
+
# Return native arrays for Node#xpath to wrap, scalars directly.
|
|
89
|
+
# The adapter contract: xpath() returns Array<native> | scalar.
|
|
89
90
|
case result
|
|
90
91
|
when Array
|
|
91
|
-
#
|
|
92
|
-
#
|
|
93
|
-
|
|
94
|
-
|
|
92
|
+
# XPath engine returns wrapped Moxml::Node objects.
|
|
93
|
+
# Extract native nodes and deduplicate by object identity.
|
|
94
|
+
native_nodes = result.map { |n| n.is_a?(Moxml::Node) ? n.native : n }
|
|
95
|
+
seen = {}
|
|
96
|
+
native_nodes.select do |native|
|
|
97
|
+
id = native.object_id
|
|
98
|
+
if seen[id]
|
|
99
|
+
false
|
|
100
|
+
else
|
|
101
|
+
seen[id] = true
|
|
102
|
+
end
|
|
103
|
+
end
|
|
95
104
|
when NodeSet
|
|
96
|
-
#
|
|
97
|
-
|
|
105
|
+
# NodeSet from intermediate evaluation - extract natives and deduplicate
|
|
106
|
+
seen = {}
|
|
107
|
+
result.to_a.map(&:native).select do |native|
|
|
108
|
+
id = native.object_id
|
|
109
|
+
if seen[id]
|
|
110
|
+
false
|
|
111
|
+
else
|
|
112
|
+
seen[id] = true
|
|
113
|
+
end
|
|
114
|
+
end
|
|
98
115
|
else
|
|
99
116
|
# Scalar values (string, number, boolean) - return as-is
|
|
100
117
|
result
|
|
@@ -113,10 +130,10 @@ module Moxml
|
|
|
113
130
|
# @param [Moxml::Node] node Starting node
|
|
114
131
|
# @param [String] expression XPath expression
|
|
115
132
|
# @param [Hash] namespaces Namespace prefix mappings
|
|
116
|
-
# @return [
|
|
133
|
+
# @return [Object, nil] First native node or scalar value
|
|
117
134
|
def at_xpath(node, expression, namespaces = {})
|
|
118
135
|
result = xpath(node, expression, namespaces)
|
|
119
|
-
result.is_a?(
|
|
136
|
+
result.is_a?(Array) ? result.first : result
|
|
120
137
|
end
|
|
121
138
|
|
|
122
139
|
# Check if XPath is supported
|