moxml 0.1.16 → 0.1.17

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/.gitignore +6 -0
  3. data/.rubocop_todo.yml +49 -133
  4. data/README.adoc +18 -0
  5. data/lib/moxml/adapter/base.rb +65 -8
  6. data/lib/moxml/adapter/headed_ox.rb +2 -1
  7. data/lib/moxml/adapter/libxml.rb +16 -3
  8. data/lib/moxml/adapter/nokogiri.rb +14 -4
  9. data/lib/moxml/adapter/oga.rb +26 -87
  10. data/lib/moxml/adapter/ox.rb +69 -19
  11. data/lib/moxml/adapter/rexml.rb +24 -3
  12. data/lib/moxml/attribute.rb +6 -0
  13. data/lib/moxml/element.rb +12 -8
  14. data/lib/moxml/node.rb +4 -1
  15. data/lib/moxml/text.rb +6 -0
  16. data/lib/moxml/version.rb +1 -1
  17. data/lib/moxml/xpath/compiler.rb +40 -21
  18. data/lib/moxml/xpath/parser.rb +12 -7
  19. data/spec/integration/all_adapters_spec.rb +1 -0
  20. data/spec/integration/shared_examples/edge_cases.rb +0 -6
  21. data/spec/integration/shared_examples/entity_reference_whitespace.rb +122 -0
  22. data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +0 -7
  23. data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +135 -0
  24. data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +0 -3
  25. data/spec/moxml/adapter/entity_restoration_spec.rb +97 -0
  26. data/spec/moxml/builder_spec.rb +16 -1
  27. data/spec/moxml/entity_preservation_spec.rb +130 -0
  28. data/spec/moxml/entity_reference_spec.rb +114 -0
  29. data/spec/moxml/entity_registry_spec.rb +68 -0
  30. data/spec/moxml/xpath/axes_spec.rb +0 -1
  31. data/spec/moxml/xpath/compiler_spec.rb +0 -2
  32. metadata +6 -12
  33. data/TODO.remaining/1-entity-reference-adapter-support.md +0 -157
  34. data/TODO.remaining/2-entity-restoration-model-driven.md +0 -169
  35. data/TODO.remaining/3-entity-reference-test-coverage.md +0 -170
  36. data/TODO.remaining/4-lenient-entities-mode.md +0 -106
  37. data/TODO.remaining/5-fixture-integrity.md +0 -65
  38. data/TODO.remaining/6-ox-element-ordering-bug.md +0 -36
  39. data/TODO.remaining/7-headed-ox-limitations.md +0 -95
  40. data/TODO.remaining/8-xpath-predicate-gaps.md +0 -68
  41. data/TODO.remaining/9-cleanup-hygiene.md +0 -42
  42. data/TODO.remaining/README.md +0 -54
@@ -181,4 +181,72 @@ RSpec.describe Moxml::EntityRegistry do
181
181
  expect(registry.load_all).to be(registry)
182
182
  end
183
183
  end
184
+
185
+ describe "#standard_entity?" do
186
+ it "returns true for the 5 standard XML entities" do
187
+ registry = described_class.new
188
+ expect(registry.standard_entity?(0x26)).to be true # amp
189
+ expect(registry.standard_entity?(0x3C)).to be true # lt
190
+ expect(registry.standard_entity?(0x3E)).to be true # gt
191
+ expect(registry.standard_entity?(0x22)).to be true # quot
192
+ expect(registry.standard_entity?(0x27)).to be true # apos
193
+ end
194
+
195
+ it "returns false for non-standard codepoints" do
196
+ registry = described_class.new
197
+ expect(registry.standard_entity?(0xA0)).to be false # nbsp
198
+ expect(registry.standard_entity?(0xA9)).to be false # copy
199
+ expect(registry.standard_entity?(0x30)).to be false # '0'
200
+ end
201
+ end
202
+
203
+ describe "#should_restore?" do
204
+ it "always restores the 5 standard XML entities regardless of config" do
205
+ registry = described_class.new
206
+ config = Moxml::Config.new(:nokogiri)
207
+ config.restore_entities = false
208
+ expect(registry.should_restore?(0x26, config: config)).to be true # amp
209
+ expect(registry.should_restore?(0x3C, config: config)).to be true # lt
210
+ end
211
+
212
+ it "restores non-standard entities when restore_entities is true and mode is lenient" do
213
+ registry = described_class.new
214
+ config = Moxml::Config.new(:nokogiri)
215
+ config.restore_entities = true
216
+ config.entity_restoration_mode = :lenient
217
+ expect(registry.should_restore?(0xA0, config: config)).to be true # nbsp
218
+ expect(registry.should_restore?(0xA9, config: config)).to be true # copy
219
+ end
220
+
221
+ it "does not restore non-standard entities when restore_entities is false" do
222
+ registry = described_class.new
223
+ config = Moxml::Config.new(:nokogiri)
224
+ config.restore_entities = false
225
+ expect(registry.should_restore?(0xA0, config: config)).to be false
226
+ end
227
+
228
+ it "returns false for codepoints not in the registry" do
229
+ registry = described_class.new(mode: :disabled)
230
+ config = Moxml::Config.new(:nokogiri)
231
+ config.restore_entities = true
232
+ expect(registry.should_restore?(0x30, config: config)).to be false # '0'
233
+ end
234
+ end
235
+
236
+ describe "#restorable_codepoints" do
237
+ it "returns the set of codepoints that could be restored" do
238
+ registry = described_class.new
239
+ codepoints = registry.restorable_codepoints
240
+ expect(codepoints).to be_a(Set)
241
+ expect(codepoints).to include(0x26) # amp
242
+ expect(codepoints).to include(0xA0) # nbsp
243
+ expect(codepoints.size).to be > 100
244
+ end
245
+
246
+ it "returns only standard codepoints for empty registry" do
247
+ registry = described_class.new(mode: :disabled)
248
+ codepoints = registry.restorable_codepoints
249
+ expect(codepoints).to eq(described_class::STANDARD_CODEPOINTS)
250
+ end
251
+ end
184
252
  end
@@ -222,7 +222,6 @@ RSpec.describe "XPath Axes" do
222
222
  end
223
223
 
224
224
  it "combines attribute axis with wildcards" do
225
- skip "HeadedOx limitation: Attribute wildcard (@*) not supported by XPath parser. See docs/_pages/headed-ox-limitations.adoc"
226
225
  ast = Moxml::XPath::Parser.parse("//book/@*")
227
226
  proc = Moxml::XPath::Compiler.compile_with_cache(ast)
228
227
  result = proc.call(book_doc)
@@ -153,7 +153,6 @@ RSpec.describe Moxml::XPath::Compiler do
153
153
  end
154
154
 
155
155
  it "works with wildcards" do
156
- skip "HeadedOx limitation: Wildcard count differs due to Ox's DOM structure. See docs/_pages/headed-ox-limitations.adoc"
157
156
  ast = Moxml::XPath::Parser.parse("//*")
158
157
  proc = described_class.compile_with_cache(ast)
159
158
  result = proc.call(nested_doc)
@@ -189,7 +188,6 @@ RSpec.describe Moxml::XPath::Compiler do
189
188
  end
190
189
 
191
190
  it "works with wildcards" do
192
- skip "HeadedOx limitation: Attribute wildcard (@*) not supported by XPath parser. See docs/_pages/headed-ox-limitations.adoc"
193
191
  ast = Moxml::XPath::Parser.parse("/root/book/@*")
194
192
  proc = described_class.compile_with_cache(ast)
195
193
  result = proc.call(attr_doc)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: moxml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.16
4
+ version: 0.1.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-04-22 00:00:00.000000000 Z
11
+ date: 2026-04-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  Moxml is a unified XML manipulation library that provides a common API
@@ -34,16 +34,6 @@ files:
34
34
  - LICENSE.md
35
35
  - README.adoc
36
36
  - Rakefile
37
- - TODO.remaining/1-entity-reference-adapter-support.md
38
- - TODO.remaining/2-entity-restoration-model-driven.md
39
- - TODO.remaining/3-entity-reference-test-coverage.md
40
- - TODO.remaining/4-lenient-entities-mode.md
41
- - TODO.remaining/5-fixture-integrity.md
42
- - TODO.remaining/6-ox-element-ordering-bug.md
43
- - TODO.remaining/7-headed-ox-limitations.md
44
- - TODO.remaining/8-xpath-predicate-gaps.md
45
- - TODO.remaining/9-cleanup-hygiene.md
46
- - TODO.remaining/README.md
47
37
  - benchmarks/.gitignore
48
38
  - benchmarks/generate_report.rb
49
39
  - bin/console
@@ -277,6 +267,7 @@ files:
277
267
  - spec/integration/all_adapters_spec.rb
278
268
  - spec/integration/headed_ox_integration_spec.rb
279
269
  - spec/integration/shared_examples/edge_cases.rb
270
+ - spec/integration/shared_examples/entity_reference_whitespace.rb
280
271
  - spec/integration/shared_examples/high_level/.gitkeep
281
272
  - spec/integration/shared_examples/high_level/builder_behavior.rb
282
273
  - spec/integration/shared_examples/high_level/context_behavior.rb
@@ -302,6 +293,7 @@ files:
302
293
  - spec/moxml/adapter/.gitkeep
303
294
  - spec/moxml/adapter/README.md
304
295
  - spec/moxml/adapter/base_spec.rb
296
+ - spec/moxml/adapter/entity_restoration_spec.rb
305
297
  - spec/moxml/adapter/headed_ox_spec.rb
306
298
  - spec/moxml/adapter/libxml_spec.rb
307
299
  - spec/moxml/adapter/nokogiri_spec.rb
@@ -325,6 +317,8 @@ files:
325
317
  - spec/moxml/document_builder_spec.rb
326
318
  - spec/moxml/document_spec.rb
327
319
  - spec/moxml/element_spec.rb
320
+ - spec/moxml/entity_preservation_spec.rb
321
+ - spec/moxml/entity_reference_spec.rb
328
322
  - spec/moxml/entity_registry_spec.rb
329
323
  - spec/moxml/error_spec.rb
330
324
  - spec/moxml/lazy_parse_spec.rb
@@ -1,157 +0,0 @@
1
- # TODO 1: EntityReference Adapter Support for Ox, Oga, REXML, LibXML, HeadedOx
2
-
3
- ## Problem
4
-
5
- Only the Nokogiri adapter implements `create_native_entity_reference` and maps
6
- its native type to `:entity_reference` in `node_type`. The other 5 adapters
7
- will raise `NotImplementedError` if `restore_entities` is enabled or if any
8
- code path calls `create_entity_reference`. This makes the entire
9
- EntityReference feature **non-functional** outside Nokogiri.
10
-
11
- ## Current State (verified)
12
-
13
- | Adapter | `create_native_entity_reference` | `node_type` mapping | Serialization | Status |
14
- |-----------|----------------------------------|---------------------|---------------|--------|
15
- | Nokogiri | Done (`Nokogiri::XML::EntityReference.new`) | Done | Native | Working |
16
- | Ox | Missing | Missing | Uses `Ox.dump` (C-level, won't handle custom types) | Broken |
17
- | HeadedOx | Missing (inherits Ox) | Missing | Same as Ox | Broken |
18
- | Oga | Missing | Missing | Uses `CustomizedOga::XmlGenerator` | Broken |
19
- | REXML | Missing | Missing | Uses REXML's `write` | Broken |
20
- | LibXML | Missing | Missing | Uses custom serializer with wrapper detection | Broken |
21
-
22
- ## Architecture
23
-
24
- EntityReference follows the same pattern as other non-native node types in Moxml:
25
- a **wrapper class** that represents what the underlying library cannot express natively.
26
-
27
- Each adapter needs three things:
28
- 1. **Wrapper class** (`CustomizedXxx::EntityReference`) — holds the entity name
29
- 2. **`node_type` mapping** — so `Node.wrap` can create the correct Moxml type
30
- 3. **Serialization** — so `to_xml` outputs `&name;`
31
-
32
- The existing pattern: `CustomizedOx::Text` extends `::Ox::Node`,
33
- `CustomizedOx::Attribute` extends `::Ox::Node`. EntityReference should follow suit.
34
-
35
- ### Serialization Challenge for Ox
36
-
37
- Ox's `serialize` calls `::Ox.dump(node)` which is C-level — it only handles
38
- Ox native types. For EntityReference wrappers to survive serialization, we need
39
- one of:
40
-
41
- - **Option A**: Custom serialization in the adapter that walks the tree manually,
42
- detecting EntityReference wrappers and emitting `&name;` directly.
43
- - **Option B**: Convert EntityReferences to their text equivalent before calling
44
- `Ox.dump`, restoring them in a post-processing step. This is fragile.
45
- - **Option C**: Override `serialize` for Element nodes to handle children
46
- individually, using `Ox.dump` for native children but handling wrappers
47
- directly.
48
-
49
- **Recommended: Option A** — it's how `CustomizedOga::XmlGenerator` already works
50
- for Oga. A similar tree-walking serializer for Ox gives full control.
51
-
52
- For LibXML, the existing serializer already checks `node.respond_to?(:to_xml)`
53
- for wrapper classes, so adding an EntityReference wrapper with `to_xml` returning
54
- `"&#{name};"` should integrate cleanly.
55
-
56
- ## Implementation Steps
57
-
58
- ### Ox Adapter
59
-
60
- 1. Create `lib/moxml/adapter/customized_ox/entity_reference.rb`:
61
- ```ruby
62
- module Moxml::Adapter::CustomizedOx
63
- class EntityReference < ::Ox::Node
64
- attr_reader :name
65
-
66
- def initialize(name)
67
- @name = name
68
- super() # Ox::Node requires no args or a value
69
- end
70
-
71
- def to_xml
72
- "&#{@name};"
73
- end
74
- alias to_s to_xml
75
- end
76
- end
77
- ```
78
-
79
- 2. Add to `lib/moxml/adapter/ox.rb`:
80
- - `create_native_entity_reference(name)` → `CustomizedOx::EntityReference.new(name)`
81
- - `node_type`: add `when CustomizedOx::EntityReference then :entity_reference`
82
- - `patch_node`: handle EntityReference wrapper in child list
83
- - `entity_reference_name(node)`: return `node.name`
84
- - Serialization: handle EntityReference children when walking the tree
85
-
86
- 3. Add to `lib/moxml/adapter/ox.rb` `unpatch_node`: return wrapper as-is
87
- (it extends Ox::Node so it can stay in the tree)
88
-
89
- ### HeadedOx Adapter
90
-
91
- HeadedOx inherits from Ox, so it gets Ox's EntityReference support
92
- automatically once Ox is done. Verify that the XPath engine doesn't
93
- break when encountering EntityReference nodes in the tree.
94
-
95
- ### Oga Adapter
96
-
97
- 1. Create `lib/moxml/adapter/customized_oga/entity_reference.rb`:
98
- ```ruby
99
- module Moxml::Adapter::CustomizedOga
100
- class EntityReference
101
- attr_reader :name
102
-
103
- def initialize(name)
104
- @name = name
105
- end
106
-
107
- def to_xml
108
- "&#{@name};"
109
- end
110
- end
111
- end
112
- ```
113
-
114
- 2. Add to `lib/moxml/adapter/oga.rb`:
115
- - `create_native_entity_reference(name)` → `CustomizedOga::EntityReference.new(name)`
116
- - `node_type`: add `when CustomizedOga::EntityReference then :entity_reference`
117
- - Update `CustomizedOga::XmlGenerator` to handle EntityReference children
118
- - `entity_reference_name(node)`: return `node.name`
119
-
120
- ### REXML Adapter
121
-
122
- 1. Investigate: REXML has `REXML::Entity` and `REXML::EntityRef` classes.
123
- Check if they can be used as native entity reference nodes, or if a
124
- wrapper is needed.
125
-
126
- 2. Add to `lib/moxml/adapter/rexml.rb`:
127
- - `create_native_entity_reference(name)` — native or wrapper
128
- - `node_type`: add mapping
129
- - `entity_reference_name(node)`
130
-
131
- ### LibXML Adapter
132
-
133
- 1. Investigate: LibXML Ruby has `LibXML::XML::Node::ENTITY_REF_NODE` constant
134
- (value 5). Check if native entity reference nodes can be created.
135
-
136
- 2. Create `lib/moxml/adapter/customized_libxml/entity_reference.rb` if needed.
137
-
138
- 3. Add to `lib/moxml/adapter/libxml.rb`:
139
- - `create_native_entity_reference(name)`
140
- - `node_type`: add `ENTITY_REF_NODE` mapping or wrapper mapping
141
- - `entity_reference_name(node)`
142
- - The existing serializer already handles wrappers with `to_xml` —
143
- verify EntityReference works in this path.
144
-
145
- ## Files to Create/Modify
146
-
147
- ### New Files
148
- - `lib/moxml/adapter/customized_ox/entity_reference.rb`
149
- - `lib/moxml/adapter/customized_oga/entity_reference.rb`
150
- - Possibly: `lib/moxml/adapter/customized_libxml/entity_reference.rb`
151
-
152
- ### Modified Files
153
- - `lib/moxml/adapter/ox.rb` — create_native_entity_reference, node_type, serialization
154
- - `lib/moxml/adapter/oga.rb` — create_native_entity_reference, node_type, XmlGenerator
155
- - `lib/moxml/adapter/rexml.rb` — create_native_entity_reference, node_type
156
- - `lib/moxml/adapter/libxml.rb` — create_native_entity_reference, node_type
157
- - `lib/moxml/adapter/headed_ox.rb` — verify inheritance works (likely no changes)
@@ -1,169 +0,0 @@
1
- # TODO 2: Model-Driven Entity Restoration
2
-
3
- ## Problem
4
-
5
- The `restore_entities` feature in `DocumentBuilder` is hardcoded to only handle
6
- the 5 standard XML entities (amp, lt, gt, quot, apos). It ignores the
7
- EntityRegistry entirely — despite EntityRegistry knowing 2125+ entities from
8
- the W3C HTML/MathML set. This means non-standard entities like `&nbsp;`,
9
- `&copy;`, `&mdash;` are never restored, which is the core round-trip problem
10
- that motivated the entire entity feature.
11
-
12
- Additionally, the restoration logic lives in DocumentBuilder with hardcoded
13
- knowledge that belongs in the model layer.
14
-
15
- ## Current State (verified)
16
-
17
- `lib/moxml/document_builder.rb:80-110` — `restore_entities_in_text`:
18
- ```ruby
19
- entity_chars = {
20
- "<" => "lt", ">" => "gt", "&" => "amp",
21
- '"' => "quot", "'" => "apos",
22
- }
23
- ```
24
-
25
- This is a hardcoded lookup that duplicates knowledge already in EntityRegistry.
26
- It only triggers for characters `<`, `>`, `&`, `"`, `'` — the regex guard
27
- `/[<>&"']/` on line 73 prevents it from ever seeing characters like U+00A0
28
- (non-breaking space, `&nbsp;`).
29
-
30
- **Critical**: Because only Nokogiri has `create_native_entity_reference`
31
- (see TODO 1), `restore_entities` raises `NotImplementedError` on all other
32
- adapters even for the 5 standard entities.
33
-
34
- ## XML Entity Model
35
-
36
- XML has a clear entity model:
37
-
38
- 1. **5 predefined entities** (amp, lt, gt, quot, apos) — always available per
39
- XML spec. These characters MUST be entity-encoded in certain contexts
40
- (e.g., `<` and `&` in text content).
41
-
42
- 2. **DTD-declared entities** — declared via `<!ENTITY name "value">` in the
43
- document's DOCTYPE internal subset or external subset.
44
-
45
- 3. **API-supplied entities** — registered by the user via
46
- `EntityRegistry.register` or `entity_provider` callback.
47
-
48
- 4. **Bundled detection set** — the W3C HTML/MathML entities bundled in
49
- `data/w3c_entities.json`. These are not "declared" in any DTD but are
50
- recognized by Moxml for restoration purposes.
51
-
52
- The EntityRegistry already knows about categories 1, 3, and 4. Category 2
53
- (DTD parsing) is future work.
54
-
55
- ## Design: Model-Driven Restoration
56
-
57
- EntityRegistry should be THE source of truth for "should this character become
58
- an entity reference?" The restoration policy should be:
59
-
60
- ```ruby
61
- # In EntityRegistry (or a cooperating policy object)
62
- STANDARD_CODEPOINTS = [0x26, 0x3C, 0x3E, 0x22, 0x27].freeze # amp, lt, gt, quot, apos
63
-
64
- def should_restore?(codepoint, config:)
65
- name = primary_name_for_codepoint(codepoint)
66
- return false unless name
67
-
68
- # 1. The 5 standard XML entities are ALWAYS restored.
69
- # These are syntactically required — the XML wouldn't be well-formed
70
- # without encoding them.
71
- return true if STANDARD_CODEPOINTS.include?(codepoint)
72
-
73
- # 2. Non-standard entities: only if restore_entities is enabled.
74
- return false unless config.restore_entities
75
-
76
- # 3. In the future, strict vs lenient mode will gate this further.
77
- # Strict: only if declared in DTD (not yet implemented).
78
- # Lenient: any known entity name.
79
- true
80
- end
81
- ```
82
-
83
- ### Changes to DocumentBuilder
84
-
85
- Replace the hardcoded hash with delegation to the registry:
86
-
87
- ```ruby
88
- def visit_text(node)
89
- prepared = adapter.prepare_for_new_document(node, @current_doc.native)
90
- content = adapter.text_content(node)
91
-
92
- if should_restore_entities?(content)
93
- restore_entities_in_text(content)
94
- else
95
- @node_stack.last&.add_child(Text.new(prepared, context))
96
- end
97
- end
98
-
99
- private
100
-
101
- def should_restore_entities?(content)
102
- return false unless context.config.restore_entities
103
- # Scan for any character that the registry knows about
104
- content.to_s.chars.any? { |c| context.entity_registry.should_restore?(c.ord, config: context.config) }
105
- end
106
-
107
- def restore_entities_in_text(content)
108
- parent = @node_stack.last
109
- return unless parent
110
-
111
- content.to_s.chars.each do |char|
112
- codepoint = char.ord
113
- name = context.entity_registry.primary_name_for_codepoint(codepoint)
114
-
115
- if context.entity_registry.should_restore?(codepoint, config: context.config)
116
- entity_node = adapter.create_entity_reference(name)
117
- parent.add_child(EntityReference.new(entity_node, context))
118
- else
119
- text_node = adapter.create_text(char)
120
- parent.add_child(Text.new(text_node, context))
121
- end
122
- end
123
- end
124
- ```
125
-
126
- **Note**: This splits each text node into per-character nodes. For documents
127
- with few entity references, this creates unnecessary overhead. A future
128
- optimization should buffer consecutive non-entity characters into a single
129
- text node.
130
-
131
- ### Performance Optimization (deferred)
132
-
133
- Instead of character-by-character processing:
134
- 1. Scan the text for characters that have entity names in the registry
135
- 2. Split only at those positions, keeping runs of plain characters together
136
- 3. This reduces node count dramatically for typical documents
137
-
138
- ```ruby
139
- def restore_entities_in_text(content)
140
- parent = @node_stack.last
141
- return unless parent
142
-
143
- buffer = +""
144
- content.to_s.chars.each do |char|
145
- codepoint = char.ord
146
- name = context.entity_registry.primary_name_for_codepoint(codepoint)
147
-
148
- if name && context.entity_registry.should_restore?(codepoint, config: context.config)
149
- # Flush buffer before entity
150
- if !buffer.empty?
151
- parent.add_child(Text.new(adapter.create_text(buffer), context))
152
- buffer.clear
153
- end
154
- parent.add_child(EntityReference.new(adapter.create_entity_reference(name), context))
155
- else
156
- buffer << char
157
- end
158
- end
159
- # Flush remaining buffer
160
- if !buffer.empty?
161
- parent.add_child(Text.new(adapter.create_text(buffer), context))
162
- end
163
- end
164
- ```
165
-
166
- ## Files to Modify
167
-
168
- - `lib/moxml/entity_registry.rb` — add `should_restore?` method
169
- - `lib/moxml/document_builder.rb` — replace hardcoded entity_chars with registry-driven logic
@@ -1,170 +0,0 @@
1
- # TODO 3: EntityReference Test Coverage
2
-
3
- ## Problem
4
-
5
- There are zero tests for EntityReference node behavior, zero tests for
6
- entity round-trip preservation, and zero adapter-level tests for entity
7
- reference creation or serialization. Only `EntityRegistry` has tests
8
- (`spec/moxml/entity_registry_spec.rb`).
9
-
10
- This means the entire EntityReference feature is untested — including the
11
- `restore_entities` config, `create_entity_reference` factory, `visit_entity_reference`
12
- in DocumentBuilder, and the `entity_reference` Builder DSL method.
13
-
14
- ## Required Test Coverage
15
-
16
- ### 1. EntityReference Node Tests
17
-
18
- **File**: `spec/moxml/entity_reference_spec.rb`
19
-
20
- ```ruby
21
- RSpec.describe Moxml::EntityReference do
22
- # Test per adapter (use shared examples)
23
- %i[nokogiri].each do |adapter| # expand as adapters gain support
24
- context "with #{adapter} adapter" do
25
- let(:ctx) { Moxml.new(adapter) }
26
-
27
- it "creates an entity reference node" do
28
- doc = ctx.create_document
29
- ref = doc.create_entity_reference("nbsp")
30
- expect(ref).to be_a(Moxml::EntityReference)
31
- expect(ref.name).to eq("nbsp")
32
- end
33
-
34
- it "has empty text content" do
35
- doc = ctx.create_document
36
- ref = doc.create_entity_reference("amp")
37
- expect(ref.text).to eq("")
38
- expect(ref.content).to eq("")
39
- end
40
-
41
- it "serializes to entity syntax" do
42
- doc = ctx.create_document
43
- ref = doc.create_entity_reference("mdash")
44
- expect(ref.to_xml).to eq("&mdash;")
45
- end
46
-
47
- it "is recognized as entity_reference type" do
48
- doc = ctx.create_document
49
- ref = doc.create_entity_reference("copy")
50
- expect(ref.entity_reference?).to be true
51
- end
52
-
53
- it "survives add_child and retrieval" do
54
- doc = ctx.create_document
55
- root = doc.create_element("p")
56
- doc.root = root
57
- ref = doc.create_entity_reference("nbsp")
58
- root.add_child(ref)
59
- expect(root.children.first).to be_a(Moxml::EntityReference)
60
- expect(root.children.first.name).to eq("nbsp")
61
- end
62
-
63
- it "validates entity reference name" do
64
- doc = ctx.create_document
65
- expect {
66
- doc.create_entity_reference("123invalid")
67
- }.to raise_error(Moxml::ValidationError)
68
- end
69
- end
70
- end
71
- end
72
- ```
73
-
74
- ### 2. Builder DSL Tests
75
-
76
- **File**: `spec/moxml/builder_spec.rb` (add to existing or create new section)
77
-
78
- ```ruby
79
- it "creates entity references via DSL" do
80
- doc = Moxml::Builder.new(ctx).build do
81
- element("p") { entity_reference("nbsp") }
82
- end
83
- expect(doc.root.children.first).to be_a(Moxml::EntityReference)
84
- expect(doc.to_xml).to include("&nbsp;")
85
- end
86
- ```
87
-
88
- ### 3. Restore Entities Integration Tests
89
-
90
- **File**: `spec/moxml/adapter/entity_restoration_spec.rb` (shared examples)
91
-
92
- ```ruby
93
- RSpec.shared_examples "entity restoration" do |adapter_name|
94
- context "with #{adapter_name}" do
95
- let(:ctx) { Moxml.new(adapter_name, restore_entities: true) }
96
-
97
- it "restores standard XML entities" do
98
- doc = ctx.parse("<p>a &amp; b</p>")
99
- output = doc.to_xml
100
- expect(output).to include("&amp;")
101
- end
102
-
103
- it "restores non-standard entities from registry" do
104
- # nbsp (U+00A0) is in the bundled W3C entity set
105
- doc = ctx.parse("<p>\u00A0</p>")
106
- output = doc.to_xml
107
- expect(output).to include("&nbsp;")
108
- end
109
-
110
- it "preserves entity syntax through round-trip" do
111
- doc = ctx.parse("<p>&nbsp;&copy;&mdash;</p>")
112
- output = doc.to_xml
113
- reparsed = ctx.parse(output)
114
- # Text content should be identical after round-trip
115
- expect(reparsed.root.text).to eq(doc.root.text)
116
- end
117
-
118
- it "does not restore entities when restore_entities is false" do
119
- ctx_no_restore = Moxml.new(adapter_name, restore_entities: false)
120
- doc = ctx_no_restore.parse("<p>a &amp; b</p>")
121
- output = doc.to_xml
122
- # Standard entities may still appear as &amp; due to XML escaping,
123
- # but no EntityReference nodes should be created
124
- expect(doc.root.children).not_to include(a_kind_of(Moxml::EntityReference))
125
- end
126
- end
127
- end
128
- ```
129
-
130
- ### 4. Cross-Adapter Consistency Tests
131
-
132
- **File**: `spec/consistency/entity_reference_consistency_spec.rb`
133
-
134
- Verify that EntityReference behavior is consistent across all adapters that
135
- support it:
136
- - Same entity name produces same serialization
137
- - Same text content after round-trip
138
- - Children enumeration includes EntityReference nodes
139
-
140
- ### 5. EntityRegistry.should_restore? Tests
141
-
142
- **File**: Add to `spec/moxml/entity_registry_spec.rb`
143
-
144
- ```ruby
145
- describe "#should_restore?" do
146
- it "always restores the 5 standard XML entities" do
147
- registry = described_class.new
148
- config = Moxml::Config.new(:nokogiri)
149
- expect(registry.should_restore?(0x26, config: config)).to be true # amp
150
- expect(registry.should_restore?(0x3C, config: config)).to be true # lt
151
- end
152
-
153
- it "restores non-standard entities only when restore_entities is true" do
154
- registry = described_class.new
155
- config_on = Moxml::Config.new(:nokogiri)
156
- config_on.restore_entities = true
157
- config_off = Moxml::Config.new(:nokogiri)
158
- config_off.restore_entities = false
159
-
160
- expect(registry.should_restore?(0xA0, config: config_on)).to be true # nbsp
161
- expect(registry.should_restore?(0xA0, config: config_off)).to be false
162
- end
163
- end
164
- ```
165
-
166
- ## Dependencies
167
-
168
- - TODO 1 must be partially complete (at least one adapter working) before
169
- adapter-level tests can pass
170
- - TODO 2 must be complete before non-standard entity restoration tests can pass