moxml 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +22 -39
  3. data/docs/_config.yml +3 -3
  4. data/docs/_guides/index.adoc +6 -0
  5. data/docs/_guides/modifying-xml.adoc +0 -1
  6. data/docs/_guides/parsing-xml.adoc +0 -1
  7. data/docs/_guides/xml-declaration.adoc +450 -0
  8. data/docs/_pages/adapter-compatibility.adoc +1 -1
  9. data/docs/_pages/adapters/headed-ox.adoc +9 -9
  10. data/docs/_pages/adapters/index.adoc +0 -1
  11. data/docs/_pages/adapters/libxml.adoc +1 -2
  12. data/docs/_pages/adapters/nokogiri.adoc +1 -2
  13. data/docs/_pages/adapters/oga.adoc +1 -2
  14. data/docs/_pages/adapters/ox.adoc +2 -1
  15. data/docs/_pages/adapters/rexml.adoc +1 -2
  16. data/docs/_pages/best-practices.adoc +0 -1
  17. data/docs/_pages/compatibility.adoc +0 -1
  18. data/docs/_pages/configuration.adoc +0 -1
  19. data/docs/_pages/error-handling.adoc +0 -1
  20. data/docs/_pages/headed-ox-limitations.adoc +16 -0
  21. data/docs/_pages/installation.adoc +0 -1
  22. data/docs/_pages/node-api-reference.adoc +0 -1
  23. data/docs/_pages/performance.adoc +0 -1
  24. data/docs/_pages/quick-start.adoc +0 -1
  25. data/docs/_pages/thread-safety.adoc +0 -1
  26. data/docs/_references/document-api.adoc +0 -1
  27. data/docs/_tutorials/basic-usage.adoc +0 -1
  28. data/docs/_tutorials/builder-pattern.adoc +0 -1
  29. data/docs/_tutorials/namespace-handling.adoc +0 -1
  30. data/docs/_tutorials/xpath-queries.adoc +0 -1
  31. data/lib/moxml/adapter/customized_rexml/formatter.rb +2 -2
  32. data/lib/moxml/adapter/libxml.rb +19 -3
  33. data/lib/moxml/adapter/nokogiri.rb +37 -2
  34. data/lib/moxml/adapter/oga.rb +67 -3
  35. data/lib/moxml/adapter/ox.rb +45 -7
  36. data/lib/moxml/adapter/rexml.rb +32 -10
  37. data/lib/moxml/context.rb +18 -1
  38. data/lib/moxml/declaration.rb +9 -0
  39. data/lib/moxml/document.rb +14 -0
  40. data/lib/moxml/document_builder.rb +7 -0
  41. data/lib/moxml/node.rb +61 -1
  42. data/lib/moxml/version.rb +1 -1
  43. data/lib/moxml/xpath/compiler.rb +2 -0
  44. data/spec/integration/shared_examples/node_wrappers/declaration_behavior.rb +0 -3
  45. data/spec/moxml/declaration_preservation_spec.rb +217 -0
  46. data/spec/performance/memory_usage_spec.rb +3 -2
  47. metadata +3 -1
data/lib/moxml/adapter/rexml.rb CHANGED
@@ -23,6 +23,7 @@ module Moxml
23
23
  end
24
24
  create_document
25
25
  end
26
+
26
27
  DocumentBuilder.new(Context.new(:rexml)).build(native_doc)
27
28
  end
28
29
 
@@ -255,6 +256,12 @@ module Moxml
255
256
  end
256
257
 
257
258
  def add_child(element, child)
259
+ # Special handling for declarations on REXML documents
260
+ if element.is_a?(::REXML::Document) && child.is_a?(::REXML::XMLDecl)
261
+ # Set document's xml_decl directly
262
+ element.instance_variable_set(:@xml_declaration, child)
263
+ end
264
+
258
265
  case child
259
266
  when String
260
267
  element.add_text(child)
@@ -279,6 +286,12 @@ module Moxml
279
286
  end
280
287
 
281
288
  def remove(node)
289
+ # Special handling for declarations on REXML documents
290
+ if node.is_a?(::REXML::XMLDecl) && node.parent.is_a?(::REXML::Document)
291
+ # Clear document's xml_declaration when removing declaration
292
+ node.parent.instance_variable_set(:@xml_declaration, nil)
293
+ end
294
+
282
295
  node.remove
283
296
  end
284
297
 
@@ -453,16 +466,25 @@ module Moxml
453
466
  output = +""
454
467
 
455
468
  if node.is_a?(::REXML::Document)
456
- # Always include XML declaration
457
- decl = node.xml_decl || ::REXML::XMLDecl.new("1.0",
458
- options[:encoding] || "UTF-8")
459
- decl.encoding = options[:encoding] if options[:encoding]
460
- output << "<?xml"
461
- output << %( version="#{decl.version}") if decl.version
462
- output << %( encoding="#{decl.encoding}") if decl.encoding
463
- output << %( standalone="#{decl.standalone}") if decl.standalone
464
- output << "?>"
465
- # output << "\n"
469
+ # Check if we should include declaration
470
+ # Priority: explicit option > check if document has xml_decl
471
+ should_include_decl = if options.key?(:no_declaration)
472
+ !options[:no_declaration]
473
+ else
474
+ # Include declaration only if document has xml_decl
475
+ !node.xml_decl.nil?
476
+ end
477
+
478
+ # Include XML declaration only if should_include_decl and xml_decl exists
479
+ if should_include_decl && node.xml_decl
480
+ decl = node.xml_decl
481
+ decl.encoding = options[:encoding] if options[:encoding]
482
+ output << "<?xml"
483
+ output << %( version="#{decl.version}") if decl.version
484
+ output << %( encoding="#{decl.encoding}") if decl.encoding
485
+ output << %( standalone="#{decl.standalone}") if decl.standalone
486
+ output << "?>"
487
+ end
466
488
 
467
489
  # output << "\n"
468
490
  node.doctype&.write(output)
data/lib/moxml/context.rb CHANGED
@@ -13,7 +13,24 @@ module Moxml
13
13
  end
14
14
 
15
15
  def parse(xml, options = {})
16
- config.adapter.parse(xml, default_options.merge(options))
16
+ # Detect if input has XML declaration
17
+ xml_string = if xml.respond_to?(:read)
18
+ xml.read.tap do
19
+ xml.rewind if xml.respond_to?(:rewind)
20
+ end
21
+ else
22
+ xml.to_s
23
+ end
24
+ has_declaration = xml_string.strip.start_with?("<?xml")
25
+
26
+ # Parse with adapter (without declaration info - adapters don't need it)
27
+ parsed_options = default_options.merge(options)
28
+ doc = config.adapter.parse(xml_string, parsed_options)
29
+
30
+ # Set declaration flag on Document wrapper (proper OOP)
31
+ doc.has_xml_declaration = has_declaration if doc.is_a?(Document)
32
+
33
+ doc
17
34
  end
18
35
 
19
36
  # Parse XML using SAX (event-driven) parsing
data/lib/moxml/declaration.rb CHANGED
@@ -33,6 +33,15 @@ module Moxml
33
33
  adapter.set_declaration_attribute(@native, "standalone", new_standalone)
34
34
  end
35
35
 
36
+ def remove
37
+ # Mark document as having no declaration when declaration is removed
38
+ # Store on native document so all wrappers see it
39
+ native_doc = adapter.document(@native)
40
+ native_doc&.instance_variable_set(:@moxml_has_declaration, false)
41
+
42
+ super
43
+ end
44
+
36
45
  def declaration?
37
46
  true
38
47
  end
data/lib/moxml/document.rb CHANGED
@@ -12,6 +12,17 @@ require_relative "doctype"
12
12
 
13
13
  module Moxml
14
14
  class Document < Node
15
+ attr_accessor :has_xml_declaration
16
+
17
+ def initialize(native, context)
18
+ super
19
+ @has_xml_declaration = false
20
+ end
21
+
22
+ def document
23
+ self
24
+ end
25
+
15
26
  def root=(element)
16
27
  adapter.set_root(@native, element.native)
17
28
  end
@@ -61,6 +72,9 @@ module Moxml
61
72
  node = prepare_node(node)
62
73
 
63
74
  if node.is_a?(Declaration)
75
+ # Mark that document now has a declaration
76
+ @has_xml_declaration = true
77
+
64
78
  if children.empty?
65
79
  adapter.add_child(@native, node.native)
66
80
  else
data/lib/moxml/document_builder.rb CHANGED
@@ -12,6 +12,13 @@ module Moxml
12
12
  def build(native_doc)
13
13
  @current_doc = context.create_document(native_doc)
14
14
 
15
+ # Transfer has_declaration flag if present
16
+ if native_doc.respond_to?(:instance_variable_get) &&
17
+ native_doc.instance_variable_defined?(:@moxml_has_declaration)
18
+ has_declaration = native_doc.instance_variable_get(:@moxml_has_declaration)
19
+ @current_doc.has_xml_declaration = has_declaration
20
+ end
21
+
15
22
  # Transfer DOCTYPE from parsed document if it exists
16
23
  if native_doc.respond_to?(:instance_variable_get) &&
17
24
  native_doc.instance_variable_defined?(:@moxml_doctype)
data/lib/moxml/node.rb CHANGED
@@ -73,7 +73,13 @@ module Moxml
73
73
  end
74
74
 
75
75
  def to_xml(options = {})
76
- adapter.serialize(@native, default_options.merge(options))
76
+ # Determine if we should include XML declaration
77
+ # For Document nodes: check native then wrapper, unless explicitly overridden
78
+ # For other nodes: default to no declaration unless explicitly set
79
+ serialize_options = default_options.merge(options)
80
+ serialize_options[:no_declaration] = !should_include_declaration?(options)
81
+
82
+ adapter.serialize(@native, serialize_options)
77
83
  end
78
84
 
79
85
  def xpath(expression, namespaces = {})
@@ -235,5 +241,59 @@ module Moxml
235
241
  expand_empty: true,
236
242
  }
237
243
  end
244
+
245
+ def should_include_declaration?(options)
246
+ return options[:declaration] if options.key?(:declaration)
247
+ return options.fetch(:declaration, false) unless is_a?(Document)
248
+
249
+ # For Document nodes, check both wrapper flag and native state
250
+ # Wrapper flag is set by Context.parse for parsed documents
251
+ # Native state reflects programmatic changes (e.g., add/remove)
252
+
253
+ adapter_name = adapter.to_s.split("::").last
254
+
255
+ case adapter_name
256
+ when "Nokogiri"
257
+ # Nokogiri: if @xml_decl is explicitly set, use that state
258
+ # Otherwise, trust wrapper flag (for parsed documents)
259
+ if native.respond_to?(:instance_variable_defined?) &&
260
+ native.instance_variable_defined?(:@xml_decl)
261
+ # Explicitly set (programmatically added) - check if nil
262
+ !native.instance_variable_get(:@xml_decl).nil?
263
+ else
264
+ # Not set (parsed document) - trust wrapper flag
265
+ has_xml_declaration
266
+ end
267
+ when "Rexml"
268
+ # REXML: check @xml_declaration instance variable
269
+ # If not defined (parsed doc), trust wrapper flag
270
+ if native.respond_to?(:instance_variable_defined?) &&
271
+ native.instance_variable_defined?(:@xml_declaration)
272
+ # Explicitly set - check if nil
273
+ !native.instance_variable_get(:@xml_declaration).nil?
274
+ else
275
+ # Not set (parsed document) - trust wrapper flag
276
+ has_xml_declaration
277
+ end
278
+ when "Oga"
279
+ native.respond_to?(:xml_declaration) && !native.xml_declaration.nil?
280
+ when "Ox", "HeadedOx"
281
+ # Ox stores declaration in document attributes
282
+ native[:version] || native[:encoding] || native[:standalone]
283
+ when "Libxml"
284
+ # LibXML stores declaration wrapper as instance variable
285
+ if native.respond_to?(:instance_variable_defined?) &&
286
+ native.instance_variable_defined?(:@moxml_declaration)
287
+ # Explicitly set - check if nil
288
+ !native.instance_variable_get(:@moxml_declaration).nil?
289
+ else
290
+ # Not set - trust wrapper flag
291
+ has_xml_declaration
292
+ end
293
+ else
294
+ # Fallback - trust wrapper flag
295
+ has_xml_declaration
296
+ end
297
+ end
238
298
  end
239
299
  end
data/lib/moxml/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Moxml
4
- VERSION = "0.1.8"
4
+ VERSION = "0.1.9"
5
5
  end
data/lib/moxml/xpath/compiler.rb CHANGED
@@ -976,8 +976,10 @@ module Moxml
976
976
  conversions << conversion.to_string(arg_var)
977
977
  end
978
978
 
979
+ # rubocop:disable Style/RedundantSum, Performance/Sum
979
980
  concatted = assigns.inject(:followed_by)
980
981
  .followed_by(conversions.inject(:+))
982
+ # rubocop:enable Style/RedundantSum, Performance/Sum
981
983
 
982
984
  block_given? ? concatted.empty?.if_false { yield concatted } : concatted
983
985
  end
data/spec/integration/shared_examples/node_wrappers/declaration_behavior.rb CHANGED
@@ -98,9 +98,6 @@ RSpec.shared_examples "Moxml::Declaration" do
98
98
  end
99
99
 
100
100
  it "removes from document" do
101
- if Moxml.new.config.adapter.name.match?(/Nokogiri|Rexml|Ox/)
102
- pending("The document contains a default declaration")
103
- end
104
101
  doc.add_child(declaration)
105
102
  declaration.remove
106
103
  expect(doc.to_xml).not_to include("<?xml")
data/spec/moxml/declaration_preservation_spec.rb ADDED
@@ -0,0 +1,217 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+
5
+ RSpec.describe "XML Declaration Preservation" do
6
+ # Test with all available adapters
7
+ ADAPTERS = %i[nokogiri oga rexml ox libxml headed_ox].freeze
8
+
9
+ ADAPTERS.each do |adapter_name|
10
+ context "with #{adapter_name} adapter" do
11
+ let(:context) { Moxml.new(adapter_name) }
12
+
13
+ describe "automatic preservation" do
14
+ context "when input has no XML declaration" do
15
+ let(:xml_without_decl) { '<svg xmlns="http://www.w3.org/2000/svg"><rect/></svg>' }
16
+
17
+ it "does not add XML declaration to output" do
18
+ doc = context.parse(xml_without_decl)
19
+ output = doc.to_xml
20
+
21
+ expect(output).not_to include("<?xml")
22
+ expect(output).to include("<svg")
23
+ end
24
+
25
+ it "sets has_xml_declaration to false" do
26
+ doc = context.parse(xml_without_decl)
27
+ expect(doc.has_xml_declaration).to be false
28
+ end
29
+ end
30
+
31
+ context "when input has XML declaration" do
32
+ let(:xml_with_decl) do
33
+ '<?xml version="1.0" encoding="UTF-8"?><root><child/></root>'
34
+ end
35
+
36
+ it "preserves XML declaration in output" do
37
+ doc = context.parse(xml_with_decl)
38
+ output = doc.to_xml
39
+
40
+ expect(output).to include("<?xml")
41
+ expect(output).to include('version="1.0"')
42
+ end
43
+
44
+ it "sets has_xml_declaration to true" do
45
+ doc = context.parse(xml_with_decl)
46
+ expect(doc.has_xml_declaration).to be true
47
+ end
48
+ end
49
+
50
+ context "when input has declaration with standalone attribute" do
51
+ let(:xml_with_standalone) do
52
+ '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><root/>'
53
+ end
54
+
55
+ it "preserves the declaration" do
56
+ doc = context.parse(xml_with_standalone)
57
+ output = doc.to_xml
58
+
59
+ expect(output).to include("<?xml")
60
+ end
61
+ end
62
+ end
63
+
64
+ describe "explicit override" do
65
+ let(:xml_without_decl) { "<root><child/></root>" }
66
+ let(:xml_with_decl) { '<?xml version="1.0"?><root><child/></root>' }
67
+
68
+ context "when forcing declaration on document without one" do
69
+ it "adds declaration when declaration: true" do
70
+ doc = context.parse(xml_without_decl)
71
+ output = doc.to_xml(declaration: true)
72
+
73
+ expect(output).to include("<?xml")
74
+ end
75
+ end
76
+
77
+ context "when removing declaration from document with one" do
78
+ it "removes declaration when declaration: false" do
79
+ doc = context.parse(xml_with_decl)
80
+ output = doc.to_xml(declaration: false)
81
+
82
+ expect(output).not_to include("<?xml")
83
+ expect(output).to include("<root")
84
+ end
85
+ end
86
+
87
+ context "when explicitly preserving declaration" do
88
+ it "keeps declaration when declaration: true" do
89
+ doc = context.parse(xml_with_decl)
90
+ output = doc.to_xml(declaration: true)
91
+
92
+ expect(output).to include("<?xml")
93
+ end
94
+ end
95
+ end
96
+
97
+ describe "round-trip fidelity" do
98
+ context "for document without declaration" do
99
+ let(:original) { "<root><item id=\"1\"/></root>" }
100
+
101
+ it "maintains absence of declaration through parse and serialize" do
102
+ doc = context.parse(original)
103
+ output = doc.to_xml
104
+
105
+ expect(output).not_to include("<?xml")
106
+
107
+ # Parse again and verify
108
+ doc2 = context.parse(output)
109
+ expect(doc2.has_xml_declaration).to be false
110
+ end
111
+ end
112
+
113
+ context "for document with declaration" do
114
+ let(:original) do
115
+ '<?xml version="1.0" encoding="UTF-8"?><root><item id="1"/></root>'
116
+ end
117
+
118
+ it "maintains presence of declaration through parse and serialize" do
119
+ doc = context.parse(original)
120
+ output = doc.to_xml
121
+
122
+ expect(output).to include("<?xml")
123
+
124
+ # Parse again and verify
125
+ doc2 = context.parse(output)
126
+ expect(doc2.has_xml_declaration).to be true
127
+ end
128
+ end
129
+ end
130
+
131
+ describe "edge cases" do
132
+ context "with empty document" do
133
+ it "does not add declaration to empty document" do
134
+ doc = context.create_document
135
+
136
+ # Empty documents should not have declaration by default
137
+ expect(doc.has_xml_declaration).to be false
138
+ end
139
+ end
140
+
141
+ context "with built document" do
142
+ it "does not add declaration to programmatically built document" do
143
+ doc = context.create_document
144
+ root = doc.create_element("root")
145
+ doc.root = root
146
+
147
+ output = doc.to_xml
148
+
149
+ expect(output).not_to include("<?xml")
150
+ expect(doc.has_xml_declaration).to be false
151
+ end
152
+
153
+ it "can explicitly add declaration to built document" do
154
+ doc = context.create_document
155
+ root = doc.create_element("root")
156
+ doc.root = root
157
+
158
+ output = doc.to_xml(declaration: true)
159
+
160
+ expect(output).to include("<?xml")
161
+ end
162
+ end
163
+ end
164
+
165
+ describe "non-document nodes" do
166
+ let(:xml) { '<?xml version="1.0"?><root><child>text</child></root>' }
167
+
168
+ it "does not add declaration when serializing element nodes" do
169
+ doc = context.parse(xml)
170
+ root = doc.root
171
+ output = root.to_xml
172
+
173
+ expect(output).not_to include("<?xml")
174
+ expect(output).to include("<root>")
175
+ end
176
+ end
177
+ end
178
+ end
179
+
180
+ describe "integration with svg_conform use case" do
181
+ let(:context) { Moxml.new }
182
+
183
+ context "remediating SVG without declaration" do
184
+ let(:svg_input) { '<svg xmlns="http://www.w3.org/2000/svg" width="100" height="100"><rect x="10" y="10" width="80" height="80"/></svg>' }
185
+
186
+ it "does not add declaration to remediated output" do
187
+ doc = context.parse(svg_input)
188
+
189
+ # Simulate remediation: add viewport
190
+ root = doc.root
191
+ root["viewBox"] = "0 0 100 100"
192
+
193
+ output = doc.to_xml
194
+
195
+ expect(output).not_to include("<?xml")
196
+ expect(output).to include('viewBox="0 0 100 100"')
197
+ end
198
+ end
199
+
200
+ context "remediating SVG with declaration" do
201
+ let(:svg_input) { '<?xml version="1.0" encoding="UTF-8"?><svg xmlns="http://www.w3.org/2000/svg"><rect/></svg>' }
202
+
203
+ it "preserves declaration in remediated output" do
204
+ doc = context.parse(svg_input)
205
+
206
+ # Simulate remediation
207
+ root = doc.root
208
+ root["viewBox"] = "0 0 100 100"
209
+
210
+ output = doc.to_xml
211
+
212
+ expect(output).to include("<?xml")
213
+ expect(output).to include('viewBox="0 0 100 100"')
214
+ end
215
+ end
216
+ end
217
+ end
data/spec/performance/memory_usage_spec.rb CHANGED
@@ -51,9 +51,10 @@ RSpec.shared_examples "Memory Usage Examples" do
51
51
  end
52
52
 
53
53
  it "handles streaming processing" do
54
- if %i[ox headed_ox].include?(context.config.adapter_name)
55
- pending "Ox/HeadedOx have load_file method but not stream parsing"
54
+ if context.config.adapter_name == :headed_ox
55
+ pending "HeadedOx double-wrapping issue with file-based parsing"
56
56
  end
57
+
57
58
  # Process file
58
59
  doc = nil
59
60
  File.open("spec/fixtures/small.xml") do |f|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: moxml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
@@ -47,6 +47,7 @@ files:
47
47
  - docs/_guides/parsing-xml.adoc
48
48
  - docs/_guides/sax-parsing.adoc
49
49
  - docs/_guides/working-with-documents.adoc
50
+ - docs/_guides/xml-declaration.adoc
50
51
  - docs/_pages/adapter-compatibility.adoc
51
52
  - docs/_pages/adapters/headed-ox.adoc
52
53
  - docs/_pages/adapters/index.adoc
@@ -203,6 +204,7 @@ files:
203
204
  - spec/moxml/comment_spec.rb
204
205
  - spec/moxml/config_spec.rb
205
206
  - spec/moxml/context_spec.rb
207
+ - spec/moxml/declaration_preservation_spec.rb
206
208
  - spec/moxml/declaration_spec.rb
207
209
  - spec/moxml/doctype_spec.rb
208
210
  - spec/moxml/document_builder_spec.rb