moxml 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.adoc ADDED
@@ -0,0 +1,770 @@
1
+ = Moxml: Modular XML processing for Ruby
2
+
3
+ Moxml provides a unified API for XML processing in Ruby, supporting multiple XML parsing backends (Nokogiri, Ox, and Oga).
4
+
5
+ Moxml ("mox-em-el") stands for "Modular XML" and aims to provide a consistent
6
+ interface for working with XML documents, regardless of the underlying XML
7
+ library.
8
+
9
+ == Installation
10
+
11
+ [source,ruby]
12
+ ----
13
+ gem 'moxml'
14
+ ----
15
+
16
+ == Basic usage
17
+
18
+ === Configuration
19
+
20
+ Configure Moxml to use your preferred XML backend:
21
+
22
+ [source,ruby]
23
+ ----
24
+ require 'moxml'
25
+
26
+ Moxml.configure do |config|
27
+ config.backend = :nokogiri # or :ox, :oga
28
+ end
29
+ ----
30
+
31
+ === Creating and parsing documents
32
+
33
+ [source,ruby]
34
+ ----
35
+ # Create new empty document
36
+ doc = Moxml::Document.new
37
+
38
+ # Parse from string
39
+ doc = Moxml::Document.parse("<root><child>content</child></root>")
40
+
41
+ # Parse with encoding
42
+ doc = Moxml::Document.parse(xml_string, encoding: 'UTF-8')
43
+ ----
44
+
45
+ === Document creation patterns
46
+
47
+ [source,ruby]
48
+ ----
49
+ # Method 1: Create and build
50
+ doc = Moxml::Document.new
51
+ root = doc.create_element('root')
52
+ doc.add_child(root)
53
+
54
+ # Method 2: Parse from string
55
+ doc = Moxml::Document.parse("<root/>")
56
+
57
+ # Method 3: Parse with encoding
58
+ doc = Moxml::Document.parse(xml_string, encoding: 'UTF-8')
59
+
60
+ # Method 4: Parse with options
61
+ doc = Moxml::Document.parse(xml_string, {
62
+ encoding: 'UTF-8',
63
+ strict: true
64
+ })
65
+ ----
66
+
67
+ === Common XML patterns
68
+
69
+ [source,ruby]
70
+ ----
71
+ # Working with namespaces
72
+ doc = Moxml::Document.new
73
+ root = doc.create_element('root')
74
+ root['xmlns:custom'] = 'http://example.com/ns'
75
+ child = doc.create_element('custom:element')
76
+ root.add_child(child)
77
+
78
+ # Creating structured data
79
+ person = doc.create_element('person')
80
+ person['id'] = '123'
81
+ name = doc.create_element('name')
82
+ name.add_child(doc.create_text('John Doe'))
83
+ person.add_child(name)
84
+
85
+ # Working with attributes
86
+ element = doc.create_element('div')
87
+ element['class'] = 'container'
88
+ element['data-id'] = '123'
89
+ element['style'] = 'color: blue'
90
+
91
+ # Handling special characters
92
+ text = doc.create_text('Special chars: < > & " \'')
93
+ cdata = doc.create_cdata('<script>alert("Hello!");</script>')
94
+
95
+ # Processing instructions
96
+ pi = doc.create_processing_instruction('xml-stylesheet',
97
+ 'type="text/xsl" href="style.xsl"')
98
+ doc.add_child(pi)
99
+ ----
100
+
101
+ === Working with elements
102
+
103
+ [source,ruby]
104
+ ----
105
+ # Create new element
106
+ element = Moxml::Element.new('tagname')
107
+
108
+ # Add attributes
109
+ element['class'] = 'content'
110
+
111
+ # Access attributes
112
+ class_attr = element['class']
113
+
114
+ # Add child elements
115
+ child = element.create_element('child')
116
+ element.add_child(child)
117
+
118
+ # Access text content
119
+ text_content = element.text
120
+
121
+ # Add text content
122
+ text = element.create_text('content')
123
+ element.add_child(text)
124
+
125
+ # Chaining operations
126
+ element
127
+ .add_child(doc.create_element('child'))
128
+ .add_child(doc.create_text('content'))
129
+ ['class'] = 'new-class'
130
+
131
+ # Complex element creation
132
+ div = doc.create_element('div')
133
+ div['class'] = 'container'
134
+ div.add_child(doc.create_element('span'))
135
+ .add_child(doc.create_text('Hello'))
136
+ div.add_child(doc.create_element('br'))
137
+ div.add_child(doc.create_text('World'))
138
+ ----
139
+
140
+ === Working with different node types
141
+
142
+ [source,ruby]
143
+ ----
144
+ # Text nodes with various content
145
+ plain_text = Moxml::Text.new("Simple text")
146
+ multiline_text = Moxml::Text.new("Line 1\nLine 2")
147
+ special_chars = Moxml::Text.new("Special: & < > \" '")
148
+
149
+ # CDATA sections for different content types
150
+ script_cdata = Moxml::Cdata.new("function() { alert('Hello!'); }")
151
+ xml_cdata = Moxml::Cdata.new("<data><item>value</item></data>")
152
+ mixed_cdata = Moxml::Cdata.new("Text with ]]> characters")
153
+
154
+ # Comments for documentation
155
+ todo_comment = Moxml::Comment.new("TODO: Add validation")
156
+ section_comment = Moxml::Comment.new("----- Section Break -----")
157
+ debug_comment = Moxml::Comment.new("DEBUG: Remove in production")
158
+
159
+ # Processing instructions for various uses
160
+ style_pi = Moxml::ProcessingInstruction.new(
161
+ "xml-stylesheet",
162
+ 'type="text/css" href="style.css"'
163
+ )
164
+ php_pi = Moxml::ProcessingInstruction.new(
165
+ "php",
166
+ 'echo "<?php echo $var; ?>>";'
167
+ )
168
+ custom_pi = Moxml::ProcessingInstruction.new(
169
+ "custom-processor",
170
+ 'param1="value1" param2="value2"'
171
+ )
172
+ ----
173
+
174
+ === Element manipulation examples
175
+
176
+ [source,ruby]
177
+ ----
178
+ # Building complex structures
179
+ doc = Moxml::Document.new
180
+ root = doc.create_element('html')
181
+ doc.add_child(root)
182
+
183
+ # Create head section
184
+ head = doc.create_element('head')
185
+ root.add_child(head)
186
+
187
+ title = doc.create_element('title')
188
+ title.add_child(doc.create_text('Example Page'))
189
+ head.add_child(title)
190
+
191
+ meta = doc.create_element('meta')
192
+ meta['charset'] = 'UTF-8'
193
+ head.add_child(meta)
194
+
195
+ # Create body section
196
+ body = doc.create_element('body')
197
+ root.add_child(body)
198
+
199
+ div = doc.create_element('div')
200
+ div['class'] = 'container'
201
+ body.add_child(div)
202
+
203
+ # Add multiple paragraphs
204
+ 3.times do |i|
205
+ p = doc.create_element('p')
206
+ p.add_child(doc.create_text("Paragraph #{i + 1}"))
207
+ div.add_child(p)
208
+ end
209
+
210
+ # Working with lists
211
+ ul = doc.create_element('ul')
212
+ div.add_child(ul)
213
+
214
+ ['Item 1', 'Item 2', 'Item 3'].each do |text|
215
+ li = doc.create_element('li')
216
+ li.add_child(doc.create_text(text))
217
+ ul.add_child(li)
218
+ end
219
+
220
+ # Adding link element
221
+ a = doc.create_element('a')
222
+ a['href'] = 'https://example.com'
223
+ a.add_child(doc.create_text('Visit Example'))
224
+ div.add_child(a)
225
+ ----
226
+
227
+ === Advanced node manipulation
228
+
229
+ [source,ruby]
230
+ ----
231
+ # Cloning nodes
232
+ original = doc.create_element('div')
233
+ original['id'] = 'original'
234
+ clone = original.clone
235
+
236
+ # Moving nodes
237
+ target = doc.create_element('target')
238
+ source = doc.create_element('source')
239
+ source.add_child(doc.create_text('Content'))
240
+ target.add_child(source)
241
+
242
+ # Replacing nodes
243
+ old_node = doc.at_xpath('//old')
244
+ new_node = doc.create_element('new')
245
+ old_node.replace(new_node)
246
+
247
+ # Inserting before/after
248
+ reference = doc.create_element('reference')
249
+ before = doc.create_element('before')
250
+ after = doc.create_element('after')
251
+ reference.add_previous_sibling(before)
252
+ reference.add_next_sibling(after)
253
+
254
+ # Conditional manipulation
255
+ element = doc.at_xpath('//conditional')
256
+ if element['flag'] == 'true'
257
+ element.add_child(doc.create_text('Flag is true'))
258
+ else
259
+ element.remove
260
+ end
261
+ ----
262
+
263
+ === Working with namespaces
264
+
265
+ [source,ruby]
266
+ ----
267
+ # Creating namespaced document
268
+ doc = Moxml::Document.new
269
+ root = doc.create_element('root')
270
+ root['xmlns'] = 'http://example.com/default'
271
+ root['xmlns:custom'] = 'http://example.com/custom'
272
+ doc.add_child(root)
273
+
274
+ # Adding namespaced elements
275
+ default_elem = doc.create_element('default-elem')
276
+ custom_elem = doc.create_element('custom:elem')
277
+
278
+ root.add_child(default_elem)
279
+ root.add_child(custom_elem)
280
+
281
+ # Working with attributes in namespaces
282
+ custom_elem['custom:attr'] = 'value'
283
+
284
+ # Accessing namespaced content
285
+ ns_elem = doc.at_xpath('//custom:elem')
286
+ ns_attr = ns_elem['custom:attr']
287
+ ----
288
+
289
+ === Document serialization examples
290
+
291
+ [source,ruby]
292
+ ----
293
+ # Basic serialization
294
+ xml_string = doc.to_xml
295
+
296
+ # Pretty printing with indentation
297
+ formatted_xml = doc.to_xml(
298
+ indent: 2,
299
+ pretty: true
300
+ )
301
+
302
+ # Controlling XML declaration
303
+ with_declaration = doc.to_xml(
304
+ xml_declaration: true,
305
+ encoding: 'UTF-8',
306
+ standalone: 'yes'
307
+ )
308
+
309
+ # Compact output
310
+ minimal_xml = doc.to_xml(
311
+ indent: 0,
312
+ pretty: false,
313
+ xml_declaration: false
314
+ )
315
+
316
+ # Custom formatting
317
+ custom_format = doc.to_xml(
318
+ indent: 4,
319
+ encoding: 'ISO-8859-1',
320
+ xml_declaration: true
321
+ )
322
+ ----
323
+
324
+ == Implementation details
325
+
326
+ === Memory management
327
+
328
+ [source,ruby]
329
+ ----
330
+ # Efficient document handling
331
+ doc = Moxml::Document.parse(large_xml)
332
+ begin
333
+ # Process document
334
+ result = process_document(doc)
335
+ ensure
336
+ # Clear references
337
+ doc = nil
338
+ GC.start
339
+ end
340
+
341
+ # Streaming large node sets
342
+ doc.xpath('//large-set/*').each do |node|
343
+ # Process node
344
+ process_node(node)
345
+ # Clear reference
346
+ node = nil
347
+ end
348
+
349
+ # Handling large collections
350
+ def process_large_nodeset(nodeset)
351
+ nodeset.each do |node|
352
+ yield node if block_given?
353
+ end
354
+ ensure
355
+ # Clear references
356
+ nodeset = nil
357
+ GC.start
358
+ end
359
+ ----
360
+
361
+ === Backend-specific optimizations
362
+
363
+ [source,ruby]
364
+ ----
365
+ # Nokogiri-specific optimizations
366
+ if Moxml.config.backend == :nokogiri
367
+ # Use native CSS selectors
368
+ nodes = doc.native.css('complex > selector')
369
+ nodes.each do |native_node|
370
+ node = Moxml::Node.wrap(native_node)
371
+ # Process node
372
+ end
373
+
374
+ # Use native XPath
375
+ results = doc.native.xpath('//complex/xpath/expression')
376
+ end
377
+
378
+ # Ox-specific optimizations
379
+ if Moxml.config.backend == :ox
380
+ # Use native parsing options
381
+ doc = Moxml::Document.parse(xml, {
382
+ mode: :generic,
383
+ effort: :tolerant,
384
+ smart: true
385
+ })
386
+
387
+ # Direct element creation
388
+ element = Ox::Element.new('name')
389
+ wrapped = Moxml::Element.new(element)
390
+ end
391
+
392
+ # Oga-specific optimizations
393
+ if Moxml.config.backend == :oga
394
+ # Use native parsing features
395
+ doc = Moxml::Document.parse(xml, {
396
+ encoding: 'UTF-8',
397
+ strict: true
398
+ })
399
+
400
+ # Direct access to native methods
401
+ nodes = doc.native.xpath('//element')
402
+ end
403
+ ----
404
+
405
+ === Threading patterns
406
+
407
+ [source,ruby]
408
+ ----
409
+ # Thread-safe document creation
410
+ require 'thread'
411
+
412
+ class ThreadSafeXmlProcessor
413
+ def initialize
414
+ @mutex = Mutex.new
415
+ end
416
+
417
+ def process_document(xml_string)
418
+ @mutex.synchronize do
419
+ doc = Moxml::Document.parse(xml_string)
420
+ # Process document
421
+ result = doc.to_xml
422
+ doc = nil
423
+ result
424
+ end
425
+ end
426
+ end
427
+
428
+ # Parallel document processing
429
+ def process_documents(xml_strings)
430
+ threads = xml_strings.map do |xml|
431
+ Thread.new do
432
+ doc = Moxml::Document.parse(xml)
433
+ # Process document
434
+ doc = nil
435
+ end
436
+ end
437
+ threads.each(&:join)
438
+ end
439
+
440
+ # Thread-local document storage
441
+ Thread.new do
442
+ Thread.current[:document] = Moxml::Document.new
443
+ # Process document
444
+ ensure
445
+ Thread.current[:document] = nil
446
+ end
447
+ ----
448
+
449
+ == Troubleshooting
450
+
451
+ === Common issues and solutions
452
+
453
+ ==== Parsing errors
454
+
455
+ [source,ruby]
456
+ ----
457
+ # Handle malformed XML
458
+ begin
459
+ doc = Moxml::Document.parse(xml_string)
460
+ rescue Moxml::ParseError => e
461
+ puts "Parse error at line #{e.line}, column #{e.column}: #{e.message}"
462
+ # Attempt recovery
463
+ xml_string = cleanup_xml(xml_string)
464
+ retry
465
+ end
466
+
467
+ # Handle encoding issues
468
+ begin
469
+ doc = Moxml::Document.parse(xml_string, encoding: 'UTF-8')
470
+ rescue Moxml::ParseError => e
471
+ if e.message =~ /encoding/
472
+ # Try detecting encoding
473
+ detected_encoding = detect_encoding(xml_string)
474
+ retry if detected_encoding
475
+ end
476
+ raise
477
+ end
478
+ ----
479
+
480
+ ==== Memory issues
481
+
482
+ [source,ruby]
483
+ ----
484
+ # Handle large documents
485
+ def process_large_document(path)
486
+ # Parse the whole file, then process each //chunk element in turn
487
+ File.open(path) do |file|
488
+ doc = Moxml::Document.parse(file)
489
+ doc.xpath('//chunk').each do |chunk|
490
+ process_chunk(chunk)
491
+ chunk = nil
492
+ end
493
+ doc = nil
494
+ end
495
+ GC.start
496
+ end
497
+
498
+ # Monitor memory usage
499
+ require 'get_process_mem'
500
+
501
+ def memory_safe_processing(xml)
502
+ memory = GetProcessMem.new
503
+ initial_memory = memory.mb
504
+
505
+ doc = Moxml::Document.parse(xml)
506
+ result = process_document(doc)
507
+ doc = nil
508
+ GC.start
509
+
510
+ final_memory = memory.mb
511
+ puts "Memory usage: #{final_memory - initial_memory}MB"
512
+
513
+ result
514
+ end
515
+ ----
516
+
517
+ ==== Backend-specific issues
518
+
519
+ [source,ruby]
520
+ ----
521
+ # Handle backend limitations
522
+ def safe_xpath(doc, xpath)
523
+ case Moxml.config.backend
524
+ when :nokogiri
525
+ doc.xpath(xpath)
526
+ when :ox
527
+ # Ox has limited XPath support
528
+ fallback_xpath_search(doc, xpath)
529
+ when :oga
530
+ # Handle Oga-specific XPath syntax
531
+ modified_xpath = adjust_xpath_for_oga(xpath)
532
+ doc.xpath(modified_xpath)
533
+ end
534
+ end
535
+
536
+ # Handle backend switching
537
+ def with_backend(backend)
538
+ original_backend = Moxml.config.backend
539
+ Moxml.config.backend = backend
540
+ yield
541
+ ensure
542
+ Moxml.config.backend = original_backend
543
+ end
544
+ ----
545
+
546
+ === Performance optimization
547
+
548
+ ==== Document creation
549
+
550
+ [source,ruby]
551
+ ----
552
+ # Efficient document building
553
+ def build_large_document
554
+ doc = Moxml::Document.new
555
+ root = doc.create_element('root')
556
+ doc.add_child(root)
557
+
558
+ # Pre-allocate elements
559
+ elements = Array.new(1000) do |i|
560
+ elem = doc.create_element('item')
561
+ elem['id'] = i.to_s
562
+ elem
563
+ end
564
+
565
+ # Batch add elements
566
+ elements.each do |elem|
567
+ root.add_child(elem)
568
+ end
569
+
570
+ doc
571
+ end
572
+
573
+ # Memory-efficient processing
574
+ def process_large_xml(xml_string)
575
+ result = []
576
+ doc = Moxml::Document.parse(xml_string)
577
+
578
+ doc.xpath('//item').each do |item|
579
+ # Process and immediately discard
580
+ result << process_item(item)
581
+ item = nil
582
+ end
583
+
584
+ doc = nil
585
+ GC.start
586
+
587
+ result
588
+ end
589
+ ----
590
+
591
+ ==== Query optimization
592
+
593
+ [source,ruby]
594
+ ----
595
+ # Optimize node selection
596
+ def efficient_node_selection(doc)
597
+ # Cache frequently used nodes
598
+ @header_nodes ||= doc.xpath('//header').to_a
599
+
600
+ # Use specific selectors
601
+ doc.xpath('//specific/path') # Better than '//*[name()="specific"]'
602
+
603
+ # Combine queries when possible
604
+ doc.xpath('//a | //b') # Better than two separate queries
605
+ end
606
+
607
+ # Optimize attribute access
608
+ def efficient_attribute_handling(element)
609
+ # Cache attribute values
610
+ @cached_attrs ||= element.attributes
611
+
612
+ # Direct attribute access
613
+ value = element['attr'] # Better than element.attributes['attr']
614
+
615
+ # Batch attribute updates
616
+ attrs = {'id' => '1', 'class' => 'new', 'data' => 'value'}
617
+ attrs.each { |k,v| element[k] = v }
618
+ end
619
+ ----
620
+
621
+ ==== Serialization optimization
622
+
623
+ [source,ruby]
624
+ ----
625
+ # Efficient output generation
626
+ def optimized_serialization(doc)
627
+ # Minimal output
628
+ compact = doc.to_xml(
629
+ indent: 0,
630
+ pretty: false,
631
+ xml_declaration: false
632
+ )
633
+
634
+ # Balanced formatting
635
+ readable = doc.to_xml(
636
+ indent: 2,
637
+ pretty: true,
638
+ xml_declaration: true
639
+ )
640
+
641
+ # Stream large documents
642
+ File.open('large.xml', 'w') do |file|
643
+ doc.write_to(file, indent: 2)
644
+ end
645
+ end
646
+ ----
647
+
648
+ === Debugging tips
649
+
650
+ ==== Inspection helpers
651
+
652
+ [source,ruby]
653
+ ----
654
+ # Debug node structure
655
+ def inspect_node(node, level = 0)
656
+ indent = " " * level
657
+ puts "#{indent}#{node.class.name}: #{node.name}"
658
+
659
+ if node.respond_to?(:attributes)
660
+ node.attributes.each do |name, attr|
661
+ puts "#{indent} @#{name}=#{attr.value.inspect}"
662
+ end
663
+ end
664
+
665
+ if node.respond_to?(:children)
666
+ node.children.each { |child| inspect_node(child, level + 1) }
667
+ end
668
+ end
669
+
670
+ # Track node operations
671
+ def debug_node_operations
672
+ nodes_created = 0
673
+ nodes_removed = 0
674
+
675
+ yield
676
+ ensure
677
+ puts "Nodes created: #{nodes_created}"
678
+ puts "Nodes removed: #{nodes_removed}"
679
+ end
680
+ ----
681
+
682
+ ==== Backend validation
683
+
684
+ [source,ruby]
685
+ ----
686
+ # Verify backend behavior
687
+ def verify_backend_compatibility
688
+ doc = Moxml::Document.new
689
+
690
+ # Test basic operations
691
+ element = doc.create_element('test')
692
+ doc.add_child(element)
693
+
694
+ # Verify node handling
695
+ raise "Node creation failed" unless doc.root
696
+ raise "Node type wrong" unless doc.root.is_a?(Moxml::Element)
697
+
698
+ # Verify serialization
699
+ xml = doc.to_xml
700
+ raise "Serialization failed" unless xml.include?('<test/>')
701
+
702
+ puts "Backend verification successful"
703
+ rescue => e
704
+ puts "Backend verification failed: #{e.message}"
705
+ end
706
+ ----
707
+
708
+ == Error handling
709
+
710
+ Moxml provides unified error handling:
711
+
712
+ * `Moxml::Error` - Base error class
713
+ * `Moxml::ParseError` - XML parsing errors
714
+ * `Moxml::ArgumentError` - Invalid argument errors
715
+
716
+ === Error handling patterns
717
+
718
+ [source,ruby]
719
+ ----
720
+ # Handle parsing errors
721
+ begin
722
+ doc = Moxml::Document.parse(xml_string)
723
+ rescue Moxml::ParseError => e
724
+ logger.error "Parse error: #{e.message}"
725
+ logger.error "At line #{e.line}, column #{e.column}"
726
+ raise
727
+ end
728
+
729
+ # Handle invalid operations
730
+ begin
731
+ element['invalid/name'] = 'value'
732
+ rescue Moxml::ArgumentError => e
733
+ logger.warn "Invalid operation: #{e.message}"
734
+ # Use alternative approach
735
+ end
736
+
737
+ # Custom error handling
738
+ class XmlProcessor
739
+ def process(xml)
740
+ doc = Moxml::Document.parse(xml)
741
+ yield doc
742
+ rescue Moxml::Error => e
743
+ handle_moxml_error(e)
744
+ rescue StandardError => e
745
+ handle_standard_error(e)
746
+ ensure
747
+ doc = nil
748
+ end
749
+ end
750
+ ----
751
+
752
+ == Contributing
753
+
754
+ Bug reports and pull requests are welcome on GitHub at
755
+ https://github.com/lutaml/moxml.
756
+
757
+ === Development guidelines
758
+
759
+ * Follow the Ruby style guide
760
+ * Add tests for new features
761
+ * Update documentation
762
+ * Ensure backwards compatibility
763
+ * Consider performance implications
764
+ * Test with all supported backends
765
+
766
+ == Copyright and license
767
+
768
+ Copyright Ribose.
769
+
770
+ The gem is available as open source under the terms of the BSD-2-Clause License.