xml_mini 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,63 @@
1
+ require 'xml_mini/node_hash'
2
+
3
module XmlMini_LibXML
  extend self

  # Parse an XML Document string or IO into a simple hash using libxml.
  #
  # data::
  #   XML Document string or IO to parse. Anything that does not respond
  #   to +read+ is wrapped in a StringIO; +nil+ is treated as ''.
  #
  # Returns an empty Hash when +data+ yields no characters, otherwise the
  # result of +to_hash+ on the parsed libxml document.
  def parse(data)
    unless data.respond_to?(:read)
      # Load StringIO on demand so string input works even when nothing
      # else in the process has required it yet.
      require 'stringio' unless defined?(StringIO)
      data = StringIO.new(data || '')
    end

    # Peek at the first character: empty input is answered with {} without
    # ever invoking the parser.
    char = data.getc
    return {} if char.nil?

    data.ungetc(char)
    LibXML::XML::Parser.io(data).parse.to_hash
  end
end
23
+
24
module LibXML #:nodoc:
  module Conversions #:nodoc:
    module Document #:nodoc:
      # Hash form of the document, obtained by converting its root node.
      def to_hash
        root.to_hash
      end
    end

    module Node
      include NodeHash

      # Convert this node, its children and its attributes into a hash.
      #
      # hash::
      #   Hash to merge the converted element into.
      #
      # Returns +hash+, with this node's entry stored under the node name.
      def to_hash(hash = {})
        element = {}

        # Register this element under its name in the parent first, so a
        # repeated name is collected into an array.
        insert_node_hash_into_parent(hash, name, element)

        # Child elements and text/cdata content
        each_child { |child| handle_child_element(child, element) }

        # Drop the content entry when it is blank
        remove_blank_content_node(element)

        # Attributes are written last, so an attribute replaces any entry
        # already stored under the same key.
        each_attr do |attribute|
          element[attribute.name] = attribute.value
        end

        hash
      end
    end
  end
end

LibXML::XML::Document.send(:include, LibXML::Conversions::Document)
LibXML::XML::Node.send(:include, LibXML::Conversions::Node)
@@ -0,0 +1,84 @@
1
+ require 'stringio'
2
+
3
module XmlMini_LibXMLSAX
  extend self

  # Class that will build the hash while the XML document
  # is being parsed using SAX events.
  class HashBuilder
    include LibXML::XML::SaxParser::Callbacks

    CONTENT_KEY   = '__content__'.freeze
    HASH_SIZE_KEY = '__hash_size__'.freeze

    # The hash built so far; complete once on_end_document has run.
    attr_reader :hash

    # Hash of the element currently being populated (top of the stack).
    def current_hash
      @hash_stack.last
    end

    # Start with a root hash that doubles as the bottom of the stack.
    def on_start_document
      @hash = {CONTENT_KEY => ''}
      @hash_stack = [@hash]
    end

    # Pop the root hash and strip its content placeholder.
    def on_end_document
      @hash = @hash_stack.pop
      @hash.delete(CONTENT_KEY)
    end

    # Open a new element: create its hash (content placeholder plus
    # attributes), attach it to the parent and push it on the stack.
    #
    # name::
    #   Element name.
    # attrs::
    #   Attributes of the element, merged into the element's hash.
    def on_start_element(name, attrs = {})
      new_hash = {CONTENT_KEY => ''}.merge(attrs)
      # Size the hash will have once this bookkeeping entry is stored;
      # on_end_element compares against it to detect added children.
      new_hash[HASH_SIZE_KEY] = new_hash.size + 1

      case current_hash[name]
      when Array then current_hash[name] << new_hash
      when Hash  then current_hash[name] = [current_hash[name], new_hash]
      when nil   then current_hash[name] = new_hash
      end

      @hash_stack.push(new_hash)
    end

    # Close the current element: drop the bookkeeping entry, remove the
    # content entry when it is empty, or when it is only whitespace and
    # the element gained child entries, then pop the stack.
    def on_end_element(name)
      # length is read before the delete, so it still counts the
      # bookkeeping entry that the stored size accounted for.
      has_children = current_hash.length > current_hash.delete(HASH_SIZE_KEY)
      content = current_hash[CONTENT_KEY]

      if content == '' || (has_children && blank_content?(content))
        current_hash.delete(CONTENT_KEY)
      end
      @hash_stack.pop
    end

    # Append character data to the current element's content.
    def on_characters(string)
      current_hash[CONTENT_KEY] << string
    end

    alias_method :on_cdata_block, :on_characters

    private

    # True when +content+ is nil or holds nothing but whitespace. Plain
    # Ruby stand-in for ActiveSupport's +blank?+, so the builder does not
    # depend on that core extension being loaded.
    def blank_content?(content)
      content.nil? || content !~ /[^[:space:]]/
    end
  end

  attr_accessor :document_class
  self.document_class = HashBuilder

  # Parse an XML Document string or IO into a simple hash using the
  # libxml SAX parser.
  #
  # data::
  #   XML Document string or IO to parse. +nil+ is treated as ''.
  #
  # Returns an empty Hash when +data+ yields no characters, otherwise the
  # hash collected by an instance of +document_class+.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    char = data.getc
    return {} if char.nil?

    data.ungetc(char)

    # Installs libxml's quiet error handler before parsing.
    LibXML::XML::Error.set_handler(&LibXML::XML::Error::QUIET_HANDLER)
    parser = LibXML::XML::SaxParser.io(data)
    document = document_class.new

    parser.callbacks = document
    parser.parse
    document.hash
  end
end
@@ -0,0 +1,29 @@
1
# Helpers shared by the DOM-based backends for turning XML nodes into
# nested hashes.
module NodeHash
  CONTENT_ROOT = '__content__'.freeze

  # Store +node_hash+ under +name+ in +hash+. The first entry is stored
  # directly; a second entry turns the value into an array of both;
  # later entries are appended to that array.
  #
  # hash::
  #   Parent hash to insert into.
  # name::
  #   Key (element name) to store the entry under.
  # node_hash::
  #   Hash representing the element.
  def insert_node_hash_into_parent(hash, name, node_hash)
    case hash[name]
    when Array then hash[name] << node_hash
    when Hash  then hash[name] = [hash[name], node_hash]
    when nil   then hash[name] = node_hash
    end
  end

  # Fold one child node into +node_hash+: element children are converted
  # via their own +to_hash+, text and cdata children are appended to the
  # content entry. Other node types are ignored.
  def handle_child_element(child, node_hash)
    if child.element?
      child.to_hash(node_hash)
    elsif child.text? || child.cdata?
      node_hash[CONTENT_ROOT] ||= ''
      node_hash[CONTENT_ROOT] << child.content
    end
  end

  # Delete the content entry when +node_hash+ has more than one entry and
  # the content is missing or whitespace-only.
  def remove_blank_content_node(node_hash)
    if node_hash.length > 1 && blank_content?(node_hash[CONTENT_ROOT])
      node_hash.delete(CONTENT_ROOT)
    end
  end

  private

  # True when +content+ is nil or holds nothing but whitespace. Plain
  # Ruby stand-in for ActiveSupport's +blank?+, so these helpers do not
  # depend on that core extension being loaded.
  def blank_content?(content)
    content.nil? || content !~ /[^[:space:]]/
  end
end
@@ -0,0 +1,64 @@
1
+ require 'stringio'
2
+ require 'xml_mini/node_hash'
3
+
4
module XmlMini_Nokogiri
  extend self

  # Parse an XML Document string or IO into a simple hash using libxml / nokogiri.
  #
  # data::
  #   XML Document string or IO to parse
  #
  # Returns an empty Hash when +data+ yields no characters. Raises the
  # first error recorded on the parsed document, if there is one.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    first_char = data.getc
    return {} if first_char.nil?

    data.ungetc(first_char)
    document = Nokogiri::XML(data)
    raise document.errors.first unless document.errors.empty?
    document.to_hash
  end

  module Conversions #:nodoc:
    module Document #:nodoc:
      # Hash form of the document, obtained by converting its root node.
      def to_hash
        root.to_hash
      end
    end

    module Node
      include NodeHash

      # Convert this node, its children and its attributes into a hash.
      #
      # hash::
      #   Hash to merge the converted element into.
      #
      # Returns +hash+, with this node's entry stored under the node name.
      def to_hash(hash = {})
        element = {}

        # Register this element under its name in the parent first, so a
        # repeated name is collected into an array.
        insert_node_hash_into_parent(hash, name, element)

        # Child elements and text/cdata content
        children.each { |child| handle_child_element(child, element) }

        # Drop the content entry when it is blank and other entries exist
        remove_blank_content_node(element)

        # Attributes are written last, so an attribute replaces any entry
        # already stored under the same key.
        attribute_nodes.each do |attribute|
          element[attribute.node_name] = attribute.value
        end

        hash
      end
    end
  end

  Nokogiri::XML::Document.send(:include, Conversions::Document)
  Nokogiri::XML::Node.send(:include, Conversions::Node)
end
@@ -0,0 +1,81 @@
1
+ require 'stringio'
2
+
3
module XmlMini_NokogiriSAX
  extend self

  # Class that will build the hash while the XML document
  # is being parsed using SAX events.
  class HashBuilder < Nokogiri::XML::SAX::Document
    CONTENT_KEY   = '__content__'.freeze
    HASH_SIZE_KEY = '__hash_size__'.freeze

    # The hash built so far.
    attr_reader :hash

    # Hash of the element currently being populated (top of the stack).
    def current_hash
      @hash_stack.last
    end

    # Start with an empty root hash that doubles as the bottom of the stack.
    def start_document
      @hash = {}
      @hash_stack = [@hash]
    end

    # Fail if any element hash is still open when the document ends.
    def end_document
      raise "Parse stack not empty!" if @hash_stack.size > 1
    end

    # Turn a reported parse error into an exception.
    def error(error_message)
      raise error_message
    end

    # Open a new element: create its hash (content placeholder plus
    # attributes), attach it to the parent and push it on the stack.
    #
    # name::
    #   Element name.
    # attrs::
    #   Attribute name/value pairs, converted with Hash[] and merged into
    #   the element's hash.
    def start_element(name, attrs = [])
      new_hash = {CONTENT_KEY => ''}.merge(Hash[attrs])
      # Size the hash will have once this bookkeeping entry is stored;
      # end_element compares against it to detect added children.
      new_hash[HASH_SIZE_KEY] = new_hash.size + 1

      case current_hash[name]
      when Array then current_hash[name] << new_hash
      when Hash  then current_hash[name] = [current_hash[name], new_hash]
      when nil   then current_hash[name] = new_hash
      end

      @hash_stack.push(new_hash)
    end

    # Close the current element: drop the bookkeeping entry, remove the
    # content entry when it is empty, or when it is only whitespace and
    # the element gained child entries, then pop the stack.
    def end_element(name)
      # length is read before the delete, so it still counts the
      # bookkeeping entry that the stored size accounted for.
      has_children = current_hash.length > current_hash.delete(HASH_SIZE_KEY)
      content = current_hash[CONTENT_KEY]

      if content == '' || (has_children && blank_content?(content))
        current_hash.delete(CONTENT_KEY)
      end
      @hash_stack.pop
    end

    # Append character data to the current element's content.
    def characters(string)
      current_hash[CONTENT_KEY] << string
    end

    alias_method :cdata_block, :characters

    private

    # True when +content+ is nil or holds nothing but whitespace. Plain
    # Ruby stand-in for ActiveSupport's +blank?+, so the builder does not
    # depend on that core extension being loaded.
    def blank_content?(content)
      content.nil? || content !~ /[^[:space:]]/
    end
  end

  attr_accessor :document_class
  self.document_class = HashBuilder

  # Parse an XML Document string or IO into a simple hash using the
  # Nokogiri SAX parser.
  #
  # data::
  #   XML Document string or IO to parse. +nil+ is treated as ''.
  #
  # Returns an empty Hash when +data+ yields no characters, otherwise the
  # hash collected by an instance of +document_class+.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    char = data.getc
    return {} if char.nil?

    data.ungetc(char)
    document = document_class.new
    parser = Nokogiri::XML::SAX::Parser.new(document)
    parser.parse(data)
    document.hash
  end
end
@@ -0,0 +1,126 @@
1
+ require 'stringio'
2
+
3
module XmlMini_REXML
  extend self

  CONTENT_KEY = '__content__'.freeze

  # Parse an XML Document string or IO into a simple hash
  #
  # Same as XmlSimple::xml_in but doesn't shoot itself in the foot,
  # and uses the defaults from Active Support.
  #
  # data::
  #   XML Document string or IO to parse. +nil+ is treated as ''.
  #
  # Returns an empty Hash when +data+ yields no characters. Raises
  # REXML::ParseException when the parsed document has no root element.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    char = data.getc
    return {} if char.nil?

    data.ungetc(char)
    # Deliberately lazy: REXML is only loaded once there is input to parse.
    require 'rexml/document' unless defined?(REXML::Document)
    doc = REXML::Document.new(data)

    unless doc.root
      raise REXML::ParseException,
        "The document #{doc.to_s.inspect} does not have a valid root"
    end

    merge_element!({}, doc.root)
  end

  private

  # Convert an XML element and merge into the hash
  #
  # hash::
  #   Hash to merge the converted element into.
  # element::
  #   XML element to merge into hash
  def merge_element!(hash, element)
    merge!(hash, element.name, collapse(element))
  end

  # Actually converts an XML document element into a data structure.
  #
  # element::
  #   The document element to be collapsed.
  def collapse(element)
    hash = get_attributes(element)

    if element.has_elements?
      element.each_element { |child| merge_element!(hash, child) }
      # Whitespace between child elements is not kept as content.
      merge_texts!(hash, element) unless empty_content?(element)
      hash
    else
      merge_texts!(hash, element)
    end
  end

  # Merge all the texts of an element into the hash
  #
  # hash::
  #   Hash to add the converted element to.
  # element::
  #   XML element whose texts are to be merged into the hash
  def merge_texts!(hash, element)
    return hash unless element.has_text?

    # must use value to prevent double-escaping
    texts = element.texts.map { |t| t.value }.join
    merge!(hash, CONTENT_KEY, texts)
  end

  # Adds a new key/value pair to an existing Hash. If the key to be added
  # already exists and the existing value associated with key is not
  # an Array, it will be wrapped in an Array. Then the new value is
  # appended to that Array.
  #
  # hash::
  #   Hash to add key/value pair to.
  # key::
  #   Key to be added.
  # value::
  #   Value to be associated with key.
  def merge!(hash, key, value)
    if hash.has_key?(key)
      if hash[key].instance_of?(Array)
        hash[key] << value
      else
        hash[key] = [hash[key], value]
      end
    elsif value.instance_of?(Array)
      # Wrap an Array value so a later merge appends to the wrapper
      # instead of to the value itself.
      hash[key] = [value]
    else
      hash[key] = value
    end
    hash
  end

  # Converts the attributes array of an XML element into a hash.
  # Returns an empty Hash if node has no attributes.
  #
  # element::
  #   XML element to extract attributes from.
  def get_attributes(element)
    attributes = {}
    element.attributes.each { |n, v| attributes[n] = v }
    attributes
  end

  # Determines if a document element has no text content other than
  # whitespace. Uses a plain regexp rather than ActiveSupport's +blank?+
  # so the backend works without that core extension loaded.
  #
  # element::
  #   XML element to be checked.
  def empty_content?(element)
    element.texts.join !~ /[^[:space:]]/
  end
end