xml_mini 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,63 @@
1
+ require 'xml_mini/node_hash'
2
+
3
# StringIO is used by #parse below, so it is loaded explicitly here.
require 'stringio'

# XmlMini backend that turns XML into a hash using libxml's document parser.
module XmlMini_LibXML
  extend self

  # Parse an XML Document string or IO into a simple hash using libxml.
  #
  # data::
  #   XML Document string or IO to parse. +nil+ is handled like an empty
  #   string.
  #
  # Returns an empty Hash when +data+ yields no characters; otherwise the
  # value of +to_hash+ called on the document produced by
  # LibXML::XML::Parser.
  def parse(data)
    # Anything that cannot be read from is wrapped in a StringIO so that
    # strings, nil and IO objects all follow the same path.
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character: empty input is answered with an empty
    # hash and is never handed to the parser.
    char = data.getc
    return {} if char.nil?

    # Put the peeked character back so the parser sees the whole input.
    data.ungetc(char)
    LibXML::XML::Parser.io(data).parse.to_hash
  end
end
23
+
24
module LibXML #:nodoc:
  module Conversions #:nodoc:
    module Document #:nodoc:
      # Hash form of the document: delegates to the root node's to_hash.
      def to_hash
        root.to_hash
      end
    end

    module Node
      include NodeHash

      # Convert this node and everything below it into a hash.
      #
      # hash::
      #   Hash to merge the converted element into.
      #
      # Returns +hash+ (the same object that was passed in).
      def to_hash(hash={})
        converted = {}

        # Register the (still empty) node hash under this node's name in
        # the parent before it is filled in.
        insert_node_hash_into_parent(hash, name, converted)

        # Child elements and text/cdata children are folded into the node hash.
        each_child { |node| handle_child_element(node, converted) }

        # Drop the content entry when it is blank and other keys exist.
        remove_blank_content_node(converted)

        # Attributes are written last, keyed by attribute name.
        each_attr do |attribute|
          converted[attribute.name] = attribute.value
        end

        hash
      end
    end
  end
end
61
+
62
# Mix the hash conversions into libxml's document and node classes.
LibXML::XML::Document.class_eval { include LibXML::Conversions::Document }
LibXML::XML::Node.class_eval { include LibXML::Conversions::Node }
@@ -0,0 +1,84 @@
1
+ require 'stringio'
2
+
3
# XmlMini backend that builds a hash from libxml SAX events.
module XmlMini_LibXMLSAX
  extend self

  # Class that will build the hash while the XML document
  # is being parsed using SAX events.
  class HashBuilder

    include LibXML::XML::SaxParser::Callbacks

    CONTENT_KEY   = '__content__'.freeze
    HASH_SIZE_KEY = '__hash_size__'.freeze

    # The hash built so far; after on_end_document it is the final result.
    attr_reader :hash

    # Hash of the element currently being filled (top of the stack).
    def current_hash
      @hash_stack.last
    end

    # Start with a single top-level hash on the stack.
    def on_start_document
      @hash = {CONTENT_KEY => ''}
      @hash_stack = [@hash]
    end

    # Pop the remaining hash off the stack and strip its content entry.
    def on_end_document
      @hash = @hash_stack.pop
      @hash.delete(CONTENT_KEY)
    end

    # Open a new element hash, attach it to its parent and push it on the
    # stack.
    #
    # name::
    #   Element name, used as the key in the parent hash.
    # attrs::
    #   Attributes, merged into the new element hash.
    def on_start_element(name, attrs = {})
      new_hash = {CONTENT_KEY => ''}.merge(attrs)
      # Remember how large the hash is once this marker has been added, so
      # that on_end_element can tell whether further keys were added later.
      new_hash[HASH_SIZE_KEY] = new_hash.size + 1

      # Repeated element names are collected into an Array.
      case current_hash[name]
      when Array then current_hash[name] << new_hash
      when Hash  then current_hash[name] = [current_hash[name], new_hash]
      when nil   then current_hash[name] = new_hash
      end

      @hash_stack.push(new_hash)
    end

    # Close the current element: remove the bookkeeping marker, drop the
    # content entry when it carries nothing useful, and pop the stack.
    def on_end_element(name)
      element_hash = current_hash

      # The length is taken while the marker is still present, then compared
      # with the size recorded in on_start_element.
      length_with_marker = element_hash.length
      grew = length_with_marker > element_hash.delete(HASH_SIZE_KEY)
      content = element_hash[CONTENT_KEY]

      # Content goes away when it is blank and keys were added since the
      # element was opened, or when it is exactly the empty string.
      if (grew && blank_content?(content)) || content == ''
        element_hash.delete(CONTENT_KEY)
      end

      @hash_stack.pop
    end

    # Append character data to the current element's content.
    def on_characters(string)
      current_hash[CONTENT_KEY] << string
    end

    alias_method :on_cdata_block, :on_characters

    private

    # Self-contained blank check, so that no +blank?+ core extension has to
    # be loaded: nil and whitespace-only strings are blank; any other object
    # is blank when it reports itself empty (or is falsy).
    def blank_content?(content)
      case content
      when nil    then true
      when String then content !~ /[^[:space:]]/
      else content.respond_to?(:empty?) ? !!content.empty? : !content
      end
    end
  end

  # Class instantiated by #parse to receive the SAX callbacks.
  attr_accessor :document_class
  self.document_class = HashBuilder

  # Parse an XML Document string or IO into a simple hash using libxml's
  # SAX parser.
  #
  # data::
  #   XML Document string or IO to parse. +nil+ is handled like an empty
  #   string.
  #
  # Returns an empty Hash when +data+ yields no characters; otherwise the
  # +hash+ of the document_class instance that received the callbacks.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character to detect empty input.
    char = data.getc
    return {} if char.nil?

    data.ungetc(char)

    # Note: this installs libxml's QUIET_HANDLER as the error handler.
    LibXML::XML::Error.set_handler(&LibXML::XML::Error::QUIET_HANDLER)
    parser = LibXML::XML::SaxParser.io(data)
    document = document_class.new

    parser.callbacks = document
    parser.parse
    document.hash
  end
end
@@ -0,0 +1,29 @@
1
# Helpers shared by the DOM-based backends for building the result hash.
module NodeHash
  CONTENT_ROOT = '__content__'.freeze

  # Self-contained blank check, so that no +blank?+ core extension has to be
  # loaded. Defined on the module itself so it is not mixed into includers.
  #
  # content::
  #   Value stored under CONTENT_ROOT (may be nil).
  #
  # Returns true for nil and whitespace-only strings; any other object is
  # blank when it reports itself empty (or is falsy).
  def self.blank_content?(content)
    case content
    when nil    then true
    when String then content !~ /[^[:space:]]/
    else content.respond_to?(:empty?) ? !!content.empty? : !content
    end
  end

  # Store +node_hash+ under +name+ in +hash+, collecting repeated names
  # into an Array.
  #
  # hash::
  #   Parent hash to insert into.
  # name::
  #   Key under which the node is stored.
  # node_hash::
  #   Hash representing the node.
  def insert_node_hash_into_parent(hash, name, node_hash)
    case hash[name]
    when Array then hash[name] << node_hash
    when Hash  then hash[name] = [hash[name], node_hash]
    when nil   then hash[name] = node_hash
    end
  end

  # Fold one child into +node_hash+: element children are converted via
  # their own to_hash, text and cdata children are appended to the content
  # entry. Other kinds of children are ignored.
  #
  # child::
  #   Child node responding to element?, text?, cdata?, to_hash and content.
  # node_hash::
  #   Hash of the node that owns the child.
  def handle_child_element(child, node_hash)
    if child.element?
      child.to_hash(node_hash)
    elsif child.text? || child.cdata?
      node_hash[CONTENT_ROOT] ||= ''
      node_hash[CONTENT_ROOT] << child.content
    end
  end

  # Delete the content entry when it is blank and the hash holds more than
  # one key.
  #
  # node_hash::
  #   Hash to clean up in place.
  def remove_blank_content_node(node_hash)
    return unless node_hash.length > 1

    node_hash.delete(CONTENT_ROOT) if NodeHash.blank_content?(node_hash[CONTENT_ROOT])
  end
end
@@ -0,0 +1,64 @@
1
+ require 'stringio'
2
+ require 'xml_mini/node_hash'
3
+
4
module XmlMini_Nokogiri
  extend self

  # Parse an XML Document string or IO into a simple hash using libxml / nokogiri.
  #
  # data::
  #   XML Document string or IO to parse. +nil+ is handled like an empty
  #   string.
  #
  # Returns an empty Hash when +data+ yields no characters. Raises the
  # first entry of the document's errors when there are any.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character to detect empty input.
    char = data.getc
    return {} if char.nil?

    data.ungetc(char)
    doc = Nokogiri::XML(data)
    raise doc.errors.first unless doc.errors.empty?

    doc.to_hash
  end

  module Conversions #:nodoc:
    module Document #:nodoc:
      # Hash form of the document: delegates to the root node's to_hash.
      def to_hash
        root.to_hash
      end
    end

    module Node
      include NodeHash

      # Convert this node and everything below it into a hash.
      #
      # hash::
      #   Hash to merge the converted element into.
      #
      # Returns +hash+ (the same object that was passed in).
      def to_hash(hash={})
        converted = {}

        # Register the (still empty) node hash under this node's name in
        # the parent before it is filled in.
        insert_node_hash_into_parent(hash, name, converted)

        # Child elements and text/cdata children are folded into the node hash.
        children.each { |node| handle_child_element(node, converted) }

        # Remove content node if it is empty and there are child tags
        remove_blank_content_node(converted)

        # Attributes are written last, keyed by node name.
        attribute_nodes.each do |attribute|
          converted[attribute.node_name] = attribute.value
        end

        hash
      end
    end
  end

  # Mix the hash conversions into Nokogiri's document and node classes.
  Nokogiri::XML::Document.class_eval { include Conversions::Document }
  Nokogiri::XML::Node.class_eval { include Conversions::Node }
end
@@ -0,0 +1,81 @@
1
+ require 'stringio'
2
+
3
# XmlMini backend that builds a hash from Nokogiri SAX events.
module XmlMini_NokogiriSAX
  extend self

  # Class that will build the hash while the XML document
  # is being parsed using SAX events.
  class HashBuilder < Nokogiri::XML::SAX::Document

    CONTENT_KEY   = '__content__'.freeze
    HASH_SIZE_KEY = '__hash_size__'.freeze

    # The hash being built; the final result once parsing has finished.
    attr_reader :hash

    # Hash of the element currently being filled (top of the stack).
    def current_hash
      @hash_stack.last
    end

    # Start with a single top-level hash on the stack.
    def start_document
      @hash = {}
      @hash_stack = [@hash]
    end

    # Raises if any element hash is still open when the document ends.
    def end_document
      raise "Parse stack not empty!" if @hash_stack.size > 1
    end

    # Parser errors are raised with the message given by the parser.
    def error(error_message)
      raise error_message
    end

    # Open a new element hash, attach it to its parent and push it on the
    # stack.
    #
    # name::
    #   Element name, used as the key in the parent hash.
    # attrs::
    #   Attribute name/value pairs, converted with Hash[] and merged into
    #   the new element hash.
    def start_element(name, attrs = [])
      new_hash = {CONTENT_KEY => ''}.merge(Hash[attrs])
      # Remember how large the hash is once this marker has been added, so
      # that end_element can tell whether further keys were added later.
      new_hash[HASH_SIZE_KEY] = new_hash.size + 1

      # Repeated element names are collected into an Array.
      case current_hash[name]
      when Array then current_hash[name] << new_hash
      when Hash  then current_hash[name] = [current_hash[name], new_hash]
      when nil   then current_hash[name] = new_hash
      end

      @hash_stack.push(new_hash)
    end

    # Close the current element: remove the bookkeeping marker, drop the
    # content entry when it carries nothing useful, and pop the stack.
    def end_element(name)
      element_hash = current_hash

      # The length is taken while the marker is still present, then compared
      # with the size recorded in start_element.
      length_with_marker = element_hash.length
      grew = length_with_marker > element_hash.delete(HASH_SIZE_KEY)
      content = element_hash[CONTENT_KEY]

      # Content goes away when it is blank and keys were added since the
      # element was opened, or when it is exactly the empty string.
      if (grew && blank_content?(content)) || content == ''
        element_hash.delete(CONTENT_KEY)
      end

      @hash_stack.pop
    end

    # Append character data to the current element's content.
    def characters(string)
      current_hash[CONTENT_KEY] << string
    end

    alias_method :cdata_block, :characters

    private

    # Self-contained blank check, so that no +blank?+ core extension has to
    # be loaded: nil and whitespace-only strings are blank; any other object
    # is blank when it reports itself empty (or is falsy).
    def blank_content?(content)
      case content
      when nil    then true
      when String then content !~ /[^[:space:]]/
      else content.respond_to?(:empty?) ? !!content.empty? : !content
      end
    end
  end

  # Class instantiated by #parse to receive the SAX callbacks.
  attr_accessor :document_class
  self.document_class = HashBuilder

  # Parse an XML Document string or IO into a simple hash using Nokogiri's
  # SAX parser.
  #
  # data::
  #   XML Document string or IO to parse. +nil+ is handled like an empty
  #   string.
  #
  # Returns an empty Hash when +data+ yields no characters; otherwise the
  # +hash+ of the document_class instance that received the callbacks.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character to detect empty input.
    char = data.getc
    return {} if char.nil?

    data.ungetc(char)
    document = document_class.new
    parser = Nokogiri::XML::SAX::Parser.new(document)
    parser.parse(data)
    document.hash
  end
end
@@ -0,0 +1,126 @@
1
+ require 'stringio'
2
+
3
# XmlMini backend that turns XML into a hash using REXML.
module XmlMini_REXML
  extend self

  CONTENT_KEY = '__content__'.freeze

  # Parse an XML Document string or IO into a simple hash
  #
  # Same as XmlSimple::xml_in but doesn't shoot itself in the foot,
  # and uses the defaults from Active Support.
  #
  # data::
  #   XML Document string or IO to parse. +nil+ is handled like an empty
  #   string.
  #
  # Returns an empty Hash when +data+ yields no characters. Raises
  # REXML::ParseException when the parsed document has no root element.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character to detect empty input.
    char = data.getc
    return {} if char.nil?

    data.ungetc(char)
    # REXML is only loaded once there is something to parse.
    require 'rexml/document' unless defined?(REXML::Document)
    doc = REXML::Document.new(data)

    unless doc.root
      raise REXML::ParseException,
        "The document #{doc.to_s.inspect} does not have a valid root"
    end

    merge_element!({}, doc.root)
  end

  private

    # Convert an XML element and merge into the hash
    #
    # hash::
    #   Hash to merge the converted element into.
    # element::
    #   XML element to merge into hash
    def merge_element!(hash, element)
      merge!(hash, element.name, collapse(element))
    end

    # Actually converts an XML document element into a data structure.
    #
    # element::
    #   The document element to be collapsed.
    def collapse(element)
      hash = get_attributes(element)

      # Without child elements only the texts (if any) are merged.
      return merge_texts!(hash, element) unless element.has_elements?

      element.each_element { |child| merge_element!(hash, child) }
      # With child elements, blank text is dropped.
      merge_texts!(hash, element) unless empty_content?(element)
      hash
    end

    # Merge all the texts of an element into the hash
    #
    # hash::
    #   Hash to add the converted element to.
    # element::
    #   XML element whose texts are to be merged into the hash
    def merge_texts!(hash, element)
      return hash unless element.has_text?

      # must use value to prevent double-escaping
      texts = ''
      element.texts.each { |t| texts << t.value }
      merge!(hash, CONTENT_KEY, texts)
    end

    # Adds a new key/value pair to an existing Hash. If the key to be added
    # already exists and the existing value associated with key is not
    # an Array, it will be wrapped in an Array. Then the new value is
    # appended to that Array.
    #
    # hash::
    #   Hash to add key/value pair to.
    # key::
    #   Key to be added.
    # value::
    #   Value to be associated with key.
    #
    # Returns +hash+.
    def merge!(hash, key, value)
      if hash.has_key?(key)
        if hash[key].instance_of?(Array)
          hash[key] << value
        else
          hash[key] = [hash[key], value]
        end
      elsif value.instance_of?(Array)
        # A first value that is itself an Array is wrapped in another Array.
        hash[key] = [value]
      else
        hash[key] = value
      end
      hash
    end

    # Converts the attributes array of an XML element into a hash.
    # Returns an empty Hash if node has no attributes.
    #
    # element::
    #   XML element to extract attributes from.
    def get_attributes(element)
      attributes = {}
      element.attributes.each { |n, v| attributes[n] = v }
      attributes
    end

    # Determines whether a document element's text content is blank, i.e.
    # its joined texts are empty or consist only of whitespace.
    #
    # The check is done with a regexp, so that no +blank?+ core extension
    # has to be loaded.
    #
    # element::
    #   XML element to be checked.
    def empty_content?(element)
      joined = element.texts.join
      joined !~ /[^[:space:]]/
    end
end