xml_mini 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -0
- data/.rvmrc +1 -0
- data/Gemfile +20 -0
- data/Guardfile +15 -0
- data/LICENSE +22 -0
- data/README.md +31 -0
- data/Rakefile +11 -0
- data/lib/core_ext/array.rb +45 -0
- data/lib/core_ext/blank.rb +112 -0
- data/lib/core_ext/hash.rb +72 -0
- data/lib/xml_mini.rb +160 -0
- data/lib/xml_mini/libxml.rb +63 -0
- data/lib/xml_mini/libxmlsax.rb +84 -0
- data/lib/xml_mini/node_hash.rb +29 -0
- data/lib/xml_mini/nokogiri.rb +64 -0
- data/lib/xml_mini/nokogirisax.rb +81 -0
- data/lib/xml_mini/rexml.rb +126 -0
- data/lib/xml_mini/version.rb +3 -0
- data/test/lib/xml_mini/libxml_engine_test.rb +202 -0
- data/test/lib/xml_mini/libxmlsax_engine_test.rb +193 -0
- data/test/lib/xml_mini/nokogiri_engine_test.rb +215 -0
- data/test/lib/xml_mini/nokogirisax_engine_test.rb +216 -0
- data/test/lib/xml_mini/rexml_engine_test.rb +28 -0
- data/test/lib/xml_mini_test.rb +89 -0
- data/test/test_helper.rb +7 -0
- data/xml_mini.gemspec +17 -0
- metadata +84 -0
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'stringio'
require 'xml_mini/node_hash'
|
2
|
+
|
3
|
+
# XmlMini backend that converts a libxml DOM document into a hash.
module XmlMini_LibXML
  extend self

  # Parse an XML Document string or IO into a simple hash using libxml.
  #
  # data::
  #   XML Document string or IO to parse. +nil+ is treated like an empty
  #   string.
  #
  # Returns an empty Hash when +data+ yields no characters; otherwise returns
  # whatever +to_hash+ gives for the document parsed by libxml.
  def parse(data)
    # Wrap anything that cannot be read from, so strings and IO objects are
    # handled through the same interface below.
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character: empty input returns {} before the libxml
    # parser is ever touched.
    char = data.getc
    return {} if char.nil?

    # Push the peeked character back so the parser sees the whole document.
    data.ungetc(char)
    LibXML::XML::Parser.io(data).parse.to_hash
  end
end
|
23
|
+
|
24
|
+
module LibXML #:nodoc:
  module Conversions #:nodoc:
    module Document #:nodoc:
      # Converts the document by delegating to its root node.
      def to_hash
        root.to_hash
      end
    end

    # Hash conversion for a single node; the merging rules themselves live
    # in the NodeHash helpers this module includes.
    module Node
      include NodeHash

      # Convert this node (and everything below it) to a hash.
      #
      # hash::
      #   Hash to merge the converted element into.
      #
      # Returns +hash+.
      def to_hash(hash = {})
        converted = {}

        # Register this node's hash in the parent under the node's name
        # before filling it in.
        insert_node_hash_into_parent(hash, name, converted)

        # Child elements and text/cdata nodes are folded into the same hash.
        each_child { |node| handle_child_element(node, converted) }

        # Drop the content entry when it is blank and other entries exist.
        remove_blank_content_node(converted)

        # Attributes are written last, so an attribute replaces any entry
        # already stored under the same key.
        each_attr do |attribute|
          converted[attribute.name] = attribute.value
        end

        hash
      end
    end
  end
end
|
61
|
+
|
62
|
+
# Mix the hash conversions into the libxml document and node classes.
[
  [LibXML::XML::Document, LibXML::Conversions::Document],
  [LibXML::XML::Node, LibXML::Conversions::Node]
].each { |target, mixin| target.send(:include, mixin) }
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
# XmlMini backend that builds the hash from libxml SAX callbacks.
module XmlMini_LibXMLSAX
  extend self

  # Class that will build the hash while the XML document
  # is being parsed using SAX events.
  #
  # NOTE(review): +blank?+ is not core Ruby; this presumably relies on
  # core_ext/blank being loaded elsewhere -- confirm.
  class HashBuilder
    include LibXML::XML::SaxParser::Callbacks

    # Key under which an element's accumulated character data is stored.
    CONTENT_KEY   = '__content__'.freeze
    # Temporary marker recording how many entries an element's hash had when
    # the element was opened; removed again in on_end_element.
    HASH_SIZE_KEY = '__hash_size__'.freeze

    # The hash built by the callbacks.
    attr_reader :hash

    # Hash of the element currently being filled (top of the stack).
    def current_hash
      @hash_stack.last
    end

    # Starts a fresh result hash with an empty content buffer.
    def on_start_document
      # The buffer is appended to with <<, so it must be a mutable string
      # rather than a (possibly frozen) literal.
      @hash = { CONTENT_KEY => ''.dup }
      @hash_stack = [@hash]
    end

    # Pops the top of the stack into @hash and drops its content entry.
    def on_end_document
      @hash = @hash_stack.pop
      @hash.delete(CONTENT_KEY)
    end

    # Opens a new element hash seeded with an empty content buffer and the
    # element's attributes, and registers it in the enclosing hash.
    #
    # name::
    #   Element name, used as the key in the enclosing hash.
    # attrs::
    #   Attributes; merged with Hash#merge, so a Hash is expected.
    def on_start_element(name, attrs = {})
      new_hash = { CONTENT_KEY => ''.dup }.merge(attrs)
      # Entry count including the marker itself; anything above this at the
      # end of the element means entries were added while it was open.
      new_hash[HASH_SIZE_KEY] = new_hash.size + 1

      # Repeated names are collected into an Array. Any other existing value
      # is left untouched; the new hash is then only tracked on the stack.
      case current_hash[name]
      when Array then current_hash[name] << new_hash
      when Hash  then current_hash[name] = [current_hash[name], new_hash]
      when nil   then current_hash[name] = new_hash
      end

      @hash_stack.push(new_hash)
    end

    # Closes the current element: removes the size marker, drops the content
    # entry when it is an empty string, or when it is blank and entries were
    # added while the element was open, then pops the element off the stack.
    def on_end_element(name)
      # The length has to be read before the marker is deleted.
      size_now = current_hash.length
      size_at_start = current_hash.delete(HASH_SIZE_KEY)
      content = current_hash[CONTENT_KEY]

      if (size_now > size_at_start && content.blank?) || content == ''
        current_hash.delete(CONTENT_KEY)
      end
      @hash_stack.pop
    end

    # Appends character data to the current element's content buffer.
    def on_characters(string)
      current_hash[CONTENT_KEY] << string
    end

    # CDATA blocks are treated exactly like ordinary character data.
    alias_method :on_cdata_block, :on_characters
  end

  # Class instantiated to receive the SAX callbacks; defaults to HashBuilder.
  attr_accessor :document_class
  self.document_class = HashBuilder

  # Parse an XML Document string or IO into a simple hash using libxml's
  # SAX parser.
  #
  # data::
  #   XML Document string or IO to parse. +nil+ is treated like an empty
  #   string.
  #
  # Returns an empty Hash when +data+ yields no characters; otherwise the
  # +hash+ of the callback object after parsing.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character so empty input never reaches the parser.
    char = data.getc
    return {} if char.nil?

    data.ungetc(char)

    # NOTE(review): installs libxml's quiet error handler on every call;
    # this looks like a process-wide setting -- confirm.
    LibXML::XML::Error.set_handler(&LibXML::XML::Error::QUIET_HANDLER)
    parser = LibXML::XML::SaxParser.io(data)
    document = document_class.new

    parser.callbacks = document
    parser.parse
    document.hash
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Helpers shared by the node-based backends for turning a node and its
# children into nested hashes.
#
# NOTE(review): +blank?+ is not core Ruby; this presumably relies on
# core_ext/blank being loaded elsewhere -- confirm.
module NodeHash
  # Key under which a node's text/cdata content is accumulated.
  CONTENT_ROOT = '__content__'.freeze

  # Stores +node_hash+ in +hash+ under +name+.
  #
  # hash::
  #   Parent hash to insert into.
  # name::
  #   Key to store the node hash under.
  # node_hash::
  #   Hash representing the node.
  #
  # The first value for a name is stored directly, a second one turns the
  # entry into a two-element Array, and later ones are appended. If the
  # existing value is anything other than an Array, a Hash or nil, +hash+ is
  # left unchanged.
  def insert_node_hash_into_parent(hash, name, node_hash)
    case hash[name]
    when Array then hash[name] << node_hash
    when Hash  then hash[name] = [hash[name], node_hash]
    when nil   then hash[name] = node_hash
    end
  end

  # Folds one child node into +node_hash+.
  #
  # child::
  #   Child node; must answer +element?+, +text?+ and +cdata?+.
  # node_hash::
  #   Hash of the parent node being built.
  #
  # Elements convert themselves into +node_hash+ via +to_hash+; text and
  # cdata content is appended to the content entry. Other nodes are ignored.
  def handle_child_element(child, node_hash)
    if child.element?
      child.to_hash(node_hash)
    elsif child.text? || child.cdata?
      # The buffer is appended to with <<, so it must be a mutable string
      # rather than a (possibly frozen) literal.
      node_hash[CONTENT_ROOT] ||= ''.dup
      node_hash[CONTENT_ROOT] << child.content
    end
  end

  # Deletes the content entry when it is blank and +node_hash+ holds more
  # than one entry.
  def remove_blank_content_node(node_hash)
    if node_hash.length > 1 && node_hash[CONTENT_ROOT].blank?
      node_hash.delete(CONTENT_ROOT)
    end
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require 'xml_mini/node_hash'
|
3
|
+
|
4
|
+
# XmlMini backend that converts a Nokogiri DOM document into a hash.
module XmlMini_Nokogiri
  extend self

  # Parse an XML Document string or IO into a simple hash using libxml / nokogiri.
  #
  # data::
  #   XML Document string or IO to parse. +nil+ is treated like an empty
  #   string.
  #
  # Returns an empty Hash when +data+ yields no characters. Raises the first
  # entry of the document's +errors+ when that list is not empty.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character so empty input never reaches the parser.
    first_char = data.getc
    return {} if first_char.nil?

    data.ungetc(first_char)
    doc = Nokogiri::XML(data)
    raise doc.errors.first unless doc.errors.empty?
    doc.to_hash
  end

  module Conversions #:nodoc:
    module Document #:nodoc:
      # Converts the document by delegating to its root node.
      def to_hash
        root.to_hash
      end
    end

    # Hash conversion for a single node; the merging rules themselves live
    # in the NodeHash helpers this module includes.
    module Node
      include NodeHash

      # Convert this node (and everything below it) to a hash.
      #
      # hash::
      #   Hash to merge the converted element into.
      #
      # Returns +hash+.
      def to_hash(hash = {})
        collected = {}

        # Register this node's hash in the parent under the node's name
        # before filling it in.
        insert_node_hash_into_parent(hash, name, collected)

        # Child elements and text/cdata nodes are folded into the same hash.
        children.each { |node| handle_child_element(node, collected) }

        # Drop the content entry when it is blank and other entries exist.
        remove_blank_content_node(collected)

        # Attributes are written last, so an attribute replaces any entry
        # already stored under the same key.
        attribute_nodes.each do |attribute|
          collected[attribute.node_name] = attribute.value
        end

        hash
      end
    end
  end

  # Mix the conversions into Nokogiri's document and node classes.
  Nokogiri::XML::Document.send(:include, Conversions::Document)
  Nokogiri::XML::Node.send(:include, Conversions::Node)
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
# XmlMini backend that builds the hash from Nokogiri SAX callbacks.
module XmlMini_NokogiriSAX
  extend self

  # Class that will build the hash while the XML document
  # is being parsed using SAX events.
  #
  # NOTE(review): +blank?+ is not core Ruby; this presumably relies on
  # core_ext/blank being loaded elsewhere -- confirm.
  class HashBuilder < Nokogiri::XML::SAX::Document
    # Key under which an element's accumulated character data is stored.
    CONTENT_KEY   = '__content__'.freeze
    # Temporary marker recording how many entries an element's hash had when
    # the element was opened; removed again in end_element.
    HASH_SIZE_KEY = '__hash_size__'.freeze

    # The hash built by the callbacks.
    attr_reader :hash

    # Hash of the element currently being filled (top of the stack).
    def current_hash
      @hash_stack.last
    end

    # Starts a fresh, empty result hash.
    def start_document
      @hash = {}
      @hash_stack = [@hash]
    end

    # Raises if any element hash is still open when the document ends.
    def end_document
      raise "Parse stack not empty!" if @hash_stack.size > 1
    end

    # Turns an error reported to this callback into an exception.
    def error(error_message)
      raise error_message
    end

    # Opens a new element hash seeded with an empty content buffer and the
    # element's attributes, and registers it in the enclosing hash.
    #
    # name::
    #   Element name, used as the key in the enclosing hash.
    # attrs::
    #   Attributes; converted with Hash[], so a list of [name, value] pairs
    #   is expected.
    def start_element(name, attrs = [])
      # The buffer is appended to with <<, so it must be a mutable string
      # rather than a (possibly frozen) literal.
      new_hash = { CONTENT_KEY => ''.dup }.merge(Hash[attrs])
      # Entry count including the marker itself; anything above this at the
      # end of the element means entries were added while it was open.
      new_hash[HASH_SIZE_KEY] = new_hash.size + 1

      # Repeated names are collected into an Array. Any other existing value
      # is left untouched; the new hash is then only tracked on the stack.
      case current_hash[name]
      when Array then current_hash[name] << new_hash
      when Hash  then current_hash[name] = [current_hash[name], new_hash]
      when nil   then current_hash[name] = new_hash
      end

      @hash_stack.push(new_hash)
    end

    # Closes the current element: removes the size marker, drops the content
    # entry when it is an empty string, or when it is blank and entries were
    # added while the element was open, then pops the element off the stack.
    def end_element(name)
      # The length has to be read before the marker is deleted.
      size_now = current_hash.length
      size_at_start = current_hash.delete(HASH_SIZE_KEY)
      content = current_hash[CONTENT_KEY]

      if (size_now > size_at_start && content.blank?) || content == ''
        current_hash.delete(CONTENT_KEY)
      end
      @hash_stack.pop
    end

    # Appends character data to the current element's content buffer.
    def characters(string)
      current_hash[CONTENT_KEY] << string
    end

    # CDATA blocks are treated exactly like ordinary character data.
    alias_method :cdata_block, :characters
  end

  # Class instantiated to receive the SAX callbacks; defaults to HashBuilder.
  attr_accessor :document_class
  self.document_class = HashBuilder

  # Parse an XML Document string or IO into a simple hash using Nokogiri's
  # SAX parser.
  #
  # data::
  #   XML Document string or IO to parse. +nil+ is treated like an empty
  #   string.
  #
  # Returns an empty Hash when +data+ yields no characters; otherwise the
  # +hash+ of the callback object after parsing.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character so empty input never reaches the parser.
    char = data.getc
    return {} if char.nil?

    data.ungetc(char)
    document = document_class.new
    parser = Nokogiri::XML::SAX::Parser.new(document)
    parser.parse(data)
    document.hash
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
# XmlMini backend that builds the hash from a REXML document.
#
# NOTE(review): +blank?+ (used in empty_content?) is not core Ruby; this
# presumably relies on core_ext/blank being loaded elsewhere -- confirm.
module XmlMini_REXML
  extend self

  # Key under which an element's text content is stored.
  CONTENT_KEY = '__content__'.freeze

  # Parse an XML Document string or IO into a simple hash
  #
  # Same as XmlSimple::xml_in but doesn't shoot itself in the foot,
  # and uses the defaults from Active Support.
  #
  # data::
  #   XML Document string or IO to parse. +nil+ is treated like an empty
  #   string.
  #
  # Returns an empty Hash when +data+ yields no characters. Raises
  # REXML::ParseException when the parsed document has no root element.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character so empty input never loads or calls REXML.
    char = data.getc
    return {} if char.nil?

    data.ungetc(char)
    # Loaded lazily, only once there is something to parse.
    require 'rexml/document' unless defined?(REXML::Document)
    doc = REXML::Document.new(data)

    unless doc.root
      raise REXML::ParseException,
        "The document #{doc.to_s.inspect} does not have a valid root"
    end

    merge_element!({}, doc.root)
  end

  private

  # Convert an XML element and merge into the hash
  #
  # hash::
  #   Hash to merge the converted element into.
  # element::
  #   XML element to merge into hash
  def merge_element!(hash, element)
    merge!(hash, element.name, collapse(element))
  end

  # Actually converts an XML document element into a data structure.
  #
  # element::
  #   The document element to be collapsed.
  def collapse(element)
    hash = get_attributes(element)

    if element.has_elements?
      element.each_element { |child| merge_element!(hash, child) }
      # With child elements present, blank text between them is dropped.
      merge_texts!(hash, element) unless empty_content?(element)
      hash
    else
      merge_texts!(hash, element)
    end
  end

  # Merge all the texts of an element into the hash
  #
  # hash::
  #   Hash to add the converted element to.
  # element::
  #   XML element whose texts are to be merged into the hash
  #
  # Returns +hash+, unchanged when the element has no text.
  def merge_texts!(hash, element)
    return hash unless element.has_text?

    # must use value to prevent double-escaping
    # The buffer is appended to with <<, so it must be a mutable string
    # rather than a (possibly frozen) literal.
    texts = ''.dup
    element.texts.each { |t| texts << t.value }
    merge!(hash, CONTENT_KEY, texts)
  end

  # Adds a new key/value pair to an existing Hash. If the key to be added
  # already exists and the existing value associated with key is not
  # an Array, it will be wrapped in an Array. Then the new value is
  # appended to that Array.
  #
  # A new value that is itself an Array is stored wrapped in another Array.
  #
  # hash::
  #   Hash to add key/value pair to.
  # key::
  #   Key to be added.
  # value::
  #   Value to be associated with key.
  #
  # Returns +hash+.
  def merge!(hash, key, value)
    if hash.has_key?(key)
      if hash[key].instance_of?(Array)
        hash[key] << value
      else
        hash[key] = [hash[key], value]
      end
    elsif value.instance_of?(Array)
      hash[key] = [value]
    else
      hash[key] = value
    end
    hash
  end

  # Converts the attributes array of an XML element into a hash.
  # Returns an empty Hash if node has no attributes.
  #
  # element::
  #   XML element to extract attributes from.
  def get_attributes(element)
    attributes = {}
    element.attributes.each { |n, v| attributes[n] = v }
    attributes
  end

  # Determines if a document element has no text content, i.e. whether its
  # texts joined together are blank.
  #
  # element::
  #   XML element to be checked.
  def empty_content?(element)
    element.texts.join.blank?
  end
end
|