xml_mini 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +18 -0
- data/.rvmrc +1 -0
- data/Gemfile +20 -0
- data/Guardfile +15 -0
- data/LICENSE +22 -0
- data/README.md +31 -0
- data/Rakefile +11 -0
- data/lib/core_ext/array.rb +45 -0
- data/lib/core_ext/blank.rb +112 -0
- data/lib/core_ext/hash.rb +72 -0
- data/lib/xml_mini.rb +160 -0
- data/lib/xml_mini/libxml.rb +63 -0
- data/lib/xml_mini/libxmlsax.rb +84 -0
- data/lib/xml_mini/node_hash.rb +29 -0
- data/lib/xml_mini/nokogiri.rb +64 -0
- data/lib/xml_mini/nokogirisax.rb +81 -0
- data/lib/xml_mini/rexml.rb +126 -0
- data/lib/xml_mini/version.rb +3 -0
- data/test/lib/xml_mini/libxml_engine_test.rb +202 -0
- data/test/lib/xml_mini/libxmlsax_engine_test.rb +193 -0
- data/test/lib/xml_mini/nokogiri_engine_test.rb +215 -0
- data/test/lib/xml_mini/nokogirisax_engine_test.rb +216 -0
- data/test/lib/xml_mini/rexml_engine_test.rb +28 -0
- data/test/lib/xml_mini_test.rb +89 -0
- data/test/test_helper.rb +7 -0
- data/xml_mini.gemspec +17 -0
- metadata +84 -0
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'xml_mini/node_hash'
|
2
|
+
|
3
|
+
require 'stringio' # parse wraps String input in a StringIO

# XmlMini backend that converts XML to a Hash through the libxml DOM parser.
module XmlMini_LibXML
  extend self

  # Parse an XML Document string or IO into a simple hash using libxml.
  #
  # data::
  #   XML Document string or IO to parse. Anything that does not respond to
  #   +read+ is wrapped in a StringIO; +nil+ is treated as an empty string.
  #
  # Returns an empty Hash when +data+ yields no characters, otherwise the
  # result of +to_hash+ on the document parsed by libxml.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character so empty input never reaches the parser.
    first_char = data.getc
    return {} if first_char.nil?

    data.ungetc(first_char)
    LibXML::XML::Parser.io(data).parse.to_hash
  end
end
|
23
|
+
|
24
|
+
module LibXML #:nodoc:
  module Conversions #:nodoc:
    module Document #:nodoc:
      # Convert the document to a hash by delegating to its root node.
      def to_hash
        root.to_hash
      end
    end

    module Node
      include NodeHash

      # Convert this XML node, its children and its attributes to a hash.
      #
      # hash::
      #   Hash to merge the converted element into.
      #
      # Returns +hash+.
      def to_hash(hash={})
        converted = {}

        # Register the node under its name before descending into children.
        insert_node_hash_into_parent(hash, name, converted)

        # Child elements and text/CDATA content.
        each_child { |child| handle_child_element(child, converted) }

        # Drop the content entry when it is blank and other entries exist.
        remove_blank_content_node(converted)

        # Attributes are assigned last, so they replace same-named entries.
        each_attr do |attribute|
          converted[attribute.name] = attribute.value
        end

        hash
      end
    end
  end
end

# Install the conversions on the libxml document and node classes.
LibXML::XML::Document.send(:include, LibXML::Conversions::Document)
LibXML::XML::Node.send(:include, LibXML::Conversions::Node)
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
# XmlMini backend that converts XML to a Hash through the libxml SAX parser.
module XmlMini_LibXMLSAX
  extend self

  # Class that will build the hash while the XML document
  # is being parsed using SAX events.
  class HashBuilder

    include LibXML::XML::SaxParser::Callbacks

    CONTENT_KEY   = '__content__'.freeze
    HASH_SIZE_KEY = '__hash_size__'.freeze

    # The document-level hash being built.
    attr_reader :hash

    # Hash of the element currently open (top of the stack).
    def current_hash
      @hash_stack.last
    end

    # Start with a document-level hash that owns its own text buffer.
    def on_start_document
      # ''.dup gives a mutable buffer even when string literals are frozen.
      @hash = {CONTENT_KEY => ''.dup}
      @hash_stack = [@hash]
    end

    # Pop the document-level hash and drop its text buffer.
    def on_end_document
      @hash = @hash_stack.pop
      @hash.delete(CONTENT_KEY)
    end

    # Open a new element hash seeded with its attributes and attach it to
    # the parent: the first occurrence is stored directly, later occurrences
    # of the same name are collected into an Array.
    #
    # name::
    #   Element name.
    # attrs::
    #   Hash of attribute names to values.
    def on_start_element(name, attrs = {})
      new_hash = {CONTENT_KEY => ''.dup}.merge(attrs)
      # Remember the size the hash will have once this marker is stored, so
      # on_end_element can tell whether further entries were added.
      new_hash[HASH_SIZE_KEY] = new_hash.size + 1

      case current_hash[name]
      when Array then current_hash[name] << new_hash
      when Hash  then current_hash[name] = [current_hash[name], new_hash]
      when nil   then current_hash[name] = new_hash
      end

      @hash_stack.push(new_hash)
    end

    # Close the current element: remove the size marker, and remove the
    # content entry when it is the empty string, or when it is blank and
    # entries were added to the hash since on_start_element.
    def on_end_element(name)
      node = current_hash
      # length is read before the marker is deleted; the order matters.
      gained_entries = node.length > node.delete(HASH_SIZE_KEY)
      content = node[CONTENT_KEY]

      node.delete(CONTENT_KEY) if (gained_entries && content.blank?) || content == ''
      @hash_stack.pop
    end

    # Append character data to the current element's content buffer.
    def on_characters(string)
      current_hash[CONTENT_KEY] << string
    end

    alias_method :on_cdata_block, :on_characters
  end

  # Class instantiated by parse to receive the SAX callbacks.
  attr_accessor :document_class
  self.document_class = HashBuilder

  # Parse an XML Document string or IO into a simple hash using the libxml
  # SAX parser.
  #
  # data::
  #   XML Document string or IO to parse. Anything that does not respond to
  #   +read+ is wrapped in a StringIO; +nil+ is treated as an empty string.
  #
  # Returns an empty Hash when +data+ yields no characters, otherwise the
  # hash exposed by the callbacks object after parsing.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character so empty input never reaches the parser.
    first_char = data.getc
    return {} if first_char.nil?

    data.ungetc(first_char)

    # NOTE: this installs libxml's quiet error handler through a class-level setter.
    LibXML::XML::Error.set_handler(&LibXML::XML::Error::QUIET_HANDLER)
    parser = LibXML::XML::SaxParser.io(data)
    document = document_class.new

    parser.callbacks = document
    parser.parse
    document.hash
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Helpers shared by the DOM-based backends for building the result hash.
module NodeHash
  CONTENT_ROOT = '__content__'.freeze

  # Attach +node_hash+ to +hash+ under +name+.
  #
  # hash::
  #   Parent hash to insert into.
  # name::
  #   Key (element name) to insert under.
  # node_hash::
  #   Hash representing the element.
  #
  # The first value for a name is stored directly; a second value turns the
  # entry into an Array of both; later values are appended to that Array.
  # An existing value of any other type is left untouched.
  def insert_node_hash_into_parent(hash, name, node_hash)
    case hash[name]
    when Array then hash[name] << node_hash
    when Hash  then hash[name] = [hash[name], node_hash]
    when nil   then hash[name] = node_hash
    end
  end

  # Fold one child node into +node_hash+.
  #
  # child::
  #   Node responding to +element?+, +text?+, +cdata?+, +content+ and
  #   +to_hash+.
  # node_hash::
  #   Hash of the parent element.
  #
  # Element children convert themselves into +node_hash+; text and CDATA
  # children are appended to the content entry. Other children are ignored.
  def handle_child_element(child, node_hash)
    if child.element?
      child.to_hash(node_hash)
    elsif child.text? || child.cdata?
      # ''.dup gives a mutable buffer even when string literals are frozen.
      (node_hash[CONTENT_ROOT] ||= ''.dup) << child.content
    end
  end

  # Delete the content entry when it is blank and the hash holds more than
  # one entry.
  #
  # node_hash::
  #   Hash of the element to clean up.
  def remove_blank_content_node(node_hash)
    node_hash.delete(CONTENT_ROOT) if node_hash.length > 1 && node_hash[CONTENT_ROOT].blank?
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require 'xml_mini/node_hash'
|
3
|
+
|
4
|
+
module XmlMini_Nokogiri
  extend self

  # Parse an XML Document string or IO into a simple hash using libxml / nokogiri.
  #
  # data::
  #   XML Document string or IO to parse. Anything that does not respond to
  #   +read+ is wrapped in a StringIO; +nil+ is treated as an empty string.
  #
  # Returns an empty Hash when +data+ yields no characters. Raises the first
  # entry of the document's error list when that list is not empty.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character so empty input never reaches the parser.
    first_char = data.getc
    return {} if first_char.nil?

    data.ungetc(first_char)
    doc = Nokogiri::XML(data)
    raise doc.errors.first unless doc.errors.empty?
    doc.to_hash
  end

  module Conversions #:nodoc:
    module Document #:nodoc:
      # Convert the document to a hash by delegating to its root node.
      def to_hash
        root.to_hash
      end
    end

    module Node
      include NodeHash

      # Convert this XML node, its children and its attributes to a hash.
      #
      # hash::
      #   Hash to merge the converted element into.
      #
      # Returns +hash+.
      def to_hash(hash={})
        converted = {}

        # Register the node under its name before descending into children.
        insert_node_hash_into_parent(hash, name, converted)

        # Child elements and text/CDATA content.
        children.each { |child| handle_child_element(child, converted) }

        # Remove content node if it is blank and other entries exist.
        remove_blank_content_node(converted)

        # Attributes are assigned last, so they replace same-named entries.
        attribute_nodes.each do |attribute|
          converted[attribute.node_name] = attribute.value
        end

        hash
      end
    end
  end

  # Install the conversions on the Nokogiri document and node classes.
  Nokogiri::XML::Document.send(:include, Conversions::Document)
  Nokogiri::XML::Node.send(:include, Conversions::Node)
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
# XmlMini backend that converts XML to a Hash through Nokogiri's SAX parser.
module XmlMini_NokogiriSAX
  extend self

  # Class that will build the hash while the XML document
  # is being parsed using SAX events.
  class HashBuilder < Nokogiri::XML::SAX::Document

    CONTENT_KEY   = '__content__'.freeze
    HASH_SIZE_KEY = '__hash_size__'.freeze

    # The document-level hash being built.
    attr_reader :hash

    # Hash of the element currently open (top of the stack).
    def current_hash
      @hash_stack.last
    end

    # Start with an empty document-level hash at the bottom of the stack.
    def start_document
      @hash = {}
      @hash_stack = [@hash]
    end

    # Fail when elements are still open at the end of the document.
    def end_document
      raise "Parse stack not empty!" if @hash_stack.size > 1
    end

    # Raise the message handed to the error callback.
    def error(error_message)
      raise error_message
    end

    # Open a new element hash seeded with its attributes and attach it to
    # the parent: the first occurrence is stored directly, later occurrences
    # of the same name are collected into an Array.
    #
    # name::
    #   Element name.
    # attrs::
    #   Array of [name, value] attribute pairs.
    def start_element(name, attrs = [])
      # ''.dup gives a mutable buffer even when string literals are frozen.
      new_hash = {CONTENT_KEY => ''.dup}.merge(Hash[attrs])
      # Remember the size the hash will have once this marker is stored, so
      # end_element can tell whether further entries were added.
      new_hash[HASH_SIZE_KEY] = new_hash.size + 1

      case current_hash[name]
      when Array then current_hash[name] << new_hash
      when Hash  then current_hash[name] = [current_hash[name], new_hash]
      when nil   then current_hash[name] = new_hash
      end

      @hash_stack.push(new_hash)
    end

    # Close the current element: remove the size marker, and remove the
    # content entry when it is the empty string, or when it is blank and
    # entries were added to the hash since start_element.
    def end_element(name)
      node = current_hash
      # length is read before the marker is deleted; the order matters.
      gained_entries = node.length > node.delete(HASH_SIZE_KEY)
      content = node[CONTENT_KEY]

      node.delete(CONTENT_KEY) if (gained_entries && content.blank?) || content == ''
      @hash_stack.pop
    end

    # Append character data to the current element's content buffer.
    def characters(string)
      current_hash[CONTENT_KEY] << string
    end

    alias_method :cdata_block, :characters
  end

  # Class instantiated by parse to receive the SAX callbacks.
  attr_accessor :document_class
  self.document_class = HashBuilder

  # Parse an XML Document string or IO into a simple hash using Nokogiri's
  # SAX parser.
  #
  # data::
  #   XML Document string or IO to parse. Anything that does not respond to
  #   +read+ is wrapped in a StringIO; +nil+ is treated as an empty string.
  #
  # Returns an empty Hash when +data+ yields no characters, otherwise the
  # hash exposed by the callbacks object after parsing.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character so empty input never reaches the parser.
    first_char = data.getc
    return {} if first_char.nil?

    data.ungetc(first_char)
    document = document_class.new
    parser = Nokogiri::XML::SAX::Parser.new(document)
    parser.parse(data)
    document.hash
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
# XmlMini backend that converts XML to a Hash through REXML.
module XmlMini_REXML
  extend self

  CONTENT_KEY = '__content__'.freeze

  # Parse an XML Document string or IO into a simple hash
  #
  # Same as XmlSimple::xml_in but doesn't shoot itself in the foot,
  # and uses the defaults from Active Support.
  #
  # data::
  #   XML Document string or IO to parse. Anything that does not respond to
  #   +read+ is wrapped in a StringIO; +nil+ is treated as an empty string.
  #
  # Returns an empty Hash when +data+ yields no characters. Raises
  # REXML::ParseException when the parsed document has no root element.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character so empty input never reaches the parser.
    first_char = data.getc
    return {} if first_char.nil?

    data.ungetc(first_char)
    # Loaded lazily so REXML is only required when a document is parsed.
    require 'rexml/document' unless defined?(REXML::Document)
    doc = REXML::Document.new(data)

    unless doc.root
      raise REXML::ParseException,
        "The document #{doc.to_s.inspect} does not have a valid root"
    end

    merge_element!({}, doc.root)
  end

  private

  # Convert an XML element and merge into the hash
  #
  # hash::
  #   Hash to merge the converted element into.
  # element::
  #   XML element to merge into hash
  def merge_element!(hash, element)
    merge!(hash, element.name, collapse(element))
  end

  # Actually converts an XML document element into a data structure.
  #
  # element::
  #   The document element to be collapsed.
  def collapse(element)
    hash = get_attributes(element)
    return merge_texts!(hash, element) unless element.has_elements?

    element.each_element { |child| merge_element!(hash, child) }
    # With child elements present, blank text is not recorded.
    merge_texts!(hash, element) unless empty_content?(element)
    hash
  end

  # Merge all the texts of an element into the hash
  #
  # hash::
  #   Hash to add the converted element to.
  # element::
  #   XML element whose texts are to be merged into the hash
  def merge_texts!(hash, element)
    return hash unless element.has_text?

    # must use value to prevent double-escaping
    merge!(hash, CONTENT_KEY, element.texts.map { |text| text.value }.join)
  end

  # Adds a new key/value pair to an existing Hash. If the key to be added
  # already exists and the existing value associated with key is not
  # an Array, it will be wrapped in an Array. Then the new value is
  # appended to that Array.
  #
  # hash::
  #   Hash to add key/value pair to.
  # key::
  #   Key to be added.
  # value::
  #   Value to be associated with key.
  def merge!(hash, key, value)
    if hash.has_key?(key)
      if hash[key].instance_of?(Array)
        hash[key] << value
      else
        hash[key] = [hash[key], value]
      end
    elsif value.instance_of?(Array)
      # Wrap an Array value so a later merge appends next to it, not into it.
      hash[key] = [value]
    else
      hash[key] = value
    end
    hash
  end

  # Converts the attributes array of an XML element into a hash.
  # Returns an empty Hash if node has no attributes.
  #
  # element::
  #   XML element to extract attributes from.
  def get_attributes(element)
    attributes = {}
    element.attributes.each { |name, value| attributes[name] = value }
    attributes
  end

  # Determines if a document element has only blank text content
  #
  # element::
  #   XML element to be checked.
  def empty_content?(element)
    element.texts.join.blank?
  end
end
|