multi_xml 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of multi_xml might be problematic. Click here for more details.

data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
- pkg/*
2
1
  *.gem
3
2
  .bundle
3
+ pkg/*
4
+ rdoc/*
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format=nested
data/Gemfile CHANGED
@@ -1,9 +1,8 @@
1
1
  source "http://rubygems.org"
2
2
 
3
3
  group :development, :test do
4
- gem 'hpricot', "~> 0.8.2", :require => nil
5
- gem 'libxml-ruby', "~> 1.1.4", :require => nil
6
- gem 'nokogiri', "~> 1.4.3", :require => nil
4
+ gem 'libxml-ruby', "~> 1.1", :require => nil
5
+ gem 'nokogiri', "~> 1.4", :require => nil
7
6
  end
8
7
 
9
8
  gemspec
@@ -1,25 +1,33 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- multi_xml (0.0.1)
4
+ multi_xml (0.1.0)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
8
8
  specs:
9
- hpricot (0.8.2)
9
+ diff-lcs (1.1.2)
10
10
  libxml-ruby (1.1.4)
11
11
  nokogiri (1.4.3.1)
12
12
  rake (0.8.7)
13
- rspec (1.3.0)
13
+ rspec (2.0.0)
14
+ rspec-core (= 2.0.0)
15
+ rspec-expectations (= 2.0.0)
16
+ rspec-mocks (= 2.0.0)
17
+ rspec-core (2.0.0)
18
+ rspec-expectations (2.0.0)
19
+ diff-lcs (>= 1.1.2)
20
+ rspec-mocks (2.0.0)
21
+ rspec-core (= 2.0.0)
22
+ rspec-expectations (= 2.0.0)
14
23
 
15
24
  PLATFORMS
16
25
  ruby
17
26
 
18
27
  DEPENDENCIES
19
- bundler (~> 1.0.2)
20
- hpricot (~> 0.8.2)
21
- libxml-ruby (~> 1.1.4)
28
+ bundler (~> 1.0)
29
+ libxml-ruby (~> 1.1)
22
30
  multi_xml!
23
- nokogiri (~> 1.4.3)
24
- rake (~> 0.8.7)
25
- rspec (~> 1.3.0)
31
+ nokogiri (~> 1.4)
32
+ rake (~> 0.8)
33
+ rspec (~> 2.0)
@@ -4,18 +4,18 @@ Lots of Ruby libraries utilize XML parsing in some form, and everyone has their
4
4
 
5
5
  require 'multi_xml'
6
6
 
7
- MultiXml.engine = :nokogiri
7
+ MultiXml.parser = :nokogiri
8
8
  MultiXml.parse('<tag>This is the contents</tag>') # parsed using Nokogiri
9
9
 
10
- MultiXml.engine = :rexml
11
- MultiXml.engine = MultiJson::Engines::Rexml # equivalent to previous line
10
+ MultiXml.parser = :rexml
11
+ MultiXml.parser = MultiXml::Parsers::Rexml # equivalent to previous line
12
12
  MultiXml.parse('<tag>This is the contents</tag>') # parsed using REXML
13
13
 
14
- The <tt>engine</tt> setter takes either a symbol or a class (to allow for custom XML parsers) that responds to <tt>.parse</tt> at the class level.
14
+ The <tt>parser</tt> setter takes either a symbol or a class (to allow for custom XML parsers) that responds to <tt>.parse</tt> at the class level.
15
15
 
16
- MultiXML tries to have intelligent defaulting. That is, if you have any of the supported engines already loaded, it will utilize them before attempting to load any. When loading, libraries are ordered by speed. First LibXML, then Nokogiri, then Hpricot, then REXML.
16
+ MultiXML tries to have intelligent defaulting. That is, if you have any of the supported parsers already loaded, it will utilize them before attempting to load any. When loading, libraries are ordered by speed. First LibXML, then Nokogiri, then REXML.
17
17
 
18
- == Inspriation
18
+ == Inspiration
19
19
 
20
20
  MultiXML was inspired by MultiJSON[http://github.com/intridea/multi_json/].
21
21
 
data/Rakefile CHANGED
@@ -1,28 +1,18 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- require 'rake'
4
- require 'rake/rdoctask'
5
- require 'spec/rake/spectask'
1
+ require "bundler"
2
+ require "rake/rdoctask"
3
+ require "rspec/core/rake_task"
6
4
 
7
5
  Bundler::GemHelper.install_tasks
8
6
 
9
7
  Rake::RDocTask.new do |rdoc|
10
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
11
8
  rdoc.rdoc_dir = 'rdoc'
12
- rdoc.title = "multi_xml #{version}"
9
+ rdoc.title = "multi_xml #{MultiXml::VERSION}"
13
10
  rdoc.rdoc_files.include('README*')
14
11
  rdoc.rdoc_files.include('lib/**/*.rb')
15
12
  end
16
13
 
17
- Spec::Rake::SpecTask.new(:spec) do |spec|
18
- spec.libs << 'lib' << 'spec'
19
- spec.spec_files = FileList['spec/**/*_spec.rb']
20
- end
21
-
22
- Spec::Rake::SpecTask.new(:rcov) do |spec|
23
- spec.libs << 'lib' << 'spec'
24
- spec.pattern = 'spec/**/*_spec.rb'
25
- spec.rcov = true
14
+ desc "Run all examples"
15
+ RSpec::Core::RakeTask.new(:spec) do |t|
26
16
  end
27
17
 
28
18
  task :default => :spec
@@ -1,65 +1,229 @@
1
+ require 'base64'
2
+ require 'bigdecimal'
3
+ require 'date'
4
+ require 'time'
5
+ require 'yaml'
6
+
1
7
  module MultiXml
2
- module_function
8
+ class << self
3
9
 
4
- # Get the current engine class.
5
- def engine
6
- return @engine if @engine
7
- self.engine = self.default_engine
8
- @engine
9
- end
10
+ REQUIREMENT_MAP = [
11
+ ['libxml', :libxml],
12
+ ['nokogiri', :nokogiri],
13
+ ['rexml/document', :rexml]
14
+ ]
15
+
16
+ CONTENT_ROOT = '__content__'.freeze unless defined?(CONTENT_ROOT)
17
+
18
+ # TODO: use Time.xmlschema instead of Time.parse;
19
+ # use regexp instead of Date.parse
20
+ unless defined?(PARSING)
21
+ PARSING = {
22
+ 'symbol' => Proc.new{|symbol| symbol.to_sym},
23
+ 'date' => Proc.new{|date| ::Date.parse(date)},
24
+ 'datetime' => Proc.new{|time| ::Time.parse(time).utc rescue ::DateTime.parse(time).utc},
25
+ 'integer' => Proc.new{|integer| integer.to_i},
26
+ 'float' => Proc.new{|float| float.to_f},
27
+ 'decimal' => Proc.new{|number| BigDecimal(number)},
28
+ 'boolean' => Proc.new{|boolean| !%w(0 false).include?(boolean.strip)},
29
+ 'string' => Proc.new{|string| string.to_s},
30
+ 'yaml' => Proc.new{|yaml| YAML::load(yaml) rescue yaml},
31
+ 'base64Binary' => Proc.new{|binary| binary.unpack('m').first},
32
+ 'binary' => Proc.new{|binary, entity| parse_binary(binary, entity)},
33
+ 'file' => Proc.new{|file, entity| parse_file(file, entity)}
34
+ }
35
+
36
+ PARSING.update(
37
+ 'double' => PARSING['float'],
38
+ 'dateTime' => PARSING['datetime']
39
+ )
40
+ end
10
41
 
11
- REQUIREMENT_MAP = [
12
- ['libxml', :libxml],
13
- ['nokogiri', :nokogiri],
14
- ['hpricot', :hpricot],
15
- ['rexml/document', :rexml]
16
- ]
17
-
18
- # The default engine based on what you currently
19
- # have loaded and installed. First checks to see
20
- # if any engines are already loaded, then checks
21
- # to see which are installed if none are loaded.
22
- def default_engine
23
- return :libxml if defined?(::LibXML)
24
- return :nokogiri if defined?(::Nokogiri)
25
- return :hpricot if defined?(::Hpricot)
26
-
27
- REQUIREMENT_MAP.each do |(library, engine)|
28
- begin
29
- require library
30
- return engine
31
- rescue LoadError
32
- next
42
+ # Get the current parser class.
43
+ def parser
44
+ return @parser if @parser
45
+ self.parser = self.default_parser
46
+ @parser
47
+ end
48
+
49
+ # The default parser based on what you currently
50
+ # have loaded and installed. First checks to see
51
+ # if any parsers are already loaded, then checks
52
+ # to see which are installed if none are loaded.
53
+ def default_parser
54
+ return :libxml if defined?(::LibXML)
55
+ return :nokogiri if defined?(::Nokogiri)
56
+
57
+ REQUIREMENT_MAP.each do |(library, parser)|
58
+ begin
59
+ require library
60
+ return parser
61
+ rescue LoadError
62
+ next
63
+ end
64
+ end
65
+ end
66
+
67
+ # Set the XML parser utilizing a symbol, string, or class.
68
+ # Supported by default are:
69
+ #
70
+ # * <tt>:libxml</tt>
71
+ # * <tt>:nokogiri</tt>
72
+ # * <tt>:rexml</tt>
73
+ def parser=(new_parser)
74
+ case new_parser
75
+ when String, Symbol
76
+ require "multi_xml/parsers/#{new_parser.to_s.downcase}"
77
+ @parser = MultiXml::Parsers.const_get("#{new_parser.to_s.split('_').map{|s| s.capitalize}.join('')}")
78
+ when Class, Module
79
+ @parser = new_parser
80
+ else
81
+ raise "Did not recognize your parser specification. Please specify either a symbol or a class."
82
+ end
83
+ end
84
+
85
+ # Parse an XML string into Ruby.
86
+ #
87
+ # <b>Options</b>
88
+ #
89
+ # <tt>:symbolize_keys</tt> :: If true, will use symbols instead of strings for the keys.
90
+ def parse(xml, options={})
91
+ xml.strip!
92
+ hash = typecast_xml_value(undasherize_keys(parser.parse(xml)))
93
+ hash = symbolize_keys(hash) if options[:symbolize_keys]
94
+ hash = {} if hash.nil?
95
+ hash
96
+ end
97
+
98
+ # This module decorates files with the <tt>original_filename</tt>
99
+ # and <tt>content_type</tt> methods.
100
+ module FileLike #:nodoc:
101
+ attr_writer :original_filename, :content_type
102
+
103
+ def original_filename
104
+ @original_filename || 'untitled'
105
+ end
106
+
107
+ def content_type
108
+ @content_type || 'application/octet-stream'
33
109
  end
34
110
  end
35
- end
36
111
 
37
- # Set the XML parser utilizing a symbol, string, or class.
38
- # Supported by default are:
39
- #
40
- # * <tt>:libxml</tt>
41
- # * <tt>:nokogiri</tt>
42
- # * <tt>:hpricot</tt>
43
- # * <tt>:rexml</tt>
44
- def engine=(new_engine)
45
- case new_engine
46
- when String, Symbol
47
- require "multi_xml/engines/#{new_engine}"
48
- @engine = MultiXml::Engines.const_get("#{new_engine.to_s.split('_').map{|s| s.capitalize}.join('')}")
49
- when Class
50
- @engine = new_engine
112
+ private
113
+
114
+ # TODO: Add support for other encodings
115
+ def self.parse_binary(binary, entity) #:nodoc:
116
+ case entity['encoding']
117
+ when 'base64'
118
+ Base64.decode64(binary)
51
119
  else
52
- raise "Did not recognize your engine specification. Please specify either a symbol or a class."
120
+ binary
121
+ end
53
122
  end
54
- end
55
123
 
56
- # Parse a XML string into Ruby.
57
- #
58
- # <b>Options</b>
59
- #
60
- # <tt>:symbolize_keys</tt> :: If true, will use symbols instead of strings for the keys.
61
- def parse(string, options = {})
62
- engine.parse(string, options)
63
- end
124
+ def self.parse_file(file, entity)
125
+ f = StringIO.new(Base64.decode64(file))
126
+ f.extend(FileLike)
127
+ f.original_filename = entity['name']
128
+ f.content_type = entity['content_type']
129
+ f
130
+ end
64
131
 
132
+ def symbolize_keys(hash)
133
+ hash.inject({}) do |result, (key, value)|
134
+ new_key = case key
135
+ when String
136
+ key.to_sym
137
+ else
138
+ key
139
+ end
140
+ new_value = case value
141
+ when Hash
142
+ symbolize_keys(value)
143
+ else
144
+ value
145
+ end
146
+ result[new_key] = new_value
147
+ result
148
+ end
149
+ end
150
+
151
+ def undasherize_keys(params)
152
+ case params
153
+ when Hash
154
+ params.inject({}) do |hash, (key, value)|
155
+ hash[key.to_s.tr('-', '_')] = undasherize_keys(value)
156
+ hash
157
+ end
158
+ when Array
159
+ params.map{|value| undasherize_keys(value)}
160
+ else
161
+ params
162
+ end
163
+ end
164
+
165
+ def wrap(object)
166
+ if object.nil?
167
+ []
168
+ elsif object.respond_to?(:to_ary)
169
+ object.to_ary
170
+ else
171
+ [object]
172
+ end
173
+ end
174
+
175
+ def typecast_xml_value(value)
176
+ case value
177
+ when Hash
178
+ if value['type'] == 'array'
179
+ _, entries = wrap(value.detect{|key, value| key != 'type'})
180
+ if entries.nil? || entries.strip == '' || (c = value[CONTENT_ROOT] && c.nil?)
181
+ []
182
+ else
183
+ case entries
184
+ when Array
185
+ entries.map{|value| typecast_xml_value(value)}
186
+ when Hash
187
+ [typecast_xml_value(entries)]
188
+ else
189
+ raise "can't typecast #{entries.class.name}: #{entries.inspect}"
190
+ end
191
+ end
192
+ elsif value.has_key?(CONTENT_ROOT)
193
+ content = value[CONTENT_ROOT]
194
+ if block = PARSING[value['type']]
195
+ block.arity == 1 ? block.call(content) : block.call(content, value)
196
+ else
197
+ content
198
+ end
199
+ elsif value['type'] == 'string' && value['nil'] != 'true'
200
+ ''
201
+ # blank or nil parsed values are represented by nil
202
+ elsif value.nil? || value.empty? || value['nil'] == 'true'
203
+ nil
204
+ # If 'type' is the only key present, the value is
205
+ # still nil, unless 'type' is itself an XML node
206
+ # (that is, value['type'] is a Hash)
207
+ elsif value['type'] && value.size == 1 && !value['type'].is_a?(::Hash)
208
+ nil
209
+ else
210
+ xml_value = value.inject({}) do |hash, (key, value)|
211
+ hash[key] = typecast_xml_value(value)
212
+ hash
213
+ end
214
+
215
+ # Turn {:files => {:file => #<StringIO>}} into {:files => #<StringIO>} so it is compatible with
216
+ # how multipart uploaded files from HTML appear
217
+ xml_value['file'].is_a?(StringIO) ? xml_value['file'] : xml_value
218
+ end
219
+ when Array
220
+ value.map!{|i| typecast_xml_value(i)}
221
+ value.length > 1 ? value : value.first
222
+ when String
223
+ value
224
+ else
225
+ raise "can't typecast #{value.class.name}: #{value.inspect}"
226
+ end
227
+ end
228
+ end
65
229
  end
@@ -0,0 +1,79 @@
1
+ require 'libxml' unless defined?(LibXML)
2
+
3
+ module MultiXml
4
+ module Parsers
5
+ # Use LibXML to parse XML.
6
+ module Libxml #:nodoc:
7
+ extend self
8
+
9
+ # Parse an XML Document string or IO into a simple hash using libxml.
10
+ # data::
11
+ # XML Document string or IO to parse
12
+ def parse(data)
13
+ if !data.respond_to?(:read)
14
+ data = StringIO.new(data || '')
15
+ end
16
+
17
+ char = data.getc
18
+ if char.nil?
19
+ {}
20
+ else
21
+ data.ungetc(char)
22
+ LibXML::XML::Parser.io(data).parse.to_hash
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
28
+
29
+ module LibXML #:nodoc:
30
+ module Conversions #:nodoc:
31
+ module Document #:nodoc:
32
+ def to_hash
33
+ root.to_hash
34
+ end
35
+ end
36
+
37
+ module Node #:nodoc:
38
+ CONTENT_ROOT = '__content__'.freeze unless defined?(CONTENT_ROOT)
39
+
40
+ # Convert XML document to hash
41
+ #
42
+ # hash::
43
+ # Hash to merge the converted element into.
44
+ def to_hash(hash={})
45
+ node_hash = {}
46
+
47
+ # Insert node hash into parent hash correctly.
48
+ case hash[name]
49
+ when Array then hash[name] << node_hash
50
+ when Hash then hash[name] = [hash[name], node_hash]
51
+ when nil then hash[name] = node_hash
52
+ end
53
+
54
+ # Handle child elements
55
+ each_child do |c|
56
+ if c.element?
57
+ c.to_hash(node_hash)
58
+ elsif c.text? || c.cdata?
59
+ node_hash[CONTENT_ROOT] ||= ''
60
+ node_hash[CONTENT_ROOT] << c.content
61
+ end
62
+ end
63
+
64
+ # Remove content node if it is blank
65
+ if node_hash.length > 1 && (node_hash[CONTENT_ROOT].nil? || node_hash[CONTENT_ROOT].empty?)
66
+ node_hash.delete(CONTENT_ROOT)
67
+ end
68
+
69
+ # Handle attributes
70
+ each_attr { |a| node_hash[a.name] = a.value }
71
+
72
+ hash
73
+ end
74
+ end
75
+ end
76
+ end
77
+
78
+ LibXML::XML::Document.send(:include, LibXML::Conversions::Document)
79
+ LibXML::XML::Node.send(:include, LibXML::Conversions::Node)