multi_xml 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of multi_xml might be problematic. Click here for more details.

@@ -1,6 +1,5 @@
1
1
  rvm:
2
2
  - 1.8.7
3
- - 1.9.1
4
3
  - 1.9.2
5
4
  - jruby
6
5
  - rbx
data/Gemfile CHANGED
@@ -1,9 +1,9 @@
1
1
  source 'http://rubygems.org'
2
2
 
3
3
  group :development, :test do
4
- gem 'libxml-ruby', "~> 2.0", :require => nil, :platforms => :mri
5
- gem 'nokogiri', "~> 1.4", :require => nil
6
- gem 'ox', ["~> 1.2", ">= 1.2.15"], :require => nil
4
+ gem 'libxml-ruby', '~> 2.0', :require => nil, :platforms => :mri
5
+ gem 'nokogiri', '~> 1.4', :require => nil
6
+ gem 'ox', '~> 1.3', :require => nil
7
7
  end
8
8
 
9
9
  gemspec
data/README.md CHANGED
@@ -8,7 +8,7 @@ A generic swappable back-end for XML parsing
8
8
  [http://rdoc.info/gems/multi_xml](http://rdoc.info/gems/multi_xml)
9
9
 
10
10
  ## <a name="ci">Continuous Integration</a>
11
- [![Build Status](https://travis-ci.org/sferik/multi_xml.png)](http://travis-ci.org/sferik/multi_xml)
11
+ [![Build Status](https://secure.travis-ci.org/sferik/multi_xml.png)](http://travis-ci.org/sferik/multi_xml)
12
12
 
13
13
  ## <a name="examples">Usage Examples</a>
14
14
  Lots of Ruby libraries utilize XML parsing in some form, and everyone has their
@@ -26,6 +26,10 @@ library. You use it like so:
26
26
  MultiXml.parser = MultiXml::Parsers::Nokogiri # Same as above
27
27
  MultiXml.parse('<tag>This is the contents</tag>') # Parsed using Nokogiri
28
28
 
29
+ MultiXml.parser = :ox
30
+ MultiXml.parser = MultiXml::Parsers::Ox # Same as above
31
+ MultiXml.parse('<tag>This is the contents</tag>') # Parsed using Ox
32
+
29
33
  MultiXml.parser = :rexml
30
34
  MultiXml.parser = MultiXml::Parsers::Rexml # Same as above
31
35
  MultiXml.parse('<tag>This is the contents</tag>') # Parsed using REXML
@@ -35,7 +39,7 @@ responds to <tt>.parse</tt> at the class level.
35
39
 
36
40
  MultiXML tries to have intelligent defaulting. That is, if you have any of the supported parsers
37
41
  already loaded, it will utilize them before attempting to load any. When loading, libraries are
38
- ordered by speed: first LibXML, then Nokogiri, then REXML.
42
+ ordered by speed: first LibXML, then Nokogiri, then Ox, then REXML.
39
43
 
40
44
  ## <a name="contributing">Contributing</a>
41
45
  In the spirit of [free
@@ -56,6 +56,7 @@ module MultiXml
56
56
  def default_parser
57
57
  return :libxml if defined?(::LibXML)
58
58
  return :nokogiri if defined?(::Nokogiri)
59
+ return :ox if defined?(::Ox)
59
60
 
60
61
  REQUIREMENT_MAP.each do |(library, parser)|
61
62
  begin
@@ -95,20 +96,14 @@ module MultiXml
95
96
  xml ||= ''
96
97
 
97
98
  xml.strip! if xml.respond_to?(:strip!)
98
-
99
99
  begin
100
- if parser.respond_to?(:string_parser?) and parser.string_parser?
101
- raw_hash = parser.parse(xml)
102
- else
103
- xml = StringIO.new(xml) unless xml.respond_to?(:read)
100
+ xml = StringIO.new(xml) unless xml.respond_to?(:read)
104
101
 
105
- char = xml.getc
106
- return {} if char.nil?
107
- xml.ungetc(char)
102
+ char = xml.getc
103
+ return {} if char.nil?
104
+ xml.ungetc(char)
108
105
 
109
- raw_hash = parser.parse(xml)
110
- end
111
- hash = typecast_xml_value(undasherize_keys(raw_hash)) || {}
106
+ hash = typecast_xml_value(undasherize_keys(parser.parse(xml))) || {}
112
107
  rescue parser.parse_error => error
113
108
  raise ParseError, error.to_s, error.backtrace
114
109
  end
@@ -1,5 +1,23 @@
1
1
  require 'ox' unless defined?(Ox)
2
2
 
3
+ # Each MultiXml parser is expected to parse an XML document into a Hash. The
4
+ # conversion rules are:
5
+ #
6
+ # - Each document starts out as an empty Hash.
7
+ #
8
+ # - Reading an element creates an entry in the parent Hash that has a key of
9
+ # the element name and a value of a Hash with attributes as key value
10
+ # pairs. Children are added as described by this rule.
11
+ #
12
+ # - Text and CDATA are stored in the parent element Hash with a key of
13
+ # '__content__' and a value of the text itself.
14
+ #
15
+ # - If a key already exists in the Hash then the value associated with the key
16
+ # is converted to an Array with the old and new value in it.
17
+ #
18
+ # - Other elements such as the xml prolog, doctype, and comments are ignored.
19
+ #
20
+
3
21
  module MultiXml
4
22
  module Parsers
5
23
  module Ox #:nodoc:
@@ -10,40 +28,70 @@ module MultiXml
10
28
  Exception
11
29
  end
12
30
 
13
- def parse(xml)
14
- doc = ::Ox.parse(xml)
15
- h = { }
16
- element_to_hash(doc, h) unless doc.nil?
17
- h
31
+ def parse(io)
32
+ handler = Handler.new
33
+ ::Ox.sax_parse(handler, io, :convert_special => true)
34
+ handler.doc
18
35
  end
19
36
 
20
- def element_to_hash(e, h)
21
- content = { }
22
- e.attributes.each do |k,v|
23
- content[k.to_s] = v
24
- end
25
- e.nodes.each do |n|
26
- if n.is_a?(::Ox::Element)
27
- element_to_hash(n, content)
28
- elsif n.is_a?(String)
29
- content['__content__'] = n
30
- elsif n.is_a?(::Ox::Node)
31
- content['__content__'] = n.value
37
+ class Handler
38
+ attr_accessor :stack
39
+
40
+ def initialize()
41
+ @stack = []
42
+ end
43
+
44
+ def doc
45
+ @stack[0]
46
+ end
47
+
48
+ def attr(name, value)
49
+ unless @stack.empty?
50
+ append(name, value)
32
51
  end
33
52
  end
34
- if (ex = h[e.name]).nil?
35
- h[e.name] = content
36
- elsif ex.is_a?(Array)
37
- ex << content
38
- else
39
- h[e.name] = [ex, content]
53
+
54
+ def text(value)
55
+ append('__content__', value)
56
+ end
57
+
58
+ def cdata(value)
59
+ append('__content__', value)
60
+ end
61
+
62
+ def start_element(name)
63
+ if @stack.empty?
64
+ @stack.push(Hash.new)
65
+ end
66
+ h = Hash.new
67
+ append(name, h)
68
+ @stack.push(h)
69
+ end
70
+
71
+ def end_element(name)
72
+ @stack.pop()
73
+ end
74
+
75
+ def error(message, line, column)
76
+ raise Exception.new("#{message} at #{line}:#{column}")
77
+ end
78
+
79
+ def append(key, value)
80
+ key = key.to_s
81
+ h = @stack.last
82
+ if h.has_key?(key)
83
+ v = h[key]
84
+ if v.is_a?(Array)
85
+ v << value
86
+ else
87
+ h[key] = [v, value]
88
+ end
89
+ else
90
+ h[key] = value
91
+ end
40
92
  end
41
- end
42
-
43
- def string_parser?
44
- true
45
- end
46
93
 
47
- end
48
- end
49
- end
94
+ end # Handler
95
+ end # Ox
96
+ end # Parsers
97
+ end # MultiXml
@@ -1,3 +1,3 @@
1
1
  module MultiXml
2
- VERSION = "0.4.0"
2
+ VERSION = "0.4.1"
3
3
  end
@@ -2,14 +2,16 @@
2
2
  require File.expand_path('../lib/multi_xml/version', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |gem|
5
+ gem.add_development_dependency 'libxml-ruby', '~> 2.0' unless RUBY_PLATFORM == 'java'
5
6
  gem.add_development_dependency 'nokogiri', '~> 1.4'
7
+ gem.add_development_dependency 'ox', '~> 1.3'
6
8
  gem.add_development_dependency 'rake', '~> 0.9'
7
9
  gem.add_development_dependency 'rdiscount', '~> 1.6'
8
10
  gem.add_development_dependency 'rspec', '~> 2.6'
9
11
  gem.add_development_dependency 'simplecov', '~> 0.4'
10
12
  gem.add_development_dependency 'yard', '~> 0.7'
11
13
  gem.author = "Erik Michaels-Ober"
12
- gem.description = %q{A gem to provide swappable XML backends utilizing LibXML, Nokogiri, or REXML.}
14
+ gem.description = %q{A gem to provide swappable XML backends utilizing LibXML, Nokogiri, Ox, or REXML.}
13
15
  gem.email = 'sferik@gmail.com'
14
16
  gem.files = `git ls-files`.split("\n")
15
17
  gem.homepage = 'https://github.com/sferik/multi_xml'
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby -wW1
2
+
3
+ $: << '.'
4
+ $: << '../lib'
5
+
6
+ if __FILE__ == $0
7
+ while (i = ARGV.index('-I'))
8
+ x,path = ARGV.slice!(i, 2)
9
+ $: << path
10
+ end
11
+ end
12
+
13
+ require 'optparse'
14
+ require 'stringio'
15
+ require 'multi_xml'
16
+
17
+ begin
18
+ require 'libxml'
19
+ rescue Exception => e
20
+ end
21
+ begin
22
+ require 'nokogiri'
23
+ rescue Exception => e
24
+ end
25
+ begin
26
+ require 'ox'
27
+ rescue Exception => e
28
+ end
29
+
30
+ $verbose = 0
31
+ $parsers = []
32
+ $iter = 10
33
+
34
+ opts = OptionParser.new
35
+ opts.on("-v", "increase verbosity") { $verbose += 1 }
36
+ opts.on("-p", "--parser [String]", String, "parser to test") { |p| $parsers = [p] }
37
+ opts.on("-i", "--iterations [Int]", Integer, "iterations") { |i| $iter = i }
38
+ opts.on("-h", "--help", "Show this display") { puts opts; Process.exit!(0) }
39
+ files = opts.parse(ARGV)
40
+
41
+ if $parsers.empty?
42
+ $parsers << 'libxml' if defined?(::LibXML)
43
+ $parsers << 'nokogiri' if defined?(::Nokogiri)
44
+ $parsers << 'ox' if defined?(::Ox)
45
+ end
46
+
47
+ files.each do |filename|
48
+ times = { }
49
+ xml = File.read(filename)
50
+ $parsers.each do |p|
51
+ MultiXml.parser = p
52
+ start = Time.now
53
+ $iter.times do |i|
54
+ io = StringIO.new(xml)
55
+ MultiXml.parse(io)
56
+ end
57
+ dt = Time.now - start
58
+ times[p] = Time.now - start
59
+ end
60
+ times.each do |p,t|
61
+ puts "%8s took %0.3f seconds to parse %s %d times." % [p, t, filename, $iter]
62
+ end
63
+ end
metadata CHANGED
@@ -1,89 +1,151 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: multi_xml
3
- version: !ruby/object:Gem::Version
4
- version: 0.4.0
3
+ version: !ruby/object:Gem::Version
4
+ hash: 13
5
5
  prerelease:
6
+ segments:
7
+ - 0
8
+ - 4
9
+ - 1
10
+ version: 0.4.1
6
11
  platform: ruby
7
- authors:
12
+ authors:
8
13
  - Erik Michaels-Ober
9
14
  autorequire:
10
15
  bindir: bin
11
16
  cert_chain: []
12
- date: 2011-09-06 00:00:00.000000000Z
13
- dependencies:
14
- - !ruby/object:Gem::Dependency
17
+
18
+ date: 2011-09-26 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: libxml-ruby
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 2
31
+ - 0
32
+ version: "2.0"
33
+ type: :development
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
15
36
  name: nokogiri
16
- requirement: &70174632551180 !ruby/object:Gem::Requirement
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
17
39
  none: false
18
- requirements:
40
+ requirements:
19
41
  - - ~>
20
- - !ruby/object:Gem::Version
21
- version: '1.4'
42
+ - !ruby/object:Gem::Version
43
+ hash: 7
44
+ segments:
45
+ - 1
46
+ - 4
47
+ version: "1.4"
22
48
  type: :development
49
+ version_requirements: *id002
50
+ - !ruby/object:Gem::Dependency
51
+ name: ox
23
52
  prerelease: false
24
- version_requirements: *70174632551180
25
- - !ruby/object:Gem::Dependency
26
- name: rake
27
- requirement: &70174632547160 !ruby/object:Gem::Requirement
53
+ requirement: &id003 !ruby/object:Gem::Requirement
28
54
  none: false
29
- requirements:
55
+ requirements:
30
56
  - - ~>
31
- - !ruby/object:Gem::Version
32
- version: '0.9'
57
+ - !ruby/object:Gem::Version
58
+ hash: 9
59
+ segments:
60
+ - 1
61
+ - 3
62
+ version: "1.3"
33
63
  type: :development
64
+ version_requirements: *id003
65
+ - !ruby/object:Gem::Dependency
66
+ name: rake
34
67
  prerelease: false
35
- version_requirements: *70174632547160
36
- - !ruby/object:Gem::Dependency
37
- name: rdiscount
38
- requirement: &70174632529880 !ruby/object:Gem::Requirement
68
+ requirement: &id004 !ruby/object:Gem::Requirement
39
69
  none: false
40
- requirements:
70
+ requirements:
41
71
  - - ~>
42
- - !ruby/object:Gem::Version
43
- version: '1.6'
72
+ - !ruby/object:Gem::Version
73
+ hash: 25
74
+ segments:
75
+ - 0
76
+ - 9
77
+ version: "0.9"
44
78
  type: :development
79
+ version_requirements: *id004
80
+ - !ruby/object:Gem::Dependency
81
+ name: rdiscount
45
82
  prerelease: false
46
- version_requirements: *70174632529880
47
- - !ruby/object:Gem::Dependency
48
- name: rspec
49
- requirement: &70174632527760 !ruby/object:Gem::Requirement
83
+ requirement: &id005 !ruby/object:Gem::Requirement
50
84
  none: false
51
- requirements:
85
+ requirements:
52
86
  - - ~>
53
- - !ruby/object:Gem::Version
54
- version: '2.6'
87
+ - !ruby/object:Gem::Version
88
+ hash: 3
89
+ segments:
90
+ - 1
91
+ - 6
92
+ version: "1.6"
55
93
  type: :development
94
+ version_requirements: *id005
95
+ - !ruby/object:Gem::Dependency
96
+ name: rspec
56
97
  prerelease: false
57
- version_requirements: *70174632527760
58
- - !ruby/object:Gem::Dependency
59
- name: simplecov
60
- requirement: &70174632526060 !ruby/object:Gem::Requirement
98
+ requirement: &id006 !ruby/object:Gem::Requirement
61
99
  none: false
62
- requirements:
100
+ requirements:
63
101
  - - ~>
64
- - !ruby/object:Gem::Version
65
- version: '0.4'
102
+ - !ruby/object:Gem::Version
103
+ hash: 15
104
+ segments:
105
+ - 2
106
+ - 6
107
+ version: "2.6"
66
108
  type: :development
109
+ version_requirements: *id006
110
+ - !ruby/object:Gem::Dependency
111
+ name: simplecov
67
112
  prerelease: false
68
- version_requirements: *70174632526060
69
- - !ruby/object:Gem::Dependency
70
- name: yard
71
- requirement: &70174632525380 !ruby/object:Gem::Requirement
113
+ requirement: &id007 !ruby/object:Gem::Requirement
72
114
  none: false
73
- requirements:
115
+ requirements:
74
116
  - - ~>
75
- - !ruby/object:Gem::Version
76
- version: '0.7'
117
+ - !ruby/object:Gem::Version
118
+ hash: 3
119
+ segments:
120
+ - 0
121
+ - 4
122
+ version: "0.4"
77
123
  type: :development
124
+ version_requirements: *id007
125
+ - !ruby/object:Gem::Dependency
126
+ name: yard
78
127
  prerelease: false
79
- version_requirements: *70174632525380
80
- description: A gem to provide swappable XML backends utilizing LibXML, Nokogiri, or
81
- REXML.
128
+ requirement: &id008 !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ hash: 5
134
+ segments:
135
+ - 0
136
+ - 7
137
+ version: "0.7"
138
+ type: :development
139
+ version_requirements: *id008
140
+ description: A gem to provide swappable XML backends utilizing LibXML, Nokogiri, Ox, or REXML.
82
141
  email: sferik@gmail.com
83
142
  executables: []
143
+
84
144
  extensions: []
145
+
85
146
  extra_rdoc_files: []
86
- files:
147
+
148
+ files:
87
149
  - .gemtest
88
150
  - .gitignore
89
151
  - .rspec
@@ -104,31 +166,42 @@ files:
104
166
  - spec/helper.rb
105
167
  - spec/multi_xml_spec.rb
106
168
  - spec/parser_shared_example.rb
169
+ - spec/speed.rb
107
170
  homepage: https://github.com/sferik/multi_xml
108
171
  licenses: []
172
+
109
173
  post_install_message:
110
174
  rdoc_options: []
111
- require_paths:
175
+
176
+ require_paths:
112
177
  - lib
113
- required_ruby_version: !ruby/object:Gem::Requirement
178
+ required_ruby_version: !ruby/object:Gem::Requirement
114
179
  none: false
115
- requirements:
116
- - - ! '>='
117
- - !ruby/object:Gem::Version
118
- version: '0'
119
- required_rubygems_version: !ruby/object:Gem::Requirement
180
+ requirements:
181
+ - - ">="
182
+ - !ruby/object:Gem::Version
183
+ hash: 3
184
+ segments:
185
+ - 0
186
+ version: "0"
187
+ required_rubygems_version: !ruby/object:Gem::Requirement
120
188
  none: false
121
- requirements:
122
- - - ! '>='
123
- - !ruby/object:Gem::Version
124
- version: '0'
189
+ requirements:
190
+ - - ">="
191
+ - !ruby/object:Gem::Version
192
+ hash: 3
193
+ segments:
194
+ - 0
195
+ version: "0"
125
196
  requirements: []
197
+
126
198
  rubyforge_project:
127
199
  rubygems_version: 1.8.10
128
200
  signing_key:
129
201
  specification_version: 3
130
202
  summary: A generic swappable back-end for XML parsing
131
- test_files:
203
+ test_files:
132
204
  - spec/helper.rb
133
205
  - spec/multi_xml_spec.rb
134
206
  - spec/parser_shared_example.rb
207
+ - spec/speed.rb