xmlhasher_with_attributes 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.travis.yml +19 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +84 -0
- data/Rakefile +10 -0
- data/benchmark/benchmark.rb +95 -0
- data/lib/xmlhasher.rb +25 -0
- data/lib/xmlhasher/configurable.rb +20 -0
- data/lib/xmlhasher/handler.rb +55 -0
- data/lib/xmlhasher/node.rb +39 -0
- data/lib/xmlhasher/parser.rb +22 -0
- data/lib/xmlhasher/util.rb +7 -0
- data/lib/xmlhasher/version.rb +3 -0
- data/test/fixtures/institution.xml +43 -0
- data/test/fixtures/institutions.xml +1 -0
- data/test/test_helper.rb +22 -0
- data/test/xmlhasher/parser_test.rb +250 -0
- data/test/xmlhasher/xmlhasher_test.rb +41 -0
- data/xmlhasher.gemspec +28 -0
- metadata +124 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 281d60e1c23ea6fa21cdc0435c782ac5929b4e9485af5f9c751dc63582d0db22
|
4
|
+
data.tar.gz: be498e7f638bf459709f228a8c04763c19680a973b951450b31a504336d79ca1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b0bac84ba10e88731588afb4229aa4f5277054324c578eb1487012aeaa3f9548d96d41d8df0970a673cd991ba2eb416f1c574c870170aa5a175f662bfa0b40af
|
7
|
+
data.tar.gz: 415fcbbdef80104bb05210e07141ce13ca557b87aa016c96c46c777f4d388ef4f0a7c2e81b9fd70768a216998d68ae871da96039d0d890631747ab75aafa58d3
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
language: ruby
|
2
|
+
bundler_args: "--without development"
|
3
|
+
before_install:
|
4
|
+
- gem install bundler
|
5
|
+
rvm:
|
6
|
+
- 1.8.7
|
7
|
+
- 1.9.2
|
8
|
+
- 1.9.3
|
9
|
+
- 2.0.0
|
10
|
+
- jruby-18mode
|
11
|
+
- jruby-19mode
|
12
|
+
- rbx
|
13
|
+
- rbx-19mode
|
14
|
+
- ree
|
15
|
+
env:
|
16
|
+
- JRUBY_OPTS="-Xcext.enabled=true"
|
17
|
+
script:
|
18
|
+
- gem build xmlhasher.gemspec
|
19
|
+
- gem install xmlhasher-*
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Gene Drabkin
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# XmlHasher_with_attributes
|
2
|
+
|
3
|
+
Fast XML to Ruby Hash converter
|
4
|
+
|
5
|
+
This gem is a fork of a fork of [XmlHasher](https://github.com/cloocher/xmlhasher) (through [pawelma](https://github.com/pawelma/xmlhasher)).
|
6
|
+
Compared with the original code, there is only one difference: attributes are no longer dropped when an element contains only text content.
|
7
|
+
I did the work to publish the gem on rubygems.org with a different name to make it available.
|
8
|
+
|
9
|
+
Example:
|
10
|
+
```ruby
|
11
|
+
XmlHasher.parse('<tag attribute="attr_val">content</tag>')
|
12
|
+
|
13
|
+
# In the original xmlhasher, the command above returns the following hash:
|
14
|
+
{
|
15
|
+
tag: "content"
|
16
|
+
}
|
17
|
+
|
18
|
+
# With xmlhasher_with_attributes, the returned hash is:
|
19
|
+
{
|
20
|
+
tag: {
|
21
|
+
attribute: "attr_val",
|
22
|
+
value: "content"
|
23
|
+
}
|
24
|
+
}
|
25
|
+
```
|
26
|
+
|
27
|
+
## Installation
|
28
|
+
|
29
|
+
* clone this repo
|
30
|
+
* run
|
31
|
+
```ruby
|
32
|
+
bundle install
|
33
|
+
rake install
|
34
|
+
```
|
35
|
+
|
36
|
+
* require
|
37
|
+
```ruby
|
38
|
+
require 'xmlhasher'
|
39
|
+
```
|
40
|
+
|
41
|
+
## Usage
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
require 'xmlhasher'
|
45
|
+
|
46
|
+
# XmlHasher global configuration
|
47
|
+
#
|
48
|
+
# snakecase - convert all keys to snake case notation
|
49
|
+
# ignore_namespaces - remove XML namespaces
|
50
|
+
# string_keys - represent keys as Strings instead of Symbols
|
51
|
+
#
|
52
|
+
# example configuration (an option that is never set is treated as off)
|
53
|
+
XmlHasher.configure do |config|
|
54
|
+
config.snakecase = true
|
55
|
+
config.ignore_namespaces = true
|
56
|
+
config.string_keys = false
|
57
|
+
end
|
58
|
+
|
59
|
+
# alternatively, specify configuration options when instantiating a Parser
|
60
|
+
parser = XmlHasher::Parser.new(
|
61
|
+
:snakecase => true,
|
62
|
+
:ignore_namespaces => true,
|
63
|
+
:string_keys => false
|
64
|
+
)
|
65
|
+
|
66
|
+
# by default, XmlHasher will convert all keys to symbols. If you want all keys to be Strings, set :string_keys option to 'true'
|
67
|
+
|
68
|
+
# parse XML file
|
69
|
+
XmlHasher.parse(File.new('/path/to/my/file.xml'))
|
70
|
+
|
71
|
+
# parse XML string
|
72
|
+
XmlHasher.parse("<tag1><tag2>content</tag2></tag1>")
|
73
|
+
# => {:tag1=>{:tag2=>"content"}}
|
74
|
+
```
|
75
|
+
|
76
|
+
## Requirements
|
77
|
+
|
78
|
+
* Ruby 1.8.7 or higher
|
79
|
+
|
80
|
+
## Copyright
|
81
|
+
Copyright (c) 2013 Gene Drabkin.
|
82
|
+
See [LICENSE][] for details.
|
83
|
+
|
84
|
+
[license]: LICENSE.txt
|
data/Rakefile
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
$:.push File.expand_path('../../lib', __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'xmlhasher'
|
5
|
+
# Optional libraries the benchmark compares against, as
# [feature to require, name shown in the hint, gem to install].
#
# A missing library raises LoadError, which is a ScriptError and therefore NOT
# caught by a bare `rescue` (that only covers StandardError). It is rescued
# explicitly so the install hint is printed instead of aborting the script.
[
  ['nori', 'nori', 'nori'],
  ['active_support/core_ext/hash/conversions', 'active_support', 'activesupport'],
  ['xmlsimple', 'xmlsimple', 'xml-simple'],
  ['nokogiri', 'nokogiri', 'nokogiri'],
  ['libxml', 'libxml', 'libxml-ruby']
].each do |feature, label, gem_name|
  begin
    require feature
  rescue LoadError
    puts "#{label} gem is not installed, run 'gem install #{gem_name}'"
  end
end
|
30
|
+
|
31
|
+
|
32
|
+
# Benchmark 1: convert a small XML document, held in memory as a String,
# `runs` times with each library.
runs = 100
xml = File.read(File.expand_path('../../test/fixtures/institution.xml', __FILE__))
puts 'Converting small xml from text to Hash:'
Benchmark.bm 10 do |x|
  ActiveSupport::XmlMini.backend = ActiveSupport::XmlMini_REXML
  x.report 'activesupport(rexml) ' do
    runs.times { Hash.from_xml(xml) }
  end

  ActiveSupport::XmlMini.backend = 'LibXML'
  x.report 'activesupport(libxml) ' do
    runs.times { Hash.from_xml(xml) }
  end

  ActiveSupport::XmlMini.backend = 'Nokogiri'
  x.report 'activesupport(nokogiri)' do
    runs.times { Hash.from_xml(xml) }
  end

  x.report 'xmlsimple ' do
    runs.times { XmlSimple.xml_in(xml) }
  end

  x.report 'nori ' do
    runs.times { Nori.new(:advanced_typecasting => false).parse(xml) }
  end

  x.report 'xmlhasher ' do
    runs.times { XmlHasher.parse(xml) }
  end
end

# Benchmark 2: convert a large XML document read from disk.
# Files are opened with a block so every handle is closed after its run
# instead of being left open until garbage collection.
puts
runs = 5
path = File.expand_path('../../test/fixtures/institutions.xml', __FILE__)
puts 'Converting large xml from file to Hash:'
Benchmark.bm 5 do |x|
  ActiveSupport::XmlMini.backend = ActiveSupport::XmlMini_REXML
  x.report 'activesupport(rexml) ' do
    runs.times { File.open(path) { |file| Hash.from_xml(file) } }
  end

  ActiveSupport::XmlMini.backend = 'LibXML'
  x.report 'activesupport(libxml) ' do
    # Intentionally left empty, so the reported time is not a real measurement.
    #runs.times { Hash.from_xml(File.new(path)) } # Segmentation fault
  end

  ActiveSupport::XmlMini.backend = 'Nokogiri'
  x.report 'activesupport(nokogiri)' do
    runs.times { File.open(path) { |file| Hash.from_xml(file) } }
  end

  x.report 'xmlsimple ' do
    runs.times { XmlSimple.xml_in(path) }
  end

  x.report 'nori ' do
    # Nori doesn't support reading from a stream, load the file in memory
    runs.times { Nori.new(:advanced_typecasting => false).parse(File.read(path)) }
  end

  x.report 'xmlhasher ' do
    runs.times { File.open(path) { |file| XmlHasher.parse(file) } }
  end
end
|
data/lib/xmlhasher.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'xmlhasher/configurable'
|
2
|
+
require 'xmlhasher/handler'
|
3
|
+
require 'xmlhasher/parser'
|
4
|
+
require 'xmlhasher/node'
|
5
|
+
require 'xmlhasher/util'
|
6
|
+
require 'xmlhasher/version'
|
7
|
+
|
8
|
+
# Module-level facade: XmlHasher itself is configurable and forwards any
# method the shared parser responds to (e.g. XmlHasher.parse) to that parser.
module XmlHasher
  class << self
    include XmlHasher::Configurable

    # Returns the shared parser, built on first use from the current options.
    def parser
      @parser ||= XmlHasher::Parser.new(options)
    end

    # Applies the configuration block (see Configurable#configure) and then
    # discards the memoized parser, so options changed after the first parse
    # are picked up by the next one instead of being silently ignored.
    def configure
      result = super
      @parser = nil
      result
    end

    private

    # Delegates unknown methods to the shared parser when it can handle them.
    def method_missing(method_name, *args, &block)
      return super unless parser.respond_to?(method_name)
      parser.send(method_name, *args, &block)
    end

    # Keeps respond_to? / method lookups consistent with method_missing above.
    def respond_to_missing?(method_name, include_private = false)
      parser.respond_to?(method_name) || super
    end

  end

end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module XmlHasher
  # Mixin that gives an object writable parser options and a block-style
  # `configure` entry point.
  module Configurable

    # Names of the supported options; each one gets a writer below.
    KEYS = [:snakecase, :ignore_namespaces, :string_keys]

    attr_writer(*KEYS)

    # Yields the receiver so the caller can assign options, then returns it.
    def configure
      yield(self)
      self
    end

    private

    # Collects the current option values into a Hash keyed by option name.
    # Options that were never assigned come back as nil.
    def options
      pairs = XmlHasher::Configurable::KEYS.map do |key|
        [key, instance_variable_get(:"@#{key}")]
      end
      Hash[pairs]
    end

  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'ox'
|
2
|
+
require 'escape_utils'
|
3
|
+
|
4
|
+
module XmlHasher
  # Ox SAX handler that collects elements into a stack of Node objects and
  # converts the outermost element to a Hash once its closing tag is seen.
  #
  # Options (any truthy value enables them):
  #   :ignore_namespaces - strip the "prefix:" part of names and skip
  #                        xmlns / xsi attributes
  #   :snakecase         - pass names through Util.snakecase
  #   :string_keys       - use String keys instead of Symbol keys
  class Handler < ::Ox::Sax
    # Matches "xmlns", "xmlns:foo", "xsi" and "xsi:foo" only, so an ordinary
    # attribute that merely starts with those letters (e.g. "xsize") is kept.
    NAMESPACE_ATTRIBUTE = /\A(?:xmlns|xsi)(?::|\z)/

    def initialize(options = {})
      @options = options
      @stack = []
    end

    # Hash built from the outermost element, or {} if none was completed.
    def to_hash
      @hash || {}
    end

    # SAX callback: a new element opens; push a Node for it.
    def start_element(name)
      @stack.push(Node.new(transform(name)))
    end

    # SAX callback: attribute of the element currently being opened.
    # Skipped when no element is open or the attribute is namespace noise.
    def attr(name, value)
      return if @stack.empty? || ignore_attribute?(name)
      @stack.last.attributes[transform(name)] = unescape(value)
    end

    # SAX callback: text content. Ignored when no element is open, mirroring
    # the guard in #attr, instead of failing on a nil stack top.
    def text(value)
      return if @stack.empty?
      @stack.last.text = unescape(value)
    end

    # SAX callback: an element closes. The outermost element becomes the
    # result hash; any other element is attached to its parent.
    def end_element(name)
      node = @stack.pop
      return if node.nil? # unbalanced closing tag: nothing to attach

      if @stack.empty?
        @hash = node.to_hash
      else
        @stack.last.children << node
      end
    end

    private

    # Applies the configured key transformations to an element/attribute name.
    # The name is converted to a String first so that :string_keys yields
    # Strings even when the parser hands over Symbols, and so Util.snakecase
    # always receives a String.
    def transform(name)
      name = name.to_s
      name = name.split(':').last if @options[:ignore_namespaces]
      name = Util.snakecase(name) if @options[:snakecase]
      name = name.to_sym unless @options[:string_keys]
      name
    end

    # Decodes HTML entities in attribute values and text.
    def unescape(value)
      EscapeUtils.unescape_html(value)
    end

    # True when namespaces are ignored and the attribute is an xmlns/xsi one.
    def ignore_attribute?(name)
      @options[:ignore_namespaces] ? !(name.to_s =~ NAMESPACE_ATTRIBUTE).nil? : false
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module XmlHasher
  # One XML element collected during parsing: its name, attributes, child
  # nodes and text content.
  class Node
    attr_accessor :name, :attributes, :children, :text

    def initialize(name)
      @name = name
      @attributes = {}
      @children = []
    end

    # Converts the node to a single-entry Hash { name => value } where value is:
    #   * the text, when the node has text and no non-blank attributes
    #   * the attributes plus a :value entry holding the text, when it has both
    #   * the attributes merged with the children, when it has no text
    #     (same-named siblings are collected into an Array)
    #   * nil, when the result would otherwise be empty
    def to_hash
      attrs = clean_attributes
      value =
        if text
          # `:value => text` rather than `value: text` keeps the file loadable
          # on Ruby 1.8.7, which the gem lists as supported.
          attrs.empty? ? text : attrs.merge(:value => text)
        else
          attrs.merge(children_hash)
        end
      value = nil if value.empty?
      { name => value }
    end

    private

    # Children as a Hash: a child with a unique name contributes its own
    # to_hash entry, same-named children become an Array of their values.
    def children_hash
      children.group_by { |child| child.name }.inject({}) do |result, (child_name, siblings)|
        if siblings.length == 1
          result.merge!(siblings.first.to_hash)
        else
          result[child_name] = siblings.map { |child| child.to_hash[child.name] }
        end
        result
      end
    end

    # Attributes without nil or blank values. Built fresh on every call so the
    # result never goes stale and callers may merge into it safely.
    def clean_attributes
      attributes.inject({}) do |result, (key, value)|
        result[key] = value unless value.nil? || value.to_s.empty?
        result
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
module XmlHasher
  # Parses XML into a Hash by driving Ox's SAX parser with a Handler.
  class Parser

    # options - Hash of settings handed unchanged to every Handler.
    def initialize(options = {})
      @options = options
    end

    # Parses +xml+ (a String or an IO-like object) and returns the Hash
    # produced by the handler.
    def parse(xml)
      Handler.new(@options).tap { |sax| Ox.sax_parse(sax, convert(xml)) }.to_hash
    end

    private

    # Returns +xml+ untouched when it already looks like a stream (responds
    # to read or readpartial); otherwise wraps it in a StringIO.
    def convert(xml)
      return xml if xml.respond_to?(:read) || xml.respond_to?(:readpartial)

      StringIO.new(xml)
    end
  end
end
|