xmlhasher_with_attributes 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.travis.yml +19 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +84 -0
- data/Rakefile +10 -0
- data/benchmark/benchmark.rb +95 -0
- data/lib/xmlhasher.rb +25 -0
- data/lib/xmlhasher/configurable.rb +20 -0
- data/lib/xmlhasher/handler.rb +55 -0
- data/lib/xmlhasher/node.rb +39 -0
- data/lib/xmlhasher/parser.rb +22 -0
- data/lib/xmlhasher/util.rb +7 -0
- data/lib/xmlhasher/version.rb +3 -0
- data/test/fixtures/institution.xml +43 -0
- data/test/fixtures/institutions.xml +1 -0
- data/test/test_helper.rb +22 -0
- data/test/xmlhasher/parser_test.rb +250 -0
- data/test/xmlhasher/xmlhasher_test.rb +41 -0
- data/xmlhasher.gemspec +28 -0
- metadata +124 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 281d60e1c23ea6fa21cdc0435c782ac5929b4e9485af5f9c751dc63582d0db22
|
4
|
+
data.tar.gz: be498e7f638bf459709f228a8c04763c19680a973b951450b31a504336d79ca1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b0bac84ba10e88731588afb4229aa4f5277054324c578eb1487012aeaa3f9548d96d41d8df0970a673cd991ba2eb416f1c574c870170aa5a175f662bfa0b40af
|
7
|
+
data.tar.gz: 415fcbbdef80104bb05210e07141ce13ca557b87aa016c96c46c777f4d388ef4f0a7c2e81b9fd70768a216998d68ae871da96039d0d890631747ab75aafa58d3
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
language: ruby
|
2
|
+
bundler_args: "--without development"
|
3
|
+
before_install:
|
4
|
+
- gem install bundler
|
5
|
+
rvm:
|
6
|
+
- 1.8.7
|
7
|
+
- 1.9.2
|
8
|
+
- 1.9.3
|
9
|
+
- 2.0.0
|
10
|
+
- jruby-18mode
|
11
|
+
- jruby-19mode
|
12
|
+
- rbx
|
13
|
+
- rbx-19mode
|
14
|
+
- ree
|
15
|
+
env:
|
16
|
+
- JRUBY_OPTS="-Xcext.enabled=true"
|
17
|
+
script:
|
18
|
+
- gem build xmlhasher.gemspec
|
19
|
+
- gem install xmlhasher-*
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Gene Drabkin
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# XmlHasher_with_attributes
|
2
|
+
|
3
|
+
Fast XML to Ruby Hash converter
|
4
|
+
|
5
|
+
This gem is a fork of a fork of [XmlHasher](https://github.com/cloocher/xmlhasher) (through [pawelma](https://github.com/pawelma/xmlhasher)).
|
6
|
+
Compared to the original code, there is only one difference: attributes are no longer dropped when an element contains only text content.
|
7
|
+
I did the work to publish the gem on rubygems.org with a different name to make it available.
|
8
|
+
|
9
|
+
Example:
|
10
|
+
```ruby
|
11
|
+
XmlHasher.parse('<tag attribute="attr_val">content</tag>')
|
12
|
+
|
13
|
+
# With the original xmlhasher, the command above returns the following hash:
|
14
|
+
{
|
15
|
+
tag: "content"
|
16
|
+
}
|
17
|
+
|
18
|
+
# With xmlhasher_with_attributes, the returned hash is:
|
19
|
+
{
|
20
|
+
tag: {
|
21
|
+
attribute: "attr_val",
|
22
|
+
value: "content"
|
23
|
+
}
|
24
|
+
}
|
25
|
+
```
|
26
|
+
|
27
|
+
## Installation
|
28
|
+
|
29
|
+
* clone this repo
|
30
|
+
* run
|
31
|
+
```ruby
|
32
|
+
bundle install
|
33
|
+
rake install
|
34
|
+
```
|
35
|
+
|
36
|
+
* require
|
37
|
+
```ruby
|
38
|
+
require 'xmlhasher'
|
39
|
+
```
|
40
|
+
|
41
|
+
## Usage
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
require 'xmlhasher'
|
45
|
+
|
46
|
+
# XmlHasher global configuration
|
47
|
+
#
|
48
|
+
# snakecase - convert all keys to snake case notation
|
49
|
+
# ignore_namespaces - remove XML namespaces
|
50
|
+
# string_keys - represent keys as Strings instead of Symbols
|
51
|
+
#
|
52
|
+
# here is default configuration
|
53
|
+
XmlHasher.configure do |config|
|
54
|
+
config.snakecase = true
|
55
|
+
config.ignore_namespaces = true
|
56
|
+
config.string_keys = false
|
57
|
+
end
|
58
|
+
|
59
|
+
# alternatively, specify configuration options when instantiating a Parser
|
60
|
+
parser = XmlHasher::Parser.new(
|
61
|
+
:snakecase => true,
|
62
|
+
:ignore_namespaces => true,
|
63
|
+
:string_keys => false
|
64
|
+
)
|
65
|
+
|
66
|
+
# by default, XmlHasher will convert all keys to symbols. If you want all keys to be Strings, set :string_keys option to 'true'
|
67
|
+
|
68
|
+
# parse XML file
|
69
|
+
XmlHasher.parse(File.new('/path/to/my/file.xml'))
|
70
|
+
|
71
|
+
# parse XML string
|
72
|
+
XmlHasher.parse("<tag1><tag2>content</tag2></tag1>")
|
73
|
+
# => {:tag1=>{:tag2=>"content"}}
|
74
|
+
```
|
75
|
+
|
76
|
+
## Requirements
|
77
|
+
|
78
|
+
* Ruby 1.8.7 or higher
|
79
|
+
|
80
|
+
## Copyright
|
81
|
+
Copyright (c) 2013 Gene Drabkin.
|
82
|
+
See [LICENSE][] for details.
|
83
|
+
|
84
|
+
[license]: LICENSE.txt
|
data/Rakefile
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
$:.push File.expand_path('../../lib', __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'xmlhasher'
|
5
|
+
# Tries to load an optional library used by the benchmarks below.
#
# `require` raises LoadError when a library is missing. LoadError is a
# ScriptError, not a StandardError, so a bare `rescue` would not catch it and
# the script would abort before printing the install hint.
#
# library  - path handed to `require`
# gem_name - gem to suggest in the install hint
# label    - name shown in the hint (defaults to the library path)
#
# Returns true when the library could be loaded, false otherwise. The
# benchmarks further down still reference these libraries directly, so a
# missing one will still fail at the point where it is used.
def optional_require(library, gem_name, label = library)
  require library
  true
rescue LoadError
  puts "#{label} gem is not installed, run 'gem install #{gem_name}'"
  false
end

optional_require 'nori', 'nori'
optional_require 'active_support/core_ext/hash/conversions', 'activesupport', 'active_support'
optional_require 'xmlsimple', 'xml-simple'
optional_require 'nokogiri', 'nokogiri'
optional_require 'libxml', 'libxml-ruby'
|
30
|
+
|
31
|
+
|
32
|
+
# Benchmark 1: convert a small XML document, already loaded into a String,
# to a Hash with each library. Every report repeats the conversion `runs` times.
runs = 100
xml = File.read(File.expand_path('../../test/fixtures/institution.xml', __FILE__))
puts 'Converting small xml from text to Hash:'
Benchmark.bm(10) do |bench|
  # ActiveSupport is measured once per XmlMini backend.
  ActiveSupport::XmlMini.backend = ActiveSupport::XmlMini_REXML
  bench.report('activesupport(rexml) ') { runs.times { Hash.from_xml(xml) } }

  ActiveSupport::XmlMini.backend = 'LibXML'
  bench.report('activesupport(libxml) ') { runs.times { Hash.from_xml(xml) } }

  ActiveSupport::XmlMini.backend = 'Nokogiri'
  bench.report('activesupport(nokogiri)') { runs.times { Hash.from_xml(xml) } }

  bench.report('xmlsimple ') { runs.times { XmlSimple.xml_in(xml) } }

  bench.report('nori ') { runs.times { Nori.new(:advanced_typecasting => false).parse(xml) } }

  bench.report('xmlhasher ') { runs.times { XmlHasher.parse(xml) } }
end
|
63
|
+
|
64
|
+
# Benchmark 2: convert a large XML file to a Hash, reading from disk.
# Files are opened with the block form of File.open so every handle is closed
# as soon as the conversion finishes instead of lingering until GC.
puts
runs = 5
path = File.expand_path('../../test/fixtures/institutions.xml', __FILE__)
puts 'Converting large xml from file to Hash:'
Benchmark.bm 5 do |x|
  ActiveSupport::XmlMini.backend = ActiveSupport::XmlMini_REXML
  x.report 'activesupport(rexml) ' do
    runs.times { File.open(path) { |file| Hash.from_xml(file) } }
  end

  ActiveSupport::XmlMini.backend = 'LibXML'
  x.report 'activesupport(libxml) ' do
    # Intentionally disabled: the original author recorded a crash here.
    #runs.times { Hash.from_xml(File.new(path)) } # Segmentation fault
  end

  ActiveSupport::XmlMini.backend = 'Nokogiri'
  x.report 'activesupport(nokogiri)' do
    runs.times { File.open(path) { |file| Hash.from_xml(file) } }
  end

  x.report 'xmlsimple ' do
    runs.times { XmlSimple.xml_in(path) }
  end

  x.report 'nori ' do
    # Nori doesn't support reading from a stream, load the file in memory
    runs.times { Nori.new(:advanced_typecasting => false).parse(File.read(path)) }
  end

  x.report 'xmlhasher ' do
    runs.times { File.open(path) { |file| XmlHasher.parse(file) } }
  end
end
|
data/lib/xmlhasher.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'xmlhasher/configurable'
|
2
|
+
require 'xmlhasher/handler'
|
3
|
+
require 'xmlhasher/parser'
|
4
|
+
require 'xmlhasher/node'
|
5
|
+
require 'xmlhasher/util'
|
6
|
+
require 'xmlhasher/version'
|
7
|
+
|
8
|
+
# Top-level entry point. Module-level calls that the module itself does not
# define (for example XmlHasher.parse) are forwarded to a shared Parser that
# is built from the module-level configuration.
module XmlHasher
  class << self
    include XmlHasher::Configurable

    # Returns the shared parser built from the current configuration.
    #
    # The parser is cached, but rebuilt whenever the configured options differ
    # from the ones it was created with, so a `configure` call made after the
    # first parse is not silently ignored.
    def parser
      current = options
      unless @parser && @parser_options == current
        @parser_options = current
        @parser = XmlHasher::Parser.new(current)
      end
      @parser
    end

    private

    # Delegates any public method the parser responds to; everything else
    # falls through to the default NoMethodError behaviour.
    def method_missing(method_name, *args, &block)
      return super unless parser.respond_to?(method_name)
      parser.send(method_name, *args, &block)
    end

    # Keeps respond_to? / method in agreement with method_missing above.
    def respond_to_missing?(method_name, include_private = false)
      parser.respond_to?(method_name) || super
    end

  end

end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module XmlHasher
  # Mixin that gives its host writable settings (snakecase, ignore_namespaces,
  # string_keys) and a block-style `configure` helper.
  module Configurable

    attr_writer :snakecase, :ignore_namespaces, :string_keys

    # Names of the supported settings. Frozen so the shared list cannot be
    # modified by accident at runtime.
    KEYS = [:snakecase, :ignore_namespaces, :string_keys].freeze

    # Yields the host object so settings can be assigned in a block.
    #
    # Returns the host object itself.
    def configure
      yield self
      self
    end

    private

    # Collects the current settings into a Hash keyed by setting name.
    # A setting that was never assigned is reported as nil.
    def options
      KEYS.inject({}) do |settings, key|
        ivar = :"@#{key}"
        settings[key] = instance_variable_defined?(ivar) ? instance_variable_get(ivar) : nil
        settings
      end
    end

  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'ox'
|
2
|
+
require 'escape_utils'
|
3
|
+
|
4
|
+
module XmlHasher
  # SAX handler (subclass of Ox::Sax) that assembles the parsed document into
  # a nested Hash. Open elements are kept on a stack of Node objects; when the
  # outermost element closes, its Hash form becomes the result.
  class Handler < ::Ox::Sax
    # options - Hash of settings read by this class:
    #           :ignore_namespaces, :snakecase, :string_keys.
    def initialize(options = {})
      @options = options
      @stack = []
    end

    # Returns the Hash built for the outermost element, or an empty Hash when
    # no element has been completed.
    def to_hash
      @hash || {}
    end

    # Opens a new element and makes it the current node.
    def start_element(name)
      @stack.push(Node.new(transform(name)))
    end

    # Records an attribute on the current node. Attributes seen outside any
    # element, or filtered out by ignore_attribute?, are skipped.
    def attr(name, value)
      return if @stack.empty? || ignore_attribute?(name)
      @stack.last.attributes[transform(name)] = escape(value)
    end

    # Stores text on the current node. Text that arrives while no element is
    # open is ignored, matching the guard in #attr, instead of raising
    # NoMethodError on nil.
    def text(value)
      return if @stack.empty?
      @stack.last.text = escape(value)
    end

    # Closes the current node: the outermost node becomes the result, any
    # other node is attached to its parent. A close event with no open
    # element is ignored rather than raising NoMethodError on nil.
    def end_element(name)
      node = @stack.pop
      return if node.nil?
      if @stack.empty?
        @hash = node.to_hash
      else
        @stack.last.children << node
      end
    end

    private

    # Applies the configured key transformations to an element or attribute
    # name: drops everything up to the last ':' (ignore_namespaces), passes it
    # through Util.snakecase (snakecase), and converts it to a Symbol unless
    # string_keys is set.
    def transform(name)
      name = name.to_s.split(':').last if @options[:ignore_namespaces]
      name = Util.snakecase(name) if @options[:snakecase]
      name = name.to_sym unless @options[:string_keys]
      name
    end

    # Despite the name, this decodes HTML entities in the given value.
    def escape(value)
      EscapeUtils.unescape_html(value)
    end

    # True when ignore_namespaces is set and the attribute name starts with
    # "xmlns" or "xsi".
    # NOTE(review): the prefix match also hits unrelated names such as
    # "xsize" -- confirm whether that is intended.
    def ignore_attribute?(name)
      @options[:ignore_namespaces] ? !name.to_s[/^(xmlns|xsi)/].nil? : false
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module XmlHasher
  # One XML element: its name, attributes, child nodes and text content.
  class Node
    attr_accessor :name, :attributes, :children, :text

    # name - key under which this node appears in the resulting Hash.
    def initialize(name)
      @name = name
      @attributes = {}
      @children = []
    end

    # Converts the node (and, recursively, its children) to a Hash of the
    # form { name => value }, where value is:
    #
    # * the text itself, when the node has text and no non-blank attributes
    # * the attributes plus a :value entry holding the text, when it has both
    # * the attributes merged with the children, when it has no text
    #   (children sharing a name are collected into an Array)
    # * nil when the result would otherwise be empty
    #
    # Children are ignored when text is present. The conversion works on a
    # fresh copy of the attributes, so calling it does not modify the node and
    # always reflects its current state.
    def to_hash
      attrs = clean_attributes
      value =
        if text
          # Hash-rocket syntax keeps this file parseable on Ruby 1.8.7.
          attrs.empty? ? text : attrs.merge(:value => text)
        else
          attrs.merge(children_hash)
        end
      value = nil if value.empty?
      { name => value }
    end

    private

    # Hash form of the children: a name that occurs once maps to that child's
    # value, a repeated name maps to an Array of values in document order.
    def children_hash
      children.group_by { |child| child.name }.inject({}) do |result, (key, nodes)|
        if nodes.length == 1
          result.merge!(nodes.first.to_hash)
        else
          result[key] = nodes.map { |child| child.to_hash[child.name] }
        end
        result
      end
    end

    # New Hash containing only the attributes whose value is neither nil nor
    # an empty string.
    def clean_attributes
      attributes.inject({}) do |result, (key, value)|
        result[key] = value unless value.nil? || value.to_s.empty?
        result
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
module XmlHasher
  # Parses XML into a Hash by running Ox's SAX parser with a Handler.
  class Parser

    # options - Hash of settings passed on to each Handler.
    def initialize(options = {})
      @options = options
    end

    # Parses the given XML and returns the resulting Hash.
    #
    # xml - an object responding to `read` or `readpartial` (used as is), or
    #       anything else, which is converted with `to_s` and parsed as text.
    def parse(xml)
      handler = Handler.new(@options)
      Ox.sax_parse(handler, convert(xml))
      handler.to_hash
    end

    private

    # Returns a readable source for the SAX parser. Stream-like objects pass
    # through unchanged; other input is wrapped in a StringIO. `to_s` lets
    # nil (treated as an empty document) and other non-String values through
    # instead of failing inside StringIO.new with a TypeError.
    def convert(xml)
      return xml if xml.respond_to?(:read) || xml.respond_to?(:readpartial)
      StringIO.new(xml.to_s)
    end
  end
end
|