xmlhasher_with_attributes 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 281d60e1c23ea6fa21cdc0435c782ac5929b4e9485af5f9c751dc63582d0db22
4
+ data.tar.gz: be498e7f638bf459709f228a8c04763c19680a973b951450b31a504336d79ca1
5
+ SHA512:
6
+ metadata.gz: b0bac84ba10e88731588afb4229aa4f5277054324c578eb1487012aeaa3f9548d96d41d8df0970a673cd991ba2eb416f1c574c870170aa5a175f662bfa0b40af
7
+ data.tar.gz: 415fcbbdef80104bb05210e07141ce13ca557b87aa016c96c46c777f4d388ef4f0a7c2e81b9fd70768a216998d68ae871da96039d0d890631747ab75aafa58d3
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ /.idea
2
+ *.gem
3
+ *.rbc
4
+ .bundle
5
+ .config
6
+ .yardoc
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
data/.travis.yml ADDED
@@ -0,0 +1,19 @@
1
+ language: ruby
2
+ bundler_args: "--without development"
3
+ before_install:
4
+ - gem install bundler
5
+ rvm:
6
+ - 1.8.7
7
+ - 1.9.2
8
+ - 1.9.3
9
+ - 2.0.0
10
+ - jruby-18mode
11
+ - jruby-19mode
12
+ - rbx
13
+ - rbx-19mode
14
+ - ree
15
+ env:
16
+ - JRUBY_OPTS="-Xcext.enabled=true"
17
+ script:
18
+ - gem build xmlhasher.gemspec
19
+ - gem install xmlhasher-*
data/Gemfile ADDED
@@ -0,0 +1,12 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ gem 'rake'
6
+
7
+ group :test do
8
+ gem 'minitest'
9
+ gem 'test-unit'
10
+ gem 'simplecov', :require => false
11
+ gem 'coveralls', :require => false
12
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Gene Drabkin
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,84 @@
1
+ # XmlHasher_with_attributes
2
+
3
+ Fast XML to Ruby Hash converter
4
+
5
+ This gem is a fork of a fork of [XmlHasher](https://github.com/cloocher/xmlhasher) (through [pawelma](https://github.com/pawelma/xmlhasher)).
6
+ Compared with the original code, there is only one difference: attributes are no longer dropped when an element contains only text content.
7
+ I did the work to publish the gem on rubygems.org with a different name to make it available.
8
+
9
+ Example:
10
+ ```ruby
11
+ XmlHasher.parse('<tag attribute="attr_val">content</tag>')
12
+
13
+ # In the original xmlhasher, the command above returns the following hash:
14
+ {
15
+ tag: "content"
16
+ }
17
+
18
+ # With xmlhasher_with_attributes, the resulting hash is:
19
+ {
20
+ tag: {
21
+ attribute: "attr_val",
22
+ value: "content"
23
+ }
24
+ }
25
+ ```
26
+
27
+ ## Installation
28
+
29
+ * clone this repo
30
+ * run
31
+ ```sh
32
+ bundle install
33
+ rake install
34
+ ```
35
+
36
+ * require
37
+ ```ruby
38
+ require 'xmlhasher_with_attributes'
39
+ ```
40
+
41
+ ## Usage
42
+
43
+ ```ruby
44
+ require 'xmlhasher_with_attributes'
45
+
46
+ # XmlHasher global configuration
47
+ #
48
+ # snakecase - convert all keys to snake case notation
49
+ # ignore_namespaces - remove XML namespaces
50
+ # string_keys - represent keys as Strings instead of Symbols
51
+ #
52
+ # here is default configuration
53
+ XmlHasher.configure do |config|
54
+ config.snakecase = true
55
+ config.ignore_namespaces = true
56
+ config.string_keys = false
57
+ end
58
+
59
+ # alternatively, specify configuration options when instantiating a Parser
60
+ parser = XmlHasher::Parser.new(
61
+ :snakecase => true,
62
+ :ignore_namespaces => true,
63
+ :string_keys => false
64
+ )
65
+
66
+ # By default, XmlHasher converts all keys to Symbols. If you want all keys to be Strings, set the :string_keys option to true.
67
+
68
+ # parse XML file
69
+ XmlHasher.parse(File.new('/path/to/my/file.xml'))
70
+
71
+ # parse XML string
72
+ XmlHasher.parse("<tag1><tag2>content</tag2></tag1>")
73
+ # => {:tag1=>{:tag2=>"content"}}
74
+ ```
75
+
76
+ ## Requirements
77
+
78
+ * Ruby 1.8.7 or higher
79
+
80
+ ## Copyright
81
+ Copyright (c) 2013 Gene Drabkin.
82
+ See [LICENSE][] for details.
83
+
84
+ [license]: LICENSE.txt
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
+ Rake::TestTask.new(:test) do |t|
5
+ t.libs << 'lib' << 'test'
6
+ t.pattern = 'test/*/*_test.rb'
7
+ t.verbose = true
8
+ end
9
+
10
+ task :default => :test
@@ -0,0 +1,95 @@
1
$:.push File.expand_path('../../lib', __FILE__)

require 'benchmark'
require 'xmlhasher'

# Loads an optional competitor library that is only needed for comparison.
#
# `require` reports a missing library with LoadError, which descends from
# ScriptError rather than StandardError. A bare `rescue` therefore never
# catches it, so the error class has to be named for the hint to be printed.
#
# @param feature [String] path handed to `require`
# @param label [String] library name shown in the hint
# @param gem_name [String] gem to install to make the library available
# @return [Boolean] true when the library was loaded, false otherwise
def optional_require(feature, label, gem_name)
  require feature
  true
rescue LoadError
  puts "#{label} gem is not installed, run 'gem install #{gem_name}'"
  false
end

nori_loaded = optional_require('nori', 'nori', 'nori')
active_support_loaded = optional_require('active_support/core_ext/hash/conversions', 'active_support', 'activesupport')
xmlsimple_loaded = optional_require('xmlsimple', 'xmlsimple', 'xml-simple')
nokogiri_loaded = optional_require('nokogiri', 'nokogiri', 'nokogiri')
libxml_loaded = optional_require('libxml', 'libxml', 'libxml-ruby')

# Each report below is skipped when its library could not be loaded, so the
# remaining benchmarks still run instead of failing on an undefined constant.

runs = 100
xml = File.read(File.expand_path('../../test/fixtures/institution.xml', __FILE__))
puts 'Converting small xml from text to Hash:'
Benchmark.bm 10 do |x|
  if active_support_loaded
    ActiveSupport::XmlMini.backend = ActiveSupport::XmlMini_REXML
    x.report 'activesupport(rexml) ' do
      runs.times { Hash.from_xml(xml) }
    end

    if libxml_loaded
      ActiveSupport::XmlMini.backend = 'LibXML'
      x.report 'activesupport(libxml) ' do
        runs.times { Hash.from_xml(xml) }
      end
    end

    if nokogiri_loaded
      ActiveSupport::XmlMini.backend = 'Nokogiri'
      x.report 'activesupport(nokogiri)' do
        runs.times { Hash.from_xml(xml) }
      end
    end
  end

  if xmlsimple_loaded
    x.report 'xmlsimple ' do
      runs.times { XmlSimple.xml_in(xml) }
    end
  end

  if nori_loaded
    x.report 'nori ' do
      runs.times { Nori.new(:advanced_typecasting => false).parse(xml) }
    end
  end

  x.report 'xmlhasher ' do
    runs.times { XmlHasher.parse(xml) }
  end
end

puts
runs = 5
path = File.expand_path('../../test/fixtures/institutions.xml', __FILE__)
puts 'Converting large xml from file to Hash:'
Benchmark.bm 5 do |x|
  if active_support_loaded
    ActiveSupport::XmlMini.backend = ActiveSupport::XmlMini_REXML
    x.report 'activesupport(rexml) ' do
      # Block form of File.open closes the handle after every run.
      runs.times { File.open(path) { |file| Hash.from_xml(file) } }
    end

    if libxml_loaded
      ActiveSupport::XmlMini.backend = 'LibXML'
      x.report 'activesupport(libxml) ' do
        #runs.times { Hash.from_xml(File.new(path)) } # Segmentation fault
      end
    end

    if nokogiri_loaded
      ActiveSupport::XmlMini.backend = 'Nokogiri'
      x.report 'activesupport(nokogiri)' do
        runs.times { File.open(path) { |file| Hash.from_xml(file) } }
      end
    end
  end

  if xmlsimple_loaded
    x.report 'xmlsimple ' do
      runs.times { XmlSimple.xml_in(path) }
    end
  end

  if nori_loaded
    x.report 'nori ' do
      # Nori doesn't support reading from a stream, load the file in memory
      runs.times { Nori.new(:advanced_typecasting => false).parse(File.read(path)) }
    end
  end

  x.report 'xmlhasher ' do
    runs.times { File.open(path) { |file| XmlHasher.parse(file) } }
  end
end
data/lib/xmlhasher.rb ADDED
@@ -0,0 +1,25 @@
1
+ require 'xmlhasher/configurable'
2
+ require 'xmlhasher/handler'
3
+ require 'xmlhasher/parser'
4
+ require 'xmlhasher/node'
5
+ require 'xmlhasher/util'
6
+ require 'xmlhasher/version'
7
+
8
# Module-level facade: holds the global configuration and forwards any public
# Parser method (for example `parse`) to a shared parser instance.
module XmlHasher
  class << self
    include XmlHasher::Configurable

    # Returns the shared parser used by the module-level convenience calls.
    #
    # The parser is rebuilt whenever the currently configured options differ
    # from the ones it was created with, so a `configure` call (or a direct
    # writer call) made after the first parse still takes effect.
    #
    # @return [XmlHasher::Parser]
    def parser
      current = options
      unless @parser && @parser_options == current
        @parser = XmlHasher::Parser.new(current)
        @parser_options = current
      end
      @parser
    end

    private

    # Keeps `respond_to?` and `method` consistent with what
    # `method_missing` is able to forward.
    def respond_to_missing?(method_name, include_private = false)
      parser.respond_to?(method_name) || super
    end

    # Forwards unknown calls to the shared parser when it publicly responds
    # to them; everything else falls through to the default NoMethodError.
    def method_missing(method_name, *args, &block)
      return super unless parser.respond_to?(method_name)
      parser.public_send(method_name, *args, &block)
    end
  end
end
@@ -0,0 +1,20 @@
1
module XmlHasher
  # Mixin that gives an object writers for the supported option keys, a
  # block-style `configure` entry point, and a private `options` snapshot.
  module Configurable
    # Names of the supported options. Frozen so the list cannot be altered
    # at runtime by accident.
    KEYS = [:snakecase, :ignore_namespaces, :string_keys].freeze

    # Writers are generated from KEYS so the two lists cannot drift apart.
    attr_writer(*KEYS)

    # Yields the receiver so options can be assigned in a block.
    # Calling it without a block is a no-op instead of a LocalJumpError.
    #
    # @yieldparam config [Object] the receiver itself
    # @return [Object] the receiver, to allow chaining
    def configure
      yield self if block_given?
      self
    end

    private

    # Builds a fresh Hash with one entry per key in KEYS, read from the
    # instance variable of the same name (nil when it was never assigned).
    #
    # @return [Hash{Symbol => Object}]
    def options
      KEYS.each_with_object({}) do |key, settings|
        settings[key] = instance_variable_get(:"@#{key}")
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ require 'ox'
2
+ require 'escape_utils'
3
+
4
module XmlHasher
  # SAX handler that assembles the elements reported by the parser into a
  # tree of Node objects and exposes the finished tree as a Hash.
  class Handler < ::Ox::Sax
    # Matches `xmlns`, `xsi` and their prefixed forms (`xmlns:foo`,
    # `xsi:type`) only. Anchoring on `:` or end of string keeps ordinary
    # attributes that merely start with the same letters (e.g. `xsize`).
    NAMESPACE_ATTRIBUTE = /\A(xmlns|xsi)(:|\z)/

    # @param options [Hash] reads :ignore_namespaces, :snakecase, :string_keys
    def initialize(options = {})
      @options = options
      @stack = []
    end

    # @return [Hash] the converted document, or an empty Hash when no root
    #   element has been closed yet
    def to_hash
      @hash || {}
    end

    # Opens a new node for the element and makes it the current one.
    def start_element(name)
      @stack.push(Node.new(transform(name)))
    end

    # Stores an attribute on the current node. Attributes reported while no
    # element is open are skipped (presumably those of the XML declaration;
    # confirm against the Ox SAX documentation).
    def attr(name, value)
      return if @stack.empty? || ignore_attribute?(name)
      @stack.last.attributes[transform(name)] = unescape(value)
    end

    # Stores text on the current node. Text reported while no element is
    # open is skipped, mirroring the guard in #attr.
    def text(value)
      return if @stack.empty?
      @stack.last.text = unescape(value)
    end

    # Closes the current node: the outermost node becomes the result, any
    # other node is attached to its parent. An unmatched close with nothing
    # open is ignored instead of raising NoMethodError on nil.
    def end_element(name)
      node = @stack.pop
      return if node.nil?

      if @stack.empty?
        @hash = node.to_hash
      else
        @stack.last.children << node
      end
    end

    private

    # Applies the configured key transformations, in order: strip the
    # namespace prefix, snake-case, then symbolize unless String keys are
    # requested.
    def transform(name)
      name = name.to_s.split(':').last if @options[:ignore_namespaces]
      name = Util.snakecase(name) if @options[:snakecase]
      name = name.to_sym unless @options[:string_keys]
      name
    end

    # Decodes HTML entities in attribute values and text.
    def unescape(value)
      EscapeUtils.unescape_html(value)
    end

    # Namespace declaration attributes are dropped only when namespaces are
    # being ignored.
    def ignore_attribute?(name)
      return false unless @options[:ignore_namespaces]
      !(name.to_s =~ NAMESPACE_ATTRIBUTE).nil?
    end
  end
end
@@ -0,0 +1,39 @@
1
module XmlHasher
  # One XML element: its name, attributes, child nodes and text content.
  class Node
    attr_accessor :name, :attributes, :children, :text

    # @param name [Symbol, String] key under which the node is emitted
    def initialize(name)
      @name = name
      @attributes = {}
      @children = []
    end

    # Converts the node (and, recursively, its children) to a Hash with a
    # single entry keyed by the node name.
    #
    # * text only                -> the text itself
    # * text and attributes      -> attributes plus a "value" entry
    # * no text                  -> attributes merged with the children;
    #                               repeated child names become an Array
    # * nothing at all / empty   -> nil
    #
    # When text is present the children are not included. The "value" key
    # has the same type as the node name, so documents built with String
    # keys contain String keys only. The method works on a fresh copy of
    # the attributes, so calling it never changes the node.
    #
    # @return [Hash]
    def to_hash
      attrs = clean_attributes
      content =
        if text
          attrs.empty? ? text : attrs.merge(value_key => text)
        else
          attrs.merge(children_hash)
        end
      { name => (content.empty? ? nil : content) }
    end

    private

    # Key used for the text when attributes are present as well.
    def value_key
      name.is_a?(String) ? 'value' : :value
    end

    # Children grouped by name: a single child contributes its own hash,
    # several children with the same name become an Array of their values.
    def children_hash
      children.group_by { |child| child.name }.each_with_object({}) do |(child_name, group), result|
        if group.length == 1
          result.merge!(group.first.to_hash)
        else
          result[child_name] = group.map { |child| child.to_hash[child.name] }
        end
      end
    end

    # Attributes without nil or empty values, as a new Hash on every call.
    def clean_attributes
      attributes.reject { |_key, value| value.nil? || value.to_s.empty? }
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'stringio'
2
+
3
module XmlHasher
  # Converts XML, given as a String or as a readable stream, into a Hash.
  class Parser
    # @param options [Hash, nil] passed on to the Handler; nil is treated as
    #   an empty Hash so the handler can always index into it
    def initialize(options = {})
      @options = options || {}
    end

    # Parses the XML and returns the Hash built by the handler.
    #
    # @param xml [String, #read, #readpartial, nil] XML text or stream
    # @return [Hash] the converted document; an empty Hash for nil input,
    #   which is the same result the handler gives when nothing was parsed
    def parse(xml)
      # Without this guard StringIO.new(nil) fails with an unrelated-looking
      # TypeError.
      return {} if xml.nil?

      handler = Handler.new(@options)
      Ox.sax_parse(handler, convert(xml))
      handler.to_hash
    end

    private

    # Stream-like objects are passed through untouched; anything else is
    # wrapped in a StringIO so the SAX parser always receives a stream.
    def convert(xml)
      return xml if xml.respond_to?(:read) || xml.respond_to?(:readpartial)
      StringIO.new(xml)
    end
  end
end