saxerator 0.9.5 → 0.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +9 -12
- data/Gemfile +4 -2
- data/README.md +23 -3
- data/Rakefile +12 -1
- data/benchmark/benchmark.rb +54 -12
- data/benchmark/generate_sample_file.rb +1 -1
- data/lib/saxerator.rb +2 -3
- data/lib/saxerator/adapters/nokogiri.rb +39 -0
- data/lib/saxerator/adapters/ox.rb +69 -0
- data/lib/saxerator/adapters/rexml.rb +42 -0
- data/lib/saxerator/builder.rb +2 -2
- data/lib/saxerator/builder/array_element.rb +8 -2
- data/lib/saxerator/builder/empty_element.rb +2 -3
- data/lib/saxerator/builder/hash_builder.rb +8 -7
- data/lib/saxerator/builder/hash_element.rb +7 -8
- data/lib/saxerator/builder/string_element.rb +5 -7
- data/lib/saxerator/builder/xml_builder.rb +11 -7
- data/lib/saxerator/configuration.rb +27 -7
- data/lib/saxerator/document_fragment.rb +3 -5
- data/lib/saxerator/dsl.rb +6 -5
- data/lib/saxerator/full_document.rb +1 -1
- data/lib/saxerator/latches/abstract_latch.rb +1 -1
- data/lib/saxerator/latches/at_depth.rb +2 -2
- data/lib/saxerator/latches/child_of.rb +9 -14
- data/lib/saxerator/latches/for_tags.rb +2 -2
- data/lib/saxerator/latches/with_attributes.rb +2 -2
- data/lib/saxerator/latches/within.rb +6 -10
- data/lib/saxerator/parser/accumulator.rb +6 -9
- data/lib/saxerator/parser/latched_accumulator.rb +4 -49
- data/lib/saxerator/sax_handler.rb +9 -0
- data/lib/saxerator/version.rb +1 -1
- data/saxerator.gemspec +5 -2
- data/spec/lib/builder/hash_builder_spec.rb +25 -19
- data/spec/lib/builder/xml_builder_spec.rb +7 -26
- data/spec/lib/dsl/all_spec.rb +11 -6
- data/spec/lib/dsl/at_depth_spec.rb +10 -8
- data/spec/lib/dsl/child_of_spec.rb +6 -6
- data/spec/lib/dsl/for_tag_spec.rb +3 -3
- data/spec/lib/dsl/for_tags_spec.rb +5 -5
- data/spec/lib/dsl/with_attribute_spec.rb +4 -4
- data/spec/lib/dsl/with_attributes_spec.rb +8 -7
- data/spec/lib/dsl/within_spec.rb +6 -5
- data/spec/lib/saxerator_spec.rb +70 -58
- data/spec/spec_helper.rb +24 -3
- data/spec/support/fixture_file.rb +1 -1
- metadata +39 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e54e504ee27201c8e10d314e067250c1dd779514
|
4
|
+
data.tar.gz: 3a7338b78ab7e05f316c365903c3ab625e81d62d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 350eb22db29091954e33bdf251a4e5e8133c3397221f7318d66ab878366862c614096d8b55a63de7d8300070b4ef8c434e0584769e5bbd6da7bb062218a2224e
|
7
|
+
data.tar.gz: 46145d2ff6a104304e0abdf330ecca528d0e1b14bbe5ce789f1016aeb7fe51f77888cce1c101233e75ce37bc3b3af67cfd5469682697a4ec37c3e00d8a446e7e
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -1,14 +1,11 @@
|
|
1
|
-
|
1
|
+
before_install:
|
2
|
+
- gem install bundler -v 1.12.5
|
3
|
+
script: "bundle exec rake spec:adapters"
|
2
4
|
language: ruby
|
3
|
-
bundler_args: --without coverage
|
5
|
+
bundler_args: --without coverage --binstubs
|
6
|
+
sudo: false
|
7
|
+
cache: bundler
|
4
8
|
rvm:
|
5
|
-
- 2.1
|
6
|
-
- 2.
|
7
|
-
- 1.
|
8
|
-
- 1.9.3
|
9
|
-
- jruby-19mode
|
10
|
-
- rbx-2.2.1
|
11
|
-
matrix:
|
12
|
-
allow_failures:
|
13
|
-
- rvm: jruby-19mode
|
14
|
-
- rvm: rbx-2.2.1
|
9
|
+
- 2.3.1
|
10
|
+
- 2.2.2
|
11
|
+
- 2.1.6
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -13,6 +13,15 @@ You can parse any valid xml in 3 simple steps.
|
|
13
13
|
1. Perform your work in an `each` block, or using any [Enumerable](http://apidock.com/ruby/Enumerable)
|
14
14
|
method
|
15
15
|
|
16
|
+
Installation
|
17
|
+
------------
|
18
|
+
1. `gem install saxerator`
|
19
|
+
1. Choose an xml parser
|
20
|
+
* (default) Use ruby's built-in REXML parser - no other dependencies necessary
|
21
|
+
* `gem install nokogiri`
|
22
|
+
* `gem install ox`
|
23
|
+
1. If not using the default, specify your adapter in the [Saxerator configuration](#configuration)
|
24
|
+
|
16
25
|
The DSL
|
17
26
|
-------
|
18
27
|
The DSL consists of predicates that may be combined to describe which elements the parser should enumerate over.
|
@@ -90,7 +99,8 @@ end
|
|
90
99
|
|
91
100
|
| Setting | Default | Values | Description
|
92
101
|
|:------------------|:--------|-----------------|------------
|
93
|
-
| `
|
102
|
+
| `adapter` | `:rexml` | `:nokogiri`, `:ox`, `:rexml` | The XML parser used by Saxerator |
|
103
|
+
| `output_type` | `:hash` | `:hash`, `:xml` | The type of object generated by Saxerator's parsing. `:hash` generates a Ruby Hash, `:xml` generates a `REXML::Document`
|
94
104
|
| `symbolize_keys!` | n/a | n/a | Call this method if you want the hash keys to be symbols rather than strings
|
95
105
|
| `ignore_namespaces!`| n/a | n/a | Call this method if you want to treat the XML document as if it has no namespace information. It differs slightly from `strip_namespaces!` since it deals with how the XML is processed rather than how it is output
|
96
106
|
| `strip_namespaces!`| n/a | user-specified | Called with no arguments this strips all namespaces, or you may specify an arbitrary number of namespaces to strip, i.e. `config.strip_namespaces! :rss, :soapenv`
|
@@ -101,6 +111,12 @@ Known Issues
|
|
101
111
|
* JRuby closes the file stream at the end of parsing; therefore, to perform multiple operations
|
102
112
|
which parse a file you will need to instantiate a new parser with a new File object.
|
103
113
|
|
114
|
+
Other Documentation
|
115
|
+
-------------------
|
116
|
+
* [REXML](http://www.germane-software.com/software/rexml/) ([api docs](http://ruby-doc.org/stdlib-2.4.0/libdoc/rexml/rdoc/REXML/Document.html))
|
117
|
+
* [Nokogiri](http://www.nokogiri.org/) ([api docs](http://www.rubydoc.info/github/sparklemotion/nokogiri))
|
118
|
+
* [Ox](https://github.com/ohler55/ox) ([api docs](http://www.ohler.com/ox/))
|
119
|
+
|
104
120
|
FAQ
|
105
121
|
---
|
106
122
|
Why the name 'Saxerator'?
|
@@ -129,9 +145,13 @@ When I fetch a tag that has one or more elements, sometimes I get an `Array`, an
|
|
129
145
|
> occasionally comes up is for elements that are sometimes-empty. Empty elements behave mostly like an
|
130
146
|
> empty `Hash`, however you may convert it to a more `String`-like object via `#to_s`
|
131
147
|
|
148
|
+
### Contribution ###
|
149
|
+
|
150
|
+
To run the tests against all parsers, run `rake spec:adapters`
|
151
|
+
|
132
152
|
### Acknowledgements ###
|
133
|
-
Saxerator was inspired by - but not affiliated with - [nori](https://github.com/savonrb/nori) and
|
153
|
+
Saxerator was inspired by - but not affiliated with - [nori](https://github.com/savonrb/nori) and Gregory Brown's
|
134
154
|
[Practicing Ruby](http://practicingruby.com/)
|
135
155
|
|
136
156
|
#### Legal Stuff ####
|
137
|
-
Copyright © Bradley Schaefer. MIT License (see LICENSE file).
|
157
|
+
Copyright © 2012-2017 Bradley Schaefer. MIT License (see LICENSE file).
|
data/Rakefile
CHANGED
@@ -4,4 +4,15 @@ require 'rspec/core/rake_task'
|
|
4
4
|
|
5
5
|
RSpec::Core::RakeTask.new(:spec)
|
6
6
|
|
7
|
-
task :
|
7
|
+
task default: :spec
|
8
|
+
|
9
|
+
namespace :spec do
|
10
|
+
desc 'Run specs against all available adapters'
|
11
|
+
task :adapters do |_|
|
12
|
+
%w(nokogiri ox rexml).each do |adapter|
|
13
|
+
ENV['SAXERATOR_ADAPTER'] = adapter
|
14
|
+
Rake::Task['spec'].invoke
|
15
|
+
::Rake.application['spec'].reenable
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/benchmark/benchmark.rb
CHANGED
@@ -1,23 +1,65 @@
|
|
1
|
-
|
1
|
+
$LOAD_PATH.push File.expand_path('../../lib', __FILE__)
|
2
2
|
require 'saxerator'
|
3
3
|
require 'benchmark'
|
4
4
|
|
5
5
|
file = ARGV.shift
|
6
|
-
|
6
|
+
unless File.exist?(file)
|
7
7
|
puts "Cannot find file #{file}"
|
8
8
|
exit 1
|
9
9
|
end
|
10
10
|
file = File.new(file)
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
ADAPTERS = [:nokogiri, :ox].freeze
|
13
|
+
|
14
|
+
class SaxeratorBenchmark
|
15
|
+
def initialize(file)
|
16
|
+
@file = file
|
17
|
+
end
|
18
|
+
|
19
|
+
def with_adapter(adapter) # rubocop:disable Metrics/MethodLength
|
20
|
+
puts '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
|
21
|
+
puts
|
22
|
+
puts "Benchmark with :#{adapter} parser"
|
23
|
+
puts
|
24
|
+
puts '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
|
25
|
+
puts
|
26
|
+
|
27
|
+
count = count2 = count3 = count4 = 0
|
28
|
+
|
29
|
+
Benchmark.bm do |x|
|
30
|
+
x.report('for_tag') do
|
31
|
+
Saxerator.parser(@file) { |confing| confing.adapter = adapter }
|
32
|
+
.for_tag(:artist).each { count += 1 }
|
33
|
+
end
|
34
|
+
|
35
|
+
x.report('at_depth') do
|
36
|
+
Saxerator.parser(@file) { |confing| confing.adapter = adapter }
|
37
|
+
.at_depth(2).each { count2 += 1 }
|
38
|
+
end
|
39
|
+
|
40
|
+
x.report('within') do
|
41
|
+
Saxerator.parser(@file) { |confing| confing.adapter = adapter }
|
42
|
+
.within(:artists).each { count3 += 1 }
|
43
|
+
end
|
44
|
+
|
45
|
+
x.report('composite') do
|
46
|
+
Saxerator.parser(@file) { |confing| confing.adapter = adapter }
|
47
|
+
.for_tag(:name)
|
48
|
+
.within(:artist).at_depth(3).each { count4 += 1 }
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
puts
|
53
|
+
puts '##########################################################'
|
54
|
+
puts
|
55
|
+
puts "for_tag: #{count} artist elements parsed"
|
56
|
+
puts "at_depth: #{count2} elements parsed"
|
57
|
+
puts "within: #{count3} artists children parsed"
|
58
|
+
puts "composite: #{count4} names within artist nested 3 tags deep parsed"
|
59
|
+
puts
|
60
|
+
end
|
18
61
|
end
|
19
62
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
puts "composite: #{count4} names within artist nested 3 tags deep parsed"
|
63
|
+
saxerator_benchmark = SaxeratorBenchmark.new(file)
|
64
|
+
|
65
|
+
ADAPTERS.each { |adapter| saxerator_benchmark.with_adapter(adapter) }
|
data/lib/saxerator.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'saxerator/version'
|
2
2
|
|
3
|
-
require '
|
4
|
-
|
3
|
+
require 'saxerator/sax_handler'
|
5
4
|
require 'saxerator/dsl'
|
6
5
|
require 'saxerator/full_document'
|
7
6
|
require 'saxerator/document_fragment'
|
@@ -31,6 +30,6 @@ module Saxerator
|
|
31
30
|
config = Configuration.new
|
32
31
|
yield(config) if block_given?
|
33
32
|
|
34
|
-
|
33
|
+
FullDocument.new(xml, config)
|
35
34
|
end
|
36
35
|
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module Saxerator
|
5
|
+
module Adapters
|
6
|
+
class Nokogiri < ::Nokogiri::XML::SAX::Document
|
7
|
+
extend Forwardable
|
8
|
+
|
9
|
+
def self.parse(source, reader)
|
10
|
+
parser = ::Nokogiri::XML::SAX::Parser.new(new(reader))
|
11
|
+
parser.parse(source)
|
12
|
+
end
|
13
|
+
|
14
|
+
def initialize(reader)
|
15
|
+
@reader = reader
|
16
|
+
@ignore_namespaces = reader.ignore_namespaces?
|
17
|
+
end
|
18
|
+
|
19
|
+
def_delegators :@reader, :start_element, :end_element, :characters
|
20
|
+
def_delegator :@reader, :characters, :cdata_block
|
21
|
+
|
22
|
+
def start_element_namespace(name, attrs = [], _prefix = nil, _uri = nil, _ns = [])
|
23
|
+
return super unless @ignore_namespaces
|
24
|
+
start_element(name, strip_namespace(attrs))
|
25
|
+
end
|
26
|
+
|
27
|
+
def end_element_namespace(name, _prefix = nil, _uri = nil)
|
28
|
+
return super unless @ignore_namespaces
|
29
|
+
end_element(name)
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def strip_namespace(attrs)
|
35
|
+
attrs.map { |attr| [attr.localname, attr.value] }
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
require 'ox'
|
3
|
+
|
4
|
+
module Saxerator
|
5
|
+
module Adapters
|
6
|
+
class Ox # < ::Ox::Sax
|
7
|
+
extend Forwardable
|
8
|
+
|
9
|
+
def self.parse(source, reader)
|
10
|
+
handler = new(reader)
|
11
|
+
::Ox.sax_parse(handler, source)
|
12
|
+
end
|
13
|
+
|
14
|
+
attr_accessor :name
|
15
|
+
attr_reader :attributes
|
16
|
+
attr_reader :reader
|
17
|
+
|
18
|
+
def initialize(reader)
|
19
|
+
@reader = reader
|
20
|
+
|
21
|
+
@attributes = {}
|
22
|
+
@name = ''
|
23
|
+
end
|
24
|
+
|
25
|
+
def guard!
|
26
|
+
reader.start_element(name, attributes.to_a) unless name.empty?
|
27
|
+
reset!
|
28
|
+
end
|
29
|
+
|
30
|
+
def attr(name, value)
|
31
|
+
attributes[name.to_s] = value
|
32
|
+
end
|
33
|
+
|
34
|
+
def start_element(name)
|
35
|
+
guard!
|
36
|
+
|
37
|
+
name = name.to_s
|
38
|
+
name = strip_namespace(name) if reader.ignore_namespaces?
|
39
|
+
self.name = name
|
40
|
+
end
|
41
|
+
|
42
|
+
def end_element(name)
|
43
|
+
guard!
|
44
|
+
|
45
|
+
name = name.to_s
|
46
|
+
name = strip_namespace(name) if reader.ignore_namespaces?
|
47
|
+
reader.end_element(name)
|
48
|
+
end
|
49
|
+
|
50
|
+
def text(str)
|
51
|
+
guard!
|
52
|
+
reader.characters(str)
|
53
|
+
end
|
54
|
+
|
55
|
+
alias cdata text
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def reset!
|
60
|
+
@attributes.clear
|
61
|
+
@name = ''
|
62
|
+
end
|
63
|
+
|
64
|
+
def strip_namespace(name)
|
65
|
+
name.split(':').last
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
require 'rexml/document'
|
3
|
+
require 'rexml/streamlistener'
|
4
|
+
|
5
|
+
module Saxerator
|
6
|
+
module Adapters
|
7
|
+
class Rexml
|
8
|
+
extend Forwardable
|
9
|
+
include REXML::StreamListener
|
10
|
+
|
11
|
+
def self.parse(source, reader)
|
12
|
+
handler = new(reader)
|
13
|
+
REXML::Document.parse_stream(source, handler)
|
14
|
+
end
|
15
|
+
|
16
|
+
def initialize(reader)
|
17
|
+
@reader = reader
|
18
|
+
@ignore_namespaces = reader.ignore_namespaces?
|
19
|
+
end
|
20
|
+
|
21
|
+
def_delegator :@reader, :characters, :text
|
22
|
+
def_delegator :@reader, :characters, :cdata
|
23
|
+
|
24
|
+
def tag_start(name, attrs)
|
25
|
+
name = strip_namespace(name) if @ignore_namespaces
|
26
|
+
@reader.start_element(name, attrs)
|
27
|
+
end
|
28
|
+
|
29
|
+
def tag_end(name)
|
30
|
+
name = strip_namespace(name) if @ignore_namespaces
|
31
|
+
@reader.end_element(name)
|
32
|
+
end
|
33
|
+
|
34
|
+
private
|
35
|
+
|
36
|
+
def strip_namespace(name)
|
37
|
+
name.split(':').last
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
data/lib/saxerator/builder.rb
CHANGED
@@ -1,9 +1,15 @@
|
|
1
|
+
require 'delegate'
|
2
|
+
|
1
3
|
module Saxerator
|
2
4
|
module Builder
|
3
|
-
class ArrayElement < Array
|
5
|
+
class ArrayElement < DelegateClass(Array)
|
4
6
|
attr_accessor :name
|
5
7
|
|
6
|
-
def
|
8
|
+
def initialize(arr = [], name = nil)
|
9
|
+
@name = name
|
10
|
+
super(arr)
|
11
|
+
end
|
12
|
+
|
7
13
|
def to_a; self end
|
8
14
|
end
|
9
15
|
end
|
@@ -4,6 +4,7 @@ module Saxerator
|
|
4
4
|
module Builder
|
5
5
|
class EmptyElement < HashElement
|
6
6
|
def nil?; true end
|
7
|
+
|
7
8
|
def !; true end
|
8
9
|
|
9
10
|
def to_s
|
@@ -15,9 +16,7 @@ module Saxerator
|
|
15
16
|
end
|
16
17
|
|
17
18
|
def to_a
|
18
|
-
|
19
|
-
array.name = name
|
20
|
-
array
|
19
|
+
ArrayElement.new([], name)
|
21
20
|
end
|
22
21
|
end
|
23
22
|
end
|
@@ -45,12 +45,11 @@ module Saxerator
|
|
45
45
|
hash
|
46
46
|
end
|
47
47
|
|
48
|
-
def add_to_hash_element(
|
48
|
+
def add_to_hash_element(hash, name, element)
|
49
49
|
name = generate_key(name)
|
50
|
-
if hash
|
51
|
-
unless hash[name].is_a?(
|
52
|
-
hash[name] = ArrayElement[hash[name]]
|
53
|
-
hash[name].name = name
|
50
|
+
if hash.key? name
|
51
|
+
unless hash[name].is_a?(ArrayElement)
|
52
|
+
hash[name] = ArrayElement.new([hash[name]], name)
|
54
53
|
end
|
55
54
|
hash[name] << element
|
56
55
|
else
|
@@ -60,12 +59,14 @@ module Saxerator
|
|
60
59
|
|
61
60
|
def block_variable
|
62
61
|
return to_s if @text
|
63
|
-
|
62
|
+
if @children.count > 0 || (@attributes.count > 0 && @config.put_attributes_in_hash?)
|
63
|
+
return to_hash
|
64
|
+
end
|
64
65
|
to_empty_element
|
65
66
|
end
|
66
67
|
|
67
68
|
def normalize_attributes(attributes)
|
68
|
-
Hash[attributes.map {|key, value| [generate_key(key), value] }]
|
69
|
+
Hash[attributes.map { |key, value| [generate_key(key), value] }]
|
69
70
|
end
|
70
71
|
|
71
72
|
def generate_key(name)
|