saxerator 0.7.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.travis.yml +4 -6
- data/README.md +7 -5
- data/lib/saxerator/builder/hash_builder.rb +23 -9
- data/lib/saxerator/configuration.rb +37 -3
- data/lib/saxerator/dsl.rb +3 -3
- data/lib/saxerator/version.rb +1 -1
- data/spec/lib/dsl/all_spec.rb +11 -0
- data/spec/lib/saxerator_spec.rb +72 -2
- metadata +14 -20
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1f658aac7bdee282fb08e3b58640872e85736a2b
|
4
|
+
data.tar.gz: 59155cc7dfd7a43d3427fe2caa051a56bbf8b4d7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 78ced56c7b227e0eca9286e70653bbc4301e838ce944be43f8d6864c29139b8b02546b35108dee6bb655a284aa28c80e341efbcd56820c6c9e1577dadaa29607
|
7
|
+
data.tar.gz: 41987c42a7ade852213f0c63b5ede55e1a832432c30943bac211fe36bcc76fe3fadda3ff5d4b06625103075963b61c0ff96589691f869f906549589d3da92991
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -2,14 +2,12 @@ script: "rspec spec"
|
|
2
2
|
language: ruby
|
3
3
|
bundler_args: --without coverage
|
4
4
|
rvm:
|
5
|
-
-
|
5
|
+
- 2.0.0
|
6
6
|
- 1.9.2
|
7
7
|
- 1.9.3
|
8
|
-
- jruby-18mode
|
9
8
|
- jruby-19mode
|
10
|
-
- rbx-
|
11
|
-
- rbx-19mode
|
9
|
+
- rbx-2.2.1
|
12
10
|
matrix:
|
13
11
|
allow_failures:
|
14
|
-
- rvm: jruby-
|
15
|
-
- rvm:
|
12
|
+
- rvm: jruby-19mode
|
13
|
+
- rvm: rbx-2.2.1
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Saxerator [](http://travis-ci.org/soulcutter/saxerator)
|
1
|
+
Saxerator [](http://travis-ci.org/soulcutter/saxerator) [](https://codeclimate.com/github/soulcutter/saxerator)
|
2
2
|
=========
|
3
3
|
|
4
4
|
Saxerator is a streaming xml-to-hash parser designed for working with very large xml files by
|
@@ -80,11 +80,11 @@ end
|
|
80
80
|
Configuration
|
81
81
|
-------------
|
82
82
|
|
83
|
-
Certain options are available at parser initialization.
|
83
|
+
Certain options are available via a configuration block at parser initialization.
|
84
84
|
|
85
85
|
```ruby
|
86
|
-
Saxerator.
|
87
|
-
|
86
|
+
Saxerator.parser(xml) do |config|
|
87
|
+
config.output_type = :xml
|
88
88
|
end
|
89
89
|
```
|
90
90
|
|
@@ -92,6 +92,8 @@ end
|
|
92
92
|
|:------------------|:--------|-----------------|------------
|
93
93
|
| `output_type` | `:hash` | `:hash`, `:xml` | The type of object generated by Saxerator's parsing. `:hash` should be self-explanatory, `:xml` generates a `Nokogiri::XML::Document`
|
94
94
|
| `symbolize_keys!` | n/a | n/a | Call this method if you want the hash keys to be symbols rather than strings
|
95
|
+
| `strip_namespaces!` | n/a | user-specified | Called with no arguments this strips all namespaces, or you may specify an arbitrary number of namespaces to strip, i.e. `config.strip_namespaces! :rss, :soapenv`
|
96
|
+
| `put_attributes_in_hash!` | n/a | n/a | Call this method if you want xml attributes included as elements of the output hash - only valid with `output_type = :hash`
|
95
97
|
|
96
98
|
Known Issues
|
97
99
|
------------
|
@@ -121,4 +123,4 @@ Saxerator was inspired by - but not affiliated with - [nori](https://github.com/
|
|
121
123
|
[Practicing Ruby](http://practicingruby.com/)
|
122
124
|
|
123
125
|
#### Legal Stuff ####
|
124
|
-
Copyright © Bradley Schaefer. MIT License (see LICENSE file).
|
126
|
+
Copyright © Bradley Schaefer. MIT License (see LICENSE file).
|
@@ -5,7 +5,7 @@ module Saxerator
|
|
5
5
|
|
6
6
|
def initialize(config, name, attributes)
|
7
7
|
@config = config
|
8
|
-
@name = config.
|
8
|
+
@name = config.generate_key_for(name)
|
9
9
|
@attributes = attributes
|
10
10
|
@children = []
|
11
11
|
@text = false
|
@@ -27,23 +27,37 @@ module Saxerator
|
|
27
27
|
name = child.name
|
28
28
|
element = child.block_variable
|
29
29
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
30
|
+
add_to_hash_element( hash, name, element)
|
31
|
+
end
|
32
|
+
|
33
|
+
if @config.put_attributes_in_hash?
|
34
|
+
|
35
|
+
@attributes.each do |attribute|
|
36
|
+
attribute.each_slice(2) do |name, element|
|
37
|
+
add_to_hash_element( hash, name, element)
|
34
38
|
end
|
35
|
-
hash[name] << element
|
36
|
-
else
|
37
|
-
hash[name] = element
|
38
39
|
end
|
39
40
|
end
|
40
41
|
|
41
42
|
hash
|
42
43
|
end
|
43
44
|
|
45
|
+
def add_to_hash_element( hash, name, element)
|
46
|
+
name = @config.generate_key_for(name)
|
47
|
+
if hash[name]
|
48
|
+
if !hash[name].is_a?(Array)
|
49
|
+
hash[name] = ArrayElement[hash[name]]
|
50
|
+
hash[name].name = name
|
51
|
+
end
|
52
|
+
hash[name] << element
|
53
|
+
else
|
54
|
+
hash[name] = element
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
44
58
|
def block_variable
|
45
59
|
@text ? to_s : to_hash
|
46
60
|
end
|
47
61
|
end
|
48
62
|
end
|
49
|
-
end
|
63
|
+
end
|
@@ -5,19 +5,53 @@ module Saxerator
|
|
5
5
|
|
6
6
|
def initialize
|
7
7
|
@output_type = :hash
|
8
|
+
@put_attributes_in_hash = false
|
8
9
|
end
|
9
10
|
|
10
11
|
def output_type=(val)
|
11
12
|
raise ArgumentError.new("Unknown output_type '#{val.inspect}'") unless Builder.valid?(val)
|
12
13
|
@output_type = val
|
14
|
+
raise_error_if_using_put_attributes_in_hash_with_xml
|
15
|
+
end
|
16
|
+
|
17
|
+
def generate_key_for(val)
|
18
|
+
hash_key_generator.call val
|
19
|
+
end
|
20
|
+
|
21
|
+
def hash_key_normalizer
|
22
|
+
@hash_key_normalizer ||= lambda { |x| x.to_s }
|
13
23
|
end
|
14
24
|
|
15
25
|
def hash_key_generator
|
16
|
-
@hash_key_generator
|
26
|
+
@hash_key_generator || hash_key_normalizer
|
17
27
|
end
|
18
28
|
|
19
29
|
def symbolize_keys!
|
20
|
-
@hash_key_generator = lambda { |x| x.to_sym }
|
30
|
+
@hash_key_generator = lambda { |x| hash_key_normalizer.call(x).to_sym }
|
31
|
+
end
|
32
|
+
|
33
|
+
def strip_namespaces!(*namespaces)
|
34
|
+
if namespaces.any?
|
35
|
+
matching_group = namespaces.join('|')
|
36
|
+
@hash_key_normalizer = lambda { |x| x.to_s.gsub(/(#{matching_group}):/, '') }
|
37
|
+
else
|
38
|
+
@hash_key_normalizer = lambda { |x| x.to_s.gsub(/\w+:/, '') }
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def put_attributes_in_hash!
|
43
|
+
@put_attributes_in_hash = true
|
44
|
+
raise_error_if_using_put_attributes_in_hash_with_xml
|
45
|
+
end
|
46
|
+
|
47
|
+
def put_attributes_in_hash?
|
48
|
+
@put_attributes_in_hash
|
49
|
+
end
|
50
|
+
|
51
|
+
def raise_error_if_using_put_attributes_in_hash_with_xml
|
52
|
+
if @output_type != :hash && @put_attributes_in_hash
|
53
|
+
raise ArgumentError.new("put_attributes_in_hash! is only valid when using output_type = :hash (the default)'")
|
54
|
+
end
|
21
55
|
end
|
22
56
|
end
|
23
|
-
end
|
57
|
+
end
|
data/lib/saxerator/dsl.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Saxerator
|
2
2
|
module DSL
|
3
|
-
def for_tag(
|
4
|
-
for_tags(
|
3
|
+
def for_tag(*tags)
|
4
|
+
for_tags(tags)
|
5
5
|
end
|
6
6
|
|
7
7
|
def for_tags(tags)
|
@@ -41,4 +41,4 @@ module Saxerator
|
|
41
41
|
DocumentFragment.new(@source, @config, @latches + [predicate])
|
42
42
|
end
|
43
43
|
end
|
44
|
-
end
|
44
|
+
end
|
data/lib/saxerator/version.rb
CHANGED
data/spec/lib/dsl/all_spec.rb
CHANGED
@@ -17,4 +17,15 @@ describe "Saxerator::FullDocument#all" do
|
|
17
17
|
it "should allow you to parse an entire document" do
|
18
18
|
parser.all.should == {'blurb' => ['one', 'two', 'three'], 'notablurb' => 'four'}
|
19
19
|
end
|
20
|
+
|
21
|
+
context "with_put_attributes_in_hash" do
|
22
|
+
subject(:parser) do
|
23
|
+
Saxerator.parser(xml) { |config| config.put_attributes_in_hash! }
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should allow you to parse an entire document" do
|
27
|
+
parser.all.should == {'blurb' => ['one', 'two', 'three'], 'notablurb' => 'four'}
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
20
31
|
end
|
data/spec/lib/saxerator_spec.rb
CHANGED
@@ -62,7 +62,77 @@ describe Saxerator do
|
|
62
62
|
end
|
63
63
|
|
64
64
|
specify { parser.all.should == { :bar => 'baz' } }
|
65
|
+
specify { parser.all.name.should == :foo }
|
66
|
+
end
|
67
|
+
|
68
|
+
context "with strip namespaces" do
|
69
|
+
let(:xml) { "<ns1:foo><ns3:bar>baz</ns3:bar></ns1:foo>" }
|
70
|
+
subject(:parser) do
|
71
|
+
Saxerator.parser(xml) { |config| config.strip_namespaces! }
|
72
|
+
end
|
73
|
+
|
74
|
+
specify { parser.all.should == {'bar' => 'baz'} }
|
75
|
+
specify { parser.all.name.should == 'foo' }
|
76
|
+
|
77
|
+
context "combined with symbolize keys" do
|
78
|
+
subject(:parser) do
|
79
|
+
Saxerator.parser(xml) do |config|
|
80
|
+
config.strip_namespaces!
|
81
|
+
config.symbolize_keys!
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
specify { parser.all.should == {:bar => 'baz'} }
|
86
|
+
end
|
87
|
+
|
88
|
+
context "for specific namespaces" do
|
89
|
+
let(:xml) do
|
90
|
+
<<-XML.gsub /^ {10}/, ''
|
91
|
+
<ns1:foo>
|
92
|
+
<ns2:bar>baz</ns2:bar>
|
93
|
+
<ns3:bar>biz</ns3:bar>
|
94
|
+
</ns1:foo>
|
95
|
+
XML
|
96
|
+
end
|
97
|
+
subject(:parser) do
|
98
|
+
Saxerator.parser(xml) { |config| config.strip_namespaces! :ns1, :ns3 }
|
99
|
+
end
|
100
|
+
|
101
|
+
specify { parser.all.should == {'ns2:bar' => 'baz', 'bar' => 'biz'} }
|
102
|
+
specify { parser.all.name.should == 'foo' }
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
end
|
107
|
+
|
108
|
+
context "configuration with put_attributes_in_hash!" do
|
109
|
+
let(:xml) { '<foo foo="bar"><bar>baz</bar></foo>' }
|
110
|
+
|
111
|
+
subject(:parser) do
|
112
|
+
Saxerator.parser(xml) do |config|
|
113
|
+
config.put_attributes_in_hash!
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
it "should be able to parse it" do
|
118
|
+
parser.all.should == { 'bar' => 'baz', 'foo' => 'bar' }
|
119
|
+
end
|
120
|
+
|
121
|
+
end
|
122
|
+
|
123
|
+
context "configuration with put_attributes_in_hash! and config.output_type = :xml" do
|
124
|
+
let(:xml) { '<foo foo="bar"><bar>baz</bar></foo>' }
|
125
|
+
|
126
|
+
subject(:parser) do
|
127
|
+
Saxerator.parser(xml) do |config|
|
128
|
+
config.put_attributes_in_hash!
|
129
|
+
config.output_type = :xml
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
context "should raise error with " do
|
134
|
+
specify { expect { parser }.to raise_error(ArgumentError) }
|
65
135
|
end
|
66
|
-
|
67
136
|
end
|
68
|
-
|
137
|
+
|
138
|
+
end
|
metadata
CHANGED
@@ -1,51 +1,46 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxerator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.9.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Bradley Schaefer
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-11-30 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: nokogiri
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: 1.4.0
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - '>='
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: 1.4.0
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rspec
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - '>='
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: 2.11.0
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - '>='
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: 2.11.0
|
46
|
-
description:
|
47
|
-
|
48
|
-
|
41
|
+
description: |2
|
42
|
+
Saxerator is a streaming xml-to-hash parser designed for working with very large xml files by
|
43
|
+
giving you Enumerable access to manageable chunks of the document.
|
49
44
|
email:
|
50
45
|
- bradley.schaefer@gmail.com
|
51
46
|
executables: []
|
@@ -99,27 +94,26 @@ files:
|
|
99
94
|
homepage: https://github.com/soulcutter/saxerator
|
100
95
|
licenses:
|
101
96
|
- MIT
|
97
|
+
metadata: {}
|
102
98
|
post_install_message:
|
103
99
|
rdoc_options: []
|
104
100
|
require_paths:
|
105
101
|
- lib
|
106
102
|
required_ruby_version: !ruby/object:Gem::Requirement
|
107
|
-
none: false
|
108
103
|
requirements:
|
109
|
-
- -
|
104
|
+
- - '>='
|
110
105
|
- !ruby/object:Gem::Version
|
111
106
|
version: '0'
|
112
107
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
|
-
none: false
|
114
108
|
requirements:
|
115
|
-
- -
|
109
|
+
- - '>='
|
116
110
|
- !ruby/object:Gem::Version
|
117
111
|
version: '0'
|
118
112
|
requirements: []
|
119
113
|
rubyforge_project: saxerator
|
120
|
-
rubygems_version:
|
114
|
+
rubygems_version: 2.0.3
|
121
115
|
signing_key:
|
122
|
-
specification_version:
|
116
|
+
specification_version: 4
|
123
117
|
summary: A SAX-based XML-to-hash parser for parsing large files into manageable chunks
|
124
118
|
test_files:
|
125
119
|
- spec/fixtures/flat_blurbs.xml
|