saxerator 0.7.1 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.travis.yml +4 -6
- data/README.md +7 -5
- data/lib/saxerator/builder/hash_builder.rb +23 -9
- data/lib/saxerator/configuration.rb +37 -3
- data/lib/saxerator/dsl.rb +3 -3
- data/lib/saxerator/version.rb +1 -1
- data/spec/lib/dsl/all_spec.rb +11 -0
- data/spec/lib/saxerator_spec.rb +72 -2
- metadata +14 -20
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1f658aac7bdee282fb08e3b58640872e85736a2b
|
4
|
+
data.tar.gz: 59155cc7dfd7a43d3427fe2caa051a56bbf8b4d7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 78ced56c7b227e0eca9286e70653bbc4301e838ce944be43f8d6864c29139b8b02546b35108dee6bb655a284aa28c80e341efbcd56820c6c9e1577dadaa29607
|
7
|
+
data.tar.gz: 41987c42a7ade852213f0c63b5ede55e1a832432c30943bac211fe36bcc76fe3fadda3ff5d4b06625103075963b61c0ff96589691f869f906549589d3da92991
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -2,14 +2,12 @@ script: "rspec spec"
|
|
2
2
|
language: ruby
|
3
3
|
bundler_args: --without coverage
|
4
4
|
rvm:
|
5
|
-
-
|
5
|
+
- 2.0.0
|
6
6
|
- 1.9.2
|
7
7
|
- 1.9.3
|
8
|
-
- jruby-18mode
|
9
8
|
- jruby-19mode
|
10
|
-
- rbx-
|
11
|
-
- rbx-19mode
|
9
|
+
- rbx-2.2.1
|
12
10
|
matrix:
|
13
11
|
allow_failures:
|
14
|
-
- rvm: jruby-
|
15
|
-
- rvm:
|
12
|
+
- rvm: jruby-19mode
|
13
|
+
- rvm: rbx-2.2.1
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Saxerator [![Build Status](https://secure.travis-ci.org/soulcutter/saxerator.png?branch=master)](http://travis-ci.org/soulcutter/saxerator)
|
1
|
+
Saxerator [![Build Status](https://secure.travis-ci.org/soulcutter/saxerator.png?branch=master)](http://travis-ci.org/soulcutter/saxerator) [![Code Climate](https://codeclimate.com/github/soulcutter/saxerator.png)](https://codeclimate.com/github/soulcutter/saxerator)
|
2
2
|
=========
|
3
3
|
|
4
4
|
Saxerator is a streaming xml-to-hash parser designed for working with very large xml files by
|
@@ -80,11 +80,11 @@ end
|
|
80
80
|
Configuration
|
81
81
|
-------------
|
82
82
|
|
83
|
-
Certain options are available at parser initialization.
|
83
|
+
Certain options are available via a configuration block at parser initialization.
|
84
84
|
|
85
85
|
```ruby
|
86
|
-
Saxerator.
|
87
|
-
|
86
|
+
Saxerator.parser(xml) do |config|
|
87
|
+
config.output_type = :xml
|
88
88
|
end
|
89
89
|
```
|
90
90
|
|
@@ -92,6 +92,8 @@ end
|
|
92
92
|
|:------------------|:--------|-----------------|------------
|
93
93
|
| `output_type` | `:hash` | `:hash`, `:xml` | The type of object generated by Saxerator's parsing. `:hash` should be self-explanatory, `:xml` generates a `Nokogiri::XML::Document`
|
94
94
|
| `symbolize_keys!` | n/a | n/a | Call this method if you want the hash keys to be symbols rather than strings
|
95
|
+
| `strip_namespaces!` | n/a | user-specified | Called with no arguments, this strips all namespaces; or you may specify an arbitrary number of namespaces to strip, e.g. `config.strip_namespaces! :rss, :soapenv`
|
96
|
+
| `put_attributes_in_hash!` | n/a | n/a | Call this method if you want xml attributes included as elements of the output hash - only valid with `output_type = :hash`
|
95
97
|
|
96
98
|
Known Issues
|
97
99
|
------------
|
@@ -121,4 +123,4 @@ Saxerator was inspired by - but not affiliated with - [nori](https://github.com/
|
|
121
123
|
[Practicing Ruby](http://practicingruby.com/)
|
122
124
|
|
123
125
|
#### Legal Stuff ####
|
124
|
-
Copyright © Bradley Schaefer. MIT License (see LICENSE file).
|
126
|
+
Copyright © Bradley Schaefer. MIT License (see LICENSE file).
|
@@ -5,7 +5,7 @@ module Saxerator
|
|
5
5
|
|
6
6
|
def initialize(config, name, attributes)
|
7
7
|
@config = config
|
8
|
-
@name = config.
|
8
|
+
@name = config.generate_key_for(name)
|
9
9
|
@attributes = attributes
|
10
10
|
@children = []
|
11
11
|
@text = false
|
@@ -27,23 +27,37 @@ module Saxerator
|
|
27
27
|
name = child.name
|
28
28
|
element = child.block_variable
|
29
29
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
30
|
+
add_to_hash_element( hash, name, element)
|
31
|
+
end
|
32
|
+
|
33
|
+
if @config.put_attributes_in_hash?
|
34
|
+
|
35
|
+
@attributes.each do |attribute|
|
36
|
+
attribute.each_slice(2) do |name, element|
|
37
|
+
add_to_hash_element( hash, name, element)
|
34
38
|
end
|
35
|
-
hash[name] << element
|
36
|
-
else
|
37
|
-
hash[name] = element
|
38
39
|
end
|
39
40
|
end
|
40
41
|
|
41
42
|
hash
|
42
43
|
end
|
43
44
|
|
45
|
+
def add_to_hash_element( hash, name, element)
|
46
|
+
name = @config.generate_key_for(name)
|
47
|
+
if hash[name]
|
48
|
+
if !hash[name].is_a?(Array)
|
49
|
+
hash[name] = ArrayElement[hash[name]]
|
50
|
+
hash[name].name = name
|
51
|
+
end
|
52
|
+
hash[name] << element
|
53
|
+
else
|
54
|
+
hash[name] = element
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
44
58
|
def block_variable
|
45
59
|
@text ? to_s : to_hash
|
46
60
|
end
|
47
61
|
end
|
48
62
|
end
|
49
|
-
end
|
63
|
+
end
|
@@ -5,19 +5,53 @@ module Saxerator
|
|
5
5
|
|
6
6
|
def initialize
|
7
7
|
@output_type = :hash
|
8
|
+
@put_attributes_in_hash = false
|
8
9
|
end
|
9
10
|
|
10
11
|
def output_type=(val)
|
11
12
|
raise ArgumentError.new("Unknown output_type '#{val.inspect}'") unless Builder.valid?(val)
|
12
13
|
@output_type = val
|
14
|
+
raise_error_if_using_put_attributes_in_hash_with_xml
|
15
|
+
end
|
16
|
+
|
17
|
+
def generate_key_for(val)
|
18
|
+
hash_key_generator.call val
|
19
|
+
end
|
20
|
+
|
21
|
+
def hash_key_normalizer
|
22
|
+
@hash_key_normalizer ||= lambda { |x| x.to_s }
|
13
23
|
end
|
14
24
|
|
15
25
|
def hash_key_generator
|
16
|
-
@hash_key_generator
|
26
|
+
@hash_key_generator || hash_key_normalizer
|
17
27
|
end
|
18
28
|
|
19
29
|
def symbolize_keys!
|
20
|
-
@hash_key_generator = lambda { |x| x.to_sym }
|
30
|
+
@hash_key_generator = lambda { |x| hash_key_normalizer.call(x).to_sym }
|
31
|
+
end
|
32
|
+
|
33
|
+
def strip_namespaces!(*namespaces)
|
34
|
+
if namespaces.any?
|
35
|
+
matching_group = namespaces.join('|')
|
36
|
+
@hash_key_normalizer = lambda { |x| x.to_s.gsub(/(#{matching_group}):/, '') }
|
37
|
+
else
|
38
|
+
@hash_key_normalizer = lambda { |x| x.to_s.gsub(/\w+:/, '') }
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def put_attributes_in_hash!
|
43
|
+
@put_attributes_in_hash = true
|
44
|
+
raise_error_if_using_put_attributes_in_hash_with_xml
|
45
|
+
end
|
46
|
+
|
47
|
+
def put_attributes_in_hash?
|
48
|
+
@put_attributes_in_hash
|
49
|
+
end
|
50
|
+
|
51
|
+
def raise_error_if_using_put_attributes_in_hash_with_xml
|
52
|
+
if @output_type != :hash && @put_attributes_in_hash
|
53
|
+
raise ArgumentError.new("put_attributes_in_hash! is only valid when using output_type = :hash (the default)'")
|
54
|
+
end
|
21
55
|
end
|
22
56
|
end
|
23
|
-
end
|
57
|
+
end
|
data/lib/saxerator/dsl.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Saxerator
|
2
2
|
module DSL
|
3
|
-
def for_tag(
|
4
|
-
for_tags(
|
3
|
+
def for_tag(*tags)
|
4
|
+
for_tags(tags)
|
5
5
|
end
|
6
6
|
|
7
7
|
def for_tags(tags)
|
@@ -41,4 +41,4 @@ module Saxerator
|
|
41
41
|
DocumentFragment.new(@source, @config, @latches + [predicate])
|
42
42
|
end
|
43
43
|
end
|
44
|
-
end
|
44
|
+
end
|
data/lib/saxerator/version.rb
CHANGED
data/spec/lib/dsl/all_spec.rb
CHANGED
@@ -17,4 +17,15 @@ describe "Saxerator::FullDocument#all" do
|
|
17
17
|
it "should allow you to parse an entire document" do
|
18
18
|
parser.all.should == {'blurb' => ['one', 'two', 'three'], 'notablurb' => 'four'}
|
19
19
|
end
|
20
|
+
|
21
|
+
context "with_put_attributes_in_hash" do
|
22
|
+
subject(:parser) do
|
23
|
+
Saxerator.parser(xml) { |config| config.put_attributes_in_hash! }
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should allow you to parse an entire document" do
|
27
|
+
parser.all.should == {'blurb' => ['one', 'two', 'three'], 'notablurb' => 'four'}
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
20
31
|
end
|
data/spec/lib/saxerator_spec.rb
CHANGED
@@ -62,7 +62,77 @@ describe Saxerator do
|
|
62
62
|
end
|
63
63
|
|
64
64
|
specify { parser.all.should == { :bar => 'baz' } }
|
65
|
+
specify { parser.all.name.should == :foo }
|
66
|
+
end
|
67
|
+
|
68
|
+
context "with strip namespaces" do
|
69
|
+
let(:xml) { "<ns1:foo><ns3:bar>baz</ns3:bar></ns1:foo>" }
|
70
|
+
subject(:parser) do
|
71
|
+
Saxerator.parser(xml) { |config| config.strip_namespaces! }
|
72
|
+
end
|
73
|
+
|
74
|
+
specify { parser.all.should == {'bar' => 'baz'} }
|
75
|
+
specify { parser.all.name.should == 'foo' }
|
76
|
+
|
77
|
+
context "combined with symbolize keys" do
|
78
|
+
subject(:parser) do
|
79
|
+
Saxerator.parser(xml) do |config|
|
80
|
+
config.strip_namespaces!
|
81
|
+
config.symbolize_keys!
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
specify { parser.all.should == {:bar => 'baz'} }
|
86
|
+
end
|
87
|
+
|
88
|
+
context "for specific namespaces" do
|
89
|
+
let(:xml) do
|
90
|
+
<<-XML.gsub /^ {10}/, ''
|
91
|
+
<ns1:foo>
|
92
|
+
<ns2:bar>baz</ns2:bar>
|
93
|
+
<ns3:bar>biz</ns3:bar>
|
94
|
+
</ns1:foo>
|
95
|
+
XML
|
96
|
+
end
|
97
|
+
subject(:parser) do
|
98
|
+
Saxerator.parser(xml) { |config| config.strip_namespaces! :ns1, :ns3 }
|
99
|
+
end
|
100
|
+
|
101
|
+
specify { parser.all.should == {'ns2:bar' => 'baz', 'bar' => 'biz'} }
|
102
|
+
specify { parser.all.name.should == 'foo' }
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
end
|
107
|
+
|
108
|
+
context "configuration with put_attributes_in_hash!" do
|
109
|
+
let(:xml) { '<foo foo="bar"><bar>baz</bar></foo>' }
|
110
|
+
|
111
|
+
subject(:parser) do
|
112
|
+
Saxerator.parser(xml) do |config|
|
113
|
+
config.put_attributes_in_hash!
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
it "should be able to parse it" do
|
118
|
+
parser.all.should == { 'bar' => 'baz', 'foo' => 'bar' }
|
119
|
+
end
|
120
|
+
|
121
|
+
end
|
122
|
+
|
123
|
+
context "configuration with put_attributes_in_hash! and config.output_type = :xml" do
|
124
|
+
let(:xml) { '<foo foo="bar"><bar>baz</bar></foo>' }
|
125
|
+
|
126
|
+
subject(:parser) do
|
127
|
+
Saxerator.parser(xml) do |config|
|
128
|
+
config.put_attributes_in_hash!
|
129
|
+
config.output_type = :xml
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
context "should raise error with " do
|
134
|
+
specify { expect { parser }.to raise_error(ArgumentError) }
|
65
135
|
end
|
66
|
-
|
67
136
|
end
|
68
|
-
|
137
|
+
|
138
|
+
end
|
metadata
CHANGED
@@ -1,51 +1,46 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxerator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.9.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Bradley Schaefer
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-11-30 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: nokogiri
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: 1.4.0
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - '>='
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: 1.4.0
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rspec
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - '>='
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: 2.11.0
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - '>='
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: 2.11.0
|
46
|
-
description:
|
47
|
-
|
48
|
-
|
41
|
+
description: |2
|
42
|
+
Saxerator is a streaming xml-to-hash parser designed for working with very large xml files by
|
43
|
+
giving you Enumerable access to manageable chunks of the document.
|
49
44
|
email:
|
50
45
|
- bradley.schaefer@gmail.com
|
51
46
|
executables: []
|
@@ -99,27 +94,26 @@ files:
|
|
99
94
|
homepage: https://github.com/soulcutter/saxerator
|
100
95
|
licenses:
|
101
96
|
- MIT
|
97
|
+
metadata: {}
|
102
98
|
post_install_message:
|
103
99
|
rdoc_options: []
|
104
100
|
require_paths:
|
105
101
|
- lib
|
106
102
|
required_ruby_version: !ruby/object:Gem::Requirement
|
107
|
-
none: false
|
108
103
|
requirements:
|
109
|
-
- -
|
104
|
+
- - '>='
|
110
105
|
- !ruby/object:Gem::Version
|
111
106
|
version: '0'
|
112
107
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
|
-
none: false
|
114
108
|
requirements:
|
115
|
-
- -
|
109
|
+
- - '>='
|
116
110
|
- !ruby/object:Gem::Version
|
117
111
|
version: '0'
|
118
112
|
requirements: []
|
119
113
|
rubyforge_project: saxerator
|
120
|
-
rubygems_version:
|
114
|
+
rubygems_version: 2.0.3
|
121
115
|
signing_key:
|
122
|
-
specification_version:
|
116
|
+
specification_version: 4
|
123
117
|
summary: A SAX-based XML-to-hash parser for parsing large files into manageable chunks
|
124
118
|
test_files:
|
125
119
|
- spec/fixtures/flat_blurbs.xml
|