saxerator 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +13 -1
- data/Rakefile +5 -0
- data/lib/saxerator.rb +4 -0
- data/lib/saxerator/hash_with_attributes.rb +5 -0
- data/lib/saxerator/parser/nokogiri.rb +1 -1
- data/lib/saxerator/string_with_attributes.rb +0 -4
- data/lib/saxerator/version.rb +1 -1
- data/lib/saxerator/xml_node.rb +13 -4
- data/saxerator.gemspec +2 -6
- data/spec/fixtures/nested_elements.xml +2 -5
- data/spec/lib/saxerator_spec.rb +6 -0
- data/spec/spec_helper.rb +5 -3
- metadata +9 -57
- data/Guardfile +0 -13
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Saxerator
|
2
2
|
=========
|
3
3
|
|
4
|
-
Saxerator is a SAX-based xml parser designed for parsing very large files into manageable chunks. Rather than
|
4
|
+
Saxerator is a SAX-based xml-to-hash parser designed for parsing very large files into manageable chunks. Rather than
|
5
5
|
dealing directly with SAX callback methods, Saxerator gives you Enumerable access to chunks of an xml document.
|
6
6
|
This approach is ideal for large xml files containing a collection of elements that you can process
|
7
7
|
independently.
|
@@ -24,6 +24,18 @@ end
|
|
24
24
|
puts "First title: #{parser.for_tag(:title).first}"
|
25
25
|
```
|
26
26
|
|
27
|
+
```ruby
|
28
|
+
# attributes are stored as a part of the Hash or String object they relate to
|
29
|
+
|
30
|
+
# author behaves like a String here, but also responds to .attributes
|
31
|
+
primary_authors = parser.for_tag(:author).select { |author| author.attributes['type'] == 'primary' }
|
32
|
+
puts "Primary authors: #{primary_authors.join(", ")}"
|
33
|
+
|
34
|
+
# item behaves like a Hash, but also responds to .attributes
|
35
|
+
favorite_items = parser.for_tag(:item).select { |item| item.attributes['favorite'] }
|
36
|
+
puts "First favorite title: #{favorite_items[0]['title']}"
|
37
|
+
```
|
38
|
+
|
27
39
|
Compatibility
|
28
40
|
-------------
|
29
41
|
Known compatible rubies:
|
data/Rakefile
CHANGED
data/lib/saxerator.rb
CHANGED
data/lib/saxerator/version.rb
CHANGED
data/lib/saxerator/xml_node.rb
CHANGED
@@ -15,20 +15,29 @@ module Saxerator
|
|
15
15
|
children << node
|
16
16
|
end
|
17
17
|
|
18
|
+
def to_s
|
19
|
+
string = StringWithAttributes.new(@text ? children.join : children.to_s)
|
20
|
+
string.attributes = attributes
|
21
|
+
string
|
22
|
+
end
|
23
|
+
|
18
24
|
def to_hash
|
19
25
|
if @text
|
20
|
-
|
26
|
+
to_s
|
21
27
|
else
|
22
|
-
out = {}
|
28
|
+
out = HashWithAttributes.new
|
29
|
+
out.attributes = attributes
|
30
|
+
|
23
31
|
@children.each do |child|
|
24
32
|
name = child.name
|
33
|
+
element = child.to_hash
|
25
34
|
if out[name]
|
26
35
|
if !out[name].is_a?(Array)
|
27
36
|
out[name] = [out[name]]
|
28
37
|
end
|
29
|
-
out[name] << child.to_hash
|
38
|
+
out[name] << element
|
30
39
|
else
|
31
|
-
out[name] = child.to_hash
|
40
|
+
out[name] = element
|
32
41
|
end
|
33
42
|
end
|
34
43
|
out
|
data/saxerator.gemspec
CHANGED
@@ -9,9 +9,9 @@ Gem::Specification.new do |s|
|
|
9
9
|
s.authors = ['Bradley Schaefer']
|
10
10
|
s.email = ['bradley.schaefer@gmail.com']
|
11
11
|
s.homepage = 'https://github.com/soulcutter/saxerator'
|
12
|
-
s.summary = 'A SAX-based XML parser for parsing large files into manageable chunks'
|
12
|
+
s.summary = 'A SAX-based XML-to-hash parser for parsing large files into manageable chunks'
|
13
13
|
s.description = <<-eos
|
14
|
-
Saxerator is a SAX-based xml parser designed for parsing very large files into manageable chunks. Rather than
|
14
|
+
Saxerator is a SAX-based xml-to-hash parser designed for parsing very large files into manageable chunks. Rather than
|
15
15
|
dealing directly with SAX callback methods, Saxerator gives you Enumerable access to chunks of an xml document.
|
16
16
|
This approach is ideal for large xml files containing a collection of elements that you can process
|
17
17
|
independently.
|
@@ -28,7 +28,6 @@ Gem::Specification.new do |s|
|
|
28
28
|
'spec/**/*.*',
|
29
29
|
'benchmark/**/*.rb',
|
30
30
|
'Gemfile',
|
31
|
-
'Guardfile',
|
32
31
|
'Rakefile',
|
33
32
|
'.rvmrc',
|
34
33
|
'.gitignore'
|
@@ -41,9 +40,6 @@ Gem::Specification.new do |s|
|
|
41
40
|
|
42
41
|
s.add_development_dependency 'rake'
|
43
42
|
s.add_development_dependency 'rspec'
|
44
|
-
s.add_development_dependency 'guard'
|
45
|
-
s.add_development_dependency 'guard-bundler'
|
46
|
-
s.add_development_dependency 'guard-rspec'
|
47
43
|
s.add_development_dependency 'simplecov'
|
48
44
|
s.add_development_dependency 'ipsum'
|
49
45
|
end
|
data/spec/fixtures/nested_elements.xml
CHANGED
@@ -11,15 +11,12 @@
|
|
11
11
|
<updated>2012-01-01T16:17:00-06:00</updated>
|
12
12
|
<link type="text/html" href="https://example.com/blog/how-to-eat-an-airplane" rel="alternate"/>
|
13
13
|
<title>How to eat an airplane</title>
|
14
|
-
<content type="html"><p>Airplanes are very large
|
15
|
-
|
16
|
-
<p>The key is to break it down into bite-sized portions and go slowly.</p>
|
17
|
-
</content>
|
14
|
+
<content type="html"><p>Airplanes are very large — this can present difficulty in digestion.</p></content>
|
18
15
|
<media:thumbnail url="http://www.gravatar.com/avatar/a9eb6ba22e482b71b266daadf9c9a080?s=80"/>
|
19
16
|
<author>
|
20
17
|
<name>Soulcutter</name>
|
21
18
|
</author>
|
22
|
-
<contributor>
|
19
|
+
<contributor type="primary">
|
23
20
|
<name>Jane Doe</name>
|
24
21
|
</contributor>
|
25
22
|
<contributor>
|
data/spec/lib/saxerator_spec.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
require 'spec_helper'
|
2
4
|
|
3
5
|
describe Saxerator do
|
@@ -55,6 +57,10 @@ describe Saxerator do
|
|
55
57
|
specify { subject['title'].should == 'How to eat an airplane' }
|
56
58
|
specify { subject['author'].should == {'name' => 'Soulcutter'} }
|
57
59
|
specify { subject['contributor'].should == [{'name' => 'Jane Doe'}, {'name' => 'Leviticus Alabaster'}] }
|
60
|
+
specify { subject['content'].should == "<p>Airplanes are very large — this can present difficulty in digestion.</p>"}
|
61
|
+
specify { subject['content'].attributes['type'].should == 'html' }
|
62
|
+
specify { subject['contributor'][0].attributes['type'].should == 'primary' }
|
63
|
+
specify { subject['contributor'][0]['name'].should == 'Jane Doe' }
|
58
64
|
end
|
59
65
|
end
|
60
66
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxerator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
12
|
+
date: 2012-04-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -59,54 +59,6 @@ dependencies:
|
|
59
59
|
- - ! '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
|
-
- !ruby/object:Gem::Dependency
|
63
|
-
name: guard
|
64
|
-
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
|
-
requirements:
|
67
|
-
- - ! '>='
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '0'
|
70
|
-
type: :development
|
71
|
-
prerelease: false
|
72
|
-
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
|
-
requirements:
|
75
|
-
- - ! '>='
|
76
|
-
- !ruby/object:Gem::Version
|
77
|
-
version: '0'
|
78
|
-
- !ruby/object:Gem::Dependency
|
79
|
-
name: guard-bundler
|
80
|
-
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
|
-
requirements:
|
83
|
-
- - ! '>='
|
84
|
-
- !ruby/object:Gem::Version
|
85
|
-
version: '0'
|
86
|
-
type: :development
|
87
|
-
prerelease: false
|
88
|
-
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
|
-
requirements:
|
91
|
-
- - ! '>='
|
92
|
-
- !ruby/object:Gem::Version
|
93
|
-
version: '0'
|
94
|
-
- !ruby/object:Gem::Dependency
|
95
|
-
name: guard-rspec
|
96
|
-
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
|
-
requirements:
|
99
|
-
- - ! '>='
|
100
|
-
- !ruby/object:Gem::Version
|
101
|
-
version: '0'
|
102
|
-
type: :development
|
103
|
-
prerelease: false
|
104
|
-
version_requirements: !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
|
-
requirements:
|
107
|
-
- - ! '>='
|
108
|
-
- !ruby/object:Gem::Version
|
109
|
-
version: '0'
|
110
62
|
- !ruby/object:Gem::Dependency
|
111
63
|
name: simplecov
|
112
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -139,11 +91,11 @@ dependencies:
|
|
139
91
|
- - ! '>='
|
140
92
|
- !ruby/object:Gem::Version
|
141
93
|
version: '0'
|
142
|
-
description: ! " Saxerator is a SAX-based xml parser designed for parsing very
|
143
|
-
large files into manageable chunks. Rather than\n dealing directly with SAX callback
|
144
|
-
methods, Saxerator gives you Enumerable access to chunks of an xml document.\n This
|
145
|
-
approach is ideal for large xml files containing a collection of elements that you
|
146
|
-
can process\n independently.\n"
|
94
|
+
description: ! " Saxerator is a SAX-based xml-to-hash parser designed for parsing
|
95
|
+
very large files into manageable chunks. Rather than\n dealing directly with
|
96
|
+
SAX callback methods, Saxerator gives you Enumerable access to chunks of an xml
|
97
|
+
document.\n This approach is ideal for large xml files containing a collection
|
98
|
+
of elements that you can process\n independently.\n"
|
147
99
|
email:
|
148
100
|
- bradley.schaefer@gmail.com
|
149
101
|
executables: []
|
@@ -154,6 +106,7 @@ files:
|
|
154
106
|
- README.md
|
155
107
|
- saxerator.gemspec
|
156
108
|
- lib/saxerator/configuration.rb
|
109
|
+
- lib/saxerator/hash_with_attributes.rb
|
157
110
|
- lib/saxerator/parser/nokogiri.rb
|
158
111
|
- lib/saxerator/string_with_attributes.rb
|
159
112
|
- lib/saxerator/version.rb
|
@@ -166,7 +119,6 @@ files:
|
|
166
119
|
- benchmark/benchmark.rb
|
167
120
|
- benchmark/generate_sample_file.rb
|
168
121
|
- Gemfile
|
169
|
-
- Guardfile
|
170
122
|
- Rakefile
|
171
123
|
- .rvmrc
|
172
124
|
- .gitignore
|
@@ -194,7 +146,7 @@ rubyforge_project: saxerator
|
|
194
146
|
rubygems_version: 1.8.21
|
195
147
|
signing_key:
|
196
148
|
specification_version: 3
|
197
|
-
summary: A SAX-based XML parser for parsing large files into manageable chunks
|
149
|
+
summary: A SAX-based XML-to-hash parser for parsing large files into manageable chunks
|
198
150
|
test_files:
|
199
151
|
- spec/fixtures/flat_blurbs.xml
|
200
152
|
- spec/fixtures/nested_elements.xml
|
data/Guardfile
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
# More info at https://github.com/guard/guard#readme
|
2
|
-
|
3
|
-
guard :bundler do
|
4
|
-
watch('Gemfile')
|
5
|
-
watch(/^saxerator\.gemspec$/)
|
6
|
-
end
|
7
|
-
|
8
|
-
guard :rspec, :cli => '--color' do
|
9
|
-
watch(%r{^spec/.+_spec\.rb$})
|
10
|
-
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
|
11
|
-
watch(%r{^spec/fixtures/.+\.xml$}) { :spec }
|
12
|
-
watch('spec/spec_helper.rb') { :spec }
|
13
|
-
end
|