saxerator 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Guardfile +1 -1
- data/README.md +14 -10
- data/lib/saxerator/configuration.rb +1 -2
- data/lib/saxerator/parser/nokogiri.rb +10 -15
- data/lib/saxerator/version.rb +1 -1
- data/saxerator.gemspec +9 -4
- data/spec/lib/saxerator_spec.rb +8 -0
- metadata +76 -20
data/Guardfile
CHANGED
@@ -5,7 +5,7 @@ guard :bundler do
|
|
5
5
|
watch(/^saxerator\.gemspec$/)
|
6
6
|
end
|
7
7
|
|
8
|
-
guard :rspec, :cli => '--color
|
8
|
+
guard :rspec, :cli => '--color' do
|
9
9
|
watch(%r{^spec/.+_spec\.rb$})
|
10
10
|
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
|
11
11
|
watch(%r{^spec/fixtures/.+\.xml$}) { :spec }
|
data/README.md
CHANGED
@@ -3,31 +3,35 @@ Saxerator
|
|
3
3
|
|
4
4
|
Saxerator is a SAX-based xml parser designed for parsing very large files into manageable chunks. Rather than
|
5
5
|
dealing directly with SAX callback methods, Saxerator gives you Enumerable access to chunks of an xml document.
|
6
|
-
This approach is ideal for large xml files
|
6
|
+
This approach is ideal for large xml files containing a collection of elements that you can process
|
7
|
+
independently.
|
8
|
+
|
9
|
+
Each xml chunk is parsed into a JSON-like Ruby Hash structure for consumption.
|
7
10
|
|
8
11
|
Examples
|
9
12
|
--------
|
10
13
|
|
11
14
|
```ruby
|
12
|
-
Saxerator.parser(File.new("rss.xml"))
|
13
|
-
|
15
|
+
parser = Saxerator.parser(File.new("rss.xml"))
|
16
|
+
|
17
|
+
parser.for_tag(:item).each do |item|
|
18
|
+
# where the xml contains <item><title>...</title><author>...</author></item>
|
19
|
+
# item will look like {'title' => '...', 'author' => '...'}
|
20
|
+
puts "#{item['title']}: #{item['author']}"
|
14
21
|
end
|
22
|
+
|
23
|
+
# a String is returned here since the given element contains only character data
|
24
|
+
puts "First title: #{parser.for_tag(:title).first}"
|
15
25
|
```
|
16
26
|
|
17
27
|
Compatibility
|
18
28
|
-------------
|
19
|
-
|
29
|
+
Known compatible rubies:
|
20
30
|
|
21
31
|
* MRI 1.9.3-p125
|
22
32
|
* MRI 1.9.2-p318
|
23
33
|
* JRuby 1.6.7 (with JRUBY_OPTS=--1.9)
|
24
34
|
|
25
|
-
Saxerator may work with other versions with support for Fiber.
|
26
|
-
|
27
|
-
Known incompatible rubies:
|
28
|
-
|
29
|
-
* MRI 1.9.2-p290 (Fiber segfaults)
|
30
|
-
|
31
35
|
FAQ
|
32
36
|
---
|
33
37
|
Why the name 'Saxerator'?
|
data/lib/saxerator/parser/nokogiri.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
-
require 'fiber'
|
3
2
|
|
4
3
|
module Saxerator
|
5
4
|
module Parser
|
@@ -13,27 +12,23 @@ module Saxerator
|
|
13
12
|
end
|
14
13
|
|
15
14
|
def each(&block)
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
result = fiber.resume
|
24
|
-
yield(result) unless result.nil?
|
25
|
-
end
|
26
|
-
rescue FiberError
|
27
|
-
end
|
15
|
+
document = Document.new(@config, @tag, block)
|
16
|
+
parser = ::Nokogiri::XML::SAX::Parser.new document
|
17
|
+
|
18
|
+
# Always have to start at the beginning of a File
|
19
|
+
@source.rewind if(@source.is_a?(File))
|
20
|
+
|
21
|
+
parser.parse(@source)
|
28
22
|
end
|
29
23
|
|
30
24
|
class Document < ::Nokogiri::XML::SAX::Document
|
31
25
|
attr_accessor :stack
|
32
26
|
|
33
|
-
def initialize(config, tag)
|
27
|
+
def initialize(config, tag, block)
|
34
28
|
@config = config
|
35
29
|
@tag = tag
|
36
30
|
@stack = []
|
31
|
+
@block = block
|
37
32
|
end
|
38
33
|
|
39
34
|
def start_element(name, attrs = [])
|
@@ -47,7 +42,7 @@ module Saxerator
|
|
47
42
|
last = stack.pop
|
48
43
|
stack.last.add_node last
|
49
44
|
elsif stack.size == 1
|
50
|
-
|
45
|
+
@block.yield(stack.pop.to_hash)
|
51
46
|
end
|
52
47
|
end
|
53
48
|
|
data/lib/saxerator/version.rb
CHANGED
data/saxerator.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
$:.push File.expand_path('../lib', __FILE__)
|
3
3
|
require 'saxerator/version'
|
4
|
-
require '
|
4
|
+
require 'rake' # for FileList
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = 'saxerator'
|
@@ -10,9 +10,13 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.email = ['bradley.schaefer@gmail.com']
|
11
11
|
s.homepage = 'https://github.com/soulcutter/saxerator'
|
12
12
|
s.summary = 'A SAX-based XML parser for parsing large files into manageable chunks'
|
13
|
-
s.description =
|
14
|
-
|
15
|
-
|
13
|
+
s.description = <<-eos
|
14
|
+
Saxerator is a SAX-based xml parser designed for parsing very large files into manageable chunks. Rather than
|
15
|
+
dealing directly with SAX callback methods, Saxerator gives you Enumerable access to chunks of an xml document.
|
16
|
+
This approach is ideal for large xml files containing a collection of elements that you can process
|
17
|
+
independently.
|
18
|
+
eos
|
19
|
+
s.license = 'MIT'
|
16
20
|
|
17
21
|
s.rubyforge_project = 'saxerator'
|
18
22
|
|
@@ -35,6 +39,7 @@ Gem::Specification.new do |s|
|
|
35
39
|
|
36
40
|
s.add_runtime_dependency 'nokogiri'
|
37
41
|
|
42
|
+
s.add_development_dependency 'rake'
|
38
43
|
s.add_development_dependency 'rspec'
|
39
44
|
s.add_development_dependency 'guard'
|
40
45
|
s.add_development_dependency 'guard-bundler'
|
data/spec/lib/saxerator_spec.rb
CHANGED
@@ -38,6 +38,14 @@ describe Saxerator do
|
|
38
38
|
subject.for_tag(:blurb).each { |x| results << x }
|
39
39
|
results.should == ['one', 'two', 'three']
|
40
40
|
end
|
41
|
+
|
42
|
+
it "should allow multiple operations on the same parser" do
|
43
|
+
# This exposes a bug where if a File is not reset only the first
|
44
|
+
# Enumerable method works as expected
|
45
|
+
subject.for_tag(:blurb).first.should == 'one'
|
46
|
+
subject.for_tag(:blurb).first.should == 'one'
|
47
|
+
end
|
48
|
+
|
41
49
|
end
|
42
50
|
|
43
51
|
context "with a file with nested elements" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxerator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
12
|
+
date: 2012-04-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,31 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
25
46
|
- !ruby/object:Gem::Dependency
|
26
47
|
name: rspec
|
27
|
-
requirement:
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
28
49
|
none: false
|
29
50
|
requirements:
|
30
51
|
- - ! '>='
|
@@ -32,10 +53,15 @@ dependencies:
|
|
32
53
|
version: '0'
|
33
54
|
type: :development
|
34
55
|
prerelease: false
|
35
|
-
version_requirements:
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
36
62
|
- !ruby/object:Gem::Dependency
|
37
63
|
name: guard
|
38
|
-
requirement:
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
39
65
|
none: false
|
40
66
|
requirements:
|
41
67
|
- - ! '>='
|
@@ -43,10 +69,15 @@ dependencies:
|
|
43
69
|
version: '0'
|
44
70
|
type: :development
|
45
71
|
prerelease: false
|
46
|
-
version_requirements:
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
47
78
|
- !ruby/object:Gem::Dependency
|
48
79
|
name: guard-bundler
|
49
|
-
requirement:
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
50
81
|
none: false
|
51
82
|
requirements:
|
52
83
|
- - ! '>='
|
@@ -54,10 +85,15 @@ dependencies:
|
|
54
85
|
version: '0'
|
55
86
|
type: :development
|
56
87
|
prerelease: false
|
57
|
-
version_requirements:
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
58
94
|
- !ruby/object:Gem::Dependency
|
59
95
|
name: guard-rspec
|
60
|
-
requirement:
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
61
97
|
none: false
|
62
98
|
requirements:
|
63
99
|
- - ! '>='
|
@@ -65,10 +101,15 @@ dependencies:
|
|
65
101
|
version: '0'
|
66
102
|
type: :development
|
67
103
|
prerelease: false
|
68
|
-
version_requirements:
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
69
110
|
- !ruby/object:Gem::Dependency
|
70
111
|
name: simplecov
|
71
|
-
requirement:
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
72
113
|
none: false
|
73
114
|
requirements:
|
74
115
|
- - ! '>='
|
@@ -76,10 +117,15 @@ dependencies:
|
|
76
117
|
version: '0'
|
77
118
|
type: :development
|
78
119
|
prerelease: false
|
79
|
-
version_requirements:
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
80
126
|
- !ruby/object:Gem::Dependency
|
81
127
|
name: ipsum
|
82
|
-
requirement:
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
83
129
|
none: false
|
84
130
|
requirements:
|
85
131
|
- - ! '>='
|
@@ -87,8 +133,17 @@ dependencies:
|
|
87
133
|
version: '0'
|
88
134
|
type: :development
|
89
135
|
prerelease: false
|
90
|
-
version_requirements:
|
91
|
-
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ! '>='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
142
|
+
description: ! " Saxerator is a SAX-based xml parser designed for parsing very
|
143
|
+
large files into manageable chunks. Rather than\n dealing directly with SAX callback
|
144
|
+
methods, Saxerator gives you Enumerable access to chunks of an xml document.\n This
|
145
|
+
approach is ideal for large xml files containing a collection of elements that you
|
146
|
+
can process\n independently.\n"
|
92
147
|
email:
|
93
148
|
- bradley.schaefer@gmail.com
|
94
149
|
executables: []
|
@@ -116,7 +171,8 @@ files:
|
|
116
171
|
- .rvmrc
|
117
172
|
- .gitignore
|
118
173
|
homepage: https://github.com/soulcutter/saxerator
|
119
|
-
licenses:
|
174
|
+
licenses:
|
175
|
+
- MIT
|
120
176
|
post_install_message:
|
121
177
|
rdoc_options: []
|
122
178
|
require_paths:
|
@@ -126,7 +182,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
126
182
|
requirements:
|
127
183
|
- - ! '>='
|
128
184
|
- !ruby/object:Gem::Version
|
129
|
-
version:
|
185
|
+
version: '0'
|
130
186
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
131
187
|
none: false
|
132
188
|
requirements:
|
@@ -135,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
135
191
|
version: '0'
|
136
192
|
requirements: []
|
137
193
|
rubyforge_project: saxerator
|
138
|
-
rubygems_version: 1.8.
|
194
|
+
rubygems_version: 1.8.21
|
139
195
|
signing_key:
|
140
196
|
specification_version: 3
|
141
197
|
summary: A SAX-based XML parser for parsing large files into manageable chunks
|