saxerator 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/Guardfile CHANGED
@@ -5,7 +5,7 @@ guard :bundler do
5
5
  watch(/^saxerator\.gemspec$/)
6
6
  end
7
7
 
8
- guard :rspec, :cli => '--color --format doc' do
8
+ guard :rspec, :cli => '--color' do
9
9
  watch(%r{^spec/.+_spec\.rb$})
10
10
  watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
11
11
  watch(%r{^spec/fixtures/.+\.xml$}) { :spec }
data/README.md CHANGED
@@ -3,31 +3,35 @@ Saxerator
3
3
 
4
4
  Saxerator is a SAX-based xml parser designed for parsing very large files into manageable chunks. Rather than
5
5
  dealing directly with SAX callback methods, Saxerator gives you Enumerable access to chunks of an xml document.
6
- This approach is ideal for large xml files that represent a collection of elements.
6
+ This approach is ideal for large xml files containing a collection of elements that you can process
7
+ independently.
8
+
9
+ Each xml chunk is parsed into a JSON-like Ruby Hash structure for consumption.
7
10
 
8
11
  Examples
9
12
  --------
10
13
 
11
14
  ```ruby
12
- Saxerator.parser(File.new("rss.xml")).for_tag(:item).each do |item|
13
- puts "#{item['title']}: #{item['author']}"
15
+ parser = Saxerator.parser(File.new("rss.xml"))
16
+
17
+ parser.for_tag(:item).each do |item|
18
+ # where the xml contains <item><title>...</title><author>...</author></item>
19
+ # item will look like {'title' => '...', 'author' => '...'}
20
+ puts "#{item['title']}: #{item['author']}"
14
21
  end
22
+
23
+ # a String is returned here since the given element contains only character data
24
+ puts "First title: #{parser.for_tag(:title).first}"
15
25
  ```
16
26
 
17
27
  Compatibility
18
28
  -------------
19
- This library is known to work with the following rubies:
29
+ Known compatible rubies:
20
30
 
21
31
  * MRI 1.9.3-p125
22
32
  * MRI 1.9.2-p318
23
33
  * JRuby 1.6.7 (with JRUBY_OPTS=--1.9)
24
34
 
25
- Saxerator may work with other versions with support for Fiber.
26
-
27
- Known incompatible rubies:
28
-
29
- * MRI 1.9.2-p290 (Fiber segfaults)
30
-
31
35
  FAQ
32
36
  ---
33
37
  Why the name 'Saxerator'?
@@ -7,8 +7,7 @@ module Saxerator
7
7
  end
8
8
 
9
9
  def for_tag(tag)
10
- tag = tag.to_s
11
- Saxerator::Parser::Nokogiri.new(self, source, tag)
10
+ Saxerator::Parser::Nokogiri.new(self, source, tag.to_s)
12
11
  end
13
12
  end
14
13
  end
@@ -1,5 +1,4 @@
1
1
  require 'nokogiri'
2
- require 'fiber'
3
2
 
4
3
  module Saxerator
5
4
  module Parser
@@ -13,27 +12,23 @@ module Saxerator
13
12
  end
14
13
 
15
14
  def each(&block)
16
- begin
17
- fiber = Fiber.new do
18
- document = Document.new(@config, @tag)
19
- parser = ::Nokogiri::XML::SAX::Parser.new document
20
- parser.parse(@source)
21
- end
22
- while fiber.alive? do
23
- result = fiber.resume
24
- yield(result) unless result.nil?
25
- end
26
- rescue FiberError
27
- end
15
+ document = Document.new(@config, @tag, block)
16
+ parser = ::Nokogiri::XML::SAX::Parser.new document
17
+
18
+ # A File source must be rewound first so that every parse starts from the beginning
19
+ @source.rewind if(@source.is_a?(File))
20
+
21
+ parser.parse(@source)
28
22
  end
29
23
 
30
24
  class Document < ::Nokogiri::XML::SAX::Document
31
25
  attr_accessor :stack
32
26
 
33
- def initialize(config, tag)
27
+ def initialize(config, tag, block)
34
28
  @config = config
35
29
  @tag = tag
36
30
  @stack = []
31
+ @block = block
37
32
  end
38
33
 
39
34
  def start_element(name, attrs = [])
@@ -47,7 +42,7 @@ module Saxerator
47
42
  last = stack.pop
48
43
  stack.last.add_node last
49
44
  elsif stack.size == 1
50
- Fiber.yield(stack.pop.to_hash)
45
+ @block.yield(stack.pop.to_hash)
51
46
  end
52
47
  end
53
48
 
@@ -1,3 +1,3 @@
1
1
  module Saxerator
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/saxerator.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  $:.push File.expand_path('../lib', __FILE__)
3
3
  require 'saxerator/version'
4
- require 'rubygems/package_task'
4
+ require 'rake' # for FileList
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = 'saxerator'
@@ -10,9 +10,13 @@ Gem::Specification.new do |s|
10
10
  s.email = ['bradley.schaefer@gmail.com']
11
11
  s.homepage = 'https://github.com/soulcutter/saxerator'
12
12
  s.summary = 'A SAX-based XML parser for parsing large files into manageable chunks'
13
- s.description = 'A SAX-based XML parser for parsing large files into manageable chunks'
14
-
15
- s.required_ruby_version = '>= 1.9.2'
13
+ s.description = <<-eos
14
+ Saxerator is a SAX-based xml parser designed for parsing very large files into manageable chunks. Rather than
15
+ dealing directly with SAX callback methods, Saxerator gives you Enumerable access to chunks of an xml document.
16
+ This approach is ideal for large xml files containing a collection of elements that you can process
17
+ independently.
18
+ eos
19
+ s.license = 'MIT'
16
20
 
17
21
  s.rubyforge_project = 'saxerator'
18
22
 
@@ -35,6 +39,7 @@ Gem::Specification.new do |s|
35
39
 
36
40
  s.add_runtime_dependency 'nokogiri'
37
41
 
42
+ s.add_development_dependency 'rake'
38
43
  s.add_development_dependency 'rspec'
39
44
  s.add_development_dependency 'guard'
40
45
  s.add_development_dependency 'guard-bundler'
@@ -38,6 +38,14 @@ describe Saxerator do
38
38
  subject.for_tag(:blurb).each { |x| results << x }
39
39
  results.should == ['one', 'two', 'three']
40
40
  end
41
+
42
+ it "should allow multiple operations on the same parser" do
43
+ # This exposes a bug where, if a File is not rewound, only the first
44
+ # Enumerable method works as expected
45
+ subject.for_tag(:blurb).first.should == 'one'
46
+ subject.for_tag(:blurb).first.should == 'one'
47
+ end
48
+
41
49
  end
42
50
 
43
51
  context "with a file with nested elements" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: saxerator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-07 00:00:00.000000000 Z
12
+ date: 2012-04-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &70348029624860 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,31 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70348029624860
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
25
46
  - !ruby/object:Gem::Dependency
26
47
  name: rspec
27
- requirement: &70348029624300 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
28
49
  none: false
29
50
  requirements:
30
51
  - - ! '>='
@@ -32,10 +53,15 @@ dependencies:
32
53
  version: '0'
33
54
  type: :development
34
55
  prerelease: false
35
- version_requirements: *70348029624300
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
36
62
  - !ruby/object:Gem::Dependency
37
63
  name: guard
38
- requirement: &70348029623840 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
39
65
  none: false
40
66
  requirements:
41
67
  - - ! '>='
@@ -43,10 +69,15 @@ dependencies:
43
69
  version: '0'
44
70
  type: :development
45
71
  prerelease: false
46
- version_requirements: *70348029623840
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
47
78
  - !ruby/object:Gem::Dependency
48
79
  name: guard-bundler
49
- requirement: &70348029623320 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
50
81
  none: false
51
82
  requirements:
52
83
  - - ! '>='
@@ -54,10 +85,15 @@ dependencies:
54
85
  version: '0'
55
86
  type: :development
56
87
  prerelease: false
57
- version_requirements: *70348029623320
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
58
94
  - !ruby/object:Gem::Dependency
59
95
  name: guard-rspec
60
- requirement: &70348029622860 !ruby/object:Gem::Requirement
96
+ requirement: !ruby/object:Gem::Requirement
61
97
  none: false
62
98
  requirements:
63
99
  - - ! '>='
@@ -65,10 +101,15 @@ dependencies:
65
101
  version: '0'
66
102
  type: :development
67
103
  prerelease: false
68
- version_requirements: *70348029622860
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
69
110
  - !ruby/object:Gem::Dependency
70
111
  name: simplecov
71
- requirement: &70348029622300 !ruby/object:Gem::Requirement
112
+ requirement: !ruby/object:Gem::Requirement
72
113
  none: false
73
114
  requirements:
74
115
  - - ! '>='
@@ -76,10 +117,15 @@ dependencies:
76
117
  version: '0'
77
118
  type: :development
78
119
  prerelease: false
79
- version_requirements: *70348029622300
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
80
126
  - !ruby/object:Gem::Dependency
81
127
  name: ipsum
82
- requirement: &70348029621880 !ruby/object:Gem::Requirement
128
+ requirement: !ruby/object:Gem::Requirement
83
129
  none: false
84
130
  requirements:
85
131
  - - ! '>='
@@ -87,8 +133,17 @@ dependencies:
87
133
  version: '0'
88
134
  type: :development
89
135
  prerelease: false
90
- version_requirements: *70348029621880
91
- description: A SAX-based XML parser for parsing large files into manageable chunks
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ description: ! " Saxerator is a SAX-based xml parser designed for parsing very
143
+ large files into manageable chunks. Rather than\n dealing directly with SAX callback
144
+ methods, Saxerator gives you Enumerable access to chunks of an xml document.\n This
145
+ approach is ideal for large xml files containing a collection of elements that you
146
+ can process\n independently.\n"
92
147
  email:
93
148
  - bradley.schaefer@gmail.com
94
149
  executables: []
@@ -116,7 +171,8 @@ files:
116
171
  - .rvmrc
117
172
  - .gitignore
118
173
  homepage: https://github.com/soulcutter/saxerator
119
- licenses: []
174
+ licenses:
175
+ - MIT
120
176
  post_install_message:
121
177
  rdoc_options: []
122
178
  require_paths:
@@ -126,7 +182,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
126
182
  requirements:
127
183
  - - ! '>='
128
184
  - !ruby/object:Gem::Version
129
- version: 1.9.2
185
+ version: '0'
130
186
  required_rubygems_version: !ruby/object:Gem::Requirement
131
187
  none: false
132
188
  requirements:
@@ -135,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
135
191
  version: '0'
136
192
  requirements: []
137
193
  rubyforge_project: saxerator
138
- rubygems_version: 1.8.11
194
+ rubygems_version: 1.8.21
139
195
  signing_key:
140
196
  specification_version: 3
141
197
  summary: A SAX-based XML parser for parsing large files into manageable chunks