saxerator 0.0.1 → 0.0.2

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
data/Guardfile CHANGED
@@ -5,7 +5,7 @@ guard :bundler do
5
5
  watch(/^saxerator\.gemspec$/)
6
6
  end
7
7
 
8
- guard :rspec, :cli => '--color --format doc' do
8
+ guard :rspec, :cli => '--color' do
9
9
  watch(%r{^spec/.+_spec\.rb$})
10
10
  watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
11
11
  watch(%r{^spec/fixtures/.+\.xml$}) { :spec }
data/README.md CHANGED
@@ -3,31 +3,35 @@ Saxerator
3
3
 
4
4
  Saxerator is a SAX-based xml parser designed for parsing very large files into manageable chunks. Rather than
5
5
  dealing directly with SAX callback methods, Saxerator gives you Enumerable access to chunks of an xml document.
6
- This approach is ideal for large xml files that represent a collection of elements.
6
+ This approach is ideal for large xml files containing a collection of elements that you can process
7
+ independently.
8
+
9
+ Each xml chunk is parsed into a JSON-like Ruby Hash structure for consumption.
7
10
 
8
11
  Examples
9
12
  --------
10
13
 
11
14
  ```ruby
12
- Saxerator.parser(File.new("rss.xml")).for_tag(:item).each do |item|
13
- puts "#{item['title']}: #{item['author']}"
15
+ parser = Saxerator.parser(File.new("rss.xml"))
16
+
17
+ parser.for_tag(:item).each do |item|
18
+ # where the xml contains <item><title>...</title><author>...</author></item>
19
+ # item will look like {'title' => '...', 'author' => '...'}
20
+ puts "#{item['title']}: #{item['author']}"
14
21
  end
22
+
23
+ # a String is returned here since the given element contains only character data
24
+ puts "First title: #{parser.for_tag(:title).first}"
15
25
  ```
16
26
 
17
27
  Compatibility
18
28
  -------------
19
- This library is known to work with the following rubies:
29
+ Known compatible rubies:
20
30
 
21
31
  * MRI 1.9.3-p125
22
32
  * MRI 1.9.2-p318
23
33
  * JRuby 1.6.7 (with JRUBY_OPTS=--1.9)
24
34
 
25
- Saxerator may work with other versions with support for Fiber.
26
-
27
- Known incompatible rubies:
28
-
29
- * MRI 1.9.2-p290 (Fiber segfaults)
30
-
31
35
  FAQ
32
36
  ---
33
37
  Why the name 'Saxerator'?
@@ -7,8 +7,7 @@ module Saxerator
7
7
  end
8
8
 
9
9
  def for_tag(tag)
10
- tag = tag.to_s
11
- Saxerator::Parser::Nokogiri.new(self, source, tag)
10
+ Saxerator::Parser::Nokogiri.new(self, source, tag.to_s)
12
11
  end
13
12
  end
14
13
  end
@@ -1,5 +1,4 @@
1
1
  require 'nokogiri'
2
- require 'fiber'
3
2
 
4
3
  module Saxerator
5
4
  module Parser
@@ -13,27 +12,23 @@ module Saxerator
13
12
  end
14
13
 
15
14
  def each(&block)
16
- begin
17
- fiber = Fiber.new do
18
- document = Document.new(@config, @tag)
19
- parser = ::Nokogiri::XML::SAX::Parser.new document
20
- parser.parse(@source)
21
- end
22
- while fiber.alive? do
23
- result = fiber.resume
24
- yield(result) unless result.nil?
25
- end
26
- rescue FiberError
27
- end
15
+ document = Document.new(@config, @tag, block)
16
+ parser = ::Nokogiri::XML::SAX::Parser.new document
17
+
18
+ # Always have to start at the beginning of a File
19
+ @source.rewind if(@source.is_a?(File))
20
+
21
+ parser.parse(@source)
28
22
  end
29
23
 
30
24
  class Document < ::Nokogiri::XML::SAX::Document
31
25
  attr_accessor :stack
32
26
 
33
- def initialize(config, tag)
27
+ def initialize(config, tag, block)
34
28
  @config = config
35
29
  @tag = tag
36
30
  @stack = []
31
+ @block = block
37
32
  end
38
33
 
39
34
  def start_element(name, attrs = [])
@@ -47,7 +42,7 @@ module Saxerator
47
42
  last = stack.pop
48
43
  stack.last.add_node last
49
44
  elsif stack.size == 1
50
- Fiber.yield(stack.pop.to_hash)
45
+ @block.yield(stack.pop.to_hash)
51
46
  end
52
47
  end
53
48
 
@@ -1,3 +1,3 @@
1
1
  module Saxerator
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/saxerator.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  $:.push File.expand_path('../lib', __FILE__)
3
3
  require 'saxerator/version'
4
- require 'rubygems/package_task'
4
+ require 'rake' # for FileList
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = 'saxerator'
@@ -10,9 +10,13 @@ Gem::Specification.new do |s|
10
10
  s.email = ['bradley.schaefer@gmail.com']
11
11
  s.homepage = 'https://github.com/soulcutter/saxerator'
12
12
  s.summary = 'A SAX-based XML parser for parsing large files into manageable chunks'
13
- s.description = 'A SAX-based XML parser for parsing large files into manageable chunks'
14
-
15
- s.required_ruby_version = '>= 1.9.2'
13
+ s.description = <<-eos
14
+ Saxerator is a SAX-based xml parser designed for parsing very large files into manageable chunks. Rather than
15
+ dealing directly with SAX callback methods, Saxerator gives you Enumerable access to chunks of an xml document.
16
+ This approach is ideal for large xml files containing a collection of elements that you can process
17
+ independently.
18
+ eos
19
+ s.license = 'MIT'
16
20
 
17
21
  s.rubyforge_project = 'saxerator'
18
22
 
@@ -35,6 +39,7 @@ Gem::Specification.new do |s|
35
39
 
36
40
  s.add_runtime_dependency 'nokogiri'
37
41
 
42
+ s.add_development_dependency 'rake'
38
43
  s.add_development_dependency 'rspec'
39
44
  s.add_development_dependency 'guard'
40
45
  s.add_development_dependency 'guard-bundler'
@@ -38,6 +38,14 @@ describe Saxerator do
38
38
  subject.for_tag(:blurb).each { |x| results << x }
39
39
  results.should == ['one', 'two', 'three']
40
40
  end
41
+
42
+ it "should allow multiple operations on the same parser" do
43
+ # This exposes a bug where if a File is not reset only the first
44
+ # Enumerable method works as expected
45
+ subject.for_tag(:blurb).first.should == 'one'
46
+ subject.for_tag(:blurb).first.should == 'one'
47
+ end
48
+
41
49
  end
42
50
 
43
51
  context "with a file with nested elements" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: saxerator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-07 00:00:00.000000000 Z
12
+ date: 2012-04-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &70348029624860 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,31 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70348029624860
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
25
46
  - !ruby/object:Gem::Dependency
26
47
  name: rspec
27
- requirement: &70348029624300 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
28
49
  none: false
29
50
  requirements:
30
51
  - - ! '>='
@@ -32,10 +53,15 @@ dependencies:
32
53
  version: '0'
33
54
  type: :development
34
55
  prerelease: false
35
- version_requirements: *70348029624300
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
36
62
  - !ruby/object:Gem::Dependency
37
63
  name: guard
38
- requirement: &70348029623840 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
39
65
  none: false
40
66
  requirements:
41
67
  - - ! '>='
@@ -43,10 +69,15 @@ dependencies:
43
69
  version: '0'
44
70
  type: :development
45
71
  prerelease: false
46
- version_requirements: *70348029623840
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
47
78
  - !ruby/object:Gem::Dependency
48
79
  name: guard-bundler
49
- requirement: &70348029623320 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
50
81
  none: false
51
82
  requirements:
52
83
  - - ! '>='
@@ -54,10 +85,15 @@ dependencies:
54
85
  version: '0'
55
86
  type: :development
56
87
  prerelease: false
57
- version_requirements: *70348029623320
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
58
94
  - !ruby/object:Gem::Dependency
59
95
  name: guard-rspec
60
- requirement: &70348029622860 !ruby/object:Gem::Requirement
96
+ requirement: !ruby/object:Gem::Requirement
61
97
  none: false
62
98
  requirements:
63
99
  - - ! '>='
@@ -65,10 +101,15 @@ dependencies:
65
101
  version: '0'
66
102
  type: :development
67
103
  prerelease: false
68
- version_requirements: *70348029622860
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
69
110
  - !ruby/object:Gem::Dependency
70
111
  name: simplecov
71
- requirement: &70348029622300 !ruby/object:Gem::Requirement
112
+ requirement: !ruby/object:Gem::Requirement
72
113
  none: false
73
114
  requirements:
74
115
  - - ! '>='
@@ -76,10 +117,15 @@ dependencies:
76
117
  version: '0'
77
118
  type: :development
78
119
  prerelease: false
79
- version_requirements: *70348029622300
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
80
126
  - !ruby/object:Gem::Dependency
81
127
  name: ipsum
82
- requirement: &70348029621880 !ruby/object:Gem::Requirement
128
+ requirement: !ruby/object:Gem::Requirement
83
129
  none: false
84
130
  requirements:
85
131
  - - ! '>='
@@ -87,8 +133,17 @@ dependencies:
87
133
  version: '0'
88
134
  type: :development
89
135
  prerelease: false
90
- version_requirements: *70348029621880
91
- description: A SAX-based XML parser for parsing large files into manageable chunks
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ description: ! " Saxerator is a SAX-based xml parser designed for parsing very
143
+ large files into manageable chunks. Rather than\n dealing directly with SAX callback
144
+ methods, Saxerator gives you Enumerable access to chunks of an xml document.\n This
145
+ approach is ideal for large xml files containing a collection of elements that you
146
+ can process\n independently.\n"
92
147
  email:
93
148
  - bradley.schaefer@gmail.com
94
149
  executables: []
@@ -116,7 +171,8 @@ files:
116
171
  - .rvmrc
117
172
  - .gitignore
118
173
  homepage: https://github.com/soulcutter/saxerator
119
- licenses: []
174
+ licenses:
175
+ - MIT
120
176
  post_install_message:
121
177
  rdoc_options: []
122
178
  require_paths:
@@ -126,7 +182,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
126
182
  requirements:
127
183
  - - ! '>='
128
184
  - !ruby/object:Gem::Version
129
- version: 1.9.2
185
+ version: '0'
130
186
  required_rubygems_version: !ruby/object:Gem::Requirement
131
187
  none: false
132
188
  requirements:
@@ -135,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
135
191
  version: '0'
136
192
  requirements: []
137
193
  rubyforge_project: saxerator
138
- rubygems_version: 1.8.11
194
+ rubygems_version: 1.8.21
139
195
  signing_key:
140
196
  specification_version: 3
141
197
  summary: A SAX-based XML parser for parsing large files into manageable chunks