saxony 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (8) hide show
  1. data/CHANGES.txt +5 -0
  2. data/LICENSE.txt +19 -0
  3. data/README.md +24 -0
  4. data/Rakefile +55 -0
  5. data/Rudyfile +227 -0
  6. data/lib/saxony.rb +122 -0
  7. data/saxony.gemspec +33 -0
  8. metadata +76 -0
data/CHANGES.txt ADDED
@@ -0,0 +1,5 @@
1
+ SAXONY, CHANGES
2
+
3
+ #### 0.1.0 (2010-01-31) ###########################
4
+
5
+ * Initial release
data/LICENSE.txt ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2010 Solutious Inc, Delano Mandelbaum
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,24 @@
1
+ ## Saxony - 0.1 ##
2
+
3
+ **Parse gigantic XML files with pleasure and ease.**
4
+
5
+ ## Example ##
6
+
7
+ sax = Saxony.new :SomeObject, 1000
8
+ sax.parse 'path/2/huge.xml' do
9
+ total_count # => Total number of SomeObjects processed
10
+ doc # => Nokogiri document holding the current batch (up to 1000 SomeObject elements)
11
+ elapsed_time # => seconds spent processing the current batch
12
+ end
13
+
14
+ ## Credits
15
+
16
+ * Delano Mandelbaum (http://solutious.com)
17
+
18
+
19
+ ## Thanks
20
+
21
+
22
+ ## License
23
+
24
+ See LICENSE.txt
data/Rakefile ADDED
@@ -0,0 +1,55 @@
1
+
2
+ require 'rake/clean'
3
+ require 'rake/gempackagetask'
4
+ require 'hanna/rdoctask'
5
+ require 'rake/testtask'
6
+ require 'shoulda/tasks'
7
+ require 'rake/runtest'
8
+ require 'fileutils'
9
+ include FileUtils
10
+
11
+ task :default => :test
12
+
13
+
14
+ # PACKAGE =============================================================
15
+
16
+ name = "saxony"
17
+ load "#{name}.gemspec"
18
+
19
+ version = @spec.version
20
+
21
+ Rake::GemPackageTask.new(@spec) do |p|
22
+ p.need_tar = true if RUBY_PLATFORM !~ /mswin/
23
+ end
24
+
25
+ task :test do
26
+ puts "Success!"
27
+ end
28
+
29
+ task :install => [ :rdoc, :package ] do
30
+ sh %{sudo gem install pkg/#{name}-#{version}.gem}
31
+ end
32
+
33
+ task :uninstall => [ :clean ] do
34
+ sh %{sudo gem uninstall #{name}}
35
+ end
36
+
37
+
38
+
39
+ Rake::RDocTask.new do |t|
40
+ t.rdoc_dir = 'doc'
41
+ t.title = @spec.summary
42
+ t.options << '--line-numbers' << '-A cattr_accessor=object'
43
+ t.options << '--charset' << 'utf-8'
44
+ t.rdoc_files.include('LICENSE.txt')
45
+ t.rdoc_files.include('README.md')
46
+ t.rdoc_files.include('CHANGES.txt')
47
+ #t.rdoc_files.include('Rudyfile') # why is the formatting f'd?
48
+ #t.rdoc_files.include('bin/*')
49
+ t.rdoc_files.include('lib/**/*.rb')
50
+ end
51
+
52
+ CLEAN.include [ 'pkg', '*.gem', '.config', 'doc', 'coverage*' ]
53
+
54
+
55
+
data/Rudyfile ADDED
@@ -0,0 +1,227 @@
1
+ require 'stella'
2
+
3
+ machines do
4
+
5
+ region :'us-east-1' do
6
+ ami 'ami-212ccf48' # Stella Debian 5.0, 32-bit (US)
7
+ end
8
+ region :'eu-west-1' do
9
+ ami 'ami-6ecde51a' # Alestic Debian 5.0, 32-bit (EU)
10
+ end
11
+
12
+ env :stage do
13
+
14
+ role :app do
15
+ positions 2
16
+ user :root
17
+ size 'm1.small'
18
+ end
19
+
20
+ role :gen do
21
+ user :root
22
+ size 'm1.large'
23
+ ami 'ami-7133d018'
24
+ end
25
+
26
+ role :demo do
27
+ user :root
28
+ size 'm1.small'
29
+ end
30
+
31
+ end
32
+
33
+
34
+ end
35
+
36
+
37
+
38
+ commands do
39
+ allow :apt_get, "apt-get", :y, :q
40
+ allow :gem_install, "/usr/bin/gem", "install", :n, '/usr/bin', :y, :V, "--no-rdoc", "--no-ri"
41
+ allow :gem_sources, "/usr/bin/gem", "sources"
42
+ allow :gem_uninstall, "/usr/bin/gem", "uninstall", :V
43
+ allow :update_rubygems
44
+ allow :rake
45
+ allow :thin
46
+ allow :stella
47
+ allow :rm
48
+ allow :ulimit
49
+ allow :ruby19, "/usr/local/bin/ruby"
50
+ allow :gem19_install, "/usr/local/bin/gem", "install"
51
+ allow :rackup_path do
52
+ v = [Stella::VERSION::MAJOR, Stella::VERSION::MINOR, Stella::VERSION::TINY].join('.')
53
+ "/usr/lib/ruby/gems/1.8/gems/stella-#{v}/support/sample_webapp/config.ru"
54
+ end
55
+ end
56
+
57
+ routines do
58
+
59
+ role :app do
60
+
61
+ # rudy -r app -v start
62
+ start do
63
+ remote do
64
+ #ulimit :n, '30000'
65
+ #ulimit :n
66
+ rm :f, 'thin.log'
67
+ mkdir :p, 'stats'
68
+ thin :d, :l, 'thin.log', :p, 3114, :R, rackup_path, '--stats', './stats', '--max-conns', 8192, 'start'
69
+ end
70
+ end
71
+
72
+ # rudy -r app -v stop
73
+ stop do
74
+ remote do
75
+ thin :R, rackup_path, 'stop'
76
+ sleep 1
77
+ ps 'ux'
78
+ end
79
+ end
80
+
81
+ end
82
+
83
+
84
+ # rudy -v -r gen verify ip-10-251-27-245.ec2.internal:3114
85
+ verify do
86
+ remote do |arg|
87
+ file_upload 'examples/essentials/plan.rb'
88
+ file_upload 'examples/essentials/search_terms.txt'
89
+ file_upload 'examples/essentials/logo.png'
90
+ stella :v, 'verify', :p, 'plan.rb', "#{arg.first}"
91
+ end
92
+ end
93
+
94
+ # rudy -v -r gen generate ip-10-251-27-245.ec2.internal:3114
95
+ generate do
96
+ remote do |arg|
97
+ file_upload 'examples/essentials/plan.rb'
98
+ file_upload 'examples/essentials/search_terms.txt'
99
+ file_upload 'examples/essentials/logo.png'
100
+ stella :v, 'generate', :p, 'plan.rb', :c, 1, :d, '1m', :W, "#{arg.first}"
101
+ end
102
+ end
103
+
104
+
105
+ setup do
106
+ after :sysupdate, :installdeps, :install_ruby19
107
+ end
108
+
109
+ shutdown do
110
+ end
111
+
112
+ reboot do
113
+ end
114
+
115
+ install_netperf do
116
+ #ftp://ftp.netperf.org/netperf/netperf-2.4.5.tar.bz2
117
+ end
118
+
119
+ install_rubyforge do
120
+ remote :root do
121
+ gem19_install 'stella', :V
122
+ gem_install 'stella', :V
123
+ end
124
+ end
125
+
126
+ install_github do
127
+ remote :root do
128
+ gem_sources :a, "http://gems.github.com"
129
+ gem_install 'solutious-stella'
130
+ end
131
+ end
132
+
133
+ package_gem do
134
+ local do
135
+ rm :r, :f, 'pkg'
136
+ rake 'package'
137
+ end
138
+ end
139
+
140
+ remove_rudy do
141
+ remote :root do
142
+ gem_uninstall 'stella'
143
+ rm :r, :f, '.stella'
144
+ end
145
+ end
146
+
147
+ install_gem do
148
+ before :package_gem
149
+ remote :root do
150
+ file_upload "pkg/stella-#{Stella::VERSION}.gem", "/tmp/"
151
+ gem_install "/tmp/stella-#{Stella::VERSION}.gem"
152
+ end
153
+
154
+ end
155
+
156
+ install_zlib do
157
+ remote do
158
+ wget "http://www.zlib.net/zlib-1.2.3.tar.gz"
159
+ tar :x, :z, :f, "zlib-1.2.3.tar.gz"
160
+ cd "zlib-1.2.3"
161
+ configure '--prefix=/usr/local'
162
+ make
163
+ make "install"
164
+ end
165
+ end
166
+
167
+ installdeps do
168
+ remote :root do
169
+ gem_install "test-spec", "rspec", "camping", "fcgi", "memcache-client"
170
+ gem_install "mongrel"
171
+ gem_install "ruby-openid", :v, "2.0.4" # thin requires 2.0.x
172
+ gem_install "rack", "thin", "sinatra"
173
+ end
174
+ end
175
+
176
+ install_jruby do
177
+ remote do
178
+ wget 'http://jruby.kenai.com/downloads/1.4.0RC2/jruby-bin-1.4.0RC2.tar.gz'
179
+ tar :x, :z, :f, 'jruby-bin-1.4.0RC2.tar.gz'
180
+ mv 'jruby-1.4.0RC2', '/usr/jruby'
181
+ end
182
+ end
183
+
184
+ install_ruby19 do
185
+ before :install_zlib
186
+ remote do
187
+ apt_get "install", "libssl-dev", "libreadline5-dev", "zlib1g-dev"
188
+ #wget 'ftp://ftp.ruby-lang.org/pub/ruby/1.9/ruby-1.9.1-p243.tar.bz2'
189
+ rm :r, :f, 'ruby-1.9.1-p243'
190
+ tar :x, :j, :v, :f, 'ruby-1.9.1-p243.tar.bz2'
191
+ cd 'ruby-1.9.1-p243'
192
+ configure '--prefix=/usr/local'
193
+ make
194
+ make 'install'
195
+ end
196
+ end
197
+
198
+ sysupdate {
199
+ remote {
200
+ apt_get "update"
201
+ apt_get "install", "libxml2-dev", "libxslt-dev"
202
+ apt_get "install", "build-essential", "git-core"
203
+ apt_get "install", "ruby1.8-dev", "rdoc", "libzlib-ruby", "rubygems"
204
+ apt_get "install", "libfcgi-dev", "libfcgi-ruby1.8"
205
+ apt_get "install", "joe", "siege", "httperf"
206
+ gem_sources :a, "http://gems.github.com"
207
+ mkdir :p, "/var/lib/gems/1.8/bin" # Doesn't get created, but causes Rubygems to fail
208
+ gem_install "builder", "session"
209
+ gem_install 'hoe-seattlerb'
210
+ gem_install 'rubygems-update', "-v=1.3.4"
211
+ update_rubygems
212
+ gem_install 'hoe'
213
+ }
214
+ }
215
+
216
+
217
+ end
218
+
219
+
220
+ defaults do
221
+ zone :'us-east-1a'
222
+ environment :stage
223
+ role :app
224
+ color true
225
+ user :root
226
+ end
227
+
data/lib/saxony.rb ADDED
@@ -0,0 +1,122 @@
1
+ require 'nokogiri'
2
+ require 'stringio'
3
+
4
class Array
  # Distributes the elements of this array round-robin into
  # +number_of_chunks+ sub-arrays and returns them.
  #
  #   [1, 2, 3, 4, 5].chunk(2)  # => [[1, 3, 5], [2, 4]]
  #
  # The receiver is left untouched (earlier versions emptied it via +shift+).
  #
  # When called without a count, the call is forwarded to the built-in
  # Enumerable#chunk so that code relying on the standard method keeps
  # working even though this class reopens Array.
  #
  # number_of_chunks:: how many sub-arrays to produce.
  #
  # Returns an Array of Arrays. Raises ArgumentError when asked to spread a
  # non-empty array over zero (or fewer) chunks; that case used to loop
  # forever.
  def chunk(number_of_chunks = nil, &block)
    return super(&block) if number_of_chunks.nil?

    chunks = (1..number_of_chunks).map { [] }
    if chunks.empty?
      # Nothing to distribute: same result as before for an empty receiver.
      return chunks if empty?
      raise ArgumentError, "number_of_chunks must be positive (got #{number_of_chunks.inspect})"
    end

    each_with_index { |item, index| chunks[index % chunks.size] << item }
    chunks
  end
end
15
+
16
# Saxony streams large XML sources through Nokogiri's SAX parser and hands
# the caller small, fully parsed Nokogiri documents, each containing a batch
# of one kind of element.
#
#   sax = Saxony.new :SomeObject, 1000
#   sax.parse 'path/2/huge.xml' do
#     total_count   # elements seen so far in this source
#     doc           # Nokogiri document for the current batch
#     elapsed_time  # seconds spent on the current batch
#   end
class Saxony
  VERSION = "0.1.0".freeze unless defined?(Saxony::VERSION)

  # SAX handler that buffers the markup of every +element+ it encounters and
  # evaluates the processor block (via instance_eval, so the block can call
  # +doc+, +xml+, +total_count+ and +elapsed_time+ directly) once
  # +granularity+ elements have been collected, and once more at the end of
  # the document for any remainder.
  #
  # NOTE: collection stops at the first closing tag whose name matches
  # +element+, so target elements nested inside each other are not supported.
  class Document < Nokogiri::XML::SAX::Document
    attr_reader :total_count, :granularity

    # element::     name (String or Symbol) of the elements to collect.
    # granularity:: number of elements per batch; 0 or less means a single
    #               batch delivered at the end of the document.
    # processor::   block evaluated in the context of this object per batch.
    def initialize(element, granularity, &processor)
      @root_element = nil
      @start_time = Time.now
      @element, @processor = element, processor
      @granularity, @total_count = granularity, 0
      reset
    end

    # Seconds elapsed since the current batch was started.
    def elapsed_time
      Time.now - @start_time
    end

    # The current batch as an XML string, wrapped in a single root element.
    def xml
      @xml ||= "<#{@root_element}>#{@buffer.string}</#{@root_element}>"
    end

    # The current batch parsed into a Nokogiri document (memoized per batch).
    def doc
      @doc ||= Nokogiri::XML(xml)
    end

    # SAX callback. The first element seen names the wrapper root; if that
    # first element is itself a target element, 'SAXONYDOC' is used instead.
    def start_element(element, attributes = [])
      if element == @element.to_s
        @count += 1
        @total_count += 1
        @collect = true
        @root_element = 'SAXONYDOC' if @root_element.nil?
      elsif @root_element.nil?
        @root_element = element
      end
      @buffer << to_otag(element, attributes) if @collect
    end

    # SAX callback. The parser delivers decoded text, so it is re-escaped
    # before being written back into the batch markup.
    def characters(text)
      @buffer << escape_xml(text) if @collect
    end

    # SAX callback for CDATA sections; content is kept verbatim.
    def cdata_block(text)
      @buffer << to_cdata(text) if @collect
    end

    # SAX callback. Closing a target element ends collection and flushes the
    # batch when it has reached +granularity+ elements.
    def end_element(element)
      @buffer << to_ctag(element) if @collect
      return unless element == @element.to_s

      @collect = false
      @buffer << $/
      process_objects if @granularity > 0 && @count >= @granularity
    end

    # SAX callback. Flushes whatever is left in the buffer.
    def end_document
      process_objects unless @buffer.pos <= 0
    end

    private

    # Runs the processor block against the current batch, then starts a new one.
    def process_objects
      instance_eval(&@processor)
      reset
    end

    # Clears all per-batch state and restarts the batch timer.
    def reset
      @xml = nil
      @doc = nil
      @count = 0
      @buffer = StringIO.new
      @start_time = Time.now
    end

    # Builds an opening tag. Neither +name+ nor +attributes+ is modified.
    def to_otag(name, attributes = [])
      rendered = attribute_pairs(attributes).map do |key, value|
        %Q( #{key}="#{escape_xml(value)}")
      end
      "<#{name}#{rendered.join}>"
    end

    # Normalizes SAX attributes to [[name, value], ...]. Accepts both the
    # flat [name, value, name, value] list and a list of [name, value] pairs.
    def attribute_pairs(attributes)
      return [] if attributes.nil? || attributes.empty?
      return attributes if Array === attributes.first

      pairs = []
      attributes.each_slice(2) { |pair| pairs << pair }
      pairs
    end

    # Escapes the characters that are not allowed to appear literally in XML
    # text or double-quoted attribute values.
    def escape_xml(text)
      text.to_s.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;').gsub('"', '&quot;')
    end

    def to_ctag(name)
      "</#{name}>"
    end

    def to_cdata(text)
      "<![CDATA[#{text}]]>"
    end
  end

  attr_reader :granularity, :element

  # element::     name of the elements to collect (String or Symbol).
  # granularity:: maximum number of elements handed to the block at a time.
  def initialize(element, granularity = 1000)
    @element, @granularity = element, granularity
  end

  # Parses each source in turn, evaluating +blk+ once per batch.
  #
  # * sources can be a list of file paths, IO objects, or XML strings.
  #   A String naming an existing file is opened (and closed again once it
  #   has been parsed); anything else is passed to the parser as is.
  #
  # A fresh Document is created per source, so +total_count+ restarts at
  # zero for every source. Returns the flattened list of sources.
  def parse(*sources, &blk)
    sources.flatten!
    sources.each do |src|
      saxdoc = Saxony::Document.new(@element, @granularity, &blk)
      parser = Nokogiri::XML::SAX::Parser.new(saxdoc)
      if String === src && File.exist?(src)
        File.open(src) { |io| parser.parse(io) }
      else
        parser.parse(src)
      end
    end
  end
end
108
+
109
+ #STDERR.print '.' if @samples % 5000 == 0
110
+
111
# Command-line driver: ruby saxony.rb FILE [FILE ...]
# For every batch of up to 1000 <Listing> elements, prints the running
# element count, the number of listings in the batch document, and the
# seconds spent on the batch.
if __FILE__ == $0
  listing_parser = Saxony.new(:Listing, 1000)
  listing_parser.parse(ARGV) do
    p [total_count, doc.xpath("//Listing").size, elapsed_time.to_f]
  end
end
122
+
data/saxony.gemspec ADDED
@@ -0,0 +1,33 @@
1
+ @spec = Gem::Specification.new do |s|
2
+ s.name = "saxony"
3
+ s.rubyforge_project = 'bone'
4
+ s.version = "0.1.0"
5
+ s.summary = "Parse gigantic XML files with pleasure and ease."
6
+ s.description = s.summary
7
+ s.author = "Delano Mandelbaum"
8
+ s.email = "delano@solutious.com"
9
+ s.homepage = ""
10
+
11
+ s.extra_rdoc_files = %w[README.md LICENSE.txt CHANGES.txt]
12
+ s.has_rdoc = true
13
+ s.rdoc_options = ["--line-numbers", "--title", s.summary, "--main", "README.md"]
14
+ s.require_paths = %w[lib]
15
+
16
+ #s.executables = %w[bone]
17
+
18
+ s.add_dependency 'nokogiri'
19
+
20
+ # = MANIFEST =
21
+ # git ls-files
22
+ s.files = %w(
23
+ CHANGES.txt
24
+ LICENSE.txt
25
+ README.md
26
+ Rakefile
27
+ Rudyfile
28
+ lib/saxony.rb
29
+ saxony.gemspec
30
+ )
31
+
32
+
33
+ end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: saxony
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Delano Mandelbaum
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-01-31 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ description: Parse gigantic XML files with pleasure and ease.
26
+ email: delano@solutious.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README.md
33
+ - LICENSE.txt
34
+ - CHANGES.txt
35
+ files:
36
+ - CHANGES.txt
37
+ - LICENSE.txt
38
+ - README.md
39
+ - Rakefile
40
+ - Rudyfile
41
+ - lib/saxony.rb
42
+ - saxony.gemspec
43
+ has_rdoc: true
44
+ homepage: ""
45
+ licenses: []
46
+
47
+ post_install_message:
48
+ rdoc_options:
49
+ - --line-numbers
50
+ - --title
51
+ - Parse gigantic XML files with pleasure and ease.
52
+ - --main
53
+ - README.md
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ requirements: []
69
+
70
+ rubyforge_project: bone
71
+ rubygems_version: 1.3.5
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: Parse gigantic XML files with pleasure and ease.
75
+ test_files: []
76
+