saxony 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (8) hide show
  1. data/CHANGES.txt +5 -0
  2. data/LICENSE.txt +19 -0
  3. data/README.md +24 -0
  4. data/Rakefile +55 -0
  5. data/Rudyfile +227 -0
  6. data/lib/saxony.rb +122 -0
  7. data/saxony.gemspec +33 -0
  8. metadata +76 -0
data/CHANGES.txt ADDED
@@ -0,0 +1,5 @@
1
+ SAXONY, CHANGES
2
+
3
+ #### 0.1.0 (2010-01-31) ###########################
4
+
5
+ * Initial release
data/LICENSE.txt ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2010 Solutious Inc, Delano Mandelbaum
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,24 @@
1
+ ## Saxony - 0.1 ##
2
+
3
+ **Parse gigantic XML files with pleasure and ease.**
4
+
5
+ ## Example ##
6
+
7
+ sax = Saxony.new :SomeObject, 1000
8
+ sax.parse 'path/2/huge.xml' do
9
+ total_count # => Total number of SomeObjects processed
10
+ doc # => Nokogiri document containing the current batch (up to 1000 SomeObject elements)
11
+ elapsed_time # => Seconds spent processing the current batch
12
+ end
13
+
14
+ ## Credits
15
+
16
+ * Delano Mandelbaum (http://solutious.com)
17
+
18
+
19
+ ## Thanks
20
+
21
+
22
+ ## License
23
+
24
+ See LICENSE.txt
data/Rakefile ADDED
@@ -0,0 +1,55 @@
1
+
2
+ require 'rake/clean'
3
+ require 'rake/gempackagetask'
4
+ require 'hanna/rdoctask'
5
+ require 'rake/testtask'
6
+ require 'shoulda/tasks'
7
+ require 'rake/runtest'
8
+ require 'fileutils'
9
+ include FileUtils
10
+
11
+ task :default => :test
12
+
13
+
14
+ # PACKAGE =============================================================
15
+
16
+ name = "saxony"
17
+ load "#{name}.gemspec"
18
+
19
+ version = @spec.version
20
+
21
+ Rake::GemPackageTask.new(@spec) do |p|
22
+ p.need_tar = true if RUBY_PLATFORM !~ /mswin/
23
+ end
24
+
25
+ task :test do
26
+ puts "Success!"
27
+ end
28
+
29
+ task :install => [ :rdoc, :package ] do
30
+ sh %{sudo gem install pkg/#{name}-#{version}.gem}
31
+ end
32
+
33
+ task :uninstall => [ :clean ] do
34
+ sh %{sudo gem uninstall #{name}}
35
+ end
36
+
37
+
38
+
39
+ Rake::RDocTask.new do |t|
40
+ t.rdoc_dir = 'doc'
41
+ t.title = @spec.summary
42
+ t.options << '--line-numbers' << '-A cattr_accessor=object'
43
+ t.options << '--charset' << 'utf-8'
44
+ t.rdoc_files.include('LICENSE.txt')
45
+ t.rdoc_files.include('README.md')
46
+ t.rdoc_files.include('CHANGES.txt')
47
+ #t.rdoc_files.include('Rudyfile') # why is the formatting f'd?
48
+ #t.rdoc_files.include('bin/*')
49
+ t.rdoc_files.include('lib/**/*.rb')
50
+ end
51
+
52
+ CLEAN.include [ 'pkg', '*.gem', '.config', 'doc', 'coverage*' ]
53
+
54
+
55
+
data/Rudyfile ADDED
@@ -0,0 +1,227 @@
1
+ require 'stella'
2
+
3
+ machines do
4
+
5
+ region :'us-east-1' do
6
+ ami 'ami-212ccf48' # Stella Debian 5.0, 32-bit (US)
7
+ end
8
+ region :'eu-west-1' do
9
+ ami 'ami-6ecde51a' # Alestic Debian 5.0, 32-bit (EU)
10
+ end
11
+
12
+ env :stage do
13
+
14
+ role :app do
15
+ positions 2
16
+ user :root
17
+ size 'm1.small'
18
+ end
19
+
20
+ role :gen do
21
+ user :root
22
+ size 'm1.large'
23
+ ami 'ami-7133d018'
24
+ end
25
+
26
+ role :demo do
27
+ user :root
28
+ size 'm1.small'
29
+ end
30
+
31
+ end
32
+
33
+
34
+ end
35
+
36
+
37
+
38
+ commands do
39
+ allow :apt_get, "apt-get", :y, :q
40
+ allow :gem_install, "/usr/bin/gem", "install", :n, '/usr/bin', :y, :V, "--no-rdoc", "--no-ri"
41
+ allow :gem_sources, "/usr/bin/gem", "sources"
42
+ allow :gem_uninstall, "/usr/bin/gem", "uninstall", :V
43
+ allow :update_rubygems
44
+ allow :rake
45
+ allow :thin
46
+ allow :stella
47
+ allow :rm
48
+ allow :ulimit
49
+ allow :ruby19, "/usr/local/bin/ruby"
50
+ allow :gem19_install, "/usr/local/bin/gem", "install"
51
+ allow :rackup_path do
52
+ v = [Stella::VERSION::MAJOR, Stella::VERSION::MINOR, Stella::VERSION::TINY].join('.')
53
+ "/usr/lib/ruby/gems/1.8/gems/stella-#{v}/support/sample_webapp/config.ru"
54
+ end
55
+ end
56
+
57
+ routines do
58
+
59
+ role :app do
60
+
61
+ # rudy -r app -v start
62
+ start do
63
+ remote do
64
+ #ulimit :n, '30000'
65
+ #ulimit :n
66
+ rm :f, 'thin.log'
67
+ mkdir :p, 'stats'
68
+ thin :d, :l, 'thin.log', :p, 3114, :R, rackup_path, '--stats', './stats', '--max-conns', 8192, 'start'
69
+ end
70
+ end
71
+
72
+ # rudy -r app -v stop
73
+ stop do
74
+ remote do
75
+ thin :R, rackup_path, 'stop'
76
+ sleep 1
77
+ ps 'ux'
78
+ end
79
+ end
80
+
81
+ end
82
+
83
+
84
+ # rudy -v -r gen verify ip-10-251-27-245.ec2.internal:3114
85
+ verify do
86
+ remote do |arg|
87
+ file_upload 'examples/essentials/plan.rb'
88
+ file_upload 'examples/essentials/search_terms.txt'
89
+ file_upload 'examples/essentials/logo.png'
90
+ stella :v, 'verify', :p, 'plan.rb', "#{arg.first}"
91
+ end
92
+ end
93
+
94
+ # rudy -v -r gen generate ip-10-251-27-245.ec2.internal:3114
95
+ generate do
96
+ remote do |arg|
97
+ file_upload 'examples/essentials/plan.rb'
98
+ file_upload 'examples/essentials/search_terms.txt'
99
+ file_upload 'examples/essentials/logo.png'
100
+ stella :v, 'generate', :p, 'plan.rb', :c, 1, :d, '1m', :W, "#{arg.first}"
101
+ end
102
+ end
103
+
104
+
105
+ setup do
106
+ after :sysupdate, :installdeps, :install_ruby19
107
+ end
108
+
109
+ shutdown do
110
+ end
111
+
112
+ reboot do
113
+ end
114
+
115
+ install_netperf do
116
+ #ftp://ftp.netperf.org/netperf/netperf-2.4.5.tar.bz2
117
+ end
118
+
119
+ install_rubyforge do
120
+ remote :root do
121
+ gem19_install 'stella', :V
122
+ gem_install 'stella', :V
123
+ end
124
+ end
125
+
126
+ install_github do
127
+ remote :root do
128
+ gem_sources :a, "http://gems.github.com"
129
+ gem_install 'solutious-stella'
130
+ end
131
+ end
132
+
133
+ package_gem do
134
+ local do
135
+ rm :r, :f, 'pkg'
136
+ rake 'package'
137
+ end
138
+ end
139
+
140
+ remove_rudy do
141
+ remote :root do
142
+ gem_uninstall 'stella'
143
+ rm :r, :f, '.stella'
144
+ end
145
+ end
146
+
147
+ install_gem do
148
+ before :package_gem
149
+ remote :root do
150
+ file_upload "pkg/stella-#{Stella::VERSION}.gem", "/tmp/"
151
+ gem_install "/tmp/stella-#{Stella::VERSION}.gem"
152
+ end
153
+
154
+ end
155
+
156
+ install_zlib do
157
+ remote do
158
+ wget "http://www.zlib.net/zlib-1.2.3.tar.gz"
159
+ tar :x, :z, :f, "zlib-1.2.3.tar.gz"
160
+ cd "zlib-1.2.3"
161
+ configure '--prefix=/usr/local'
162
+ make
163
+ make "install"
164
+ end
165
+ end
166
+
167
+ installdeps do
168
+ remote :root do
169
+ gem_install "test-spec", "rspec", "camping", "fcgi", "memcache-client"
170
+ gem_install "mongrel"
171
+ gem_install "ruby-openid", :v, "2.0.4" # thin requires 2.0.x
172
+ gem_install "rack", "thin", "sinatra"
173
+ end
174
+ end
175
+
176
+ install_jruby do
177
+ remote do
178
+ wget 'http://jruby.kenai.com/downloads/1.4.0RC2/jruby-bin-1.4.0RC2.tar.gz'
179
+ tar :x, :z, :f, 'jruby-bin-1.4.0RC2.tar.gz'
180
+ mv 'jruby-1.4.0RC2', '/usr/jruby'
181
+ end
182
+ end
183
+
184
+ install_ruby19 do
185
+ before :install_zlib
186
+ remote do
187
+ apt_get "install", "libssl-dev", "libreadline5-dev", "zlib1g-dev"
188
+ #wget 'ftp://ftp.ruby-lang.org/pub/ruby/1.9/ruby-1.9.1-p243.tar.bz2'
189
+ rm :r, :f, 'ruby-1.9.1-p243'
190
+ tar :x, :j, :v, :f, 'ruby-1.9.1-p243.tar.bz2'
191
+ cd 'ruby-1.9.1-p243'
192
+ configure '--prefix=/usr/local'
193
+ make
194
+ make 'install'
195
+ end
196
+ end
197
+
198
+ sysupdate {
199
+ remote {
200
+ apt_get "update"
201
+ apt_get "install", "libxml2-dev", "libxslt-dev"
202
+ apt_get "install", "build-essential", "git-core"
203
+ apt_get "install", "ruby1.8-dev", "rdoc", "libzlib-ruby", "rubygems"
204
+ apt_get "install", "libfcgi-dev", "libfcgi-ruby1.8"
205
+ apt_get "install", "joe", "siege", "httperf"
206
+ gem_sources :a, "http://gems.github.com"
207
+ mkdir :p, "/var/lib/gems/1.8/bin" # Doesn't get created, but causes Rubygems to fail
208
+ gem_install "builder", "session"
209
+ gem_install 'hoe-seattlerb'
210
+ gem_install 'rubygems-update', "-v=1.3.4"
211
+ update_rubygems
212
+ gem_install 'hoe'
213
+ }
214
+ }
215
+
216
+
217
+ end
218
+
219
+
220
+ defaults do
221
+ zone :'us-east-1a'
222
+ environment :stage
223
+ role :app
224
+ color true
225
+ user :root
226
+ end
227
+
data/lib/saxony.rb ADDED
@@ -0,0 +1,122 @@
1
+ require 'nokogiri'
2
+ require 'stringio'
3
+
4
class Array
  # Deals the receiver's elements round-robin into +number_of_chunks+ arrays
  # and returns those arrays, e.g. [1, 2, 3, 4, 5].chunk(2) => [[1, 3, 5], [2, 4]].
  #
  # NOTE: this is destructive -- elements are shifted off the receiver, which
  # is left empty afterwards (unchanged from the original behaviour).
  #
  # When called without a chunk count the call is forwarded to
  # Enumerable#chunk, so the standard block form that this method would
  # otherwise shadow keeps working.
  #
  # Raises ArgumentError when asked to split a non-empty array into fewer
  # than one chunk (this used to loop forever).
  def chunk(number_of_chunks = nil, &block)
    return super(&block) if number_of_chunks.nil?

    if number_of_chunks < 1 && !empty?
      raise ArgumentError, "number_of_chunks must be positive (got #{number_of_chunks})"
    end

    chunks = Array.new([number_of_chunks, 0].max) { [] }
    # Test emptiness explicitly: Array#any? is false when the remaining
    # elements are all nil/false, which would silently drop them.
    until empty?
      chunks.each do |a_chunk|
        break if empty?
        a_chunk << shift
      end
    end
    chunks
  end
end
15
+
16
# Saxony streams through large XML sources with a SAX parser and hands the
# caller small batches of one element type at a time.
#
#   sax = Saxony.new :SomeObject, 1000
#   sax.parse 'path/2/huge.xml' do
#     total_count   # number of SomeObject elements seen so far in this source
#     doc           # Nokogiri document holding the current batch
#     elapsed_time  # seconds since the current batch was started
#   end
class Saxony
  VERSION = "0.1.0".freeze unless defined?(Saxony::VERSION)

  # SAX handler that re-serialises every occurrence of the target element
  # into a buffer and evaluates the processor block (via instance_eval, so
  # the block sees this object's methods) each time +granularity+ elements
  # have been collected, and once more at the end of the document for any
  # remainder.
  class Document < Nokogiri::XML::SAX::Document
    # The SAX callbacks deliver text and attribute values already decoded,
    # so markup characters must be re-escaped when written back out;
    # otherwise the rebuilt batch is not well-formed XML.
    XML_ESCAPES = {
      '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;'
    }.freeze

    attr_reader :total_count, :granularity

    # element::     name of the element to collect (String or Symbol)
    # granularity:: elements per batch; 0 or less means a single batch that
    #               is processed at the end of the document
    # processor::   block evaluated in the context of this object per batch
    def initialize(element, granularity, &processor)
      @root_element = nil
      @collect = false
      @start_time = Time.now
      @element, @processor = element, processor
      @granularity, @total_count = granularity, 0
      reset
    end

    # Seconds elapsed since the current batch was started.
    def elapsed_time
      Time.now - @start_time
    end

    # The current batch as an XML string, wrapped in a single root element.
    def xml
      @xml ||= "<#{@root_element}>#{@buffer.string}</#{@root_element}>"
    end

    # The current batch parsed into a Nokogiri document (memoised per batch).
    def doc
      @doc ||= Nokogiri::XML(xml)
    end

    def start_element(element, attributes = [])
      if element == @element.to_s
        @count += 1
        @total_count += 1
        @collect = true
        # The target element is itself the first element seen, so there is
        # no enclosing element name to reuse as the batch root.
        @root_element ||= 'SAXONYDOC'
      else
        # The first non-target element seen names the wrapper of each batch.
        @root_element ||= element
      end
      @buffer << to_otag(element, attributes) if @collect
    end

    def characters(text)
      @buffer << escape(text) if @collect
    end

    def cdata_block(text)
      @buffer << to_cdata(text) if @collect
    end

    def end_element(element)
      @buffer << to_ctag(element) if @collect
      return unless element == @element.to_s

      @collect = false
      @buffer << "\n"
      process_objects if @granularity > 0 && @count >= @granularity
    end

    # Flush whatever is left in the buffer as a final, possibly short, batch.
    def end_document
      process_objects if @buffer.pos > 0
    end

    private

    # Run the processor block (when one was given) against the current
    # batch, then start a fresh batch.
    def process_objects
      instance_eval(&@processor) if @processor
      reset
    end

    def reset
      @xml = nil
      @doc = nil
      @count = 0
      @buffer = StringIO.new
      @start_time = Time.now
    end

    # Build an opening tag. +attributes+ may be a flat [name, value, ...]
    # list or a list of [name, value] pairs; flattening normalises both
    # before pairing. A new string is built rather than appending to +name+,
    # which belongs to the caller.
    def to_otag(name, attributes = [])
      attrs = attributes.flatten.each_slice(2).map do |key, value|
        %( #{key}="#{escape(value)}")
      end
      "<#{name}#{attrs.join}>"
    end

    def to_ctag(name)
      "</#{name}>"
    end

    def to_cdata(text)
      "<![CDATA[#{text}]]>"
    end

    # Escape the characters that are significant in XML text and in
    # double-quoted attribute values.
    def escape(text)
      text.to_s.gsub(/[&<>"]/, XML_ESCAPES)
    end
  end

  attr_reader :granularity, :element

  # element::     name of the element to collect (String or Symbol)
  # granularity:: how many elements to hand to the block at a time
  def initialize(element, granularity=1000)
    @element, @granularity = element, granularity
  end

  # * sources can be a list of file paths, IO objects, or XML strings
  #
  # Each source is parsed with its own Document, so total_count restarts for
  # every source. A string naming an existing file is opened, parsed and
  # closed again; anything else is handed to the parser unchanged.
  def parse(*sources, &blk)
    sources.flatten!
    sources.each do |src|
      saxdoc = Saxony::Document.new(@element, @granularity, &blk)
      parser = Nokogiri::XML::SAX::Parser.new(saxdoc)
      if src.is_a?(String) && File.exist?(src)
        # Block form closes the handle even if parsing raises.
        File.open(src) { |io| parser.parse(io) }
      else
        parser.parse(src)
      end
    end
  end
end
108
+
109
+ #STDERR.print '.' if @samples % 5000 == 0
110
+
111
# Command-line smoke test, run only when this file is executed directly:
# treats every argument as a source, batches <Listing> elements by the
# thousand and prints, per batch, the running total, the number of Listing
# nodes in the batch and the seconds the batch took.
if __FILE__ == $0
  listing_batches = Saxony.new(:Listing, 1000)
  listing_batches.parse(ARGV) do
    batch_size = doc.xpath("//Listing").size
    p [total_count, batch_size, elapsed_time.to_f]
  end
end
122
+
data/saxony.gemspec ADDED
@@ -0,0 +1,33 @@
1
+ @spec = Gem::Specification.new do |s|
2
+ s.name = "saxony"
3
+ s.rubyforge_project = 'bone'
4
+ s.version = "0.1.0"
5
+ s.summary = "Parse gigantic XML files with pleasure and ease."
6
+ s.description = s.summary
7
+ s.author = "Delano Mandelbaum"
8
+ s.email = "delano@solutious.com"
9
+ s.homepage = ""
10
+
11
+ s.extra_rdoc_files = %w[README.md LICENSE.txt CHANGES.txt]
12
+ s.has_rdoc = true
13
+ s.rdoc_options = ["--line-numbers", "--title", s.summary, "--main", "README.md"]
14
+ s.require_paths = %w[lib]
15
+
16
+ #s.executables = %w[bone]
17
+
18
+ s.add_dependency 'nokogiri'
19
+
20
+ # = MANIFEST =
21
+ # git ls-files
22
+ s.files = %w(
23
+ CHANGES.txt
24
+ LICENSE.txt
25
+ README.md
26
+ Rakefile
27
+ Rudyfile
28
+ lib/saxony.rb
29
+ saxony.gemspec
30
+ )
31
+
32
+
33
+ end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: saxony
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Delano Mandelbaum
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-01-31 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ description: Parse gigantic XML files with pleasure and ease.
26
+ email: delano@solutious.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README.md
33
+ - LICENSE.txt
34
+ - CHANGES.txt
35
+ files:
36
+ - CHANGES.txt
37
+ - LICENSE.txt
38
+ - README.md
39
+ - Rakefile
40
+ - Rudyfile
41
+ - lib/saxony.rb
42
+ - saxony.gemspec
43
+ has_rdoc: true
44
+ homepage: ""
45
+ licenses: []
46
+
47
+ post_install_message:
48
+ rdoc_options:
49
+ - --line-numbers
50
+ - --title
51
+ - Parse gigantic XML files with pleasure and ease.
52
+ - --main
53
+ - README.md
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ requirements: []
69
+
70
+ rubyforge_project: bone
71
+ rubygems_version: 1.3.5
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: Parse gigantic XML files with pleasure and ease.
75
+ test_files: []
76
+