saxony 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES.txt +8 -0
- data/README.md +4 -4
- data/lib/saxony.rb +28 -5
- data/saxony.gemspec +1 -1
- metadata +2 -2
data/CHANGES.txt
CHANGED
data/README.md
CHANGED
@@ -1,16 +1,16 @@
|
|
1
|
-
## Saxony - 0.1 ##
|
1
|
+
## Saxony - 0.2 ##
|
2
2
|
|
3
|
-
**Parse gigantic XML files with pleasure and a without running out of memory.**
|
3
|
+
**Parse gigantic XML files with pleasure and without running out of memory.**
|
4
4
|
|
5
5
|
## Example ##
|
6
6
|
|
7
7
|
sax = Saxony.new :SomeObject, 1000
|
8
8
|
sax.parse 'path/2/huge.xml' do
|
9
|
-
|
9
|
+
xml # => The XML containing 1000 SomeObjects
|
10
10
|
doc # => Nokogiri object for 1000 SomeObjects
|
11
|
+
total_count # => Total number of SomeObjects processed
|
11
12
|
elapsed_time # => time processing current batch
|
12
13
|
path # => Current file being processed
|
13
|
-
xml # => The XML containing 1000 SomeObjects
|
14
14
|
end
|
15
15
|
|
16
16
|
## Credits
|
data/lib/saxony.rb
CHANGED
@@ -3,7 +3,7 @@ require 'stringio'
|
|
3
3
|
|
4
4
|
|
5
5
|
class Saxony
|
6
|
-
VERSION = "0.1.3"
|
6
|
+
VERSION = "0.2.0".freeze unless defined?(Saxony::VERSION)
|
7
7
|
|
8
8
|
class Document < Nokogiri::XML::SAX::Document
|
9
9
|
attr_accessor :path
|
@@ -89,19 +89,42 @@ class Saxony
|
|
89
89
|
# * sources can be a list of file paths, IO objects, or XML strings
|
90
90
|
def parse *sources, &blk
|
91
91
|
sources.flatten!
|
92
|
+
@saxdoc = Saxony::Document.new @element, @granularity, &blk
|
92
93
|
sources.each do |src|
|
93
|
-
|
94
|
-
parser = Nokogiri::XML::SAX::Parser.new(saxdoc)
|
94
|
+
parser = Nokogiri::XML::SAX::Parser.new(@saxdoc)
|
95
95
|
if (String === src && File.exists?(src))
|
96
96
|
xml = File.open(src)
|
97
|
-
saxdoc.path = src
|
97
|
+
@saxdoc.path = src
|
98
98
|
else
|
99
99
|
xml = src
|
100
|
-
saxdoc.path = src.class
|
100
|
+
@saxdoc.path = src.class
|
101
101
|
end
|
102
102
|
parser.parse xml
|
103
103
|
end
|
104
104
|
end
|
105
|
+
|
106
|
+
def total_count
|
107
|
+
@saxdoc.total_count
|
108
|
+
end
|
109
|
+
|
110
|
+
|
111
|
+
def Saxony.fork(procs,*paths,&logic)
|
112
|
+
puts
|
113
|
+
paths.flatten!
|
114
|
+
if procs > 1
|
115
|
+
path_chunks = paths.chunk(procs)
|
116
|
+
procs.times do |idx|
|
117
|
+
proc_paths = path_chunks[idx]
|
118
|
+
pid = Kernel.fork do
|
119
|
+
logic.call(proc_paths,idx)
|
120
|
+
end
|
121
|
+
puts "PID #{pid} (#{idx+1}/#{procs}): #{proc_paths.join(', ')}"
|
122
|
+
end
|
123
|
+
else
|
124
|
+
logic.call paths, 1
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
105
128
|
end
|
106
129
|
|
107
130
|
class Array
|
data/saxony.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
@spec = Gem::Specification.new do |s|
|
2
2
|
s.name = "saxony"
|
3
3
|
s.rubyforge_project = 'bone'
|
4
|
-
s.version = "0.1.3"
|
4
|
+
s.version = "0.2.0"
|
5
5
|
s.summary = "Parse gigantic XML files with pleasure and a without running out of memory."
|
6
6
|
s.description = s.summary
|
7
7
|
s.author = "Delano Mandelbaum"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxony
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Delano Mandelbaum
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-02-
|
12
|
+
date: 2010-02-02 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|