saxony 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES.txt +8 -0
- data/README.md +4 -4
- data/lib/saxony.rb +28 -5
- data/saxony.gemspec +1 -1
- metadata +2 -2
data/CHANGES.txt
CHANGED
data/README.md
CHANGED
@@ -1,16 +1,16 @@
|
|
1
|
-
## Saxony - 0.1 ##
|
1
|
+
## Saxony - 0.2 ##
|
2
2
|
|
3
|
-
**Parse gigantic XML files with pleasure and a without running out of memory.**
|
3
|
+
**Parse gigantic XML files with pleasure and without running out of memory.**
|
4
4
|
|
5
5
|
## Example ##
|
6
6
|
|
7
7
|
sax = Saxony.new :SomeObject, 1000
|
8
8
|
sax.parse 'path/2/huge.xml' do
|
9
|
-
|
9
|
+
xml # => The XML containing 1000 SomeObjects
|
10
10
|
doc # => Nokogiri object for 1000 SomeObjects
|
11
|
+
total_count # => Total number of SomeObjects processed
|
11
12
|
elapsed_time # => time processing current batch
|
12
13
|
path # => Current file being processed
|
13
|
-
xml # => The XML containing 1000 SomeObjects
|
14
14
|
end
|
15
15
|
|
16
16
|
## Credits
|
data/lib/saxony.rb
CHANGED
@@ -3,7 +3,7 @@ require 'stringio'
|
|
3
3
|
|
4
4
|
|
5
5
|
class Saxony
|
6
|
-
VERSION = "0.1.3".freeze unless defined?(Saxony::VERSION)
|
6
|
+
VERSION = "0.2.0".freeze unless defined?(Saxony::VERSION)
|
7
7
|
|
8
8
|
class Document < Nokogiri::XML::SAX::Document
|
9
9
|
attr_accessor :path
|
@@ -89,19 +89,42 @@ class Saxony
|
|
89
89
|
# * sources can be a list of file paths, IO objects, or XML strings
|
90
90
|
def parse *sources, &blk
|
91
91
|
sources.flatten!
|
92
|
+
@saxdoc = Saxony::Document.new @element, @granularity, &blk
|
92
93
|
sources.each do |src|
|
93
|
-
|
94
|
-
parser = Nokogiri::XML::SAX::Parser.new(saxdoc)
|
94
|
+
parser = Nokogiri::XML::SAX::Parser.new(@saxdoc)
|
95
95
|
if (String === src && File.exists?(src))
|
96
96
|
xml = File.open(src)
|
97
|
-
saxdoc.path = src
|
97
|
+
@saxdoc.path = src
|
98
98
|
else
|
99
99
|
xml = src
|
100
|
-
saxdoc.path = src.class
|
100
|
+
@saxdoc.path = src.class
|
101
101
|
end
|
102
102
|
parser.parse xml
|
103
103
|
end
|
104
104
|
end
|
105
|
+
|
106
|
+
def total_count
|
107
|
+
@saxdoc.total_count
|
108
|
+
end
|
109
|
+
|
110
|
+
|
111
|
+
def Saxony.fork(procs,*paths,&logic)
|
112
|
+
puts
|
113
|
+
paths.flatten!
|
114
|
+
if procs > 1
|
115
|
+
path_chunks = paths.chunk(procs)
|
116
|
+
procs.times do |idx|
|
117
|
+
proc_paths = path_chunks[idx]
|
118
|
+
pid = Kernel.fork do
|
119
|
+
logic.call(proc_paths,idx)
|
120
|
+
end
|
121
|
+
puts "PID #{pid} (#{idx+1}/#{procs}): #{proc_paths.join(', ')}"
|
122
|
+
end
|
123
|
+
else
|
124
|
+
logic.call paths, 1
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
105
128
|
end
|
106
129
|
|
107
130
|
class Array
|
data/saxony.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
@spec = Gem::Specification.new do |s|
|
2
2
|
s.name = "saxony"
|
3
3
|
s.rubyforge_project = 'bone'
|
4
|
-
s.version = "0.1.3"
|
4
|
+
s.version = "0.2.0"
|
5
5
|
s.summary = "Parse gigantic XML files with pleasure and a without running out of memory."
|
6
6
|
s.description = s.summary
|
7
7
|
s.author = "Delano Mandelbaum"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxony
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Delano Mandelbaum
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-02-
|
12
|
+
date: 2010-02-02 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|