saxony 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES.txt +6 -0
- data/README.md +5 -2
- data/lib/saxony.rb +25 -32
- data/saxony.gemspec +2 -2
- metadata +4 -4
data/CHANGES.txt
CHANGED
data/README.md
CHANGED
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
## Saxony - 0.1 ##
|
|
2
2
|
|
|
3
|
-
**Parse gigantic XML files with pleasure and
|
|
3
|
+
**Parse gigantic XML files with pleasure and without running out of memory.**
|
|
4
4
|
|
|
5
5
|
## Example ##
|
|
6
6
|
|
|
7
7
|
sax = Saxony.new :SomeObject, 1000
|
|
8
8
|
sax.parse 'path/2/huge.xml' do
|
|
9
9
|
total_count # => Total number of SomeObjects processed
|
|
10
|
-
doc # => Nokogiri object for 1000
|
|
10
|
+
doc # => Nokogiri object for 1000 SomeObjects
|
|
11
11
|
elapsed_time # => time processing current batch
|
|
12
|
+
path # => Current file being processed
|
|
13
|
+
xml # => The XML containing 1000 SomeObjects
|
|
12
14
|
end
|
|
13
15
|
|
|
14
16
|
## Credits
|
|
@@ -18,6 +20,7 @@
|
|
|
18
20
|
|
|
19
21
|
## Thanks
|
|
20
22
|
|
|
23
|
+
* [Nokogiri](http://nokogiri.org/)
|
|
21
24
|
|
|
22
25
|
## License
|
|
23
26
|
|
data/lib/saxony.rb
CHANGED
|
@@ -3,9 +3,10 @@ require 'stringio'
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
class Saxony
|
|
6
|
-
VERSION = "0.1.
|
|
6
|
+
VERSION = "0.1.2".freeze unless defined?(Saxony::VERSION)
|
|
7
7
|
|
|
8
8
|
class Document < Nokogiri::XML::SAX::Document
|
|
9
|
+
attr_accessor :path
|
|
9
10
|
attr_reader :total_count, :granularity
|
|
10
11
|
def initialize(element, granularity, &processor)
|
|
11
12
|
@root_element = nil
|
|
@@ -59,7 +60,7 @@ class Saxony
|
|
|
59
60
|
reset
|
|
60
61
|
end
|
|
61
62
|
def reset
|
|
62
|
-
@xml = nil
|
|
63
|
+
@xml, @path = nil, nil
|
|
63
64
|
@buffer, @count, @doc, @start_time = StringIO.new, 0, nil, Time.now
|
|
64
65
|
end
|
|
65
66
|
def to_otag(name, attributes=[])
|
|
@@ -91,17 +92,37 @@ class Saxony
|
|
|
91
92
|
sources.each do |src|
|
|
92
93
|
saxdoc = Saxony::Document.new @element, @granularity, &blk
|
|
93
94
|
parser = Nokogiri::XML::SAX::Parser.new(saxdoc)
|
|
94
|
-
|
|
95
|
+
if (String === src && File.exists?(src))
|
|
96
|
+
xml = File.open(src)
|
|
97
|
+
saxdoc.path = src
|
|
98
|
+
else
|
|
99
|
+
xml = src
|
|
100
|
+
saxdoc.path = src.class.to_s
|
|
101
|
+
end
|
|
95
102
|
parser.parse xml
|
|
96
103
|
end
|
|
97
104
|
end
|
|
98
105
|
end
|
|
99
106
|
|
|
107
|
+
class Array
|
|
108
|
+
def saxony_chunk(number_of_chunks)
|
|
109
|
+
chunks = (1..number_of_chunks).collect { [] }
|
|
110
|
+
while self.any?
|
|
111
|
+
chunks.each do |a_chunk|
|
|
112
|
+
a_chunk << self.shift if self.any?
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
chunks
|
|
116
|
+
end
|
|
117
|
+
alias_method :chunk, :saxony_chunk unless method_defined? :chunk
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
|
|
100
121
|
#STDERR.print '.' if @samples % 5000 == 0
|
|
101
122
|
|
|
102
123
|
if $0 == __FILE__
|
|
103
124
|
sax = Saxony.new :Listing, 1000
|
|
104
|
-
sax.parse
|
|
125
|
+
sax.parse DATA do
|
|
105
126
|
#doc.xpath("//Listing").each do |obj|
|
|
106
127
|
#end
|
|
107
128
|
p [total_count, doc.xpath("//Listing").size, elapsed_time.to_f]
|
|
@@ -110,32 +131,4 @@ if $0 == __FILE__
|
|
|
110
131
|
end
|
|
111
132
|
end
|
|
112
133
|
|
|
113
|
-
__END__
|
|
114
|
-
|
|
115
|
-
<BusinessListings>
|
|
116
|
-
<Listing><ListingId>17</ListingId><DBID>16</DBID><BusName>'A' Company Military Surplus</BusName><BusNameFr>'A' Company Military Surplus</BusNameFr><Address>2240 Alberni Hwy</Address><City>Parksville</City><PstCode>V0R1M0</PstCode><Phone><Primary><Prefix>+1</Prefix><NPA>250</NPA><NXX>951</NXX><XNUM>0609</XNUM><DisplayNumber>250-951-0609</DisplayNumber></Primary><Other Type="Click2Call"><Prefix>+1</Prefix><NPA>250</NPA><NXX>951</NXX><XNUM>0609</XNUM><DisplayNumber>250-951-0609</DisplayNumber></Other></Phone>
|
|
117
|
-
<ListingKeys>D00007295080000465894</ListingKeys><ReportId>16</ReportId><Paid>Y</Paid><ListEntry><DirProv>BC</DirProv><DirCode>022000</DirCode><HdCode>00866400</HdCode><Channel>2</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13980461ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
|
|
118
|
-
<Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
|
|
119
|
-
</Text></HS></Products>
|
|
120
|
-
</ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086494</DirCode><HdCode>00866400</HdCode><Channel>1</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13912789ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
|
|
121
|
-
<Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
|
|
122
|
-
</Text></HS></Products>
|
|
123
|
-
</ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086604</DirCode><HdCode>00866400</HdCode><Channel>1</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13908447ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
|
|
124
|
-
<Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
|
|
125
|
-
</Text></HS></Products>
|
|
126
|
-
</ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086652</DirCode><HdCode>00866400</HdCode><Channel>1</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13890219ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
|
|
127
|
-
<Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
|
|
128
|
-
</Text></HS></Products>
|
|
129
|
-
</ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086926</DirCode><HdCode>00866400</HdCode><Channel>1</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13980461ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
|
|
130
|
-
<Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
|
|
131
|
-
</Text></HS></Products>
|
|
132
|
-
</ListEntry></Listing>
|
|
133
|
-
<Listing><ListingId>19</ListingId><DBID>18</DBID><BusName>'Colleen All Dogs' Doggie Daycare</BusName><BusNameFr>'Colleen All Dogs' Doggie Daycare</BusNameFr><Address>6058 144 Street</Address><City>Surrey</City><Prov>BC</Prov><PstCode>V3X1A3</PstCode><Lat>49.113197</Lat><Long>-122.823369</Long><Phone><Primary><Prefix>+1</Prefix><NPA>604</NPA><NXX>319</NXX><XNUM>3895</XNUM><DisplayNumber>604-319-3895</DisplayNumber></Primary><Other Type="Click2Call"><Prefix>+1</Prefix><NPA>604</NPA><NXX>319</NXX><XNUM>3895</XNUM><DisplayNumber>604-319-3895</DisplayNumber></Other></Phone>
|
|
134
|
-
<ListingKeys>D00007440120000535278</ListingKeys><ReportId>18</ReportId><Paid>Y</Paid><ListEntry><DirProv>BC</DirProv><DirCode>086446</DirCode><HdCode>00980600</HdCode><Channel>1</Channel><Rank>100</Rank><NormRank>6</NormRank><Placement Child="false">Other</Placement><Products><URL Type="Lang" URL="EN" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL><URL Type="Lang" URL="FR" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL></Products>
|
|
135
|
-
</ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086446</DirCode><HdCode>00980355</HdCode><Channel>1</Channel><Rank>194</Rank><NormRank>12</NormRank><Placement Child="false">DPlus</Placement><Products><D_PP PrdCode="EN" D_PP="ProfileId" Type="18042" PPLUS="DirPath" Udac="18042" PPE="Rank" Lang="50"><Keywords><OpenHrs>Monday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Tuesday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Wednesday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Thursday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Friday 7:00 am - 6:30 pm</OpenHrs><LangSpk>English</LangSpk><GetThr>King George Highway</GetThr><ProdServ>Administer Medications</ProdServ><ProdServ>Animal Care Experience</ProdServ><ProdServ>Dog Daycare</ProdServ><ProdServ>Dog Mind & Body Stimulation</ProdServ><ProdServ>Dog Playhouse</ProdServ><ProdServ>Pet Portraits</ProdServ><ProdServ>Pet Shop</ProdServ></Keywords></D_PP>
|
|
136
|
-
<D_PP PrdCode="FR" D_PP="ProfileId" Type="18042" PPLUS="DirPath" Udac="18042" ="Rank" Lang="0"><Keywords><OpenHrs>Monday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Tuesday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Wednesday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Thursday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Friday 7:00 am - 6:30 pm</OpenHrs><LangSpk>English</LangSpk><GetThr>King George Highway</GetThr><ProdServ>Administer Medications</ProdServ><ProdServ>Animal Care Experience</ProdServ><ProdServ>Dog Daycare</ProdServ><ProdServ>Dog Mind & Body Stimulation</ProdServ><ProdServ>Dog Playhouse</ProdServ><ProdServ>Pet Portraits</ProdServ><ProdServ>Pet Shop</ProdServ></Keywords></D_PP>
|
|
137
|
-
<URL Type="Lang" URL="EN" PrdCode="LinkText" URL="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" URL0="0"></URL><URL Type="Lang" URL="FR" PrdCode="LinkText" URL="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" URL0="0"></URL><URL Type="Lang" URL="EN" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL><URL Type="Lang" URL="FR" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL><Thumb Lang="THUMB" EN="Udac" Val="QCW" 14571890aa="Rank" Type="44" THUMB="DirPlus" PrdCode="true"></Thumb><Thumb Lang="THUMB" FR="Udac" Val="QCW" 14571890aa="Rank" Type="44" THUMB="DirPlus" PrdCode="true"></Thumb><DspAd Rank="DISPADT" 44="Lang" DirPlus="EN" true="Udac" AdNo="QCW" 14571890aa="Type" PrdCode="DspAd"><Keywords><Classification><Heading HdCode="HdName" 00980355=""></Heading></Classification><Raw>COLLEEN ALL DOGS Doggie Daycare 1/2 Acr 1/2 Acre of Secured Ine of Secured Indoodoor/Outr/Outdoodoor Spacr Spacee Puppy Social Puppy Socialization, 100%ization, 100% Su Superpervisvisionion An Any Agey Age/Size,/Size, By By Appoint Appointmenment Onlyt Only Pet Firs Pet First Aid, 17 t Aid, 17 YrsYrs Ani Animal Knowledgemal Knowledge 604-604-319-38319-389595 6058 144th St Surrey, BC www.colleewww.colleewww.colleenallnallnalldogs.dogs.dogs.comcomcom</Raw></Keywords></DspAd></Products>
|
|
138
|
-
</ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086446</DirCode><HdCode>00740000</HdCode><Channel>1</Channel><Rank>100</Rank><NormRank>6</NormRank><Placement Child="false">Other</Placement><Products><URL Type="Lang" URL="EN" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL><URL Type="Lang" URL="FR" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL></Products>
|
|
139
|
-
</ListEntry></Listing>
|
|
140
|
-
</BusinessListings>
|
|
141
134
|
|
data/saxony.gemspec
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
@spec = Gem::Specification.new do |s|
|
|
2
2
|
s.name = "saxony"
|
|
3
3
|
s.rubyforge_project = 'bone'
|
|
4
|
-
s.version = "0.1.
|
|
5
|
-
s.summary = "Parse gigantic XML files with pleasure and
|
|
4
|
+
s.version = "0.1.2"
|
|
5
|
+
s.summary = "Parse gigantic XML files with pleasure and without running out of memory."
|
|
6
6
|
s.description = s.summary
|
|
7
7
|
s.author = "Delano Mandelbaum"
|
|
8
8
|
s.email = "delano@solutious.com"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: saxony
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Delano Mandelbaum
|
|
@@ -22,7 +22,7 @@ dependencies:
|
|
|
22
22
|
- !ruby/object:Gem::Version
|
|
23
23
|
version: "0"
|
|
24
24
|
version:
|
|
25
|
-
description: Parse gigantic XML files with pleasure and
|
|
25
|
+
description: Parse gigantic XML files with pleasure and without running out of memory.
|
|
26
26
|
email: delano@solutious.com
|
|
27
27
|
executables: []
|
|
28
28
|
|
|
@@ -48,7 +48,7 @@ post_install_message:
|
|
|
48
48
|
rdoc_options:
|
|
49
49
|
- --line-numbers
|
|
50
50
|
- --title
|
|
51
|
-
- Parse gigantic XML files with pleasure and
|
|
51
|
+
- Parse gigantic XML files with pleasure and without running out of memory.
|
|
52
52
|
- --main
|
|
53
53
|
- README.md
|
|
54
54
|
require_paths:
|
|
@@ -71,6 +71,6 @@ rubyforge_project: bone
|
|
|
71
71
|
rubygems_version: 1.3.5
|
|
72
72
|
signing_key:
|
|
73
73
|
specification_version: 3
|
|
74
|
-
summary: Parse gigantic XML files with pleasure and
|
|
74
|
+
summary: Parse gigantic XML files with pleasure and without running out of memory.
|
|
75
75
|
test_files: []
|
|
76
76
|
|