saxony 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/CHANGES.txt +6 -0
  2. data/README.md +5 -2
  3. data/lib/saxony.rb +25 -32
  4. data/saxony.gemspec +2 -2
  5. metadata +4 -4
data/CHANGES.txt CHANGED
@@ -1,5 +1,11 @@
1
1
  SAXONY, CHANGES
2
2
 
3
+ #### 0.1.2 (2010-02-01) ####
4
+
5
+ * ADDED: Keep track of file path
6
+ * ADDED: Array#chunk
7
+
8
+
3
9
  #### 0.1.1 (2010-01-31) ####
4
10
 
5
11
  * FIXED: Attributes parsing
data/README.md CHANGED
@@ -1,14 +1,16 @@
1
1
  ## Saxony - 0.1 ##
2
2
 
3
- **Parse gigantic XML files with pleasure and ease.**
3
+ **Parse gigantic XML files with pleasure and without running out of memory.**
4
4
 
5
5
  ## Example ##
6
6
 
7
7
  sax = Saxony.new :SomeObject, 1000
8
8
  sax.parse 'path/2/huge.xml' do
9
9
  total_count # => Total number of SomeObjects processed
10
- doc # => Nokogiri object for 1000 SomeObject
10
+ doc # => Nokogiri object for 1000 SomeObjects
11
11
  elapsed_time # => time processing current batch
12
+ path # => Current file being processed
13
+ xml # => The XML containing 1000 SomeObjects
12
14
  end
13
15
 
14
16
  ## Credits
@@ -18,6 +20,7 @@
18
20
 
19
21
  ## Thanks
20
22
 
23
+ * [Nokogiri](http://nokogiri.org/)
21
24
 
22
25
  ## License
23
26
 
data/lib/saxony.rb CHANGED
@@ -3,9 +3,10 @@ require 'stringio'
3
3
 
4
4
 
5
5
  class Saxony
6
- VERSION = "0.1.1".freeze unless defined?(Saxony::VERSION)
6
+ VERSION = "0.1.2".freeze unless defined?(Saxony::VERSION)
7
7
 
8
8
  class Document < Nokogiri::XML::SAX::Document
9
+ attr_accessor :path
9
10
  attr_reader :total_count, :granularity
10
11
  def initialize(element, granularity, &processor)
11
12
  @root_element = nil
@@ -59,7 +60,7 @@ class Saxony
59
60
  reset
60
61
  end
61
62
  def reset
62
- @xml = nil
63
+ @xml, @path = nil, nil
63
64
  @buffer, @count, @doc, @start_time = StringIO.new, 0, nil, Time.now
64
65
  end
65
66
  def to_otag(name, attributes=[])
@@ -91,17 +92,37 @@ class Saxony
91
92
  sources.each do |src|
92
93
  saxdoc = Saxony::Document.new @element, @granularity, &blk
93
94
  parser = Nokogiri::XML::SAX::Parser.new(saxdoc)
94
- xml = (String === src && File.exists?(src)) ? File.open(src) : src
95
+ if (String === src && File.exists?(src))
96
+ xml = File.open(src)
97
+ saxdoc.path = src
98
+ else
99
+ xml = src
100
+ saxdoc.path = src.class.to_s
101
+ end
95
102
  parser.parse xml
96
103
  end
97
104
  end
98
105
  end
99
106
 
107
+ class Array
108
+ def saxony_chunk(number_of_chunks)
109
+ chunks = (1..number_of_chunks).collect { [] }
110
+ while self.any?
111
+ chunks.each do |a_chunk|
112
+ a_chunk << self.shift if self.any?
113
+ end
114
+ end
115
+ chunks
116
+ end
117
+ alias_method :chunk, :saxony_chunk unless method_defined? :chunk
118
+ end
119
+
120
+
100
121
  #STDERR.print '.' if @samples % 5000 == 0
101
122
 
102
123
  if $0 == __FILE__
103
124
  sax = Saxony.new :Listing, 1000
104
- sax.parse ARGV do
125
+ sax.parse DATA do
105
126
  #doc.xpath("//Listing").each do |obj|
106
127
  #end
107
128
  p [total_count, doc.xpath("//Listing").size, elapsed_time.to_f]
@@ -110,32 +131,4 @@ if $0 == __FILE__
110
131
  end
111
132
  end
112
133
 
113
- __END__
114
-
115
- <BusinessListings>
116
- <Listing><ListingId>17</ListingId><DBID>16</DBID><BusName>&#39;A&#39; Company Military Surplus</BusName><BusNameFr>&#39;A&#39; Company Military Surplus</BusNameFr><Address>2240 Alberni Hwy</Address><City>Parksville</City><PstCode>V0R1M0</PstCode><Phone><Primary><Prefix>+1</Prefix><NPA>250</NPA><NXX>951</NXX><XNUM>0609</XNUM><DisplayNumber>250-951-0609</DisplayNumber></Primary><Other Type="Click2Call"><Prefix>+1</Prefix><NPA>250</NPA><NXX>951</NXX><XNUM>0609</XNUM><DisplayNumber>250-951-0609</DisplayNumber></Other></Phone>
117
- <ListingKeys>D00007295080000465894</ListingKeys><ReportId>16</ReportId><Paid>Y</Paid><ListEntry><DirProv>BC</DirProv><DirCode>022000</DirCode><HdCode>00866400</HdCode><Channel>2</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13980461ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
118
- <Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
119
- </Text></HS></Products>
120
- </ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086494</DirCode><HdCode>00866400</HdCode><Channel>1</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13912789ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
121
- <Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
122
- </Text></HS></Products>
123
- </ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086604</DirCode><HdCode>00866400</HdCode><Channel>1</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13908447ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
124
- <Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
125
- </Text></HS></Products>
126
- </ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086652</DirCode><HdCode>00866400</HdCode><Channel>1</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13890219ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
127
- <Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
128
- </Text></HS></Products>
129
- </ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086926</DirCode><HdCode>00866400</HdCode><Channel>1</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13980461ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
130
- <Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
131
- </Text></HS></Products>
132
- </ListEntry></Listing>
133
- <Listing><ListingId>19</ListingId><DBID>18</DBID><BusName>&#39;Colleen All Dogs&#39; Doggie Daycare</BusName><BusNameFr>&#39;Colleen All Dogs&#39; Doggie Daycare</BusNameFr><Address>6058 144 Street</Address><City>Surrey</City><Prov>BC</Prov><PstCode>V3X1A3</PstCode><Lat>49.113197</Lat><Long>-122.823369</Long><Phone><Primary><Prefix>+1</Prefix><NPA>604</NPA><NXX>319</NXX><XNUM>3895</XNUM><DisplayNumber>604-319-3895</DisplayNumber></Primary><Other Type="Click2Call"><Prefix>+1</Prefix><NPA>604</NPA><NXX>319</NXX><XNUM>3895</XNUM><DisplayNumber>604-319-3895</DisplayNumber></Other></Phone>
134
- <ListingKeys>D00007440120000535278</ListingKeys><ReportId>18</ReportId><Paid>Y</Paid><ListEntry><DirProv>BC</DirProv><DirCode>086446</DirCode><HdCode>00980600</HdCode><Channel>1</Channel><Rank>100</Rank><NormRank>6</NormRank><Placement Child="false">Other</Placement><Products><URL Type="Lang" URL="EN" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL><URL Type="Lang" URL="FR" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL></Products>
135
- </ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086446</DirCode><HdCode>00980355</HdCode><Channel>1</Channel><Rank>194</Rank><NormRank>12</NormRank><Placement Child="false">DPlus</Placement><Products><D_PP PrdCode="EN" D_PP="ProfileId" Type="18042" PPLUS="DirPath" Udac="18042" PPE="Rank" Lang="50"><Keywords><OpenHrs>Monday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Tuesday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Wednesday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Thursday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Friday 7:00 am - 6:30 pm</OpenHrs><LangSpk>English</LangSpk><GetThr>King George Highway</GetThr><ProdServ>Administer Medications</ProdServ><ProdServ>Animal Care Experience</ProdServ><ProdServ>Dog Daycare</ProdServ><ProdServ>Dog Mind &amp; Body Stimulation</ProdServ><ProdServ>Dog Playhouse</ProdServ><ProdServ>Pet Portraits</ProdServ><ProdServ>Pet Shop</ProdServ></Keywords></D_PP>
136
- <D_PP PrdCode="FR" D_PP="ProfileId" Type="18042" PPLUS="DirPath" Udac="18042" ="Rank" Lang="0"><Keywords><OpenHrs>Monday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Tuesday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Wednesday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Thursday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Friday 7:00 am - 6:30 pm</OpenHrs><LangSpk>English</LangSpk><GetThr>King George Highway</GetThr><ProdServ>Administer Medications</ProdServ><ProdServ>Animal Care Experience</ProdServ><ProdServ>Dog Daycare</ProdServ><ProdServ>Dog Mind &amp; Body Stimulation</ProdServ><ProdServ>Dog Playhouse</ProdServ><ProdServ>Pet Portraits</ProdServ><ProdServ>Pet Shop</ProdServ></Keywords></D_PP>
137
- <URL Type="Lang" URL="EN" PrdCode="LinkText" URL="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" URL0="0"></URL><URL Type="Lang" URL="FR" PrdCode="LinkText" URL="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" URL0="0"></URL><URL Type="Lang" URL="EN" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL><URL Type="Lang" URL="FR" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL><Thumb Lang="THUMB" EN="Udac" Val="QCW" 14571890aa="Rank" Type="44" THUMB="DirPlus" PrdCode="true"></Thumb><Thumb Lang="THUMB" FR="Udac" Val="QCW" 14571890aa="Rank" Type="44" THUMB="DirPlus" PrdCode="true"></Thumb><DspAd Rank="DISPADT" 44="Lang" DirPlus="EN" true="Udac" AdNo="QCW" 14571890aa="Type" PrdCode="DspAd"><Keywords><Classification><Heading HdCode="HdName" 00980355=""></Heading></Classification><Raw>COLLEEN ALL DOGS Doggie Daycare 1/2 Acr 1/2 Acre of Secured Ine of Secured Indoodoor/Outr/Outdoodoor Spacr Spacee Puppy Social Puppy Socialization, 100%ization, 100% Su Superpervisvisionion An Any Agey Age/Size,/Size, By By Appoint Appointmenment Onlyt Only Pet Firs Pet First Aid, 17 t Aid, 17 YrsYrs Ani Animal Knowledgemal Knowledge 604-604-319-38319-389595 6058 144th St Surrey, BC www.colleewww.colleewww.colleenallnallnalldogs.dogs.dogs.comcomcom</Raw></Keywords></DspAd></Products>
138
- </ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086446</DirCode><HdCode>00740000</HdCode><Channel>1</Channel><Rank>100</Rank><NormRank>6</NormRank><Placement Child="false">Other</Placement><Products><URL Type="Lang" URL="EN" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL><URL Type="Lang" URL="FR" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL></Products>
139
- </ListEntry></Listing>
140
- </BusinessListings>
141
134
 
data/saxony.gemspec CHANGED
@@ -1,8 +1,8 @@
1
1
  @spec = Gem::Specification.new do |s|
2
2
  s.name = "saxony"
3
3
  s.rubyforge_project = 'bone'
4
- s.version = "0.1.1"
5
- s.summary = "Parse gigantic XML files with pleasure and ease."
4
+ s.version = "0.1.2"
5
+ s.summary = "Parse gigantic XML files with pleasure and without running out of memory."
6
6
  s.description = s.summary
7
7
  s.author = "Delano Mandelbaum"
8
8
  s.email = "delano@solutious.com"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: saxony
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Delano Mandelbaum
@@ -22,7 +22,7 @@ dependencies:
22
22
  - !ruby/object:Gem::Version
23
23
  version: "0"
24
24
  version:
25
- description: Parse gigantic XML files with pleasure and ease.
25
+ description: Parse gigantic XML files with pleasure and without running out of memory.
26
26
  email: delano@solutious.com
27
27
  executables: []
28
28
 
@@ -48,7 +48,7 @@ post_install_message:
48
48
  rdoc_options:
49
49
  - --line-numbers
50
50
  - --title
51
- - Parse gigantic XML files with pleasure and ease.
51
+ - Parse gigantic XML files with pleasure and without running out of memory.
52
52
  - --main
53
53
  - README.md
54
54
  require_paths:
@@ -71,6 +71,6 @@ rubyforge_project: bone
71
71
  rubygems_version: 1.3.5
72
72
  signing_key:
73
73
  specification_version: 3
74
- summary: Parse gigantic XML files with pleasure and ease.
74
+ summary: Parse gigantic XML files with pleasure and without running out of memory.
75
75
  test_files: []
76
76