saxony 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5)
  1. data/CHANGES.txt +6 -0
  2. data/README.md +5 -2
  3. data/lib/saxony.rb +25 -32
  4. data/saxony.gemspec +2 -2
  5. metadata +4 -4
data/CHANGES.txt CHANGED
@@ -1,5 +1,11 @@
1
1
  SAXONY, CHANGES
2
2
 
3
+ #### 0.1.2 (2010-02-01) ####
4
+
5
+ * ADDED: Keep track of file path
6
+ * ADDED: Array#chunk
7
+
8
+
3
9
  #### 0.1.1 (2010-01-31) ####
4
10
 
5
11
  * FIXED: Attributes parsing
data/README.md CHANGED
@@ -1,14 +1,16 @@
1
1
  ## Saxony - 0.1 ##
2
2
 
3
- **Parse gigantic XML files with pleasure and ease.**
3
+ **Parse gigantic XML files with pleasure and without running out of memory.**
4
4
 
5
5
  ## Example ##
6
6
 
7
7
  sax = Saxony.new :SomeObject, 1000
8
8
  sax.parse 'path/2/huge.xml' do
9
9
  total_count # => Total number of SomeObjects processed
10
- doc # => Nokogiri object for 1000 SomeObject
10
+ doc # => Nokogiri object for 1000 SomeObjects
11
11
  elapsed_time # => time processing current batch
12
+ path # => Current file being processed
13
+ xml # => The XML containing 1000 SomeObjects
12
14
  end
13
15
 
14
16
  ## Credits
@@ -18,6 +20,7 @@
18
20
 
19
21
  ## Thanks
20
22
 
23
+ * [Nokogiri](http://nokogiri.org/)
21
24
 
22
25
  ## License
23
26
 
data/lib/saxony.rb CHANGED
@@ -3,9 +3,10 @@ require 'stringio'
3
3
 
4
4
 
5
5
  class Saxony
6
- VERSION = "0.1.1".freeze unless defined?(Saxony::VERSION)
6
+ VERSION = "0.1.2".freeze unless defined?(Saxony::VERSION)
7
7
 
8
8
  class Document < Nokogiri::XML::SAX::Document
9
+ attr_accessor :path
9
10
  attr_reader :total_count, :granularity
10
11
  def initialize(element, granularity, &processor)
11
12
  @root_element = nil
@@ -59,7 +60,7 @@ class Saxony
59
60
  reset
60
61
  end
61
62
  def reset
62
- @xml = nil
63
+ @xml, @path = nil, nil
63
64
  @buffer, @count, @doc, @start_time = StringIO.new, 0, nil, Time.now
64
65
  end
65
66
  def to_otag(name, attributes=[])
@@ -91,17 +92,37 @@ class Saxony
91
92
  sources.each do |src|
92
93
  saxdoc = Saxony::Document.new @element, @granularity, &blk
93
94
  parser = Nokogiri::XML::SAX::Parser.new(saxdoc)
94
- xml = (String === src && File.exists?(src)) ? File.open(src) : src
95
+ if (String === src && File.exists?(src))
96
+ xml = File.open(src)
97
+ saxdoc.path = src
98
+ else
99
+ xml = src
100
+ saxdoc.path = src.class.to_s
101
+ end
95
102
  parser.parse xml
96
103
  end
97
104
  end
98
105
  end
99
106
 
107
+ class Array
108
+ def saxony_chunk(number_of_chunks)
109
+ chunks = (1..number_of_chunks).collect { [] }
110
+ while self.any?
111
+ chunks.each do |a_chunk|
112
+ a_chunk << self.shift if self.any?
113
+ end
114
+ end
115
+ chunks
116
+ end
117
+ alias_method :chunk, :saxony_chunk unless method_defined? :chunk
118
+ end
119
+
120
+
100
121
  #STDERR.print '.' if @samples % 5000 == 0
101
122
 
102
123
  if $0 == __FILE__
103
124
  sax = Saxony.new :Listing, 1000
104
- sax.parse ARGV do
125
+ sax.parse DATA do
105
126
  #doc.xpath("//Listing").each do |obj|
106
127
  #end
107
128
  p [total_count, doc.xpath("//Listing").size, elapsed_time.to_f]
@@ -110,32 +131,4 @@ if $0 == __FILE__
110
131
  end
111
132
  end
112
133
 
113
- __END__
114
-
115
- <BusinessListings>
116
- <Listing><ListingId>17</ListingId><DBID>16</DBID><BusName>&#39;A&#39; Company Military Surplus</BusName><BusNameFr>&#39;A&#39; Company Military Surplus</BusNameFr><Address>2240 Alberni Hwy</Address><City>Parksville</City><PstCode>V0R1M0</PstCode><Phone><Primary><Prefix>+1</Prefix><NPA>250</NPA><NXX>951</NXX><XNUM>0609</XNUM><DisplayNumber>250-951-0609</DisplayNumber></Primary><Other Type="Click2Call"><Prefix>+1</Prefix><NPA>250</NPA><NXX>951</NXX><XNUM>0609</XNUM><DisplayNumber>250-951-0609</DisplayNumber></Other></Phone>
117
- <ListingKeys>D00007295080000465894</ListingKeys><ReportId>16</ReportId><Paid>Y</Paid><ListEntry><DirProv>BC</DirProv><DirCode>022000</DirCode><HdCode>00866400</HdCode><Channel>2</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13980461ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
118
- <Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
119
- </Text></HS></Products>
120
- </ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086494</DirCode><HdCode>00866400</HdCode><Channel>1</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13912789ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
121
- <Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
122
- </Text></HS></Products>
123
- </ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086604</DirCode><HdCode>00866400</HdCode><Channel>1</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13908447ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
124
- <Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
125
- </Text></HS></Products>
126
- </ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086652</DirCode><HdCode>00866400</HdCode><Channel>1</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13890219ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
127
- <Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
128
- </Text></HS></Products>
129
- </ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086926</DirCode><HdCode>00866400</HdCode><Channel>1</Channel><Rank>7</Rank><NormRank>0</NormRank><Placement Child="false">DPlus</Placement><Products><HS DirPlus="1HS" true="Lang" AdNo="EN" 13980461ab="Rank" PrdCode="7" WEBHS3="Colour" Udac=""><Keywords><Classification><Heading HdCode="HdName" 00866400=""></Heading></Classification><Raw>OPEN 7 DAYS A WEEK CALL US FOR SPECIALS</Raw><HrsOpr>7days</HrsOpr></Keywords><Text><Line Num="Val" 1="OPEN 7 DAYS A WEEK"></Line>
130
- <Line Num="Val" 2="CALL US FOR SPECIALS"></Line>
131
- </Text></HS></Products>
132
- </ListEntry></Listing>
133
- <Listing><ListingId>19</ListingId><DBID>18</DBID><BusName>&#39;Colleen All Dogs&#39; Doggie Daycare</BusName><BusNameFr>&#39;Colleen All Dogs&#39; Doggie Daycare</BusNameFr><Address>6058 144 Street</Address><City>Surrey</City><Prov>BC</Prov><PstCode>V3X1A3</PstCode><Lat>49.113197</Lat><Long>-122.823369</Long><Phone><Primary><Prefix>+1</Prefix><NPA>604</NPA><NXX>319</NXX><XNUM>3895</XNUM><DisplayNumber>604-319-3895</DisplayNumber></Primary><Other Type="Click2Call"><Prefix>+1</Prefix><NPA>604</NPA><NXX>319</NXX><XNUM>3895</XNUM><DisplayNumber>604-319-3895</DisplayNumber></Other></Phone>
134
- <ListingKeys>D00007440120000535278</ListingKeys><ReportId>18</ReportId><Paid>Y</Paid><ListEntry><DirProv>BC</DirProv><DirCode>086446</DirCode><HdCode>00980600</HdCode><Channel>1</Channel><Rank>100</Rank><NormRank>6</NormRank><Placement Child="false">Other</Placement><Products><URL Type="Lang" URL="EN" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL><URL Type="Lang" URL="FR" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL></Products>
135
- </ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086446</DirCode><HdCode>00980355</HdCode><Channel>1</Channel><Rank>194</Rank><NormRank>12</NormRank><Placement Child="false">DPlus</Placement><Products><D_PP PrdCode="EN" D_PP="ProfileId" Type="18042" PPLUS="DirPath" Udac="18042" PPE="Rank" Lang="50"><Keywords><OpenHrs>Monday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Tuesday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Wednesday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Thursday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Friday 7:00 am - 6:30 pm</OpenHrs><LangSpk>English</LangSpk><GetThr>King George Highway</GetThr><ProdServ>Administer Medications</ProdServ><ProdServ>Animal Care Experience</ProdServ><ProdServ>Dog Daycare</ProdServ><ProdServ>Dog Mind &amp; Body Stimulation</ProdServ><ProdServ>Dog Playhouse</ProdServ><ProdServ>Pet Portraits</ProdServ><ProdServ>Pet Shop</ProdServ></Keywords></D_PP>
136
- <D_PP PrdCode="FR" D_PP="ProfileId" Type="18042" PPLUS="DirPath" Udac="18042" ="Rank" Lang="0"><Keywords><OpenHrs>Monday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Tuesday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Wednesday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Thursday 7:00 am - 6:30 pm</OpenHrs><OpenHrs>Friday 7:00 am - 6:30 pm</OpenHrs><LangSpk>English</LangSpk><GetThr>King George Highway</GetThr><ProdServ>Administer Medications</ProdServ><ProdServ>Animal Care Experience</ProdServ><ProdServ>Dog Daycare</ProdServ><ProdServ>Dog Mind &amp; Body Stimulation</ProdServ><ProdServ>Dog Playhouse</ProdServ><ProdServ>Pet Portraits</ProdServ><ProdServ>Pet Shop</ProdServ></Keywords></D_PP>
137
- <URL Type="Lang" URL="EN" PrdCode="LinkText" URL="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" URL0="0"></URL><URL Type="Lang" URL="FR" PrdCode="LinkText" URL="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" URL0="0"></URL><URL Type="Lang" URL="EN" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL><URL Type="Lang" URL="FR" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL><Thumb Lang="THUMB" EN="Udac" Val="QCW" 14571890aa="Rank" Type="44" THUMB="DirPlus" PrdCode="true"></Thumb><Thumb Lang="THUMB" FR="Udac" Val="QCW" 14571890aa="Rank" Type="44" THUMB="DirPlus" PrdCode="true"></Thumb><DspAd Rank="DISPADT" 44="Lang" DirPlus="EN" true="Udac" AdNo="QCW" 14571890aa="Type" PrdCode="DspAd"><Keywords><Classification><Heading HdCode="HdName" 00980355=""></Heading></Classification><Raw>COLLEEN ALL DOGS Doggie Daycare 1/2 Acr 1/2 Acre of Secured Ine of Secured Indoodoor/Outr/Outdoodoor Spacr Spacee Puppy Social Puppy Socialization, 100%ization, 100% Su Superpervisvisionion An Any Agey Age/Size,/Size, By By Appoint Appointmenment Onlyt Only Pet Firs Pet First Aid, 17 t Aid, 17 YrsYrs Ani Animal Knowledgemal Knowledge 604-604-319-38319-389595 6058 144th St Surrey, BC www.colleewww.colleewww.colleenallnallnalldogs.dogs.dogs.comcomcom</Raw></Keywords></DspAd></Products>
138
- </ListEntry><ListEntry><DirProv>BC</DirProv><DirCode>086446</DirCode><HdCode>00740000</HdCode><Channel>1</Channel><Rank>100</Rank><NormRank>6</NormRank><Placement Child="false">Other</Placement><Products><URL Type="Lang" URL="EN" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL><URL Type="Lang" URL="FR" PrdCode="LinkText" P_LINK="" Val="UrlImg" http://www.colleenalldogs.com="u2/b/ad8/bad8592a30566ecbe27da92022963564.jpg" Udac="Rank" SUPEB="100"></URL></Products>
139
- </ListEntry></Listing>
140
- </BusinessListings>
141
134
 
data/saxony.gemspec CHANGED
@@ -1,8 +1,8 @@
1
1
  @spec = Gem::Specification.new do |s|
2
2
  s.name = "saxony"
3
3
  s.rubyforge_project = 'bone'
4
- s.version = "0.1.1"
5
- s.summary = "Parse gigantic XML files with pleasure and ease."
4
+ s.version = "0.1.2"
5
+ s.summary = "Parse gigantic XML files with pleasure and a without running out of memory."
6
6
  s.description = s.summary
7
7
  s.author = "Delano Mandelbaum"
8
8
  s.email = "delano@solutious.com"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: saxony
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Delano Mandelbaum
@@ -22,7 +22,7 @@ dependencies:
22
22
  - !ruby/object:Gem::Version
23
23
  version: "0"
24
24
  version:
25
- description: Parse gigantic XML files with pleasure and ease.
25
+ description: Parse gigantic XML files with pleasure and without running out of memory.
26
26
  email: delano@solutious.com
27
27
  executables: []
28
28
 
@@ -48,7 +48,7 @@ post_install_message:
48
48
  rdoc_options:
49
49
  - --line-numbers
50
50
  - --title
51
- - Parse gigantic XML files with pleasure and ease.
51
+ - Parse gigantic XML files with pleasure and without running out of memory.
52
52
  - --main
53
53
  - README.md
54
54
  require_paths:
@@ -71,6 +71,6 @@ rubyforge_project: bone
71
71
  rubygems_version: 1.3.5
72
72
  signing_key:
73
73
  specification_version: 3
74
- summary: Parse gigantic XML files with pleasure and ease.
74
+ summary: Parse gigantic XML files with pleasure and without running out of memory.
75
75
  test_files: []
76
76