honkster-fasterer-csv 1.7.1

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,3 @@
1
+ Mockyright (m) 2010 Mason
2
+
3
+ Permission is hereby granted.
data/README.rdoc ADDED
@@ -0,0 +1,26 @@
1
+ = fasterer-csv
2
+
3
+ Normal FasterCSV was just too slow for the project I was working on, taking a billion trallion years to read in these tons of huge CSV files, so: this.
4
+
5
+ Depending on various stuff and such, it almost always reads at least twice as fast, sometimes up to 40x as fast.
6
+
7
+ It's got some caveats, though... which you'll discover in fun and entertaining ways.
8
+
9
+ Also, it's not *completely* a drop-in replacement, but it's really really close. Why do it this way? Because I hate you and want to make your life a little more difficult whenever possible.
10
+
11
+
12
+ == Note on Patches/Pull Requests
13
+
14
+ * Spoon the project.
15
+ * Make your feature addition or bug fix.
16
+ * Add tests for it. This is important so I don't break it in a
17
+ future version unintentionally.
18
+ * Commit, do not mess with rakefile, version, or history.
19
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
20
+ * Send me a pull request.
21
+ * Delete everything you've done at that point because I'm probably going to reimplement it anyways.
22
+
23
+ == Monkyright
24
+
25
+ Monkeyright (m) 2010 Mason. See LICENSE for details.
26
+
data/Rakefile ADDED
@@ -0,0 +1,43 @@
1
# Build script: gem packaging (Jeweler), the spec task (RSpec 1.x) and the
# rdoc task. Each optional tool is loaded inside its own begin/rescue so that
# a missing library only disables its own tasks instead of making the whole
# Rakefile unloadable.
require 'rubygems'
require 'rake'

begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|

    # GEM_PREFIX, when set, is prepended to the gem name (e.g. "honkster-").
    gem.name = %Q{#{ENV["GEM_PREFIX"] && "#{ENV["GEM_PREFIX"]}-"}fasterer-csv}
    gem.authors = ["Mason"]
    gem.email = %q{mason@chipped.net}
    gem.date = Time.now.strftime("%Y-%m-%d")
    gem.description = %q{CSV parsing awesomeness}
    gem.summary = %q{Even fasterer than FasterCSV!}
    gem.homepage = %q{http://github.com/gnovos/fasterer-csv}
    # NOTE: <<- keeps the body's leading whitespace, so the message lines stay
    # at column 0 on purpose.
    gem.post_install_message = <<-POST
Kernel Panic! System32 deleted! Klaxons klaxoning! Dogs and Cats living together!!!! We're doooomed! Everything is...
oh, no wait, it installed fine. My bad.
    POST
    gem.files = Dir["lib/**/*.rb", "LICENSE", "Rakefile", "VERSION", "**/*.rdoc", "spec/**/*.rb"]

  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler you doof"
end

# The spec task needs RSpec 1.x; guard the require the same way Jeweler is
# guarded above.
begin
  require 'spec/rake/spectask'
  Spec::Rake::SpecTask.new(:spec) do |spec|
    spec.libs << 'lib' << 'spec'
    spec.spec_files = FileList['spec/**/*_spec.rb']
  end
rescue LoadError
  puts "RSpec 1.x (or a dependency) not available. Install it with: gem install rspec -v '~> 1.3'"
end

task :default => :spec

# Prefer RDoc's own task class; fall back to the legacy Rake::RDocTask that
# older Rake versions shipped as rake/rdoctask.
begin
  begin
    require 'rdoc/task'
    rdoc_task_class = RDoc::Task
  rescue LoadError
    require 'rake/rdoctask'
    rdoc_task_class = Rake::RDocTask
  end

  rdoc_task_class.new do |rdoc|
    # strip: the VERSION file ends with a newline that does not belong in the title.
    version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

    rdoc.rdoc_dir = 'rdoc'
    rdoc.title = "FastererCSV #{version}"
    rdoc.rdoc_files.include('README*')
    rdoc.rdoc_files.include('lib/**/*.rb')
  end
rescue LoadError
  puts "RDoc task library not available. Install it with: gem install rdoc"
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.7.0
@@ -0,0 +1,429 @@
1
+ require 'rubygems'
2
+ require 'stringio'
3
+
4
+ module FastererCSV
5
+
6
# A parsed CSV document: an Array of Row objects that all share one list of
# normalized header symbols. Rows can also be streamed to a block instead of
# being stored (see #initialize).
class Table < Array

  class << self
    # Converts raw header values into the symbols used for column lookup
    # (delegates to Row.to_key for each header).
    def format_headers(unformatted)
      unformatted.map { |header| Row.to_key(header) }
    end
  end

  attr_reader :headers, :lines, :line_block

  # headers                   - raw header values, normalized with format_headers.
  # fail_on_malformed_columns - when true, appending a row whose length differs
  #                             from the header count raises; when false the
  #                             mismatch is only printed.
  # line_block                - optional block; when given, every appended row
  #                             is passed to it and is NOT stored in the table.
  def initialize(headers, fail_on_malformed_columns = true, &line_block)
    @headers = Table.format_headers(headers)
    @fail_on_malformed_columns = fail_on_malformed_columns
    @line_block = line_block
    @lines = 0
    @indexes = {}
  end

  # Appends one row (a Row, or a plain Array that gets wrapped in a Row).
  # Raises RuntimeError on a column-count mismatch when the table was created
  # with fail_on_malformed_columns.
  def <<(row)
    @lines += 1
    row = Row.new(self, row, @lines) unless row.is_a?(Row)
    report_column_mismatch(row) if @headers.length != row.length
    if line_block
      line_block.call(row)
    else
      super(row)
    end
  end
  alias_method :push, :<<

  # Merges rows of the given tables into the rows of this table that have the
  # same values in every column the two tables share. Returns self.
  def merge(*tables)
    tables.each do |table|
      matching = headers & table.headers
      key = {}

      table.each do |row|
        matching.each { |match| key[match] = row[match] }
        lookup(key) { |r| r.merge(row) }
      end
    end

    self
  end

  # Returns a Hash mapping [value, ...] (one value per column, columns sorted
  # by name) to the Array of rows holding those values.
  #
  # The index is cached per column set and built on first use. Pass
  # reindex = true to rebuild it; rows appended after an index was built are
  # not visible until it is rebuilt.
  def index(columns, reindex = false)
    columns = columns.compact.uniq.sort { |a, b| a.to_s <=> b.to_s }.map { |column| Row.to_key(column) }

    cache_key = columns.join('|#|')
    index = (@indexes[cache_key] ||= {})

    # Start from scratch on a rebuild; otherwise every row would be added to
    # its bucket a second time.
    index.clear if reindex

    if index.empty?
      each do |row|
        vkey = columns.map { |column| row[column] }
        (index[vkey] ||= []) << row
      end
    end

    index
  end

  # Finds the rows whose columns equal the values in +key+ (a Hash of
  # column => value). Yields each match when a block is given. Returns the
  # Array of matching rows, or nil when nothing matches.
  def lookup(key)
    values = []
    columns = key.keys.compact.uniq.sort { |a, b| a.to_s <=> b.to_s }.map do |column|
      values << key[column]
      Row.to_key(column)
    end

    rows = index(columns)[values]
    rows.each { |row| yield(row) } if rows && block_given?

    rows
  end

  # Writes the headers followed by every row through FastererCSV.write.
  def write(file, quot = '~', sep = ',')
    FastererCSV.write(file, quot, sep) do |out|
      out << headers
      each { |row| out << row }
    end
  end

  alias_method :rows, :to_a
  alias_method :merge!, :merge

  private

  # Prints a description of a row whose column count differs from the header
  # count and raises it as a RuntimeError when the table is strict.
  def report_column_mismatch(row)
    # @lines is the 1-based number of the row being appended; it stays correct
    # in streaming mode, where the table itself never grows.
    error = "*** WARNING - COLUMN COUNT MISMATCH - WARNING ***\n*** ROW #{@lines} : EXPECTED #{@headers.length} : FOUND #{row.length}\n\n"
    headers.each_with_index do |header, i|
      error << sprintf("%-32s : %s\n", header, row[i])
    end
    puts error
    raise error if @fail_on_malformed_columns
  end

end
120
+
121
+ class Row < Array
122
+
123
+ class << self
124
+ def to_key(key)
125
+ key = "#{key}".downcase.gsub(/\s+/, '_')
126
+ key.empty? ? :_ : key.to_sym
127
+ end
128
+ end
129
+
130
+ def headers
131
+ @table.headers
132
+ end
133
+
134
+ attr_reader :line
135
+
136
+ def initialize(table, array, line=-1)
137
+ @table = table
138
+ @line = line
139
+ super(array)
140
+ end
141
+
142
+ def [](*is)
143
+ is.each do |i|
144
+ val = if i.is_a? Fixnum
145
+ super
146
+ else
147
+ found = headers.index(Row::to_key(i))
148
+ found ? super(found) : nil
149
+ end
150
+ return val unless val.nil?
151
+ end
152
+ nil
153
+ end
154
+
155
+ def []=(key, val)
156
+ if key.is_a? Fixnum
157
+ super
158
+ else
159
+ key = Row::to_key(key)
160
+ headers << key unless headers.include? key
161
+ found = headers.index(key)
162
+ super(found, val)
163
+ end
164
+ end
165
+
166
+ def pull(*columns)
167
+ columns.map do |column|
168
+ self[*column]
169
+ end
170
+ end
171
+
172
+ def merge(row)
173
+ if row.is_a? Row
174
+ row.headers.each do |header|
175
+ self[header] = row[header]
176
+ end
177
+ else
178
+ row.each do |key, value|
179
+ self[key] = value
180
+ end
181
+ end
182
+ self
183
+ end
184
+
185
+ def to_hash
186
+ headers.inject({}) do |memo, h|
187
+ memo[h] = self[h]
188
+ memo
189
+ end
190
+ end
191
+
192
+ def key?(key)
193
+ keys.include?(Row.to_key(key))
194
+ end
195
+
196
+ def value?(value)
197
+ values.include?(value)
198
+ end
199
+
200
+ def method_missing(method, *args, &block)
201
+ to_hash.send(method, *args, &block)
202
+ end
203
+
204
+ alias_method :keys, :headers
205
+ alias_method :values, :to_a
206
+
207
+ alias_method :has_key?, :key?
208
+ alias_method :member?, :key?
209
+ alias_method :include?, :key?
210
+
211
+ alias_method :has_value?, :value?
212
+ alias_method :merge!, :merge
213
+
214
+ end
215
+
216
# Field accumulator that collects the characters of one CSV field and, on
# #convert, returns an Integer or Float when the field looks numeric and the
# plain String otherwise.
#
# Characters may be appended either as Integer byte values (what
# String#each_byte / IO#each_byte yield) or as one-character Strings.
class NumericConversion < Array

  MINUS = 45 # "-"
  DOT   = 46 # "."
  ZERO  = 48 # "0"
  NINE  = 57 # "9"

  def initialize
    super()
    reset_flags
  end

  # Empties the accumulator so it can be reused for the next field.
  def clear
    reset_flags
    super
  end

  # Appends one character and updates the numeric classification.
  def <<(ch)
    # ord maps both an Integer byte and a one-character String to the same
    # code, so the comparisons below never mix Integers with Strings.
    code = ch.ord

    if code == MINUS
      # A minus sign is only numeric as the very first character.
      @float = @int = empty?
    elsif code == DOT
      if @dot
        @int = @float = false # second dot: not a number
      else
        @int = false
        @dot = true
      end
    elsif code < ZERO || code > NINE
      @int = @float = false
    else
      @digit = true
    end

    super(ch.chr)
  end

  # Returns the accumulated field.
  #
  # as_string - when true, always the raw String ("" when empty).
  #
  # Otherwise: nil when empty, an Integer or Float when the field is numeric
  # and contains at least one digit, else the raw String (so a lone "-" or
  # "." stays a String instead of turning into 0).
  def convert(as_string = false)
    return join if as_string
    return nil if empty?

    text = join
    if @int && @digit
      text.to_i
    elsif @float && @digit
      text.to_f
    else
      text
    end
  end

  private

  # Resets the classification to "empty field".
  def reset_flags
    @int = @float = true
    @dot = false
    @digit = false
  end

end
259
+
260
# Field accumulator that performs no type conversion: the characters of one
# CSV field are collected and handed back as a String.
class NoConversion < Array

  # Appends one character; +ch+ is stored as ch.chr.
  def <<(ch)
    character = ch.chr
    super(character)
  end

  # Returns the collected field as a String. An empty field yields nil,
  # unless as_string is true, in which case it yields "".
  def convert(as_string = false)
    return nil if empty? && !as_string
    join
  end

end
277
+
278
# Writes rows to an IO-like object, one CSV line per row, formatting each
# line with FastererCSV.quot_row.
class IOWriter

  # file     - destination; must respond to #syswrite.
  # quot     - quote string, passed through to quot_row.
  # sep      - separator string, passed through to quot_row.
  # quotenum - passed through to quot_row as its numquot argument.
  def initialize(file, quot = '~', sep = ',', quotenum = false)
    @io = file
    @quot = quot
    @sep = sep
    @quotenum = quotenum
    @first = true # nothing has been written yet
  end

  # Writes one row and returns it. Raises RuntimeError unless +row+ is an
  # Array. When the very first thing written is a Row, its headers are
  # written as a line of their own before it.
  def <<(row)
    unless row.is_a?(Array)
      raise "can only write arrays! #{row.class} #{row.inspect}"
    end

    needs_header_line = @first && row.is_a?(Row)
    self << row.headers if needs_header_line

    @first = false
    @io.syswrite(FastererCSV::quot_row(row, @quot, @sep, @quotenum))
    row
  end

end
293
+
294
# Module-level API: reading/parsing CSV into a Table and writing rows back
# out. The quote character defaults to "~" and the separator to ",".
class << self

  # Returns the normalized header symbols of the CSV file at +file+
  # (only the first line is read). Returns [] for an empty file.
  def headers(file, quot = '~', sep = ',', fail_on_malformed = true, column = NoConversion.new, &block)
    first_line = File.open(file, 'r') { |io| io.gets }
    parse_headers(first_line.to_s, quot, sep, fail_on_malformed, column, &block)
  end

  # Parses the CSV file at +file+ into a Table; fields stay Strings.
  def read(file, quot = '~', sep = ',', fail_on_malformed = true, column = NoConversion.new, &block)
    File.open(file, 'r') do |io|
      parse(io, quot, sep, fail_on_malformed, column, &block)
    end
  end

  # Like read, but unquoted numeric-looking fields become Integer/Float.
  def convread(file, quot = '~', sep = ',', fail_on_malformed = true, column = NumericConversion.new, &block)
    File.open(file, 'r') do |io|
      parse(io, quot, sep, fail_on_malformed, column, &block)
    end
  end

  # Returns the normalized header symbols of the CSV text in +data+,
  # or [] when +data+ holds no header line.
  def parse_headers(data, quot = '~', sep = ',', fail_on_malformed = true, column = NoConversion.new, &block)
    table = parse(data, quot, sep, fail_on_malformed, column, &block)
    table ? table.headers : []
  end

  # Parses CSV from +io+ (anything with #each_byte: a String, File, StringIO)
  # into a Table. The first non-empty line supplies the headers.
  #
  # quot              - quote string; only its first character is used. A
  #                     doubled quote inside a quoted field is a literal quote.
  # sep               - separator string; only its first character is used.
  # fail_on_malformed - passed to Table.new.
  # column            - field accumulator (#<<, #clear, #empty?, #convert).
  # block             - passed to Table.new (per-row streaming callback).
  #
  # Carriage returns are skipped everywhere; empty lines are ignored. Returns
  # nil when the input contains no rows at all.
  def parse(io, quot = '~', sep = ',', fail_on_malformed = true, column = NoConversion.new, &block)
    # each_byte yields Integers, so every comparison below must be against
    # Integer codes. ord turns the first character into its code whether
    # String#[] returned a String (Ruby 1.9+) or already an Integer (1.8).
    q = quot[0]
    q = q.ord if q
    s = sep[0]
    s = s.ord if s
    cr, nl = 13, 10

    # State:
    #   inquot  - inside a quoted field
    #   clean   - at the start of a field, nothing consumed for it yet
    #   maybe   - just saw a quote inside a quoted field (closing quote, or the
    #             first half of a doubled quote)
    #   field   - a separator was seen, so one more field is owed to this row
    #   endline - the previous byte ended a line
    row, inquot, clean, maybe, table, field, endline = [], false, true, false, nil, true, false

    io.each_byte do |c|
      next if c == cr

      if maybe && c == s
        row << column.convert(true)
        column.clear
        clean, inquot, maybe, field, endline = true, false, false, true, false
      elsif maybe && c == nl && table.nil?
        row << column.convert(true) unless (column.empty? && endline)
        column.clear
        table = Table.new(row, fail_on_malformed, &block) unless row.empty?
        row, clean, inquot, maybe, field, endline = [], true, false, false, false, true
      elsif maybe && c == nl
        row << column.convert(true) unless (column.empty? && endline)
        column.clear
        table << row unless row.empty?
        row, clean, inquot, maybe, field, endline = [], true, false, false, false, true
      elsif clean && c == q
        inquot, clean, endline = true, false, false
      elsif maybe && c == q
        column << c
        clean, maybe, endline = false, false, false
      elsif c == q
        maybe, endline = true, false
      elsif inquot
        column << c
        clean, endline = false, false
      elsif c == s
        row << column.convert(false)
        column.clear
        clean, field, endline = true, true, false
      elsif c == nl && table.nil?
        row << column.convert(false) unless column.empty? && endline
        column.clear
        table = Table.new(row, fail_on_malformed, &block) unless row.empty?
        row, clean, inquot, field, endline = [], true, false, false, true
      elsif c == nl
        row << column.convert(false) unless column.empty? && endline
        column.clear
        table << row unless row.empty?
        row, clean, inquot, field, endline = [], true, false, false, true
      else
        column << c
        clean, endline = false, false
      end
    end

    # Input ended without a final newline: flush the row in progress.
    if !clean
      row << column.convert(maybe)
      pending = true
    elsif field && !row.empty?
      # The last byte was a separator: the row is owed one trailing empty
      # field and must still be stored (it used to be dropped).
      row << column.convert(maybe)
      pending = true
    else
      pending = false
    end

    if pending && !row.empty?
      if table
        table << row
      else
        table = Table.new(row, fail_on_malformed, &block)
      end
    end

    table
  end

  # Formats one row as a CSV line (terminated by "\n").
  #
  # nil becomes an empty field, Numerics are written bare, an empty String is
  # written as two quotes. Other values are quoted when they contain the
  # quote, the separator or a newline; with numquot, non-Symbol values made up
  # only of digits are quoted too. Quotes inside a quoted value are doubled.
  def quot_row(row, q = '~', s = ',', numquot = false)
    # Escape q and s so characters such as "^", "-", "]" or "\" are taken
    # literally inside the character class.
    special = "[#{Regexp.escape(q)}#{Regexp.escape(s)}\n]"
    need_quot = Regexp.new(special)
    num_quot = Regexp.new("(?:#{special}|^\\d+$)")

    fields = row.map do |val|
      if val.nil?
        ""
      elsif val.is_a?(Numeric)
        val.to_s
      else
        quot = (val.is_a?(Symbol) || !numquot) ? need_quot : num_quot
        val = String(val)
        if val.length == 0
          q * 2
        else
          val[quot] ? q + val.gsub(q, q * 2) + q : val
        end
      end
    end

    fields.join(s) + "\n"
  end

  # Yields an IOWriter backed by an in-memory buffer and returns the CSV
  # text that was written to it.
  def generate(quot = '~', sep = ',', &block)
    builder = StringIO.new
    write(builder, quot, sep, &block)
    builder.string
  end

  # Yields an IOWriter for +data+ (a file path, opened with mode 'w', or an
  # IO-like object).
  def write(data, quot = '~', sep = ',', quotenum = false, &block)
    out(data, 'w', quot, sep, quotenum, &block)
  end

  # Like write, but a file path is opened with mode 'a'.
  def append(data, quot = '~', sep = ',', quotenum = false, &block)
    out(data, 'a', quot, sep, quotenum, &block)
  end

  # Shared implementation of write/append: a String is treated as a file path
  # and opened with +mode+; anything else is used as the IO directly.
  def out(data, mode = 'w', quot = '~', sep = ',', quotenum = false, &block)
    if data.is_a?(String)
      File.open(data, mode) do |io|
        out(io, mode, quot, sep, quotenum, &block)
      end
    else
      yield(IOWriter.new(data, quot, sep, quotenum))
    end
  end

end
429
+ end
@@ -0,0 +1,166 @@
1
# RSpec 1.x specs for FastererCSV. Only the converters and parse have real
# examples; the remaining examples are empty placeholders.
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

describe "FastererCSV" do

  describe "fiddly bits" do
    describe "Table" do
      it "works" do

      end
    end

    describe "Row" do
      it "works" do

      end
    end
  end

  describe "Converters" do
    describe "NumericConverters" do
      it "works" do

        conv = FastererCSV::NumericConversion.new
        conv << ?1
        conv.convert(true).class.should == String
        conv.convert(true).should == "1"

        # be_kind_of(Integer) instead of comparing the class with Fixnum:
        # the Fixnum constant no longer exists in modern Ruby.
        conv.convert(false).should be_kind_of(Integer)
        conv.convert(false).should == 1

        conv.clear
        conv << ?-
        conv << ?1
        conv.convert(false).should be_kind_of(Integer)
        conv.convert(false).should == -1

        conv.clear
        conv << ?1
        conv << ?.
        conv << ?1
        conv.convert(false).class.should == Float
        conv.convert(false).should == 1.1

        conv.clear
        conv << ?-
        conv << ?1
        conv << ?.
        conv << ?1
        conv.convert(false).class.should == Float
        conv.convert(false).should == -1.1

        conv.clear
        conv << ?1
        conv << ?.
        conv << ?1
        conv << ?.
        conv << ?1
        conv.convert(false).class.should == String
        conv.convert(false).should == "1.1.1"

        conv.clear
        conv << ?a
        conv.convert(false).class.should == String
        conv.convert(false).should == "a"

        conv.clear
        conv.should be_empty
        conv.convert(false).should be_nil
        conv.convert(true).should == ""

      end
    end
    describe "NoConverter" do
      it "works" do

        conv = FastererCSV::NoConversion.new
        conv << ?1
        conv.convert(true).class.should == String
        conv.convert(false).class.should == String

        conv.convert(true).should == "1"
        conv.convert(false).should == "1"

        conv.clear
        conv.should be_empty
        conv.convert(false).should be_nil
        conv.convert(true).should == ""

      end
    end
  end

  describe "important stuff" do

    before do
      # The data lines start at column 0 on purpose: <<- keeps leading
      # whitespace, which would become part of the first field of each line.
      @data = <<-CSV
a,b,c,d,e,f,g,h,i,j,k,l,m,
,,1,1.1,-1,-1.1,1.1.1,~1~,a,~a~,~a~~a~,~a
~~a~,~,~,

0,1,2,3,4,5,6,7,8,9,10,11,12,14
      CSV
    end

    describe "parse" do
      it "works" do
        table = FastererCSV.parse(@data)
        table.headers.should == [:a, :b, :c,:d,:e,:f,:g,:h,:i,:j,:k,:l,:m,:_]
        table.lines.should == 2

        table[0].should == [nil, nil, "1", "1.1", "-1", "-1.1", "1.1.1", "1", "a", "a", "a~a", "a\n~a", ",", nil]
        table[1].should == ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "14"]

        row = table[1]
        row.pull(:a, nil, 'd').should == ["0","14","3"]
        row[:b].should == "1"
        row["b"].should == "1"

      end
    end

    describe "read" do
      it "" do

      end
    end

    describe "convread" do
      it "" do

      end
    end

    describe "headers" do
      it "" do

      end
    end

    describe "parse_headers" do
      it "" do

      end
    end

    describe "quot_row" do
      it "" do

      end
    end

    describe "generate" do
      it "" do

      end
    end

    describe "write" do
      it "" do

      end
    end

  end

end
@@ -0,0 +1,10 @@
1
# Spec bootstrap: puts the spec directory and the gem's lib directory on the
# load path, loads the library, then loads RSpec 1.x with autorun.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(spec_dir)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))

require 'fasterer_csv'

# NOTE(review): "-b" is appended to the runner's arguments before RSpec
# loads; presumably it enables full backtraces. Confirm against the RSpec 1.x
# option list.
ARGV.push("-b")
require 'spec'
require 'spec/autorun'

# No custom runner configuration yet.
Spec::Runner.configure do |config|

end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: honkster-fasterer-csv
3
+ version: !ruby/object:Gem::Version
4
+ hash: 9
5
+ prerelease: false
6
+ segments:
7
+ - 1
8
+ - 7
9
+ - 1
10
+ version: 1.7.1
11
+ platform: ruby
12
+ authors:
13
+ - Mason
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-09-07 00:00:00 -07:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: CSV parsing awesomeness
23
+ email: mason@chipped.net
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files:
29
+ - LICENSE
30
+ - README.rdoc
31
+ files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ - Rakefile
35
+ - VERSION
36
+ - lib/fasterer_csv.rb
37
+ - spec/fasterer_csv_spec.rb
38
+ - spec/spec_helper.rb
39
+ has_rdoc: true
40
+ homepage: http://github.com/gnovos/fasterer-csv
41
+ licenses: []
42
+
43
+ post_install_message: |
44
+ Kernel Panic! System32 deleted! Klaxons klaxoning! Dogs and Cats living together!!!! We're doooomed! Everything is...
45
+ oh, no wait, it installed fine. My bad.
46
+
47
+ rdoc_options:
48
+ - --charset=UTF-8
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ hash: 3
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ requirements: []
70
+
71
+ rubyforge_project:
72
+ rubygems_version: 1.3.7
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: Even fasterer than FasterCSV!
76
+ test_files:
77
+ - spec/spec_helper.rb
78
+ - spec/fasterer_csv_spec.rb