honkster-fasterer-csv 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,3 @@
1
+ Mockyright (m) 2010 Mason
2
+
3
+ Permission is hereby granted.
data/README.rdoc ADDED
@@ -0,0 +1,26 @@
1
+ = fasterer-csv
2
+
3
+ Normal FasterCSV was just too slow for the project I was working on, taking a billion trillion years to read in tons of huge CSV files, so: this.
4
+
5
+ Depending on various stuff and such, it almost always reads at least twice as fast, sometimes up to 40x as fast.
6
+
7
+ It's got some caveats, though... which you'll discover in fun and entertaining ways.
8
+
9
+ Also, it's not *completely* a drop-in replacement, but it's really really close. Why do it this way? Because I hate you and want to make your life a little more difficult whenever possible.
10
+
11
+
12
+ == Note on Patches/Pull Requests
13
+
14
+ * Spoon the project.
15
+ * Make your feature addition or bug fix.
16
+ * Add tests for it. This is important so I don't break it in a
17
+ future version unintentionally.
18
+ * Commit, do not mess with rakefile, version, or history.
19
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
20
+ * Send me a pull request.
21
+ * Delete everything you've done at that point because I'm probably going to reimplement it anyways.
22
+
23
+ == Monkyright
24
+
25
+ Monkeyright (m) 2010 Mason. See LICENSE for details.
26
+
data/Rakefile ADDED
@@ -0,0 +1,43 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: when it cannot be loaded the
# packaging tasks are simply not defined and a hint is printed instead.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    # An optional GEM_PREFIX environment variable is prepended as "<prefix>-".
    gem.name = %Q{#{ENV["GEM_PREFIX"] && "#{ENV["GEM_PREFIX"]}-"}fasterer-csv}
    gem.authors = ["Mason"]
    gem.email = %q{mason@chipped.net}
    gem.date = Time.now.strftime("%Y-%m-%d")
    gem.description = %q{CSV parsing awesomeness}
    gem.summary = %q{Even fasterer than FasterCSV!}
    gem.homepage = %q{http://github.com/gnovos/fasterer-csv}
    gem.post_install_message = <<-POST
Kernel Panic! System32 deleted! Klaxons klaxoning! Dogs and Cats living together!!!! We're doooomed! Everything is...
oh, no wait, it installed fine. My bad.
    POST
    gem.files = Dir["lib/**/*.rb", "LICENSE", "Rakefile", "VERSION", "**/*.rdoc", "spec/**/*.rb"]
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler you doof"
end

# Spec task. Guarded the same way as Jeweler so that a missing RSpec 1.x
# install does not stop the rest of the Rakefile from loading.
begin
  require 'spec/rake/spectask'
  Spec::Rake::SpecTask.new(:spec) do |spec|
    spec.libs << 'lib' << 'spec'
    spec.spec_files = FileList['spec/**/*_spec.rb']
  end
rescue LoadError
  puts "RSpec 1.x (spec/rake/spectask) not available; the :spec task is not defined."
end

task :default => :spec

# RDoc task. Prefer rdoc/task and fall back to the legacy rake/rdoctask
# location when rdoc/task cannot be loaded.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: the VERSION file ends with a newline that should not end up in the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "FastererCSV #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.7.0
@@ -0,0 +1,429 @@
1
+ require 'rubygems'
2
+ require 'stringio'
3
+
4
# FastererCSV: a small byte-at-a-time CSV reader and writer.
#
# Reading (parse / read / convread / headers) produces a Table of Rows whose
# cells can be addressed by position or by normalized header name. Writing
# (write / append / generate) goes through IOWriter and quot_row.
# The default quote character is '~' and the default separator is ','.
module FastererCSV

  # Byte values used by the parser (IO#each_byte / String#each_byte yield Integers).
  CR = 13 # "\r"
  LF = 10 # "\n"

  # An Array of Row objects plus the (normalized) header list they share.
  class Table < Array

    class << self
      # Normalizes raw header cells into header keys (see Row.to_key).
      def format_headers(unformatted)
        unformatted.map { |header| Row.to_key(header) }
      end
    end

    # headers    - Array of normalized header keys (Symbols)
    # lines      - number of rows pushed so far (counted even when a line_block consumes them)
    # line_block - optional block that receives each row instead of the row being stored
    attr_reader :headers, :lines, :line_block

    # headers                   - raw header cells; normalized through Table.format_headers
    # fail_on_malformed_columns - when true, a row whose cell count differs from the
    #                             header count raises after the warning is printed
    # line_block                - when given, rows are passed to it and NOT stored
    def initialize(headers, fail_on_malformed_columns = true, &line_block)
      @headers = Table.format_headers(headers)
      @fail_on_malformed_columns = fail_on_malformed_columns
      @line_block = line_block
      @lines = 0
      @indexes = {}
    end

    # Adds a row (a plain Array is wrapped in a Row bound to this table).
    # A column-count mismatch is always printed to stdout and additionally
    # raised as a RuntimeError when fail_on_malformed_columns is set.
    def <<(row)
      @lines += 1
      row = Row.new(self, row, @lines) unless row.is_a?(Row)

      if @headers.length != row.length
        error = "*** WARNING - COLUMN COUNT MISMATCH - WARNING ***\n*** ROW #{size} : EXPECTED #{@headers.length} : FOUND #{row.length}\n\n"
        headers.each_with_index do |header, i|
          error << sprintf("%-32s : %s\n", header, row[i])
        end
        puts error
        raise error if @fail_on_malformed_columns
      end

      if line_block
        line_block.call(row)
      else
        super(row)
      end
    end
    alias_method :push, :<<

    # Merges the rows of each given table into the rows of this table that
    # have the same values in all columns the two tables have in common.
    # Returns self.
    def merge(*tables)
      tables.each do |table|
        matching = headers & table.headers

        table.each do |row|
          key = {}
          matching.each { |match| key[match] = row[match] }
          lookup(key) { |r| r.merge(row) }
        end
      end

      self
    end

    # Returns a Hash mapping [value, ...] (one value per column, columns taken
    # in sorted order) to the Array of rows holding those values.
    # The index is cached per column set; pass reindex = true to rebuild it.
    def index(columns, reindex = false)
      columns = columns.compact.uniq.sort { |a, b| a.to_s <=> b.to_s }.map { |column| Row.to_key(column) }

      key = columns.join('|#|')
      index = (@indexes[key] ||= {})

      # Rebuild from scratch: appending into the old buckets would list every row twice.
      index.clear if reindex

      if index.empty?
        each do |row|
          vkey = columns.map { |column| row[column] }
          (index[vkey] ||= []) << row
        end
      end

      index
    end

    # Finds the rows whose columns equal the values in +key+ (a Hash of
    # column => value). Yields each matching row when a block is given.
    # Returns the Array of matching rows, or nil when nothing matches.
    def lookup(key)
      values = []
      columns = key.keys.compact.uniq.sort { |a, b| a.to_s <=> b.to_s }.map do |column|
        values << key[column]
        Row.to_key(column)
      end

      rows = index(columns)[values]
      rows.each { |row| yield(row) } if rows && block_given?

      rows
    end

    # Writes the header line followed by every row to +file+ (a path or an IO-like object).
    def write(file, quot = '~', sep = ',')
      FastererCSV.write(file, quot, sep) do |out|
        out << headers
        each { |row| out << row }
      end
    end

    alias_method :rows, :to_a
    alias_method :merge!, :merge

  end

  # One line of a Table: an Array of cells that can also be read and written
  # by header name, and that answers Hash methods through method_missing.
  class Row < Array

    class << self
      # Normalizes a header: downcased, whitespace runs replaced by "_",
      # returned as a Symbol. An empty (or nil) header becomes :_.
      def to_key(key)
        key = "#{key}".downcase.gsub(/\s+/, '_')
        key.empty? ? :_ : key.to_sym
      end
    end

    # The header list of the owning table (shared, mutable).
    def headers
      @table.headers
    end

    # Line number assigned by the table when the row was added (-1 by default).
    attr_reader :line

    def initialize(table, array, line = -1)
      @table = table
      @line = line
      super(array)
    end

    # Cell lookup.
    #
    # * Only Integer arguments: plain Array#[] semantics (index, or start/length).
    # * Otherwise each argument is tried in order (an Integer as an index,
    #   anything else as a header name) and the first non-nil cell is returned.
    #
    # Returns nil when nothing is found or no argument is given.
    def [](*is)
      return nil if is.empty?
      # Integer replaces the Fixnum constant, which no longer exists in current Ruby.
      return super(*is) if is.all? { |i| i.is_a?(Integer) }

      is.each do |i|
        val = if i.is_a?(Integer)
                super(i)
              else
                found = headers.index(Row.to_key(i))
                found ? super(found) : nil
              end
        return val unless val.nil?
      end
      nil
    end

    # Cell assignment by position (Integer) or by header name. Assigning to an
    # unknown header appends that header to the owning table's header list.
    def []=(key, val)
      return super if key.is_a?(Integer)

      key = Row.to_key(key)
      headers << key unless headers.include?(key)
      super(headers.index(key), val)
    end

    # Returns one cell per argument. An Array argument is a list of fallback
    # columns (first non-nil wins); a nil argument addresses the :_ column.
    def pull(*columns)
      columns.map do |column|
        # Do not splat scalars: `*nil` expands to no arguments on Ruby >= 1.9.
        column.is_a?(Array) ? self[*column] : self[column]
      end
    end

    # Copies every cell of +row+ (another Row, or anything yielding key/value
    # pairs from #each) into this row. Returns self.
    def merge(row)
      if row.is_a?(Row)
        row.headers.each { |header| self[header] = row[header] }
      else
        row.each { |key, value| self[key] = value }
      end
      self
    end

    # Hash of header => cell for every header of the owning table.
    def to_hash
      headers.inject({}) do |memo, h|
        memo[h] = self[h]
        memo
      end
    end

    # True when +key+ (normalized with Row.to_key) is one of the headers.
    def key?(key)
      keys.include?(Row.to_key(key))
    end

    # True when any cell equals +value+.
    def value?(value)
      values.include?(value)
    end

    # Anything Array does not implement is forwarded to the Hash form of the row.
    def method_missing(method, *args, &block)
      to_hash.send(method, *args, &block)
    end

    # Keeps respond_to? in line with method_missing for public Hash methods.
    def respond_to_missing?(method, include_private = false)
      Hash.public_method_defined?(method) || super
    end

    alias_method :keys, :headers
    alias_method :values, :to_a

    alias_method :has_key?, :key?
    alias_method :member?, :key?
    alias_method :include?, :key?

    alias_method :has_value?, :value?
    alias_method :merge!, :merge

  end

  # Column accumulator that turns a cell into an Integer or Float when its
  # characters form a number, and into a String otherwise.
  class NumericConversion < Array

    def initialize
      super()
      reset_flags
    end

    # Empties the accumulator and resets the numeric detection state.
    def clear
      reset_flags
      super
    end

    # Appends one character, given either as a byte (Integer, as yielded by
    # each_byte) or as a one-character String.
    def <<(ch)
      # Normalize first so the comparisons below never mix Integer and String.
      ch = ch.chr

      if ch == '-'
        # A minus sign is only numeric as the very first character.
        @float = @int = empty?
      elsif ch == '.'
        if @dot
          @int = @float = false # second dot: not a number
        else
          @int = false
          @dot = true
        end
      elsif ch < '0' || ch > '9'
        @int = @float = false
      else
        @digit = true
      end

      super(ch)
    end

    # Returns the accumulated cell.
    #
    # as_string - true: always the joined String (possibly "").
    #             false: nil when empty, Integer / Float when numeric, else String.
    def convert(as_string = false)
      text = join
      return text if as_string
      return nil if empty?
      # A cell without any digit (such as "-" or ".") is text, not zero.
      return text unless @digit

      if @int
        text.to_i
      elsif @float
        text.to_f
      else
        text
      end
    end

    private

    def reset_flags
      @int = @float = true
      @dot = false
      @digit = false
    end

  end

  # Column accumulator that always yields Strings (or nil for an empty, unquoted cell).
  class NoConversion < Array

    # Appends one character, given as a byte (Integer) or a one-character String.
    def <<(ch)
      super(ch.chr)
    end

    # Returns the joined String; nil when empty unless as_string is true.
    def convert(as_string = false)
      return nil if empty? && !as_string
      join
    end

  end

  # Serializes arrays as CSV lines onto an IO-like object.
  class IOWriter
    def initialize(file, quot = '~', sep = ',', quotenum = false)
      @first = true
      @io = file
      @quot = quot
      @sep = sep
      @quotenum = quotenum
    end

    # Writes one row. When the very first thing written is a Row, its headers
    # are written first. Raises RuntimeError for anything that is not an Array.
    # Returns the row.
    def <<(row)
      raise "can only write arrays! #{row.class} #{row.inspect}" unless row.is_a?(Array)

      self << row.headers if @first && row.is_a?(Row)
      @first = false
      # write instead of syswrite: syswrite may write only part of the data
      # and must not be mixed with buffered writes on the same IO.
      @io.write(FastererCSV.quot_row(row, @quot, @sep, @quotenum))
      row
    end
  end

  class << self

    # Returns the normalized headers found on the first line of +file+
    # ([] when the file is empty).
    def headers(file, quot = '~', sep = ',', fail_on_malformed = true, column = NoConversion.new, &block)
      first_line = File.open(file, 'r') { |io| io.gets }
      parse_headers(first_line.to_s, quot, sep, fail_on_malformed, column, &block)
    end

    # Parses the file at path +file+; every cell is a String or nil.
    def read(file, quot = '~', sep = ',', fail_on_malformed = true, column = NoConversion.new, &block)
      File.open(file, 'r') do |io|
        parse(io, quot, sep, fail_on_malformed, column, &block)
      end
    end

    # Like read, but unquoted numeric cells are converted (see NumericConversion).
    def convread(file, quot = '~', sep = ',', fail_on_malformed = true, column = NumericConversion.new, &block)
      File.open(file, 'r') do |io|
        parse(io, quot, sep, fail_on_malformed, column, &block)
      end
    end

    # Parses +data+ and returns only the normalized headers ([] when +data+
    # contains no header row).
    def parse_headers(data, quot = '~', sep = ',', fail_on_malformed = true, column = NoConversion.new, &block)
      table = parse(data, quot, sep, fail_on_malformed, column, &block)
      table ? table.headers : []
    end

    # Parses CSV from +io+, which may be anything responding to each_byte
    # (an IO or a String).
    #
    # quot / sep        - quote and separator; only their first byte is used
    # fail_on_malformed - passed to Table.new (raise on column-count mismatch)
    # column            - accumulator used to collect and convert each cell
    # block             - when given, receives every data row instead of the
    #                     rows being stored in the table
    #
    # The first non-empty line becomes the header row. Empty lines are skipped,
    # "\r" bytes are ignored, and a doubled quote inside a quoted cell is one
    # literal quote. Returns the Table, or nil when there is no header row.
    def parse(io, quot = '~', sep = ',', fail_on_malformed = true, column = NoConversion.new, &block)
      # each_byte yields Integers, so the delimiters must be byte values too.
      q = first_byte(quot)
      s = first_byte(sep)
      row, inquot, clean, maybe, table, field, endline = [], false, true, false, nil, true, false

      # Emits a finished line: the first one creates the table, later ones are rows.
      emit = lambda do |cells|
        unless cells.empty?
          if table
            table << cells
          else
            table = Table.new(cells, fail_on_malformed, &block)
          end
        end
      end

      io.each_byte do |c|
        next if c == CR

        if maybe && c == s
          # closing quote followed by a separator: end of a quoted cell
          row << column.convert(true)
          column.clear
          clean, inquot, maybe, field, endline = true, false, false, true, false
        elsif maybe && c == LF
          # closing quote followed by a newline: end of a quoted cell and of the line
          row << column.convert(true) unless column.empty? && endline
          column.clear
          emit.call(row)
          row, clean, inquot, maybe, field, endline = [], true, false, false, false, true
        elsif clean && c == q
          # quote at the start of a cell opens a quoted cell
          inquot, clean, endline = true, false, false
        elsif maybe && c == q
          # doubled quote: one literal quote character
          column << c
          clean, maybe, endline = false, false, false
        elsif c == q
          # possibly the closing quote; decided by the next byte
          maybe, endline = true, false
        elsif inquot
          column << c
          clean, endline = false, false
        elsif c == s
          row << column.convert(false)
          column.clear
          clean, field, endline = true, true, false
        elsif c == LF
          row << column.convert(false) unless column.empty? && endline
          column.clear
          emit.call(row)
          row, clean, inquot, field, endline = [], true, false, false, true
        else
          column << c
          clean, endline = false, false
        end
      end

      # Input ended without a trailing newline: flush the pending cell. The
      # second condition covers a line ending right after a separator
      # ("1,<EOF>"), whose last row would otherwise be dropped.
      if !clean || (field && !row.empty?)
        row << column.convert(maybe)
        emit.call(row)
      end

      table
    end

    # Formats one row as a CSV line (terminated by "\n").
    #
    # nil becomes an empty cell, Numerics are written bare, an empty String is
    # written as two quotes, and other values are quoted (with embedded quotes
    # doubled) when they contain the quote, the separator or a newline. With
    # numquot = true, non-Symbol values consisting only of digits are quoted too.
    def quot_row(row, q = '~', s = ',', numquot = false)
      # Escape the delimiters: characters such as ^ - ] are special in a character class.
      special = "[#{Regexp.escape(q)}#{Regexp.escape(s)}\\n]"
      need_quot = Regexp.new(special)
      num_quot = Regexp.new("(?:#{special}|^\\d+$)")

      cells = row.map do |val|
        if val.nil?
          ""
        elsif val.is_a?(Numeric)
          val.to_s
        else
          quot = (val.is_a?(Symbol) || !numquot) ? need_quot : num_quot
          val = String(val)
          if val.length == 0
            q * 2
          else
            val =~ quot ? q + val.gsub(q, q * 2) + q : val
          end
        end
      end

      cells.join(s) + "\n"
    end

    # Yields an IOWriter and returns everything written to it as a String.
    def generate(quot = '~', sep = ',', &block)
      builder = StringIO.new
      write(builder, quot, sep, &block)
      builder.string
    end

    # Yields an IOWriter for +data+ (a path opened with mode 'w', or an IO-like object).
    def write(data, quot = '~', sep = ',', quotenum = false, &block)
      out(data, 'w', quot, sep, quotenum, &block)
    end

    # Like write, but a path is opened with mode 'a'.
    def append(data, quot = '~', sep = ',', quotenum = false, &block)
      out(data, 'a', quot, sep, quotenum, &block)
    end

    # Shared implementation of write / append. A String is treated as a file
    # path and opened with +mode+; anything else is used as the output object.
    def out(data, mode = 'w', quot = '~', sep = ',', quotenum = false, &block)
      if data.is_a?(String)
        File.open(data, mode) do |io|
          out(io, mode, quot, sep, quotenum, &block)
        end
      else
        yield(IOWriter.new(data, quot, sep, quotenum))
      end
    end

    private

    # First byte of a delimiter given as a String (or the value itself when
    # it already is an Integer byte).
    def first_byte(token)
      token.is_a?(Integer) ? token : token.to_s.getbyte(0)
    end

  end
end
@@ -0,0 +1,166 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "FastererCSV" do
4
+
5
+ describe "fiddly bits" do
6
+ describe "Table" do
7
+ it "works" do
8
+
9
+ end
10
+ end
11
+
12
+ describe "Row" do
13
+ it "works" do
14
+
15
+ end
16
+ end
17
+ end
18
+
19
+ describe "Converters" do
20
+ describe "NumericConverters" do
21
+ it "works" do
22
+
23
+ conv = FastererCSV::NumericConversion.new
24
+ conv << ?1
25
+ conv.convert(true).class.should == String
26
+ conv.convert(true).should == "1"
27
+
28
+ conv.convert(false).class.should == Fixnum
29
+ conv.convert(false).should == 1
30
+
31
+ conv.clear
32
+ conv << ?-
33
+ conv << ?1
34
+ conv.convert(false).class.should == Fixnum
35
+ conv.convert(false).should == -1
36
+
37
+ conv.clear
38
+ conv << ?1
39
+ conv << ?.
40
+ conv << ?1
41
+ conv.convert(false).class.should == Float
42
+ conv.convert(false).should == 1.1
43
+
44
+ conv.clear
45
+ conv << ?-
46
+ conv << ?1
47
+ conv << ?.
48
+ conv << ?1
49
+ conv.convert(false).class.should == Float
50
+ conv.convert(false).should == -1.1
51
+
52
+ conv.clear
53
+ conv << ?1
54
+ conv << ?.
55
+ conv << ?1
56
+ conv << ?.
57
+ conv << ?1
58
+ conv.convert(false).class.should == String
59
+ conv.convert(false).should == "1.1.1"
60
+
61
+ conv.clear
62
+ conv << ?a
63
+ conv.convert(false).class.should == String
64
+ conv.convert(false).should == "a"
65
+
66
+ conv.clear
67
+ conv.should be_empty
68
+ conv.convert(false).should be_nil
69
+ conv.convert(true).should == ""
70
+
71
+ end
72
+ end
73
+ describe "NoConverter" do
74
+ it "works" do
75
+
76
+ conv = FastererCSV::NoConversion.new
77
+ conv << ?1
78
+ conv.convert(true).class.should == String
79
+ conv.convert(false).class.should == String
80
+
81
+ conv.convert(true).should == "1"
82
+ conv.convert(false).should == "1"
83
+
84
+ conv.clear
85
+ conv.should be_empty
86
+ conv.convert(false).should be_nil
87
+ conv.convert(true).should == ""
88
+
89
+ end
90
+ end
91
+ end
92
+
93
+ describe "important stuff" do
94
+
95
+ before do
96
+ @data = <<-CSV
97
+ a,b,c,d,e,f,g,h,i,j,k,l,m,
98
+ ,,1,1.1,-1,-1.1,1.1.1,~1~,a,~a~,~a~~a~,~a
99
+ ~~a~,~,~,
100
+
101
+ 0,1,2,3,4,5,6,7,8,9,10,11,12,14
102
+ CSV
103
+ end
104
+
105
+ describe "parse" do
106
+ it "works" do
107
+ table = FastererCSV.parse(@data)
108
+ table.headers.should == [:a, :b, :c,:d,:e,:f,:g,:h,:i,:j,:k,:l,:m,:_]
109
+ table.lines.should == 2
110
+
111
+ table[0].should == [nil, nil, "1", "1.1", "-1", "-1.1", "1.1.1", "1", "a", "a", "a~a", "a\n~a", ",", nil]
112
+ table[1].should == ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "14"]
113
+
114
+ row = table[1]
115
+ row.pull(:a, nil, 'd').should == ["0","14","3"]
116
+ row[:b].should == "1"
117
+ row["b"].should == "1"
118
+
119
+ end
120
+ end
121
+
122
+ describe "read" do
123
+ it "" do
124
+
125
+ end
126
+ end
127
+
128
+ describe "convread" do
129
+ it "" do
130
+
131
+ end
132
+ end
133
+
134
+ describe "headers" do
135
+ it "" do
136
+
137
+ end
138
+ end
139
+
140
+ describe "parse_headers" do
141
+ it "" do
142
+
143
+ end
144
+ end
145
+
146
+ describe "quot_row" do
147
+ it "" do
148
+
149
+ end
150
+ end
151
+
152
+ describe "generate" do
153
+ it "" do
154
+
155
+ end
156
+ end
157
+
158
+ describe "write" do
159
+ it "" do
160
+
161
+ end
162
+ end
163
+
164
+ end
165
+
166
+ end
@@ -0,0 +1,10 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'fasterer_csv'
4
+ ARGV.push("-b")
5
+ require 'spec'
6
+ require 'spec/autorun'
7
+
8
+ Spec::Runner.configure do |config|
9
+
10
+ end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: honkster-fasterer-csv
3
+ version: !ruby/object:Gem::Version
4
+ hash: 9
5
+ prerelease: false
6
+ segments:
7
+ - 1
8
+ - 7
9
+ - 1
10
+ version: 1.7.1
11
+ platform: ruby
12
+ authors:
13
+ - Mason
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-09-07 00:00:00 -07:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: CSV parsing awesomeness
23
+ email: mason@chipped.net
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files:
29
+ - LICENSE
30
+ - README.rdoc
31
+ files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ - Rakefile
35
+ - VERSION
36
+ - lib/fasterer_csv.rb
37
+ - spec/fasterer_csv_spec.rb
38
+ - spec/spec_helper.rb
39
+ has_rdoc: true
40
+ homepage: http://github.com/gnovos/fasterer-csv
41
+ licenses: []
42
+
43
+ post_install_message: |
44
+ Kernel Panic! System32 deleted! Klaxons klaxoning! Dogs and Cats living together!!!! We're doooomed! Everything is...
45
+ oh, no wait, it installed fine. My bad.
46
+
47
+ rdoc_options:
48
+ - --charset=UTF-8
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ hash: 3
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ requirements: []
70
+
71
+ rubyforge_project:
72
+ rubygems_version: 1.3.7
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: Even fasterer than FasterCSV!
76
+ test_files:
77
+ - spec/spec_helper.rb
78
+ - spec/fasterer_csv_spec.rb