rubysl-csv 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.travis.yml +8 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE +25 -0
  6. data/README.md +29 -0
  7. data/Rakefile +2 -0
  8. data/lib/csv.rb +1 -0
  9. data/lib/rubysl/csv.rb +2 -0
  10. data/lib/rubysl/csv/csv.rb +992 -0
  11. data/lib/rubysl/csv/version.rb +5 -0
  12. data/rubysl-csv.gemspec +27 -0
  13. data/spec/basicwriter/close_on_terminate_spec.rb +5 -0
  14. data/spec/basicwriter/initialize_spec.rb +5 -0
  15. data/spec/basicwriter/terminate_spec.rb +5 -0
  16. data/spec/cell/data_spec.rb +5 -0
  17. data/spec/cell/initialize_spec.rb +5 -0
  18. data/spec/fixtures/one_line.csv +1 -0
  19. data/spec/foreach_spec.rb +5 -0
  20. data/spec/generate_line_spec.rb +58 -0
  21. data/spec/generate_row_spec.rb +5 -0
  22. data/spec/generate_spec.rb +73 -0
  23. data/spec/iobuf/close_spec.rb +5 -0
  24. data/spec/iobuf/initialize_spec.rb +5 -0
  25. data/spec/iobuf/read_spec.rb +5 -0
  26. data/spec/iobuf/terminate_spec.rb +5 -0
  27. data/spec/ioreader/close_on_terminate_spec.rb +5 -0
  28. data/spec/ioreader/get_row_spec.rb +5 -0
  29. data/spec/ioreader/initialize_spec.rb +5 -0
  30. data/spec/ioreader/terminate_spec.rb +5 -0
  31. data/spec/open_spec.rb +5 -0
  32. data/spec/parse_row_spec.rb +29 -0
  33. data/spec/parse_spec.rb +126 -0
  34. data/spec/read_spec.rb +5 -0
  35. data/spec/reader/close_spec.rb +7 -0
  36. data/spec/reader/create_spec.rb +7 -0
  37. data/spec/reader/each_spec.rb +7 -0
  38. data/spec/reader/get_row_spec.rb +7 -0
  39. data/spec/reader/initialize_spec.rb +7 -0
  40. data/spec/reader/parse_spec.rb +24 -0
  41. data/spec/reader/shift_spec.rb +7 -0
  42. data/spec/reader/terminate_spec.rb +7 -0
  43. data/spec/readlines_spec.rb +24 -0
  44. data/spec/streambuf/add_buf_spec.rb +5 -0
  45. data/spec/streambuf/buf_size_spec.rb +5 -0
  46. data/spec/streambuf/drop_spec.rb +5 -0
  47. data/spec/streambuf/element_reference_spec.rb +5 -0
  48. data/spec/streambuf/get_spec.rb +5 -0
  49. data/spec/streambuf/idx_is_eos_spec.rb +5 -0
  50. data/spec/streambuf/initialize_spec.rb +5 -0
  51. data/spec/streambuf/is_eos_spec.rb +5 -0
  52. data/spec/streambuf/read_spec.rb +5 -0
  53. data/spec/streambuf/rel_buf_spec.rb +5 -0
  54. data/spec/streambuf/terminate_spec.rb +5 -0
  55. data/spec/stringreader/get_row_spec.rb +5 -0
  56. data/spec/stringreader/initialize_spec.rb +5 -0
  57. data/spec/writer/add_row_spec.rb +5 -0
  58. data/spec/writer/append_spec.rb +5 -0
  59. data/spec/writer/close_spec.rb +5 -0
  60. data/spec/writer/create_spec.rb +5 -0
  61. data/spec/writer/generate_spec.rb +5 -0
  62. data/spec/writer/initialize_spec.rb +5 -0
  63. data/spec/writer/terminate_spec.rb +5 -0
  64. metadata +227 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d73bb569e1fefd81701c9d864103837e884882ac
4
+ data.tar.gz: 3184788b8f1ef3727a76e8deef349173d3e652fd
5
+ SHA512:
6
+ metadata.gz: 8943bd15e1331bb4189cf814432c854f609bef95370dd9d1dada5a36fee3bd372100b0883518035660c127e307af952dd94a494a3f275324e2f8840f2710125e
7
+ data.tar.gz: e60dfb285ac43dda46e209a202a7eedf1749603165ef2930c8a047449476664f32d9cbd7f3831d109c1ad00016bd340441daddcf768b4fb26e237d2221ef572f
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .rbx
@@ -0,0 +1,8 @@
1
+ language: ruby
2
+ before_install:
3
+ - gem update --system
4
+ - gem --version
5
+ - gem install rubysl-bundler
6
+ script: bundle exec mspec spec
7
+ rvm:
8
+ - rbx-nightly-18mode
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rubysl-csv.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ Copyright (c) 2013, Brian Shirai
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ 1. Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+ 2. Redistributions in binary form must reproduce the above copyright notice,
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+ 3. Neither the name of the library nor the names of its contributors may be
13
+ used to endorse or promote products derived from this software without
14
+ specific prior written permission.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY DIRECT,
20
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21
+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23
+ OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25
+ EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,29 @@
1
+ # RubySL::Csv
2
+
3
+ TODO: Write a gem description
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'rubysl-csv'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install rubysl-csv
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1 @@
1
+ require "rubysl/csv"
@@ -0,0 +1,2 @@
1
+ require "rubysl/csv/csv"
2
+ require "rubysl/csv/version"
@@ -0,0 +1,992 @@
1
+ # CSV -- module for generating/parsing CSV data.
2
+ # Copyright (C) 2000-2004 NAKAMURA, Hiroshi <nakahiro@sarion.co.jp>.
3
+
4
+ # $Id: csv.rb 11708 2007-02-12 23:01:19Z shyouhei $
5
+
6
+ # This program is copyrighted free software by NAKAMURA, Hiroshi. You can
7
+ # redistribute it and/or modify it under the same terms of Ruby's license;
8
+ # either the dual license version in 2003, or any later version.
9
+
10
+
11
+ class CSV
12
+ class IllegalFormatError < RuntimeError; end
13
+
14
+ # deprecated
15
+ class Cell < String
16
+ def initialize(data = "", is_null = false)
17
+ super(is_null ? "" : data)
18
+ end
19
+
20
+ def data
21
+ to_s
22
+ end
23
+ end
24
+
25
+ # deprecated
26
+ class Row < Array
27
+ end
28
+
29
+ # Open a CSV formatted file for reading or writing.
30
+ #
31
+ # For reading.
32
+ #
33
+ # EXAMPLE 1
34
+ # CSV.open('csvfile.csv', 'r') do |row|
35
+ # p row
36
+ # end
37
+ #
38
+ # EXAMPLE 2
39
+ # reader = CSV.open('csvfile.csv', 'r')
40
+ # row1 = reader.shift
41
+ # row2 = reader.shift
42
+ # if row2.empty?
43
+ # p 'row2 not found.'
44
+ # end
45
+ # reader.close
46
+ #
47
+ # ARGS
48
+ # filename: filename to parse.
49
+ # col_sep: Column separator. ?, by default. If you want to separate
50
+ # fields with semicolon, give ?; here.
51
+ # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
52
+ # want to separate records with \r, give ?\r here.
53
+ #
54
+ # RETURNS
55
+ # reader instance. To get parse result, see CSV::Reader#each.
56
+ #
57
+ #
58
+ # For writing.
59
+ #
60
+ # EXAMPLE 1
61
+ # CSV.open('csvfile.csv', 'w') do |writer|
62
+ # writer << ['r1c1', 'r1c2']
63
+ # writer << ['r2c1', 'r2c2']
64
+ # writer << [nil, nil]
65
+ # end
66
+ #
67
+ # EXAMPLE 2
68
+ # writer = CSV.open('csvfile.csv', 'w')
69
+ # writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
70
+ # writer.close
71
+ #
72
+ # ARGS
73
+ # filename: filename to generate.
74
+ # col_sep: Column separator. ?, by default. If you want to separate
75
+ # fields with semicolon, give ?; here.
76
+ # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
77
+ # want to separate records with \r, give ?\r here.
78
+ #
79
+ # RETURNS
80
+ # writer instance. See CSV::Writer#<< and CSV::Writer#add_row to know how
81
+ # to generate CSV string.
82
+ #
83
+ def CSV.open(path, mode, fs = nil, rs = nil, &block)
84
+ if mode == 'r' or mode == 'rb'
85
+ open_reader(path, mode, fs, rs, &block)
86
+ elsif mode == 'w' or mode == 'wb'
87
+ open_writer(path, mode, fs, rs, &block)
88
+ else
89
+ raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
90
+ end
91
+ end
92
+
93
+ def CSV.foreach(path, rs = nil, &block)
94
+ open_reader(path, 'r', ',', rs, &block)
95
+ end
96
+
97
+ def CSV.read(path, length = nil, offset = nil)
98
+ CSV.parse(IO.read(path, length, offset))
99
+ end
100
+
101
+ def CSV.readlines(path, rs = nil)
102
+ reader = open_reader(path, 'r', ',', rs)
103
+ begin
104
+ reader.collect { |row| row }
105
+ ensure
106
+ reader.close
107
+ end
108
+ end
109
+
110
+ def CSV.generate(path, fs = nil, rs = nil, &block)
111
+ open_writer(path, 'w', fs, rs, &block)
112
+ end
113
+
114
+ # Parse lines from given string or stream. Return rows as an Array of Arrays.
115
+ def CSV.parse(str_or_readable, fs = nil, rs = nil, &block)
116
+ if File.exist?(str_or_readable)
117
+ STDERR.puts("CSV.parse(filename) is deprecated." +
118
+ " Use CSV.open(filename, 'r') instead.")
119
+ return open_reader(str_or_readable, 'r', fs, rs, &block)
120
+ end
121
+ if block
122
+ CSV::Reader.parse(str_or_readable, fs, rs) do |row|
123
+ yield(row)
124
+ end
125
+ nil
126
+ else
127
+ CSV::Reader.create(str_or_readable, fs, rs).collect { |row| row }
128
+ end
129
+ end
130
+
131
+ # Parse a line from given string. Bear in mind it parses ONE LINE. Rest of
132
+ # the string is ignored; for example, "a,b\r\nc,d" => ['a', 'b'] and the
133
+ # second line 'c,d' is ignored.
134
+ #
135
+ # If you don't know whether a target string to parse is exactly 1 line or
136
+ # not, use CSV.parse_row instead of this method.
137
+ def CSV.parse_line(src, fs = nil, rs = nil)
138
+ fs ||= ','
139
+ if fs.is_a?(Fixnum)
140
+ fs = fs.chr
141
+ end
142
+ if !rs.nil? and rs.is_a?(Fixnum)
143
+ rs = rs.chr
144
+ end
145
+ idx = 0
146
+ res_type = :DT_COLSEP
147
+ row = []
148
+ begin
149
+ while res_type == :DT_COLSEP
150
+ res_type, idx, cell = parse_body(src, idx, fs, rs)
151
+ row << cell
152
+ end
153
+ rescue IllegalFormatError
154
+ return []
155
+ end
156
+ row
157
+ end
158
+
159
+ # Create a line from cells. Each cell is stringified by to_s.
160
+ def CSV.generate_line(row, fs = nil, rs = nil)
161
+ if row.size == 0
162
+ return ''
163
+ end
164
+ fs ||= ','
165
+ if fs.is_a?(Fixnum)
166
+ fs = fs.chr
167
+ end
168
+ if !rs.nil? and rs.is_a?(Fixnum)
169
+ rs = rs.chr
170
+ end
171
+ res_type = :DT_COLSEP
172
+ result_str = ''
173
+ idx = 0
174
+ while true
175
+ generate_body(row[idx], result_str, fs, rs)
176
+ idx += 1
177
+ if (idx == row.size)
178
+ break
179
+ end
180
+ generate_separator(:DT_COLSEP, result_str, fs, rs)
181
+ end
182
+ result_str
183
+ end
184
+
185
+ # Parse a line from string. Consider using CSV.parse_line instead.
186
+ # To parse lines in CSV string, see EXAMPLE below.
187
+ #
188
+ # EXAMPLE
189
+ # src = "a,b\r\nc,d\r\ne,f"
190
+ # idx = 0
191
+ # begin
192
+ # parsed = []
193
+ # parsed_cells, idx = CSV.parse_row(src, idx, parsed)
194
+ # puts "Parsed #{ parsed_cells } cells."
195
+ # p parsed
196
+ # end while parsed_cells > 0
197
+ #
198
+ # ARGS
199
+ # src: CSV data to be parsed. Must respond to '[](idx)'.
200
+ # src[](idx) must return a char. (Not a string such as 'a', but 97).
201
+ # src[](idx_out_of_bounds) must return nil. A String satisfies this
202
+ # requirement.
203
+ # idx: index of parsing location of 'src'. 0 origin.
204
+ # out_dev: buffer for parsed cells. Must respond to '<<(aString)'.
205
+ # col_sep: Column separator. ?, by default. If you want to separate
206
+ # fields with semicolon, give ?; here.
207
+ # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
208
+ # want to separate records with \r, give ?\r here.
209
+ #
210
+ # RETURNS
211
+ # parsed_cells: num of parsed cells.
212
+ # idx: index of next parsing location of 'src'.
213
+ #
214
+ def CSV.parse_row(src, idx, out_dev, fs = nil, rs = nil)
215
+ fs ||= ','
216
+ if fs.is_a?(Fixnum)
217
+ fs = fs.chr
218
+ end
219
+ if !rs.nil? and rs.is_a?(Fixnum)
220
+ rs = rs.chr
221
+ end
222
+ idx_backup = idx
223
+ parsed_cells = 0
224
+ res_type = :DT_COLSEP
225
+ begin
226
+ while res_type != :DT_ROWSEP
227
+ res_type, idx, cell = parse_body(src, idx, fs, rs)
228
+ if res_type == :DT_EOS
229
+ if idx == idx_backup #((parsed_cells == 0) and cell.nil?)
230
+ return 0, 0
231
+ end
232
+ res_type = :DT_ROWSEP
233
+ end
234
+ parsed_cells += 1
235
+ out_dev << cell
236
+ end
237
+ rescue IllegalFormatError
238
+ return 0, 0
239
+ end
240
+ return parsed_cells, idx
241
+ end
242
+
243
+ # Convert a line from cells data to string. Consider using CSV.generate_line
244
+ # instead. To generate multi-row CSV string, see EXAMPLE below.
245
+ #
246
+ # EXAMPLE
247
+ # row1 = ['a', 'b']
248
+ # row2 = ['c', 'd']
249
+ # row3 = ['e', 'f']
250
+ # src = [row1, row2, row3]
251
+ # buf = ''
252
+ # src.each do |row|
253
+ # parsed_cells = CSV.generate_row(row, 2, buf)
254
+ # puts "Created #{ parsed_cells } cells."
255
+ # end
256
+ # p buf
257
+ #
258
+ # ARGS
259
+ # src: an Array of String to be converted to CSV string. Must respond to
260
+ # 'size' and '[](idx)'. src[idx] must return String.
261
+ # cells: num of cells in a line.
262
+ # out_dev: buffer for generated CSV string. Must respond to '<<(string)'.
263
+ # col_sep: Column separator. ?, by default. If you want to separate
264
+ # fields with semicolon, give ?; here.
265
+ # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
266
+ # want to separate records with \r, give ?\r here.
267
+ #
268
+ # RETURNS
269
+ # parsed_cells: num of converted cells.
270
+ #
271
+ def CSV.generate_row(src, cells, out_dev, fs = nil, rs = nil)
272
+ fs ||= ','
273
+ if fs.is_a?(Fixnum)
274
+ fs = fs.chr
275
+ end
276
+ if !rs.nil? and rs.is_a?(Fixnum)
277
+ rs = rs.chr
278
+ end
279
+ src_size = src.size
280
+ if (src_size == 0)
281
+ if cells == 0
282
+ generate_separator(:DT_ROWSEP, out_dev, fs, rs)
283
+ end
284
+ return 0
285
+ end
286
+ res_type = :DT_COLSEP
287
+ parsed_cells = 0
288
+ generate_body(src[parsed_cells], out_dev, fs, rs)
289
+ parsed_cells += 1
290
+ while ((parsed_cells < cells) and (parsed_cells != src_size))
291
+ generate_separator(:DT_COLSEP, out_dev, fs, rs)
292
+ generate_body(src[parsed_cells], out_dev, fs, rs)
293
+ parsed_cells += 1
294
+ end
295
+ if (parsed_cells == cells)
296
+ generate_separator(:DT_ROWSEP, out_dev, fs, rs)
297
+ else
298
+ generate_separator(:DT_COLSEP, out_dev, fs, rs)
299
+ end
300
+ parsed_cells
301
+ end
302
+
303
+ # Private class methods.
304
+ class << self
305
+ private
306
+
307
+ def open_reader(path, mode, fs, rs, &block)
308
+ file = File.open(path, mode)
309
+ if block
310
+ begin
311
+ CSV::Reader.parse(file, fs, rs) do |row|
312
+ yield(row)
313
+ end
314
+ ensure
315
+ file.close
316
+ end
317
+ nil
318
+ else
319
+ reader = CSV::Reader.create(file, fs, rs)
320
+ reader.close_on_terminate
321
+ reader
322
+ end
323
+ end
324
+
325
+ def open_writer(path, mode, fs, rs, &block)
326
+ file = File.open(path, mode)
327
+ if block
328
+ begin
329
+ CSV::Writer.generate(file, fs, rs) do |writer|
330
+ yield(writer)
331
+ end
332
+ ensure
333
+ file.close
334
+ end
335
+ nil
336
+ else
337
+ writer = CSV::Writer.create(file, fs, rs)
338
+ writer.close_on_terminate
339
+ writer
340
+ end
341
+ end
342
+
343
+ def parse_body(src, idx, fs, rs)
344
+ fs_str = fs
345
+ fs_size = fs_str.size
346
+ rs_str = rs || "\n"
347
+ rs_size = rs_str.size
348
+ fs_idx = rs_idx = 0
349
+ cell = Cell.new
350
+ state = :ST_START
351
+ quoted = cr = false
352
+ c = nil
353
+ last_idx = idx
354
+ while c = src[idx]
355
+ unless quoted
356
+ fschar = (c == fs_str[fs_idx])
357
+ rschar = (c == rs_str[rs_idx])
358
+ # simple 1 char backtrack
359
+ if !fschar and c == fs_str[0]
360
+ fs_idx = 0
361
+ fschar = true
362
+ if state == :ST_START
363
+ state = :ST_DATA
364
+ elsif state == :ST_QUOTE
365
+ raise IllegalFormatError
366
+ end
367
+ end
368
+ if !rschar and c == rs_str[0]
369
+ rs_idx = 0
370
+ rschar = true
371
+ if state == :ST_START
372
+ state = :ST_DATA
373
+ elsif state == :ST_QUOTE
374
+ raise IllegalFormatError
375
+ end
376
+ end
377
+ end
378
+ if c == ?"
379
+ fs_idx = rs_idx = 0
380
+ if cr
381
+ raise IllegalFormatError
382
+ end
383
+ cell << src[last_idx, (idx - last_idx)]
384
+ last_idx = idx
385
+ if state == :ST_DATA
386
+ if quoted
387
+ last_idx += 1
388
+ quoted = false
389
+ state = :ST_QUOTE
390
+ else
391
+ raise IllegalFormatError
392
+ end
393
+ elsif state == :ST_QUOTE
394
+ cell << c.chr
395
+ last_idx += 1
396
+ quoted = true
397
+ state = :ST_DATA
398
+ else # :ST_START
399
+ quoted = true
400
+ last_idx += 1
401
+ state = :ST_DATA
402
+ end
403
+ elsif fschar or rschar
404
+ if fschar
405
+ fs_idx += 1
406
+ end
407
+ if rschar
408
+ rs_idx += 1
409
+ end
410
+ sep = nil
411
+ if fs_idx == fs_size
412
+ if state == :ST_START and rs_idx > 0 and fs_idx < rs_idx
413
+ state = :ST_DATA
414
+ end
415
+ cell << src[last_idx, (idx - last_idx - (fs_size - 1))]
416
+ last_idx = idx
417
+ fs_idx = rs_idx = 0
418
+ if cr
419
+ raise IllegalFormatError
420
+ end
421
+ sep = :DT_COLSEP
422
+ elsif rs_idx == rs_size
423
+ if state == :ST_START and fs_idx > 0 and rs_idx < fs_idx
424
+ state = :ST_DATA
425
+ end
426
+ if !(rs.nil? and cr)
427
+ cell << src[last_idx, (idx - last_idx - (rs_size - 1))]
428
+ last_idx = idx
429
+ end
430
+ fs_idx = rs_idx = 0
431
+ sep = :DT_ROWSEP
432
+ end
433
+ if sep
434
+ if state == :ST_DATA
435
+ return sep, idx + 1, cell;
436
+ elsif state == :ST_QUOTE
437
+ return sep, idx + 1, cell;
438
+ else # :ST_START
439
+ return sep, idx + 1, nil
440
+ end
441
+ end
442
+ elsif rs.nil? and c == ?\r
443
+ # special \r treatment for backward compatibility
444
+ fs_idx = rs_idx = 0
445
+ if cr
446
+ raise IllegalFormatError
447
+ end
448
+ cell << src[last_idx, (idx - last_idx)]
449
+ last_idx = idx
450
+ if quoted
451
+ state = :ST_DATA
452
+ else
453
+ cr = true
454
+ end
455
+ else
456
+ fs_idx = rs_idx = 0
457
+ if state == :ST_DATA or state == :ST_START
458
+ if cr
459
+ raise IllegalFormatError
460
+ end
461
+ state = :ST_DATA
462
+ else # :ST_QUOTE
463
+ raise IllegalFormatError
464
+ end
465
+ end
466
+ idx += 1
467
+ end
468
+ if state == :ST_START
469
+ if fs_idx > 0 or rs_idx > 0
470
+ state = :ST_DATA
471
+ else
472
+ return :DT_EOS, idx, nil
473
+ end
474
+ elsif quoted
475
+ raise IllegalFormatError
476
+ elsif cr
477
+ raise IllegalFormatError
478
+ end
479
+ cell << src[last_idx, (idx - last_idx)]
480
+ last_idx = idx
481
+ return :DT_EOS, idx, cell
482
+ end
483
+
484
+ def generate_body(cell, out_dev, fs, rs)
485
+ if cell.nil?
486
+ # empty
487
+ else
488
+ cell = cell.to_s
489
+ row_data = cell.dup
490
+ if (row_data.gsub!('"', '""') or
491
+ row_data.index(fs) or
492
+ (rs and row_data.index(rs)) or
493
+ (/[\r\n]/ =~ row_data) or
494
+ (cell.empty?))
495
+ out_dev << '"' << row_data << '"'
496
+ else
497
+ out_dev << row_data
498
+ end
499
+ end
500
+ end
501
+
502
+ def generate_separator(type, out_dev, fs, rs)
503
+ case type
504
+ when :DT_COLSEP
505
+ out_dev << fs
506
+ when :DT_ROWSEP
507
+ out_dev << (rs || "\n")
508
+ end
509
+ end
510
+ end
511
+
512
+
513
+ # CSV formatted string/stream reader.
514
+ #
515
+ # EXAMPLE
516
+ # read CSV lines until the first column is 'stop'.
517
+ #
518
+ # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
519
+ # p row
520
+ # break if !row[0].is_null && row[0].data == 'stop'
521
+ # end
522
+ #
523
+ class Reader
524
+ include Enumerable
525
+
526
+ # Parse CSV data and get lines. Given block is called for each parsed row.
527
+ # Block value is always nil. Rows are not cached for performance reason.
528
+ def Reader.parse(str_or_readable, fs = ',', rs = nil, &block)
529
+ reader = Reader.create(str_or_readable, fs, rs)
530
+ if block
531
+ reader.each do |row|
532
+ yield(row)
533
+ end
534
+ reader.close
535
+ nil
536
+ else
537
+ reader
538
+ end
539
+ end
540
+
541
+ # Returns reader instance.
542
+ def Reader.create(str_or_readable, fs = ',', rs = nil)
543
+ case str_or_readable
544
+ when IO
545
+ IOReader.new(str_or_readable, fs, rs)
546
+ when String
547
+ StringReader.new(str_or_readable, fs, rs)
548
+ else
549
+ IOReader.new(str_or_readable, fs, rs)
550
+ end
551
+ end
552
+
553
+ def each
554
+ while true
555
+ row = []
556
+ parsed_cells = get_row(row)
557
+ if parsed_cells == 0
558
+ break
559
+ end
560
+ yield(row)
561
+ end
562
+ nil
563
+ end
564
+
565
+ def shift
566
+ row = []
567
+ parsed_cells = get_row(row)
568
+ row
569
+ end
570
+
571
+ def close
572
+ terminate
573
+ end
574
+
575
+ private
576
+
577
+ def initialize(dev)
578
+ raise RuntimeError.new('Do not instanciate this class directly.')
579
+ end
580
+
581
+ def get_row(row)
582
+ raise NotImplementedError.new('Method get_row must be defined in a derived class.')
583
+ end
584
+
585
+ def terminate
586
+ # Define if needed.
587
+ end
588
+ end
589
+
590
+
591
+ class StringReader < Reader
592
+ def initialize(string, fs = ',', rs = nil)
593
+ @fs = fs
594
+ @rs = rs
595
+ @dev = string
596
+ @idx = 0
597
+ if @dev[0, 3] == "\xef\xbb\xbf"
598
+ @idx += 3
599
+ end
600
+ end
601
+
602
+ private
603
+
604
+ def get_row(row)
605
+ parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs)
606
+ if parsed_cells == 0 and next_idx == 0 and @idx != @dev.size
607
+ raise IllegalFormatError.new
608
+ end
609
+ @idx = next_idx
610
+ parsed_cells
611
+ end
612
+ end
613
+
614
+
615
+ class IOReader < Reader
616
+ def initialize(io, fs = ',', rs = nil)
617
+ @io = io
618
+ @fs = fs
619
+ @rs = rs
620
+ @dev = CSV::IOBuf.new(@io)
621
+ @idx = 0
622
+ if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf
623
+ @idx += 3
624
+ end
625
+ @close_on_terminate = false
626
+ end
627
+
628
+ # Tell this reader to close the IO when terminated (Triggered by invoking
629
+ # CSV::IOReader#close).
630
+ def close_on_terminate
631
+ @close_on_terminate = true
632
+ end
633
+
634
+ private
635
+
636
+ def get_row(row)
637
+ parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs)
638
+ if parsed_cells == 0 and next_idx == 0 and !@dev.is_eos?
639
+ raise IllegalFormatError.new
640
+ end
641
+ dropped = @dev.drop(next_idx)
642
+ @idx = next_idx - dropped
643
+ parsed_cells
644
+ end
645
+
646
+ def terminate
647
+ if @close_on_terminate
648
+ @io.close
649
+ end
650
+
651
+ if @dev
652
+ @dev.close
653
+ end
654
+ end
655
+ end
656
+
657
+
658
+ # CSV formatted string/stream writer.
659
+ #
660
+ # EXAMPLE
661
+ # Write rows to 'csvout' file.
662
+ #
663
+ # outfile = File.open('csvout', 'wb')
664
+ # CSV::Writer.generate(outfile) do |csv|
665
+ # csv << ['c1', nil, '', '"', "\r\n", 'c2']
666
+ # ...
667
+ # end
668
+ #
669
+ # outfile.close
670
+ #
671
+ class Writer
672
+ # Given block is called with the writer instance. str_or_writable must
673
+ # handle '<<(string)'.
674
+ def Writer.generate(str_or_writable, fs = ',', rs = nil, &block)
675
+ writer = Writer.create(str_or_writable, fs, rs)
676
+ if block
677
+ yield(writer)
678
+ writer.close
679
+ nil
680
+ else
681
+ writer
682
+ end
683
+ end
684
+
685
+ # str_or_writable must handle '<<(string)'.
686
+ def Writer.create(str_or_writable, fs = ',', rs = nil)
687
+ BasicWriter.new(str_or_writable, fs, rs)
688
+ end
689
+
690
+ # dump CSV stream to the device. argument must be an Array of String.
691
+ def <<(row)
692
+ CSV.generate_row(row, row.size, @dev, @fs, @rs)
693
+ self
694
+ end
695
+ alias add_row <<
696
+
697
+ def close
698
+ terminate
699
+ end
700
+
701
+ private
702
+
703
+ def initialize(dev)
704
+ raise RuntimeError.new('Do not instanciate this class directly.')
705
+ end
706
+
707
+ def terminate
708
+ # Define if needed.
709
+ end
710
+ end
711
+
712
+
713
+ class BasicWriter < Writer
714
+ def initialize(str_or_writable, fs = ',', rs = nil)
715
+ @fs = fs
716
+ @rs = rs
717
+ @dev = str_or_writable
718
+ @close_on_terminate = false
719
+ end
720
+
721
+ # Tell this writer to close the IO when terminated (Triggered by invoking
722
+ # CSV::BasicWriter#close).
723
+ def close_on_terminate
724
+ @close_on_terminate = true
725
+ end
726
+
727
+ private
728
+
729
+ def terminate
730
+ if @close_on_terminate
731
+ @dev.close
732
+ end
733
+ end
734
+ end
735
+
736
+ private
737
+
738
+ # Buffered stream.
739
+ #
740
+ # EXAMPLE 1 -- an IO.
741
+ # class MyBuf < StreamBuf
742
+ # # Do initialize myself before a super class. Super class might call my
743
+ # # method 'read'. (Could be awful for C++ user. :-)
744
+ # def initialize(s)
745
+ # @s = s
746
+ # super()
747
+ # end
748
+ #
749
+ # # define my own 'read' method.
750
+ # # CAUTION: Returning nil means EndOfStream.
751
+ # def read(size)
752
+ # @s.read(size)
753
+ # end
754
+ #
755
+ # # release buffers. in Ruby which has GC, you do not have to call this...
756
+ # def terminate
757
+ # @s = nil
758
+ # super()
759
+ # end
760
+ # end
761
+ #
762
+ # buf = MyBuf.new(STDIN)
763
+ # my_str = ''
764
+ # p buf[0, 0] # => '' (null string)
765
+ # p buf[0] # => 97 (char code of 'a')
766
+ # p buf[0, 1] # => 'a'
767
+ # my_str = buf[0, 5]
768
+ # p my_str # => 'abcde' (5 chars)
769
+ # p buf[0, 6] # => "abcde\n" (6 chars)
770
+ # p buf[0, 7] # => "abcde\n" (6 chars)
771
+ # p buf.drop(3) # => 3 (dropped chars)
772
+ # p buf.get(0, 2) # => 'de' (2 chars)
773
+ # p buf.is_eos? # => false (is not EOS here)
774
+ # p buf.drop(5) # => 3 (dropped chars)
775
+ # p buf.is_eos? # => true (is EOS here)
776
+ # p buf[0] # => nil (is EOS here)
777
+ #
778
+ # EXAMPLE 2 -- String.
779
+ # This is a conceptual example. No pros with this.
780
+ #
781
+ # class StrBuf < StreamBuf
782
+ # def initialize(s)
783
+ # @str = s
784
+ # @idx = 0
785
+ # super()
786
+ # end
787
+ #
788
+ # def read(size)
789
+ # str = @str[@idx, size]
790
+ # @idx += str.size
791
+ # str
792
+ # end
793
+ # end
794
+ #
795
+ class StreamBuf
796
+ # get a char or a partial string from the stream.
797
+ # idx: index of a string to specify a start point of a string to get.
798
+ # unlike String instance, idx < 0 returns nil.
799
+ # n: size of a string to get.
800
+ # returns char at idx if n == nil.
801
+ # returns a partial string, from idx to (idx + n) if n != nil. at EOF,
802
+ # the string size might not equal arg n.
803
+ def [](idx, n = nil)
804
+ if idx < 0
805
+ return nil
806
+ end
807
+ if (idx_is_eos?(idx))
808
+ if n and (@offset + idx == buf_size(@cur_buf))
809
+ # Like a String, 'abc'[4, 1] returns nil and
810
+ # 'abc'[3, 1] returns '' not nil.
811
+ return ''
812
+ else
813
+ return nil
814
+ end
815
+ end
816
+ my_buf = @cur_buf
817
+ my_offset = @offset
818
+ next_idx = idx
819
+ while (my_offset + next_idx >= buf_size(my_buf))
820
+ if (my_buf == @buf_tail_idx)
821
+ unless add_buf
822
+ break
823
+ end
824
+ end
825
+ next_idx = my_offset + next_idx - buf_size(my_buf)
826
+ my_buf += 1
827
+ my_offset = 0
828
+ end
829
+ loc = my_offset + next_idx
830
+ if !n
831
+ return @buf_list[my_buf][loc] # Fixnum of char code.
832
+ elsif (loc + n - 1 < buf_size(my_buf))
833
+ return @buf_list[my_buf][loc, n] # String.
834
+ else # should do loop insted of (tail) recursive call...
835
+ res = @buf_list[my_buf][loc, BufSize]
836
+ size_added = buf_size(my_buf) - loc
837
+ if size_added > 0
838
+ idx += size_added
839
+ n -= size_added
840
+ ret = self[idx, n]
841
+ if ret
842
+ res << ret
843
+ end
844
+ end
845
+ return res
846
+ end
847
+ end
848
+ alias get []
849
+
850
+ # drop a string from the stream.
851
+ # Returns dropped size. At EOF, dropped size might not equal arg n.
852
+ # Once you drop the head of the stream, access to the dropped part via []
853
+ # or get returns nil.
854
+ def drop(n)
855
+ if is_eos?
856
+ return 0
857
+ end
858
+ size_dropped = 0
859
+ while (n > 0)
860
+ if !@is_eos or (@cur_buf != @buf_tail_idx)
861
+ if (@offset + n < buf_size(@cur_buf))
862
+ size_dropped += n
863
+ @offset += n
864
+ n = 0
865
+ else
866
+ size = buf_size(@cur_buf) - @offset
867
+ size_dropped += size
868
+ n -= size
869
+ @offset = 0
870
+ unless rel_buf
871
+ unless add_buf
872
+ break
873
+ end
874
+ @cur_buf = @buf_tail_idx
875
+ end
876
+ end
877
+ end
878
+ end
879
+ size_dropped
880
+ end
881
+
882
+ def is_eos?
883
+ return idx_is_eos?(0)
884
+ end
885
+
886
+ # WARN: Do not instantiate this class directly. Define your own class
887
+ # which derives this class and define 'read' instance method.
888
+ def initialize
889
+ @buf_list = []
890
+ @cur_buf = @buf_tail_idx = -1
891
+ @offset = 0
892
+ @is_eos = false
893
+ add_buf
894
+ @cur_buf = @buf_tail_idx
895
+ end
896
+
897
+ protected
898
+
899
+ def terminate
900
+ while (rel_buf); end
901
+ end
902
+
903
+ # protected method 'read' must be defined in derived classes.
904
+ # CAUTION: Returning a string whose size is not equal to 'size' means
905
+ # EndOfStream. When it is not at EOS, you must block the callee, try to
906
+ # read and return the sized string.
907
+ def read(size) # raise EOFError
908
+ raise NotImplementedError.new('Method read must be defined in a derived class.')
909
+ end
910
+
911
+ private
912
+
913
+ def buf_size(idx)
914
+ @buf_list[idx].size
915
+ end
916
+
917
+ def add_buf
918
+ if @is_eos
919
+ return false
920
+ end
921
+ begin
922
+ str_read = read(BufSize)
923
+ rescue EOFError
924
+ str_read = nil
925
+ rescue
926
+ terminate
927
+ raise
928
+ end
929
+ if str_read.nil?
930
+ @is_eos = true
931
+ @buf_list.push('')
932
+ @buf_tail_idx += 1
933
+ false
934
+ else
935
+ @buf_list.push(str_read)
936
+ @buf_tail_idx += 1
937
+ true
938
+ end
939
+ end
940
+
941
+ def rel_buf
942
+ if (@cur_buf < 0)
943
+ return false
944
+ end
945
+ @buf_list[@cur_buf] = nil
946
+ if (@cur_buf == @buf_tail_idx)
947
+ @cur_buf = -1
948
+ return false
949
+ else
950
+ @cur_buf += 1
951
+ return true
952
+ end
953
+ end
954
+
955
+ def idx_is_eos?(idx)
956
+ (@is_eos and ((@cur_buf < 0) or (@cur_buf == @buf_tail_idx)))
957
+ end
958
+
959
+ BufSize = 1024 * 8
960
+ end
961
+
962
+ # Buffered IO.
963
+ #
964
+ # EXAMPLE
965
+ # # File 'bigdata' could be a giga-byte size one!
966
+ # buf = CSV::IOBuf.new(File.open('bigdata', 'rb'))
967
+ # CSV::Reader.new(buf).each do |row|
968
+ # p row
969
+ # break if row[0].data == 'admin'
970
+ # end
971
+ #
972
+ class IOBuf < StreamBuf
973
+ def initialize(s)
974
+ @s = s
975
+ super()
976
+ end
977
+
978
+ def close
979
+ terminate
980
+ end
981
+
982
+ private
983
+
984
+ def read(size)
985
+ @s.read(size)
986
+ end
987
+
988
+ def terminate
989
+ super()
990
+ end
991
+ end
992
+ end