rubysl-csv 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.travis.yml +8 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE +25 -0
  6. data/README.md +29 -0
  7. data/Rakefile +2 -0
  8. data/lib/csv.rb +1 -0
  9. data/lib/rubysl/csv.rb +2 -0
  10. data/lib/rubysl/csv/csv.rb +992 -0
  11. data/lib/rubysl/csv/version.rb +5 -0
  12. data/rubysl-csv.gemspec +27 -0
  13. data/spec/basicwriter/close_on_terminate_spec.rb +5 -0
  14. data/spec/basicwriter/initialize_spec.rb +5 -0
  15. data/spec/basicwriter/terminate_spec.rb +5 -0
  16. data/spec/cell/data_spec.rb +5 -0
  17. data/spec/cell/initialize_spec.rb +5 -0
  18. data/spec/fixtures/one_line.csv +1 -0
  19. data/spec/foreach_spec.rb +5 -0
  20. data/spec/generate_line_spec.rb +58 -0
  21. data/spec/generate_row_spec.rb +5 -0
  22. data/spec/generate_spec.rb +73 -0
  23. data/spec/iobuf/close_spec.rb +5 -0
  24. data/spec/iobuf/initialize_spec.rb +5 -0
  25. data/spec/iobuf/read_spec.rb +5 -0
  26. data/spec/iobuf/terminate_spec.rb +5 -0
  27. data/spec/ioreader/close_on_terminate_spec.rb +5 -0
  28. data/spec/ioreader/get_row_spec.rb +5 -0
  29. data/spec/ioreader/initialize_spec.rb +5 -0
  30. data/spec/ioreader/terminate_spec.rb +5 -0
  31. data/spec/open_spec.rb +5 -0
  32. data/spec/parse_row_spec.rb +29 -0
  33. data/spec/parse_spec.rb +126 -0
  34. data/spec/read_spec.rb +5 -0
  35. data/spec/reader/close_spec.rb +7 -0
  36. data/spec/reader/create_spec.rb +7 -0
  37. data/spec/reader/each_spec.rb +7 -0
  38. data/spec/reader/get_row_spec.rb +7 -0
  39. data/spec/reader/initialize_spec.rb +7 -0
  40. data/spec/reader/parse_spec.rb +24 -0
  41. data/spec/reader/shift_spec.rb +7 -0
  42. data/spec/reader/terminate_spec.rb +7 -0
  43. data/spec/readlines_spec.rb +24 -0
  44. data/spec/streambuf/add_buf_spec.rb +5 -0
  45. data/spec/streambuf/buf_size_spec.rb +5 -0
  46. data/spec/streambuf/drop_spec.rb +5 -0
  47. data/spec/streambuf/element_reference_spec.rb +5 -0
  48. data/spec/streambuf/get_spec.rb +5 -0
  49. data/spec/streambuf/idx_is_eos_spec.rb +5 -0
  50. data/spec/streambuf/initialize_spec.rb +5 -0
  51. data/spec/streambuf/is_eos_spec.rb +5 -0
  52. data/spec/streambuf/read_spec.rb +5 -0
  53. data/spec/streambuf/rel_buf_spec.rb +5 -0
  54. data/spec/streambuf/terminate_spec.rb +5 -0
  55. data/spec/stringreader/get_row_spec.rb +5 -0
  56. data/spec/stringreader/initialize_spec.rb +5 -0
  57. data/spec/writer/add_row_spec.rb +5 -0
  58. data/spec/writer/append_spec.rb +5 -0
  59. data/spec/writer/close_spec.rb +5 -0
  60. data/spec/writer/create_spec.rb +5 -0
  61. data/spec/writer/generate_spec.rb +5 -0
  62. data/spec/writer/initialize_spec.rb +5 -0
  63. data/spec/writer/terminate_spec.rb +5 -0
  64. metadata +227 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d73bb569e1fefd81701c9d864103837e884882ac
4
+ data.tar.gz: 3184788b8f1ef3727a76e8deef349173d3e652fd
5
+ SHA512:
6
+ metadata.gz: 8943bd15e1331bb4189cf814432c854f609bef95370dd9d1dada5a36fee3bd372100b0883518035660c127e307af952dd94a494a3f275324e2f8840f2710125e
7
+ data.tar.gz: e60dfb285ac43dda46e209a202a7eedf1749603165ef2930c8a047449476664f32d9cbd7f3831d109c1ad00016bd340441daddcf768b4fb26e237d2221ef572f
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .rbx
@@ -0,0 +1,8 @@
1
+ language: ruby
2
+ before_install:
3
+ - gem update --system
4
+ - gem --version
5
+ - gem install rubysl-bundler
6
+ script: bundle exec mspec spec
7
+ rvm:
8
+ - rbx-nightly-18mode
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rubysl-csv.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ Copyright (c) 2013, Brian Shirai
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ 1. Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+ 2. Redistributions in binary form must reproduce the above copyright notice,
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+ 3. Neither the name of the library nor the names of its contributors may be
13
+ used to endorse or promote products derived from this software without
14
+ specific prior written permission.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY DIRECT,
20
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21
+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23
+ OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25
+ EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,29 @@
1
+ # RubySL::Csv
2
+
3
+ TODO: Write a gem description
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'rubysl-csv'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install rubysl-csv
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1 @@
1
+ require "rubysl/csv"
@@ -0,0 +1,2 @@
1
+ require "rubysl/csv/csv"
2
+ require "rubysl/csv/version"
@@ -0,0 +1,992 @@
1
+ # CSV -- module for generating/parsing CSV data.
2
+ # Copyright (C) 2000-2004 NAKAMURA, Hiroshi <nakahiro@sarion.co.jp>.
3
+
4
+ # $Id: csv.rb 11708 2007-02-12 23:01:19Z shyouhei $
5
+
6
+ # This program is copyrighted free software by NAKAMURA, Hiroshi. You can
7
+ # redistribute it and/or modify it under the same terms of Ruby's license;
8
+ # either the dual license version in 2003, or any later version.
9
+
10
+
11
+ class CSV
12
+ class IllegalFormatError < RuntimeError; end
13
+
14
+ # deprecated
15
+ class Cell < String
16
+ def initialize(data = "", is_null = false)
17
+ super(is_null ? "" : data)
18
+ end
19
+
20
+ def data
21
+ to_s
22
+ end
23
+ end
24
+
25
+ # deprecated
26
+ class Row < Array
27
+ end
28
+
29
+ # Open a CSV formatted file for reading or writing.
30
+ #
31
+ # For reading.
32
+ #
33
+ # EXAMPLE 1
34
+ # CSV.open('csvfile.csv', 'r') do |row|
35
+ # p row
36
+ # end
37
+ #
38
+ # EXAMPLE 2
39
+ # reader = CSV.open('csvfile.csv', 'r')
40
+ # row1 = reader.shift
41
+ # row2 = reader.shift
42
+ # if row2.empty?
43
+ # p 'row2 not found.'
44
+ # end
45
+ # reader.close
46
+ #
47
+ # ARGS
48
+ # filename: filename to parse.
49
+ # col_sep: Column separator. ?, by default. If you want to separate
50
+ # fields with semicolon, give ?; here.
51
+ # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
52
+ # want to separate records with \r, give ?\r here.
53
+ #
54
+ # RETURNS
55
+ # reader instance. To get parse result, see CSV::Reader#each.
56
+ #
57
+ #
58
+ # For writing.
59
+ #
60
+ # EXAMPLE 1
61
+ # CSV.open('csvfile.csv', 'w') do |writer|
62
+ # writer << ['r1c1', 'r1c2']
63
+ # writer << ['r2c1', 'r2c2']
64
+ # writer << [nil, nil]
65
+ # end
66
+ #
67
+ # EXAMPLE 2
68
+ # writer = CSV.open('csvfile.csv', 'w')
69
+ # writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
70
+ # writer.close
71
+ #
72
+ # ARGS
73
+ # filename: filename to generate.
74
+ # col_sep: Column separator. ?, by default. If you want to separate
75
+ # fields with semicolon, give ?; here.
76
+ # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
77
+ # want to separate records with \r, give ?\r here.
78
+ #
79
+ # RETURNS
80
+ # writer instance. See CSV::Writer#<< and CSV::Writer#add_row to know how
81
+ # to generate CSV string.
82
+ #
83
+ def CSV.open(path, mode, fs = nil, rs = nil, &block)
84
+ if mode == 'r' or mode == 'rb'
85
+ open_reader(path, mode, fs, rs, &block)
86
+ elsif mode == 'w' or mode == 'wb'
87
+ open_writer(path, mode, fs, rs, &block)
88
+ else
89
+ raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
90
+ end
91
+ end
92
+
93
+ def CSV.foreach(path, rs = nil, &block)
94
+ open_reader(path, 'r', ',', rs, &block)
95
+ end
96
+
97
+ def CSV.read(path, length = nil, offset = nil)
98
+ CSV.parse(IO.read(path, length, offset))
99
+ end
100
+
101
+ def CSV.readlines(path, rs = nil)
102
+ reader = open_reader(path, 'r', ',', rs)
103
+ begin
104
+ reader.collect { |row| row }
105
+ ensure
106
+ reader.close
107
+ end
108
+ end
109
+
110
+ def CSV.generate(path, fs = nil, rs = nil, &block)
111
+ open_writer(path, 'w', fs, rs, &block)
112
+ end
113
+
114
+ # Parse lines from given string or stream. Return rows as an Array of Arrays.
115
+ def CSV.parse(str_or_readable, fs = nil, rs = nil, &block)
116
+ if File.exist?(str_or_readable)
117
+ STDERR.puts("CSV.parse(filename) is deprecated." +
118
+ " Use CSV.open(filename, 'r') instead.")
119
+ return open_reader(str_or_readable, 'r', fs, rs, &block)
120
+ end
121
+ if block
122
+ CSV::Reader.parse(str_or_readable, fs, rs) do |row|
123
+ yield(row)
124
+ end
125
+ nil
126
+ else
127
+ CSV::Reader.create(str_or_readable, fs, rs).collect { |row| row }
128
+ end
129
+ end
130
+
131
+ # Parse a line from given string. Bear in mind it parses ONE LINE. Rest of
132
+ # the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the
133
+ # second line 'c,d' is ignored.
134
+ #
135
+ # If you don't know whether a target string to parse is exactly 1 line or
136
+ # not, use CSV.parse_row instead of this method.
137
+ def CSV.parse_line(src, fs = nil, rs = nil)
138
+ fs ||= ','
139
+ if fs.is_a?(Fixnum)
140
+ fs = fs.chr
141
+ end
142
+ if !rs.nil? and rs.is_a?(Fixnum)
143
+ rs = rs.chr
144
+ end
145
+ idx = 0
146
+ res_type = :DT_COLSEP
147
+ row = []
148
+ begin
149
+ while res_type == :DT_COLSEP
150
+ res_type, idx, cell = parse_body(src, idx, fs, rs)
151
+ row << cell
152
+ end
153
+ rescue IllegalFormatError
154
+ return []
155
+ end
156
+ row
157
+ end
158
+
159
+ # Create a line from cells. each cell is stringified by to_s.
160
+ def CSV.generate_line(row, fs = nil, rs = nil)
161
+ if row.size == 0
162
+ return ''
163
+ end
164
+ fs ||= ','
165
+ if fs.is_a?(Fixnum)
166
+ fs = fs.chr
167
+ end
168
+ if !rs.nil? and rs.is_a?(Fixnum)
169
+ rs = rs.chr
170
+ end
171
+ res_type = :DT_COLSEP
172
+ result_str = ''
173
+ idx = 0
174
+ while true
175
+ generate_body(row[idx], result_str, fs, rs)
176
+ idx += 1
177
+ if (idx == row.size)
178
+ break
179
+ end
180
+ generate_separator(:DT_COLSEP, result_str, fs, rs)
181
+ end
182
+ result_str
183
+ end
184
+
185
+ # Parse a line from string. Consider using CSV.parse_line instead.
186
+ # To parse lines in CSV string, see EXAMPLE below.
187
+ #
188
+ # EXAMPLE
189
+ # src = "a,b\r\nc,d\r\ne,f"
190
+ # idx = 0
191
+ # begin
192
+ # parsed = []
193
+ # parsed_cells, idx = CSV.parse_row(src, idx, parsed)
194
+ # puts "Parsed #{ parsed_cells } cells."
195
+ # p parsed
196
+ # end while parsed_cells > 0
197
+ #
198
+ # ARGS
199
+ # src: CSV data to be parsed. Must respond to '[](idx)'.
200
+ # src[](idx) must return a char. (Not a string such as 'a', but 97).
201
+ # src[](idx_out_of_bounds) must return nil. A String satisfies this
202
+ # requirement.
203
+ # idx: index of parsing location of 'src'. 0 origin.
204
+ # out_dev: buffer for parsed cells. Must respond to '<<(aString)'.
205
+ # col_sep: Column separator. ?, by default. If you want to separate
206
+ # fields with semicolon, give ?; here.
207
+ # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
208
+ # want to separate records with \r, give ?\r here.
209
+ #
210
+ # RETURNS
211
+ # parsed_cells: num of parsed cells.
212
+ # idx: index of next parsing location of 'src'.
213
+ #
214
+ def CSV.parse_row(src, idx, out_dev, fs = nil, rs = nil)
215
+ fs ||= ','
216
+ if fs.is_a?(Fixnum)
217
+ fs = fs.chr
218
+ end
219
+ if !rs.nil? and rs.is_a?(Fixnum)
220
+ rs = rs.chr
221
+ end
222
+ idx_backup = idx
223
+ parsed_cells = 0
224
+ res_type = :DT_COLSEP
225
+ begin
226
+ while res_type != :DT_ROWSEP
227
+ res_type, idx, cell = parse_body(src, idx, fs, rs)
228
+ if res_type == :DT_EOS
229
+ if idx == idx_backup #((parsed_cells == 0) and cell.nil?)
230
+ return 0, 0
231
+ end
232
+ res_type = :DT_ROWSEP
233
+ end
234
+ parsed_cells += 1
235
+ out_dev << cell
236
+ end
237
+ rescue IllegalFormatError
238
+ return 0, 0
239
+ end
240
+ return parsed_cells, idx
241
+ end
242
+
243
+ # Convert a line from cells data to string. Consider using CSV.generate_line
244
+ # instead. To generate multi-row CSV string, see EXAMPLE below.
245
+ #
246
+ # EXAMPLE
247
+ # row1 = ['a', 'b']
248
+ # row2 = ['c', 'd']
249
+ # row3 = ['e', 'f']
250
+ # src = [row1, row2, row3]
251
+ # buf = ''
252
+ # src.each do |row|
253
+ # parsed_cells = CSV.generate_row(row, 2, buf)
254
+ # puts "Created #{ parsed_cells } cells."
255
+ # end
256
+ # p buf
257
+ #
258
+ # ARGS
259
+ # src: an Array of String to be converted to CSV string. Must respond to
260
+ # 'size' and '[](idx)'. src[idx] must return String.
261
+ # cells: num of cells in a line.
262
+ # out_dev: buffer for generated CSV string. Must respond to '<<(string)'.
263
+ # col_sep: Column separator. ?, by default. If you want to separate
264
+ # fields with semicolon, give ?; here.
265
+ # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
266
+ # want to separate records with \r, give ?\r here.
267
+ #
268
+ # RETURNS
269
+ # parsed_cells: num of converted cells.
270
+ #
271
+ def CSV.generate_row(src, cells, out_dev, fs = nil, rs = nil)
272
+ fs ||= ','
273
+ if fs.is_a?(Fixnum)
274
+ fs = fs.chr
275
+ end
276
+ if !rs.nil? and rs.is_a?(Fixnum)
277
+ rs = rs.chr
278
+ end
279
+ src_size = src.size
280
+ if (src_size == 0)
281
+ if cells == 0
282
+ generate_separator(:DT_ROWSEP, out_dev, fs, rs)
283
+ end
284
+ return 0
285
+ end
286
+ res_type = :DT_COLSEP
287
+ parsed_cells = 0
288
+ generate_body(src[parsed_cells], out_dev, fs, rs)
289
+ parsed_cells += 1
290
+ while ((parsed_cells < cells) and (parsed_cells != src_size))
291
+ generate_separator(:DT_COLSEP, out_dev, fs, rs)
292
+ generate_body(src[parsed_cells], out_dev, fs, rs)
293
+ parsed_cells += 1
294
+ end
295
+ if (parsed_cells == cells)
296
+ generate_separator(:DT_ROWSEP, out_dev, fs, rs)
297
+ else
298
+ generate_separator(:DT_COLSEP, out_dev, fs, rs)
299
+ end
300
+ parsed_cells
301
+ end
302
+
303
+ # Private class methods.
304
+ class << self
305
+ private
306
+
307
+ def open_reader(path, mode, fs, rs, &block)
308
+ file = File.open(path, mode)
309
+ if block
310
+ begin
311
+ CSV::Reader.parse(file, fs, rs) do |row|
312
+ yield(row)
313
+ end
314
+ ensure
315
+ file.close
316
+ end
317
+ nil
318
+ else
319
+ reader = CSV::Reader.create(file, fs, rs)
320
+ reader.close_on_terminate
321
+ reader
322
+ end
323
+ end
324
+
325
+ def open_writer(path, mode, fs, rs, &block)
326
+ file = File.open(path, mode)
327
+ if block
328
+ begin
329
+ CSV::Writer.generate(file, fs, rs) do |writer|
330
+ yield(writer)
331
+ end
332
+ ensure
333
+ file.close
334
+ end
335
+ nil
336
+ else
337
+ writer = CSV::Writer.create(file, fs, rs)
338
+ writer.close_on_terminate
339
+ writer
340
+ end
341
+ end
342
+
343
+ def parse_body(src, idx, fs, rs)
344
+ fs_str = fs
345
+ fs_size = fs_str.size
346
+ rs_str = rs || "\n"
347
+ rs_size = rs_str.size
348
+ fs_idx = rs_idx = 0
349
+ cell = Cell.new
350
+ state = :ST_START
351
+ quoted = cr = false
352
+ c = nil
353
+ last_idx = idx
354
+ while c = src[idx]
355
+ unless quoted
356
+ fschar = (c == fs_str[fs_idx])
357
+ rschar = (c == rs_str[rs_idx])
358
+ # simple 1 char backtrack
359
+ if !fschar and c == fs_str[0]
360
+ fs_idx = 0
361
+ fschar = true
362
+ if state == :ST_START
363
+ state = :ST_DATA
364
+ elsif state == :ST_QUOTE
365
+ raise IllegalFormatError
366
+ end
367
+ end
368
+ if !rschar and c == rs_str[0]
369
+ rs_idx = 0
370
+ rschar = true
371
+ if state == :ST_START
372
+ state = :ST_DATA
373
+ elsif state == :ST_QUOTE
374
+ raise IllegalFormatError
375
+ end
376
+ end
377
+ end
378
+ if c == ?"
379
+ fs_idx = rs_idx = 0
380
+ if cr
381
+ raise IllegalFormatError
382
+ end
383
+ cell << src[last_idx, (idx - last_idx)]
384
+ last_idx = idx
385
+ if state == :ST_DATA
386
+ if quoted
387
+ last_idx += 1
388
+ quoted = false
389
+ state = :ST_QUOTE
390
+ else
391
+ raise IllegalFormatError
392
+ end
393
+ elsif state == :ST_QUOTE
394
+ cell << c.chr
395
+ last_idx += 1
396
+ quoted = true
397
+ state = :ST_DATA
398
+ else # :ST_START
399
+ quoted = true
400
+ last_idx += 1
401
+ state = :ST_DATA
402
+ end
403
+ elsif fschar or rschar
404
+ if fschar
405
+ fs_idx += 1
406
+ end
407
+ if rschar
408
+ rs_idx += 1
409
+ end
410
+ sep = nil
411
+ if fs_idx == fs_size
412
+ if state == :ST_START and rs_idx > 0 and fs_idx < rs_idx
413
+ state = :ST_DATA
414
+ end
415
+ cell << src[last_idx, (idx - last_idx - (fs_size - 1))]
416
+ last_idx = idx
417
+ fs_idx = rs_idx = 0
418
+ if cr
419
+ raise IllegalFormatError
420
+ end
421
+ sep = :DT_COLSEP
422
+ elsif rs_idx == rs_size
423
+ if state == :ST_START and fs_idx > 0 and rs_idx < fs_idx
424
+ state = :ST_DATA
425
+ end
426
+ if !(rs.nil? and cr)
427
+ cell << src[last_idx, (idx - last_idx - (rs_size - 1))]
428
+ last_idx = idx
429
+ end
430
+ fs_idx = rs_idx = 0
431
+ sep = :DT_ROWSEP
432
+ end
433
+ if sep
434
+ if state == :ST_DATA
435
+ return sep, idx + 1, cell;
436
+ elsif state == :ST_QUOTE
437
+ return sep, idx + 1, cell;
438
+ else # :ST_START
439
+ return sep, idx + 1, nil
440
+ end
441
+ end
442
+ elsif rs.nil? and c == ?\r
443
+ # special \r treatment for backward compatibility
444
+ fs_idx = rs_idx = 0
445
+ if cr
446
+ raise IllegalFormatError
447
+ end
448
+ cell << src[last_idx, (idx - last_idx)]
449
+ last_idx = idx
450
+ if quoted
451
+ state = :ST_DATA
452
+ else
453
+ cr = true
454
+ end
455
+ else
456
+ fs_idx = rs_idx = 0
457
+ if state == :ST_DATA or state == :ST_START
458
+ if cr
459
+ raise IllegalFormatError
460
+ end
461
+ state = :ST_DATA
462
+ else # :ST_QUOTE
463
+ raise IllegalFormatError
464
+ end
465
+ end
466
+ idx += 1
467
+ end
468
+ if state == :ST_START
469
+ if fs_idx > 0 or rs_idx > 0
470
+ state = :ST_DATA
471
+ else
472
+ return :DT_EOS, idx, nil
473
+ end
474
+ elsif quoted
475
+ raise IllegalFormatError
476
+ elsif cr
477
+ raise IllegalFormatError
478
+ end
479
+ cell << src[last_idx, (idx - last_idx)]
480
+ last_idx = idx
481
+ return :DT_EOS, idx, cell
482
+ end
483
+
484
+ def generate_body(cell, out_dev, fs, rs)
485
+ if cell.nil?
486
+ # empty
487
+ else
488
+ cell = cell.to_s
489
+ row_data = cell.dup
490
+ if (row_data.gsub!('"', '""') or
491
+ row_data.index(fs) or
492
+ (rs and row_data.index(rs)) or
493
+ (/[\r\n]/ =~ row_data) or
494
+ (cell.empty?))
495
+ out_dev << '"' << row_data << '"'
496
+ else
497
+ out_dev << row_data
498
+ end
499
+ end
500
+ end
501
+
502
+ def generate_separator(type, out_dev, fs, rs)
503
+ case type
504
+ when :DT_COLSEP
505
+ out_dev << fs
506
+ when :DT_ROWSEP
507
+ out_dev << (rs || "\n")
508
+ end
509
+ end
510
+ end
511
+
512
+
513
+ # CSV formatted string/stream reader.
514
+ #
515
+ # EXAMPLE
516
+ # read CSV lines until the first column is 'stop'.
517
+ #
518
+ # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
519
+ # p row
520
+ # break if !row[0].nil? && row[0].data == 'stop'
521
+ # end
522
+ #
523
+ class Reader
524
+ include Enumerable
525
+
526
+ # Parse CSV data and get lines. Given block is called for each parsed row.
527
+ # Block value is always nil. Rows are not cached for performance reason.
528
+ def Reader.parse(str_or_readable, fs = ',', rs = nil, &block)
529
+ reader = Reader.create(str_or_readable, fs, rs)
530
+ if block
531
+ reader.each do |row|
532
+ yield(row)
533
+ end
534
+ reader.close
535
+ nil
536
+ else
537
+ reader
538
+ end
539
+ end
540
+
541
+ # Returns reader instance.
542
+ def Reader.create(str_or_readable, fs = ',', rs = nil)
543
+ case str_or_readable
544
+ when IO
545
+ IOReader.new(str_or_readable, fs, rs)
546
+ when String
547
+ StringReader.new(str_or_readable, fs, rs)
548
+ else
549
+ IOReader.new(str_or_readable, fs, rs)
550
+ end
551
+ end
552
+
553
+ def each
554
+ while true
555
+ row = []
556
+ parsed_cells = get_row(row)
557
+ if parsed_cells == 0
558
+ break
559
+ end
560
+ yield(row)
561
+ end
562
+ nil
563
+ end
564
+
565
+ def shift
566
+ row = []
567
+ parsed_cells = get_row(row)
568
+ row
569
+ end
570
+
571
+ def close
572
+ terminate
573
+ end
574
+
575
+ private
576
+
577
+ def initialize(dev)
578
+ raise RuntimeError.new('Do not instanciate this class directly.')
579
+ end
580
+
581
+ def get_row(row)
582
+ raise NotImplementedError.new('Method get_row must be defined in a derived class.')
583
+ end
584
+
585
+ def terminate
586
+ # Define if needed.
587
+ end
588
+ end
589
+
590
+
591
+ class StringReader < Reader
592
+ def initialize(string, fs = ',', rs = nil)
593
+ @fs = fs
594
+ @rs = rs
595
+ @dev = string
596
+ @idx = 0
597
+ if @dev[0, 3] == "\xef\xbb\xbf"
598
+ @idx += 3
599
+ end
600
+ end
601
+
602
+ private
603
+
604
+ def get_row(row)
605
+ parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs)
606
+ if parsed_cells == 0 and next_idx == 0 and @idx != @dev.size
607
+ raise IllegalFormatError.new
608
+ end
609
+ @idx = next_idx
610
+ parsed_cells
611
+ end
612
+ end
613
+
614
+
615
+ class IOReader < Reader
616
+ def initialize(io, fs = ',', rs = nil)
617
+ @io = io
618
+ @fs = fs
619
+ @rs = rs
620
+ @dev = CSV::IOBuf.new(@io)
621
+ @idx = 0
622
+ if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf
623
+ @idx += 3
624
+ end
625
+ @close_on_terminate = false
626
+ end
627
+
628
+ # Tell this reader to close the IO when terminated (Triggered by invoking
629
+ # CSV::IOReader#close).
630
+ def close_on_terminate
631
+ @close_on_terminate = true
632
+ end
633
+
634
+ private
635
+
636
+ def get_row(row)
637
+ parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs)
638
+ if parsed_cells == 0 and next_idx == 0 and !@dev.is_eos?
639
+ raise IllegalFormatError.new
640
+ end
641
+ dropped = @dev.drop(next_idx)
642
+ @idx = next_idx - dropped
643
+ parsed_cells
644
+ end
645
+
646
+ def terminate
647
+ if @close_on_terminate
648
+ @io.close
649
+ end
650
+
651
+ if @dev
652
+ @dev.close
653
+ end
654
+ end
655
+ end
656
+
657
+
658
+ # CSV formatted string/stream writer.
659
+ #
660
+ # EXAMPLE
661
+ # Write rows to 'csvout' file.
662
+ #
663
+ # outfile = File.open('csvout', 'wb')
664
+ # CSV::Writer.generate(outfile) do |csv|
665
+ # csv << ['c1', nil, '', '"', "\r\n", 'c2']
666
+ # ...
667
+ # end
668
+ #
669
+ # outfile.close
670
+ #
671
+ class Writer
672
+ # Given block is called with the writer instance. str_or_writable must
673
+ # handle '<<(string)'.
674
+ def Writer.generate(str_or_writable, fs = ',', rs = nil, &block)
675
+ writer = Writer.create(str_or_writable, fs, rs)
676
+ if block
677
+ yield(writer)
678
+ writer.close
679
+ nil
680
+ else
681
+ writer
682
+ end
683
+ end
684
+
685
+ # str_or_writable must handle '<<(string)'.
686
+ def Writer.create(str_or_writable, fs = ',', rs = nil)
687
+ BasicWriter.new(str_or_writable, fs, rs)
688
+ end
689
+
690
+ # dump CSV stream to the device. argument must be an Array of String.
691
+ def <<(row)
692
+ CSV.generate_row(row, row.size, @dev, @fs, @rs)
693
+ self
694
+ end
695
+ alias add_row <<
696
+
697
+ def close
698
+ terminate
699
+ end
700
+
701
+ private
702
+
703
+ def initialize(dev)
704
+ raise RuntimeError.new('Do not instanciate this class directly.')
705
+ end
706
+
707
+ def terminate
708
+ # Define if needed.
709
+ end
710
+ end
711
+
712
+
713
+ class BasicWriter < Writer
714
+ def initialize(str_or_writable, fs = ',', rs = nil)
715
+ @fs = fs
716
+ @rs = rs
717
+ @dev = str_or_writable
718
+ @close_on_terminate = false
719
+ end
720
+
721
+ # Tell this writer to close the IO when terminated (Triggered by invoking
722
+ # CSV::BasicWriter#close).
723
+ def close_on_terminate
724
+ @close_on_terminate = true
725
+ end
726
+
727
+ private
728
+
729
+ def terminate
730
+ if @close_on_terminate
731
+ @dev.close
732
+ end
733
+ end
734
+ end
735
+
736
+ private
737
+
738
+ # Buffered stream.
739
+ #
740
+ # EXAMPLE 1 -- an IO.
741
+ # class MyBuf < StreamBuf
742
+ # # Do initialize myself before a super class. Super class might call my
743
+ # # method 'read'. (Could be awful for C++ user. :-)
744
+ # def initialize(s)
745
+ # @s = s
746
+ # super()
747
+ # end
748
+ #
749
+ # # define my own 'read' method.
750
+ # # CAUTION: Returning nil means EndOfStream.
751
+ # def read(size)
752
+ # @s.read(size)
753
+ # end
754
+ #
755
+ # # release buffers. in Ruby which has GC, you do not have to call this...
756
+ # def terminate
757
+ # @s = nil
758
+ # super()
759
+ # end
760
+ # end
761
+ #
762
+ # buf = MyBuf.new(STDIN)
763
+ # my_str = ''
764
+ # p buf[0, 0] # => '' (null string)
765
+ # p buf[0] # => 97 (char code of 'a')
766
+ # p buf[0, 1] # => 'a'
767
+ # my_str = buf[0, 5]
768
+ # p my_str # => 'abcde' (5 chars)
769
+ # p buf[0, 6] # => "abcde\n" (6 chars)
770
+ # p buf[0, 7] # => "abcde\n" (6 chars)
771
+ # p buf.drop(3) # => 3 (dropped chars)
772
+ # p buf.get(0, 2) # => 'de' (2 chars)
773
+ # p buf.is_eos? # => false (is not EOS here)
774
+ # p buf.drop(5) # => 3 (dropped chars)
775
+ # p buf.is_eos? # => true (is EOS here)
776
+ # p buf[0] # => nil (is EOS here)
777
+ #
778
+ # EXAMPLE 2 -- String.
779
+ # This is a conceptual example. No pros with this.
780
+ #
781
+ # class StrBuf < StreamBuf
782
+ # def initialize(s)
783
+ # @str = s
784
+ # @idx = 0
785
+ # super()
786
+ # end
787
+ #
788
+ # def read(size)
789
+ # str = @str[@idx, size]
790
+ # @idx += str.size
791
+ # str
792
+ # end
793
+ # end
794
+ #
795
+ class StreamBuf
796
+ # get a char or a partial string from the stream.
797
+ # idx: index of a string to specify a start point of a string to get.
798
+ # unlike String instance, idx < 0 returns nil.
799
+ # n: size of a string to get.
800
+ # returns char at idx if n == nil.
801
+ # returns a partial string, from idx to (idx + n) if n != nil. at EOF,
802
+ # the string size might not equal arg n.
803
+ def [](idx, n = nil)
804
+ if idx < 0
805
+ return nil
806
+ end
807
+ if (idx_is_eos?(idx))
808
+ if n and (@offset + idx == buf_size(@cur_buf))
809
+ # Like a String, 'abc'[4, 1] returns nil and
810
+ # 'abc'[3, 1] returns '' not nil.
811
+ return ''
812
+ else
813
+ return nil
814
+ end
815
+ end
816
+ my_buf = @cur_buf
817
+ my_offset = @offset
818
+ next_idx = idx
819
+ while (my_offset + next_idx >= buf_size(my_buf))
820
+ if (my_buf == @buf_tail_idx)
821
+ unless add_buf
822
+ break
823
+ end
824
+ end
825
+ next_idx = my_offset + next_idx - buf_size(my_buf)
826
+ my_buf += 1
827
+ my_offset = 0
828
+ end
829
+ loc = my_offset + next_idx
830
+ if !n
831
+ return @buf_list[my_buf][loc] # Fixnum of char code.
832
+ elsif (loc + n - 1 < buf_size(my_buf))
833
+ return @buf_list[my_buf][loc, n] # String.
834
+ else # should do loop instead of (tail) recursive call...
835
+ res = @buf_list[my_buf][loc, BufSize]
836
+ size_added = buf_size(my_buf) - loc
837
+ if size_added > 0
838
+ idx += size_added
839
+ n -= size_added
840
+ ret = self[idx, n]
841
+ if ret
842
+ res << ret
843
+ end
844
+ end
845
+ return res
846
+ end
847
+ end
848
+ alias get []
849
+
850
+ # drop a string from the stream.
851
+ # returns dropped size. at EOF, dropped size might not equal arg n.
852
+ # Once you drop the head of the stream, access to the dropped part via []
853
+ # or get returns nil.
854
+ def drop(n)
855
+ if is_eos?
856
+ return 0
857
+ end
858
+ size_dropped = 0
859
+ while (n > 0)
860
+ if !@is_eos or (@cur_buf != @buf_tail_idx)
861
+ if (@offset + n < buf_size(@cur_buf))
862
+ size_dropped += n
863
+ @offset += n
864
+ n = 0
865
+ else
866
+ size = buf_size(@cur_buf) - @offset
867
+ size_dropped += size
868
+ n -= size
869
+ @offset = 0
870
+ unless rel_buf
871
+ unless add_buf
872
+ break
873
+ end
874
+ @cur_buf = @buf_tail_idx
875
+ end
876
+ end
877
+ end
878
+ end
879
+ size_dropped
880
+ end
881
+
882
+ def is_eos?
883
+ return idx_is_eos?(0)
884
+ end
885
+
886
+ # WARN: Do not instantiate this class directly. Define your own class
887
+ # which derives from this class and defines a 'read' instance method.
888
+ def initialize
889
+ @buf_list = []
890
+ @cur_buf = @buf_tail_idx = -1
891
+ @offset = 0
892
+ @is_eos = false
893
+ add_buf
894
+ @cur_buf = @buf_tail_idx
895
+ end
896
+
897
+ protected
898
+
899
+ def terminate
900
+ while (rel_buf); end
901
+ end
902
+
903
+ # protected method 'read' must be defined in derived classes.
904
+ # CAUTION: Returning a string whose size is not equal to 'size' means
905
+ # EndOfStream. When it is not at EOS, you must block the callee, try to
906
+ # read and return the sized string.
907
+ def read(size) # raise EOFError
908
+ raise NotImplementedError.new('Method read must be defined in a derived class.')
909
+ end
910
+
911
+ private
912
+
913
+ def buf_size(idx)
914
+ @buf_list[idx].size
915
+ end
916
+
917
+ def add_buf
918
+ if @is_eos
919
+ return false
920
+ end
921
+ begin
922
+ str_read = read(BufSize)
923
+ rescue EOFError
924
+ str_read = nil
925
+ rescue
926
+ terminate
927
+ raise
928
+ end
929
+ if str_read.nil?
930
+ @is_eos = true
931
+ @buf_list.push('')
932
+ @buf_tail_idx += 1
933
+ false
934
+ else
935
+ @buf_list.push(str_read)
936
+ @buf_tail_idx += 1
937
+ true
938
+ end
939
+ end
940
+
941
+ def rel_buf
942
+ if (@cur_buf < 0)
943
+ return false
944
+ end
945
+ @buf_list[@cur_buf] = nil
946
+ if (@cur_buf == @buf_tail_idx)
947
+ @cur_buf = -1
948
+ return false
949
+ else
950
+ @cur_buf += 1
951
+ return true
952
+ end
953
+ end
954
+
955
+ def idx_is_eos?(idx)
956
+ (@is_eos and ((@cur_buf < 0) or (@cur_buf == @buf_tail_idx)))
957
+ end
958
+
959
+ BufSize = 1024 * 8
960
+ end
961
+
962
+ # Buffered IO.
963
+ #
964
+ # EXAMPLE
965
+ # # File 'bigdata' could be a giga-byte size one!
966
+ # buf = CSV::IOBuf.new(File.open('bigdata', 'rb'))
967
+ # CSV::Reader.new(buf).each do |row|
968
+ # p row
969
+ # break if row[0].data == 'admin'
970
+ # end
971
+ #
972
+ class IOBuf < StreamBuf
973
+ def initialize(s)
974
+ @s = s
975
+ super()
976
+ end
977
+
978
+ def close
979
+ terminate
980
+ end
981
+
982
+ private
983
+
984
+ def read(size)
985
+ @s.read(size)
986
+ end
987
+
988
+ def terminate
989
+ super()
990
+ end
991
+ end
992
+ end