rcsv 0.2.1 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -6,3 +6,6 @@ ext/rcsv/rcsv.o
6
6
  lib/rcsv/rcsv.so
7
7
  lib/rcsv/rcsv.bundle
8
8
  .*swp
9
+ Gemfile.lock
10
+ .bundle
11
+ vendor/
@@ -1,10 +1,8 @@
1
1
  language: ruby
2
2
  rvm:
3
+ - ree
4
+ - 1.8.7
3
5
  - 1.9.3
4
- - 1.9.2
5
6
  - 2.0.0
6
- - 2.1.0
7
- - 1.8.7
8
- - ree
9
- env:
10
- - JRUBY_OPTS="--server -Xcext.enabled=true -Xcompile.invokedynamic=false"
7
+ - 2.1.6
8
+ - 2.2.2
data/Gemfile CHANGED
@@ -4,3 +4,4 @@ source 'https://rubygems.org'
4
4
  gemspec
5
5
 
6
6
  gem "rake-compiler", :group => :development
7
+ gem "test-unit", :group => :test if RUBY_VERSION >= '2.2'
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![Build Status](https://travis-ci.org/fiksu/rcsv.png)](https://travis-ci.org/fiksu/rcsv)
4
4
 
5
- Rcsv is a fast CSV parsing library for MRI Ruby. Tested on REE 1.8.7 and Ruby 1.9.3.
5
+ Rcsv is a fast CSV parsing library for MRI Ruby. Tested on REE, 1.8.7, 1.9.3, 2.0.0, 2.1.6, 2.2.2.
6
6
 
7
7
  Contrary to many other gems that implement their own parsers, Rcsv uses libcsv 3.0.3 (http://sourceforge.net/projects/libcsv/). As long as libcsv's API is stable, getting Rcsv to use a newer libcsv version is as simple as updating two files (csv.h and libcsv.c).
8
8
 
@@ -114,6 +114,10 @@ A boolean flag. If enabled, only parses columns that are listed in :columns. Dis
114
114
  An integer. Default is 1MiB (1024 * 1024).
115
115
  Specifies the number of bytes that are read at once, which makes it possible to read directly from IO-like objects (files, sockets, etc.).
116
116
 
117
+ ### :output_encoding
118
+ A string. By default, the encoding is auto-detected from the original CSV file.
119
+ If specified, enforces the encoding of parsed string values. The default value keeps the encoding the same as in the original CSV file.
120
+
117
121
 
118
122
  ## Examples
119
123
 
@@ -164,10 +168,14 @@ That would display contents of each row without needing to put the whole parsed
164
168
 
165
169
  This way it is possible to read from a File directly, with a 20MiB buffer and parse lines one by one:
166
170
 
167
- Rcsv.parse(File.open('/some/file.csv'), :buffer_size => 20 * 1024 * 1024) { |row|
171
+ some_csv_file = File.open('/some/file.csv')
172
+
173
+ Rcsv.parse(some_csv_file, :buffer_size => 20 * 1024 * 1024) { |row|
168
174
  puts row.inspect
169
175
  }
170
176
 
177
+ some_csv_file.close
178
+
171
179
 
172
180
  ## To do
173
181
 
@@ -187,5 +195,5 @@ This way it is possible to read from a File directly, with a 20MiB buffer and pa
187
195
 
188
196
  ## Credits
189
197
 
190
- * Maintainer: Arthur Pirogovski @arp
191
- * Contributors: Edward Slavich @eslavich
198
+ * Maintainer: Artur Pyrogovskyi @arp
199
+ * Contributors: Edward Slavich @eslavich, Ivan Zarea @minivan, @97jaz, David Price @dprice, Andrew Grim @stopdropandrew
data/RELNOTES CHANGED
@@ -1,3 +1,16 @@
1
+ Version 0.3.1
2
+ * Travis fixes
3
+ * Fixed older Ruby support in tests
4
+
5
+ Version 0.3.0
6
+ * changed nil to be rendered as empty string by writer (by David Price)
7
+ * changed the default writer newline character from "\r\n" to platform's default (by Andrew Grim)
8
+ * improved writer quoting (by Andrew Grim)
9
+ * added :quote_char writer option (by @97jaz)
10
+ * added :output_encoding reader option (by David Price)
11
+ * added automatic encoding detection by reader (by David Price)
12
+ * updated supported Rubies list to REE, 1.8.7, 1.9.3, 2.0.0, 2.1.6, 2.2.2
13
+
1
14
  Version 0.2.1
2
15
  * removed a bunch of deprecation warnings (by Ivan Zarea)
3
16
  * added Ruby 2.1 support for Travis CI
@@ -110,7 +110,7 @@ void end_of_field_callback(void * field, size_t field_size, void * data) {
110
110
  if (meta->current_col < meta->num_row_conversions) {
111
111
  switch (row_conversion){
112
112
  case 's': /* String */
113
- parsed_field = ENCODED_STR_NEW(field_str, field_size, meta->encoding_index);
113
+ parsed_field = ENCODED_STR_NEW(field_str, field_size, meta->encoding_index);
114
114
  break;
115
115
  case 'i': /* Integer */
116
116
  parsed_field = LL2NUM(atoll(field_str));
@@ -327,6 +327,12 @@ VALUE rcsv_raw_parse(VALUE ensure_container) {
327
327
  csv_set_delim(cp, (unsigned char)*StringValuePtr(option));
328
328
  }
329
329
 
330
+ /* :quote_char sets the character used for quoting data; default is double-quote (") */
331
+ option = rb_hash_aref(options, ID2SYM(rb_intern("quote_char")));
332
+ if (option != Qnil) {
333
+ csv_set_quote(cp, (unsigned char)*StringValuePtr(option));
334
+ }
335
+
330
336
  /* Specify how many rows to skip from the beginning of CSV */
331
337
  option = rb_hash_aref(options, ID2SYM(rb_intern("offset_rows")));
332
338
  if (option != Qnil) {
@@ -382,7 +388,7 @@ VALUE rcsv_raw_parse(VALUE ensure_container) {
382
388
 
383
389
  /* :row_conversions specifies Ruby types that CSV field values should be converted into.
384
390
  Each char of row_conversions string represents Ruby type for CSV field with matching position. */
385
- option = rb_hash_aref(options, ID2SYM(rb_intern("row_conversions")));
391
+ option = rb_hash_aref(options, ID2SYM(rb_intern("row_conversions")));
386
392
  if (option != Qnil) {
387
393
  meta->num_row_conversions = RSTRING_LEN(option);
388
394
  meta->row_conversions = StringValuePtr(option);
@@ -390,7 +396,7 @@ VALUE rcsv_raw_parse(VALUE ensure_container) {
390
396
 
391
397
  /* Column names should be declared explicitly when parsing fields as Hashes */
392
398
  if (meta->row_as_hash) { /* Only matters for hash results */
393
- option = rb_hash_aref(options, ID2SYM(rb_intern("column_names")));
399
+ option = rb_hash_aref(options, ID2SYM(rb_intern("column_names")));
394
400
  if (option == Qnil) {
395
401
  rb_raise(rcsv_parse_error, ":row_as_hash requires :column_names to be set.");
396
402
  } else {
@@ -34,6 +34,9 @@ class LibCsv
34
34
  attach_function :csv_set_delim, [:pointer, :uchar], :void
35
35
  attach_function :csv_get_delim, [:pointer], :uchar
36
36
 
37
+ attach_function :csv_set_quote, [:pointer, :uchar], :void
38
+ attach_function :csv_get_quote, [:pointer], :uchar
39
+
37
40
  attach_function :csv_error, [:pointer], :int
38
41
  attach_function :csv_strerror, [:int], :string
39
42
 
@@ -46,6 +49,10 @@ class LibCsv
46
49
  csv_set_delim(parser, options[:col_sep].ord)
47
50
  end
48
51
 
52
+ if options[:quote_char]
53
+ csv_set_quote(parser, options[:quote_char].ord)
54
+ end
55
+
49
56
  fail "Couldn't initialize libcsv" if result == -1
50
57
 
51
58
  result = [[]]
@@ -82,7 +89,7 @@ class LibCsv
82
89
  csv_fini(parser, end_of_field_callback, end_of_record_callback, nil)
83
90
  csv_free(parser)
84
91
  result.pop if result.last == []
85
-
92
+
86
93
  return result
87
94
  end
88
95
  end
@@ -2,6 +2,7 @@ require "rcsv/rcsv"
2
2
  require "rcsv/version"
3
3
 
4
4
  require "stringio"
5
+ require "English"
5
6
 
6
7
  class Rcsv
7
8
 
@@ -30,6 +31,7 @@ class Rcsv
30
31
  raw_options = {}
31
32
 
32
33
  raw_options[:col_sep] = options[:column_separator] && options[:column_separator][0] || ','
34
+ raw_options[:quote_char] = options[:quote_char] && options[:quote_char][0] || '"'
33
35
  raw_options[:offset_rows] = options[:offset_rows] || 0
34
36
  raw_options[:nostrict] = options[:nostrict]
35
37
  raw_options[:parse_empty_fields_as] = options[:parse_empty_fields_as]
@@ -138,8 +140,16 @@ class Rcsv
138
140
  def initialize(write_options = {})
139
141
  @write_options = write_options
140
142
  @write_options[:column_separator] ||= ','
141
- @write_options[:newline_delimiter] ||= "\r\n" # Making Excel happy...
143
+ @write_options[:newline_delimiter] ||= $INPUT_RECORD_SEPARATOR
142
144
  @write_options[:header] ||= false
145
+
146
+ @quote = '"'
147
+ @escaped_quote = @quote * 2
148
+ @quotable_chars = Regexp.new('[%s%s%s]' % [
149
+ Regexp.escape(@write_options[:column_separator]),
150
+ Regexp.escape(@write_options[:newline_delimiter]),
151
+ Regexp.escape(@quote)
152
+ ])
143
153
  end
144
154
 
145
155
  def write(io, &block)
@@ -161,9 +171,8 @@ class Rcsv
161
171
  max_index = row.size - 1
162
172
 
163
173
  row.each_with_index do |field, index|
164
- unquoted_field = process(field, @write_options[:columns][index])
165
- # TODO: a better quoting
166
- csv_row << (unquoted_field.match(/,/) ? "\"#{unquoted_field}\"" : unquoted_field)
174
+ unquoted_field = process(field, @write_options[:columns] && @write_options[:columns][index])
175
+ csv_row << (unquoted_field.match(@quotable_chars) ? "\"#{unquoted_field.gsub(@quote, @escaped_quote)}\"" : unquoted_field)
167
176
  csv_row << column_separator unless index == max_index
168
177
  end
169
178
 
@@ -173,7 +182,8 @@ class Rcsv
173
182
  protected
174
183
 
175
184
  def process(field, column_options)
176
- return case column_options[:formatter]
185
+ return '' if field.nil?
186
+ return case column_options && column_options[:formatter]
177
187
  when :strftime
178
188
  format = column_options[:format] || "%Y-%m-%d %H:%M:%S %z"
179
189
  field.strftime(format)
@@ -1,3 +1,3 @@
1
1
  class Rcsv
2
- VERSION = "0.2.1"
2
+ VERSION = "0.3.1"
3
3
  end
@@ -16,4 +16,7 @@ Gem::Specification.new do |gem|
16
16
  gem.name = "rcsv"
17
17
  gem.require_paths = ["lib", "ext"]
18
18
  gem.version = Rcsv::VERSION
19
+ if RUBY_VERSION >= '2.2'
20
+ gem.add_development_dependency "test-unit", "~> 3.0.8"
21
+ end
19
22
  end
@@ -32,17 +32,29 @@ class RcsvRawParseTest < Test::Unit::TestCase
32
32
  assert_equal('""C81E-=; **ECCB; .. 89', raw_parsed_tsv_data[3][6])
33
33
  assert_equal("Dallas\t TX", raw_parsed_tsv_data[888][13])
34
34
  end
35
-
35
+
36
+ def test_rcsv_quote_char
37
+ csv = [
38
+ "F0A83489,69118080,,73,7008,2016-10-03,'''''C81E-=; **ECCB; .. 89','130,86',a3eb,1341-04-10,7612.699237971538,5b5e3fce-2ea5-4ca9-9749-90fd6dc8dd66,9,'Los Angeles, CA',e,6.047887837492023,f",
39
+ "48F4FAC9,11599213,,0,1897,2014-02-23,'''''ECCB-=; **A87F; .. 61','787,6',84de,1353-11-10,8078.704911344607,404d9d3e-963f-4199-a2f3-e71f6828b716,6,'Dallas, TX',,-6.684507609859605, 1"
40
+ ]
41
+ csv_data = StringIO.new(csv.join("\n"))
42
+ raw_parsed_csv_data = Rcsv.raw_parse(csv_data, :quote_char => "'")
43
+
44
+ assert_equal("''C81E-=; **ECCB; .. 89", raw_parsed_csv_data[0][6])
45
+ assert_equal('Dallas, TX', raw_parsed_csv_data[1][13])
46
+ end
47
+
36
48
  if String.instance_methods.include?(:encoding)
37
49
  def test_rcsv_output_encoding_default
38
50
  raw_parsed_csv_data = Rcsv.raw_parse(@csv_data)
39
-
51
+
40
52
  assert_equal(raw_parsed_csv_data[0][2].encoding, Encoding::ASCII_8BIT)
41
53
  end
42
54
 
43
55
  def test_rcsv_output_encoding_utf8
44
56
  raw_parsed_csv_data = Rcsv.raw_parse(@csv_data, :output_encoding => "UTF-8")
45
-
57
+
46
58
  assert_equal(raw_parsed_csv_data[0][2].encoding, Encoding::UTF_8)
47
59
  end
48
60
  else
@@ -52,7 +64,7 @@ class RcsvRawParseTest < Test::Unit::TestCase
52
64
  end
53
65
  end
54
66
  end
55
-
67
+
56
68
  def test_buffer_size
57
69
  raw_parsed_csv_data = Rcsv.raw_parse(@csv_data, :buffer_size => 10)
58
70
 
@@ -47,9 +47,10 @@ class RcsvWriteTest < Test::Unit::TestCase
47
47
  }
48
48
 
49
49
  @data = [
50
- [1, Date.parse('2012-11-11'), 100.234, true, 1, nil],
51
- [nil, Date.parse('1970-01-02'), -0.1, :nyancat, 123.8891, 0],
52
- [3, Date.parse('2012-12-12'), 0, 'sepulka', -122, 'zoop']
50
+ [1, Date.parse('2012-11-11'), 100.234, true, 1, true],
51
+ ['elephant', Date.parse('1970-01-02'), -0.1, :nyancat, 123.8891, 0],
52
+ [3, Date.parse('2012-12-12'), 0, 'sepulka', -122, 'zoop'],
53
+ [nil, nil, nil, nil, nil, nil]
53
54
  ]
54
55
 
55
56
  @writer = Rcsv.new(@options)
@@ -57,12 +58,12 @@ class RcsvWriteTest < Test::Unit::TestCase
57
58
 
58
59
  def test_rcsv_generate_header
59
60
  assert_equal(
60
- "ID,Date,Money,Banana IDDQD,Hashformat,\r\n", @writer.generate_header
61
+ "ID,Date,Money,Banana IDDQD,Hashformat,\n", @writer.generate_header
61
62
  )
62
63
  end
63
64
 
64
65
  def test_rscv_generate_row
65
- assert_equal("1,2012-11-11,$100.23,true,$1.00,false\r\n", @writer.generate_row(@data.first))
66
+ assert_equal("1,2012-11-11,$100.23,true,$1.00,true\n", @writer.generate_row(@data.first))
66
67
  end
67
68
 
68
69
  def test_rcsv_write
@@ -75,7 +76,7 @@ class RcsvWriteTest < Test::Unit::TestCase
75
76
  io.rewind
76
77
 
77
78
  assert_equal(
78
- "ID,Date,Money,Banana IDDQD,Hashformat,\r\n1,2012-11-11,$100.23,true,$1.00,false\r\n,1970-01-02,$-0.10,nyancat,$123.89,false\r\n3,2012-12-12,$0.00,sepulka,$-122.00,true\r\n", io.read
79
+ "ID,Date,Money,Banana IDDQD,Hashformat,\n1,2012-11-11,$100.23,true,$1.00,true\nelephant,1970-01-02,$-0.10,nyancat,$123.89,false\n3,2012-12-12,$0.00,sepulka,$-122.00,true\n,,,,,\n", io.read
79
80
  )
80
81
  end
81
82
 
@@ -90,7 +91,34 @@ class RcsvWriteTest < Test::Unit::TestCase
90
91
  io.rewind
91
92
 
92
93
  assert_equal(
93
- "1,2012-11-11,$100.23,true,$1.00,false\r\n,1970-01-02,$-0.10,nyancat,$123.89,false\r\n3,2012-12-12,$0.00,sepulka,$-122.00,true\r\n", io.read
94
+ "1,2012-11-11,$100.23,true,$1.00,true\nelephant,1970-01-02,$-0.10,nyancat,$123.89,false\n3,2012-12-12,$0.00,sepulka,$-122.00,true\n,,,,,\n", io.read
94
95
  )
95
96
  end
97
+
98
+ def test_generate_row__dont_require_columns
99
+ writer = Rcsv.new
100
+ assert_equal "1,2,3\n", writer.generate_row([1, 2, 3])
101
+ end
102
+
103
+ def test_generate_row__proper_escaping_for_quotes_and_newlines
104
+ writer = Rcsv.new
105
+ assert_equal "\"before quote \"\" after quote\",\"before newline \n after newline\"\n",
106
+ writer.generate_row(["before quote \" after quote", "before newline \n after newline"])
107
+ end
108
+
109
+ def test_generate_row__should_be_able_to_parse_generated_csv
110
+ writer = Rcsv.new
111
+ quotable_strings = [
112
+ "before quote \" after quote",
113
+ "before newline \n after newline",
114
+ "before separator , after separator",
115
+ "separator , and quote \" oh my"
116
+ ]
117
+ assert_equal [quotable_strings], Rcsv.parse(writer.generate_row(quotable_strings), :header => :none)
118
+ end
119
+
120
+ def test_generate_row__should_handle_alternate_column_separators
121
+ writer = Rcsv.new(:column_separator => '|')
122
+ assert_equal "1|2|\"before pipe | after pipe\"\n", writer.generate_row([1, 2, 'before pipe | after pipe'])
123
+ end
96
124
  end
metadata CHANGED
@@ -1,28 +1,37 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rcsv
3
- version: !ruby/object:Gem::Version
4
- version: 0.2.1
3
+ version: !ruby/object:Gem::Version
4
+ hash: 17
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 3
9
+ - 1
10
+ version: 0.3.1
5
11
  platform: ruby
6
- authors:
12
+ authors:
7
13
  - Arthur Pirogovski
8
14
  autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
- date: 2014-06-06 00:00:00.000000000 Z
17
+
18
+ date: 2015-06-17 00:00:00 Z
12
19
  dependencies: []
20
+
13
21
  description: A libcsv-based CSV parser for Ruby
14
- email:
22
+ email:
15
23
  - arthur@flyingtealeaf.com
16
24
  executables: []
17
- extensions:
25
+
26
+ extensions:
18
27
  - ext/rcsv/extconf.rb
19
28
  extra_rdoc_files: []
20
- files:
29
+
30
+ files:
21
31
  - .gitignore
22
32
  - .travis.yml
23
33
  - COPYING.LESSER
24
34
  - Gemfile
25
- - Gemfile.lock
26
35
  - LICENSE
27
36
  - README.md
28
37
  - RELNOTES
@@ -43,30 +52,39 @@ files:
43
52
  - test/test_rcsv_write.rb
44
53
  homepage: http://github.com/fiksu/rcsv
45
54
  licenses: []
46
- metadata: {}
55
+
47
56
  post_install_message:
48
57
  rdoc_options: []
49
- require_paths:
58
+
59
+ require_paths:
50
60
  - lib
51
61
  - ext
52
- required_ruby_version: !ruby/object:Gem::Requirement
53
- requirements:
54
- - - '>='
55
- - !ruby/object:Gem::Version
56
- version: '0'
57
- required_rubygems_version: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - '>='
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ hash: 3
68
+ segments:
69
+ - 0
70
+ version: "0"
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ hash: 3
77
+ segments:
78
+ - 0
79
+ version: "0"
62
80
  requirements: []
81
+
63
82
  rubyforge_project:
64
- rubygems_version: 2.2.2
83
+ rubygems_version: 1.8.24
65
84
  signing_key:
66
- specification_version: 4
67
- summary: Fast CSV parsing library for MRI based on libcsv. Supports type conversion,
68
- non-strict parsing and basic filtering.
69
- test_files:
85
+ specification_version: 3
86
+ summary: Fast CSV parsing library for MRI based on libcsv. Supports type conversion, non-strict parsing and basic filtering.
87
+ test_files:
70
88
  - test/test_rcsv.csv
71
89
  - test/test_rcsv_parse.rb
72
90
  - test/test_rcsv_raw_parse.rb
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: 0333dfa0687b5fff49a8102ddbea1368a8335919
4
- data.tar.gz: 0c9b534944b188a82c7734b30aac28bdf7f38f80
5
- SHA512:
6
- metadata.gz: 06e065ecb4755ec5acb43132b39672331ce4a5b7e52c656739d6e23b896c5930311232f6e1c7fa0b39357df6e41d138dcee9cb33b040c0e536c1f7f450f48f1a
7
- data.tar.gz: 9c46cedf27acf68b9248d8e9b2329f5162bb099bd67585f6ddc61d85d784b83e20a644649a8666ad5b57d074a1e015f7d42f1c58013f559b5e84d0495a1d6e44
@@ -1,18 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- rcsv (0.2.1)
5
-
6
- GEM
7
- remote: https://rubygems.org/
8
- specs:
9
- rake (0.9.2.2)
10
- rake-compiler (0.8.1)
11
- rake
12
-
13
- PLATFORMS
14
- ruby
15
-
16
- DEPENDENCIES
17
- rake-compiler
18
- rcsv!