rcsv 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -6,3 +6,6 @@ ext/rcsv/rcsv.o
6
6
  lib/rcsv/rcsv.so
7
7
  lib/rcsv/rcsv.bundle
8
8
  .*swp
9
+ Gemfile.lock
10
+ .bundle
11
+ vendor/
@@ -1,10 +1,8 @@
1
1
  language: ruby
2
2
  rvm:
3
+ - ree
4
+ - 1.8.7
3
5
  - 1.9.3
4
- - 1.9.2
5
6
  - 2.0.0
6
- - 2.1.0
7
- - 1.8.7
8
- - ree
9
- env:
10
- - JRUBY_OPTS="--server -Xcext.enabled=true -Xcompile.invokedynamic=false"
7
+ - 2.1.6
8
+ - 2.2.2
data/Gemfile CHANGED
@@ -4,3 +4,4 @@ source 'https://rubygems.org'
4
4
  gemspec
5
5
 
6
6
  gem "rake-compiler", :group => :development
7
+ gem "test-unit", :group => :test if RUBY_VERSION >= '2.2'
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![Build Status](https://travis-ci.org/fiksu/rcsv.png)](https://travis-ci.org/fiksu/rcsv)
4
4
 
5
- Rcsv is a fast CSV parsing library for MRI Ruby. Tested on REE 1.8.7 and Ruby 1.9.3.
5
+ Rcsv is a fast CSV parsing library for MRI Ruby. Tested on REE, 1.8.7, 1.9.3, 2.0.0, 2.1.6, 2.2.2.
6
6
 
7
7
  Contrary to many other gems that implement their own parsers, Rcsv uses libcsv 3.0.3 (http://sourceforge.net/projects/libcsv/). As long as libcsv's API is stable, getting Rcsv to use a newer libcsv version is as simple as updating two files (csv.h and libcsv.c).
8
8
 
@@ -114,6 +114,10 @@ A boolean flag. If enabled, only parses columns that are listed in :columns. Dis
114
114
  An integer. Default is 1MiB (1024 * 1024).
115
115
  Specifies the number of bytes that are read at once, thus allowing reading directly from IO-like objects (files, sockets, etc.).
116
116
 
117
+ ### :output_encoding
118
+ A string. By default, the encoding is auto-detected from the original CSV file.
119
+ If specified, enforces the encoding of parsed string values. The default value keeps the encoding the same as in the original CSV file.
120
+
117
121
 
118
122
  ## Examples
119
123
 
@@ -164,10 +168,14 @@ That would display contents of each row without needing to put the whole parsed
164
168
 
165
169
  This way it is possible to read from a File directly, with a 20MiB buffer and parse lines one by one:
166
170
 
167
- Rcsv.parse(File.open('/some/file.csv'), :buffer_size => 20 * 1024 * 1024) { |row|
171
+ some_csv_file = File.open('/some/file.csv')
172
+
173
+ Rcsv.parse(some_csv_file, :buffer_size => 20 * 1024 * 1024) { |row|
168
174
  puts row.inspect
169
175
  }
170
176
 
177
+ some_csv_file.close
178
+
171
179
 
172
180
  ## To do
173
181
 
@@ -187,5 +195,5 @@ This way it is possible to read from a File directly, with a 20MiB buffer and pa
187
195
 
188
196
  ## Credits
189
197
 
190
- * Maintainer: Arthur Pirogovski @arp
191
- * Contributors: Edward Slavich @eslavich
198
+ * Maintainer: Artur Pyrogovskyi @arp
199
+ * Contributors: Edward Slavich @eslavich, Ivan Zarea @minivan, @97jaz, David Price @dprice, Andrew Grim @stopdropandrew
data/RELNOTES CHANGED
@@ -1,3 +1,16 @@
1
+ Version 0.3.1
2
+ * Travis fixes
3
+ * Fixed older Ruby support in tests
4
+
5
+ Version 0.3.0
6
+ * changed nil to be rendered as empty string by writer (by David Price)
7
+ * changed the default writer newline character from "\r\n" to platform's default (by Andrew Grim)
8
+ * improved writer quoting (by Andrew Grim)
9
+ * added :quote_char writer option (by @97jaz)
10
+ * added :output_encoding reader option (by David Price)
11
+ * added automatic encoding detection by reader (by David Price)
12
+ * updated supported Rubies list to REE, 1.8.7, 1.9.3, 2.0.0, 2.1.6, 2.2.2
13
+
1
14
  Version 0.2.1
2
15
  * removed a bunch of deprecation warnings (by Ivan Zarea)
3
16
  * added Ruby 2.1 support for Travis CI
@@ -110,7 +110,7 @@ void end_of_field_callback(void * field, size_t field_size, void * data) {
110
110
  if (meta->current_col < meta->num_row_conversions) {
111
111
  switch (row_conversion){
112
112
  case 's': /* String */
113
- parsed_field = ENCODED_STR_NEW(field_str, field_size, meta->encoding_index);
113
+ parsed_field = ENCODED_STR_NEW(field_str, field_size, meta->encoding_index);
114
114
  break;
115
115
  case 'i': /* Integer */
116
116
  parsed_field = LL2NUM(atoll(field_str));
@@ -327,6 +327,12 @@ VALUE rcsv_raw_parse(VALUE ensure_container) {
327
327
  csv_set_delim(cp, (unsigned char)*StringValuePtr(option));
328
328
  }
329
329
 
330
+ /* :quote_char sets the character used for quoting data; default is double-quote (") */
331
+ option = rb_hash_aref(options, ID2SYM(rb_intern("quote_char")));
332
+ if (option != Qnil) {
333
+ csv_set_quote(cp, (unsigned char)*StringValuePtr(option));
334
+ }
335
+
330
336
  /* Specify how many rows to skip from the beginning of CSV */
331
337
  option = rb_hash_aref(options, ID2SYM(rb_intern("offset_rows")));
332
338
  if (option != Qnil) {
@@ -382,7 +388,7 @@ VALUE rcsv_raw_parse(VALUE ensure_container) {
382
388
 
383
389
  /* :row_conversions specifies Ruby types that CSV field values should be converted into.
384
390
  Each char of row_conversions string represents Ruby type for CSV field with matching position. */
385
- option = rb_hash_aref(options, ID2SYM(rb_intern("row_conversions")));
391
+ option = rb_hash_aref(options, ID2SYM(rb_intern("row_conversions")));
386
392
  if (option != Qnil) {
387
393
  meta->num_row_conversions = RSTRING_LEN(option);
388
394
  meta->row_conversions = StringValuePtr(option);
@@ -390,7 +396,7 @@ VALUE rcsv_raw_parse(VALUE ensure_container) {
390
396
 
391
397
  /* Column names should be declared explicitly when parsing fields as Hashes */
392
398
  if (meta->row_as_hash) { /* Only matters for hash results */
393
- option = rb_hash_aref(options, ID2SYM(rb_intern("column_names")));
399
+ option = rb_hash_aref(options, ID2SYM(rb_intern("column_names")));
394
400
  if (option == Qnil) {
395
401
  rb_raise(rcsv_parse_error, ":row_as_hash requires :column_names to be set.");
396
402
  } else {
@@ -34,6 +34,9 @@ class LibCsv
34
34
  attach_function :csv_set_delim, [:pointer, :uchar], :void
35
35
  attach_function :csv_get_delim, [:pointer], :uchar
36
36
 
37
+ attach_function :csv_set_quote, [:pointer, :uchar], :void
38
+ attach_function :csv_get_quote, [:pointer], :uchar
39
+
37
40
  attach_function :csv_error, [:pointer], :int
38
41
  attach_function :csv_strerror, [:int], :string
39
42
 
@@ -46,6 +49,10 @@ class LibCsv
46
49
  csv_set_delim(parser, options[:col_sep].ord)
47
50
  end
48
51
 
52
+ if options[:quote_char]
53
+ csv_set_quote(parser, options[:quote_char].ord)
54
+ end
55
+
49
56
  fail "Couldn't initialize libcsv" if result == -1
50
57
 
51
58
  result = [[]]
@@ -82,7 +89,7 @@ class LibCsv
82
89
  csv_fini(parser, end_of_field_callback, end_of_record_callback, nil)
83
90
  csv_free(parser)
84
91
  result.pop if result.last == []
85
-
92
+
86
93
  return result
87
94
  end
88
95
  end
@@ -2,6 +2,7 @@ require "rcsv/rcsv"
2
2
  require "rcsv/version"
3
3
 
4
4
  require "stringio"
5
+ require "English"
5
6
 
6
7
  class Rcsv
7
8
 
@@ -30,6 +31,7 @@ class Rcsv
30
31
  raw_options = {}
31
32
 
32
33
  raw_options[:col_sep] = options[:column_separator] && options[:column_separator][0] || ','
34
+ raw_options[:quote_char] = options[:quote_char] && options[:quote_char][0] || '"'
33
35
  raw_options[:offset_rows] = options[:offset_rows] || 0
34
36
  raw_options[:nostrict] = options[:nostrict]
35
37
  raw_options[:parse_empty_fields_as] = options[:parse_empty_fields_as]
@@ -138,8 +140,16 @@ class Rcsv
138
140
  def initialize(write_options = {})
139
141
  @write_options = write_options
140
142
  @write_options[:column_separator] ||= ','
141
- @write_options[:newline_delimiter] ||= "\r\n" # Making Excel happy...
143
+ @write_options[:newline_delimiter] ||= $INPUT_RECORD_SEPARATOR
142
144
  @write_options[:header] ||= false
145
+
146
+ @quote = '"'
147
+ @escaped_quote = @quote * 2
148
+ @quotable_chars = Regexp.new('[%s%s%s]' % [
149
+ Regexp.escape(@write_options[:column_separator]),
150
+ Regexp.escape(@write_options[:newline_delimiter]),
151
+ Regexp.escape(@quote)
152
+ ])
143
153
  end
144
154
 
145
155
  def write(io, &block)
@@ -161,9 +171,8 @@ class Rcsv
161
171
  max_index = row.size - 1
162
172
 
163
173
  row.each_with_index do |field, index|
164
- unquoted_field = process(field, @write_options[:columns][index])
165
- # TODO: a better quoting
166
- csv_row << (unquoted_field.match(/,/) ? "\"#{unquoted_field}\"" : unquoted_field)
174
+ unquoted_field = process(field, @write_options[:columns] && @write_options[:columns][index])
175
+ csv_row << (unquoted_field.match(@quotable_chars) ? "\"#{unquoted_field.gsub(@quote, @escaped_quote)}\"" : unquoted_field)
167
176
  csv_row << column_separator unless index == max_index
168
177
  end
169
178
 
@@ -173,7 +182,8 @@ class Rcsv
173
182
  protected
174
183
 
175
184
  def process(field, column_options)
176
- return case column_options[:formatter]
185
+ return '' if field.nil?
186
+ return case column_options && column_options[:formatter]
177
187
  when :strftime
178
188
  format = column_options[:format] || "%Y-%m-%d %H:%M:%S %z"
179
189
  field.strftime(format)
@@ -1,3 +1,3 @@
1
1
  class Rcsv
2
- VERSION = "0.2.1"
2
+ VERSION = "0.3.1"
3
3
  end
@@ -16,4 +16,7 @@ Gem::Specification.new do |gem|
16
16
  gem.name = "rcsv"
17
17
  gem.require_paths = ["lib", "ext"]
18
18
  gem.version = Rcsv::VERSION
19
+ if RUBY_VERSION >= '2.2'
20
+ gem.add_development_dependency "test-unit", "~> 3.0.8"
21
+ end
19
22
  end
@@ -32,17 +32,29 @@ class RcsvRawParseTest < Test::Unit::TestCase
32
32
  assert_equal('""C81E-=; **ECCB; .. 89', raw_parsed_tsv_data[3][6])
33
33
  assert_equal("Dallas\t TX", raw_parsed_tsv_data[888][13])
34
34
  end
35
-
35
+
36
+ def test_rcsv_quote_char
37
+ csv = [
38
+ "F0A83489,69118080,,73,7008,2016-10-03,'''''C81E-=; **ECCB; .. 89','130,86',a3eb,1341-04-10,7612.699237971538,5b5e3fce-2ea5-4ca9-9749-90fd6dc8dd66,9,'Los Angeles, CA',e,6.047887837492023,f",
39
+ "48F4FAC9,11599213,,0,1897,2014-02-23,'''''ECCB-=; **A87F; .. 61','787,6',84de,1353-11-10,8078.704911344607,404d9d3e-963f-4199-a2f3-e71f6828b716,6,'Dallas, TX',,-6.684507609859605, 1"
40
+ ]
41
+ csv_data = StringIO.new(csv.join("\n"))
42
+ raw_parsed_csv_data = Rcsv.raw_parse(csv_data, :quote_char => "'")
43
+
44
+ assert_equal("''C81E-=; **ECCB; .. 89", raw_parsed_csv_data[0][6])
45
+ assert_equal('Dallas, TX', raw_parsed_csv_data[1][13])
46
+ end
47
+
36
48
  if String.instance_methods.include?(:encoding)
37
49
  def test_rcsv_output_encoding_default
38
50
  raw_parsed_csv_data = Rcsv.raw_parse(@csv_data)
39
-
51
+
40
52
  assert_equal(raw_parsed_csv_data[0][2].encoding, Encoding::ASCII_8BIT)
41
53
  end
42
54
 
43
55
  def test_rcsv_output_encoding_utf8
44
56
  raw_parsed_csv_data = Rcsv.raw_parse(@csv_data, :output_encoding => "UTF-8")
45
-
57
+
46
58
  assert_equal(raw_parsed_csv_data[0][2].encoding, Encoding::UTF_8)
47
59
  end
48
60
  else
@@ -52,7 +64,7 @@ class RcsvRawParseTest < Test::Unit::TestCase
52
64
  end
53
65
  end
54
66
  end
55
-
67
+
56
68
  def test_buffer_size
57
69
  raw_parsed_csv_data = Rcsv.raw_parse(@csv_data, :buffer_size => 10)
58
70
 
@@ -47,9 +47,10 @@ class RcsvWriteTest < Test::Unit::TestCase
47
47
  }
48
48
 
49
49
  @data = [
50
- [1, Date.parse('2012-11-11'), 100.234, true, 1, nil],
51
- [nil, Date.parse('1970-01-02'), -0.1, :nyancat, 123.8891, 0],
52
- [3, Date.parse('2012-12-12'), 0, 'sepulka', -122, 'zoop']
50
+ [1, Date.parse('2012-11-11'), 100.234, true, 1, true],
51
+ ['elephant', Date.parse('1970-01-02'), -0.1, :nyancat, 123.8891, 0],
52
+ [3, Date.parse('2012-12-12'), 0, 'sepulka', -122, 'zoop'],
53
+ [nil, nil, nil, nil, nil, nil]
53
54
  ]
54
55
 
55
56
  @writer = Rcsv.new(@options)
@@ -57,12 +58,12 @@ class RcsvWriteTest < Test::Unit::TestCase
57
58
 
58
59
  def test_rcsv_generate_header
59
60
  assert_equal(
60
- "ID,Date,Money,Banana IDDQD,Hashformat,\r\n", @writer.generate_header
61
+ "ID,Date,Money,Banana IDDQD,Hashformat,\n", @writer.generate_header
61
62
  )
62
63
  end
63
64
 
64
65
  def test_rscv_generate_row
65
- assert_equal("1,2012-11-11,$100.23,true,$1.00,false\r\n", @writer.generate_row(@data.first))
66
+ assert_equal("1,2012-11-11,$100.23,true,$1.00,true\n", @writer.generate_row(@data.first))
66
67
  end
67
68
 
68
69
  def test_rcsv_write
@@ -75,7 +76,7 @@ class RcsvWriteTest < Test::Unit::TestCase
75
76
  io.rewind
76
77
 
77
78
  assert_equal(
78
- "ID,Date,Money,Banana IDDQD,Hashformat,\r\n1,2012-11-11,$100.23,true,$1.00,false\r\n,1970-01-02,$-0.10,nyancat,$123.89,false\r\n3,2012-12-12,$0.00,sepulka,$-122.00,true\r\n", io.read
79
+ "ID,Date,Money,Banana IDDQD,Hashformat,\n1,2012-11-11,$100.23,true,$1.00,true\nelephant,1970-01-02,$-0.10,nyancat,$123.89,false\n3,2012-12-12,$0.00,sepulka,$-122.00,true\n,,,,,\n", io.read
79
80
  )
80
81
  end
81
82
 
@@ -90,7 +91,34 @@ class RcsvWriteTest < Test::Unit::TestCase
90
91
  io.rewind
91
92
 
92
93
  assert_equal(
93
- "1,2012-11-11,$100.23,true,$1.00,false\r\n,1970-01-02,$-0.10,nyancat,$123.89,false\r\n3,2012-12-12,$0.00,sepulka,$-122.00,true\r\n", io.read
94
+ "1,2012-11-11,$100.23,true,$1.00,true\nelephant,1970-01-02,$-0.10,nyancat,$123.89,false\n3,2012-12-12,$0.00,sepulka,$-122.00,true\n,,,,,\n", io.read
94
95
  )
95
96
  end
97
+
98
+ def test_generate_row__dont_require_columns
99
+ writer = Rcsv.new
100
+ assert_equal "1,2,3\n", writer.generate_row([1, 2, 3])
101
+ end
102
+
103
+ def test_generate_row__proper_escaping_for_quotes_and_newlines
104
+ writer = Rcsv.new
105
+ assert_equal "\"before quote \"\" after quote\",\"before newline \n after newline\"\n",
106
+ writer.generate_row(["before quote \" after quote", "before newline \n after newline"])
107
+ end
108
+
109
+ def test_generate_row__should_be_able_to_parse_generated_csv
110
+ writer = Rcsv.new
111
+ quotable_strings = [
112
+ "before quote \" after quote",
113
+ "before newline \n after newline",
114
+ "before separator , after separator",
115
+ "separator , and quote \" oh my"
116
+ ]
117
+ assert_equal [quotable_strings], Rcsv.parse(writer.generate_row(quotable_strings), :header => :none)
118
+ end
119
+
120
+ def test_generate_row__should_handle_alternate_column_separators
121
+ writer = Rcsv.new(:column_separator => '|')
122
+ assert_equal "1|2|\"before pipe | after pipe\"\n", writer.generate_row([1, 2, 'before pipe | after pipe'])
123
+ end
96
124
  end
metadata CHANGED
@@ -1,28 +1,37 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rcsv
3
- version: !ruby/object:Gem::Version
4
- version: 0.2.1
3
+ version: !ruby/object:Gem::Version
4
+ hash: 17
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 3
9
+ - 1
10
+ version: 0.3.1
5
11
  platform: ruby
6
- authors:
12
+ authors:
7
13
  - Arthur Pirogovski
8
14
  autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
- date: 2014-06-06 00:00:00.000000000 Z
17
+
18
+ date: 2015-06-17 00:00:00 Z
12
19
  dependencies: []
20
+
13
21
  description: A libcsv-based CSV parser for Ruby
14
- email:
22
+ email:
15
23
  - arthur@flyingtealeaf.com
16
24
  executables: []
17
- extensions:
25
+
26
+ extensions:
18
27
  - ext/rcsv/extconf.rb
19
28
  extra_rdoc_files: []
20
- files:
29
+
30
+ files:
21
31
  - .gitignore
22
32
  - .travis.yml
23
33
  - COPYING.LESSER
24
34
  - Gemfile
25
- - Gemfile.lock
26
35
  - LICENSE
27
36
  - README.md
28
37
  - RELNOTES
@@ -43,30 +52,39 @@ files:
43
52
  - test/test_rcsv_write.rb
44
53
  homepage: http://github.com/fiksu/rcsv
45
54
  licenses: []
46
- metadata: {}
55
+
47
56
  post_install_message:
48
57
  rdoc_options: []
49
- require_paths:
58
+
59
+ require_paths:
50
60
  - lib
51
61
  - ext
52
- required_ruby_version: !ruby/object:Gem::Requirement
53
- requirements:
54
- - - '>='
55
- - !ruby/object:Gem::Version
56
- version: '0'
57
- required_rubygems_version: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - '>='
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ hash: 3
68
+ segments:
69
+ - 0
70
+ version: "0"
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ hash: 3
77
+ segments:
78
+ - 0
79
+ version: "0"
62
80
  requirements: []
81
+
63
82
  rubyforge_project:
64
- rubygems_version: 2.2.2
83
+ rubygems_version: 1.8.24
65
84
  signing_key:
66
- specification_version: 4
67
- summary: Fast CSV parsing library for MRI based on libcsv. Supports type conversion,
68
- non-strict parsing and basic filtering.
69
- test_files:
85
+ specification_version: 3
86
+ summary: Fast CSV parsing library for MRI based on libcsv. Supports type conversion, non-strict parsing and basic filtering.
87
+ test_files:
70
88
  - test/test_rcsv.csv
71
89
  - test/test_rcsv_parse.rb
72
90
  - test/test_rcsv_raw_parse.rb
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: 0333dfa0687b5fff49a8102ddbea1368a8335919
4
- data.tar.gz: 0c9b534944b188a82c7734b30aac28bdf7f38f80
5
- SHA512:
6
- metadata.gz: 06e065ecb4755ec5acb43132b39672331ce4a5b7e52c656739d6e23b896c5930311232f6e1c7fa0b39357df6e41d138dcee9cb33b040c0e536c1f7f450f48f1a
7
- data.tar.gz: 9c46cedf27acf68b9248d8e9b2329f5162bb099bd67585f6ddc61d85d784b83e20a644649a8666ad5b57d074a1e015f7d42f1c58013f559b5e84d0495a1d6e44
@@ -1,18 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- rcsv (0.2.1)
5
-
6
- GEM
7
- remote: https://rubygems.org/
8
- specs:
9
- rake (0.9.2.2)
10
- rake-compiler (0.8.1)
11
- rake
12
-
13
- PLATFORMS
14
- ruby
15
-
16
- DEPENDENCIES
17
- rake-compiler
18
- rcsv!