rcsv 0.2.1 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/.travis.yml +4 -6
- data/Gemfile +1 -0
- data/README.md +12 -4
- data/RELNOTES +13 -0
- data/ext/rcsv/rcsv.c +9 -3
- data/lib/lib_csv.rb +8 -1
- data/lib/rcsv.rb +15 -5
- data/lib/rcsv/version.rb +1 -1
- data/rcsv.gemspec +3 -0
- data/test/test_rcsv_raw_parse.rb +16 -4
- data/test/test_rcsv_write.rb +35 -7
- metadata +44 -26
- checksums.yaml +0 -7
- data/Gemfile.lock +0 -18
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[![Build Status](https://travis-ci.org/fiksu/rcsv.png)](https://travis-ci.org/fiksu/rcsv)
|
4
4
|
|
5
|
-
Rcsv is a fast CSV parsing library for MRI Ruby. Tested on REE 1.8.7
|
5
|
+
Rcsv is a fast CSV parsing library for MRI Ruby. Tested on REE, 1.8.7, 1.9.4, 2.0.0, 2.1.6, 2.2.2.
|
6
6
|
|
7
7
|
Contrary to many other gems that implement their own parsers, Rcsv uses libcsv 3.0.3 (http://sourceforge.net/projects/libcsv/). As long as libcsv's API is stable, getting Rcsv to use a newer libcsv version is as simple as updating two files (csv.h and libcsv.c).
|
8
8
|
|
@@ -114,6 +114,10 @@ A boolean flag. If enabled, only parses columns that are listed in :columns. Dis
|
|
114
114
|
An integer. Default is 1MiB (1024 * 1024).
|
115
115
|
Specifies the number of bytes that are read at once, which allows reading directly from IO-like objects (files, sockets, etc.).
|
116
116
|
|
117
|
+
### :output_encoding
|
118
|
+
A string. By default, it is auto-detected from the original CSV file.
|
119
|
+
If specified, enforces the encoding of parsed string values. The default value keeps the encoding the same as in the original CSV file.
|
120
|
+
|
117
121
|
|
118
122
|
## Examples
|
119
123
|
|
@@ -164,10 +168,14 @@ That would display contents of each row without needing to put the whole parsed
|
|
164
168
|
|
165
169
|
This way it is possible to read from a File directly, with a 20MiB buffer and parse lines one by one:
|
166
170
|
|
167
|
-
|
171
|
+
some_csv_file = File.open('/some/file.csv')
|
172
|
+
|
173
|
+
Rcsv.parse(some_csv_file, :buffer_size => 20 * 1024 * 1024) { |row|
|
168
174
|
puts row.inspect
|
169
175
|
}
|
170
176
|
|
177
|
+
some_csv_file.close
|
178
|
+
|
171
179
|
|
172
180
|
## To do
|
173
181
|
|
@@ -187,5 +195,5 @@ This way it is possible to read from a File directly, with a 20MiB buffer and pa
|
|
187
195
|
|
188
196
|
## Credits
|
189
197
|
|
190
|
-
* Maintainer:
|
191
|
-
* Contributors: Edward Slavich @eslavich
|
198
|
+
* Maintainer: Artur Pyrogovskyi @arp
|
199
|
+
* Contributors: Edward Slavich @eslavich, Ivan Zarea @minivan, @97jaz, David Price @dprice, Andrew Grim @stopdropandrew
|
data/RELNOTES
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
Version 0.3.1
|
2
|
+
* Travis fixes
|
3
|
+
* Fixed older Ruby support in tests
|
4
|
+
|
5
|
+
Version 0.3.0
|
6
|
+
* changed nil to be rendered as empty string by writer (by David Price)
|
7
|
+
* changed the default writer newline character from "\r\n" to platform's default (by Andrew Grim)
|
8
|
+
* improved writer quoting (by Andrew Grim)
|
9
|
+
* added :quote_char writer option (by @97jaz)
|
10
|
+
* added :output_encoding reader option (by David Price)
|
11
|
+
* added automatic encoding detection by reader (by David Price)
|
12
|
+
* updated supported Rubies list to REE, 1.8.7, 1.9.4, 2.0.0, 2.1.6, 2.2.2
|
13
|
+
|
1
14
|
Version 0.2.1
|
2
15
|
* removed a bunch of deprecation warnings (by Ivan Zarea)
|
3
16
|
* added Ruby 2.1 support for Travis CI
|
data/ext/rcsv/rcsv.c
CHANGED
@@ -110,7 +110,7 @@ void end_of_field_callback(void * field, size_t field_size, void * data) {
|
|
110
110
|
if (meta->current_col < meta->num_row_conversions) {
|
111
111
|
switch (row_conversion){
|
112
112
|
case 's': /* String */
|
113
|
-
parsed_field = ENCODED_STR_NEW(field_str, field_size, meta->encoding_index);
|
113
|
+
parsed_field = ENCODED_STR_NEW(field_str, field_size, meta->encoding_index);
|
114
114
|
break;
|
115
115
|
case 'i': /* Integer */
|
116
116
|
parsed_field = LL2NUM(atoll(field_str));
|
@@ -327,6 +327,12 @@ VALUE rcsv_raw_parse(VALUE ensure_container) {
|
|
327
327
|
csv_set_delim(cp, (unsigned char)*StringValuePtr(option));
|
328
328
|
}
|
329
329
|
|
330
|
+
/* :quote_char sets the character used for quoting data; default is double-quote (") */
|
331
|
+
option = rb_hash_aref(options, ID2SYM(rb_intern("quote_char")));
|
332
|
+
if (option != Qnil) {
|
333
|
+
csv_set_quote(cp, (unsigned char)*StringValuePtr(option));
|
334
|
+
}
|
335
|
+
|
330
336
|
/* Specify how many rows to skip from the beginning of CSV */
|
331
337
|
option = rb_hash_aref(options, ID2SYM(rb_intern("offset_rows")));
|
332
338
|
if (option != Qnil) {
|
@@ -382,7 +388,7 @@ VALUE rcsv_raw_parse(VALUE ensure_container) {
|
|
382
388
|
|
383
389
|
/* :row_conversions specifies Ruby types that CSV field values should be converted into.
|
384
390
|
Each char of row_conversions string represents Ruby type for CSV field with matching position. */
|
385
|
-
option = rb_hash_aref(options, ID2SYM(rb_intern("row_conversions")));
|
391
|
+
option = rb_hash_aref(options, ID2SYM(rb_intern("row_conversions")));
|
386
392
|
if (option != Qnil) {
|
387
393
|
meta->num_row_conversions = RSTRING_LEN(option);
|
388
394
|
meta->row_conversions = StringValuePtr(option);
|
@@ -390,7 +396,7 @@ VALUE rcsv_raw_parse(VALUE ensure_container) {
|
|
390
396
|
|
391
397
|
/* Column names should be declared explicitly when parsing fields as Hashes */
|
392
398
|
if (meta->row_as_hash) { /* Only matters for hash results */
|
393
|
-
option = rb_hash_aref(options, ID2SYM(rb_intern("column_names")));
|
399
|
+
option = rb_hash_aref(options, ID2SYM(rb_intern("column_names")));
|
394
400
|
if (option == Qnil) {
|
395
401
|
rb_raise(rcsv_parse_error, ":row_as_hash requires :column_names to be set.");
|
396
402
|
} else {
|
data/lib/lib_csv.rb
CHANGED
@@ -34,6 +34,9 @@ class LibCsv
|
|
34
34
|
attach_function :csv_set_delim, [:pointer, :uchar], :void
|
35
35
|
attach_function :csv_get_delim, [:pointer], :uchar
|
36
36
|
|
37
|
+
attach_function :csv_set_quote, [:pointer, :uchar], :void
|
38
|
+
attach_function :csv_get_quote, [:pointer], :uchar
|
39
|
+
|
37
40
|
attach_function :csv_error, [:pointer], :int
|
38
41
|
attach_function :csv_strerror, [:int], :string
|
39
42
|
|
@@ -46,6 +49,10 @@ class LibCsv
|
|
46
49
|
csv_set_delim(parser, options[:col_sep].ord)
|
47
50
|
end
|
48
51
|
|
52
|
+
if options[:quote_char]
|
53
|
+
csv_set_quote(parser, options[:quote_char].ord)
|
54
|
+
end
|
55
|
+
|
49
56
|
fail "Couldn't initialize libcsv" if result == -1
|
50
57
|
|
51
58
|
result = [[]]
|
@@ -82,7 +89,7 @@ class LibCsv
|
|
82
89
|
csv_fini(parser, end_of_field_callback, end_of_record_callback, nil)
|
83
90
|
csv_free(parser)
|
84
91
|
result.pop if result.last == []
|
85
|
-
|
92
|
+
|
86
93
|
return result
|
87
94
|
end
|
88
95
|
end
|
data/lib/rcsv.rb
CHANGED
@@ -2,6 +2,7 @@ require "rcsv/rcsv"
|
|
2
2
|
require "rcsv/version"
|
3
3
|
|
4
4
|
require "stringio"
|
5
|
+
require "English"
|
5
6
|
|
6
7
|
class Rcsv
|
7
8
|
|
@@ -30,6 +31,7 @@ class Rcsv
|
|
30
31
|
raw_options = {}
|
31
32
|
|
32
33
|
raw_options[:col_sep] = options[:column_separator] && options[:column_separator][0] || ','
|
34
|
+
raw_options[:quote_char] = options[:quote_char] && options[:quote_char][0] || '"'
|
33
35
|
raw_options[:offset_rows] = options[:offset_rows] || 0
|
34
36
|
raw_options[:nostrict] = options[:nostrict]
|
35
37
|
raw_options[:parse_empty_fields_as] = options[:parse_empty_fields_as]
|
@@ -138,8 +140,16 @@ class Rcsv
|
|
138
140
|
def initialize(write_options = {})
|
139
141
|
@write_options = write_options
|
140
142
|
@write_options[:column_separator] ||= ','
|
141
|
-
@write_options[:newline_delimiter] ||=
|
143
|
+
@write_options[:newline_delimiter] ||= $INPUT_RECORD_SEPARATOR
|
142
144
|
@write_options[:header] ||= false
|
145
|
+
|
146
|
+
@quote = '"'
|
147
|
+
@escaped_quote = @quote * 2
|
148
|
+
@quotable_chars = Regexp.new('[%s%s%s]' % [
|
149
|
+
Regexp.escape(@write_options[:column_separator]),
|
150
|
+
Regexp.escape(@write_options[:newline_delimiter]),
|
151
|
+
Regexp.escape(@quote)
|
152
|
+
])
|
143
153
|
end
|
144
154
|
|
145
155
|
def write(io, &block)
|
@@ -161,9 +171,8 @@ class Rcsv
|
|
161
171
|
max_index = row.size - 1
|
162
172
|
|
163
173
|
row.each_with_index do |field, index|
|
164
|
-
unquoted_field = process(field, @write_options[:columns][index])
|
165
|
-
#
|
166
|
-
csv_row << (unquoted_field.match(/,/) ? "\"#{unquoted_field}\"" : unquoted_field)
|
174
|
+
unquoted_field = process(field, @write_options[:columns] && @write_options[:columns][index])
|
175
|
+
csv_row << (unquoted_field.match(@quotable_chars) ? "\"#{unquoted_field.gsub(@quote, @escaped_quote)}\"" : unquoted_field)
|
167
176
|
csv_row << column_separator unless index == max_index
|
168
177
|
end
|
169
178
|
|
@@ -173,7 +182,8 @@ class Rcsv
|
|
173
182
|
protected
|
174
183
|
|
175
184
|
def process(field, column_options)
|
176
|
-
return
|
185
|
+
return '' if field.nil?
|
186
|
+
return case column_options && column_options[:formatter]
|
177
187
|
when :strftime
|
178
188
|
format = column_options[:format] || "%Y-%m-%d %H:%M:%S %z"
|
179
189
|
field.strftime(format)
|
data/lib/rcsv/version.rb
CHANGED
data/rcsv.gemspec
CHANGED
data/test/test_rcsv_raw_parse.rb
CHANGED
@@ -32,17 +32,29 @@ class RcsvRawParseTest < Test::Unit::TestCase
|
|
32
32
|
assert_equal('""C81E-=; **ECCB; .. 89', raw_parsed_tsv_data[3][6])
|
33
33
|
assert_equal("Dallas\t TX", raw_parsed_tsv_data[888][13])
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
|
+
def test_rcsv_quote_char
|
37
|
+
csv = [
|
38
|
+
"F0A83489,69118080,,73,7008,2016-10-03,'''''C81E-=; **ECCB; .. 89','130,86',a3eb,1341-04-10,7612.699237971538,5b5e3fce-2ea5-4ca9-9749-90fd6dc8dd66,9,'Los Angeles, CA',e,6.047887837492023,f",
|
39
|
+
"48F4FAC9,11599213,,0,1897,2014-02-23,'''''ECCB-=; **A87F; .. 61','787,6',84de,1353-11-10,8078.704911344607,404d9d3e-963f-4199-a2f3-e71f6828b716,6,'Dallas, TX',,-6.684507609859605, 1"
|
40
|
+
]
|
41
|
+
csv_data = StringIO.new(csv.join("\n"))
|
42
|
+
raw_parsed_csv_data = Rcsv.raw_parse(csv_data, :quote_char => "'")
|
43
|
+
|
44
|
+
assert_equal("''C81E-=; **ECCB; .. 89", raw_parsed_csv_data[0][6])
|
45
|
+
assert_equal('Dallas, TX', raw_parsed_csv_data[1][13])
|
46
|
+
end
|
47
|
+
|
36
48
|
if String.instance_methods.include?(:encoding)
|
37
49
|
def test_rcsv_output_encoding_default
|
38
50
|
raw_parsed_csv_data = Rcsv.raw_parse(@csv_data)
|
39
|
-
|
51
|
+
|
40
52
|
assert_equal(raw_parsed_csv_data[0][2].encoding, Encoding::ASCII_8BIT)
|
41
53
|
end
|
42
54
|
|
43
55
|
def test_rcsv_output_encoding_utf8
|
44
56
|
raw_parsed_csv_data = Rcsv.raw_parse(@csv_data, :output_encoding => "UTF-8")
|
45
|
-
|
57
|
+
|
46
58
|
assert_equal(raw_parsed_csv_data[0][2].encoding, Encoding::UTF_8)
|
47
59
|
end
|
48
60
|
else
|
@@ -52,7 +64,7 @@ class RcsvRawParseTest < Test::Unit::TestCase
|
|
52
64
|
end
|
53
65
|
end
|
54
66
|
end
|
55
|
-
|
67
|
+
|
56
68
|
def test_buffer_size
|
57
69
|
raw_parsed_csv_data = Rcsv.raw_parse(@csv_data, :buffer_size => 10)
|
58
70
|
|
data/test/test_rcsv_write.rb
CHANGED
@@ -47,9 +47,10 @@ class RcsvWriteTest < Test::Unit::TestCase
|
|
47
47
|
}
|
48
48
|
|
49
49
|
@data = [
|
50
|
-
[1, Date.parse('2012-11-11'), 100.234, true, 1,
|
51
|
-
[
|
52
|
-
[3, Date.parse('2012-12-12'), 0, 'sepulka', -122, 'zoop']
|
50
|
+
[1, Date.parse('2012-11-11'), 100.234, true, 1, true],
|
51
|
+
['elephant', Date.parse('1970-01-02'), -0.1, :nyancat, 123.8891, 0],
|
52
|
+
[3, Date.parse('2012-12-12'), 0, 'sepulka', -122, 'zoop'],
|
53
|
+
[nil, nil, nil, nil, nil, nil]
|
53
54
|
]
|
54
55
|
|
55
56
|
@writer = Rcsv.new(@options)
|
@@ -57,12 +58,12 @@ class RcsvWriteTest < Test::Unit::TestCase
|
|
57
58
|
|
58
59
|
def test_rcsv_generate_header
|
59
60
|
assert_equal(
|
60
|
-
"ID,Date,Money,Banana IDDQD,Hashformat,\
|
61
|
+
"ID,Date,Money,Banana IDDQD,Hashformat,\n", @writer.generate_header
|
61
62
|
)
|
62
63
|
end
|
63
64
|
|
64
65
|
def test_rscv_generate_row
|
65
|
-
assert_equal("1,2012-11-11,$100.23,true,$1.00,
|
66
|
+
assert_equal("1,2012-11-11,$100.23,true,$1.00,true\n", @writer.generate_row(@data.first))
|
66
67
|
end
|
67
68
|
|
68
69
|
def test_rcsv_write
|
@@ -75,7 +76,7 @@ class RcsvWriteTest < Test::Unit::TestCase
|
|
75
76
|
io.rewind
|
76
77
|
|
77
78
|
assert_equal(
|
78
|
-
"ID,Date,Money,Banana IDDQD,Hashformat,\
|
79
|
+
"ID,Date,Money,Banana IDDQD,Hashformat,\n1,2012-11-11,$100.23,true,$1.00,true\nelephant,1970-01-02,$-0.10,nyancat,$123.89,false\n3,2012-12-12,$0.00,sepulka,$-122.00,true\n,,,,,\n", io.read
|
79
80
|
)
|
80
81
|
end
|
81
82
|
|
@@ -90,7 +91,34 @@ class RcsvWriteTest < Test::Unit::TestCase
|
|
90
91
|
io.rewind
|
91
92
|
|
92
93
|
assert_equal(
|
93
|
-
"1,2012-11-11,$100.23,true,$1.00,
|
94
|
+
"1,2012-11-11,$100.23,true,$1.00,true\nelephant,1970-01-02,$-0.10,nyancat,$123.89,false\n3,2012-12-12,$0.00,sepulka,$-122.00,true\n,,,,,\n", io.read
|
94
95
|
)
|
95
96
|
end
|
97
|
+
|
98
|
+
def test_generate_row__dont_require_columns
|
99
|
+
writer = Rcsv.new
|
100
|
+
assert_equal "1,2,3\n", writer.generate_row([1, 2, 3])
|
101
|
+
end
|
102
|
+
|
103
|
+
def test_generate_row__proper_escaping_for_quotes_and_newlines
|
104
|
+
writer = Rcsv.new
|
105
|
+
assert_equal "\"before quote \"\" after quote\",\"before newline \n after newline\"\n",
|
106
|
+
writer.generate_row(["before quote \" after quote", "before newline \n after newline"])
|
107
|
+
end
|
108
|
+
|
109
|
+
def test_generate_row__should_be_able_to_parse_generated_csv
|
110
|
+
writer = Rcsv.new
|
111
|
+
quotable_strings = [
|
112
|
+
"before quote \" after quote",
|
113
|
+
"before newline \n after newline",
|
114
|
+
"before separator , after separator",
|
115
|
+
"separator , and quote \" oh my"
|
116
|
+
]
|
117
|
+
assert_equal [quotable_strings], Rcsv.parse(writer.generate_row(quotable_strings), :header => :none)
|
118
|
+
end
|
119
|
+
|
120
|
+
def test_generate_row__should_handle_alternate_column_separators
|
121
|
+
writer = Rcsv.new(:column_separator => '|')
|
122
|
+
assert_equal "1|2|\"before pipe | after pipe\"\n", writer.generate_row([1, 2, 'before pipe | after pipe'])
|
123
|
+
end
|
96
124
|
end
|
metadata
CHANGED
@@ -1,28 +1,37 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: rcsv
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 17
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 3
|
9
|
+
- 1
|
10
|
+
version: 0.3.1
|
5
11
|
platform: ruby
|
6
|
-
authors:
|
12
|
+
authors:
|
7
13
|
- Arthur Pirogovski
|
8
14
|
autorequire:
|
9
15
|
bindir: bin
|
10
16
|
cert_chain: []
|
11
|
-
|
17
|
+
|
18
|
+
date: 2015-06-17 00:00:00 Z
|
12
19
|
dependencies: []
|
20
|
+
|
13
21
|
description: A libcsv-based CSV parser for Ruby
|
14
|
-
email:
|
22
|
+
email:
|
15
23
|
- arthur@flyingtealeaf.com
|
16
24
|
executables: []
|
17
|
-
|
25
|
+
|
26
|
+
extensions:
|
18
27
|
- ext/rcsv/extconf.rb
|
19
28
|
extra_rdoc_files: []
|
20
|
-
|
29
|
+
|
30
|
+
files:
|
21
31
|
- .gitignore
|
22
32
|
- .travis.yml
|
23
33
|
- COPYING.LESSER
|
24
34
|
- Gemfile
|
25
|
-
- Gemfile.lock
|
26
35
|
- LICENSE
|
27
36
|
- README.md
|
28
37
|
- RELNOTES
|
@@ -43,30 +52,39 @@ files:
|
|
43
52
|
- test/test_rcsv_write.rb
|
44
53
|
homepage: http://github.com/fiksu/rcsv
|
45
54
|
licenses: []
|
46
|
-
|
55
|
+
|
47
56
|
post_install_message:
|
48
57
|
rdoc_options: []
|
49
|
-
|
58
|
+
|
59
|
+
require_paths:
|
50
60
|
- lib
|
51
61
|
- ext
|
52
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
hash: 3
|
68
|
+
segments:
|
69
|
+
- 0
|
70
|
+
version: "0"
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
hash: 3
|
77
|
+
segments:
|
78
|
+
- 0
|
79
|
+
version: "0"
|
62
80
|
requirements: []
|
81
|
+
|
63
82
|
rubyforge_project:
|
64
|
-
rubygems_version:
|
83
|
+
rubygems_version: 1.8.24
|
65
84
|
signing_key:
|
66
|
-
specification_version:
|
67
|
-
summary: Fast CSV parsing library for MRI based on libcsv. Supports type conversion,
|
68
|
-
|
69
|
-
test_files:
|
85
|
+
specification_version: 3
|
86
|
+
summary: Fast CSV parsing library for MRI based on libcsv. Supports type conversion, non-strict parsing and basic filtering.
|
87
|
+
test_files:
|
70
88
|
- test/test_rcsv.csv
|
71
89
|
- test/test_rcsv_parse.rb
|
72
90
|
- test/test_rcsv_raw_parse.rb
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: 0333dfa0687b5fff49a8102ddbea1368a8335919
|
4
|
-
data.tar.gz: 0c9b534944b188a82c7734b30aac28bdf7f38f80
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: 06e065ecb4755ec5acb43132b39672331ce4a5b7e52c656739d6e23b896c5930311232f6e1c7fa0b39357df6e41d138dcee9cb33b040c0e536c1f7f450f48f1a
|
7
|
-
data.tar.gz: 9c46cedf27acf68b9248d8e9b2329f5162bb099bd67585f6ddc61d85d784b83e20a644649a8666ad5b57d074a1e015f7d42f1c58013f559b5e84d0495a1d6e44
|