rcsv 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/.travis.yml +4 -6
- data/Gemfile +1 -0
- data/README.md +12 -4
- data/RELNOTES +13 -0
- data/ext/rcsv/rcsv.c +9 -3
- data/lib/lib_csv.rb +8 -1
- data/lib/rcsv.rb +15 -5
- data/lib/rcsv/version.rb +1 -1
- data/rcsv.gemspec +3 -0
- data/test/test_rcsv_raw_parse.rb +16 -4
- data/test/test_rcsv_write.rb +35 -7
- metadata +44 -26
- checksums.yaml +0 -7
- data/Gemfile.lock +0 -18
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[](https://travis-ci.org/fiksu/rcsv)
|
4
4
|
|
5
|
-
Rcsv is a fast CSV parsing library for MRI Ruby. Tested on REE 1.8.7
|
5
|
+
Rcsv is a fast CSV parsing library for MRI Ruby. Tested on REE, 1.8.7, 1.9.4, 2.0.0, 2.1.6, 2.2.2.
|
6
6
|
|
7
7
|
Contrary to many other gems that implement their own parsers, Rcsv uses libcsv 3.0.3 (http://sourceforge.net/projects/libcsv/). As long as libcsv's API is stable, getting Rcsv to use a newer libcsv version is as simple as updating two files (csv.h and libcsv.c).
|
8
8
|
|
@@ -114,6 +114,10 @@ A boolean flag. If enabled, only parses columns that are listed in :columns. Dis
|
|
114
114
|
An integer. Default is 1MiB (1024 * 1024).
|
115
115
|
Specifies the number of bytes that are read at once, which allows reading directly from IO-like objects (files, sockets, etc.).
|
116
116
|
|
117
|
+
### :output_encoding
|
118
|
+
A string. By default, it is auto-detected from the original CSV file.
|
119
|
+
If specified, enforces the encoding of parsed string values. The default value keeps the encoding the same as in the original CSV file.
|
120
|
+
|
117
121
|
|
118
122
|
## Examples
|
119
123
|
|
@@ -164,10 +168,14 @@ That would display contents of each row without needing to put the whole parsed
|
|
164
168
|
|
165
169
|
This way it is possible to read from a File directly, with a 20MiB buffer and parse lines one by one:
|
166
170
|
|
167
|
-
|
171
|
+
some_csv_file = File.open('/some/file.csv')
|
172
|
+
|
173
|
+
Rcsv.parse(some_csv_file, :buffer_size => 20 * 1024 * 1024) { |row|
|
168
174
|
puts row.inspect
|
169
175
|
}
|
170
176
|
|
177
|
+
some_csv_file.close
|
178
|
+
|
171
179
|
|
172
180
|
## To do
|
173
181
|
|
@@ -187,5 +195,5 @@ This way it is possible to read from a File directly, with a 20MiB buffer and pa
|
|
187
195
|
|
188
196
|
## Credits
|
189
197
|
|
190
|
-
* Maintainer:
|
191
|
-
* Contributors: Edward Slavich @eslavich
|
198
|
+
* Maintainer: Artur Pyrogovskyi @arp
|
199
|
+
* Contributors: Edward Slavich @eslavich, Ivan Zarea @minivan, @97jaz, David Price @dprice, Andrew Grim @stopdropandrew
|
data/RELNOTES
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
Version 0.3.1
|
2
|
+
* Travis fixes
|
3
|
+
* Fixed older Ruby support in tests
|
4
|
+
|
5
|
+
Version 0.3.0
|
6
|
+
* changed nil to be rendered as empty string by writer (by David Price)
|
7
|
+
* changed the default writer newline character from "\r\n" to platform's default (by Andrew Grim)
|
8
|
+
* improved writer quoting (by Andrew Grim)
|
9
|
+
* added :quote_char writer option (by @97jaz)
|
10
|
+
* added :output_encoding reader option (by David Price)
|
11
|
+
* added automatic encoding detection by reader (by David Price)
|
12
|
+
* updated supported Rubies list to REE, 1.8.7, 1.9.4, 2.0.0, 2.1.6, 2.2.2
|
13
|
+
|
1
14
|
Version 0.2.1
|
2
15
|
* removed a bunch of deprecation warnings (by Ivan Zarea)
|
3
16
|
* added Ruby 2.1 support for Travis CI
|
data/ext/rcsv/rcsv.c
CHANGED
@@ -110,7 +110,7 @@ void end_of_field_callback(void * field, size_t field_size, void * data) {
|
|
110
110
|
if (meta->current_col < meta->num_row_conversions) {
|
111
111
|
switch (row_conversion){
|
112
112
|
case 's': /* String */
|
113
|
-
parsed_field = ENCODED_STR_NEW(field_str, field_size, meta->encoding_index);
|
113
|
+
parsed_field = ENCODED_STR_NEW(field_str, field_size, meta->encoding_index);
|
114
114
|
break;
|
115
115
|
case 'i': /* Integer */
|
116
116
|
parsed_field = LL2NUM(atoll(field_str));
|
@@ -327,6 +327,12 @@ VALUE rcsv_raw_parse(VALUE ensure_container) {
|
|
327
327
|
csv_set_delim(cp, (unsigned char)*StringValuePtr(option));
|
328
328
|
}
|
329
329
|
|
330
|
+
/* :quote_char sets the character used for quoting data; default is double-quote (") */
|
331
|
+
option = rb_hash_aref(options, ID2SYM(rb_intern("quote_char")));
|
332
|
+
if (option != Qnil) {
|
333
|
+
csv_set_quote(cp, (unsigned char)*StringValuePtr(option));
|
334
|
+
}
|
335
|
+
|
330
336
|
/* Specify how many rows to skip from the beginning of CSV */
|
331
337
|
option = rb_hash_aref(options, ID2SYM(rb_intern("offset_rows")));
|
332
338
|
if (option != Qnil) {
|
@@ -382,7 +388,7 @@ VALUE rcsv_raw_parse(VALUE ensure_container) {
|
|
382
388
|
|
383
389
|
/* :row_conversions specifies Ruby types that CSV field values should be converted into.
|
384
390
|
Each char of row_conversions string represents Ruby type for CSV field with matching position. */
|
385
|
-
option = rb_hash_aref(options, ID2SYM(rb_intern("row_conversions")));
|
391
|
+
option = rb_hash_aref(options, ID2SYM(rb_intern("row_conversions")));
|
386
392
|
if (option != Qnil) {
|
387
393
|
meta->num_row_conversions = RSTRING_LEN(option);
|
388
394
|
meta->row_conversions = StringValuePtr(option);
|
@@ -390,7 +396,7 @@ VALUE rcsv_raw_parse(VALUE ensure_container) {
|
|
390
396
|
|
391
397
|
/* Column names should be declared explicitly when parsing fields as Hashes */
|
392
398
|
if (meta->row_as_hash) { /* Only matters for hash results */
|
393
|
-
option = rb_hash_aref(options, ID2SYM(rb_intern("column_names")));
|
399
|
+
option = rb_hash_aref(options, ID2SYM(rb_intern("column_names")));
|
394
400
|
if (option == Qnil) {
|
395
401
|
rb_raise(rcsv_parse_error, ":row_as_hash requires :column_names to be set.");
|
396
402
|
} else {
|
data/lib/lib_csv.rb
CHANGED
@@ -34,6 +34,9 @@ class LibCsv
|
|
34
34
|
attach_function :csv_set_delim, [:pointer, :uchar], :void
|
35
35
|
attach_function :csv_get_delim, [:pointer], :uchar
|
36
36
|
|
37
|
+
attach_function :csv_set_quote, [:pointer, :uchar], :void
|
38
|
+
attach_function :csv_get_quote, [:pointer], :uchar
|
39
|
+
|
37
40
|
attach_function :csv_error, [:pointer], :int
|
38
41
|
attach_function :csv_strerror, [:int], :string
|
39
42
|
|
@@ -46,6 +49,10 @@ class LibCsv
|
|
46
49
|
csv_set_delim(parser, options[:col_sep].ord)
|
47
50
|
end
|
48
51
|
|
52
|
+
if options[:quote_char]
|
53
|
+
csv_set_quote(parser, options[:quote_char].ord)
|
54
|
+
end
|
55
|
+
|
49
56
|
fail "Couldn't initialize libcsv" if result == -1
|
50
57
|
|
51
58
|
result = [[]]
|
@@ -82,7 +89,7 @@ class LibCsv
|
|
82
89
|
csv_fini(parser, end_of_field_callback, end_of_record_callback, nil)
|
83
90
|
csv_free(parser)
|
84
91
|
result.pop if result.last == []
|
85
|
-
|
92
|
+
|
86
93
|
return result
|
87
94
|
end
|
88
95
|
end
|
data/lib/rcsv.rb
CHANGED
@@ -2,6 +2,7 @@ require "rcsv/rcsv"
|
|
2
2
|
require "rcsv/version"
|
3
3
|
|
4
4
|
require "stringio"
|
5
|
+
require "English"
|
5
6
|
|
6
7
|
class Rcsv
|
7
8
|
|
@@ -30,6 +31,7 @@ class Rcsv
|
|
30
31
|
raw_options = {}
|
31
32
|
|
32
33
|
raw_options[:col_sep] = options[:column_separator] && options[:column_separator][0] || ','
|
34
|
+
raw_options[:quote_char] = options[:quote_char] && options[:quote_char][0] || '"'
|
33
35
|
raw_options[:offset_rows] = options[:offset_rows] || 0
|
34
36
|
raw_options[:nostrict] = options[:nostrict]
|
35
37
|
raw_options[:parse_empty_fields_as] = options[:parse_empty_fields_as]
|
@@ -138,8 +140,16 @@ class Rcsv
|
|
138
140
|
def initialize(write_options = {})
|
139
141
|
@write_options = write_options
|
140
142
|
@write_options[:column_separator] ||= ','
|
141
|
-
@write_options[:newline_delimiter] ||=
|
143
|
+
@write_options[:newline_delimiter] ||= $INPUT_RECORD_SEPARATOR
|
142
144
|
@write_options[:header] ||= false
|
145
|
+
|
146
|
+
@quote = '"'
|
147
|
+
@escaped_quote = @quote * 2
|
148
|
+
@quotable_chars = Regexp.new('[%s%s%s]' % [
|
149
|
+
Regexp.escape(@write_options[:column_separator]),
|
150
|
+
Regexp.escape(@write_options[:newline_delimiter]),
|
151
|
+
Regexp.escape(@quote)
|
152
|
+
])
|
143
153
|
end
|
144
154
|
|
145
155
|
def write(io, &block)
|
@@ -161,9 +171,8 @@ class Rcsv
|
|
161
171
|
max_index = row.size - 1
|
162
172
|
|
163
173
|
row.each_with_index do |field, index|
|
164
|
-
unquoted_field = process(field, @write_options[:columns][index])
|
165
|
-
#
|
166
|
-
csv_row << (unquoted_field.match(/,/) ? "\"#{unquoted_field}\"" : unquoted_field)
|
174
|
+
unquoted_field = process(field, @write_options[:columns] && @write_options[:columns][index])
|
175
|
+
csv_row << (unquoted_field.match(@quotable_chars) ? "\"#{unquoted_field.gsub(@quote, @escaped_quote)}\"" : unquoted_field)
|
167
176
|
csv_row << column_separator unless index == max_index
|
168
177
|
end
|
169
178
|
|
@@ -173,7 +182,8 @@ class Rcsv
|
|
173
182
|
protected
|
174
183
|
|
175
184
|
def process(field, column_options)
|
176
|
-
return
|
185
|
+
return '' if field.nil?
|
186
|
+
return case column_options && column_options[:formatter]
|
177
187
|
when :strftime
|
178
188
|
format = column_options[:format] || "%Y-%m-%d %H:%M:%S %z"
|
179
189
|
field.strftime(format)
|
data/lib/rcsv/version.rb
CHANGED
data/rcsv.gemspec
CHANGED
data/test/test_rcsv_raw_parse.rb
CHANGED
@@ -32,17 +32,29 @@ class RcsvRawParseTest < Test::Unit::TestCase
|
|
32
32
|
assert_equal('""C81E-=; **ECCB; .. 89', raw_parsed_tsv_data[3][6])
|
33
33
|
assert_equal("Dallas\t TX", raw_parsed_tsv_data[888][13])
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
|
+
def test_rcsv_quote_char
|
37
|
+
csv = [
|
38
|
+
"F0A83489,69118080,,73,7008,2016-10-03,'''''C81E-=; **ECCB; .. 89','130,86',a3eb,1341-04-10,7612.699237971538,5b5e3fce-2ea5-4ca9-9749-90fd6dc8dd66,9,'Los Angeles, CA',e,6.047887837492023,f",
|
39
|
+
"48F4FAC9,11599213,,0,1897,2014-02-23,'''''ECCB-=; **A87F; .. 61','787,6',84de,1353-11-10,8078.704911344607,404d9d3e-963f-4199-a2f3-e71f6828b716,6,'Dallas, TX',,-6.684507609859605, 1"
|
40
|
+
]
|
41
|
+
csv_data = StringIO.new(csv.join("\n"))
|
42
|
+
raw_parsed_csv_data = Rcsv.raw_parse(csv_data, :quote_char => "'")
|
43
|
+
|
44
|
+
assert_equal("''C81E-=; **ECCB; .. 89", raw_parsed_csv_data[0][6])
|
45
|
+
assert_equal('Dallas, TX', raw_parsed_csv_data[1][13])
|
46
|
+
end
|
47
|
+
|
36
48
|
if String.instance_methods.include?(:encoding)
|
37
49
|
def test_rcsv_output_encoding_default
|
38
50
|
raw_parsed_csv_data = Rcsv.raw_parse(@csv_data)
|
39
|
-
|
51
|
+
|
40
52
|
assert_equal(raw_parsed_csv_data[0][2].encoding, Encoding::ASCII_8BIT)
|
41
53
|
end
|
42
54
|
|
43
55
|
def test_rcsv_output_encoding_utf8
|
44
56
|
raw_parsed_csv_data = Rcsv.raw_parse(@csv_data, :output_encoding => "UTF-8")
|
45
|
-
|
57
|
+
|
46
58
|
assert_equal(raw_parsed_csv_data[0][2].encoding, Encoding::UTF_8)
|
47
59
|
end
|
48
60
|
else
|
@@ -52,7 +64,7 @@ class RcsvRawParseTest < Test::Unit::TestCase
|
|
52
64
|
end
|
53
65
|
end
|
54
66
|
end
|
55
|
-
|
67
|
+
|
56
68
|
def test_buffer_size
|
57
69
|
raw_parsed_csv_data = Rcsv.raw_parse(@csv_data, :buffer_size => 10)
|
58
70
|
|
data/test/test_rcsv_write.rb
CHANGED
@@ -47,9 +47,10 @@ class RcsvWriteTest < Test::Unit::TestCase
|
|
47
47
|
}
|
48
48
|
|
49
49
|
@data = [
|
50
|
-
[1, Date.parse('2012-11-11'), 100.234, true, 1,
|
51
|
-
[
|
52
|
-
[3, Date.parse('2012-12-12'), 0, 'sepulka', -122, 'zoop']
|
50
|
+
[1, Date.parse('2012-11-11'), 100.234, true, 1, true],
|
51
|
+
['elephant', Date.parse('1970-01-02'), -0.1, :nyancat, 123.8891, 0],
|
52
|
+
[3, Date.parse('2012-12-12'), 0, 'sepulka', -122, 'zoop'],
|
53
|
+
[nil, nil, nil, nil, nil, nil]
|
53
54
|
]
|
54
55
|
|
55
56
|
@writer = Rcsv.new(@options)
|
@@ -57,12 +58,12 @@ class RcsvWriteTest < Test::Unit::TestCase
|
|
57
58
|
|
58
59
|
def test_rcsv_generate_header
|
59
60
|
assert_equal(
|
60
|
-
"ID,Date,Money,Banana IDDQD,Hashformat,\
|
61
|
+
"ID,Date,Money,Banana IDDQD,Hashformat,\n", @writer.generate_header
|
61
62
|
)
|
62
63
|
end
|
63
64
|
|
64
65
|
def test_rscv_generate_row
|
65
|
-
assert_equal("1,2012-11-11,$100.23,true,$1.00,
|
66
|
+
assert_equal("1,2012-11-11,$100.23,true,$1.00,true\n", @writer.generate_row(@data.first))
|
66
67
|
end
|
67
68
|
|
68
69
|
def test_rcsv_write
|
@@ -75,7 +76,7 @@ class RcsvWriteTest < Test::Unit::TestCase
|
|
75
76
|
io.rewind
|
76
77
|
|
77
78
|
assert_equal(
|
78
|
-
"ID,Date,Money,Banana IDDQD,Hashformat,\
|
79
|
+
"ID,Date,Money,Banana IDDQD,Hashformat,\n1,2012-11-11,$100.23,true,$1.00,true\nelephant,1970-01-02,$-0.10,nyancat,$123.89,false\n3,2012-12-12,$0.00,sepulka,$-122.00,true\n,,,,,\n", io.read
|
79
80
|
)
|
80
81
|
end
|
81
82
|
|
@@ -90,7 +91,34 @@ class RcsvWriteTest < Test::Unit::TestCase
|
|
90
91
|
io.rewind
|
91
92
|
|
92
93
|
assert_equal(
|
93
|
-
"1,2012-11-11,$100.23,true,$1.00,
|
94
|
+
"1,2012-11-11,$100.23,true,$1.00,true\nelephant,1970-01-02,$-0.10,nyancat,$123.89,false\n3,2012-12-12,$0.00,sepulka,$-122.00,true\n,,,,,\n", io.read
|
94
95
|
)
|
95
96
|
end
|
97
|
+
|
98
|
+
def test_generate_row__dont_require_columns
|
99
|
+
writer = Rcsv.new
|
100
|
+
assert_equal "1,2,3\n", writer.generate_row([1, 2, 3])
|
101
|
+
end
|
102
|
+
|
103
|
+
def test_generate_row__proper_escaping_for_quotes_and_newlines
|
104
|
+
writer = Rcsv.new
|
105
|
+
assert_equal "\"before quote \"\" after quote\",\"before newline \n after newline\"\n",
|
106
|
+
writer.generate_row(["before quote \" after quote", "before newline \n after newline"])
|
107
|
+
end
|
108
|
+
|
109
|
+
def test_generate_row__should_be_able_to_parse_generated_csv
|
110
|
+
writer = Rcsv.new
|
111
|
+
quotable_strings = [
|
112
|
+
"before quote \" after quote",
|
113
|
+
"before newline \n after newline",
|
114
|
+
"before separator , after separator",
|
115
|
+
"separator , and quote \" oh my"
|
116
|
+
]
|
117
|
+
assert_equal [quotable_strings], Rcsv.parse(writer.generate_row(quotable_strings), :header => :none)
|
118
|
+
end
|
119
|
+
|
120
|
+
def test_generate_row__should_handle_alternate_column_separators
|
121
|
+
writer = Rcsv.new(:column_separator => '|')
|
122
|
+
assert_equal "1|2|\"before pipe | after pipe\"\n", writer.generate_row([1, 2, 'before pipe | after pipe'])
|
123
|
+
end
|
96
124
|
end
|
metadata
CHANGED
@@ -1,28 +1,37 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: rcsv
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 17
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 3
|
9
|
+
- 1
|
10
|
+
version: 0.3.1
|
5
11
|
platform: ruby
|
6
|
-
authors:
|
12
|
+
authors:
|
7
13
|
- Arthur Pirogovski
|
8
14
|
autorequire:
|
9
15
|
bindir: bin
|
10
16
|
cert_chain: []
|
11
|
-
|
17
|
+
|
18
|
+
date: 2015-06-17 00:00:00 Z
|
12
19
|
dependencies: []
|
20
|
+
|
13
21
|
description: A libcsv-based CSV parser for Ruby
|
14
|
-
email:
|
22
|
+
email:
|
15
23
|
- arthur@flyingtealeaf.com
|
16
24
|
executables: []
|
17
|
-
|
25
|
+
|
26
|
+
extensions:
|
18
27
|
- ext/rcsv/extconf.rb
|
19
28
|
extra_rdoc_files: []
|
20
|
-
|
29
|
+
|
30
|
+
files:
|
21
31
|
- .gitignore
|
22
32
|
- .travis.yml
|
23
33
|
- COPYING.LESSER
|
24
34
|
- Gemfile
|
25
|
-
- Gemfile.lock
|
26
35
|
- LICENSE
|
27
36
|
- README.md
|
28
37
|
- RELNOTES
|
@@ -43,30 +52,39 @@ files:
|
|
43
52
|
- test/test_rcsv_write.rb
|
44
53
|
homepage: http://github.com/fiksu/rcsv
|
45
54
|
licenses: []
|
46
|
-
|
55
|
+
|
47
56
|
post_install_message:
|
48
57
|
rdoc_options: []
|
49
|
-
|
58
|
+
|
59
|
+
require_paths:
|
50
60
|
- lib
|
51
61
|
- ext
|
52
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
hash: 3
|
68
|
+
segments:
|
69
|
+
- 0
|
70
|
+
version: "0"
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
hash: 3
|
77
|
+
segments:
|
78
|
+
- 0
|
79
|
+
version: "0"
|
62
80
|
requirements: []
|
81
|
+
|
63
82
|
rubyforge_project:
|
64
|
-
rubygems_version:
|
83
|
+
rubygems_version: 1.8.24
|
65
84
|
signing_key:
|
66
|
-
specification_version:
|
67
|
-
summary: Fast CSV parsing library for MRI based on libcsv. Supports type conversion,
|
68
|
-
|
69
|
-
test_files:
|
85
|
+
specification_version: 3
|
86
|
+
summary: Fast CSV parsing library for MRI based on libcsv. Supports type conversion, non-strict parsing and basic filtering.
|
87
|
+
test_files:
|
70
88
|
- test/test_rcsv.csv
|
71
89
|
- test/test_rcsv_parse.rb
|
72
90
|
- test/test_rcsv_raw_parse.rb
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: 0333dfa0687b5fff49a8102ddbea1368a8335919
|
4
|
-
data.tar.gz: 0c9b534944b188a82c7734b30aac28bdf7f38f80
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: 06e065ecb4755ec5acb43132b39672331ce4a5b7e52c656739d6e23b896c5930311232f6e1c7fa0b39357df6e41d138dcee9cb33b040c0e536c1f7f450f48f1a
|
7
|
-
data.tar.gz: 9c46cedf27acf68b9248d8e9b2329f5162bb099bd67585f6ddc61d85d784b83e20a644649a8666ad5b57d074a1e015f7d42f1c58013f559b5e84d0495a1d6e44
|