bamfcsv 0.2.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/ext/bamfcsv/bamfcsv_ext.c +18 -14
- data/lib/bamfcsv/table.rb +14 -1
- data/lib/bamfcsv/version.rb +1 -1
- data/lib/bamfcsv.rb +6 -1
- data/spec/lib/bamfcsv_spec.rb +72 -1
- metadata +40 -48
data/Gemfile.lock
CHANGED
data/ext/bamfcsv/bamfcsv_ext.c
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
#include <ruby/ruby.h>
|
2
|
+
#include <ruby/encoding.h>
|
2
3
|
#include <stdlib.h>
|
3
4
|
#include <stdio.h>
|
5
|
+
#include <stdbool.h>
|
4
6
|
|
5
7
|
VALUE BAMFCSV_module;
|
6
8
|
VALUE BAMFCSV_MalformedCSVError_class;
|
7
9
|
|
8
|
-
VALUE bamfcsv_finalize_cell(char *cell_start, char *cell_end, int quote_count) {
|
10
|
+
VALUE bamfcsv_finalize_cell(char *cell_start, char *cell_end, int quote_count, rb_encoding *enc) {
|
9
11
|
if (*cell_end == '\r')
|
10
12
|
cell_end--;
|
11
13
|
|
@@ -17,12 +19,20 @@ VALUE bamfcsv_finalize_cell(char *cell_start, char *cell_end, int quote_count) {
|
|
17
19
|
cell_end--;
|
18
20
|
}
|
19
21
|
|
20
|
-
VALUE cell_str =
|
22
|
+
VALUE cell_str = rb_enc_str_new(cell_start, cell_end-cell_start+1, enc);
|
21
23
|
|
22
24
|
return cell_str;
|
23
25
|
}
|
24
26
|
|
25
|
-
|
27
|
+
bool quotes_end_line(char* cur) {
|
28
|
+
return *(cur-1) == '"' || (*(cur-1) == '\r' && *(cur-2) == '"');
|
29
|
+
}
|
30
|
+
|
31
|
+
VALUE bamfcsv_parse_string(VALUE self, VALUE string) {
|
32
|
+
char *buf = RSTRING_PTR(string);
|
33
|
+
long bufsize = RSTRING_LEN(string);
|
34
|
+
rb_encoding *enc = rb_enc_from_index(ENCODING_GET(string));
|
35
|
+
|
26
36
|
unsigned long num_rows = 1, cell_count = 1;
|
27
37
|
int quote_count = 0, quotes_matched = 1;
|
28
38
|
|
@@ -57,7 +67,7 @@ VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
|
|
57
67
|
if (quote_count && *(cur-1) != '"')
|
58
68
|
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu.", num_rows, cell_count);
|
59
69
|
|
60
|
-
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
|
70
|
+
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count, enc);
|
61
71
|
if (quote_count)
|
62
72
|
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
|
63
73
|
|
@@ -69,10 +79,10 @@ VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
|
|
69
79
|
|
70
80
|
} else if (*cur == '\n') {
|
71
81
|
|
72
|
-
if (quote_count && !(
|
82
|
+
if (quote_count && !quotes_end_line(cur))
|
73
83
|
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu: EOL", num_rows, cell_count);
|
74
84
|
|
75
|
-
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
|
85
|
+
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count, enc);
|
76
86
|
if (quote_count)
|
77
87
|
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
|
78
88
|
/* Completely blank lines don't even get a nil. This matches CSV's behavior. */
|
@@ -95,10 +105,10 @@ VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
|
|
95
105
|
|
96
106
|
if (!quotes_matched) /* Reached EOF without matching quotes */
|
97
107
|
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %lu, cell %lu: File ends without closing '\"'", num_rows, cell_count);
|
98
|
-
else if (quote_count &&
|
108
|
+
else if (quote_count && !quotes_end_line(cur)) /* Quotes closed before end of final cell */
|
99
109
|
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu: EOF", num_rows, cell_count);
|
100
110
|
|
101
|
-
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
|
111
|
+
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count, enc);
|
102
112
|
if (quote_count)
|
103
113
|
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
|
104
114
|
/* Completely blank lines don't even get a nil. This matches CSV's behavior. */
|
@@ -110,12 +120,6 @@ VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
|
|
110
120
|
|
111
121
|
}
|
112
122
|
|
113
|
-
VALUE bamfcsv_parse_string(VALUE self, VALUE string) {
|
114
|
-
|
115
|
-
return bamfcsv_build_matrix(RSTRING_PTR(string), NUM2ULONG(rb_str_length(string)));
|
116
|
-
|
117
|
-
}
|
118
|
-
|
119
123
|
void Init_bamfcsv() {
|
120
124
|
|
121
125
|
BAMFCSV_module = rb_define_module("BAMFCSV");
|
data/lib/bamfcsv/table.rb
CHANGED
@@ -26,8 +26,12 @@ module BAMFCSV
|
|
26
26
|
@row_cache[idx] ||= Row.new(@header_map, @matrix[idx])
|
27
27
|
end
|
28
28
|
|
29
|
+
def empty?
|
30
|
+
@matrix.empty?
|
31
|
+
end
|
32
|
+
|
29
33
|
def inspect
|
30
|
-
"
|
34
|
+
"[#{self.map{|r| r.inspect}.join(", ")}]"
|
31
35
|
end
|
32
36
|
|
33
37
|
private
|
@@ -50,6 +54,15 @@ module BAMFCSV
|
|
50
54
|
def [](key)
|
51
55
|
@fields[@header_map[key]]
|
52
56
|
end
|
57
|
+
|
58
|
+
def inspect
|
59
|
+
pairs = []
|
60
|
+
headers.each do |h|
|
61
|
+
pairs << "#{h.inspect} => #{self[h].inspect}"
|
62
|
+
end
|
63
|
+
"{#{pairs.join(", ")}}"
|
64
|
+
end
|
65
|
+
|
53
66
|
end
|
54
67
|
end
|
55
68
|
end
|
data/lib/bamfcsv/version.rb
CHANGED
data/lib/bamfcsv.rb
CHANGED
@@ -9,7 +9,12 @@ module BAMFCSV
|
|
9
9
|
|
10
10
|
def self.parse(csv_str, opts={})
|
11
11
|
return [] if csv_str.empty?
|
12
|
-
|
12
|
+
# We need to do this because the C extension currently overwrites
|
13
|
+
# the input, and all of String#clone, String#dup, and String.new
|
14
|
+
# copy the pointer, not the contents. So we make a copy, parse
|
15
|
+
# that, and throw away the copy.
|
16
|
+
copy = "" + csv_str
|
17
|
+
matrix = __parse_string(copy)
|
13
18
|
if opts[:headers]
|
14
19
|
Table.new(matrix)
|
15
20
|
else
|
data/spec/lib/bamfcsv_spec.rb
CHANGED
@@ -56,7 +56,15 @@ describe BAMFCSV do
|
|
56
56
|
BAMFCSV.parse("1,2").should == [["1","2"]]
|
57
57
|
end
|
58
58
|
|
59
|
-
it
|
59
|
+
it "correctly parses the last cell when followed by CRLF" do
|
60
|
+
BAMFCSV.parse("1,2\r\n").should == [["1","2"]]
|
61
|
+
end
|
62
|
+
|
63
|
+
it "correctly parses the last cell when quotes and followed by CRLF" do
|
64
|
+
BAMFCSV.parse("1,\"2\"\r\n").should == [["1","2"]]
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'correctly escapes ""' do
|
60
68
|
BAMFCSV.parse(%Q|1,"""2"""\n|).should == [["1", '"2"']]
|
61
69
|
end
|
62
70
|
|
@@ -74,6 +82,19 @@ describe BAMFCSV do
|
|
74
82
|
BAMFCSV.parse("1\r\n2").should == [["1"],["2"]]
|
75
83
|
end
|
76
84
|
|
85
|
+
it "parses data outside the 7-bit range" do
|
86
|
+
BAMFCSV.parse("age \u226540 years").should == [["age \u226540 years"]]
|
87
|
+
end
|
88
|
+
|
89
|
+
it "doesn't alter the input" do
|
90
|
+
original = %Q{this,that,"the ""other"" thing"\r\n1,2,3\n}
|
91
|
+
# String#dup, String#clone, and String.new copy the pointer but
|
92
|
+
# share the same underlying buffer, d'oh!
|
93
|
+
input = "" + original
|
94
|
+
BAMFCSV.parse(input)
|
95
|
+
input.should == original
|
96
|
+
end
|
97
|
+
|
77
98
|
describe "default CSV module compatibility" do
|
78
99
|
it "adds a nil cell after a trailing comma with no newline" do
|
79
100
|
BAMFCSV.parse("1,2,").should == [["1","2",nil]]
|
@@ -179,5 +200,55 @@ describe BAMFCSV do
|
|
179
200
|
expect { table.each }.should_not raise_error(LocalJumpError)
|
180
201
|
end
|
181
202
|
end
|
203
|
+
|
204
|
+
describe "Table#inspect" do
|
205
|
+
it "is an Array of Hashes" do
|
206
|
+
csv = <<CSV
|
207
|
+
foo,bar
|
208
|
+
1,2
|
209
|
+
baz,quux
|
210
|
+
CSV
|
211
|
+
|
212
|
+
inspected = '[{"foo" => "1", "bar" => "2"}, {"foo" => "baz", "bar" => "quux"}]'
|
213
|
+
|
214
|
+
BAMFCSV.parse(csv, :headers => true).inspect.should == inspected
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
describe "Table#empty?" do
|
219
|
+
it "returns true for an empty table with headers" do
|
220
|
+
table = BAMFCSV.parse("column1,column2\n", :headers => true)
|
221
|
+
table.empty?.should be_true
|
222
|
+
end
|
223
|
+
|
224
|
+
it "returns false for a non-empty table with headers" do
|
225
|
+
table = BAMFCSV.parse("column1,column2\nfoo,bar", :headers => true)
|
226
|
+
table.empty?.should be_false
|
227
|
+
end
|
228
|
+
|
229
|
+
it "returns true for an empty table without headers" do
|
230
|
+
table = BAMFCSV.parse("", :headers => false)
|
231
|
+
table.empty?.should be_true
|
232
|
+
end
|
233
|
+
|
234
|
+
it "returns false for a non-empty table without headers" do
|
235
|
+
table = BAMFCSV.parse("foo,bar", :headers => false)
|
236
|
+
table.empty?.should be_false
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
240
|
+
describe "Table::Row#inspect" do
|
241
|
+
it "is a Hash" do
|
242
|
+
csv = <<CSV
|
243
|
+
foo,bar
|
244
|
+
1,2
|
245
|
+
baz,quux
|
246
|
+
CSV
|
247
|
+
|
248
|
+
inspected = '{"foo" => "1", "bar" => "2"}'
|
249
|
+
|
250
|
+
BAMFCSV.parse(csv, :headers => true).first.inspect.should == inspected
|
251
|
+
end
|
252
|
+
end
|
182
253
|
end
|
183
254
|
end
|
metadata
CHANGED
@@ -1,63 +1,59 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bamfcsv
|
3
|
-
version: !ruby/object:Gem::Version
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.1
|
4
5
|
prerelease:
|
5
|
-
version: 0.2.0
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- Jon Distad
|
9
9
|
- Alex Redington
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
dependencies:
|
17
|
-
- !ruby/object:Gem::Dependency
|
13
|
+
date: 2011-08-10 00:00:00.000000000Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
18
16
|
name: rspec
|
19
|
-
|
20
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
17
|
+
requirement: &2157181100 !ruby/object:Gem::Requirement
|
21
18
|
none: false
|
22
|
-
requirements:
|
19
|
+
requirements:
|
23
20
|
- - ~>
|
24
|
-
- !ruby/object:Gem::Version
|
21
|
+
- !ruby/object:Gem::Version
|
25
22
|
version: 2.5.0
|
26
23
|
type: :development
|
27
|
-
version_requirements: *id001
|
28
|
-
- !ruby/object:Gem::Dependency
|
29
|
-
name: fuubar
|
30
24
|
prerelease: false
|
31
|
-
|
25
|
+
version_requirements: *2157181100
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: fuubar
|
28
|
+
requirement: &2157180600 !ruby/object:Gem::Requirement
|
32
29
|
none: false
|
33
|
-
requirements:
|
30
|
+
requirements:
|
34
31
|
- - ~>
|
35
|
-
- !ruby/object:Gem::Version
|
32
|
+
- !ruby/object:Gem::Version
|
36
33
|
version: 0.0.2
|
37
34
|
type: :development
|
38
|
-
version_requirements: *id002
|
39
|
-
- !ruby/object:Gem::Dependency
|
40
|
-
name: rake-compiler
|
41
35
|
prerelease: false
|
42
|
-
|
36
|
+
version_requirements: *2157180600
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: rake-compiler
|
39
|
+
requirement: &2157180140 !ruby/object:Gem::Requirement
|
43
40
|
none: false
|
44
|
-
requirements:
|
41
|
+
requirements:
|
45
42
|
- - ~>
|
46
|
-
- !ruby/object:Gem::Version
|
43
|
+
- !ruby/object:Gem::Version
|
47
44
|
version: 0.7.1
|
48
45
|
type: :development
|
49
|
-
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *2157180140
|
50
48
|
description: BAMFCSV parses csv like a BAMF. BAMF!!
|
51
|
-
email:
|
49
|
+
email:
|
52
50
|
- jon@thinkrelevance.com
|
53
51
|
- lovemachine@thinkrelevance.com
|
54
52
|
executables: []
|
55
|
-
|
56
|
-
extensions:
|
53
|
+
extensions:
|
57
54
|
- ext/bamfcsv/extconf.rb
|
58
55
|
extra_rdoc_files: []
|
59
|
-
|
60
|
-
files:
|
56
|
+
files:
|
61
57
|
- .gitignore
|
62
58
|
- .rspec
|
63
59
|
- Gemfile
|
@@ -81,36 +77,32 @@ files:
|
|
81
77
|
- spec/spec_helper.rb
|
82
78
|
- tasks/compile.rake
|
83
79
|
- tasks/rspec.rake
|
84
|
-
has_rdoc: true
|
85
80
|
homepage: https://github.com/jondistad/bamfcsv
|
86
81
|
licenses: []
|
87
|
-
|
88
82
|
post_install_message:
|
89
83
|
rdoc_options: []
|
90
|
-
|
91
|
-
require_paths:
|
84
|
+
require_paths:
|
92
85
|
- lib
|
93
86
|
- ext
|
94
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
95
88
|
none: false
|
96
|
-
requirements:
|
97
|
-
- -
|
98
|
-
- !ruby/object:Gem::Version
|
99
|
-
version:
|
100
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ! '>='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
94
|
none: false
|
102
|
-
requirements:
|
103
|
-
- -
|
104
|
-
- !ruby/object:Gem::Version
|
105
|
-
version:
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
106
99
|
requirements: []
|
107
|
-
|
108
100
|
rubyforge_project: bamfcsv
|
109
|
-
rubygems_version: 1.
|
101
|
+
rubygems_version: 1.8.6
|
110
102
|
signing_key:
|
111
103
|
specification_version: 3
|
112
104
|
summary: BAMF!!! Your csv is parsed.
|
113
|
-
test_files:
|
105
|
+
test_files:
|
114
106
|
- spec/fixtures/bamf-comma-comma.csv
|
115
107
|
- spec/fixtures/double-quotes.csv
|
116
108
|
- spec/fixtures/empty.csv
|