bamfcsv 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/ext/bamfcsv/bamfcsv_ext.c +48 -42
- data/ext/bamfcsv/bamfcsv_ext.h +5 -0
- data/lib/bamfcsv/version.rb +1 -1
- data/lib/bamfcsv.rb +5 -5
- data/spec/lib/bamfcsv_spec.rb +72 -2
- metadata +55 -40
data/Gemfile.lock
CHANGED
data/ext/bamfcsv/bamfcsv_ext.c
CHANGED
@@ -1,7 +1,4 @@
|
|
1
1
|
#include <stdlib.h>
|
2
|
-
#include <ruby/ruby.h>
|
3
|
-
#include <fcntl.h>
|
4
|
-
#include <sys/mman.h>
|
5
2
|
#include "bamfcsv_ext.h"
|
6
3
|
|
7
4
|
struct s_Row *alloc_row() {
|
@@ -71,10 +68,14 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
|
|
71
68
|
rb_ary_store(matrix,i,row);
|
72
69
|
for (j = 0; j < cur_row->cell_count; j++) {
|
73
70
|
if (*(cur_cell->start) == '"'
|
74
|
-
&& *((cur_cell->start)+(
|
75
|
-
new_string = rb_str_new(cur_cell->start+
|
76
|
-
else
|
77
|
-
|
71
|
+
&& *((cur_cell->start)+(cur_cell->len-1)) == '"')
|
72
|
+
new_string = rb_str_new(cur_cell->start+1, cur_cell->len-2);
|
73
|
+
else {
|
74
|
+
if (cur_cell->len)
|
75
|
+
new_string = rb_str_new(cur_cell->start, cur_cell->len);
|
76
|
+
else
|
77
|
+
new_string = Qnil; /* Empty, unquoted cells are nil, for default ruby CSV compatibility */
|
78
|
+
}
|
78
79
|
if (cur_cell->has_quotes) {
|
79
80
|
rb_funcall(new_string, gsub, 2, dquote, quote);
|
80
81
|
}
|
@@ -87,17 +88,19 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
|
|
87
88
|
return matrix;
|
88
89
|
}
|
89
90
|
|
90
|
-
void finalize_cell(struct s_Cell *cell, char *cur) {
|
91
|
-
if (*(cur-
|
92
|
-
cell->len = cur-(cell->start)-
|
91
|
+
void finalize_cell(struct s_Cell *cell, char *cur, int quote_count) {
|
92
|
+
if (*(cur-1) == '\r')
|
93
|
+
cell->len = cur-(cell->start)-1;
|
93
94
|
else
|
94
95
|
cell->len = cur-(cell->start);
|
96
|
+
|
97
|
+
if (quote_count) cell->has_quotes = 1;
|
95
98
|
}
|
96
99
|
|
97
100
|
VALUE build_matrix(char *buf, int bufsize) {
|
98
101
|
int str_start = 0;
|
99
102
|
int num_rows = 1;
|
100
|
-
int
|
103
|
+
int quote_count = 0, quotes_matched = 1;
|
101
104
|
|
102
105
|
struct s_Row *first_row = alloc_row();
|
103
106
|
struct s_Row *cur_row = first_row;
|
@@ -112,33 +115,43 @@ VALUE build_matrix(char *buf, int bufsize) {
|
|
112
115
|
for (cur = buf; cur < buf+bufsize; cur++) {
|
113
116
|
|
114
117
|
if (*cur == '"') {
|
115
|
-
if (
|
116
|
-
|
117
|
-
|
118
|
-
|
118
|
+
if (0 == quote_count && cur_cell->start != cur) /* Quotes begin past opening of cell */
|
119
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: Quoted cell must open with '\"'", num_rows, cur_row->cell_count+1);
|
120
|
+
else
|
121
|
+
++quote_count;
|
119
122
|
}
|
120
123
|
|
121
|
-
|
124
|
+
quotes_matched = !(quote_count & 1); /* count is even */
|
125
|
+
|
126
|
+
if (quotes_matched) {
|
122
127
|
|
123
128
|
if (*cur == ',') {
|
124
129
|
|
125
|
-
|
130
|
+
if (quote_count && *(cur-1) != '"')
|
131
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d.", num_rows, cur_row->cell_count+1);
|
132
|
+
|
133
|
+
finalize_cell(cur_cell,cur,quote_count);
|
126
134
|
cur_cell->next_cell = alloc_cell();
|
127
135
|
cur_cell = cur_cell->next_cell;
|
128
|
-
cur_cell->start = cur+
|
136
|
+
cur_cell->start = cur+1;
|
129
137
|
cur_row->cell_count += 1;
|
138
|
+
quote_count = 0;
|
130
139
|
|
131
140
|
}
|
132
141
|
|
133
142
|
if (*cur == '\n') {
|
134
143
|
|
135
|
-
|
144
|
+
if (quote_count && !(*(cur-1) == '"' || *(cur-1) == '\r' && *(cur-2) == '"'))
|
145
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOL", num_rows, cur_row->cell_count+1);
|
146
|
+
|
147
|
+
finalize_cell(cur_cell,cur,quote_count);
|
136
148
|
cur_row->cell_count += 1;
|
137
149
|
cur_row->next_row = alloc_row();
|
138
150
|
cur_row = cur_row -> next_row;
|
139
151
|
cur_row->first_cell = alloc_cell();
|
140
152
|
cur_cell = cur_row->first_cell;
|
141
|
-
cur_cell->start = cur+
|
153
|
+
cur_cell->start = cur+1;
|
154
|
+
quote_count = 0;
|
142
155
|
|
143
156
|
num_rows++;
|
144
157
|
|
@@ -148,8 +161,16 @@ VALUE build_matrix(char *buf, int bufsize) {
|
|
148
161
|
|
149
162
|
}
|
150
163
|
|
151
|
-
if (
|
164
|
+
if (!quotes_matched) /* Reached EOF without matching quotes */
|
165
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: File ends without closing '\"'", num_rows, cur_row->cell_count+1);
|
166
|
+
else if (quote_count && *cur != '"')
|
167
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOF", num_rows, cur_row->cell_count+1);
|
168
|
+
|
169
|
+
if (cur_row->cell_count == 0) { /* Ended with newline */
|
152
170
|
num_rows--;
|
171
|
+
} else { /* No newline before EOF */
|
172
|
+
finalize_cell(cur_cell, cur, quote_count);
|
173
|
+
cur_row->cell_count++;
|
153
174
|
}
|
154
175
|
|
155
176
|
matrix = build_matrix_from_pointer_tree(first_row, num_rows);
|
@@ -160,32 +181,17 @@ VALUE build_matrix(char *buf, int bufsize) {
|
|
160
181
|
|
161
182
|
}
|
162
183
|
|
163
|
-
VALUE
|
164
|
-
|
165
|
-
char *mmapped_csv;
|
166
|
-
int filesize, csv;
|
167
|
-
|
168
|
-
csv = open(file, O_RDONLY);
|
169
|
-
filesize = lseek(csv, 0, SEEK_END);
|
170
|
-
mmapped_csv = (char*) mmap(0, filesize, PROT_READ, MAP_SHARED, csv, 0);
|
171
|
-
|
172
|
-
VALUE matrix = build_matrix(mmapped_csv,filesize);
|
173
|
-
|
174
|
-
munmap(mmapped_csv, filesize);
|
175
|
-
close(csv);
|
176
|
-
|
177
|
-
return matrix;
|
178
|
-
}
|
179
|
-
|
180
|
-
VALUE read_path(VALUE self, VALUE file) {
|
184
|
+
VALUE parse_string(VALUE self, VALUE string) {
|
181
185
|
|
182
|
-
return
|
186
|
+
return build_matrix(RSTRING_PTR(string), NUM2INT(rb_str_length(string)));
|
183
187
|
|
184
188
|
}
|
185
189
|
|
186
190
|
void Init_bamfcsv() {
|
187
191
|
|
188
|
-
|
189
|
-
|
192
|
+
BAMFCSV_module = rb_define_module("BAMFCSV");
|
193
|
+
VALUE bamfcsv_singleton_class = rb_singleton_class(BAMFCSV_module);
|
194
|
+
rb_define_private_method(bamfcsv_singleton_class, "__parse_string", parse_string, 1);
|
190
195
|
|
196
|
+
BAMFCSV_MalformedCSVError_class = rb_define_class_under(BAMFCSV_module, "MalformedCSVError", rb_eRuntimeError);
|
191
197
|
}
|
data/ext/bamfcsv/bamfcsv_ext.h
CHANGED
data/lib/bamfcsv/version.rb
CHANGED
data/lib/bamfcsv.rb
CHANGED
@@ -3,11 +3,11 @@ require 'bamfcsv/bamfcsv'
|
|
3
3
|
module BAMFCSV
|
4
4
|
|
5
5
|
def self.read(thing_to_read)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
__parse_string(File.read(thing_to_read))
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.parse(csv_str)
|
10
|
+
__parse_string(csv_str)
|
11
11
|
end
|
12
12
|
|
13
13
|
end
|
data/spec/lib/bamfcsv_spec.rb
CHANGED
@@ -1,10 +1,14 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe BAMFCSV do
|
4
|
-
it "has a
|
4
|
+
it "has a read method" do
|
5
5
|
BAMFCSV.should respond_to(:read)
|
6
6
|
end
|
7
7
|
|
8
|
+
it "has a parse method" do
|
9
|
+
BAMFCSV.should respond_to(:parse)
|
10
|
+
end
|
11
|
+
|
8
12
|
describe "#read" do
|
9
13
|
it "is a matrix given a filename" do
|
10
14
|
BAMFCSV.read("spec/fixtures/test.csv").should be_instance_of Array
|
@@ -19,7 +23,7 @@ describe BAMFCSV do
|
|
19
23
|
end
|
20
24
|
|
21
25
|
it "interprets empty cells correctly" do
|
22
|
-
BAMFCSV.read("spec/fixtures/bamf-comma-comma.csv").should == [["BAMF",
|
26
|
+
BAMFCSV.read("spec/fixtures/bamf-comma-comma.csv").should == [["BAMF",nil,"CSV"]]
|
23
27
|
end
|
24
28
|
|
25
29
|
it "escapes cells that are quoted" do
|
@@ -46,4 +50,70 @@ describe BAMFCSV do
|
|
46
50
|
end.should raise_error Errno::EISDIR
|
47
51
|
end
|
48
52
|
end
|
53
|
+
|
54
|
+
describe "#parse" do
|
55
|
+
it "correctly parses the last cell even if there is no newline" do
|
56
|
+
BAMFCSV.parse("1,2").should == [["1","2"]]
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'correctly escaptes ""' do
|
60
|
+
BAMFCSV.parse("1,\"\"2\"\"\n").should == [["1", '"2"']]
|
61
|
+
end
|
62
|
+
|
63
|
+
it "parses unquoted empty cells as nil" do
|
64
|
+
BAMFCSV.parse("1,,2").should == [["1",nil,"2"]]
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'parses quoted empty cells as ""' do
|
68
|
+
BAMFCSV.parse("1,\"\",2").should == [["1","","2"]]
|
69
|
+
end
|
70
|
+
|
71
|
+
describe "default CSV module compatibility" do
|
72
|
+
it "adds a nil cell after a trailing comma with no newline" do
|
73
|
+
BAMFCSV.parse("1,2,").should == [["1","2",nil]]
|
74
|
+
end
|
75
|
+
|
76
|
+
it "adds a nil cell after a trailing comma with an ending newline" do
|
77
|
+
BAMFCSV.parse("1,2,\n").should == [["1","2",nil]]
|
78
|
+
end
|
79
|
+
|
80
|
+
describe "when a quoted cell ends a line" do
|
81
|
+
it "does not raise an exception" do
|
82
|
+
expect { BAMFCSV.parse(%Q|1,2,"3,4"\n5,6,7|) }.should_not raise_error
|
83
|
+
expect { BAMFCSV.parse(%Q|1,2,"3,4"\r\n5,6,7|) }.should_not raise_error
|
84
|
+
end
|
85
|
+
|
86
|
+
it "correctly parses a quoted cell at the end of a line" do
|
87
|
+
BAMFCSV.parse(%Q|1,2,"3,4"\n5,6,7|).should == [["1","2","3,4"],["5","6","7"]]
|
88
|
+
BAMFCSV.parse(%Q|1,2,"3,4"\r\n5,6,7|).should == [["1","2","3,4"],["5","6","7"]]
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
it "raises BAMFCSV::MalformedCSVError when quotes appear in a cell which was not started with quotes" do
|
93
|
+
expect { BAMFCSV.parse(' ""') }.should raise_error(BAMFCSV::MalformedCSVError)
|
94
|
+
expect { BAMFCSV.parse(" \"\"\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
95
|
+
expect { BAMFCSV.parse(" \"\"\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
96
|
+
expect { BAMFCSV.parse('1, "",3') }.should raise_error(BAMFCSV::MalformedCSVError)
|
97
|
+
end
|
98
|
+
|
99
|
+
it "raises BAMFCSV::MalformedCSVError when a quoted cell is not closed at its end" do
|
100
|
+
expect { BAMFCSV.parse('"') }.should raise_error(BAMFCSV::MalformedCSVError)
|
101
|
+
expect { BAMFCSV.parse('" ""') }.should raise_error(BAMFCSV::MalformedCSVError)
|
102
|
+
expect { BAMFCSV.parse("\"\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
103
|
+
expect { BAMFCSV.parse("\"\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
104
|
+
expect { BAMFCSV.parse("\" \"\"\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
105
|
+
expect { BAMFCSV.parse("\" \"\"\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
106
|
+
expect { BAMFCSV.parse('1,"2,3') }.should raise_error(BAMFCSV::MalformedCSVError)
|
107
|
+
expect { BAMFCSV.parse("1,\"2,3\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
108
|
+
expect { BAMFCSV.parse("1,\"2,3\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
109
|
+
end
|
110
|
+
|
111
|
+
it "raises BAMFCSV::MalformedCSVError when quoted cell is closed before its end" do
|
112
|
+
expect { BAMFCSV.parse('"" ') }.should raise_error(BAMFCSV::MalformedCSVError)
|
113
|
+
expect { BAMFCSV.parse("\"\" \n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
114
|
+
expect { BAMFCSV.parse("\"\" \r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
115
|
+
expect { BAMFCSV.parse('1,"" ,2') }.should raise_error(BAMFCSV::MalformedCSVError)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
49
119
|
end
|
metadata
CHANGED
@@ -1,60 +1,63 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bamfcsv
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
3
|
+
version: !ruby/object:Gem::Version
|
5
4
|
prerelease:
|
5
|
+
version: 0.1.0
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- Jon Distad
|
9
9
|
- Alex Redington
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
|
13
|
+
|
14
|
+
date: 2011-04-03 00:00:00 -04:00
|
14
15
|
default_executable:
|
15
|
-
dependencies:
|
16
|
-
- !ruby/object:Gem::Dependency
|
16
|
+
dependencies:
|
17
|
+
- !ruby/object:Gem::Dependency
|
17
18
|
name: rspec
|
18
|
-
|
19
|
+
prerelease: false
|
20
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
21
|
none: false
|
20
|
-
requirements:
|
22
|
+
requirements:
|
21
23
|
- - ~>
|
22
|
-
- !ruby/object:Gem::Version
|
24
|
+
- !ruby/object:Gem::Version
|
23
25
|
version: 2.5.0
|
24
26
|
type: :development
|
25
|
-
|
26
|
-
|
27
|
-
- !ruby/object:Gem::Dependency
|
27
|
+
version_requirements: *id001
|
28
|
+
- !ruby/object:Gem::Dependency
|
28
29
|
name: fuubar
|
29
|
-
|
30
|
+
prerelease: false
|
31
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
32
|
none: false
|
31
|
-
requirements:
|
33
|
+
requirements:
|
32
34
|
- - ~>
|
33
|
-
- !ruby/object:Gem::Version
|
35
|
+
- !ruby/object:Gem::Version
|
34
36
|
version: 0.0.2
|
35
37
|
type: :development
|
36
|
-
|
37
|
-
|
38
|
-
- !ruby/object:Gem::Dependency
|
38
|
+
version_requirements: *id002
|
39
|
+
- !ruby/object:Gem::Dependency
|
39
40
|
name: rake-compiler
|
40
|
-
|
41
|
+
prerelease: false
|
42
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
43
|
none: false
|
42
|
-
requirements:
|
44
|
+
requirements:
|
43
45
|
- - ~>
|
44
|
-
- !ruby/object:Gem::Version
|
46
|
+
- !ruby/object:Gem::Version
|
45
47
|
version: 0.7.1
|
46
48
|
type: :development
|
47
|
-
|
48
|
-
version_requirements: *22499160
|
49
|
+
version_requirements: *id003
|
49
50
|
description: BAMFCSV parses csv like a BAMF. BAMF!!
|
50
|
-
email:
|
51
|
+
email:
|
51
52
|
- jon@thinkrelevance.com
|
52
53
|
- lovemachine@thinkrelevance.com
|
53
54
|
executables: []
|
54
|
-
|
55
|
+
|
56
|
+
extensions:
|
55
57
|
- ext/bamfcsv/extconf.rb
|
56
58
|
extra_rdoc_files: []
|
57
|
-
|
59
|
+
|
60
|
+
files:
|
58
61
|
- .gitignore
|
59
62
|
- .rspec
|
60
63
|
- Gemfile
|
@@ -81,27 +84,39 @@ files:
|
|
81
84
|
has_rdoc: true
|
82
85
|
homepage: https://github.com/jondistad/bamfcsv
|
83
86
|
licenses: []
|
87
|
+
|
84
88
|
post_install_message:
|
85
89
|
rdoc_options: []
|
86
|
-
|
90
|
+
|
91
|
+
require_paths:
|
87
92
|
- lib
|
88
93
|
- ext
|
89
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
94
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
90
95
|
none: false
|
91
|
-
requirements:
|
92
|
-
- -
|
93
|
-
- !ruby/object:Gem::Version
|
94
|
-
version:
|
95
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: "0"
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
101
|
none: false
|
97
|
-
requirements:
|
98
|
-
- -
|
99
|
-
- !ruby/object:Gem::Version
|
100
|
-
version:
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: "0"
|
101
106
|
requirements: []
|
107
|
+
|
102
108
|
rubyforge_project: bamfcsv
|
103
|
-
rubygems_version: 1.
|
109
|
+
rubygems_version: 1.6.2
|
104
110
|
signing_key:
|
105
111
|
specification_version: 3
|
106
112
|
summary: BAMF!!! Your csv is parsed.
|
107
|
-
test_files:
|
113
|
+
test_files:
|
114
|
+
- spec/fixtures/bamf-comma-comma.csv
|
115
|
+
- spec/fixtures/double-quotes.csv
|
116
|
+
- spec/fixtures/empty.csv
|
117
|
+
- spec/fixtures/escapes.csv
|
118
|
+
- spec/fixtures/one-column.csv
|
119
|
+
- spec/fixtures/terminated-with-cr.csv
|
120
|
+
- spec/fixtures/test.csv
|
121
|
+
- spec/lib/bamfcsv_spec.rb
|
122
|
+
- spec/spec_helper.rb
|