bamfcsv 0.0.2 → 0.1.0
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/ext/bamfcsv/bamfcsv_ext.c +48 -42
- data/ext/bamfcsv/bamfcsv_ext.h +5 -0
- data/lib/bamfcsv/version.rb +1 -1
- data/lib/bamfcsv.rb +5 -5
- data/spec/lib/bamfcsv_spec.rb +72 -2
- metadata +55 -40
data/Gemfile.lock
CHANGED
data/ext/bamfcsv/bamfcsv_ext.c
CHANGED
@@ -1,7 +1,4 @@
|
|
1
1
|
#include <stdlib.h>
|
2
|
-
#include <ruby/ruby.h>
|
3
|
-
#include <fcntl.h>
|
4
|
-
#include <sys/mman.h>
|
5
2
|
#include "bamfcsv_ext.h"
|
6
3
|
|
7
4
|
struct s_Row *alloc_row() {
|
@@ -71,10 +68,14 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
|
|
71
68
|
rb_ary_store(matrix,i,row);
|
72
69
|
for (j = 0; j < cur_row->cell_count; j++) {
|
73
70
|
if (*(cur_cell->start) == '"'
|
74
|
-
&& *((cur_cell->start)+(
|
75
|
-
new_string = rb_str_new(cur_cell->start+
|
76
|
-
else
|
77
|
-
|
71
|
+
&& *((cur_cell->start)+(cur_cell->len-1)) == '"')
|
72
|
+
new_string = rb_str_new(cur_cell->start+1, cur_cell->len-2);
|
73
|
+
else {
|
74
|
+
if (cur_cell->len)
|
75
|
+
new_string = rb_str_new(cur_cell->start, cur_cell->len);
|
76
|
+
else
|
77
|
+
new_string = Qnil; /* Empty, unquoted cells are nil, for default ruby CSV compatibility */
|
78
|
+
}
|
78
79
|
if (cur_cell->has_quotes) {
|
79
80
|
rb_funcall(new_string, gsub, 2, dquote, quote);
|
80
81
|
}
|
@@ -87,17 +88,19 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
|
|
87
88
|
return matrix;
|
88
89
|
}
|
89
90
|
|
90
|
-
void finalize_cell(struct s_Cell *cell, char *cur) {
|
91
|
-
if (*(cur-
|
92
|
-
cell->len = cur-(cell->start)-
|
91
|
+
void finalize_cell(struct s_Cell *cell, char *cur, int quote_count) {
|
92
|
+
if (*(cur-1) == '\r')
|
93
|
+
cell->len = cur-(cell->start)-1;
|
93
94
|
else
|
94
95
|
cell->len = cur-(cell->start);
|
96
|
+
|
97
|
+
if (quote_count) cell->has_quotes = 1;
|
95
98
|
}
|
96
99
|
|
97
100
|
VALUE build_matrix(char *buf, int bufsize) {
|
98
101
|
int str_start = 0;
|
99
102
|
int num_rows = 1;
|
100
|
-
int
|
103
|
+
int quote_count = 0, quotes_matched = 1;
|
101
104
|
|
102
105
|
struct s_Row *first_row = alloc_row();
|
103
106
|
struct s_Row *cur_row = first_row;
|
@@ -112,33 +115,43 @@ VALUE build_matrix(char *buf, int bufsize) {
|
|
112
115
|
for (cur = buf; cur < buf+bufsize; cur++) {
|
113
116
|
|
114
117
|
if (*cur == '"') {
|
115
|
-
if (
|
116
|
-
|
117
|
-
|
118
|
-
|
118
|
+
if (0 == quote_count && cur_cell->start != cur) /* Quotes begin past opening of cell */
|
119
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: Quoted cell must open with '\"'", num_rows, cur_row->cell_count+1);
|
120
|
+
else
|
121
|
+
++quote_count;
|
119
122
|
}
|
120
123
|
|
121
|
-
|
124
|
+
quotes_matched = !(quote_count & 1); /* count is even */
|
125
|
+
|
126
|
+
if (quotes_matched) {
|
122
127
|
|
123
128
|
if (*cur == ',') {
|
124
129
|
|
125
|
-
|
130
|
+
if (quote_count && *(cur-1) != '"')
|
131
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d.", num_rows, cur_row->cell_count+1);
|
132
|
+
|
133
|
+
finalize_cell(cur_cell,cur,quote_count);
|
126
134
|
cur_cell->next_cell = alloc_cell();
|
127
135
|
cur_cell = cur_cell->next_cell;
|
128
|
-
cur_cell->start = cur+
|
136
|
+
cur_cell->start = cur+1;
|
129
137
|
cur_row->cell_count += 1;
|
138
|
+
quote_count = 0;
|
130
139
|
|
131
140
|
}
|
132
141
|
|
133
142
|
if (*cur == '\n') {
|
134
143
|
|
135
|
-
|
144
|
+
if (quote_count && !(*(cur-1) == '"' || *(cur-1) == '\r' && *(cur-2) == '"'))
|
145
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOL", num_rows, cur_row->cell_count+1);
|
146
|
+
|
147
|
+
finalize_cell(cur_cell,cur,quote_count);
|
136
148
|
cur_row->cell_count += 1;
|
137
149
|
cur_row->next_row = alloc_row();
|
138
150
|
cur_row = cur_row -> next_row;
|
139
151
|
cur_row->first_cell = alloc_cell();
|
140
152
|
cur_cell = cur_row->first_cell;
|
141
|
-
cur_cell->start = cur+
|
153
|
+
cur_cell->start = cur+1;
|
154
|
+
quote_count = 0;
|
142
155
|
|
143
156
|
num_rows++;
|
144
157
|
|
@@ -148,8 +161,16 @@ VALUE build_matrix(char *buf, int bufsize) {
|
|
148
161
|
|
149
162
|
}
|
150
163
|
|
151
|
-
if (
|
164
|
+
if (!quotes_matched) /* Reached EOF without matching quotes */
|
165
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: File ends without closing '\"'", num_rows, cur_row->cell_count+1);
|
166
|
+
else if (quote_count && *cur != '"')
|
167
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOF", num_rows, cur_row->cell_count+1);
|
168
|
+
|
169
|
+
if (cur_row->cell_count == 0) { /* Ended with newline */
|
152
170
|
num_rows--;
|
171
|
+
} else { /* No newline before EOF */
|
172
|
+
finalize_cell(cur_cell, cur, quote_count);
|
173
|
+
cur_row->cell_count++;
|
153
174
|
}
|
154
175
|
|
155
176
|
matrix = build_matrix_from_pointer_tree(first_row, num_rows);
|
@@ -160,32 +181,17 @@ VALUE build_matrix(char *buf, int bufsize) {
|
|
160
181
|
|
161
182
|
}
|
162
183
|
|
163
|
-
VALUE
|
164
|
-
|
165
|
-
char *mmapped_csv;
|
166
|
-
int filesize, csv;
|
167
|
-
|
168
|
-
csv = open(file, O_RDONLY);
|
169
|
-
filesize = lseek(csv, 0, SEEK_END);
|
170
|
-
mmapped_csv = (char*) mmap(0, filesize, PROT_READ, MAP_SHARED, csv, 0);
|
171
|
-
|
172
|
-
VALUE matrix = build_matrix(mmapped_csv,filesize);
|
173
|
-
|
174
|
-
munmap(mmapped_csv, filesize);
|
175
|
-
close(csv);
|
176
|
-
|
177
|
-
return matrix;
|
178
|
-
}
|
179
|
-
|
180
|
-
VALUE read_path(VALUE self, VALUE file) {
|
184
|
+
VALUE parse_string(VALUE self, VALUE string) {
|
181
185
|
|
182
|
-
return
|
186
|
+
return build_matrix(RSTRING_PTR(string), NUM2INT(rb_str_length(string)));
|
183
187
|
|
184
188
|
}
|
185
189
|
|
186
190
|
void Init_bamfcsv() {
|
187
191
|
|
188
|
-
|
189
|
-
|
192
|
+
BAMFCSV_module = rb_define_module("BAMFCSV");
|
193
|
+
VALUE bamfcsv_singleton_class = rb_singleton_class(BAMFCSV_module);
|
194
|
+
rb_define_private_method(bamfcsv_singleton_class, "__parse_string", parse_string, 1);
|
190
195
|
|
196
|
+
BAMFCSV_MalformedCSVError_class = rb_define_class_under(BAMFCSV_module, "MalformedCSVError", rb_eRuntimeError);
|
191
197
|
}
|
data/ext/bamfcsv/bamfcsv_ext.h
CHANGED
data/lib/bamfcsv/version.rb
CHANGED
data/lib/bamfcsv.rb
CHANGED
@@ -3,11 +3,11 @@ require 'bamfcsv/bamfcsv'
|
|
3
3
|
module BAMFCSV
|
4
4
|
|
5
5
|
def self.read(thing_to_read)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
__parse_string(File.read(thing_to_read))
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.parse(csv_str)
|
10
|
+
__parse_string(csv_str)
|
11
11
|
end
|
12
12
|
|
13
13
|
end
|
data/spec/lib/bamfcsv_spec.rb
CHANGED
@@ -1,10 +1,14 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe BAMFCSV do
|
4
|
-
it "has a
|
4
|
+
it "has a read method" do
|
5
5
|
BAMFCSV.should respond_to(:read)
|
6
6
|
end
|
7
7
|
|
8
|
+
it "has a parse method" do
|
9
|
+
BAMFCSV.should respond_to(:parse)
|
10
|
+
end
|
11
|
+
|
8
12
|
describe "#read" do
|
9
13
|
it "is a matrix given a filename" do
|
10
14
|
BAMFCSV.read("spec/fixtures/test.csv").should be_instance_of Array
|
@@ -19,7 +23,7 @@ describe BAMFCSV do
|
|
19
23
|
end
|
20
24
|
|
21
25
|
it "interprets empty cells correctly" do
|
22
|
-
BAMFCSV.read("spec/fixtures/bamf-comma-comma.csv").should == [["BAMF",
|
26
|
+
BAMFCSV.read("spec/fixtures/bamf-comma-comma.csv").should == [["BAMF",nil,"CSV"]]
|
23
27
|
end
|
24
28
|
|
25
29
|
it "escapes cells that are quoted" do
|
@@ -46,4 +50,70 @@ describe BAMFCSV do
|
|
46
50
|
end.should raise_error Errno::EISDIR
|
47
51
|
end
|
48
52
|
end
|
53
|
+
|
54
|
+
describe "#parse" do
|
55
|
+
it "correctly parses the last cell even if there is no newline" do
|
56
|
+
BAMFCSV.parse("1,2").should == [["1","2"]]
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'correctly escaptes ""' do
|
60
|
+
BAMFCSV.parse("1,\"\"2\"\"\n").should == [["1", '"2"']]
|
61
|
+
end
|
62
|
+
|
63
|
+
it "parses unquoted empty cells as nil" do
|
64
|
+
BAMFCSV.parse("1,,2").should == [["1",nil,"2"]]
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'parses quoted empty cells as ""' do
|
68
|
+
BAMFCSV.parse("1,\"\",2").should == [["1","","2"]]
|
69
|
+
end
|
70
|
+
|
71
|
+
describe "default CSV module compatibility" do
|
72
|
+
it "adds a nil cell after a trailing comma with no newline" do
|
73
|
+
BAMFCSV.parse("1,2,").should == [["1","2",nil]]
|
74
|
+
end
|
75
|
+
|
76
|
+
it "adds a nil cell after a trailing comma with an ending newline" do
|
77
|
+
BAMFCSV.parse("1,2,\n").should == [["1","2",nil]]
|
78
|
+
end
|
79
|
+
|
80
|
+
describe "when a quoted cell ends a line" do
|
81
|
+
it "does not raise an exception" do
|
82
|
+
expect { BAMFCSV.parse(%Q|1,2,"3,4"\n5,6,7|) }.should_not raise_error
|
83
|
+
expect { BAMFCSV.parse(%Q|1,2,"3,4"\r\n5,6,7|) }.should_not raise_error
|
84
|
+
end
|
85
|
+
|
86
|
+
it "correctly parses a quoted cell at the end of a line" do
|
87
|
+
BAMFCSV.parse(%Q|1,2,"3,4"\n5,6,7|).should == [["1","2","3,4"],["5","6","7"]]
|
88
|
+
BAMFCSV.parse(%Q|1,2,"3,4"\r\n5,6,7|).should == [["1","2","3,4"],["5","6","7"]]
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
it "raises BAMFCSV::MalformedCSVError when quotes appear in a cell which was not started with quotes" do
|
93
|
+
expect { BAMFCSV.parse(' ""') }.should raise_error(BAMFCSV::MalformedCSVError)
|
94
|
+
expect { BAMFCSV.parse(" \"\"\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
95
|
+
expect { BAMFCSV.parse(" \"\"\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
96
|
+
expect { BAMFCSV.parse('1, "",3') }.should raise_error(BAMFCSV::MalformedCSVError)
|
97
|
+
end
|
98
|
+
|
99
|
+
it "raises BAMFCSV::MalformedCSVError when a quoted cell is not closed at its end" do
|
100
|
+
expect { BAMFCSV.parse('"') }.should raise_error(BAMFCSV::MalformedCSVError)
|
101
|
+
expect { BAMFCSV.parse('" ""') }.should raise_error(BAMFCSV::MalformedCSVError)
|
102
|
+
expect { BAMFCSV.parse("\"\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
103
|
+
expect { BAMFCSV.parse("\"\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
104
|
+
expect { BAMFCSV.parse("\" \"\"\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
105
|
+
expect { BAMFCSV.parse("\" \"\"\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
106
|
+
expect { BAMFCSV.parse('1,"2,3') }.should raise_error(BAMFCSV::MalformedCSVError)
|
107
|
+
expect { BAMFCSV.parse("1,\"2,3\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
108
|
+
expect { BAMFCSV.parse("1,\"2,3\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
109
|
+
end
|
110
|
+
|
111
|
+
it "raises BAMFCSV::MalformedCSVError when quoted cell is closed before its end" do
|
112
|
+
expect { BAMFCSV.parse('"" ') }.should raise_error(BAMFCSV::MalformedCSVError)
|
113
|
+
expect { BAMFCSV.parse("\"\" \n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
114
|
+
expect { BAMFCSV.parse("\"\" \r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
115
|
+
expect { BAMFCSV.parse('1,"" ,2') }.should raise_error(BAMFCSV::MalformedCSVError)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
49
119
|
end
|
metadata
CHANGED
@@ -1,60 +1,63 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bamfcsv
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
3
|
+
version: !ruby/object:Gem::Version
|
5
4
|
prerelease:
|
5
|
+
version: 0.1.0
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- Jon Distad
|
9
9
|
- Alex Redington
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
|
13
|
+
|
14
|
+
date: 2011-04-03 00:00:00 -04:00
|
14
15
|
default_executable:
|
15
|
-
dependencies:
|
16
|
-
- !ruby/object:Gem::Dependency
|
16
|
+
dependencies:
|
17
|
+
- !ruby/object:Gem::Dependency
|
17
18
|
name: rspec
|
18
|
-
|
19
|
+
prerelease: false
|
20
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
21
|
none: false
|
20
|
-
requirements:
|
22
|
+
requirements:
|
21
23
|
- - ~>
|
22
|
-
- !ruby/object:Gem::Version
|
24
|
+
- !ruby/object:Gem::Version
|
23
25
|
version: 2.5.0
|
24
26
|
type: :development
|
25
|
-
|
26
|
-
|
27
|
-
- !ruby/object:Gem::Dependency
|
27
|
+
version_requirements: *id001
|
28
|
+
- !ruby/object:Gem::Dependency
|
28
29
|
name: fuubar
|
29
|
-
|
30
|
+
prerelease: false
|
31
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
32
|
none: false
|
31
|
-
requirements:
|
33
|
+
requirements:
|
32
34
|
- - ~>
|
33
|
-
- !ruby/object:Gem::Version
|
35
|
+
- !ruby/object:Gem::Version
|
34
36
|
version: 0.0.2
|
35
37
|
type: :development
|
36
|
-
|
37
|
-
|
38
|
-
- !ruby/object:Gem::Dependency
|
38
|
+
version_requirements: *id002
|
39
|
+
- !ruby/object:Gem::Dependency
|
39
40
|
name: rake-compiler
|
40
|
-
|
41
|
+
prerelease: false
|
42
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
43
|
none: false
|
42
|
-
requirements:
|
44
|
+
requirements:
|
43
45
|
- - ~>
|
44
|
-
- !ruby/object:Gem::Version
|
46
|
+
- !ruby/object:Gem::Version
|
45
47
|
version: 0.7.1
|
46
48
|
type: :development
|
47
|
-
|
48
|
-
version_requirements: *22499160
|
49
|
+
version_requirements: *id003
|
49
50
|
description: BAMFCSV parses csv like a BAMF. BAMF!!
|
50
|
-
email:
|
51
|
+
email:
|
51
52
|
- jon@thinkrelevance.com
|
52
53
|
- lovemachine@thinkrelevance.com
|
53
54
|
executables: []
|
54
|
-
|
55
|
+
|
56
|
+
extensions:
|
55
57
|
- ext/bamfcsv/extconf.rb
|
56
58
|
extra_rdoc_files: []
|
57
|
-
|
59
|
+
|
60
|
+
files:
|
58
61
|
- .gitignore
|
59
62
|
- .rspec
|
60
63
|
- Gemfile
|
@@ -81,27 +84,39 @@ files:
|
|
81
84
|
has_rdoc: true
|
82
85
|
homepage: https://github.com/jondistad/bamfcsv
|
83
86
|
licenses: []
|
87
|
+
|
84
88
|
post_install_message:
|
85
89
|
rdoc_options: []
|
86
|
-
|
90
|
+
|
91
|
+
require_paths:
|
87
92
|
- lib
|
88
93
|
- ext
|
89
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
94
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
90
95
|
none: false
|
91
|
-
requirements:
|
92
|
-
- -
|
93
|
-
- !ruby/object:Gem::Version
|
94
|
-
version:
|
95
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: "0"
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
101
|
none: false
|
97
|
-
requirements:
|
98
|
-
- -
|
99
|
-
- !ruby/object:Gem::Version
|
100
|
-
version:
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: "0"
|
101
106
|
requirements: []
|
107
|
+
|
102
108
|
rubyforge_project: bamfcsv
|
103
|
-
rubygems_version: 1.
|
109
|
+
rubygems_version: 1.6.2
|
104
110
|
signing_key:
|
105
111
|
specification_version: 3
|
106
112
|
summary: BAMF!!! Your csv is parsed.
|
107
|
-
test_files:
|
113
|
+
test_files:
|
114
|
+
- spec/fixtures/bamf-comma-comma.csv
|
115
|
+
- spec/fixtures/double-quotes.csv
|
116
|
+
- spec/fixtures/empty.csv
|
117
|
+
- spec/fixtures/escapes.csv
|
118
|
+
- spec/fixtures/one-column.csv
|
119
|
+
- spec/fixtures/terminated-with-cr.csv
|
120
|
+
- spec/fixtures/test.csv
|
121
|
+
- spec/lib/bamfcsv_spec.rb
|
122
|
+
- spec/spec_helper.rb
|