bamfcsv 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/ext/bamfcsv/bamfcsv_ext.c +53 -58
- data/lib/bamfcsv/table.rb +46 -0
- data/lib/bamfcsv/version.rb +1 -1
- data/lib/bamfcsv.rb +11 -4
- data/spec/lib/bamfcsv_spec.rb +42 -1
- data/spec/spec_helper.rb +1 -0
- metadata +3 -2
data/Gemfile.lock
CHANGED
data/ext/bamfcsv/bamfcsv_ext.c
CHANGED
@@ -1,19 +1,7 @@
|
|
1
1
|
#include <stdlib.h>
|
2
2
|
#include "bamfcsv_ext.h"
|
3
3
|
|
4
|
-
struct s_Row *alloc_row() {
|
5
|
-
|
6
|
-
struct s_Row *new_row = malloc(sizeof(struct s_Row));
|
7
|
-
|
8
|
-
new_row -> first_cell = 0;
|
9
|
-
new_row -> next_row = 0;
|
10
|
-
new_row -> cell_count = 0;
|
11
|
-
|
12
|
-
return new_row;
|
13
|
-
|
14
|
-
}
|
15
|
-
|
16
|
-
struct s_Cell *alloc_cell() {
|
4
|
+
struct s_Cell *alloc_cell(struct s_Row *row, struct s_Cell *prev_cell) {
|
17
5
|
|
18
6
|
struct s_Cell *new_cell = malloc(sizeof(struct s_Cell));
|
19
7
|
|
@@ -21,11 +9,26 @@ struct s_Cell *alloc_cell() {
|
|
21
9
|
new_cell -> len = 0;
|
22
10
|
new_cell -> next_cell = 0;
|
23
11
|
new_cell -> has_quotes = 0;
|
12
|
+
row->cell_count++;
|
13
|
+
if (prev_cell) prev_cell->next_cell = new_cell;
|
24
14
|
|
25
15
|
return new_cell;
|
26
16
|
|
27
17
|
}
|
28
18
|
|
19
|
+
struct s_Row *alloc_row(struct s_Row *prev_row) {
|
20
|
+
|
21
|
+
struct s_Row *new_row = malloc(sizeof(struct s_Row));
|
22
|
+
|
23
|
+
new_row -> next_row = 0;
|
24
|
+
new_row -> cell_count = 0;
|
25
|
+
new_row -> first_cell = alloc_cell(new_row, 0);
|
26
|
+
if (prev_row) prev_row->next_row = new_row;
|
27
|
+
|
28
|
+
return new_row;
|
29
|
+
|
30
|
+
}
|
31
|
+
|
29
32
|
void free_cell(struct s_Cell *cell) {
|
30
33
|
|
31
34
|
if (cell != 0) {
|
@@ -66,21 +69,20 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
|
|
66
69
|
cur_cell = cur_row->first_cell;
|
67
70
|
row = rb_ary_new2(cur_row->cell_count);
|
68
71
|
rb_ary_store(matrix,i,row);
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
72
|
+
if (cur_row->cell_count > 1 || cur_cell->len) {
|
73
|
+
for (j = 0; j < cur_row->cell_count; j++) {
|
74
|
+
if (cur_cell->has_quotes) {
|
75
|
+
new_string = rb_str_new(cur_cell->start+1, cur_cell->len-2);
|
76
|
+
rb_funcall(new_string, gsub, 2, dquote, quote);
|
77
|
+
} else {
|
78
|
+
if (cur_cell->len)
|
79
|
+
new_string = rb_str_new(cur_cell->start, cur_cell->len);
|
80
|
+
else
|
81
|
+
new_string = Qnil; /* Empty, unquoted cells are nil, for default ruby CSV compatibility */
|
82
|
+
}
|
83
|
+
rb_ary_store(row, j, new_string);
|
84
|
+
cur_cell = cur_cell->next_cell;
|
81
85
|
}
|
82
|
-
rb_ary_store(row, j, new_string);
|
83
|
-
cur_cell = cur_cell->next_cell;
|
84
86
|
}
|
85
87
|
cur_row = cur_row->next_row;
|
86
88
|
}
|
@@ -90,9 +92,9 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
|
|
90
92
|
|
91
93
|
void finalize_cell(struct s_Cell *cell, char *cur, int quote_count) {
|
92
94
|
if (*(cur-1) == '\r')
|
93
|
-
cell->len = cur-(cell->start)-1;
|
95
|
+
cell->len = (int)(cur-(cell->start)-1);
|
94
96
|
else
|
95
|
-
cell->len = cur-(cell->start);
|
97
|
+
cell->len = (int)(cur-(cell->start));
|
96
98
|
|
97
99
|
if (quote_count) cell->has_quotes = 1;
|
98
100
|
}
|
@@ -102,21 +104,25 @@ VALUE build_matrix(char *buf, int bufsize) {
|
|
102
104
|
int num_rows = 1;
|
103
105
|
int quote_count = 0, quotes_matched = 1;
|
104
106
|
|
105
|
-
struct s_Row *first_row = alloc_row();
|
107
|
+
struct s_Row *first_row = alloc_row(0);
|
106
108
|
struct s_Row *cur_row = first_row;
|
107
|
-
struct s_Cell *cur_cell = alloc_cell();
|
108
|
-
first_row->first_cell = cur_cell;
|
109
|
+
struct s_Cell *cur_cell = cur_row->first_cell;
|
109
110
|
cur_cell->start = buf;
|
110
111
|
|
111
112
|
VALUE matrix;
|
112
113
|
|
113
114
|
char *cur;
|
115
|
+
|
116
|
+
if (bufsize > 0 && *(buf+bufsize-1) == '\n') {
|
117
|
+
*(buf+bufsize-1) = 0;
|
118
|
+
--bufsize;
|
119
|
+
}
|
114
120
|
|
115
121
|
for (cur = buf; cur < buf+bufsize; cur++) {
|
116
122
|
|
117
123
|
if (*cur == '"') {
|
118
124
|
if (0 == quote_count && cur_cell->start != cur) /* Quotes begin past opening of cell */
|
119
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: Quoted cell must open with '\"'", num_rows, cur_row->cell_count
|
125
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: Quoted cell must open with '\"'", num_rows, cur_row->cell_count);
|
120
126
|
else
|
121
127
|
++quote_count;
|
122
128
|
}
|
@@ -128,50 +134,39 @@ VALUE build_matrix(char *buf, int bufsize) {
|
|
128
134
|
if (*cur == ',') {
|
129
135
|
|
130
136
|
if (quote_count && *(cur-1) != '"')
|
131
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d.", num_rows, cur_row->cell_count
|
137
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d.", num_rows, cur_row->cell_count);
|
132
138
|
|
133
|
-
finalize_cell(cur_cell,cur,quote_count);
|
134
|
-
cur_cell->next_cell = alloc_cell();
|
135
|
-
cur_cell = cur_cell->next_cell;
|
139
|
+
finalize_cell(cur_cell, cur, quote_count);
|
140
|
+
cur_cell = alloc_cell(cur_row, cur_cell);
|
136
141
|
cur_cell->start = cur+1;
|
137
|
-
cur_row->cell_count += 1;
|
138
142
|
quote_count = 0;
|
139
143
|
|
140
|
-
}
|
141
|
-
|
142
|
-
if (*cur == '\n') {
|
144
|
+
} else if (*cur == '\n') {
|
143
145
|
|
144
146
|
if (quote_count && !(*(cur-1) == '"' || *(cur-1) == '\r' && *(cur-2) == '"'))
|
145
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOL", num_rows, cur_row->cell_count
|
147
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOL", num_rows, cur_row->cell_count);
|
146
148
|
|
147
|
-
finalize_cell(cur_cell,cur,quote_count);
|
148
|
-
cur_row
|
149
|
-
cur_row->next_row = alloc_row();
|
150
|
-
cur_row = cur_row -> next_row;
|
151
|
-
cur_row->first_cell = alloc_cell();
|
149
|
+
finalize_cell(cur_cell, cur, quote_count);
|
150
|
+
cur_row = alloc_row(cur_row);
|
152
151
|
cur_cell = cur_row->first_cell;
|
153
152
|
cur_cell->start = cur+1;
|
154
153
|
quote_count = 0;
|
155
154
|
|
156
155
|
num_rows++;
|
157
156
|
|
158
|
-
}
|
157
|
+
} else if (quote_count && *cur != '\r' && *cur != '"')
|
158
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d", num_rows, cur_row->cell_count);
|
159
159
|
|
160
160
|
}
|
161
161
|
|
162
162
|
}
|
163
163
|
|
164
164
|
if (!quotes_matched) /* Reached EOF without matching quotes */
|
165
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: File ends without closing '\"'", num_rows, cur_row->cell_count
|
166
|
-
else if (quote_count && *cur != '"')
|
167
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOF", num_rows, cur_row->cell_count
|
168
|
-
|
169
|
-
|
170
|
-
num_rows--;
|
171
|
-
} else { /* No newline before EOF */
|
172
|
-
finalize_cell(cur_cell, cur, quote_count);
|
173
|
-
cur_row->cell_count++;
|
174
|
-
}
|
165
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: File ends without closing '\"'", num_rows, cur_row->cell_count);
|
166
|
+
else if (quote_count && *(cur-1) != '"') /* Quotes closed before end of final cell */
|
167
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOF", num_rows, cur_row->cell_count);
|
168
|
+
|
169
|
+
finalize_cell(cur_cell, cur, quote_count);
|
175
170
|
|
176
171
|
matrix = build_matrix_from_pointer_tree(first_row, num_rows);
|
177
172
|
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module BAMFCSV
|
2
|
+
class Table
|
3
|
+
include Enumerable
|
4
|
+
def initialize(matrix)
|
5
|
+
@headers = matrix.shift
|
6
|
+
@matrix = matrix
|
7
|
+
@header_map = {}
|
8
|
+
@headers.each_with_index do |h, i|
|
9
|
+
@header_map[h] = i
|
10
|
+
end
|
11
|
+
@row_cache = []
|
12
|
+
end
|
13
|
+
|
14
|
+
def each
|
15
|
+
@matrix.size.times do |idx|
|
16
|
+
yield self[idx]
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def [](idx)
|
21
|
+
idx += @matrix.size if idx < 0
|
22
|
+
return if idx < 0 || idx >= @matrix.size
|
23
|
+
@row_cache[idx] ||= Row.new(@header_map, @matrix[idx])
|
24
|
+
end
|
25
|
+
|
26
|
+
def inspect
|
27
|
+
"#<BAMFCSV::Table>"
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
def row_hash(row)
|
32
|
+
Hash[@headers.zip(row)]
|
33
|
+
end
|
34
|
+
|
35
|
+
class Row
|
36
|
+
def initialize(header_map, values)
|
37
|
+
@header_map = header_map
|
38
|
+
@values = values
|
39
|
+
end
|
40
|
+
|
41
|
+
def [](key)
|
42
|
+
@values[@header_map[key]]
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
data/lib/bamfcsv/version.rb
CHANGED
data/lib/bamfcsv.rb
CHANGED
@@ -1,13 +1,20 @@
|
|
1
1
|
require 'bamfcsv/bamfcsv'
|
2
|
+
require 'bamfcsv/table'
|
2
3
|
|
3
4
|
module BAMFCSV
|
4
5
|
|
5
|
-
def self.read(thing_to_read)
|
6
|
-
|
6
|
+
def self.read(thing_to_read, opts={})
|
7
|
+
parse(File.read(thing_to_read), opts)
|
7
8
|
end
|
8
9
|
|
9
|
-
def self.parse(csv_str)
|
10
|
-
|
10
|
+
def self.parse(csv_str, opts={})
|
11
|
+
return [] if csv_str.empty?
|
12
|
+
matrix = __parse_string(csv_str)
|
13
|
+
if opts[:headers]
|
14
|
+
Table.new(matrix)
|
15
|
+
else
|
16
|
+
matrix
|
17
|
+
end
|
11
18
|
end
|
12
19
|
|
13
20
|
end
|
data/spec/lib/bamfcsv_spec.rb
CHANGED
@@ -57,7 +57,7 @@ describe BAMFCSV do
|
|
57
57
|
end
|
58
58
|
|
59
59
|
it 'correctly escaptes ""' do
|
60
|
-
BAMFCSV.parse(
|
60
|
+
BAMFCSV.parse(%Q|1,"""2"""\n|).should == [["1", '"2"']]
|
61
61
|
end
|
62
62
|
|
63
63
|
it "parses unquoted empty cells as nil" do
|
@@ -68,6 +68,12 @@ describe BAMFCSV do
|
|
68
68
|
BAMFCSV.parse("1,\"\",2").should == [["1","","2"]]
|
69
69
|
end
|
70
70
|
|
71
|
+
it "parses a single cell not followed by a newline correctly" do
|
72
|
+
BAMFCSV.parse("1").should == [["1"]]
|
73
|
+
BAMFCSV.parse("1\n2").should == [["1"],["2"]]
|
74
|
+
BAMFCSV.parse("1\r\n2").should == [["1"],["2"]]
|
75
|
+
end
|
76
|
+
|
71
77
|
describe "default CSV module compatibility" do
|
72
78
|
it "adds a nil cell after a trailing comma with no newline" do
|
73
79
|
BAMFCSV.parse("1,2,").should == [["1","2",nil]]
|
@@ -114,6 +120,41 @@ describe BAMFCSV do
|
|
114
120
|
expect { BAMFCSV.parse("\"\" \r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
115
121
|
expect { BAMFCSV.parse('1,"" ,2') }.should raise_error(BAMFCSV::MalformedCSVError)
|
116
122
|
end
|
123
|
+
|
124
|
+
it "raises BAMFCSV::MalformedCSVError when unescaped quotes appear in a quoted cell" do
|
125
|
+
expect { BAMFCSV.parse('"a"b"c"') }.should raise_error(BAMFCSV::MalformedCSVError)
|
126
|
+
expect { BAMFCSV.parse('"a"b"c",2') }.should raise_error(BAMFCSV::MalformedCSVError)
|
127
|
+
expect { BAMFCSV.parse(%Q("a"b"c"\n)) }.should raise_error(BAMFCSV::MalformedCSVError)
|
128
|
+
expect { BAMFCSV.parse(%Q("a"b"c"\r\n)) }.should raise_error(BAMFCSV::MalformedCSVError)
|
129
|
+
end
|
130
|
+
|
131
|
+
['', "\n", "\r\n", ",", "\n\n", "\r\n\r\n"].each do |csv|
|
132
|
+
csv_result = CSV.parse(csv)
|
133
|
+
it "BAMFCSV parses #{csv.inspect} the same way as CSV (#{csv_result.inspect})" do
|
134
|
+
BAMFCSV.parse(csv).should == csv_result
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
describe "generating a Table" do
|
141
|
+
describe "with only a header" do
|
142
|
+
let(:header_only) { BAMFCSV.parse("1,2,3", :headers => true) }
|
143
|
+
it "has no body rows" do
|
144
|
+
header_only.first.should be_nil
|
145
|
+
end
|
146
|
+
|
147
|
+
it "does nothing when iterating" do
|
148
|
+
expect { header_only.each { |x| raise "Oh dang!" } }.should_not raise_error
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
describe "with body rows" do
|
153
|
+
it "maps the headers the the values of each row" do
|
154
|
+
table = BAMFCSV.parse("a,b,c\r\n1,2,3\r\nx,y,z", :headers => true)
|
155
|
+
table.first["a"].should == "1"
|
156
|
+
table[1]["c"].should == "z"
|
157
|
+
end
|
117
158
|
end
|
118
159
|
end
|
119
160
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: bamfcsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.1.0
|
5
|
+
version: 0.1.1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Jon Distad
|
@@ -11,7 +11,7 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2011-04-
|
14
|
+
date: 2011-04-08 00:00:00 -04:00
|
15
15
|
default_executable:
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
@@ -69,6 +69,7 @@ files:
|
|
69
69
|
- ext/bamfcsv/bamfcsv_ext.h
|
70
70
|
- ext/bamfcsv/extconf.rb
|
71
71
|
- lib/bamfcsv.rb
|
72
|
+
- lib/bamfcsv/table.rb
|
72
73
|
- lib/bamfcsv/version.rb
|
73
74
|
- spec/fixtures/bamf-comma-comma.csv
|
74
75
|
- spec/fixtures/double-quotes.csv
|