bamfcsv 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/ext/bamfcsv/bamfcsv_ext.c +53 -58
- data/lib/bamfcsv/table.rb +46 -0
- data/lib/bamfcsv/version.rb +1 -1
- data/lib/bamfcsv.rb +11 -4
- data/spec/lib/bamfcsv_spec.rb +42 -1
- data/spec/spec_helper.rb +1 -0
- metadata +3 -2
data/Gemfile.lock
CHANGED
data/ext/bamfcsv/bamfcsv_ext.c
CHANGED
@@ -1,19 +1,7 @@
|
|
1
1
|
#include <stdlib.h>
|
2
2
|
#include "bamfcsv_ext.h"
|
3
3
|
|
4
|
-
struct s_Row *
|
5
|
-
|
6
|
-
struct s_Row *new_row = malloc(sizeof(struct s_Row));
|
7
|
-
|
8
|
-
new_row -> first_cell = 0;
|
9
|
-
new_row -> next_row = 0;
|
10
|
-
new_row -> cell_count = 0;
|
11
|
-
|
12
|
-
return new_row;
|
13
|
-
|
14
|
-
}
|
15
|
-
|
16
|
-
struct s_Cell *alloc_cell() {
|
4
|
+
struct s_Cell *alloc_cell(struct s_Row *row, struct s_Cell *prev_cell) {
|
17
5
|
|
18
6
|
struct s_Cell *new_cell = malloc(sizeof(struct s_Cell));
|
19
7
|
|
@@ -21,11 +9,26 @@ struct s_Cell *alloc_cell() {
|
|
21
9
|
new_cell -> len = 0;
|
22
10
|
new_cell -> next_cell = 0;
|
23
11
|
new_cell -> has_quotes = 0;
|
12
|
+
row->cell_count++;
|
13
|
+
if (prev_cell) prev_cell->next_cell = new_cell;
|
24
14
|
|
25
15
|
return new_cell;
|
26
16
|
|
27
17
|
}
|
28
18
|
|
19
|
+
struct s_Row *alloc_row(struct s_Row *prev_row) {
|
20
|
+
|
21
|
+
struct s_Row *new_row = malloc(sizeof(struct s_Row));
|
22
|
+
|
23
|
+
new_row -> next_row = 0;
|
24
|
+
new_row -> cell_count = 0;
|
25
|
+
new_row -> first_cell = alloc_cell(new_row, 0);
|
26
|
+
if (prev_row) prev_row->next_row = new_row;
|
27
|
+
|
28
|
+
return new_row;
|
29
|
+
|
30
|
+
}
|
31
|
+
|
29
32
|
void free_cell(struct s_Cell *cell) {
|
30
33
|
|
31
34
|
if (cell != 0) {
|
@@ -66,21 +69,20 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
|
|
66
69
|
cur_cell = cur_row->first_cell;
|
67
70
|
row = rb_ary_new2(cur_row->cell_count);
|
68
71
|
rb_ary_store(matrix,i,row);
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
72
|
+
if (cur_row->cell_count > 1 || cur_cell->len) {
|
73
|
+
for (j = 0; j < cur_row->cell_count; j++) {
|
74
|
+
if (cur_cell->has_quotes) {
|
75
|
+
new_string = rb_str_new(cur_cell->start+1, cur_cell->len-2);
|
76
|
+
rb_funcall(new_string, gsub, 2, dquote, quote);
|
77
|
+
} else {
|
78
|
+
if (cur_cell->len)
|
79
|
+
new_string = rb_str_new(cur_cell->start, cur_cell->len);
|
80
|
+
else
|
81
|
+
new_string = Qnil; /* Empty, unquoted cells are nil, for default ruby CSV compatibility */
|
82
|
+
}
|
83
|
+
rb_ary_store(row, j, new_string);
|
84
|
+
cur_cell = cur_cell->next_cell;
|
81
85
|
}
|
82
|
-
rb_ary_store(row, j, new_string);
|
83
|
-
cur_cell = cur_cell->next_cell;
|
84
86
|
}
|
85
87
|
cur_row = cur_row->next_row;
|
86
88
|
}
|
@@ -90,9 +92,9 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
|
|
90
92
|
|
91
93
|
void finalize_cell(struct s_Cell *cell, char *cur, int quote_count) {
|
92
94
|
if (*(cur-1) == '\r')
|
93
|
-
cell->len = cur-(cell->start)-1;
|
95
|
+
cell->len = (int)(cur-(cell->start)-1);
|
94
96
|
else
|
95
|
-
cell->len = cur-(cell->start);
|
97
|
+
cell->len = (int)(cur-(cell->start));
|
96
98
|
|
97
99
|
if (quote_count) cell->has_quotes = 1;
|
98
100
|
}
|
@@ -102,21 +104,25 @@ VALUE build_matrix(char *buf, int bufsize) {
|
|
102
104
|
int num_rows = 1;
|
103
105
|
int quote_count = 0, quotes_matched = 1;
|
104
106
|
|
105
|
-
struct s_Row *first_row = alloc_row();
|
107
|
+
struct s_Row *first_row = alloc_row(0);
|
106
108
|
struct s_Row *cur_row = first_row;
|
107
|
-
struct s_Cell *cur_cell =
|
108
|
-
first_row->first_cell = cur_cell;
|
109
|
+
struct s_Cell *cur_cell = cur_row->first_cell;
|
109
110
|
cur_cell->start = buf;
|
110
111
|
|
111
112
|
VALUE matrix;
|
112
113
|
|
113
114
|
char *cur;
|
115
|
+
|
116
|
+
if (bufsize > 0 && *(buf+bufsize-1) == '\n') {
|
117
|
+
*(buf+bufsize-1) = 0;
|
118
|
+
--bufsize;
|
119
|
+
}
|
114
120
|
|
115
121
|
for (cur = buf; cur < buf+bufsize; cur++) {
|
116
122
|
|
117
123
|
if (*cur == '"') {
|
118
124
|
if (0 == quote_count && cur_cell->start != cur) /* Quotes begin past opening of cell */
|
119
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: Quoted cell must open with '\"'", num_rows, cur_row->cell_count
|
125
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: Quoted cell must open with '\"'", num_rows, cur_row->cell_count);
|
120
126
|
else
|
121
127
|
++quote_count;
|
122
128
|
}
|
@@ -128,50 +134,39 @@ VALUE build_matrix(char *buf, int bufsize) {
|
|
128
134
|
if (*cur == ',') {
|
129
135
|
|
130
136
|
if (quote_count && *(cur-1) != '"')
|
131
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d.", num_rows, cur_row->cell_count
|
137
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d.", num_rows, cur_row->cell_count);
|
132
138
|
|
133
|
-
finalize_cell(cur_cell,cur,quote_count);
|
134
|
-
cur_cell
|
135
|
-
cur_cell = cur_cell->next_cell;
|
139
|
+
finalize_cell(cur_cell, cur, quote_count);
|
140
|
+
cur_cell = alloc_cell(cur_row, cur_cell);
|
136
141
|
cur_cell->start = cur+1;
|
137
|
-
cur_row->cell_count += 1;
|
138
142
|
quote_count = 0;
|
139
143
|
|
140
|
-
}
|
141
|
-
|
142
|
-
if (*cur == '\n') {
|
144
|
+
} else if (*cur == '\n') {
|
143
145
|
|
144
146
|
if (quote_count && !(*(cur-1) == '"' || *(cur-1) == '\r' && *(cur-2) == '"'))
|
145
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOL", num_rows, cur_row->cell_count
|
147
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOL", num_rows, cur_row->cell_count);
|
146
148
|
|
147
|
-
finalize_cell(cur_cell,cur,quote_count);
|
148
|
-
cur_row
|
149
|
-
cur_row->next_row = alloc_row();
|
150
|
-
cur_row = cur_row -> next_row;
|
151
|
-
cur_row->first_cell = alloc_cell();
|
149
|
+
finalize_cell(cur_cell, cur, quote_count);
|
150
|
+
cur_row = alloc_row(cur_row);
|
152
151
|
cur_cell = cur_row->first_cell;
|
153
152
|
cur_cell->start = cur+1;
|
154
153
|
quote_count = 0;
|
155
154
|
|
156
155
|
num_rows++;
|
157
156
|
|
158
|
-
}
|
157
|
+
} else if (quote_count && *cur != '\r' && *cur != '"')
|
158
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d", num_rows, cur_row->cell_count);
|
159
159
|
|
160
160
|
}
|
161
161
|
|
162
162
|
}
|
163
163
|
|
164
164
|
if (!quotes_matched) /* Reached EOF without matching quotes */
|
165
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: File ends without closing '\"'", num_rows, cur_row->cell_count
|
166
|
-
else if (quote_count && *cur != '"')
|
167
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOF", num_rows, cur_row->cell_count
|
168
|
-
|
169
|
-
|
170
|
-
num_rows--;
|
171
|
-
} else { /* No newline before EOF */
|
172
|
-
finalize_cell(cur_cell, cur, quote_count);
|
173
|
-
cur_row->cell_count++;
|
174
|
-
}
|
165
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: File ends without closing '\"'", num_rows, cur_row->cell_count);
|
166
|
+
else if (quote_count && *(cur-1) != '"') /* Quotes closed before end of final cell */
|
167
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOF", num_rows, cur_row->cell_count);
|
168
|
+
|
169
|
+
finalize_cell(cur_cell, cur, quote_count);
|
175
170
|
|
176
171
|
matrix = build_matrix_from_pointer_tree(first_row, num_rows);
|
177
172
|
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module BAMFCSV
|
2
|
+
class Table
|
3
|
+
include Enumerable
|
4
|
+
def initialize(matrix)
|
5
|
+
@headers = matrix.shift
|
6
|
+
@matrix = matrix
|
7
|
+
@header_map = {}
|
8
|
+
@headers.each_with_index do |h, i|
|
9
|
+
@header_map[h] = i
|
10
|
+
end
|
11
|
+
@row_cache = []
|
12
|
+
end
|
13
|
+
|
14
|
+
def each
|
15
|
+
@matrix.size.times do |idx|
|
16
|
+
yield self[idx]
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def [](idx)
|
21
|
+
idx += @matrix.size if idx < 0
|
22
|
+
return if idx < 0 || idx >= @matrix.size
|
23
|
+
@row_cache[idx] ||= Row.new(@header_map, @matrix[idx])
|
24
|
+
end
|
25
|
+
|
26
|
+
def inspect
|
27
|
+
"#<BAMFCSV::Table>"
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
def row_hash(row)
|
32
|
+
Hash[@headers.zip(row)]
|
33
|
+
end
|
34
|
+
|
35
|
+
class Row
|
36
|
+
def initialize(header_map, values)
|
37
|
+
@header_map = header_map
|
38
|
+
@values = values
|
39
|
+
end
|
40
|
+
|
41
|
+
def [](key)
|
42
|
+
@values[@header_map[key]]
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
data/lib/bamfcsv/version.rb
CHANGED
data/lib/bamfcsv.rb
CHANGED
@@ -1,13 +1,20 @@
|
|
1
1
|
require 'bamfcsv/bamfcsv'
|
2
|
+
require 'bamfcsv/table'
|
2
3
|
|
3
4
|
module BAMFCSV
|
4
5
|
|
5
|
-
def self.read(thing_to_read)
|
6
|
-
|
6
|
+
def self.read(thing_to_read, opts={})
|
7
|
+
parse(File.read(thing_to_read), opts)
|
7
8
|
end
|
8
9
|
|
9
|
-
def self.parse(csv_str)
|
10
|
-
|
10
|
+
def self.parse(csv_str, opts={})
|
11
|
+
return [] if csv_str.empty?
|
12
|
+
matrix = __parse_string(csv_str)
|
13
|
+
if opts[:headers]
|
14
|
+
Table.new(matrix)
|
15
|
+
else
|
16
|
+
matrix
|
17
|
+
end
|
11
18
|
end
|
12
19
|
|
13
20
|
end
|
data/spec/lib/bamfcsv_spec.rb
CHANGED
@@ -57,7 +57,7 @@ describe BAMFCSV do
|
|
57
57
|
end
|
58
58
|
|
59
59
|
it 'correctly escaptes ""' do
|
60
|
-
BAMFCSV.parse(
|
60
|
+
BAMFCSV.parse(%Q|1,"""2"""\n|).should == [["1", '"2"']]
|
61
61
|
end
|
62
62
|
|
63
63
|
it "parses unquoted empty cells as nil" do
|
@@ -68,6 +68,12 @@ describe BAMFCSV do
|
|
68
68
|
BAMFCSV.parse("1,\"\",2").should == [["1","","2"]]
|
69
69
|
end
|
70
70
|
|
71
|
+
it "parses a single cell not followed by a newline correctly" do
|
72
|
+
BAMFCSV.parse("1").should == [["1"]]
|
73
|
+
BAMFCSV.parse("1\n2").should == [["1"],["2"]]
|
74
|
+
BAMFCSV.parse("1\r\n2").should == [["1"],["2"]]
|
75
|
+
end
|
76
|
+
|
71
77
|
describe "default CSV module compatibility" do
|
72
78
|
it "adds a nil cell after a trailing comma with no newline" do
|
73
79
|
BAMFCSV.parse("1,2,").should == [["1","2",nil]]
|
@@ -114,6 +120,41 @@ describe BAMFCSV do
|
|
114
120
|
expect { BAMFCSV.parse("\"\" \r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
|
115
121
|
expect { BAMFCSV.parse('1,"" ,2') }.should raise_error(BAMFCSV::MalformedCSVError)
|
116
122
|
end
|
123
|
+
|
124
|
+
it "raises BAMFCSV::MalformedCSVError when unescaped quotes appear in a quoted cell" do
|
125
|
+
expect { BAMFCSV.parse('"a"b"c"') }.should raise_error(BAMFCSV::MalformedCSVError)
|
126
|
+
expect { BAMFCSV.parse('"a"b"c",2') }.should raise_error(BAMFCSV::MalformedCSVError)
|
127
|
+
expect { BAMFCSV.parse(%Q("a"b"c"\n)) }.should raise_error(BAMFCSV::MalformedCSVError)
|
128
|
+
expect { BAMFCSV.parse(%Q("a"b"c"\r\n)) }.should raise_error(BAMFCSV::MalformedCSVError)
|
129
|
+
end
|
130
|
+
|
131
|
+
['', "\n", "\r\n", ",", "\n\n", "\r\n\r\n"].each do |csv|
|
132
|
+
csv_result = CSV.parse(csv)
|
133
|
+
it "BAMFCSV parses #{csv.inspect} the same way as CSV (#{csv_result.inspect})" do
|
134
|
+
BAMFCSV.parse(csv).should == csv_result
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
describe "generating a Table" do
|
141
|
+
describe "with only a header" do
|
142
|
+
let(:header_only) { BAMFCSV.parse("1,2,3", :headers => true) }
|
143
|
+
it "has no body rows" do
|
144
|
+
header_only.first.should be_nil
|
145
|
+
end
|
146
|
+
|
147
|
+
it "does nothing when iterating" do
|
148
|
+
expect { header_only.each { |x| raise "Oh dang!" } }.should_not raise_error
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
describe "with body rows" do
|
153
|
+
it "maps the headers the the values of each row" do
|
154
|
+
table = BAMFCSV.parse("a,b,c\r\n1,2,3\r\nx,y,z", :headers => true)
|
155
|
+
table.first["a"].should == "1"
|
156
|
+
table[1]["c"].should == "z"
|
157
|
+
end
|
117
158
|
end
|
118
159
|
end
|
119
160
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: bamfcsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.1.
|
5
|
+
version: 0.1.1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Jon Distad
|
@@ -11,7 +11,7 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2011-04-
|
14
|
+
date: 2011-04-08 00:00:00 -04:00
|
15
15
|
default_executable:
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
@@ -69,6 +69,7 @@ files:
|
|
69
69
|
- ext/bamfcsv/bamfcsv_ext.h
|
70
70
|
- ext/bamfcsv/extconf.rb
|
71
71
|
- lib/bamfcsv.rb
|
72
|
+
- lib/bamfcsv/table.rb
|
72
73
|
- lib/bamfcsv/version.rb
|
73
74
|
- spec/fixtures/bamf-comma-comma.csv
|
74
75
|
- spec/fixtures/double-quotes.csv
|