bamfcsv 0.1.4 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/ext/bamfcsv/bamfcsv_ext.c +59 -121
- data/lib/bamfcsv/version.rb +1 -1
- metadata +55 -41
- data/ext/bamfcsv/bamfcsv_ext.h +0 -24
data/Gemfile.lock
CHANGED
data/ext/bamfcsv/bamfcsv_ext.c
CHANGED
@@ -1,124 +1,49 @@
|
|
1
|
+
#include <ruby/ruby.h>
|
1
2
|
#include <stdlib.h>
|
2
|
-
#include "bamfcsv_ext.h"
|
3
|
+
#include <stdio.h>
|
3
4
|
|
4
|
-
|
5
|
+
VALUE BAMFCSV_module;
|
6
|
+
VALUE BAMFCSV_MalformedCSVError_class;
|
5
7
|
|
6
|
-
|
8
|
+
VALUE bamfcsv_finalize_cell(char *cell_start, char *cell_end, int quote_count) {
|
9
|
+
if (*cell_end == '\r')
|
10
|
+
cell_end--;
|
7
11
|
|
8
|
-
|
9
|
-
|
10
|
-
new_cell -> next_cell = NULL;
|
11
|
-
new_cell -> has_quotes = 0;
|
12
|
-
row->cell_count++;
|
13
|
-
if (prev_cell != NULL) prev_cell->next_cell = new_cell;
|
12
|
+
if (cell_end < cell_start)
|
13
|
+
return Qnil;
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
struct bamfcsv_Row *bamfcsv_alloc_row(struct bamfcsv_Row *prev_row) {
|
20
|
-
|
21
|
-
struct bamfcsv_Row *new_row = malloc(sizeof(struct bamfcsv_Row));
|
22
|
-
|
23
|
-
new_row -> next_row = NULL;
|
24
|
-
new_row -> cell_count = 0;
|
25
|
-
new_row -> first_cell = bamfcsv_alloc_cell(new_row, NULL);
|
26
|
-
if (prev_row != NULL) prev_row->next_row = new_row;
|
27
|
-
|
28
|
-
return new_row;
|
29
|
-
|
30
|
-
}
|
31
|
-
|
32
|
-
void bamfcsv_free_rows(struct bamfcsv_Row *row) {
|
33
|
-
|
34
|
-
struct bamfcsv_Row *cur_row = row;
|
35
|
-
while (cur_row != NULL) {
|
36
|
-
struct bamfcsv_Cell *cur_cell = cur_row->first_cell;
|
37
|
-
while (cur_cell != NULL) {
|
38
|
-
struct bamfcsv_Cell *next_cell = cur_cell->next_cell;
|
39
|
-
free(cur_cell);
|
40
|
-
cur_cell = next_cell;
|
41
|
-
}
|
42
|
-
struct bamfcsv_Row *next_row = cur_row->next_row;
|
43
|
-
free(cur_row);
|
44
|
-
cur_row = next_row;
|
45
|
-
}
|
46
|
-
|
47
|
-
}
|
48
|
-
|
49
|
-
VALUE bamfcsv_build_matrix_from_pointer_tree(struct bamfcsv_Row *first_row, unsigned long num_rows) {
|
50
|
-
VALUE matrix;
|
51
|
-
VALUE row;
|
52
|
-
VALUE new_string;
|
53
|
-
unsigned long i,j;
|
54
|
-
struct bamfcsv_Row *cur_row;
|
55
|
-
struct bamfcsv_Cell *cur_cell;
|
56
|
-
|
57
|
-
cur_row = first_row;
|
58
|
-
matrix = rb_ary_new2(num_rows);
|
59
|
-
|
60
|
-
ID gsub = rb_intern("gsub!");
|
61
|
-
VALUE dquote = rb_str_new2("\"\""), quote = rb_str_new2("\"");
|
62
|
-
|
63
|
-
for (i = 0; i < num_rows; i++) {
|
64
|
-
|
65
|
-
cur_cell = cur_row->first_cell;
|
66
|
-
row = rb_ary_new2(cur_row->cell_count);
|
67
|
-
rb_ary_store(matrix,i,row);
|
68
|
-
if (cur_row->cell_count > 1 || cur_cell->len) {
|
69
|
-
for (j = 0; j < cur_row->cell_count; j++) {
|
70
|
-
if (cur_cell->has_quotes) {
|
71
|
-
new_string = rb_str_new(cur_cell->start+1, cur_cell->len-2);
|
72
|
-
rb_funcall(new_string, gsub, 2, dquote, quote);
|
73
|
-
} else {
|
74
|
-
if (cur_cell->len)
|
75
|
-
new_string = rb_str_new(cur_cell->start, cur_cell->len);
|
76
|
-
else
|
77
|
-
new_string = Qnil; /* Empty, unquoted cells are nil, for default ruby CSV compatibility */
|
78
|
-
}
|
79
|
-
rb_ary_store(row, j, new_string);
|
80
|
-
cur_cell = cur_cell->next_cell;
|
81
|
-
}
|
82
|
-
}
|
83
|
-
cur_row = cur_row->next_row;
|
15
|
+
if (quote_count) {
|
16
|
+
cell_start++;
|
17
|
+
cell_end--;
|
84
18
|
}
|
85
19
|
|
86
|
-
|
87
|
-
}
|
20
|
+
VALUE cell_str = rb_str_new(cell_start, cell_end-cell_start+1);
|
88
21
|
|
89
|
-
|
90
|
-
if (*(cur-1) == '\r')
|
91
|
-
cell->len = (int)(cur-(cell->start)-1);
|
92
|
-
else
|
93
|
-
cell->len = (int)(cur-(cell->start));
|
94
|
-
|
95
|
-
if (quote_count) cell->has_quotes = 1;
|
22
|
+
return cell_str;
|
96
23
|
}
|
97
24
|
|
98
|
-
VALUE bamfcsv_build_matrix(char *buf, int bufsize) {
|
99
|
-
|
100
|
-
int num_rows = 1;
|
25
|
+
VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
|
26
|
+
unsigned long num_rows = 1, cell_count = 1;
|
101
27
|
int quote_count = 0, quotes_matched = 1;
|
102
28
|
|
103
|
-
|
104
|
-
|
105
|
-
struct bamfcsv_Cell *cur_cell = cur_row->first_cell;
|
106
|
-
cur_cell->start = buf;
|
107
|
-
|
108
|
-
VALUE matrix;
|
29
|
+
VALUE matrix = rb_ary_new();
|
30
|
+
VALUE row = rb_ary_new();
|
109
31
|
|
110
|
-
char *cur;
|
32
|
+
char *cur = buf, *cell_start = buf;
|
111
33
|
|
112
34
|
if (bufsize > 0 && *(buf+bufsize-1) == '\n') {
|
113
35
|
*(buf+bufsize-1) = 0;
|
114
36
|
--bufsize;
|
115
37
|
}
|
38
|
+
|
39
|
+
VALUE dbl_dquote = rb_str_new("\"\"", 2), dquote = rb_str_new("\"", 1);
|
40
|
+
ID gsub_bang = rb_intern("gsub!");
|
116
41
|
|
117
|
-
for (
|
42
|
+
for (; cur < buf+bufsize; cur++) {
|
118
43
|
|
119
44
|
if (*cur == '"') {
|
120
|
-
if (0 == quote_count &&
|
121
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %
|
45
|
+
if (0 == quote_count && cell_start != cur) /* Quotes begin past opening of cell */
|
46
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %lu, cell %lu: Quoted cell must open with '\"'", num_rows, cell_count);
|
122
47
|
else
|
123
48
|
++quote_count;
|
124
49
|
}
|
@@ -130,43 +55,56 @@ VALUE bamfcsv_build_matrix(char *buf, int bufsize) {
|
|
130
55
|
if (*cur == ',') {
|
131
56
|
|
132
57
|
if (quote_count && *(cur-1) != '"')
|
133
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %
|
58
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu.", num_rows, cell_count);
|
59
|
+
|
60
|
+
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
|
61
|
+
if (quote_count)
|
62
|
+
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
|
63
|
+
|
64
|
+
rb_ary_push(row, cell_str);
|
65
|
+
cell_start = cur+1;
|
134
66
|
|
135
|
-
bamfcsv_finalize_cell(cur_cell, cur, quote_count);
|
136
|
-
cur_cell = bamfcsv_alloc_cell(cur_row, cur_cell);
|
137
|
-
cur_cell->start = cur+1;
|
138
67
|
quote_count = 0;
|
68
|
+
++cell_count;
|
139
69
|
|
140
70
|
} else if (*cur == '\n') {
|
141
71
|
|
142
72
|
if (quote_count && !(*(cur-1) == '"' || *(cur-1) == '\r' && *(cur-2) == '"'))
|
143
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %
|
144
|
-
|
145
|
-
bamfcsv_finalize_cell(
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
73
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu: EOL", num_rows, cell_count);
|
74
|
+
|
75
|
+
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
|
76
|
+
if (quote_count)
|
77
|
+
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
|
78
|
+
/* Completely blank lines don't even get a nil. This matches CSV's behavior. */
|
79
|
+
if (cell_count > 1 || cell_str != Qnil)
|
80
|
+
rb_ary_push(row, cell_str);
|
81
|
+
rb_ary_push(matrix, row);
|
82
|
+
row = rb_ary_new();
|
83
|
+
cell_start = cur+1;
|
150
84
|
|
151
|
-
|
85
|
+
quote_count = 0;
|
86
|
+
++num_rows;
|
87
|
+
cell_count = 0;
|
152
88
|
|
153
89
|
} else if (quote_count && *cur != '\r' && *cur != '"')
|
154
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %
|
90
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %lu, cell %lu", num_rows, cell_count);
|
155
91
|
|
156
92
|
}
|
157
93
|
|
158
94
|
}
|
159
95
|
|
160
96
|
if (!quotes_matched) /* Reached EOF without matching quotes */
|
161
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %
|
97
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %lu, cell %lu: File ends without closing '\"'", num_rows, cell_count);
|
162
98
|
else if (quote_count && *(cur-1) != '"') /* Quotes closed before end of final cell */
|
163
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %
|
164
|
-
|
165
|
-
bamfcsv_finalize_cell(cur_cell, cur, quote_count);
|
166
|
-
|
167
|
-
matrix = bamfcsv_build_matrix_from_pointer_tree(first_row, num_rows);
|
99
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu: EOF", num_rows, cell_count);
|
168
100
|
|
169
|
-
|
101
|
+
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
|
102
|
+
if (quote_count)
|
103
|
+
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
|
104
|
+
/* Completely blank lines don't even get a nil. This matches CSV's behavior. */
|
105
|
+
if (cell_count > 1 || cell_str != Qnil)
|
106
|
+
rb_ary_push(row, cell_str);
|
107
|
+
rb_ary_push(matrix, row);
|
170
108
|
|
171
109
|
return matrix;
|
172
110
|
|
@@ -174,7 +112,7 @@ VALUE bamfcsv_build_matrix(char *buf, int bufsize) {
|
|
174
112
|
|
175
113
|
VALUE bamfcsv_parse_string(VALUE self, VALUE string) {
|
176
114
|
|
177
|
-
return bamfcsv_build_matrix(RSTRING_PTR(string),
|
115
|
+
return bamfcsv_build_matrix(RSTRING_PTR(string), NUM2ULONG(rb_str_length(string)));
|
178
116
|
|
179
117
|
}
|
180
118
|
|
data/lib/bamfcsv/version.rb
CHANGED
metadata
CHANGED
@@ -1,60 +1,63 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bamfcsv
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
3
|
+
version: !ruby/object:Gem::Version
|
5
4
|
prerelease:
|
5
|
+
version: 0.2.0
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- Jon Distad
|
9
9
|
- Alex Redington
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
|
13
|
+
|
14
|
+
date: 2011-04-29 00:00:00 -04:00
|
14
15
|
default_executable:
|
15
|
-
dependencies:
|
16
|
-
- !ruby/object:Gem::Dependency
|
16
|
+
dependencies:
|
17
|
+
- !ruby/object:Gem::Dependency
|
17
18
|
name: rspec
|
18
|
-
|
19
|
+
prerelease: false
|
20
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
21
|
none: false
|
20
|
-
requirements:
|
22
|
+
requirements:
|
21
23
|
- - ~>
|
22
|
-
- !ruby/object:Gem::Version
|
24
|
+
- !ruby/object:Gem::Version
|
23
25
|
version: 2.5.0
|
24
26
|
type: :development
|
25
|
-
|
26
|
-
|
27
|
-
- !ruby/object:Gem::Dependency
|
27
|
+
version_requirements: *id001
|
28
|
+
- !ruby/object:Gem::Dependency
|
28
29
|
name: fuubar
|
29
|
-
|
30
|
+
prerelease: false
|
31
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
32
|
none: false
|
31
|
-
requirements:
|
33
|
+
requirements:
|
32
34
|
- - ~>
|
33
|
-
- !ruby/object:Gem::Version
|
35
|
+
- !ruby/object:Gem::Version
|
34
36
|
version: 0.0.2
|
35
37
|
type: :development
|
36
|
-
|
37
|
-
|
38
|
-
- !ruby/object:Gem::Dependency
|
38
|
+
version_requirements: *id002
|
39
|
+
- !ruby/object:Gem::Dependency
|
39
40
|
name: rake-compiler
|
40
|
-
|
41
|
+
prerelease: false
|
42
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
43
|
none: false
|
42
|
-
requirements:
|
44
|
+
requirements:
|
43
45
|
- - ~>
|
44
|
-
- !ruby/object:Gem::Version
|
46
|
+
- !ruby/object:Gem::Version
|
45
47
|
version: 0.7.1
|
46
48
|
type: :development
|
47
|
-
|
48
|
-
version_requirements: *23845220
|
49
|
+
version_requirements: *id003
|
49
50
|
description: BAMFCSV parses csv like a BAMF. BAMF!!
|
50
|
-
email:
|
51
|
+
email:
|
51
52
|
- jon@thinkrelevance.com
|
52
53
|
- lovemachine@thinkrelevance.com
|
53
54
|
executables: []
|
54
|
-
|
55
|
+
|
56
|
+
extensions:
|
55
57
|
- ext/bamfcsv/extconf.rb
|
56
58
|
extra_rdoc_files: []
|
57
|
-
|
59
|
+
|
60
|
+
files:
|
58
61
|
- .gitignore
|
59
62
|
- .rspec
|
60
63
|
- Gemfile
|
@@ -63,7 +66,6 @@ files:
|
|
63
66
|
- Rakefile
|
64
67
|
- bamfcsv.gemspec
|
65
68
|
- ext/bamfcsv/bamfcsv_ext.c
|
66
|
-
- ext/bamfcsv/bamfcsv_ext.h
|
67
69
|
- ext/bamfcsv/extconf.rb
|
68
70
|
- lib/bamfcsv.rb
|
69
71
|
- lib/bamfcsv/table.rb
|
@@ -82,27 +84,39 @@ files:
|
|
82
84
|
has_rdoc: true
|
83
85
|
homepage: https://github.com/jondistad/bamfcsv
|
84
86
|
licenses: []
|
87
|
+
|
85
88
|
post_install_message:
|
86
89
|
rdoc_options: []
|
87
|
-
|
90
|
+
|
91
|
+
require_paths:
|
88
92
|
- lib
|
89
93
|
- ext
|
90
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
94
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
91
95
|
none: false
|
92
|
-
requirements:
|
93
|
-
- -
|
94
|
-
- !ruby/object:Gem::Version
|
95
|
-
version:
|
96
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: "0"
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
101
|
none: false
|
98
|
-
requirements:
|
99
|
-
- -
|
100
|
-
- !ruby/object:Gem::Version
|
101
|
-
version:
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: "0"
|
102
106
|
requirements: []
|
107
|
+
|
103
108
|
rubyforge_project: bamfcsv
|
104
|
-
rubygems_version: 1.
|
109
|
+
rubygems_version: 1.5.3
|
105
110
|
signing_key:
|
106
111
|
specification_version: 3
|
107
112
|
summary: BAMF!!! Your csv is parsed.
|
108
|
-
test_files:
|
113
|
+
test_files:
|
114
|
+
- spec/fixtures/bamf-comma-comma.csv
|
115
|
+
- spec/fixtures/double-quotes.csv
|
116
|
+
- spec/fixtures/empty.csv
|
117
|
+
- spec/fixtures/escapes.csv
|
118
|
+
- spec/fixtures/one-column.csv
|
119
|
+
- spec/fixtures/terminated-with-cr.csv
|
120
|
+
- spec/fixtures/test.csv
|
121
|
+
- spec/lib/bamfcsv_spec.rb
|
122
|
+
- spec/spec_helper.rb
|
data/ext/bamfcsv/bamfcsv_ext.h
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
#ifndef _BAMFCSV_EXT_H
|
2
|
-
#define _BAMFCSV_EXT_H
|
3
|
-
|
4
|
-
#include <ruby/ruby.h>
|
5
|
-
|
6
|
-
VALUE BAMFCSV_module;
|
7
|
-
VALUE BAMFCSV_MalformedCSVError_class;
|
8
|
-
|
9
|
-
struct bamfcsv_Row {
|
10
|
-
struct bamfcsv_Cell *first_cell;
|
11
|
-
struct bamfcsv_Row *next_row;
|
12
|
-
unsigned long cell_count;
|
13
|
-
};
|
14
|
-
|
15
|
-
struct bamfcsv_Cell {
|
16
|
-
char *start;
|
17
|
-
int len;
|
18
|
-
int has_quotes;
|
19
|
-
struct bamfcsv_Cell *next_cell;
|
20
|
-
};
|
21
|
-
|
22
|
-
void Init_bamfcsv();
|
23
|
-
|
24
|
-
#endif
|