bamfcsv 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/ext/bamfcsv/bamfcsv_ext.c +59 -121
- data/lib/bamfcsv/version.rb +1 -1
- metadata +55 -41
- data/ext/bamfcsv/bamfcsv_ext.h +0 -24
data/Gemfile.lock
CHANGED
data/ext/bamfcsv/bamfcsv_ext.c
CHANGED
@@ -1,124 +1,49 @@
|
|
1
|
+
#include <ruby/ruby.h>
|
1
2
|
#include <stdlib.h>
|
2
|
-
#include "bamfcsv_ext.h"
|
3
|
+
#include <stdio.h>
|
3
4
|
|
4
|
-
|
5
|
+
VALUE BAMFCSV_module;
|
6
|
+
VALUE BAMFCSV_MalformedCSVError_class;
|
5
7
|
|
6
|
-
|
8
|
+
VALUE bamfcsv_finalize_cell(char *cell_start, char *cell_end, int quote_count) {
|
9
|
+
if (*cell_end == '\r')
|
10
|
+
cell_end--;
|
7
11
|
|
8
|
-
|
9
|
-
|
10
|
-
new_cell -> next_cell = NULL;
|
11
|
-
new_cell -> has_quotes = 0;
|
12
|
-
row->cell_count++;
|
13
|
-
if (prev_cell != NULL) prev_cell->next_cell = new_cell;
|
12
|
+
if (cell_end < cell_start)
|
13
|
+
return Qnil;
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
struct bamfcsv_Row *bamfcsv_alloc_row(struct bamfcsv_Row *prev_row) {
|
20
|
-
|
21
|
-
struct bamfcsv_Row *new_row = malloc(sizeof(struct bamfcsv_Row));
|
22
|
-
|
23
|
-
new_row -> next_row = NULL;
|
24
|
-
new_row -> cell_count = 0;
|
25
|
-
new_row -> first_cell = bamfcsv_alloc_cell(new_row, NULL);
|
26
|
-
if (prev_row != NULL) prev_row->next_row = new_row;
|
27
|
-
|
28
|
-
return new_row;
|
29
|
-
|
30
|
-
}
|
31
|
-
|
32
|
-
void bamfcsv_free_rows(struct bamfcsv_Row *row) {
|
33
|
-
|
34
|
-
struct bamfcsv_Row *cur_row = row;
|
35
|
-
while (cur_row != NULL) {
|
36
|
-
struct bamfcsv_Cell *cur_cell = cur_row->first_cell;
|
37
|
-
while (cur_cell != NULL) {
|
38
|
-
struct bamfcsv_Cell *next_cell = cur_cell->next_cell;
|
39
|
-
free(cur_cell);
|
40
|
-
cur_cell = next_cell;
|
41
|
-
}
|
42
|
-
struct bamfcsv_Row *next_row = cur_row->next_row;
|
43
|
-
free(cur_row);
|
44
|
-
cur_row = next_row;
|
45
|
-
}
|
46
|
-
|
47
|
-
}
|
48
|
-
|
49
|
-
VALUE bamfcsv_build_matrix_from_pointer_tree(struct bamfcsv_Row *first_row, unsigned long num_rows) {
|
50
|
-
VALUE matrix;
|
51
|
-
VALUE row;
|
52
|
-
VALUE new_string;
|
53
|
-
unsigned long i,j;
|
54
|
-
struct bamfcsv_Row *cur_row;
|
55
|
-
struct bamfcsv_Cell *cur_cell;
|
56
|
-
|
57
|
-
cur_row = first_row;
|
58
|
-
matrix = rb_ary_new2(num_rows);
|
59
|
-
|
60
|
-
ID gsub = rb_intern("gsub!");
|
61
|
-
VALUE dquote = rb_str_new2("\"\""), quote = rb_str_new2("\"");
|
62
|
-
|
63
|
-
for (i = 0; i < num_rows; i++) {
|
64
|
-
|
65
|
-
cur_cell = cur_row->first_cell;
|
66
|
-
row = rb_ary_new2(cur_row->cell_count);
|
67
|
-
rb_ary_store(matrix,i,row);
|
68
|
-
if (cur_row->cell_count > 1 || cur_cell->len) {
|
69
|
-
for (j = 0; j < cur_row->cell_count; j++) {
|
70
|
-
if (cur_cell->has_quotes) {
|
71
|
-
new_string = rb_str_new(cur_cell->start+1, cur_cell->len-2);
|
72
|
-
rb_funcall(new_string, gsub, 2, dquote, quote);
|
73
|
-
} else {
|
74
|
-
if (cur_cell->len)
|
75
|
-
new_string = rb_str_new(cur_cell->start, cur_cell->len);
|
76
|
-
else
|
77
|
-
new_string = Qnil; /* Empty, unquoted cells are nil, for default ruby CSV compatibility */
|
78
|
-
}
|
79
|
-
rb_ary_store(row, j, new_string);
|
80
|
-
cur_cell = cur_cell->next_cell;
|
81
|
-
}
|
82
|
-
}
|
83
|
-
cur_row = cur_row->next_row;
|
15
|
+
if (quote_count) {
|
16
|
+
cell_start++;
|
17
|
+
cell_end--;
|
84
18
|
}
|
85
19
|
|
86
|
-
|
87
|
-
}
|
20
|
+
VALUE cell_str = rb_str_new(cell_start, cell_end-cell_start+1);
|
88
21
|
|
89
|
-
|
90
|
-
if (*(cur-1) == '\r')
|
91
|
-
cell->len = (int)(cur-(cell->start)-1);
|
92
|
-
else
|
93
|
-
cell->len = (int)(cur-(cell->start));
|
94
|
-
|
95
|
-
if (quote_count) cell->has_quotes = 1;
|
22
|
+
return cell_str;
|
96
23
|
}
|
97
24
|
|
98
|
-
VALUE bamfcsv_build_matrix(char *buf, int bufsize) {
|
99
|
-
|
100
|
-
int num_rows = 1;
|
25
|
+
VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
|
26
|
+
unsigned long num_rows = 1, cell_count = 1;
|
101
27
|
int quote_count = 0, quotes_matched = 1;
|
102
28
|
|
103
|
-
|
104
|
-
|
105
|
-
struct bamfcsv_Cell *cur_cell = cur_row->first_cell;
|
106
|
-
cur_cell->start = buf;
|
107
|
-
|
108
|
-
VALUE matrix;
|
29
|
+
VALUE matrix = rb_ary_new();
|
30
|
+
VALUE row = rb_ary_new();
|
109
31
|
|
110
|
-
char *cur;
|
32
|
+
char *cur = buf, *cell_start = buf;
|
111
33
|
|
112
34
|
if (bufsize > 0 && *(buf+bufsize-1) == '\n') {
|
113
35
|
*(buf+bufsize-1) = 0;
|
114
36
|
--bufsize;
|
115
37
|
}
|
38
|
+
|
39
|
+
VALUE dbl_dquote = rb_str_new("\"\"", 2), dquote = rb_str_new("\"", 1);
|
40
|
+
ID gsub_bang = rb_intern("gsub!");
|
116
41
|
|
117
|
-
for (
|
42
|
+
for (; cur < buf+bufsize; cur++) {
|
118
43
|
|
119
44
|
if (*cur == '"') {
|
120
|
-
if (0 == quote_count &&
|
121
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %
|
45
|
+
if (0 == quote_count && cell_start != cur) /* Quotes begin past opening of cell */
|
46
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %lu, cell %lu: Quoted cell must open with '\"'", num_rows, cell_count);
|
122
47
|
else
|
123
48
|
++quote_count;
|
124
49
|
}
|
@@ -130,43 +55,56 @@ VALUE bamfcsv_build_matrix(char *buf, int bufsize) {
|
|
130
55
|
if (*cur == ',') {
|
131
56
|
|
132
57
|
if (quote_count && *(cur-1) != '"')
|
133
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %
|
58
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu.", num_rows, cell_count);
|
59
|
+
|
60
|
+
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
|
61
|
+
if (quote_count)
|
62
|
+
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
|
63
|
+
|
64
|
+
rb_ary_push(row, cell_str);
|
65
|
+
cell_start = cur+1;
|
134
66
|
|
135
|
-
bamfcsv_finalize_cell(cur_cell, cur, quote_count);
|
136
|
-
cur_cell = bamfcsv_alloc_cell(cur_row, cur_cell);
|
137
|
-
cur_cell->start = cur+1;
|
138
67
|
quote_count = 0;
|
68
|
+
++cell_count;
|
139
69
|
|
140
70
|
} else if (*cur == '\n') {
|
141
71
|
|
142
72
|
if (quote_count && !(*(cur-1) == '"' || *(cur-1) == '\r' && *(cur-2) == '"'))
|
143
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %
|
144
|
-
|
145
|
-
bamfcsv_finalize_cell(
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
73
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu: EOL", num_rows, cell_count);
|
74
|
+
|
75
|
+
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
|
76
|
+
if (quote_count)
|
77
|
+
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
|
78
|
+
/* Completely blank lines don't even get a nil. This matches CSV's behavior. */
|
79
|
+
if (cell_count > 1 || cell_str != Qnil)
|
80
|
+
rb_ary_push(row, cell_str);
|
81
|
+
rb_ary_push(matrix, row);
|
82
|
+
row = rb_ary_new();
|
83
|
+
cell_start = cur+1;
|
150
84
|
|
151
|
-
|
85
|
+
quote_count = 0;
|
86
|
+
++num_rows;
|
87
|
+
cell_count = 0;
|
152
88
|
|
153
89
|
} else if (quote_count && *cur != '\r' && *cur != '"')
|
154
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %
|
90
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %lu, cell %lu", num_rows, cell_count);
|
155
91
|
|
156
92
|
}
|
157
93
|
|
158
94
|
}
|
159
95
|
|
160
96
|
if (!quotes_matched) /* Reached EOF without matching quotes */
|
161
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %
|
97
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %lu, cell %lu: File ends without closing '\"'", num_rows, cell_count);
|
162
98
|
else if (quote_count && *(cur-1) != '"') /* Quotes closed before end of final cell */
|
163
|
-
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %
|
164
|
-
|
165
|
-
bamfcsv_finalize_cell(cur_cell, cur, quote_count);
|
166
|
-
|
167
|
-
matrix = bamfcsv_build_matrix_from_pointer_tree(first_row, num_rows);
|
99
|
+
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu: EOF", num_rows, cell_count);
|
168
100
|
|
169
|
-
|
101
|
+
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
|
102
|
+
if (quote_count)
|
103
|
+
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
|
104
|
+
/* Completely blank lines don't even get a nil. This matches CSV's behavior. */
|
105
|
+
if (cell_count > 1 || cell_str != Qnil)
|
106
|
+
rb_ary_push(row, cell_str);
|
107
|
+
rb_ary_push(matrix, row);
|
170
108
|
|
171
109
|
return matrix;
|
172
110
|
|
@@ -174,7 +112,7 @@ VALUE bamfcsv_build_matrix(char *buf, int bufsize) {
|
|
174
112
|
|
175
113
|
VALUE bamfcsv_parse_string(VALUE self, VALUE string) {
|
176
114
|
|
177
|
-
return bamfcsv_build_matrix(RSTRING_PTR(string),
|
115
|
+
return bamfcsv_build_matrix(RSTRING_PTR(string), NUM2ULONG(rb_str_length(string)));
|
178
116
|
|
179
117
|
}
|
180
118
|
|
data/lib/bamfcsv/version.rb
CHANGED
metadata
CHANGED
@@ -1,60 +1,63 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bamfcsv
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
3
|
+
version: !ruby/object:Gem::Version
|
5
4
|
prerelease:
|
5
|
+
version: 0.2.0
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- Jon Distad
|
9
9
|
- Alex Redington
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
|
13
|
+
|
14
|
+
date: 2011-04-29 00:00:00 -04:00
|
14
15
|
default_executable:
|
15
|
-
dependencies:
|
16
|
-
- !ruby/object:Gem::Dependency
|
16
|
+
dependencies:
|
17
|
+
- !ruby/object:Gem::Dependency
|
17
18
|
name: rspec
|
18
|
-
|
19
|
+
prerelease: false
|
20
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
21
|
none: false
|
20
|
-
requirements:
|
22
|
+
requirements:
|
21
23
|
- - ~>
|
22
|
-
- !ruby/object:Gem::Version
|
24
|
+
- !ruby/object:Gem::Version
|
23
25
|
version: 2.5.0
|
24
26
|
type: :development
|
25
|
-
|
26
|
-
|
27
|
-
- !ruby/object:Gem::Dependency
|
27
|
+
version_requirements: *id001
|
28
|
+
- !ruby/object:Gem::Dependency
|
28
29
|
name: fuubar
|
29
|
-
|
30
|
+
prerelease: false
|
31
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
32
|
none: false
|
31
|
-
requirements:
|
33
|
+
requirements:
|
32
34
|
- - ~>
|
33
|
-
- !ruby/object:Gem::Version
|
35
|
+
- !ruby/object:Gem::Version
|
34
36
|
version: 0.0.2
|
35
37
|
type: :development
|
36
|
-
|
37
|
-
|
38
|
-
- !ruby/object:Gem::Dependency
|
38
|
+
version_requirements: *id002
|
39
|
+
- !ruby/object:Gem::Dependency
|
39
40
|
name: rake-compiler
|
40
|
-
|
41
|
+
prerelease: false
|
42
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
43
|
none: false
|
42
|
-
requirements:
|
44
|
+
requirements:
|
43
45
|
- - ~>
|
44
|
-
- !ruby/object:Gem::Version
|
46
|
+
- !ruby/object:Gem::Version
|
45
47
|
version: 0.7.1
|
46
48
|
type: :development
|
47
|
-
|
48
|
-
version_requirements: *23845220
|
49
|
+
version_requirements: *id003
|
49
50
|
description: BAMFCSV parses csv like a BAMF. BAMF!!
|
50
|
-
email:
|
51
|
+
email:
|
51
52
|
- jon@thinkrelevance.com
|
52
53
|
- lovemachine@thinkrelevance.com
|
53
54
|
executables: []
|
54
|
-
|
55
|
+
|
56
|
+
extensions:
|
55
57
|
- ext/bamfcsv/extconf.rb
|
56
58
|
extra_rdoc_files: []
|
57
|
-
|
59
|
+
|
60
|
+
files:
|
58
61
|
- .gitignore
|
59
62
|
- .rspec
|
60
63
|
- Gemfile
|
@@ -63,7 +66,6 @@ files:
|
|
63
66
|
- Rakefile
|
64
67
|
- bamfcsv.gemspec
|
65
68
|
- ext/bamfcsv/bamfcsv_ext.c
|
66
|
-
- ext/bamfcsv/bamfcsv_ext.h
|
67
69
|
- ext/bamfcsv/extconf.rb
|
68
70
|
- lib/bamfcsv.rb
|
69
71
|
- lib/bamfcsv/table.rb
|
@@ -82,27 +84,39 @@ files:
|
|
82
84
|
has_rdoc: true
|
83
85
|
homepage: https://github.com/jondistad/bamfcsv
|
84
86
|
licenses: []
|
87
|
+
|
85
88
|
post_install_message:
|
86
89
|
rdoc_options: []
|
87
|
-
|
90
|
+
|
91
|
+
require_paths:
|
88
92
|
- lib
|
89
93
|
- ext
|
90
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
94
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
91
95
|
none: false
|
92
|
-
requirements:
|
93
|
-
- -
|
94
|
-
- !ruby/object:Gem::Version
|
95
|
-
version:
|
96
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: "0"
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
101
|
none: false
|
98
|
-
requirements:
|
99
|
-
- -
|
100
|
-
- !ruby/object:Gem::Version
|
101
|
-
version:
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: "0"
|
102
106
|
requirements: []
|
107
|
+
|
103
108
|
rubyforge_project: bamfcsv
|
104
|
-
rubygems_version: 1.
|
109
|
+
rubygems_version: 1.5.3
|
105
110
|
signing_key:
|
106
111
|
specification_version: 3
|
107
112
|
summary: BAMF!!! Your csv is parsed.
|
108
|
-
test_files:
|
113
|
+
test_files:
|
114
|
+
- spec/fixtures/bamf-comma-comma.csv
|
115
|
+
- spec/fixtures/double-quotes.csv
|
116
|
+
- spec/fixtures/empty.csv
|
117
|
+
- spec/fixtures/escapes.csv
|
118
|
+
- spec/fixtures/one-column.csv
|
119
|
+
- spec/fixtures/terminated-with-cr.csv
|
120
|
+
- spec/fixtures/test.csv
|
121
|
+
- spec/lib/bamfcsv_spec.rb
|
122
|
+
- spec/spec_helper.rb
|
data/ext/bamfcsv/bamfcsv_ext.h
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
#ifndef _BAMFCSV_EXT_H
|
2
|
-
#define _BAMFCSV_EXT_H
|
3
|
-
|
4
|
-
#include <ruby/ruby.h>
|
5
|
-
|
6
|
-
VALUE BAMFCSV_module;
|
7
|
-
VALUE BAMFCSV_MalformedCSVError_class;
|
8
|
-
|
9
|
-
struct bamfcsv_Row {
|
10
|
-
struct bamfcsv_Cell *first_cell;
|
11
|
-
struct bamfcsv_Row *next_row;
|
12
|
-
unsigned long cell_count;
|
13
|
-
};
|
14
|
-
|
15
|
-
struct bamfcsv_Cell {
|
16
|
-
char *start;
|
17
|
-
int len;
|
18
|
-
int has_quotes;
|
19
|
-
struct bamfcsv_Cell *next_cell;
|
20
|
-
};
|
21
|
-
|
22
|
-
void Init_bamfcsv();
|
23
|
-
|
24
|
-
#endif
|