bamfcsv 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/ext/bamfcsv/bamfcsv_ext.c +18 -14
- data/lib/bamfcsv/table.rb +14 -1
- data/lib/bamfcsv/version.rb +1 -1
- data/lib/bamfcsv.rb +6 -1
- data/spec/lib/bamfcsv_spec.rb +72 -1
- metadata +40 -48
data/Gemfile.lock
CHANGED
data/ext/bamfcsv/bamfcsv_ext.c
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
#include <ruby/ruby.h>
|
2
|
+
#include <ruby/encoding.h>
|
2
3
|
#include <stdlib.h>
|
3
4
|
#include <stdio.h>
|
5
|
+
#include <stdbool.h>
|
4
6
|
|
5
7
|
VALUE BAMFCSV_module;
|
6
8
|
VALUE BAMFCSV_MalformedCSVError_class;
|
7
9
|
|
8
|
-
VALUE bamfcsv_finalize_cell(char *cell_start, char *cell_end, int quote_count) {
|
10
|
+
VALUE bamfcsv_finalize_cell(char *cell_start, char *cell_end, int quote_count, rb_encoding *enc) {
|
9
11
|
if (*cell_end == '\r')
|
10
12
|
cell_end--;
|
11
13
|
|
@@ -17,12 +19,20 @@ VALUE bamfcsv_finalize_cell(char *cell_start, char *cell_end, int quote_count) {
|
|
17
19
|
cell_end--;
|
18
20
|
}
|
19
21
|
|
20
|
-
VALUE cell_str =
|
22
|
+
VALUE cell_str = rb_enc_str_new(cell_start, cell_end-cell_start+1, enc);
|
21
23
|
|
22
24
|
return cell_str;
|
23
25
|
}
|
24
26
|
|
25
|
-
|
27
|
+
bool quotes_end_line(char* cur) {
|
28
|
+
return *(cur-1) == '"' || (*(cur-1) == '\r' && *(cur-2) == '"');
|
29
|
+
}
|
30
|
+
|
31
|
+
VALUE bamfcsv_parse_string(VALUE self, VALUE string) {
|
32
|
+
char *buf = RSTRING_PTR(string);
|
33
|
+
long bufsize = RSTRING_LEN(string);
|
34
|
+
rb_encoding *enc = rb_enc_from_index(ENCODING_GET(string));
|
35
|
+
|
26
36
|
unsigned long num_rows = 1, cell_count = 1;
|
27
37
|
int quote_count = 0, quotes_matched = 1;
|
28
38
|
|
@@ -57,7 +67,7 @@ VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
|
|
57
67
|
if (quote_count && *(cur-1) != '"')
|
58
68
|
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu.", num_rows, cell_count);
|
59
69
|
|
60
|
-
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
|
70
|
+
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count, enc);
|
61
71
|
if (quote_count)
|
62
72
|
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
|
63
73
|
|
@@ -69,10 +79,10 @@ VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
|
|
69
79
|
|
70
80
|
} else if (*cur == '\n') {
|
71
81
|
|
72
|
-
if (quote_count && !(
|
82
|
+
if (quote_count && !quotes_end_line(cur))
|
73
83
|
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu: EOL", num_rows, cell_count);
|
74
84
|
|
75
|
-
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
|
85
|
+
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count, enc);
|
76
86
|
if (quote_count)
|
77
87
|
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
|
78
88
|
/* Completely blank lines don't even get a nil. This matches CSV's behavior. */
|
@@ -95,10 +105,10 @@ VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
|
|
95
105
|
|
96
106
|
if (!quotes_matched) /* Reached EOF without matching quotes */
|
97
107
|
rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %lu, cell %lu: File ends without closing '\"'", num_rows, cell_count);
|
98
|
-
else if (quote_count &&
|
108
|
+
else if (quote_count && !quotes_end_line(cur)) /* Quotes closed before end of final cell */
|
99
109
|
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu: EOF", num_rows, cell_count);
|
100
110
|
|
101
|
-
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
|
111
|
+
VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count, enc);
|
102
112
|
if (quote_count)
|
103
113
|
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
|
104
114
|
/* Completely blank lines don't even get a nil. This matches CSV's behavior. */
|
@@ -110,12 +120,6 @@ VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
|
|
110
120
|
|
111
121
|
}
|
112
122
|
|
113
|
-
VALUE bamfcsv_parse_string(VALUE self, VALUE string) {
|
114
|
-
|
115
|
-
return bamfcsv_build_matrix(RSTRING_PTR(string), NUM2ULONG(rb_str_length(string)));
|
116
|
-
|
117
|
-
}
|
118
|
-
|
119
123
|
void Init_bamfcsv() {
|
120
124
|
|
121
125
|
BAMFCSV_module = rb_define_module("BAMFCSV");
|
data/lib/bamfcsv/table.rb
CHANGED
@@ -26,8 +26,12 @@ module BAMFCSV
|
|
26
26
|
@row_cache[idx] ||= Row.new(@header_map, @matrix[idx])
|
27
27
|
end
|
28
28
|
|
29
|
+
def empty?
|
30
|
+
@matrix.empty?
|
31
|
+
end
|
32
|
+
|
29
33
|
def inspect
|
30
|
-
"
|
34
|
+
"[#{self.map{|r| r.inspect}.join(", ")}]"
|
31
35
|
end
|
32
36
|
|
33
37
|
private
|
@@ -50,6 +54,15 @@ module BAMFCSV
|
|
50
54
|
def [](key)
|
51
55
|
@fields[@header_map[key]]
|
52
56
|
end
|
57
|
+
|
58
|
+
def inspect
|
59
|
+
pairs = []
|
60
|
+
headers.each do |h|
|
61
|
+
pairs << "#{h.inspect} => #{self[h].inspect}"
|
62
|
+
end
|
63
|
+
"{#{pairs.join(", ")}}"
|
64
|
+
end
|
65
|
+
|
53
66
|
end
|
54
67
|
end
|
55
68
|
end
|
data/lib/bamfcsv/version.rb
CHANGED
data/lib/bamfcsv.rb
CHANGED
@@ -9,7 +9,12 @@ module BAMFCSV
|
|
9
9
|
|
10
10
|
def self.parse(csv_str, opts={})
|
11
11
|
return [] if csv_str.empty?
|
12
|
-
|
12
|
+
# We need to do this because the C extension currently overwrites
|
13
|
+
# the input, and all of String#clone, String#dup, and String.new
|
14
|
+
# copy the pointer, not the contents. So we make a copy, parse
|
15
|
+
# that, and throw away the copy.
|
16
|
+
copy = "" + csv_str
|
17
|
+
matrix = __parse_string(copy)
|
13
18
|
if opts[:headers]
|
14
19
|
Table.new(matrix)
|
15
20
|
else
|
data/spec/lib/bamfcsv_spec.rb
CHANGED
@@ -56,7 +56,15 @@ describe BAMFCSV do
|
|
56
56
|
BAMFCSV.parse("1,2").should == [["1","2"]]
|
57
57
|
end
|
58
58
|
|
59
|
-
it
|
59
|
+
it "correctly parses the last cell when followed by CRLF" do
|
60
|
+
BAMFCSV.parse("1,2\r\n").should == [["1","2"]]
|
61
|
+
end
|
62
|
+
|
63
|
+
it "correctly parses the last cell when quotes and followed by CRLF" do
|
64
|
+
BAMFCSV.parse("1,\"2\"\r\n").should == [["1","2"]]
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'correctly escapes ""' do
|
60
68
|
BAMFCSV.parse(%Q|1,"""2"""\n|).should == [["1", '"2"']]
|
61
69
|
end
|
62
70
|
|
@@ -74,6 +82,19 @@ describe BAMFCSV do
|
|
74
82
|
BAMFCSV.parse("1\r\n2").should == [["1"],["2"]]
|
75
83
|
end
|
76
84
|
|
85
|
+
it "parses data outside the 7-bit range" do
|
86
|
+
BAMFCSV.parse("age \u226540 years").should == [["age \u226540 years"]]
|
87
|
+
end
|
88
|
+
|
89
|
+
it "doesn't alter the input" do
|
90
|
+
original = %Q{this,that,"the ""other"" thing"\r\n1,2,3\n}
|
91
|
+
# String#dup, String#clone, and String.new copy the pointer but
|
92
|
+
# share the same underlying buffer, d'oh!
|
93
|
+
input = "" + original
|
94
|
+
BAMFCSV.parse(input)
|
95
|
+
input.should == original
|
96
|
+
end
|
97
|
+
|
77
98
|
describe "default CSV module compatibility" do
|
78
99
|
it "adds a nil cell after a trailing comma with no newline" do
|
79
100
|
BAMFCSV.parse("1,2,").should == [["1","2",nil]]
|
@@ -179,5 +200,55 @@ describe BAMFCSV do
|
|
179
200
|
expect { table.each }.should_not raise_error(LocalJumpError)
|
180
201
|
end
|
181
202
|
end
|
203
|
+
|
204
|
+
describe "Table#inspect" do
|
205
|
+
it "is an Array of Hashes" do
|
206
|
+
csv = <<CSV
|
207
|
+
foo,bar
|
208
|
+
1,2
|
209
|
+
baz,quux
|
210
|
+
CSV
|
211
|
+
|
212
|
+
inspected = '[{"foo" => "1", "bar" => "2"}, {"foo" => "baz", "bar" => "quux"}]'
|
213
|
+
|
214
|
+
BAMFCSV.parse(csv, :headers => true).inspect.should == inspected
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
describe "Table#empty?" do
|
219
|
+
it "returns true for an empty table with headers" do
|
220
|
+
table = BAMFCSV.parse("column1,column2\n", :headers => true)
|
221
|
+
table.empty?.should be_true
|
222
|
+
end
|
223
|
+
|
224
|
+
it "returns false for a non-empty table with headers" do
|
225
|
+
table = BAMFCSV.parse("column1,column2\nfoo,bar", :headers => true)
|
226
|
+
table.empty?.should be_false
|
227
|
+
end
|
228
|
+
|
229
|
+
it "returns true for an empty table without headers" do
|
230
|
+
table = BAMFCSV.parse("", :headers => false)
|
231
|
+
table.empty?.should be_true
|
232
|
+
end
|
233
|
+
|
234
|
+
it "returns false for a non-empty table without headers" do
|
235
|
+
table = BAMFCSV.parse("foo,bar", :headers => false)
|
236
|
+
table.empty?.should be_false
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
240
|
+
describe "Table::Row#inspect" do
|
241
|
+
it "is a Hash" do
|
242
|
+
csv = <<CSV
|
243
|
+
foo,bar
|
244
|
+
1,2
|
245
|
+
baz,quux
|
246
|
+
CSV
|
247
|
+
|
248
|
+
inspected = '{"foo" => "1", "bar" => "2"}'
|
249
|
+
|
250
|
+
BAMFCSV.parse(csv, :headers => true).first.inspect.should == inspected
|
251
|
+
end
|
252
|
+
end
|
182
253
|
end
|
183
254
|
end
|
metadata
CHANGED
@@ -1,63 +1,59 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bamfcsv
|
3
|
-
version: !ruby/object:Gem::Version
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.1
|
4
5
|
prerelease:
|
5
|
-
version: 0.2.0
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- Jon Distad
|
9
9
|
- Alex Redington
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
dependencies:
|
17
|
-
- !ruby/object:Gem::Dependency
|
13
|
+
date: 2011-08-10 00:00:00.000000000Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
18
16
|
name: rspec
|
19
|
-
|
20
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
17
|
+
requirement: &2157181100 !ruby/object:Gem::Requirement
|
21
18
|
none: false
|
22
|
-
requirements:
|
19
|
+
requirements:
|
23
20
|
- - ~>
|
24
|
-
- !ruby/object:Gem::Version
|
21
|
+
- !ruby/object:Gem::Version
|
25
22
|
version: 2.5.0
|
26
23
|
type: :development
|
27
|
-
version_requirements: *id001
|
28
|
-
- !ruby/object:Gem::Dependency
|
29
|
-
name: fuubar
|
30
24
|
prerelease: false
|
31
|
-
|
25
|
+
version_requirements: *2157181100
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: fuubar
|
28
|
+
requirement: &2157180600 !ruby/object:Gem::Requirement
|
32
29
|
none: false
|
33
|
-
requirements:
|
30
|
+
requirements:
|
34
31
|
- - ~>
|
35
|
-
- !ruby/object:Gem::Version
|
32
|
+
- !ruby/object:Gem::Version
|
36
33
|
version: 0.0.2
|
37
34
|
type: :development
|
38
|
-
version_requirements: *id002
|
39
|
-
- !ruby/object:Gem::Dependency
|
40
|
-
name: rake-compiler
|
41
35
|
prerelease: false
|
42
|
-
|
36
|
+
version_requirements: *2157180600
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: rake-compiler
|
39
|
+
requirement: &2157180140 !ruby/object:Gem::Requirement
|
43
40
|
none: false
|
44
|
-
requirements:
|
41
|
+
requirements:
|
45
42
|
- - ~>
|
46
|
-
- !ruby/object:Gem::Version
|
43
|
+
- !ruby/object:Gem::Version
|
47
44
|
version: 0.7.1
|
48
45
|
type: :development
|
49
|
-
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *2157180140
|
50
48
|
description: BAMFCSV parses csv like a BAMF. BAMF!!
|
51
|
-
email:
|
49
|
+
email:
|
52
50
|
- jon@thinkrelevance.com
|
53
51
|
- lovemachine@thinkrelevance.com
|
54
52
|
executables: []
|
55
|
-
|
56
|
-
extensions:
|
53
|
+
extensions:
|
57
54
|
- ext/bamfcsv/extconf.rb
|
58
55
|
extra_rdoc_files: []
|
59
|
-
|
60
|
-
files:
|
56
|
+
files:
|
61
57
|
- .gitignore
|
62
58
|
- .rspec
|
63
59
|
- Gemfile
|
@@ -81,36 +77,32 @@ files:
|
|
81
77
|
- spec/spec_helper.rb
|
82
78
|
- tasks/compile.rake
|
83
79
|
- tasks/rspec.rake
|
84
|
-
has_rdoc: true
|
85
80
|
homepage: https://github.com/jondistad/bamfcsv
|
86
81
|
licenses: []
|
87
|
-
|
88
82
|
post_install_message:
|
89
83
|
rdoc_options: []
|
90
|
-
|
91
|
-
require_paths:
|
84
|
+
require_paths:
|
92
85
|
- lib
|
93
86
|
- ext
|
94
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
95
88
|
none: false
|
96
|
-
requirements:
|
97
|
-
- -
|
98
|
-
- !ruby/object:Gem::Version
|
99
|
-
version:
|
100
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ! '>='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
94
|
none: false
|
102
|
-
requirements:
|
103
|
-
- -
|
104
|
-
- !ruby/object:Gem::Version
|
105
|
-
version:
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
106
99
|
requirements: []
|
107
|
-
|
108
100
|
rubyforge_project: bamfcsv
|
109
|
-
rubygems_version: 1.
|
101
|
+
rubygems_version: 1.8.6
|
110
102
|
signing_key:
|
111
103
|
specification_version: 3
|
112
104
|
summary: BAMF!!! Your csv is parsed.
|
113
|
-
test_files:
|
105
|
+
test_files:
|
114
106
|
- spec/fixtures/bamf-comma-comma.csv
|
115
107
|
- spec/fixtures/double-quotes.csv
|
116
108
|
- spec/fixtures/empty.csv
|