bamfcsv 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bamfcsv (0.0.1)
4
+ bamfcsv (0.0.2)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
@@ -1,7 +1,4 @@
1
1
  #include <stdlib.h>
2
- #include <ruby/ruby.h>
3
- #include <fcntl.h>
4
- #include <sys/mman.h>
5
2
  #include "bamfcsv_ext.h"
6
3
 
7
4
  struct s_Row *alloc_row() {
@@ -71,10 +68,14 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
71
68
  rb_ary_store(matrix,i,row);
72
69
  for (j = 0; j < cur_row->cell_count; j++) {
73
70
  if (*(cur_cell->start) == '"'
74
- && *((cur_cell->start)+((cur_cell->len-1)*sizeof(char))) == '"')
75
- new_string = rb_str_new(cur_cell->start+sizeof(char), cur_cell->len-(sizeof(char)*2));
76
- else
77
- new_string = rb_str_new(cur_cell->start, cur_cell->len);
71
+ && *((cur_cell->start)+(cur_cell->len-1)) == '"')
72
+ new_string = rb_str_new(cur_cell->start+1, cur_cell->len-2);
73
+ else {
74
+ if (cur_cell->len)
75
+ new_string = rb_str_new(cur_cell->start, cur_cell->len);
76
+ else
77
+ new_string = Qnil; /* Empty, unquoted cells are nil, for default ruby CSV compatibility */
78
+ }
78
79
  if (cur_cell->has_quotes) {
79
80
  rb_funcall(new_string, gsub, 2, dquote, quote);
80
81
  }
@@ -87,17 +88,19 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
87
88
  return matrix;
88
89
  }
89
90
 
90
- void finalize_cell(struct s_Cell *cell, char *cur) {
91
- if (*(cur-sizeof(char)) == '\r')
92
- cell->len = cur-(cell->start)-sizeof(char);
91
+ void finalize_cell(struct s_Cell *cell, char *cur, int quote_count) {
92
+ if (*(cur-1) == '\r')
93
+ cell->len = cur-(cell->start)-1;
93
94
  else
94
95
  cell->len = cur-(cell->start);
96
+
97
+ if (quote_count) cell->has_quotes = 1;
95
98
  }
96
99
 
97
100
  VALUE build_matrix(char *buf, int bufsize) {
98
101
  int str_start = 0;
99
102
  int num_rows = 1;
100
- int in_quote = 0;
103
+ int quote_count = 0, quotes_matched = 1;
101
104
 
102
105
  struct s_Row *first_row = alloc_row();
103
106
  struct s_Row *cur_row = first_row;
@@ -112,33 +115,43 @@ VALUE build_matrix(char *buf, int bufsize) {
112
115
  for (cur = buf; cur < buf+bufsize; cur++) {
113
116
 
114
117
  if (*cur == '"') {
115
- if (in_quote)
116
- if (*(cur+1) != ',')
117
- cur_cell->has_quotes = 1;
118
- in_quote = !in_quote;
118
+ if (0 == quote_count && cur_cell->start != cur) /* Quotes begin past opening of cell */
119
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: Quoted cell must open with '\"'", num_rows, cur_row->cell_count+1);
120
+ else
121
+ ++quote_count;
119
122
  }
120
123
 
121
- if (!in_quote) {
124
+ quotes_matched = !(quote_count & 1); /* count is even */
125
+
126
+ if (quotes_matched) {
122
127
 
123
128
  if (*cur == ',') {
124
129
 
125
- finalize_cell(cur_cell,cur);
130
+ if (quote_count && *(cur-1) != '"')
131
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d.", num_rows, cur_row->cell_count+1);
132
+
133
+ finalize_cell(cur_cell,cur,quote_count);
126
134
  cur_cell->next_cell = alloc_cell();
127
135
  cur_cell = cur_cell->next_cell;
128
- cur_cell->start = cur+sizeof(char);
136
+ cur_cell->start = cur+1;
129
137
  cur_row->cell_count += 1;
138
+ quote_count = 0;
130
139
 
131
140
  }
132
141
 
133
142
  if (*cur == '\n') {
134
143
 
135
- finalize_cell(cur_cell,cur);
144
+ if (quote_count && !(*(cur-1) == '"' || *(cur-1) == '\r' && *(cur-2) == '"'))
145
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOL", num_rows, cur_row->cell_count+1);
146
+
147
+ finalize_cell(cur_cell,cur,quote_count);
136
148
  cur_row->cell_count += 1;
137
149
  cur_row->next_row = alloc_row();
138
150
  cur_row = cur_row -> next_row;
139
151
  cur_row->first_cell = alloc_cell();
140
152
  cur_cell = cur_row->first_cell;
141
- cur_cell->start = cur+sizeof(char);
153
+ cur_cell->start = cur+1;
154
+ quote_count = 0;
142
155
 
143
156
  num_rows++;
144
157
 
@@ -148,8 +161,16 @@ VALUE build_matrix(char *buf, int bufsize) {
148
161
 
149
162
  }
150
163
 
151
- if (cur_row->cell_count == 0) {
164
+ if (!quotes_matched) /* Reached EOF without matching quotes */
165
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: File ends without closing '\"'", num_rows, cur_row->cell_count+1);
166
+ else if (quote_count && *cur != '"')
167
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOF", num_rows, cur_row->cell_count+1);
168
+
169
+ if (cur_row->cell_count == 0) { /* Ended with newline */
152
170
  num_rows--;
171
+ } else { /* No newline before EOF */
172
+ finalize_cell(cur_cell, cur, quote_count);
173
+ cur_row->cell_count++;
153
174
  }
154
175
 
155
176
  matrix = build_matrix_from_pointer_tree(first_row, num_rows);
@@ -160,32 +181,17 @@ VALUE build_matrix(char *buf, int bufsize) {
160
181
 
161
182
  }
162
183
 
163
- VALUE mm_parse(const char *file) {
164
-
165
- char *mmapped_csv;
166
- int filesize, csv;
167
-
168
- csv = open(file, O_RDONLY);
169
- filesize = lseek(csv, 0, SEEK_END);
170
- mmapped_csv = (char*) mmap(0, filesize, PROT_READ, MAP_SHARED, csv, 0);
171
-
172
- VALUE matrix = build_matrix(mmapped_csv,filesize);
173
-
174
- munmap(mmapped_csv, filesize);
175
- close(csv);
176
-
177
- return matrix;
178
- }
179
-
180
- VALUE read_path(VALUE self, VALUE file) {
184
+ VALUE parse_string(VALUE self, VALUE string) {
181
185
 
182
- return mm_parse(RSTRING_PTR(file));
186
+ return build_matrix(RSTRING_PTR(string), NUM2INT(rb_str_length(string)));
183
187
 
184
188
  }
185
189
 
186
190
  void Init_bamfcsv() {
187
191
 
188
- VALUE module = rb_define_module("BAMFCSV");
189
- rb_define_module_function(module, "read_path", read_path, 1);
192
+ BAMFCSV_module = rb_define_module("BAMFCSV");
193
+ VALUE bamfcsv_singleton_class = rb_singleton_class(BAMFCSV_module);
194
+ rb_define_private_method(bamfcsv_singleton_class, "__parse_string", parse_string, 1);
190
195
 
196
+ BAMFCSV_MalformedCSVError_class = rb_define_class_under(BAMFCSV_module, "MalformedCSVError", rb_eRuntimeError);
191
197
  }
@@ -1,6 +1,11 @@
1
1
  #ifndef _BAMFCSV_EXT_H
2
2
  #define _BAMFCSV_EXT_H
3
3
 
4
+ #include <ruby/ruby.h>
5
+
6
+ VALUE BAMFCSV_module;
7
+ VALUE BAMFCSV_MalformedCSVError_class;
8
+
4
9
  struct s_Row {
5
10
  struct s_Cell *first_cell;
6
11
  struct s_Row *next_row;
@@ -1,3 +1,3 @@
1
1
  module BAMFCSV
2
- VERSION = "0.0.2"
2
+ VERSION = "0.1.0"
3
3
  end
data/lib/bamfcsv.rb CHANGED
@@ -3,11 +3,11 @@ require 'bamfcsv/bamfcsv'
3
3
  module BAMFCSV
4
4
 
5
5
  def self.read(thing_to_read)
6
- if String === thing_to_read
7
- raise Errno::ENOENT.new("#{thing_to_read} does not exist") unless File.exist? thing_to_read
8
- raise Errno::EISDIR.new("#{thing_to_read} is a directory") if File.directory? thing_to_read
9
- read_path(thing_to_read)
10
- end
6
+ __parse_string(File.read(thing_to_read))
7
+ end
8
+
9
+ def self.parse(csv_str)
10
+ __parse_string(csv_str)
11
11
  end
12
12
 
13
13
  end
@@ -1,10 +1,14 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe BAMFCSV do
4
- it "has a parse method" do
4
+ it "has a read method" do
5
5
  BAMFCSV.should respond_to(:read)
6
6
  end
7
7
 
8
+ it "has a parse method" do
9
+ BAMFCSV.should respond_to(:parse)
10
+ end
11
+
8
12
  describe "#read" do
9
13
  it "is a matrix given a filename" do
10
14
  BAMFCSV.read("spec/fixtures/test.csv").should be_instance_of Array
@@ -19,7 +23,7 @@ describe BAMFCSV do
19
23
  end
20
24
 
21
25
  it "interprets empty cells correctly" do
22
- BAMFCSV.read("spec/fixtures/bamf-comma-comma.csv").should == [["BAMF","","CSV"]]
26
+ BAMFCSV.read("spec/fixtures/bamf-comma-comma.csv").should == [["BAMF",nil,"CSV"]]
23
27
  end
24
28
 
25
29
  it "escapes cells that are quoted" do
@@ -46,4 +50,70 @@ describe BAMFCSV do
46
50
  end.should raise_error Errno::EISDIR
47
51
  end
48
52
  end
53
+
54
+ describe "#parse" do
55
+ it "correctly parses the last cell even if there is no newline" do
56
+ BAMFCSV.parse("1,2").should == [["1","2"]]
57
+ end
58
+
59
+ it 'correctly escaptes ""' do
60
+ BAMFCSV.parse("1,\"\"2\"\"\n").should == [["1", '"2"']]
61
+ end
62
+
63
+ it "parses unquoted empty cells as nil" do
64
+ BAMFCSV.parse("1,,2").should == [["1",nil,"2"]]
65
+ end
66
+
67
+ it 'parses quoted empty cells as ""' do
68
+ BAMFCSV.parse("1,\"\",2").should == [["1","","2"]]
69
+ end
70
+
71
+ describe "default CSV module compatibility" do
72
+ it "adds a nil cell after a trailing comma with no newline" do
73
+ BAMFCSV.parse("1,2,").should == [["1","2",nil]]
74
+ end
75
+
76
+ it "adds a nil cell after a trailing comma with an ending newline" do
77
+ BAMFCSV.parse("1,2,\n").should == [["1","2",nil]]
78
+ end
79
+
80
+ describe "when a quoted cell ends a line" do
81
+ it "does not raise an exception" do
82
+ expect { BAMFCSV.parse(%Q|1,2,"3,4"\n5,6,7|) }.should_not raise_error
83
+ expect { BAMFCSV.parse(%Q|1,2,"3,4"\r\n5,6,7|) }.should_not raise_error
84
+ end
85
+
86
+ it "correctly parses a quoted cell at the end of a line" do
87
+ BAMFCSV.parse(%Q|1,2,"3,4"\n5,6,7|).should == [["1","2","3,4"],["5","6","7"]]
88
+ BAMFCSV.parse(%Q|1,2,"3,4"\r\n5,6,7|).should == [["1","2","3,4"],["5","6","7"]]
89
+ end
90
+ end
91
+
92
+ it "raises BAMFCSV::MalformedCSVError when quotes appear in a cell which was not started with quotes" do
93
+ expect { BAMFCSV.parse(' ""') }.should raise_error(BAMFCSV::MalformedCSVError)
94
+ expect { BAMFCSV.parse(" \"\"\n") }.should raise_error(BAMFCSV::MalformedCSVError)
95
+ expect { BAMFCSV.parse(" \"\"\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
96
+ expect { BAMFCSV.parse('1, "",3') }.should raise_error(BAMFCSV::MalformedCSVError)
97
+ end
98
+
99
+ it "raises BAMFCSV::MalformedCSVError when a quoted cell is not closed at its end" do
100
+ expect { BAMFCSV.parse('"') }.should raise_error(BAMFCSV::MalformedCSVError)
101
+ expect { BAMFCSV.parse('" ""') }.should raise_error(BAMFCSV::MalformedCSVError)
102
+ expect { BAMFCSV.parse("\"\n") }.should raise_error(BAMFCSV::MalformedCSVError)
103
+ expect { BAMFCSV.parse("\"\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
104
+ expect { BAMFCSV.parse("\" \"\"\n") }.should raise_error(BAMFCSV::MalformedCSVError)
105
+ expect { BAMFCSV.parse("\" \"\"\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
106
+ expect { BAMFCSV.parse('1,"2,3') }.should raise_error(BAMFCSV::MalformedCSVError)
107
+ expect { BAMFCSV.parse("1,\"2,3\n") }.should raise_error(BAMFCSV::MalformedCSVError)
108
+ expect { BAMFCSV.parse("1,\"2,3\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
109
+ end
110
+
111
+ it "raises BAMFCSV::MalformedCSVError when quoted cell is closed before its end" do
112
+ expect { BAMFCSV.parse('"" ') }.should raise_error(BAMFCSV::MalformedCSVError)
113
+ expect { BAMFCSV.parse("\"\" \n") }.should raise_error(BAMFCSV::MalformedCSVError)
114
+ expect { BAMFCSV.parse("\"\" \r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
115
+ expect { BAMFCSV.parse('1,"" ,2') }.should raise_error(BAMFCSV::MalformedCSVError)
116
+ end
117
+ end
118
+ end
49
119
  end
metadata CHANGED
@@ -1,60 +1,63 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: bamfcsv
3
- version: !ruby/object:Gem::Version
4
- version: 0.0.2
3
+ version: !ruby/object:Gem::Version
5
4
  prerelease:
5
+ version: 0.1.0
6
6
  platform: ruby
7
- authors:
7
+ authors:
8
8
  - Jon Distad
9
9
  - Alex Redington
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2011-04-01 00:00:00.000000000 +00:00
13
+
14
+ date: 2011-04-03 00:00:00 -04:00
14
15
  default_executable:
15
- dependencies:
16
- - !ruby/object:Gem::Dependency
16
+ dependencies:
17
+ - !ruby/object:Gem::Dependency
17
18
  name: rspec
18
- requirement: &22500120 !ruby/object:Gem::Requirement
19
+ prerelease: false
20
+ requirement: &id001 !ruby/object:Gem::Requirement
19
21
  none: false
20
- requirements:
22
+ requirements:
21
23
  - - ~>
22
- - !ruby/object:Gem::Version
24
+ - !ruby/object:Gem::Version
23
25
  version: 2.5.0
24
26
  type: :development
25
- prerelease: false
26
- version_requirements: *22500120
27
- - !ruby/object:Gem::Dependency
27
+ version_requirements: *id001
28
+ - !ruby/object:Gem::Dependency
28
29
  name: fuubar
29
- requirement: &22499620 !ruby/object:Gem::Requirement
30
+ prerelease: false
31
+ requirement: &id002 !ruby/object:Gem::Requirement
30
32
  none: false
31
- requirements:
33
+ requirements:
32
34
  - - ~>
33
- - !ruby/object:Gem::Version
35
+ - !ruby/object:Gem::Version
34
36
  version: 0.0.2
35
37
  type: :development
36
- prerelease: false
37
- version_requirements: *22499620
38
- - !ruby/object:Gem::Dependency
38
+ version_requirements: *id002
39
+ - !ruby/object:Gem::Dependency
39
40
  name: rake-compiler
40
- requirement: &22499160 !ruby/object:Gem::Requirement
41
+ prerelease: false
42
+ requirement: &id003 !ruby/object:Gem::Requirement
41
43
  none: false
42
- requirements:
44
+ requirements:
43
45
  - - ~>
44
- - !ruby/object:Gem::Version
46
+ - !ruby/object:Gem::Version
45
47
  version: 0.7.1
46
48
  type: :development
47
- prerelease: false
48
- version_requirements: *22499160
49
+ version_requirements: *id003
49
50
  description: BAMFCSV parses csv like a BAMF. BAMF!!
50
- email:
51
+ email:
51
52
  - jon@thinkrelevance.com
52
53
  - lovemachine@thinkrelevance.com
53
54
  executables: []
54
- extensions:
55
+
56
+ extensions:
55
57
  - ext/bamfcsv/extconf.rb
56
58
  extra_rdoc_files: []
57
- files:
59
+
60
+ files:
58
61
  - .gitignore
59
62
  - .rspec
60
63
  - Gemfile
@@ -81,27 +84,39 @@ files:
81
84
  has_rdoc: true
82
85
  homepage: https://github.com/jondistad/bamfcsv
83
86
  licenses: []
87
+
84
88
  post_install_message:
85
89
  rdoc_options: []
86
- require_paths:
90
+
91
+ require_paths:
87
92
  - lib
88
93
  - ext
89
- required_ruby_version: !ruby/object:Gem::Requirement
94
+ required_ruby_version: !ruby/object:Gem::Requirement
90
95
  none: false
91
- requirements:
92
- - - ! '>='
93
- - !ruby/object:Gem::Version
94
- version: '0'
95
- required_rubygems_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: "0"
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
101
  none: false
97
- requirements:
98
- - - ! '>='
99
- - !ruby/object:Gem::Version
100
- version: '0'
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: "0"
101
106
  requirements: []
107
+
102
108
  rubyforge_project: bamfcsv
103
- rubygems_version: 1.5.2
109
+ rubygems_version: 1.6.2
104
110
  signing_key:
105
111
  specification_version: 3
106
112
  summary: BAMF!!! Your csv is parsed.
107
- test_files: []
113
+ test_files:
114
+ - spec/fixtures/bamf-comma-comma.csv
115
+ - spec/fixtures/double-quotes.csv
116
+ - spec/fixtures/empty.csv
117
+ - spec/fixtures/escapes.csv
118
+ - spec/fixtures/one-column.csv
119
+ - spec/fixtures/terminated-with-cr.csv
120
+ - spec/fixtures/test.csv
121
+ - spec/lib/bamfcsv_spec.rb
122
+ - spec/spec_helper.rb