bamfcsv 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bamfcsv (0.0.1)
4
+ bamfcsv (0.0.2)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
@@ -1,7 +1,4 @@
1
1
  #include <stdlib.h>
2
- #include <ruby/ruby.h>
3
- #include <fcntl.h>
4
- #include <sys/mman.h>
5
2
  #include "bamfcsv_ext.h"
6
3
 
7
4
  struct s_Row *alloc_row() {
@@ -71,10 +68,14 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
71
68
  rb_ary_store(matrix,i,row);
72
69
  for (j = 0; j < cur_row->cell_count; j++) {
73
70
  if (*(cur_cell->start) == '"'
74
- && *((cur_cell->start)+((cur_cell->len-1)*sizeof(char))) == '"')
75
- new_string = rb_str_new(cur_cell->start+sizeof(char), cur_cell->len-(sizeof(char)*2));
76
- else
77
- new_string = rb_str_new(cur_cell->start, cur_cell->len);
71
+ && *((cur_cell->start)+(cur_cell->len-1)) == '"')
72
+ new_string = rb_str_new(cur_cell->start+1, cur_cell->len-2);
73
+ else {
74
+ if (cur_cell->len)
75
+ new_string = rb_str_new(cur_cell->start, cur_cell->len);
76
+ else
77
+ new_string = Qnil; /* Empty, unquoted cells are nil, for default ruby CSV compatibility */
78
+ }
78
79
  if (cur_cell->has_quotes) {
79
80
  rb_funcall(new_string, gsub, 2, dquote, quote);
80
81
  }
@@ -87,17 +88,19 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
87
88
  return matrix;
88
89
  }
89
90
 
90
- void finalize_cell(struct s_Cell *cell, char *cur) {
91
- if (*(cur-sizeof(char)) == '\r')
92
- cell->len = cur-(cell->start)-sizeof(char);
91
+ void finalize_cell(struct s_Cell *cell, char *cur, int quote_count) {
92
+ if (*(cur-1) == '\r')
93
+ cell->len = cur-(cell->start)-1;
93
94
  else
94
95
  cell->len = cur-(cell->start);
96
+
97
+ if (quote_count) cell->has_quotes = 1;
95
98
  }
96
99
 
97
100
  VALUE build_matrix(char *buf, int bufsize) {
98
101
  int str_start = 0;
99
102
  int num_rows = 1;
100
- int in_quote = 0;
103
+ int quote_count = 0, quotes_matched = 1;
101
104
 
102
105
  struct s_Row *first_row = alloc_row();
103
106
  struct s_Row *cur_row = first_row;
@@ -112,33 +115,43 @@ VALUE build_matrix(char *buf, int bufsize) {
112
115
  for (cur = buf; cur < buf+bufsize; cur++) {
113
116
 
114
117
  if (*cur == '"') {
115
- if (in_quote)
116
- if (*(cur+1) != ',')
117
- cur_cell->has_quotes = 1;
118
- in_quote = !in_quote;
118
+ if (0 == quote_count && cur_cell->start != cur) /* Quotes begin past opening of cell */
119
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: Quoted cell must open with '\"'", num_rows, cur_row->cell_count+1);
120
+ else
121
+ ++quote_count;
119
122
  }
120
123
 
121
- if (!in_quote) {
124
+ quotes_matched = !(quote_count & 1); /* count is even */
125
+
126
+ if (quotes_matched) {
122
127
 
123
128
  if (*cur == ',') {
124
129
 
125
- finalize_cell(cur_cell,cur);
130
+ if (quote_count && *(cur-1) != '"')
131
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d.", num_rows, cur_row->cell_count+1);
132
+
133
+ finalize_cell(cur_cell,cur,quote_count);
126
134
  cur_cell->next_cell = alloc_cell();
127
135
  cur_cell = cur_cell->next_cell;
128
- cur_cell->start = cur+sizeof(char);
136
+ cur_cell->start = cur+1;
129
137
  cur_row->cell_count += 1;
138
+ quote_count = 0;
130
139
 
131
140
  }
132
141
 
133
142
  if (*cur == '\n') {
134
143
 
135
- finalize_cell(cur_cell,cur);
144
+ if (quote_count && !(*(cur-1) == '"' || *(cur-1) == '\r' && *(cur-2) == '"'))
145
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOL", num_rows, cur_row->cell_count+1);
146
+
147
+ finalize_cell(cur_cell,cur,quote_count);
136
148
  cur_row->cell_count += 1;
137
149
  cur_row->next_row = alloc_row();
138
150
  cur_row = cur_row -> next_row;
139
151
  cur_row->first_cell = alloc_cell();
140
152
  cur_cell = cur_row->first_cell;
141
- cur_cell->start = cur+sizeof(char);
153
+ cur_cell->start = cur+1;
154
+ quote_count = 0;
142
155
 
143
156
  num_rows++;
144
157
 
@@ -148,8 +161,16 @@ VALUE build_matrix(char *buf, int bufsize) {
148
161
 
149
162
  }
150
163
 
151
- if (cur_row->cell_count == 0) {
164
+ if (!quotes_matched) /* Reached EOF without matching quotes */
165
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: File ends without closing '\"'", num_rows, cur_row->cell_count+1);
166
+ else if (quote_count && *cur != '"')
167
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOF", num_rows, cur_row->cell_count+1);
168
+
169
+ if (cur_row->cell_count == 0) { /* Ended with newline */
152
170
  num_rows--;
171
+ } else { /* No newline before EOF */
172
+ finalize_cell(cur_cell, cur, quote_count);
173
+ cur_row->cell_count++;
153
174
  }
154
175
 
155
176
  matrix = build_matrix_from_pointer_tree(first_row, num_rows);
@@ -160,32 +181,17 @@ VALUE build_matrix(char *buf, int bufsize) {
160
181
 
161
182
  }
162
183
 
163
- VALUE mm_parse(const char *file) {
164
-
165
- char *mmapped_csv;
166
- int filesize, csv;
167
-
168
- csv = open(file, O_RDONLY);
169
- filesize = lseek(csv, 0, SEEK_END);
170
- mmapped_csv = (char*) mmap(0, filesize, PROT_READ, MAP_SHARED, csv, 0);
171
-
172
- VALUE matrix = build_matrix(mmapped_csv,filesize);
173
-
174
- munmap(mmapped_csv, filesize);
175
- close(csv);
176
-
177
- return matrix;
178
- }
179
-
180
- VALUE read_path(VALUE self, VALUE file) {
184
+ VALUE parse_string(VALUE self, VALUE string) {
181
185
 
182
- return mm_parse(RSTRING_PTR(file));
186
+ return build_matrix(RSTRING_PTR(string), NUM2INT(rb_str_length(string)));
183
187
 
184
188
  }
185
189
 
186
190
  void Init_bamfcsv() {
187
191
 
188
- VALUE module = rb_define_module("BAMFCSV");
189
- rb_define_module_function(module, "read_path", read_path, 1);
192
+ BAMFCSV_module = rb_define_module("BAMFCSV");
193
+ VALUE bamfcsv_singleton_class = rb_singleton_class(BAMFCSV_module);
194
+ rb_define_private_method(bamfcsv_singleton_class, "__parse_string", parse_string, 1);
190
195
 
196
+ BAMFCSV_MalformedCSVError_class = rb_define_class_under(BAMFCSV_module, "MalformedCSVError", rb_eRuntimeError);
191
197
  }
@@ -1,6 +1,11 @@
1
1
  #ifndef _BAMFCSV_EXT_H
2
2
  #define _BAMFCSV_EXT_H
3
3
 
4
+ #include <ruby/ruby.h>
5
+
6
+ VALUE BAMFCSV_module;
7
+ VALUE BAMFCSV_MalformedCSVError_class;
8
+
4
9
  struct s_Row {
5
10
  struct s_Cell *first_cell;
6
11
  struct s_Row *next_row;
@@ -1,3 +1,3 @@
1
1
  module BAMFCSV
2
- VERSION = "0.0.2"
2
+ VERSION = "0.1.0"
3
3
  end
data/lib/bamfcsv.rb CHANGED
@@ -3,11 +3,11 @@ require 'bamfcsv/bamfcsv'
3
3
  module BAMFCSV
4
4
 
5
5
  def self.read(thing_to_read)
6
- if String === thing_to_read
7
- raise Errno::ENOENT.new("#{thing_to_read} does not exist") unless File.exist? thing_to_read
8
- raise Errno::EISDIR.new("#{thing_to_read} is a directory") if File.directory? thing_to_read
9
- read_path(thing_to_read)
10
- end
6
+ __parse_string(File.read(thing_to_read))
7
+ end
8
+
9
+ def self.parse(csv_str)
10
+ __parse_string(csv_str)
11
11
  end
12
12
 
13
13
  end
@@ -1,10 +1,14 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe BAMFCSV do
4
- it "has a parse method" do
4
+ it "has a read method" do
5
5
  BAMFCSV.should respond_to(:read)
6
6
  end
7
7
 
8
+ it "has a parse method" do
9
+ BAMFCSV.should respond_to(:parse)
10
+ end
11
+
8
12
  describe "#read" do
9
13
  it "is a matrix given a filename" do
10
14
  BAMFCSV.read("spec/fixtures/test.csv").should be_instance_of Array
@@ -19,7 +23,7 @@ describe BAMFCSV do
19
23
  end
20
24
 
21
25
  it "interprets empty cells correctly" do
22
- BAMFCSV.read("spec/fixtures/bamf-comma-comma.csv").should == [["BAMF","","CSV"]]
26
+ BAMFCSV.read("spec/fixtures/bamf-comma-comma.csv").should == [["BAMF",nil,"CSV"]]
23
27
  end
24
28
 
25
29
  it "escapes cells that are quoted" do
@@ -46,4 +50,70 @@ describe BAMFCSV do
46
50
  end.should raise_error Errno::EISDIR
47
51
  end
48
52
  end
53
+
54
+ describe "#parse" do
55
+ it "correctly parses the last cell even if there is no newline" do
56
+ BAMFCSV.parse("1,2").should == [["1","2"]]
57
+ end
58
+
59
+ it 'correctly escaptes ""' do
60
+ BAMFCSV.parse("1,\"\"2\"\"\n").should == [["1", '"2"']]
61
+ end
62
+
63
+ it "parses unquoted empty cells as nil" do
64
+ BAMFCSV.parse("1,,2").should == [["1",nil,"2"]]
65
+ end
66
+
67
+ it 'parses quoted empty cells as ""' do
68
+ BAMFCSV.parse("1,\"\",2").should == [["1","","2"]]
69
+ end
70
+
71
+ describe "default CSV module compatibility" do
72
+ it "adds a nil cell after a trailing comma with no newline" do
73
+ BAMFCSV.parse("1,2,").should == [["1","2",nil]]
74
+ end
75
+
76
+ it "adds a nil cell after a trailing comma with an ending newline" do
77
+ BAMFCSV.parse("1,2,\n").should == [["1","2",nil]]
78
+ end
79
+
80
+ describe "when a quoted cell ends a line" do
81
+ it "does not raise an exception" do
82
+ expect { BAMFCSV.parse(%Q|1,2,"3,4"\n5,6,7|) }.should_not raise_error
83
+ expect { BAMFCSV.parse(%Q|1,2,"3,4"\r\n5,6,7|) }.should_not raise_error
84
+ end
85
+
86
+ it "correctly parses a quoted cell at the end of a line" do
87
+ BAMFCSV.parse(%Q|1,2,"3,4"\n5,6,7|).should == [["1","2","3,4"],["5","6","7"]]
88
+ BAMFCSV.parse(%Q|1,2,"3,4"\r\n5,6,7|).should == [["1","2","3,4"],["5","6","7"]]
89
+ end
90
+ end
91
+
92
+ it "raises BAMFCSV::MalformedCSVError when quotes appear in a cell which was not started with quotes" do
93
+ expect { BAMFCSV.parse(' ""') }.should raise_error(BAMFCSV::MalformedCSVError)
94
+ expect { BAMFCSV.parse(" \"\"\n") }.should raise_error(BAMFCSV::MalformedCSVError)
95
+ expect { BAMFCSV.parse(" \"\"\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
96
+ expect { BAMFCSV.parse('1, "",3') }.should raise_error(BAMFCSV::MalformedCSVError)
97
+ end
98
+
99
+ it "raises BAMFCSV::MalformedCSVError when a quoted cell is not closed at its end" do
100
+ expect { BAMFCSV.parse('"') }.should raise_error(BAMFCSV::MalformedCSVError)
101
+ expect { BAMFCSV.parse('" ""') }.should raise_error(BAMFCSV::MalformedCSVError)
102
+ expect { BAMFCSV.parse("\"\n") }.should raise_error(BAMFCSV::MalformedCSVError)
103
+ expect { BAMFCSV.parse("\"\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
104
+ expect { BAMFCSV.parse("\" \"\"\n") }.should raise_error(BAMFCSV::MalformedCSVError)
105
+ expect { BAMFCSV.parse("\" \"\"\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
106
+ expect { BAMFCSV.parse('1,"2,3') }.should raise_error(BAMFCSV::MalformedCSVError)
107
+ expect { BAMFCSV.parse("1,\"2,3\n") }.should raise_error(BAMFCSV::MalformedCSVError)
108
+ expect { BAMFCSV.parse("1,\"2,3\r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
109
+ end
110
+
111
+ it "raises BAMFCSV::MalformedCSVError when quoted cell is closed before its end" do
112
+ expect { BAMFCSV.parse('"" ') }.should raise_error(BAMFCSV::MalformedCSVError)
113
+ expect { BAMFCSV.parse("\"\" \n") }.should raise_error(BAMFCSV::MalformedCSVError)
114
+ expect { BAMFCSV.parse("\"\" \r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
115
+ expect { BAMFCSV.parse('1,"" ,2') }.should raise_error(BAMFCSV::MalformedCSVError)
116
+ end
117
+ end
118
+ end
49
119
  end
metadata CHANGED
@@ -1,60 +1,63 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: bamfcsv
3
- version: !ruby/object:Gem::Version
4
- version: 0.0.2
3
+ version: !ruby/object:Gem::Version
5
4
  prerelease:
5
+ version: 0.1.0
6
6
  platform: ruby
7
- authors:
7
+ authors:
8
8
  - Jon Distad
9
9
  - Alex Redington
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2011-04-01 00:00:00.000000000 +00:00
13
+
14
+ date: 2011-04-03 00:00:00 -04:00
14
15
  default_executable:
15
- dependencies:
16
- - !ruby/object:Gem::Dependency
16
+ dependencies:
17
+ - !ruby/object:Gem::Dependency
17
18
  name: rspec
18
- requirement: &22500120 !ruby/object:Gem::Requirement
19
+ prerelease: false
20
+ requirement: &id001 !ruby/object:Gem::Requirement
19
21
  none: false
20
- requirements:
22
+ requirements:
21
23
  - - ~>
22
- - !ruby/object:Gem::Version
24
+ - !ruby/object:Gem::Version
23
25
  version: 2.5.0
24
26
  type: :development
25
- prerelease: false
26
- version_requirements: *22500120
27
- - !ruby/object:Gem::Dependency
27
+ version_requirements: *id001
28
+ - !ruby/object:Gem::Dependency
28
29
  name: fuubar
29
- requirement: &22499620 !ruby/object:Gem::Requirement
30
+ prerelease: false
31
+ requirement: &id002 !ruby/object:Gem::Requirement
30
32
  none: false
31
- requirements:
33
+ requirements:
32
34
  - - ~>
33
- - !ruby/object:Gem::Version
35
+ - !ruby/object:Gem::Version
34
36
  version: 0.0.2
35
37
  type: :development
36
- prerelease: false
37
- version_requirements: *22499620
38
- - !ruby/object:Gem::Dependency
38
+ version_requirements: *id002
39
+ - !ruby/object:Gem::Dependency
39
40
  name: rake-compiler
40
- requirement: &22499160 !ruby/object:Gem::Requirement
41
+ prerelease: false
42
+ requirement: &id003 !ruby/object:Gem::Requirement
41
43
  none: false
42
- requirements:
44
+ requirements:
43
45
  - - ~>
44
- - !ruby/object:Gem::Version
46
+ - !ruby/object:Gem::Version
45
47
  version: 0.7.1
46
48
  type: :development
47
- prerelease: false
48
- version_requirements: *22499160
49
+ version_requirements: *id003
49
50
  description: BAMFCSV parses csv like a BAMF. BAMF!!
50
- email:
51
+ email:
51
52
  - jon@thinkrelevance.com
52
53
  - lovemachine@thinkrelevance.com
53
54
  executables: []
54
- extensions:
55
+
56
+ extensions:
55
57
  - ext/bamfcsv/extconf.rb
56
58
  extra_rdoc_files: []
57
- files:
59
+
60
+ files:
58
61
  - .gitignore
59
62
  - .rspec
60
63
  - Gemfile
@@ -81,27 +84,39 @@ files:
81
84
  has_rdoc: true
82
85
  homepage: https://github.com/jondistad/bamfcsv
83
86
  licenses: []
87
+
84
88
  post_install_message:
85
89
  rdoc_options: []
86
- require_paths:
90
+
91
+ require_paths:
87
92
  - lib
88
93
  - ext
89
- required_ruby_version: !ruby/object:Gem::Requirement
94
+ required_ruby_version: !ruby/object:Gem::Requirement
90
95
  none: false
91
- requirements:
92
- - - ! '>='
93
- - !ruby/object:Gem::Version
94
- version: '0'
95
- required_rubygems_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: "0"
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
101
  none: false
97
- requirements:
98
- - - ! '>='
99
- - !ruby/object:Gem::Version
100
- version: '0'
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: "0"
101
106
  requirements: []
107
+
102
108
  rubyforge_project: bamfcsv
103
- rubygems_version: 1.5.2
109
+ rubygems_version: 1.6.2
104
110
  signing_key:
105
111
  specification_version: 3
106
112
  summary: BAMF!!! Your csv is parsed.
107
- test_files: []
113
+ test_files:
114
+ - spec/fixtures/bamf-comma-comma.csv
115
+ - spec/fixtures/double-quotes.csv
116
+ - spec/fixtures/empty.csv
117
+ - spec/fixtures/escapes.csv
118
+ - spec/fixtures/one-column.csv
119
+ - spec/fixtures/terminated-with-cr.csv
120
+ - spec/fixtures/test.csv
121
+ - spec/lib/bamfcsv_spec.rb
122
+ - spec/spec_helper.rb