bamfcsv 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bamfcsv (0.0.2)
4
+ bamfcsv (0.1.0)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
@@ -1,19 +1,7 @@
1
1
  #include <stdlib.h>
2
2
  #include "bamfcsv_ext.h"
3
3
 
4
- struct s_Row *alloc_row() {
5
-
6
- struct s_Row *new_row = malloc(sizeof(struct s_Row));
7
-
8
- new_row -> first_cell = 0;
9
- new_row -> next_row = 0;
10
- new_row -> cell_count = 0;
11
-
12
- return new_row;
13
-
14
- }
15
-
16
- struct s_Cell *alloc_cell() {
4
+ struct s_Cell *alloc_cell(struct s_Row *row, struct s_Cell *prev_cell) {
17
5
 
18
6
  struct s_Cell *new_cell = malloc(sizeof(struct s_Cell));
19
7
 
@@ -21,11 +9,26 @@ struct s_Cell *alloc_cell() {
21
9
  new_cell -> len = 0;
22
10
  new_cell -> next_cell = 0;
23
11
  new_cell -> has_quotes = 0;
12
+ row->cell_count++;
13
+ if (prev_cell) prev_cell->next_cell = new_cell;
24
14
 
25
15
  return new_cell;
26
16
 
27
17
  }
28
18
 
19
+ struct s_Row *alloc_row(struct s_Row *prev_row) {
20
+
21
+ struct s_Row *new_row = malloc(sizeof(struct s_Row));
22
+
23
+ new_row -> next_row = 0;
24
+ new_row -> cell_count = 0;
25
+ new_row -> first_cell = alloc_cell(new_row, 0);
26
+ if (prev_row) prev_row->next_row = new_row;
27
+
28
+ return new_row;
29
+
30
+ }
31
+
29
32
  void free_cell(struct s_Cell *cell) {
30
33
 
31
34
  if (cell != 0) {
@@ -66,21 +69,20 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
66
69
  cur_cell = cur_row->first_cell;
67
70
  row = rb_ary_new2(cur_row->cell_count);
68
71
  rb_ary_store(matrix,i,row);
69
- for (j = 0; j < cur_row->cell_count; j++) {
70
- if (*(cur_cell->start) == '"'
71
- && *((cur_cell->start)+(cur_cell->len-1)) == '"')
72
- new_string = rb_str_new(cur_cell->start+1, cur_cell->len-2);
73
- else {
74
- if (cur_cell->len)
75
- new_string = rb_str_new(cur_cell->start, cur_cell->len);
76
- else
77
- new_string = Qnil; /* Empty, unquoted cells are nil, for default ruby CSV compatibility */
78
- }
79
- if (cur_cell->has_quotes) {
80
- rb_funcall(new_string, gsub, 2, dquote, quote);
72
+ if (cur_row->cell_count > 1 || cur_cell->len) {
73
+ for (j = 0; j < cur_row->cell_count; j++) {
74
+ if (cur_cell->has_quotes) {
75
+ new_string = rb_str_new(cur_cell->start+1, cur_cell->len-2);
76
+ rb_funcall(new_string, gsub, 2, dquote, quote);
77
+ } else {
78
+ if (cur_cell->len)
79
+ new_string = rb_str_new(cur_cell->start, cur_cell->len);
80
+ else
81
+ new_string = Qnil; /* Empty, unquoted cells are nil, for default ruby CSV compatibility */
82
+ }
83
+ rb_ary_store(row, j, new_string);
84
+ cur_cell = cur_cell->next_cell;
81
85
  }
82
- rb_ary_store(row, j, new_string);
83
- cur_cell = cur_cell->next_cell;
84
86
  }
85
87
  cur_row = cur_row->next_row;
86
88
  }
@@ -90,9 +92,9 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
90
92
 
91
93
  void finalize_cell(struct s_Cell *cell, char *cur, int quote_count) {
92
94
  if (*(cur-1) == '\r')
93
- cell->len = cur-(cell->start)-1;
95
+ cell->len = (int)(cur-(cell->start)-1);
94
96
  else
95
- cell->len = cur-(cell->start);
97
+ cell->len = (int)(cur-(cell->start));
96
98
 
97
99
  if (quote_count) cell->has_quotes = 1;
98
100
  }
@@ -102,21 +104,25 @@ VALUE build_matrix(char *buf, int bufsize) {
102
104
  int num_rows = 1;
103
105
  int quote_count = 0, quotes_matched = 1;
104
106
 
105
- struct s_Row *first_row = alloc_row();
107
+ struct s_Row *first_row = alloc_row(0);
106
108
  struct s_Row *cur_row = first_row;
107
- struct s_Cell *cur_cell = alloc_cell();
108
- first_row->first_cell = cur_cell;
109
+ struct s_Cell *cur_cell = cur_row->first_cell;
109
110
  cur_cell->start = buf;
110
111
 
111
112
  VALUE matrix;
112
113
 
113
114
  char *cur;
115
+
116
+ if (bufsize > 0 && *(buf+bufsize-1) == '\n') {
117
+ *(buf+bufsize-1) = 0;
118
+ --bufsize;
119
+ }
114
120
 
115
121
  for (cur = buf; cur < buf+bufsize; cur++) {
116
122
 
117
123
  if (*cur == '"') {
118
124
  if (0 == quote_count && cur_cell->start != cur) /* Quotes begin past opening of cell */
119
- rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: Quoted cell must open with '\"'", num_rows, cur_row->cell_count+1);
125
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: Quoted cell must open with '\"'", num_rows, cur_row->cell_count);
120
126
  else
121
127
  ++quote_count;
122
128
  }
@@ -128,50 +134,39 @@ VALUE build_matrix(char *buf, int bufsize) {
128
134
  if (*cur == ',') {
129
135
 
130
136
  if (quote_count && *(cur-1) != '"')
131
- rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d.", num_rows, cur_row->cell_count+1);
137
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d.", num_rows, cur_row->cell_count);
132
138
 
133
- finalize_cell(cur_cell,cur,quote_count);
134
- cur_cell->next_cell = alloc_cell();
135
- cur_cell = cur_cell->next_cell;
139
+ finalize_cell(cur_cell, cur, quote_count);
140
+ cur_cell = alloc_cell(cur_row, cur_cell);
136
141
  cur_cell->start = cur+1;
137
- cur_row->cell_count += 1;
138
142
  quote_count = 0;
139
143
 
140
- }
141
-
142
- if (*cur == '\n') {
144
+ } else if (*cur == '\n') {
143
145
 
144
146
  if (quote_count && !(*(cur-1) == '"' || *(cur-1) == '\r' && *(cur-2) == '"'))
145
- rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOL", num_rows, cur_row->cell_count+1);
147
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOL", num_rows, cur_row->cell_count);
146
148
 
147
- finalize_cell(cur_cell,cur,quote_count);
148
- cur_row->cell_count += 1;
149
- cur_row->next_row = alloc_row();
150
- cur_row = cur_row -> next_row;
151
- cur_row->first_cell = alloc_cell();
149
+ finalize_cell(cur_cell, cur, quote_count);
150
+ cur_row = alloc_row(cur_row);
152
151
  cur_cell = cur_row->first_cell;
153
152
  cur_cell->start = cur+1;
154
153
  quote_count = 0;
155
154
 
156
155
  num_rows++;
157
156
 
158
- }
157
+ } else if (quote_count && *cur != '\r' && *cur != '"')
158
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d", num_rows, cur_row->cell_count);
159
159
 
160
160
  }
161
161
 
162
162
  }
163
163
 
164
164
  if (!quotes_matched) /* Reached EOF without matching quotes */
165
- rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: File ends without closing '\"'", num_rows, cur_row->cell_count+1);
166
- else if (quote_count && *cur != '"')
167
- rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOF", num_rows, cur_row->cell_count+1);
168
-
169
- if (cur_row->cell_count == 0) { /* Ended with newline */
170
- num_rows--;
171
- } else { /* No newline before EOF */
172
- finalize_cell(cur_cell, cur, quote_count);
173
- cur_row->cell_count++;
174
- }
165
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: File ends without closing '\"'", num_rows, cur_row->cell_count);
166
+ else if (quote_count && *(cur-1) != '"') /* Quotes closed before end of final cell */
167
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOF", num_rows, cur_row->cell_count);
168
+
169
+ finalize_cell(cur_cell, cur, quote_count);
175
170
 
176
171
  matrix = build_matrix_from_pointer_tree(first_row, num_rows);
177
172
 
@@ -0,0 +1,46 @@
1
module BAMFCSV
  # Header-aware view over a parsed CSV matrix.
  #
  # The first row of the matrix supplies the column headers; every remaining
  # row is exposed as a Table::Row whose cells are looked up by header.
  class Table
    include Enumerable

    # matrix - Array of row Arrays; the first row is the header row.
    #          The Array passed in is left untouched. An empty matrix yields
    #          a table with no headers and no rows.
    def initialize(matrix)
      @headers = matrix.first || []
      @matrix = matrix.drop(1)
      @header_map = {}
      # When a header is repeated, the last column carrying it wins.
      @headers.each_with_index do |header, index|
        @header_map[header] = index
      end
      @row_cache = []
    end

    # Yields a Row for every body row, in order.
    # Returns an Enumerator when called without a block.
    def each
      return to_enum(:each) unless block_given?
      @matrix.size.times do |idx|
        yield self[idx]
      end
    end

    # Returns the Row at +idx+ (negative values count from the end), or nil
    # when +idx+ is out of range. Row objects are built on first access and
    # reused afterwards.
    def [](idx)
      idx += @matrix.size if idx < 0
      return if idx < 0 || idx >= @matrix.size
      @row_cache[idx] ||= Row.new(@header_map, @matrix[idx])
    end

    def inspect
      "#<BAMFCSV::Table>"
    end

    # A single body row, addressable by header.
    class Row
      # header_map - Hash of header => column index.
      # values     - Array of cell values for this row.
      def initialize(header_map, values)
        @header_map = header_map
        @values = values
      end

      # Returns the cell stored under header +key+, or nil when the header
      # is unknown (instead of raising TypeError on a nil index).
      def [](key)
        index = @header_map[key]
        index && @values[index]
      end
    end

    private

    # Pairs each header with the matching cell of +row+ as a Hash.
    def row_hash(row)
      Hash[@headers.zip(row)]
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module BAMFCSV
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
data/lib/bamfcsv.rb CHANGED
@@ -1,13 +1,20 @@
1
1
  require 'bamfcsv/bamfcsv'
2
+ require 'bamfcsv/table'
2
3
 
3
4
  module BAMFCSV
4
5
 
5
- def self.read(thing_to_read)
6
- __parse_string(File.read(thing_to_read))
6
+ def self.read(thing_to_read, opts={})
7
+ parse(File.read(thing_to_read), opts)
7
8
  end
8
9
 
9
- def self.parse(csv_str)
10
- __parse_string(csv_str)
10
+ def self.parse(csv_str, opts={})
11
+ return [] if csv_str.empty?
12
+ matrix = __parse_string(csv_str)
13
+ if opts[:headers]
14
+ Table.new(matrix)
15
+ else
16
+ matrix
17
+ end
11
18
  end
12
19
 
13
20
  end
@@ -57,7 +57,7 @@ describe BAMFCSV do
57
57
  end
58
58
 
59
59
  it 'correctly escaptes ""' do
60
- BAMFCSV.parse("1,\"\"2\"\"\n").should == [["1", '"2"']]
60
+ BAMFCSV.parse(%Q|1,"""2"""\n|).should == [["1", '"2"']]
61
61
  end
62
62
 
63
63
  it "parses unquoted empty cells as nil" do
@@ -68,6 +68,12 @@ describe BAMFCSV do
68
68
  BAMFCSV.parse("1,\"\",2").should == [["1","","2"]]
69
69
  end
70
70
 
71
+ it "parses a single cell not followed by a newline correctly" do
72
+ BAMFCSV.parse("1").should == [["1"]]
73
+ BAMFCSV.parse("1\n2").should == [["1"],["2"]]
74
+ BAMFCSV.parse("1\r\n2").should == [["1"],["2"]]
75
+ end
76
+
71
77
  describe "default CSV module compatibility" do
72
78
  it "adds a nil cell after a trailing comma with no newline" do
73
79
  BAMFCSV.parse("1,2,").should == [["1","2",nil]]
@@ -114,6 +120,41 @@ describe BAMFCSV do
114
120
  expect { BAMFCSV.parse("\"\" \r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
115
121
  expect { BAMFCSV.parse('1,"" ,2') }.should raise_error(BAMFCSV::MalformedCSVError)
116
122
  end
123
+
124
+ it "raises BAMFCSV::MalformedCSVError when unescaped quotes appear in a quoted cell" do
125
+ expect { BAMFCSV.parse('"a"b"c"') }.should raise_error(BAMFCSV::MalformedCSVError)
126
+ expect { BAMFCSV.parse('"a"b"c",2') }.should raise_error(BAMFCSV::MalformedCSVError)
127
+ expect { BAMFCSV.parse(%Q("a"b"c"\n)) }.should raise_error(BAMFCSV::MalformedCSVError)
128
+ expect { BAMFCSV.parse(%Q("a"b"c"\r\n)) }.should raise_error(BAMFCSV::MalformedCSVError)
129
+ end
130
+
131
+ ['', "\n", "\r\n", ",", "\n\n", "\r\n\r\n"].each do |csv|
132
+ csv_result = CSV.parse(csv)
133
+ it "BAMFCSV parses #{csv.inspect} the same way as CSV (#{csv_result.inspect})" do
134
+ BAMFCSV.parse(csv).should == csv_result
135
+ end
136
+ end
137
+ end
138
+ end
139
+
140
+ describe "generating a Table" do
141
+ describe "with only a header" do
142
+ let(:header_only) { BAMFCSV.parse("1,2,3", :headers => true) }
143
+ it "has no body rows" do
144
+ header_only.first.should be_nil
145
+ end
146
+
147
+ it "does nothing when iterating" do
148
+ expect { header_only.each { |x| raise "Oh dang!" } }.should_not raise_error
149
+ end
150
+ end
151
+
152
+ describe "with body rows" do
153
+ it "maps the headers the the values of each row" do
154
+ table = BAMFCSV.parse("a,b,c\r\n1,2,3\r\nx,y,z", :headers => true)
155
+ table.first["a"].should == "1"
156
+ table[1]["c"].should == "z"
157
+ end
117
158
  end
118
159
  end
119
160
  end
data/spec/spec_helper.rb CHANGED
@@ -4,6 +4,7 @@ $:.unshift(File.join(root_path, "ext"))
4
4
  require 'bundler'
5
5
  require 'bamfcsv'
6
6
  Bundler.require(:development)
7
+ require 'csv'
7
8
 
8
9
  RSpec.configure do |config|
9
10
  config.filter_run :focused => true
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: bamfcsv
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.1.0
5
+ version: 0.1.1
6
6
  platform: ruby
7
7
  authors:
8
8
  - Jon Distad
@@ -11,7 +11,7 @@ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
13
 
14
- date: 2011-04-03 00:00:00 -04:00
14
+ date: 2011-04-08 00:00:00 -04:00
15
15
  default_executable:
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
@@ -69,6 +69,7 @@ files:
69
69
  - ext/bamfcsv/bamfcsv_ext.h
70
70
  - ext/bamfcsv/extconf.rb
71
71
  - lib/bamfcsv.rb
72
+ - lib/bamfcsv/table.rb
72
73
  - lib/bamfcsv/version.rb
73
74
  - spec/fixtures/bamf-comma-comma.csv
74
75
  - spec/fixtures/double-quotes.csv