bamfcsv 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bamfcsv (0.0.2)
4
+ bamfcsv (0.1.0)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
@@ -1,19 +1,7 @@
1
1
  #include <stdlib.h>
2
2
  #include "bamfcsv_ext.h"
3
3
 
4
- struct s_Row *alloc_row() {
5
-
6
- struct s_Row *new_row = malloc(sizeof(struct s_Row));
7
-
8
- new_row -> first_cell = 0;
9
- new_row -> next_row = 0;
10
- new_row -> cell_count = 0;
11
-
12
- return new_row;
13
-
14
- }
15
-
16
- struct s_Cell *alloc_cell() {
4
+ struct s_Cell *alloc_cell(struct s_Row *row, struct s_Cell *prev_cell) {
17
5
 
18
6
  struct s_Cell *new_cell = malloc(sizeof(struct s_Cell));
19
7
 
@@ -21,11 +9,26 @@ struct s_Cell *alloc_cell() {
21
9
  new_cell -> len = 0;
22
10
  new_cell -> next_cell = 0;
23
11
  new_cell -> has_quotes = 0;
12
+ row->cell_count++;
13
+ if (prev_cell) prev_cell->next_cell = new_cell;
24
14
 
25
15
  return new_cell;
26
16
 
27
17
  }
28
18
 
19
+ struct s_Row *alloc_row(struct s_Row *prev_row) {
20
+
21
+ struct s_Row *new_row = malloc(sizeof(struct s_Row));
22
+
23
+ new_row -> next_row = 0;
24
+ new_row -> cell_count = 0;
25
+ new_row -> first_cell = alloc_cell(new_row, 0);
26
+ if (prev_row) prev_row->next_row = new_row;
27
+
28
+ return new_row;
29
+
30
+ }
31
+
29
32
  void free_cell(struct s_Cell *cell) {
30
33
 
31
34
  if (cell != 0) {
@@ -66,21 +69,20 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
66
69
  cur_cell = cur_row->first_cell;
67
70
  row = rb_ary_new2(cur_row->cell_count);
68
71
  rb_ary_store(matrix,i,row);
69
- for (j = 0; j < cur_row->cell_count; j++) {
70
- if (*(cur_cell->start) == '"'
71
- && *((cur_cell->start)+(cur_cell->len-1)) == '"')
72
- new_string = rb_str_new(cur_cell->start+1, cur_cell->len-2);
73
- else {
74
- if (cur_cell->len)
75
- new_string = rb_str_new(cur_cell->start, cur_cell->len);
76
- else
77
- new_string = Qnil; /* Empty, unquoted cells are nil, for default ruby CSV compatibility */
78
- }
79
- if (cur_cell->has_quotes) {
80
- rb_funcall(new_string, gsub, 2, dquote, quote);
72
+ if (cur_row->cell_count > 1 || cur_cell->len) {
73
+ for (j = 0; j < cur_row->cell_count; j++) {
74
+ if (cur_cell->has_quotes) {
75
+ new_string = rb_str_new(cur_cell->start+1, cur_cell->len-2);
76
+ rb_funcall(new_string, gsub, 2, dquote, quote);
77
+ } else {
78
+ if (cur_cell->len)
79
+ new_string = rb_str_new(cur_cell->start, cur_cell->len);
80
+ else
81
+ new_string = Qnil; /* Empty, unquoted cells are nil, for default ruby CSV compatibility */
82
+ }
83
+ rb_ary_store(row, j, new_string);
84
+ cur_cell = cur_cell->next_cell;
81
85
  }
82
- rb_ary_store(row, j, new_string);
83
- cur_cell = cur_cell->next_cell;
84
86
  }
85
87
  cur_row = cur_row->next_row;
86
88
  }
@@ -90,9 +92,9 @@ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
90
92
 
91
93
  void finalize_cell(struct s_Cell *cell, char *cur, int quote_count) {
92
94
  if (*(cur-1) == '\r')
93
- cell->len = cur-(cell->start)-1;
95
+ cell->len = (int)(cur-(cell->start)-1);
94
96
  else
95
- cell->len = cur-(cell->start);
97
+ cell->len = (int)(cur-(cell->start));
96
98
 
97
99
  if (quote_count) cell->has_quotes = 1;
98
100
  }
@@ -102,21 +104,25 @@ VALUE build_matrix(char *buf, int bufsize) {
102
104
  int num_rows = 1;
103
105
  int quote_count = 0, quotes_matched = 1;
104
106
 
105
- struct s_Row *first_row = alloc_row();
107
+ struct s_Row *first_row = alloc_row(0);
106
108
  struct s_Row *cur_row = first_row;
107
- struct s_Cell *cur_cell = alloc_cell();
108
- first_row->first_cell = cur_cell;
109
+ struct s_Cell *cur_cell = cur_row->first_cell;
109
110
  cur_cell->start = buf;
110
111
 
111
112
  VALUE matrix;
112
113
 
113
114
  char *cur;
115
+
116
+ if (bufsize > 0 && *(buf+bufsize-1) == '\n') {
117
+ *(buf+bufsize-1) = 0;
118
+ --bufsize;
119
+ }
114
120
 
115
121
  for (cur = buf; cur < buf+bufsize; cur++) {
116
122
 
117
123
  if (*cur == '"') {
118
124
  if (0 == quote_count && cur_cell->start != cur) /* Quotes begin past opening of cell */
119
- rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: Quoted cell must open with '\"'", num_rows, cur_row->cell_count+1);
125
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: Quoted cell must open with '\"'", num_rows, cur_row->cell_count);
120
126
  else
121
127
  ++quote_count;
122
128
  }
@@ -128,50 +134,39 @@ VALUE build_matrix(char *buf, int bufsize) {
128
134
  if (*cur == ',') {
129
135
 
130
136
  if (quote_count && *(cur-1) != '"')
131
- rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d.", num_rows, cur_row->cell_count+1);
137
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d.", num_rows, cur_row->cell_count);
132
138
 
133
- finalize_cell(cur_cell,cur,quote_count);
134
- cur_cell->next_cell = alloc_cell();
135
- cur_cell = cur_cell->next_cell;
139
+ finalize_cell(cur_cell, cur, quote_count);
140
+ cur_cell = alloc_cell(cur_row, cur_cell);
136
141
  cur_cell->start = cur+1;
137
- cur_row->cell_count += 1;
138
142
  quote_count = 0;
139
143
 
140
- }
141
-
142
- if (*cur == '\n') {
144
+ } else if (*cur == '\n') {
143
145
 
144
146
  if (quote_count && !(*(cur-1) == '"' || *(cur-1) == '\r' && *(cur-2) == '"'))
145
- rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOL", num_rows, cur_row->cell_count+1);
147
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOL", num_rows, cur_row->cell_count);
146
148
 
147
- finalize_cell(cur_cell,cur,quote_count);
148
- cur_row->cell_count += 1;
149
- cur_row->next_row = alloc_row();
150
- cur_row = cur_row -> next_row;
151
- cur_row->first_cell = alloc_cell();
149
+ finalize_cell(cur_cell, cur, quote_count);
150
+ cur_row = alloc_row(cur_row);
152
151
  cur_cell = cur_row->first_cell;
153
152
  cur_cell->start = cur+1;
154
153
  quote_count = 0;
155
154
 
156
155
  num_rows++;
157
156
 
158
- }
157
+ } else if (quote_count && *cur != '\r' && *cur != '"')
158
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d", num_rows, cur_row->cell_count);
159
159
 
160
160
  }
161
161
 
162
162
  }
163
163
 
164
164
  if (!quotes_matched) /* Reached EOF without matching quotes */
165
- rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: File ends without closing '\"'", num_rows, cur_row->cell_count+1);
166
- else if (quote_count && *cur != '"')
167
- rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOF", num_rows, cur_row->cell_count+1);
168
-
169
- if (cur_row->cell_count == 0) { /* Ended with newline */
170
- num_rows--;
171
- } else { /* No newline before EOF */
172
- finalize_cell(cur_cell, cur, quote_count);
173
- cur_row->cell_count++;
174
- }
165
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %d, cell %d: File ends without closing '\"'", num_rows, cur_row->cell_count);
166
+ else if (quote_count && *(cur-1) != '"') /* Quotes closed before end of final cell */
167
+ rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %d, cell %d: EOF", num_rows, cur_row->cell_count);
168
+
169
+ finalize_cell(cur_cell, cur, quote_count);
175
170
 
176
171
  matrix = build_matrix_from_pointer_tree(first_row, num_rows);
177
172
 
@@ -0,0 +1,46 @@
1
+ module BAMFCSV
2
+ class Table
3
+ include Enumerable
4
+ def initialize(matrix)
5
+ @headers = matrix.shift
6
+ @matrix = matrix
7
+ @header_map = {}
8
+ @headers.each_with_index do |h, i|
9
+ @header_map[h] = i
10
+ end
11
+ @row_cache = []
12
+ end
13
+
14
+ def each
15
+ @matrix.size.times do |idx|
16
+ yield self[idx]
17
+ end
18
+ end
19
+
20
+ def [](idx)
21
+ idx += @matrix.size if idx < 0
22
+ return if idx < 0 || idx >= @matrix.size
23
+ @row_cache[idx] ||= Row.new(@header_map, @matrix[idx])
24
+ end
25
+
26
+ def inspect
27
+ "#<BAMFCSV::Table>"
28
+ end
29
+
30
+ private
31
+ def row_hash(row)
32
+ Hash[@headers.zip(row)]
33
+ end
34
+
35
+ class Row
36
+ def initialize(header_map, values)
37
+ @header_map = header_map
38
+ @values = values
39
+ end
40
+
41
+ def [](key)
42
+ @values[@header_map[key]]
43
+ end
44
+ end
45
+ end
46
+ end
@@ -1,3 +1,3 @@
1
1
  module BAMFCSV
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
data/lib/bamfcsv.rb CHANGED
@@ -1,13 +1,20 @@
1
1
  require 'bamfcsv/bamfcsv'
2
+ require 'bamfcsv/table'
2
3
 
3
4
  module BAMFCSV
4
5
 
5
- def self.read(thing_to_read)
6
- __parse_string(File.read(thing_to_read))
6
+ def self.read(thing_to_read, opts={})
7
+ parse(File.read(thing_to_read), opts)
7
8
  end
8
9
 
9
- def self.parse(csv_str)
10
- __parse_string(csv_str)
10
+ def self.parse(csv_str, opts={})
11
+ return [] if csv_str.empty?
12
+ matrix = __parse_string(csv_str)
13
+ if opts[:headers]
14
+ Table.new(matrix)
15
+ else
16
+ matrix
17
+ end
11
18
  end
12
19
 
13
20
  end
@@ -57,7 +57,7 @@ describe BAMFCSV do
57
57
  end
58
58
 
59
59
  it 'correctly escaptes ""' do
60
- BAMFCSV.parse("1,\"\"2\"\"\n").should == [["1", '"2"']]
60
+ BAMFCSV.parse(%Q|1,"""2"""\n|).should == [["1", '"2"']]
61
61
  end
62
62
 
63
63
  it "parses unquoted empty cells as nil" do
@@ -68,6 +68,12 @@ describe BAMFCSV do
68
68
  BAMFCSV.parse("1,\"\",2").should == [["1","","2"]]
69
69
  end
70
70
 
71
+ it "parses a single cell not followed by a newline correctly" do
72
+ BAMFCSV.parse("1").should == [["1"]]
73
+ BAMFCSV.parse("1\n2").should == [["1"],["2"]]
74
+ BAMFCSV.parse("1\r\n2").should == [["1"],["2"]]
75
+ end
76
+
71
77
  describe "default CSV module compatibility" do
72
78
  it "adds a nil cell after a trailing comma with no newline" do
73
79
  BAMFCSV.parse("1,2,").should == [["1","2",nil]]
@@ -114,6 +120,41 @@ describe BAMFCSV do
114
120
  expect { BAMFCSV.parse("\"\" \r\n") }.should raise_error(BAMFCSV::MalformedCSVError)
115
121
  expect { BAMFCSV.parse('1,"" ,2') }.should raise_error(BAMFCSV::MalformedCSVError)
116
122
  end
123
+
124
+ it "raises BAMFCSV::MalformedCSVError when unescaped quotes appear in a quoted cell" do
125
+ expect { BAMFCSV.parse('"a"b"c"') }.should raise_error(BAMFCSV::MalformedCSVError)
126
+ expect { BAMFCSV.parse('"a"b"c",2') }.should raise_error(BAMFCSV::MalformedCSVError)
127
+ expect { BAMFCSV.parse(%Q("a"b"c"\n)) }.should raise_error(BAMFCSV::MalformedCSVError)
128
+ expect { BAMFCSV.parse(%Q("a"b"c"\r\n)) }.should raise_error(BAMFCSV::MalformedCSVError)
129
+ end
130
+
131
+ ['', "\n", "\r\n", ",", "\n\n", "\r\n\r\n"].each do |csv|
132
+ csv_result = CSV.parse(csv)
133
+ it "BAMFCSV parses #{csv.inspect} the same way as CSV (#{csv_result.inspect})" do
134
+ BAMFCSV.parse(csv).should == csv_result
135
+ end
136
+ end
137
+ end
138
+ end
139
+
140
+ describe "generating a Table" do
141
+ describe "with only a header" do
142
+ let(:header_only) { BAMFCSV.parse("1,2,3", :headers => true) }
143
+ it "has no body rows" do
144
+ header_only.first.should be_nil
145
+ end
146
+
147
+ it "does nothing when iterating" do
148
+ expect { header_only.each { |x| raise "Oh dang!" } }.should_not raise_error
149
+ end
150
+ end
151
+
152
+ describe "with body rows" do
153
+ it "maps the headers the the values of each row" do
154
+ table = BAMFCSV.parse("a,b,c\r\n1,2,3\r\nx,y,z", :headers => true)
155
+ table.first["a"].should == "1"
156
+ table[1]["c"].should == "z"
157
+ end
117
158
  end
118
159
  end
119
160
  end
data/spec/spec_helper.rb CHANGED
@@ -4,6 +4,7 @@ $:.unshift(File.join(root_path, "ext"))
4
4
  require 'bundler'
5
5
  require 'bamfcsv'
6
6
  Bundler.require(:development)
7
+ require 'csv'
7
8
 
8
9
  RSpec.configure do |config|
9
10
  config.filter_run :focused => true
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: bamfcsv
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.1.0
5
+ version: 0.1.1
6
6
  platform: ruby
7
7
  authors:
8
8
  - Jon Distad
@@ -11,7 +11,7 @@ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
13
 
14
- date: 2011-04-03 00:00:00 -04:00
14
+ date: 2011-04-08 00:00:00 -04:00
15
15
  default_executable:
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
@@ -69,6 +69,7 @@ files:
69
69
  - ext/bamfcsv/bamfcsv_ext.h
70
70
  - ext/bamfcsv/extconf.rb
71
71
  - lib/bamfcsv.rb
72
+ - lib/bamfcsv/table.rb
72
73
  - lib/bamfcsv/version.rb
73
74
  - spec/fixtures/bamf-comma-comma.csv
74
75
  - spec/fixtures/double-quotes.csv