rcsv 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.travis.yml ADDED
@@ -0,0 +1,7 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - 1.9.2
5
+ - ruby-head
6
+ - 1.8.7
7
+ - ree
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- rcsv (0.0.5)
4
+ rcsv (0.0.6)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -1,13 +1,15 @@
1
1
  # Rcsv
2
2
 
3
+ [![Build Status](https://travis-ci.org/fiksu/rcsv.png)](https://travis-ci.org/fiksu/rcsv)
4
+
3
5
  Rcsv is a fast CSV parsing library for MRI Ruby. Tested on REE 1.8.7 and Ruby 1.9.3.
4
6
 
5
- Contrary to many other gems that implement their own parsers, Rcsv uses libcsv 3.1.0 (http://sourceforge.net/projects/libcsv/). As long as libcsv's API is stable, getting Rcsv to use newer libcsv version is as simple as updating two files (csv.h and libcsv.c).
7
+ Contrary to many other gems that implement their own parsers, Rcsv uses libcsv 3.0.2 (http://sourceforge.net/projects/libcsv/). As long as libcsv's API is stable, getting Rcsv to use a newer libcsv version is as simple as updating two files (csv.h and libcsv.c).
6
8
 
7
9
  ## Benchmarks
8
- user system total real
9
- FasterCSV 0.580000 0.000000 0.580000 ( 0.618837)
10
- rcsv 0.060000 0.000000 0.060000 ( 0.062248)
10
+ user system total real
11
+ FasterCSV 0.580000 0.000000 0.580000 ( 0.618837)
12
+ rcsv 0.060000 0.000000 0.060000 ( 0.062248)
11
13
 
12
14
  ## License
13
15
 
@@ -30,7 +32,7 @@ Or install it yourself as:
30
32
 
31
33
  ## Building the latest source
32
34
 
33
- First, check out the master branch. Then cd there and run:
35
+ First, check out the master branch. Then cd there and run:
34
36
 
35
37
  $ bundle # Installs development dependencies
36
38
  $ bundle exec rake # Runs tests
@@ -42,7 +44,7 @@ Currently, Rcsv only supports CSV parsing. CSV write support is planned.
42
44
 
43
45
  Quickstart:
44
46
 
45
- parsed = Rcsv.parse(csv_data)
47
+ parsed = Rcsv.parse(csv_data)
46
48
 
47
49
 
48
50
  The Rcsv class exposes a class method *parse* that accepts a CSV string as its first parameter and an options hash as its second parameter.
@@ -99,36 +101,36 @@ A boolean flag. If enabled, only parses columns that are listed in :columns. Dis
99
101
 
100
102
  This example parses a 3-column CSV file and only returns parsed rows where "Age" values are set to "35".
101
103
 
102
- Rcsv.parse some_csv, :row_as_hash => true,
103
- :columns => {
104
- 'First Name' => { :alias => :first_name, :default => "Unknown" },
105
- 'Last Name' => { :alias => :last_name, :default => "Unknown"},
106
- 'Age' => { :alias => :age, :type => :int, :match => "35"}
107
- }
104
+ Rcsv.parse some_csv, :row_as_hash => true,
105
+ :columns => {
106
+ 'First Name' => { :alias => :first_name, :default => "Unknown" },
107
+ 'Last Name' => { :alias => :last_name, :default => "Unknown"},
108
+ 'Age' => { :alias => :age, :type => :int, :match => "35"}
109
+ }
108
110
 
109
111
  The result would look like this:
110
112
 
111
- [
112
- { :first_name => "Mary", :last_name => "Jane", :age => 35 },
113
- { :first_name => "Unknown", :last_name => "Alien", :age => 35}
114
- ]
113
+ [
114
+ { :first_name => "Mary", :last_name => "Jane", :age => 35 },
115
+ { :first_name => "Unknown", :last_name => "Alien", :age => 35}
116
+ ]
115
117
 
116
118
  Another example, for a miserable headerless Tab-separated CSV:
117
119
 
118
- Rcsv.parse some_csv, :column_separator => "\t",
119
- :header => :none,
120
- :columns => {
121
- 1 => { :type => :float, :default => 0 }
122
- }
120
+ Rcsv.parse some_csv, :column_separator => "\t",
121
+ :header => :none,
122
+ :columns => {
123
+ 1 => { :type => :float, :default => 0 }
124
+ }
123
125
 
124
126
  The result would look like this:
125
127
 
126
- [
127
- [ "Very hot", 3.7, "Mercury" ],
128
- [ "Very hot and cloudy", 8.87, "Venus" ],
129
- [ "Just about ok", 9.78, "Earth"],
130
- [ nil, 0, "Vacuum" ]
131
- ]
128
+ [
129
+ [ "Very hot", 3.7, "Mercury" ],
130
+ [ "Very hot and cloudy", 8.87, "Venus" ],
131
+ [ "Just about ok", 9.78, "Earth"],
132
+ [ nil, 0, "Vacuum" ]
133
+ ]
132
134
 
133
135
 
134
136
  ## To do
data/ext/rcsv/rcsv.c CHANGED
@@ -3,37 +3,37 @@
3
3
 
4
4
  #include "csv.h"
5
5
 
6
- static VALUE rcsv_parse_error; // class Rcsv::ParseError << StandardError; end
6
+ static VALUE rcsv_parse_error; /* class Rcsv::ParseError << StandardError; end */
7
7
 
8
- // It is useful to know exact row/column positions and field contents where parse-time exception was raised
8
+ /* It is useful to know exact row/column positions and field contents where parse-time exception was raised */
9
9
  #define RAISE_WITH_LOCATION(row, column, contents, fmt, ...) \
10
10
  rb_raise(rcsv_parse_error, "[%d:%d '%s'] " fmt, (int)(row), (int)(column), (char *)(contents), ##__VA_ARGS__);
11
11
 
12
12
  struct rcsv_metadata {
13
- // Derived from user-specified options
14
- bool row_as_hash; // Used to return array of hashes rather than array of arrays
15
- size_t offset_rows; // Number of rows to skip before parsing
16
-
17
- char * row_conversions; // A pointer to string/array of row conversions char specifiers
18
- char ** only_rows; // A pointer to array of strings for only_rows filter
19
- VALUE * row_defaults; // A pointer to array of row defaults
20
- VALUE * column_names; // A pointer to array of column names to be used with hashes
21
-
22
- // Pointer options lengths
23
- size_t num_row_conversions; // Number of converter types in row_conversions array
24
- size_t num_only_rows; // Number of items in only_rows filter
25
- size_t num_row_defaults; // Number of default values in row_defaults array
26
- size_t num_columns; // Number of columns detected from column_names.size
27
-
28
- // Internal state
29
- bool skip_current_row; // Used by only_rows filter to skip parsing of the row remainder
30
- size_t current_col; // Current column's index
31
- size_t current_row; // Current row's index
32
-
33
- VALUE * result; // A pointer to the parsed data
13
+ /* Derived from user-specified options */
14
+ bool row_as_hash; /* Used to return array of hashes rather than array of arrays */
15
+ size_t offset_rows; /* Number of rows to skip before parsing */
16
+
17
+ char * row_conversions; /* A pointer to string/array of row conversions char specifiers */
18
+ char ** only_rows; /* A pointer to array of strings for only_rows filter */
19
+ VALUE * row_defaults; /* A pointer to array of row defaults */
20
+ VALUE * column_names; /* A pointer to array of column names to be used with hashes */
21
+
22
+ /* Pointer options lengths */
23
+ size_t num_row_conversions; /* Number of converter types in row_conversions array */
24
+ size_t num_only_rows; /* Number of items in only_rows filter */
25
+ size_t num_row_defaults; /* Number of default values in row_defaults array */
26
+ size_t num_columns; /* Number of columns detected from column_names.size */
27
+
28
+ /* Internal state */
29
+ bool skip_current_row; /* Used by only_rows filter to skip parsing of the row remainder */
30
+ size_t current_col; /* Current column's index */
31
+ size_t current_row; /* Current row's index */
32
+
33
+ VALUE * result; /* A pointer to the parsed data */
34
34
  };
35
35
 
36
- //// Internal callbacks ////
36
+ /* Internal callbacks */
37
37
 
38
38
  /* This procedure is called for every parsed field */
39
39
  void end_of_field_callback(void * field, size_t field_size, void * data) {
@@ -41,20 +41,20 @@ void end_of_field_callback(void * field, size_t field_size, void * data) {
41
41
  struct rcsv_metadata * meta = (struct rcsv_metadata *) data;
42
42
  char row_conversion = 0;
43
43
  VALUE parsed_field;
44
- VALUE last_entry = rb_ary_entry(*(meta->result), -1); // result.last
44
+ VALUE last_entry = rb_ary_entry(*(meta->result), -1); /* result.last */
45
45
 
46
- // No need to parse anything until the end of the line if skip_current_row is set
46
+ /* No need to parse anything until the end of the line if skip_current_row is set */
47
47
  if (meta->skip_current_row) {
48
48
  return;
49
49
  }
50
50
 
51
- // Skip the row if its position is less than specifed offset
51
+ /* Skip the row if its position is less than the specified offset */
52
52
  if (meta->current_row < meta->offset_rows) {
53
53
  meta->skip_current_row = true;
54
54
  return;
55
55
  }
56
56
 
57
- // Filter by string row values listed in meta->only_rows.
57
+ /* Filter by string row values listed in meta->only_rows */
58
58
  if ((meta->only_rows != NULL) &&
59
59
  (meta->current_col < meta->num_only_rows) &&
60
60
  (meta->only_rows[meta->current_col] != NULL) &&
@@ -63,33 +63,33 @@ void end_of_field_callback(void * field, size_t field_size, void * data) {
63
63
  return;
64
64
  }
65
65
 
66
- // Get row conversion char specifier
66
+ /* Get row conversion char specifier */
67
67
  if (meta->current_col < meta->num_row_conversions) {
68
68
  row_conversion = (char)meta->row_conversions[meta->current_col];
69
69
  }
70
70
 
71
- // Convert the field from string into Ruby type specified by row_conversion
72
- if (row_conversion != ' ') { // spacebar skips the column
71
+ /* Convert the field from string into Ruby type specified by row_conversion */
72
+ if (row_conversion != ' ') { /* spacebar skips the column */
73
73
  if (field_size == 0) {
74
- // Assigning appropriate default value if applicable.
74
+ /* Assigning appropriate default value if applicable. */
75
75
  if (meta->current_col < meta->num_row_defaults) {
76
76
  parsed_field = meta->row_defaults[meta->current_col];
77
- } else { // By default, default is nil
77
+ } else { /* By default, default is nil */
78
78
  parsed_field = Qnil;
79
79
  }
80
80
  } else {
81
81
  if (meta->current_col < meta->num_row_conversions) {
82
82
  switch (row_conversion){
83
- case 's': // String
83
+ case 's': /* String */
84
84
  parsed_field = rb_str_new(field_str, field_size);
85
85
  break;
86
- case 'i': // Integer
86
+ case 'i': /* Integer */
87
87
  parsed_field = INT2NUM(atol(field_str));
88
88
  break;
89
- case 'f': // Float
89
+ case 'f': /* Float */
90
90
  parsed_field = rb_float_new(atof(field_str));
91
91
  break;
92
- case 'b': // TrueClass/FalseClass
92
+ case 'b': /* TrueClass/FalseClass */
93
93
  switch (field_str[0]) {
94
94
  case 't':
95
95
  case 'T':
@@ -119,12 +119,12 @@ void end_of_field_callback(void * field, size_t field_size, void * data) {
119
119
  row_conversion
120
120
  );
121
121
  }
122
- } else { // No conversion happens
123
- parsed_field = rb_str_new(field_str, field_size); // field
122
+ } else { /* No conversion happens */
123
+ parsed_field = rb_str_new(field_str, field_size); /* field */
124
124
  }
125
125
  }
126
126
 
127
- // Assign the value to appropriate hash key if parsing into Hash
127
+ /* Assign the value to appropriate hash key if parsing into Hash */
128
128
  if (meta->row_as_hash) {
129
129
  if (meta->current_col >= meta->num_columns) {
130
130
  RAISE_WITH_LOCATION(
@@ -138,12 +138,12 @@ void end_of_field_callback(void * field, size_t field_size, void * data) {
138
138
  } else {
139
139
  rb_hash_aset(last_entry, meta->column_names[meta->current_col], parsed_field);
140
140
  }
141
- } else { // Parse into Array
142
- rb_ary_push(last_entry, parsed_field); // result << field
141
+ } else { /* Parse into Array */
142
+ rb_ary_push(last_entry, parsed_field); /* result << field */
143
143
  }
144
144
  }
145
145
 
146
- // Increment column counter
146
+ /* Increment column counter */
147
147
  meta->current_col++;
148
148
  return;
149
149
  }
@@ -152,30 +152,30 @@ void end_of_field_callback(void * field, size_t field_size, void * data) {
152
152
  void end_of_line_callback(int last_char, void * data) {
153
153
  struct rcsv_metadata * meta = (struct rcsv_metadata *) data;
154
154
 
155
- // If filters didn't match, current row parsing is reverted.
155
+ /* If filters didn't match, current row parsing is reverted */
156
156
  if (meta->skip_current_row) {
157
- rb_ary_pop(*(meta->result)); // result.pop
157
+ rb_ary_pop(*(meta->result)); /* result.pop */
158
158
  meta->skip_current_row = false;
159
159
  }
160
160
 
161
- // Add a new empty array/hash for the next line unless EOF reached.
161
+ /* Add a new empty array/hash for the next line unless EOF reached */
162
162
  if (last_char != -1) {
163
163
  if (meta->row_as_hash) {
164
- rb_ary_push(*(meta->result), rb_hash_new()); // result << {}
164
+ rb_ary_push(*(meta->result), rb_hash_new()); /* result << {} */
165
165
  } else {
166
- rb_ary_push(*(meta->result), rb_ary_new()); // result << []
166
+ rb_ary_push(*(meta->result), rb_ary_new()); /* result << [] */
167
167
  }
168
168
  }
169
169
 
170
- // Resetting column counter.
170
+ /* Resetting column counter */
171
171
  meta->current_col = 0;
172
172
 
173
- // Incrementing row counter.
173
+ /* Incrementing row counter */
174
174
  meta->current_row++;
175
175
  return;
176
176
  }
177
177
 
178
- //// C API ////
178
+ /* C API */
179
179
 
180
180
  /* The main method that handles parsing */
181
181
  static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
@@ -189,7 +189,7 @@ static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
189
189
  int error;
190
190
  size_t i = 0;
191
191
 
192
- // Setting up some sane defaults
192
+ /* Setting up some sane defaults */
193
193
  meta.row_as_hash = false;
194
194
  meta.skip_current_row = false;
195
195
  meta.num_columns = 0;
@@ -203,50 +203,50 @@ static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
203
203
  meta.row_defaults = NULL;
204
204
  meta.row_conversions = NULL;
205
205
  meta.column_names = NULL;
206
- meta.result = (VALUE[]){rb_ary_new()}; // []
206
+ meta.result = (VALUE[]){rb_ary_new()}; /* [] */
207
207
 
208
- // str is required, options is optional (pun intended)
208
+ /* str is required, options is optional (pun intended) */
209
209
  rb_scan_args(argc, argv, "11", &str, &options);
210
210
  csv_string = StringValuePtr(str);
211
211
  csv_string_len = strlen(csv_string);
212
212
 
213
- // options ||= nil
213
+ /* options ||= nil */
214
214
  if (NIL_P(options)) {
215
215
  options = rb_hash_new();
216
216
  }
217
217
 
218
- // By default, parsing is strict
218
+ /* By default, parsing is strict */
219
219
  option = rb_hash_aref(options, ID2SYM(rb_intern("nostrict")));
220
220
  if (!option || (option == Qnil)) {
221
221
  csv_options |= CSV_STRICT;
222
222
  }
223
223
 
224
- // Try to initialize libcsv
224
+ /* Try to initialize libcsv */
225
225
  if (csv_init(&cp, csv_options) == -1) {
226
226
  rb_raise(rcsv_parse_error, "Couldn't initialize libcsv");
227
227
  }
228
228
 
229
- // By default, parse as Array of Arrays
229
+ /* By default, parse as Array of Arrays */
230
230
  option = rb_hash_aref(options, ID2SYM(rb_intern("row_as_hash")));
231
231
  if (option && (option != Qnil)) {
232
232
  meta.row_as_hash = true;
233
233
  }
234
234
 
235
- // :col_sep sets the column separator, default is comma (,)
235
+ /* :col_sep sets the column separator, default is comma (,) */
236
236
  option = rb_hash_aref(options, ID2SYM(rb_intern("col_sep")));
237
237
  if (option != Qnil) {
238
238
  csv_set_delim(&cp, (unsigned char)*StringValuePtr(option));
239
239
  }
240
240
 
241
- // Specify how many rows to skip from the beginning of CSV
241
+ /* Specify how many rows to skip from the beginning of CSV */
242
242
  option = rb_hash_aref(options, ID2SYM(rb_intern("offset_rows")));
243
243
  if (option != Qnil) {
244
244
  meta.offset_rows = (size_t)NUM2INT(option);
245
245
  }
246
246
 
247
- // :only_rows is a string mask where row is only parsed
248
- // if its fields match those in the passed array.
249
- // [nil, nil, "ABC"] skips all rows where 3rd column isn't equal to "ABC"
247
+ /* :only_rows is a string mask where row is only parsed
248
+ if its fields match those in the passed array.
249
+ [nil, nil, "ABC"] skips all rows where 3rd column isn't equal to "ABC" */
250
250
  option = rb_hash_aref(options, ID2SYM(rb_intern("only_rows")));
251
251
  if (option != Qnil) {
252
252
  meta.num_only_rows = (size_t)RARRAY_LEN(option);
@@ -262,8 +262,8 @@ static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
262
262
  }
263
263
  }
264
264
 
265
- // :row_defaults is an array of default values that are assigned to fields containing empty strings
266
- // according to matching field positions
265
+ /* :row_defaults is an array of default values that are assigned to fields containing empty strings
266
+ according to matching field positions */
267
267
  option = rb_hash_aref(options, ID2SYM(rb_intern("row_defaults")));
268
268
  if (option != Qnil) {
269
269
  meta.num_row_defaults = RARRAY_LEN(option);
@@ -275,16 +275,16 @@ static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
275
275
  }
276
276
  }
277
277
 
278
- // :row_conversions specifies Ruby types that CSV field values should be converted into.
279
- // Each char of row_conversions string represents Ruby type for CSV field with matching position.
278
+ /* :row_conversions specifies Ruby types that CSV field values should be converted into.
279
+ Each char of row_conversions string represents Ruby type for CSV field with matching position. */
280
280
  option = rb_hash_aref(options, ID2SYM(rb_intern("row_conversions")));
281
281
  if (option != Qnil) {
282
282
  meta.num_row_conversions = RSTRING_LEN(option);
283
283
  meta.row_conversions = StringValuePtr(option);
284
284
  }
285
285
 
286
- // Column names should be declared explicitly when parsing fields as Hashes
287
- if (meta.row_as_hash) { // Only matters for hash results
286
+ /* Column names should be declared explicitly when parsing fields as Hashes */
287
+ if (meta.row_as_hash) { /* Only matters for hash results */
288
288
  option = rb_hash_aref(options, ID2SYM(rb_intern("column_names")));
289
289
  if (option == Qnil) {
290
290
  rb_raise(rcsv_parse_error, ":row_as_hash requires :column_names to be set.");
@@ -298,14 +298,14 @@ static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
298
298
  }
299
299
  }
300
300
 
301
- // Initializing result with empty Array
301
+ /* Initializing result with empty Array */
302
302
  if (meta.row_as_hash) {
303
- rb_ary_push(*(meta.result), rb_hash_new()); // [{}]
303
+ rb_ary_push(*(meta.result), rb_hash_new()); /* [{}] */
304
304
  } else {
305
- rb_ary_push(*(meta.result), rb_ary_new()); // [[]]
305
+ rb_ary_push(*(meta.result), rb_ary_new()); /* [[]] */
306
306
  }
307
307
 
308
- // Actual parsing and error handling
308
+ /* Actual parsing and error handling */
309
309
  if (csv_string_len != csv_parse(&cp, csv_string, strlen(csv_string),
310
310
  &end_of_field_callback, &end_of_line_callback, &meta)) {
311
311
  error = csv_error(&cp);
@@ -327,7 +327,7 @@ static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
327
327
  }
328
328
  }
329
329
 
330
- // Flushing libcsv's buffer and freeing up allocated memory
330
+ /* Flushing libcsv's buffer and freeing up allocated memory */
331
331
  csv_fini(&cp, &end_of_field_callback, &end_of_line_callback, &meta);
332
332
  csv_free(&cp);
333
333
 
@@ -343,23 +343,23 @@ static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
343
343
  free(meta.column_names);
344
344
  }
345
345
 
346
- // Remove the last row if it's empty. That happens if CSV file ends with a newline.
346
+ /* Remove the last row if it's empty. That happens if CSV file ends with a newline. */
347
347
  if (RARRAY_LEN(rb_ary_entry(*(meta.result), -1)) == 0) {
348
348
  rb_ary_pop(*(meta.result));
349
349
  }
350
350
 
351
- // An array of arrays of strings is returned.
351
+ /* An array of arrays of strings is returned. */
352
352
  return *(meta.result);
353
353
  }
354
354
 
355
355
 
356
356
  /* Define Ruby API */
357
357
  void Init_rcsv(void) {
358
- VALUE klass = rb_define_class("Rcsv", rb_cObject); // class Rcsv; end
358
+ VALUE klass = rb_define_class("Rcsv", rb_cObject); /* class Rcsv; end */
359
359
 
360
- // Error is initialized through static variable in order to access it from rb_rcsv_raw_parse
360
+ /* Error is initialized through static variable in order to access it from rb_rcsv_raw_parse */
361
361
  rcsv_parse_error = rb_define_class_under(klass, "ParseError", rb_eStandardError);
362
362
 
363
- // def Rcsv.raw_parse; ...; end
363
+ /* def Rcsv.raw_parse; ...; end */
364
364
  rb_define_singleton_method(klass, "raw_parse", rb_rcsv_raw_parse, -1);
365
365
  }
data/lib/rcsv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Rcsv
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rcsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -20,6 +20,7 @@ extensions:
20
20
  extra_rdoc_files: []
21
21
  files:
22
22
  - .gitignore
23
+ - .travis.yml
23
24
  - COPYING.LESSER
24
25
  - Gemfile
25
26
  - Gemfile.lock