rcsv 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
data/.travis.yml ADDED
@@ -0,0 +1,7 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - 1.9.2
5
+ - ruby-head
6
+ - 1.8.7
7
+ - ree
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- rcsv (0.0.5)
4
+ rcsv (0.0.6)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -1,13 +1,15 @@
1
1
  # Rcsv
2
2
 
3
+ [![Build Status](https://travis-ci.org/fiksu/rcsv.png)](https://travis-ci.org/fiksu/rcsv)
4
+
3
5
  Rcsv is a fast CSV parsing library for MRI Ruby. Tested on REE 1.8.7 and Ruby 1.9.3.
4
6
 
5
- Contrary to many other gems that implement their own parsers, Rcsv uses libcsv 3.1.0 (http://sourceforge.net/projects/libcsv/). As long as libcsv's API is stable, getting Rcsv to use newer libcsv version is as simple as updating two files (csv.h and libcsv.c).
7
+ Contrary to many other gems that implement their own parsers, Rcsv uses libcsv 3.0.2 (http://sourceforge.net/projects/libcsv/). As long as libcsv's API is stable, getting Rcsv to use a newer libcsv version is as simple as updating two files (csv.h and libcsv.c).
6
8
 
7
9
  ## Benchmarks
8
- user system total real
9
- FasterCSV 0.580000 0.000000 0.580000 ( 0.618837)
10
- rcsv 0.060000 0.000000 0.060000 ( 0.062248)
10
+ user system total real
11
+ FasterCSV 0.580000 0.000000 0.580000 ( 0.618837)
12
+ rcsv 0.060000 0.000000 0.060000 ( 0.062248)
11
13
 
12
14
  ## License
13
15
 
@@ -30,7 +32,7 @@ Or install it yourself as:
30
32
 
31
33
  ## Building the latest source
32
34
 
33
- First, check out the master branch. Then cd there and run:
35
+ First, check out the master branch. Then cd there and run:
34
36
 
35
37
  $ bundle # Installs development dependencies
36
38
  $ bundle exec rake # Runs tests
@@ -42,7 +44,7 @@ Currently, Rcsv only supports CSV parsing. CSV write support is planned.
42
44
 
43
45
  Quickstart:
44
46
 
45
- parsed = Rcsv.parse(csv_data)
47
+ parsed = Rcsv.parse(csv_data)
46
48
 
47
49
 
48
50
  Rcsv class exposes a class method *parse* that accepts a CSV string as its first parameter and options hash as its second parameter.
@@ -99,36 +101,36 @@ A boolean flag. If enabled, only parses columns that are listed in :columns. Dis
99
101
 
100
102
  This example parses a 3-column CSV file and only returns parsed rows where "Age" values are set to "35".
101
103
 
102
- Rcsv.parse some_csv, :row_as_hash => true,
103
- :columns => {
104
- 'First Name' => { :alias => :first_name, :default => "Unknown" },
105
- 'Last Name' => { :alias => :last_name, :default => "Unknown"},
106
- 'Age' => { :alias => :age, :type => :int, :match => "35"}
107
- }
104
+ Rcsv.parse some_csv, :row_as_hash => true,
105
+ :columns => {
106
+ 'First Name' => { :alias => :first_name, :default => "Unknown" },
107
+ 'Last Name' => { :alias => :last_name, :default => "Unknown"},
108
+ 'Age' => { :alias => :age, :type => :int, :match => "35"}
109
+ }
108
110
 
109
111
  The result would look like this:
110
112
 
111
- [
112
- { :first_name => "Mary", :last_name => "Jane", :age => 35 },
113
- { :first_name => "Unknown", :last_name => "Alien", :age => 35}
114
- ]
113
+ [
114
+ { :first_name => "Mary", :last_name => "Jane", :age => 35 },
115
+ { :first_name => "Unknown", :last_name => "Alien", :age => 35}
116
+ ]
115
117
 
116
118
  Another example, for a miserable headerless Tab-separated CSV:
117
119
 
118
- Rcsv.parse some_csv, :column_separator => "\t",
119
- :header => :none,
120
- :columns => {
121
- 1 => { :type => :float, :default => 0 }
122
- }
120
+ Rcsv.parse some_csv, :column_separator => "\t",
121
+ :header => :none,
122
+ :columns => {
123
+ 1 => { :type => :float, :default => 0 }
124
+ }
123
125
 
124
126
  The result would look like this:
125
127
 
126
- [
127
- [ "Very hot", 3.7, "Mercury" ],
128
- [ "Very hot and cloudy", 8.87, "Venus" ],
129
- [ "Just about ok", 9.78, "Earth"],
130
- [ nil, 0, "Vacuum" ]
131
- ]
128
+ [
129
+ [ "Very hot", 3.7, "Mercury" ],
130
+ [ "Very hot and cloudy", 8.87, "Venus" ],
131
+ [ "Just about ok", 9.78, "Earth"],
132
+ [ nil, 0, "Vacuum" ]
133
+ ]
132
134
 
133
135
 
134
136
  ## To do
data/ext/rcsv/rcsv.c CHANGED
@@ -3,37 +3,37 @@
3
3
 
4
4
  #include "csv.h"
5
5
 
6
- static VALUE rcsv_parse_error; // class Rcsv::ParseError << StandardError; end
6
+ static VALUE rcsv_parse_error; /* class Rcsv::ParseError << StandardError; end */
7
7
 
8
- // It is useful to know exact row/column positions and field contents where parse-time exception was raised
8
+ /* It is useful to know exact row/column positions and field contents where parse-time exception was raised */
9
9
  #define RAISE_WITH_LOCATION(row, column, contents, fmt, ...) \
10
10
  rb_raise(rcsv_parse_error, "[%d:%d '%s'] " fmt, (int)(row), (int)(column), (char *)(contents), ##__VA_ARGS__);
11
11
 
12
12
  struct rcsv_metadata {
13
- // Derived from user-specified options
14
- bool row_as_hash; // Used to return array of hashes rather than array of arrays
15
- size_t offset_rows; // Number of rows to skip before parsing
16
-
17
- char * row_conversions; // A pointer to string/array of row conversions char specifiers
18
- char ** only_rows; // A pointer to array of strings for only_rows filter
19
- VALUE * row_defaults; // A pointer to array of row defaults
20
- VALUE * column_names; // A pointer to array of column names to be used with hashes
21
-
22
- // Pointer options lengths
23
- size_t num_row_conversions; // Number of converter types in row_conversions array
24
- size_t num_only_rows; // Number of items in only_rows filter
25
- size_t num_row_defaults; // Number of default values in row_defaults array
26
- size_t num_columns; // Number of columns detected from column_names.size
27
-
28
- // Internal state
29
- bool skip_current_row; // Used by only_rows filter to skip parsing of the row remainder
30
- size_t current_col; // Current column's index
31
- size_t current_row; // Current row's index
32
-
33
- VALUE * result; // A pointer to the parsed data
13
+ /* Derived from user-specified options */
14
+ bool row_as_hash; /* Used to return array of hashes rather than array of arrays */
15
+ size_t offset_rows; /* Number of rows to skip before parsing */
16
+
17
+ char * row_conversions; /* A pointer to string/array of row conversions char specifiers */
18
+ char ** only_rows; /* A pointer to array of strings for only_rows filter */
19
+ VALUE * row_defaults; /* A pointer to array of row defaults */
20
+ VALUE * column_names; /* A pointer to array of column names to be used with hashes */
21
+
22
+ /* Pointer options lengths */
23
+ size_t num_row_conversions; /* Number of converter types in row_conversions array */
24
+ size_t num_only_rows; /* Number of items in only_rows filter */
25
+ size_t num_row_defaults; /* Number of default values in row_defaults array */
26
+ size_t num_columns; /* Number of columns detected from column_names.size */
27
+
28
+ /* Internal state */
29
+ bool skip_current_row; /* Used by only_rows filter to skip parsing of the row remainder */
30
+ size_t current_col; /* Current column's index */
31
+ size_t current_row; /* Current row's index */
32
+
33
+ VALUE * result; /* A pointer to the parsed data */
34
34
  };
35
35
 
36
- //// Internal callbacks ////
36
+ /* Internal callbacks */
37
37
 
38
38
  /* This procedure is called for every parsed field */
39
39
  void end_of_field_callback(void * field, size_t field_size, void * data) {
@@ -41,20 +41,20 @@ void end_of_field_callback(void * field, size_t field_size, void * data) {
41
41
  struct rcsv_metadata * meta = (struct rcsv_metadata *) data;
42
42
  char row_conversion = 0;
43
43
  VALUE parsed_field;
44
- VALUE last_entry = rb_ary_entry(*(meta->result), -1); // result.last
44
+ VALUE last_entry = rb_ary_entry(*(meta->result), -1); /* result.last */
45
45
 
46
- // No need to parse anything until the end of the line if skip_current_row is set
46
+ /* No need to parse anything until the end of the line if skip_current_row is set */
47
47
  if (meta->skip_current_row) {
48
48
  return;
49
49
  }
50
50
 
51
- // Skip the row if its position is less than specifed offset
51
+ /* Skip the row if its position is less than specified offset */
52
52
  if (meta->current_row < meta->offset_rows) {
53
53
  meta->skip_current_row = true;
54
54
  return;
55
55
  }
56
56
 
57
- // Filter by string row values listed in meta->only_rows.
57
+ /* Filter by string row values listed in meta->only_rows */
58
58
  if ((meta->only_rows != NULL) &&
59
59
  (meta->current_col < meta->num_only_rows) &&
60
60
  (meta->only_rows[meta->current_col] != NULL) &&
@@ -63,33 +63,33 @@ void end_of_field_callback(void * field, size_t field_size, void * data) {
63
63
  return;
64
64
  }
65
65
 
66
- // Get row conversion char specifier
66
+ /* Get row conversion char specifier */
67
67
  if (meta->current_col < meta->num_row_conversions) {
68
68
  row_conversion = (char)meta->row_conversions[meta->current_col];
69
69
  }
70
70
 
71
- // Convert the field from string into Ruby type specified by row_conversion
72
- if (row_conversion != ' ') { // spacebar skips the column
71
+ /* Convert the field from string into Ruby type specified by row_conversion */
72
+ if (row_conversion != ' ') { /* spacebar skips the column */
73
73
  if (field_size == 0) {
74
- // Assigning appropriate default value if applicable.
74
+ /* Assigning appropriate default value if applicable. */
75
75
  if (meta->current_col < meta->num_row_defaults) {
76
76
  parsed_field = meta->row_defaults[meta->current_col];
77
- } else { // By default, default is nil
77
+ } else { /* By default, default is nil */
78
78
  parsed_field = Qnil;
79
79
  }
80
80
  } else {
81
81
  if (meta->current_col < meta->num_row_conversions) {
82
82
  switch (row_conversion){
83
- case 's': // String
83
+ case 's': /* String */
84
84
  parsed_field = rb_str_new(field_str, field_size);
85
85
  break;
86
- case 'i': // Integer
86
+ case 'i': /* Integer */
87
87
  parsed_field = INT2NUM(atol(field_str));
88
88
  break;
89
- case 'f': // Float
89
+ case 'f': /* Float */
90
90
  parsed_field = rb_float_new(atof(field_str));
91
91
  break;
92
- case 'b': // TrueClass/FalseClass
92
+ case 'b': /* TrueClass/FalseClass */
93
93
  switch (field_str[0]) {
94
94
  case 't':
95
95
  case 'T':
@@ -119,12 +119,12 @@ void end_of_field_callback(void * field, size_t field_size, void * data) {
119
119
  row_conversion
120
120
  );
121
121
  }
122
- } else { // No conversion happens
123
- parsed_field = rb_str_new(field_str, field_size); // field
122
+ } else { /* No conversion happens */
123
+ parsed_field = rb_str_new(field_str, field_size); /* field */
124
124
  }
125
125
  }
126
126
 
127
- // Assign the value to appropriate hash key if parsing into Hash
127
+ /* Assign the value to appropriate hash key if parsing into Hash */
128
128
  if (meta->row_as_hash) {
129
129
  if (meta->current_col >= meta->num_columns) {
130
130
  RAISE_WITH_LOCATION(
@@ -138,12 +138,12 @@ void end_of_field_callback(void * field, size_t field_size, void * data) {
138
138
  } else {
139
139
  rb_hash_aset(last_entry, meta->column_names[meta->current_col], parsed_field);
140
140
  }
141
- } else { // Parse into Array
142
- rb_ary_push(last_entry, parsed_field); // result << field
141
+ } else { /* Parse into Array */
142
+ rb_ary_push(last_entry, parsed_field); /* result << field */
143
143
  }
144
144
  }
145
145
 
146
- // Increment column counter
146
+ /* Increment column counter */
147
147
  meta->current_col++;
148
148
  return;
149
149
  }
@@ -152,30 +152,30 @@ void end_of_field_callback(void * field, size_t field_size, void * data) {
152
152
  void end_of_line_callback(int last_char, void * data) {
153
153
  struct rcsv_metadata * meta = (struct rcsv_metadata *) data;
154
154
 
155
- // If filters didn't match, current row parsing is reverted.
155
+ /* If filters didn't match, current row parsing is reverted */
156
156
  if (meta->skip_current_row) {
157
- rb_ary_pop(*(meta->result)); // result.pop
157
+ rb_ary_pop(*(meta->result)); /* result.pop */
158
158
  meta->skip_current_row = false;
159
159
  }
160
160
 
161
- // Add a new empty array/hash for the next line unless EOF reached.
161
+ /* Add a new empty array/hash for the next line unless EOF reached */
162
162
  if (last_char != -1) {
163
163
  if (meta->row_as_hash) {
164
- rb_ary_push(*(meta->result), rb_hash_new()); // result << {}
164
+ rb_ary_push(*(meta->result), rb_hash_new()); /* result << {} */
165
165
  } else {
166
- rb_ary_push(*(meta->result), rb_ary_new()); // result << []
166
+ rb_ary_push(*(meta->result), rb_ary_new()); /* result << [] */
167
167
  }
168
168
  }
169
169
 
170
- // Resetting column counter.
170
+ /* Resetting column counter */
171
171
  meta->current_col = 0;
172
172
 
173
- // Incrementing row counter.
173
+ /* Incrementing row counter */
174
174
  meta->current_row++;
175
175
  return;
176
176
  }
177
177
 
178
- //// C API ////
178
+ /* C API */
179
179
 
180
180
  /* The main method that handles parsing */
181
181
  static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
@@ -189,7 +189,7 @@ static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
189
189
  int error;
190
190
  size_t i = 0;
191
191
 
192
- // Setting up some sane defaults
192
+ /* Setting up some sane defaults */
193
193
  meta.row_as_hash = false;
194
194
  meta.skip_current_row = false;
195
195
  meta.num_columns = 0;
@@ -203,50 +203,50 @@ static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
203
203
  meta.row_defaults = NULL;
204
204
  meta.row_conversions = NULL;
205
205
  meta.column_names = NULL;
206
- meta.result = (VALUE[]){rb_ary_new()}; // []
206
+ meta.result = (VALUE[]){rb_ary_new()}; /* [] */
207
207
 
208
- // str is required, options is optional (pun intended)
208
+ /* str is required, options is optional (pun intended) */
209
209
  rb_scan_args(argc, argv, "11", &str, &options);
210
210
  csv_string = StringValuePtr(str);
211
211
  csv_string_len = strlen(csv_string);
212
212
 
213
- // options ||= nil
213
+ /* options ||= {} */
214
214
  if (NIL_P(options)) {
215
215
  options = rb_hash_new();
216
216
  }
217
217
 
218
- // By default, parsing is strict
218
+ /* By default, parsing is strict */
219
219
  option = rb_hash_aref(options, ID2SYM(rb_intern("nostrict")));
220
220
  if (!option || (option == Qnil)) {
221
221
  csv_options |= CSV_STRICT;
222
222
  }
223
223
 
224
- // Try to initialize libcsv
224
+ /* Try to initialize libcsv */
225
225
  if (csv_init(&cp, csv_options) == -1) {
226
226
  rb_raise(rcsv_parse_error, "Couldn't initialize libcsv");
227
227
  }
228
228
 
229
- // By default, parse as Array of Arrays
229
+ /* By default, parse as Array of Arrays */
230
230
  option = rb_hash_aref(options, ID2SYM(rb_intern("row_as_hash")));
231
231
  if (option && (option != Qnil)) {
232
232
  meta.row_as_hash = true;
233
233
  }
234
234
 
235
- // :col_sep sets the column separator, default is comma (,)
235
+ /* :col_sep sets the column separator, default is comma (,) */
236
236
  option = rb_hash_aref(options, ID2SYM(rb_intern("col_sep")));
237
237
  if (option != Qnil) {
238
238
  csv_set_delim(&cp, (unsigned char)*StringValuePtr(option));
239
239
  }
240
240
 
241
- // Specify how many rows to skip from the beginning of CSV
241
+ /* Specify how many rows to skip from the beginning of CSV */
242
242
  option = rb_hash_aref(options, ID2SYM(rb_intern("offset_rows")));
243
243
  if (option != Qnil) {
244
244
  meta.offset_rows = (size_t)NUM2INT(option);
245
245
  }
246
246
 
247
- // :only_rows is a string mask where row is only parsed
248
- // if its fields match those in the passed array.
249
- // [nil, nil, "ABC"] skips all rows where 3rd column isn't equal to "ABC"
247
+ /* :only_rows is a string mask where row is only parsed
248
+ if its fields match those in the passed array.
249
+ [nil, nil, "ABC"] skips all rows where 3rd column isn't equal to "ABC" */
250
250
  option = rb_hash_aref(options, ID2SYM(rb_intern("only_rows")));
251
251
  if (option != Qnil) {
252
252
  meta.num_only_rows = (size_t)RARRAY_LEN(option);
@@ -262,8 +262,8 @@ static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
262
262
  }
263
263
  }
264
264
 
265
- // :row_defaults is an array of default values that are assigned to fields containing empty strings
266
- // according to matching field positions
265
+ /* :row_defaults is an array of default values that are assigned to fields containing empty strings
266
+ according to matching field positions */
267
267
  option = rb_hash_aref(options, ID2SYM(rb_intern("row_defaults")));
268
268
  if (option != Qnil) {
269
269
  meta.num_row_defaults = RARRAY_LEN(option);
@@ -275,16 +275,16 @@ static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
275
275
  }
276
276
  }
277
277
 
278
- // :row_conversions specifies Ruby types that CSV field values should be converted into.
279
- // Each char of row_conversions string represents Ruby type for CSV field with matching position.
278
+ /* :row_conversions specifies Ruby types that CSV field values should be converted into.
279
+ Each char of row_conversions string represents Ruby type for CSV field with matching position. */
280
280
  option = rb_hash_aref(options, ID2SYM(rb_intern("row_conversions")));
281
281
  if (option != Qnil) {
282
282
  meta.num_row_conversions = RSTRING_LEN(option);
283
283
  meta.row_conversions = StringValuePtr(option);
284
284
  }
285
285
 
286
- // Column names should be declared explicitly when parsing fields as Hashes
287
- if (meta.row_as_hash) { // Only matters for hash results
286
+ /* Column names should be declared explicitly when parsing fields as Hashes */
287
+ if (meta.row_as_hash) { /* Only matters for hash results */
288
288
  option = rb_hash_aref(options, ID2SYM(rb_intern("column_names")));
289
289
  if (option == Qnil) {
290
290
  rb_raise(rcsv_parse_error, ":row_as_hash requires :column_names to be set.");
@@ -298,14 +298,14 @@ static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
298
298
  }
299
299
  }
300
300
 
301
- // Initializing result with empty Array
301
+ /* Initializing result with empty Array */
302
302
  if (meta.row_as_hash) {
303
- rb_ary_push(*(meta.result), rb_hash_new()); // [{}]
303
+ rb_ary_push(*(meta.result), rb_hash_new()); /* [{}] */
304
304
  } else {
305
- rb_ary_push(*(meta.result), rb_ary_new()); // [[]]
305
+ rb_ary_push(*(meta.result), rb_ary_new()); /* [[]] */
306
306
  }
307
307
 
308
- // Actual parsing and error handling
308
+ /* Actual parsing and error handling */
309
309
  if (csv_string_len != csv_parse(&cp, csv_string, strlen(csv_string),
310
310
  &end_of_field_callback, &end_of_line_callback, &meta)) {
311
311
  error = csv_error(&cp);
@@ -327,7 +327,7 @@ static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
327
327
  }
328
328
  }
329
329
 
330
- // Flushing libcsv's buffer and freeing up allocated memory
330
+ /* Flushing libcsv's buffer and freeing up allocated memory */
331
331
  csv_fini(&cp, &end_of_field_callback, &end_of_line_callback, &meta);
332
332
  csv_free(&cp);
333
333
 
@@ -343,23 +343,23 @@ static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
343
343
  free(meta.column_names);
344
344
  }
345
345
 
346
- // Remove the last row if it's empty. That happens if CSV file ends with a newline.
346
+ /* Remove the last row if it's empty. That happens if CSV file ends with a newline. */
347
347
  if (RARRAY_LEN(rb_ary_entry(*(meta.result), -1)) == 0) {
348
348
  rb_ary_pop(*(meta.result));
349
349
  }
350
350
 
351
- // An array of arrays of strings is returned.
351
+ /* An array of arrays of strings is returned. */
352
352
  return *(meta.result);
353
353
  }
354
354
 
355
355
 
356
356
  /* Define Ruby API */
357
357
  void Init_rcsv(void) {
358
- VALUE klass = rb_define_class("Rcsv", rb_cObject); // class Rcsv; end
358
+ VALUE klass = rb_define_class("Rcsv", rb_cObject); /* class Rcsv; end */
359
359
 
360
- // Error is initialized through static variable in order to access it from rb_rcsv_raw_parse
360
+ /* Error is initialized through static variable in order to access it from rb_rcsv_raw_parse */
361
361
  rcsv_parse_error = rb_define_class_under(klass, "ParseError", rb_eStandardError);
362
362
 
363
- // def Rcsv.raw_parse; ...; end
363
+ /* def Rcsv.raw_parse; ...; end */
364
364
  rb_define_singleton_method(klass, "raw_parse", rb_rcsv_raw_parse, -1);
365
365
  }
data/lib/rcsv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Rcsv
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rcsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -20,6 +20,7 @@ extensions:
20
20
  extra_rdoc_files: []
21
21
  files:
22
22
  - .gitignore
23
+ - .travis.yml
23
24
  - COPYING.LESSER
24
25
  - Gemfile
25
26
  - Gemfile.lock