smarter_csv 1.8.2 → 1.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4a6ec6f3a579d9c1e6bfc2c3c9006f64d8c7b705eeca6ec048ea56c688f8ea1c
4
- data.tar.gz: ba9a4a289adcc2fc398ae608f9570c28baac57b877852f3ea37c78fa57f2d7e3
3
+ metadata.gz: 122fe57cc771c142a77ceb6305212e8884660a21b9c9edd67a198a14d19e103e
4
+ data.tar.gz: 4355a9bb355d9f2fa7640ed9f712e6ba57b0a8682417c7668745f96ded39a7c1
5
5
  SHA512:
6
- metadata.gz: d8f516501a5539e30789e2d18c4d051f50372786d8df1272192c2bc7997470cf5d5e1ae94b776d0d580cb62ed8ffb0f6591ccc2be5d60eae6e421f22f0c92f94
7
- data.tar.gz: 2993c59278adb531cf2299c0aa3869c637868f2f4e6421f845e4ade35011f94ee6e226f43e07b206c67c354c2eb38c0a41981ffbdbff00950690b94b06b2aacd
6
+ metadata.gz: 1646311a9207cf6f042f7e9b30b4ebc94cb6389b541548104b1af888ebdec7af0e50c675fd98ae3e60e86f0b6cd81b51a7e01588b82ae79cdb9ac2674bcc8a51
7
+ data.tar.gz: 24cc3e5d6467349d24bac39c615e802a1a8f8e5100b1d8f1f93962f23d6ababb50d58e8505d28e83e5b3c7de7d65d57880dc4447dc1247ac2a200ba2f034d27e
data/CHANGELOG.md CHANGED
@@ -1,6 +1,13 @@
1
1
 
2
2
  # SmarterCSV 1.x Change Log
3
3
 
4
+ ## 1.8.4 (2023-04-01)
5
+ * fix gem loading issue (issue #232, #234)
6
+
7
+ ## 1.8.3 (2023-03-30)
8
+ * bugfix: Windows one-column files were raising NoColSepDetected (issue #229)
9
+
10
+
4
11
  ## 1.8.2 (2023-03-21)
5
12
  * bugfix: do not raise `NoColSepDetected` for CSV files with only one column in most cases (issue #222)
6
13
  If the first lines contain non-ASCII characters, and no col_sep is detected, it will still raise `NoColSepDetected`
data/README.md CHANGED
@@ -3,26 +3,33 @@
3
3
 
4
4
  [![codecov](https://codecov.io/gh/tilo/smarter_csv/branch/main/graph/badge.svg?token=1L7OD80182)](https://codecov.io/gh/tilo/smarter_csv) [![Gem Version](https://badge.fury.io/rb/smarter_csv.svg)](http://badge.fury.io/rb/smarter_csv)
5
5
 
6
+ #### Development Branches
7
+
8
+ * default branch is `main` for 1.x development
9
+ * 2.x development is on `2.0-development` (check this branch for 2.0 documentation)
10
+
6
11
  #### Work towards Future Version 2.0
7
12
 
8
13
  * Work towards SmarterCSV 2.0 is still ongoing, with improved features, and more streamlined options, but consider it as experimental at this time.
9
14
  Please check the [2.0-develop branch](https://github.com/tilo/smarter_csv/tree/2.0-develop), open any issues and pull requests with mention of tag v2.0.
10
15
 
11
- * New versions of SmarterCSV 1.x will soon print a deprecation warning if you set :verbose to true
12
- See below for list of deprecated options.
16
+ ---------------
13
17
 
14
- #### Restructured Branches
18
+ #### SmarterCSV 1.x [Current Version]
15
19
 
16
- * default branch is `main` for 1.x development
17
- * 2.x development is on `2.0-development`
20
+ `smarter_csv` is a Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, suitable for direct processing with ActiveRecord, parallel processing, kicking-off batch jobs with Sidekiq, or uploading data to S3.
18
21
 
19
- ---------------
22
+ The goals for SmarterCSV are:
23
+ * ease of use for handling most common CSV files without having to tweak options
24
+ * improve robustness of your code when you have no control over the quality of the CSV files which are processed
25
+ * formatting each row of data as a hash, in order to allow easy processing with ActiveRecord, parallel processing, kicking-off batch jobs with Sidekiq, or uploading data to S3.
20
26
 
21
- #### SmarterCSV 1.x [Current Version]
27
+ #### Rescue from Exceptions
28
+ While SmarterCSV uses sensible defaults to process the most common CSV files, it will raise exceptions if it cannot auto-detect `col_sep`, `row_sep`, or if it encounters other problems. Therefore, when calling `SmarterCSV.process`, please rescue from `SmarterCSVException`, and handle outliers according to your requirements.
22
29
 
23
- `smarter_csv` is a Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, suitable for direct processing with ActiveRecord, parallel processing, or kicking-off batch jobs with Sidekiq.
30
+ If you encounter unusual CSV files, please follow the tips in the Troubleshooting section below. You can use the options below to accommodate unusual formats.
24
31
 
25
- To create high-quality output, some options are enabled as a default. Please make sure to check the output and tweak the options accordingly.
32
+ #### Features
26
33
 
27
34
  One `smarter_csv` user wrote:
28
35
 
@@ -15,67 +15,67 @@
15
15
  static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quote_char, VALUE max_size) {
16
16
  if (RB_TYPE_P(line, T_NIL) == 1) {
17
17
  return rb_ary_new();
18
+ }
18
19
 
19
- } else if (RB_TYPE_P(line, T_STRING) == 1) {
20
- rb_encoding *encoding = rb_enc_get(line); /* get the encoding from the input line */
21
- char *startP = RSTRING_PTR(line); /* may not be null terminated */
22
- long line_len = RSTRING_LEN(line);
23
- char *endP = startP + line_len ; /* points behind the string */
24
- char *p = startP;
20
+ if (RB_TYPE_P(line, T_STRING) != 1) {
21
+ rb_raise(rb_eTypeError, "ERROR in SmarterCSV.parse_line: line has to be a string or nil");
22
+ }
25
23
 
26
- char *col_sepP = RSTRING_PTR(col_sep);
27
- long col_sep_len = RSTRING_LEN(col_sep);
24
+ rb_encoding *encoding = rb_enc_get(line); /* get the encoding from the input line */
25
+ char *startP = RSTRING_PTR(line); /* may not be null terminated */
26
+ long line_len = RSTRING_LEN(line);
27
+ char *endP = startP + line_len ; /* points behind the string */
28
+ char *p = startP;
28
29
 
29
- char *quoteP = RSTRING_PTR(quote_char);
30
- long quote_count = 0;
30
+ char *col_sepP = RSTRING_PTR(col_sep);
31
+ long col_sep_len = RSTRING_LEN(col_sep);
31
32
 
32
- bool col_sep_found = true;
33
+ char *quoteP = RSTRING_PTR(quote_char);
34
+ long quote_count = 0;
33
35
 
34
- VALUE elements = rb_ary_new();
35
- VALUE field;
36
- long i;
36
+ bool col_sep_found = true;
37
37
 
38
- while (p < endP) {
39
- /* does the remaining string start with col_sep ? */
40
- col_sep_found = true;
41
- for(i=0; (i < col_sep_len) && (p+i < endP) ; i++) {
42
- col_sep_found = col_sep_found && (*(p+i) == *(col_sepP+i));
43
- }
44
- /* if col_sep was found and we have even quotes */
45
- if (col_sep_found && (quote_count % 2 == 0)) {
46
- /* if max_size != nil && lements.size >= header_size */
47
- if ((max_size != Qnil) && RARRAY_LEN(elements) >= NUM2INT(max_size)) {
48
- break;
49
- } else {
50
- /* push that field with original encoding onto the results */
51
- field = rb_enc_str_new(startP, p - startP, encoding);
52
- rb_ary_push(elements, field);
38
+ VALUE elements = rb_ary_new();
39
+ VALUE field;
40
+ long i;
53
41
 
54
- p += col_sep_len;
55
- startP = p;
56
- }
42
+ while (p < endP) {
43
+ /* does the remaining string start with col_sep ? */
44
+ col_sep_found = true;
45
+ for(i=0; (i < col_sep_len) && (p+i < endP) ; i++) {
46
+ col_sep_found = col_sep_found && (*(p+i) == *(col_sepP+i));
47
+ }
48
+ /* if col_sep was found and we have even quotes */
49
+ if (col_sep_found && (quote_count % 2 == 0)) {
50
+ /* if max_size != nil && lements.size >= header_size */
51
+ if ((max_size != Qnil) && RARRAY_LEN(elements) >= NUM2INT(max_size)) {
52
+ break;
57
53
  } else {
58
- if (*p == *quoteP) {
59
- quote_count += 1;
60
- }
61
- p++;
62
- }
63
- } /* while */
54
+ /* push that field with original encoding onto the results */
55
+ field = rb_enc_str_new(startP, p - startP, encoding);
56
+ rb_ary_push(elements, field);
64
57
 
65
- /* check if the last part of the line needs to be processed */
66
- if ((max_size == Qnil) || RARRAY_LEN(elements) < NUM2INT(max_size)) {
67
- /* copy the remaining line as a field with original encoding onto the results */
68
- field = rb_enc_str_new(startP, endP - startP, encoding);
69
- rb_ary_push(elements, field);
58
+ p += col_sep_len;
59
+ startP = p;
60
+ }
61
+ } else {
62
+ if (*p == *quoteP) {
63
+ quote_count += 1;
64
+ }
65
+ p++;
70
66
  }
67
+ } /* while */
71
68
 
72
- return elements;
69
+ /* check if the last part of the line needs to be processed */
70
+ if ((max_size == Qnil) || RARRAY_LEN(elements) < NUM2INT(max_size)) {
71
+ /* copy the remaining line as a field with original encoding onto the results */
72
+ field = rb_enc_str_new(startP, endP - startP, encoding);
73
+ rb_ary_push(elements, field);
73
74
  }
74
75
 
75
- rb_raise(rb_eTypeError, "ERROR in SmarterCSV.parse_line: line has to be a string or nil");
76
+ return elements;
76
77
  }
77
78
 
78
-
79
79
  VALUE SmarterCSV = Qnil;
80
80
 
81
81
  void Init_smarter_csv(void) {
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SmarterCSV
4
- VERSION = "1.8.2"
4
+ VERSION = "1.8.4"
5
5
  end
data/lib/smarter_csv.rb CHANGED
@@ -411,7 +411,7 @@ module SmarterCSV
411
411
 
412
412
  if candidates.values.max == 0
413
413
  # if the header only contains
414
- return ',' if line =~ /^\w+$/
414
+ return ',' if line.chomp(options[:row_sep]) =~ /^\w+$/
415
415
 
416
416
  raise SmarterCSV::NoColSepDetected
417
417
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smarter_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.2
4
+ version: 1.8.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tilo Sloboda
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-22 00:00:00.000000000 Z
11
+ date: 2023-04-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: awesome_print