smarter_csv 1.8.2 → 1.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +16 -9
- data/ext/smarter_csv/smarter_csv.c +46 -46
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 122fe57cc771c142a77ceb6305212e8884660a21b9c9edd67a198a14d19e103e
|
4
|
+
data.tar.gz: 4355a9bb355d9f2fa7640ed9f712e6ba57b0a8682417c7668745f96ded39a7c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1646311a9207cf6f042f7e9b30b4ebc94cb6389b541548104b1af888ebdec7af0e50c675fd98ae3e60e86f0b6cd81b51a7e01588b82ae79cdb9ac2674bcc8a51
|
7
|
+
data.tar.gz: 24cc3e5d6467349d24bac39c615e802a1a8f8e5100b1d8f1f93962f23d6ababb50d58e8505d28e83e5b3c7de7d65d57880dc4447dc1247ac2a200ba2f034d27e
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,13 @@
|
|
1
1
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
3
3
|
|
4
|
+
## 1.8.4 (2023-04-01)
|
5
|
+
* fix gem loading issue (issue #232, #234)
|
6
|
+
|
7
|
+
## 1.8.3 (2023-03-30)
|
8
|
+
* bugfix: windows one-column files were raising NoColSepDetected (issue #229)
|
9
|
+
|
10
|
+
|
4
11
|
## 1.8.2 (2023-03-21)
|
5
12
|
* bugfix: do not raise `NoColSepDetected` for CSV files with only one column in most cases (issue #222)
|
6
13
|
If the first lines contain non-ASCII characters, and no col_sep is detected, it will still raise `NoColSepDetected`
|
data/README.md
CHANGED
@@ -3,26 +3,33 @@
|
|
3
3
|
|
4
4
|
[](https://codecov.io/gh/tilo/smarter_csv) [](http://badge.fury.io/rb/smarter_csv)
|
5
5
|
|
6
|
+
#### Development Branches
|
7
|
+
|
8
|
+
* default branch is `main` for 1.x development
|
9
|
+
* 2.x development is on `2.0-development` (check this branch for 2.0 documentation)
|
10
|
+
|
6
11
|
#### Work towards Future Version 2.0
|
7
12
|
|
8
13
|
* Work towards SmarterCSV 2.0 is still ongoing, with improved features, and more streamlined options, but consider it as experimental at this time.
|
9
14
|
Please check the [2.0-develop branch](https://github.com/tilo/smarter_csv/tree/2.0-develop), open any issues and pull requests with mention of tag v2.0.
|
10
15
|
|
11
|
-
|
12
|
-
See below for list of deprecated options.
|
16
|
+
---------------
|
13
17
|
|
14
|
-
####
|
18
|
+
#### SmarterCSV 1.x [Current Version]
|
15
19
|
|
16
|
-
|
17
|
-
* 2.x development is on `2.0-development`
|
20
|
+
`smarter_csv` is a Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, suitable for direct processing with ActiveRecord, parallel processing, kicking-off batch jobs with Sidekiq, or uploading data to S3.
|
18
21
|
|
19
|
-
|
22
|
+
The goals for SmarterCSV are:
|
23
|
+
* ease of use for handling most common CSV files without having to tweak options
|
24
|
+
* improve robustness of your code when you have no control over the quality of the CSV files which are processed
|
25
|
+
* formatting each row of data as a hash, in order to allow easy processing with ActiveRecord, parallel processing, kicking-off batch jobs with Sidekiq, or uploading data to S3.
|
20
26
|
|
21
|
-
####
|
27
|
+
#### Rescue from Exceptions
|
28
|
+
While SmarterCSV uses sensible defaults to process the most common CSV files, it will raise exceptions if it cannot auto-detect `col_sep`, `row_sep`, or if it encounters other problems. Therefore, when calling `SmarterCSV.process`, please rescue from `SmarterCSVException`, and handle outliers according to your requirements.
|
22
29
|
|
23
|
-
|
30
|
+
If you encounter unusual CSV files, please follow the tips in the Troubleshooting section below. You can use the options below to accommodate unusual formats.
|
24
31
|
|
25
|
-
|
32
|
+
#### Features
|
26
33
|
|
27
34
|
One `smarter_csv` user wrote:
|
28
35
|
|
@@ -15,67 +15,67 @@
|
|
15
15
|
static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quote_char, VALUE max_size) {
|
16
16
|
if (RB_TYPE_P(line, T_NIL) == 1) {
|
17
17
|
return rb_ary_new();
|
18
|
+
}
|
18
19
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
long line_len = RSTRING_LEN(line);
|
23
|
-
char *endP = startP + line_len ; /* points behind the string */
|
24
|
-
char *p = startP;
|
20
|
+
if (RB_TYPE_P(line, T_STRING) != 1) {
|
21
|
+
rb_raise(rb_eTypeError, "ERROR in SmarterCSV.parse_line: line has to be a string or nil");
|
22
|
+
}
|
25
23
|
|
26
|
-
|
27
|
-
|
24
|
+
rb_encoding *encoding = rb_enc_get(line); /* get the encoding from the input line */
|
25
|
+
char *startP = RSTRING_PTR(line); /* may not be null terminated */
|
26
|
+
long line_len = RSTRING_LEN(line);
|
27
|
+
char *endP = startP + line_len ; /* points behind the string */
|
28
|
+
char *p = startP;
|
28
29
|
|
29
|
-
|
30
|
-
|
30
|
+
char *col_sepP = RSTRING_PTR(col_sep);
|
31
|
+
long col_sep_len = RSTRING_LEN(col_sep);
|
31
32
|
|
32
|
-
|
33
|
+
char *quoteP = RSTRING_PTR(quote_char);
|
34
|
+
long quote_count = 0;
|
33
35
|
|
34
|
-
|
35
|
-
VALUE field;
|
36
|
-
long i;
|
36
|
+
bool col_sep_found = true;
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
for(i=0; (i < col_sep_len) && (p+i < endP) ; i++) {
|
42
|
-
col_sep_found = col_sep_found && (*(p+i) == *(col_sepP+i));
|
43
|
-
}
|
44
|
-
/* if col_sep was found and we have even quotes */
|
45
|
-
if (col_sep_found && (quote_count % 2 == 0)) {
|
46
|
-
/* if max_size != nil && lements.size >= header_size */
|
47
|
-
if ((max_size != Qnil) && RARRAY_LEN(elements) >= NUM2INT(max_size)) {
|
48
|
-
break;
|
49
|
-
} else {
|
50
|
-
/* push that field with original encoding onto the results */
|
51
|
-
field = rb_enc_str_new(startP, p - startP, encoding);
|
52
|
-
rb_ary_push(elements, field);
|
38
|
+
VALUE elements = rb_ary_new();
|
39
|
+
VALUE field;
|
40
|
+
long i;
|
53
41
|
|
54
|
-
|
55
|
-
|
56
|
-
|
42
|
+
while (p < endP) {
|
43
|
+
/* does the remaining string start with col_sep ? */
|
44
|
+
col_sep_found = true;
|
45
|
+
for(i=0; (i < col_sep_len) && (p+i < endP) ; i++) {
|
46
|
+
col_sep_found = col_sep_found && (*(p+i) == *(col_sepP+i));
|
47
|
+
}
|
48
|
+
/* if col_sep was found and we have even quotes */
|
49
|
+
if (col_sep_found && (quote_count % 2 == 0)) {
|
50
|
+
/* if max_size != nil && lements.size >= header_size */
|
51
|
+
if ((max_size != Qnil) && RARRAY_LEN(elements) >= NUM2INT(max_size)) {
|
52
|
+
break;
|
57
53
|
} else {
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
p++;
|
62
|
-
}
|
63
|
-
} /* while */
|
54
|
+
/* push that field with original encoding onto the results */
|
55
|
+
field = rb_enc_str_new(startP, p - startP, encoding);
|
56
|
+
rb_ary_push(elements, field);
|
64
57
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
58
|
+
p += col_sep_len;
|
59
|
+
startP = p;
|
60
|
+
}
|
61
|
+
} else {
|
62
|
+
if (*p == *quoteP) {
|
63
|
+
quote_count += 1;
|
64
|
+
}
|
65
|
+
p++;
|
70
66
|
}
|
67
|
+
} /* while */
|
71
68
|
|
72
|
-
|
69
|
+
/* check if the last part of the line needs to be processed */
|
70
|
+
if ((max_size == Qnil) || RARRAY_LEN(elements) < NUM2INT(max_size)) {
|
71
|
+
/* copy the remaining line as a field with original encoding onto the results */
|
72
|
+
field = rb_enc_str_new(startP, endP - startP, encoding);
|
73
|
+
rb_ary_push(elements, field);
|
73
74
|
}
|
74
75
|
|
75
|
-
|
76
|
+
return elements;
|
76
77
|
}
|
77
78
|
|
78
|
-
|
79
79
|
VALUE SmarterCSV = Qnil;
|
80
80
|
|
81
81
|
void Init_smarter_csv(void) {
|
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.2
|
4
|
+
version: 1.8.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tilo Sloboda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-04-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: awesome_print
|