smarter_csv 1.8.0 → 1.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +8 -1
- data/ext/smarter_csv/smarter_csv.c +0 -1
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +38 -12
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a7aa350efc77f90c6986a7573e733b5d9d02930c94465f17d2227b346263a6ce
|
4
|
+
data.tar.gz: 42351edf3e618b8c025f266796897aa0c3572d77e42788a05b1ee37ce8bdeed2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8bd9d59d7260a8e90ce472917801b98d088e37de5b1e912914f820f2efbbeb0491f5056d47575debdf1bccb8b9b8670cd089647efa15ec93b02413747dcfe702
|
7
|
+
data.tar.gz: 861364c6213af99c11cd3b9a59b2cf46f8c8e850ee2273e4f1b790714c9cd0ca66a734d64233737e086669c2b6aa51415f1343c3d61811547ec3c715d7a1620c
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
3
3
|
|
4
|
+
## 1.8.1 (2023-03-19)
|
5
|
+
* added validation against invalid values for :col_sep, :row_sep, :quote_char (issue #216)
|
6
|
+
* deprecated `required_headers`; use `required_keys` instead (issue #140)
|
7
|
+
* fixed issue with require statement
|
8
|
+
|
4
9
|
## 1.8.0 (2023-03-18)
|
5
10
|
* NEW DEFAULTS: `col_sep: :auto`, `row_sep: :auto`. Fully automatic detection by default.
|
6
11
|
* ignore Byte Order Mark (BOM) in the first line of the file (issues #27, #219)
|
data/README.md
CHANGED
@@ -73,6 +73,12 @@ $ hexdump -C spec/fixtures/bom_test_feff.csv
|
|
73
73
|
00000040 73 2c 35 36 37 38 0d 0a |s,5678..|
|
74
74
|
```
|
75
75
|
|
76
|
+
### Examples
|
77
|
+
|
78
|
+
Here are some examples to demonstrate the versatility of SmarterCSV.
|
79
|
+
|
80
|
+
By default SmarterCSV determines the `row_sep` and `col_sep` values automatically.
|
81
|
+
In rare cases you may have to manually set these values, after going through the troubleshooting procedure described above.
|
76
82
|
|
77
83
|
#### Example 1a: How SmarterCSV processes CSV-files as array of hashes:
|
78
84
|
Please note how each hash contains only the keys for columns with non-null values.
|
@@ -267,7 +273,8 @@ And header and data validations will also be supported in 2.x
|
|
267
273
|
---------------------------------------------------------------------------------------------------------------------------------
|
268
274
|
| :key_mapping | nil | a hash which maps headers from the CSV file to keys in the result hash |
|
269
275
|
| :silence_missing_key | false | ignore missing keys in `key_mapping` if true |
|
270
|
-
| :
|
276
|
+
| :required_keys | nil | An array. Specify the required names AFTER header transformation. |
|
277
|
+
| :required_headers | nil | (DEPRECATED / renamed) Use `required_keys` instead |
|
271
278
|
| | | or an exception is raised. No validation if nil is given. |
|
272
279
|
| :remove_unmapped_keys | false | when using :key_mapping option, should non-mapped keys / columns be removed? |
|
273
280
|
| :downcase_header | true | downcase all column headers |
|
@@ -27,7 +27,6 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
|
|
27
27
|
long col_sep_len = RSTRING_LEN(col_sep);
|
28
28
|
|
29
29
|
char *quoteP = RSTRING_PTR(quote_char);
|
30
|
-
long quote_len = RSTRING_LEN(quote_char);
|
31
30
|
long quote_count = 0;
|
32
31
|
|
33
32
|
bool col_sep_found = true;
|
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv.rb
CHANGED
@@ -3,24 +3,25 @@
|
|
3
3
|
require_relative "extensions/hash"
|
4
4
|
require_relative "smarter_csv/version"
|
5
5
|
|
6
|
-
|
7
|
-
require 'smarter_csv.bundle' unless ENV['CI'] #
|
6
|
+
require_relative "smarter_csv/smarter_csv" unless ENV['CI'] # does not compile/link in CI?
|
7
|
+
# require 'smarter_csv.bundle' unless ENV['CI'] # local testing
|
8
8
|
|
9
9
|
module SmarterCSV
|
10
10
|
class SmarterCSVException < StandardError; end
|
11
11
|
class HeaderSizeMismatch < SmarterCSVException; end
|
12
12
|
class IncorrectOption < SmarterCSVException; end
|
13
|
+
class ValidationError < SmarterCSVException; end
|
13
14
|
class DuplicateHeaders < SmarterCSVException; end
|
14
15
|
class MissingHeaders < SmarterCSVException; end
|
15
16
|
class NoColSepDetected < SmarterCSVException; end
|
16
|
-
class KeyMappingError < SmarterCSVException; end
|
17
|
-
class MalformedCSVError < SmarterCSVException; end
|
17
|
+
class KeyMappingError < SmarterCSVException; end # CURRENTLY UNUSED -> version 1.9.0
|
18
18
|
|
19
19
|
# first parameter: filename or input object which responds to readline method
|
20
20
|
def SmarterCSV.process(input, options = {}, &block)
|
21
21
|
options = default_options.merge(options)
|
22
22
|
options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
|
23
23
|
puts "SmarterCSV OPTIONS: #{options.inspect}" if options[:verbose]
|
24
|
+
validate_options!(options)
|
24
25
|
|
25
26
|
headerA = []
|
26
27
|
result = []
|
@@ -214,7 +215,7 @@ module SmarterCSV
|
|
214
215
|
headers_in_file: true,
|
215
216
|
invalid_byte_sequence: '',
|
216
217
|
keep_original_headers: false,
|
217
|
-
|
218
|
+
key_mapping: nil,
|
218
219
|
quote_char: '"',
|
219
220
|
remove_empty_hashes: true,
|
220
221
|
remove_empty_values: true,
|
@@ -222,6 +223,7 @@ module SmarterCSV
|
|
222
223
|
remove_values_matching: nil,
|
223
224
|
remove_zero_values: false,
|
224
225
|
required_headers: nil,
|
226
|
+
required_keys: nil,
|
225
227
|
row_sep: :auto, # was: $/,
|
226
228
|
silence_missing_keys: false,
|
227
229
|
skip_lines: nil,
|
@@ -486,13 +488,13 @@ module SmarterCSV
|
|
486
488
|
|
487
489
|
unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
|
488
490
|
key_mappingH = options[:key_mapping]
|
491
|
+
|
489
492
|
# do some key mapping on the keys in the file header
|
490
493
|
# if you want to completely delete a key, then map it to nil or to ''
|
491
494
|
if !key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
492
495
|
unless options[:silence_missing_keys]
|
493
496
|
# if silence_missing_keys are not set, raise error if missing header
|
494
497
|
missing_keys = key_mappingH.keys - headerA
|
495
|
-
|
496
498
|
puts "WARNING: missing header(s): #{missing_keys.join(",")}" unless missing_keys.empty?
|
497
499
|
end
|
498
500
|
|
@@ -510,12 +512,21 @@ module SmarterCSV
|
|
510
512
|
raise SmarterCSV::DuplicateHeaders, "ERROR: duplicate headers: #{duplicate_headers.join(',')}"
|
511
513
|
end
|
512
514
|
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
515
|
+
# deprecate required_headers
|
516
|
+
if !options[:required_headers].nil?
|
517
|
+
puts "DEPRECATION WARNING: please use 'required_keys' instead of 'required headers'"
|
518
|
+
if options[:required_keys].nil?
|
519
|
+
options[:required_keys] = options[:required_headers]
|
520
|
+
options[:required_headers] = nil
|
521
|
+
end
|
522
|
+
end
|
523
|
+
|
524
|
+
if options[:required_keys] && options[:required_keys].is_a?(Array)
|
525
|
+
missing_keys = []
|
526
|
+
options[:required_keys].each do |k|
|
527
|
+
missing_keys << k unless headerA.include?(k)
|
517
528
|
end
|
518
|
-
raise SmarterCSV::MissingHeaders, "ERROR: missing
|
529
|
+
raise SmarterCSV::MissingHeaders, "ERROR: missing attributes: #{missing_keys.join(',')}" unless missing_keys.empty?
|
519
530
|
end
|
520
531
|
|
521
532
|
@headers = headerA
|
@@ -546,7 +557,7 @@ module SmarterCSV
|
|
546
557
|
|
547
558
|
def remove_bom(str)
|
548
559
|
str_as_hex = str.bytes.map{|x| x.to_s(16)}
|
549
|
-
# if string does not start with one of the bytes
|
560
|
+
# if string does not start with one of the bytes, there is no BOM
|
550
561
|
return str unless %w[ef fe ff 0].include?(str_as_hex[0])
|
551
562
|
|
552
563
|
return str.byteslice(4..-1) if [UTF_32_BOM, UTF_32LE_BOM].include?(str_as_hex[0..3])
|
@@ -557,6 +568,21 @@ module SmarterCSV
|
|
557
568
|
str
|
558
569
|
end
|
559
570
|
|
571
|
+
def validate_options!(options)
|
572
|
+
keys = options.keys
|
573
|
+
errors = []
|
574
|
+
errors << "invalid row_sep" if keys.include?(:row_sep) && !option_valid?(options[:row_sep])
|
575
|
+
errors << "invalid col_sep" if keys.include?(:col_sep) && !option_valid?(options[:col_sep])
|
576
|
+
errors << "invalid quote_char" if keys.include?(:quote_char) && !option_valid?(options[:quote_char])
|
577
|
+
raise SmarterCSV::ValidationError, errors.inspect if errors.any?
|
578
|
+
end
|
579
|
+
|
580
|
+
def option_valid?(str)
|
581
|
+
return true if str.is_a?(Symbol) && str == :auto
|
582
|
+
return true if str.is_a?(String) && !str.empty?
|
583
|
+
false
|
584
|
+
end
|
585
|
+
|
560
586
|
def candidated_column_separators_from_headers(filehandle, options, delimiters)
|
561
587
|
candidates = Hash.new(0)
|
562
588
|
line = readline_with_counts(filehandle, options.slice(:row_sep))
|