smarter_csv 1.8.0 → 1.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +8 -1
- data/ext/smarter_csv/smarter_csv.c +0 -1
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +38 -12
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a7aa350efc77f90c6986a7573e733b5d9d02930c94465f17d2227b346263a6ce
|
4
|
+
data.tar.gz: 42351edf3e618b8c025f266796897aa0c3572d77e42788a05b1ee37ce8bdeed2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8bd9d59d7260a8e90ce472917801b98d088e37de5b1e912914f820f2efbbeb0491f5056d47575debdf1bccb8b9b8670cd089647efa15ec93b02413747dcfe702
|
7
|
+
data.tar.gz: 861364c6213af99c11cd3b9a59b2cf46f8c8e850ee2273e4f1b790714c9cd0ca66a734d64233737e086669c2b6aa51415f1343c3d61811547ec3c715d7a1620c
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
3
3
|
|
4
|
+
## 1.8.1 (2023-03-19)
|
5
|
+
* added validation against invalid values for :col_sep, :row_sep, :quote_char (issue #216)
|
6
|
+
* deprecated `required_headers`; use `required_keys` instead (issue #140)
|
7
|
+
* fixed issue with require statement
|
8
|
+
|
4
9
|
## 1.8.0 (2023-03-18)
|
5
10
|
* NEW DEFAULTS: `col_sep: :auto`, `row_sep: :auto`. Fully automatic detection by default.
|
6
11
|
* ignore Byte Order Marker (BOM) in first line in file (issues #27, #219)
|
data/README.md
CHANGED
@@ -73,6 +73,12 @@ $ hexdump -C spec/fixtures/bom_test_feff.csv
|
|
73
73
|
00000040 73 2c 35 36 37 38 0d 0a |s,5678..|
|
74
74
|
```
|
75
75
|
|
76
|
+
### Examples
|
77
|
+
|
78
|
+
Here are some examples to demonstrate the versatility of SmarterCSV.
|
79
|
+
|
80
|
+
By default SmarterCSV determines the `row_sep` and `col_sep` values automatically.
|
81
|
+
In rare cases you may have to manually set these values, after going through the troubleshooting procedure described above.
|
76
82
|
|
77
83
|
#### Example 1a: How SmarterCSV processes CSV-files as array of hashes:
|
78
84
|
Please note how each hash contains only the keys for columns with non-null values.
|
@@ -267,7 +273,8 @@ And header and data validations will also be supported in 2.x
|
|
267
273
|
---------------------------------------------------------------------------------------------------------------------------------
|
268
274
|
| :key_mapping | nil | a hash which maps headers from the CSV file to keys in the result hash |
|
269
275
|
| :silence_missing_key | false | ignore missing keys in `key_mapping` if true |
|
270
|
-
| :
|
276
|
+
| :required_keys | nil | An array. Specify the required names AFTER header transformation. |
|
277
|
+
| :required_headers | nil | (DEPRECATED / renamed) Use `required_keys` instead |
|
271
278
|
| | | or an exception is raised. No validation if nil is given. |
|
272
279
|
| :remove_unmapped_keys | false | when using :key_mapping option, should non-mapped keys / columns be removed? |
|
273
280
|
| :downcase_header | true | downcase all column headers |
|
@@ -27,7 +27,6 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
|
|
27
27
|
long col_sep_len = RSTRING_LEN(col_sep);
|
28
28
|
|
29
29
|
char *quoteP = RSTRING_PTR(quote_char);
|
30
|
-
long quote_len = RSTRING_LEN(quote_char);
|
31
30
|
long quote_count = 0;
|
32
31
|
|
33
32
|
bool col_sep_found = true;
|
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv.rb
CHANGED
@@ -3,24 +3,25 @@
|
|
3
3
|
require_relative "extensions/hash"
|
4
4
|
require_relative "smarter_csv/version"
|
5
5
|
|
6
|
-
|
7
|
-
require 'smarter_csv.bundle' unless ENV['CI'] #
|
6
|
+
require_relative "smarter_csv/smarter_csv" unless ENV['CI'] # does not compile/link in CI?
|
7
|
+
# require 'smarter_csv.bundle' unless ENV['CI'] # local testing
|
8
8
|
|
9
9
|
module SmarterCSV
|
10
10
|
class SmarterCSVException < StandardError; end
|
11
11
|
class HeaderSizeMismatch < SmarterCSVException; end
|
12
12
|
class IncorrectOption < SmarterCSVException; end
|
13
|
+
class ValidationError < SmarterCSVException; end
|
13
14
|
class DuplicateHeaders < SmarterCSVException; end
|
14
15
|
class MissingHeaders < SmarterCSVException; end
|
15
16
|
class NoColSepDetected < SmarterCSVException; end
|
16
|
-
class KeyMappingError < SmarterCSVException; end
|
17
|
-
class MalformedCSVError < SmarterCSVException; end
|
17
|
+
class KeyMappingError < SmarterCSVException; end # CURRENTLY UNUSED -> version 1.9.0
|
18
18
|
|
19
19
|
# first parameter: filename or input object which responds to readline method
|
20
20
|
def SmarterCSV.process(input, options = {}, &block)
|
21
21
|
options = default_options.merge(options)
|
22
22
|
options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
|
23
23
|
puts "SmarterCSV OPTIONS: #{options.inspect}" if options[:verbose]
|
24
|
+
validate_options!(options)
|
24
25
|
|
25
26
|
headerA = []
|
26
27
|
result = []
|
@@ -214,7 +215,7 @@ module SmarterCSV
|
|
214
215
|
headers_in_file: true,
|
215
216
|
invalid_byte_sequence: '',
|
216
217
|
keep_original_headers: false,
|
217
|
-
|
218
|
+
key_mapping: nil,
|
218
219
|
quote_char: '"',
|
219
220
|
remove_empty_hashes: true,
|
220
221
|
remove_empty_values: true,
|
@@ -222,6 +223,7 @@ module SmarterCSV
|
|
222
223
|
remove_values_matching: nil,
|
223
224
|
remove_zero_values: false,
|
224
225
|
required_headers: nil,
|
226
|
+
required_keys: nil,
|
225
227
|
row_sep: :auto, # was: $/,
|
226
228
|
silence_missing_keys: false,
|
227
229
|
skip_lines: nil,
|
@@ -486,13 +488,13 @@ module SmarterCSV
|
|
486
488
|
|
487
489
|
unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
|
488
490
|
key_mappingH = options[:key_mapping]
|
491
|
+
|
489
492
|
# do some key mapping on the keys in the file header
|
490
493
|
# if you want to completely delete a key, then map it to nil or to ''
|
491
494
|
if !key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
492
495
|
unless options[:silence_missing_keys]
|
493
496
|
# if silence_missing_keys are not set, raise error if missing header
|
494
497
|
missing_keys = key_mappingH.keys - headerA
|
495
|
-
|
496
498
|
puts "WARNING: missing header(s): #{missing_keys.join(",")}" unless missing_keys.empty?
|
497
499
|
end
|
498
500
|
|
@@ -510,12 +512,21 @@ module SmarterCSV
|
|
510
512
|
raise SmarterCSV::DuplicateHeaders, "ERROR: duplicate headers: #{duplicate_headers.join(',')}"
|
511
513
|
end
|
512
514
|
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
515
|
+
# deprecate required_headers
|
516
|
+
if !options[:required_headers].nil?
|
517
|
+
puts "DEPRECATION WARNING: please use 'required_keys' instead of 'required headers'"
|
518
|
+
if options[:required_keys].nil?
|
519
|
+
options[:required_keys] = options[:required_headers]
|
520
|
+
options[:required_headers] = nil
|
521
|
+
end
|
522
|
+
end
|
523
|
+
|
524
|
+
if options[:required_keys] && options[:required_keys].is_a?(Array)
|
525
|
+
missing_keys = []
|
526
|
+
options[:required_keys].each do |k|
|
527
|
+
missing_keys << k unless headerA.include?(k)
|
517
528
|
end
|
518
|
-
raise SmarterCSV::MissingHeaders, "ERROR: missing
|
529
|
+
raise SmarterCSV::MissingHeaders, "ERROR: missing attributes: #{missing_keys.join(',')}" unless missing_keys.empty?
|
519
530
|
end
|
520
531
|
|
521
532
|
@headers = headerA
|
@@ -546,7 +557,7 @@ module SmarterCSV
|
|
546
557
|
|
547
558
|
def remove_bom(str)
|
548
559
|
str_as_hex = str.bytes.map{|x| x.to_s(16)}
|
549
|
-
# if string does not start with one of the bytes
|
560
|
+
# if string does not start with one of the bytes, there is no BOM
|
550
561
|
return str unless %w[ef fe ff 0].include?(str_as_hex[0])
|
551
562
|
|
552
563
|
return str.byteslice(4..-1) if [UTF_32_BOM, UTF_32LE_BOM].include?(str_as_hex[0..3])
|
@@ -557,6 +568,21 @@ module SmarterCSV
|
|
557
568
|
str
|
558
569
|
end
|
559
570
|
|
571
|
+
def validate_options!(options)
|
572
|
+
keys = options.keys
|
573
|
+
errors = []
|
574
|
+
errors << "invalid row_sep" if keys.include?(:row_sep) && !option_valid?(options[:row_sep])
|
575
|
+
errors << "invalid col_sep" if keys.include?(:col_sep) && !option_valid?(options[:col_sep])
|
576
|
+
errors << "invalid quote_char" if keys.include?(:quote_char) && !option_valid?(options[:quote_char])
|
577
|
+
raise SmarterCSV::ValidationError, errors.inspect if errors.any?
|
578
|
+
end
|
579
|
+
|
580
|
+
def option_valid?(str)
|
581
|
+
return true if str.is_a?(Symbol) && str == :auto
|
582
|
+
return true if str.is_a?(String) && !str.empty?
|
583
|
+
false
|
584
|
+
end
|
585
|
+
|
560
586
|
def candidated_column_separators_from_headers(filehandle, options, delimiters)
|
561
587
|
candidates = Hash.new(0)
|
562
588
|
line = readline_with_counts(filehandle, options.slice(:row_sep))
|