smarter_csv 1.8.0 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 55400b3977ce35c58d60c4101362b68d99f2dbf7cb6a63956ae3b6ab79fcf1ac
4
- data.tar.gz: 41f46d3e4de69a7924ecd2214ba4e37766106469d1b8b257fd752a96204a47fd
3
+ metadata.gz: a7aa350efc77f90c6986a7573e733b5d9d02930c94465f17d2227b346263a6ce
4
+ data.tar.gz: 42351edf3e618b8c025f266796897aa0c3572d77e42788a05b1ee37ce8bdeed2
5
5
  SHA512:
6
- metadata.gz: 24ecc14cf9c65efe5c11e4bd20753420aa8ccd7385171cd21eac2e1be92c4896087cdc2a18799fa111c0f36154ad4481daed7f08b752f4fae2b5f27241b8cf6c
7
- data.tar.gz: c1d70e18a7ae8057e58cbf73b62f4896dd7030bc5fd2e927669e5ea829f9a3c11daeb9c8b83296dbb46e6f0d23034245b7207882a77b54cc1ca128a581175359
6
+ metadata.gz: 8bd9d59d7260a8e90ce472917801b98d088e37de5b1e912914f820f2efbbeb0491f5056d47575debdf1bccb8b9b8670cd089647efa15ec93b02413747dcfe702
7
+ data.tar.gz: 861364c6213af99c11cd3b9a59b2cf46f8c8e850ee2273e4f1b790714c9cd0ca66a734d64233737e086669c2b6aa51415f1343c3d61811547ec3c715d7a1620c
data/CHANGELOG.md CHANGED
@@ -1,6 +1,11 @@
1
1
 
2
2
  # SmarterCSV 1.x Change Log
3
3
 
4
+ ## 1.8.1 (2023-03-19)
5
+ * added validation against invalid values for :col_sep, :row_sep, :quote_char (issue #216)
6
+ * deprecated `required_headers`; use `required_keys` instead (issue #140)
7
+ * fixed issue with the require statement that loads the compiled extension
8
+
4
9
  ## 1.8.0 (2023-03-18)
5
10
  * NEW DEFAULTS: `col_sep: :auto`, `row_sep: :auto`. Fully automatic detection by default.
6
11
  * ignore Byte Order Marker (BOM) in first line in file (issues #27, #219)
data/README.md CHANGED
@@ -73,6 +73,12 @@ $ hexdump -C spec/fixtures/bom_test_feff.csv
73
73
  00000040 73 2c 35 36 37 38 0d 0a |s,5678..|
74
74
  ```
75
75
 
76
+ ### Examples
77
+
78
+ Here are some examples to demonstrate the versatility of SmarterCSV.
79
+
80
+ By default SmarterCSV determines the `row_sep` and `col_sep` values automatically.
81
+ In rare cases you may have to manually set these values, after going through the troubleshooting procedure described above.
76
82
 
77
83
  #### Example 1a: How SmarterCSV processes CSV-files as array of hashes:
78
84
  Please note how each hash contains only the keys for columns with non-null values.
@@ -267,7 +273,8 @@ And header and data validations will also be supported in 2.x
267
273
  ---------------------------------------------------------------------------------------------------------------------------------
268
274
  | :key_mapping | nil | a hash which maps headers from the CSV file to keys in the result hash |
269
275
  | :silence_missing_key | false | ignore missing keys in `key_mapping` if true |
270
- | :required_headers | nil | An array. Each of the given headers must be present after header manipulation, |
276
+ | :required_keys | nil | An array. Specify the required names AFTER header transformation. |
277
+ | :required_headers | nil | (DEPRECATED / renamed) Use `required_keys` instead |
271
278
  | | | An exception is raised if a required key is missing. No validation is performed if nil is given. |
272
279
  | :remove_unmapped_keys | false | when using :key_mapping option, should non-mapped keys / columns be removed? |
273
280
  | :downcase_header | true | downcase all column headers |
@@ -27,7 +27,6 @@ static VALUE rb_parse_csv_line(VALUE self, VALUE line, VALUE col_sep, VALUE quot
27
27
  long col_sep_len = RSTRING_LEN(col_sep);
28
28
 
29
29
  char *quoteP = RSTRING_PTR(quote_char);
30
- long quote_len = RSTRING_LEN(quote_char);
31
30
  long quote_count = 0;
32
31
 
33
32
  bool col_sep_found = true;
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SmarterCSV
4
- VERSION = "1.8.0"
4
+ VERSION = "1.8.1"
5
5
  end
data/lib/smarter_csv.rb CHANGED
@@ -3,24 +3,25 @@
3
3
  require_relative "extensions/hash"
4
4
  require_relative "smarter_csv/version"
5
5
 
6
- # require_relative "smarter_csv/smarter_csv" unless ENV['CI'] # does not compile/link in CI?
7
- require 'smarter_csv.bundle' unless ENV['CI'] # does not compile/link in CI?
6
+ require_relative "smarter_csv/smarter_csv" unless ENV['CI'] # does not compile/link in CI?
7
+ # require 'smarter_csv.bundle' unless ENV['CI'] # local testing
8
8
 
9
9
  module SmarterCSV
10
10
  class SmarterCSVException < StandardError; end
11
11
  class HeaderSizeMismatch < SmarterCSVException; end
12
12
  class IncorrectOption < SmarterCSVException; end
13
+ class ValidationError < SmarterCSVException; end
13
14
  class DuplicateHeaders < SmarterCSVException; end
14
15
  class MissingHeaders < SmarterCSVException; end
15
16
  class NoColSepDetected < SmarterCSVException; end
16
- class KeyMappingError < SmarterCSVException; end
17
- class MalformedCSVError < SmarterCSVException; end
17
+ class KeyMappingError < SmarterCSVException; end # CURRENTLY UNUSED -> version 1.9.0
18
18
 
19
19
  # first parameter: filename or input object which responds to readline method
20
20
  def SmarterCSV.process(input, options = {}, &block)
21
21
  options = default_options.merge(options)
22
22
  options[:invalid_byte_sequence] = '' if options[:invalid_byte_sequence].nil?
23
23
  puts "SmarterCSV OPTIONS: #{options.inspect}" if options[:verbose]
24
+ validate_options!(options)
24
25
 
25
26
  headerA = []
26
27
  result = []
@@ -214,7 +215,7 @@ module SmarterCSV
214
215
  headers_in_file: true,
215
216
  invalid_byte_sequence: '',
216
217
  keep_original_headers: false,
217
- key_mapping_hash: nil,
218
+ key_mapping: nil,
218
219
  quote_char: '"',
219
220
  remove_empty_hashes: true,
220
221
  remove_empty_values: true,
@@ -222,6 +223,7 @@ module SmarterCSV
222
223
  remove_values_matching: nil,
223
224
  remove_zero_values: false,
224
225
  required_headers: nil,
226
+ required_keys: nil,
225
227
  row_sep: :auto, # was: $/,
226
228
  silence_missing_keys: false,
227
229
  skip_lines: nil,
@@ -486,13 +488,13 @@ module SmarterCSV
486
488
 
487
489
  unless options[:user_provided_headers] # wouldn't make sense to re-map user provided headers
488
490
  key_mappingH = options[:key_mapping]
491
+
489
492
  # do some key mapping on the keys in the file header
490
493
  # if you want to completely delete a key, then map it to nil or to ''
491
494
  if !key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
492
495
  unless options[:silence_missing_keys]
493
496
  # if silence_missing_keys are not set, raise error if missing header
494
497
  missing_keys = key_mappingH.keys - headerA
495
-
496
498
  puts "WARNING: missing header(s): #{missing_keys.join(",")}" unless missing_keys.empty?
497
499
  end
498
500
 
@@ -510,12 +512,21 @@ module SmarterCSV
510
512
  raise SmarterCSV::DuplicateHeaders, "ERROR: duplicate headers: #{duplicate_headers.join(',')}"
511
513
  end
512
514
 
513
- if options[:required_headers] && options[:required_headers].is_a?(Array)
514
- missing_headers = []
515
- options[:required_headers].each do |k|
516
- missing_headers << k unless headerA.include?(k)
515
+ # deprecate required_headers
516
+ if !options[:required_headers].nil?
517
+ puts "DEPRECATION WARNING: please use 'required_keys' instead of 'required headers'"
518
+ if options[:required_keys].nil?
519
+ options[:required_keys] = options[:required_headers]
520
+ options[:required_headers] = nil
521
+ end
522
+ end
523
+
524
+ if options[:required_keys] && options[:required_keys].is_a?(Array)
525
+ missing_keys = []
526
+ options[:required_keys].each do |k|
527
+ missing_keys << k unless headerA.include?(k)
517
528
  end
518
- raise SmarterCSV::MissingHeaders, "ERROR: missing headers: #{missing_headers.join(',')}" unless missing_headers.empty?
529
+ raise SmarterCSV::MissingHeaders, "ERROR: missing attributes: #{missing_keys.join(',')}" unless missing_keys.empty?
519
530
  end
520
531
 
521
532
  @headers = headerA
@@ -546,7 +557,7 @@ module SmarterCSV
546
557
 
547
558
  def remove_bom(str)
548
559
  str_as_hex = str.bytes.map{|x| x.to_s(16)}
549
- # if string does not start with one of the bytes above, there is no BOM
560
+ # if string does not start with one of the bytes, there is no BOM
550
561
  return str unless %w[ef fe ff 0].include?(str_as_hex[0])
551
562
 
552
563
  return str.byteslice(4..-1) if [UTF_32_BOM, UTF_32LE_BOM].include?(str_as_hex[0..3])
@@ -557,6 +568,21 @@ module SmarterCSV
557
568
  str
558
569
  end
559
570
 
571
+ def validate_options!(options)
572
+ keys = options.keys
573
+ errors = []
574
+ errors << "invalid row_sep" if keys.include?(:row_sep) && !option_valid?(options[:row_sep])
575
+ errors << "invalid col_sep" if keys.include?(:col_sep) && !option_valid?(options[:col_sep])
576
+ errors << "invalid quote_char" if keys.include?(:quote_char) && !option_valid?(options[:quote_char])
577
+ raise SmarterCSV::ValidationError, errors.inspect if errors.any?
578
+ end
579
+
580
+ def option_valid?(str)
581
+ return true if str.is_a?(Symbol) && str == :auto
582
+ return true if str.is_a?(String) && !str.empty?
583
+ false
584
+ end
585
+
560
586
  def candidated_column_separators_from_headers(filehandle, options, delimiters)
561
587
  candidates = Hash.new(0)
562
588
  line = readline_with_counts(filehandle, options.slice(:row_sep))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smarter_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.0
4
+ version: 1.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tilo Sloboda