smarter_csv 1.7.2 → 1.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 34a12dae406ef192b3fbac9dd8a4236e18a7a936d4289cc296e49bf3b88fd386
4
- data.tar.gz: f317413b7467386b1337938b2288763d1a6da279c6823ad3f4653ff82ea90d39
3
+ metadata.gz: 3c20e6e8f4281f99aa22e67c778e1e950ef7040a48f454c7d9751f2ccf44c093
4
+ data.tar.gz: 14ec931e58fce24c675bd76ad39f6046c52a0a4b07cc89814fa55c6a981ebe27
5
5
  SHA512:
6
- metadata.gz: ebbd40e8c6ea684200c8efedc12174da1a0a99ab9fae8bcb00f3bfdb8dcac479285644de09003b04b073b46f8ea64cbb29686628e9b7986d3baa07b041ee7dbd
7
- data.tar.gz: 9c9ba18bd64474811bbb3be2b350ab3b25a33dbd3e2cc802d697d04dcefeff2cc24150e87be6f6789eedc045a717ced2590efe6e6b6056a5c0b18095edbd0b38
6
+ metadata.gz: cf3d642f523bf49d0867bc1768a6df247f3392390090c2b0fbfda5ac75f5f8f829eaac2ec14105936e4d317c7a1d1b865d74de1e60ce6405c6fd1f868bd703eb
7
+ data.tar.gz: 70522e31ca2ced36beef2a38509d1df01bad12af5bb56cb1404cfcbceffdfb4cbbbf8a8a8a535e3ea060d2d4c5b4c1049c8486d163d7de6b466dd83838aa2cf0
data/CHANGELOG.md CHANGED
@@ -1,6 +1,12 @@
1
1
 
2
2
  # SmarterCSV 1.x Change Log
3
3
 
4
+ ## 1.7.4 (2023-01-13)
5
+ * improved guessing of the column separator, thanks to Alessandro Fazzi
6
+
7
+ ## 1.7.3 (2022-12-05)
8
+ * new option :silence_missing_keys; if set to true, it ignores missing keys in `key_mapping`
9
+
4
10
  ## 1.7.2 (2022-08-29)
5
11
  * new option :with_line_numbers; if set to true, it adds :csv_line_number to each data hash (issue #130)
6
12
 
data/CONTRIBUTORS.md CHANGED
@@ -48,3 +48,5 @@ A Big Thank you to everyone who filed issues, sent comments, and who contributed
48
48
  * [Viacheslav Markin](https://github.com/KXEinc)
49
49
  * [Nicolas Rodriguez](https://github.com/n-rodriguez)
50
50
  * [Hirotaka Mizutani ](https://github.com/hirotaka)
51
+ * [Rahul Chaudhary](https://github.com/rahulch95)
52
+ * [Alessandro Fazzi](https://github.com/pioneerskies)
data/README.md CHANGED
@@ -240,7 +240,7 @@ The options and the block are optional.
240
240
  | | | You can not combine the :user_provided_headers and :key_mapping options |
241
241
  | :remove_empty_hashes | true | remove / ignore any hashes which don't have any key/value pairs or all empty values |
242
242
  | :verbose | false | print out line number while processing (to track down problems in input files) |
243
- | :with_line_numbers | false | add :csv_line_number to heach data hash |
243
+ | :with_line_numbers | false | add :csv_line_number to each data hash |
244
244
  ---------------------------------------------------------------------------------------------------------------------------------
245
245
 
246
246
  #### Deprecated 1.x Options: to be replaced in 2.0
@@ -253,7 +253,8 @@ And header and data validations will also be supported in 2.x
253
253
  | Option | Default | Explanation |
254
254
  ---------------------------------------------------------------------------------------------------------------------------------
255
255
  | :key_mapping | nil | a hash which maps headers from the CSV file to keys in the result hash |
256
- | :required_headers | nil | An array. Eacn of the given headers must be present after header manipulation, |
256
+ | :silence_missing_keys | false | ignore missing keys in `key_mapping` if true |
257
+ | :required_headers | nil | An array. Each of the given headers must be present after header manipulation, |
257
258
  | | | or an exception is raised. No validation if nil is given. |
258
259
  | :remove_unmapped_keys | false | when using :key_mapping option, should non-mapped keys / columns be removed? |
259
260
  | :downcase_header | true | downcase all column headers |
data/Rakefile CHANGED
@@ -3,6 +3,17 @@
3
3
  require "bundler/gem_tasks"
4
4
  require 'rspec/core/rake_task'
5
5
 
6
+
7
+ # temp fix for NoMethodError: undefined method `last_comment'
8
+ # remove when fixed in Rake 11.x and higher
9
+ module TempFixForRakeLastComment
10
+ def last_comment
11
+ last_description
12
+ end
13
+ end
14
+ Rake::Application.send :include, TempFixForRakeLastComment
15
+ ### end of tempfix
16
+
6
17
  RSpec::Core::RakeTask.new(:spec)
7
18
 
8
19
  require "rubocop/rake_task"
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SmarterCSV
4
- VERSION = "1.7.2"
4
+ VERSION = "1.7.4"
5
5
  end
data/lib/smarter_csv.rb CHANGED
@@ -227,6 +227,7 @@ module SmarterCSV
227
227
  remove_zero_values: false,
228
228
  required_headers: nil,
229
229
  row_sep: $/,
230
+ silence_missing_keys: false,
230
231
  skip_lines: nil,
231
232
  strings_as_keys: false,
232
233
  strip_chars_from_headers: nil,
@@ -373,24 +374,21 @@ module SmarterCSV
373
374
  return false
374
375
  end
375
376
 
376
- # raise exception if none is found
377
+ # If file has headers, then guesses column separator from headers.
378
+ # Otherwise guesses column separator from contents.
379
+ # Raises exception if none is found.
377
380
  def guess_column_separator(filehandle, options)
378
- del = [',', "\t", ';', ':', '|']
379
- n = Hash.new(0)
381
+ possible_delimiters = [',', "\t", ';', ':', '|']
380
382
 
381
- 5.times do
382
- line = filehandle.readline(options[:row_sep])
383
- del.each do |d|
384
- n[d] += line.scan(d).count
385
- end
386
- rescue EOFError # short files
387
- break
388
- end
383
+ candidates = if options.fetch(:headers_in_file)
384
+ candidated_column_separators_from_headers(filehandle, options, possible_delimiters)
385
+ else
386
+ candidated_column_separators_from_contents(filehandle, options, possible_delimiters)
387
+ end
389
388
 
390
- filehandle.rewind
391
- raise SmarterCSV::NoColSepDetected if n.values.max == 0
389
+ raise SmarterCSV::NoColSepDetected if candidates.values.max == 0
392
390
 
393
- col_sep = n.key(n.values.max)
391
+ candidates.key(candidates.values.max)
394
392
  end
395
393
 
396
394
  # limitation: this currently reads the whole file in before making a decision
@@ -479,9 +477,11 @@ module SmarterCSV
479
477
  # do some key mapping on the keys in the file header
480
478
  # if you want to completely delete a key, then map it to nil or to ''
481
479
  if !key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
482
- # we can't map keys that are not there
483
- missing_keys = key_mappingH.keys - headerA
484
- puts "WARNING: missing header(s): #{missing_keys.join(",")}" unless missing_keys.empty?
480
+ unless options[:silence_missing_keys]
481
+ # unless silence_missing_keys is set, print a warning for key_mapping keys that are missing from the header
482
+ missing_keys = key_mappingH.keys - headerA
483
+ puts "WARNING: missing header(s): #{missing_keys.join(",")}" unless missing_keys.empty?
484
+ end
485
485
 
486
486
  headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
487
487
  end
@@ -522,5 +522,37 @@ module SmarterCSV
522
522
  end
523
523
  result
524
524
  end
525
+
526
+ private
527
+
528
+ def candidated_column_separators_from_headers(filehandle, options, delimiters)
529
+ candidates = Hash.new(0)
530
+ line = filehandle.readline(options[:row_sep])
531
+
532
+ delimiters.each do |d|
533
+ candidates[d] += line.scan(d).count
534
+ end
535
+
536
+ filehandle.rewind
537
+
538
+ candidates
539
+ end
540
+
541
+ def candidated_column_separators_from_contents(filehandle, options, delimiters)
542
+ candidates = Hash.new(0)
543
+
544
+ 5.times do
545
+ line = filehandle.readline(options[:row_sep])
546
+ delimiters.each do |d|
547
+ candidates[d] += line.scan(d).count
548
+ end
549
+ rescue EOFError # short files
550
+ break
551
+ end
552
+
553
+ filehandle.rewind
554
+
555
+ candidates
556
+ end
525
557
  end
526
558
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smarter_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.2
4
+ version: 1.7.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tilo Sloboda
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-29 00:00:00.000000000 Z
11
+ date: 2023-01-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: awesome_print
@@ -140,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
140
  - !ruby/object:Gem::Version
141
141
  version: '0'
142
142
  requirements: []
143
- rubygems_version: 3.3.3
143
+ rubygems_version: 3.1.6
144
144
  signing_key:
145
145
  specification_version: 4
146
146
  summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots