smarter_csv 1.7.2 → 1.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/CONTRIBUTORS.md +2 -0
- data/README.md +3 -2
- data/Rakefile +11 -0
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +49 -17
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3c20e6e8f4281f99aa22e67c778e1e950ef7040a48f454c7d9751f2ccf44c093
|
4
|
+
data.tar.gz: 14ec931e58fce24c675bd76ad39f6046c52a0a4b07cc89814fa55c6a981ebe27
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf3d642f523bf49d0867bc1768a6df247f3392390090c2b0fbfda5ac75f5f8f829eaac2ec14105936e4d317c7a1d1b865d74de1e60ce6405c6fd1f868bd703eb
|
7
|
+
data.tar.gz: 70522e31ca2ced36beef2a38509d1df01bad12af5bb56cb1404cfcbceffdfb4cbbbf8a8a8a535e3ea060d2d4c5b4c1049c8486d163d7de6b466dd83838aa2cf0
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
3
3
|
|
4
|
+
## 1.7.4 (2023-01-13)
|
5
|
+
* improved guessing of the column separator, thanks to Alessandro Fazzi
|
6
|
+
|
7
|
+
## 1.7.3 (2022-12-05)
|
8
|
+
* new option :silence_missing_keys; if set to true, it ignores missing keys in `key_mapping`
|
9
|
+
|
4
10
|
## 1.7.2 (2022-08-29)
|
5
11
|
* new option :with_line_numbers; if set to true, it adds :csv_line_number to each data hash (issue #130)
|
6
12
|
|
data/CONTRIBUTORS.md
CHANGED
@@ -48,3 +48,5 @@ A Big Thank you to everyone who filed issues, sent comments, and who contributed
|
|
48
48
|
* [Viacheslav Markin](https://github.com/KXEinc)
|
49
49
|
* [Nicolas Rodriguez](https://github.com/n-rodriguez)
|
50
50
|
* [Hirotaka Mizutani](https://github.com/hirotaka)
|
51
|
+
* [Rahul Chaudhary](https://github.com/rahulch95)
|
52
|
+
* [Alessandro Fazzi](https://github.com/pioneerskies)
|
data/README.md
CHANGED
@@ -240,7 +240,7 @@ The options and the block are optional.
|
|
240
240
|
| | | You can not combine the :user_provided_headers and :key_mapping options |
|
241
241
|
| :remove_empty_hashes | true | remove / ignore any hashes which don't have any key/value pairs or all empty values |
|
242
242
|
| :verbose | false | print out line number while processing (to track down problems in input files) |
|
243
|
-
| :with_line_numbers | false | add :csv_line_number to
|
243
|
+
| :with_line_numbers | false | add :csv_line_number to each data hash |
|
244
244
|
---------------------------------------------------------------------------------------------------------------------------------
|
245
245
|
|
246
246
|
#### Deprecated 1.x Options: to be replaced in 2.0
|
@@ -253,7 +253,8 @@ And header and data validations will also be supported in 2.x
|
|
253
253
|
| Option | Default | Explanation |
|
254
254
|
---------------------------------------------------------------------------------------------------------------------------------
|
255
255
|
| :key_mapping | nil | a hash which maps headers from the CSV file to keys in the result hash |
|
256
|
-
| :
|
256
|
+
| :silence_missing_keys | false | ignore missing keys in `key_mapping` if true |
|
257
|
+
| :required_headers | nil | An array. Each of the given headers must be present after header manipulation, |
|
257
258
|
| | | or an exception is raised. No validation if nil is given. |
|
258
259
|
| :remove_unmapped_keys | false | when using :key_mapping option, should non-mapped keys / columns be removed? |
|
259
260
|
| :downcase_header | true | downcase all column headers |
|
data/Rakefile
CHANGED
@@ -3,6 +3,17 @@
|
|
3
3
|
require "bundler/gem_tasks"
|
4
4
|
require 'rspec/core/rake_task'
|
5
5
|
|
6
|
+
|
7
|
+
# temp fix for NoMethodError: undefined method `last_comment'
|
8
|
+
# remove when fixed in Rake 11.x and higher
|
9
|
+
module TempFixForRakeLastComment
|
10
|
+
def last_comment
|
11
|
+
last_description
|
12
|
+
end
|
13
|
+
end
|
14
|
+
Rake::Application.send :include, TempFixForRakeLastComment
|
15
|
+
### end of tempfix
|
16
|
+
|
6
17
|
RSpec::Core::RakeTask.new(:spec)
|
7
18
|
|
8
19
|
require "rubocop/rake_task"
|
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv.rb
CHANGED
@@ -227,6 +227,7 @@ module SmarterCSV
|
|
227
227
|
remove_zero_values: false,
|
228
228
|
required_headers: nil,
|
229
229
|
row_sep: $/,
|
230
|
+
silence_missing_keys: false,
|
230
231
|
skip_lines: nil,
|
231
232
|
strings_as_keys: false,
|
232
233
|
strip_chars_from_headers: nil,
|
@@ -373,24 +374,21 @@ module SmarterCSV
|
|
373
374
|
return false
|
374
375
|
end
|
375
376
|
|
376
|
-
#
|
377
|
+
# If file has headers, then guesses column separator from headers.
|
378
|
+
# Otherwise guesses column separator from contents.
|
379
|
+
# Raises exception if none is found.
|
377
380
|
def guess_column_separator(filehandle, options)
|
378
|
-
|
379
|
-
n = Hash.new(0)
|
381
|
+
possible_delimiters = [',', "\t", ';', ':', '|']
|
380
382
|
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
rescue EOFError # short files
|
387
|
-
break
|
388
|
-
end
|
383
|
+
candidates = if options.fetch(:headers_in_file)
|
384
|
+
candidated_column_separators_from_headers(filehandle, options, possible_delimiters)
|
385
|
+
else
|
386
|
+
candidated_column_separators_from_contents(filehandle, options, possible_delimiters)
|
387
|
+
end
|
389
388
|
|
390
|
-
|
391
|
-
raise SmarterCSV::NoColSepDetected if n.values.max == 0
|
389
|
+
raise SmarterCSV::NoColSepDetected if candidates.values.max == 0
|
392
390
|
|
393
|
-
|
391
|
+
candidates.key(candidates.values.max)
|
394
392
|
end
|
395
393
|
|
396
394
|
# limitation: this currently reads the whole file in before making a decision
|
@@ -479,9 +477,11 @@ module SmarterCSV
|
|
479
477
|
# do some key mapping on the keys in the file header
|
480
478
|
# if you want to completely delete a key, then map it to nil or to ''
|
481
479
|
if !key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
482
|
-
|
483
|
-
|
484
|
-
|
480
|
+
unless options[:silence_missing_keys]
|
481
|
+
# unless :silence_missing_keys is set, print a warning listing key_mapping keys that are missing from the header
|
482
|
+
missing_keys = key_mappingH.keys - headerA
|
483
|
+
puts "WARNING: missing header(s): #{missing_keys.join(",")}" unless missing_keys.empty?
|
484
|
+
end
|
485
485
|
|
486
486
|
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
|
487
487
|
end
|
@@ -522,5 +522,37 @@ module SmarterCSV
|
|
522
522
|
end
|
523
523
|
result
|
524
524
|
end
|
525
|
+
|
526
|
+
private
|
527
|
+
|
528
|
+
def candidated_column_separators_from_headers(filehandle, options, delimiters)
|
529
|
+
candidates = Hash.new(0)
|
530
|
+
line = filehandle.readline(options[:row_sep])
|
531
|
+
|
532
|
+
delimiters.each do |d|
|
533
|
+
candidates[d] += line.scan(d).count
|
534
|
+
end
|
535
|
+
|
536
|
+
filehandle.rewind
|
537
|
+
|
538
|
+
candidates
|
539
|
+
end
|
540
|
+
|
541
|
+
def candidated_column_separators_from_contents(filehandle, options, delimiters)
|
542
|
+
candidates = Hash.new(0)
|
543
|
+
|
544
|
+
5.times do
|
545
|
+
line = filehandle.readline(options[:row_sep])
|
546
|
+
delimiters.each do |d|
|
547
|
+
candidates[d] += line.scan(d).count
|
548
|
+
end
|
549
|
+
rescue EOFError # short files
|
550
|
+
break
|
551
|
+
end
|
552
|
+
|
553
|
+
filehandle.rewind
|
554
|
+
|
555
|
+
candidates
|
556
|
+
end
|
525
557
|
end
|
526
558
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.
|
4
|
+
version: 1.7.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tilo Sloboda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: awesome_print
|
@@ -140,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
140
140
|
- !ruby/object:Gem::Version
|
141
141
|
version: '0'
|
142
142
|
requirements: []
|
143
|
-
rubygems_version: 3.
|
143
|
+
rubygems_version: 3.1.6
|
144
144
|
signing_key:
|
145
145
|
specification_version: 4
|
146
146
|
summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots
|