smarter_csv 1.7.2 → 1.7.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/CONTRIBUTORS.md +2 -0
- data/README.md +3 -2
- data/Rakefile +11 -0
- data/lib/smarter_csv/version.rb +1 -1
- data/lib/smarter_csv.rb +49 -17
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3c20e6e8f4281f99aa22e67c778e1e950ef7040a48f454c7d9751f2ccf44c093
|
4
|
+
data.tar.gz: 14ec931e58fce24c675bd76ad39f6046c52a0a4b07cc89814fa55c6a981ebe27
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf3d642f523bf49d0867bc1768a6df247f3392390090c2b0fbfda5ac75f5f8f829eaac2ec14105936e4d317c7a1d1b865d74de1e60ce6405c6fd1f868bd703eb
|
7
|
+
data.tar.gz: 70522e31ca2ced36beef2a38509d1df01bad12af5bb56cb1404cfcbceffdfb4cbbbf8a8a8a535e3ea060d2d4c5b4c1049c8486d163d7de6b466dd83838aa2cf0
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
|
2
2
|
# SmarterCSV 1.x Change Log
|
3
3
|
|
4
|
+
## 1.7.4 (2023-01-13)
|
5
|
+
* improved guessing of the column separator, thanks to Alessandro Fazzi
|
6
|
+
|
7
|
+
## 1.7.3 (2022-12-05)
|
8
|
+
* new option :silence_missing_keys; if set to true, it ignores missing keys in `key_mapping`
|
9
|
+
|
4
10
|
## 1.7.2 (2022-08-29)
|
5
11
|
* new option :with_line_numbers; if set to true, it adds :csv_line_number to each data hash (issue #130)
|
6
12
|
|
data/CONTRIBUTORS.md
CHANGED
@@ -48,3 +48,5 @@ A Big Thank you to everyone who filed issues, sent comments, and who contributed
|
|
48
48
|
* [Viacheslav Markin](https://github.com/KXEinc)
|
49
49
|
* [Nicolas Rodriguez](https://github.com/n-rodriguez)
|
50
50
|
* [Hirotaka Mizutani](https://github.com/hirotaka)
|
51
|
+
* [Rahul Chaudhary](https://github.com/rahulch95)
|
52
|
+
* [Alessandro Fazzi](https://github.com/pioneerskies)
|
data/README.md
CHANGED
@@ -240,7 +240,7 @@ The options and the block are optional.
|
|
240
240
|
| | | You can not combine the :user_provided_headers and :key_mapping options |
|
241
241
|
| :remove_empty_hashes | true | remove / ignore any hashes which don't have any key/value pairs or all empty values |
|
242
242
|
| :verbose | false | print out line number while processing (to track down problems in input files) |
|
243
|
-
| :with_line_numbers | false | add :csv_line_number to
|
243
|
+
| :with_line_numbers | false | add :csv_line_number to each data hash |
|
244
244
|
---------------------------------------------------------------------------------------------------------------------------------
|
245
245
|
|
246
246
|
#### Deprecated 1.x Options: to be replaced in 2.0
|
@@ -253,7 +253,8 @@ And header and data validations will also be supported in 2.x
|
|
253
253
|
| Option | Default | Explanation |
|
254
254
|
---------------------------------------------------------------------------------------------------------------------------------
|
255
255
|
| :key_mapping | nil | a hash which maps headers from the CSV file to keys in the result hash |
|
256
|
-
| :
|
256
|
+
| :silence_missing_keys | false | ignore missing keys in `key_mapping` if true |
|
257
|
+
| :required_headers | nil | An array. Each of the given headers must be present after header manipulation, |
|
257
258
|
| | | or an exception is raised. No validation if nil is given. |
|
258
259
|
| :remove_unmapped_keys | false | when using :key_mapping option, should non-mapped keys / columns be removed? |
|
259
260
|
| :downcase_header | true | downcase all column headers |
|
data/Rakefile
CHANGED
@@ -3,6 +3,17 @@
|
|
3
3
|
require "bundler/gem_tasks"
|
4
4
|
require 'rspec/core/rake_task'
|
5
5
|
|
6
|
+
|
7
|
+
# temp fix for NoMethodError: undefined method `last_comment'
|
8
|
+
# remove when fixed in Rake 11.x and higher
|
9
|
+
module TempFixForRakeLastComment
|
10
|
+
def last_comment
|
11
|
+
last_description
|
12
|
+
end
|
13
|
+
end
|
14
|
+
Rake::Application.send :include, TempFixForRakeLastComment
|
15
|
+
### end of tempfix
|
16
|
+
|
6
17
|
RSpec::Core::RakeTask.new(:spec)
|
7
18
|
|
8
19
|
require "rubocop/rake_task"
|
data/lib/smarter_csv/version.rb
CHANGED
data/lib/smarter_csv.rb
CHANGED
@@ -227,6 +227,7 @@ module SmarterCSV
|
|
227
227
|
remove_zero_values: false,
|
228
228
|
required_headers: nil,
|
229
229
|
row_sep: $/,
|
230
|
+
silence_missing_keys: false,
|
230
231
|
skip_lines: nil,
|
231
232
|
strings_as_keys: false,
|
232
233
|
strip_chars_from_headers: nil,
|
@@ -373,24 +374,21 @@ module SmarterCSV
|
|
373
374
|
return false
|
374
375
|
end
|
375
376
|
|
376
|
-
#
|
377
|
+
# If file has headers, then guesses column separator from headers.
|
378
|
+
# Otherwise guesses column separator from contents.
|
379
|
+
# Raises exception if none is found.
|
377
380
|
def guess_column_separator(filehandle, options)
|
378
|
-
|
379
|
-
n = Hash.new(0)
|
381
|
+
possible_delimiters = [',', "\t", ';', ':', '|']
|
380
382
|
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
rescue EOFError # short files
|
387
|
-
break
|
388
|
-
end
|
383
|
+
candidates = if options.fetch(:headers_in_file)
|
384
|
+
candidated_column_separators_from_headers(filehandle, options, possible_delimiters)
|
385
|
+
else
|
386
|
+
candidated_column_separators_from_contents(filehandle, options, possible_delimiters)
|
387
|
+
end
|
389
388
|
|
390
|
-
|
391
|
-
raise SmarterCSV::NoColSepDetected if n.values.max == 0
|
389
|
+
raise SmarterCSV::NoColSepDetected if candidates.values.max == 0
|
392
390
|
|
393
|
-
|
391
|
+
candidates.key(candidates.values.max)
|
394
392
|
end
|
395
393
|
|
396
394
|
# limitation: this currently reads the whole file in before making a decision
|
@@ -479,9 +477,11 @@ module SmarterCSV
|
|
479
477
|
# do some key mapping on the keys in the file header
|
480
478
|
# if you want to completely delete a key, then map it to nil or to ''
|
481
479
|
if !key_mappingH.nil? && key_mappingH.class == Hash && key_mappingH.keys.size > 0
|
482
|
-
|
483
|
-
|
484
|
-
|
480
|
+
unless options[:silence_missing_keys]
|
481
|
+
# unless silence_missing_keys is set, print a warning listing the key_mapping keys that are missing from the header
|
482
|
+
missing_keys = key_mappingH.keys - headerA
|
483
|
+
puts "WARNING: missing header(s): #{missing_keys.join(",")}" unless missing_keys.empty?
|
484
|
+
end
|
485
485
|
|
486
486
|
headerA.map!{|x| key_mappingH.has_key?(x) ? (key_mappingH[x].nil? ? nil : key_mappingH[x]) : (options[:remove_unmapped_keys] ? nil : x)}
|
487
487
|
end
|
@@ -522,5 +522,37 @@ module SmarterCSV
|
|
522
522
|
end
|
523
523
|
result
|
524
524
|
end
|
525
|
+
|
526
|
+
private
|
527
|
+
|
528
|
+
def candidated_column_separators_from_headers(filehandle, options, delimiters)
|
529
|
+
candidates = Hash.new(0)
|
530
|
+
line = filehandle.readline(options[:row_sep])
|
531
|
+
|
532
|
+
delimiters.each do |d|
|
533
|
+
candidates[d] += line.scan(d).count
|
534
|
+
end
|
535
|
+
|
536
|
+
filehandle.rewind
|
537
|
+
|
538
|
+
candidates
|
539
|
+
end
|
540
|
+
|
541
|
+
def candidated_column_separators_from_contents(filehandle, options, delimiters)
|
542
|
+
candidates = Hash.new(0)
|
543
|
+
|
544
|
+
5.times do
|
545
|
+
line = filehandle.readline(options[:row_sep])
|
546
|
+
delimiters.each do |d|
|
547
|
+
candidates[d] += line.scan(d).count
|
548
|
+
end
|
549
|
+
rescue EOFError # short files
|
550
|
+
break
|
551
|
+
end
|
552
|
+
|
553
|
+
filehandle.rewind
|
554
|
+
|
555
|
+
candidates
|
556
|
+
end
|
525
557
|
end
|
526
558
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smarter_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.2
|
4
|
+
version: 1.7.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tilo Sloboda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: awesome_print
|
@@ -140,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
140
140
|
- !ruby/object:Gem::Version
|
141
141
|
version: '0'
|
142
142
|
requirements: []
|
143
|
-
rubygems_version: 3.
|
143
|
+
rubygems_version: 3.1.6
|
144
144
|
signing_key:
|
145
145
|
specification_version: 4
|
146
146
|
summary: Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots
|