csv-utils 0.3.14 → 0.3.15

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a15793e118aa3bd4175b59c6a17e7800dd322390f95ce55af29170a6422ae0c3
4
- data.tar.gz: 481c775dc66bc47fec11f5ab39f98a1a35020cb00f6e565761b47badc4d53ad8
3
+ metadata.gz: 0b511a7a2cac6926477ed165212fa2009947dc84040f7d0451f7fbd7fdd9543b
4
+ data.tar.gz: 6661363cabaebcd2f21a41159f7ec0712b7729da5dde0cc7b1818415d8f0bc81
5
5
  SHA512:
6
- metadata.gz: e987380b48bc309fbccb7bfd4311c8664077c8bac6ae31cf0c8c83f8035366f40fbdf60c8bf5b476e2a84e1899d6adada995fe81cd08531376511501700e2dca
7
- data.tar.gz: 020e3c5d08bbc1025b72fbde2de838efd45c24cf41ba89c9fb93cceb619e4184ec0c1671bf26b3ae1d9dad8978ce206359c53672bce843a3ac8651cc6ccf0412
6
+ metadata.gz: cb860bdbe29726e44e1af6c528623daf7acc186f48eecefdbd47530d4ae7ff3472a2c7044cef8de079e70d4d9ada0b1bfbff65923fc3fcb9c6236d3d2e9606d8
7
+ data.tar.gz: fe7a67353de3c0e4dd5e36b71345abb373c19ca3e23bc0a8624f7f4147058911e487db17218c35e2ae2e77dfaed61185ca5ffea77736c410a3b18d1334514cd8
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.0.0
1
+ 3.1.0
data/Gemfile.lock CHANGED
@@ -2,51 +2,51 @@ GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
4
  ast (2.4.2)
5
- diff-lcs (1.4.4)
6
- docile (1.3.5)
7
- inheritance-helper (0.1.5)
8
- parallel (1.20.1)
9
- parser (3.0.0.0)
5
+ diff-lcs (1.5.0)
6
+ docile (1.4.0)
7
+ inheritance-helper (0.2.5)
8
+ parallel (1.22.1)
9
+ parser (3.1.1.0)
10
10
  ast (~> 2.4.1)
11
- rainbow (3.0.0)
12
- rake (13.0.3)
13
- regexp_parser (2.1.1)
14
- rexml (3.2.4)
15
- rspec (3.10.0)
16
- rspec-core (~> 3.10.0)
17
- rspec-expectations (~> 3.10.0)
18
- rspec-mocks (~> 3.10.0)
19
- rspec-core (3.10.1)
20
- rspec-support (~> 3.10.0)
21
- rspec-expectations (3.10.1)
11
+ rainbow (3.1.1)
12
+ rake (13.0.6)
13
+ regexp_parser (2.2.1)
14
+ rexml (3.2.5)
15
+ rspec (3.11.0)
16
+ rspec-core (~> 3.11.0)
17
+ rspec-expectations (~> 3.11.0)
18
+ rspec-mocks (~> 3.11.0)
19
+ rspec-core (3.11.0)
20
+ rspec-support (~> 3.11.0)
21
+ rspec-expectations (3.11.0)
22
22
  diff-lcs (>= 1.2.0, < 2.0)
23
- rspec-support (~> 3.10.0)
24
- rspec-mocks (3.10.2)
23
+ rspec-support (~> 3.11.0)
24
+ rspec-mocks (3.11.0)
25
25
  diff-lcs (>= 1.2.0, < 2.0)
26
- rspec-support (~> 3.10.0)
27
- rspec-support (3.10.2)
28
- rubocop (1.11.0)
26
+ rspec-support (~> 3.11.0)
27
+ rspec-support (3.11.0)
28
+ rubocop (1.26.1)
29
29
  parallel (~> 1.10)
30
- parser (>= 3.0.0.0)
30
+ parser (>= 3.1.0.0)
31
31
  rainbow (>= 2.2.2, < 4.0)
32
32
  regexp_parser (>= 1.8, < 3.0)
33
33
  rexml
34
- rubocop-ast (>= 1.2.0, < 2.0)
34
+ rubocop-ast (>= 1.16.0, < 2.0)
35
35
  ruby-progressbar (~> 1.7)
36
36
  unicode-display_width (>= 1.4.0, < 3.0)
37
- rubocop-ast (1.4.1)
38
- parser (>= 2.7.1.5)
37
+ rubocop-ast (1.16.0)
38
+ parser (>= 3.1.1.0)
39
39
  ruby-progressbar (1.11.0)
40
40
  simplecov (0.21.2)
41
41
  docile (~> 1.1)
42
42
  simplecov-html (~> 0.11)
43
43
  simplecov_json_formatter (~> 0.1)
44
44
  simplecov-html (0.12.3)
45
- simplecov_json_formatter (0.1.2)
46
- unicode-display_width (2.0.0)
45
+ simplecov_json_formatter (0.1.4)
46
+ unicode-display_width (2.1.0)
47
47
 
48
48
  PLATFORMS
49
- x86_64-darwin-20
49
+ x86_64-darwin-21
50
50
 
51
51
  DEPENDENCIES
52
52
  inheritance-helper
@@ -56,4 +56,4 @@ DEPENDENCIES
56
56
  simplecov
57
57
 
58
58
  BUNDLED WITH
59
- 2.2.3
59
+ 2.3.3
@@ -0,0 +1,69 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Finds duplicate rows in a CSV file.
#
# Usage: csv-duplicate-finder [options] <csv file>
#   -i, --ignore HEADERS   comma separated list of headers whose values are
#                          blanked out before rows are compared
#   -h, --help             print usage and exit
#
# Every row is serialized to JSON and hashed with SHA256; rows sharing a digest
# are reported as duplicates. The result is written to
# "duplicates-<basename of input>" in the current working directory with the
# columns: duplicate_key, row_no, followed by the original headers.
# row_no is the zero-based index yielded by each_with_index.
#
# The input is iterated twice: the first pass keeps only digests and row
# indexes, the second pass collects the original (unblanked) content of the
# rows that turned out to be duplicates.

require 'optparse'

options = {
  ignore_columns: []
}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options] <csv file>"

  opts.on('-h', '--help', 'Prints this help') do
    puts opts
    exit
  end

  opts.on('-i', '--ignore HEADERS', 'Comma separated list of headers to ignore') do |v|
    options[:ignore_columns] = v.split(',')
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Show the problem together with the usage text instead of a backtrace.
  abort("#{e.message}\n#{parser}")
end

csv_file = ARGV[0]
abort(parser.to_s) if csv_file.nil? || csv_file.empty?

# Loaded after option parsing so that --help stays fast.
require 'csv'
require 'digest/sha2'
require 'json'
require 'csv-utils'

csv = CSVUtils::CSVIterator.new(csv_file)

# Rows are used as hashes keyed by header (see the #keys / #values calls).
# NOTE(review): assumes #first returns nil when the file yields no rows - confirm
# against CSVUtils::CSVIterator.
first_row = csv.first
abort("no rows found in #{csv_file}") unless first_row

headers = first_row.keys

missing_headers = options[:ignore_columns] - headers
unless missing_headers.empty?
  abort("unknown headers #{missing_headers.join(', ')} configured as ignore headers")
end

# First pass: digest of the (partially blanked) row => indexes of matching rows.
row_numbers_by_digest = Hash.new { |hash, digest| hash[digest] = [] }

csv.each_with_index do |row, idx|
  options[:ignore_columns].each { |column| row[column] = '' }
  row_numbers_by_digest[Digest::SHA256.hexdigest(row.to_json)] << idx
end

# Only digests seen more than once describe duplicates.
row_numbers_by_digest.select! { |_digest, row_numbers| row_numbers.size > 1 }

# Reverse lookup used by the second pass: row index => digest.
digest_by_row_number = {}
row_numbers_by_digest.each do |digest, row_numbers|
  row_numbers.each { |row_number| digest_by_row_number[row_number] = digest }
end

# Second pass: fetch the original content of every duplicate row.
rows_by_number = {}
csv.each_with_index do |row, idx|
  rows_by_number[idx] = row if digest_by_row_number.key?(idx)
end

CSV.open("duplicates-#{File.basename(csv_file)}", 'wb') do |out|
  out << (%w[duplicate_key row_no] + headers)

  row_numbers_by_digest.each do |digest, row_numbers|
    row_numbers.each do |row_number|
      out << ([digest, row_number] + rows_by_number.fetch(row_number).values)
    end
  end
end
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.14'
5
+ s.version = '0.3.15'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.14
4
+ version: 0.3.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-07-23 00:00:00.000000000 Z
11
+ date: 2022-08-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -28,6 +28,7 @@ description: Tools for debugging malformed CSV files
28
28
  email: dougyouch@gmail.com
29
29
  executables:
30
30
  - csv-change-eol
31
+ - csv-duplicate-finder
31
32
  - csv-explorer
32
33
  - csv-find-error
33
34
  - csv-readline
@@ -43,6 +44,7 @@ files:
43
44
  - LICENSE
44
45
  - README.md
45
46
  - bin/csv-change-eol
47
+ - bin/csv-duplicate-finder
46
48
  - bin/csv-explorer
47
49
  - bin/csv-find-error
48
50
  - bin/csv-readline
@@ -78,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
78
80
  - !ruby/object:Gem::Version
79
81
  version: '0'
80
82
  requirements: []
81
- rubygems_version: 3.2.3
83
+ rubygems_version: 3.3.3
82
84
  signing_key:
83
85
  specification_version: 4
84
86
  summary: CSV Utils