csv-utils 0.3.14 → 0.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a15793e118aa3bd4175b59c6a17e7800dd322390f95ce55af29170a6422ae0c3
4
- data.tar.gz: 481c775dc66bc47fec11f5ab39f98a1a35020cb00f6e565761b47badc4d53ad8
3
+ metadata.gz: 0b511a7a2cac6926477ed165212fa2009947dc84040f7d0451f7fbd7fdd9543b
4
+ data.tar.gz: 6661363cabaebcd2f21a41159f7ec0712b7729da5dde0cc7b1818415d8f0bc81
5
5
  SHA512:
6
- metadata.gz: e987380b48bc309fbccb7bfd4311c8664077c8bac6ae31cf0c8c83f8035366f40fbdf60c8bf5b476e2a84e1899d6adada995fe81cd08531376511501700e2dca
7
- data.tar.gz: 020e3c5d08bbc1025b72fbde2de838efd45c24cf41ba89c9fb93cceb619e4184ec0c1671bf26b3ae1d9dad8978ce206359c53672bce843a3ac8651cc6ccf0412
6
+ metadata.gz: cb860bdbe29726e44e1af6c528623daf7acc186f48eecefdbd47530d4ae7ff3472a2c7044cef8de079e70d4d9ada0b1bfbff65923fc3fcb9c6236d3d2e9606d8
7
+ data.tar.gz: fe7a67353de3c0e4dd5e36b71345abb373c19ca3e23bc0a8624f7f4147058911e487db17218c35e2ae2e77dfaed61185ca5ffea77736c410a3b18d1334514cd8
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.0.0
1
+ 3.1.0
data/Gemfile.lock CHANGED
@@ -2,51 +2,51 @@ GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
4
  ast (2.4.2)
5
- diff-lcs (1.4.4)
6
- docile (1.3.5)
7
- inheritance-helper (0.1.5)
8
- parallel (1.20.1)
9
- parser (3.0.0.0)
5
+ diff-lcs (1.5.0)
6
+ docile (1.4.0)
7
+ inheritance-helper (0.2.5)
8
+ parallel (1.22.1)
9
+ parser (3.1.1.0)
10
10
  ast (~> 2.4.1)
11
- rainbow (3.0.0)
12
- rake (13.0.3)
13
- regexp_parser (2.1.1)
14
- rexml (3.2.4)
15
- rspec (3.10.0)
16
- rspec-core (~> 3.10.0)
17
- rspec-expectations (~> 3.10.0)
18
- rspec-mocks (~> 3.10.0)
19
- rspec-core (3.10.1)
20
- rspec-support (~> 3.10.0)
21
- rspec-expectations (3.10.1)
11
+ rainbow (3.1.1)
12
+ rake (13.0.6)
13
+ regexp_parser (2.2.1)
14
+ rexml (3.2.5)
15
+ rspec (3.11.0)
16
+ rspec-core (~> 3.11.0)
17
+ rspec-expectations (~> 3.11.0)
18
+ rspec-mocks (~> 3.11.0)
19
+ rspec-core (3.11.0)
20
+ rspec-support (~> 3.11.0)
21
+ rspec-expectations (3.11.0)
22
22
  diff-lcs (>= 1.2.0, < 2.0)
23
- rspec-support (~> 3.10.0)
24
- rspec-mocks (3.10.2)
23
+ rspec-support (~> 3.11.0)
24
+ rspec-mocks (3.11.0)
25
25
  diff-lcs (>= 1.2.0, < 2.0)
26
- rspec-support (~> 3.10.0)
27
- rspec-support (3.10.2)
28
- rubocop (1.11.0)
26
+ rspec-support (~> 3.11.0)
27
+ rspec-support (3.11.0)
28
+ rubocop (1.26.1)
29
29
  parallel (~> 1.10)
30
- parser (>= 3.0.0.0)
30
+ parser (>= 3.1.0.0)
31
31
  rainbow (>= 2.2.2, < 4.0)
32
32
  regexp_parser (>= 1.8, < 3.0)
33
33
  rexml
34
- rubocop-ast (>= 1.2.0, < 2.0)
34
+ rubocop-ast (>= 1.16.0, < 2.0)
35
35
  ruby-progressbar (~> 1.7)
36
36
  unicode-display_width (>= 1.4.0, < 3.0)
37
- rubocop-ast (1.4.1)
38
- parser (>= 2.7.1.5)
37
+ rubocop-ast (1.16.0)
38
+ parser (>= 3.1.1.0)
39
39
  ruby-progressbar (1.11.0)
40
40
  simplecov (0.21.2)
41
41
  docile (~> 1.1)
42
42
  simplecov-html (~> 0.11)
43
43
  simplecov_json_formatter (~> 0.1)
44
44
  simplecov-html (0.12.3)
45
- simplecov_json_formatter (0.1.2)
46
- unicode-display_width (2.0.0)
45
+ simplecov_json_formatter (0.1.4)
46
+ unicode-display_width (2.1.0)
47
47
 
48
48
  PLATFORMS
49
- x86_64-darwin-20
49
+ x86_64-darwin-21
50
50
 
51
51
  DEPENDENCIES
52
52
  inheritance-helper
@@ -56,4 +56,4 @@ DEPENDENCIES
56
56
  simplecov
57
57
 
58
58
  BUNDLED WITH
59
- 2.2.3
59
+ 2.3.3
@@ -0,0 +1,69 @@
#!/usr/bin/env ruby

# Finds duplicate rows in a CSV file.
#
# Every row is serialized to JSON and hashed with SHA-256; rows that produce
# the same digest are reported as duplicates. Columns named via --ignore are
# blanked before hashing, so they do not take part in the comparison.
#
# Output: "duplicates-<input basename>" (relative path, so it lands in the
# current working directory). Each output row is prefixed with the digest
# shared by its duplicate group and the row's index as yielded by
# each_with_index.

require 'optparse'

options = {
  ignore_columns: []
}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options] <csv file>"

  opts.on('-h', '--help', 'Prints this help') do
    puts opts
    exit
  end

  opts.on('-i', '--ignore HEADERS', 'Comma separated list of headers to ignore') do |v|
    options[:ignore_columns] = v.split(',')
  end
end
parser.parse!

# Without an input file there is nothing to scan; print the usage text and
# exit non-zero instead of failing later with an unrelated error.
input_path = ARGV[0]
abort(parser.help) if input_path.nil?

# Loaded after option parsing so that --help does not need the heavier requires.
require 'csv'
require 'digest/sha2'
require 'json'
require 'csv-utils'

csv = CSVUtils::CSVIterator.new(input_path)

# The first row supplies the header names used both for validating --ignore
# and for the header line of the output file.
first_row = csv.first
abort("no rows found in #{input_path}") if first_row.nil?
headers = first_row.keys

missing_headers = options[:ignore_columns] - headers
unless missing_headers.empty?
  raise("unknown headers #{missing_headers.join(', ')} configured as ignore headers")
end

# First pass: map each row digest to the indexes of the rows that produced it.
row_indexes_by_digest = Hash.new { |hash, digest| hash[digest] = [] }

csv.each_with_index do |row, idx|
  # Blank the ignored columns so they cannot influence the digest.
  options[:ignore_columns].each { |ignore_column| row[ignore_column] = '' }
  row_indexes_by_digest[Digest::SHA256.hexdigest(row.to_json)] << idx
end

# Only digests seen more than once describe duplicates.
row_indexes_by_digest.select! { |_digest, indexes| indexes.size > 1 }

# digest_by_row_index: row index => digest of its duplicate group.
# duplicates: digest => { row index => row }, pre-seeded with nil so the
# rows of a group keep the order in which they were first seen.
digest_by_row_index = {}
duplicates = {}
row_indexes_by_digest.each do |digest, indexes|
  duplicates[digest] = {}
  indexes.each do |idx|
    digest_by_row_index[idx] = digest
    duplicates[digest][idx] = nil
  end
end

# Second pass: iterate the file again and keep only the duplicate rows.
# NOTE(review): presumably the iterator yields fresh rows here, so ignored
# columns appear with their real values in the output - confirm against
# CSVUtils::CSVIterator.
csv.each_with_index do |row, idx|
  next unless (digest = digest_by_row_index[idx])

  duplicates[digest][idx] = row
end

CSV.open("duplicates-#{File.basename(input_path)}", 'wb') do |out|
  out << %w[duplicate_key row_no] + headers

  duplicates.each do |digest, rows|
    rows.each do |idx, row|
      out << [digest, idx] + row.values
    end
  end
end
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.14'
5
+ s.version = '0.3.15'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.14
4
+ version: 0.3.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-07-23 00:00:00.000000000 Z
11
+ date: 2022-08-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -28,6 +28,7 @@ description: Tools for debugging malformed CSV files
28
28
  email: dougyouch@gmail.com
29
29
  executables:
30
30
  - csv-change-eol
31
+ - csv-duplicate-finder
31
32
  - csv-explorer
32
33
  - csv-find-error
33
34
  - csv-readline
@@ -43,6 +44,7 @@ files:
43
44
  - LICENSE
44
45
  - README.md
45
46
  - bin/csv-change-eol
47
+ - bin/csv-duplicate-finder
46
48
  - bin/csv-explorer
47
49
  - bin/csv-find-error
48
50
  - bin/csv-readline
@@ -78,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
78
80
  - !ruby/object:Gem::Version
79
81
  version: '0'
80
82
  requirements: []
81
- rubygems_version: 3.2.3
83
+ rubygems_version: 3.3.3
82
84
  signing_key:
83
85
  specification_version: 4
84
86
  summary: CSV Utils