csv-utils 0.3.12 → 0.3.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Gemfile.lock +29 -29
- data/bin/csv-duplicate-finder +69 -0
- data/csv-utils.gemspec +1 -1
- data/lib/csv_utils/csv_iterator.rb +4 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0b511a7a2cac6926477ed165212fa2009947dc84040f7d0451f7fbd7fdd9543b
|
4
|
+
data.tar.gz: 6661363cabaebcd2f21a41159f7ec0712b7729da5dde0cc7b1818415d8f0bc81
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cb860bdbe29726e44e1af6c528623daf7acc186f48eecefdbd47530d4ae7ff3472a2c7044cef8de079e70d4d9ada0b1bfbff65923fc3fcb9c6236d3d2e9606d8
|
7
|
+
data.tar.gz: fe7a67353de3c0e4dd5e36b71345abb373c19ca3e23bc0a8624f7f4147058911e487db17218c35e2ae2e77dfaed61185ca5ffea77736c410a3b18d1334514cd8
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.
|
1
|
+
3.1.0
|
data/Gemfile.lock
CHANGED
@@ -2,51 +2,51 @@ GEM
|
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
4
|
ast (2.4.2)
|
5
|
-
diff-lcs (1.
|
6
|
-
docile (1.
|
7
|
-
inheritance-helper (0.
|
8
|
-
parallel (1.
|
9
|
-
parser (3.
|
5
|
+
diff-lcs (1.5.0)
|
6
|
+
docile (1.4.0)
|
7
|
+
inheritance-helper (0.2.5)
|
8
|
+
parallel (1.22.1)
|
9
|
+
parser (3.1.1.0)
|
10
10
|
ast (~> 2.4.1)
|
11
|
-
rainbow (3.
|
12
|
-
rake (13.0.
|
13
|
-
regexp_parser (2.
|
14
|
-
rexml (3.2.
|
15
|
-
rspec (3.
|
16
|
-
rspec-core (~> 3.
|
17
|
-
rspec-expectations (~> 3.
|
18
|
-
rspec-mocks (~> 3.
|
19
|
-
rspec-core (3.
|
20
|
-
rspec-support (~> 3.
|
21
|
-
rspec-expectations (3.
|
11
|
+
rainbow (3.1.1)
|
12
|
+
rake (13.0.6)
|
13
|
+
regexp_parser (2.2.1)
|
14
|
+
rexml (3.2.5)
|
15
|
+
rspec (3.11.0)
|
16
|
+
rspec-core (~> 3.11.0)
|
17
|
+
rspec-expectations (~> 3.11.0)
|
18
|
+
rspec-mocks (~> 3.11.0)
|
19
|
+
rspec-core (3.11.0)
|
20
|
+
rspec-support (~> 3.11.0)
|
21
|
+
rspec-expectations (3.11.0)
|
22
22
|
diff-lcs (>= 1.2.0, < 2.0)
|
23
|
-
rspec-support (~> 3.
|
24
|
-
rspec-mocks (3.
|
23
|
+
rspec-support (~> 3.11.0)
|
24
|
+
rspec-mocks (3.11.0)
|
25
25
|
diff-lcs (>= 1.2.0, < 2.0)
|
26
|
-
rspec-support (~> 3.
|
27
|
-
rspec-support (3.
|
28
|
-
rubocop (1.
|
26
|
+
rspec-support (~> 3.11.0)
|
27
|
+
rspec-support (3.11.0)
|
28
|
+
rubocop (1.26.1)
|
29
29
|
parallel (~> 1.10)
|
30
|
-
parser (>= 3.
|
30
|
+
parser (>= 3.1.0.0)
|
31
31
|
rainbow (>= 2.2.2, < 4.0)
|
32
32
|
regexp_parser (>= 1.8, < 3.0)
|
33
33
|
rexml
|
34
|
-
rubocop-ast (>= 1.
|
34
|
+
rubocop-ast (>= 1.16.0, < 2.0)
|
35
35
|
ruby-progressbar (~> 1.7)
|
36
36
|
unicode-display_width (>= 1.4.0, < 3.0)
|
37
|
-
rubocop-ast (1.
|
38
|
-
parser (>=
|
37
|
+
rubocop-ast (1.16.0)
|
38
|
+
parser (>= 3.1.1.0)
|
39
39
|
ruby-progressbar (1.11.0)
|
40
40
|
simplecov (0.21.2)
|
41
41
|
docile (~> 1.1)
|
42
42
|
simplecov-html (~> 0.11)
|
43
43
|
simplecov_json_formatter (~> 0.1)
|
44
44
|
simplecov-html (0.12.3)
|
45
|
-
simplecov_json_formatter (0.1.
|
46
|
-
unicode-display_width (2.
|
45
|
+
simplecov_json_formatter (0.1.4)
|
46
|
+
unicode-display_width (2.1.0)
|
47
47
|
|
48
48
|
PLATFORMS
|
49
|
-
x86_64-darwin-
|
49
|
+
x86_64-darwin-21
|
50
50
|
|
51
51
|
DEPENDENCIES
|
52
52
|
inheritance-helper
|
@@ -56,4 +56,4 @@ DEPENDENCIES
|
|
56
56
|
simplecov
|
57
57
|
|
58
58
|
BUNDLED WITH
|
59
|
-
2.
|
59
|
+
2.3.3
|
@@ -0,0 +1,69 @@
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

# Finds duplicate rows in a CSV file.
#
# Usage: csv-duplicate-finder [options] <csv file>
#
# Every row is hashed (SHA256 of the row's JSON form) after the columns named
# with --ignore have been blanked out. Rows sharing a digest are written to
# "duplicates-<input basename>" (relative to the current directory), each
# prefixed with the digest and the row index reported by each_with_index.

require 'optparse'

options = {
  ignore_columns: []
}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options] <csv file>"

  opts.on('-h', '--help', 'Prints this help') do
    puts opts
    exit
  end

  opts.on('-i', '--ignore HEADERS', 'Comma separated list of headers to ignore') do |v|
    options[:ignore_columns] = v.split(',')
  end
end
parser.parse!

# parse! strips the recognised options from ARGV, so the first remaining
# argument is the CSV path. Fail with the usage text instead of passing nil on.
input_path = ARGV[0]
if input_path.nil?
  warn parser.help
  exit 1
end

require 'csv'
require 'digest/sha2'
require 'json'
require 'csv-utils'

csv = CSVUtils::CSVIterator.new(input_path)

# NOTE(review): assumes CSVIterator#first returns nil when the file has no
# data rows (Enumerable semantics) -- confirm against CSVIterator.
first_row = csv.first
abort "no rows found in #{input_path}" if first_row.nil?
headers = first_row.keys

missing_headers = options[:ignore_columns] - headers
unless missing_headers.empty?
  raise "unknown headers #{missing_headers.join(', ')} configured as ignore headers"
end

# Pass 1: group row indices by the digest of the row content, with the
# ignored columns blanked so they do not influence the comparison.
row_numbers_by_digest = Hash.new { |hash, digest| hash[digest] = [] }

csv.each_with_index do |row, idx|
  options[:ignore_columns].each { |column| row[column] = '' }
  row_numbers_by_digest[Digest::SHA256.hexdigest(row.to_json)] << idx
end

# Only digests seen more than once describe duplicates.
duplicate_row_numbers = row_numbers_by_digest.select { |_digest, row_numbers| row_numbers.size > 1 }

# Reverse lookup (row index -> digest) used to pick rows out during pass 2.
digest_by_row_number = {}
duplicate_row_numbers.each do |digest, row_numbers|
  row_numbers.each { |row_number| digest_by_row_number[row_number] = digest }
end

# Pass 2: iterate the file again and keep the rows of each duplicate group.
# The rows are fetched afresh here so the output carries the original values
# of the ignored columns rather than the blanked ones from pass 1.
rows_by_digest = duplicate_row_numbers.transform_values { {} }

csv.each_with_index do |row, idx|
  digest = digest_by_row_number[idx]
  next unless digest

  rows_by_digest[digest][idx] = row
end

CSV.open("duplicates-#{File.basename(input_path)}", 'wb') do |out|
  out << (%w[duplicate_key row_no] + headers)

  rows_by_digest.each do |digest, rows|
    rows.each do |idx, row|
      out << ([digest, idx] + row.values)
    end
  end
end
|
data/csv-utils.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-08-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: inheritance-helper
|
@@ -28,6 +28,7 @@ description: Tools for debugging malformed CSV files
|
|
28
28
|
email: dougyouch@gmail.com
|
29
29
|
executables:
|
30
30
|
- csv-change-eol
|
31
|
+
- csv-duplicate-finder
|
31
32
|
- csv-explorer
|
32
33
|
- csv-find-error
|
33
34
|
- csv-readline
|
@@ -43,6 +44,7 @@ files:
|
|
43
44
|
- LICENSE
|
44
45
|
- README.md
|
45
46
|
- bin/csv-change-eol
|
47
|
+
- bin/csv-duplicate-finder
|
46
48
|
- bin/csv-explorer
|
47
49
|
- bin/csv-find-error
|
48
50
|
- bin/csv-readline
|
@@ -78,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
78
80
|
- !ruby/object:Gem::Version
|
79
81
|
version: '0'
|
80
82
|
requirements: []
|
81
|
-
rubygems_version: 3.
|
83
|
+
rubygems_version: 3.3.3
|
82
84
|
signing_key:
|
83
85
|
specification_version: 4
|
84
86
|
summary: CSV Utils
|