csv-utils 0.3.21 → 0.3.23
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/csv-splitter +81 -0
- data/csv-utils.gemspec +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2241c0a8d13e6edcb21271f66523862506cb1a04fbcfd533c160bf307c3ee6eb
|
4
|
+
data.tar.gz: 6d2f393c77e232aa8c124a43290f6ba912f42c0d63a49c55945c159106eb8169
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d988950900011db0d030d7e5534dcf1cfa0940d17ea3257de18da5fb3fdc5745dbd797007ab220a0a72c4fd5b9ed907100d9244fe30c11484de5dbf3d58c5d1c
|
7
|
+
data.tar.gz: 64698435d62a5bd8e0bbd3e9a480581161d74c1d14bf2e7686a39b72898e765b77aeea0d95e8a8c98c065ab9af0f5f819fb1d58d6cd453a16f95c8b24f49e1ec
|
data/bin/csv-splitter
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
# Default settings; each one can be overridden from the command line.
options = {
  rows: 250_000,
  no_header: false
}

OptionParser.new do |opts|
  opts.banner = 'Usage: ' + File.basename(__FILE__) + ' [options] <csv file>'

  opts.on('-h', '--help', 'Prints this help') do
    puts opts
    exit
  end

  # OptionParser coerces ROWS to an Integer before the block runs.
  opts.on('-r', '--rows ROWS', Integer, 'Max number of rows per a CSV') do |v|
    options[:rows] = v
  end

  opts.on('--no-header', 'CSV file has no header') do
    options[:no_header] = true
  end
end.parse!

# The row limit is later used as a divisor to compute the number of output
# files, so zero would raise ZeroDivisionError and a negative value would
# produce nonsensical splits. Reject both up front.
raise('rows must be a positive integer') unless options[:rows].positive?

# parse! strips the recognised switches from ARGV, so the first remaining
# argument is the input file.
file = ARGV[0] || raise('no CSV file specified')
|
27
|
+
|
28
|
+
require 'csv-utils'
|
29
|
+
|
30
|
+
# Builds the name of one output part for a split CSV/TSV file.
#
# The part marker is inserted before a trailing ".csv"/".tsv" extension
# (matched case-insensitively) so the part keeps its extension. For any
# other file name the marker is appended to the end. Both forms carry the
# "-of-<total>" suffix so part names are consistent regardless of extension.
#
# @param file [String] path of the file being split
# @param num_files [Integer] 1-based index of the part being written
# @param total_files [Integer] total number of parts
# @return [String] path of the part file
def get_split_file_name(file, num_files, total_files)
  part_suffix = ".part-#{num_files}-of-#{total_files}"

  # \z anchors at the true end of the string; `$` would also match before
  # a line break inside the name. No match gives nil, hence the to_s.
  extension = file[/\.(?:csv|tsv)\z/i].to_s
  base = file[0, file.length - extension.length]

  base + part_suffix + extension
end
|
37
|
+
|
38
|
+
# Split the input file into parts of at most options[:rows] data rows each.
# The column separator is taken from CSVUtils::CSVOptions and reused for
# every part that is written.
csv_options = CSVUtils::CSVOptions.new(file)

csv = CSV.open(
  file,
  'rb',
  col_sep: csv_options.col_separator,
  liberal_parsing: true
)

out = nil

begin
  # Unless --no-header was given, the first row is the header and is
  # repeated at the top of every part.
  headers = options[:no_header] ? nil : csv.shift

  # First pass: count the data rows so each part name can carry the total
  # number of parts.
  total_rows = 0
  total_rows += 1 while csv.shift

  # Second pass: return to the start and skip the header row again.
  csv.rewind
  csv.shift if headers

  # Ceiling division: a partially filled last chunk still needs a file.
  total_files = total_rows / options[:rows]
  total_files += 1 if (total_rows % options[:rows]) > 0

  num_rows = 0
  num_files = 0

  while (row = csv.shift)
    # Start a new part for the first row and whenever the current part
    # has reached the row limit.
    if out.nil? || num_rows >= options[:rows]
      out&.close
      num_rows = 0
      num_files += 1
      out = CSV.open(get_split_file_name(file, num_files, total_files), 'wb', col_sep: csv_options.col_separator)
      out << headers if headers
    end

    out << row
    num_rows += 1
  end
ensure
  # out stays nil when the input has no data rows, so guard the close.
  # The input handle is released as well, even if a write failed midway.
  out&.close
  csv.close
end
|
data/csv-utils.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.21
|
4
|
+
version: 0.3.23
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: inheritance-helper
|
@@ -34,6 +34,7 @@ executables:
|
|
34
34
|
- csv-find-error
|
35
35
|
- csv-grep
|
36
36
|
- csv-readline
|
37
|
+
- csv-splitter
|
37
38
|
- csv-validator
|
38
39
|
extensions: []
|
39
40
|
extra_rdoc_files: []
|
@@ -52,6 +53,7 @@ files:
|
|
52
53
|
- bin/csv-find-error
|
53
54
|
- bin/csv-grep
|
54
55
|
- bin/csv-readline
|
56
|
+
- bin/csv-splitter
|
55
57
|
- bin/csv-validator
|
56
58
|
- csv-utils.gemspec
|
57
59
|
- lib/csv-utils.rb
|