csv-utils 0.3.21 → 0.3.22

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/bin/csv-splitter +81 -0
  3. data/csv-utils.gemspec +1 -1
  4. metadata +4 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bea28dcd86e140064eba5a1134d5aa6193963ddcb7632c63c2e1bfa85c4889ed
4
- data.tar.gz: 1547e2cb2b937c580031205baa64c79c3c0ca062ee2aeffd578b897d10fae092
3
+ metadata.gz: ee1aeb07e7762e4f619464536906cd1ab2171a93238a6113811f30ae76cfd5dd
4
+ data.tar.gz: 894841751be6e16ed552fbb47047e6a8bc550b7eb45beed6419cb351cf1df734
5
5
  SHA512:
6
- metadata.gz: fe635fdc22220a0377ca386132109bebce61ba5fd41ec71625b8940b4dd7e2e5c72efb8cf163fc44f990633e18b8024b14878cbe0e852b49d4ba31fb076e44b3
7
- data.tar.gz: 569285d118ead894808ca96b5e4d1c0c1dd5f4bd130452a79c1ffb4f987ede9a0cf324609f4d11a15bddf61169948799026dd2bd4836674a1aae2f2caa137063
6
+ metadata.gz: 9624fc8ebd25445d34b0d2f8a00fbb4983c9a1ba9e4cf4f16bcdec029a852f5ee049fb2a935fdc8f4c429c34fd7588488182bf2e0433491fe4705c71dc7a43ea
7
+ data.tar.gz: 95ceebc269ca0e48404d62dd47905f0527028956f6bea09d68c71454784ceb626a346b8f8b27113d02bed771a75248ba85c364c63a08c084c6a97b8631d2990d
data/bin/csv-splitter ADDED
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+
5
# Defaults for the splitter; each can be overridden on the command line.
options = {
  rows: 250_000,    # maximum number of data rows written to each output file
  no_header: false  # when true, the first row is treated as data, not a header
}

OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options] <csv file>"

  opts.on('-h', '--help', 'Prints this help') do
    puts opts
    exit
  end

  opts.on('-r', '--rows ROWS', Integer, 'Max number of rows per a CSV') do |v|
    # Zero would raise ZeroDivisionError when the number of parts is computed,
    # and a negative limit would start a new output file for every row.
    raise OptionParser::InvalidArgument, 'ROWS must be a positive integer' unless v.positive?

    options[:rows] = v
  end

  opts.on('--no-header', 'CSV file has no header') do
    options[:no_header] = true
  end
end.parse!

# parse! removes the recognised switches from ARGV, so the first remaining
# argument is the input file.
file = ARGV[0] || raise('no CSV file specified')
27
+
28
+ require 'csv-utils'
29
+
30
# Builds the output path for one part of a split file.
#
# When +file+ ends in .csv or .tsv (case-insensitive) the part suffix is
# inserted before that extension; otherwise it is appended to the whole path.
# Both forms carry the part number and the total number of parts.
#
# @param file [String] path of the file being split
# @param num_files [Integer] number of this part
# @param total_files [Integer] total number of parts
# @return [String] path to use for this part
def get_split_file_name(file, num_files, total_files)
  suffix = "-part-#{num_files}-of-#{total_files}"
  # \z anchors at the true end of the string; `$` would also match before a
  # trailing newline.
  extension_pattern = /\.(?:csv|tsv)\z/i

  return file + suffix unless file.match?(extension_pattern)

  file.sub(extension_pattern) { |extension| suffix + extension }
end
37
+
38
# csv-utils inspects the file; its col_separator is reused for both reading
# and writing so the parts keep the input's delimiter.
csv_options = CSVUtils::CSVOptions.new(file)

csv = CSV.open(
  file,
  'rb',
  col_sep: csv_options.col_separator,
  liberal_parsing: true
)

out = nil

begin
  # Unless --no-header was given, the first row is repeated at the top of
  # every part.
  headers = options[:no_header] ? nil : csv.shift

  # First pass: count the data rows so each part name can include the total
  # number of parts.
  total_rows = 0
  total_rows += 1 while csv.shift

  # Second pass: start over from the top and skip the header row again.
  csv.rewind
  csv.shift if headers

  # Ceiling division: a partial final chunk still needs its own file.
  total_files, leftover_rows = total_rows.divmod(options[:rows])
  total_files += 1 if leftover_rows > 0

  num_rows = 0
  num_files = 0

  while (row = csv.shift)
    # Roll over to a new part on the first row and whenever the current part
    # has reached the row limit.
    if out.nil? || num_rows >= options[:rows]
      out&.close
      num_rows = 0
      num_files += 1
      out = CSV.open(get_split_file_name(file, num_files, total_files), 'wb', col_sep: csv_options.col_separator)
      out << headers if headers
    end

    out << row
    num_rows += 1
  end
ensure
  # `out` stays nil when the input has no data rows, so the close must be
  # nil-safe; the input handle is always released.
  out&.close
  csv.close
end
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.21'
5
+ s.version = '0.3.22'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.21
4
+ version: 0.3.22
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-14 00:00:00.000000000 Z
11
+ date: 2023-02-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -34,6 +34,7 @@ executables:
34
34
  - csv-find-error
35
35
  - csv-grep
36
36
  - csv-readline
37
+ - csv-splitter
37
38
  - csv-validator
38
39
  extensions: []
39
40
  extra_rdoc_files: []
@@ -52,6 +53,7 @@ files:
52
53
  - bin/csv-find-error
53
54
  - bin/csv-grep
54
55
  - bin/csv-readline
56
+ - bin/csv-splitter
55
57
  - bin/csv-validator
56
58
  - csv-utils.gemspec
57
59
  - lib/csv-utils.rb