csv-utils 0.3.21 → 0.3.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/bin/csv-splitter +81 -0
  3. data/csv-utils.gemspec +1 -1
  4. metadata +4 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bea28dcd86e140064eba5a1134d5aa6193963ddcb7632c63c2e1bfa85c4889ed
4
- data.tar.gz: 1547e2cb2b937c580031205baa64c79c3c0ca062ee2aeffd578b897d10fae092
3
+ metadata.gz: 2241c0a8d13e6edcb21271f66523862506cb1a04fbcfd533c160bf307c3ee6eb
4
+ data.tar.gz: 6d2f393c77e232aa8c124a43290f6ba912f42c0d63a49c55945c159106eb8169
5
5
  SHA512:
6
- metadata.gz: fe635fdc22220a0377ca386132109bebce61ba5fd41ec71625b8940b4dd7e2e5c72efb8cf163fc44f990633e18b8024b14878cbe0e852b49d4ba31fb076e44b3
7
- data.tar.gz: 569285d118ead894808ca96b5e4d1c0c1dd5f4bd130452a79c1ffb4f987ede9a0cf324609f4d11a15bddf61169948799026dd2bd4836674a1aae2f2caa137063
6
+ metadata.gz: d988950900011db0d030d7e5534dcf1cfa0940d17ea3257de18da5fb3fdc5745dbd797007ab220a0a72c4fd5b9ed907100d9244fe30c11484de5dbf3d58c5d1c
7
+ data.tar.gz: 64698435d62a5bd8e0bbd3e9a480581161d74c1d14bf2e7686a39b72898e765b77aeea0d95e8a8c98c065ab9af0f5f819fb1d58d6cd453a16f95c8b24f49e1ec
data/bin/csv-splitter ADDED
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+
5
# Command-line defaults: at most 250,000 data rows per output file, and the
# input is assumed to start with a header row unless --no-header is given.
options = {
  rows: 250_000,
  no_header: false
}
OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options] <csv file>"

  opts.on('-h', '--help', 'Prints this help') do
    puts opts
    exit
  end

  opts.on('-r', '--rows ROWS', Integer, 'Max number of rows per a CSV') do |v|
    # The row limit is later used as a divisor and as the "part is full"
    # threshold, so zero or a negative value must be rejected up front.
    raise OptionParser::InvalidArgument, "#{v} (must be a positive integer)" unless v > 0

    options[:rows] = v
  end

  opts.on('--no-header', 'CSV file has no header') do
    options[:no_header] = true
  end
end.parse!

# parse! strips recognised switches from ARGV, so the first remaining
# argument is the CSV file to split.
file = ARGV[0] || raise('no CSV file specified')
27
+
28
+ require 'csv-utils'
29
+
30
# Builds the output path for one part of a split file.
#
# The marker ".part-<num_files>-of-<total_files>" is inserted in front of a
# trailing .csv/.tsv extension (matched case-insensitively, original casing
# kept). When the path has no such extension the marker is appended to the
# end, so both forms carry the same part/total information.
#
# @param file [String] path of the file being split
# @param num_files [Integer] 1-based index of the part being written
# @param total_files [Integer] total number of parts
# @return [String] path for the part file
def get_split_file_name(file, num_files, total_files)
  part = ".part-#{num_files}-of-#{total_files}"
  # \z anchors at the true end of the string; $ would also match before an
  # embedded or trailing newline and split the name in the wrong place.
  extension = file[/\.(?:csv|tsv)\z/i].to_s
  stem = file[0, file.length - extension.length]

  "#{stem}#{part}#{extension}"
end
37
+
38
# Split the input into part files holding at most options[:rows] data rows
# each. The same column separator is used for reading and for every part.
csv_options = CSVUtils::CSVOptions.new(file)
col_sep = csv_options.col_separator

csv = CSV.open(
  file,
  'rb',
  col_sep: col_sep,
  liberal_parsing: true
)
out = nil

begin
  # The first row is kept as the header (repeated in every part) unless
  # --no-header was given.
  headers = options[:no_header] ? nil : csv.shift

  # First pass: count the data rows so every part name can include the
  # total number of parts.
  total_rows = 0
  total_rows += 1 while csv.shift

  # Second pass starts from the top again; skip the header row already read.
  csv.rewind
  csv.shift if headers

  total_files, remainder = total_rows.divmod(options[:rows])
  total_files += 1 if remainder > 0

  num_rows = 0
  num_files = 0

  while (row = csv.shift)
    # Roll over to a new part on the first row and whenever the current
    # part has reached the row limit.
    if out.nil? || num_rows >= options[:rows]
      out.close if out
      num_rows = 0
      num_files += 1
      out = CSV.open(get_split_file_name(file, num_files, total_files), 'wb', col_sep: col_sep)
      out << headers if headers
    end

    out << row
    num_rows += 1
  end
ensure
  # out stays nil when the input has no data rows, so guard the close;
  # the input handle is always released, even if a row fails to parse.
  out.close if out
  csv.close
end
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.21'
5
+ s.version = '0.3.23'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.21
4
+ version: 0.3.23
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-14 00:00:00.000000000 Z
11
+ date: 2023-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -34,6 +34,7 @@ executables:
34
34
  - csv-find-error
35
35
  - csv-grep
36
36
  - csv-readline
37
+ - csv-splitter
37
38
  - csv-validator
38
39
  extensions: []
39
40
  extra_rdoc_files: []
@@ -52,6 +53,7 @@ files:
52
53
  - bin/csv-find-error
53
54
  - bin/csv-grep
54
55
  - bin/csv-readline
56
+ - bin/csv-splitter
55
57
  - bin/csv-validator
56
58
  - csv-utils.gemspec
57
59
  - lib/csv-utils.rb