csv-utils 0.3.20 → 0.3.22

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 762ef5d73b9c7995cb53fa62dcbd8341684b98a361da9dee744669865d1e3220
4
- data.tar.gz: 240e15b3dafe12aba42d8a76fe3fe34a8c93dfe71c0975c68c2355466c441ef8
3
+ metadata.gz: ee1aeb07e7762e4f619464536906cd1ab2171a93238a6113811f30ae76cfd5dd
4
+ data.tar.gz: 894841751be6e16ed552fbb47047e6a8bc550b7eb45beed6419cb351cf1df734
5
5
  SHA512:
6
- metadata.gz: 5bcf6704d8024d983147e27282fec6dc5ff23ce199e686a439d9a10cdfa689c9e5c22272580219990e3e29149277b766b91b3096073e691bd1733ee418f15159
7
- data.tar.gz: 6cef1bfb34f1cdb0127820d99780f8795bc5fc463ad9daec0f6b0746b107d9543d2c982401e26663c4ac23d009f90287d4cc8a0cc76c7387675bf156952099d0
6
+ metadata.gz: 9624fc8ebd25445d34b0d2f8a00fbb4983c9a1ba9e4cf4f16bcdec029a852f5ee049fb2a935fdc8f4c429c34fd7588488182bf2e0433491fe4705c71dc7a43ea
7
+ data.tar.gz: 95ceebc269ca0e48404d62dd47905f0527028956f6bea09d68c71454784ceb626a346b8f8b27113d02bed771a75248ba85c364c63a08c084c6a97b8631d2990d
data/bin/csv-grep CHANGED
@@ -55,7 +55,7 @@ search_regex =
55
55
  headers =
56
56
  case options[:headers]
57
57
  when :first
58
- csv.headers.first
58
+ [csv.headers.first]
59
59
  when :all
60
60
  csv.headers
61
61
  else
data/bin/csv-splitter ADDED
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+
5
+ options = {
6
+ rows: 250_000,
7
+ no_header: false
8
+ }
9
+ OptionParser.new do |opts|
10
+ opts.banner = 'Usage: ' + File.basename(__FILE__) + ' [options] <csv file>'
11
+
12
+ opts.on('-h', '--help', 'Prints this help') do
13
+ puts opts
14
+ exit
15
+ end
16
+
17
+ opts.on('-r', '--rows ROWS', Integer, 'Max number of rows per a CSV') do |v|
18
+ options[:rows] = v
19
+ end
20
+
21
+ opts.on('--no-header', 'CSV file has no header') do
22
+ options[:no_header] = true
23
+ end
24
+ end.parse!
25
+
26
+ file = ARGV[0] || raise('no CSV file specified')
27
+
28
+ require 'csv-utils'
29
+
30
+ def get_split_file_name(file, num_files, total_files)
31
+ if file =~ /\.(?:csv|tsv)$/i
32
+ file.sub(/\.(?:csv|tsv)$/i) { |m| "-part-#{num_files}-of-#{total_files}" + m }
33
+ else
34
+ file + "-part-#{num_files}"
35
+ end
36
+ end
37
+
38
+ csv_options = CSVUtils::CSVOptions.new(file)
39
+
40
+ csv = CSV.open(
41
+ file,
42
+ 'rb',
43
+ col_sep: csv_options.col_separator,
44
+ liberal_parsing: true
45
+ )
46
+
47
+ headers = options[:no_header] ? nil : csv.shift
48
+
49
+ total_rows = 0
50
+ while csv.shift
51
+ total_rows +=1
52
+ end
53
+
54
+ csv.rewind
55
+ csv.shift if headers
56
+
57
+ total_files = total_rows / options[:rows]
58
+ total_files += 1 if (total_rows % options[:rows]) > 0
59
+
60
+ num_rows = 0
61
+ num_files = 0
62
+ out = nil
63
+
64
+ append_row_proc = proc do |row|
65
+ if out.nil? || num_rows >= options[:rows]
66
+ out.close if out
67
+ num_rows = 0
68
+ num_files += 1
69
+ out = CSV.open(get_split_file_name(file, num_files, total_files), 'wb', col_sep: csv_options.col_separator)
70
+ out << headers if headers
71
+ end
72
+
73
+ out << row
74
+ num_rows += 1
75
+ end
76
+
77
+ while (row = csv.shift)
78
+ append_row_proc.call(row)
79
+ end
80
+
81
+ out.close
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.20'
5
+ s.version = '0.3.22'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.20
4
+ version: 0.3.22
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-14 00:00:00.000000000 Z
11
+ date: 2023-02-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -34,6 +34,7 @@ executables:
34
34
  - csv-find-error
35
35
  - csv-grep
36
36
  - csv-readline
37
+ - csv-splitter
37
38
  - csv-validator
38
39
  extensions: []
39
40
  extra_rdoc_files: []
@@ -52,6 +53,7 @@ files:
52
53
  - bin/csv-find-error
53
54
  - bin/csv-grep
54
55
  - bin/csv-readline
56
+ - bin/csv-splitter
55
57
  - bin/csv-validator
56
58
  - csv-utils.gemspec
57
59
  - lib/csv-utils.rb