csv-utils 0.3.18 → 0.3.20

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cfcbc7a92377a1b5d4dec01847defb5a4b2ac76c3dc65f40ec113c9d2085d2c6
4
- data.tar.gz: bff2e91650ff90feb08e960ab3356d70486759aca7e4ddf54020081d31f2ce18
3
+ metadata.gz: 762ef5d73b9c7995cb53fa62dcbd8341684b98a361da9dee744669865d1e3220
4
+ data.tar.gz: 240e15b3dafe12aba42d8a76fe3fe34a8c93dfe71c0975c68c2355466c441ef8
5
5
  SHA512:
6
- metadata.gz: 7d7db74ed5c52d058fabe0498f9404915c398826be471b8407f839a9eec89c64d797cc9b9f8f1751b89ccd1b80c14165190ec50f595e942f9f0fbc151f544985
7
- data.tar.gz: 90578cb216ba3d5260304a8cd9302bcae3e8b8e892592f7adc75db7536652822e0c729e57a074a99d77472751bfa0c92e2470be0bd7c68393ece4a782e93eb2b
6
+ metadata.gz: 5bcf6704d8024d983147e27282fec6dc5ff23ce199e686a439d9a10cdfa689c9e5c22272580219990e3e29149277b766b91b3096073e691bd1733ee418f15159
7
+ data.tar.gz: 6cef1bfb34f1cdb0127820d99780f8795bc5fc463ad9daec0f6b0746b107d9543d2c982401e26663c4ac23d009f90287d4cc8a0cc76c7387675bf156952099d0
data/bin/csv-grep ADDED
@@ -0,0 +1,91 @@
1
#!/usr/bin/env ruby

# Prints the rows of a CSV file in which at least one of the selected columns
# matches a search expression.
#
# Options (see --help):
#   -s SEARCH   search expression; treated as a regular expression unless -e is given
#   -e          match the whole cell value literally instead of as a regex
#   -c HEADERS  comma separated headers to search, or "all" (default: first column)
#   -l LIMIT    stop after LIMIT matching rows
#   -i          case-insensitive matching

require 'optparse'

options = {
  search: nil,
  exact_match: false,
  headers: :first,
  limit: nil,
  search_regex_options: nil
}
OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options] <csv file>"

  opts.on('-h', '--help', 'Prints this help') do
    puts opts
    exit
  end

  opts.on('-s', '--search SEARCH', 'Search expression') do |v|
    options[:search] = v.to_s
  end

  opts.on('-e', '--exact-match', 'Exact match') do
    options[:exact_match] = true
  end

  # -c stands for column, since -h is used for help
  opts.on('-c', '--headers HEADERS', 'Comma separated list of headers to search (default first column)') do |v|
    options[:headers] = v == 'all' ? :all : v.split(',')
  end

  opts.on('-l', '--limit LIMIT', Integer, 'Limit the number of matches') do |v|
    options[:limit] = v
  end

  opts.on('-i', '--ignore-case', 'Ignore case') do
    options[:search_regex_options] = Regexp::IGNORECASE
  end
end.parse!

raise('no search specified') unless options[:search]

# Loaded only after option parsing so --help works without loading the library.
require 'csv-utils'

csv = CSVUtils::CSVIterator.new(ARGV[0])

# An exact match escapes the search text and anchors it to the whole value;
# otherwise the search text is compiled as a regular expression as-is.
search_pattern =
  if options[:exact_match]
    "\\A#{Regexp.escape(options[:search])}\\z"
  else
    options[:search]
  end
search_regex = Regexp.new(search_pattern, options[:search_regex_options])

# Always resolve to an Array of header names: the code below subtracts
# csv.headers from it and iterates over it.
headers =
  case options[:headers]
  when :first
    # first(1) keeps the result an Array; a bare `first` would return the single
    # header itself and break the `-` and iteration below.
    csv.headers.first(1)
  when :all
    csv.headers
  else
    options[:headers]
  end

# Report only the headers that are actually absent from the file.
missing_headers = headers - csv.headers
raise("unknown headers #{missing_headers.join(', ')}") unless missing_headers.empty?

# True when any selected column of the row holds a value matching the search.
# Columns whose value is nil/false are skipped.
matching_row_proc = proc do |row|
  headers.any? do |header|
    val = row[header]
    val && search_regex.match?(val)
  end
end

matches = 0
csv.each do |row|
  next unless matching_row_proc.call(row)

  matches += 1
  print "#{row.to_pretty_s}\n"

  break if options[:limit] && matches >= options[:limit]
end
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.18'
5
+ s.version = '0.3.20'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
@@ -76,6 +76,24 @@ class CSVUtils::CSVIterator
76
76
  cnt
77
77
  end
78
78
 
79
# Iterates over the rows produced by #each and yields them to the given block
# grouped into Arrays.
#
# A batch is yielded as soon as it holds at least +batch_size+ rows; any rows
# left over once #each finishes are yielded as a final, smaller batch.
#
# @param batch_size [Integer] number of rows that triggers a yield (default 1_000)
# @yield [Array] the rows collected for the current batch
# @return [nil]
def each_batch(batch_size = 1_000)
  pending = []

  each do |row|
    pending << row
    next if pending.size < batch_size

    yield pending
    # Start a fresh Array instead of clearing, so the batch already handed to
    # the block is not mutated afterwards.
    pending = []
  end

  # Flush the trailing partial batch, if any.
  yield pending unless pending.empty?

  nil
end
96
+
79
97
  private
80
98
 
81
99
  def strip_bom!(col)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.18
4
+ version: 0.3.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-18 00:00:00.000000000 Z
11
+ date: 2022-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -32,6 +32,7 @@ executables:
32
32
  - csv-duplicate-finder
33
33
  - csv-explorer
34
34
  - csv-find-error
35
+ - csv-grep
35
36
  - csv-readline
36
37
  - csv-validator
37
38
  extensions: []
@@ -49,6 +50,7 @@ files:
49
50
  - bin/csv-duplicate-finder
50
51
  - bin/csv-explorer
51
52
  - bin/csv-find-error
53
+ - bin/csv-grep
52
54
  - bin/csv-readline
53
55
  - bin/csv-validator
54
56
  - csv-utils.gemspec