csv-utils 0.3.18 → 0.3.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cfcbc7a92377a1b5d4dec01847defb5a4b2ac76c3dc65f40ec113c9d2085d2c6
4
- data.tar.gz: bff2e91650ff90feb08e960ab3356d70486759aca7e4ddf54020081d31f2ce18
3
+ metadata.gz: 762ef5d73b9c7995cb53fa62dcbd8341684b98a361da9dee744669865d1e3220
4
+ data.tar.gz: 240e15b3dafe12aba42d8a76fe3fe34a8c93dfe71c0975c68c2355466c441ef8
5
5
  SHA512:
6
- metadata.gz: 7d7db74ed5c52d058fabe0498f9404915c398826be471b8407f839a9eec89c64d797cc9b9f8f1751b89ccd1b80c14165190ec50f595e942f9f0fbc151f544985
7
- data.tar.gz: 90578cb216ba3d5260304a8cd9302bcae3e8b8e892592f7adc75db7536652822e0c729e57a074a99d77472751bfa0c92e2470be0bd7c68393ece4a782e93eb2b
6
+ metadata.gz: 5bcf6704d8024d983147e27282fec6dc5ff23ce199e686a439d9a10cdfa689c9e5c22272580219990e3e29149277b766b91b3096073e691bd1733ee418f15159
7
+ data.tar.gz: 6cef1bfb34f1cdb0127820d99780f8795bc5fc463ad9daec0f6b0746b107d9543d2c982401e26663c4ac23d009f90287d4cc8a0cc76c7387675bf156952099d0
data/bin/csv-grep ADDED
@@ -0,0 +1,91 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+
5
# Defaults for every command line switch; the handlers below overwrite them.
options = {
  search: nil,
  exact_match: false,
  headers: :first,
  limit: nil,
  search_regex_options: nil
}

option_parser = OptionParser.new do |parser|
  parser.banner = "Usage: #{File.basename(__FILE__)} [options] <csv file>"

  parser.on('-h', '--help', 'Prints this help') do
    puts parser
    exit
  end

  parser.on('-s', '--search SEARCH', 'Search expression') do |expression|
    options[:search] = expression.to_s
  end

  parser.on('-e', '--exact-match', 'Exact match') do
    options[:exact_match] = true
  end

  # -c stands for column, since -h is used for help
  parser.on('-c', '--headers HEADERS', 'Comma separated list of headers to search (default first column)') do |list|
    # The literal word "all" selects every column; anything else is split
    # into an Array of header names.
    options[:headers] =
      if list == 'all'
        :all
      else
        list.split(',')
      end
  end

  parser.on('-l', '--limit LIMIT', Integer, 'Limit the number of matches') do |count|
    options[:limit] = count
  end

  parser.on('-i', '--ignore-case', 'Ignore case') do
    options[:search_regex_options] = Regexp::IGNORECASE
  end
end

# Parse ARGV in place: recognised switches are removed, positional arguments stay.
option_parser.parse!
41
+
42
raise('no search specified') unless options[:search]

require 'csv-utils'

csv = CSVUtils::CSVIterator.new(ARGV[0])

# With --exact-match the search text is escaped and anchored to the whole
# cell value; otherwise the search text itself is compiled as a regular
# expression. The regex options are nil unless --ignore-case was given.
search_regex =
  if options[:exact_match]
    Regexp.new('\A' + Regexp.escape(options[:search]) + '\z', options[:search_regex_options])
  else
    Regexp.new(options[:search], options[:search_regex_options])
  end

# Always resolve to an Array of header names. The default (:first) must be
# wrapped in an Array as well: the validation and the per-row search below
# both treat `headers` as a list.
headers =
  case options[:headers]
  when :first
    csv.headers.first(1)
  when :all
    csv.headers
  else
    options[:headers]
  end

# Report only the requested headers that the file does not contain.
missing_headers = headers - csv.headers
raise("unknown headers #{missing_headers.join(', ')}") unless missing_headers.empty?

# A row matches when any selected column holds a value matching the search;
# columns whose value is nil/false are skipped.
row_matches = lambda do |row|
  headers.any? do |header|
    val = row[header]
    val && search_regex.match?(val)
  end
end

matches = 0
csv.each do |row|
  next unless row_matches.call(row)

  matches += 1
  print row.to_pretty_s + "\n"

  # Stop reading the file as soon as the requested number of matches is printed.
  break if options[:limit] && matches >= options[:limit]
end
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.18'
5
+ s.version = '0.3.20'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
@@ -76,6 +76,24 @@ class CSVUtils::CSVIterator
76
76
  cnt
77
77
  end
78
78
 
79
# Yields the rows produced by #each in consecutive groups.
#
# Every yielded batch is a new Array holding up to +batch_size+ rows; the
# final batch may be smaller, and nothing is yielded when there are no rows.
#
# @param batch_size [Integer] batch size threshold; a batch is yielded once
#   it holds at least this many rows
# @yieldparam batch [Array] the rows collected for this batch
# @return [nil] when a block is given
# @return [Enumerator] over the batches when no block is given
def each_batch(batch_size = 1_000)
  # Without a block there is nothing to yield to, so hand back an enumerator
  # instead of failing with LocalJumpError at the first full batch.
  return enum_for(:each_batch, batch_size) unless block_given?

  batch = []

  each do |row|
    batch << row
    next if batch.size < batch_size

    yield batch
    # Start a new Array rather than clearing, so a caller that kept the
    # yielded batch does not see it change.
    batch = []
  end

  # Flush the trailing partial batch.
  yield batch unless batch.empty?

  nil
end
96
+
79
97
  private
80
98
 
81
99
  def strip_bom!(col)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.18
4
+ version: 0.3.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-18 00:00:00.000000000 Z
11
+ date: 2022-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -32,6 +32,7 @@ executables:
32
32
  - csv-duplicate-finder
33
33
  - csv-explorer
34
34
  - csv-find-error
35
+ - csv-grep
35
36
  - csv-readline
36
37
  - csv-validator
37
38
  extensions: []
@@ -49,6 +50,7 @@ files:
49
50
  - bin/csv-duplicate-finder
50
51
  - bin/csv-explorer
51
52
  - bin/csv-find-error
53
+ - bin/csv-grep
52
54
  - bin/csv-readline
53
55
  - bin/csv-validator
54
56
  - csv-utils.gemspec