csv-utils 0.3.18 → 0.3.20
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/csv-grep +91 -0
- data/csv-utils.gemspec +1 -1
- data/lib/csv_utils/csv_iterator.rb +18 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 762ef5d73b9c7995cb53fa62dcbd8341684b98a361da9dee744669865d1e3220
|
4
|
+
data.tar.gz: 240e15b3dafe12aba42d8a76fe3fe34a8c93dfe71c0975c68c2355466c441ef8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5bcf6704d8024d983147e27282fec6dc5ff23ce199e686a439d9a10cdfa689c9e5c22272580219990e3e29149277b766b91b3096073e691bd1733ee418f15159
|
7
|
+
data.tar.gz: 6cef1bfb34f1cdb0127820d99780f8795bc5fc463ad9daec0f6b0746b107d9543d2c982401e26663c4ac23d009f90287d4cc8a0cc76c7387675bf156952099d0
|
data/bin/csv-grep
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
options = {
|
6
|
+
search: nil,
|
7
|
+
exact_match: false,
|
8
|
+
headers: :first,
|
9
|
+
limit: nil,
|
10
|
+
search_regex_options: nil
|
11
|
+
}
|
12
|
+
OptionParser.new do |opts|
|
13
|
+
opts.banner = 'Usage: ' + File.basename(__FILE__) + ' [options] <csv file>'
|
14
|
+
|
15
|
+
opts.on('-h', '--help', 'Prints this help') do
|
16
|
+
puts opts
|
17
|
+
exit
|
18
|
+
end
|
19
|
+
|
20
|
+
opts.on('-s', '--search SEARCH', 'Search expression') do |v|
|
21
|
+
options[:search] = v.to_s
|
22
|
+
end
|
23
|
+
|
24
|
+
opts.on('-e', '--exact-match', 'Exact match') do
|
25
|
+
options[:exact_match] = true
|
26
|
+
end
|
27
|
+
|
28
|
+
# -c stands for column, since -h is used for help
|
29
|
+
opts.on('-c', '--headers HEADERS', 'Comma separated list of headers to search (default first column)') do |v|
|
30
|
+
options[:headers] = v == 'all' ? :all : v.split(',')
|
31
|
+
end
|
32
|
+
|
33
|
+
opts.on('-l', '--limit LIMIT', Integer, 'Limit the number of matches') do |v|
|
34
|
+
options[:limit] = v
|
35
|
+
end
|
36
|
+
|
37
|
+
opts.on('-i', '--ignore-case', 'Ignore case') do |v|
|
38
|
+
options[:search_regex_options] = Regexp::IGNORECASE
|
39
|
+
end
|
40
|
+
end.parse!
|
41
|
+
|
42
|
+
raise('no search specified') unless options[:search]
|
43
|
+
|
44
|
+
require 'csv-utils'
|
45
|
+
|
46
|
+
csv = CSVUtils::CSVIterator.new(ARGV[0])
|
47
|
+
|
48
|
+
search_regex =
|
49
|
+
if options[:exact_match]
|
50
|
+
Regexp.new('\A' + Regexp.escape(options[:search]) + '\z', options[:search_regex_options])
|
51
|
+
else
|
52
|
+
Regexp.new(options[:search], options[:search_regex_options])
|
53
|
+
end
|
54
|
+
|
55
|
+
headers =
|
56
|
+
case options[:headers]
|
57
|
+
when :first
|
58
|
+
csv.headers.first(1) # one-element Array (not a bare String) so Array#- and #each work on it
|
59
|
+
when :all
|
60
|
+
csv.headers
|
61
|
+
else
|
62
|
+
options[:headers]
|
63
|
+
end
|
64
|
+
|
65
|
+
missing_headers = headers - csv.headers
|
66
|
+
raise("unknown headers #{missing_headers.join(', ')}") unless missing_headers.empty? # name only the headers not found in the CSV
|
67
|
+
|
68
|
+
matching_row_proc = proc do |row|
|
69
|
+
result = false
|
70
|
+
|
71
|
+
headers.each do |header|
|
72
|
+
next unless (val = row[header])
|
73
|
+
|
74
|
+
if search_regex.match?(val)
|
75
|
+
result = true
|
76
|
+
break
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
result
|
81
|
+
end
|
82
|
+
|
83
|
+
matches = 0
|
84
|
+
csv.each do |row|
|
85
|
+
next unless matching_row_proc.call(row)
|
86
|
+
|
87
|
+
matches += 1
|
88
|
+
print row.to_pretty_s + "\n"
|
89
|
+
|
90
|
+
break if options[:limit] && matches >= options[:limit]
|
91
|
+
end
|
data/csv-utils.gemspec
CHANGED
[+1 -1 hunk not captured in this extract]
data/lib/csv_utils/csv_iterator.rb
CHANGED
@@ -76,6 +76,24 @@ class CSVUtils::CSVIterator
|
|
76
76
|
cnt
|
77
77
|
end
|
78
78
|
|
79
|
+
def each_batch(batch_size = 1_000)
|
80
|
+
batch = []
|
81
|
+
|
82
|
+
process_batch_proc = Proc.new do
|
83
|
+
yield batch
|
84
|
+
batch = []
|
85
|
+
end
|
86
|
+
|
87
|
+
each do |row|
|
88
|
+
batch << row
|
89
|
+
process_batch_proc.call if batch.size >= batch_size
|
90
|
+
end
|
91
|
+
|
92
|
+
process_batch_proc.call if batch.size > 0
|
93
|
+
|
94
|
+
nil
|
95
|
+
end
|
96
|
+
|
79
97
|
private
|
80
98
|
|
81
99
|
def strip_bom!(col)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.18
|
4
|
+
version: 0.3.20
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: inheritance-helper
|
@@ -32,6 +32,7 @@ executables:
|
|
32
32
|
- csv-duplicate-finder
|
33
33
|
- csv-explorer
|
34
34
|
- csv-find-error
|
35
|
+
- csv-grep
|
35
36
|
- csv-readline
|
36
37
|
- csv-validator
|
37
38
|
extensions: []
|
@@ -49,6 +50,7 @@ files:
|
|
49
50
|
- bin/csv-duplicate-finder
|
50
51
|
- bin/csv-explorer
|
51
52
|
- bin/csv-find-error
|
53
|
+
- bin/csv-grep
|
52
54
|
- bin/csv-readline
|
53
55
|
- bin/csv-validator
|
54
56
|
- csv-utils.gemspec
|