cvg 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +18 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +47 -0
- data/Rakefile +6 -0
- data/bin/cvg +251 -0
- data/cvg.gemspec +29 -0
- data/features/basic.feature +269 -0
- data/features/composition.feature +54 -0
- data/features/multi_files.feature +67 -0
- data/features/step_definitions/cvg_steps.rb +32 -0
- data/features/support/env.rb +5 -0
- data/lib/cvg.rb +5 -0
- data/lib/cvg/version.rb +3 -0
- data/spec/cvg_spec.rb +7 -0
- data/spec/spec_helper.rb +2 -0
- metadata +171 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
NzgyMDY5ZWZiODdhZmI3NTE5MGUwZDdjOTQzYjBmYzg0MWZhNzA1OA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NWNlYWNiYzRjYWZjZjQwODc2Y2FjN2ZhZWQzMGQ0OTQ3NGZhYTdiMw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZjE2ZDRjYWU3MGY4MDVjYzBjMTQ4Y2RhZmEyOWVmNjkwNTZhYzgzZjhkYzQx
|
10
|
+
ZTJmYmE4MzE4MzEyNmRhZDIyZTcxNDQzODU0NzIxNGM2ZTBhOWEyM2EyNjAw
|
11
|
+
Yjc3OTVhN2U4NzllYWQ5MTk5NmRkZmFiZmQwMDcxZjM3MGMwYTM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
OTUxZTA1ODMwM2E0NjFlMjVkYjVlNjljNmUyYzk5ZjUwOWZkODZlYzA2NWVi
|
14
|
+
MTE2NDkzNjExNjFiODdlZGMzYmJlYjRlODA3MmMxMmI3ZmIzMDlkYjJiYzAx
|
15
|
+
MWU0NjI5ZDg1OWIxM2EwNjg5ZTMyNGQ3NGY3YWE5M2IyMzU5MDc=
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Seamus Abshere
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# Cvg
|
2
|
+
|
3
|
+
## Examples
|
4
|
+
|
5
|
+
cvg --present colA input.csv
|
6
|
+
|
7
|
+
cvg --missing colA input.csv
|
8
|
+
|
9
|
+
cvg --regex 'colA:/\d+/' input.csv
|
10
|
+
|
11
|
+
cvg --detect-missing input.csv
|
12
|
+
|
13
|
+
cvg --lt 'colA:5' input.csv
|
14
|
+
|
15
|
+
Multiple tests:
|
16
|
+
|
17
|
+
cvg --present colA --missing colB input.csv
|
18
|
+
|
19
|
+
Multi files: (different column order OK)
|
20
|
+
|
21
|
+
cvg --present colA input1.csv input2.csv
|
22
|
+
|
23
|
+
## TODO
|
24
|
+
|
25
|
+
DONE allows special "PRESENT" filter
|
26
|
+
|
27
|
+
DONE takes multiple csvs as input
|
28
|
+
|
29
|
+
DONE allows special "FALSE_OR_BLANK" filter
|
30
|
+
|
31
|
+
DONE combines tests
|
32
|
+
|
33
|
+
DONE allows single string matching
|
34
|
+
|
35
|
+
DONE allows multiple option matching
|
36
|
+
|
37
|
+
DONE allows regex matching
|
38
|
+
|
39
|
+
DONE allows > >= < <= comparisons
|
40
|
+
|
41
|
+
DONE treat as missing %w{ N/A n/a NULL null - #DIV/0 #REF! #NAME? NIL nil NA na #VALUE! #NULL! NaN #N/A #NUM! ? }
|
42
|
+
|
43
|
+
writes report (why rejected row, checks missing and extra columns across input files)
|
44
|
+
|
45
|
+
dup checks on certain cols
|
46
|
+
|
47
|
+
optionally uses minimal set of headers shared by all
|
data/Rakefile
ADDED
data/bin/cvg
ADDED
@@ -0,0 +1,251 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'to_regexp'
|
5
|
+
require 'active_support/core_ext'
|
6
|
+
|
7
|
+
# Command-line CSV filter: reads one or more CSV files, keeps the rows that
# pass every requested test, and writes the surviving rows (with the union of
# all input headers) as CSV.
class Cvg
  class << self
    # Splits a raw argument list into its three kinds of entries.
    #
    # * `--name` where name is a key of FLAGS  -> a flag (takes no value)
    # * `--name value` for any other name      -> an option pair [name, value]
    # * anything else                          -> an input path
    #
    # An argument is only considered a flag or option when it really starts
    # with `--`; a path whose tail happens to equal a flag name stays a path.
    # A trailing option without a value is dropped.
    #
    # @param argv [Array<String>]
    # @return [Array(Array<String>, Array<String>, Array<Array<String>>)]
    #   flags, input paths and option pairs, in that order
    def parse(argv)
      flags = []
      options = []
      input_paths = []

      pending_option = nil
      argv.each do |arg|
        if pending_option
          # The argument following an option name is always its value.
          options << [pending_option, arg]
          pending_option = nil
        elsif arg.start_with?('--')
          name = arg[2..-1]
          if FLAGS.key?(name)
            flags << name
          else
            pending_option = name
          end
        else
          input_paths << arg
        end
      end

      [flags, input_paths, options]
    end
  end

  # Base class for row tests. Holds the option name (`k`) and its raw
  # argument (`arg`); subclasses implement `self.accept` and `pass?`.
  class Test
    attr_reader :k
    attr_reader :arg

    def initialize(k, arg)
      @k = k
      @arg = arg
    end

    # Column name: the part of the argument before the first ':'.
    # Only meaningful for tests whose argument looks like "col:something".
    def col
      @col ||= arg.split(':', 2)[0]
    end

    # Column names: the part before the first ':' parsed as one CSV line,
    # so "b,c" yields ["b", "c"].
    def cols
      @cols ||= CSV.parse_line arg.split(':', 2)[0]
    end

    private

    # The part of the argument after the first ':', or nil when there is none.
    def arg_value
      arg.split(':', 2)[1]
    end
  end

  # `--present col`: passes when the column holds a non-blank value.
  class Present < Test
    class << self
      def accept(k, arg)
        new(k, arg) if k == 'present'
      end
    end

    def pass?(row)
      row.fetch(arg).to_s.present?
    end
  end

  # `--missing col`: passes when the column is empty or whitespace only.
  class Missing < Test
    class << self
      def accept(k, arg)
        new(k, arg) if k == 'missing'
      end
    end

    def pass?(row)
      row.fetch(arg).to_s.strip.empty?
    end
  end

  # `--regex 'col:/pattern/'`: passes when the column matches the pattern.
  class Regex < Test
    class << self
      def accept(k, arg)
        new(k, arg) if k == 'regex'
      end
    end

    def pass?(row)
      !!(row.fetch(col).to_s =~ regex)
    end

    # Pattern built from the text after the first ':'.
    # Raises when that text is absent or cannot be turned into a regex.
    def regex
      @regex ||= begin
        pattern = arg_value
        (pattern && pattern.to_regexp(detect: true)) ||
          raise("#{arg.inspect} doesn't have a valid regex")
      end
    end
  end

  # `--value 'col:a,b'`: passes when the column equals one of the listed values.
  class Value < Test
    class << self
      def accept(k, arg)
        new(k, arg) if k == 'value'
      end
    end

    def pass?(row)
      values.include?(row.fetch(col).to_s)
    end

    # Allowed values: the text after the first ':' parsed as one CSV line.
    # A missing or empty list yields [] (nothing passes) instead of crashing.
    def values
      @values ||= Array(CSV.parse_line(arg_value.to_s))
    end
  end

  # `--lt`, `--lte`, `--gt`, `--gte` with 'col:number': numeric comparison.
  class GreaterOrLesser < Test
    OPERATOR = {
      'lt' => :<,
      'lte' => :<=,
      'gt' => :>,
      'gte' => :>=,
    }.freeze
    NUMBER = /\d/
    JUST_A_NUMBER = /\A-?[\d.]+\z/
    NUMBER_FLUFF = /[^\d.eE\+\-]+/

    class << self
      def accept(k, arg)
        new(k, arg) if OPERATOR[k]
      end

      # Converts a cell to a Float.
      #
      # Plain numbers are converted directly; anything else containing a
      # digit has its non-numeric "fluff" stripped first.
      # Returns nil for values with no digit (including nil), or raises when
      # `verify` is true.
      def numify(v, verify = false)
        case v
        when JUST_A_NUMBER
          v.to_f
        when NUMBER
          v.gsub(NUMBER_FLUFF, '').to_f
        else
          raise "#{v.inspect} not a number" if verify
        end
      end
    end

    # Non-numeric cells never pass.
    def pass?(row)
      value = GreaterOrLesser.numify(row.fetch(col))
      !value.nil? && value.public_send(operator, threshold)
    end

    def operator
      @operator ||= OPERATOR.fetch(k)
    end

    # The number to compare against. Only the text after the first ':' is
    # parsed; feeding the whole "col:number" argument to numify would let
    # digits, '.', 'e', 'E', '+' or '-' from the column name leak into the
    # number (e.g. "age2:5" would become 25.0).
    def threshold
      @threshold ||= GreaterOrLesser.numify arg_value, true
    end
  end

  # `--dedup col[,col...]`: passes only the first row seen for each
  # combination of values in the given columns.
  class Dedup < Test
    class << self
      def accept(k, arg)
        new(k, arg) if k == 'dedup'
      end
    end

    # Internal bookkeeping: value combinations already seen, stored as hash
    # keys so lookups stay constant-time and distinct combinations can never
    # be confused by a #hash collision.
    def registry
      @registry ||= {}
    end

    # @return [Boolean] true the first time a combination appears
    def pass?(row)
      key = row.values_at(*cols)
      return false if registry.key?(key)
      registry[key] = true
    end
  end

  # `--detect-missing` flag: replaces common "no value" placeholders with nil.
  class DetectMissing
    MISSING = %w{ N/A n/a NULL null - #DIV/0 #REF! #NAME? NIL nil NA na #VALUE! #NULL! NaN #N/A #NUM! ? }.freeze

    # Mutates `row` in place, setting placeholder cells to nil.
    def apply!(row)
      row.each do |k, v|
        row[k] = nil if v.is_a?(::String) && MISSING.include?(v)
      end
    end
  end

  TESTS = [Present, Missing, Regex, Value, GreaterOrLesser, Dedup].freeze
  FLAGS = {
    'detect-missing' => DetectMissing,
  }.freeze

  attr_reader :options
  attr_reader :input_paths

  # @param argv [Array<String>] command-line arguments, see Cvg.parse
  def initialize(argv)
    @flags, @input_paths, @options = Cvg.parse argv
  end

  # Writes the header line, then every input row that passes all tests.
  def perform
    output_headers

    each_input_row do |row|
      output_row row if tests.all? { |t| t.pass?(row) }
    end

    close_output
  end

  private

  # Yields each data row of each input file as a Hash, after every flag
  # handler has been applied to it.
  def each_input_row
    input_paths.each do |path|
      CSV.foreach(path, headers: :first_row) do |csv_row|
        row = csv_row.to_hash
        flags.each { |flag| flag.apply! row }
        yield row
      end
    end
  end

  # Test instances built from the option pairs. An option name that no test
  # class accepts contributes no test.
  def tests
    @tests ||= options.flat_map do |k, v|
      TESTS.map { |test_class| test_class.accept(k, v) }.compact
    end
  end

  # One handler instance per flag given on the command line.
  def flags
    @flag_handlers ||= @flags.map { |flag| FLAGS.fetch(flag).new }
  end

  # Columns a row lacks are written as empty cells.
  def output_row(row)
    output_f.puts row.values_at(*headers).to_csv
  end

  def output_headers
    output_f.puts headers.to_csv
  end

  def output_f
    @output_f ||= $stdout
  end

  # Flushes the output. $stdout itself is left open: it belongs to the
  # process, not to this object.
  def close_output
    output_f.flush
    output_f.close unless output_f.equal?($stdout)
  end

  # Union of the header rows of all input files, in first-seen order.
  # An empty input file contributes no headers.
  def headers
    @headers ||= input_paths.flat_map do |path|
      first_line = File.open(path) { |f| f.gets }
      Array(CSV.parse_line(first_line.to_s))
    end.uniq
  end
end
|
250
|
+
|
251
|
+
Cvg.new(ARGV).perform
|
data/cvg.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for cvg.
# lib/ is put on the load path first so the version constant can be required.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'cvg/version'

Gem::Specification.new do |gem|
  # The same text serves as both the long description and the short summary.
  blurb = %q{Like jq or grep for csv. Combine one or more CSVs while filtering on fields with regular expressions, whitelists, presence, missing, etc.}

  gem.name          = "cvg"
  gem.version       = Cvg::VERSION
  gem.authors       = ["Seamus Abshere"]
  gem.email         = ["seamus@abshere.net"]
  gem.description   = blurb
  gem.summary       = blurb
  gem.homepage      = "https://github.com/seamusabshere/cvg"
  gem.license       = "MIT"

  # Package exactly what git tracks; executables and test files are derived
  # from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  ["activesupport", "to_regexp"].each do |runtime_dep|
    gem.add_runtime_dependency runtime_dep
  end

  gem.add_development_dependency "bundler", "~> 1.3"
  ["rake", "rspec", "rspec-expectations", "posix-spawn"].each do |dev_dep|
    gem.add_development_dependency dev_dep
  end
end
|
@@ -0,0 +1,269 @@
|
|
1
|
+
Feature: Basic functionality
|
2
|
+
|
3
|
+
Scenario: Field is present
|
4
|
+
Given an input csv
|
5
|
+
"""
|
6
|
+
a,b
|
7
|
+
yes,x
|
8
|
+
no,
|
9
|
+
yes,_
|
10
|
+
no," "
|
11
|
+
"""
|
12
|
+
When you pass arguments --present b
|
13
|
+
Then you get output
|
14
|
+
"""
|
15
|
+
a,b
|
16
|
+
yes,x
|
17
|
+
yes,_
|
18
|
+
"""
|
19
|
+
|
20
|
+
Scenario: Field is missing
|
21
|
+
Given an input csv
|
22
|
+
"""
|
23
|
+
a,b
|
24
|
+
no,x
|
25
|
+
yes,
|
26
|
+
no,_
|
27
|
+
yes," "
|
28
|
+
"""
|
29
|
+
When you pass arguments --missing b
|
30
|
+
Then you get output
|
31
|
+
"""
|
32
|
+
a,b
|
33
|
+
yes,
|
34
|
+
yes,
|
35
|
+
"""
|
36
|
+
|
37
|
+
Scenario: Field matches regex
|
38
|
+
Given an input csv
|
39
|
+
"""
|
40
|
+
a,b
|
41
|
+
yes,1
|
42
|
+
no,
|
43
|
+
yes,11
|
44
|
+
no,a
|
45
|
+
yes,a1
|
46
|
+
yes,1a
|
47
|
+
"""
|
48
|
+
When you pass arguments --regex 'b:/\d+/'
|
49
|
+
Then you get output
|
50
|
+
"""
|
51
|
+
a,b
|
52
|
+
yes,1
|
53
|
+
yes,11
|
54
|
+
yes,a1
|
55
|
+
yes,1a
|
56
|
+
"""
|
57
|
+
|
58
|
+
Scenario: Field is value
|
59
|
+
Given an input csv
|
60
|
+
"""
|
61
|
+
a,b
|
62
|
+
yes,z
|
63
|
+
no,a
|
64
|
+
no,
|
65
|
+
no,zz
|
66
|
+
no,ZZ
|
67
|
+
"""
|
68
|
+
When you pass arguments --value 'b:z'
|
69
|
+
Then you get output
|
70
|
+
"""
|
71
|
+
a,b
|
72
|
+
yes,z
|
73
|
+
"""
|
74
|
+
|
75
|
+
Scenario: Field one of values
|
76
|
+
Given an input csv
|
77
|
+
"""
|
78
|
+
a,b
|
79
|
+
yes,z
|
80
|
+
no,a
|
81
|
+
no,
|
82
|
+
no,zz
|
83
|
+
no,ZZ
|
84
|
+
yes,Z
|
85
|
+
"""
|
86
|
+
When you pass arguments --value 'b:z,Z'
|
87
|
+
Then you get output
|
88
|
+
"""
|
89
|
+
a,b
|
90
|
+
yes,z
|
91
|
+
yes,Z
|
92
|
+
"""
|
93
|
+
|
94
|
+
Scenario: Less than
|
95
|
+
Given an input csv
|
96
|
+
"""
|
97
|
+
a,b
|
98
|
+
yes,-1
|
99
|
+
yes,-1.0
|
100
|
+
no,0
|
101
|
+
no,0.0
|
102
|
+
no,1
|
103
|
+
no,1.0
|
104
|
+
no,
|
105
|
+
no,z
|
106
|
+
"""
|
107
|
+
When you pass arguments --lt 'b:0'
|
108
|
+
Then you get output
|
109
|
+
"""
|
110
|
+
a,b
|
111
|
+
yes,-1
|
112
|
+
yes,-1.0
|
113
|
+
"""
|
114
|
+
|
115
|
+
Scenario: Less than or equal to
|
116
|
+
Given an input csv
|
117
|
+
"""
|
118
|
+
a,b
|
119
|
+
yes,-1
|
120
|
+
yes,-1.0
|
121
|
+
yes,0
|
122
|
+
yes,0.0
|
123
|
+
no,1
|
124
|
+
no,1.0
|
125
|
+
no,
|
126
|
+
no,z
|
127
|
+
"""
|
128
|
+
When you pass arguments --lte 'b:0'
|
129
|
+
Then you get output
|
130
|
+
"""
|
131
|
+
a,b
|
132
|
+
yes,-1
|
133
|
+
yes,-1.0
|
134
|
+
yes,0
|
135
|
+
yes,0.0
|
136
|
+
"""
|
137
|
+
|
138
|
+
Scenario: Greater than
|
139
|
+
Given an input csv
|
140
|
+
"""
|
141
|
+
a,b
|
142
|
+
no,-1
|
143
|
+
no,-1.0
|
144
|
+
no,0
|
145
|
+
no,0.0
|
146
|
+
yes,1
|
147
|
+
yes,1.0
|
148
|
+
no,
|
149
|
+
no,z
|
150
|
+
"""
|
151
|
+
When you pass arguments --gt 'b:0'
|
152
|
+
Then you get output
|
153
|
+
"""
|
154
|
+
a,b
|
155
|
+
yes,1
|
156
|
+
yes,1.0
|
157
|
+
"""
|
158
|
+
|
159
|
+
Scenario: Greater than or equal to
|
160
|
+
Given an input csv
|
161
|
+
"""
|
162
|
+
a,b
|
163
|
+
no,-1
|
164
|
+
no,-1.0
|
165
|
+
yes,0
|
166
|
+
yes,0.0
|
167
|
+
yes,1
|
168
|
+
yes,1.0
|
169
|
+
no,
|
170
|
+
no,z
|
171
|
+
"""
|
172
|
+
When you pass arguments --gte 'b:0'
|
173
|
+
Then you get output
|
174
|
+
"""
|
175
|
+
a,b
|
176
|
+
yes,0
|
177
|
+
yes,0.0
|
178
|
+
yes,1
|
179
|
+
yes,1.0
|
180
|
+
"""
|
181
|
+
|
182
|
+
Scenario: Field is one of common null/missing values
|
183
|
+
Given an input csv
|
184
|
+
"""
|
185
|
+
a,b
|
186
|
+
yes,x
|
187
|
+
yes,
|
188
|
+
yes," "
|
189
|
+
yes,N/A
|
190
|
+
yes,n/a
|
191
|
+
yes,NULL
|
192
|
+
yes,null
|
193
|
+
yes,-
|
194
|
+
yes,#DIV/0
|
195
|
+
yes,#REF!
|
196
|
+
yes,#NAME?
|
197
|
+
yes,NIL
|
198
|
+
yes,nil
|
199
|
+
yes,NA
|
200
|
+
yes,na
|
201
|
+
yes,#VALUE!
|
202
|
+
yes,#NULL!
|
203
|
+
yes,NaN
|
204
|
+
yes,#N/A
|
205
|
+
yes,#NUM!
|
206
|
+
yes,?
|
207
|
+
yes,z
|
208
|
+
"""
|
209
|
+
When you pass arguments --detect-missing
|
210
|
+
Then you get output
|
211
|
+
"""
|
212
|
+
a,b
|
213
|
+
yes,x
|
214
|
+
yes,
|
215
|
+
yes,
|
216
|
+
yes,
|
217
|
+
yes,
|
218
|
+
yes,
|
219
|
+
yes,
|
220
|
+
yes,
|
221
|
+
yes,
|
222
|
+
yes,
|
223
|
+
yes,
|
224
|
+
yes,
|
225
|
+
yes,
|
226
|
+
yes,
|
227
|
+
yes,
|
228
|
+
yes,
|
229
|
+
yes,
|
230
|
+
yes,
|
231
|
+
yes,
|
232
|
+
yes,
|
233
|
+
yes,
|
234
|
+
yes,z
|
235
|
+
"""
|
236
|
+
|
237
|
+
Scenario: Dup check one field
|
238
|
+
Given an input csv
|
239
|
+
"""
|
240
|
+
a,b
|
241
|
+
yes,x
|
242
|
+
yes,y
|
243
|
+
no,x
|
244
|
+
"""
|
245
|
+
When you pass arguments --dedup b
|
246
|
+
Then you get output
|
247
|
+
"""
|
248
|
+
a,b
|
249
|
+
yes,x
|
250
|
+
yes,y
|
251
|
+
"""
|
252
|
+
|
253
|
+
Scenario: Dup check multiple fields
|
254
|
+
Given an input csv
|
255
|
+
"""
|
256
|
+
a,b,c
|
257
|
+
yes,x,1
|
258
|
+
yes,y
|
259
|
+
yes,x,2
|
260
|
+
no,x,1
|
261
|
+
"""
|
262
|
+
When you pass arguments --dedup b,c
|
263
|
+
Then you get output
|
264
|
+
"""
|
265
|
+
a,b,c
|
266
|
+
yes,x,1
|
267
|
+
yes,y,
|
268
|
+
yes,x,2
|
269
|
+
"""
|
@@ -0,0 +1,54 @@
|
|
1
|
+
Feature: Composition of tests
|
2
|
+
|
3
|
+
Scenario: Present and missing
|
4
|
+
Given an input csv
|
5
|
+
"""
|
6
|
+
a,b,c
|
7
|
+
yes,x,
|
8
|
+
no,,
|
9
|
+
no,x,x
|
10
|
+
yes,z," "
|
11
|
+
no," "," "
|
12
|
+
"""
|
13
|
+
When you pass arguments --present b --missing c
|
14
|
+
Then you get output
|
15
|
+
"""
|
16
|
+
a,b,c
|
17
|
+
yes,x,
|
18
|
+
yes,z,
|
19
|
+
"""
|
20
|
+
|
21
|
+
Scenario: Field is one of common null/missing values and you don't want those
|
22
|
+
Given an input csv
|
23
|
+
"""
|
24
|
+
a,b
|
25
|
+
yes,x
|
26
|
+
no,
|
27
|
+
no," "
|
28
|
+
no,N/A
|
29
|
+
no,n/a
|
30
|
+
no,NULL
|
31
|
+
no,null
|
32
|
+
no,-
|
33
|
+
no,#DIV/0
|
34
|
+
no,#REF!
|
35
|
+
no,#NAME?
|
36
|
+
no,NIL
|
37
|
+
no,nil
|
38
|
+
no,NA
|
39
|
+
no,na
|
40
|
+
no,#VALUE!
|
41
|
+
no,#NULL!
|
42
|
+
no,NaN
|
43
|
+
no,#N/A
|
44
|
+
no,#NUM!
|
45
|
+
no,?
|
46
|
+
yes,z
|
47
|
+
"""
|
48
|
+
When you pass arguments --detect-missing --present b
|
49
|
+
Then you get output
|
50
|
+
"""
|
51
|
+
a,b
|
52
|
+
yes,x
|
53
|
+
yes,z
|
54
|
+
"""
|
@@ -0,0 +1,67 @@
|
|
1
|
+
Feature: Multi files
|
2
|
+
|
3
|
+
Scenario: Same field order
|
4
|
+
Given an input csv
|
5
|
+
"""
|
6
|
+
a,b
|
7
|
+
a1,b1
|
8
|
+
a2,b2
|
9
|
+
"""
|
10
|
+
And an input csv
|
11
|
+
"""
|
12
|
+
a,b
|
13
|
+
a3,b3
|
14
|
+
a4,b4
|
15
|
+
"""
|
16
|
+
Then you get output
|
17
|
+
"""
|
18
|
+
a,b
|
19
|
+
a1,b1
|
20
|
+
a2,b2
|
21
|
+
a3,b3
|
22
|
+
a4,b4
|
23
|
+
"""
|
24
|
+
|
25
|
+
Scenario: Different field order
|
26
|
+
Given an input csv
|
27
|
+
"""
|
28
|
+
a,b
|
29
|
+
a1,b1
|
30
|
+
a2,b2
|
31
|
+
"""
|
32
|
+
And an input csv
|
33
|
+
"""
|
34
|
+
b,a
|
35
|
+
b3,a3
|
36
|
+
b4,a4
|
37
|
+
"""
|
38
|
+
Then you get output
|
39
|
+
"""
|
40
|
+
a,b
|
41
|
+
a1,b1
|
42
|
+
a2,b2
|
43
|
+
a3,b3
|
44
|
+
a4,b4
|
45
|
+
"""
|
46
|
+
|
47
|
+
Scenario: Extra field somewhere (filled in with nulls)
|
48
|
+
Given an input csv
|
49
|
+
"""
|
50
|
+
a,b
|
51
|
+
a1,b1
|
52
|
+
a2,b2
|
53
|
+
"""
|
54
|
+
And an input csv
|
55
|
+
"""
|
56
|
+
b,a,c
|
57
|
+
b3,a3,c3
|
58
|
+
b4,a4,c4
|
59
|
+
"""
|
60
|
+
Then you get output
|
61
|
+
"""
|
62
|
+
a,b,c
|
63
|
+
a1,b1,
|
64
|
+
a2,b2,
|
65
|
+
a3,b3,c3
|
66
|
+
a4,b4,c4
|
67
|
+
"""
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Cucumber step definitions: write the scenario's CSVs to temp files, run
# bin/cvg on them with the scenario's arguments, and compare its stdout with
# the expected CSV.
require 'tmpdir'
require 'securerandom'

Before do
  @input_csv_paths = []
  @args = []
end

# Remove the scenario's temp files whether or not the expectation passed, so
# failing scenarios do not leak files.
After do
  @input_csv_paths.each do |path|
    # Only ever delete files that live under the system temp directory.
    next unless File.dirname(File.expand_path(path)).start_with?(Dir.tmpdir)
    File.unlink path if File.exist?(path)
  end
end

Given(/^an input csv$/) do |string|
  path = File.join(Dir.tmpdir, "#{SecureRandom.hex(8)}.csv")
  File.write(path, string)
  @input_csv_paths << path
end

When(/^you pass arguments (.+)$/) do |args|
  @args << args.strip
end

Then(/^you get output$/) do |expected_output_csv|
  bin_path = File.expand_path '../../../bin/cvg', __FILE__
  # Kept as a single shell string on purpose: the scenarios rely on shell
  # quoting in their arguments (e.g. --regex 'b:/\d+/').
  cmd = "#{bin_path} #{@args.join(' ')} #{@input_csv_paths.join(' ')}"
  child = POSIX::Spawn::Child.new cmd
  # Surface the command and its stderr to make a failing scenario debuggable.
  if child.err.present?
    $stderr.puts
    $stderr.puts cmd
    $stderr.puts child.err
    $stderr.puts
  end
  expect(child.out.strip).to eq(expected_output_csv.strip)
end
|
data/lib/cvg.rb
ADDED
data/lib/cvg/version.rb
ADDED
data/spec/cvg_spec.rb
ADDED
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,171 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cvg
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Seamus Abshere
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-11-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ! '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ! '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: to_regexp
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.3'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ! '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rspec-expectations
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ! '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ! '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: posix-spawn
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ! '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ! '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
description: Like jq or grep for csv. Combine one or more CSVs while filtering on
|
112
|
+
fields with regular expressions, whitelists, presence, missing, etc.
|
113
|
+
email:
|
114
|
+
- seamus@abshere.net
|
115
|
+
executables:
|
116
|
+
- cvg
|
117
|
+
extensions: []
|
118
|
+
extra_rdoc_files: []
|
119
|
+
files:
|
120
|
+
- .gitignore
|
121
|
+
- .rspec
|
122
|
+
- .travis.yml
|
123
|
+
- Gemfile
|
124
|
+
- LICENSE.txt
|
125
|
+
- README.md
|
126
|
+
- Rakefile
|
127
|
+
- bin/cvg
|
128
|
+
- cvg.gemspec
|
129
|
+
- features/basic.feature
|
130
|
+
- features/composition.feature
|
131
|
+
- features/multi_files.feature
|
132
|
+
- features/step_definitions/cvg_steps.rb
|
133
|
+
- features/support/env.rb
|
134
|
+
- lib/cvg.rb
|
135
|
+
- lib/cvg/version.rb
|
136
|
+
- spec/cvg_spec.rb
|
137
|
+
- spec/spec_helper.rb
|
138
|
+
homepage: https://github.com/seamusabshere/cvg
|
139
|
+
licenses:
|
140
|
+
- MIT
|
141
|
+
metadata: {}
|
142
|
+
post_install_message:
|
143
|
+
rdoc_options: []
|
144
|
+
require_paths:
|
145
|
+
- lib
|
146
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
147
|
+
requirements:
|
148
|
+
- - ! '>='
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
version: '0'
|
151
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
152
|
+
requirements:
|
153
|
+
- - ! '>='
|
154
|
+
- !ruby/object:Gem::Version
|
155
|
+
version: '0'
|
156
|
+
requirements: []
|
157
|
+
rubyforge_project:
|
158
|
+
rubygems_version: 2.1.5
|
159
|
+
signing_key:
|
160
|
+
specification_version: 4
|
161
|
+
summary: Like jq or grep for csv. Combine one or more CSVs while filtering on fields
|
162
|
+
with regular expressions, whitelists, presence, missing, etc.
|
163
|
+
test_files:
|
164
|
+
- features/basic.feature
|
165
|
+
- features/composition.feature
|
166
|
+
- features/multi_files.feature
|
167
|
+
- features/step_definitions/cvg_steps.rb
|
168
|
+
- features/support/env.rb
|
169
|
+
- spec/cvg_spec.rb
|
170
|
+
- spec/spec_helper.rb
|
171
|
+
has_rdoc:
|