cvg 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +18 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +47 -0
- data/Rakefile +6 -0
- data/bin/cvg +251 -0
- data/cvg.gemspec +29 -0
- data/features/basic.feature +269 -0
- data/features/composition.feature +54 -0
- data/features/multi_files.feature +67 -0
- data/features/step_definitions/cvg_steps.rb +32 -0
- data/features/support/env.rb +5 -0
- data/lib/cvg.rb +5 -0
- data/lib/cvg/version.rb +3 -0
- data/spec/cvg_spec.rb +7 -0
- data/spec/spec_helper.rb +2 -0
- metadata +171 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
NzgyMDY5ZWZiODdhZmI3NTE5MGUwZDdjOTQzYjBmYzg0MWZhNzA1OA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NWNlYWNiYzRjYWZjZjQwODc2Y2FjN2ZhZWQzMGQ0OTQ3NGZhYTdiMw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZjE2ZDRjYWU3MGY4MDVjYzBjMTQ4Y2RhZmEyOWVmNjkwNTZhYzgzZjhkYzQx
|
10
|
+
ZTJmYmE4MzE4MzEyNmRhZDIyZTcxNDQzODU0NzIxNGM2ZTBhOWEyM2EyNjAw
|
11
|
+
Yjc3OTVhN2U4NzllYWQ5MTk5NmRkZmFiZmQwMDcxZjM3MGMwYTM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
OTUxZTA1ODMwM2E0NjFlMjVkYjVlNjljNmUyYzk5ZjUwOWZkODZlYzA2NWVi
|
14
|
+
MTE2NDkzNjExNjFiODdlZGMzYmJlYjRlODA3MmMxMmI3ZmIzMDlkYjJiYzAx
|
15
|
+
MWU0NjI5ZDg1OWIxM2EwNjg5ZTMyNGQ3NGY3YWE5M2IyMzU5MDc=
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Seamus Abshere
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# Cvg
|
2
|
+
|
3
|
+
## Examples
|
4
|
+
|
5
|
+
cvg --present colA input.csv
|
6
|
+
|
7
|
+
cvg --missing colA input.csv
|
8
|
+
|
9
|
+
cvg --regex 'colA:/\d+/' input.csv
|
10
|
+
|
11
|
+
cvg --detect-missing input.csv
|
12
|
+
|
13
|
+
cvg --lt 'colA:5' input.csv
|
14
|
+
|
15
|
+
Multiple tests:
|
16
|
+
|
17
|
+
cvg --present colA --missing colB input.csv
|
18
|
+
|
19
|
+
Multi files: (different column order OK)
|
20
|
+
|
21
|
+
cvg --present colA input1.csv input2.csv
|
22
|
+
|
23
|
+
## TODO
|
24
|
+
|
25
|
+
DONE allows special "PRESENT" filter
|
26
|
+
|
27
|
+
DONE takes multiple csvs as input
|
28
|
+
|
29
|
+
DONE allows special "FALSE_OR_BLANK" filter
|
30
|
+
|
31
|
+
DONE combines tests
|
32
|
+
|
33
|
+
DONE allows single string matching
|
34
|
+
|
35
|
+
DONE allows multiple option matching
|
36
|
+
|
37
|
+
DONE allows regex matching
|
38
|
+
|
39
|
+
DONE allows > >= < <= comparisons
|
40
|
+
|
41
|
+
DONE treat as missing %w{ N/A n/a NULL null - #DIV/0 #REF! #NAME? NIL nil NA na #VALUE! #NULL! NaN #N/A #NUM! ? }
|
42
|
+
|
43
|
+
writes report (why rejected row, checks missing and extra columns across input files)
|
44
|
+
|
45
|
+
dup checks on certain cols
|
46
|
+
|
47
|
+
optionally uses minimal set of headers shared by all
|
data/Rakefile
ADDED
data/bin/cvg
ADDED
@@ -0,0 +1,251 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'to_regexp'
|
5
|
+
require 'active_support/core_ext'
|
6
|
+
|
7
|
+
class Cvg
|
8
|
+
class << self
|
9
|
+
# Splits raw command-line arguments into three lists.
#
# * An argument starting with "--" whose name (the text after "--") is a key
#   of FLAGS is a value-less flag.
# * Any other argument starting with "--" is an option key; the NEXT argument
#   is taken verbatim as its value.
# * Everything else is an input path.
#
# The "--" prefix is checked before the FLAGS lookup, so a path such as
# "./detect-missing" (whose characters after the first two happen to spell a
# flag name) is kept as an input path instead of being mistaken for a flag.
#
# @param argv [Array<String>] command-line arguments
# @return [Array] [flags, input_paths, options], where options is an array of
#   [key, value] pairs in the order given
def parse(argv)
  flags = []
  options = []
  input_paths = []

  pending_key = nil
  argv.each do |arg|
    if pending_key
      # The previous argument was an option key; this one is its value.
      options << [pending_key, arg]
      pending_key = nil
    elsif arg.start_with?('--')
      name = arg[2..-1]
      if FLAGS.has_key?(name)
        flags << name
      else
        pending_key = name
      end
    else
      input_paths << arg
    end
  end

  [ flags, input_paths, options ]
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# Base class for a single row test built from one "--key value" option.
# Holds the option key (k) and its raw argument string (arg).
class Test
  attr_reader :k, :arg

  def initialize(k, arg)
    @k, @arg = k, arg
  end

  # Text before the first ':' of the argument (only used by some subclasses).
  def col
    @col ||= arg.split(':', 2).first
  end

  # Text before the first ':' of the argument, parsed as one CSV line
  # (only used by some subclasses).
  def cols
    @cols ||= CSV.parse_line(arg.split(':', 2).first)
  end
end
|
48
|
+
|
49
|
+
# Row test for the 'present' option: the argument is the column name.
class Present < Test
  # Returns a new Present test when the option key is 'present', else nil.
  def self.accept(k, arg)
    return nil unless k == 'present'
    new(k, arg)
  end

  # True when the named cell, converted to a string, answers true to
  # #present? (row.fetch raises if the row has no such column).
  def pass?(row)
    cell = row.fetch(arg)
    cell.to_s.present?
  end
end
|
59
|
+
|
60
|
+
# Row test for the 'missing' option: the argument is the column name.
class Missing < Test
  # Returns a new Missing test when the option key is 'missing', else nil.
  def self.accept(k, arg)
    return nil unless k == 'missing'
    new(k, arg)
  end

  # True when the named cell is empty once converted to a string and stripped
  # (row.fetch raises if the row has no such column).
  def pass?(row)
    row.fetch(arg).to_s.strip.empty?
  end
end
|
70
|
+
|
71
|
+
class Regex < Test
|
72
|
+
# Returns a new test instance when the option key is 'regex', else nil.
def self.accept(k, arg)
  return nil unless k == 'regex'
  new(k, arg)
end
|
77
|
+
# True when the cell in column `col`, converted to a string, matches `regex`;
# always returns a strict boolean.
def pass?(row)
  text = row.fetch(col).to_s
  (text =~ regex) ? true : false
end
|
80
|
+
# The regular expression given after the first ':' of the argument
# (e.g. 'colA:/\d+/'), memoized.
#
# The pattern text is converted with String#to_regexp(detect: true).
#
# @return [Regexp]
# @raise [RuntimeError] when the argument has no ':' part or the conversion
#   yields nothing. Previously an argument without ':' failed with a
#   NoMethodError on nil instead of this message.
def regex
  @regex ||= begin
    pattern = arg.split(':', 2)[1]
    compiled = pattern.to_regexp(detect: true) if pattern
    compiled || raise("#{arg.inspect} doesn't have a valid regex")
  end
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# Row test for the 'value' option: the argument is 'column:v1,v2,...'.
class Value < Test
  # Returns a new Value test when the option key is 'value', else nil.
  def self.accept(k, arg)
    k == 'value' ? new(k, arg) : nil
  end

  # True when the cell in column `col`, as a string, is one of `values`.
  def pass?(row)
    cell = row.fetch(col).to_s
    values.include?(cell)
  end

  # Accepted values: the text after the first ':' parsed as one CSV line.
  def values
    @values ||= CSV.parse_line(arg.split(':', 2)[1])
  end
end
|
99
|
+
|
100
|
+
class GreaterOrLesser < Test
|
101
|
+
# Returns a new test instance when the option key is one of the OPERATOR
# keys, else nil.
def self.accept(k, arg)
  OPERATOR[k] ? new(k, arg) : nil
end

# Converts a value to a Float.
#
# A value matching JUST_A_NUMBER is converted directly; a value that merely
# matches NUMBER first has every NUMBER_FLUFF run removed. Anything else
# yields nil, or raises when `verify` is true.
def self.numify(v, verify = false)
  case v
  when JUST_A_NUMBER then v.to_f
  when NUMBER then v.gsub(NUMBER_FLUFF, '').to_f
  else
    raise "#{v.inspect} not a number" if verify
  end
end
|
116
|
+
# Option key => comparison operator.
OPERATOR = {
  'lt' => :<, 'lte' => :<=,
  'gt' => :>, 'gte' => :>=
}
# Matches any string containing at least one digit.
NUMBER = /\d/
# Matches a string made only of digits and dots, with an optional leading '-'.
JUST_A_NUMBER = /\A-?[\d.]+\z/
# Matches runs of characters other than digits, '.', 'e', 'E', '+' and '-'.
NUMBER_FLUFF = /[^\d.eE\+\-]+/
|
125
|
+
# Compares the numeric value of the cell in column `col` with `threshold`
# using `operator`. Returns nil when the cell cannot be numified.
def pass?(row)
  number = GreaterOrLesser.numify(row.fetch(col))
  number.send(operator, threshold) if number
end
|
130
|
+
# Comparison operator symbol for this test's option key, memoized.
# OPERATOR.fetch raises when the key is unknown.
def operator
  @operator = OPERATOR.fetch(k) unless @operator
  @operator
end
|
133
|
+
# Numeric threshold to compare against: the text after the first ':' of the
# argument (e.g. the 5 in 'colA:5'), memoized.
#
# Only the part after the column name is numified. Passing the whole
# argument let digits in the column name leak into the number, because
# numify strips non-numeric characters ('col1:5' became 15.0).
#
# @return [Float]
# @raise [RuntimeError] from numify when the argument has no numeric part
def threshold
  @threshold ||= GreaterOrLesser.numify(arg.split(':', 2)[1], true)
end
|
136
|
+
end
|
137
|
+
|
138
|
+
class Dedup < Test
|
139
|
+
# Returns a new test instance when the option key is 'dedup', else nil.
def self.accept(k, arg)
  return nil unless k == 'dedup'
  new(k, arg)
end
|
144
|
+
# Value tuples already seen, stored as Hash keys so membership checks take
# constant time. This was an Array of #hash integers, which made every row
# scan all earlier rows and dropped distinct rows whose hashes collided.
#
# @return [Hash{Array => true}]
def registry
  @registry ||= {}
end

# Passes a row only the first time its combination of values in `cols`
# is seen.
#
# @param row [Hash] column name => cell value
# @return [Boolean] true for the first occurrence, false for a repeat
def pass?(row)
  key = row.values_at(*cols)
  return false if registry.key?(key)
  registry[key] = true
end
|
155
|
+
end
|
156
|
+
|
157
|
+
# Flag handler that blanks out cells holding a common "missing" marker.
class DetectMissing
  # Exact (case-sensitive) strings treated as missing.
  MISSING = %w{ N/A n/a NULL null - #DIV/0 #REF! #NAME? NIL nil NA na #VALUE! #NULL! NaN #N/A #NUM! ? }

  # Replaces, in place, every String cell found in MISSING with nil.
  # Returns the row.
  def apply!(row)
    row.each do |k, v|
      next unless v.is_a?(::String)
      row[k] = nil if MISSING.include?(v)
    end
  end
end
|
167
|
+
|
168
|
+
# Test classes offered each "--key value" option, in this order.
TESTS = [Present, Missing, Regex, Value, GreaterOrLesser, Dedup]
# Value-less flags: name (without the leading "--") => handler class.
FLAGS = { 'detect-missing' => DetectMissing }
|
172
|
+
|
173
|
+
attr_reader :options
|
174
|
+
attr_reader :input_paths
|
175
|
+
|
176
|
+
# Parses the command-line arguments and stores the resulting flag names,
# input paths and options.
def initialize(argv)
  parsed = Cvg.parse(argv)
  @flags = parsed[0]
  @input_paths = parsed[1]
  @options = parsed[2]
end
|
179
|
+
|
180
|
+
# Writes the header line, then every input row that passes all tests, then
# closes the output.
def perform
  output_headers

  each_input_row do |row|
    next unless tests.all? { |t| t.pass?(row) }
    output_row(row)
  end

  close_output
end
|
191
|
+
|
192
|
+
private
|
193
|
+
|
194
|
+
# Yields every data row of every input file as a Hash (header => cell),
# after each flag handler has been applied to it.
def each_input_row
  input_paths.each do |path|
    CSV.foreach(path, headers: :first_row) do |csv_row|
      record = csv_row.to_hash
      flags.each { |flag| flag.apply!(record) }
      yield record
    end
  end
end
|
203
|
+
|
204
|
+
# Test instances built from the options, memoized: each option is offered to
# every class in TESTS, and whatever a class accepts is kept, in order.
def tests
  @tests ||= options.each_with_object([]) do |(k, v), found|
    TESTS.each do |test_class|
      candidate = test_class.accept(k, v)
      found << candidate if candidate
    end
  end
end
|
217
|
+
|
218
|
+
# One handler instance per flag name given on the command line, memoized.
# FLAGS.fetch raises for an unknown name.
def flags
  @_flags ||= @flags.map { |name| FLAGS.fetch(name).new }
end
|
225
|
+
|
226
|
+
# Writes one row as a CSV line, with cells ordered by the combined headers.
def output_row(row)
  cells = row.values_at(*headers)
  output_f.puts(cells.to_csv)
end

# Writes the combined headers as a CSV line.
def output_headers
  header_line = headers.to_csv
  output_f.puts(header_line)
end

# Output stream; $stdout unless one has already been set.
def output_f
  @output_f = $stdout unless @output_f
  @output_f
end

# Closes the output stream.
def close_output
  output_f.close
end
|
241
|
+
|
242
|
+
# Union of the header cells of all input files, in first-seen order, memoized.
#
# The first row of each file is read with the CSV parser rather than with
# IO#gets, so a quoted header cell containing a line break is read whole,
# and an empty file contributes no headers instead of passing nil to
# CSV.parse_line.
#
# @return [Array] unique header cells across all input paths
def headers
  @headers ||= input_paths.flat_map do |path|
    CSV.open(path) { |csv| csv.shift } || []
  end.uniq
end
|
247
|
+
|
248
|
+
|
249
|
+
end
|
250
|
+
|
251
|
+
Cvg.new(ARGV).perform
|
data/cvg.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'cvg/version'
|
5
|
+
|
6
|
+
# Gem metadata for cvg.
Gem::Specification.new do |gem|
  # Identity
  gem.name = "cvg"
  gem.version = Cvg::VERSION
  gem.authors = ["Seamus Abshere"]
  gem.email = ["seamus@abshere.net"]
  gem.description = %q{Like jq or grep for csv. Combine one or more CSVs while filtering on fields with regular expressions, whitelists, presence, missing, etc.}
  gem.summary = %q{Like jq or grep for csv. Combine one or more CSVs while filtering on fields with regular expressions, whitelists, presence, missing, etc.}
  gem.homepage = "https://github.com/seamusabshere/cvg"
  gem.license = "MIT"

  # Packaged files: everything git tracks; bin/ entries become executables.
  gem.files = `git ls-files`.split($/)
  gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies
  gem.add_runtime_dependency "activesupport"
  gem.add_runtime_dependency "to_regexp"

  # Development dependencies
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "rspec"
  gem.add_development_dependency "rspec-expectations"
  gem.add_development_dependency "posix-spawn"
end
|
@@ -0,0 +1,269 @@
|
|
1
|
+
Feature: Basic functionality
|
2
|
+
|
3
|
+
Scenario: Field is present
|
4
|
+
Given an input csv
|
5
|
+
"""
|
6
|
+
a,b
|
7
|
+
yes,x
|
8
|
+
no,
|
9
|
+
yes,_
|
10
|
+
no," "
|
11
|
+
"""
|
12
|
+
When you pass arguments --present b
|
13
|
+
Then you get output
|
14
|
+
"""
|
15
|
+
a,b
|
16
|
+
yes,x
|
17
|
+
yes,_
|
18
|
+
"""
|
19
|
+
|
20
|
+
Scenario: Field is missing
|
21
|
+
Given an input csv
|
22
|
+
"""
|
23
|
+
a,b
|
24
|
+
no,x
|
25
|
+
yes,
|
26
|
+
no,_
|
27
|
+
yes," "
|
28
|
+
"""
|
29
|
+
When you pass arguments --missing b
|
30
|
+
Then you get output
|
31
|
+
"""
|
32
|
+
a,b
|
33
|
+
yes,
|
34
|
+
yes,
|
35
|
+
"""
|
36
|
+
|
37
|
+
Scenario: Field matches regex
|
38
|
+
Given an input csv
|
39
|
+
"""
|
40
|
+
a,b
|
41
|
+
yes,1
|
42
|
+
no,
|
43
|
+
yes,11
|
44
|
+
no,a
|
45
|
+
yes,a1
|
46
|
+
yes,1a
|
47
|
+
"""
|
48
|
+
When you pass arguments --regex 'b:/\d+/'
|
49
|
+
Then you get output
|
50
|
+
"""
|
51
|
+
a,b
|
52
|
+
yes,1
|
53
|
+
yes,11
|
54
|
+
yes,a1
|
55
|
+
yes,1a
|
56
|
+
"""
|
57
|
+
|
58
|
+
Scenario: Field is value
|
59
|
+
Given an input csv
|
60
|
+
"""
|
61
|
+
a,b
|
62
|
+
yes,z
|
63
|
+
no,a
|
64
|
+
no,
|
65
|
+
no,zz
|
66
|
+
no,ZZ
|
67
|
+
"""
|
68
|
+
When you pass arguments --value 'b:z'
|
69
|
+
Then you get output
|
70
|
+
"""
|
71
|
+
a,b
|
72
|
+
yes,z
|
73
|
+
"""
|
74
|
+
|
75
|
+
Scenario: Field one of values
|
76
|
+
Given an input csv
|
77
|
+
"""
|
78
|
+
a,b
|
79
|
+
yes,z
|
80
|
+
no,a
|
81
|
+
no,
|
82
|
+
no,zz
|
83
|
+
no,ZZ
|
84
|
+
yes,Z
|
85
|
+
"""
|
86
|
+
When you pass arguments --value 'b:z,Z'
|
87
|
+
Then you get output
|
88
|
+
"""
|
89
|
+
a,b
|
90
|
+
yes,z
|
91
|
+
yes,Z
|
92
|
+
"""
|
93
|
+
|
94
|
+
Scenario: Less than
|
95
|
+
Given an input csv
|
96
|
+
"""
|
97
|
+
a,b
|
98
|
+
yes,-1
|
99
|
+
yes,-1.0
|
100
|
+
no,0
|
101
|
+
no,0.0
|
102
|
+
no,1
|
103
|
+
no,1.0
|
104
|
+
no,
|
105
|
+
no,z
|
106
|
+
"""
|
107
|
+
When you pass arguments --lt 'b:0'
|
108
|
+
Then you get output
|
109
|
+
"""
|
110
|
+
a,b
|
111
|
+
yes,-1
|
112
|
+
yes,-1.0
|
113
|
+
"""
|
114
|
+
|
115
|
+
Scenario: Less than or equal to
|
116
|
+
Given an input csv
|
117
|
+
"""
|
118
|
+
a,b
|
119
|
+
yes,-1
|
120
|
+
yes,-1.0
|
121
|
+
yes,0
|
122
|
+
yes,0.0
|
123
|
+
no,1
|
124
|
+
no,1.0
|
125
|
+
no,
|
126
|
+
no,z
|
127
|
+
"""
|
128
|
+
When you pass arguments --lte 'b:0'
|
129
|
+
Then you get output
|
130
|
+
"""
|
131
|
+
a,b
|
132
|
+
yes,-1
|
133
|
+
yes,-1.0
|
134
|
+
yes,0
|
135
|
+
yes,0.0
|
136
|
+
"""
|
137
|
+
|
138
|
+
Scenario: Greater than
|
139
|
+
Given an input csv
|
140
|
+
"""
|
141
|
+
a,b
|
142
|
+
no,-1
|
143
|
+
no,-1.0
|
144
|
+
no,0
|
145
|
+
no,0.0
|
146
|
+
yes,1
|
147
|
+
yes,1.0
|
148
|
+
no,
|
149
|
+
no,z
|
150
|
+
"""
|
151
|
+
When you pass arguments --gt 'b:0'
|
152
|
+
Then you get output
|
153
|
+
"""
|
154
|
+
a,b
|
155
|
+
yes,1
|
156
|
+
yes,1.0
|
157
|
+
"""
|
158
|
+
|
159
|
+
Scenario: Greater than or equal to
|
160
|
+
Given an input csv
|
161
|
+
"""
|
162
|
+
a,b
|
163
|
+
no,-1
|
164
|
+
no,-1.0
|
165
|
+
yes,0
|
166
|
+
yes,0.0
|
167
|
+
yes,1
|
168
|
+
yes,1.0
|
169
|
+
no,
|
170
|
+
no,z
|
171
|
+
"""
|
172
|
+
When you pass arguments --gte 'b:0'
|
173
|
+
Then you get output
|
174
|
+
"""
|
175
|
+
a,b
|
176
|
+
yes,0
|
177
|
+
yes,0.0
|
178
|
+
yes,1
|
179
|
+
yes,1.0
|
180
|
+
"""
|
181
|
+
|
182
|
+
Scenario: Field is one of common null/missing values
|
183
|
+
Given an input csv
|
184
|
+
"""
|
185
|
+
a,b
|
186
|
+
yes,x
|
187
|
+
yes,
|
188
|
+
yes," "
|
189
|
+
yes,N/A
|
190
|
+
yes,n/a
|
191
|
+
yes,NULL
|
192
|
+
yes,null
|
193
|
+
yes,-
|
194
|
+
yes,#DIV/0
|
195
|
+
yes,#REF!
|
196
|
+
yes,#NAME?
|
197
|
+
yes,NIL
|
198
|
+
yes,nil
|
199
|
+
yes,NA
|
200
|
+
yes,na
|
201
|
+
yes,#VALUE!
|
202
|
+
yes,#NULL!
|
203
|
+
yes,NaN
|
204
|
+
yes,#N/A
|
205
|
+
yes,#NUM!
|
206
|
+
yes,?
|
207
|
+
yes,z
|
208
|
+
"""
|
209
|
+
When you pass arguments --detect-missing
|
210
|
+
Then you get output
|
211
|
+
"""
|
212
|
+
a,b
|
213
|
+
yes,x
|
214
|
+
yes,
|
215
|
+
yes,
|
216
|
+
yes,
|
217
|
+
yes,
|
218
|
+
yes,
|
219
|
+
yes,
|
220
|
+
yes,
|
221
|
+
yes,
|
222
|
+
yes,
|
223
|
+
yes,
|
224
|
+
yes,
|
225
|
+
yes,
|
226
|
+
yes,
|
227
|
+
yes,
|
228
|
+
yes,
|
229
|
+
yes,
|
230
|
+
yes,
|
231
|
+
yes,
|
232
|
+
yes,
|
233
|
+
yes,
|
234
|
+
yes,z
|
235
|
+
"""
|
236
|
+
|
237
|
+
Scenario: Dup check one field
|
238
|
+
Given an input csv
|
239
|
+
"""
|
240
|
+
a,b
|
241
|
+
yes,x
|
242
|
+
yes,y
|
243
|
+
no,x
|
244
|
+
"""
|
245
|
+
When you pass arguments --dedup b
|
246
|
+
Then you get output
|
247
|
+
"""
|
248
|
+
a,b
|
249
|
+
yes,x
|
250
|
+
yes,y
|
251
|
+
"""
|
252
|
+
|
253
|
+
Scenario: Dup check multiple fields
|
254
|
+
Given an input csv
|
255
|
+
"""
|
256
|
+
a,b,c
|
257
|
+
yes,x,1
|
258
|
+
yes,y
|
259
|
+
yes,x,2
|
260
|
+
no,x,1
|
261
|
+
"""
|
262
|
+
When you pass arguments --dedup b,c
|
263
|
+
Then you get output
|
264
|
+
"""
|
265
|
+
a,b,c
|
266
|
+
yes,x,1
|
267
|
+
yes,y,
|
268
|
+
yes,x,2
|
269
|
+
"""
|
@@ -0,0 +1,54 @@
|
|
1
|
+
Feature: Composition of tests
|
2
|
+
|
3
|
+
Scenario: Present and missing
|
4
|
+
Given an input csv
|
5
|
+
"""
|
6
|
+
a,b,c
|
7
|
+
yes,x,
|
8
|
+
no,,
|
9
|
+
no,x,x
|
10
|
+
yes,z," "
|
11
|
+
no," "," "
|
12
|
+
"""
|
13
|
+
When you pass arguments --present b --missing c
|
14
|
+
Then you get output
|
15
|
+
"""
|
16
|
+
a,b,c
|
17
|
+
yes,x,
|
18
|
+
yes,z,
|
19
|
+
"""
|
20
|
+
|
21
|
+
Scenario: Field is one of common null/missing values and you don't want those
|
22
|
+
Given an input csv
|
23
|
+
"""
|
24
|
+
a,b
|
25
|
+
yes,x
|
26
|
+
no,
|
27
|
+
no," "
|
28
|
+
no,N/A
|
29
|
+
no,n/a
|
30
|
+
no,NULL
|
31
|
+
no,null
|
32
|
+
no,-
|
33
|
+
no,#DIV/0
|
34
|
+
no,#REF!
|
35
|
+
no,#NAME?
|
36
|
+
no,NIL
|
37
|
+
no,nil
|
38
|
+
no,NA
|
39
|
+
no,na
|
40
|
+
no,#VALUE!
|
41
|
+
no,#NULL!
|
42
|
+
no,NaN
|
43
|
+
no,#N/A
|
44
|
+
no,#NUM!
|
45
|
+
no,?
|
46
|
+
yes,z
|
47
|
+
"""
|
48
|
+
When you pass arguments --detect-missing --present b
|
49
|
+
Then you get output
|
50
|
+
"""
|
51
|
+
a,b
|
52
|
+
yes,x
|
53
|
+
yes,z
|
54
|
+
"""
|
@@ -0,0 +1,67 @@
|
|
1
|
+
Feature: Multi files
|
2
|
+
|
3
|
+
Scenario: Same field order
|
4
|
+
Given an input csv
|
5
|
+
"""
|
6
|
+
a,b
|
7
|
+
a1,b1
|
8
|
+
a2,b2
|
9
|
+
"""
|
10
|
+
And an input csv
|
11
|
+
"""
|
12
|
+
a,b
|
13
|
+
a3,b3
|
14
|
+
a4,b4
|
15
|
+
"""
|
16
|
+
Then you get output
|
17
|
+
"""
|
18
|
+
a,b
|
19
|
+
a1,b1
|
20
|
+
a2,b2
|
21
|
+
a3,b3
|
22
|
+
a4,b4
|
23
|
+
"""
|
24
|
+
|
25
|
+
Scenario: Different field order
|
26
|
+
Given an input csv
|
27
|
+
"""
|
28
|
+
a,b
|
29
|
+
a1,b1
|
30
|
+
a2,b2
|
31
|
+
"""
|
32
|
+
And an input csv
|
33
|
+
"""
|
34
|
+
b,a
|
35
|
+
b3,a3
|
36
|
+
b4,a4
|
37
|
+
"""
|
38
|
+
Then you get output
|
39
|
+
"""
|
40
|
+
a,b
|
41
|
+
a1,b1
|
42
|
+
a2,b2
|
43
|
+
a3,b3
|
44
|
+
a4,b4
|
45
|
+
"""
|
46
|
+
|
47
|
+
Scenario: Extra field somewhere (filled in with nulls)
|
48
|
+
Given an input csv
|
49
|
+
"""
|
50
|
+
a,b
|
51
|
+
a1,b1
|
52
|
+
a2,b2
|
53
|
+
"""
|
54
|
+
And an input csv
|
55
|
+
"""
|
56
|
+
b,a,c
|
57
|
+
b3,a3,c3
|
58
|
+
b4,a4,c4
|
59
|
+
"""
|
60
|
+
Then you get output
|
61
|
+
"""
|
62
|
+
a,b,c
|
63
|
+
a1,b1,
|
64
|
+
a2,b2,
|
65
|
+
a3,b3,c3
|
66
|
+
a4,b4,c4
|
67
|
+
"""
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Start every scenario with no input files and no arguments.
Before do
  @input_csv_paths, @args = [], []
end
|
5
|
+
|
6
|
+
# Writes the given CSV text to a randomly named file in the temp directory
# and records its path.
Given(/^an input csv$/) do |string|
  file_name = "#{rand.to_s}.csv"
  path = File.join(Dir.tmpdir, file_name)
  File.write(path, string)
  @input_csv_paths.push(path)
end
|
11
|
+
|
12
|
+
# Records the (stripped) argument string to put on the command line.
When(/^you pass arguments (.+)$/) do |args|
  @args.push(args.strip)
end
|
15
|
+
|
16
|
+
# Runs bin/cvg with the recorded arguments and input files, echoes anything
# the child wrote to stderr, and compares its stripped stdout with the
# expected CSV.
#
# The temporary input files are removed in an `ensure` clause so they are
# cleaned up even when the expectation fails; previously a failing scenario
# left them behind.
Then(/^you get output$/) do |expected_output_csv|
  begin
    bin_path = File.expand_path '../../../bin/cvg', __FILE__
    cmd = "#{bin_path} #{@args.join(' ')} #{@input_csv_paths.join(' ')}"
    child = POSIX::Spawn::Child.new cmd
    if child.err.present?
      $stderr.puts
      $stderr.puts cmd
      $stderr.puts child.err
      $stderr.puts
    end
    expect(child.out.strip).to eq(expected_output_csv.strip)
  ensure
    @input_csv_paths.each do |path|
      # Only delete files that live under the temp directory.
      next unless File.dirname(File.expand_path(path)).start_with?(Dir.tmpdir)
      File.unlink(path) if File.exist?(path)
    end
  end
end
|
data/lib/cvg.rb
ADDED
data/lib/cvg/version.rb
ADDED
data/spec/cvg_spec.rb
ADDED
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,171 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cvg
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Seamus Abshere
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-11-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ! '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ! '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: to_regexp
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.3'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ! '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rspec-expectations
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ! '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ! '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: posix-spawn
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ! '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ! '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
description: Like jq or grep for csv. Combine one or more CSVs while filtering on
|
112
|
+
fields with regular expressions, whitelists, presence, missing, etc.
|
113
|
+
email:
|
114
|
+
- seamus@abshere.net
|
115
|
+
executables:
|
116
|
+
- cvg
|
117
|
+
extensions: []
|
118
|
+
extra_rdoc_files: []
|
119
|
+
files:
|
120
|
+
- .gitignore
|
121
|
+
- .rspec
|
122
|
+
- .travis.yml
|
123
|
+
- Gemfile
|
124
|
+
- LICENSE.txt
|
125
|
+
- README.md
|
126
|
+
- Rakefile
|
127
|
+
- bin/cvg
|
128
|
+
- cvg.gemspec
|
129
|
+
- features/basic.feature
|
130
|
+
- features/composition.feature
|
131
|
+
- features/multi_files.feature
|
132
|
+
- features/step_definitions/cvg_steps.rb
|
133
|
+
- features/support/env.rb
|
134
|
+
- lib/cvg.rb
|
135
|
+
- lib/cvg/version.rb
|
136
|
+
- spec/cvg_spec.rb
|
137
|
+
- spec/spec_helper.rb
|
138
|
+
homepage: https://github.com/seamusabshere/cvg
|
139
|
+
licenses:
|
140
|
+
- MIT
|
141
|
+
metadata: {}
|
142
|
+
post_install_message:
|
143
|
+
rdoc_options: []
|
144
|
+
require_paths:
|
145
|
+
- lib
|
146
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
147
|
+
requirements:
|
148
|
+
- - ! '>='
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
version: '0'
|
151
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
152
|
+
requirements:
|
153
|
+
- - ! '>='
|
154
|
+
- !ruby/object:Gem::Version
|
155
|
+
version: '0'
|
156
|
+
requirements: []
|
157
|
+
rubyforge_project:
|
158
|
+
rubygems_version: 2.1.5
|
159
|
+
signing_key:
|
160
|
+
specification_version: 4
|
161
|
+
summary: Like jq or grep for csv. Combine one or more CSVs while filtering on fields
|
162
|
+
with regular expressions, whitelists, presence, missing, etc.
|
163
|
+
test_files:
|
164
|
+
- features/basic.feature
|
165
|
+
- features/composition.feature
|
166
|
+
- features/multi_files.feature
|
167
|
+
- features/step_definitions/cvg_steps.rb
|
168
|
+
- features/support/env.rb
|
169
|
+
- spec/cvg_spec.rb
|
170
|
+
- spec/spec_helper.rb
|
171
|
+
has_rdoc:
|