csv-probe 0.1.0 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -0
- data/CHANGELOG.md +15 -0
- data/README.md +13 -6
- data/csv-probe.gemspec +3 -1
- data/lib/csv/probe/checks.rb +47 -17
- data/lib/csv/probe/version.rb +1 -1
- data/lib/csv/probe.rb +3 -3
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2425404adecef9f68fced84863e0acb087f3358935032c3849cab690948e74dc
|
4
|
+
data.tar.gz: 0bf460f9f27439cdcba3150c2f53991ab07512e6e1ffb5195315fdad5689c549
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 721e33de3a86d0513dcaab1d0d102454fbcf48492dd6d551ec1f5623de6c0b7ba74189ea7d6ae8825eb34674b2679f251e50c08e87fabf1ae18a993cdc72eaff
|
7
|
+
data.tar.gz: 042b6b18255ddbb787c7318febbc31147e4e593cbedf67047bc25c962491a9a46b3e664841a18ea3b8bfb25ef4025455391a4c7571dbf1a37072bb75c64967f8
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,20 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.1.4] - 2022-01-05
|
4
|
+
- Fix `nil` handling for list + set column checks when the list or set is something like this:
|
5
|
+
- `a,|b|,b` leading and trailing empty items or
|
6
|
+
- `a,,b` the whole list/set string is nil
|
7
|
+
|
8
|
+
## [0.1.3] - 2022-01-04
|
9
|
+
- Add better `nil` handling to list + set column checks. `nil` or `""` no longer implicitly passes the ColumnIsListWithDomain and ColumnIsSetWithDomain checks; `nil` or `""` must now be included in the expected items in order to pass the check.
|
10
|
+
|
11
|
+
## [0.1.2] - 2022-01-03
|
12
|
+
- Fix README.md
|
13
|
+
|
14
|
+
## [0.1.1] - 2022-01-02
|
15
|
+
- Fix rubocop warnings
|
16
|
+
- Improve README.md
|
17
|
+
|
3
18
|
## [0.1.0] - 2022-01-02
|
4
19
|
|
5
20
|
- Initial release
|
data/README.md
CHANGED
@@ -23,7 +23,10 @@ Or install it yourself as:
|
|
23
23
|
|
24
24
|
Example how to use Probe
|
25
25
|
|
26
|
-
```
|
26
|
+
```ruby
|
27
|
+
# load csv-probe gem
|
28
|
+
require 'csv/probe'
|
29
|
+
|
27
30
|
# load CSV into CSV::Table
|
28
31
|
csv_table = CSV.parse(<<~ROWS, headers: true)
|
29
32
|
col1,col2,col3,col4,col5,col6,col7
|
@@ -31,13 +34,17 @@ Example how to use Probe
|
|
31
34
|
0,PA,customer,2,03-12-2021,SHA;AN;RU,badger|ant
|
32
35
|
0,TE,guest,1000,06-11-2021,RU,spider|racoon|ant
|
33
36
|
ROWS
|
37
|
+
|
38
|
+
# define linting rules
|
34
39
|
checks = [Probe::ColumnIsEqualTo.new("col1", "0"),
|
35
40
|
Probe::ColumnMatchesRegEx.new("col2", /^(NU|PA|TE)$/),
|
36
|
-
Probe::ColumnIsOneOf.new("col3", [
|
37
|
-
Probe::ColumnMeetsCondition.new("col4",
|
41
|
+
Probe::ColumnIsOneOf.new("col3", %w[customer guest]),
|
42
|
+
Probe::ColumnMeetsCondition.new("col4", lambda { |val, _opts|
|
43
|
+
!Integer(val, exception: false).nil?
|
44
|
+
}, "Not an Integer"),
|
38
45
|
Probe::ColumnIsDate.new("col5", "%d-%m-%Y"),
|
39
|
-
Probe::ColumnIsListWithDomain.new("col6", [
|
40
|
-
Probe::ColumnIsSetWithDomain.new("col7", [
|
46
|
+
Probe::ColumnIsListWithDomain.new("col6", %w[FO RU SHA AN], ";"),
|
47
|
+
Probe::ColumnIsSetWithDomain.new("col7", %w[eagle hawk badger spider racoon ant], "|")]
|
41
48
|
|
42
49
|
# lint table
|
43
50
|
csv_table.lint(checks)
|
@@ -54,7 +61,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
54
61
|
|
55
62
|
## Contributing
|
56
63
|
|
57
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
64
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/homebase-dev/csv-probe.
|
58
65
|
|
59
66
|
## License
|
60
67
|
|
data/csv-probe.gemspec
CHANGED
@@ -9,12 +9,14 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.email = ["homebase.dev@gmail.com"]
|
10
10
|
|
11
11
|
spec.summary = "Allows validation of CSV::Table or CSV::Rows via custom rules"
|
12
|
-
spec.description = "This gem provides a simple framework for CSV::Table/CSV::Row
|
12
|
+
spec.description = "This gem provides a simple framework for CSV::Table/CSV::Row linting"\
|
13
|
+
"using custom rules for columns and rows"
|
13
14
|
spec.homepage = "https://gitlab.com/homebase-dev/csv-probe"
|
14
15
|
spec.license = "MIT"
|
15
16
|
spec.required_ruby_version = ">= 2.6.0"
|
16
17
|
|
17
18
|
# spec.metadata["allowed_push_host"] = "TODO: Set to your gem server 'https://example.com'"
|
19
|
+
spec.metadata["rubygems_mfa_required"] = "true"
|
18
20
|
|
19
21
|
spec.metadata["homepage_uri"] = spec.homepage
|
20
22
|
spec.metadata["source_code_uri"] = "https://gitlab.com/homebase-dev/csv-probe"
|
data/lib/csv/probe/checks.rb
CHANGED
@@ -5,30 +5,35 @@ module Probe
|
|
5
5
|
|
6
6
|
class LintingError < Error; end
|
7
7
|
|
8
|
+
# Linting error in CSV::Row, if the rule applies to multiple columns of a row
|
8
9
|
class RowError < LintingError
|
9
10
|
def initialize(msg = "CheckRowError")
|
10
11
|
super
|
11
12
|
end
|
12
13
|
end
|
13
14
|
|
15
|
+
# Linting warning in CSV::Row, if the rule applies to multiple columns of a row
|
14
16
|
class RowWarning < LintingError
|
15
17
|
def initialize(msg = "CheckRowWarning")
|
16
18
|
super
|
17
19
|
end
|
18
20
|
end
|
19
21
|
|
22
|
+
# Linting error in CSV::Column, if the rule applies to a single column of a row
|
20
23
|
class ColumnError < LintingError
|
21
24
|
def initialize(msg = "CheckColumnError")
|
22
25
|
super
|
23
26
|
end
|
24
27
|
end
|
25
28
|
|
29
|
+
# Linting warning in CSV::Column, if the rule applies to a single column of a row
|
26
30
|
class ColumnWarning < LintingError
|
27
31
|
def initialize(msg = "CheckColumnWarning")
|
28
32
|
super
|
29
33
|
end
|
30
34
|
end
|
31
35
|
|
36
|
+
# Abstract class of a CSV::Row check
|
32
37
|
class RowMeetsCondition
|
33
38
|
attr_accessor :ok_condition_fn, :fail_msg, :severity, :pre_checks
|
34
39
|
|
@@ -102,6 +107,7 @@ module Probe
|
|
102
107
|
end
|
103
108
|
end
|
104
109
|
|
110
|
+
# Abstract class of a CSV::Row single field (= column) check
|
105
111
|
class ColumnMeetsCondition < RowMeetsCondition
|
106
112
|
attr_accessor :varname, :ok_condition_fn, :fail_msg
|
107
113
|
|
@@ -120,10 +126,14 @@ module Probe
|
|
120
126
|
|
121
127
|
def evaluate(row, opts = {})
|
122
128
|
evaluate_pre_checks(row, opts)
|
123
|
-
|
129
|
+
|
130
|
+
unless @ok_condition_fn.call(row.fetch(@varname), opts) # rubocop:disable Style/GuardClause
|
131
|
+
raise @error_classes.fetch(@severity), error_msg(row, opts)
|
132
|
+
end
|
124
133
|
end
|
125
134
|
end
|
126
135
|
|
136
|
+
# Check if a column value is equal to a provided value (Pay attention to types!)
|
127
137
|
class ColumnIsEqualTo < ColumnMeetsCondition
|
128
138
|
def initialize(varname, expected_val, _placeholder = nil)
|
129
139
|
super(varname, nil, nil)
|
@@ -132,6 +142,7 @@ module Probe
|
|
132
142
|
end
|
133
143
|
end
|
134
144
|
|
145
|
+
# Check if a column value is one of a list of provided values
|
135
146
|
class ColumnIsOneOf < ColumnMeetsCondition
|
136
147
|
def initialize(varname, expected_vals_arr, _placeholder = nil)
|
137
148
|
super(varname, nil, nil)
|
@@ -140,8 +151,9 @@ module Probe
|
|
140
151
|
end
|
141
152
|
end
|
142
153
|
|
154
|
+
# Check if a column value is a date with a given format
|
143
155
|
class ColumnIsDate < ColumnMeetsCondition
|
144
|
-
def initialize(varname, expected_date_format, _placeholder = nil)
|
156
|
+
def initialize(varname, expected_date_format, _placeholder = nil) # rubocop:disable Metrics/MethodLength
|
145
157
|
super(varname, nil, nil)
|
146
158
|
@ok_condition_fn = lambda { |val, _cfg|
|
147
159
|
success = true
|
@@ -159,11 +171,13 @@ module Probe
|
|
159
171
|
# class ColumnIsInteger < ColumnMeetsCondition
|
160
172
|
# def initialize(varname, placeholder1=nil, placeholder=nil)
|
161
173
|
# super(varname, nil, nil)
|
162
|
-
# @ok_condition_fn = ->(val, cfg){ val.is_a? Integer }
|
174
|
+
# @ok_condition_fn = ->(val, cfg){ val.is_a? Integer }
|
175
|
+
# # TODO: ATTENTION 12abc -> integer, also 1.1 -> 1 ! better do it over regex?
|
163
176
|
# @fail_msg = "expected to be an Integer"
|
164
177
|
# end
|
165
178
|
# end
|
166
179
|
|
180
|
+
# Check if a column value matches a given regular expression
|
167
181
|
class ColumnMatchesRegEx < ColumnMeetsCondition
|
168
182
|
def initialize(varname, expected_regex_pattern, _placeholder = nil)
|
169
183
|
super(varname, nil, nil)
|
@@ -172,56 +186,72 @@ module Probe
|
|
172
186
|
end
|
173
187
|
end
|
174
188
|
|
175
|
-
#
|
189
|
+
# Check if all items of a tokenized column value are contained in a list of given values
|
176
190
|
class ColumnIsListWithDomain < ColumnMeetsCondition
|
177
|
-
def initialize(varname, expected_items_arr, separator, _placeholder = nil)
|
191
|
+
def initialize(varname, expected_items_arr, separator, _placeholder = nil) # rubocop:disable Metrics/MethodLength
|
178
192
|
super(varname, nil, nil)
|
179
193
|
@ok_condition_fn = lambda { |val, _cfg|
|
180
|
-
|
194
|
+
expected_items_arr.map!(&:to_s) # turn nil -> ""
|
195
|
+
|
196
|
+
items = val.to_s.split(separator, -1)
|
197
|
+
return true if items.empty? && expected_items_arr.include?(nil.to_s) # empty str allowed
|
198
|
+
|
199
|
+
return false if items.empty?
|
200
|
+
|
181
201
|
return items.all? { |e| expected_items_arr.include?(e) }
|
182
202
|
}
|
183
203
|
@fail_msg = lambda { |row, _opts|
|
184
|
-
items = row.fetch(@varname).split(separator)
|
204
|
+
items = row.fetch(@varname).to_s.split(separator, -1)
|
185
205
|
diff_items = items - expected_items_arr
|
186
|
-
"expected that tokenized items of value #{items.inspect} are a subset of
|
206
|
+
"expected that tokenized items of value #{items.inspect} are a subset of "\
|
207
|
+
"#{expected_items_arr.inspect}, but items #{diff_items.inspect} are not"
|
187
208
|
}
|
188
209
|
end
|
189
210
|
end
|
190
211
|
|
212
|
+
# Check if the items of a tokenized column value are unique (no duplicates)
|
191
213
|
class ColumnIsSet < ColumnMeetsCondition
|
192
|
-
def initialize(varname, separator, _placeholder = nil)
|
214
|
+
def initialize(varname, separator, _placeholder = nil) # rubocop:disable Metrics/MethodLength
|
193
215
|
super(varname, nil, nil)
|
194
216
|
@ok_condition_fn = lambda { |val, _cfg|
|
195
|
-
return true if val.to_s ==
|
217
|
+
return true if val.to_s == ''
|
196
218
|
|
197
|
-
items = val.split(separator)
|
219
|
+
items = val.to_s.split(separator, -1)
|
198
220
|
all_uniq = (items.size == items.uniq.size)
|
199
221
|
return all_uniq
|
200
222
|
}
|
201
223
|
@fail_msg = lambda { |row, _opts|
|
202
|
-
items = row.fetch(@varname).split(separator)
|
224
|
+
items = row.fetch(@varname).to_s.split(separator, -1)
|
203
225
|
non_uniq_items = items.detect { |e| items.count(e) > 1 }
|
204
226
|
"expected that items of tokenized value #{items.inspect} are uniqe, but items #{non_uniq_items.inspect} are not"
|
205
227
|
}
|
206
228
|
end
|
207
229
|
end
|
208
230
|
|
231
|
+
# Check if a tokenized column value is a set of given values (no duplicates)
|
209
232
|
class ColumnIsSetWithDomain < ColumnMeetsCondition
|
210
|
-
def initialize(varname, expected_items_arr, separator, _placeholder = nil)
|
233
|
+
def initialize(varname, expected_items_arr, separator, _placeholder = nil) # rubocop:disable Metrics/MethodLength
|
211
234
|
super(varname, nil, nil)
|
212
235
|
@pre_checks << ColumnIsSet.new(varname, separator)
|
213
236
|
@ok_condition_fn = lambda { |val, _cfg|
|
214
|
-
|
237
|
+
expected_items_arr.map!(&:to_s) # turn nil -> ""
|
238
|
+
|
239
|
+
items = val.to_s.split(separator, -1)
|
240
|
+
return true if items.empty? && expected_items_arr.include?(nil.to_s) # empty str allowed
|
241
|
+
|
242
|
+
return false if items.empty?
|
215
243
|
|
216
|
-
items = val.split(separator)
|
217
244
|
all_valid = items.all? { |i| expected_items_arr.include?(i) }
|
218
245
|
return all_valid
|
219
246
|
}
|
220
247
|
@fail_msg = lambda { |row, _opts|
|
221
|
-
items = row.fetch(@varname).split(separator)
|
248
|
+
items = row.fetch(@varname).to_s.split(separator, -1)
|
222
249
|
"expected that items of tokenized value #{items.inspect} are a subset of #{expected_items_arr.inspect}"
|
223
250
|
}
|
224
|
-
# @fail_msg =
|
251
|
+
# @fail_msg = lambda {|row, opts|
|
252
|
+
# "Unexpected value:#{row.fetch(@varname).inspect} for column:#{@varname.inspect},
|
253
|
+
# expected that items of tokenized value #{row.fetch(@varname).split(separator)}
|
254
|
+
# are uniqe and a subset of #{expected_items_arr.inspect}" }
|
225
255
|
end
|
226
256
|
end
|
227
257
|
|
data/lib/csv/probe/version.rb
CHANGED
data/lib/csv/probe.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "csv"
|
3
|
+
require "csv" # TODO: should i require it here?
|
4
4
|
require "terminal-table"
|
5
5
|
require_relative "probe/version"
|
6
6
|
require_relative "probe/checks"
|
@@ -43,7 +43,7 @@ module Probe
|
|
43
43
|
end
|
44
44
|
|
45
45
|
# Extend CSV::Table with .lint(...) method
|
46
|
-
class CSV::Table
|
46
|
+
class CSV::Table # rubocop:disable Style/ClassAndModuleChildren
|
47
47
|
def lint(checks, opts = {})
|
48
48
|
opts[:headers] = true unless opts.key?(:headers) # CSV::Table has always headers, hence set :headers = true
|
49
49
|
Probe.lint_rows(self, checks, opts)
|
@@ -51,7 +51,7 @@ class CSV::Table
|
|
51
51
|
end
|
52
52
|
|
53
53
|
# Extend CSV::Row with .lint(...) method
|
54
|
-
class CSV::Row
|
54
|
+
class CSV::Row # rubocop:disable Style/ClassAndModuleChildren
|
55
55
|
def lint(checks, opts = {})
|
56
56
|
Probe.lint_row(self, checks, opts)
|
57
57
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-probe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- homebase.dev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-01-
|
11
|
+
date: 2022-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csv
|
@@ -38,8 +38,8 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '3.0'
|
41
|
-
description: This gem provides a simple framework for CSV::Table/CSV::Row
|
42
|
-
|
41
|
+
description: This gem provides a simple framework for CSV::Table/CSV::Row lintingusing
|
42
|
+
custom rules for columns and rows
|
43
43
|
email:
|
44
44
|
- homebase.dev@gmail.com
|
45
45
|
executables: []
|
@@ -64,6 +64,7 @@ homepage: https://gitlab.com/homebase-dev/csv-probe
|
|
64
64
|
licenses:
|
65
65
|
- MIT
|
66
66
|
metadata:
|
67
|
+
rubygems_mfa_required: 'true'
|
67
68
|
homepage_uri: https://gitlab.com/homebase-dev/csv-probe
|
68
69
|
source_code_uri: https://gitlab.com/homebase-dev/csv-probe
|
69
70
|
changelog_uri: https://gitlab.com/homebase-dev/csv-probe
|