csv-probe 0.1.0 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -0
- data/CHANGELOG.md +15 -0
- data/README.md +13 -6
- data/csv-probe.gemspec +3 -1
- data/lib/csv/probe/checks.rb +47 -17
- data/lib/csv/probe/version.rb +1 -1
- data/lib/csv/probe.rb +3 -3
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2425404adecef9f68fced84863e0acb087f3358935032c3849cab690948e74dc
|
4
|
+
data.tar.gz: 0bf460f9f27439cdcba3150c2f53991ab07512e6e1ffb5195315fdad5689c549
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 721e33de3a86d0513dcaab1d0d102454fbcf48492dd6d551ec1f5623de6c0b7ba74189ea7d6ae8825eb34674b2679f251e50c08e87fabf1ae18a993cdc72eaff
|
7
|
+
data.tar.gz: 042b6b18255ddbb787c7318febbc31147e4e593cbedf67047bc25c962491a9a46b3e664841a18ea3b8bfb25ef4025455391a4c7571dbf1a37072bb75c64967f8
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,20 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.1.4] - 2022-01-05
|
4
|
+
- Fix `nil` handling for list + set column checks when the list or set is something like this:
|
5
|
+
- `a,|b|,b`: the list/set has leading and trailing empty items, or
|
6
|
+
- `a,,b`: the whole list/set string is nil
|
7
|
+
|
8
|
+
## [0.1.3] - 2022-01-04
|
9
|
+
- Add better `nil` handling to list + set column checks. `nil` (or `""`) no longer implicitly passes the ColumnIsListWithDomain and ColumnIsSetWithDomain checks; `nil` or `""` now has to be among the expected items in order to pass the check.
|
10
|
+
|
11
|
+
## [0.1.2] - 2022-01-03
|
12
|
+
- Fix README.md
|
13
|
+
|
14
|
+
## [0.1.1] - 2022-01-02
|
15
|
+
- Fix rubocop warnings
|
16
|
+
- Improve README.md
|
17
|
+
|
3
18
|
## [0.1.0] - 2022-01-02
|
4
19
|
|
5
20
|
- Initial release
|
data/README.md
CHANGED
@@ -23,7 +23,10 @@ Or install it yourself as:
|
|
23
23
|
|
24
24
|
Example of how to use Probe
|
25
25
|
|
26
|
-
```
|
26
|
+
```ruby
|
27
|
+
# load csv-probe gem
|
28
|
+
require 'csv/probe'
|
29
|
+
|
27
30
|
# load CSV into CSV::Table
|
28
31
|
csv_table = CSV.parse(<<~ROWS, headers: true)
|
29
32
|
col1,col2,col3,col4,col5,col6,col7
|
@@ -31,13 +34,17 @@ Example how to use Probe
|
|
31
34
|
0,PA,customer,2,03-12-2021,SHA;AN;RU,badger|ant
|
32
35
|
0,TE,guest,1000,06-11-2021,RU,spider|racoon|ant
|
33
36
|
ROWS
|
37
|
+
|
38
|
+
# define linting rules
|
34
39
|
checks = [Probe::ColumnIsEqualTo.new("col1", "0"),
|
35
40
|
Probe::ColumnMatchesRegEx.new("col2", /^(NU|PA|TE)$/),
|
36
|
-
Probe::ColumnIsOneOf.new("col3", [
|
37
|
-
Probe::ColumnMeetsCondition.new("col4",
|
41
|
+
Probe::ColumnIsOneOf.new("col3", %w[customer guest]),
|
42
|
+
Probe::ColumnMeetsCondition.new("col4", lambda { |val, _opts|
|
43
|
+
!Integer(val, exception: false).nil?
|
44
|
+
}, "Not an Integer"),
|
38
45
|
Probe::ColumnIsDate.new("col5", "%d-%m-%Y"),
|
39
|
-
Probe::ColumnIsListWithDomain.new("col6", [
|
40
|
-
Probe::ColumnIsSetWithDomain.new("col7", [
|
46
|
+
Probe::ColumnIsListWithDomain.new("col6", %w[FO RU SHA AN], ";"),
|
47
|
+
Probe::ColumnIsSetWithDomain.new("col7", %w[eagle hawk badger spider racoon ant], "|")]
|
41
48
|
|
42
49
|
# lint table
|
43
50
|
csv_table.lint(checks)
|
@@ -54,7 +61,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
54
61
|
|
55
62
|
## Contributing
|
56
63
|
|
57
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
64
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/homebase-dev/csv-probe.
|
58
65
|
|
59
66
|
## License
|
60
67
|
|
data/csv-probe.gemspec
CHANGED
@@ -9,12 +9,14 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.email = ["homebase.dev@gmail.com"]
|
10
10
|
|
11
11
|
spec.summary = "Allows validation of CSV::Table or CSV::Rows via custom rules"
|
12
|
-
spec.description = "This gem provides a simple framework for CSV::Table/CSV::Row
|
12
|
+
spec.description = "This gem provides a simple framework for CSV::Table/CSV::Row linting"\
|
13
|
+
"using custom rules for columns and rows"
|
13
14
|
spec.homepage = "https://gitlab.com/homebase-dev/csv-probe"
|
14
15
|
spec.license = "MIT"
|
15
16
|
spec.required_ruby_version = ">= 2.6.0"
|
16
17
|
|
17
18
|
# spec.metadata["allowed_push_host"] = "TODO: Set to your gem server 'https://example.com'"
|
19
|
+
spec.metadata["rubygems_mfa_required"] = "true"
|
18
20
|
|
19
21
|
spec.metadata["homepage_uri"] = spec.homepage
|
20
22
|
spec.metadata["source_code_uri"] = "https://gitlab.com/homebase-dev/csv-probe"
|
data/lib/csv/probe/checks.rb
CHANGED
@@ -5,30 +5,35 @@ module Probe
|
|
5
5
|
|
6
6
|
class LintingError < Error; end
|
7
7
|
|
8
|
+
# Linting error in CSV::Row, if the rule applies to multiple columns of a row
|
8
9
|
class RowError < LintingError
|
9
10
|
def initialize(msg = "CheckRowError")
|
10
11
|
super
|
11
12
|
end
|
12
13
|
end
|
13
14
|
|
15
|
+
# Linting warning in CSV::Row, if the rule applies to multiple columns of a row
|
14
16
|
class RowWarning < LintingError
|
15
17
|
def initialize(msg = "CheckRowWarning")
|
16
18
|
super
|
17
19
|
end
|
18
20
|
end
|
19
21
|
|
22
|
+
# Linting error in CSV::Column, if the rule applies to a single column of a row
|
20
23
|
class ColumnError < LintingError
|
21
24
|
def initialize(msg = "CheckColumnError")
|
22
25
|
super
|
23
26
|
end
|
24
27
|
end
|
25
28
|
|
29
|
+
# Linting warning in CSV::Column, if the rule applies to a single column of a row
|
26
30
|
class ColumnWarning < LintingError
|
27
31
|
def initialize(msg = "CheckColumnWarning")
|
28
32
|
super
|
29
33
|
end
|
30
34
|
end
|
31
35
|
|
36
|
+
# Abstract class of a CSV::Row check
|
32
37
|
class RowMeetsCondition
|
33
38
|
attr_accessor :ok_condition_fn, :fail_msg, :severity, :pre_checks
|
34
39
|
|
@@ -102,6 +107,7 @@ module Probe
|
|
102
107
|
end
|
103
108
|
end
|
104
109
|
|
110
|
+
# Abstract class of a CSV::Row single field (= column) check
|
105
111
|
class ColumnMeetsCondition < RowMeetsCondition
|
106
112
|
attr_accessor :varname, :ok_condition_fn, :fail_msg
|
107
113
|
|
@@ -120,10 +126,14 @@ module Probe
|
|
120
126
|
|
121
127
|
def evaluate(row, opts = {})
|
122
128
|
evaluate_pre_checks(row, opts)
|
123
|
-
|
129
|
+
|
130
|
+
unless @ok_condition_fn.call(row.fetch(@varname), opts) # rubocop:disable Style/GuardClause
|
131
|
+
raise @error_classes.fetch(@severity), error_msg(row, opts)
|
132
|
+
end
|
124
133
|
end
|
125
134
|
end
|
126
135
|
|
136
|
+
# Check if a column value is equal to a provided value (Pay attention to types!)
|
127
137
|
class ColumnIsEqualTo < ColumnMeetsCondition
|
128
138
|
def initialize(varname, expected_val, _placeholder = nil)
|
129
139
|
super(varname, nil, nil)
|
@@ -132,6 +142,7 @@ module Probe
|
|
132
142
|
end
|
133
143
|
end
|
134
144
|
|
145
|
+
# Check if a column value is one of a list of provided values
|
135
146
|
class ColumnIsOneOf < ColumnMeetsCondition
|
136
147
|
def initialize(varname, expected_vals_arr, _placeholder = nil)
|
137
148
|
super(varname, nil, nil)
|
@@ -140,8 +151,9 @@ module Probe
|
|
140
151
|
end
|
141
152
|
end
|
142
153
|
|
154
|
+
# Check if a column value is a date with a given format
|
143
155
|
class ColumnIsDate < ColumnMeetsCondition
|
144
|
-
def initialize(varname, expected_date_format, _placeholder = nil)
|
156
|
+
def initialize(varname, expected_date_format, _placeholder = nil) # rubocop:disable Metrics/MethodLength
|
145
157
|
super(varname, nil, nil)
|
146
158
|
@ok_condition_fn = lambda { |val, _cfg|
|
147
159
|
success = true
|
@@ -159,11 +171,13 @@ module Probe
|
|
159
171
|
# class ColumnIsInteger < ColumnMeetsCondition
|
160
172
|
# def initialize(varname, placeholder1=nil, placeholder=nil)
|
161
173
|
# super(varname, nil, nil)
|
162
|
-
# @ok_condition_fn = ->(val, cfg){ val.is_a? Integer }
|
174
|
+
# @ok_condition_fn = ->(val, cfg){ val.is_a? Integer }
|
175
|
+
# # TODO: ATTENTION 12abc -> integer, also 1.1 -> 1 ! better do it over regex?
|
163
176
|
# @fail_msg = "expected to be an Integer"
|
164
177
|
# end
|
165
178
|
# end
|
166
179
|
|
180
|
+
# Check if a column value matches a given regular expression
|
167
181
|
class ColumnMatchesRegEx < ColumnMeetsCondition
|
168
182
|
def initialize(varname, expected_regex_pattern, _placeholder = nil)
|
169
183
|
super(varname, nil, nil)
|
@@ -172,56 +186,72 @@ module Probe
|
|
172
186
|
end
|
173
187
|
end
|
174
188
|
|
175
|
-
#
|
189
|
+
# Check if a tokenized column value is a list of given values
|
176
190
|
class ColumnIsListWithDomain < ColumnMeetsCondition
|
177
|
-
def initialize(varname, expected_items_arr, separator, _placeholder = nil)
|
191
|
+
def initialize(varname, expected_items_arr, separator, _placeholder = nil) # rubocop:disable Metrics/MethodLength
|
178
192
|
super(varname, nil, nil)
|
179
193
|
@ok_condition_fn = lambda { |val, _cfg|
|
180
|
-
|
194
|
+
expected_items_arr.map!(&:to_s) # turn nil -> ""
|
195
|
+
|
196
|
+
items = val.to_s.split(separator, -1)
|
197
|
+
return true if items.empty? && expected_items_arr.include?(nil.to_s) # empty str allowed
|
198
|
+
|
199
|
+
return false if items.empty?
|
200
|
+
|
181
201
|
return items.all? { |e| expected_items_arr.include?(e) }
|
182
202
|
}
|
183
203
|
@fail_msg = lambda { |row, _opts|
|
184
|
-
items = row.fetch(@varname).split(separator)
|
204
|
+
items = row.fetch(@varname).to_s.split(separator, -1)
|
185
205
|
diff_items = items - expected_items_arr
|
186
|
-
"expected that tokenized items of value #{items.inspect} are a subset of
|
206
|
+
"expected that tokenized items of value #{items.inspect} are a subset of "\
|
207
|
+
"#{expected_items_arr.inspect}, but items #{diff_items.inspect} are not"
|
187
208
|
}
|
188
209
|
end
|
189
210
|
end
|
190
211
|
|
212
|
+
# Check if a tokenized column value is a set (its items contain no duplicates)
|
191
213
|
class ColumnIsSet < ColumnMeetsCondition
|
192
|
-
def initialize(varname, separator, _placeholder = nil)
|
214
|
+
def initialize(varname, separator, _placeholder = nil) # rubocop:disable Metrics/MethodLength
|
193
215
|
super(varname, nil, nil)
|
194
216
|
@ok_condition_fn = lambda { |val, _cfg|
|
195
|
-
return true if val.to_s ==
|
217
|
+
return true if val.to_s == ''
|
196
218
|
|
197
|
-
items = val.split(separator)
|
219
|
+
items = val.to_s.split(separator, -1)
|
198
220
|
all_uniq = (items.size == items.uniq.size)
|
199
221
|
return all_uniq
|
200
222
|
}
|
201
223
|
@fail_msg = lambda { |row, _opts|
|
202
|
-
items = row.fetch(@varname).split(separator)
|
224
|
+
items = row.fetch(@varname).to_s.split(separator, -1)
|
203
225
|
non_uniq_items = items.detect { |e| items.count(e) > 1 }
|
204
226
|
"expected that items of tokenized value #{items.inspect} are uniqe, but items #{non_uniq_items.inspect} are not"
|
205
227
|
}
|
206
228
|
end
|
207
229
|
end
|
208
230
|
|
231
|
+
# Check if a tokenized column value is a set of given values (no duplicates)
|
209
232
|
class ColumnIsSetWithDomain < ColumnMeetsCondition
|
210
|
-
def initialize(varname, expected_items_arr, separator, _placeholder = nil)
|
233
|
+
def initialize(varname, expected_items_arr, separator, _placeholder = nil) # rubocop:disable Metrics/MethodLength
|
211
234
|
super(varname, nil, nil)
|
212
235
|
@pre_checks << ColumnIsSet.new(varname, separator)
|
213
236
|
@ok_condition_fn = lambda { |val, _cfg|
|
214
|
-
|
237
|
+
expected_items_arr.map!(&:to_s) # turn nil -> ""
|
238
|
+
|
239
|
+
items = val.to_s.split(separator, -1)
|
240
|
+
return true if items.empty? && expected_items_arr.include?(nil.to_s) # empty str allowed
|
241
|
+
|
242
|
+
return false if items.empty?
|
215
243
|
|
216
|
-
items = val.split(separator)
|
217
244
|
all_valid = items.all? { |i| expected_items_arr.include?(i) }
|
218
245
|
return all_valid
|
219
246
|
}
|
220
247
|
@fail_msg = lambda { |row, _opts|
|
221
|
-
items = row.fetch(@varname).split(separator)
|
248
|
+
items = row.fetch(@varname).to_s.split(separator, -1)
|
222
249
|
"expected that items of tokenized value #{items.inspect} are a subset of #{expected_items_arr.inspect}"
|
223
250
|
}
|
224
|
-
# @fail_msg =
|
251
|
+
# @fail_msg = lambda {|row, opts|
|
252
|
+
# "Unexpected value:#{row.fetch(@varname).inspect} for column:#{@varname.inspect},
|
253
|
+
# expected that items of tokenized value #{row.fetch(@varname).split(separator)}
|
254
|
+
# are uniqe and a subset of #{expected_items_arr.inspect}" }
|
225
255
|
end
|
226
256
|
end
|
227
257
|
|
data/lib/csv/probe/version.rb
CHANGED
data/lib/csv/probe.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "csv"
|
3
|
+
require "csv" # TODO: should i require it here?
|
4
4
|
require "terminal-table"
|
5
5
|
require_relative "probe/version"
|
6
6
|
require_relative "probe/checks"
|
@@ -43,7 +43,7 @@ module Probe
|
|
43
43
|
end
|
44
44
|
|
45
45
|
# Extend CSV::Table with .lint(...) method
|
46
|
-
class CSV::Table
|
46
|
+
class CSV::Table # rubocop:disable Style/ClassAndModuleChildren
|
47
47
|
def lint(checks, opts = {})
|
48
48
|
opts[:headers] = true unless opts.key?(:headers) # CSV::Table has always headers, hence set :headers = true
|
49
49
|
Probe.lint_rows(self, checks, opts)
|
@@ -51,7 +51,7 @@ class CSV::Table
|
|
51
51
|
end
|
52
52
|
|
53
53
|
# Extend CSV::Row with .lint(...) method
|
54
|
-
class CSV::Row
|
54
|
+
class CSV::Row # rubocop:disable Style/ClassAndModuleChildren
|
55
55
|
def lint(checks, opts = {})
|
56
56
|
Probe.lint_row(self, checks, opts)
|
57
57
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-probe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- homebase.dev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-01-
|
11
|
+
date: 2022-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csv
|
@@ -38,8 +38,8 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '3.0'
|
41
|
-
description: This gem provides a simple framework for CSV::Table/CSV::Row
|
42
|
-
|
41
|
+
description: This gem provides a simple framework for CSV::Table/CSV::Row lintingusing
|
42
|
+
custom rules for columns and rows
|
43
43
|
email:
|
44
44
|
- homebase.dev@gmail.com
|
45
45
|
executables: []
|
@@ -64,6 +64,7 @@ homepage: https://gitlab.com/homebase-dev/csv-probe
|
|
64
64
|
licenses:
|
65
65
|
- MIT
|
66
66
|
metadata:
|
67
|
+
rubygems_mfa_required: 'true'
|
67
68
|
homepage_uri: https://gitlab.com/homebase-dev/csv-probe
|
68
69
|
source_code_uri: https://gitlab.com/homebase-dev/csv-probe
|
69
70
|
changelog_uri: https://gitlab.com/homebase-dev/csv-probe
|