csv-probe 0.1.1 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +11 -4
- data/lib/csv/probe/checks.rb +19 -7
- data/lib/csv/probe/version.rb +1 -1
- data/lib/csv/probe.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 945175b169f28d0b77504d23cf6020d5216e66f74e8e5a8b04ef0279f681fd8c
|
4
|
+
data.tar.gz: a91cb9d894402b70ebfb72a342f4166a94bda0b3c6f1ba361920d0caa6b3c0c9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7d09e366f9e7b4692dbfb1f8389667952f5e3a913901600d77d9954a9b5685650d81b6b9bba701cfa2925c47cefb0aca2e4b4bd5d48a3547b7cd2c775e512e01
|
7
|
+
data.tar.gz: a076a71e6679f69848b7c30117a100986e3857eb41a3ad2bed50c0a6b320add76caf6668ae8790dce9804c82e2f7dcc5b0e0e8675972c8f81a594bc163501f73
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,23 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.1.5] - 2022-01-05
|
4
|
+
- Fix rubocop warnings
|
5
|
+
|
6
|
+
## [0.1.4] - 2022-01-05
|
7
|
+
- Fix `nil` handling for list + set column checks when the list or set is something like this:
|
8
|
+
- `a,|b|,b`: the list/set string has leading and trailing empty items, or
|
9
|
+
- `a,,b`: the whole list/set string is `nil`
|
10
|
+
|
11
|
+
## [0.1.3] - 2022-01-04
|
12
|
+
- Add better `nil` handling to list + set column checks. `nil` (or `""`) no longer implicitly passes the ColumnIsListWithDomain and ColumnIsSetWithDomain checks; `nil` or `""` now has to be among the expected items in order to pass the check.
|
13
|
+
|
14
|
+
## [0.1.2] - 2022-01-03
|
15
|
+
- Fix README.md
|
16
|
+
|
17
|
+
## [0.1.1] - 2022-01-02
|
18
|
+
- Fix rubocop warnings
|
19
|
+
- Improve README.md
|
20
|
+
|
3
21
|
## [0.1.0] - 2022-01-02
|
4
22
|
|
5
23
|
- Initial release
|
data/README.md
CHANGED
@@ -24,6 +24,9 @@ Or install it yourself as:
|
|
24
24
|
Example of how to use Probe
|
25
25
|
|
26
26
|
```ruby
|
27
|
+
# load csv-probe gem
|
28
|
+
require 'csv/probe'
|
29
|
+
|
27
30
|
# load CSV into CSV::Table
|
28
31
|
csv_table = CSV.parse(<<~ROWS, headers: true)
|
29
32
|
col1,col2,col3,col4,col5,col6,col7
|
@@ -31,13 +34,17 @@ Example how to use Probe
|
|
31
34
|
0,PA,customer,2,03-12-2021,SHA;AN;RU,badger|ant
|
32
35
|
0,TE,guest,1000,06-11-2021,RU,spider|racoon|ant
|
33
36
|
ROWS
|
37
|
+
|
38
|
+
# define linting rules
|
34
39
|
checks = [Probe::ColumnIsEqualTo.new("col1", "0"),
|
35
40
|
Probe::ColumnMatchesRegEx.new("col2", /^(NU|PA|TE)$/),
|
36
|
-
Probe::ColumnIsOneOf.new("col3", [
|
37
|
-
Probe::ColumnMeetsCondition.new("col4",
|
41
|
+
Probe::ColumnIsOneOf.new("col3", %w[customer guest]),
|
42
|
+
Probe::ColumnMeetsCondition.new("col4", lambda { |val, _opts|
|
43
|
+
!Integer(val, exception: false).nil?
|
44
|
+
}, "Not an Integer"),
|
38
45
|
Probe::ColumnIsDate.new("col5", "%d-%m-%Y"),
|
39
|
-
Probe::ColumnIsListWithDomain.new("col6", [
|
40
|
-
Probe::ColumnIsSetWithDomain.new("col7", [
|
46
|
+
Probe::ColumnIsListWithDomain.new("col6", %w[FO RU SHA AN], ";"),
|
47
|
+
Probe::ColumnIsSetWithDomain.new("col7", %w[eagle hawk badger spider racoon ant], "|")]
|
41
48
|
|
42
49
|
# lint table
|
43
50
|
csv_table.lint(checks)
|
data/lib/csv/probe/checks.rb
CHANGED
@@ -186,16 +186,23 @@ module Probe
|
|
186
186
|
end
|
187
187
|
end
|
188
188
|
|
189
|
+
# rubocop:disable Metrics/AbcSize
|
189
190
|
# Check if a tokenized column value is a list of given values
|
190
191
|
class ColumnIsListWithDomain < ColumnMeetsCondition
|
191
192
|
def initialize(varname, expected_items_arr, separator, _placeholder = nil) # rubocop:disable Metrics/MethodLength
|
192
193
|
super(varname, nil, nil)
|
193
194
|
@ok_condition_fn = lambda { |val, _cfg|
|
194
|
-
|
195
|
+
expected_items_arr.map!(&:to_s) # turn nil -> ""
|
196
|
+
|
197
|
+
items = val.to_s.split(separator, -1)
|
198
|
+
return true if items.empty? && expected_items_arr.include?(nil.to_s) # empty str allowed
|
199
|
+
|
200
|
+
return false if items.empty?
|
201
|
+
|
195
202
|
return items.all? { |e| expected_items_arr.include?(e) }
|
196
203
|
}
|
197
204
|
@fail_msg = lambda { |row, _opts|
|
198
|
-
items = row.fetch(@varname).split(separator)
|
205
|
+
items = row.fetch(@varname).to_s.split(separator, -1)
|
199
206
|
diff_items = items - expected_items_arr
|
200
207
|
"expected that tokenized items of value #{items.inspect} are a subset of "\
|
201
208
|
"#{expected_items_arr.inspect}, but items #{diff_items.inspect} are not"
|
@@ -210,12 +217,12 @@ module Probe
|
|
210
217
|
@ok_condition_fn = lambda { |val, _cfg|
|
211
218
|
return true if val.to_s == ""
|
212
219
|
|
213
|
-
items = val.split(separator)
|
220
|
+
items = val.to_s.split(separator, -1)
|
214
221
|
all_uniq = (items.size == items.uniq.size)
|
215
222
|
return all_uniq
|
216
223
|
}
|
217
224
|
@fail_msg = lambda { |row, _opts|
|
218
|
-
items = row.fetch(@varname).split(separator)
|
225
|
+
items = row.fetch(@varname).to_s.split(separator, -1)
|
219
226
|
non_uniq_items = items.detect { |e| items.count(e) > 1 }
|
220
227
|
"expected that items of tokenized value #{items.inspect} are uniqe, but items #{non_uniq_items.inspect} are not"
|
221
228
|
}
|
@@ -228,14 +235,18 @@ module Probe
|
|
228
235
|
super(varname, nil, nil)
|
229
236
|
@pre_checks << ColumnIsSet.new(varname, separator)
|
230
237
|
@ok_condition_fn = lambda { |val, _cfg|
|
231
|
-
|
238
|
+
expected_items_arr.map!(&:to_s) # turn nil -> ""
|
239
|
+
|
240
|
+
items = val.to_s.split(separator, -1)
|
241
|
+
return true if items.empty? && expected_items_arr.include?(nil.to_s) # empty str allowed
|
242
|
+
|
243
|
+
return false if items.empty?
|
232
244
|
|
233
|
-
items = val.split(separator)
|
234
245
|
all_valid = items.all? { |i| expected_items_arr.include?(i) }
|
235
246
|
return all_valid
|
236
247
|
}
|
237
248
|
@fail_msg = lambda { |row, _opts|
|
238
|
-
items = row.fetch(@varname).split(separator)
|
249
|
+
items = row.fetch(@varname).to_s.split(separator, -1)
|
239
250
|
"expected that items of tokenized value #{items.inspect} are a subset of #{expected_items_arr.inspect}"
|
240
251
|
}
|
241
252
|
# @fail_msg = lambda {|row, opts|
|
@@ -244,6 +255,7 @@ module Probe
|
|
244
255
|
# are uniqe and a subset of #{expected_items_arr.inspect}" }
|
245
256
|
end
|
246
257
|
end
|
258
|
+
# rubocop:enable Metrics/AbcSize
|
247
259
|
|
248
260
|
# TODO: nice to have
|
249
261
|
# * if_second_not_empty_then_first_yes_check(varname1, varname2)
|
data/lib/csv/probe/version.rb
CHANGED
data/lib/csv/probe.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-probe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- homebase.dev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-01-02 00:00:00.000000000 Z
|
11
|
+
date: 2022-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csv
|