csv-probe 0.1.1 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +11 -4
- data/lib/csv/probe/checks.rb +19 -7
- data/lib/csv/probe/version.rb +1 -1
- data/lib/csv/probe.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 945175b169f28d0b77504d23cf6020d5216e66f74e8e5a8b04ef0279f681fd8c
|
4
|
+
data.tar.gz: a91cb9d894402b70ebfb72a342f4166a94bda0b3c6f1ba361920d0caa6b3c0c9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7d09e366f9e7b4692dbfb1f8389667952f5e3a913901600d77d9954a9b5685650d81b6b9bba701cfa2925c47cefb0aca2e4b4bd5d48a3547b7cd2c775e512e01
|
7
|
+
data.tar.gz: a076a71e6679f69848b7c30117a100986e3857eb41a3ad2bed50c0a6b320add76caf6668ae8790dce9804c82e2f7dcc5b0e0e8675972c8f81a594bc163501f73
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,23 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.1.5] - 2022-01-05
|
4
|
+
- Fix rubocop warnings
|
5
|
+
|
6
|
+
## [0.1.4] - 2022-01-05
|
7
|
+
- Fix `nil` handling for list + set column checks when the list or set is something like this:
|
8
|
+
- `a,|b|,b` leading and trailing empty items or
|
9
|
+
- `a,,b` the whole list/set string is nil
|
10
|
+
|
11
|
+
## [0.1.3] - 2022-01-04
|
12
|
+
- Add better `nil` handling to list + set column checks. `nil` or `""` no longer implicitly passes the ColumnIsListWithDomain and ColumnIsSetWithDomain checks; `nil` or `""` now has to be among the expected items in order to pass the check.
|
13
|
+
|
14
|
+
## [0.1.2] - 2022-01-03
|
15
|
+
- Fix README.md
|
16
|
+
|
17
|
+
## [0.1.1] - 2022-01-02
|
18
|
+
- Fix rubocop warnings
|
19
|
+
- Improve README.md
|
20
|
+
|
3
21
|
## [0.1.0] - 2022-01-02
|
4
22
|
|
5
23
|
- Initial release
|
data/README.md
CHANGED
@@ -24,6 +24,9 @@ Or install it yourself as:
|
|
24
24
|
Example how to use Probe
|
25
25
|
|
26
26
|
```ruby
|
27
|
+
# load csv-probe gem
|
28
|
+
require 'csv/probe'
|
29
|
+
|
27
30
|
# load CSV into CSV::Table
|
28
31
|
csv_table = CSV.parse(<<~ROWS, headers: true)
|
29
32
|
col1,col2,col3,col4,col5,col6,col7
|
@@ -31,13 +34,17 @@ Example how to use Probe
|
|
31
34
|
0,PA,customer,2,03-12-2021,SHA;AN;RU,badger|ant
|
32
35
|
0,TE,guest,1000,06-11-2021,RU,spider|racoon|ant
|
33
36
|
ROWS
|
37
|
+
|
38
|
+
# define linting rules
|
34
39
|
checks = [Probe::ColumnIsEqualTo.new("col1", "0"),
|
35
40
|
Probe::ColumnMatchesRegEx.new("col2", /^(NU|PA|TE)$/),
|
36
|
-
Probe::ColumnIsOneOf.new("col3", [
|
37
|
-
Probe::ColumnMeetsCondition.new("col4",
|
41
|
+
Probe::ColumnIsOneOf.new("col3", %w[customer guest]),
|
42
|
+
Probe::ColumnMeetsCondition.new("col4", lambda { |val, _opts|
|
43
|
+
!Integer(val, exception: false).nil?
|
44
|
+
}, "Not an Integer"),
|
38
45
|
Probe::ColumnIsDate.new("col5", "%d-%m-%Y"),
|
39
|
-
Probe::ColumnIsListWithDomain.new("col6", [
|
40
|
-
Probe::ColumnIsSetWithDomain.new("col7", [
|
46
|
+
Probe::ColumnIsListWithDomain.new("col6", %w[FO RU SHA AN], ";"),
|
47
|
+
Probe::ColumnIsSetWithDomain.new("col7", %w[eagle hawk badger spider racoon ant], "|")]
|
41
48
|
|
42
49
|
# lint table
|
43
50
|
csv_table.lint(checks)
|
data/lib/csv/probe/checks.rb
CHANGED
@@ -186,16 +186,23 @@ module Probe
|
|
186
186
|
end
|
187
187
|
end
|
188
188
|
|
189
|
+
# rubocop:disable Metrics/AbcSize
|
189
190
|
# Check if a tokenized column value is a list of given values
|
190
191
|
class ColumnIsListWithDomain < ColumnMeetsCondition
|
191
192
|
def initialize(varname, expected_items_arr, separator, _placeholder = nil) # rubocop:disable Metrics/MethodLength
|
192
193
|
super(varname, nil, nil)
|
193
194
|
@ok_condition_fn = lambda { |val, _cfg|
|
194
|
-
|
195
|
+
expected_items_arr.map!(&:to_s) # turn nil -> ""
|
196
|
+
|
197
|
+
items = val.to_s.split(separator, -1)
|
198
|
+
return true if items.empty? && expected_items_arr.include?(nil.to_s) # empty str allowed
|
199
|
+
|
200
|
+
return false if items.empty?
|
201
|
+
|
195
202
|
return items.all? { |e| expected_items_arr.include?(e) }
|
196
203
|
}
|
197
204
|
@fail_msg = lambda { |row, _opts|
|
198
|
-
items = row.fetch(@varname).split(separator)
|
205
|
+
items = row.fetch(@varname).to_s.split(separator, -1)
|
199
206
|
diff_items = items - expected_items_arr
|
200
207
|
"expected that tokenized items of value #{items.inspect} are a subset of "\
|
201
208
|
"#{expected_items_arr.inspect}, but items #{diff_items.inspect} are not"
|
@@ -210,12 +217,12 @@ module Probe
|
|
210
217
|
@ok_condition_fn = lambda { |val, _cfg|
|
211
218
|
return true if val.to_s == ""
|
212
219
|
|
213
|
-
items = val.split(separator)
|
220
|
+
items = val.to_s.split(separator, -1)
|
214
221
|
all_uniq = (items.size == items.uniq.size)
|
215
222
|
return all_uniq
|
216
223
|
}
|
217
224
|
@fail_msg = lambda { |row, _opts|
|
218
|
-
items = row.fetch(@varname).split(separator)
|
225
|
+
items = row.fetch(@varname).to_s.split(separator, -1)
|
219
226
|
non_uniq_items = items.detect { |e| items.count(e) > 1 }
|
220
227
|
"expected that items of tokenized value #{items.inspect} are uniqe, but items #{non_uniq_items.inspect} are not"
|
221
228
|
}
|
@@ -228,14 +235,18 @@ module Probe
|
|
228
235
|
super(varname, nil, nil)
|
229
236
|
@pre_checks << ColumnIsSet.new(varname, separator)
|
230
237
|
@ok_condition_fn = lambda { |val, _cfg|
|
231
|
-
|
238
|
+
expected_items_arr.map!(&:to_s) # turn nil -> ""
|
239
|
+
|
240
|
+
items = val.to_s.split(separator, -1)
|
241
|
+
return true if items.empty? && expected_items_arr.include?(nil.to_s) # empty str allowed
|
242
|
+
|
243
|
+
return false if items.empty?
|
232
244
|
|
233
|
-
items = val.split(separator)
|
234
245
|
all_valid = items.all? { |i| expected_items_arr.include?(i) }
|
235
246
|
return all_valid
|
236
247
|
}
|
237
248
|
@fail_msg = lambda { |row, _opts|
|
238
|
-
items = row.fetch(@varname).split(separator)
|
249
|
+
items = row.fetch(@varname).to_s.split(separator, -1)
|
239
250
|
"expected that items of tokenized value #{items.inspect} are a subset of #{expected_items_arr.inspect}"
|
240
251
|
}
|
241
252
|
# @fail_msg = lambda {|row, opts|
|
@@ -244,6 +255,7 @@ module Probe
|
|
244
255
|
# are uniqe and a subset of #{expected_items_arr.inspect}" }
|
245
256
|
end
|
246
257
|
end
|
258
|
+
# rubocop:enable Metrics/AbcSize
|
247
259
|
|
248
260
|
# TODO: nice to have
|
249
261
|
# * if_second_not_empty_then_first_yes_check(varname1, varname2)
|
data/lib/csv/probe/version.rb
CHANGED
data/lib/csv/probe.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-probe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- homebase.dev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-01-02 00:00:00.000000000 Z
|
11
|
+
date: 2022-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csv
|