csv-probe 0.1.1 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 83d63f020bf4ed1e557518b78820b440cc24da36a7d93f6ded23b604aa7161ed
4
- data.tar.gz: df4e094f0f0451c9578ac5f02c34eb967d851a4782926dc64f0b96bdcd1eb9f8
3
+ metadata.gz: 945175b169f28d0b77504d23cf6020d5216e66f74e8e5a8b04ef0279f681fd8c
4
+ data.tar.gz: a91cb9d894402b70ebfb72a342f4166a94bda0b3c6f1ba361920d0caa6b3c0c9
5
5
  SHA512:
6
- metadata.gz: 9b3be13578b537577b62fd1efa51ec00f2d0a5a38755b2e8399dfeacf9f3ceaa1f30a65381336af5464cb4d08c32797bb0de5199f34f4e437041db9ec70ab681
7
- data.tar.gz: 2eb6bfdeb32a7d3f819cf52b412b311984d7e45366426ed81a788ccf2f9a83504733cbe348c8da34421f23855eab2f8e5efe40e3fd4086a94a656758cdef3e4b
6
+ metadata.gz: 7d09e366f9e7b4692dbfb1f8389667952f5e3a913901600d77d9954a9b5685650d81b6b9bba701cfa2925c47cefb0aca2e4b4bd5d48a3547b7cd2c775e512e01
7
+ data.tar.gz: a076a71e6679f69848b7c30117a100986e3857eb41a3ad2bed50c0a6b320add76caf6668ae8790dce9804c82e2f7dcc5b0e0e8675972c8f81a594bc163501f73
data/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.1.5] - 2022-01-05
4
+ - Fix rubocop warnings
5
+
6
+ ## [0.1.4] - 2022-01-05
7
+ - Fix `nil` handling for list + set column checks when the list or set is something like this:
8
+ - `a,|b|,b` leading and trailing empty items or
9
+ - `a,,b` the whole list/set string is nil
10
+
11
+ ## [0.1.3] - 2022-01-04
12
+ - Add better `nil` handling to list + set column checks. `nil` (or `""`) no longer implicitly passes the ColumnIsListWithDomain and ColumnIsSetWithDomain checks; `nil` or `""` now has to be among the expected items in order to pass the check.
13
+
14
+ ## [0.1.2] - 2022-01-03
15
+ - Fix README.md
16
+
17
+ ## [0.1.1] - 2022-01-02
18
+ - Fix rubocop warnings
19
+ - Improve README.md
20
+
3
21
  ## [0.1.0] - 2022-01-02
4
22
 
5
23
  - Initial release
data/README.md CHANGED
@@ -24,6 +24,9 @@ Or install it yourself as:
24
24
  Example of how to use Probe
25
25
 
26
26
  ```ruby
27
+ # load csv-probe gem
28
+ require 'csv/probe'
29
+
27
30
  # load CSV into CSV::Table
28
31
  csv_table = CSV.parse(<<~ROWS, headers: true)
29
32
  col1,col2,col3,col4,col5,col6,col7
@@ -31,13 +34,17 @@ Example how to use Probe
31
34
  0,PA,customer,2,03-12-2021,SHA;AN;RU,badger|ant
32
35
  0,TE,guest,1000,06-11-2021,RU,spider|racoon|ant
33
36
  ROWS
37
+
38
+ # define linting rules
34
39
  checks = [Probe::ColumnIsEqualTo.new("col1", "0"),
35
40
  Probe::ColumnMatchesRegEx.new("col2", /^(NU|PA|TE)$/),
36
- Probe::ColumnIsOneOf.new("col3", ["customer","guest"]),
37
- Probe::ColumnMeetsCondition.new("col4", ->(val, _opts) { Integer(val, exception: false) != nil }, "Not an Integer"),
41
+ Probe::ColumnIsOneOf.new("col3", %w[customer guest]),
42
+ Probe::ColumnMeetsCondition.new("col4", lambda { |val, _opts|
43
+ !Integer(val, exception: false).nil?
44
+ }, "Not an Integer"),
38
45
  Probe::ColumnIsDate.new("col5", "%d-%m-%Y"),
39
- Probe::ColumnIsListWithDomain.new("col6", ["FO", "RU", "SHA", "AN"], ";"),
40
- Probe::ColumnIsSetWithDomain.new("col7", ["eagle", "hawk", "badger", "spider", "racoon", "ant"], "|")]
46
+ Probe::ColumnIsListWithDomain.new("col6", %w[FO RU SHA AN], ";"),
47
+ Probe::ColumnIsSetWithDomain.new("col7", %w[eagle hawk badger spider racoon ant], "|")]
41
48
 
42
49
  # lint table
43
50
  csv_table.lint(checks)
@@ -186,16 +186,23 @@ module Probe
186
186
  end
187
187
  end
188
188
 
189
+ # rubocop:disable Metrics/AbcSize
189
190
  # Check if a tokenized column value is a list of given values
190
191
  class ColumnIsListWithDomain < ColumnMeetsCondition
191
192
  def initialize(varname, expected_items_arr, separator, _placeholder = nil) # rubocop:disable Metrics/MethodLength
192
193
  super(varname, nil, nil)
193
194
  @ok_condition_fn = lambda { |val, _cfg|
194
- items = val.split(separator)
195
+ expected_items_arr.map!(&:to_s) # turn nil -> ""
196
+
197
+ items = val.to_s.split(separator, -1)
198
+ return true if items.empty? && expected_items_arr.include?(nil.to_s) # empty str allowed
199
+
200
+ return false if items.empty?
201
+
195
202
  return items.all? { |e| expected_items_arr.include?(e) }
196
203
  }
197
204
  @fail_msg = lambda { |row, _opts|
198
- items = row.fetch(@varname).split(separator)
205
+ items = row.fetch(@varname).to_s.split(separator, -1)
199
206
  diff_items = items - expected_items_arr
200
207
  "expected that tokenized items of value #{items.inspect} are a subset of "\
201
208
  "#{expected_items_arr.inspect}, but items #{diff_items.inspect} are not"
@@ -210,12 +217,12 @@ module Probe
210
217
  @ok_condition_fn = lambda { |val, _cfg|
211
218
  return true if val.to_s == ""
212
219
 
213
- items = val.split(separator)
220
+ items = val.to_s.split(separator, -1)
214
221
  all_uniq = (items.size == items.uniq.size)
215
222
  return all_uniq
216
223
  }
217
224
  @fail_msg = lambda { |row, _opts|
218
- items = row.fetch(@varname).split(separator)
225
+ items = row.fetch(@varname).to_s.split(separator, -1)
219
226
  non_uniq_items = items.detect { |e| items.count(e) > 1 }
220
227
  "expected that items of tokenized value #{items.inspect} are uniqe, but items #{non_uniq_items.inspect} are not"
221
228
  }
@@ -228,14 +235,18 @@ module Probe
228
235
  super(varname, nil, nil)
229
236
  @pre_checks << ColumnIsSet.new(varname, separator)
230
237
  @ok_condition_fn = lambda { |val, _cfg|
231
- return true if val.to_s == ""
238
+ expected_items_arr.map!(&:to_s) # turn nil -> ""
239
+
240
+ items = val.to_s.split(separator, -1)
241
+ return true if items.empty? && expected_items_arr.include?(nil.to_s) # empty str allowed
242
+
243
+ return false if items.empty?
232
244
 
233
- items = val.split(separator)
234
245
  all_valid = items.all? { |i| expected_items_arr.include?(i) }
235
246
  return all_valid
236
247
  }
237
248
  @fail_msg = lambda { |row, _opts|
238
- items = row.fetch(@varname).split(separator)
249
+ items = row.fetch(@varname).to_s.split(separator, -1)
239
250
  "expected that items of tokenized value #{items.inspect} are a subset of #{expected_items_arr.inspect}"
240
251
  }
241
252
  # @fail_msg = lambda {|row, opts|
@@ -244,6 +255,7 @@ module Probe
244
255
  # are uniqe and a subset of #{expected_items_arr.inspect}" }
245
256
  end
246
257
  end
258
+ # rubocop:enable Metrics/AbcSize
247
259
 
248
260
  # TODO: nice to have
249
261
  # * if_second_not_empty_then_first_yes_check(varname1, varname2)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Probe
4
- VERSION = "0.1.1"
4
+ VERSION = "0.1.5"
5
5
  end
data/lib/csv/probe.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "csv"
3
+ require "csv" # TODO: should i require it here?
4
4
  require "terminal-table"
5
5
  require_relative "probe/version"
6
6
  require_relative "probe/checks"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-probe
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - homebase.dev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-02 00:00:00.000000000 Z
11
+ date: 2022-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: csv