csv-probe 0.1.0 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 138cd78aa31ccab66bbaa4a629c4f2c0d3b37a547006ed635e6fed935a223051
4
- data.tar.gz: 87f700fcb9c7a620f101b5a63bf095b9f6a611f72d83f876b11ca33797564a72
3
+ metadata.gz: 2425404adecef9f68fced84863e0acb087f3358935032c3849cab690948e74dc
4
+ data.tar.gz: 0bf460f9f27439cdcba3150c2f53991ab07512e6e1ffb5195315fdad5689c549
5
5
  SHA512:
6
- metadata.gz: 1357f59d7446f937c3719feeafd86aa29a10844203d72dd20f30c338243ad347adeca2c5d1dca93195cd223403fb024263330f398f029455f8ae5562fd906812
7
- data.tar.gz: aee4fa829429c8cc4590351da2b303b5879a163b412d987ec7dff63f5c45e5e8e9e4b5dd40af4619a19ceaa166bf09670e3f4f0b50b79f83e4c048552ec0ec7c
6
+ metadata.gz: 721e33de3a86d0513dcaab1d0d102454fbcf48492dd6d551ec1f5623de6c0b7ba74189ea7d6ae8825eb34674b2679f251e50c08e87fabf1ae18a993cdc72eaff
7
+ data.tar.gz: 042b6b18255ddbb787c7318febbc31147e4e593cbedf67047bc25c962491a9a46b3e664841a18ea3b8bfb25ef4025455391a4c7571dbf1a37072bb75c64967f8
data/.rubocop.yml CHANGED
@@ -1,5 +1,6 @@
1
1
  AllCops:
2
2
  TargetRubyVersion: 2.6
3
+ NewCops: enable
3
4
 
4
5
  Style/StringLiterals:
5
6
  Enabled: true
data/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.1.4] - 2022-01-05
4
+ - Fix `nil` handling for list + set column checks when the list or set is something like this:
5
+ - `a,|b|,b` leading and trailing empty items or
6
+ - `a,,b` the whole list/set string is nil
7
+
8
+ ## [0.1.3] - 2022-01-04
9
+ - Add better `nil` handling to list + set column checks. `nil` or `""` no longer implicitly passes the ColumnIsListWithDomain and ColumnIsSetWithDomain checks; `nil` or `""` now has to be among the expected items in order to pass the check.
10
+
11
+ ## [0.1.2] - 2022-01-03
12
+ - Fix README.md
13
+
14
+ ## [0.1.1] - 2022-01-02
15
+ - Fix rubocop warnings
16
+ - Improve README.md
17
+
3
18
  ## [0.1.0] - 2022-01-02
4
19
 
5
20
  - Initial release
data/README.md CHANGED
@@ -23,7 +23,10 @@ Or install it yourself as:
23
23
 
24
24
  Example how to use Probe
25
25
 
26
- ```
26
+ ```ruby
27
+ # load csv-probe gem
28
+ require 'csv/probe'
29
+
27
30
  # load CSV into CSV::Table
28
31
  csv_table = CSV.parse(<<~ROWS, headers: true)
29
32
  col1,col2,col3,col4,col5,col6,col7
@@ -31,13 +34,17 @@ Example how to use Probe
31
34
  0,PA,customer,2,03-12-2021,SHA;AN;RU,badger|ant
32
35
  0,TE,guest,1000,06-11-2021,RU,spider|racoon|ant
33
36
  ROWS
37
+
38
+ # define linting rules
34
39
  checks = [Probe::ColumnIsEqualTo.new("col1", "0"),
35
40
  Probe::ColumnMatchesRegEx.new("col2", /^(NU|PA|TE)$/),
36
- Probe::ColumnIsOneOf.new("col3", ["customer","guest"]),
37
- Probe::ColumnMeetsCondition.new("col4", ->(val, _opts) { Integer(val, exception: false) != nil }, "Not an Integer"),
41
+ Probe::ColumnIsOneOf.new("col3", %w[customer guest]),
42
+ Probe::ColumnMeetsCondition.new("col4", lambda { |val, _opts|
43
+ !Integer(val, exception: false).nil?
44
+ }, "Not an Integer"),
38
45
  Probe::ColumnIsDate.new("col5", "%d-%m-%Y"),
39
- Probe::ColumnIsListWithDomain.new("col6", ["FO", "RU", "SHA", "AN"], ";"),
40
- Probe::ColumnIsSetWithDomain.new("col7", ["eagle", "hawk", "badger", "spider", "racoon", "ant"], "|")]
46
+ Probe::ColumnIsListWithDomain.new("col6", %w[FO RU SHA AN], ";"),
47
+ Probe::ColumnIsSetWithDomain.new("col7", %w[eagle hawk badger spider racoon ant], "|")]
41
48
 
42
49
  # lint table
43
50
  csv_table.lint(checks)
@@ -54,7 +61,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
54
61
 
55
62
  ## Contributing
56
63
 
57
- Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/csv-probe.
64
+ Bug reports and pull requests are welcome on GitHub at https://github.com/homebase-dev/csv-probe.
58
65
 
59
66
  ## License
60
67
 
data/csv-probe.gemspec CHANGED
@@ -9,12 +9,14 @@ Gem::Specification.new do |spec|
9
9
  spec.email = ["homebase.dev@gmail.com"]
10
10
 
11
11
  spec.summary = "Allows validation of CSV::Table or CSV::Rows via custom rules"
12
- spec.description = "This gem provides a simple framework for CSV::Table/CSV::Row validation using custom rules for columns and rows"
12
+ spec.description = "This gem provides a simple framework for CSV::Table/CSV::Row linting"\
13
+ "using custom rules for columns and rows"
13
14
  spec.homepage = "https://gitlab.com/homebase-dev/csv-probe"
14
15
  spec.license = "MIT"
15
16
  spec.required_ruby_version = ">= 2.6.0"
16
17
 
17
18
  # spec.metadata["allowed_push_host"] = "TODO: Set to your gem server 'https://example.com'"
19
+ spec.metadata["rubygems_mfa_required"] = "true"
18
20
 
19
21
  spec.metadata["homepage_uri"] = spec.homepage
20
22
  spec.metadata["source_code_uri"] = "https://gitlab.com/homebase-dev/csv-probe"
@@ -5,30 +5,35 @@ module Probe
5
5
 
6
6
  class LintingError < Error; end
7
7
 
8
+ # Linting error in CSV::Row, if the rule applies to multiple columns of a row
8
9
  class RowError < LintingError
9
10
  def initialize(msg = "CheckRowError")
10
11
  super
11
12
  end
12
13
  end
13
14
 
15
+ # Linting warning in CSV::Row, if the rule applies to multiple columns of a row
14
16
  class RowWarning < LintingError
15
17
  def initialize(msg = "CheckRowWarning")
16
18
  super
17
19
  end
18
20
  end
19
21
 
22
+ # Linting error in CSV::Column, if the rule applies to a single column of a row
20
23
  class ColumnError < LintingError
21
24
  def initialize(msg = "CheckColumnError")
22
25
  super
23
26
  end
24
27
  end
25
28
 
29
+ # Linting warning in CSV::Column, if the rule applies to a single column of a row
26
30
  class ColumnWarning < LintingError
27
31
  def initialize(msg = "CheckColumnWarning")
28
32
  super
29
33
  end
30
34
  end
31
35
 
36
+ # Abstract class of a CSV::Row check
32
37
  class RowMeetsCondition
33
38
  attr_accessor :ok_condition_fn, :fail_msg, :severity, :pre_checks
34
39
 
@@ -102,6 +107,7 @@ module Probe
102
107
  end
103
108
  end
104
109
 
110
+ # Abstract class of a CSV::Row single field (= column) check
105
111
  class ColumnMeetsCondition < RowMeetsCondition
106
112
  attr_accessor :varname, :ok_condition_fn, :fail_msg
107
113
 
@@ -120,10 +126,14 @@ module Probe
120
126
 
121
127
  def evaluate(row, opts = {})
122
128
  evaluate_pre_checks(row, opts)
123
- raise @error_classes.fetch(@severity), error_msg(row, opts) unless @ok_condition_fn.call(row.fetch(@varname), opts)
129
+
130
+ unless @ok_condition_fn.call(row.fetch(@varname), opts) # rubocop:disable Style/GuardClause
131
+ raise @error_classes.fetch(@severity), error_msg(row, opts)
132
+ end
124
133
  end
125
134
  end
126
135
 
136
+ # Check if a column value is equal to a provided value (Pay attention to types!)
127
137
  class ColumnIsEqualTo < ColumnMeetsCondition
128
138
  def initialize(varname, expected_val, _placeholder = nil)
129
139
  super(varname, nil, nil)
@@ -132,6 +142,7 @@ module Probe
132
142
  end
133
143
  end
134
144
 
145
+ # Check if a column value is one of a list of provided values
135
146
  class ColumnIsOneOf < ColumnMeetsCondition
136
147
  def initialize(varname, expected_vals_arr, _placeholder = nil)
137
148
  super(varname, nil, nil)
@@ -140,8 +151,9 @@ module Probe
140
151
  end
141
152
  end
142
153
 
154
+ # Check if a column value is a date with a given format
143
155
  class ColumnIsDate < ColumnMeetsCondition
144
- def initialize(varname, expected_date_format, _placeholder = nil)
156
+ def initialize(varname, expected_date_format, _placeholder = nil) # rubocop:disable Metrics/MethodLength
145
157
  super(varname, nil, nil)
146
158
  @ok_condition_fn = lambda { |val, _cfg|
147
159
  success = true
@@ -159,11 +171,13 @@ module Probe
159
171
  # class ColumnIsInteger < ColumnMeetsCondition
160
172
  # def initialize(varname, placeholder1=nil, placeholder=nil)
161
173
  # super(varname, nil, nil)
162
- # @ok_condition_fn = ->(val, cfg){ val.is_a? Integer } # TODO: achtung 12abc -> integer auch 1.1 wird zu 1... ist nicht so einfach, lieber über regex prüfen
174
+ # @ok_condition_fn = ->(val, cfg){ val.is_a? Integer }
175
+ # # TODO: ATTENTION 12abc -> integer, also 1.1 -> 1 ! better do it over regex?
163
176
  # @fail_msg = "expected to be an Integer"
164
177
  # end
165
178
  # end
166
179
 
180
+ # Check if a column value matches a given regular expression
167
181
  class ColumnMatchesRegEx < ColumnMeetsCondition
168
182
  def initialize(varname, expected_regex_pattern, _placeholder = nil)
169
183
  super(varname, nil, nil)
@@ -172,56 +186,72 @@ module Probe
172
186
  end
173
187
  end
174
188
 
175
- # TODO: fixme uniq als eigenen check machen
189
+ # Check if a tokenized column value is a list of given values
176
190
  class ColumnIsListWithDomain < ColumnMeetsCondition
177
- def initialize(varname, expected_items_arr, separator, _placeholder = nil)
191
+ def initialize(varname, expected_items_arr, separator, _placeholder = nil) # rubocop:disable Metrics/MethodLength
178
192
  super(varname, nil, nil)
179
193
  @ok_condition_fn = lambda { |val, _cfg|
180
- items = val.split(separator)
194
+ expected_items_arr.map!(&:to_s) # turn nil -> ""
195
+
196
+ items = val.to_s.split(separator, -1)
197
+ return true if items.empty? && expected_items_arr.include?(nil.to_s) # empty str allowed
198
+
199
+ return false if items.empty?
200
+
181
201
  return items.all? { |e| expected_items_arr.include?(e) }
182
202
  }
183
203
  @fail_msg = lambda { |row, _opts|
184
- items = row.fetch(@varname).split(separator)
204
+ items = row.fetch(@varname).to_s.split(separator, -1)
185
205
  diff_items = items - expected_items_arr
186
- "expected that tokenized items of value #{items.inspect} are a subset of #{expected_items_arr.inspect}, but items #{diff_items.inspect} are not"
206
+ "expected that tokenized items of value #{items.inspect} are a subset of "\
207
+ "#{expected_items_arr.inspect}, but items #{diff_items.inspect} are not"
187
208
  }
188
209
  end
189
210
  end
190
211
 
212
+ # Check if the items of a tokenized column value are unique (i.e. form a set without duplicates)
191
213
  class ColumnIsSet < ColumnMeetsCondition
192
- def initialize(varname, separator, _placeholder = nil)
214
+ def initialize(varname, separator, _placeholder = nil) # rubocop:disable Metrics/MethodLength
193
215
  super(varname, nil, nil)
194
216
  @ok_condition_fn = lambda { |val, _cfg|
195
- return true if val.to_s == ""
217
+ return true if val.to_s == ''
196
218
 
197
- items = val.split(separator)
219
+ items = val.to_s.split(separator, -1)
198
220
  all_uniq = (items.size == items.uniq.size)
199
221
  return all_uniq
200
222
  }
201
223
  @fail_msg = lambda { |row, _opts|
202
- items = row.fetch(@varname).split(separator)
224
+ items = row.fetch(@varname).to_s.split(separator, -1)
203
225
  non_uniq_items = items.detect { |e| items.count(e) > 1 }
204
226
  "expected that items of tokenized value #{items.inspect} are uniqe, but items #{non_uniq_items.inspect} are not"
205
227
  }
206
228
  end
207
229
  end
208
230
 
231
+ # Check if a tokenized column value is a set of given values (no duplicates)
209
232
  class ColumnIsSetWithDomain < ColumnMeetsCondition
210
- def initialize(varname, expected_items_arr, separator, _placeholder = nil)
233
+ def initialize(varname, expected_items_arr, separator, _placeholder = nil) # rubocop:disable Metrics/MethodLength
211
234
  super(varname, nil, nil)
212
235
  @pre_checks << ColumnIsSet.new(varname, separator)
213
236
  @ok_condition_fn = lambda { |val, _cfg|
214
- return true if val.to_s == ""
237
+ expected_items_arr.map!(&:to_s) # turn nil -> ""
238
+
239
+ items = val.to_s.split(separator, -1)
240
+ return true if items.empty? && expected_items_arr.include?(nil.to_s) # empty str allowed
241
+
242
+ return false if items.empty?
215
243
 
216
- items = val.split(separator)
217
244
  all_valid = items.all? { |i| expected_items_arr.include?(i) }
218
245
  return all_valid
219
246
  }
220
247
  @fail_msg = lambda { |row, _opts|
221
- items = row.fetch(@varname).split(separator)
248
+ items = row.fetch(@varname).to_s.split(separator, -1)
222
249
  "expected that items of tokenized value #{items.inspect} are a subset of #{expected_items_arr.inspect}"
223
250
  }
224
- # @fail_msg = ->(row, opts) { "Unexpected value:#{row.fetch(@varname).inspect} for column:#{@varname.inspect}, expected that items of tokenized value #{row.fetch(@varname).split(separator)} are uniqe and a subset of #{expected_items_arr.inspect}" }
251
+ # @fail_msg = lambda {|row, opts|
252
+ # "Unexpected value:#{row.fetch(@varname).inspect} for column:#{@varname.inspect},
253
+ # expected that items of tokenized value #{row.fetch(@varname).split(separator)}
254
+ # are uniqe and a subset of #{expected_items_arr.inspect}" }
225
255
  end
226
256
  end
227
257
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Probe
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.4"
5
5
  end
data/lib/csv/probe.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "csv"
3
+ require "csv" # TODO: should i require it here?
4
4
  require "terminal-table"
5
5
  require_relative "probe/version"
6
6
  require_relative "probe/checks"
@@ -43,7 +43,7 @@ module Probe
43
43
  end
44
44
 
45
45
  # Extend CSV::Table with .lint(...) method
46
- class CSV::Table
46
+ class CSV::Table # rubocop:disable Style/ClassAndModuleChildren
47
47
  def lint(checks, opts = {})
48
48
  opts[:headers] = true unless opts.key?(:headers) # CSV::Table has always headers, hence set :headers = true
49
49
  Probe.lint_rows(self, checks, opts)
@@ -51,7 +51,7 @@ class CSV::Table
51
51
  end
52
52
 
53
53
  # Extend CSV::Row with .lint(...) method
54
- class CSV::Row
54
+ class CSV::Row # rubocop:disable Style/ClassAndModuleChildren
55
55
  def lint(checks, opts = {})
56
56
  Probe.lint_row(self, checks, opts)
57
57
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-probe
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - homebase.dev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-02 00:00:00.000000000 Z
11
+ date: 2022-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: csv
@@ -38,8 +38,8 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '3.0'
41
- description: This gem provides a simple framework for CSV::Table/CSV::Row validation
42
- using custom rules for columns and rows
41
+ description: This gem provides a simple framework for CSV::Table/CSV::Row lintingusing
42
+ custom rules for columns and rows
43
43
  email:
44
44
  - homebase.dev@gmail.com
45
45
  executables: []
@@ -64,6 +64,7 @@ homepage: https://gitlab.com/homebase-dev/csv-probe
64
64
  licenses:
65
65
  - MIT
66
66
  metadata:
67
+ rubygems_mfa_required: 'true'
67
68
  homepage_uri: https://gitlab.com/homebase-dev/csv-probe
68
69
  source_code_uri: https://gitlab.com/homebase-dev/csv-probe
69
70
  changelog_uri: https://gitlab.com/homebase-dev/csv-probe