dh_easy-qa 0.0.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/dh_easy/qa.rb +17 -0
- data/lib/dh_easy/qa/helpers.rb +46 -0
- data/lib/dh_easy/qa/save_output.rb +158 -0
- data/lib/dh_easy/qa/validate_external.rb +30 -0
- data/lib/dh_easy/qa/validate_groups.rb +51 -0
- data/lib/dh_easy/qa/validate_internal.rb +172 -0
- data/lib/dh_easy/qa/validate_rules.rb +89 -0
- data/lib/dh_easy/qa/validate_type.rb +64 -0
- data/lib/dh_easy/qa/validate_value.rb +75 -0
- data/lib/dh_easy/qa/validator.rb +49 -0
- data/lib/dh_easy/qa/version.rb +5 -0
- metadata +143 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 5f50c7fbde345982f84159b95991622284552a7c879cef3c41433afb40bd297b
|
4
|
+
data.tar.gz: a528d2eab5ad5ab35cae524ba67a10b07c4a2f58d60bdddd2f11b11af8276bd3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1cc9e15f9e2284744af36d311e185c8ee0df773e76e5b1d6d61a267158264efa63340c0227bdb68e2c1dac2e22a528cdc025204ccd669f140a9daad7f839cdd5
|
7
|
+
data.tar.gz: 62ddd23ffdb3fe48ba7380f6ba7ff5196e80ce6c3c72d79b4c3ae4640a797c70fef2a38b10e0047758f648f53eae2fc40fb6ceeefed60250cf584920e741a493
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "dh_easy/qa"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/dh_easy/qa.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'time'
|
2
|
+
require 'dh_easy/config'
|
3
|
+
require 'dh_easy/qa/helpers'
|
4
|
+
require 'dh_easy/qa/version'
|
5
|
+
require 'dh_easy/qa/validate_internal'
|
6
|
+
require 'dh_easy/qa/validate_external'
|
7
|
+
require 'dh_easy/qa/validate_rules'
|
8
|
+
require 'dh_easy/qa/validate_type'
|
9
|
+
require 'dh_easy/qa/validate_value'
|
10
|
+
require 'dh_easy/qa/validate_groups'
|
11
|
+
require 'dh_easy/qa/save_output'
|
12
|
+
require 'dh_easy/qa/validator'
|
13
|
+
|
14
|
+
module DhEasy
|
15
|
+
module Qa
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Helpers shared by the validator classes that +include+ this module.
#
# Includers must expose an +errored_item+ reader returning a Hash that holds a
# +:failures+ Hash; the condition helpers only read their arguments.
module Helpers
  # Flags +field_to_validate+ as having failed +validation+ on the current
  # errored item and remembers the offending record.
  #
  # @param data_hash [Hash] record being validated
  # @param field_to_validate [String] name of the field that failed
  # @param validation [String] validation name, e.g. 'type' or 'required'
  def add_errored_item(data_hash, field_to_validate, validation)
    failure_key = "#{field_to_validate}_#{validation}".to_sym
    errored_item[:failures][failure_key] = 'fail'
    # Guard on :item, the key that is actually written, so the record is
    # stored once per errored item.
    errored_item[:item] = data_hash if errored_item[:item].nil?
  end

  # Evaluates an +if+ clause against a record.
  #
  # +if_params+ is either a single condition (<tt>{field => condition}</tt>)
  # or a combinator (<tt>{'and' => [...]}</tt> / <tt>{'or' => [...]}</tt>)
  # whose children are single conditions.
  #
  # @return [Boolean, nil] whether the clause holds for +data_hash+
  def pass_if?(if_params, data_hash)
    operator = if_params.keys.first
    case operator
    when 'and', 'or'
      results = if_params[operator].map do |child_if_params|
        field_name, condition_hash = child_if_params.first
        evaluate_condition(field_name, condition_hash, data_hash)
      end
      operator == 'and' ? results.all? : results.any?
    else
      # No combinator: the first key is the field name itself.
      evaluate_condition(operator, if_params[operator], data_hash)
    end
  end

  # Evaluates one condition, e.g. <tt>{"value" => {"equal" => "A"}}</tt>.
  #
  # Only 'value' conditions are understood; any other condition kind yields
  # nil (treated as "not satisfied" by callers).
  #
  # @raise [StandardError] when the 'value' hash has no known comparison
  def evaluate_condition(field_name, condition_hash, data_hash)
    return nil unless condition_hash.keys.first == 'value'

    value_hash = condition_hash['value'] # Ex: {"equal"=>"A"}
    actual = data_hash[field_name]
    # key? instead of truthiness so that `equal: false` is a usable condition.
    if value_hash.key?('equal')
      actual == value_hash['equal']
    elsif value_hash['regex']
      !(actual.to_s =~ Regexp.new(value_hash['regex'], Regexp::IGNORECASE)).nil?
    elsif value_hash['less_than']
      actual.to_i < value_hash['less_than'].to_i
    elsif value_hash['greater_than']
      actual.to_i > value_hash['greater_than'].to_i
    else
      raise StandardError.new("The if condition '#{value_hash}' is unknown.")
    end
  end
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,158 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Converts the errors gathered by the validators into output records.
#
# For every errored item that still has failures after thresholds are applied
# an output record is appended to +outputs+, followed by one summary record
# holding per-failure counts, the collection name and the item total.
class SaveOutput
  attr_reader :total_items, :rules, :errors, :collection_name,
              :outputs, :summary, :error_totals, :fields_to_ignore,
              :specific_validations_to_ignore, :options

  # @param total_items [Integer] number of records that were validated
  # @param rules [Hash, nil] per-field validation options; nil means "no rules"
  # @param errors [Hash] holds +:errored_items+ plus any group failures
  # @param collection_name [String] name written to the +_collection+ keys
  # @param outputs [Array] destination the records are appended to
  # @param options [Hash] may carry 'thresholds' and/or 'threshold'
  def initialize(total_items, rules, errors, collection_name, outputs, options)
    @total_items = total_items
    @rules = rules
    @errors = errors
    @collection_name = collection_name
    @outputs = outputs
    @options = options
    @summary = Hash.new(0)
    @error_totals = {}
    @fields_to_ignore = []
    @specific_validations_to_ignore = []
  end

  # Applies thresholds, then appends errored items and the summary to
  # +outputs+.
  def run
    gather_threshold_totals
    gather_validations_to_ignore
    save_group_errors
    save_errors
    save_summary
  end

  private

  # Rules to iterate over. Callers pass nil when nothing is configured;
  # treating that as an empty rule set lets the summary still be written.
  def configured_rules
    rules || {}
  end

  # thresholds are a setting where you can ignore errors if they are under a
  # specific error rate
  def gather_threshold_totals
    configured_rules.each do |field_to_validate, field_options|
      if return_threshold(field_to_validate, field_options)
        gather_field_threshold_totals(field_to_validate, field_options)
      else
        gather_specific_validation_totals(field_to_validate, field_options)
      end
    end
  end

  # Counts the errored items that have at least one failure on the field.
  def gather_field_threshold_totals(field_to_validate, field_options)
    error_totals[field_to_validate] = errors[:errored_items].count do |errored_item|
      errored_item[:failures].keys.any? { |failure_key| extract_field(failure_key) == field_to_validate }
    end
  end

  # Counts errored items per "<field>_<validation>_fail" name, but only for
  # names that have an entry in options['thresholds'].
  def gather_specific_validation_totals(field_to_validate, field_options)
    thresholds = options['thresholds']
    field_options.each_key do |validation|
      potential_failure_name = "#{field_to_validate}_#{validation}_fail"
      next unless thresholds && thresholds[potential_failure_name]

      error_totals[potential_failure_name] = errors[:errored_items].count do |errored_item|
        errored_item[:failures].keys.any? { |failure_key| "#{failure_key}_fail" == potential_failure_name }
      end
    end
  end

  def gather_validations_to_ignore
    configured_rules.each do |field_to_validate, field_options|
      field_threshold = return_threshold(field_to_validate, field_options)
      if field_threshold
        gather_fields_to_ignore(field_to_validate, field_threshold)
      else
        gather_specific_validations_to_ignore(field_to_validate, field_options)
      end
    end
  end

  # Marks a whole field as ignorable when its success ratio beats the
  # threshold.
  def gather_fields_to_ignore(field_to_validate, field_threshold)
    total_errors = error_totals[field_to_validate]
    return unless total_errors

    success_ratio = (total_items - total_errors).to_f / total_items
    return unless success_ratio > field_threshold.to_f

    puts "Ignoring #{field_to_validate}"
    fields_to_ignore.push(field_to_validate)
  end

  # Marks single validations as ignorable when their success ratio beats the
  # configured threshold (a 0.0 threshold also ignores a 0.0 success ratio).
  def gather_specific_validations_to_ignore(field_to_validate, field_options)
    field_options.each_key do |validation|
      potential_failure_name = "#{field_to_validate}_#{validation}_fail"
      total_errors = error_totals[potential_failure_name]
      next unless total_errors

      specific_validation_threshold = options['thresholds'][potential_failure_name].to_f
      success_ratio = (total_items - total_errors).to_f / total_items
      ignorable = success_ratio > specific_validation_threshold ||
                  (specific_validation_threshold == 0.0 && success_ratio == 0.0)
      next unless ignorable

      puts "Ignoring #{potential_failure_name}"
      specific_validations_to_ignore.push(potential_failure_name)
    end
  end

  # Threshold for a field: the per-field entry of options['thresholds'] when
  # that hash exists, otherwise the rule's own 'threshold' or the global one.
  def return_threshold(field_to_validate, field_options)
    if options['thresholds']
      options['thresholds'][field_to_validate]
    else
      field_options['threshold'] || options['threshold']
    end
  end

  # Copies group-level failures (every key of +errors+ except
  # +:errored_items+) into the summary.
  def save_group_errors
    errors.each do |error_name, details|
      next if error_name == :errored_items

      if details.is_a?(Hash)
        # Hash payloads name themselves through their :failure entry.
        failure_name = details.delete(:failure)
        summary["#{failure_name}_fail"] = details
      else
        # Plain markers (ValidateGroups#fail_validation stores the String
        # 'fail') have no :failure entry; key them by their own name.
        summary["#{error_name}_fail"] = details
      end
    end
  end

  def save_errors
    prune = fields_to_ignore.any? || specific_validations_to_ignore.any?
    errors[:errored_items].each do |errored_item|
      remove_threshold_failures(errored_item) if prune
      errored_item[:failures].each do |error_key, value|
        summary["#{error_key}_#{value}"] += 1
      end
      errored_item['_collection'] = collection_name
      outputs << errored_item if errored_item[:failures].any?
    end
  end

  # Drops failures that belong to an ignored field or ignored validation.
  def remove_threshold_failures(errored_item)
    errored_item[:failures].delete_if do |error_name, _value|
      fields_to_ignore.include?(extract_field(error_name)) ||
        specific_validations_to_ignore.include?("#{error_name}_fail")
    end
  end

  def save_summary
    summary['pass'] = 'true' if summary.empty?
    summary['_collection'] = "#{collection_name}_summary"
    summary['total_items'] = total_items
    outputs << summary
  end

  # "price_type" -> "price": strips the last underscore-separated segment.
  def extract_field(field_sym)
    segments = field_sym.to_s.split('_')
    segments[0...-1].join('_')
  end
end
|
157
|
+
end
|
158
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Validates records supplied directly by the caller ("external" data) and
# writes the resulting errored items and summary to +outputs+.
class ValidateExternal
  attr_reader :data, :errors, :rules, :outputs, :collection_name, :options

  # @param data [Array<Hash>, nil] records to validate
  # @param config [Hash, nil] QA config; 'individual_validations' holds the rules
  # @param outputs [Array] destination for errored items and the summary
  # @param collection_name [String] name used for the output collections
  # @param options [Hash] passed through to SaveOutput (thresholds)
  def initialize(data, config, outputs, collection_name, options)
    @data = data
    @rules = config['individual_validations'] if config
    @outputs = outputs
    @collection_name = collection_name
    @options = options
    @errors = { errored_items: [] }
  end

  # Runs group and rule validations, then saves the output.
  #
  # @return [Hash, nil] the errors hash, or nil when any step raised (the
  #   error is printed, not re-raised)
  def run
    # A missing data set is validated as an empty one so that a summary is
    # still written instead of failing on nil.
    records = data || []
    if records.any?
      ValidateGroups.new(records, nil, collection_name, errors).run
      ValidateRules.new(records, errors, rules).run if rules
    end
    # SaveOutput iterates the rules, so hand it an empty set when none are
    # configured.
    SaveOutput.new(records.count, rules || {}, errors, collection_name, outputs, options).run
    errors
  rescue StandardError => e
    puts "An error has occurred: #{e}"
    nil
  end
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Runs user-supplied group validations.
#
# When a +group_validations.rb+ file exists in the current working directory
# it is loaded, its +GroupValidations+ module is mixed into this class, and
# every public instance method of that module is invoked. Those methods can
# read +data+, +scraper_name+, +collection_name+ and +errors+, and can call
# +fail_validation+.
class ValidateGroups
  attr_reader :data, :scraper_name, :collection_name, :errors

  # @param data [Array] records under validation
  # @param scraper_name [String, nil] scraper the records belong to
  # @param collection_name [String] collection the records belong to
  # @param errors [Hash] shared errors hash that failures are written into
  def initialize(data, scraper_name, collection_name, errors)
    @data = data
    @scraper_name = scraper_name
    @collection_name = collection_name
    @errors = errors
  end

  # @return [Array<Symbol>, nil] the validation methods that were called, or
  #   nil when no group validations file is present
  def run
    puts "Running Group Validations"
    return unless group_validations_present?

    load_module
    include_module
    call_validation_methods
  end

  private

  # Records a group-level failure under +name+ in the errors hash.
  def fail_validation(name)
    errors[name.to_sym] = 'fail'
  end

  # NOTE: executes whatever Ruby code the file in the working directory
  # contains.
  def load_module
    load group_validations_path
  end

  def include_module
    self.class.send(:include, GroupValidations)
  end

  def call_validation_methods
    GroupValidations.public_instance_methods.each { |validation| send(validation) }
  end

  def group_validations_present?
    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    File.exist?(group_validations_path)
  end

  def group_validations_path
    @group_validations_path ||= File.expand_path('group_validations.rb', Dir.pwd)
  end
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Entry point for validating an "internal" scraper: reads the scraper name
# and collections from +vars+, the rules from +config+, and delegates to
# ValidateScraper together with any thresholds found on disk.
class ValidateInternal
  attr_reader :scraper_name, :collections, :rules, :outputs

  # @param vars [Hash] expects 'scraper_name' and 'collections'
  # @param config [Hash, nil] QA config; 'individual_validations' holds the rules
  # @param outputs [Array] destination for errored items and summaries
  def initialize(vars, config, outputs)
    @scraper_name = vars['scraper_name']
    @collections = vars['collections']
    # A missing config simply means "no rules" (same guard ValidateExternal
    # uses) rather than a NoMethodError outside of #run's rescue.
    @rules = config['individual_validations'] if config
    @outputs = outputs
  end

  # @return [Object, nil] whatever ValidateScraper#run returns, or nil when
  #   it raised (the error is printed, not re-raised)
  def run
    ValidateScraper.new(scraper_name, collections, rules, outputs, thresholds).run
  rescue StandardError => e
    puts "An error has occurred: #{e}"
    nil
  end

  private

  # thresholds are a setting where you can suppress errors if they are under
  # a specific error rate. Read from thresholds.yaml in the working directory;
  # nil when that file does not exist.
  def thresholds
    @thresholds ||= begin
      file_path = File.expand_path('thresholds.yaml', Dir.pwd)
      # File.exist? replaces File.exists?, which Ruby 3.2 removed; the block
      # form closes the handle once the YAML has been read.
      File.open(file_path) { |file| YAML.load(file) } if File.exist?(file_path)
    end
  end
end
|
36
|
+
|
37
|
+
# Validates the requested collections of one scraper, using the collection
# list returned by Datahen::Client::ScraperJobOutput#collections.
class ValidateScraper
  attr_reader :scraper_name, :collections, :rules, :outputs, :options

  # @param scraper_name [String]
  # @param collections [Array<String>, nil] collection names to validate
  # @param rules [Hash, nil] per-field validation rules
  # @param outputs [Array] destination for errored items and summaries
  # @param thresholds [Hash, nil] thresholds keyed by scraper name
  def initialize(scraper_name, collections, rules, outputs, thresholds)
    @scraper_name = scraper_name
    @collections = collections
    @rules = rules
    @outputs = outputs
    @options = {}
    options['thresholds'] = thresholds[scraper_name] if thresholds && thresholds[scraper_name]
  end

  # Validates every requested collection when the collections request
  # succeeded; otherwise prints the response message.
  #
  # @return [Object, nil] nil when the request failed or an error was raised
  def run
    output_scraper
    unless status_ok?
      output_response
      return nil
    end
    validate_collections if collections && collections.any?
  rescue StandardError => e
    puts "An error has occurred for the scraper named '#{scraper_name}': #{e}"
    nil
  end

  private

  def output_scraper
    puts "validating scraper: #{scraper_name}"
  end

  def status_ok?
    !collection_response.parsed_response.nil? && collection_response.code == 200
  end

  def validate_collections
    collections.each do |collection_name|
      collection = collection_counts.find { |collection_hash| collection_hash['collection'] == collection_name }
      if collection
        ValidateCollection.new(scraper_name, collection_name, collection['outputs'], rules, outputs, options).run
      else
        puts "collection #{collection_name} is missing"
      end
    end
  end

  def output_response
    if collection_response.parsed_response.nil?
      puts "collection response is null"
    else
      puts collection_response.parsed_response['message']
    end
  end

  def collection_counts
    @collection_counts ||= collection_response.parsed_response
  end

  # Memoized with ||= so the collections request is issued once per instance
  # instead of on every access.
  def collection_response
    @collection_response ||= Datahen::Client::ScraperJobOutput.new.collections(scraper_name)
  end
end
|
101
|
+
|
102
|
+
# Validates one collection of a scraper: downloads the collection's records
# from the most recent 'active' or 'done' job, runs group and rule
# validations, and saves the output under "<scraper>_<collection>".
class ValidateCollection
  attr_reader :scraper_name, :collection_name, :total_records, :rules, :errors, :outputs, :options

  # @param scraper_name [String]
  # @param collection_name [String]
  # @param total_records [Integer] record count reported for the collection
  # @param rules [Hash, nil] per-field validation rules
  # @param outputs [Array] destination for errored items and the summary
  # @param options [Hash] passed through to SaveOutput (thresholds)
  def initialize(scraper_name, collection_name, total_records, rules, outputs, options)
    @scraper_name = scraper_name
    @collection_name = collection_name
    @total_records = total_records
    @rules = rules
    @outputs = outputs
    @options = options
    @errors = { errored_items: [] }
  end

  def run
    output_collection
    unless most_recent_finished_job
      puts "No job with status 'done' available"
      return
    end

    puts "data count #{data.count}"
    if data.any?
      ValidateGroups.new(data, scraper_name, collection_name, errors).run
      ValidateRules.new(data, errors, rules).run if rules
    end
    SaveOutput.new(data.count, rules, errors, outputs_collection_name, outputs, options).run
  end

  private

  def output_collection
    puts "Validating collection: #{collection_name}"
  end

  # Newest job (by 'created_at') whose status is 'active' or 'done'; nil when
  # the jobs request did not return 200 or no such job exists.
  def most_recent_finished_job
    @most_recent_finished_job ||= begin
      jobs_response = Datahen::Client::ScraperJob.new.all(scraper_name)
      if jobs_response.code == 200
        jobs_response.parsed_response
                     .sort_by { |job| job['created_at'] }
                     .reverse
                     .find { |job| %w[active done].include?(job['status']) }
      end
    end
  end

  # All records of the collection, fetched 500 per page.
  def data
    @data ||= fetch_records
  end

  # Pages through the job output until +total_records+ have been collected,
  # pausing one second after each request. Stops early on a nil response and
  # on an empty page: an empty page can never advance the count, so going on
  # would loop forever when fewer records exist than were reported.
  def fetch_records
    collected = []
    page = 1
    while collected.count < total_records
      records = Datahen::Client::JobOutput.new(per_page: 500, page: page)
                                          .all(most_recent_finished_job['id'], collection_name)
                                          .parsed_response
      sleep 1
      if records.nil?
        puts "All ScraperJobOutput request was nil. Total records: #{total_records}, data count: #{collected.count}, page: #{page}"
        break
      end
      if records.empty?
        puts "ScraperJobOutput page was empty. Total records: #{total_records}, data count: #{collected.count}, page: #{page}"
        break
      end
      records.each { |record| collected << record }
      page += 1
    end
    collected
  end

  def outputs_collection_name
    @outputs_collection_name ||= "#{scraper_name}_#{collection_name}"
  end
end
|
171
|
+
end
|
172
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Applies the per-field rules to every record and collects the records that
# failed at least one validation into errors[:errored_items].
class ValidateRules
  include Helpers

  attr_reader :data, :errors, :rules
  attr_accessor :errored_item

  # @param data [Array<Hash>] records to validate
  # @param errors [Hash] shared errors hash; must hold an :errored_items Array
  # @param rules [Hash] field name => validation options
  def initialize(data, errors, rules)
    @data = data
    @errors = errors
    @rules = rules
  end

  def run
    puts "Validating Rules"
    handle_rules
  end

  private

  # One fresh errored item per record; it is kept only when a validation
  # recorded a failure on it.
  def handle_rules
    data.each do |data_hash|
      reset_errored_item
      rules.each do |field_to_validate, options|
        next unless passes_required_check?(options, data_hash, field_to_validate)

        options.each do |validation, value|
          apply_validation(validation, value, data_hash, field_to_validate)
        end
      end
      next if errored_item.nil? || errored_item[:failures].empty?

      errors[:errored_items].push(errored_item)
    end
  end

  # Dispatches a single validation entry of a rule.
  def apply_validation(validation, value, data_hash, field_to_validate)
    case validation
    when 'type'
      ValidateType.new(data_hash, field_to_validate, value, rules, errored_item).run
    when 'value'
      ValidateValue.new(data_hash, field_to_validate, value, errored_item).run
    when 'length'
      validate_length(data_hash, field_to_validate, value)
    when /required|threshold|if/
      # Handled elsewhere; nothing to validate here.
      nil
    else
      unknown_validation_error(validation) if validation !~ /format/
    end
  end

  # Decides whether the remaining validations of a rule should run.
  #
  # NOTE(review): a rule without `required: true` yields false, so none of
  # its other validations run - confirm this is intended.
  def passes_required_check?(options, data_hash, field_to_validate)
    return false unless options['required'] == true
    return false if options['if'] && !pass_if?(options['if'], data_hash)

    check_for_required_failure(data_hash, field_to_validate)
  end

  # Records a 'required' failure when the field is missing.
  #
  # @return [Boolean] true when the field is present
  def check_for_required_failure(data_hash, field_to_validate)
    return true unless fails_required_check?(data_hash, field_to_validate)

    add_errored_item(data_hash, field_to_validate, 'required')
    false
  end

  # A field is missing when it is nil or an empty String.
  def fails_required_check?(data_hash, field_to_validate)
    value = data_hash[field_to_validate]
    value.nil? || (value.class == String && value.empty?)
  end

  def validate_length(data_hash, field_to_validate, length)
    return if data_hash[field_to_validate].to_s.length == length

    add_errored_item(data_hash, field_to_validate, 'length')
  end

  def reset_errored_item
    self.errored_item = { failures: {} }
  end

  def unknown_validation_error(validation)
    raise StandardError.new("The validation '#{validation}' is unknown.")
  end
end
|
88
|
+
end
|
89
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Checks that a field's value has the type a rule asks for
# ('String', 'Integer', 'Float', 'Date' or 'Url') and records a
# "<field>_type" failure on the errored item when it does not.
class ValidateType
  include Helpers

  attr_reader :data_hash, :field_to_validate, :desired_type, :rules, :errored_item

  # @param data_hash [Hash] record being validated
  # @param field_to_validate [String] field name
  # @param desired_type [String] type name from the rule
  # @param rules [Hash] all rules; consulted for the field's date 'format'
  # @param errored_item [Hash] failure accumulator for the current record
  def initialize(data_hash, field_to_validate, desired_type, rules, errored_item)
    @data_hash = data_hash
    @field_to_validate = field_to_validate
    @desired_type = desired_type
    @rules = rules
    @errored_item = errored_item
  end

  # @raise [StandardError] for an unknown type or a Date rule without format
  def run
    handle_types
  end

  private

  def handle_types
    value = data_hash[field_to_validate]
    case desired_type
    when 'String'
      type_failure if value.class != String
    when 'Integer'
      # Integer instead of Fixnum: the Fixnum constant was removed in Ruby 3.2
      # and Integer covers the same values on older versions.
      type_failure unless value.is_a?(Integer) || value.to_s.strip =~ /\A\d+(\,\d+)?\z/
    when 'Float'
      type_failure unless value.class == Float || value.to_s.strip =~ /\A\.?\d+(\,\d+)?(\.\d+)?\z/
    when 'Date'
      validate_date_type
    when 'Url'
      # \A anchors at the start of the value (^ would accept a URL on any
      # later line); to_s keeps non-String values from raising on !~.
      type_failure if value.to_s !~ %r{\Ahttps?://}
    else
      unknown_type_error(desired_type)
    end
  end

  # Records the type failure for the field under validation.
  def type_failure
    add_errored_item(data_hash, field_to_validate, 'type')
  end

  # The value must be a String that parses with the rule's 'format' and
  # round-trips through strftime to the very same text.
  def validate_date_type
    format = rules[field_to_validate]['format']
    missing_date_format_error if format.nil?
    date_str = data_hash[field_to_validate]
    # Time.strptime only accepts Strings; anything else is not a date.
    return type_failure unless date_str.is_a?(String)

    begin
      # strptime has no unpadded %-m / %-d directives; parse with the padded
      # forms and let the strftime round-trip enforce the exact format.
      date = Time.strptime(date_str, format.gsub('%-m', '%m').gsub('%-d', '%d'))
      type_failure if date.strftime(format) != date_str
    rescue ArgumentError => e
      # Older Rubies say "invalid strptime format", newer ones
      # "invalid date or strptime format"; both mean the value did not parse.
      raise StandardError.new(e.to_s) unless e.to_s =~ /\Ainvalid (date or )?strptime format/

      type_failure
    end
  end

  def unknown_type_error(desired_type)
    raise StandardError.new("The validation type '#{desired_type}' is unknown.")
  end

  def missing_date_format_error
    raise StandardError.new("Date validation is missing a format.")
  end
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Checks a field's value against a 'value' rule ('equal', 'regex',
# 'less_than' or 'greater_than', optionally guarded by an 'if' clause) and
# records a "<field>_value" failure on the errored item when it does not hold.
class ValidateValue
  include Helpers

  attr_reader :data_hash, :field_to_validate, :params, :errored_item

  # @param data_hash [Hash] record being validated
  # @param field_to_validate [String] field name
  # @param params [Hash] the 'value' rule
  # @param errored_item [Hash] failure accumulator for the current record
  def initialize(data_hash, field_to_validate, params, errored_item)
    @data_hash = data_hash
    @field_to_validate = field_to_validate
    @params = params
    @errored_item = errored_item
  end

  # @raise [StandardError] when the rule has no known comparison
  def run
    if_exists? ? handle_if : main_value_check
  end

  private

  def if_exists?
    !params['if'].nil?
  end

  # Runs the check only when the rule's 'if' clause holds for the record.
  def handle_if
    main_value_check if pass_if?(params['if'], data_hash)
  end

  def main_value_check
    actual = data_hash[field_to_validate]
    # key? rather than truthiness, so `equal: false` is a valid expectation
    # instead of falling through to the "unknown rule" error.
    if params.key?('equal')
      if equal_with_operators?
        equal_with_operators
      elsif actual != params['equal']
        value_failure
      end
    elsif params['regex']
      value_failure if actual.to_s !~ Regexp.new(params['regex'], Regexp::IGNORECASE)
    elsif params['less_than']
      value_failure unless actual.to_i < params['less_than'].to_i
    elsif params['greater_than']
      value_failure unless actual.to_i > params['greater_than'].to_i
    else
      unknown_value_error
    end
  end

  # Records the value failure for the field under validation.
  def value_failure
    add_errored_item(data_hash, field_to_validate, 'value')
  end

  # Only the 'or' operator is handled; other operators record nothing.
  def equal_with_operators
    case equal_operator
    when 'or'
      value_failure if !or_statment
    end
  end

  # SECURITY(review): every entry of the 'or' list is interpolated into Ruby
  # source and eval'ed, so entries are executed as code (a bare word such as
  # A is resolved as a constant, not compared as the text "A"). Safe only
  # while the rules come from a trusted config; left unchanged because
  # existing configs may rely on entries being Ruby literals.
  def or_statment
    eval or_vals.map { |val| "data_hash[field_to_validate] == #{val}" }.join(' || ')
  end

  def equal_with_operators?
    params['equal'].is_a?(Hash)
  end

  def equal_operator
    params['equal'].keys.first
  end

  def or_vals
    params['equal']['or']
  end

  def unknown_value_error
    raise StandardError.new("The value rule '#{params}' is unknown.")
  end
end
|
74
|
+
end
|
75
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Public facade of the QA gem: holds the data and options and hands them,
# together with the 'qa' section of the local config, to the internal or
# external validation runner.
class Validator
  attr_reader :local_config, :data, :options, :errors

  # Configuration.
  #
  # @param [Hash] value Configuration.
  #
  # @return [Hash]
  attr_writer :config

  # @param data [Array<Hash>, nil] records for external validation
  # @param options [Hash] validation options (e.g. thresholds)
  def initialize(data=nil, options={})
    load_config
    @options = options
    @data = data
  end

  # Validates "internal" scrapers, i.e. scrapers that run on Datahen.
  def validate_internal(vars, outputs)
    runner = ValidateInternal.new(vars, config, outputs)
    runner.run
  end

  # Validates data that comes from "external" sources.
  def validate_external(outputs, collection_name)
    runner = ValidateExternal.new(data, config, outputs, collection_name, options)
    runner.run
  end

  # Configuration; defaults to the 'qa' entry of the local config.
  #
  # @return [Hash]
  def config
    @config ||= local_config['qa']
  end

  private

  def load_config
    @local_config ||= DhEasy::Config::Local.new
  end

  def config_path
    local_config.file_path
  end
end
|
48
|
+
end
|
49
|
+
end
|
metadata
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dh_easy-qa
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.33
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Lynam
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-12-04 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: dh_easy-config
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.16'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.16'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '5'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '5'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '10.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '10.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: simplecov
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: simplecov-console
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: DataHen Easy QA gem allows you to ensure the quality of output on Fetch
|
98
|
+
email:
|
99
|
+
- dlynam@gmail.com
|
100
|
+
executables: []
|
101
|
+
extensions: []
|
102
|
+
extra_rdoc_files: []
|
103
|
+
files:
|
104
|
+
- bin/console
|
105
|
+
- bin/setup
|
106
|
+
- lib/dh_easy/qa.rb
|
107
|
+
- lib/dh_easy/qa/helpers.rb
|
108
|
+
- lib/dh_easy/qa/save_output.rb
|
109
|
+
- lib/dh_easy/qa/validate_external.rb
|
110
|
+
- lib/dh_easy/qa/validate_groups.rb
|
111
|
+
- lib/dh_easy/qa/validate_internal.rb
|
112
|
+
- lib/dh_easy/qa/validate_rules.rb
|
113
|
+
- lib/dh_easy/qa/validate_type.rb
|
114
|
+
- lib/dh_easy/qa/validate_value.rb
|
115
|
+
- lib/dh_easy/qa/validator.rb
|
116
|
+
- lib/dh_easy/qa/version.rb
|
117
|
+
homepage: https://datahen.com
|
118
|
+
licenses:
|
119
|
+
- MIT
|
120
|
+
metadata:
|
121
|
+
homepage_uri: https://datahen.com
|
122
|
+
source_code_uri: https://github.com/DataHenOfficial/dh_easy-qa
|
123
|
+
post_install_message:
|
124
|
+
rdoc_options: []
|
125
|
+
require_paths:
|
126
|
+
- lib
|
127
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 2.2.2
|
132
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
133
|
+
requirements:
|
134
|
+
- - ">="
|
135
|
+
- !ruby/object:Gem::Version
|
136
|
+
version: '0'
|
137
|
+
requirements: []
|
138
|
+
rubyforge_project:
|
139
|
+
rubygems_version: 2.7.6
|
140
|
+
signing_key:
|
141
|
+
specification_version: 4
|
142
|
+
summary: DataHen Easy Quality Assurance gem
|
143
|
+
test_files: []
|