dh_easy-qa 0.0.33
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/dh_easy/qa.rb +17 -0
- data/lib/dh_easy/qa/helpers.rb +46 -0
- data/lib/dh_easy/qa/save_output.rb +158 -0
- data/lib/dh_easy/qa/validate_external.rb +30 -0
- data/lib/dh_easy/qa/validate_groups.rb +51 -0
- data/lib/dh_easy/qa/validate_internal.rb +172 -0
- data/lib/dh_easy/qa/validate_rules.rb +89 -0
- data/lib/dh_easy/qa/validate_type.rb +64 -0
- data/lib/dh_easy/qa/validate_value.rb +75 -0
- data/lib/dh_easy/qa/validator.rb +49 -0
- data/lib/dh_easy/qa/version.rb +5 -0
- metadata +143 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 5f50c7fbde345982f84159b95991622284552a7c879cef3c41433afb40bd297b
|
4
|
+
data.tar.gz: a528d2eab5ad5ab35cae524ba67a10b07c4a2f58d60bdddd2f11b11af8276bd3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1cc9e15f9e2284744af36d311e185c8ee0df773e76e5b1d6d61a267158264efa63340c0227bdb68e2c1dac2e22a528cdc025204ccd669f140a9daad7f839cdd5
|
7
|
+
data.tar.gz: 62ddd23ffdb3fe48ba7380f6ba7ff5196e80ce6c3c72d79b4c3ae4640a797c70fef2a38b10e0047758f648f53eae2fc40fb6ceeefed60250cf584920e741a493
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "dh_easy/qa"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
# Open an interactive IRB session; the requires earlier in this script have
# already loaded bundler, the gem and irb by the time this line runs.
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/dh_easy/qa.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'time'
|
2
|
+
require 'dh_easy/config'
|
3
|
+
require 'dh_easy/qa/helpers'
|
4
|
+
require 'dh_easy/qa/version'
|
5
|
+
require 'dh_easy/qa/validate_internal'
|
6
|
+
require 'dh_easy/qa/validate_external'
|
7
|
+
require 'dh_easy/qa/validate_rules'
|
8
|
+
require 'dh_easy/qa/validate_type'
|
9
|
+
require 'dh_easy/qa/validate_value'
|
10
|
+
require 'dh_easy/qa/validate_groups'
|
11
|
+
require 'dh_easy/qa/save_output'
|
12
|
+
require 'dh_easy/qa/validator'
|
13
|
+
|
14
|
+
# Namespace declaration only: DhEasy::Qa is opened here with an empty body.
# The files required above re-open the same modules to add their contents.
module DhEasy
|
15
|
+
module Qa
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Helpers shared by the validation classes (mixed in with `include Helpers`).
#
# The including object must respond to `errored_item` and return a Hash that
# holds a :failures Hash; add_errored_item writes into it.
module Helpers
  # Records that +validation+ failed for +field_to_validate+.
  #
  # Sets errored_item[:failures][:"<field>_<validation>"] to 'fail' and stores
  # the offending record under errored_item[:item].
  #
  # @param data_hash [Hash] record being validated
  # @param field_to_validate [String] name of the field that failed
  # @param validation [String] name of the failed validation (e.g. 'type')
  # @return [Hash] the record stored under errored_item[:item]
  def add_errored_item(data_hash, field_to_validate, validation)
    error_name = "#{field_to_validate}_#{validation}"
    errored_item[:failures][error_name.to_sym] = 'fail'
    # Guard on the key that is actually written. The previous guard read
    # :data, which nothing in this module sets, so it never skipped anything.
    errored_item[:item] ||= data_hash
  end

  # Evaluates an `if` clause against a record.
  #
  # The first key of +if_params+ decides the shape:
  # * 'and' / 'or' => Array of single-key hashes ({ field => condition });
  #   every child is evaluated, then combined with all? / any?.
  # * anything else => treated as a field name whose value is the condition.
  #
  # @param if_params [Hash] the `if` clause
  # @param data_hash [Hash] record being validated
  # @return [Boolean] whether the clause holds
  # @raise [StandardError] when a value condition uses an unknown comparison
  def pass_if?(if_params, data_hash)
    first_key = if_params.keys.first
    case first_key
    when 'and', 'or'
      # map (not all?/any? with a block) so every child is evaluated and a
      # malformed child raises even when an earlier one already decided.
      evaluations = if_params[first_key].map do |child_if_params|
        field_name = child_if_params.keys.first
        evaluate_condition(field_name, child_if_params[field_name], data_hash)
      end
      first_key == 'and' ? evaluations.all? : evaluations.any?
    else
      evaluate_condition(first_key, if_params[first_key], data_hash)
    end
  end

  # Evaluates one condition hash against a single field of the record.
  #
  # Only the 'value' condition is understood. Its hash may contain one of
  # 'equal', 'regex' (case-insensitive), 'less_than' or 'greater_than'
  # (both compared after to_i). They are checked in that order.
  #
  # @param field_name [String] field to read from +data_hash+
  # @param condition_hash [Hash] e.g. {"value" => {"equal" => "A"}}
  # @param data_hash [Hash] record being validated
  # @return [Boolean] false when the condition type is not 'value'
  # @raise [StandardError] when the value hash has no known comparison
  def evaluate_condition(field_name, condition_hash, data_hash)
    return false unless condition_hash.keys.first == 'value'

    value_hash = condition_hash['value'] # Ex: {"equal"=>"A"}
    actual = data_hash[field_name]
    # nil check rather than truthiness so that `equal: false` is a usable rule.
    if !value_hash['equal'].nil?
      actual == value_hash['equal']
    elsif value_hash['regex']
      !(actual.to_s =~ Regexp.new(value_hash['regex'], true)).nil?
    elsif value_hash['less_than']
      actual.to_i < value_hash['less_than'].to_i
    elsif value_hash['greater_than']
      actual.to_i > value_hash['greater_than'].to_i
    else
      raise StandardError.new("The if condition '#{value_hash}' is unknown.")
    end
  end
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,158 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Turns the collected validation failures into output records.
#
# run applies the configured thresholds (dropping failures whose success
# ratio is above the threshold), appends every item that still has failures
# to +outputs+, and finally appends one summary hash.
class SaveOutput
  attr_reader :total_items, :rules, :errors, :collection_name,
              :outputs, :summary, :error_totals, :fields_to_ignore,
              :specific_validations_to_ignore, :options

  # @param total_items [Integer] number of records that were validated
  # @param rules [Hash, nil] field name => validation options; nil is treated
  #   as "no rules"
  # @param errors [Hash] :errored_items => Array of { failures:, item: }
  #   hashes, plus any group-validation entries under other keys
  # @param collection_name [String] written to each output as '_collection'
  # @param outputs [Array] destination the results are appended to
  # @param options [Hash] may hold 'thresholds' (Hash) or 'threshold'
  def initialize(total_items, rules, errors, collection_name, outputs, options)
    @total_items = total_items
    @rules = rules
    @errors = errors
    @collection_name = collection_name
    @outputs = outputs
    @options = options
    @summary = Hash.new(0) # counters default to 0 so `+= 1` works
    @error_totals = {}
    @fields_to_ignore = []
    @specific_validations_to_ignore = []
  end

  # Runs the whole pipeline; the order matters because the ignore lists are
  # derived from the totals and are consumed by save_errors.
  #
  # @return [Array] +outputs+
  def run
    gather_threshold_totals
    gather_validations_to_ignore
    save_group_errors
    save_errors
    save_summary
  end

  private

  # Iterates over the rules, tolerating nil. ValidateExternal leaves its rules
  # nil when no config is present; calling each on nil used to raise here and
  # prevent the summary from being written.
  def each_rule(&block)
    (rules || {}).each(&block)
  end

  # Thresholds are a setting where you can ignore errors if they are under a
  # specific error rate. This step counts, per field or per specific
  # validation, how many items failed.
  def gather_threshold_totals
    each_rule do |field_to_validate, field_options|
      if return_threshold(field_to_validate, field_options)
        gather_field_threshold_totals(field_to_validate, field_options)
      else
        gather_specific_validation_totals(field_to_validate, field_options)
      end
    end
  end

  # Counts the items that have at least one failure on +field_to_validate+.
  def gather_field_threshold_totals(field_to_validate, field_options)
    error_totals[field_to_validate] = errors[:errored_items].count do |errored_item|
      errored_item[:failures].keys.any? { |failure_key| extract_field(failure_key) == field_to_validate }
    end
  end

  # For every validation of the field that has its own entry in
  # options['thresholds'] ("<field>_<validation>_fail"), counts the items
  # that failed exactly that validation.
  def gather_specific_validation_totals(field_to_validate, field_options)
    field_options.each do |validation_name, _value|
      potential_failure_name = "#{field_to_validate}_#{validation_name}_fail"
      next unless options['thresholds'] && options['thresholds'][potential_failure_name]

      error_totals[potential_failure_name] = errors[:errored_items].count do |errored_item|
        errored_item[:failures].keys.any? { |failure_key| "#{failure_key}_fail" == potential_failure_name }
      end
    end
  end

  # Decides, from the totals, which fields / validations are within their
  # threshold and should therefore be dropped from the output.
  def gather_validations_to_ignore
    each_rule do |field_to_validate, field_options|
      field_threshold = return_threshold(field_to_validate, field_options)
      if field_threshold
        gather_fields_to_ignore(field_to_validate, field_threshold)
      else
        gather_specific_validations_to_ignore(field_to_validate, field_options)
      end
    end
  end

  # Ignores the whole field when its success ratio exceeds the threshold.
  def gather_fields_to_ignore(field_to_validate, field_threshold)
    total_errors = error_totals[field_to_validate]
    return unless total_errors

    success_ratio = (total_items - total_errors).to_f / total_items
    return unless success_ratio > field_threshold.to_f

    puts "Ignoring #{field_to_validate}"
    fields_to_ignore.push(field_to_validate)
  end

  # Ignores a single validation when its success ratio exceeds its own
  # threshold, or when both the threshold and the success ratio are 0.0.
  def gather_specific_validations_to_ignore(field_to_validate, field_options)
    field_options.each do |validation_name, _value|
      potential_failure_name = "#{field_to_validate}_#{validation_name}_fail"
      total_errors = error_totals[potential_failure_name]
      next unless total_errors

      specific_validation_threshold = options['thresholds'][potential_failure_name].to_f
      success_ratio = (total_items - total_errors).to_f / total_items
      within_threshold = success_ratio > specific_validation_threshold ||
                         (specific_validation_threshold == 0.0 && success_ratio == 0.0)
      next unless within_threshold

      puts "Ignoring #{potential_failure_name}"
      specific_validations_to_ignore.push(potential_failure_name)
    end
  end

  # Field-level threshold lookup: options['thresholds'][field] when a
  # thresholds hash is present, otherwise the rule's own 'threshold' or the
  # global options['threshold'].
  def return_threshold(field_to_validate, field_options)
    if options['thresholds']
      options['thresholds'][field_to_validate]
    else
      field_options['threshold'] || options['threshold']
    end
  end

  # Copies group-validation results (every key of +errors+ except
  # :errored_items) into the summary.
  #
  # A Hash entry is stored under "<its :failure value>_fail" with :failure
  # removed. Any other entry -- e.g. the 'fail' String written by
  # ValidateGroups#fail_validation -- is stored as-is under
  # "<error key>_fail"; previously such entries raised a TypeError.
  def save_group_errors
    errors.each do |error_name, output|
      next if error_name == :errored_items

      if output.is_a?(Hash)
        failure_name = output.delete(:failure)
        summary["#{failure_name}_fail"] = output
      else
        summary["#{error_name}_fail"] = output
      end
    end
  end

  # Tallies the remaining failures into the summary and outputs every item
  # that still has at least one failure.
  def save_errors
    thresholds_apply = fields_to_ignore.any? || specific_validations_to_ignore.any?
    errors[:errored_items].each do |errored_item|
      remove_threshold_failures(errored_item) if thresholds_apply
      errored_item[:failures].each do |error_key, value|
        summary["#{error_key}_#{value}"] += 1
      end
      errored_item['_collection'] = collection_name
      outputs << errored_item if errored_item[:failures].any?
    end
  end

  # Drops the failures that belong to an ignored field or ignored validation.
  def remove_threshold_failures(errored_item)
    errored_item[:failures].delete_if do |error_name, _fail|
      fields_to_ignore.include?(extract_field(error_name)) ||
        specific_validations_to_ignore.include?("#{error_name}_fail")
    end
  end

  # Appends the summary record; 'pass' is set only when nothing was recorded.
  def save_summary
    summary['pass'] = 'true' if summary.empty?
    summary['_collection'] = "#{collection_name}_summary"
    summary['total_items'] = total_items
    outputs << summary
  end

  # Strips the last "_segment" from a failure key: :name_required => "name".
  def extract_field(field_sym)
    segments = field_sym.to_s.split('_')
    segments.pop
    segments.join('_')
  end
end
|
157
|
+
end
|
158
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Validates records supplied by the caller (as opposed to records fetched
# from a scraper job) and writes the results to +outputs+.
class ValidateExternal
  attr_reader :data, :errors, :rules, :outputs, :collection_name, :options

  # @param data [Array<Hash>] records to validate
  # @param config [Hash, nil] QA config; 'individual_validations' holds the rules
  # @param outputs [Array] destination for errored items and the summary
  # @param collection_name [String] name used for the output records
  # @param options [Hash] passed through to SaveOutput
  def initialize(data, config, outputs, collection_name, options)
    @errors = { errored_items: [] }
    @data = data
    @outputs = outputs
    @collection_name = collection_name
    @options = options
    # rules stay nil when there is no config at all
    @rules = config ? config['individual_validations'] : nil
  end

  # Runs group and rule validations (only when there is data), then saves the
  # output. Any StandardError is reported on stdout and turned into nil.
  #
  # @return [Hash, nil] the errors hash, or nil when something raised
  def run
    validate_records if data.any?
    SaveOutput.new(data.count, rules, errors, collection_name, outputs, options).run
    errors
  rescue StandardError => e
    puts "An error has occurred: #{e}"
    nil
  end

  private

  # Group validations first, then the per-field rules when any are configured.
  def validate_records
    ValidateGroups.new(data, nil, collection_name, errors).run
    ValidateRules.new(data, errors, rules).run if rules
  end
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Runs user-defined group validations.
#
# When a group_validations.rb file exists in the current working directory it
# is loaded, the GroupValidations module it is expected to define is included
# into this class, and each of that module's public instance methods is
# called on this instance.
class ValidateGroups
  attr_reader :data, :scraper_name, :collection_name, :errors

  # @param data [Array<Hash>] records available to the validation methods
  # @param scraper_name [String, nil] scraper the records came from
  # @param collection_name [String] collection the records belong to
  # @param errors [Hash] shared error hash the validations write into
  def initialize(data, scraper_name, collection_name, errors)
    @data = data
    @scraper_name = scraper_name
    @collection_name = collection_name
    @errors = errors
  end

  # @return [Array<Symbol>, nil] the validation method names that were called,
  #   or nil when no group_validations.rb is present
  def run
    puts "Running Group Validations"
    return unless group_validations_present?

    load_module
    include_module
    call_validation_methods
  end

  private

  # Helper intended for use inside GroupValidations methods: marks the named
  # group validation as failed.
  def fail_validation(name)
    errors[name.to_sym] = 'fail'
  end

  # `load` (not `require`) so the file is re-read on every run.
  def load_module
    load group_validations_path
  end

  # Mixes the loaded module into the class itself, so it stays included for
  # every later instance as well.
  def include_module
    self.class.send(:include, GroupValidations)
  end

  def call_validation_methods
    GroupValidations.public_instance_methods.each do |method|
      send(method)
    end
  end

  # File.exist? instead of File.exists?: the latter was deprecated for years
  # and removed in Ruby 3.2, where it raises NoMethodError.
  def group_validations_present?
    File.exist?(group_validations_path)
  end

  def group_validations_path
    @group_validations_path ||= File.expand_path('group_validations.rb', Dir.pwd)
  end
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Entry point for validating a scraper: pulls the scraper name, collections
# and rules out of the given hashes and delegates to ValidateScraper.
class ValidateInternal
  attr_reader :scraper_name, :collections, :rules, :outputs

  # @param vars [Hash] expects 'scraper_name' and 'collections'
  # @param config [Hash] QA config; 'individual_validations' holds the rules
  # @param outputs [Array] destination for the validation results
  def initialize(vars, config, outputs)
    @scraper_name = vars['scraper_name']
    @collections = vars['collections']
    @rules = config['individual_validations']
    @outputs = outputs
  end

  # @return [Object, nil] whatever ValidateScraper#run returns; nil when a
  #   StandardError was raised (it is reported on stdout)
  def run
    ValidateScraper.new(scraper_name, collections, rules, outputs, thresholds).run
  rescue StandardError => e
    puts "An error has occurred: #{e}"
    nil
  end

  private

  # Thresholds are a setting where you can suppress errors if they are under a
  # specific error rate. They are read from thresholds.yaml in the current
  # working directory; nil when that file does not exist.
  def thresholds
    @thresholds ||= begin
      file_path = File.expand_path('thresholds.yaml', Dir.pwd)
      # File.exist?: File.exists? was removed in Ruby 3.2.
      # File.read: the previous File.open handle was never closed.
      YAML.load(File.read(file_path)) if File.exist?(file_path)
    end
  end
end
|
36
|
+
|
37
|
+
# Validates the requested collections of one scraper.
#
# Fetches the scraper's collection list through
# Datahen::Client::ScraperJobOutput and runs ValidateCollection for every
# requested collection found in that list.
class ValidateScraper
  attr_reader :scraper_name, :collections, :rules, :outputs, :options

  # @param scraper_name [String]
  # @param collections [Array<String>, nil] collection names to validate
  # @param rules [Hash, nil] field name => validation options
  # @param outputs [Array] destination for the validation results
  # @param thresholds [Hash, nil] scraper name => thresholds; only this
  #   scraper's entry is kept, under options['thresholds']
  def initialize(scraper_name, collections, rules, outputs, thresholds)
    @scraper_name = scraper_name
    @collections = collections
    @rules = rules
    @outputs = outputs
    @options = {}
    options['thresholds'] = thresholds[scraper_name] if thresholds && thresholds[scraper_name]
  end

  # @return [Array<String>, nil] the collections when they were validated;
  #   nil when there was nothing to validate, the response was not OK, or a
  #   StandardError was raised (reported on stdout)
  def run
    output_scraper
    if status_ok?
      validate_collections if collections && collections.any?
    else
      output_response
      nil
    end
  rescue StandardError => e
    puts "An error has occurred for the scraper named '#{scraper_name}': #{e}"
    nil
  end

  private

  def output_scraper
    puts "validating scraper: #{scraper_name}"
  end

  # OK means a parsed body is present and the status code is 200.
  def status_ok?
    !collection_response.parsed_response.nil? && collection_response.code == 200
  end

  def validate_collections
    collections.each do |collection_name|
      collection = collection_counts.find { |collection_hash| collection_hash['collection'] == collection_name }
      if collection
        ValidateCollection.new(scraper_name, collection_name, collection['outputs'], rules, outputs, options).run
      else
        puts "collection #{collection_name} is missing"
      end
    end
  end

  # Explains on stdout why the response was rejected.
  def output_response
    if collection_response.parsed_response.nil?
      puts "collection response is null"
    else
      puts collection_response.parsed_response['message']
    end
  end

  def collection_counts
    @collection_counts ||= collection_response.parsed_response
  end

  # Memoized with ||=. The original used a bare ||, so the instance variable
  # was never assigned and every call (several per run) issued a new request.
  def collection_response
    @collection_response ||= Datahen::Client::ScraperJobOutput.new.collections(scraper_name)
  end
end
|
101
|
+
|
102
|
+
# Validates one collection of a scraper: downloads the collection's records
# from the most recent suitable job, runs the group and rule validations and
# hands the result to SaveOutput.
class ValidateCollection
  attr_reader :scraper_name, :collection_name, :total_records, :rules, :errors, :outputs, :options

  # @param scraper_name [String]
  # @param collection_name [String]
  # @param total_records [Integer] expected number of records; paging stops
  #   once this many have been fetched
  # @param rules [Hash, nil] field name => validation options
  # @param outputs [Array] destination for the validation results
  # @param options [Hash] passed through to SaveOutput
  def initialize(scraper_name, collection_name, total_records, rules, outputs, options)
    @scraper_name = scraper_name
    @collection_name = collection_name
    @total_records = total_records
    @rules = rules
    @outputs = outputs
    @options = options
    @errors = { errored_items: [] }
  end

  def run
    output_collection
    unless most_recent_finished_job
      puts "No job with status 'done' available"
      return
    end

    puts "data count #{data.count}"
    if data.any?
      ValidateGroups.new(data, scraper_name, collection_name, errors).run
      ValidateRules.new(data, errors, rules).run if rules
    end
    SaveOutput.new(data.count, rules, errors, outputs_collection_name, outputs, options).run
  end

  private

  def output_collection
    puts "Validating collection: #{collection_name}"
  end

  # Newest job (by 'created_at') whose status is 'active' or 'done'; nil when
  # the job list could not be fetched or no job qualifies.
  #
  # NOTE(review): 'active' jobs are accepted although the method name and the
  # "No job with status 'done'" message only mention finished jobs -- confirm
  # which is intended.
  def most_recent_finished_job
    @most_recent_finished_job ||= begin
      jobs_response = Datahen::Client::ScraperJob.new.all(scraper_name)
      if jobs_response.code == 200
        jobs_response.parsed_response
                     .sort_by { |job| job['created_at'] }
                     .reverse
                     .find { |job| job['status'] == 'active' || job['status'] == 'done' }
      end
    end
  end

  def data
    @data ||= fetch_all_records
  end

  # Pages through the job output, 500 records per request with a one second
  # pause after each request, until total_records have been collected.
  #
  # Stops early when a page is nil or empty. The empty case used to loop
  # forever, because the record count could then never reach total_records.
  def fetch_all_records
    collected = []
    page = 1
    while collected.count < total_records
      records = Datahen::Client::JobOutput.new(per_page: 500, page: page)
                                          .all(most_recent_finished_job['id'], collection_name)
                                          .parsed_response
      sleep 1
      if records.nil?
        puts "All ScraperJobOutput request was nil. Total records: #{total_records}, data count: #{collected.count}, page: #{page}"
        break
      end
      if records.respond_to?(:empty?) && records.empty?
        puts "Empty page returned before all records were fetched. Total records: #{total_records}, data count: #{collected.count}, page: #{page}"
        break
      end
      records.each { |record| collected << record }
      page += 1
    end
    collected
  end

  def outputs_collection_name
    @outputs_collection_name ||= "#{scraper_name}_#{collection_name}"
  end
end
|
171
|
+
end
|
172
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Applies the per-field validation rules to every record and collects the
# records that fail into errors[:errored_items].
class ValidateRules
  include Helpers

  attr_reader :data, :errors, :rules
  attr_accessor :errored_item

  # @param data [Array<Hash>] records to validate
  # @param errors [Hash] shared error hash; failures go to :errored_items
  # @param rules [Hash] field name => validation options
  def initialize(data, errors, rules)
    @data = data
    @errors = errors
    @rules = rules
  end

  def run
    puts "Validating Rules"
    handle_rules
  end

  private

  # One errored_item accumulator per record; it is kept only when at least
  # one validation failed for that record.
  def handle_rules
    data.each do |data_hash|
      reset_errored_item
      rules.each do |field_to_validate, options|
        next unless passes_required_check?(options, data_hash, field_to_validate)

        options.each do |validation, value|
          apply_validation(validation, value, data_hash, field_to_validate)
        end
      end
      errors[:errored_items].push(errored_item) if errored_item && !errored_item[:failures].empty?
    end
  end

  # Dispatches a single validation entry of a rule.
  #
  # NOTE(review): the regexes below are unanchored, so any key merely
  # containing "required", "threshold", "if" or "format" is silently
  # accepted -- confirm whether exact names were intended.
  def apply_validation(validation, value, data_hash, field_to_validate)
    case validation
    when 'type'
      ValidateType.new(data_hash, field_to_validate, value, rules, errored_item).run
    when 'value'
      ValidateValue.new(data_hash, field_to_validate, value, errored_item).run
    when 'length'
      validate_length(data_hash, field_to_validate, value)
    when /required|threshold|if/
      nil
    else
      unknown_validation_error(validation) if validation !~ /format/
    end
  end

  # Gate for the remaining validations of a field. Returns true only when the
  # rule has `required: true`, its optional `if` clause holds, and the value
  # is present; a missing value is recorded as a 'required' failure.
  #
  # NOTE(review): fields without `required: true` return false here, so none
  # of their other validations run -- confirm this is intended.
  def passes_required_check?(options, data_hash, field_to_validate)
    return false unless options['required'] == true
    return false if options['if'] && !pass_if?(options['if'], data_hash)

    check_for_required_failure(data_hash, field_to_validate)
  end

  def check_for_required_failure(data_hash, field_to_validate)
    return true unless fails_required_check?(data_hash, field_to_validate)

    add_errored_item(data_hash, field_to_validate, 'required')
    false
  end

  # A value is missing when it is nil or an empty String.
  def fails_required_check?(data_hash, field_to_validate)
    value = data_hash[field_to_validate]
    value.nil? || (value.class == String && value.empty?)
  end

  # Fails unless the value's string form has exactly +length+ characters.
  def validate_length(data_hash, field_to_validate, length)
    actual_length = data_hash[field_to_validate].to_s.length
    add_errored_item(data_hash, field_to_validate, 'length') if actual_length != length
  end

  def reset_errored_item
    self.errored_item = { failures: {} }
  end

  def unknown_validation_error(validation)
    raise StandardError.new("The validation '#{validation}' is unknown.")
  end
end
|
88
|
+
end
|
89
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Checks that one field of a record has the configured type
# ('String', 'Integer', 'Float', 'Date' or 'Url') and records a
# "<field>_type" failure when it does not.
class ValidateType
  include Helpers

  attr_reader :data_hash, :field_to_validate, :desired_type, :rules, :errored_item

  # @param data_hash [Hash] record being validated
  # @param field_to_validate [String] field name
  # @param desired_type [String] configured type name
  # @param rules [Hash] all rules; the Date check reads this field's 'format'
  # @param errored_item [Hash] accumulator that failures are written to
  def initialize(data_hash, field_to_validate, desired_type, rules, errored_item)
    @data_hash = data_hash
    @field_to_validate = field_to_validate
    @desired_type = desired_type
    @rules = rules
    @errored_item = errored_item
  end

  # @raise [StandardError] for an unknown type, a Date rule without a
  #   format, or an unexpected date parsing error
  def run
    handle_types
  end

  private

  def handle_types
    value = data_hash[field_to_validate]
    case desired_type
    when 'String'
      type_failure unless value.class == String
    when 'Integer'
      # Integer instead of Fixnum: Fixnum was unified into Integer in Ruby 2.4
      # and removed in 3.2, where referencing it raises NameError. Digit
      # strings (optionally with one comma group) are accepted as before.
      type_failure unless value.is_a?(Integer) || value.to_s.strip =~ /\A\d+(\,\d+)?\z/
    when 'Float'
      type_failure unless value.class == Float || value.to_s.strip =~ /\A\.?\d+(\,\d+)?(\.\d+)?\z/
    when 'Date'
      validate_date_type
    when 'Url'
      # \A anchors at the start of the whole string; the former ^ also matched
      # after any newline. to_s keeps non-String values (nil, numbers) a plain
      # failure instead of depending on Object#=~, which Ruby 3.2 removed.
      type_failure if value.to_s !~ %r{\A(http|https)://}
    else
      unknown_type_error(desired_type)
    end
  end

  # A date is valid when it parses with the rule's 'format' and formatting the
  # parsed time with that same format reproduces the original string.
  def validate_date_type
    format = rules[field_to_validate]['format']
    missing_date_format_error if format.nil?
    date_str = data_hash[field_to_validate]
    # Time.strptime raises TypeError for non-String input, which used to abort
    # the whole run; a non-String value is simply not a valid date.
    return type_failure unless date_str.is_a?(String)

    begin
      # strptime does not understand the non-padded %-m / %-d flags, so parse
      # with the padded variants; the strftime round-trip uses the real format.
      date = Time.strptime(date_str, format.gsub('%-m', '%m').gsub('%-d', '%d'))
      type_failure if date.strftime(format) != date_str
    rescue ArgumentError => e
      # Accept both wordings of the parse failure: "invalid strptime format"
      # and the newer "invalid date or strptime format".
      if e.to_s =~ /\Ainvalid (date or )?strptime format/
        type_failure
      else
        raise StandardError.new(e.to_s)
      end
    end
  end

  # Records a 'type' failure for the current field.
  def type_failure
    add_errored_item(data_hash, field_to_validate, 'type')
  end

  def unknown_type_error(desired_type)
    raise StandardError.new("The validation type '#{desired_type}' is unknown.")
  end

  def missing_date_format_error
    raise StandardError.new("Date validation is missing a format.")
  end
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Checks one field of a record against a `value` rule ('equal', 'regex',
# 'less_than' or 'greater_than', optionally guarded by an 'if' clause) and
# records a "<field>_value" failure when the rule is not met.
class ValidateValue
  include Helpers

  attr_reader :data_hash, :field_to_validate, :params, :errored_item

  # @param data_hash [Hash] record being validated
  # @param field_to_validate [String] field name
  # @param params [Hash] the rule, e.g. {"equal" => "A"} or {"regex" => "^a"}
  # @param errored_item [Hash] accumulator that failures are written to
  def initialize(data_hash, field_to_validate, params, errored_item)
    @data_hash = data_hash
    @field_to_validate = field_to_validate
    @params = params
    @errored_item = errored_item
  end

  # Runs the check; with an 'if' clause the check only runs when it holds.
  #
  # @raise [StandardError] when the rule has none of the known comparisons
  def run
    if_exists? ? handle_if : main_value_check
  end

  private

  def if_exists?
    !params['if'].nil?
  end

  def handle_if
    main_value_check if pass_if?(params['if'], data_hash)
  end

  # Comparisons are tried in the order equal, regex (case-insensitive),
  # less_than, greater_than; the numeric ones compare after to_i.
  def main_value_check
    value = data_hash[field_to_validate]
    # nil check rather than truthiness: `equal: false` is a legitimate rule
    # and previously fell through to unknown_value_error.
    if !params['equal'].nil?
      if equal_with_operators?
        equal_with_operators
      else
        value_failure if value != params['equal']
      end
    elsif params['regex']
      value_failure if value.to_s !~ Regexp.new(params['regex'], true)
    elsif params['less_than']
      value_failure unless value.to_i < params['less_than'].to_i
    elsif params['greater_than']
      value_failure unless value.to_i > params['greater_than'].to_i
    else
      unknown_value_error
    end
  end

  # Only the 'or' operator is implemented.
  # NOTE(review): any other operator key is silently treated as a pass.
  def equal_with_operators
    case equal_operator
    when 'or'
      value_failure unless or_statment
    end
  end

  # True when the field equals any of the configured alternatives.
  #
  # SECURITY NOTE(review): every entry of params['equal']['or'] is
  # interpolated verbatim into Ruby source and eval'd. Entries therefore have
  # to be written as Ruby literals (e.g. "'A'" or 5), and the rule
  # configuration must come from a trusted source. Left unchanged because
  # switching to a plain comparison would change how existing configs are
  # interpreted.
  def or_statment
    eval or_vals.map { |val| "data_hash[field_to_validate] == #{val}" }.join(' || ')
  end

  def equal_with_operators?
    params['equal'].class == Hash
  end

  def equal_operator
    params['equal'].keys.first
  end

  def or_vals
    params['equal']['or']
  end

  # Records a 'value' failure for the current field.
  def value_failure
    add_errored_item(data_hash, field_to_validate, 'value')
  end

  def unknown_value_error
    raise StandardError.new("The value rule '#{params}' is unknown.")
  end
end
|
74
|
+
end
|
75
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module DhEasy
|
2
|
+
module Qa
|
3
|
+
# Public entry point of the QA gem: holds the data and options and dispatches
# to the internal (scraper) or external (caller-supplied data) validators.
class Validator
  attr_reader :local_config, :data, :options, :errors

  # Configuration.
  #
  # @param [Hash] value Configuration.
  #
  # @return [Hash]
  attr_writer :config

  # @param data [Array<Hash>, nil] records used by validate_external
  # @param options [Hash] options passed on to validate_external
  def initialize(data=nil, options={})
    load_config
    @options = options
    @data = data
  end

  # Validates "internal" scrapers that run on Datahen.
  def validate_internal(vars, outputs)
    ValidateInternal.new(vars, config, outputs).run
  end

  # Validates data from "external" sources.
  def validate_external(outputs, collection_name)
    ValidateExternal.new(data, config, outputs, collection_name, options).run
  end

  # Configuration; defaults to the 'qa' entry of the local config until one
  # is assigned explicitly.
  #
  # @return [Hash]
  def config
    @config ||= local_config['qa']
  end

  private

  # Builds the local config object once.
  def load_config
    @local_config ||= DhEasy::Config::Local.new
  end

  def config_path
    local_config.file_path
  end
end
|
48
|
+
end
|
49
|
+
end
|
metadata
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dh_easy-qa
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.33
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Lynam
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-12-04 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: dh_easy-config
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.16'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.16'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '5'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '5'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '10.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '10.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: simplecov
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: simplecov-console
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: DataHen Easy QA gem allows you to ensure the quality of output on Fetch
|
98
|
+
email:
|
99
|
+
- dlynam@gmail.com
|
100
|
+
executables: []
|
101
|
+
extensions: []
|
102
|
+
extra_rdoc_files: []
|
103
|
+
files:
|
104
|
+
- bin/console
|
105
|
+
- bin/setup
|
106
|
+
- lib/dh_easy/qa.rb
|
107
|
+
- lib/dh_easy/qa/helpers.rb
|
108
|
+
- lib/dh_easy/qa/save_output.rb
|
109
|
+
- lib/dh_easy/qa/validate_external.rb
|
110
|
+
- lib/dh_easy/qa/validate_groups.rb
|
111
|
+
- lib/dh_easy/qa/validate_internal.rb
|
112
|
+
- lib/dh_easy/qa/validate_rules.rb
|
113
|
+
- lib/dh_easy/qa/validate_type.rb
|
114
|
+
- lib/dh_easy/qa/validate_value.rb
|
115
|
+
- lib/dh_easy/qa/validator.rb
|
116
|
+
- lib/dh_easy/qa/version.rb
|
117
|
+
homepage: https://datahen.com
|
118
|
+
licenses:
|
119
|
+
- MIT
|
120
|
+
metadata:
|
121
|
+
homepage_uri: https://datahen.com
|
122
|
+
source_code_uri: https://github.com/DataHenOfficial/dh_easy-qa
|
123
|
+
post_install_message:
|
124
|
+
rdoc_options: []
|
125
|
+
require_paths:
|
126
|
+
- lib
|
127
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 2.2.2
|
132
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
133
|
+
requirements:
|
134
|
+
- - ">="
|
135
|
+
- !ruby/object:Gem::Version
|
136
|
+
version: '0'
|
137
|
+
requirements: []
|
138
|
+
rubyforge_project:
|
139
|
+
rubygems_version: 2.7.6
|
140
|
+
signing_key:
|
141
|
+
specification_version: 4
|
142
|
+
summary: DataHen Easy Quality Assurance gem
|
143
|
+
test_files: []
|