turbot-runner 0.1.20 → 0.1.21
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/lib/turbot_runner/base_handler.rb +1 -1
- data/lib/turbot_runner/processor.rb +93 -17
- data/lib/turbot_runner/utils.rb +9 -0
- data/lib/turbot_runner/validator.rb +74 -0
- data/lib/turbot_runner/version.rb +1 -1
- data/lib/turbot_runner.rb +2 -0
- data/schema/schemas/financial-payment-schema.json +2 -4
- data/schema/schemas/licence-schema.json +4 -4
- data/schema/schemas/primary-data-schema.json +2 -1
- data/schema/schemas/rich-licence-schema.json +3 -0
- data/schema/schemas/simple-financial-payment-schema.json +2 -1
- data/schema/schemas/simple-licence-schema.json +2 -2
- data/schema/schemas/simple-subsidiary-schema.json +5 -3
- data/schema/schemas/subsidiary-relationship-schema.json +4 -4
- data/spec/lib/processor_spec.rb +141 -6
- data/spec/lib/runner_spec.rb +1 -1
- data/spec/lib/validator_spec.rb +235 -0
- data/spec/lib/validators_spec.rb +48 -0
- data/spec/spec_helper.rb +20 -0
- metadata +68 -85
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OTBiNjljYWU5MzdhMzk4ZjJiMGI4MmNkMzJiZTU4MjIwYjIyNGZhZg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ODEzZmE5ZjBlMDQ0MzY5MWQyODlkN2RjZDU0Y2UyODI3ODVmOWZkYw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZGQwMzJhZWY1NzUwOGYyZDQ1YjEyZjEyY2M1OGQ5YzY2YmU1ZjhlZjZlN2Y5
|
10
|
+
YmNjN2FiYTFkZWUxNzUyMDQ0MDIyYzA2OTI1MzEyNzA3ZGNjM2ViMDY4NGQ3
|
11
|
+
MWExM2JiMzkyOWE3MzExODMyNDMyNzYxMTEzNGU1YjNmYTcyODU=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
Y2U0NGY0MDFmMzk4MTNhMWYxYjQwMzJhYmVlMjg5MGVkNDRhYWQ0NDBkZGQy
|
14
|
+
MzlkMmQyYjY0NTFkMjRjOGQ0MjFlN2Y0NDkxZWVkMjRiNjcyYjg5ODZiZGNi
|
15
|
+
MzZmNDQxOWM2YTg0YTI5NjQ5MGE0NzAwZWFiNmVmY2I5NWRkNTg=
|
@@ -17,16 +17,22 @@ module TurbotRunner
|
|
17
17
|
@runner.interrupt if @runner
|
18
18
|
else
|
19
19
|
record = JSON.parse(line)
|
20
|
-
errors = validate(record)
|
21
20
|
|
22
|
-
|
21
|
+
begin
|
22
|
+
converted_record = convert_record(record)
|
23
|
+
error_message = validate(converted_record)
|
24
|
+
rescue ConversionError => e
|
25
|
+
error_message = e.message
|
26
|
+
end
|
27
|
+
|
28
|
+
if error_message.nil?
|
23
29
|
begin
|
24
|
-
@record_handler.handle_valid_record(
|
30
|
+
@record_handler.handle_valid_record(converted_record, @data_type)
|
25
31
|
rescue InterruptRun
|
26
32
|
@runner.interrupt if @runner
|
27
33
|
end
|
28
34
|
else
|
29
|
-
@record_handler.handle_invalid_record(record, @data_type,
|
35
|
+
@record_handler.handle_invalid_record(record, @data_type, error_message)
|
30
36
|
@runner.interrupt_and_mark_as_failed if @runner
|
31
37
|
end
|
32
38
|
end
|
@@ -40,37 +46,107 @@ module TurbotRunner
|
|
40
46
|
@runner.interrupt
|
41
47
|
end
|
42
48
|
|
43
|
-
def
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
+
# Returns a deep copy of +record+ in which every property listed in
# +date_paths+ has been normalised:
#
# * an empty string is removed from its parent hash;
# * any other string is parsed with the '%Y-%m-%d' format and written back
#   as YYYY-MM-DD;
# * non-string values, and paths that do not exist in the record, are left
#   untouched.
#
# The original +record+ is never modified.
#
# @param record [Hash] the parsed record
# @return [Hash] the converted copy
# @raise [ConversionError] if a date property holds a string that cannot be
#   parsed as a date
def convert_record(record)
  converted_record = Utils.deep_copy(record)

  date_paths.each do |path|
    *parent_keys, leaf_key = path

    # Descend only through hashes. Indexing a String with a key does a
    # substring lookup and indexing an Array with one raises TypeError, so
    # any non-hash value on the way simply means "path not present here".
    parent = parent_keys.reduce(converted_record) do |node, key|
      node.is_a?(Hash) ? node[key] : nil
    end
    next unless parent.is_a?(Hash)

    value = parent[leaf_key]
    next unless value.is_a?(String)

    if value.empty?
      parent.delete(leaf_key)
    else
      begin
        parent[leaf_key] = Date.strptime(value, '%Y-%m-%d').strftime('%Y-%m-%d')
      rescue ArgumentError
        raise ConversionError, "Property not a valid date: #{path.join('.')}"
      end
    end
  end

  converted_record
end
|
80
|
+
|
81
|
+
# Key paths of every date-formatted property in the schema, computed once
# and cached in @date_paths.
#
# @return [Array<Array<String>>]
def date_paths
  @date_paths ||= get_date_paths(schema['properties'])
end

# Recursively collects the key path of every property whose 'format' is
# 'date', descending into properties whose 'type' is 'object'.
#
# @param properties [Hash, nil] a schema "properties" mapping of property
#   name to attribute hash
# @return [Array<Array<String>>] one path (array of keys, outermost first)
#   per date property, in schema order
def get_date_paths(properties)
  # An object schema need not declare "properties" at all; there is then
  # nothing to descend into.
  return [] if properties.nil?

  properties.flat_map do |name, attrs|
    if attrs['format'] == 'date'
      [[name]]
    elsif attrs['type'] == 'object'
      get_date_paths(attrs['properties']).map { |path| [name] + path }
    else
      []
    end
  end
end
|
100
|
+
|
101
|
+
# Checks +record+ against the schema and the identifying-field rule.
#
# @param record [Hash] the (converted) record to check
# @return [String, nil] nil when the record passes schema validation and at
#   least one identifying field has a non-nil, non-empty value; otherwise a
#   human-readable description of the first problem found
def validate(record)
  error = Validator.validate(schema, record)

  if error.nil?
    # Schema-valid; now require a real value in at least one identifying field.
    identified = record.any? do |key, value|
      @identifying_fields.include?(key) && !value.nil? && value != ''
    end
    return if identified

    return "There were no values provided for any of the identifying fields: #{@identifying_fields.join(', ')}"
  end

  case error[:type]
  when :missing
    "Missing required property: #{error[:path]}"
  when :one_of_no_matches
    "No match for property: #{error[:path]}"
  when :one_of_many_matches
    "Multiple possible matches for property: #{error[:path]}"
  when :too_short
    "Property too short: #{error[:path]} (must be at least #{error[:length]} characters)"
  when :too_long
    "Property too long: #{error[:path]} (must be at most #{error[:length]} characters)"
  when :type_mismatch
    "Property of wrong type: #{error[:path]} (must be of type #{error[:allowed_types].join(', ')})"
  when :enum_mismatch
    "Property not an allowed value: #{error[:path]} (must be one of #{error[:allowed_values].join(', ')})"
  when :format_mismatch
    "Property not of expected format: #{error[:path]} (must be of format #{error[:expected_format]})"
  else
    # Covers :unknown and any error type this method does not recognise.
    # A nil return means "valid", so a failed validation must never fall
    # through without a message.
    error[:message] || "Unrecognised validation error (#{error[:type]}) for property: #{error[:path]}"
  end
end
|
66
139
|
|
67
140
|
# The parsed JSON schema for @data_type, read from disk on first use and
# cached in @schema afterwards.
def schema
  @schema ||= load_schema
end

# Reads and parses the schema file for @data_type. Underscores and spaces in
# the data type name become hyphens, and the file is looked up under
# schema/schemas/ relative to this source file.
def load_schema
  schema_name = @data_type.to_s.tr('_ ', '-')
  schema_file = File.expand_path("../../../schema/schemas/#{schema_name}-schema.json", __FILE__)

  JSON.load(File.read(schema_file))
end
|
149
|
+
|
150
|
+
# Raised by convert_record when a date property holds a string that cannot
# be parsed as a date.
class ConversionError < StandardError
end
|
75
151
|
end
|
76
152
|
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'json-schema'
|
2
|
+
|
3
|
+
module TurbotRunner
  # Runs a record through json-schema and condenses the first reported
  # failure into a small hash (:type, :path and type-specific details).
  module Validator
    extend self

    # Validates +record+ against +schema+.
    #
    # Details such as lengths and allowed values are recovered by parsing
    # json-schema's error messages. If a message does not have the expected
    # shape, the failure is reported as :unknown (carrying the raw message)
    # instead of raising.
    #
    # @param schema [Hash] parsed JSON schema
    # @param record [Hash] the record to check
    # @return [Hash, nil] nil when no errors are reported; otherwise a hash
    #   with :type (:missing, :one_of_no_matches, :one_of_many_matches,
    #   :too_short, :too_long, :type_mismatch, :enum_mismatch,
    #   :format_mismatch or :unknown), :path (dot-separated) and, depending
    #   on the type, :length, :allowed_types, :allowed_values,
    #   :expected_format, :failed_attribute or :message
    # @raise [RuntimeError] when a "did not match any" oneOf failure is
    #   reported anywhere other than on a top-level property
    def validate(schema, record)
      errors = JSON::Validator.fully_validate(schema, record, :errors_as_objects => true)

      # For now, we just handle the first error.
      error = errors[0]
      return if error.nil?

      message = error[:message]

      case error[:failed_attribute]
      when 'Required'
        match = message.match(/required property of '(.*)'/)
        return unknown_error(error) if match.nil?

        {:type => :missing, :path => fragment_to_path("#{error[:fragment]}/#{match[1]}")}
      when 'OneOf'
        if message.match(/did not match any/)
          one_of_no_matches_error(schema, record, error)
        else
          {:type => :one_of_many_matches, :path => fragment_to_path(error[:fragment])}
        end
      when 'MinLength'
        match = message.match(/minimum string length of (\d+) in/)
        return unknown_error(error) if match.nil?

        {:type => :too_short, :path => fragment_to_path(error[:fragment]), :length => match[1].to_i}
      when 'MaxLength'
        match = message.match(/maximum string length of (\d+) in/)
        return unknown_error(error) if match.nil?

        {:type => :too_long, :path => fragment_to_path(error[:fragment]), :length => match[1].to_i}
      when 'TypeV4'
        match = message.match(/the following types?: ([\w\s,]+) in schema/)
        return unknown_error(error) if match.nil?

        {:type => :type_mismatch, :path => fragment_to_path(error[:fragment]), :allowed_types => split_list(match[1])}
      when 'Enum'
        # Try the strict pattern first so previously parseable messages give
        # the same result; fall back to a permissive one for enum values
        # containing punctuation such as hyphens.
        match = message.match(/the following values: ([\w\s,]+) in schema/) ||
                message.match(/the following values: (.*) in schema/m)
        return unknown_error(error) if match.nil?

        {:type => :enum_mismatch, :path => fragment_to_path(error[:fragment]), :allowed_values => split_list(match[1])}
      else
        if message.match(/must be of format yyyy-mm-dd/)
          {:type => :format_mismatch, :path => fragment_to_path(error[:fragment]), :expected_format => 'yyyy-mm-dd'}
        else
          unknown_error(error)
        end
      end
    end

    # Converts a JSON-pointer style fragment such as "#/aaa/bbb" into the
    # dot-separated path "aaa.bbb".
    def fragment_to_path(fragment)
      fragment.sub(/\A#?\/*/, '').gsub('/', '.')
    end

    private

    # Handles a oneOf failure in which no alternative matched. If one of the
    # alternatives pins a property to an enum containing the record's value,
    # that alternative is taken as the intended one and its own first error
    # is reported (path prefixed with the property name); otherwise a plain
    # :one_of_no_matches error is returned.
    def one_of_no_matches_error(schema, record, error)
      path = fragment_to_path(error[:fragment])
      path_elements = path.split('.')

      raise "Deeply nested OneOf error at: #{error[:fragment]}" unless path_elements.size == 1

      property = path_elements[0]
      record_fragment = record[property]
      schema_fragments = schema['properties'][property]['oneOf']

      # Only a hash can be matched against an alternative's properties.
      if record_fragment.is_a?(Hash)
        schema_fragments.each do |candidate|
          # An alternative need not declare any properties.
          (candidate['properties'] || {}).each do |key, attrs|
            next if attrs['enum'].nil?
            next unless attrs['enum'].include?(record_fragment[key])

            nested_error = validate(candidate, record_fragment)
            # The alternative can pass when validated on its own; keep looking.
            next if nested_error.nil?

            return nested_error.merge(:path => "#{property}.#{nested_error[:path]}")
          end
        end
      end

      {:type => :one_of_no_matches, :path => path}
    end

    # Generic error carrying json-schema's own attribute name and message.
    def unknown_error(error)
      {
        :type => :unknown,
        :path => fragment_to_path(error[:fragment]),
        :failed_attribute => error[:failed_attribute],
        :message => error[:message]
      }
    end

    # Splits a comma-separated list taken from an error message.
    def split_list(text)
      text.split(',').map(&:strip)
    end
  end
end
|
data/lib/turbot_runner.rb
CHANGED
@@ -3,5 +3,7 @@ require 'turbot_runner/exceptions'
|
|
3
3
|
require 'turbot_runner/processor'
|
4
4
|
require 'turbot_runner/runner'
|
5
5
|
require 'turbot_runner/script_runner'
|
6
|
+
require 'turbot_runner/utils'
|
7
|
+
require 'turbot_runner/validator'
|
6
8
|
require 'turbot_runner/validators'
|
7
9
|
require 'turbot_runner/version'
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"$schema": "http://json-schema.org/draft-04/schema#",
|
5
5
|
"type": "object",
|
6
6
|
"properties": {
|
7
|
+
"sample_date": {"type": "string", "format": "date"},
|
7
8
|
"source_jurisdiction": {
|
8
9
|
"description": "Jurisdiction of the source of the data",
|
9
10
|
"type": "string"
|
@@ -20,8 +21,5 @@
|
|
20
21
|
"additionalItems": false
|
21
22
|
}
|
22
23
|
},
|
23
|
-
"required": [
|
24
|
-
"company",
|
25
|
-
"data"
|
26
|
-
]
|
24
|
+
"required": ["company", "data", "sample_date"]
|
27
25
|
}
|
@@ -4,6 +4,9 @@
|
|
4
4
|
"$schema": "http://json-schema.org/draft-04/schema#",
|
5
5
|
"type": "object",
|
6
6
|
"properties": {
|
7
|
+
"sample_date": {"type": "string", "format": "date"},
|
8
|
+
"start_date": {"type": "string", "format": "date"},
|
9
|
+
"end_date": {"type": "string", "format": "date"},
|
7
10
|
"source_jurisdiction": {
|
8
11
|
"description": "Jurisdiction of the source of the data",
|
9
12
|
"type": "string"
|
@@ -20,8 +23,5 @@
|
|
20
23
|
"additionalItems": false
|
21
24
|
}
|
22
25
|
},
|
23
|
-
"required": [
|
24
|
-
"company",
|
25
|
-
"data"
|
26
|
-
]
|
26
|
+
"required": ["company", "data", "sample_date"]
|
27
27
|
}
|
@@ -3,6 +3,9 @@
|
|
3
3
|
"description": "A Licence is a permission for an entity to do something that would otherwise not be permitted. Note that such permissions are recorded in multiple different ways and to different granularity. In addition, categories cross over, for example a pawnbroker may be regulated as a retail establishment by a city, but also as a financial institution by a national or regional financial regulator. Sometimes the licence is a simple statement – the government of Rwanda has given a banking licence to a certain bank -- other times it is fine-grained and highly complex (e.g. an extractives licence or the UK FCA licencing scheme). This schema tries to make it easy to submit the former, while not losing granularity of the latter. The object has the name of Rich Licence to distinguish it from Simple Licence, which it is expected to ultimately replace.",
|
4
4
|
"$schema": "http://json-schema.org/draft-04/schema#",
|
5
5
|
"properties": {
|
6
|
+
"sample_date": {"type": "string", "format": "date"},
|
7
|
+
"start_date": {"type": "string", "format": "date"},
|
8
|
+
"end_date": {"type": "string", "format": "date"},
|
6
9
|
"licence_holder": {
|
7
10
|
"entity": {
|
8
11
|
"oneOf": [
|
@@ -11,15 +11,17 @@
|
|
11
11
|
"sample_date": {
|
12
12
|
"description": "Date on which this fact was known to be true",
|
13
13
|
"type": "string",
|
14
|
-
"
|
14
|
+
"format": "date"
|
15
15
|
},
|
16
16
|
"start_date": {
|
17
17
|
"description": "Earliest known date this was known to be a subsidiary",
|
18
|
-
"type": "string"
|
18
|
+
"type": "string",
|
19
|
+
"format": "date"
|
19
20
|
},
|
20
21
|
"end_date": {
|
21
22
|
"description": "Latest known date this was known to be a subsidiary",
|
22
|
-
"type": "string"
|
23
|
+
"type": "string",
|
24
|
+
"format": "date"
|
23
25
|
},
|
24
26
|
"confidence": {
|
25
27
|
"description": "Confidence in accuracy of data",
|
@@ -4,6 +4,9 @@
|
|
4
4
|
"$schema": "http://json-schema.org/draft-04/schema#",
|
5
5
|
"type": "object",
|
6
6
|
"properties": {
|
7
|
+
"sample_date": {"type": "string", "format": "date"},
|
8
|
+
"start_date": {"type": "string", "format": "date"},
|
9
|
+
"end_date": {"type": "string", "format": "date"},
|
7
10
|
"source_jurisdiction": {
|
8
11
|
"description": "Jurisdiction of the source of the data",
|
9
12
|
"type": "string"
|
@@ -20,8 +23,5 @@
|
|
20
23
|
"additionalItems": false
|
21
24
|
}
|
22
25
|
},
|
23
|
-
"required": [
|
24
|
-
"company",
|
25
|
-
"data"
|
26
|
-
]
|
26
|
+
"required": ["company", "data", "sample_date"]
|
27
27
|
}
|
data/spec/lib/processor_spec.rb
CHANGED
@@ -26,9 +26,9 @@ describe TurbotRunner::Processor do
|
|
26
26
|
end
|
27
27
|
|
28
28
|
it 'calls Handler#handle_invalid_record' do
|
29
|
-
|
29
|
+
expected_error = 'Missing required property: source_url'
|
30
30
|
expect(@handler).to receive(:handle_invalid_record).
|
31
|
-
with(@record, @data_type,
|
31
|
+
with(@record, @data_type, expected_error)
|
32
32
|
@processor.process(@record.to_json)
|
33
33
|
end
|
34
34
|
end
|
@@ -62,9 +62,9 @@ describe TurbotRunner::Processor do
|
|
62
62
|
end
|
63
63
|
|
64
64
|
it 'calls Handler#handle_invalid_record' do
|
65
|
-
|
65
|
+
expected_error = 'Missing required property: source_url'
|
66
66
|
expect(@handler).to receive(:handle_invalid_record).
|
67
|
-
with(@record, @data_type,
|
67
|
+
with(@record, @data_type, expected_error)
|
68
68
|
@processor.process(@record.to_json)
|
69
69
|
end
|
70
70
|
end
|
@@ -78,9 +78,9 @@ describe TurbotRunner::Processor do
|
|
78
78
|
end
|
79
79
|
|
80
80
|
it 'calls Handler#handle_invalid_record' do
|
81
|
-
|
81
|
+
expected_error = 'There were no values provided for any of the identifying fields: number'
|
82
82
|
expect(@handler).to receive(:handle_invalid_record).
|
83
|
-
with(@record, @data_type,
|
83
|
+
with(@record, @data_type, expected_error)
|
84
84
|
@processor.process(@record.to_json)
|
85
85
|
end
|
86
86
|
end
|
@@ -92,6 +92,141 @@ describe TurbotRunner::Processor do
|
|
92
92
|
@processor.process(line)
|
93
93
|
end
|
94
94
|
end
|
95
|
+
|
96
|
+
context 'with record with sample_date from Time.now' do
|
97
|
+
it 'calls Handler#handle_valid_record with converted sample_date' do
|
98
|
+
record = {
|
99
|
+
'sample_date' => '2014-06-01 12:34:56 +0000',
|
100
|
+
'source_url' => 'http://example.com/123',
|
101
|
+
'number' => 123
|
102
|
+
}
|
103
|
+
|
104
|
+
expected_converted_record = {
|
105
|
+
'sample_date' => '2014-06-01',
|
106
|
+
'source_url' => 'http://example.com/123',
|
107
|
+
'number' => 123
|
108
|
+
}
|
109
|
+
expect(@handler).to receive(:handle_valid_record).
|
110
|
+
with(expected_converted_record, @data_type)
|
111
|
+
@processor.process(record.to_json)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
context 'with record with missing sample_date' do
|
116
|
+
it 'calls Handler#handle_invalid_record' do
|
117
|
+
record = {
|
118
|
+
'source_url' => 'http://example.com/123',
|
119
|
+
'number' => 123
|
120
|
+
}
|
121
|
+
|
122
|
+
expected_error = 'Missing required property: sample_date'
|
123
|
+
expect(@handler).to receive(:handle_invalid_record).
|
124
|
+
with(record, @data_type, expected_error)
|
125
|
+
@processor.process(record.to_json)
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
context 'with record with empty sample_date' do
|
130
|
+
it 'calls Handler#handle_invalid_record' do
|
131
|
+
record = {
|
132
|
+
'sample_date' => '',
|
133
|
+
'source_url' => 'http://example.com/123',
|
134
|
+
'number' => 123
|
135
|
+
}
|
136
|
+
|
137
|
+
expected_error = 'Missing required property: sample_date'
|
138
|
+
expect(@handler).to receive(:handle_invalid_record).
|
139
|
+
with(record, @data_type, expected_error)
|
140
|
+
@processor.process(record.to_json)
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
context 'with record with invalid sample_date' do
|
145
|
+
it 'calls Handler#handle_invalid_record' do
|
146
|
+
record = {
|
147
|
+
'sample_date' => '2014-06-00',
|
148
|
+
'source_url' => 'http://example.com/123',
|
149
|
+
'number' => 123
|
150
|
+
}
|
151
|
+
|
152
|
+
expected_error = 'Property not a valid date: sample_date'
|
153
|
+
expect(@handler).to receive(:handle_invalid_record).
|
154
|
+
with(record, @data_type, expected_error)
|
155
|
+
@processor.process(record.to_json)
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
describe '#convert_record' do
|
162
|
+
before do
|
163
|
+
schema = {
|
164
|
+
'$schema' => 'http://json-schema.org/draft-04/schema#',
|
165
|
+
'type' => 'object',
|
166
|
+
'properties' => {
|
167
|
+
'aaa' => {'format' => 'date'},
|
168
|
+
'bbb' => {'format' => 'not-date'},
|
169
|
+
}
|
170
|
+
}
|
171
|
+
|
172
|
+
@processor = TurbotRunner::Processor.new(nil, {}, nil)
|
173
|
+
allow(@processor).to receive(:schema).and_return(schema)
|
95
174
|
end
|
175
|
+
|
176
|
+
context 'when date field is YYYY-MM-DD' do
|
177
|
+
it 'leaves date field alone' do
|
178
|
+
record = {'aaa' => '2015-01-26', 'bbb' => 'cabbage'}
|
179
|
+
expect(@processor.convert_record(record)).to eq({'aaa' => '2015-01-26', 'bbb' => 'cabbage'})
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
context 'when date field with YYYY-MM-DD HH:MM:SS' do
|
184
|
+
it 'replaces value with YYYY-MM-DD' do
|
185
|
+
record = {'aaa' => '2015-01-26 12:34:56', 'bbb' => 'cabbage'}
|
186
|
+
expect(@processor.convert_record(record)).to eq({'aaa' => '2015-01-26', 'bbb' => 'cabbage'})
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
190
|
+
context 'when date field is empty string' do
|
191
|
+
it 'replaces removes field' do
|
192
|
+
record = {'aaa' => '', 'bbb' => 'cabbage'}
|
193
|
+
expect(@processor.convert_record(record)).to eq({'bbb' => 'cabbage'})
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
context 'when date field is invalid date' do
|
198
|
+
it 'rasies ConversionError' do
|
199
|
+
record = {'aaa' => 'cabbage', 'bbb' => 'cabbage'}
|
200
|
+
expect{@processor.convert_record(record)}.to raise_error(TurbotRunner::Processor::ConversionError)
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
specify '#get_date_paths' do
|
206
|
+
schema = {
|
207
|
+
'$schema' => 'http://json-schema.org/draft-04/schema#',
|
208
|
+
'type' => 'object',
|
209
|
+
'properties' => {
|
210
|
+
'aaa' => {'format' => 'date'},
|
211
|
+
'bbb' => {'format' => 'not-date'},
|
212
|
+
'ccc' => {
|
213
|
+
'type' => 'object',
|
214
|
+
'properties' => {
|
215
|
+
'ddd' => {'format' => 'date'},
|
216
|
+
'eee' => {'format' => 'not-date'},
|
217
|
+
'fff' => {
|
218
|
+
'type' => 'object',
|
219
|
+
'properties' => {
|
220
|
+
'ggg' => {'format' => 'date'},
|
221
|
+
'hhh' => {'format' => 'not-date'},
|
222
|
+
}
|
223
|
+
}
|
224
|
+
}
|
225
|
+
}
|
226
|
+
}
|
227
|
+
}
|
228
|
+
|
229
|
+
processor = TurbotRunner::Processor.new(nil, {}, nil)
|
230
|
+
expect(processor.get_date_paths(schema['properties'])).to eq([['aaa'], ['ccc', 'ddd'], ['ccc', 'fff', 'ggg']])
|
96
231
|
end
|
97
232
|
end
|
data/spec/lib/runner_spec.rb
CHANGED
@@ -0,0 +1,235 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe TurbotRunner::Validator do
  # Builds a draft-04 object schema: the two keys every example's schema
  # starts with, followed by +extra+ in the order given.
  def object_schema(extra = {})
    {
      '$schema' => 'http://json-schema.org/draft-04/schema#',
      'type' => 'object'
    }.merge(extra)
  end

  describe 'validation' do
    specify 'when record is valid' do
      schema = object_schema('required' => ['aaa'])

      expect([schema, {'aaa' => 'zzz'}]).to be_valid
    end

    specify 'when required top-level property missing' do
      schema = object_schema('required' => ['aaa'])

      expect([schema, {}]).to fail_validation_with(type: :missing, path: 'aaa')
    end

    specify 'when required nested property missing' do
      schema = object_schema(
        'required' => ['aaa'],
        'properties' => {
          'aaa' => {'type' => 'object', 'required' => ['bbb']}
        }
      )

      expect([schema, {'aaa' => {}}]).to fail_validation_with(type: :missing, path: 'aaa.bbb')
    end

    context 'when none of oneOf options match' do
      specify 'and we are switching on an enum field' do
        # Each alternative is selected by a_type and requires a different key.
        alternative = lambda do |type_name, required_key|
          {
            'properties' => {
              'a_type' => {'enum' => [type_name]},
              'a_properties' => {'type' => 'object', 'required' => [required_key]}
            }
          }
        end
        schema = object_schema(
          'required' => ['aaa'],
          'properties' => {
            'aaa' => {
              'type' => 'object',
              'oneOf' => [alternative.call('a1', 'bbb'), alternative.call('a2', 'ccc')]
            }
          }
        )
        record = {'aaa' => {'a_type' => 'a1', 'a_properties' => {}}}

        expect([schema, record]).to fail_validation_with(type: :missing, path: 'aaa.a_properties.bbb')
      end

      specify 'and we are not switching on an enum field' do
        alternative = lambda do |required_key|
          {
            'properties' => {
              'bbb' => {'type' => 'object', 'required' => [required_key]}
            }
          }
        end
        schema = object_schema(
          'required' => ['aaa'],
          'properties' => {
            'aaa' => {
              'type' => 'object',
              'oneOf' => [alternative.call('ccc'), alternative.call('ddd')]
            }
          }
        )
        record = {'aaa' => {'bbb' => {}}}

        expect([schema, record]).to fail_validation_with(type: :one_of_no_matches, path: 'aaa')
      end
    end

    specify 'when top-level property too short' do
      schema = object_schema('properties' => {'aaa' => {'minLength' => 2}})

      expect([schema, {'aaa' => 'x'}]).to fail_validation_with(type: :too_short, path: 'aaa', length: 2)
    end

    specify 'when nested property too short' do
      schema = object_schema(
        'properties' => {
          'aaa' => {
            'type' => 'object',
            'properties' => {'bbb' => {'minLength' => 2}}
          }
        }
      )

      expect([schema, {'aaa' => {'bbb' => 'x'}}]).to fail_validation_with(type: :too_short, path: 'aaa.bbb', length: 2)
    end

    specify 'when property too long' do
      schema = object_schema('properties' => {'aaa' => {'maxLength' => 2}})

      expect([schema, {'aaa' => 'xxx'}]).to fail_validation_with(type: :too_long, path: 'aaa', length: 2)
    end

    specify 'when property of wrong type and many types allowed' do
      schema = object_schema('properties' => {'aaa' => {'type' => ['number', 'string']}})

      expect([schema, {'aaa' => ['xxx']}]).to fail_validation_with(
        type: :type_mismatch,
        path: 'aaa',
        allowed_types: ['number', 'string']
      )
    end

    specify 'when property of wrong type and single type allowed' do
      schema = object_schema('properties' => {'aaa' => {'type' => 'number'}})

      expect([schema, {'aaa' => 'xxx'}]).to fail_validation_with(
        type: :type_mismatch,
        path: 'aaa',
        allowed_types: ['number']
      )
    end

    specify 'when property not in enum' do
      schema = object_schema('properties' => {'aaa' => {'enum' => ['a', 'b', 'c']}})

      expect([schema, {'aaa' => 'z'}]).to fail_validation_with(
        type: :enum_mismatch,
        path: 'aaa',
        allowed_values: ['a', 'b', 'c']
      )
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'custom validators' do
  describe 'for date' do
    # Schema with a single property, 'aaa', declared with format "date".
    before do
      @schema = {
        '$schema' => 'http://json-schema.org/draft-04/schema#',
        'type' => 'object',
        'properties' => {'aaa' => {'format' => 'date'}}
      }
    end

    specify 'validate valid dates' do
      # A bare date, then timestamps with T or space separators and
      # assorted UTC offsets.
      [
        '2015-01-10',
        '2015-01-10T10:15:57',
        '2015-01-10T10:15:57Z',
        '2015-01-10T10:15:57+00:00',
        '2015-01-10T11:15:57+01:00',
        '2015-01-10T09:15:57-01:00',
        '2015-01-10 10:15:57 +0000',
        '2015-01-10 11:15:57 +0100',
        '2015-01-10 09:15:57 -0100'
      ].each do |date_string|
        expect([@schema, {'aaa' => date_string}]).to be_valid
      end
    end

    specify 'do not validator invalid dates' do
      %w[nonsense 2015-01-nonsense 2015:01:10 2015/01/10].each do |bad_string|
        expect([@schema, {'aaa' => bad_string}]).not_to be_valid
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'turbot_runner'
|
2
|
+
|
3
|
+
# Matcher for a [schema, record] pair: runs the pair through
# TurbotRunner::Validator.validate and expects the resulting error hash to
# equal +expected_error+.
RSpec::Matchers.define(:fail_validation_with) do |expected_error|
  match do |schema_and_record|
    json_schema, candidate = schema_and_record
    outcome = TurbotRunner::Validator.validate(json_schema, candidate)

    expect(outcome).to eq(expected_error)
  end
end
|
11
|
+
|
12
|
+
# Matcher for a [schema, record] pair: runs the pair through
# TurbotRunner::Validator.validate and expects it to report no error (nil).
RSpec::Matchers.define(:be_valid) do
  match do |schema_and_record|
    json_schema, candidate = schema_and_record
    outcome = TurbotRunner::Validator.validate(json_schema, candidate)

    expect(outcome).to eq(nil)
  end
end
|
20
|
+
|
metadata
CHANGED
@@ -1,46 +1,35 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: turbot-runner
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 20
|
10
|
-
version: 0.1.20
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.21
|
11
5
|
platform: ruby
|
12
|
-
authors:
|
6
|
+
authors:
|
13
7
|
- OpenCorporates
|
14
8
|
autorequire:
|
15
9
|
bindir: bin
|
16
10
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
11
|
+
date: 2015-01-27 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
21
14
|
name: json-schema
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
hash: 11
|
29
|
-
segments:
|
30
|
-
- 2
|
31
|
-
- 4
|
32
|
-
version: "2.4"
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.5.0
|
33
20
|
type: :runtime
|
34
|
-
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.5.0
|
35
27
|
description:
|
36
28
|
email: bots@opencorporates.com
|
37
29
|
executables: []
|
38
|
-
|
39
30
|
extensions: []
|
40
|
-
|
41
31
|
extra_rdoc_files: []
|
42
|
-
|
43
|
-
files:
|
32
|
+
files:
|
44
33
|
- bin/rspec
|
45
34
|
- lib/turbot_runner.rb
|
46
35
|
- lib/turbot_runner/base_handler.rb
|
@@ -49,8 +38,39 @@ files:
|
|
49
38
|
- lib/turbot_runner/processor.rb
|
50
39
|
- lib/turbot_runner/runner.rb
|
51
40
|
- lib/turbot_runner/script_runner.rb
|
41
|
+
- lib/turbot_runner/utils.rb
|
42
|
+
- lib/turbot_runner/validator.rb
|
52
43
|
- lib/turbot_runner/validators.rb
|
53
44
|
- lib/turbot_runner/version.rb
|
45
|
+
- schema/schemas/company-schema.json
|
46
|
+
- schema/schemas/financial-payment-schema.json
|
47
|
+
- schema/schemas/includes/address.json
|
48
|
+
- schema/schemas/includes/alternative_name.json
|
49
|
+
- schema/schemas/includes/company.json
|
50
|
+
- schema/schemas/includes/filing.json
|
51
|
+
- schema/schemas/includes/financial-payment-data-object.json
|
52
|
+
- schema/schemas/includes/identifier.json
|
53
|
+
- schema/schemas/includes/industry_code.json
|
54
|
+
- schema/schemas/includes/licence-data-object.json
|
55
|
+
- schema/schemas/includes/officer.json
|
56
|
+
- schema/schemas/includes/organisation.json
|
57
|
+
- schema/schemas/includes/permission.json
|
58
|
+
- schema/schemas/includes/person.json
|
59
|
+
- schema/schemas/includes/person_name.json
|
60
|
+
- schema/schemas/includes/previous_name.json
|
61
|
+
- schema/schemas/includes/share-parcel-data.json
|
62
|
+
- schema/schemas/includes/share-parcel.json
|
63
|
+
- schema/schemas/includes/subsidiary-relationship-data.json
|
64
|
+
- schema/schemas/includes/total-shares.json
|
65
|
+
- schema/schemas/includes/unknown_entity_type.json
|
66
|
+
- schema/schemas/licence-schema.json
|
67
|
+
- schema/schemas/primary-data-schema.json
|
68
|
+
- schema/schemas/rich-licence-schema.json
|
69
|
+
- schema/schemas/share-parcel-schema.json
|
70
|
+
- schema/schemas/simple-financial-payment-schema.json
|
71
|
+
- schema/schemas/simple-licence-schema.json
|
72
|
+
- schema/schemas/simple-subsidiary-schema.json
|
73
|
+
- schema/schemas/subsidiary-relationship-schema.json
|
54
74
|
- spec/bots/bot-that-crashes-immediately/manifest.json
|
55
75
|
- spec/bots/bot-that-crashes-immediately/scraper.rb
|
56
76
|
- spec/bots/bot-that-crashes-immediately/transformer1.rb
|
@@ -91,73 +111,36 @@ files:
|
|
91
111
|
- spec/bots/slow-bot/scraper.rb
|
92
112
|
- spec/lib/processor_spec.rb
|
93
113
|
- spec/lib/runner_spec.rb
|
114
|
+
- spec/lib/validator_spec.rb
|
115
|
+
- spec/lib/validators_spec.rb
|
94
116
|
- spec/manual_spec.rb
|
95
117
|
- spec/outputs/full-scraper.out
|
96
118
|
- spec/outputs/full-transformer.out
|
97
119
|
- spec/outputs/truncated-scraper.out
|
98
|
-
-
|
99
|
-
- schema/schemas/financial-payment-schema.json
|
100
|
-
- schema/schemas/includes/address.json
|
101
|
-
- schema/schemas/includes/alternative_name.json
|
102
|
-
- schema/schemas/includes/company.json
|
103
|
-
- schema/schemas/includes/filing.json
|
104
|
-
- schema/schemas/includes/financial-payment-data-object.json
|
105
|
-
- schema/schemas/includes/identifier.json
|
106
|
-
- schema/schemas/includes/industry_code.json
|
107
|
-
- schema/schemas/includes/licence-data-object.json
|
108
|
-
- schema/schemas/includes/officer.json
|
109
|
-
- schema/schemas/includes/organisation.json
|
110
|
-
- schema/schemas/includes/permission.json
|
111
|
-
- schema/schemas/includes/person.json
|
112
|
-
- schema/schemas/includes/person_name.json
|
113
|
-
- schema/schemas/includes/previous_name.json
|
114
|
-
- schema/schemas/includes/share-parcel-data.json
|
115
|
-
- schema/schemas/includes/share-parcel.json
|
116
|
-
- schema/schemas/includes/subsidiary-relationship-data.json
|
117
|
-
- schema/schemas/includes/total-shares.json
|
118
|
-
- schema/schemas/includes/unknown_entity_type.json
|
119
|
-
- schema/schemas/licence-schema.json
|
120
|
-
- schema/schemas/primary-data-schema.json
|
121
|
-
- schema/schemas/rich-licence-schema.json
|
122
|
-
- schema/schemas/share-parcel-schema.json
|
123
|
-
- schema/schemas/simple-financial-payment-schema.json
|
124
|
-
- schema/schemas/simple-licence-schema.json
|
125
|
-
- schema/schemas/simple-subsidiary-schema.json
|
126
|
-
- schema/schemas/subsidiary-relationship-schema.json
|
120
|
+
- spec/spec_helper.rb
|
127
121
|
homepage: http://turbot.opencorporates.com/
|
128
|
-
licenses:
|
122
|
+
licenses:
|
129
123
|
- MIT
|
124
|
+
metadata: {}
|
130
125
|
post_install_message:
|
131
126
|
rdoc_options: []
|
132
|
-
|
133
|
-
require_paths:
|
127
|
+
require_paths:
|
134
128
|
- lib
|
135
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
- !ruby/object:Gem::Version
|
140
|
-
hash: 55
|
141
|
-
segments:
|
142
|
-
- 1
|
143
|
-
- 9
|
144
|
-
- 2
|
129
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - ! '>='
|
132
|
+
- !ruby/object:Gem::Version
|
145
133
|
version: 1.9.2
|
146
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
hash: 3
|
152
|
-
segments:
|
153
|
-
- 0
|
154
|
-
version: "0"
|
134
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ! '>='
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
155
139
|
requirements: []
|
156
|
-
|
157
140
|
rubyforge_project:
|
158
|
-
rubygems_version:
|
141
|
+
rubygems_version: 2.2.2
|
159
142
|
signing_key:
|
160
|
-
specification_version:
|
143
|
+
specification_version: 4
|
161
144
|
summary: Utilities for running bots with Turbot
|
162
145
|
test_files: []
|
163
|
-
|
146
|
+
has_rdoc:
|