turbot-runner 0.1.20 → 0.1.21

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ OTBiNjljYWU5MzdhMzk4ZjJiMGI4MmNkMzJiZTU4MjIwYjIyNGZhZg==
5
+ data.tar.gz: !binary |-
6
+ ODEzZmE5ZjBlMDQ0MzY5MWQyODlkN2RjZDU0Y2UyODI3ODVmOWZkYw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ZGQwMzJhZWY1NzUwOGYyZDQ1YjEyZjEyY2M1OGQ5YzY2YmU1ZjhlZjZlN2Y5
10
+ YmNjN2FiYTFkZWUxNzUyMDQ0MDIyYzA2OTI1MzEyNzA3ZGNjM2ViMDY4NGQ3
11
+ MWExM2JiMzkyOWE3MzExODMyNDMyNzYxMTEzNGU1YjNmYTcyODU=
12
+ data.tar.gz: !binary |-
13
+ Y2U0NGY0MDFmMzk4MTNhMWYxYjQwMzJhYmVlMjg5MGVkNDRhYWQ0NDBkZGQy
14
+ MzlkMmQyYjY0NTFkMjRjOGQ0MjFlN2Y0NDkxZWVkMjRiNjcyYjg5ODZiZGNi
15
+ MzZmNDQxOWM2YTg0YTI5NjQ5MGE0NzAwZWFiNmVmY2I5NWRkNTg=
@@ -6,7 +6,7 @@ module TurbotRunner
6
6
  def handle_run_ended
7
7
  end
8
8
 
9
- def handle_invalid_record(record, data_type, line)
9
+ def handle_invalid_record(record, data_type, error_message)
10
10
  end
11
11
 
12
12
  def handle_invalid_json(line)
@@ -17,16 +17,22 @@ module TurbotRunner
17
17
  @runner.interrupt if @runner
18
18
  else
19
19
  record = JSON.parse(line)
20
- errors = validate(record)
21
20
 
22
- if errors.empty?
21
+ begin
22
+ converted_record = convert_record(record)
23
+ error_message = validate(converted_record)
24
+ rescue ConversionError => e
25
+ error_message = e.message
26
+ end
27
+
28
+ if error_message.nil?
23
29
  begin
24
- @record_handler.handle_valid_record(record, @data_type)
30
+ @record_handler.handle_valid_record(converted_record, @data_type)
25
31
  rescue InterruptRun
26
32
  @runner.interrupt if @runner
27
33
  end
28
34
  else
29
- @record_handler.handle_invalid_record(record, @data_type, errors)
35
+ @record_handler.handle_invalid_record(record, @data_type, error_message)
30
36
  @runner.interrupt_and_mark_as_failed if @runner
31
37
  end
32
38
  end
@@ -40,37 +46,107 @@ module TurbotRunner
40
46
  @runner.interrupt
41
47
  end
42
48
 
43
- def validate(record)
44
- errors = JSON::Validator.fully_validate(schema, record, :errors_as_objects => true)
45
- messages = errors.map do |error|
46
- case error[:message]
47
- when /The property '#\/' did not contain a required property of '(\w+)'/
48
- "Missing required attribute: #{Regexp.last_match(1)}"
49
+ def convert_record(record)
50
+ converted_record = Utils.deep_copy(record)
51
+
52
+ date_paths.each do |path|
53
+ begin
54
+ tmp = converted_record
55
+
56
+ path[0...-1].each do |path_item|
57
+ tmp = tmp[path_item]
58
+ end
59
+
60
+ value = tmp[path[-1]]
61
+ rescue NoMethodError
62
+ next
63
+ end
64
+
65
+ next unless value.is_a?(String)
66
+
67
+ if value == ''
68
+ tmp.delete(path[-1])
49
69
  else
50
- error[:message]
70
+ begin
71
+ tmp[path[-1]] = Date.strptime(value, '%Y-%m-%d').strftime('%Y-%m-%d')
72
+ rescue ArgumentError
73
+ raise ConversionError.new("Property not a valid date: #{path.join('.')}")
74
+ end
51
75
  end
52
76
  end
53
77
 
54
- if messages.empty?
78
+ converted_record
79
+ end
80
+
81
+ def date_paths
82
+ @date_paths ||= get_date_paths(schema['properties'])
83
+ end
84
+
85
+ def get_date_paths(properties)
86
+ date_paths = []
87
+
88
+ properties.each do |name, attrs|
89
+ if attrs['format'] == 'date'
90
+ date_paths << [name]
91
+ elsif attrs['type'] == 'object'
92
+ get_date_paths(attrs['properties']).each do |path|
93
+ date_paths << [name] + path
94
+ end
95
+ end
96
+ end
97
+
98
+ date_paths
99
+ end
100
+
101
+ def validate(record)
102
+ error = Validator.validate(schema, record)
103
+
104
+ message = nil
105
+
106
+ if error.nil?
55
107
  identifying_attributes = record.reject do |k, v|
56
108
  !@identifying_fields.include?(k) || v.nil? || v == ''
57
109
  end
58
110
 
59
111
  if identifying_attributes.empty?
60
- messages << "There were no values provided for any of the identifying fields: #{@identifying_fields.join(', ')}"
112
+ message = "There were no values provided for any of the identifying fields: #{@identifying_fields.join(', ')}"
113
+ end
114
+ else
115
+ message = case error[:type]
116
+ when :missing
117
+ "Missing required property: #{error[:path]}"
118
+ when :one_of_no_matches
119
+ "No match for property: #{error[:path]}"
120
+ when :one_of_many_matches
121
+ "Multiple possible matches for property: #{error[:path]}"
122
+ when :too_short
123
+ "Property too short: #{error[:path]} (must be at least #{error[:length]} characters)"
124
+ when :too_long
125
+ "Property too long: #{error[:path]} (must be at most #{error[:length]} characters)"
126
+ when :type_mismatch
127
+ "Property of wrong type: #{error[:path]} (must be of type #{error[:allowed_types].join(', ')})"
128
+ when :enum_mismatch
129
+ "Property not an allowed value: #{error[:path]} (must be one of #{error[:allowed_values].join(', ')})"
130
+ when :format_mismatch
131
+ "Property not of expected format: #{error[:path]} (must be of format #{error[:expected_format]})"
132
+ when :unknown
133
+ error[:message]
61
134
  end
62
135
  end
63
136
 
64
- messages
137
+ message
65
138
  end
66
139
 
67
140
  def schema
68
- @schema ||= get_schema
141
+ @schema ||= load_schema
69
142
  end
70
143
 
71
- def get_schema
144
+ def load_schema
72
145
  hyphenated_name = @data_type.to_s.gsub("_", "-").gsub(" ", "-")
73
- File.expand_path("../../../schema/schemas/#{hyphenated_name}-schema.json", __FILE__)
146
+ path = File.expand_path("../../../schema/schemas/#{hyphenated_name}-schema.json", __FILE__)
147
+ JSON.load(File.read(path))
74
148
  end
149
+
150
+ class ConversionError < StandardError; end
75
151
  end
76
152
  end
@@ -0,0 +1,9 @@
1
+ module TurbotRunner
2
+ module Utils
3
+ extend self
4
+
5
+ def deep_copy(thing)
6
+ Marshal.load(Marshal.dump(thing))
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,74 @@
1
+ require 'json-schema'
2
+
3
+ module TurbotRunner
4
+ module Validator
5
+ extend self
6
+
7
+ def validate(schema, record)
8
+ errors = JSON::Validator.fully_validate(schema, record, :errors_as_objects => true)
9
+
10
+ # For now, we just handle the first error.
11
+ error = errors[0]
12
+ return if error.nil?
13
+
14
+ case error[:failed_attribute]
15
+ when 'Required'
16
+ match = error[:message].match(/required property of '(.*)'/)
17
+ missing_property = match[1]
18
+ path = fragment_to_path("#{error[:fragment]}/#{missing_property}")
19
+
20
+ {:type => :missing, :path => path}
21
+ when 'OneOf'
22
+ if error[:message].match(/did not match any/)
23
+ path_elements = fragment_to_path(error[:fragment]).split('.')
24
+
25
+ raise "Deeply nested OneOf error at: #{error[:fragment]}" unless path_elements.size == 1
26
+
27
+ record_fragment = record[path_elements[0]]
28
+ schema_fragments = schema['properties'][path_elements[0]]['oneOf']
29
+
30
+ schema_fragments.each do |s|
31
+ s['properties'].each do |k, v|
32
+ next if v['enum'].nil?
33
+
34
+ if v['enum'].include?(record_fragment[k])
35
+ error1 = validate(s, record_fragment)
36
+ return error1.merge(:path => "#{path_elements[0]}.#{error1[:path]}")
37
+ end
38
+ end
39
+ end
40
+
41
+ {:type => :one_of_no_matches, :path => fragment_to_path(error[:fragment])}
42
+ else
43
+ {:type => :one_of_many_matches, :path => fragment_to_path(error[:fragment])}
44
+ end
45
+ when 'MinLength'
46
+ match = error[:message].match(/minimum string length of (\d+) in/)
47
+ min_length = match[1].to_i
48
+ {:type => :too_short, :path => fragment_to_path(error[:fragment]), :length => min_length}
49
+ when 'MaxLength'
50
+ match = error[:message].match(/maximum string length of (\d+) in/)
51
+ max_length = match[1].to_i
52
+ {:type => :too_long, :path => fragment_to_path(error[:fragment]), :length => max_length}
53
+ when 'TypeV4'
54
+ match = error[:message].match(/the following types?: ([\w\s,]+) in schema/)
55
+ allowed_types = match[1].split(',').map(&:strip)
56
+ {:type => :type_mismatch, :path => fragment_to_path(error[:fragment]), :allowed_types => allowed_types}
57
+ when 'Enum'
58
+ match = error[:message].match(/the following values: ([\w\s,]+) in schema/)
59
+ allowed_values = match[1].split(',').map(&:strip)
60
+ {:type => :enum_mismatch, :path => fragment_to_path(error[:fragment]), :allowed_values => allowed_values}
61
+ else
62
+ if error[:message].match(/must be of format yyyy-mm-dd/)
63
+ {:type => :format_mismatch, :path => fragment_to_path(error[:fragment]), :expected_format => 'yyyy-mm-dd'}
64
+ else
65
+ {:type => :unknown, :path => fragment_to_path(error[:fragment]), :failed_attribute => error[:failed_attribute], :message => error[:message]}
66
+ end
67
+ end
68
+ end
69
+
70
+ def fragment_to_path(fragment)
71
+ fragment.sub(/^#?\/*/, '').gsub('/', '.')
72
+ end
73
+ end
74
+ end
@@ -1,3 +1,3 @@
1
1
  module TurbotRunner
2
- VERSION = '0.1.20'
2
+ VERSION = '0.1.21'
3
3
  end
data/lib/turbot_runner.rb CHANGED
@@ -3,5 +3,7 @@ require 'turbot_runner/exceptions'
3
3
  require 'turbot_runner/processor'
4
4
  require 'turbot_runner/runner'
5
5
  require 'turbot_runner/script_runner'
6
+ require 'turbot_runner/utils'
7
+ require 'turbot_runner/validator'
6
8
  require 'turbot_runner/validators'
7
9
  require 'turbot_runner/version'
@@ -4,6 +4,7 @@
4
4
  "$schema": "http://json-schema.org/draft-04/schema#",
5
5
  "type": "object",
6
6
  "properties": {
7
+ "sample_date": {"type": "string", "format": "date"},
7
8
  "source_jurisdiction": {
8
9
  "description": "Jurisdiction of the source of the data",
9
10
  "type": "string"
@@ -20,8 +21,5 @@
20
21
  "additionalItems": false
21
22
  }
22
23
  },
23
- "required": [
24
- "company",
25
- "data"
26
- ]
24
+ "required": ["company", "data", "sample_date"]
27
25
  }
@@ -4,6 +4,9 @@
4
4
  "$schema": "http://json-schema.org/draft-04/schema#",
5
5
  "type": "object",
6
6
  "properties": {
7
+ "sample_date": {"type": "string", "format": "date"},
8
+ "start_date": {"type": "string", "format": "date"},
9
+ "end_date": {"type": "string", "format": "date"},
7
10
  "source_jurisdiction": {
8
11
  "description": "Jurisdiction of the source of the data",
9
12
  "type": "string"
@@ -20,8 +23,5 @@
20
23
  "additionalItems": false
21
24
  }
22
25
  },
23
- "required": [
24
- "company",
25
- "data"
26
- ]
26
+ "required": ["company", "data", "sample_date"]
27
27
  }
@@ -9,7 +9,8 @@
9
9
  },
10
10
  "sample_date": {
11
11
  "description": "Date on which this fact was known to be true",
12
- "type": "string"
12
+ "type": "string",
13
+ "format": "date"
13
14
  }
14
15
  },
15
16
  "required": ["source_url", "sample_date"],
@@ -3,6 +3,9 @@
3
3
  "description": "A Licence is a permission for an entity to do something that would otherwise not be permitted. Note that such permissions are recorded in multiple different ways and to different granularity. In addition, categories cross over; for example, a pawnbroker may be regulated as a retail establishment by a city, but also as a financial institution by a national or regional financial regulator. Sometimes the licence is a simple statement – the government of Rwanda has given a banking licence to a certain bank – other times it is fine-grained and highly complex (e.g. an extractives licence or the UK FCA licensing scheme). This schema tries to make it easy to submit the former, while not losing granularity of the latter. The object has the name of Rich Licence to distinguish it from Simple Licence, which it is expected to ultimately replace.",
4
4
  "$schema": "http://json-schema.org/draft-04/schema#",
5
5
  "properties": {
6
+ "sample_date": {"type": "string", "format": "date"},
7
+ "start_date": {"type": "string", "format": "date"},
8
+ "end_date": {"type": "string", "format": "date"},
6
9
  "licence_holder": {
7
10
  "entity": {
8
11
  "oneOf": [
@@ -10,7 +10,8 @@
10
10
  },
11
11
  "sample_date": {
12
12
  "description": "Date on which this fact was known to be true",
13
- "type": "string"
13
+ "type": "string",
14
+ "format": "date"
14
15
  },
15
16
  "confidence": {
16
17
  "description": "Confidence in accuracy of data",
@@ -10,8 +10,8 @@
10
10
  },
11
11
  "sample_date": {
12
12
  "description": "Date on which this fact was known to be true",
13
- "type": "date",
14
- "minLength": 1
13
+ "type": "string",
14
+ "format": "date"
15
15
  },
16
16
  "confidence": {
17
17
  "description": "Confidence in accuracy of data",
@@ -11,15 +11,17 @@
11
11
  "sample_date": {
12
12
  "description": "Date on which this fact was known to be true",
13
13
  "type": "string",
14
- "minLength": 1
14
+ "format": "date"
15
15
  },
16
16
  "start_date": {
17
17
  "description": "Earliest known date this was known to be a subsidiary",
18
- "type": "string"
18
+ "type": "string",
19
+ "format": "date"
19
20
  },
20
21
  "end_date": {
21
22
  "description": "Latest known date this was known to be a subsidiary",
22
- "type": "string"
23
+ "type": "string",
24
+ "format": "date"
23
25
  },
24
26
  "confidence": {
25
27
  "description": "Confidence in accuracy of data",
@@ -4,6 +4,9 @@
4
4
  "$schema": "http://json-schema.org/draft-04/schema#",
5
5
  "type": "object",
6
6
  "properties": {
7
+ "sample_date": {"type": "string", "format": "date"},
8
+ "start_date": {"type": "string", "format": "date"},
9
+ "end_date": {"type": "string", "format": "date"},
7
10
  "source_jurisdiction": {
8
11
  "description": "Jurisdiction of the source of the data",
9
12
  "type": "string"
@@ -20,8 +23,5 @@
20
23
  "additionalItems": false
21
24
  }
22
25
  },
23
- "required": [
24
- "company",
25
- "data"
26
- ]
26
+ "required": ["company", "data", "sample_date"]
27
27
  }
@@ -26,9 +26,9 @@ describe TurbotRunner::Processor do
26
26
  end
27
27
 
28
28
  it 'calls Handler#handle_invalid_record' do
29
- expected_errors = ['Missing required attribute: source_url']
29
+ expected_error = 'Missing required property: source_url'
30
30
  expect(@handler).to receive(:handle_invalid_record).
31
- with(@record, @data_type, expected_errors)
31
+ with(@record, @data_type, expected_error)
32
32
  @processor.process(@record.to_json)
33
33
  end
34
34
  end
@@ -62,9 +62,9 @@ describe TurbotRunner::Processor do
62
62
  end
63
63
 
64
64
  it 'calls Handler#handle_invalid_record' do
65
- expected_errors = ['Missing required attribute: source_url']
65
+ expected_error = 'Missing required property: source_url'
66
66
  expect(@handler).to receive(:handle_invalid_record).
67
- with(@record, @data_type, expected_errors)
67
+ with(@record, @data_type, expected_error)
68
68
  @processor.process(@record.to_json)
69
69
  end
70
70
  end
@@ -78,9 +78,9 @@ describe TurbotRunner::Processor do
78
78
  end
79
79
 
80
80
  it 'calls Handler#handle_invalid_record' do
81
- expected_errors = ['There were no values provided for any of the identifying fields: number']
81
+ expected_error = 'There were no values provided for any of the identifying fields: number'
82
82
  expect(@handler).to receive(:handle_invalid_record).
83
- with(@record, @data_type, expected_errors)
83
+ with(@record, @data_type, expected_error)
84
84
  @processor.process(@record.to_json)
85
85
  end
86
86
  end
@@ -92,6 +92,141 @@ describe TurbotRunner::Processor do
92
92
  @processor.process(line)
93
93
  end
94
94
  end
95
+
96
+ context 'with record with sample_date from Time.now' do
97
+ it 'calls Handler#handle_valid_record with converted sample_date' do
98
+ record = {
99
+ 'sample_date' => '2014-06-01 12:34:56 +0000',
100
+ 'source_url' => 'http://example.com/123',
101
+ 'number' => 123
102
+ }
103
+
104
+ expected_converted_record = {
105
+ 'sample_date' => '2014-06-01',
106
+ 'source_url' => 'http://example.com/123',
107
+ 'number' => 123
108
+ }
109
+ expect(@handler).to receive(:handle_valid_record).
110
+ with(expected_converted_record, @data_type)
111
+ @processor.process(record.to_json)
112
+ end
113
+ end
114
+
115
+ context 'with record with missing sample_date' do
116
+ it 'calls Handler#handle_invalid_record' do
117
+ record = {
118
+ 'source_url' => 'http://example.com/123',
119
+ 'number' => 123
120
+ }
121
+
122
+ expected_error = 'Missing required property: sample_date'
123
+ expect(@handler).to receive(:handle_invalid_record).
124
+ with(record, @data_type, expected_error)
125
+ @processor.process(record.to_json)
126
+ end
127
+ end
128
+
129
+ context 'with record with empty sample_date' do
130
+ it 'calls Handler#handle_invalid_record' do
131
+ record = {
132
+ 'sample_date' => '',
133
+ 'source_url' => 'http://example.com/123',
134
+ 'number' => 123
135
+ }
136
+
137
+ expected_error = 'Missing required property: sample_date'
138
+ expect(@handler).to receive(:handle_invalid_record).
139
+ with(record, @data_type, expected_error)
140
+ @processor.process(record.to_json)
141
+ end
142
+ end
143
+
144
+ context 'with record with invalid sample_date' do
145
+ it 'calls Handler#handle_invalid_record' do
146
+ record = {
147
+ 'sample_date' => '2014-06-00',
148
+ 'source_url' => 'http://example.com/123',
149
+ 'number' => 123
150
+ }
151
+
152
+ expected_error = 'Property not a valid date: sample_date'
153
+ expect(@handler).to receive(:handle_invalid_record).
154
+ with(record, @data_type, expected_error)
155
+ @processor.process(record.to_json)
156
+ end
157
+ end
158
+ end
159
+ end
160
+
161
+ describe '#convert_record' do
162
+ before do
163
+ schema = {
164
+ '$schema' => 'http://json-schema.org/draft-04/schema#',
165
+ 'type' => 'object',
166
+ 'properties' => {
167
+ 'aaa' => {'format' => 'date'},
168
+ 'bbb' => {'format' => 'not-date'},
169
+ }
170
+ }
171
+
172
+ @processor = TurbotRunner::Processor.new(nil, {}, nil)
173
+ allow(@processor).to receive(:schema).and_return(schema)
95
174
  end
175
+
176
+ context 'when date field is YYYY-MM-DD' do
177
+ it 'leaves date field alone' do
178
+ record = {'aaa' => '2015-01-26', 'bbb' => 'cabbage'}
179
+ expect(@processor.convert_record(record)).to eq({'aaa' => '2015-01-26', 'bbb' => 'cabbage'})
180
+ end
181
+ end
182
+
183
+ context 'when date field with YYYY-MM-DD HH:MM:SS' do
184
+ it 'replaces value with YYYY-MM-DD' do
185
+ record = {'aaa' => '2015-01-26 12:34:56', 'bbb' => 'cabbage'}
186
+ expect(@processor.convert_record(record)).to eq({'aaa' => '2015-01-26', 'bbb' => 'cabbage'})
187
+ end
188
+ end
189
+
190
+ context 'when date field is empty string' do
191
+ it 'replaces removes field' do
192
+ record = {'aaa' => '', 'bbb' => 'cabbage'}
193
+ expect(@processor.convert_record(record)).to eq({'bbb' => 'cabbage'})
194
+ end
195
+ end
196
+
197
+ context 'when date field is invalid date' do
198
+ it 'rasies ConversionError' do
199
+ record = {'aaa' => 'cabbage', 'bbb' => 'cabbage'}
200
+ expect{@processor.convert_record(record)}.to raise_error(TurbotRunner::Processor::ConversionError)
201
+ end
202
+ end
203
+ end
204
+
205
+ specify '#get_date_paths' do
206
+ schema = {
207
+ '$schema' => 'http://json-schema.org/draft-04/schema#',
208
+ 'type' => 'object',
209
+ 'properties' => {
210
+ 'aaa' => {'format' => 'date'},
211
+ 'bbb' => {'format' => 'not-date'},
212
+ 'ccc' => {
213
+ 'type' => 'object',
214
+ 'properties' => {
215
+ 'ddd' => {'format' => 'date'},
216
+ 'eee' => {'format' => 'not-date'},
217
+ 'fff' => {
218
+ 'type' => 'object',
219
+ 'properties' => {
220
+ 'ggg' => {'format' => 'date'},
221
+ 'hhh' => {'format' => 'not-date'},
222
+ }
223
+ }
224
+ }
225
+ }
226
+ }
227
+ }
228
+
229
+ processor = TurbotRunner::Processor.new(nil, {}, nil)
230
+ expect(processor.get_date_paths(schema['properties'])).to eq([['aaa'], ['ccc', 'ddd'], ['ccc', 'fff', 'ggg']])
96
231
  end
97
232
  end
@@ -223,7 +223,7 @@ describe TurbotRunner::Runner do
223
223
  end
224
224
 
225
225
  it 'returns false' do
226
- expect(@runner.run).to be(true)
226
+ expect(@runner.run).to be(false)
227
227
  end
228
228
  end
229
229
  end
@@ -0,0 +1,235 @@
1
+ require 'spec_helper'
2
+
3
+ describe TurbotRunner::Validator do
4
+ describe 'validation' do
5
+ specify 'when record is valid' do
6
+ schema = {
7
+ '$schema' => 'http://json-schema.org/draft-04/schema#',
8
+ 'type' => 'object',
9
+ 'required' => ['aaa'],
10
+ }
11
+ record = {'aaa' => 'zzz'}
12
+
13
+ expect([schema, record]).to be_valid
14
+ end
15
+
16
+ specify 'when required top-level property missing' do
17
+ schema = {
18
+ '$schema' => 'http://json-schema.org/draft-04/schema#',
19
+ 'type' => 'object',
20
+ 'required' => ['aaa'],
21
+ }
22
+ record = {}
23
+
24
+ expect([schema, record]).to fail_validation_with(
25
+ :type => :missing,
26
+ :path => 'aaa'
27
+ )
28
+ end
29
+
30
+ specify 'when required nested property missing' do
31
+ schema = {
32
+ '$schema' => 'http://json-schema.org/draft-04/schema#',
33
+ 'type' => 'object',
34
+ 'required' => ['aaa'],
35
+ 'properties' => {
36
+ 'aaa' => {
37
+ 'type' => 'object',
38
+ 'required' => ['bbb'],
39
+ }
40
+ }
41
+ }
42
+ record = {'aaa' => {}}
43
+
44
+ expect([schema, record]).to fail_validation_with(
45
+ :type => :missing,
46
+ :path => 'aaa.bbb'
47
+ )
48
+ end
49
+
50
+ context 'when none of oneOf options match' do
51
+ specify 'and we are switching on an enum field' do
52
+ schema = {
53
+ '$schema' => 'http://json-schema.org/draft-04/schema#',
54
+ 'type' => 'object',
55
+ 'required' => ['aaa'],
56
+ 'properties' => {
57
+ 'aaa' => {
58
+ 'type' => 'object',
59
+ 'oneOf' => [{
60
+ 'properties' => {
61
+ 'a_type' => {
62
+ 'enum' => ['a1']
63
+ },
64
+ 'a_properties' => {
65
+ 'type' => 'object',
66
+ 'required' => ['bbb'],
67
+ }
68
+ }
69
+ }, {
70
+ 'properties' => {
71
+ 'a_type' => {
72
+ 'enum' => ['a2']
73
+ },
74
+ 'a_properties' => {
75
+ 'type' => 'object',
76
+ 'required' => ['ccc']
77
+ }
78
+ }
79
+ }]
80
+ }
81
+ }
82
+ }
83
+
84
+ record = {'aaa' => {'a_type' => 'a1', 'a_properties' => {}}}
85
+
86
+ expect([schema, record]).to fail_validation_with(
87
+ :type => :missing,
88
+ :path => 'aaa.a_properties.bbb'
89
+ )
90
+ end
91
+
92
+ specify 'and we are not switching on an enum field' do
93
+ schema = {
94
+ '$schema' => 'http://json-schema.org/draft-04/schema#',
95
+ 'type' => 'object',
96
+ 'required' => ['aaa'],
97
+ 'properties' => {
98
+ 'aaa' => {
99
+ 'type' => 'object',
100
+ 'oneOf' => [{
101
+ 'properties' => {
102
+ 'bbb' => {
103
+ 'type' => 'object',
104
+ 'required' => ['ccc'],
105
+ }
106
+ }
107
+ }, {
108
+ 'properties' => {
109
+ 'bbb' => {
110
+ 'type' => 'object',
111
+ 'required' => ['ddd']
112
+ }
113
+ }
114
+ }]
115
+ }
116
+ }
117
+ }
118
+
119
+ record = {'aaa' => {'bbb' => {}}}
120
+
121
+ expect([schema, record]).to fail_validation_with(
122
+ :type => :one_of_no_matches,
123
+ :path => 'aaa'
124
+ )
125
+ end
126
+ end
127
+
128
+ specify 'when top-level property too short' do
129
+ schema = {
130
+ '$schema' => 'http://json-schema.org/draft-04/schema#',
131
+ 'type' => 'object',
132
+ 'properties' => {
133
+ 'aaa' => {'minLength' => 2}
134
+ }
135
+ }
136
+ record = {'aaa' => 'x'}
137
+
138
+ expect([schema, record]).to fail_validation_with(
139
+ :type => :too_short,
140
+ :path => 'aaa',
141
+ :length => 2
142
+ )
143
+ end
144
+
145
+ specify 'when nested property too short' do
146
+ schema = {
147
+ '$schema' => 'http://json-schema.org/draft-04/schema#',
148
+ 'type' => 'object',
149
+ 'properties' => {
150
+ 'aaa' => {
151
+ 'type' => 'object',
152
+ 'properties' => {
153
+ 'bbb' => {'minLength' => 2}
154
+ }
155
+ }
156
+ }
157
+ }
158
+ record = {'aaa' => {'bbb' => 'x'}}
159
+
160
+ expect([schema, record]).to fail_validation_with(
161
+ :type => :too_short,
162
+ :path => 'aaa.bbb',
163
+ :length => 2
164
+ )
165
+ end
166
+
167
+ specify 'when property too long' do
168
+ schema = {
169
+ '$schema' => 'http://json-schema.org/draft-04/schema#',
170
+ 'type' => 'object',
171
+ 'properties' => {
172
+ 'aaa' => {'maxLength' => 2}
173
+ }
174
+ }
175
+ record = {'aaa' => 'xxx'}
176
+
177
+ expect([schema, record]).to fail_validation_with(
178
+ :type => :too_long,
179
+ :path => 'aaa',
180
+ :length => 2
181
+ )
182
+ end
183
+
184
+ specify 'when property of wrong type and many types allowed' do
185
+ schema = {
186
+ '$schema' => 'http://json-schema.org/draft-04/schema#',
187
+ 'type' => 'object',
188
+ 'properties' => {
189
+ 'aaa' => {'type' => ['number', 'string']}
190
+ }
191
+ }
192
+ record = {'aaa' => ['xxx']}
193
+
194
+ expect([schema, record]).to fail_validation_with(
195
+ :type => :type_mismatch,
196
+ :path => 'aaa',
197
+ :allowed_types => ['number', 'string']
198
+ )
199
+ end
200
+
201
+ specify 'when property of wrong type and single type allowed' do
202
+ schema = {
203
+ '$schema' => 'http://json-schema.org/draft-04/schema#',
204
+ 'type' => 'object',
205
+ 'properties' => {
206
+ 'aaa' => {'type' => 'number'}
207
+ }
208
+ }
209
+ record = {'aaa' => 'xxx'}
210
+
211
+ expect([schema, record]).to fail_validation_with(
212
+ :type => :type_mismatch,
213
+ :path => 'aaa',
214
+ :allowed_types => ['number']
215
+ )
216
+ end
217
+
218
+ specify 'when property not in enum' do
219
+ schema = {
220
+ '$schema' => 'http://json-schema.org/draft-04/schema#',
221
+ 'type' => 'object',
222
+ 'properties' => {
223
+ 'aaa' => {'enum' => ['a', 'b', 'c']}
224
+ }
225
+ }
226
+ record = {'aaa' => 'z'}
227
+
228
+ expect([schema, record]).to fail_validation_with(
229
+ :type => :enum_mismatch,
230
+ :path => 'aaa',
231
+ :allowed_values => ['a', 'b', 'c']
232
+ )
233
+ end
234
+ end
235
+ end
@@ -0,0 +1,48 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'custom validators' do
4
+ describe 'for date' do
5
+ before do
6
+ @schema = {
7
+ '$schema' => 'http://json-schema.org/draft-04/schema#',
8
+ 'type' => 'object',
9
+ 'properties' => {
10
+ 'aaa' => {'format' => 'date'}
11
+ }
12
+ }
13
+ end
14
+
15
+ specify 'validate valid dates' do
16
+ strings = [
17
+ '2015-01-10',
18
+ '2015-01-10T10:15:57',
19
+ '2015-01-10T10:15:57Z',
20
+ '2015-01-10T10:15:57+00:00',
21
+ '2015-01-10T11:15:57+01:00',
22
+ '2015-01-10T09:15:57-01:00',
23
+ '2015-01-10 10:15:57 +0000',
24
+ '2015-01-10 11:15:57 +0100',
25
+ '2015-01-10 09:15:57 -0100',
26
+ ]
27
+
28
+ strings.each do |string|
29
+ record = {'aaa' => string}
30
+ expect([@schema, record]).to be_valid
31
+ end
32
+ end
33
+
34
+ specify 'do not validator invalid dates' do
35
+ strings = [
36
+ 'nonsense',
37
+ '2015-01-nonsense',
38
+ '2015:01:10',
39
+ '2015/01/10',
40
+ ]
41
+
42
+ strings.each do |string|
43
+ record = {'aaa' => string}
44
+ expect([@schema, record]).not_to be_valid
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,20 @@
1
+ require 'turbot_runner'
2
+
3
+ RSpec::Matchers.define(:fail_validation_with) do |expected|
4
+ match do |actual|
5
+ schema, record = actual
6
+
7
+ error = TurbotRunner::Validator.validate(schema, record)
8
+ expect(error).to eq(expected)
9
+ end
10
+ end
11
+
12
+ RSpec::Matchers.define(:be_valid) do
13
+ match do |actual|
14
+ schema, record = actual
15
+
16
+ error = TurbotRunner::Validator.validate(schema, record)
17
+ expect(error).to eq(nil)
18
+ end
19
+ end
20
+
metadata CHANGED
@@ -1,46 +1,35 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
- version: !ruby/object:Gem::Version
4
- hash: 51
5
- prerelease:
6
- segments:
7
- - 0
8
- - 1
9
- - 20
10
- version: 0.1.20
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.21
11
5
  platform: ruby
12
- authors:
6
+ authors:
13
7
  - OpenCorporates
14
8
  autorequire:
15
9
  bindir: bin
16
10
  cert_chain: []
17
-
18
- date: 2015-01-15 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
11
+ date: 2015-01-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
21
14
  name: json-schema
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ~>
27
- - !ruby/object:Gem::Version
28
- hash: 11
29
- segments:
30
- - 2
31
- - 4
32
- version: "2.4"
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 2.5.0
33
20
  type: :runtime
34
- version_requirements: *id001
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 2.5.0
35
27
  description:
36
28
  email: bots@opencorporates.com
37
29
  executables: []
38
-
39
30
  extensions: []
40
-
41
31
  extra_rdoc_files: []
42
-
43
- files:
32
+ files:
44
33
  - bin/rspec
45
34
  - lib/turbot_runner.rb
46
35
  - lib/turbot_runner/base_handler.rb
@@ -49,8 +38,39 @@ files:
49
38
  - lib/turbot_runner/processor.rb
50
39
  - lib/turbot_runner/runner.rb
51
40
  - lib/turbot_runner/script_runner.rb
41
+ - lib/turbot_runner/utils.rb
42
+ - lib/turbot_runner/validator.rb
52
43
  - lib/turbot_runner/validators.rb
53
44
  - lib/turbot_runner/version.rb
45
+ - schema/schemas/company-schema.json
46
+ - schema/schemas/financial-payment-schema.json
47
+ - schema/schemas/includes/address.json
48
+ - schema/schemas/includes/alternative_name.json
49
+ - schema/schemas/includes/company.json
50
+ - schema/schemas/includes/filing.json
51
+ - schema/schemas/includes/financial-payment-data-object.json
52
+ - schema/schemas/includes/identifier.json
53
+ - schema/schemas/includes/industry_code.json
54
+ - schema/schemas/includes/licence-data-object.json
55
+ - schema/schemas/includes/officer.json
56
+ - schema/schemas/includes/organisation.json
57
+ - schema/schemas/includes/permission.json
58
+ - schema/schemas/includes/person.json
59
+ - schema/schemas/includes/person_name.json
60
+ - schema/schemas/includes/previous_name.json
61
+ - schema/schemas/includes/share-parcel-data.json
62
+ - schema/schemas/includes/share-parcel.json
63
+ - schema/schemas/includes/subsidiary-relationship-data.json
64
+ - schema/schemas/includes/total-shares.json
65
+ - schema/schemas/includes/unknown_entity_type.json
66
+ - schema/schemas/licence-schema.json
67
+ - schema/schemas/primary-data-schema.json
68
+ - schema/schemas/rich-licence-schema.json
69
+ - schema/schemas/share-parcel-schema.json
70
+ - schema/schemas/simple-financial-payment-schema.json
71
+ - schema/schemas/simple-licence-schema.json
72
+ - schema/schemas/simple-subsidiary-schema.json
73
+ - schema/schemas/subsidiary-relationship-schema.json
54
74
  - spec/bots/bot-that-crashes-immediately/manifest.json
55
75
  - spec/bots/bot-that-crashes-immediately/scraper.rb
56
76
  - spec/bots/bot-that-crashes-immediately/transformer1.rb
@@ -91,73 +111,36 @@ files:
91
111
  - spec/bots/slow-bot/scraper.rb
92
112
  - spec/lib/processor_spec.rb
93
113
  - spec/lib/runner_spec.rb
114
+ - spec/lib/validator_spec.rb
115
+ - spec/lib/validators_spec.rb
94
116
  - spec/manual_spec.rb
95
117
  - spec/outputs/full-scraper.out
96
118
  - spec/outputs/full-transformer.out
97
119
  - spec/outputs/truncated-scraper.out
98
- - schema/schemas/company-schema.json
99
- - schema/schemas/financial-payment-schema.json
100
- - schema/schemas/includes/address.json
101
- - schema/schemas/includes/alternative_name.json
102
- - schema/schemas/includes/company.json
103
- - schema/schemas/includes/filing.json
104
- - schema/schemas/includes/financial-payment-data-object.json
105
- - schema/schemas/includes/identifier.json
106
- - schema/schemas/includes/industry_code.json
107
- - schema/schemas/includes/licence-data-object.json
108
- - schema/schemas/includes/officer.json
109
- - schema/schemas/includes/organisation.json
110
- - schema/schemas/includes/permission.json
111
- - schema/schemas/includes/person.json
112
- - schema/schemas/includes/person_name.json
113
- - schema/schemas/includes/previous_name.json
114
- - schema/schemas/includes/share-parcel-data.json
115
- - schema/schemas/includes/share-parcel.json
116
- - schema/schemas/includes/subsidiary-relationship-data.json
117
- - schema/schemas/includes/total-shares.json
118
- - schema/schemas/includes/unknown_entity_type.json
119
- - schema/schemas/licence-schema.json
120
- - schema/schemas/primary-data-schema.json
121
- - schema/schemas/rich-licence-schema.json
122
- - schema/schemas/share-parcel-schema.json
123
- - schema/schemas/simple-financial-payment-schema.json
124
- - schema/schemas/simple-licence-schema.json
125
- - schema/schemas/simple-subsidiary-schema.json
126
- - schema/schemas/subsidiary-relationship-schema.json
120
+ - spec/spec_helper.rb
127
121
  homepage: http://turbot.opencorporates.com/
128
- licenses:
122
+ licenses:
129
123
  - MIT
124
+ metadata: {}
130
125
  post_install_message:
131
126
  rdoc_options: []
132
-
133
- require_paths:
127
+ require_paths:
134
128
  - lib
135
- required_ruby_version: !ruby/object:Gem::Requirement
136
- none: false
137
- requirements:
138
- - - ">="
139
- - !ruby/object:Gem::Version
140
- hash: 55
141
- segments:
142
- - 1
143
- - 9
144
- - 2
129
+ required_ruby_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
145
133
  version: 1.9.2
146
- required_rubygems_version: !ruby/object:Gem::Requirement
147
- none: false
148
- requirements:
149
- - - ">="
150
- - !ruby/object:Gem::Version
151
- hash: 3
152
- segments:
153
- - 0
154
- version: "0"
134
+ required_rubygems_version: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ! '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
155
139
  requirements: []
156
-
157
140
  rubyforge_project:
158
- rubygems_version: 1.8.15
141
+ rubygems_version: 2.2.2
159
142
  signing_key:
160
- specification_version: 3
143
+ specification_version: 4
161
144
  summary: Utilities for running bots with Turbot
162
145
  test_files: []
163
-
146
+ has_rdoc: