turbot-runner 0.1.24 → 0.1.25
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/turbot_runner/processor.rb +1 -42
- data/lib/turbot_runner/validator.rb +44 -0
- data/lib/turbot_runner/version.rb +1 -1
- data/lib/turbot_runner.rb +1 -0
- data/spec/lib/processor_spec.rb +44 -73
- data/spec/lib/validator_spec.rb +52 -0
- data/spec/spec_helper.rb +9 -12
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NWI1YjU1NDhkMjg2ZDkzNDJmNGYzNmU3NTg1OGRlMWExZGY5NDI0NA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZDkyNTcxMDhmZjA5NDRjMjczNzdhMTA2MGJlNGJlZTQ3ZjM4NWZkOA==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
Y2Y5NjYyY2ExOWY1ZTg2OWFmMmUyMjlkNmMwZTI4N2NkYjEzZDY4YWY2Njlh
|
10
|
+
OTJlNWY2N2VjMDQzMDhmMTBjODM5NmI5ZDFjODc3YWQxMjc3ZjAxMjczN2U0
|
11
|
+
MjZmZmY2ODNiOTIxZmZiYTIyZGE2MDliMDMxNTk3NTQ1MTIxMTA=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ODE2N2ZhN2M2MWFjMzJhNDgyNGQzNDBlNGM2OGU2NmEyYjBkNDUyNTkyNzZh
|
14
|
+
MTU1OGM1YjM3YjJiZmE2OWEzNTNmODJlNTEzNGIxZjc5NmRiN2E3YmVmNWQy
|
15
|
+
OTFlODIwNGZhOWRlYjliYjJlZjBlMmIwYzhkNGNlOTlhOTFkZDE=
|
@@ -17,7 +17,7 @@ module TurbotRunner
|
|
17
17
|
else
|
18
18
|
record = Openc::JsonSchema.convert_dates(schema_path, JSON.parse(line))
|
19
19
|
|
20
|
-
error_message = validate(record)
|
20
|
+
error_message = Validator.validate(schema_path, record, @identifying_fields)
|
21
21
|
|
22
22
|
if error_message.nil?
|
23
23
|
begin
|
@@ -40,50 +40,9 @@ module TurbotRunner
|
|
40
40
|
@runner.interrupt
|
41
41
|
end
|
42
42
|
|
43
|
-
def validate(record)
|
44
|
-
error = Openc::JsonSchema.validate(schema_path, record)
|
45
|
-
|
46
|
-
message = nil
|
47
|
-
|
48
|
-
if error.nil?
|
49
|
-
identifying_attributes = record.reject do |k, v|
|
50
|
-
!@identifying_fields.include?(k) || v.nil? || v == ''
|
51
|
-
end
|
52
|
-
|
53
|
-
if identifying_attributes.empty?
|
54
|
-
message = "There were no values provided for any of the identifying fields: #{@identifying_fields.join(', ')}"
|
55
|
-
end
|
56
|
-
else
|
57
|
-
message = case error[:type]
|
58
|
-
when :missing
|
59
|
-
"Missing required property: #{error[:path]}"
|
60
|
-
when :one_of_no_matches
|
61
|
-
"No match for property: #{error[:path]}"
|
62
|
-
when :one_of_many_matches
|
63
|
-
"Multiple possible matches for property: #{error[:path]}"
|
64
|
-
when :too_short
|
65
|
-
"Property too short: #{error[:path]} (must be at least #{error[:length]} characters)"
|
66
|
-
when :too_long
|
67
|
-
"Property too long: #{error[:path]} (must be at most #{error[:length]} characters)"
|
68
|
-
when :type_mismatch
|
69
|
-
"Property of wrong type: #{error[:path]} (must be of type #{error[:allowed_types].join(', ')})"
|
70
|
-
when :enum_mismatch
|
71
|
-
"Property not an allowed value: #{error[:path]} (must be one of #{error[:allowed_values].join(', ')})"
|
72
|
-
when :format_mismatch
|
73
|
-
"Property not of expected format: #{error[:path]} (must be of format #{error[:expected_format]})"
|
74
|
-
when :unknown
|
75
|
-
error[:message]
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
message
|
80
|
-
end
|
81
|
-
|
82
43
|
def schema_path
|
83
44
|
hyphenated_name = @data_type.to_s.gsub("_", "-").gsub(" ", "-")
|
84
45
|
File.join(SCHEMAS_PATH, "#{hyphenated_name}-schema.json")
|
85
46
|
end
|
86
|
-
|
87
|
-
class ConversionError < StandardError; end
|
88
47
|
end
|
89
48
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module TurbotRunner
|
2
|
+
module Validator
|
3
|
+
extend self
|
4
|
+
|
5
|
+
def validate(schema_path, record, identifying_fields)
|
6
|
+
error = Openc::JsonSchema.validate(schema_path, record)
|
7
|
+
|
8
|
+
message = nil
|
9
|
+
|
10
|
+
if error.nil?
|
11
|
+
identifying_attributes = record.reject do |k, v|
|
12
|
+
!identifying_fields.include?(k) || v.nil? || v == ''
|
13
|
+
end
|
14
|
+
|
15
|
+
if identifying_attributes.empty?
|
16
|
+
message = "There were no values provided for any of the identifying fields: #{identifying_fields.join(', ')}"
|
17
|
+
end
|
18
|
+
else
|
19
|
+
message = case error[:type]
|
20
|
+
when :missing
|
21
|
+
"Missing required property: #{error[:path]}"
|
22
|
+
when :one_of_no_matches
|
23
|
+
"No match for property: #{error[:path]}"
|
24
|
+
when :one_of_many_matches
|
25
|
+
"Multiple possible matches for property: #{error[:path]}"
|
26
|
+
when :too_short
|
27
|
+
"Property too short: #{error[:path]} (must be at least #{error[:length]} characters)"
|
28
|
+
when :too_long
|
29
|
+
"Property too long: #{error[:path]} (must be at most #{error[:length]} characters)"
|
30
|
+
when :type_mismatch
|
31
|
+
"Property of wrong type: #{error[:path]} (must be of type #{error[:allowed_types].join(', ')})"
|
32
|
+
when :enum_mismatch
|
33
|
+
"Property not an allowed value: #{error[:path]} (must be one of #{error[:allowed_values].join(', ')})"
|
34
|
+
when :format_mismatch
|
35
|
+
"Property not of expected format: #{error[:path]} (must be of format #{error[:expected_format]})"
|
36
|
+
when :unknown
|
37
|
+
error[:message]
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
message
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
data/lib/turbot_runner.rb
CHANGED
data/spec/lib/processor_spec.rb
CHANGED
@@ -17,28 +17,47 @@ describe TurbotRunner::Processor do
|
|
17
17
|
@processor = TurbotRunner::Processor.new(nil, @script_config, @handler)
|
18
18
|
end
|
19
19
|
|
20
|
-
context 'with record
|
21
|
-
|
22
|
-
|
20
|
+
context 'with valid record' do
|
21
|
+
it 'calls Handler#handle_valid_record' do
|
22
|
+
record = {
|
23
23
|
'sample_date' => '2014-06-01',
|
24
|
+
'source_url' => 'http://example.com/123',
|
24
25
|
'number' => 123
|
25
26
|
}
|
27
|
+
|
28
|
+
expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
|
29
|
+
@processor.process(record.to_json)
|
26
30
|
end
|
31
|
+
end
|
27
32
|
|
33
|
+
context 'with invalid record' do
|
28
34
|
it 'calls Handler#handle_invalid_record' do
|
35
|
+
record = {
|
36
|
+
'sample_date' => '2014-06-01',
|
37
|
+
'number' => 123
|
38
|
+
}
|
39
|
+
|
29
40
|
expected_error = 'Missing required property: source_url'
|
30
41
|
expect(@handler).to receive(:handle_invalid_record).
|
31
|
-
with(
|
32
|
-
@processor.process(
|
42
|
+
with(record, @data_type, expected_error)
|
43
|
+
@processor.process(record.to_json)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
context 'with invalid JSON' do
|
48
|
+
it 'calls Handler#handle_invalid_json' do
|
49
|
+
line = 'this is not JSON'
|
50
|
+
expect(@handler).to receive(:handle_invalid_json).with(line)
|
51
|
+
@processor.process(line)
|
33
52
|
end
|
34
53
|
end
|
35
54
|
end
|
36
55
|
|
37
56
|
context 'with a runner passed in' do
|
38
57
|
before do
|
39
|
-
script_runner = instance_double('ScriptRunner')
|
40
|
-
allow(script_runner).to receive(:interrupt_and_mark_as_failed)
|
41
|
-
@processor = TurbotRunner::Processor.new(script_runner, @script_config, @handler)
|
58
|
+
@script_runner = instance_double('ScriptRunner')
|
59
|
+
allow(@script_runner).to receive(:interrupt_and_mark_as_failed)
|
60
|
+
@processor = TurbotRunner::Processor.new(@script_runner, @script_config, @handler)
|
42
61
|
end
|
43
62
|
|
44
63
|
context 'with valid record' do
|
@@ -48,12 +67,13 @@ describe TurbotRunner::Processor do
|
|
48
67
|
'source_url' => 'http://example.com/123',
|
49
68
|
'number' => 123
|
50
69
|
}
|
70
|
+
|
51
71
|
expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
|
52
72
|
@processor.process(record.to_json)
|
53
73
|
end
|
54
74
|
end
|
55
75
|
|
56
|
-
context 'with record
|
76
|
+
context 'with invalid record' do
|
57
77
|
before do
|
58
78
|
@record = {
|
59
79
|
'sample_date' => '2014-06-01',
|
@@ -67,93 +87,44 @@ describe TurbotRunner::Processor do
|
|
67
87
|
with(@record, @data_type, expected_error)
|
68
88
|
@processor.process(@record.to_json)
|
69
89
|
end
|
70
|
-
end
|
71
|
-
|
72
|
-
context 'with record missing all identifying fields' do
|
73
|
-
before do
|
74
|
-
@record = {
|
75
|
-
'sample_date' => '2014-06-01',
|
76
|
-
'source_url' => 'http://example.com/123'
|
77
|
-
}
|
78
|
-
end
|
79
90
|
|
80
|
-
it '
|
81
|
-
|
82
|
-
expect(@handler).to receive(:handle_invalid_record).
|
83
|
-
with(@record, @data_type, expected_error)
|
91
|
+
it 'interrupts runner' do
|
92
|
+
expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
|
84
93
|
@processor.process(@record.to_json)
|
85
94
|
end
|
86
95
|
end
|
87
96
|
|
88
97
|
context 'with invalid JSON' do
|
89
|
-
|
90
|
-
line = 'this is not JSON'
|
91
|
-
expect(@handler).to receive(:handle_invalid_json).with(line)
|
92
|
-
@processor.process(line)
|
98
|
+
before do
|
99
|
+
@line = 'this is not JSON'
|
93
100
|
end
|
94
|
-
end
|
95
|
-
|
96
|
-
context 'with record with sample_date from Time.now' do
|
97
|
-
it 'calls Handler#handle_valid_record with converted sample_date' do
|
98
|
-
record = {
|
99
|
-
'sample_date' => '2014-06-01 12:34:56 +0000',
|
100
|
-
'source_url' => 'http://example.com/123',
|
101
|
-
'number' => 123
|
102
|
-
}
|
103
101
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
'number' => 123
|
108
|
-
}
|
109
|
-
expect(@handler).to receive(:handle_valid_record).
|
110
|
-
with(expected_converted_record, @data_type)
|
111
|
-
@processor.process(record.to_json)
|
102
|
+
it 'calls Handler#handle_invalid_json' do
|
103
|
+
expect(@handler).to receive(:handle_invalid_json).with(@line)
|
104
|
+
@processor.process(@line)
|
112
105
|
end
|
113
|
-
end
|
114
|
-
|
115
|
-
context 'with record with missing sample_date' do
|
116
|
-
it 'calls Handler#handle_invalid_record' do
|
117
|
-
record = {
|
118
|
-
'source_url' => 'http://example.com/123',
|
119
|
-
'number' => 123
|
120
|
-
}
|
121
106
|
|
122
|
-
|
123
|
-
expect(@
|
124
|
-
|
125
|
-
@processor.process(record.to_json)
|
107
|
+
it 'interrupts runner' do
|
108
|
+
expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
|
109
|
+
@processor.process(@line)
|
126
110
|
end
|
127
111
|
end
|
128
112
|
|
129
|
-
|
130
|
-
it 'calls Handler#handle_invalid_record' do
|
113
|
+
it 'converts date format' do
|
131
114
|
record = {
|
132
|
-
'sample_date' => '',
|
115
|
+
'sample_date' => '2014-06-01 12:34:56 +0000',
|
133
116
|
'source_url' => 'http://example.com/123',
|
134
117
|
'number' => 123
|
135
118
|
}
|
136
119
|
|
137
|
-
|
138
|
-
|
139
|
-
with(record, @data_type, expected_error)
|
140
|
-
@processor.process(record.to_json)
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
context 'with record with invalid sample_date' do
|
145
|
-
it 'calls Handler#handle_invalid_record' do
|
146
|
-
record = {
|
147
|
-
'sample_date' => '2014-06-00',
|
120
|
+
converted_record = {
|
121
|
+
'sample_date' => '2014-06-01',
|
148
122
|
'source_url' => 'http://example.com/123',
|
149
123
|
'number' => 123
|
150
124
|
}
|
151
125
|
|
152
|
-
|
153
|
-
expect(@handler).to receive(:handle_invalid_record).
|
154
|
-
with(record, @data_type, expected_error)
|
126
|
+
expect(@handler).to receive(:handle_valid_record).with(converted_record, @data_type)
|
155
127
|
@processor.process(record.to_json)
|
156
|
-
end
|
157
128
|
end
|
158
129
|
end
|
159
130
|
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe TurbotRunner::Validator do
|
4
|
+
describe '.validate' do
|
5
|
+
specify 'with valid record' do
|
6
|
+
record = {
|
7
|
+
'sample_date' => '2014-06-01',
|
8
|
+
'source_url' => 'http://example.com/123',
|
9
|
+
'number' => 123
|
10
|
+
}
|
11
|
+
expect(record).to be_valid
|
12
|
+
end
|
13
|
+
|
14
|
+
specify 'with record missing required field' do
|
15
|
+
record = {
|
16
|
+
'sample_date' => '2014-06-01',
|
17
|
+
'number' => 123
|
18
|
+
}
|
19
|
+
expected_error = 'Missing required property: source_url'
|
20
|
+
expect(record).to fail_validation_with(expected_error)
|
21
|
+
end
|
22
|
+
|
23
|
+
specify 'with record missing all identifying fields' do
|
24
|
+
record = {
|
25
|
+
'sample_date' => '2014-06-01',
|
26
|
+
'source_url' => 'http://example.com/123'
|
27
|
+
}
|
28
|
+
expected_error = 'There were no values provided for any of the identifying fields: number'
|
29
|
+
expect(record).to fail_validation_with(expected_error)
|
30
|
+
end
|
31
|
+
|
32
|
+
specify 'with record with empty sample_date' do
|
33
|
+
record = {
|
34
|
+
'sample_date' => '',
|
35
|
+
'source_url' => 'http://example.com/123',
|
36
|
+
'number' => 123
|
37
|
+
}
|
38
|
+
expected_error = 'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
|
39
|
+
expect(record).to fail_validation_with(expected_error)
|
40
|
+
end
|
41
|
+
|
42
|
+
specify 'with record with invalid sample_date' do
|
43
|
+
record = {
|
44
|
+
'sample_date' => '2014-06-00',
|
45
|
+
'source_url' => 'http://example.com/123',
|
46
|
+
'number' => 123
|
47
|
+
}
|
48
|
+
expected_error = 'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
|
49
|
+
expect(record).to fail_validation_with(expected_error)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,20 +1,17 @@
|
|
1
1
|
require 'turbot_runner'
|
2
2
|
|
3
|
-
RSpec::Matchers.define(:fail_validation_with) do |
|
4
|
-
match do |
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
expect(error).to eq(expected)
|
3
|
+
RSpec::Matchers.define(:fail_validation_with) do |expected_error|
|
4
|
+
match do |record|
|
5
|
+
schema_path = File.join(TurbotRunner::SCHEMAS_PATH, 'primary-data-schema.json')
|
6
|
+
identifying_fields = ['number']
|
7
|
+
expect(TurbotRunner::Validator.validate(schema_path, record, identifying_fields)).to eq(expected_error)
|
9
8
|
end
|
10
9
|
end
|
11
10
|
|
12
11
|
RSpec::Matchers.define(:be_valid) do
|
13
|
-
match do |
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
expect(error).to eq(nil)
|
12
|
+
match do |record|
|
13
|
+
schema_path = File.join(TurbotRunner::SCHEMAS_PATH, 'primary-data-schema.json')
|
14
|
+
identifying_fields = ['number']
|
15
|
+
expect(TurbotRunner::Validator.validate(schema_path, record, identifying_fields)).to eq(nil)
|
18
16
|
end
|
19
17
|
end
|
20
|
-
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: turbot-runner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.24
|
4
|
+
version: 0.1.25
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- OpenCorporates
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: openc-json_schema
|
@@ -39,6 +39,7 @@ files:
|
|
39
39
|
- lib/turbot_runner/runner.rb
|
40
40
|
- lib/turbot_runner/script_runner.rb
|
41
41
|
- lib/turbot_runner/utils.rb
|
42
|
+
- lib/turbot_runner/validator.rb
|
42
43
|
- lib/turbot_runner/version.rb
|
43
44
|
- schema/schemas/company-schema.json
|
44
45
|
- schema/schemas/financial-payment-schema.json
|
@@ -109,6 +110,7 @@ files:
|
|
109
110
|
- spec/bots/slow-bot/scraper.rb
|
110
111
|
- spec/lib/processor_spec.rb
|
111
112
|
- spec/lib/runner_spec.rb
|
113
|
+
- spec/lib/validator_spec.rb
|
112
114
|
- spec/manual_spec.rb
|
113
115
|
- spec/outputs/full-scraper.out
|
114
116
|
- spec/outputs/full-transformer.out
|