turbot-runner 0.1.24 → 0.1.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/turbot_runner/processor.rb +1 -42
- data/lib/turbot_runner/validator.rb +44 -0
- data/lib/turbot_runner/version.rb +1 -1
- data/lib/turbot_runner.rb +1 -0
- data/spec/lib/processor_spec.rb +44 -73
- data/spec/lib/validator_spec.rb +52 -0
- data/spec/spec_helper.rb +9 -12
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NWI1YjU1NDhkMjg2ZDkzNDJmNGYzNmU3NTg1OGRlMWExZGY5NDI0NA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZDkyNTcxMDhmZjA5NDRjMjczNzdhMTA2MGJlNGJlZTQ3ZjM4NWZkOA==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
Y2Y5NjYyY2ExOWY1ZTg2OWFmMmUyMjlkNmMwZTI4N2NkYjEzZDY4YWY2Njlh
|
10
|
+
OTJlNWY2N2VjMDQzMDhmMTBjODM5NmI5ZDFjODc3YWQxMjc3ZjAxMjczN2U0
|
11
|
+
MjZmZmY2ODNiOTIxZmZiYTIyZGE2MDliMDMxNTk3NTQ1MTIxMTA=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ODE2N2ZhN2M2MWFjMzJhNDgyNGQzNDBlNGM2OGU2NmEyYjBkNDUyNTkyNzZh
|
14
|
+
MTU1OGM1YjM3YjJiZmE2OWEzNTNmODJlNTEzNGIxZjc5NmRiN2E3YmVmNWQy
|
15
|
+
OTFlODIwNGZhOWRlYjliYjJlZjBlMmIwYzhkNGNlOTlhOTFkZDE=
|
@@ -17,7 +17,7 @@ module TurbotRunner
|
|
17
17
|
else
|
18
18
|
record = Openc::JsonSchema.convert_dates(schema_path, JSON.parse(line))
|
19
19
|
|
20
|
-
error_message = validate(record)
|
20
|
+
error_message = Validator.validate(schema_path, record, @identifying_fields)
|
21
21
|
|
22
22
|
if error_message.nil?
|
23
23
|
begin
|
@@ -40,50 +40,9 @@ module TurbotRunner
|
|
40
40
|
@runner.interrupt
|
41
41
|
end
|
42
42
|
|
43
|
-
def validate(record)
|
44
|
-
error = Openc::JsonSchema.validate(schema_path, record)
|
45
|
-
|
46
|
-
message = nil
|
47
|
-
|
48
|
-
if error.nil?
|
49
|
-
identifying_attributes = record.reject do |k, v|
|
50
|
-
!@identifying_fields.include?(k) || v.nil? || v == ''
|
51
|
-
end
|
52
|
-
|
53
|
-
if identifying_attributes.empty?
|
54
|
-
message = "There were no values provided for any of the identifying fields: #{@identifying_fields.join(', ')}"
|
55
|
-
end
|
56
|
-
else
|
57
|
-
message = case error[:type]
|
58
|
-
when :missing
|
59
|
-
"Missing required property: #{error[:path]}"
|
60
|
-
when :one_of_no_matches
|
61
|
-
"No match for property: #{error[:path]}"
|
62
|
-
when :one_of_many_matches
|
63
|
-
"Multiple possible matches for property: #{error[:path]}"
|
64
|
-
when :too_short
|
65
|
-
"Property too short: #{error[:path]} (must be at least #{error[:length]} characters)"
|
66
|
-
when :too_long
|
67
|
-
"Property too long: #{error[:path]} (must be at most #{error[:length]} characters)"
|
68
|
-
when :type_mismatch
|
69
|
-
"Property of wrong type: #{error[:path]} (must be of type #{error[:allowed_types].join(', ')})"
|
70
|
-
when :enum_mismatch
|
71
|
-
"Property not an allowed value: #{error[:path]} (must be one of #{error[:allowed_values].join(', ')})"
|
72
|
-
when :format_mismatch
|
73
|
-
"Property not of expected format: #{error[:path]} (must be of format #{error[:expected_format]})"
|
74
|
-
when :unknown
|
75
|
-
error[:message]
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
message
|
80
|
-
end
|
81
|
-
|
82
43
|
def schema_path
|
83
44
|
hyphenated_name = @data_type.to_s.gsub("_", "-").gsub(" ", "-")
|
84
45
|
File.join(SCHEMAS_PATH, "#{hyphenated_name}-schema.json")
|
85
46
|
end
|
86
|
-
|
87
|
-
class ConversionError < StandardError; end
|
88
47
|
end
|
89
48
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module TurbotRunner
|
2
|
+
module Validator
|
3
|
+
extend self
|
4
|
+
|
5
|
+
def validate(schema_path, record, identifying_fields)
|
6
|
+
error = Openc::JsonSchema.validate(schema_path, record)
|
7
|
+
|
8
|
+
message = nil
|
9
|
+
|
10
|
+
if error.nil?
|
11
|
+
identifying_attributes = record.reject do |k, v|
|
12
|
+
!identifying_fields.include?(k) || v.nil? || v == ''
|
13
|
+
end
|
14
|
+
|
15
|
+
if identifying_attributes.empty?
|
16
|
+
message = "There were no values provided for any of the identifying fields: #{identifying_fields.join(', ')}"
|
17
|
+
end
|
18
|
+
else
|
19
|
+
message = case error[:type]
|
20
|
+
when :missing
|
21
|
+
"Missing required property: #{error[:path]}"
|
22
|
+
when :one_of_no_matches
|
23
|
+
"No match for property: #{error[:path]}"
|
24
|
+
when :one_of_many_matches
|
25
|
+
"Multiple possible matches for property: #{error[:path]}"
|
26
|
+
when :too_short
|
27
|
+
"Property too short: #{error[:path]} (must be at least #{error[:length]} characters)"
|
28
|
+
when :too_long
|
29
|
+
"Property too long: #{error[:path]} (must be at most #{error[:length]} characters)"
|
30
|
+
when :type_mismatch
|
31
|
+
"Property of wrong type: #{error[:path]} (must be of type #{error[:allowed_types].join(', ')})"
|
32
|
+
when :enum_mismatch
|
33
|
+
"Property not an allowed value: #{error[:path]} (must be one of #{error[:allowed_values].join(', ')})"
|
34
|
+
when :format_mismatch
|
35
|
+
"Property not of expected format: #{error[:path]} (must be of format #{error[:expected_format]})"
|
36
|
+
when :unknown
|
37
|
+
error[:message]
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
message
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
data/lib/turbot_runner.rb
CHANGED
data/spec/lib/processor_spec.rb
CHANGED
@@ -17,28 +17,47 @@ describe TurbotRunner::Processor do
|
|
17
17
|
@processor = TurbotRunner::Processor.new(nil, @script_config, @handler)
|
18
18
|
end
|
19
19
|
|
20
|
-
context 'with record
|
21
|
-
|
22
|
-
|
20
|
+
context 'with valid record' do
|
21
|
+
it 'calls Handler#handle_valid_record' do
|
22
|
+
record = {
|
23
23
|
'sample_date' => '2014-06-01',
|
24
|
+
'source_url' => 'http://example.com/123',
|
24
25
|
'number' => 123
|
25
26
|
}
|
27
|
+
|
28
|
+
expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
|
29
|
+
@processor.process(record.to_json)
|
26
30
|
end
|
31
|
+
end
|
27
32
|
|
33
|
+
context 'with invalid record' do
|
28
34
|
it 'calls Handler#handle_invalid_record' do
|
35
|
+
record = {
|
36
|
+
'sample_date' => '2014-06-01',
|
37
|
+
'number' => 123
|
38
|
+
}
|
39
|
+
|
29
40
|
expected_error = 'Missing required property: source_url'
|
30
41
|
expect(@handler).to receive(:handle_invalid_record).
|
31
|
-
with(
|
32
|
-
@processor.process(
|
42
|
+
with(record, @data_type, expected_error)
|
43
|
+
@processor.process(record.to_json)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
context 'with invalid JSON' do
|
48
|
+
it 'calls Handler#handle_invalid_json' do
|
49
|
+
line = 'this is not JSON'
|
50
|
+
expect(@handler).to receive(:handle_invalid_json).with(line)
|
51
|
+
@processor.process(line)
|
33
52
|
end
|
34
53
|
end
|
35
54
|
end
|
36
55
|
|
37
56
|
context 'with a runner passed in' do
|
38
57
|
before do
|
39
|
-
script_runner = instance_double('ScriptRunner')
|
40
|
-
allow(script_runner).to receive(:interrupt_and_mark_as_failed)
|
41
|
-
@processor = TurbotRunner::Processor.new(script_runner, @script_config, @handler)
|
58
|
+
@script_runner = instance_double('ScriptRunner')
|
59
|
+
allow(@script_runner).to receive(:interrupt_and_mark_as_failed)
|
60
|
+
@processor = TurbotRunner::Processor.new(@script_runner, @script_config, @handler)
|
42
61
|
end
|
43
62
|
|
44
63
|
context 'with valid record' do
|
@@ -48,12 +67,13 @@ describe TurbotRunner::Processor do
|
|
48
67
|
'source_url' => 'http://example.com/123',
|
49
68
|
'number' => 123
|
50
69
|
}
|
70
|
+
|
51
71
|
expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
|
52
72
|
@processor.process(record.to_json)
|
53
73
|
end
|
54
74
|
end
|
55
75
|
|
56
|
-
context 'with record
|
76
|
+
context 'with invalid record' do
|
57
77
|
before do
|
58
78
|
@record = {
|
59
79
|
'sample_date' => '2014-06-01',
|
@@ -67,93 +87,44 @@ describe TurbotRunner::Processor do
|
|
67
87
|
with(@record, @data_type, expected_error)
|
68
88
|
@processor.process(@record.to_json)
|
69
89
|
end
|
70
|
-
end
|
71
|
-
|
72
|
-
context 'with record missing all identifying fields' do
|
73
|
-
before do
|
74
|
-
@record = {
|
75
|
-
'sample_date' => '2014-06-01',
|
76
|
-
'source_url' => 'http://example.com/123'
|
77
|
-
}
|
78
|
-
end
|
79
90
|
|
80
|
-
it '
|
81
|
-
|
82
|
-
expect(@handler).to receive(:handle_invalid_record).
|
83
|
-
with(@record, @data_type, expected_error)
|
91
|
+
it 'interrupts runner' do
|
92
|
+
expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
|
84
93
|
@processor.process(@record.to_json)
|
85
94
|
end
|
86
95
|
end
|
87
96
|
|
88
97
|
context 'with invalid JSON' do
|
89
|
-
|
90
|
-
line = 'this is not JSON'
|
91
|
-
expect(@handler).to receive(:handle_invalid_json).with(line)
|
92
|
-
@processor.process(line)
|
98
|
+
before do
|
99
|
+
@line = 'this is not JSON'
|
93
100
|
end
|
94
|
-
end
|
95
|
-
|
96
|
-
context 'with record with sample_date from Time.now' do
|
97
|
-
it 'calls Handler#handle_valid_record with converted sample_date' do
|
98
|
-
record = {
|
99
|
-
'sample_date' => '2014-06-01 12:34:56 +0000',
|
100
|
-
'source_url' => 'http://example.com/123',
|
101
|
-
'number' => 123
|
102
|
-
}
|
103
101
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
'number' => 123
|
108
|
-
}
|
109
|
-
expect(@handler).to receive(:handle_valid_record).
|
110
|
-
with(expected_converted_record, @data_type)
|
111
|
-
@processor.process(record.to_json)
|
102
|
+
it 'calls Handler#handle_invalid_json' do
|
103
|
+
expect(@handler).to receive(:handle_invalid_json).with(@line)
|
104
|
+
@processor.process(@line)
|
112
105
|
end
|
113
|
-
end
|
114
|
-
|
115
|
-
context 'with record with missing sample_date' do
|
116
|
-
it 'calls Handler#handle_invalid_record' do
|
117
|
-
record = {
|
118
|
-
'source_url' => 'http://example.com/123',
|
119
|
-
'number' => 123
|
120
|
-
}
|
121
106
|
|
122
|
-
|
123
|
-
expect(@
|
124
|
-
|
125
|
-
@processor.process(record.to_json)
|
107
|
+
it 'interrupts runner' do
|
108
|
+
expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
|
109
|
+
@processor.process(@line)
|
126
110
|
end
|
127
111
|
end
|
128
112
|
|
129
|
-
|
130
|
-
it 'calls Handler#handle_invalid_record' do
|
113
|
+
it 'converts date format' do
|
131
114
|
record = {
|
132
|
-
'sample_date' => '',
|
115
|
+
'sample_date' => '2014-06-01 12:34:56 +0000',
|
133
116
|
'source_url' => 'http://example.com/123',
|
134
117
|
'number' => 123
|
135
118
|
}
|
136
119
|
|
137
|
-
|
138
|
-
|
139
|
-
with(record, @data_type, expected_error)
|
140
|
-
@processor.process(record.to_json)
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
context 'with record with invalid sample_date' do
|
145
|
-
it 'calls Handler#handle_invalid_record' do
|
146
|
-
record = {
|
147
|
-
'sample_date' => '2014-06-00',
|
120
|
+
converted_record = {
|
121
|
+
'sample_date' => '2014-06-01',
|
148
122
|
'source_url' => 'http://example.com/123',
|
149
123
|
'number' => 123
|
150
124
|
}
|
151
125
|
|
152
|
-
|
153
|
-
expect(@handler).to receive(:handle_invalid_record).
|
154
|
-
with(record, @data_type, expected_error)
|
126
|
+
expect(@handler).to receive(:handle_valid_record).with(converted_record, @data_type)
|
155
127
|
@processor.process(record.to_json)
|
156
|
-
end
|
157
128
|
end
|
158
129
|
end
|
159
130
|
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe TurbotRunner::Validator do
|
4
|
+
describe '.validate' do
|
5
|
+
specify 'with valid record' do
|
6
|
+
record = {
|
7
|
+
'sample_date' => '2014-06-01',
|
8
|
+
'source_url' => 'http://example.com/123',
|
9
|
+
'number' => 123
|
10
|
+
}
|
11
|
+
expect(record).to be_valid
|
12
|
+
end
|
13
|
+
|
14
|
+
specify 'with record missing required field' do
|
15
|
+
record = {
|
16
|
+
'sample_date' => '2014-06-01',
|
17
|
+
'number' => 123
|
18
|
+
}
|
19
|
+
expected_error = 'Missing required property: source_url'
|
20
|
+
expect(record).to fail_validation_with(expected_error)
|
21
|
+
end
|
22
|
+
|
23
|
+
specify 'with record missing all identifying fields' do
|
24
|
+
record = {
|
25
|
+
'sample_date' => '2014-06-01',
|
26
|
+
'source_url' => 'http://example.com/123'
|
27
|
+
}
|
28
|
+
expected_error = 'There were no values provided for any of the identifying fields: number'
|
29
|
+
expect(record).to fail_validation_with(expected_error)
|
30
|
+
end
|
31
|
+
|
32
|
+
specify 'with record with empty sample_date' do
|
33
|
+
record = {
|
34
|
+
'sample_date' => '',
|
35
|
+
'source_url' => 'http://example.com/123',
|
36
|
+
'number' => 123
|
37
|
+
}
|
38
|
+
expected_error = 'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
|
39
|
+
expect(record).to fail_validation_with(expected_error)
|
40
|
+
end
|
41
|
+
|
42
|
+
specify 'with record with invalid sample_date' do
|
43
|
+
record = {
|
44
|
+
'sample_date' => '2014-06-00',
|
45
|
+
'source_url' => 'http://example.com/123',
|
46
|
+
'number' => 123
|
47
|
+
}
|
48
|
+
expected_error = 'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
|
49
|
+
expect(record).to fail_validation_with(expected_error)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,20 +1,17 @@
|
|
1
1
|
require 'turbot_runner'
|
2
2
|
|
3
|
-
RSpec::Matchers.define(:fail_validation_with) do |
|
4
|
-
match do |
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
expect(error).to eq(expected)
|
3
|
+
RSpec::Matchers.define(:fail_validation_with) do |expected_error|
|
4
|
+
match do |record|
|
5
|
+
schema_path = File.join(TurbotRunner::SCHEMAS_PATH, 'primary-data-schema.json')
|
6
|
+
identifying_fields = ['number']
|
7
|
+
expect(TurbotRunner::Validator.validate(schema_path, record, identifying_fields)).to eq(expected_error)
|
9
8
|
end
|
10
9
|
end
|
11
10
|
|
12
11
|
RSpec::Matchers.define(:be_valid) do
|
13
|
-
match do |
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
expect(error).to eq(nil)
|
12
|
+
match do |record|
|
13
|
+
schema_path = File.join(TurbotRunner::SCHEMAS_PATH, 'primary-data-schema.json')
|
14
|
+
identifying_fields = ['number']
|
15
|
+
expect(TurbotRunner::Validator.validate(schema_path, record, identifying_fields)).to eq(nil)
|
18
16
|
end
|
19
17
|
end
|
20
|
-
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: turbot-runner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.24
|
4
|
+
version: 0.1.25
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- OpenCorporates
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: openc-json_schema
|
@@ -39,6 +39,7 @@ files:
|
|
39
39
|
- lib/turbot_runner/runner.rb
|
40
40
|
- lib/turbot_runner/script_runner.rb
|
41
41
|
- lib/turbot_runner/utils.rb
|
42
|
+
- lib/turbot_runner/validator.rb
|
42
43
|
- lib/turbot_runner/version.rb
|
43
44
|
- schema/schemas/company-schema.json
|
44
45
|
- schema/schemas/financial-payment-schema.json
|
@@ -109,6 +110,7 @@ files:
|
|
109
110
|
- spec/bots/slow-bot/scraper.rb
|
110
111
|
- spec/lib/processor_spec.rb
|
111
112
|
- spec/lib/runner_spec.rb
|
113
|
+
- spec/lib/validator_spec.rb
|
112
114
|
- spec/manual_spec.rb
|
113
115
|
- spec/outputs/full-scraper.out
|
114
116
|
- spec/outputs/full-transformer.out
|