turbot-runner 0.1.24 → 0.1.25

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MzkxMDdjODdjYjg3NDFkZTVjZjJmMzI4M2RhZmQ1NWI5OTBmNzFiOA==
4
+ NWI1YjU1NDhkMjg2ZDkzNDJmNGYzNmU3NTg1OGRlMWExZGY5NDI0NA==
5
5
  data.tar.gz: !binary |-
6
- ZmU3ODUzNmMxODg5NjZlMDhjZDMzYzk0MTY4YzA4ODVlZjEzZTVlOA==
6
+ ZDkyNTcxMDhmZjA5NDRjMjczNzdhMTA2MGJlNGJlZTQ3ZjM4NWZkOA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- Mzc3NTg0ZTY2ZWM5YmU4MWM4ODM0NmRkZjdhNDk2NWE4YjYxMDllMjJmOTlm
10
- MmQwMzEwMGJkMThiMDFlYzFlYjg0ODQyMjkzMDBlMzgwMWM4YmM3MTI1NjA4
11
- OGMxMjExNjIxNDI2N2FmZjYyODU2Y2IwMDEzMTkwZWJjNzVlODM=
9
+ Y2Y5NjYyY2ExOWY1ZTg2OWFmMmUyMjlkNmMwZTI4N2NkYjEzZDY4YWY2Njlh
10
+ OTJlNWY2N2VjMDQzMDhmMTBjODM5NmI5ZDFjODc3YWQxMjc3ZjAxMjczN2U0
11
+ MjZmZmY2ODNiOTIxZmZiYTIyZGE2MDliMDMxNTk3NTQ1MTIxMTA=
12
12
  data.tar.gz: !binary |-
13
- MDk3NzFhODNlZDA3NGUyMmZlMTBmYzEzM2UyNjFmMDc5NzJjYTJkMjQ0NGRh
14
- M2U3MDlmOGYwNGI4Y2U3YzNkZDEwODk4NzA4ZDY1Y2NhNGJlYWE1MzkyNDYw
15
- M2MyYTNkNjI0YzEwMTMyOTdiMzMwZmU1NTQ2Njk0YTQ5NjZkMGM=
13
+ ODE2N2ZhN2M2MWFjMzJhNDgyNGQzNDBlNGM2OGU2NmEyYjBkNDUyNTkyNzZh
14
+ MTU1OGM1YjM3YjJiZmE2OWEzNTNmODJlNTEzNGIxZjc5NmRiN2E3YmVmNWQy
15
+ OTFlODIwNGZhOWRlYjliYjJlZjBlMmIwYzhkNGNlOTlhOTFkZDE=
@@ -17,7 +17,7 @@ module TurbotRunner
17
17
  else
18
18
  record = Openc::JsonSchema.convert_dates(schema_path, JSON.parse(line))
19
19
 
20
- error_message = validate(record)
20
+ error_message = Validator.validate(schema_path, record, @identifying_fields)
21
21
 
22
22
  if error_message.nil?
23
23
  begin
@@ -40,50 +40,9 @@ module TurbotRunner
40
40
  @runner.interrupt
41
41
  end
42
42
 
43
- def validate(record)
44
- error = Openc::JsonSchema.validate(schema_path, record)
45
-
46
- message = nil
47
-
48
- if error.nil?
49
- identifying_attributes = record.reject do |k, v|
50
- !@identifying_fields.include?(k) || v.nil? || v == ''
51
- end
52
-
53
- if identifying_attributes.empty?
54
- message = "There were no values provided for any of the identifying fields: #{@identifying_fields.join(', ')}"
55
- end
56
- else
57
- message = case error[:type]
58
- when :missing
59
- "Missing required property: #{error[:path]}"
60
- when :one_of_no_matches
61
- "No match for property: #{error[:path]}"
62
- when :one_of_many_matches
63
- "Multiple possible matches for property: #{error[:path]}"
64
- when :too_short
65
- "Property too short: #{error[:path]} (must be at least #{error[:length]} characters)"
66
- when :too_long
67
- "Property too long: #{error[:path]} (must be at most #{error[:length]} characters)"
68
- when :type_mismatch
69
- "Property of wrong type: #{error[:path]} (must be of type #{error[:allowed_types].join(', ')})"
70
- when :enum_mismatch
71
- "Property not an allowed value: #{error[:path]} (must be one of #{error[:allowed_values].join(', ')})"
72
- when :format_mismatch
73
- "Property not of expected format: #{error[:path]} (must be of format #{error[:expected_format]})"
74
- when :unknown
75
- error[:message]
76
- end
77
- end
78
-
79
- message
80
- end
81
-
82
43
  def schema_path
83
44
  hyphenated_name = @data_type.to_s.gsub("_", "-").gsub(" ", "-")
84
45
  File.join(SCHEMAS_PATH, "#{hyphenated_name}-schema.json")
85
46
  end
86
-
87
- class ConversionError < StandardError; end
88
47
  end
89
48
  end
@@ -0,0 +1,44 @@
1
+ module TurbotRunner
2
+ module Validator
3
+ extend self
4
+
5
+ def validate(schema_path, record, identifying_fields)
6
+ error = Openc::JsonSchema.validate(schema_path, record)
7
+
8
+ message = nil
9
+
10
+ if error.nil?
11
+ identifying_attributes = record.reject do |k, v|
12
+ !identifying_fields.include?(k) || v.nil? || v == ''
13
+ end
14
+
15
+ if identifying_attributes.empty?
16
+ message = "There were no values provided for any of the identifying fields: #{identifying_fields.join(', ')}"
17
+ end
18
+ else
19
+ message = case error[:type]
20
+ when :missing
21
+ "Missing required property: #{error[:path]}"
22
+ when :one_of_no_matches
23
+ "No match for property: #{error[:path]}"
24
+ when :one_of_many_matches
25
+ "Multiple possible matches for property: #{error[:path]}"
26
+ when :too_short
27
+ "Property too short: #{error[:path]} (must be at least #{error[:length]} characters)"
28
+ when :too_long
29
+ "Property too long: #{error[:path]} (must be at most #{error[:length]} characters)"
30
+ when :type_mismatch
31
+ "Property of wrong type: #{error[:path]} (must be of type #{error[:allowed_types].join(', ')})"
32
+ when :enum_mismatch
33
+ "Property not an allowed value: #{error[:path]} (must be one of #{error[:allowed_values].join(', ')})"
34
+ when :format_mismatch
35
+ "Property not of expected format: #{error[:path]} (must be of format #{error[:expected_format]})"
36
+ when :unknown
37
+ error[:message]
38
+ end
39
+ end
40
+
41
+ message
42
+ end
43
+ end
44
+ end
@@ -1,3 +1,3 @@
1
1
  module TurbotRunner
2
- VERSION = '0.1.24'
2
+ VERSION = '0.1.25'
3
3
  end
data/lib/turbot_runner.rb CHANGED
@@ -4,6 +4,7 @@ require 'turbot_runner/processor'
4
4
  require 'turbot_runner/runner'
5
5
  require 'turbot_runner/script_runner'
6
6
  require 'turbot_runner/utils'
7
+ require 'turbot_runner/validator'
7
8
  require 'turbot_runner/version'
8
9
 
9
10
  module TurbotRunner
@@ -17,28 +17,47 @@ describe TurbotRunner::Processor do
17
17
  @processor = TurbotRunner::Processor.new(nil, @script_config, @handler)
18
18
  end
19
19
 
20
- context 'with record missing required field' do
21
- before do
22
- @record = {
20
+ context 'with valid record' do
21
+ it 'calls Handler#handle_valid_record' do
22
+ record = {
23
23
  'sample_date' => '2014-06-01',
24
+ 'source_url' => 'http://example.com/123',
24
25
  'number' => 123
25
26
  }
27
+
28
+ expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
29
+ @processor.process(record.to_json)
26
30
  end
31
+ end
27
32
 
33
+ context 'with invalid record' do
28
34
  it 'calls Handler#handle_invalid_record' do
35
+ record = {
36
+ 'sample_date' => '2014-06-01',
37
+ 'number' => 123
38
+ }
39
+
29
40
  expected_error = 'Missing required property: source_url'
30
41
  expect(@handler).to receive(:handle_invalid_record).
31
- with(@record, @data_type, expected_error)
32
- @processor.process(@record.to_json)
42
+ with(record, @data_type, expected_error)
43
+ @processor.process(record.to_json)
44
+ end
45
+ end
46
+
47
+ context 'with invalid JSON' do
48
+ it 'calls Handler#handle_invalid_json' do
49
+ line = 'this is not JSON'
50
+ expect(@handler).to receive(:handle_invalid_json).with(line)
51
+ @processor.process(line)
33
52
  end
34
53
  end
35
54
  end
36
55
 
37
56
  context 'with a runner passed in' do
38
57
  before do
39
- script_runner = instance_double('ScriptRunner')
40
- allow(script_runner).to receive(:interrupt_and_mark_as_failed)
41
- @processor = TurbotRunner::Processor.new(script_runner, @script_config, @handler)
58
+ @script_runner = instance_double('ScriptRunner')
59
+ allow(@script_runner).to receive(:interrupt_and_mark_as_failed)
60
+ @processor = TurbotRunner::Processor.new(@script_runner, @script_config, @handler)
42
61
  end
43
62
 
44
63
  context 'with valid record' do
@@ -48,12 +67,13 @@ describe TurbotRunner::Processor do
48
67
  'source_url' => 'http://example.com/123',
49
68
  'number' => 123
50
69
  }
70
+
51
71
  expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
52
72
  @processor.process(record.to_json)
53
73
  end
54
74
  end
55
75
 
56
- context 'with record missing required field' do
76
+ context 'with invalid record' do
57
77
  before do
58
78
  @record = {
59
79
  'sample_date' => '2014-06-01',
@@ -67,93 +87,44 @@ describe TurbotRunner::Processor do
67
87
  with(@record, @data_type, expected_error)
68
88
  @processor.process(@record.to_json)
69
89
  end
70
- end
71
-
72
- context 'with record missing all identifying fields' do
73
- before do
74
- @record = {
75
- 'sample_date' => '2014-06-01',
76
- 'source_url' => 'http://example.com/123'
77
- }
78
- end
79
90
 
80
- it 'calls Handler#handle_invalid_record' do
81
- expected_error = 'There were no values provided for any of the identifying fields: number'
82
- expect(@handler).to receive(:handle_invalid_record).
83
- with(@record, @data_type, expected_error)
91
+ it 'interrupts runner' do
92
+ expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
84
93
  @processor.process(@record.to_json)
85
94
  end
86
95
  end
87
96
 
88
97
  context 'with invalid JSON' do
89
- it 'calls Handler#handle_invalid_json' do
90
- line = 'this is not JSON'
91
- expect(@handler).to receive(:handle_invalid_json).with(line)
92
- @processor.process(line)
98
+ before do
99
+ @line = 'this is not JSON'
93
100
  end
94
- end
95
-
96
- context 'with record with sample_date from Time.now' do
97
- it 'calls Handler#handle_valid_record with converted sample_date' do
98
- record = {
99
- 'sample_date' => '2014-06-01 12:34:56 +0000',
100
- 'source_url' => 'http://example.com/123',
101
- 'number' => 123
102
- }
103
101
 
104
- expected_converted_record = {
105
- 'sample_date' => '2014-06-01',
106
- 'source_url' => 'http://example.com/123',
107
- 'number' => 123
108
- }
109
- expect(@handler).to receive(:handle_valid_record).
110
- with(expected_converted_record, @data_type)
111
- @processor.process(record.to_json)
102
+ it 'calls Handler#handle_invalid_json' do
103
+ expect(@handler).to receive(:handle_invalid_json).with(@line)
104
+ @processor.process(@line)
112
105
  end
113
- end
114
-
115
- context 'with record with missing sample_date' do
116
- it 'calls Handler#handle_invalid_record' do
117
- record = {
118
- 'source_url' => 'http://example.com/123',
119
- 'number' => 123
120
- }
121
106
 
122
- expected_error = 'Missing required property: sample_date'
123
- expect(@handler).to receive(:handle_invalid_record).
124
- with(record, @data_type, expected_error)
125
- @processor.process(record.to_json)
107
+ it 'interrupts runner' do
108
+ expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
109
+ @processor.process(@line)
126
110
  end
127
111
  end
128
112
 
129
- context 'with record with empty sample_date' do
130
- it 'calls Handler#handle_invalid_record' do
113
+ it 'converts date format' do
131
114
  record = {
132
- 'sample_date' => '',
115
+ 'sample_date' => '2014-06-01 12:34:56 +0000',
133
116
  'source_url' => 'http://example.com/123',
134
117
  'number' => 123
135
118
  }
136
119
 
137
- expected_error = 'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
138
- expect(@handler).to receive(:handle_invalid_record).
139
- with(record, @data_type, expected_error)
140
- @processor.process(record.to_json)
141
- end
142
- end
143
-
144
- context 'with record with invalid sample_date' do
145
- it 'calls Handler#handle_invalid_record' do
146
- record = {
147
- 'sample_date' => '2014-06-00',
120
+ converted_record = {
121
+ 'sample_date' => '2014-06-01',
148
122
  'source_url' => 'http://example.com/123',
149
123
  'number' => 123
150
124
  }
151
125
 
152
- expected_error = 'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
153
- expect(@handler).to receive(:handle_invalid_record).
154
- with(record, @data_type, expected_error)
126
+ expect(@handler).to receive(:handle_valid_record).with(converted_record, @data_type)
155
127
  @processor.process(record.to_json)
156
- end
157
128
  end
158
129
  end
159
130
 
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
+ describe TurbotRunner::Validator do
4
+ describe '.validate' do
5
+ specify 'with valid record' do
6
+ record = {
7
+ 'sample_date' => '2014-06-01',
8
+ 'source_url' => 'http://example.com/123',
9
+ 'number' => 123
10
+ }
11
+ expect(record).to be_valid
12
+ end
13
+
14
+ specify 'with record missing required field' do
15
+ record = {
16
+ 'sample_date' => '2014-06-01',
17
+ 'number' => 123
18
+ }
19
+ expected_error = 'Missing required property: source_url'
20
+ expect(record).to fail_validation_with(expected_error)
21
+ end
22
+
23
+ specify 'with record missing all identifying fields' do
24
+ record = {
25
+ 'sample_date' => '2014-06-01',
26
+ 'source_url' => 'http://example.com/123'
27
+ }
28
+ expected_error = 'There were no values provided for any of the identifying fields: number'
29
+ expect(record).to fail_validation_with(expected_error)
30
+ end
31
+
32
+ specify 'with record with empty sample_date' do
33
+ record = {
34
+ 'sample_date' => '',
35
+ 'source_url' => 'http://example.com/123',
36
+ 'number' => 123
37
+ }
38
+ expected_error = 'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
39
+ expect(record).to fail_validation_with(expected_error)
40
+ end
41
+
42
+ specify 'with record with invalid sample_date' do
43
+ record = {
44
+ 'sample_date' => '2014-06-00',
45
+ 'source_url' => 'http://example.com/123',
46
+ 'number' => 123
47
+ }
48
+ expected_error = 'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
49
+ expect(record).to fail_validation_with(expected_error)
50
+ end
51
+ end
52
+ end
data/spec/spec_helper.rb CHANGED
@@ -1,20 +1,17 @@
1
1
  require 'turbot_runner'
2
2
 
3
- RSpec::Matchers.define(:fail_validation_with) do |expected|
4
- match do |actual|
5
- schema, record = actual
6
-
7
- error = TurbotRunner::Validator.validate(schema, record)
8
- expect(error).to eq(expected)
3
+ RSpec::Matchers.define(:fail_validation_with) do |expected_error|
4
+ match do |record|
5
+ schema_path = File.join(TurbotRunner::SCHEMAS_PATH, 'primary-data-schema.json')
6
+ identifying_fields = ['number']
7
+ expect(TurbotRunner::Validator.validate(schema_path, record, identifying_fields)).to eq(expected_error)
9
8
  end
10
9
  end
11
10
 
12
11
  RSpec::Matchers.define(:be_valid) do
13
- match do |actual|
14
- schema, record = actual
15
-
16
- error = TurbotRunner::Validator.validate(schema, record)
17
- expect(error).to eq(nil)
12
+ match do |record|
13
+ schema_path = File.join(TurbotRunner::SCHEMAS_PATH, 'primary-data-schema.json')
14
+ identifying_fields = ['number']
15
+ expect(TurbotRunner::Validator.validate(schema_path, record, identifying_fields)).to eq(nil)
18
16
  end
19
17
  end
20
-
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.24
4
+ version: 0.1.25
5
5
  platform: ruby
6
6
  authors:
7
7
  - OpenCorporates
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-16 00:00:00.000000000 Z
11
+ date: 2015-02-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: openc-json_schema
@@ -39,6 +39,7 @@ files:
39
39
  - lib/turbot_runner/runner.rb
40
40
  - lib/turbot_runner/script_runner.rb
41
41
  - lib/turbot_runner/utils.rb
42
+ - lib/turbot_runner/validator.rb
42
43
  - lib/turbot_runner/version.rb
43
44
  - schema/schemas/company-schema.json
44
45
  - schema/schemas/financial-payment-schema.json
@@ -109,6 +110,7 @@ files:
109
110
  - spec/bots/slow-bot/scraper.rb
110
111
  - spec/lib/processor_spec.rb
111
112
  - spec/lib/runner_spec.rb
113
+ - spec/lib/validator_spec.rb
112
114
  - spec/manual_spec.rb
113
115
  - spec/outputs/full-scraper.out
114
116
  - spec/outputs/full-transformer.out