turbot-runner 0.1.24 → 0.1.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MzkxMDdjODdjYjg3NDFkZTVjZjJmMzI4M2RhZmQ1NWI5OTBmNzFiOA==
4
+ NWI1YjU1NDhkMjg2ZDkzNDJmNGYzNmU3NTg1OGRlMWExZGY5NDI0NA==
5
5
  data.tar.gz: !binary |-
6
- ZmU3ODUzNmMxODg5NjZlMDhjZDMzYzk0MTY4YzA4ODVlZjEzZTVlOA==
6
+ ZDkyNTcxMDhmZjA5NDRjMjczNzdhMTA2MGJlNGJlZTQ3ZjM4NWZkOA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- Mzc3NTg0ZTY2ZWM5YmU4MWM4ODM0NmRkZjdhNDk2NWE4YjYxMDllMjJmOTlm
10
- MmQwMzEwMGJkMThiMDFlYzFlYjg0ODQyMjkzMDBlMzgwMWM4YmM3MTI1NjA4
11
- OGMxMjExNjIxNDI2N2FmZjYyODU2Y2IwMDEzMTkwZWJjNzVlODM=
9
+ Y2Y5NjYyY2ExOWY1ZTg2OWFmMmUyMjlkNmMwZTI4N2NkYjEzZDY4YWY2Njlh
10
+ OTJlNWY2N2VjMDQzMDhmMTBjODM5NmI5ZDFjODc3YWQxMjc3ZjAxMjczN2U0
11
+ MjZmZmY2ODNiOTIxZmZiYTIyZGE2MDliMDMxNTk3NTQ1MTIxMTA=
12
12
  data.tar.gz: !binary |-
13
- MDk3NzFhODNlZDA3NGUyMmZlMTBmYzEzM2UyNjFmMDc5NzJjYTJkMjQ0NGRh
14
- M2U3MDlmOGYwNGI4Y2U3YzNkZDEwODk4NzA4ZDY1Y2NhNGJlYWE1MzkyNDYw
15
- M2MyYTNkNjI0YzEwMTMyOTdiMzMwZmU1NTQ2Njk0YTQ5NjZkMGM=
13
+ ODE2N2ZhN2M2MWFjMzJhNDgyNGQzNDBlNGM2OGU2NmEyYjBkNDUyNTkyNzZh
14
+ MTU1OGM1YjM3YjJiZmE2OWEzNTNmODJlNTEzNGIxZjc5NmRiN2E3YmVmNWQy
15
+ OTFlODIwNGZhOWRlYjliYjJlZjBlMmIwYzhkNGNlOTlhOTFkZDE=
@@ -17,7 +17,7 @@ module TurbotRunner
17
17
  else
18
18
  record = Openc::JsonSchema.convert_dates(schema_path, JSON.parse(line))
19
19
 
20
- error_message = validate(record)
20
+ error_message = Validator.validate(schema_path, record, @identifying_fields)
21
21
 
22
22
  if error_message.nil?
23
23
  begin
@@ -40,50 +40,9 @@ module TurbotRunner
40
40
  @runner.interrupt
41
41
  end
42
42
 
43
- def validate(record)
44
- error = Openc::JsonSchema.validate(schema_path, record)
45
-
46
- message = nil
47
-
48
- if error.nil?
49
- identifying_attributes = record.reject do |k, v|
50
- !@identifying_fields.include?(k) || v.nil? || v == ''
51
- end
52
-
53
- if identifying_attributes.empty?
54
- message = "There were no values provided for any of the identifying fields: #{@identifying_fields.join(', ')}"
55
- end
56
- else
57
- message = case error[:type]
58
- when :missing
59
- "Missing required property: #{error[:path]}"
60
- when :one_of_no_matches
61
- "No match for property: #{error[:path]}"
62
- when :one_of_many_matches
63
- "Multiple possible matches for property: #{error[:path]}"
64
- when :too_short
65
- "Property too short: #{error[:path]} (must be at least #{error[:length]} characters)"
66
- when :too_long
67
- "Property too long: #{error[:path]} (must be at most #{error[:length]} characters)"
68
- when :type_mismatch
69
- "Property of wrong type: #{error[:path]} (must be of type #{error[:allowed_types].join(', ')})"
70
- when :enum_mismatch
71
- "Property not an allowed value: #{error[:path]} (must be one of #{error[:allowed_values].join(', ')})"
72
- when :format_mismatch
73
- "Property not of expected format: #{error[:path]} (must be of format #{error[:expected_format]})"
74
- when :unknown
75
- error[:message]
76
- end
77
- end
78
-
79
- message
80
- end
81
-
82
43
  def schema_path
83
44
  hyphenated_name = @data_type.to_s.gsub("_", "-").gsub(" ", "-")
84
45
  File.join(SCHEMAS_PATH, "#{hyphenated_name}-schema.json")
85
46
  end
86
-
87
- class ConversionError < StandardError; end
88
47
  end
89
48
  end
@@ -0,0 +1,44 @@
1
+ module TurbotRunner
2
+ module Validator
3
+ extend self
4
+
5
+ def validate(schema_path, record, identifying_fields)
6
+ error = Openc::JsonSchema.validate(schema_path, record)
7
+
8
+ message = nil
9
+
10
+ if error.nil?
11
+ identifying_attributes = record.reject do |k, v|
12
+ !identifying_fields.include?(k) || v.nil? || v == ''
13
+ end
14
+
15
+ if identifying_attributes.empty?
16
+ message = "There were no values provided for any of the identifying fields: #{identifying_fields.join(', ')}"
17
+ end
18
+ else
19
+ message = case error[:type]
20
+ when :missing
21
+ "Missing required property: #{error[:path]}"
22
+ when :one_of_no_matches
23
+ "No match for property: #{error[:path]}"
24
+ when :one_of_many_matches
25
+ "Multiple possible matches for property: #{error[:path]}"
26
+ when :too_short
27
+ "Property too short: #{error[:path]} (must be at least #{error[:length]} characters)"
28
+ when :too_long
29
+ "Property too long: #{error[:path]} (must be at most #{error[:length]} characters)"
30
+ when :type_mismatch
31
+ "Property of wrong type: #{error[:path]} (must be of type #{error[:allowed_types].join(', ')})"
32
+ when :enum_mismatch
33
+ "Property not an allowed value: #{error[:path]} (must be one of #{error[:allowed_values].join(', ')})"
34
+ when :format_mismatch
35
+ "Property not of expected format: #{error[:path]} (must be of format #{error[:expected_format]})"
36
+ when :unknown
37
+ error[:message]
38
+ end
39
+ end
40
+
41
+ message
42
+ end
43
+ end
44
+ end
@@ -1,3 +1,3 @@
1
1
  module TurbotRunner
2
- VERSION = '0.1.24'
2
+ VERSION = '0.1.25'
3
3
  end
data/lib/turbot_runner.rb CHANGED
@@ -4,6 +4,7 @@ require 'turbot_runner/processor'
4
4
  require 'turbot_runner/runner'
5
5
  require 'turbot_runner/script_runner'
6
6
  require 'turbot_runner/utils'
7
+ require 'turbot_runner/validator'
7
8
  require 'turbot_runner/version'
8
9
 
9
10
  module TurbotRunner
@@ -17,28 +17,47 @@ describe TurbotRunner::Processor do
17
17
  @processor = TurbotRunner::Processor.new(nil, @script_config, @handler)
18
18
  end
19
19
 
20
- context 'with record missing required field' do
21
- before do
22
- @record = {
20
+ context 'with valid record' do
21
+ it 'calls Handler#handle_valid_record' do
22
+ record = {
23
23
  'sample_date' => '2014-06-01',
24
+ 'source_url' => 'http://example.com/123',
24
25
  'number' => 123
25
26
  }
27
+
28
+ expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
29
+ @processor.process(record.to_json)
26
30
  end
31
+ end
27
32
 
33
+ context 'with invalid record' do
28
34
  it 'calls Handler#handle_invalid_record' do
35
+ record = {
36
+ 'sample_date' => '2014-06-01',
37
+ 'number' => 123
38
+ }
39
+
29
40
  expected_error = 'Missing required property: source_url'
30
41
  expect(@handler).to receive(:handle_invalid_record).
31
- with(@record, @data_type, expected_error)
32
- @processor.process(@record.to_json)
42
+ with(record, @data_type, expected_error)
43
+ @processor.process(record.to_json)
44
+ end
45
+ end
46
+
47
+ context 'with invalid JSON' do
48
+ it 'calls Handler#handle_invalid_json' do
49
+ line = 'this is not JSON'
50
+ expect(@handler).to receive(:handle_invalid_json).with(line)
51
+ @processor.process(line)
33
52
  end
34
53
  end
35
54
  end
36
55
 
37
56
  context 'with a runner passed in' do
38
57
  before do
39
- script_runner = instance_double('ScriptRunner')
40
- allow(script_runner).to receive(:interrupt_and_mark_as_failed)
41
- @processor = TurbotRunner::Processor.new(script_runner, @script_config, @handler)
58
+ @script_runner = instance_double('ScriptRunner')
59
+ allow(@script_runner).to receive(:interrupt_and_mark_as_failed)
60
+ @processor = TurbotRunner::Processor.new(@script_runner, @script_config, @handler)
42
61
  end
43
62
 
44
63
  context 'with valid record' do
@@ -48,12 +67,13 @@ describe TurbotRunner::Processor do
48
67
  'source_url' => 'http://example.com/123',
49
68
  'number' => 123
50
69
  }
70
+
51
71
  expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
52
72
  @processor.process(record.to_json)
53
73
  end
54
74
  end
55
75
 
56
- context 'with record missing required field' do
76
+ context 'with invalid record' do
57
77
  before do
58
78
  @record = {
59
79
  'sample_date' => '2014-06-01',
@@ -67,93 +87,44 @@ describe TurbotRunner::Processor do
67
87
  with(@record, @data_type, expected_error)
68
88
  @processor.process(@record.to_json)
69
89
  end
70
- end
71
-
72
- context 'with record missing all identifying fields' do
73
- before do
74
- @record = {
75
- 'sample_date' => '2014-06-01',
76
- 'source_url' => 'http://example.com/123'
77
- }
78
- end
79
90
 
80
- it 'calls Handler#handle_invalid_record' do
81
- expected_error = 'There were no values provided for any of the identifying fields: number'
82
- expect(@handler).to receive(:handle_invalid_record).
83
- with(@record, @data_type, expected_error)
91
+ it 'interrupts runner' do
92
+ expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
84
93
  @processor.process(@record.to_json)
85
94
  end
86
95
  end
87
96
 
88
97
  context 'with invalid JSON' do
89
- it 'calls Handler#handle_invalid_json' do
90
- line = 'this is not JSON'
91
- expect(@handler).to receive(:handle_invalid_json).with(line)
92
- @processor.process(line)
98
+ before do
99
+ @line = 'this is not JSON'
93
100
  end
94
- end
95
-
96
- context 'with record with sample_date from Time.now' do
97
- it 'calls Handler#handle_valid_record with converted sample_date' do
98
- record = {
99
- 'sample_date' => '2014-06-01 12:34:56 +0000',
100
- 'source_url' => 'http://example.com/123',
101
- 'number' => 123
102
- }
103
101
 
104
- expected_converted_record = {
105
- 'sample_date' => '2014-06-01',
106
- 'source_url' => 'http://example.com/123',
107
- 'number' => 123
108
- }
109
- expect(@handler).to receive(:handle_valid_record).
110
- with(expected_converted_record, @data_type)
111
- @processor.process(record.to_json)
102
+ it 'calls Handler#handle_invalid_json' do
103
+ expect(@handler).to receive(:handle_invalid_json).with(@line)
104
+ @processor.process(@line)
112
105
  end
113
- end
114
-
115
- context 'with record with missing sample_date' do
116
- it 'calls Handler#handle_invalid_record' do
117
- record = {
118
- 'source_url' => 'http://example.com/123',
119
- 'number' => 123
120
- }
121
106
 
122
- expected_error = 'Missing required property: sample_date'
123
- expect(@handler).to receive(:handle_invalid_record).
124
- with(record, @data_type, expected_error)
125
- @processor.process(record.to_json)
107
+ it 'interrupts runner' do
108
+ expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
109
+ @processor.process(@line)
126
110
  end
127
111
  end
128
112
 
129
- context 'with record with empty sample_date' do
130
- it 'calls Handler#handle_invalid_record' do
113
+ it 'converts date format' do
131
114
  record = {
132
- 'sample_date' => '',
115
+ 'sample_date' => '2014-06-01 12:34:56 +0000',
133
116
  'source_url' => 'http://example.com/123',
134
117
  'number' => 123
135
118
  }
136
119
 
137
- expected_error = 'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
138
- expect(@handler).to receive(:handle_invalid_record).
139
- with(record, @data_type, expected_error)
140
- @processor.process(record.to_json)
141
- end
142
- end
143
-
144
- context 'with record with invalid sample_date' do
145
- it 'calls Handler#handle_invalid_record' do
146
- record = {
147
- 'sample_date' => '2014-06-00',
120
+ converted_record = {
121
+ 'sample_date' => '2014-06-01',
148
122
  'source_url' => 'http://example.com/123',
149
123
  'number' => 123
150
124
  }
151
125
 
152
- expected_error = 'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
153
- expect(@handler).to receive(:handle_invalid_record).
154
- with(record, @data_type, expected_error)
126
+ expect(@handler).to receive(:handle_valid_record).with(converted_record, @data_type)
155
127
  @processor.process(record.to_json)
156
- end
157
128
  end
158
129
  end
159
130
 
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
+ describe TurbotRunner::Validator do
4
+ describe '.validate' do
5
+ specify 'with valid record' do
6
+ record = {
7
+ 'sample_date' => '2014-06-01',
8
+ 'source_url' => 'http://example.com/123',
9
+ 'number' => 123
10
+ }
11
+ expect(record).to be_valid
12
+ end
13
+
14
+ specify 'with record missing required field' do
15
+ record = {
16
+ 'sample_date' => '2014-06-01',
17
+ 'number' => 123
18
+ }
19
+ expected_error = 'Missing required property: source_url'
20
+ expect(record).to fail_validation_with(expected_error)
21
+ end
22
+
23
+ specify 'with record missing all identifying fields' do
24
+ record = {
25
+ 'sample_date' => '2014-06-01',
26
+ 'source_url' => 'http://example.com/123'
27
+ }
28
+ expected_error = 'There were no values provided for any of the identifying fields: number'
29
+ expect(record).to fail_validation_with(expected_error)
30
+ end
31
+
32
+ specify 'with record with empty sample_date' do
33
+ record = {
34
+ 'sample_date' => '',
35
+ 'source_url' => 'http://example.com/123',
36
+ 'number' => 123
37
+ }
38
+ expected_error = 'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
39
+ expect(record).to fail_validation_with(expected_error)
40
+ end
41
+
42
+ specify 'with record with invalid sample_date' do
43
+ record = {
44
+ 'sample_date' => '2014-06-00',
45
+ 'source_url' => 'http://example.com/123',
46
+ 'number' => 123
47
+ }
48
+ expected_error = 'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
49
+ expect(record).to fail_validation_with(expected_error)
50
+ end
51
+ end
52
+ end
data/spec/spec_helper.rb CHANGED
@@ -1,20 +1,17 @@
1
1
  require 'turbot_runner'
2
2
 
3
- RSpec::Matchers.define(:fail_validation_with) do |expected|
4
- match do |actual|
5
- schema, record = actual
6
-
7
- error = TurbotRunner::Validator.validate(schema, record)
8
- expect(error).to eq(expected)
3
+ RSpec::Matchers.define(:fail_validation_with) do |expected_error|
4
+ match do |record|
5
+ schema_path = File.join(TurbotRunner::SCHEMAS_PATH, 'primary-data-schema.json')
6
+ identifying_fields = ['number']
7
+ expect(TurbotRunner::Validator.validate(schema_path, record, identifying_fields)).to eq(expected_error)
9
8
  end
10
9
  end
11
10
 
12
11
  RSpec::Matchers.define(:be_valid) do
13
- match do |actual|
14
- schema, record = actual
15
-
16
- error = TurbotRunner::Validator.validate(schema, record)
17
- expect(error).to eq(nil)
12
+ match do |record|
13
+ schema_path = File.join(TurbotRunner::SCHEMAS_PATH, 'primary-data-schema.json')
14
+ identifying_fields = ['number']
15
+ expect(TurbotRunner::Validator.validate(schema_path, record, identifying_fields)).to eq(nil)
18
16
  end
19
17
  end
20
-
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.24
4
+ version: 0.1.25
5
5
  platform: ruby
6
6
  authors:
7
7
  - OpenCorporates
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-16 00:00:00.000000000 Z
11
+ date: 2015-02-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: openc-json_schema
@@ -39,6 +39,7 @@ files:
39
39
  - lib/turbot_runner/runner.rb
40
40
  - lib/turbot_runner/script_runner.rb
41
41
  - lib/turbot_runner/utils.rb
42
+ - lib/turbot_runner/validator.rb
42
43
  - lib/turbot_runner/version.rb
43
44
  - schema/schemas/company-schema.json
44
45
  - schema/schemas/financial-payment-schema.json
@@ -109,6 +110,7 @@ files:
109
110
  - spec/bots/slow-bot/scraper.rb
110
111
  - spec/lib/processor_spec.rb
111
112
  - spec/lib/runner_spec.rb
113
+ - spec/lib/validator_spec.rb
112
114
  - spec/manual_spec.rb
113
115
  - spec/outputs/full-scraper.out
114
116
  - spec/outputs/full-transformer.out