turbot-runner 0.0.24 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/turbot_runner/base_handler.rb +13 -0
- data/lib/{prerun.rb → turbot_runner/prerun.rb} +0 -0
- data/lib/turbot_runner/processor.rb +55 -0
- data/lib/turbot_runner/runner.rb +150 -0
- data/lib/turbot_runner/script_runner.rb +90 -0
- data/lib/turbot_runner/version.rb +1 -1
- data/lib/turbot_runner.rb +5 -335
- data/spec/bots/bot-that-crashes-in-scraper/manifest.json +8 -0
- data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
- data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
- data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
- data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
- data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
- data/spec/bots/bot-with-pause/manifest.json +8 -0
- data/spec/bots/bot-with-pause/scraper.rb +16 -0
- data/spec/bots/bot-with-transformer/manifest.json +15 -0
- data/spec/bots/bot-with-transformer/scraper.rb +10 -0
- data/spec/bots/bot-with-transformer/transformer.rb +15 -0
- data/spec/bots/bot-with-transformers/manifest.json +20 -0
- data/spec/bots/bot-with-transformers/scraper.rb +10 -0
- data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
- data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
- data/spec/bots/invalid-json-bot/manifest.json +8 -0
- data/spec/bots/invalid-json-bot/scraper.rb +11 -0
- data/spec/bots/invalid-record-bot/manifest.json +8 -0
- data/spec/bots/invalid-record-bot/scraper.rb +11 -0
- data/spec/bots/logging-bot/manifest.json +8 -0
- data/spec/bots/logging-bot/scraper.rb +14 -0
- data/spec/bots/python-bot/manifest.json +8 -0
- data/spec/bots/python-bot/scraper.py +11 -0
- data/spec/bots/ruby-bot/manifest.json +8 -0
- data/spec/bots/ruby-bot/scraper.rb +10 -0
- data/spec/bots/slow-bot/manifest.json +8 -0
- data/spec/bots/slow-bot/scraper.rb +11 -0
- data/spec/lib/processor.rb +48 -0
- data/spec/lib/runner_spec.rb +244 -0
- data/spec/manual_spec.rb +55 -0
- data/spec/outputs/full-scraper.out +10 -0
- data/spec/outputs/full-transformer.out +10 -0
- data/spec/outputs/truncated-scraper.out +5 -0
- metadata +40 -19
- data/spec/dummy-bot-python/manifest.json +0 -15
- data/spec/dummy-bot-python/scraper.py +0 -11
- data/spec/dummy-bot-python/transformer.py +0 -15
- data/spec/dummy-bot-ruby/manifest.json +0 -15
- data/spec/dummy-bot-ruby/scraper.rb +0 -8
- data/spec/dummy-bot-ruby/transformer.rb +0 -12
- data/spec/dummy-broken-bot-ruby/manifest.json +0 -8
- data/spec/dummy-broken-bot-ruby/scraper.rb +0 -6
- data/spec/dummy-broken-bot-ruby/transformer.rb +0 -12
- data/spec/dummy-broken-bot-ruby-2/manifest.json +0 -15
- data/spec/dummy-broken-bot-ruby-2/scraper.rb +0 -4
- data/spec/dummy-broken-bot-ruby-2/transformer.rb +0 -11
- data/spec/dummy-broken-bot-ruby-3/manifest.json +0 -15
- data/spec/dummy-broken-bot-ruby-3/scraper.rb +0 -5
- data/spec/dummy-broken-bot-ruby-3/transformer.rb +0 -5
- data/spec/turbot_runner_spec.rb +0 -117
@@ -0,0 +1,20 @@
|
|
1
|
+
{
|
2
|
+
"bot_id": "bot-that-crashes-in-transformer",
|
3
|
+
"description": "This is a bot that crashes in the transformer",
|
4
|
+
"language": "ruby",
|
5
|
+
"data_type": "primary data",
|
6
|
+
"identifying_fields": ["licence_number"],
|
7
|
+
"files": ["scraper.rb", "transformer1.rb", "transformer2.rb"],
|
8
|
+
"transformers": [
|
9
|
+
{
|
10
|
+
"file": "transformer1.rb",
|
11
|
+
"data_type": "simple-licence",
|
12
|
+
"identifying_fields": ["licence_number"]
|
13
|
+
},
|
14
|
+
{
|
15
|
+
"file": "transformer2.rb",
|
16
|
+
"data_type": "simple-licence",
|
17
|
+
"identifying_fields": ["licence_number"]
|
18
|
+
}
|
19
|
+
]
|
20
|
+
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'json'

# Transformer script: reads newline-delimited JSON records from STDIN and
# prints one transformed JSON record per input line to STDOUT.
STDIN.each_line do |json_line|
  source = JSON.parse(json_line)

  # Fixed company details come first, followed by the fields that are
  # copied unchanged from the incoming record (key order is preserved).
  result = { :company_name => 'Foo Widgets', :company_jurisdiction => 'gb' }
  [:licence_number, :source_url, :sample_date].each do |field|
    result[field] = source[field.to_s]
  end

  puts JSON.generate(result)
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'json'

# Transformer script that fails on purpose: it reads newline-delimited JSON
# records from STDIN, prints a transformed record for each one, and raises
# once it has printed the record whose licence number is XYZ4.
STDIN.each_line do |json_line|
  source = JSON.parse(json_line)

  # Fixed company details come first, followed by the fields that are
  # copied unchanged from the incoming record (key order is preserved).
  result = { :company_name => 'Foo Widgets', :company_jurisdiction => 'gb' }
  [:licence_number, :source_url, :sample_date].each do |field|
    result[field] = source[field.to_s]
  end

  puts JSON.generate(result)

  # The crash happens after the record has been written, not before.
  raise 'Oh no' if source['licence_number'] == 'XYZ4'
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'json'

# Scraper script: prints ten JSON records (XYZ0..XYZ9) to STDOUT and pauses
# for ten seconds after the fifth one, announcing the pause on STDERR.
(0..9).each do |index|
  row = {
    :licence_number => "XYZ#{index}",
    :source_url => 'http://example.com',
    :sample_date => '2014-06-01'
  }
  puts(JSON.generate(row))

  # Only the record numbered 4 is followed by the pause.
  next unless index == 4

  $stderr.puts 'The scraper will sleep for ten seconds...'
  sleep 10
  $stderr.puts 'The scraper is resuming...'
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
{
|
2
|
+
"bot_id": "bot-with-transformer",
|
3
|
+
"description": "This is a bot with a transformer",
|
4
|
+
"language": "ruby",
|
5
|
+
"data_type": "primary data",
|
6
|
+
"identifying_fields": ["licence_number"],
|
7
|
+
"files": ["scraper.rb", "transformer.rb"],
|
8
|
+
"transformers": [
|
9
|
+
{
|
10
|
+
"file": "transformer.rb",
|
11
|
+
"data_type": "simple-licence",
|
12
|
+
"identifying_fields": ["licence_number"]
|
13
|
+
}
|
14
|
+
]
|
15
|
+
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'json'

# Transformer script: reads newline-delimited JSON records from STDIN and
# prints one transformed JSON record per input line to STDOUT.
STDIN.each_line do |json_line|
  source = JSON.parse(json_line)

  # Fixed company details come first, followed by the fields that are
  # copied unchanged from the incoming record (key order is preserved).
  result = { :company_name => 'Foo Widgets', :company_jurisdiction => 'gb' }
  [:licence_number, :source_url, :sample_date].each do |field|
    result[field] = source[field.to_s]
  end

  puts JSON.generate(result)
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
{
|
2
|
+
"bot_id": "bot-with-transformers",
|
3
|
+
"description": "This is a bot with multiple transformers",
|
4
|
+
"language": "ruby",
|
5
|
+
"data_type": "primary data",
|
6
|
+
"identifying_fields": ["licence_number"],
|
7
|
+
"files": ["scraper.rb", "transformer1.rb", "transformer2.rb"],
|
8
|
+
"transformers": [
|
9
|
+
{
|
10
|
+
"file": "transformer1.rb",
|
11
|
+
"data_type": "simple-licence",
|
12
|
+
"identifying_fields": ["licence_number"]
|
13
|
+
},
|
14
|
+
{
|
15
|
+
"file": "transformer2.rb",
|
16
|
+
"data_type": "simple-licence",
|
17
|
+
"identifying_fields": ["licence_number"]
|
18
|
+
}
|
19
|
+
]
|
20
|
+
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'json'

# Transformer script: reads newline-delimited JSON records from STDIN and
# prints one transformed JSON record per input line to STDOUT.
STDIN.each_line do |json_line|
  source = JSON.parse(json_line)

  # Fixed company details come first, followed by the fields that are
  # copied unchanged from the incoming record (key order is preserved).
  result = { :company_name => 'Foo Widgets', :company_jurisdiction => 'gb' }
  [:licence_number, :source_url, :sample_date].each do |field|
    result[field] = source[field.to_s]
  end

  puts JSON.generate(result)
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'json'

# Transformer script: reads newline-delimited JSON records from STDIN and
# prints one transformed JSON record per input line to STDOUT.
STDIN.each_line do |json_line|
  source = JSON.parse(json_line)

  # Fixed company details come first, followed by the fields that are
  # copied unchanged from the incoming record (key order is preserved).
  result = { :company_name => 'Foo Widgets', :company_jurisdiction => 'gb' }
  [:licence_number, :source_url, :sample_date].each do |field|
    result[field] = source[field.to_s]
  end

  puts JSON.generate(result)
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'turbot_runner'
|
3
|
+
|
4
|
+
# Specs for TurbotRunner::Processor#process: each example feeds one line of
# text to the processor and checks which handler callback it triggers.
describe TurbotRunner::Processor do
  describe '#process' do
    # Every example gets a fresh handler and a processor wired to it; the
    # handler callbacks are intercepted with message expectations.
    before do
      @handler = TurbotRunner::BaseHandler.new
      @data_type = 'primary data'
      @processor = TurbotRunner::Processor.new(@handler, @data_type)
    end

    context 'with valid record' do
      it 'calls Handler#handle_valid_record' do
        record = {
          'sample_date' => '2014-06-01',
          'source_url' => 'http://example.com/123',
          'number' => 123
        }
        expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
        @processor.process(record.to_json)
      end
    end

    context 'with invalid record' do
      # Hooks must be declared on the example group, not inside an example,
      # so the fixture record (which lacks source_url) is built here.
      before do
        @record = {
          'sample_date' => '2014-06-01',
          'number' => 123
        }
      end

      it 'calls Handler#handle_invalid_record' do
        expected_errors = ['Missing required attribute: source_url']
        expect(@handler).to receive(:handle_invalid_record).
          with(@record, @data_type, expected_errors)
        @processor.process(@record.to_json)
      end
    end

    context 'with invalid JSON' do
      it 'calls Handler#handle_invalid_json' do
        line = 'this is not JSON'
        expect(@handler).to receive(:handle_invalid_json).with(line)
        @processor.process(line)
      end
    end
  end
end
|
@@ -0,0 +1,244 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'turbot_runner'
|
3
|
+
|
4
|
+
# Specs for TurbotRunner::Runner. Each context points a runner at one of the
# fixture bots under spec/bots and checks the return value of #run and/or the
# files found under the bot's output directory.
describe TurbotRunner::Runner do
  after(:all) do
    puts
    puts 'If all specs passed, you should now run `ruby spec/manual_spec.rb`'
  end

  describe '#run' do
    context 'with a bot written in ruby' do
      before do
        @runner = TurbotRunner::Runner.new('spec/bots/ruby-bot')
      end

      it 'produces expected output' do
        @runner.run
        expect([@runner, 'scraper']).to have_output('full-scraper.out')
      end

      it 'returns true' do
        expect(@runner.run).to be(true)
      end
    end

    context 'with a bot written in python' do
      before do
        @runner = TurbotRunner::Runner.new('spec/bots/python-bot')
      end

      it 'produces expected output' do
        @runner.run
        expect([@runner, 'scraper']).to have_output('full-scraper.out')
      end
    end

    context 'with a bot with a transformer' do
      before do
        @runner = TurbotRunner::Runner.new('spec/bots/bot-with-transformer')
      end

      it 'produces expected outputs' do
        @runner.run
        expect([@runner, 'scraper']).to have_output('full-scraper.out')
        expect([@runner, 'transformer']).to have_output('full-transformer.out')
      end

      it 'returns true' do
        expect(@runner.run).to be(true)
      end
    end

    context 'with a bot with multiple transformers' do
      before do
        @runner = TurbotRunner::Runner.new('spec/bots/bot-with-transformers')
      end

      it 'produces expected outputs' do
        @runner.run
        expect([@runner, 'scraper']).to have_output('full-scraper.out')
        expect([@runner, 'transformer1']).to have_output('full-transformer.out')
        expect([@runner, 'transformer2']).to have_output('full-transformer.out')
      end

      it 'returns true' do
        expect(@runner.run).to be(true)
      end
    end

    context 'with a bot that logs' do
      context 'when logging to file enabled' do
        it 'logs to file' do
          expected_log = "doing...\ndone\n"
          runner = TurbotRunner::Runner.new(
            'spec/bots/logging-bot',
            :log_to_file => true
          )
          runner.run
          expect([runner, 'scraper']).to have_error_output_matching(expected_log)
        end
      end

      context 'when logging to file not enabled' do
        xit 'logs to stderr' do
          # This is tested in manual_spec.rb
        end
      end
    end

    context 'with a bot that crashes in scraper' do
      before do
        @runner = TurbotRunner::Runner.new(
          'spec/bots/bot-that-crashes-in-scraper',
          :log_to_file => true
        )
      end

      it 'returns false' do
        expect(@runner.run).to be(false)
      end

      it 'writes error to stderr' do
        @runner.run
        expect([@runner, 'scraper']).to have_error_output_matching(/Oh no/)
      end
    end

    context 'with a bot that crashes in transformer' do
      before do
        @runner = TurbotRunner::Runner.new(
          'spec/bots/bot-that-crashes-in-transformer',
          :log_to_file => true
        )
      end

      it 'returns false' do
        expect(@runner.run).to be(false)
      end

      it 'writes error to stderr' do
        @runner.run
        expect([@runner, 'transformer2']).to have_error_output_matching(/Oh no/)
      end
    end

    context 'with a bot that is interrupted in scraper' do
      xit 'produces truncated output' do
        # This is tested in manual_spec.rb
      end
    end

    context 'with a handler that interrupts the runner' do
      before do
        # An anonymous subclass keeps this handler local to the context
        # instead of reopening a shared top-level `Handler` constant on
        # every example.
        interrupting_handler = Class.new(TurbotRunner::BaseHandler) do
          def initialize(*)
            @count = 0
            super
          end

          # Returns true for the first four records and false from the fifth
          # onwards. NOTE(review): a false return presumably tells the runner
          # to stop -- confirm against TurbotRunner::Runner.
          def handle_valid_record(record, data_type)
            @count += 1
            @count < 5
          end
        end

        @runner = TurbotRunner::Runner.new(
          'spec/bots/slow-bot',
          :record_handler => interrupting_handler.new,
          :log_to_file => true
        )
      end

      it 'produces expected output' do
        @runner.run
        expect([@runner, 'scraper']).to have_output('truncated-scraper.out')
      end

      it 'returns true' do
        expect(@runner.run).to be(true)
      end
    end

    context 'with a scraper that produces an invalid record' do
      it 'returns false' do
        @runner = TurbotRunner::Runner.new('spec/bots/invalid-record-bot')
        expect(@runner.run).to be(false)
      end
    end

    context 'with a scraper that produces invalid JSON' do
      it 'returns false' do
        @runner = TurbotRunner::Runner.new('spec/bots/invalid-json-bot')
        expect(@runner.run).to be(false)
      end
    end

    context 'with a scraper that hangs' do
      it 'returns false' do
        @runner = TurbotRunner::Runner.new(
          'spec/bots/bot-with-pause',
          :timeout => 1,
          :log_to_file => true
        )
        expect(@runner.run).to be(false)
      end
    end
  end

  describe '#process_output' do
    before do
      # This creates the output to work with
      TurbotRunner::Runner.new('spec/bots/bot-with-transformer').run
    end

    it 'calls handler once for each line of output' do
      # Anonymous subclass, so it cannot collide with any other handler
      # class defined in this spec file. It tallies valid records per
      # data type.
      counting_handler = Class.new(TurbotRunner::BaseHandler) do
        attr_reader :records_seen

        def initialize(*)
          @records_seen = Hash.new {|h, k| h[k] = 0}
          super
        end

        def handle_valid_record(record, data_type)
          @records_seen[data_type] += 1
        end
      end

      handler = counting_handler.new
      runner = TurbotRunner::Runner.new(
        'spec/bots/bot-with-transformer',
        :record_handler => handler
      )

      runner.process_output
      expect(handler.records_seen['primary data']).to eq(10)
      expect(handler.records_seen['simple-licence']).to eq(10)
    end
  end
end
|
221
|
+
|
222
|
+
|
223
|
+
# Custom matcher used as `expect([runner, script]).to have_output(fixture)`.
# It passes when <runner.directory>/output/<script>.out and
# spec/outputs/<fixture> contain the same sequence of JSON documents, compared
# line by line after parsing (so formatting differences are ignored).
RSpec::Matchers.define :have_output do |expected|
  match do |actual|
    runner, script = actual

    fixture_lines = File.readlines(File.join('spec', 'outputs', expected))
    produced_lines = File.readlines(File.join(runner.directory, 'output', "#{script}.out"))

    wanted = fixture_lines.map {|text| JSON.parse(text)}
    produced = produced_lines.map {|text| JSON.parse(text)}

    wanted == produced
  end
end
|
234
|
+
|
235
|
+
|
236
|
+
# Custom matcher used as
# `expect([runner, script]).to have_error_output_matching(pattern)`.
# It reads <runner.directory>/output/<script>.err in full and checks its
# contents against the given pattern with RSpec's `match` matcher.
RSpec::Matchers.define :have_error_output_matching do |expected|
  match do |actual|
    runner, script = actual

    error_log = File.read(File.join(runner.directory, 'output', "#{script}.err"))
    expect(error_log).to match(expected)
  end
end
|