turbot-runner 0.0.24 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/turbot_runner/base_handler.rb +13 -0
- data/lib/{prerun.rb → turbot_runner/prerun.rb} +0 -0
- data/lib/turbot_runner/processor.rb +55 -0
- data/lib/turbot_runner/runner.rb +150 -0
- data/lib/turbot_runner/script_runner.rb +90 -0
- data/lib/turbot_runner/version.rb +1 -1
- data/lib/turbot_runner.rb +5 -335
- data/spec/bots/bot-that-crashes-in-scraper/manifest.json +8 -0
- data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
- data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
- data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
- data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
- data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
- data/spec/bots/bot-with-pause/manifest.json +8 -0
- data/spec/bots/bot-with-pause/scraper.rb +16 -0
- data/spec/bots/bot-with-transformer/manifest.json +15 -0
- data/spec/bots/bot-with-transformer/scraper.rb +10 -0
- data/spec/bots/bot-with-transformer/transformer.rb +15 -0
- data/spec/bots/bot-with-transformers/manifest.json +20 -0
- data/spec/bots/bot-with-transformers/scraper.rb +10 -0
- data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
- data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
- data/spec/bots/invalid-json-bot/manifest.json +8 -0
- data/spec/bots/invalid-json-bot/scraper.rb +11 -0
- data/spec/bots/invalid-record-bot/manifest.json +8 -0
- data/spec/bots/invalid-record-bot/scraper.rb +11 -0
- data/spec/bots/logging-bot/manifest.json +8 -0
- data/spec/bots/logging-bot/scraper.rb +14 -0
- data/spec/bots/python-bot/manifest.json +8 -0
- data/spec/bots/python-bot/scraper.py +11 -0
- data/spec/bots/ruby-bot/manifest.json +8 -0
- data/spec/bots/ruby-bot/scraper.rb +10 -0
- data/spec/bots/slow-bot/manifest.json +8 -0
- data/spec/bots/slow-bot/scraper.rb +11 -0
- data/spec/lib/processor.rb +48 -0
- data/spec/lib/runner_spec.rb +244 -0
- data/spec/manual_spec.rb +55 -0
- data/spec/outputs/full-scraper.out +10 -0
- data/spec/outputs/full-transformer.out +10 -0
- data/spec/outputs/truncated-scraper.out +5 -0
- metadata +40 -19
- data/spec/dummy-bot-python/manifest.json +0 -15
- data/spec/dummy-bot-python/scraper.py +0 -11
- data/spec/dummy-bot-python/transformer.py +0 -15
- data/spec/dummy-bot-ruby/manifest.json +0 -15
- data/spec/dummy-bot-ruby/scraper.rb +0 -8
- data/spec/dummy-bot-ruby/transformer.rb +0 -12
- data/spec/dummy-broken-bot-ruby/manifest.json +0 -8
- data/spec/dummy-broken-bot-ruby/scraper.rb +0 -6
- data/spec/dummy-broken-bot-ruby/transformer.rb +0 -12
- data/spec/dummy-broken-bot-ruby-2/manifest.json +0 -15
- data/spec/dummy-broken-bot-ruby-2/scraper.rb +0 -4
- data/spec/dummy-broken-bot-ruby-2/transformer.rb +0 -11
- data/spec/dummy-broken-bot-ruby-3/manifest.json +0 -15
- data/spec/dummy-broken-bot-ruby-3/scraper.rb +0 -5
- data/spec/dummy-broken-bot-ruby-3/transformer.rb +0 -5
- data/spec/turbot_runner_spec.rb +0 -117
@@ -0,0 +1,20 @@
|
|
1
|
+
{
|
2
|
+
"bot_id": "bot-that-crashes-in-transformer",
|
3
|
+
"description": "This is a bot that crashes in the transformer",
|
4
|
+
"language": "ruby",
|
5
|
+
"data_type": "primary data",
|
6
|
+
"identifying_fields": ["licence_number"],
|
7
|
+
"files": ["scraper.rb", "transformer1.rb", "transformer2.rb"],
|
8
|
+
"transformers": [
|
9
|
+
{
|
10
|
+
"file": "transformer1.rb",
|
11
|
+
"data_type": "simple-licence",
|
12
|
+
"identifying_fields": ["licence_number"]
|
13
|
+
},
|
14
|
+
{
|
15
|
+
"file": "transformer2.rb",
|
16
|
+
"data_type": "simple-licence",
|
17
|
+
"identifying_fields": ["licence_number"]
|
18
|
+
}
|
19
|
+
]
|
20
|
+
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# Transformer script: consumes newline-delimited JSON records from standard
# input and prints one transformed JSON record per input line.
STDIN.each_line do |json_line|
  scraped = JSON.parse(json_line)

  # The company fields are constants; the remaining fields are copied
  # unchanged from the incoming record, in this order.
  licence = {
    :company_name => 'Foo Widgets',
    :company_jurisdiction => 'gb'
  }
  %w[licence_number source_url sample_date].each do |field|
    licence[field.to_sym] = scraped[field]
  end

  puts licence.to_json
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# Transformer script that deliberately crashes part-way through: it prints a
# transformed JSON record for every input line and then raises once it has
# emitted the record whose licence_number is 'XYZ4'.
STDIN.each_line do |json_line|
  scraped = JSON.parse(json_line)

  # The company fields are constants; the remaining fields are copied
  # unchanged from the incoming record, in this order.
  licence = {
    :company_name => 'Foo Widgets',
    :company_jurisdiction => 'gb'
  }
  %w[licence_number source_url sample_date].each do |field|
    licence[field.to_sym] = scraped[field]
  end

  puts licence.to_json

  # The failure happens after the record has been written, not before.
  raise 'Oh no' if scraped['licence_number'] == 'XYZ4'
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# Scraper script that emits ten JSON records (XYZ0..XYZ9) on standard output
# and pauses for ten seconds after the fifth one, announcing the pause and
# the resumption on standard error.
(0..9).each do |index|
  puts({
    :licence_number => "XYZ#{index}",
    :source_url => 'http://example.com',
    :sample_date => '2014-06-01'
  }.to_json)

  # Only the record with index 4 is followed by the pause.
  next unless index == 4

  $stderr.puts 'The scraper will sleep for ten seconds...'
  sleep 10
  $stderr.puts 'The scraper is resuming...'
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
{
|
2
|
+
"bot_id": "bot-with-transformer",
|
3
|
+
"description": "This is a bot with a transformer",
|
4
|
+
"language": "ruby",
|
5
|
+
"data_type": "primary data",
|
6
|
+
"identifying_fields": ["licence_number"],
|
7
|
+
"files": ["scraper.rb", "transformer.rb"],
|
8
|
+
"transformers": [
|
9
|
+
{
|
10
|
+
"file": "transformer.rb",
|
11
|
+
"data_type": "simple-licence",
|
12
|
+
"identifying_fields": ["licence_number"]
|
13
|
+
}
|
14
|
+
]
|
15
|
+
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# Transformer script: consumes newline-delimited JSON records from standard
# input and prints one transformed JSON record per input line.
STDIN.each_line do |json_line|
  scraped = JSON.parse(json_line)

  # The company fields are constants; the remaining fields are copied
  # unchanged from the incoming record, in this order.
  licence = {
    :company_name => 'Foo Widgets',
    :company_jurisdiction => 'gb'
  }
  %w[licence_number source_url sample_date].each do |field|
    licence[field.to_sym] = scraped[field]
  end

  puts licence.to_json
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
{
|
2
|
+
"bot_id": "bot-with-transformers",
|
3
|
+
"description": "This is a bot with multiple transformers",
|
4
|
+
"language": "ruby",
|
5
|
+
"data_type": "primary data",
|
6
|
+
"identifying_fields": ["licence_number"],
|
7
|
+
"files": ["scraper.rb", "transformer1.rb", "transformer2.rb"],
|
8
|
+
"transformers": [
|
9
|
+
{
|
10
|
+
"file": "transformer1.rb",
|
11
|
+
"data_type": "simple-licence",
|
12
|
+
"identifying_fields": ["licence_number"]
|
13
|
+
},
|
14
|
+
{
|
15
|
+
"file": "transformer2.rb",
|
16
|
+
"data_type": "simple-licence",
|
17
|
+
"identifying_fields": ["licence_number"]
|
18
|
+
}
|
19
|
+
]
|
20
|
+
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# Transformer script: consumes newline-delimited JSON records from standard
# input and prints one transformed JSON record per input line.
STDIN.each_line do |json_line|
  scraped = JSON.parse(json_line)

  # The company fields are constants; the remaining fields are copied
  # unchanged from the incoming record, in this order.
  licence = {
    :company_name => 'Foo Widgets',
    :company_jurisdiction => 'gb'
  }
  %w[licence_number source_url sample_date].each do |field|
    licence[field.to_sym] = scraped[field]
  end

  puts licence.to_json
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# Transformer script: consumes newline-delimited JSON records from standard
# input and prints one transformed JSON record per input line.
STDIN.each_line do |json_line|
  scraped = JSON.parse(json_line)

  # The company fields are constants; the remaining fields are copied
  # unchanged from the incoming record, in this order.
  licence = {
    :company_name => 'Foo Widgets',
    :company_jurisdiction => 'gb'
  }
  %w[licence_number source_url sample_date].each do |field|
    licence[field.to_sym] = scraped[field]
  end

  puts licence.to_json
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'turbot_runner'
|
3
|
+
|
4
|
+
# Specs for TurbotRunner::Processor#process. Each example feeds one line to
# the processor and checks which handler callback it is expected to invoke.
describe TurbotRunner::Processor do
  describe '#process' do
    before do
      @handler = TurbotRunner::BaseHandler.new
      @data_type = 'primary data'
      @processor = TurbotRunner::Processor.new(@handler, @data_type)
    end

    context 'with valid record' do
      it 'calls Handler#handle_valid_record' do
        record = {
          'sample_date' => '2014-06-01',
          'source_url' => 'http://example.com/123',
          'number' => 123
        }
        expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
        @processor.process(record.to_json)
      end
    end

    context 'with invalid record' do
      # Hooks can only be declared at the example-group level, so the record
      # is built here rather than inside the example body.
      before do
        @record = {
          'sample_date' => '2014-06-01',
          'number' => 123
        }
      end

      it 'calls Handler#handle_invalid_record' do
        expected_errors = ['Missing required attribute: source_url']
        expect(@handler).to receive(:handle_invalid_record).
          with(@record, @data_type, expected_errors)
        @processor.process(@record.to_json)
      end
    end

    context 'with invalid JSON' do
      it 'calls Handler#handle_invalid_json' do
        line = 'this is not JSON'
        expect(@handler).to receive(:handle_invalid_json).with(line)
        @processor.process(line)
      end
    end
  end
end
|
@@ -0,0 +1,244 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'turbot_runner'
|
3
|
+
|
4
|
+
# Specs for TurbotRunner::Runner. Each example runs one of the fixture bots
# under spec/bots and checks the runner's return value and/or the files it
# leaves in the bot's output directory (via the have_output and
# have_error_output_matching matchers).
describe TurbotRunner::Runner do
  after(:all) do
    puts
    puts 'If all specs passed, you should now run `ruby spec/manual_spec.rb`'
  end

  describe '#run' do
    context 'with a bot written in ruby' do
      before do
        @runner = TurbotRunner::Runner.new('spec/bots/ruby-bot')
      end

      it 'produces expected output' do
        @runner.run
        expect([@runner, 'scraper']).to have_output('full-scraper.out')
      end

      it 'returns true' do
        expect(@runner.run).to be(true)
      end
    end

    context 'with a bot written in python' do
      before do
        @runner = TurbotRunner::Runner.new('spec/bots/python-bot')
      end

      it 'produces expected output' do
        @runner.run
        expect([@runner, 'scraper']).to have_output('full-scraper.out')
      end
    end

    context 'with a bot with a transformer' do
      before do
        @runner = TurbotRunner::Runner.new('spec/bots/bot-with-transformer')
      end

      it 'produces expected outputs' do
        @runner.run
        expect([@runner, 'scraper']).to have_output('full-scraper.out')
        expect([@runner, 'transformer']).to have_output('full-transformer.out')
      end

      it 'returns true' do
        expect(@runner.run).to be(true)
      end
    end

    context 'with a bot with multiple transformers' do
      before do
        @runner = TurbotRunner::Runner.new('spec/bots/bot-with-transformers')
      end

      it 'produces expected outputs' do
        @runner.run
        expect([@runner, 'scraper']).to have_output('full-scraper.out')
        expect([@runner, 'transformer1']).to have_output('full-transformer.out')
        expect([@runner, 'transformer2']).to have_output('full-transformer.out')
      end

      it 'returns true' do
        expect(@runner.run).to be(true)
      end
    end

    context 'with a bot that logs' do
      context 'when logging to file enabled' do
        it 'logs to file' do
          expected_log = "doing...\ndone\n"
          runner = TurbotRunner::Runner.new(
            'spec/bots/logging-bot',
            :log_to_file => true
          )
          runner.run
          expect([runner, 'scraper']).to have_error_output_matching(expected_log)
        end
      end

      context 'when logging to file not enabled' do
        xit 'logs to stderr' do
          # This is tested in manual_spec.rb
        end
      end
    end

    context 'with a bot that crashes in scraper' do
      before do
        @runner = TurbotRunner::Runner.new(
          'spec/bots/bot-that-crashes-in-scraper',
          :log_to_file => true
        )
      end

      it 'returns false' do
        expect(@runner.run).to be(false)
      end

      it 'writes error to stderr' do
        @runner.run
        expect([@runner, 'scraper']).to have_error_output_matching(/Oh no/)
      end
    end

    context 'with a bot that crashes in transformer' do
      before do
        @runner = TurbotRunner::Runner.new(
          'spec/bots/bot-that-crashes-in-transformer',
          :log_to_file => true
        )
      end

      it 'returns false' do
        expect(@runner.run).to be(false)
      end

      it 'writes error to stderr' do
        @runner.run
        expect([@runner, 'transformer2']).to have_error_output_matching(/Oh no/)
      end
    end

    context 'with a bot that is interrupted in scraper' do
      xit 'produces truncated output' do
        # This is tested in manual_spec.rb
      end
    end

    context 'with a handler that interrupts the runner' do
      before do
        # Anonymous subclass: keeps this handler local to the context instead
        # of reopening a top-level Handler constant shared with other examples.
        interrupting_handler = Class.new(TurbotRunner::BaseHandler) do
          def initialize(*)
            @count = 0
            super
          end

          # Returns true for the first four records and false from the fifth
          # onwards.
          def handle_valid_record(record, data_type)
            @count += 1
            @count < 5
          end
        end

        @runner = TurbotRunner::Runner.new(
          'spec/bots/slow-bot',
          :record_handler => interrupting_handler.new,
          :log_to_file => true
        )
      end

      it 'produces expected output' do
        @runner.run
        expect([@runner, 'scraper']).to have_output('truncated-scraper.out')
      end

      it 'returns true' do
        expect(@runner.run).to be(true)
      end
    end

    context 'with a scraper that produces an invalid record' do
      it 'returns false' do
        @runner = TurbotRunner::Runner.new('spec/bots/invalid-record-bot')
        expect(@runner.run).to be(false)
      end
    end

    context 'with a scraper that produces invalid JSON' do
      it 'returns false' do
        @runner = TurbotRunner::Runner.new('spec/bots/invalid-json-bot')
        expect(@runner.run).to be(false)
      end
    end

    context 'with a scraper that hangs' do
      it 'returns false' do
        @runner = TurbotRunner::Runner.new(
          'spec/bots/bot-with-pause',
          :timeout => 1,
          :log_to_file => true
        )
        expect(@runner.run).to be(false)
      end
    end
  end

  describe '#process_output' do
    before do
      # This creates the output to work with
      TurbotRunner::Runner.new('spec/bots/bot-with-transformer').run
    end

    it 'calls handler once for each line of output' do
      # Anonymous subclass that tallies valid records per data type; defined
      # locally so it cannot clash with handlers used by other examples.
      counting_handler = Class.new(TurbotRunner::BaseHandler) do
        attr_reader :records_seen

        def initialize(*)
          @records_seen = Hash.new {|h, k| h[k] = 0}
          super
        end

        def handle_valid_record(record, data_type)
          @records_seen[data_type] += 1
        end
      end

      handler = counting_handler.new
      runner = TurbotRunner::Runner.new(
        'spec/bots/bot-with-transformer',
        :record_handler => handler
      )

      runner.process_output
      expect(handler.records_seen['primary data']).to eq(10)
      expect(handler.records_seen['simple-licence']).to eq(10)
    end
  end
end
|
221
|
+
|
222
|
+
|
223
|
+
# Custom matcher: passes when a script's output file contains the same JSON
# records, in the same order, as a fixture file under spec/outputs.
#
# The subject is a two-element array [runner, script]; the file compared is
# <runner.directory>/output/<script>.out. `expected` is the fixture's file
# name. Both files are parsed line by line and compared as parsed JSON.
RSpec::Matchers.define :have_output do |expected|
  match do |actual|
    runner, script = actual

    expected_path = File.join('spec', 'outputs', expected)
    expected_output = File.readlines(expected_path).map {|line| JSON.parse(line)}
    actual_path = File.join(runner.directory, 'output', "#{script}.out")
    actual_output = File.readlines(actual_path).map {|line| JSON.parse(line)}

    # A match block reports its result through its return value, so return
    # the comparison itself instead of nesting another expectation.
    actual_output == expected_output
  end
end
|
234
|
+
|
235
|
+
|
236
|
+
# Custom matcher: checks the contents of a script's error-output file,
# <runner.directory>/output/<script>.err, against `pattern` using RSpec's
# `match` matcher. The subject is a two-element array [runner, script].
RSpec::Matchers.define :have_error_output_matching do |pattern|
  match do |runner_and_script|
    bot_runner, script_name = runner_and_script

    err_path = File.join(bot_runner.directory, 'output', "#{script_name}.err")
    expect(File.read(err_path)).to match(pattern)
  end
end
|