turbot-runner 0.0.24 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/turbot_runner/base_handler.rb +13 -0
- data/lib/{prerun.rb → turbot_runner/prerun.rb} +0 -0
- data/lib/turbot_runner/processor.rb +55 -0
- data/lib/turbot_runner/runner.rb +150 -0
- data/lib/turbot_runner/script_runner.rb +90 -0
- data/lib/turbot_runner/version.rb +1 -1
- data/lib/turbot_runner.rb +5 -335
- data/spec/bots/bot-that-crashes-in-scraper/manifest.json +8 -0
- data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
- data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
- data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
- data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
- data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
- data/spec/bots/bot-with-pause/manifest.json +8 -0
- data/spec/bots/bot-with-pause/scraper.rb +16 -0
- data/spec/bots/bot-with-transformer/manifest.json +15 -0
- data/spec/bots/bot-with-transformer/scraper.rb +10 -0
- data/spec/bots/bot-with-transformer/transformer.rb +15 -0
- data/spec/bots/bot-with-transformers/manifest.json +20 -0
- data/spec/bots/bot-with-transformers/scraper.rb +10 -0
- data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
- data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
- data/spec/bots/invalid-json-bot/manifest.json +8 -0
- data/spec/bots/invalid-json-bot/scraper.rb +11 -0
- data/spec/bots/invalid-record-bot/manifest.json +8 -0
- data/spec/bots/invalid-record-bot/scraper.rb +11 -0
- data/spec/bots/logging-bot/manifest.json +8 -0
- data/spec/bots/logging-bot/scraper.rb +14 -0
- data/spec/bots/python-bot/manifest.json +8 -0
- data/spec/bots/python-bot/scraper.py +11 -0
- data/spec/bots/ruby-bot/manifest.json +8 -0
- data/spec/bots/ruby-bot/scraper.rb +10 -0
- data/spec/bots/slow-bot/manifest.json +8 -0
- data/spec/bots/slow-bot/scraper.rb +11 -0
- data/spec/lib/processor.rb +48 -0
- data/spec/lib/runner_spec.rb +244 -0
- data/spec/manual_spec.rb +55 -0
- data/spec/outputs/full-scraper.out +10 -0
- data/spec/outputs/full-transformer.out +10 -0
- data/spec/outputs/truncated-scraper.out +5 -0
- metadata +40 -19
- data/spec/dummy-bot-python/manifest.json +0 -15
- data/spec/dummy-bot-python/scraper.py +0 -11
- data/spec/dummy-bot-python/transformer.py +0 -15
- data/spec/dummy-bot-ruby/manifest.json +0 -15
- data/spec/dummy-bot-ruby/scraper.rb +0 -8
- data/spec/dummy-bot-ruby/transformer.rb +0 -12
- data/spec/dummy-broken-bot-ruby/manifest.json +0 -8
- data/spec/dummy-broken-bot-ruby/scraper.rb +0 -6
- data/spec/dummy-broken-bot-ruby/transformer.rb +0 -12
- data/spec/dummy-broken-bot-ruby-2/manifest.json +0 -15
- data/spec/dummy-broken-bot-ruby-2/scraper.rb +0 -4
- data/spec/dummy-broken-bot-ruby-2/transformer.rb +0 -11
- data/spec/dummy-broken-bot-ruby-3/manifest.json +0 -15
- data/spec/dummy-broken-bot-ruby-3/scraper.rb +0 -5
- data/spec/dummy-broken-bot-ruby-3/transformer.rb +0 -5
- data/spec/turbot_runner_spec.rb +0 -117
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YWYzYWQ4OTMwNGNiMDMxMjJlYzRlMDMyMGNjMmM0ZGNlZjA5MzdiNQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZTJjZGMxNDlkN2EyMmVjZDJjZWNmZWQ0ZWE5NTJjN2EyYjgxYjRkNQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
Yjg3YThjYzk5NTBhMjFjN2Q1MzMxNmY2N2ZhNDhmNDM3MmUxN2ZiYzgwZTM0
|
10
|
+
ZTM0ZWJmNDA2NDljZDk4YTM0NDlmMjc3ODk3ZjY3NDk3MjZhZTM1ZjJlNzFi
|
11
|
+
Mzg5ODdkODIwNjU3YjVmNmI3OWQ2YjBjZjZlMmIzNTk1MTZmZDQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NDQ2ZjY4YmE1NTI5NjRiM2U3ZTVlYjhlOTY1NDc2NGJjMGM1ODdhZDAzZjE5
|
14
|
+
ZWFhZWFmOTIwYTUyNjhjZDNiOWI3ZDliNWU1ODdlZDViZGYzNjZiZGYzMTg3
|
15
|
+
Y2NkZjE3ODVhYWI4YWQ2NGQzZTI4YTc0NTI3NzA2OGJhMjhiZDM=
|
File without changes
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'json-schema'
|
3
|
+
|
4
|
+
module TurbotRunner
  # Parses a single line of bot output as JSON, validates the resulting record
  # against the JSON schema for a data type, and reports the outcome to a
  # record handler.
  class Processor
    # @param runner [#interrupt, #interrupt_and_mark_as_failed, nil] object to
    #   notify when processing should stop. May be nil (for instance when
    #   previously written output is being re-read), in which case
    #   notifications are silently skipped.
    # @param data_type [#to_s] data type name used to locate the schema file.
    # @param record_handler [#handle_valid_record, #handle_invalid_record,
    #   #handle_invalid_json] receiver of the per-line outcome.
    def initialize(runner, data_type, record_handler)
      @runner = runner
      @data_type = data_type
      @record_handler = record_handler
    end

    # Processes one line of output.
    #
    # * A valid record is passed to +handle_valid_record+; a falsy return
    #   value from the handler interrupts the runner.
    # * An invalid record is passed to +handle_invalid_record+ and the runner
    #   is interrupted and marked as failed.
    # * A line that is not JSON is passed to +handle_invalid_json+ and the
    #   runner is interrupted and marked as failed.
    #
    # @param line [String] one line of script output.
    def process(line)
      record = JSON.parse(line)
      errors = validate(record)

      if errors.empty?
        keep_going = @record_handler.handle_valid_record(record, @data_type)
        interrupt unless keep_going
      else
        @record_handler.handle_invalid_record(record, @data_type, errors)
        interrupt_and_mark_as_failed
      end
    rescue JSON::ParserError
      @record_handler.handle_invalid_json(line)
      interrupt_and_mark_as_failed
    end

    # Asks the runner (if there is one) to stop.
    def interrupt
      @runner.interrupt unless @runner.nil?
    end

    # Validates a record against the schema for this processor's data type.
    #
    # @param record [Object] the parsed JSON value.
    # @return [Array<String>] error messages; empty when the record is valid.
    def validate(record)
      errors = JSON::Validator.fully_validate(schema, record, :errors_as_objects => true)

      errors.map do |error|
        case error[:message]
        when /The property '#\/' did not contain a required property of '(\w+)'/
          # Rephrase the validator's message for a missing top-level property.
          "Missing required attribute: #{Regexp.last_match(1)}"
        else
          error[:message]
        end
      end
    end

    # @return [String] memoized path to the schema file for the data type.
    def schema
      @schema ||= get_schema
    end

    # Builds the schema path: underscores and spaces in the data type name
    # become hyphens, and the file is looked up relative to this source file.
    #
    # @return [String] absolute path of "<data-type>-schema.json".
    def get_schema
      hyphenated_name = @data_type.to_s.gsub("_", "-").gsub(" ", "-")
      File.expand_path("../../../schema/schemas/#{hyphenated_name}-schema.json", __FILE__)
    end

    private

    # Asks the runner (if there is one) to stop and record a failure.
    def interrupt_and_mark_as_failed
      @runner.interrupt_and_mark_as_failed unless @runner.nil?
    end
  end
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
require 'shellwords'

module TurbotRunner
  # Runs the scraper and transformer scripts of the bot found in a directory,
  # and can re-process the output files those scripts produced.
  #
  # The bot is described by a manifest.json file in the directory; this class
  # reads its :language, :data_type and :transformers entries.
  class Runner
    # File extension expected for scripts written in each supported language.
    SCRIPT_EXTENSIONS = {
      'ruby' => '.rb',
      'python' => '.py',
    }.freeze

    # Internal stand-in for a script runner, used while re-reading stored
    # output: it records the interrupt notifications sent by a Processor.
    class ReplayState
      def initialize
        @interrupted = false
        @failed = false
      end

      def interrupt
        @interrupted = true
      end

      def interrupt_and_mark_as_failed
        @interrupted = true
        @failed = true
      end

      def interrupted?
        @interrupted
      end

      def failed?
        @failed
      end
    end

    attr_reader :directory

    # @param directory [String] directory containing manifest.json and scripts.
    # @param options [Hash] recognised keys:
    #   :record_handler - object notified about each processed record,
    #   :log_to_file    - when truthy, scripts' stderr is redirected to a file,
    #   :timeout        - passed through to ScriptRunner.
    # @raise [RuntimeError] if manifest.json is missing or is not valid JSON.
    def initialize(directory, options={})
      @directory = directory
      @config = load_config(directory)
      @record_handler = options[:record_handler]
      @log_to_file = options[:log_to_file]
      @timeout = options[:timeout]
    end

    # Recreates the output directory, then runs the scraper followed by each
    # transformer, stopping at the first script that does not succeed.
    #
    # @return [Boolean] true if every script succeeded.
    def run
      FileUtils.rm_rf(output_directory)
      FileUtils.mkdir_p(output_directory)

      return false unless run_scraper

      transformers.all? { |transformer| run_transformer(transformer) }
    end

    # Feeds the existing output file of the scraper, then of each transformer,
    # through a Processor, stopping at the first file that fails.
    #
    # @return [Boolean] true if no file was marked as failed.
    def process_output
      return false unless process_scraper_output

      transformers.all? { |transformer| process_transformer_output(transformer) }
    end

    private

    # Reads and parses manifest.json with symbolized keys.
    def load_config(directory)
      manifest_path = File.join(directory, 'manifest.json')
      raise "Could not find #{manifest_path}" unless File.exist?(manifest_path)

      begin
        # File.read rather than Kernel#open, which treats a leading "|" in
        # the path as a command to run.
        JSON.parse(File.read(manifest_path), :symbolize_names => true)
      rescue JSON::ParserError
        # TODO provide better error message
        raise "Could not parse #{manifest_path} as JSON"
      end
    end

    def run_scraper
      run_script(scraper_script, scraper_data_type)
    end

    # Transformers read the scraper's output file on stdin.
    def run_transformer(transformer)
      run_script(transformer[:file], transformer[:data_type], scraper_output_file)
    end

    def run_script(script, data_type, input_file=nil)
      command = build_command(script, input_file)

      ScriptRunner.new(
        command,
        output_file(script),
        data_type,
        :record_handler => @record_handler,
        :timeout => @timeout
      ).run
    end

    def process_scraper_output
      process_script_output(scraper_script, scraper_data_type)
    end

    def process_transformer_output(transformer)
      process_script_output(transformer[:file], transformer[:data_type])
    end

    # Replays a script's output file line by line through a Processor.
    #
    # The Processor is given a ReplayState instead of nil so that its
    # interrupt notifications have a receiver; reading stops at the first
    # notification.
    #
    # @return [Boolean] false if the processor marked the output as failed.
    def process_script_output(script, data_type)
      state = ReplayState.new
      handler = @record_handler || BaseHandler.new
      processor = Processor.new(state, data_type, handler)

      File.foreach(output_file(script)) do |line|
        processor.process(line)
        break if state.interrupted?
      end

      !state.failed?
    end

    # Builds the shell command that runs a script with stdout redirected to
    # its output file. Every path is shell-escaped so that directories
    # containing spaces or shell metacharacters work.
    #
    # @raise [RuntimeError] if the script's extension does not match the
    #   manifest's language.
    def build_command(script, input_file=nil)
      raise "Could not run #{script} with #{language}" unless script_extension == File.extname(script)

      path_to_script = File.join(@directory, script)

      parts = [
        language,
        Shellwords.escape(additional_args),
        Shellwords.escape(path_to_script),
        ">#{Shellwords.escape(output_file(script))}",
      ]
      parts << "2>#{Shellwords.escape(output_file(script, '.err'))}" if @log_to_file
      parts << "<#{Shellwords.escape(input_file)}" unless input_file.nil?

      parts.join(' ')
    end

    # Path in the output directory named after the script, with the script's
    # extension replaced by +extension+.
    def output_file(script, extension='.out')
      basename = File.basename(script, script_extension.to_s)
      File.join(output_directory, basename) + extension
    end

    # @return [String, nil] nil when the language is not supported.
    def script_extension
      SCRIPT_EXTENSIONS[language]
    end

    # Interpreter flag for the language: the Ruby interpreter preloads
    # prerun.rb, the Python interpreter is passed -u.
    def additional_args
      {
        'ruby' => "-r#{File.expand_path('../prerun.rb', __FILE__)}",
        'python' => '-u',
      }[language]
    end

    def scraper_script
      "scraper#{script_extension}"
    end

    def transformers
      @config[:transformers] || []
    end

    def scraper_output_file
      File.join(output_directory, 'scraper.out')
    end

    # @raise [RuntimeError] if the manifest has no language entry.
    def language
      name = @config[:language]
      raise "manifest.json in #{@directory} does not specify a language" if name.nil?

      name.downcase
    end

    def scraper_data_type
      @config[:data_type]
    end

    def output_directory
      File.join(@directory, 'output')
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'timeout'
|
2
|
+
|
3
|
+
# This is a useful blog post:
# http://blog.robseaman.com/2008/12/12/sending-ctrl-c-to-a-subprocess-with-ruby

# Ensure that SIGINT is ignored by the process running this, because
# ScriptRunner#kill_running_processes sends SIGINT to the whole process group.
trap('INT') {}

module TurbotRunner
  # Runs a shell command whose output is redirected to a known file, and
  # feeds that file to a Processor line by line while the command runs.
  class ScriptRunner
    # Seconds to wait between polls of the output file.
    POLL_INTERVAL = 0.1

    # @param command [String] shell command; expected to write to +output_file+.
    # @param output_file [String] path the command's output is written to.
    # @param data_type [Object] passed through to the Processor.
    # @param options [Hash] recognised keys:
    #   :record_handler - handler given to the Processor (default: BaseHandler),
    #   :timeout        - seconds the command may run (default: 3600).
    def initialize(command, output_file, data_type, options={})
      @command = command
      @output_file = output_file

      record_handler = options[:record_handler] || BaseHandler.new # A BaseHandler does nothing
      @processor = Processor.new(self, data_type, record_handler)

      @timeout = options[:timeout] || 3600
    end

    # Runs the command and processes its output as it is written.
    #
    # @return [Boolean] false if the run was marked as failed or the output
    #   file never appeared; otherwise the result of #run_command.
    def run
      @interrupted = false
      @failed = false

      # Start a thread that spawns a subprocess that runs the script and
      # redirects the script's output to a file at a known location.
      script_thread = Thread.new { run_command(@command) }

      output = wait_for_output_file(script_thread)
      # The command finished without ever creating its output file.
      return false if output.nil?

      begin
        process_lines(output, script_thread)
      ensure
        output.close
      end

      # script_thread may still be alive if we stopped reading because
      # @interrupted became true, and so we must kill it.
      kill_running_processes if script_thread.alive?

      @failed ? false : script_thread.value
    end

    # Stops reading output; called by the Processor.
    def interrupt
      @interrupted = true
    end

    # Stops reading output and makes #run return false; called by the Processor.
    def interrupt_and_mark_as_failed
      @interrupted = true
      @failed = true
    end

    private

    # Waits for the output file to be created so that we can read from it.
    # Gives up once the script thread has finished and the file still does
    # not exist, instead of polling forever.
    #
    # @return [File, nil] the opened file, or nil if it never appeared.
    def wait_for_output_file(script_thread)
      loop do
        # Sample liveness BEFORE trying to open: if the thread was already
        # dead at that point, a failed open is final.
        alive = script_thread.alive?

        begin
          return File.open(@output_file)
        rescue Errno::ENOENT
          return nil unless alive
          sleep POLL_INTERVAL
        end
      end
    end

    # Reads the output file line by line until either the script has exited
    # and the file is exhausted, or @interrupted becomes true.
    #
    # Only newline-terminated lines are processed while the script is running,
    # so a line the script is still writing is never processed half-finished.
    # An unterminated final line is processed once the script has exited.
    def process_lines(output, script_thread)
      pending = nil
      script_exited = false

      until @interrupted
        chunk = output.gets

        if chunk
          line = pending ? pending + chunk : chunk

          if line.end_with?("\n")
            pending = nil
            @processor.process(line)
          else
            pending = line
          end
        elsif script_exited
          # End of file was reached again after the script was seen to have
          # exited, so nothing more can arrive.
          break
        else
          # After noticing the script has exited, go round once more without
          # sleeping to pick up anything written just before it exited.
          script_exited = !script_thread.alive?
          sleep POLL_INTERVAL unless script_exited
        end
      end

      @processor.process(pending) unless @interrupted || pending.nil?
    end

    # Runs the command, giving up after @timeout seconds.
    #
    # @return [Boolean] true if the command exited with status 0 or was
    #   terminated by SIGINT; false otherwise, including on timeout.
    def run_command(command)
      Timeout::timeout(@timeout) do
        system(command)

        # A nil exitstatus indicates that the script was interrupted. A
        # termsig of 2 indicates that the script was interrupted by a SIGINT.
        $?.exitstatus == 0 || ($?.exitstatus.nil? && $?.termsig == 2)
      end
    rescue Timeout::Error
      kill_running_processes
      false
    end

    def kill_running_processes
      # Send SIGINT to each process in the current process group, having
      # already ensured that the current process itself ignores the signal.
      Process.kill('INT', 0)
    end
  end
end
|
data/lib/turbot_runner.rb
CHANGED
@@ -1,335 +1,5 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require '
|
4
|
-
require '
|
5
|
-
require '
|
6
|
-
require 'set'
|
7
|
-
require 'timeout'
|
8
|
-
require 'io/wait'
|
9
|
-
|
10
|
-
module TurbotRunner
|
11
|
-
class ScriptError < StandardError; end
|
12
|
-
|
13
|
-
class BaseRunner
|
14
|
-
|
15
|
-
attr_reader :wait_thread
|
16
|
-
attr_reader :error
|
17
|
-
|
18
|
-
def initialize(bot_directory)
|
19
|
-
@bot_directory = bot_directory
|
20
|
-
|
21
|
-
manifest_path = File.join(bot_directory, 'manifest.json')
|
22
|
-
raise "Could not find #{manifest_path}" unless File.exist?(manifest_path)
|
23
|
-
|
24
|
-
begin
|
25
|
-
@config = JSON.parse(open(manifest_path) {|f| f.read})
|
26
|
-
rescue JSON::ParserError
|
27
|
-
# TODO provide better error message
|
28
|
-
raise "Could not parse #{manifest_path} as JSON"
|
29
|
-
end
|
30
|
-
|
31
|
-
@status = :initialized
|
32
|
-
@interrupted = false
|
33
|
-
@schemas = {}
|
34
|
-
end
|
35
|
-
|
36
|
-
def run(opts={})
|
37
|
-
@status = :running
|
38
|
-
|
39
|
-
command = "#{interpreter_for(scraper_file)} #{scraper_file}"
|
40
|
-
data_type = @config['data_type']
|
41
|
-
|
42
|
-
scraper_runner = CommandRunner.new(command)
|
43
|
-
|
44
|
-
transformers.each do |config|
|
45
|
-
file = File.join(@bot_directory, config['file'])
|
46
|
-
command = "#{interpreter_for(file)} #{file}"
|
47
|
-
transformer_runner = CommandRunner.new(command)
|
48
|
-
config['runner'] = transformer_runner
|
49
|
-
end
|
50
|
-
|
51
|
-
begin
|
52
|
-
until @interrupted do
|
53
|
-
line = scraper_runner.get_next_line
|
54
|
-
|
55
|
-
if line.nil?
|
56
|
-
if scraper_runner.finished?
|
57
|
-
if scraper_runner.success?
|
58
|
-
break
|
59
|
-
else
|
60
|
-
scraper_runner.raise_if_failed!
|
61
|
-
end
|
62
|
-
else
|
63
|
-
next
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
begin
|
68
|
-
record = JSON.parse(line)
|
69
|
-
rescue JSON::ParserError
|
70
|
-
handle_non_json_output(line)
|
71
|
-
next
|
72
|
-
end
|
73
|
-
|
74
|
-
errors = validate(record, data_type)
|
75
|
-
|
76
|
-
if errors.empty?
|
77
|
-
handle_valid_record(record, data_type)
|
78
|
-
|
79
|
-
transformers.each do |transformer|
|
80
|
-
data_type1 = transformer['data_type']
|
81
|
-
|
82
|
-
runner = transformer['runner']
|
83
|
-
runner.raise_if_failed!
|
84
|
-
|
85
|
-
runner.send_line(line)
|
86
|
-
line1 = runner.get_next_line
|
87
|
-
|
88
|
-
if line1.nil?
|
89
|
-
if runner.finished?
|
90
|
-
if runner.success?
|
91
|
-
break
|
92
|
-
else
|
93
|
-
runner.raise_if_failed!
|
94
|
-
end
|
95
|
-
else
|
96
|
-
next
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
# A transformer can output an empty line if it doesn't make
|
101
|
-
# sense to transform a record.
|
102
|
-
if line1.strip.empty?
|
103
|
-
puts
|
104
|
-
next
|
105
|
-
end
|
106
|
-
|
107
|
-
begin
|
108
|
-
record1 = JSON.parse(line1)
|
109
|
-
rescue JSON::ParserError
|
110
|
-
handle_non_json_output(line1)
|
111
|
-
next
|
112
|
-
end
|
113
|
-
|
114
|
-
errors = validate(record1, data_type1)
|
115
|
-
|
116
|
-
if errors.empty?
|
117
|
-
handle_valid_record(record1, data_type1)
|
118
|
-
else
|
119
|
-
handle_invalid_record(record1, data_type1, errors)
|
120
|
-
end
|
121
|
-
end
|
122
|
-
else
|
123
|
-
handle_invalid_record(record, data_type, errors)
|
124
|
-
end
|
125
|
-
end
|
126
|
-
if @interrupted
|
127
|
-
@status = :interrupted
|
128
|
-
handle_interrupted_run
|
129
|
-
else
|
130
|
-
@status = :successful
|
131
|
-
handle_successful_run
|
132
|
-
end
|
133
|
-
|
134
|
-
handle_stderr(scraper_runner.drain_stderr)
|
135
|
-
|
136
|
-
transformers.each do |transformer|
|
137
|
-
runner = transformer['runner']
|
138
|
-
handle_stderr(runner.drain_stderr)
|
139
|
-
end
|
140
|
-
|
141
|
-
rescue ScriptError => e
|
142
|
-
if @interrupted
|
143
|
-
@status = :interrupted
|
144
|
-
handle_interrupted_run
|
145
|
-
else
|
146
|
-
@status = :failed
|
147
|
-
handle_failed_run
|
148
|
-
end
|
149
|
-
end
|
150
|
-
ensure
|
151
|
-
handle_stderr(scraper_runner.drain_stderr)
|
152
|
-
scraper_runner.close unless scraper_runner.nil?
|
153
|
-
|
154
|
-
transformers.each do |transformer|
|
155
|
-
runner = transformer['runner']
|
156
|
-
if !runner.nil?
|
157
|
-
handle_stderr(runner.drain_stderr)
|
158
|
-
runner.close
|
159
|
-
end
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
def successful?
|
164
|
-
@status == :successful
|
165
|
-
end
|
166
|
-
|
167
|
-
def interrupt
|
168
|
-
@interrupted = true
|
169
|
-
end
|
170
|
-
|
171
|
-
private
|
172
|
-
def transformers
|
173
|
-
@config['transformers'] || []
|
174
|
-
end
|
175
|
-
|
176
|
-
def validate(record, data_type)
|
177
|
-
schema = get_schema(data_type)
|
178
|
-
errors = JSON::Validator.fully_validate(schema, record, :errors_as_objects => true)
|
179
|
-
messages = errors.map do |error|
|
180
|
-
case error[:message]
|
181
|
-
when /The property '#\/' did not contain a required property of '(\w+)'/
|
182
|
-
"Missing required attribute: #{Regexp.last_match(1)}"
|
183
|
-
else
|
184
|
-
error[:message]
|
185
|
-
end
|
186
|
-
end
|
187
|
-
|
188
|
-
# if messages.empty?
|
189
|
-
# identifying_fields = identifying_fields_for_data_type(data_type)
|
190
|
-
# identifying_hash = record.slice(*identifying_fields)
|
191
|
-
#
|
192
|
-
# if identifying_hash.empty?
|
193
|
-
# messages << "Missing attributes for identifying fields: #{identifying_fields.join(', ')}"
|
194
|
-
# else
|
195
|
-
# record_uid = Digest::SHA1.hexdigest(identifying_hash.to_query)
|
196
|
-
# if @seen_uids.include?(record_uid)
|
197
|
-
# messages << "Values for identifying fields must be unique. There has already been a record with: #{identifying_hash.to_json}"
|
198
|
-
# else
|
199
|
-
# @seen_uids << record_uid
|
200
|
-
# end
|
201
|
-
# end
|
202
|
-
# end
|
203
|
-
|
204
|
-
messages
|
205
|
-
end
|
206
|
-
|
207
|
-
def identifying_fields_for_data_type(data_type)
|
208
|
-
if data_type == @config['data_type']
|
209
|
-
@config['identifying_fields']
|
210
|
-
else
|
211
|
-
transformers = @config['transformers'].select {|transformer| transformer['data_type'] == data_type}
|
212
|
-
raise "Expected to find precisely 1 transformer matching #{data_type} in manifest.json" unless transformers.size == 1
|
213
|
-
transformers[0]['identifying_fields']
|
214
|
-
end
|
215
|
-
end
|
216
|
-
|
217
|
-
def get_schema(data_type)
|
218
|
-
if !@schemas.has_key?(data_type)
|
219
|
-
hyphenated_name = data_type.to_s.gsub("_", "-").gsub(" ", "-")
|
220
|
-
@schemas[data_type] = File.expand_path("../../schema/schemas/#{hyphenated_name}-schema.json", __FILE__)
|
221
|
-
end
|
222
|
-
|
223
|
-
@schemas[data_type]
|
224
|
-
end
|
225
|
-
|
226
|
-
def handle_valid_record(record, data_type)
|
227
|
-
raise NotImplementedError
|
228
|
-
end
|
229
|
-
|
230
|
-
def handle_invalid_record(record, data_type, errors)
|
231
|
-
raise NotImplementedError
|
232
|
-
end
|
233
|
-
|
234
|
-
def handle_non_json_output(line)
|
235
|
-
raise NotImplementedError
|
236
|
-
end
|
237
|
-
|
238
|
-
def handle_successful_run
|
239
|
-
end
|
240
|
-
|
241
|
-
def handle_interrupted_run
|
242
|
-
end
|
243
|
-
|
244
|
-
def handle_stderr(data)
|
245
|
-
$stderr.write(data)
|
246
|
-
end
|
247
|
-
|
248
|
-
def scraper_file
|
249
|
-
candidates = Dir.glob(File.join(@bot_directory, 'scraper.{rb,py}'))
|
250
|
-
case candidates.size
|
251
|
-
when 0
|
252
|
-
raise 'Could not find scraper to run'
|
253
|
-
when 1
|
254
|
-
candidates.first
|
255
|
-
else
|
256
|
-
raise "Found multiple scrapers: #{candidates.join(', ')}"
|
257
|
-
end
|
258
|
-
end
|
259
|
-
|
260
|
-
def interpreter_for(file)
|
261
|
-
case file
|
262
|
-
when /\.rb$/
|
263
|
-
prerun = File.expand_path("../prerun.rb", __FILE__)
|
264
|
-
"ruby -r#{prerun}"
|
265
|
-
when /\.py$/
|
266
|
-
'python -u'
|
267
|
-
else
|
268
|
-
raise "Could not run #{file}"
|
269
|
-
end
|
270
|
-
end
|
271
|
-
end
|
272
|
-
|
273
|
-
# Wraps a child process started with Open3.popen3, giving line-oriented
# access to its stdout, best-effort access to its stderr, and its exit state.
class CommandRunner
  # Seconds to wait for a line of output when no :timeout option is given.
  DEFAULT_TIMEOUT = 3600

  # Maximum number of bytes read from stderr in one go.
  STDERR_CHUNK_SIZE = 256

  def to_s
    "CommandRunner #{@command}, pid #{@wait_thread.pid} (#{@wait_thread.status})"
  end

  # Starts the command immediately.
  #
  # @param command [String] command line to run.
  # @param opts [Hash] :timeout - seconds to wait for each line of output.
  #   The hash is not modified.
  def initialize(command, opts={})
    @command = command
    @timeout = opts[:timeout] || DEFAULT_TIMEOUT
    @stdin, @stdout, @stderr, @wait_thread = Open3.popen3(command)
  end

  # Reads the next line of the command's stdout.
  #
  # @return [String, nil] the line, or nil once stdout has reached end of file.
  # @raise [TurbotRunner::ScriptError] if no line arrives within the timeout.
  def get_next_line
    # IO#gets signals end of file by returning nil, never by raising
    # EOFError, so only the timeout needs handling.
    Timeout::timeout(@timeout) { @stdout.gets }
  rescue Timeout::Error
    raise TurbotRunner::ScriptError.new("#{@command} produced no output for #{@timeout} seconds")
  end

  # Returns whatever is currently available on the command's stderr without
  # blocking.
  #
  # @return [String] possibly empty; also empty at end of file or once
  #   stderr has been closed.
  def drain_stderr
    output = ''
    return output if @stderr.closed?

    begin
      loop { output << @stderr.read_nonblock(STDERR_CHUNK_SIZE) }
    rescue IO::WaitReadable, EOFError
      # Nothing more to read right now (or ever).
    end

    output
  end

  # @return [Boolean, nil] whether the command exited successfully; nil while
  #   it is still running.
  def success?
    if finished?
      @wait_thread.value.success?
    end
  end

  # @return [Boolean, nil] whether the command exited unsuccessfully; nil
  #   while it is still running.
  def failed?
    if finished?
      !@wait_thread.value.success?
    end
  end

  # @raise [TurbotRunner::ScriptError] if the command has exited unsuccessfully.
  def raise_if_failed!
    raise TurbotRunner::ScriptError if failed?
  end

  # @return [Boolean] true once the thread waiting on the command has ended.
  def finished?
    !@wait_thread.status
  end

  # Writes a line to the command's stdin.
  def send_line(line)
    @stdin.puts(line)
  end

  # Closes the command's stdin, drains and closes its stdout, closes its
  # stderr, and kills the thread waiting on it. Safe to call more than once.
  def close
    @stdin.close unless @stdin.closed?

    unless @stdout.closed?
      @stdout.read # drain pipe
      @stdout.close
    end

    @stderr.close unless @stderr.closed?
    @wait_thread.kill
  end
end
|
335
|
-
end
|
1
|
+
require 'turbot_runner/base_handler'
|
2
|
+
require 'turbot_runner/processor'
|
3
|
+
require 'turbot_runner/runner'
|
4
|
+
require 'turbot_runner/script_runner'
|
5
|
+
require 'turbot_runner/version'
|