turbot-runner 0.0.24 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/turbot_runner/base_handler.rb +13 -0
- data/lib/{prerun.rb → turbot_runner/prerun.rb} +0 -0
- data/lib/turbot_runner/processor.rb +55 -0
- data/lib/turbot_runner/runner.rb +150 -0
- data/lib/turbot_runner/script_runner.rb +90 -0
- data/lib/turbot_runner/version.rb +1 -1
- data/lib/turbot_runner.rb +5 -335
- data/spec/bots/bot-that-crashes-in-scraper/manifest.json +8 -0
- data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
- data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
- data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
- data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
- data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
- data/spec/bots/bot-with-pause/manifest.json +8 -0
- data/spec/bots/bot-with-pause/scraper.rb +16 -0
- data/spec/bots/bot-with-transformer/manifest.json +15 -0
- data/spec/bots/bot-with-transformer/scraper.rb +10 -0
- data/spec/bots/bot-with-transformer/transformer.rb +15 -0
- data/spec/bots/bot-with-transformers/manifest.json +20 -0
- data/spec/bots/bot-with-transformers/scraper.rb +10 -0
- data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
- data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
- data/spec/bots/invalid-json-bot/manifest.json +8 -0
- data/spec/bots/invalid-json-bot/scraper.rb +11 -0
- data/spec/bots/invalid-record-bot/manifest.json +8 -0
- data/spec/bots/invalid-record-bot/scraper.rb +11 -0
- data/spec/bots/logging-bot/manifest.json +8 -0
- data/spec/bots/logging-bot/scraper.rb +14 -0
- data/spec/bots/python-bot/manifest.json +8 -0
- data/spec/bots/python-bot/scraper.py +11 -0
- data/spec/bots/ruby-bot/manifest.json +8 -0
- data/spec/bots/ruby-bot/scraper.rb +10 -0
- data/spec/bots/slow-bot/manifest.json +8 -0
- data/spec/bots/slow-bot/scraper.rb +11 -0
- data/spec/lib/processor.rb +48 -0
- data/spec/lib/runner_spec.rb +244 -0
- data/spec/manual_spec.rb +55 -0
- data/spec/outputs/full-scraper.out +10 -0
- data/spec/outputs/full-transformer.out +10 -0
- data/spec/outputs/truncated-scraper.out +5 -0
- metadata +40 -19
- data/spec/dummy-bot-python/manifest.json +0 -15
- data/spec/dummy-bot-python/scraper.py +0 -11
- data/spec/dummy-bot-python/transformer.py +0 -15
- data/spec/dummy-bot-ruby/manifest.json +0 -15
- data/spec/dummy-bot-ruby/scraper.rb +0 -8
- data/spec/dummy-bot-ruby/transformer.rb +0 -12
- data/spec/dummy-broken-bot-ruby/manifest.json +0 -8
- data/spec/dummy-broken-bot-ruby/scraper.rb +0 -6
- data/spec/dummy-broken-bot-ruby/transformer.rb +0 -12
- data/spec/dummy-broken-bot-ruby-2/manifest.json +0 -15
- data/spec/dummy-broken-bot-ruby-2/scraper.rb +0 -4
- data/spec/dummy-broken-bot-ruby-2/transformer.rb +0 -11
- data/spec/dummy-broken-bot-ruby-3/manifest.json +0 -15
- data/spec/dummy-broken-bot-ruby-3/scraper.rb +0 -5
- data/spec/dummy-broken-bot-ruby-3/transformer.rb +0 -5
- data/spec/turbot_runner_spec.rb +0 -117
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YWYzYWQ4OTMwNGNiMDMxMjJlYzRlMDMyMGNjMmM0ZGNlZjA5MzdiNQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZTJjZGMxNDlkN2EyMmVjZDJjZWNmZWQ0ZWE5NTJjN2EyYjgxYjRkNQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
Yjg3YThjYzk5NTBhMjFjN2Q1MzMxNmY2N2ZhNDhmNDM3MmUxN2ZiYzgwZTM0
|
10
|
+
ZTM0ZWJmNDA2NDljZDk4YTM0NDlmMjc3ODk3ZjY3NDk3MjZhZTM1ZjJlNzFi
|
11
|
+
Mzg5ODdkODIwNjU3YjVmNmI3OWQ2YjBjZjZlMmIzNTk1MTZmZDQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NDQ2ZjY4YmE1NTI5NjRiM2U3ZTVlYjhlOTY1NDc2NGJjMGM1ODdhZDAzZjE5
|
14
|
+
ZWFhZWFmOTIwYTUyNjhjZDNiOWI3ZDliNWU1ODdlZDViZGYzNjZiZGYzMTg3
|
15
|
+
Y2NkZjE3ODVhYWI4YWQ2NGQzZTI4YTc0NTI3NzA2OGJhMjhiZDM=
|
File without changes
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'json-schema'
|
3
|
+
|
4
|
+
module TurbotRunner
|
5
|
+
# Parses one line of script output as JSON, validates the resulting record
# against the JSON schema for the configured data type, and reports the
# outcome to a record handler.
#
# The runner is optional: Runner#process_script_output builds a Processor
# with a nil runner when replaying output files, so every notification sent
# to the runner is skipped when no runner was given.
class Processor
  # @param runner [#interrupt, #interrupt_and_mark_as_failed, nil] object to
  #   notify when processing should stop; may be nil
  # @param data_type [#to_s] name of the data type; used to locate the schema
  # @param record_handler [#handle_valid_record, #handle_invalid_record,
  #   #handle_invalid_json] receives the outcome for each line
  def initialize(runner, data_type, record_handler)
    @runner = runner
    @data_type = data_type
    @record_handler = record_handler
  end

  # Processes a single line of output.
  #
  # * Valid record: passed to handle_valid_record; a falsy return value from
  #   the handler interrupts the runner.
  # * Invalid record: passed to handle_invalid_record with the validation
  #   messages, and the runner is interrupted and marked as failed.
  # * Unparseable line: passed to handle_invalid_json, and the runner is
  #   interrupted and marked as failed.
  #
  # @param line [String] one line of script output
  def process(line)
    record = JSON.parse(line)
    errors = validate(record)

    if errors.empty?
      keep_going = @record_handler.handle_valid_record(record, @data_type)
      interrupt unless keep_going
    else
      @record_handler.handle_invalid_record(record, @data_type, errors)
      interrupt_and_mark_as_failed
    end
  rescue JSON::ParserError
    @record_handler.handle_invalid_json(line)
    interrupt_and_mark_as_failed
  end

  # Asks the runner (if there is one) to stop reading further output.
  def interrupt
    @runner.interrupt if @runner
  end

  # Validates a record against the schema for this processor's data type.
  #
  # @param record [Object] the parsed JSON value
  # @return [Array<String>] validation messages; empty when the record is
  #   valid. "Missing required property" messages from the validator are
  #   rewritten into a shorter "Missing required attribute: NAME" form.
  def validate(record)
    errors = JSON::Validator.fully_validate(schema, record, :errors_as_objects => true)

    errors.map do |error|
      case error[:message]
      when /The property '#\/' did not contain a required property of '(\w+)'/
        "Missing required attribute: #{Regexp.last_match(1)}"
      else
        error[:message]
      end
    end
  end

  # @return [String] memoized path of the schema file (see #get_schema)
  def schema
    @schema ||= get_schema
  end

  # Builds the path of the schema file for the data type. Underscores and
  # spaces in the data type name become hyphens.
  #
  # NOTE(review): this returns a file path rather than a parsed schema, so it
  # presumably relies on JSON::Validator accepting a path -- confirm against
  # the json-schema documentation.
  #
  # @return [String] absolute path to "<data-type>-schema.json"
  def get_schema
    hyphenated_name = @data_type.to_s.gsub("_", "-").gsub(" ", "-")
    File.expand_path("../../../schema/schemas/#{hyphenated_name}-schema.json", __FILE__)
  end

  private

  # Asks the runner (if there is one) to stop and to record the run as failed.
  def interrupt_and_mark_as_failed
    @runner.interrupt_and_mark_as_failed if @runner
  end
end
|
55
|
+
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
module TurbotRunner
|
5
|
+
# Runs a bot: the scraper script and then each transformer listed in the
# bot's manifest.json. Each script's standard output is redirected to a file
# in "<directory>/output", and every transformer reads the scraper's output
# file on its standard input.
class Runner
  # Maps the manifest's (downcased) language to the expected script extension.
  SCRIPT_EXTENSIONS = {
    'ruby' => '.rb',
    'python' => '.py',
  }.freeze

  attr_reader :directory

  # @param directory [String] bot directory; must contain manifest.json
  # @param options [Hash] recognised keys:
  #   :record_handler - passed on to ScriptRunner and Processor
  #   :log_to_file    - when truthy, script stderr is redirected to a ".err"
  #                     file next to the ".out" file
  #   :timeout        - passed on to ScriptRunner
  # @raise [RuntimeError] if manifest.json is missing or is not valid JSON
  def initialize(directory, options={})
    @directory = directory
    @config = load_config(directory)
    @record_handler = options[:record_handler]
    @log_to_file = options[:log_to_file]
    @timeout = options[:timeout]
  end

  # Recreates the output directory from scratch, then runs the scraper
  # followed by each transformer in manifest order, stopping at the first
  # script whose run returns a falsy value.
  #
  # @return [Boolean] true if every script ran successfully, false otherwise
  def run
    FileUtils.rm_rf(output_directory)
    FileUtils.mkdir_p(output_directory)

    return false unless run_scraper

    transformers.all? { |transformer| run_transformer(transformer) }
  end

  # Feeds the existing output files of the scraper and of each transformer,
  # line by line, through a Processor.
  #
  # NOTE(review): process_script_output returns the value of File.open's
  # block, which is always truthy, so as written this only returns false if
  # that changes; failures surface through the record handler or as
  # exceptions (e.g. a missing output file).
  #
  # @return [Boolean]
  def process_output
    return false unless process_scraper_output

    transformers.all? { |transformer| process_transformer_output(transformer) }
  end

  private

  # Reads and parses "<directory>/manifest.json" with symbolized keys.
  def load_config(directory)
    manifest_path = File.join(directory, 'manifest.json')
    raise "Could not find #{manifest_path}" unless File.exist?(manifest_path)

    begin
      # File.read rather than Kernel#open: open treats a path starting with
      # "|" as a command to run.
      JSON.parse(File.read(manifest_path), :symbolize_names => true)
    rescue JSON::ParserError
      # TODO provide better error message
      raise "Could not parse #{manifest_path} as JSON"
    end
  end

  def run_scraper
    run_script(scraper_script, scraper_data_type)
  end

  # Runs one transformer (a manifest entry with :file and :data_type keys)
  # with the scraper's output file as its standard input.
  def run_transformer(transformer)
    run_script(transformer[:file], transformer[:data_type], scraper_output_file)
  end

  # Builds the shell command for the script and hands it to a ScriptRunner.
  #
  # @return the result of ScriptRunner#run
  def run_script(script, data_type, input_file=nil)
    command = build_command(script, input_file)

    ScriptRunner.new(
      command,
      output_file(script),
      data_type,
      :record_handler => @record_handler,
      :timeout => @timeout
    ).run
  end

  def process_scraper_output
    process_script_output(scraper_script, scraper_data_type)
  end

  def process_transformer_output(transformer)
    process_script_output(transformer[:file], transformer[:data_type])
  end

  # Passes each line of the script's existing output file to a Processor.
  # No script is running at this point, so the Processor gets no runner (nil).
  def process_script_output(script, data_type)
    processor = Processor.new(nil, data_type, @record_handler)

    File.open(output_file(script)) do |file|
      file.each_line { |line| processor.process(line) }
    end
  end

  # Builds the shell command line that runs the script with stdout (and
  # optionally stderr and stdin) redirected.
  #
  # NOTE(review): paths are interpolated into the command unescaped, so a bot
  # directory or script name containing spaces or shell metacharacters will
  # not work as intended -- consider Shellwords.escape.
  #
  # @raise [RuntimeError] if the script's extension does not match the
  #   manifest's language
  def build_command(script, input_file=nil)
    raise "Could not run #{script} with #{language}" unless script_extension == File.extname(script)

    path_to_script = File.join(@directory, script)
    command = "#{language} #{additional_args} #{path_to_script} >#{output_file(script)}"
    command << " 2>#{output_file(script, '.err')}" if @log_to_file
    command << " <#{input_file}" unless input_file.nil?

    command
  end

  # Path in the output directory named after the script, with the script's
  # extension replaced by the given one (".out" by default).
  def output_file(script, extension='.out')
    basename = File.basename(script, script_extension)
    File.join(output_directory, basename) + extension
  end

  # @return [String, nil] extension for the manifest's language; nil if the
  #   language is not ruby or python
  def script_extension
    SCRIPT_EXTENSIONS[language]
  end

  # Extra interpreter arguments: ruby preloads prerun.rb from this file's
  # directory; python gets -u.
  def additional_args
    {
      'ruby' => "-r#{File.expand_path('../prerun.rb', __FILE__)}",
      'python' => '-u',
    }[language]
  end

  def scraper_script
    "scraper#{script_extension}"
  end

  # Transformer entries from the manifest; empty if the key is absent.
  def transformers
    @config[:transformers] || []
  end

  def scraper_output_file
    File.join(output_directory, 'scraper.out')
  end

  def language
    @config[:language].downcase
  end

  def scraper_data_type
    @config[:data_type]
  end

  def output_directory
    File.join(@directory, 'output')
  end
end
|
150
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'timeout'
|
2
|
+
|
3
|
+
# This is a useful blog post:
|
4
|
+
# http://blog.robseaman.com/2008/12/12/sending-ctrl-c-to-a-subprocess-with-ruby
|
5
|
+
|
6
|
+
# Ensure that SIGINT is ignored by the process running this.
|
7
|
+
trap('INT') {}
|
8
|
+
|
9
|
+
module TurbotRunner
|
10
|
+
# Runs a shell command in a background thread and, while it runs, reads the
# file the command writes its output to, passing each line to a Processor.
class ScriptRunner
  # @param command [String] shell command to run; it is expected to create
  #   and write to output_file
  # @param output_file [String] path of the file to read lines from
  # @param data_type passed on to the Processor
  # @param options [Hash] recognised keys:
  #   :record_handler - handler given to the Processor (default: BaseHandler.new)
  #   :timeout        - seconds the command may run for (default: 3600)
  def initialize(command, output_file, data_type, options={})
    @command = command
    @output_file = output_file

    record_handler = options[:record_handler] || BaseHandler.new  # A BaseHandler does nothing
    @processor = Processor.new(self, data_type, record_handler)

    @timeout = options[:timeout] || 3600
  end

  # Runs the command and processes its output as it is produced.
  #
  # @return [Boolean] false if the run was marked as failed, or if the
  #   command finished without ever creating the output file; otherwise the
  #   result of the command itself (see #run_command)
  def run
    @interrupted = false
    @failed = false

    # Start a thread that spawns a subprocess that runs the script and
    # redirects the script's output to a file at a known location.
    script_thread = Thread.new { run_command(@command) }

    output = open_output_file(script_thread)
    # Without an output file there is nothing to process.
    return false if output.nil?

    begin
      process_output_lines(output, script_thread)
    ensure
      output.close
    end

    # script_thread may still be alive if we stopped reading because
    # @interrupted became true, and so we must kill it.
    kill_running_processes if script_thread.alive?

    # Thread#value waits for the thread to finish before returning its result.
    @failed ? false : script_thread.value
  end

  # Stops reading output; the script is then sent SIGINT by #run.
  def interrupt
    @interrupted = true
  end

  # As #interrupt, and additionally makes #run return false.
  def interrupt_and_mark_as_failed
    @interrupted = true
    @failed = true
  end

  private

  # Waits for the output file to be created and opens it for reading.
  #
  # Liveness is sampled BEFORE each attempt to open the file: if the script
  # had already finished and the file still does not exist, it never will,
  # so we give up instead of retrying forever.
  #
  # @return [File, nil] the open file, or nil if the script finished without
  #   creating it
  def open_output_file(script_thread)
    loop do
      script_finished = !script_thread.alive?

      begin
        return File.open(@output_file)
      rescue Errno::ENOENT
        return nil if script_finished
        sleep 0.1
      end
    end
  end

  # Reads from the output file line by line until either we reach the end of
  # the file and the script has exited, or @interrupted becomes true.
  #
  # Two details keep output from being lost or mangled:
  # * liveness is sampled BEFORE each read, so anything the script wrote just
  #   before exiting is still read on the next pass;
  # * a chunk that does not yet end in a newline is held back until the rest
  #   of the line arrives (or the script has exited), so a line that is
  #   still being written is not handed to the processor in pieces.
  def process_output_lines(output, script_thread)
    pending = ''

    until @interrupted
      script_finished = !script_thread.alive?

      begin
        pending += output.readline

        if pending.end_with?("\n")
          @processor.process(pending)
          pending = ''
        end
      rescue EOFError
        if script_finished
          # Final line without a trailing newline.
          @processor.process(pending) unless pending.empty?
          break
        end

        sleep 0.1
      end
    end
  end

  # Runs the command through Kernel#system, limited to @timeout seconds.
  #
  # @return [Boolean] true if the command exited with status 0 or was
  #   terminated by SIGINT; false otherwise, including on timeout
  def run_command(command)
    Timeout.timeout(@timeout) do
      system(command)

      # A nil exitstatus indicates that the script was interrupted. A
      # termsig of 2 indicates that the script was interrupted by a SIGINT.
      $?.exitstatus == 0 || ($?.exitstatus.nil? && $?.termsig == 2)
    end
  rescue Timeout::Error
    kill_running_processes
    false
  end

  def kill_running_processes
    # Send SIGINT to each process in the current process group, having
    # already ensured (via the trap('INT') at the top of this file) that the
    # current process itself ignores the signal.
    Process.kill('INT', 0)
  end
end
|
90
|
+
end
|
data/lib/turbot_runner.rb
CHANGED
@@ -1,335 +1,5 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require '
|
4
|
-
require '
|
5
|
-
require '
|
6
|
-
require 'set'
|
7
|
-
require 'timeout'
|
8
|
-
require 'io/wait'
|
9
|
-
|
10
|
-
module TurbotRunner
|
11
|
-
class ScriptError < StandardError; end
|
12
|
-
|
13
|
-
class BaseRunner
|
14
|
-
|
15
|
-
attr_reader :wait_thread
|
16
|
-
attr_reader :error
|
17
|
-
|
18
|
-
def initialize(bot_directory)
|
19
|
-
@bot_directory = bot_directory
|
20
|
-
|
21
|
-
manifest_path = File.join(bot_directory, 'manifest.json')
|
22
|
-
raise "Could not find #{manifest_path}" unless File.exist?(manifest_path)
|
23
|
-
|
24
|
-
begin
|
25
|
-
@config = JSON.parse(open(manifest_path) {|f| f.read})
|
26
|
-
rescue JSON::ParserError
|
27
|
-
# TODO provide better error message
|
28
|
-
raise "Could not parse #{manifest_path} as JSON"
|
29
|
-
end
|
30
|
-
|
31
|
-
@status = :initialized
|
32
|
-
@interrupted = false
|
33
|
-
@schemas = {}
|
34
|
-
end
|
35
|
-
|
36
|
-
def run(opts={})
|
37
|
-
@status = :running
|
38
|
-
|
39
|
-
command = "#{interpreter_for(scraper_file)} #{scraper_file}"
|
40
|
-
data_type = @config['data_type']
|
41
|
-
|
42
|
-
scraper_runner = CommandRunner.new(command)
|
43
|
-
|
44
|
-
transformers.each do |config|
|
45
|
-
file = File.join(@bot_directory, config['file'])
|
46
|
-
command = "#{interpreter_for(file)} #{file}"
|
47
|
-
transformer_runner = CommandRunner.new(command)
|
48
|
-
config['runner'] = transformer_runner
|
49
|
-
end
|
50
|
-
|
51
|
-
begin
|
52
|
-
until @interrupted do
|
53
|
-
line = scraper_runner.get_next_line
|
54
|
-
|
55
|
-
if line.nil?
|
56
|
-
if scraper_runner.finished?
|
57
|
-
if scraper_runner.success?
|
58
|
-
break
|
59
|
-
else
|
60
|
-
scraper_runner.raise_if_failed!
|
61
|
-
end
|
62
|
-
else
|
63
|
-
next
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
begin
|
68
|
-
record = JSON.parse(line)
|
69
|
-
rescue JSON::ParserError
|
70
|
-
handle_non_json_output(line)
|
71
|
-
next
|
72
|
-
end
|
73
|
-
|
74
|
-
errors = validate(record, data_type)
|
75
|
-
|
76
|
-
if errors.empty?
|
77
|
-
handle_valid_record(record, data_type)
|
78
|
-
|
79
|
-
transformers.each do |transformer|
|
80
|
-
data_type1 = transformer['data_type']
|
81
|
-
|
82
|
-
runner = transformer['runner']
|
83
|
-
runner.raise_if_failed!
|
84
|
-
|
85
|
-
runner.send_line(line)
|
86
|
-
line1 = runner.get_next_line
|
87
|
-
|
88
|
-
if line1.nil?
|
89
|
-
if runner.finished?
|
90
|
-
if runner.success?
|
91
|
-
break
|
92
|
-
else
|
93
|
-
runner.raise_if_failed!
|
94
|
-
end
|
95
|
-
else
|
96
|
-
next
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
# A transformer can output an empty line if it doesn't make
|
101
|
-
# sense to transform a record.
|
102
|
-
if line1.strip.empty?
|
103
|
-
puts
|
104
|
-
next
|
105
|
-
end
|
106
|
-
|
107
|
-
begin
|
108
|
-
record1 = JSON.parse(line1)
|
109
|
-
rescue JSON::ParserError
|
110
|
-
handle_non_json_output(line1)
|
111
|
-
next
|
112
|
-
end
|
113
|
-
|
114
|
-
errors = validate(record1, data_type1)
|
115
|
-
|
116
|
-
if errors.empty?
|
117
|
-
handle_valid_record(record1, data_type1)
|
118
|
-
else
|
119
|
-
handle_invalid_record(record1, data_type1, errors)
|
120
|
-
end
|
121
|
-
end
|
122
|
-
else
|
123
|
-
handle_invalid_record(record, data_type, errors)
|
124
|
-
end
|
125
|
-
end
|
126
|
-
if @interrupted
|
127
|
-
@status = :interrupted
|
128
|
-
handle_interrupted_run
|
129
|
-
else
|
130
|
-
@status = :successful
|
131
|
-
handle_successful_run
|
132
|
-
end
|
133
|
-
|
134
|
-
handle_stderr(scraper_runner.drain_stderr)
|
135
|
-
|
136
|
-
transformers.each do |transformer|
|
137
|
-
runner = transformer['runner']
|
138
|
-
handle_stderr(runner.drain_stderr)
|
139
|
-
end
|
140
|
-
|
141
|
-
rescue ScriptError => e
|
142
|
-
if @interrupted
|
143
|
-
@status = :interrupted
|
144
|
-
handle_interrupted_run
|
145
|
-
else
|
146
|
-
@status = :failed
|
147
|
-
handle_failed_run
|
148
|
-
end
|
149
|
-
end
|
150
|
-
ensure
|
151
|
-
handle_stderr(scraper_runner.drain_stderr)
|
152
|
-
scraper_runner.close unless scraper_runner.nil?
|
153
|
-
|
154
|
-
transformers.each do |transformer|
|
155
|
-
runner = transformer['runner']
|
156
|
-
if !runner.nil?
|
157
|
-
handle_stderr(runner.drain_stderr)
|
158
|
-
runner.close
|
159
|
-
end
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
def successful?
|
164
|
-
@status == :successful
|
165
|
-
end
|
166
|
-
|
167
|
-
def interrupt
|
168
|
-
@interrupted = true
|
169
|
-
end
|
170
|
-
|
171
|
-
private
|
172
|
-
def transformers
|
173
|
-
@config['transformers'] || []
|
174
|
-
end
|
175
|
-
|
176
|
-
def validate(record, data_type)
|
177
|
-
schema = get_schema(data_type)
|
178
|
-
errors = JSON::Validator.fully_validate(schema, record, :errors_as_objects => true)
|
179
|
-
messages = errors.map do |error|
|
180
|
-
case error[:message]
|
181
|
-
when /The property '#\/' did not contain a required property of '(\w+)'/
|
182
|
-
"Missing required attribute: #{Regexp.last_match(1)}"
|
183
|
-
else
|
184
|
-
error[:message]
|
185
|
-
end
|
186
|
-
end
|
187
|
-
|
188
|
-
# if messages.empty?
|
189
|
-
# identifying_fields = identifying_fields_for_data_type(data_type)
|
190
|
-
# identifying_hash = record.slice(*identifying_fields)
|
191
|
-
#
|
192
|
-
# if identifying_hash.empty?
|
193
|
-
# messages << "Missing attributes for identifying fields: #{identifying_fields.join(', ')}"
|
194
|
-
# else
|
195
|
-
# record_uid = Digest::SHA1.hexdigest(identifying_hash.to_query)
|
196
|
-
# if @seen_uids.include?(record_uid)
|
197
|
-
# messages << "Values for identifying fields must be unique. There has already been a record with: #{identifying_hash.to_json}"
|
198
|
-
# else
|
199
|
-
# @seen_uids << record_uid
|
200
|
-
# end
|
201
|
-
# end
|
202
|
-
# end
|
203
|
-
|
204
|
-
messages
|
205
|
-
end
|
206
|
-
|
207
|
-
def identifying_fields_for_data_type(data_type)
|
208
|
-
if data_type == @config['data_type']
|
209
|
-
@config['identifying_fields']
|
210
|
-
else
|
211
|
-
transformers = @config['transformers'].select {|transformer| transformer['data_type'] == data_type}
|
212
|
-
raise "Expected to find precisely 1 transformer matching #{data_type} in manifest.json" unless transformers.size == 1
|
213
|
-
transformers[0]['identifying_fields']
|
214
|
-
end
|
215
|
-
end
|
216
|
-
|
217
|
-
def get_schema(data_type)
|
218
|
-
if !@schemas.has_key?(data_type)
|
219
|
-
hyphenated_name = data_type.to_s.gsub("_", "-").gsub(" ", "-")
|
220
|
-
@schemas[data_type] = File.expand_path("../../schema/schemas/#{hyphenated_name}-schema.json", __FILE__)
|
221
|
-
end
|
222
|
-
|
223
|
-
@schemas[data_type]
|
224
|
-
end
|
225
|
-
|
226
|
-
def handle_valid_record(record, data_type)
|
227
|
-
raise NotImplementedError
|
228
|
-
end
|
229
|
-
|
230
|
-
def handle_invalid_record(record, data_type, errors)
|
231
|
-
raise NotImplementedError
|
232
|
-
end
|
233
|
-
|
234
|
-
def handle_non_json_output(line)
|
235
|
-
raise NotImplementedError
|
236
|
-
end
|
237
|
-
|
238
|
-
def handle_successful_run
|
239
|
-
end
|
240
|
-
|
241
|
-
def handle_interrupted_run
|
242
|
-
end
|
243
|
-
|
244
|
-
def handle_stderr(data)
|
245
|
-
$stderr.write(data)
|
246
|
-
end
|
247
|
-
|
248
|
-
def scraper_file
|
249
|
-
candidates = Dir.glob(File.join(@bot_directory, 'scraper.{rb,py}'))
|
250
|
-
case candidates.size
|
251
|
-
when 0
|
252
|
-
raise 'Could not find scraper to run'
|
253
|
-
when 1
|
254
|
-
candidates.first
|
255
|
-
else
|
256
|
-
raise "Found multiple scrapers: #{candidates.join(', ')}"
|
257
|
-
end
|
258
|
-
end
|
259
|
-
|
260
|
-
def interpreter_for(file)
|
261
|
-
case file
|
262
|
-
when /\.rb$/
|
263
|
-
prerun = File.expand_path("../prerun.rb", __FILE__)
|
264
|
-
"ruby -r#{prerun}"
|
265
|
-
when /\.py$/
|
266
|
-
'python -u'
|
267
|
-
else
|
268
|
-
raise "Could not run #{file}"
|
269
|
-
end
|
270
|
-
end
|
271
|
-
end
|
272
|
-
|
273
|
-
class CommandRunner
|
274
|
-
|
275
|
-
def to_s
|
276
|
-
"CommandRunner #{@command}, pid #{@wait_thread.pid} (#{@wait_thread.status})"
|
277
|
-
end
|
278
|
-
|
279
|
-
def initialize(command, opts={})
|
280
|
-
@command = command
|
281
|
-
@timeout = opts[:timeout] ||= 3600
|
282
|
-
@stdin, @stdout, @stderr, @wait_thread = Open3.popen3(command)
|
283
|
-
end
|
284
|
-
|
285
|
-
def get_next_line
|
286
|
-
begin
|
287
|
-
Timeout::timeout(@timeout) { @stdout.gets }
|
288
|
-
rescue Timeout::Error
|
289
|
-
raise TurbotRunner::ScriptError.new("#{@command} produced no output for #{@timeout} seconds")
|
290
|
-
rescue EOFError
|
291
|
-
raise_if_failed!
|
292
|
-
return nil
|
293
|
-
end
|
294
|
-
end
|
295
|
-
|
296
|
-
def drain_stderr
|
297
|
-
output = ''
|
298
|
-
while @stderr.ready?
|
299
|
-
output += @stderr.read(256)
|
300
|
-
end
|
301
|
-
output
|
302
|
-
end
|
303
|
-
|
304
|
-
def success?
|
305
|
-
if finished?
|
306
|
-
@wait_thread.value.success?
|
307
|
-
end
|
308
|
-
end
|
309
|
-
|
310
|
-
def failed?
|
311
|
-
if finished?
|
312
|
-
!@wait_thread.value.success?
|
313
|
-
end
|
314
|
-
end
|
315
|
-
|
316
|
-
def raise_if_failed!
|
317
|
-
raise TurbotRunner::ScriptError if failed?
|
318
|
-
end
|
319
|
-
|
320
|
-
def finished?
|
321
|
-
!@wait_thread.status
|
322
|
-
end
|
323
|
-
|
324
|
-
def send_line(line)
|
325
|
-
@stdin.puts(line)
|
326
|
-
end
|
327
|
-
|
328
|
-
def close
|
329
|
-
@stdin.close
|
330
|
-
@stdout.read # drain pipe
|
331
|
-
@stdout.close
|
332
|
-
@wait_thread.kill
|
333
|
-
end
|
334
|
-
end
|
335
|
-
end
|
1
|
+
require 'turbot_runner/base_handler'
|
2
|
+
require 'turbot_runner/processor'
|
3
|
+
require 'turbot_runner/runner'
|
4
|
+
require 'turbot_runner/script_runner'
|
5
|
+
require 'turbot_runner/version'
|