adstax-spark-job-manager 0.1.0
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE +15 -0
- data/README.md +101 -0
- data/Rakefile +8 -0
- data/adstax-spark-job-manager.gemspec +27 -0
- data/bin/adstax-spark-job-manager +325 -0
- metadata +123 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: cc80ab6b9302b75a919ebb704ea9aae8d9fdd4dd
+  data.tar.gz: ea6fec36162de3c22244ae096674e2d6c4de0f13
+SHA512:
+  metadata.gz: 3c368582d4553c96e46b7e405cf4c82be59e2448c3a40a1e7f3dc923bba0b3dc8286029ebaae71a8b7350d8a1d20da193af1c2f3bcf95312f09f0bbd419c43c5
+  data.tar.gz: d7d00b40bbc6d4214805f3b39f0268ecaccf773e0891d5772f7ebbacdbea578275bc7821945821a2d246a5bec1342d2ef49e4b2b93ea5fd838aeadda7ace71d7
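These are the standard RubyGems package digests: SHA1 and SHA512 checksums of the `metadata.gz` and `data.tar.gz` archives packed inside the `.gem` file. A quick way to verify them locally (a sketch, assuming you have fetched the gem and unpacked it with `tar`, since a `.gem` is a plain tar archive):

```ruby
require 'digest'

# Run next to the archives extracted from adstax-spark-job-manager-0.1.0.gem.
%w[metadata.gz data.tar.gz].each do |archive|
  data = File.binread(archive)
  puts "#{archive} SHA1:   #{Digest::SHA1.hexdigest(data)}"
  puts "#{archive} SHA512: #{Digest::SHA512.hexdigest(data)}"
end
```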
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,15 @@
+This software is licensed under the Apache 2 license, quoted below.
+
+Copyright 2016 ShiftForward, S.A. [http://www.shiftforward.eu]
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not
+use this file except in compliance with the License. You may obtain a copy of
+the License at
+
+  [http://www.apache.org/licenses/LICENSE-2.0]
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+License for the specific language governing permissions and limitations under
+the License.
data/README.md
ADDED
@@ -0,0 +1,101 @@
+# AdStax Spark Job Manager
+
+The AdStax Spark Job Manager is a gem to manage Spark jobs running in an AdStax
+cluster.
+
+## Installation
+
+### From RubyGems
+
+Make sure you have [ruby][ruby-install] (at least v2.0.0) installed, and just
+run:
+
+    $ gem install adstax-spark-job-manager
+
+[ruby-install]: https://www.ruby-lang.org/en/documentation/installation/
+
+### From source
+
+Clone this repo and build the gem:
+
+    $ git clone git://github.com/ShiftForward/adstax-spark-job-manager.git
+    $ gem build adstax-spark-job-manager.gemspec
+    $ gem install adstax-spark-job-manager-0.1.0.gem
+
+## Usage
+
+The AdStax Spark Job Manager publishes an `adstax-spark-job-manager` binary,
+which provides a set of utilities to submit, kill and query the status of Spark
+jobs running on an AdStax cluster. See the help for the command (running it
+with `-h`) for more details.
+
+The available actions are `submit`, `kill`, `status` and `log`. To submit a
+job, provide the `--adstax-host` parameter (pointing to where the AdStax
+instance is running) and a `--jar` parameter pointing to a bundled jar with
+your application and all of its dependencies, including at least one
+implementation of `eu.shiftforward.adstax.spark.SparkJob`. Note that you don't
+need to bundle the `spark-core` dependency, as it will be provided at runtime.
+The `--job` parameter should be the fully qualified name of the class extending
+`eu.shiftforward.adstax.spark.SparkJob` which is going to be used as the Spark
+job to run. Everything following the required parameters will be used as
+arguments for the `SparkJob`. For example, in order to submit the `SparkPi`
+example, one can use the following command:
+
+```
+$ adstax-spark-job-manager submit --adstax-host sample-adstax-instance.dev.adstax.io --jar http://s3.amazonaws.com/shiftforward-public/bin/spark/adstax-spark-examples-1.0.jar --job eu.shiftforward.adstax.spark.examples.SparkPi 100
+```
+
+This command should return information about the submission, for example:
+
+```
+{
+  "action" : "CreateSubmissionResponse",
+  "serverSparkVersion" : "2.0.0-SNAPSHOT",
+  "submissionId" : "driver-20160713161243-0002",
+  "success" : true
+}
+```
+
+You can now use the returned submission id to query the status of the job, as
+well as list its standard output. In order to query the status of the job, use
+the `status` command:
+
+```
+$ adstax-spark-job-manager status --adstax-host sample-adstax-instance.dev.adstax.io --submission-id driver-20160713161243-0002
+{
+  "action" : "SubmissionStatusResponse",
+  "driverState" : "FINISHED",
+  "message" : "task_id {\n value: \"driver-20160713161243-0002\"\n}\nstate: TASK_FINISHED\nmessage: \"Command exited with status 0\"\nslave_id {\n value: \"9f18159e-ebe9-4a70-89e1-9774adf2cdd6-S9\"\n}\ntimestamp: 1.468426400438861E9\nexecutor_id {\n value: \"driver-20160713161243-0002\"\n}\nsource: SOURCE_EXECUTOR\n11: \"A\\371\\330\\365+\\027Ds\\237\\243\\\"\\317\\276\\353\\363\\367\"\n13: \"\\n\\036\\022\\f10.0.174.173*\\016\\022\\f10.0.174.173\"\n",
+  "serverSparkVersion" : "2.0.0-SNAPSHOT",
+  "submissionId" : "driver-20160713161243-0002",
+  "success" : true
+}
+```
+
+The `log` command allows you to output the stdout and stderr of the job's
+driver. You can hide the stderr with the `--hide-stderr` flag and keep tailing
+the output with the `--follow` flag:
+
+```
+$ adstax-spark-job-manager log --adstax-host sample-adstax-instance.dev.adstax.io --submission-id driver-20160713161243-0002 --hide-stderr --follow
+Registered executor on ec2-54-87-240-29.compute-1.amazonaws.com
+Starting task driver-20160713161243-0002
+Forked command at 22260
+sh -c 'cd spark-2*; bin/spark-submit --name eu.shiftforward.adstax.spark.SparkJobRunner --master mesos://zk://zk.sample-adstax-instance.dev.adstax.io:2181/mesos --driver-cores 1.0 --driver-memory 1024M --class eu.shiftforward.adstax.spark.SparkJobRunner --conf spark.driver.supervise=false --conf spark.app.name=eu.shiftforward.adstax.spark.SparkJobRunner --conf spark.es.port=49200 --conf spark.es.nodes=localhost --conf spark.mesos.coarse=false --conf spark.executor.uri=https://s3.amazonaws.com/shiftforward-public/bin/spark/spark-2.0.0-SNAPSHOT-bin-2.4.0.tgz ../adstax-spark-examples-1.0.jar --job eu.shiftforward.adstax.spark.examples.SparkPi 100'
+Pi is roughly 3.1407
+Command exited with status 0 (pid: 22260)
+```
+
+The `kill` command allows you to kill an ongoing job. Killing already finished
+jobs has no effect:
+
+```
+$ adstax-spark-job-manager kill --adstax-host sample-adstax-instance.dev.adstax.io --submission-id driver-20160713161243-0002
+{
+  "action" : "KillSubmissionResponse",
+  "message" : "Driver already terminated",
+  "serverSparkVersion" : "2.0.0-SNAPSHOT",
+  "submissionId" : "driver-20160713161243-0002",
+  "success" : false
+}
+```
data/Rakefile
ADDED
@@ -0,0 +1,8 @@
+require "bundler/gem_tasks"
+
+task "publish" do
+  gem_helper = Bundler::GemHelper.instance
+  built_gem_path = gem_helper.build_gem
+  Process.wait spawn("gem nexus #{built_gem_path}")
+  Bundler.ui.confirm "#{gem_helper.gemspec.name} (#{gem_helper.gemspec.version}) pushed to Nexus."
+end
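The `publish` task shells out to `gem nexus`, which assumes the `nexus` RubyGems plugin is installed and configured for the target repository. A minimal sketch of a dry-run counterpart using the same `Bundler::GemHelper` API (hypothetical, not part of the gem):

```ruby
require "bundler/gem_tasks"

# Hypothetical dry-run task: build the .gem package exactly as `publish`
# does, but stop before pushing anything to Nexus.
task "build_only" do
  gem_helper = Bundler::GemHelper.instance
  built_gem_path = gem_helper.build_gem
  Bundler.ui.confirm "Built #{built_gem_path} (nothing pushed)."
end
```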
data/adstax-spark-job-manager.gemspec
ADDED
@@ -0,0 +1,27 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+
+Gem::Specification.new do |spec|
+  spec.name = "adstax-spark-job-manager"
+  spec.version = "0.1.0"
+  spec.authors = ["ShiftForward"]
+  spec.email = ["info@shiftforward.eu"]
+  spec.summary = "Manage Spark jobs running on an AdStax cluster."
+  spec.description = "Allow submitting, querying the status, outputting the log and killing Spark jobs on an AdStax cluster."
+
+  spec.licenses = ['Apache-2.0']
+
+  spec.files = `git ls-files -z`.split("\x0")
+  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.require_paths = ["lib"]
+
+  spec.required_ruby_version = '>= 2.0.0'
+
+  spec.add_runtime_dependency "file-tail", "~> 1.1"
+  spec.add_runtime_dependency "json", "~> 1.8"
+  spec.add_runtime_dependency "colorize", "~> 0.7"
+
+  spec.add_development_dependency "bundler", "~> 1.7"
+  spec.add_development_dependency "rake", "~> 10.0"
+end
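Note that `spec.files` shells out to `git ls-files`, so the gem only builds correctly from inside a git checkout. A hedged sketch of a fallback for building from a plain source tarball (hypothetical, not in this gemspec):

```ruby
# Hypothetical fallback: outside a git checkout `git ls-files` prints
# nothing, so glob the known layout instead.
files = `git ls-files -z`.split("\x0")
files = Dir.glob('bin/**/*') + %w[.gitignore Gemfile LICENSE README.md Rakefile adstax-spark-job-manager.gemspec] if files.empty?
```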
data/bin/adstax-spark-job-manager
ADDED
@@ -0,0 +1,325 @@
+#!/usr/bin/env ruby
+
+require 'colorize'
+require 'file-tail'
+require 'json'
+require 'net/http'
+require 'optparse'
+require 'tempfile'
+
+# -----------------
+# Constants
+# -----------------
+
+MAIN_CLASS = 'eu.shiftforward.adstax.spark.SparkJobRunner'
+SPARK_EXECUTOR_URI = 'https://s3.amazonaws.com/shiftforward-public/bin/spark/spark-2.0.0-SNAPSHOT-bin-2.4.0.tgz'
+SPARK_SCALA_VERSION = '2.11' # TODO: Support other versions and use different executors
+
+# -----------------
+# CLI arguments parsing
+# -----------------
+
+$cli_args = {
+  follow: false,
+  show_stderr: true
+}
+
+ARGV << '-h' if ARGV.empty?
+
+OptionParser.new do |opts|
+  opts.banner = "Usage: #{$PROGRAM_NAME} <action> --adstax-host <adstax_host> [<options>]"
+  opts.separator ''
+  opts.separator 'Submit, kill, query the status, or inspect the log of a Spark job running in an AdStax cluster.'
+  opts.separator "<action> is one of 'submit', 'kill', 'status' or 'log'."
+  opts.separator "Example: #{$PROGRAM_NAME} submit --adstax-host apollo.dev.adstax.io --jar http://s3.amazonaws.com/shiftforward-public/bin/spark/adstax-spark-examples-1.0.jar --job eu.shiftforward.adstax.spark.examples.SparkPi 1000"
+  opts.separator "Example: #{$PROGRAM_NAME} kill --submission-id driver-20160420105830-0001"
+  opts.separator ''
+  opts.separator 'Options:'
+
+  opts.on('--adstax-host STRING', 'Host suffix to the AdStax cluster services.') do |host_suffix|
+    $cli_args[:host_suffix] = host_suffix
+  end
+
+  opts.on('--jar STRING',
+          'Path to a bundled jar including your application and all dependencies.',
+          'The URL must be globally visible inside of your cluster.') do |jar|
+    $cli_args[:jar] = jar
+  end
+
+  opts.on('--job STRING',
+          'Fully qualified name of the class extending `eu.shiftforward.adstax.spark.SparkJob`.',
+          'The class will be used as the Spark job to run.') do |job|
+    $cli_args[:job] = job
+  end
+
+  opts.on('--submission-id STRING',
+          'Id of the submission (required for the kill, status and log actions).') do |submission_id|
+    $cli_args[:submission_id] = submission_id
+  end
+
+  opts.on('-f', '--follow',
+          "Enables following the file updates in the 'log' action.") do
+    $cli_args[:follow] = true
+  end
+
+  opts.on('--hide-stderr',
+          "Hides stderr output in the 'log' action.") do
+    $cli_args[:show_stderr] = false
+  end
+
+  opts.on_tail('-h', '--help', 'Show this message.') do
+    puts opts
+    exit
+  end
+end.parse!
+
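To make the flow concrete, here is a sketch of the state the parser leaves behind for the README's `SparkPi` submission (values taken from that example; `parse!` removes the options from `ARGV` but keeps positional arguments):

```ruby
# $ adstax-spark-job-manager submit \
#     --adstax-host sample-adstax-instance.dev.adstax.io \
#     --jar http://s3.amazonaws.com/shiftforward-public/bin/spark/adstax-spark-examples-1.0.jar \
#     --job eu.shiftforward.adstax.spark.examples.SparkPi 100
#
# $cli_args => { follow: false, show_stderr: true,
#                host_suffix: 'sample-adstax-instance.dev.adstax.io',
#                jar: 'http://s3.amazonaws.com/shiftforward-public/bin/spark/adstax-spark-examples-1.0.jar',
#                job: 'eu.shiftforward.adstax.spark.examples.SparkPi' }
# ARGV      => ['submit', '100']   # the action plus the trailing SparkJob arguments
```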
+def warn_missing(name)
+  puts "Missing required argument: #{name}"
+  exit 1
+end
+
+def get_http(uri)
+  uri = URI.parse(uri)
+  Net::HTTP.new(uri.host, uri.port)
+end
+
+def get_task(state_response, task_id)
+  target_tasks = []
+  state_response['completed_frameworks'].concat(state_response['frameworks']).each do |framework|
+    framework['completed_tasks'].concat(framework['tasks']).each do |task|
+      target_tasks.push(task) if task['id'] == task_id
+    end
+  end
+  target_tasks[0]
+end
+
+def get_executor(state_response, task_id)
+  target_executors = []
+  state_response['completed_frameworks'].concat(state_response['frameworks']).each do |framework|
+    framework['completed_executors'].concat(framework['executors']).each do |executor|
+      target_executors.push(executor) if executor['id'] == task_id
+    end
+  end
+  target_executors[0]
+end
+
+def mesos_download(http, remote_file, local_file)
+  params = { path: remote_file }
+  encoded_params = URI.encode_www_form(params)
+  file_response = http.request(Net::HTTP::Get.new(['/files/download', encoded_params].join('?')))
+  unless file_response.class.body_permitted?
+    puts 'Unable to fetch file from slave'
+    exit 1
+  end
+  local_file.rewind
+  local_file.write(file_response.body)
+end
+
+def tail_file(file, output_method = Proc.new { |line| puts line })
+  Thread.new do
+    File.open(file.path) do |log|
+      log.extend(File::Tail)
+      log.interval = 1
+      log.backward(10)
+      begin
+        log.tail { |line| output_method.call(line) }
+      rescue Interrupt
+        exit 1
+      end
+    end
+  end
+end
+
+$action = ARGV.shift || warn_missing('action')
+
+warn_missing('--adstax-host') unless $cli_args[:host_suffix]
+$cluster_dispatcher_host = "http://spark-cluster-dispatcher.#{$cli_args[:host_suffix]}:7077"
+
+def submit_job(jar, job)
+  uri = URI.parse($cluster_dispatcher_host)
+  http = Net::HTTP.new(uri.host, uri.port)
+  payload = {
+    'action' => 'CreateSubmissionRequest',
+    'appArgs' => ['--job', job].concat(ARGV),
+    'appResource' => jar,
+    'mainClass' => MAIN_CLASS,
+    'clientSparkVersion' => '1.6.1',
+    'environmentVariables' => {
+      'SPARK_SCALA_VERSION' => SPARK_SCALA_VERSION
+    },
+    'sparkProperties' => {
+      'spark.jars' => jar,
+      'spark.driver.supervise' => 'false',
+      'spark.app.name' => MAIN_CLASS,
+      'spark.es.port' => '49200',
+      'spark.es.nodes' => 'localhost',
+      'spark.submit.deployMode' => 'cluster',
+      'spark.mesos.coarse' => 'false',
+      'spark.master' => "mesos://spark-cluster-dispatcher.#{$cli_args[:host_suffix]}:7077",
+      'spark.executor.uri' => SPARK_EXECUTOR_URI
+    }
+  }.to_json
+  request = Net::HTTP::Post.new('/v1/submissions/create',
+                                'Content-Type' => 'application/json')
+  request.body = payload
+  http.request(request)
+end
+
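`submit_job` posts a `CreateSubmissionRequest` to Spark's REST submission endpoint on the cluster dispatcher. A minimal sketch of a caller consuming its response (hypothetical driver code, not part of this script; the response shape matches the `CreateSubmissionResponse` shown in the README):

```ruby
# Hypothetical caller: submit a job and report the id the dispatcher assigned.
response = submit_job('http://example.com/jobs/my-assembly.jar', 'com.example.MyJob')
result = JSON.parse(response.body)
if result['success']
  puts "Submitted: #{result['submissionId']}"
else
  abort "Submission failed: #{response.body}"
end
```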
+def kill_job(submission_id)
+  uri = URI.parse($cluster_dispatcher_host)
+  http = Net::HTTP.new(uri.host, uri.port)
+  request = Net::HTTP::Post.new("/v1/submissions/kill/#{submission_id}")
+  http.request(request)
+end
+
+def status_job(submission_id)
+  uri = URI.parse($cluster_dispatcher_host)
+  http = Net::HTTP.new(uri.host, uri.port)
+  request = Net::HTTP::Get.new("/v1/submissions/status/#{submission_id}")
+  http.request(request)
+end
+
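Since `status_job` returns the raw HTTP response, polling for completion is a small wrapper away. A sketch, assuming the driver states `QUEUED` (checked by `log_job` below) and `RUNNING` (a standard Spark driver state, assumed here), with `FINISHED` visible in the README output:

```ruby
# Hypothetical helper: poll the dispatcher until the driver leaves the
# QUEUED/RUNNING states, returning the final driverState (e.g. "FINISHED").
def wait_for_driver(submission_id, interval = 5)
  loop do
    state = JSON.parse(status_job(submission_id).body)['driverState']
    return state unless %w[QUEUED RUNNING].include?(state)
    sleep interval
  end
end
```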
+def log_job(submission_id, follow, show_stderr)
+  status_response = JSON.parse(status_job(submission_id).body)
+  if status_response['driverState'] == "NOT_FOUND"
+    puts "Unable to find submission with id #{submission_id}"
+    exit 1
+  end
+  if status_response['driverState'] == "QUEUED"
+    puts "Submission with id #{submission_id} is still queued for execution"
+    if follow
+      print "Waiting for submission with id #{submission_id} to start"
+      waiting_thread = Thread.new do
+        queued = true
+        while queued do
+          begin
+            sleep 1
+            print "."
+          rescue Interrupt
+            exit 1
+          end
+          res = JSON.parse(status_job(submission_id).body)
+          queued = res['driverState'] == "QUEUED"
+        end
+      end
+      waiting_thread.join
+      puts ""
+    else
+      exit 1
+    end
+  end
+  marathon_http = get_http("http://marathon.#{$cli_args[:host_suffix]}")
+  marathon_response = marathon_http.request(Net::HTTP::Get.new('/v2/info'))
+  unless marathon_response.class.body_permitted?
+    puts 'Unable to fetch Mesos leader url from Marathon'
+    exit 1
+  end
+  res = JSON.parse(marathon_response.body)
+  mesos_http = get_http(res['marathon_config']['mesos_leader_ui_url'])
+  mesos_response = mesos_http.request(Net::HTTP::Get.new('/state.json'))
+  unless mesos_response.class.body_permitted?
+    puts 'Unable to fetch Mesos status'
+    exit 1
+  end
+  res = JSON.parse(mesos_response.body)
+  target_task = get_task(res, submission_id)
+  unless target_task
+    puts "Unable to find submission with id #{submission_id} in Mesos. Maybe the submission is too old?"
+    exit 1
+  end
+  slaves = res['slaves']
+  slave_id = target_task['slave_id']
+  target_slaves = slaves.select do |slave|
+    slave['id'] == slave_id
+  end
+  if target_slaves.empty?
+    puts "Unable to find slave with id #{slave_id}"
+    exit 1
+  end
+  if target_slaves.length != 1
+    puts "Multiple slaves with id #{slave_id}"
+    exit 1
+  end
+  target_slave = target_slaves[0]
+  slave_http = get_http('http://' + target_slave['hostname'] + ':5051')
+  slave_response = slave_http.request(Net::HTTP::Get.new('/state.json'))
+  unless slave_response.class.body_permitted?
+    puts 'Unable to fetch file from slave'
+    exit 1
+  end
+  res = JSON.parse(slave_response.body)
+  target_executor = get_executor(res, submission_id)
+  unless target_executor
+    puts "Unable to find submission with id #{submission_id} in executor. Maybe the submission is too old?"
+    exit 1
+  end
+  directory = target_executor['directory']
+  stdout_file = Tempfile.new('spark' + submission_id)
+  stderr_file = Tempfile.new('spark' + submission_id)
+  threads = []
+  if follow
+    threads.push(Thread.new do
+      loop do
+        begin
+          sleep 1
+        rescue Interrupt
+          exit 1
+        end
+        mesos_download(slave_http, directory + '/stdout', stdout_file)
+        mesos_download(slave_http, directory + '/stderr', stderr_file)
+      end
+    end)
+  else
+    mesos_download(slave_http, directory + '/stdout', stdout_file)
+    mesos_download(slave_http, directory + '/stderr', stderr_file)
+  end
+  if follow
+    threads.push(tail_file(stdout_file))
+    threads.push(tail_file(stderr_file, Proc.new { |line| puts line.chomp.red })) if show_stderr
+    begin
+      threads.each { |thread| thread.join }
+    rescue Interrupt
+      exit 1
+    end
+  else
+    if show_stderr
+      stderr_file.rewind
+      puts stderr_file.read.chomp.red
+    end
+    stdout_file.rewind
+    puts stdout_file.read
+  end
+end
+
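`log_job` finds the driver's sandbox through a chain of service APIs: Marathon's `/v2/info` gives the Mesos leader URL, the leader's `/state.json` maps the submission id to a task and its slave, and the slave's `/state.json` and `/files/download` endpoints expose the executor's sandbox directory and its `stdout`/`stderr` files. A condensed sketch of that chain (host and submission id taken from the README example; error handling elided):

```ruby
# Condensed view of the sandbox discovery chain implemented by log_job.
submission_id = 'driver-20160713161243-0002'

marathon = get_http('http://marathon.sample-adstax-instance.dev.adstax.io')
info = JSON.parse(marathon.request(Net::HTTP::Get.new('/v2/info')).body)

master = get_http(info['marathon_config']['mesos_leader_ui_url'])
state = JSON.parse(master.request(Net::HTTP::Get.new('/state.json')).body)
task = get_task(state, submission_id)

slave = state['slaves'].find { |s| s['id'] == task['slave_id'] }
agent = get_http("http://#{slave['hostname']}:5051")
agent_state = JSON.parse(agent.request(Net::HTTP::Get.new('/state.json')).body)
sandbox = get_executor(agent_state, submission_id)['directory']

stdout_file = Tempfile.new('stdout')
mesos_download(agent, sandbox + '/stdout', stdout_file)
stdout_file.rewind
puts stdout_file.read
```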
+# -----------------
+# Program start
+# -----------------
+
+case $action
+when 'submit'
+  warn_missing('--jar') unless $cli_args[:jar]
+  warn_missing('--job') unless $cli_args[:job]
+  response = submit_job($cli_args[:jar], $cli_args[:job])
+  puts response.body
+
+when 'kill'
+  warn_missing('--submission-id') unless $cli_args[:submission_id]
+  response = kill_job($cli_args[:submission_id])
+  puts response.body
+
+when 'status'
+  warn_missing('--submission-id') unless $cli_args[:submission_id]
+  response = status_job($cli_args[:submission_id])
+  puts response.body
+
+when 'log'
+  warn_missing('--submission-id') unless $cli_args[:submission_id]
+  log_job($cli_args[:submission_id], $cli_args[:follow], $cli_args[:show_stderr])
+
+else
+  puts "Unrecognized action: #{$action}"
+  exit 1
+end
metadata
ADDED
@@ -0,0 +1,123 @@
+--- !ruby/object:Gem::Specification
+name: adstax-spark-job-manager
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- ShiftForward
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2016-07-21 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: file-tail
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.1'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.1'
+- !ruby/object:Gem::Dependency
+  name: json
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.8'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.8'
+- !ruby/object:Gem::Dependency
+  name: colorize
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '0.7'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '0.7'
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.7'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.7'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '10.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '10.0'
+description: Allow submitting, querying the status, outputting the log and killing
+  Spark jobs on an AdStax cluster.
+email:
+- info@shiftforward.eu
+executables:
+- adstax-spark-job-manager
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- LICENSE
+- README.md
+- Rakefile
+- adstax-spark-job-manager.gemspec
+- bin/adstax-spark-job-manager
+homepage:
+licenses:
+- Apache-2.0
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: 2.0.0
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.4.6
+signing_key:
+specification_version: 4
+summary: Manage Spark jobs running on an AdStax cluster.
+test_files: []