elephant-driver 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +44 -0
- data/Rakefile +75 -0
- data/VERSION +1 -0
- data/lib/elephant-driver.rb +19 -0
- data/lib/elephant-driver/client.rb +69 -0
- data/lib/elephant-driver/job.rb +127 -0
- data/lib/elephant-driver/task.rb +46 -0
- data/lib/elephant-driver/thrift/common.thrift +129 -0
- data/lib/elephant-driver/thrift/common_constants.rb +12 -0
- data/lib/elephant-driver/thrift/common_types.rb +209 -0
- data/lib/elephant-driver/thrift/hadoop_service_base.rb +314 -0
- data/lib/elephant-driver/thrift/jobtracker.rb +1466 -0
- data/lib/elephant-driver/thrift/jobtracker.thrift +478 -0
- data/lib/elephant-driver/thrift/jobtracker_constants.rb +14 -0
- data/lib/elephant-driver/thrift/jobtracker_types.rb +735 -0
- data/lib/elephant-driver/tracker.rb +10 -0
- data/lib/elephant-driver/version.rb +5 -0
- metadata +85 -0
@@ -0,0 +1,478 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to Cloudera, Inc. under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. Cloudera, Inc. licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
* See the License for the specific language governing permissions and
|
16
|
+
* limitations under the License.
|
17
|
+
*/
|
18
|
+
/*
|
19
|
+
* Thrift interface for Hadoop JobTracker.
|
20
|
+
*/
|
21
|
+
|
22
|
+
/* Common types and interfaces */
|
23
|
+
include 'common.thrift'
|
24
|
+
|
25
|
+
/*
|
26
|
+
* Namespaces for generated code. The idea is to keep code generated by
|
27
|
+
* Thrift under a 'hadoop.api' namespace, so that a higher-level set of
|
28
|
+
* functions and classes may be defined under 'hadoop'.
|
29
|
+
*/
|
30
|
+
namespace cpp hadoop.api.jobtracker
|
31
|
+
namespace csharp Hadoop.API.JobTracker
|
32
|
+
namespace java org.apache.hadoop.thriftfs.jobtracker.api
|
33
|
+
namespace perl Hadoop.API.jobtracker
|
34
|
+
namespace php hadoop_api_jobtracker
|
35
|
+
namespace py hadoop.api.jobtracker
|
36
|
+
namespace rb Hadoop.API.jobtracker
|
37
|
+
|
38
|
+
|
39
|
+
/*
|
40
|
+
* Most type names are prefixed with 'Thrift' to avoid confusion when dealing
|
41
|
+
* with both thrift and hadoop-land objects with the same name in Java.
|
42
|
+
*/
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
enum ThriftTaskType { // Kind of task; used by ThriftTaskID.taskType and as a filter in Jobtracker.getTaskList
|
47
|
+
MAP,
|
48
|
+
REDUCE,
|
49
|
+
JOB_SETUP,
|
50
|
+
JOB_CLEANUP,
|
51
|
+
TASK_CLEANUP // NOTE(review): presumably the cleanup run for a failed/killed task attempt -- confirm against Hadoop's TaskType
|
52
|
+
}
|
53
|
+
|
54
|
+
enum ThriftTaskState {
|
55
|
+
RUNNING, SUCCEEDED, FAILED, UNASSIGNED, KILLED,
|
56
|
+
COMMIT_PENDING, FAILED_UNCLEAN, KILLED_UNCLEAN
|
57
|
+
}
|
58
|
+
|
59
|
+
enum ThriftTaskPhase {
|
60
|
+
STARTING, MAP, SHUFFLE, SORT, REDUCE, CLEANUP
|
61
|
+
}
|
62
|
+
|
63
|
+
/**
|
64
|
+
* ThriftTaskQueryState corresponds to the (inferred) internal state of a TaskInProgress,
|
65
|
+
* and not that of a TaskStatus.
|
66
|
+
*/
|
67
|
+
enum ThriftTaskQueryState {
|
68
|
+
SUCCEEDED,
|
69
|
+
FAILED,
|
70
|
+
RUNNING, /* Inferred - Only if startTime is set */
|
71
|
+
PENDING, /* Inferred - Only if startTime is not set */
|
72
|
+
KILLED
|
73
|
+
}
|
74
|
+
|
75
|
+
/** Possible job priorities (see ThriftJobStatus) */
|
76
|
+
enum ThriftJobPriority {
|
77
|
+
VERY_HIGH,
|
78
|
+
HIGH,
|
79
|
+
NORMAL,
|
80
|
+
LOW,
|
81
|
+
VERY_LOW
|
82
|
+
}
|
83
|
+
|
84
|
+
/** Unique identifier for each job */
|
85
|
+
struct ThriftJobID {
|
86
|
+
/** Unique id of jobtracker */
|
87
|
+
1: string jobTrackerID
|
88
|
+
/** Unique (to JT) job id */
|
89
|
+
2: i32 jobID
|
90
|
+
|
91
|
+
/** Flattened as a string */
|
92
|
+
3: string asString
|
93
|
+
}
|
94
|
+
|
95
|
+
|
96
|
+
/** Description of a job queue */
|
97
|
+
struct ThriftJobQueueInfo {
|
98
|
+
1: string queueName
|
99
|
+
2: string schedulingInfo
|
100
|
+
}
|
101
|
+
|
102
|
+
struct ThriftJobQueueList {
|
103
|
+
1: list<ThriftJobQueueInfo> queues
|
104
|
+
}
|
105
|
+
|
106
|
+
/** Counter which represents some custom job metric */
|
107
|
+
struct ThriftCounter {
|
108
|
+
1: string name
|
109
|
+
2: string displayName
|
110
|
+
3: i64 value
|
111
|
+
}
|
112
|
+
|
113
|
+
/** Counters are organized by group */
|
114
|
+
struct ThriftCounterGroup {
|
115
|
+
1: string name
|
116
|
+
2: string displayName
|
117
|
+
3: map<string, ThriftCounter> counters
|
118
|
+
}
|
119
|
+
|
120
|
+
/** Container structure for counter groups */
|
121
|
+
struct ThriftGroupList {
|
122
|
+
1: list<ThriftCounterGroup> groups
|
123
|
+
}
|
124
|
+
|
125
|
+
/** Counters for map tasks only, reduce tasks only, and job-scoped counters */
|
126
|
+
struct ThriftJobCounterRollups {
|
127
|
+
1: ThriftGroupList mapCounters; // eg map input bytes
|
128
|
+
2: ThriftGroupList reduceCounters; // eg reduce input bytes
|
129
|
+
3: ThriftGroupList jobCounters; // eg task counts, etc
|
130
|
+
}
|
131
|
+
|
132
|
+
/** Unique task id */
|
133
|
+
struct ThriftTaskID {
|
134
|
+
/** ID of the job to which the task belongs */
|
135
|
+
1: ThriftJobID jobID
|
136
|
+
|
137
|
+
/** What kind of task is this? */
|
138
|
+
2: ThriftTaskType taskType
|
139
|
+
|
140
|
+
/** Unique (to job) task id */
|
141
|
+
3: i32 taskID
|
142
|
+
|
143
|
+
/** Flattened to a unique string */
|
144
|
+
4: string asString
|
145
|
+
}
|
146
|
+
|
147
|
+
/** Unique task attempt id */
|
148
|
+
struct ThriftTaskAttemptID {
|
149
|
+
1: ThriftTaskID taskID
|
150
|
+
2: i32 attemptID
|
151
|
+
3: string asString
|
152
|
+
}
|
153
|
+
|
154
|
+
/** Describes the current state of a single attempt */
|
155
|
+
struct ThriftTaskStatus {
|
156
|
+
1: ThriftTaskAttemptID taskID
|
157
|
+
2: double progress
|
158
|
+
3: ThriftTaskState state
|
159
|
+
4: string diagnosticInfo
|
160
|
+
5: string stateString
|
161
|
+
6: string taskTracker
|
162
|
+
|
163
|
+
7: i64 startTime
|
164
|
+
8: i64 finishTime
|
165
|
+
|
166
|
+
9: i64 outputSize
|
167
|
+
|
168
|
+
10: ThriftTaskPhase phase
|
169
|
+
|
170
|
+
11: ThriftGroupList counters
|
171
|
+
|
172
|
+
12: i64 shuffleFinishTime,
|
173
|
+
13: i64 sortFinishTime,
|
174
|
+
14: i64 mapFinishTime,
|
175
|
+
}
|
176
|
+
|
177
|
+
/**
|
178
|
+
* A ThriftTaskInProgress contains a list of
|
179
|
+
* task attempts (speculatively executed instances of the same task).
|
180
|
+
* These are indexed by TaskAttemptID.
|
181
|
+
* For simplicity, we convert maps keyed on TaskAttemptIDs to maps keyed
|
182
|
+
* on their string representation.
|
183
|
+
*
|
184
|
+
* Assumption: there won't be so many task attempts that retrieving a single task
|
185
|
+
* will be too expensive.
|
186
|
+
*/
|
187
|
+
struct ThriftTaskInProgress { // NOTE: field ids start at 2; id 1 is not used in this struct
|
188
|
+
2: i64 execStartTime
|
189
|
+
3: i64 execFinishTime
|
190
|
+
4: double progress
|
191
|
+
5: i64 startTime
|
192
|
+
6: bool failed
|
193
|
+
7: bool complete
|
194
|
+
8: ThriftTaskID taskID
|
195
|
+
9: list<ThriftTaskAttemptID> tasks // the task attempts belonging to this task
|
196
|
+
/** TaskAttemptID (string) to ThriftTaskStatus map */
|
197
|
+
10: map<string,ThriftTaskStatus> taskStatuses
|
198
|
+
11: map<string,list<string>> taskDiagnosticData // presumably keyed by TaskAttemptID string, like taskStatuses -- TODO confirm
|
199
|
+
12: ThriftGroupList counters
|
200
|
+
/** The last state reported (from TaskReport) */
|
201
|
+
13: string mostRecentState
|
202
|
+
/** The set of attempts that are currently running - could be empty. */
|
203
|
+
14: list<string> runningAttempts
|
204
|
+
/** The id of the successful attempt. If not complete, this field is meaningless */
|
205
|
+
15: string successfulAttempt
|
206
|
+
}
|
207
|
+
|
208
|
+
/** TaskTracker status; contains details of individual tasks */
|
209
|
+
struct ThriftTaskTrackerStatus {
|
210
|
+
1: string trackerName
|
211
|
+
2: string host
|
212
|
+
3: i32 httpPort
|
213
|
+
4: i32 failureCount
|
214
|
+
|
215
|
+
/** List of the state of all tasks on this tracker */
|
216
|
+
5: list<ThriftTaskStatus> taskReports
|
217
|
+
|
218
|
+
/** When did the JobTracker last hear from this TaskTracker? */
|
219
|
+
6: i64 lastSeen
|
220
|
+
|
221
|
+
/** Maximum number of map tasks and of reduce tasks (one limit per task type) */
|
222
|
+
7: i32 maxMapTasks
|
223
|
+
8: i32 maxReduceTasks
|
224
|
+
|
225
|
+
/** Main memory metrics, all in bytes */
|
226
|
+
9: i64 totalVirtualMemory
|
227
|
+
11: i64 totalPhysicalMemory // NOTE: field id 10 is not used in this struct
|
228
|
+
13: i64 availableSpace // NOTE: field id 12 is not used in this struct
|
229
|
+
|
230
|
+
/** Currently running and unassigned map and reduce tasks */
|
231
|
+
14: i32 mapCount
|
232
|
+
15: i32 reduceCount
|
233
|
+
|
+
}
|
234
|
+
|
235
|
+
/** Container structure for TaskTrackerStatus objects */
|
236
|
+
struct ThriftTaskTrackerStatusList {
|
237
|
+
1: list<ThriftTaskTrackerStatus> trackers
|
238
|
+
}
|
239
|
+
|
240
|
+
/** States that the jobtracker may be in */
|
241
|
+
enum JobTrackerState {
|
242
|
+
INITIALIZING,
|
243
|
+
RUNNING
|
244
|
+
}
|
245
|
+
|
246
|
+
/** Enum version of the ints in JobStatus */
|
247
|
+
enum ThriftJobState {
|
248
|
+
RUNNING = 1,
|
249
|
+
SUCCEEDED = 2,
|
250
|
+
FAILED = 3,
|
251
|
+
PREP = 4,
|
252
|
+
KILLED = 5
|
253
|
+
}
|
254
|
+
|
255
|
+
/** Status of a job */
|
256
|
+
struct ThriftJobStatus {
|
257
|
+
1: ThriftJobID jobID
|
258
|
+
2: double mapProgress
|
259
|
+
3: double reduceProgress
|
260
|
+
4: double cleanupProgress
|
261
|
+
5: double setupProgress
|
262
|
+
6: ThriftJobState runState
|
263
|
+
7: i64 startTime
|
264
|
+
8: string user
|
265
|
+
9: ThriftJobPriority priority
|
266
|
+
10: string schedulingInfo
|
267
|
+
}
|
268
|
+
|
269
|
+
/** Job metadata */
|
270
|
+
struct ThriftJobProfile {
|
271
|
+
1: string user
|
272
|
+
2: ThriftJobID jobID
|
273
|
+
3: string jobFile
|
274
|
+
4: string name
|
275
|
+
5: string queueName
|
276
|
+
}
|
277
|
+
|
278
|
+
/**
|
279
|
+
* Container structure of a list of tasks. This list may have been put together
|
280
|
+
* according to some selection criteria. That is, it may not correspond to the
|
281
|
+
* mapTasks, or reduceTasks, etc. It may even contain tasks of different types.
|
282
|
+
*/
|
283
|
+
struct ThriftTaskInProgressList {
|
284
|
+
/** A (possibly incomplete) list of tasks */
|
285
|
+
1: list<ThriftTaskInProgress> tasks
|
286
|
+
/** The total number of tasks in this full list. */
|
287
|
+
2: i32 numTotalTasks
|
288
|
+
}
|
289
|
+
|
290
|
+
/** Profile, status and tasks of a single job; used for jobs in *any* state, not just currently running ones */
|
291
|
+
struct ThriftJobInProgress {
|
292
|
+
1: ThriftJobProfile profile
|
293
|
+
2: ThriftJobStatus status
|
294
|
+
3: ThriftJobID jobID
|
295
|
+
4: i32 desiredMaps
|
296
|
+
5: i32 desiredReduces
|
297
|
+
6: i32 finishedMaps
|
298
|
+
7: i32 finishedReduces
|
299
|
+
8: ThriftJobPriority priority
|
300
|
+
|
301
|
+
11: i64 startTime
|
302
|
+
12: i64 finishTime
|
303
|
+
13: i64 launchTime
|
304
|
+
|
305
|
+
23: ThriftTaskInProgressList tasks
|
306
|
+
}
|
307
|
+
|
308
|
+
/** Container structure of a list of jobs, in case we ever want to add metadata */
|
309
|
+
struct ThriftJobList {
|
310
|
+
1: list<ThriftJobInProgress> jobs
|
311
|
+
}
|
312
|
+
|
313
|
+
/** Container structure for job counts for a given user */
|
314
|
+
struct ThriftUserJobCounts {
|
315
|
+
1: i32 nPrep,
|
316
|
+
2: i32 nRunning,
|
317
|
+
3: i32 nSucceeded,
|
318
|
+
4: i32 nFailed,
|
319
|
+
5: i32 nKilled
|
320
|
+
}
|
321
|
+
|
322
|
+
/** Status of the cluster as viewed by the jobtracker */
|
323
|
+
struct ThriftClusterStatus {
|
324
|
+
1: i32 numActiveTrackers
|
325
|
+
2: list<string> activeTrackerNames
|
326
|
+
3: list<string> blacklistedTrackerNames
|
327
|
+
4: i32 numBlacklistedTrackers
|
328
|
+
5: i32 numExcludedNodes
|
329
|
+
|
330
|
+
/* How often does the JobTracker check for expired tasks with the taskTracker */
|
331
|
+
6: i64 taskTrackerExpiryInterval
|
332
|
+
|
333
|
+
7: i32 mapTasks;
|
334
|
+
8: i32 reduceTasks
|
335
|
+
9: i32 maxMapTasks
|
336
|
+
10: i32 maxReduceTasks
|
337
|
+
11: JobTrackerState state
|
338
|
+
|
339
|
+
/** Used and max memory for the cluster, in bytes */
|
340
|
+
12: i64 usedMemory
|
341
|
+
13: i64 maxMemory
|
342
|
+
|
343
|
+
14: i32 totalSubmissions
|
344
|
+
|
345
|
+
/* True if the JobTracker has restarted */
|
346
|
+
15: bool hasRestarted
|
347
|
+
|
348
|
+
/* True if the JobTracker has finished recovering after a restart */
|
349
|
+
16: bool hasRecovered
|
350
|
+
|
351
|
+
17: i64 startTime
|
352
|
+
18: string hostname
|
353
|
+
19: string identifier
|
354
|
+
|
355
|
+
20: i32 httpPort
|
356
|
+
}
|
357
|
+
|
358
|
+
/** Merely an indicator that a job wasn't found. */
|
359
|
+
exception JobNotFoundException {
|
360
|
+
}
|
361
|
+
|
362
|
+
/** Merely an indicator that a task wasn't found. */
|
363
|
+
exception TaskNotFoundException {
|
364
|
+
}
|
365
|
+
|
366
|
+
/** Indicates that a task attempt wasn't found */
|
367
|
+
exception TaskAttemptNotFoundException {
|
368
|
+
}
|
369
|
+
|
370
|
+
/** Indicates that a tasktracker wasn't found */
|
371
|
+
exception TaskTrackerNotFoundException {
|
372
|
+
}
|
373
|
+
|
374
|
+
/** A proxy service onto a Jobtracker, exposing methods for cluster monitoring plus a few control operations (killJob, killTaskAttempt, setJobPriority) */
|
375
|
+
service Jobtracker extends common.HadoopServiceBase {
|
376
|
+
/** Get the name of the tracker exporting this service */
|
377
|
+
string getJobTrackerName(10: common.RequestContext ctx),
|
378
|
+
|
379
|
+
/** Get the current cluster status */
|
380
|
+
ThriftClusterStatus getClusterStatus(10: common.RequestContext ctx),
|
381
|
+
|
382
|
+
/** Get a list of job queues managed by this tracker */
|
383
|
+
ThriftJobQueueList getQueues(10: common.RequestContext ctx)
|
384
|
+
throws(1: common.IOException err),
|
385
|
+
|
386
|
+
/** Get a job by ID */
|
387
|
+
ThriftJobInProgress getJob(10: common.RequestContext ctx, 1: ThriftJobID jobID)
|
388
|
+
throws(1: JobNotFoundException err),
|
389
|
+
|
390
|
+
/** Get a list of currently running jobs */
|
391
|
+
ThriftJobList getRunningJobs(10: common.RequestContext ctx),
|
392
|
+
|
393
|
+
/** Get a list of completed jobs */
|
394
|
+
ThriftJobList getCompletedJobs(10: common.RequestContext ctx),
|
395
|
+
|
396
|
+
/** Get a list of failed (due to error, not killed) jobs */
|
397
|
+
ThriftJobList getFailedJobs(10: common.RequestContext ctx),
|
398
|
+
|
399
|
+
/** Get a list of killed jobs */
|
400
|
+
ThriftJobList getKilledJobs(10: common.RequestContext ctx),
|
401
|
+
|
402
|
+
/** Get a list of all failed, completed and running jobs (could be expensive!) */
|
403
|
+
ThriftJobList getAllJobs(10: common.RequestContext ctx),
|
404
|
+
|
405
|
+
/** Get the count of jobs by status for a given user */
|
406
|
+
ThriftUserJobCounts getUserJobCounts(1: common.RequestContext ctx, 2: string user),
|
407
|
+
|
408
|
+
/** Get a (possibly incomplete) list of tasks */
|
409
|
+
ThriftTaskInProgressList getTaskList(
|
410
|
+
1: common.RequestContext ctx,
|
411
|
+
2: ThriftJobID jobID,
|
412
|
+
3: set<ThriftTaskType> types,
|
413
|
+
4: set<ThriftTaskQueryState> states,
|
414
|
+
5: string text,
|
415
|
+
6: i32 count,
|
416
|
+
7: i32 offset) throws(1: JobNotFoundException err),
|
417
|
+
|
418
|
+
/** Get details of a task */
|
419
|
+
ThriftTaskInProgress getTask(1: common.RequestContext ctx,
|
420
|
+
2: ThriftTaskID taskID)
|
421
|
+
throws(1: JobNotFoundException jnf, 2: TaskNotFoundException tnf),
|
422
|
+
|
423
|
+
/**
|
424
|
+
* Get a list of groups of counters attached to the job with provided id.
|
425
|
+
* This returns the total counters
|
426
|
+
**/
|
427
|
+
ThriftGroupList getJobCounters(10: common.RequestContext ctx,
|
428
|
+
1: ThriftJobID jobID)
|
429
|
+
throws(1: JobNotFoundException err),
|
430
|
+
|
431
|
+
|
432
|
+
/** Return job counters rolled up by map, reduce, and total */
|
433
|
+
ThriftJobCounterRollups getJobCounterRollups(10: common.RequestContext ctx,
|
434
|
+
1: ThriftJobID jobID)
|
435
|
+
throws(1: JobNotFoundException err),
|
436
|
+
|
437
|
+
|
438
|
+
/** Get all active trackers */
|
439
|
+
ThriftTaskTrackerStatusList getActiveTrackers(10: common.RequestContext ctx),
|
440
|
+
|
441
|
+
/** Get all blacklisted trackers */
|
442
|
+
ThriftTaskTrackerStatusList getBlacklistedTrackers(10: common.RequestContext ctx),
|
443
|
+
|
444
|
+
/** Get all trackers */
|
445
|
+
ThriftTaskTrackerStatusList getAllTrackers(10: common.RequestContext ctx),
|
446
|
+
|
447
|
+
/** Get a single task tracker by name */
|
448
|
+
ThriftTaskTrackerStatus getTracker(10: common.RequestContext ctx, 1: string name)
|
449
|
+
throws(1: TaskTrackerNotFoundException tne),
|
450
|
+
|
451
|
+
/** Get the current time in ms according to the JT */
|
452
|
+
i64 getCurrentTime(10: common.RequestContext ctx),
|
453
|
+
|
454
|
+
/** Get the xml for a job's configuration, serialised from the local filesystem on the JT */
|
455
|
+
string getJobConfXML(10: common.RequestContext ctx, 1: ThriftJobID jobID)
|
456
|
+
throws(1: common.IOException err),
|
457
|
+
|
458
|
+
/** Kill a job */
|
459
|
+
void killJob(10: common.RequestContext ctx, 1: ThriftJobID jobID)
|
460
|
+
throws(1: common.IOException err, 2: JobNotFoundException jne),
|
461
|
+
|
462
|
+
/** Kill a task attempt */
|
463
|
+
void killTaskAttempt(10: common.RequestContext ctx, 1: ThriftTaskAttemptID attemptID)
|
464
|
+
throws(1: common.IOException err,
|
465
|
+
2: TaskAttemptNotFoundException tne,
|
466
|
+
3: JobNotFoundException jne),
|
467
|
+
|
468
|
+
/** Set a job's priority */
|
469
|
+
void setJobPriority(10: common.RequestContext ctx,
|
470
|
+
1: ThriftJobID jobID,
|
471
|
+
2: ThriftJobPriority priority)
|
472
|
+
throws(1: common.IOException err, 2: JobNotFoundException jne),
|
473
|
+
|
474
|
+
/** Get an MR delegation token. */
|
475
|
+
common.ThriftDelegationToken getDelegationToken(10:common.RequestContext ctx, 1:string renewer) throws(1: common.IOException err)
|
476
|
+
}
|
477
|
+
|
478
|
+
|