hodor 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.gitmodules +3 -0
- data/.rspec +2 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/Guardfile +11 -0
- data/README.md +105 -0
- data/Rakefile +105 -0
- data/bin/hodor +18 -0
- data/hodor.gemspec +47 -0
- data/lib/config/log4r_config.xml +35 -0
- data/lib/hodor.rb +83 -0
- data/lib/hodor/api/hdfs.rb +222 -0
- data/lib/hodor/api/oozie.rb +215 -0
- data/lib/hodor/api/oozie/action.rb +52 -0
- data/lib/hodor/api/oozie/bundle.rb +27 -0
- data/lib/hodor/api/oozie/coordinator.rb +53 -0
- data/lib/hodor/api/oozie/hadoop_job.rb +29 -0
- data/lib/hodor/api/oozie/job.rb +192 -0
- data/lib/hodor/api/oozie/materialization.rb +56 -0
- data/lib/hodor/api/oozie/query.rb +115 -0
- data/lib/hodor/api/oozie/session.rb +170 -0
- data/lib/hodor/api/oozie/workflow.rb +58 -0
- data/lib/hodor/cli.rb +146 -0
- data/lib/hodor/command.rb +164 -0
- data/lib/hodor/configuration.rb +80 -0
- data/lib/hodor/environment.rb +437 -0
- data/lib/hodor/ui/table.rb +130 -0
- data/lib/hodor/version.rb +3 -0
- data/lib/tasks/hdfs.thor +138 -0
- data/lib/tasks/master.thor +61 -0
- data/lib/tasks/oozie.thor +399 -0
- data/lib/tasks/sandbox.thor +87 -0
- data/spec/integration/api/oozie/action_spec.rb +69 -0
- data/spec/integration/api/oozie/bundle_spec.rb +33 -0
- data/spec/integration/api/oozie/coordinator_spec.rb +66 -0
- data/spec/integration/api/oozie/hadoop_job_spec.rb +29 -0
- data/spec/integration/api/oozie/job_spec.rb +15 -0
- data/spec/integration/api/oozie/materialization_spec.rb +66 -0
- data/spec/integration/api/oozie/query_spec.rb +43 -0
- data/spec/integration/api/oozie/session_spec.rb +18 -0
- data/spec/integration/api/oozie/workflow_spec.rb +65 -0
- data/spec/integration/api/oozie_spec.rb +198 -0
- data/spec/integration/fixtures/api/running_coordinators/req_resp_00.memo +6 -0
- data/spec/integration/fixtures/api/sample_action/req_resp_00.memo +5 -0
- data/spec/integration/fixtures/api/sample_action/req_resp_01.memo +7 -0
- data/spec/integration/fixtures/api/sample_bundle/req_resp_00.memo +6 -0
- data/spec/integration/fixtures/api/sample_coordinator/req_resp_00.memo +5 -0
- data/spec/integration/fixtures/api/sample_materialization/req_resp_00.memo +5 -0
- data/spec/integration/fixtures/api/sample_materialization/req_resp_01.memo +7 -0
- data/spec/integration/fixtures/api/sample_workflow/req_resp_00.memo +5 -0
- data/spec/spec_helper.rb +92 -0
- data/spec/support/d_v_r.rb +125 -0
- data/spec/support/hodor_api.rb +15 -0
- data/spec/unit/hodor/api/hdfs_spec.rb +63 -0
- data/spec/unit/hodor/api/oozie_spec.rb +32 -0
- data/spec/unit/hodor/environment_spec.rb +52 -0
- data/topics/hdfs/corresponding_paths.txt +31 -0
- data/topics/hdfs/overview.txt +10 -0
- data/topics/master/clusters.yml.txt +36 -0
- data/topics/master/overview.txt +17 -0
- data/topics/oozie/blocking_coordinators.txt +46 -0
- data/topics/oozie/composing_job_properties.txt +68 -0
- data/topics/oozie/display_job.txt +52 -0
- data/topics/oozie/driver_scenarios.txt +42 -0
- data/topics/oozie/inspecting_jobs.txt +59 -0
- data/topics/oozie/jobs.yml.txt +185 -0
- data/topics/oozie/overview.txt +43 -0
- data/topics/oozie/workers_and_drivers.txt +40 -0
- metadata +455 -0
@@ -0,0 +1,130 @@
|
|
1
|
+
require 'terminal-table'
|
2
|
+
|
3
|
+
module Hodor

  # Renders a Hodor model object (an Oozie job, coordinator, query result,
  # etc.) as ASCII tables using the terminal-table gem.
  #
  # The object is expected to respond to:
  #   * display_properties -> Hash accepted by Terminal::Table.new (with :rows),
  #     or nil when there are no properties to display
  #   * display_children   -> Hash accepted by Terminal::Table.new (with :rows)
  #   * title (optional)   -> String used as the table caption
  #   * session (optional) -> consulted to inherit session-level verbosity
  class Table

    # object   - the model object to render (see class comment for contract)
    # verbose  - when truthy, long (>50 char) values are shown in an extra table
    # matching - optional array of substrings; #to_s then only emits child rows
    #            containing at least one of them (plus header/border rows)
    def initialize(object, verbose = false, matching = nil)
      @verbose = verbose
      # Inherit verbosity from the object's session when it has one.
      if object.respond_to?(:session)
        @verbose ||= object.session.verbose
      end
      @matching = matching

      # Display properties first
      properties = object.display_properties
      @title = object.respond_to?(:title) ? object.title : "#{object.class.name} Properties"
      if properties
        rows = properties[:rows]
        if rows.length < 5
          # Few properties: render them as a single two-column table.
          @prop_table = Terminal::Table.new(properties)
          @prop_table.align_column 0, :right
        else
          # Split rows into terse (short value) and verbose (long value) sets.
          # Use to_s before length so nil values cannot raise NoMethodError
          # (previously the verbose branch crashed on nil property values).
          if @verbose
            terse_rows = rows.select { |row| row[1].to_s.length <= 50 }
          else
            terse_rows = rows.select { |row| !row[1].nil? && row[1].length > 0 && row[1].length <= 50 }
          end
          verbose_rows = rows.select { |row| row[1].to_s.length > 50 }.map { |row| normalize(row) }
          sorted_rows = terse_rows.sort_by { |row| -row[1].to_s.length }

          # Arrange the terse rows into three side-by-side label/value column
          # pairs. Round the per-column row count up so a total that is not
          # divisible by three no longer silently drops the trailing rows.
          numrows = (sorted_rows.length + 2) / 3
          slen = sorted_rows.length
          arranged = []
          (0..numrows - 1).each { |rownum|
            mcol = []
            mcol += normalize(sorted_rows[rownum]) if slen > rownum
            mcol += normalize(sorted_rows[rownum + numrows]) if slen > rownum + numrows
            mcol += normalize(sorted_rows[rownum + 2 * numrows]) if slen > rownum + 2 * numrows
            arranged << mcol
          }

          if @verbose
            # Long values get their own borderless table, one row per value.
            @long_table = Terminal::Table.new( { rows: verbose_rows } )
            @long_table.align_column 0, :right
            @long_table.style = { border_y: ' ', border_x: " ", border_i: ' ' }
          end

          @prop_table = Terminal::Table.new( { rows: arranged } )
          @prop_table.align_column 0, :right
          @prop_table.align_column 2, :right
          @prop_table.align_column 4, :right
          @prop_table.style = { border_y: ' ', border_x: " ", border_i: ' ' }
        end
      end

      # Next display the table of children
      rowcol = object.display_children
      if rowcol && rowcol[:rows] && rowcol[:rows].length > 0
        @child_table = Terminal::Table.new(rowcol)
        @child_table.align_column 0, :center
      else
        # Placeholder distinguishes "had properties but no children" from
        # "nothing at all".
        @child_table = Terminal::Table.new(rows: [[@prop_table ? "<< No Children >>" : "<< Empty Set >>"]])
        @child_table.align_column 0, :center
      end
    end

    # Converts a [key, value] property row into display form: the key is
    # title-cased ("job_id" -> "Job Id:"), an empty/nil value becomes '<nil>'.
    def normalize(row)
      [row[0].to_s.split('_').map { |word| word.capitalize }.join(' ') + ":", row[1].to_s.length > 0 ? row[1] : '<nil>']
    end

    # Returns a copy of the multi-line string +table+ with every line
    # indented by +count+ spaces.
    def shift table, count=3
      shifted = ""
      table.each_line { |line|
        shifted << " " * count + line
      }
      shifted
    end

    # The rendered properties table, indented one space, with the first line
    # (top border) removed and trailing whitespace stripped.
    def properties
      output = @prop_table ? @prop_table.to_s : ''
      output = shift(output, 1)
      stripped_output = ""
      first_line = true
      output.each_line { |line|
        stripped_output << line unless first_line
        first_line = false
      }
      stripped_output.rstrip
    end

    # The rendered long-values table (verbose mode only), indented 4 spaces.
    def long_properties
      output = @long_table ? @long_table.to_s : ''
      shift(output, 4)
    end

    # The rendered children table, indented 4 spaces.
    def children
      output = @child_table ? @child_table.to_s : ''
      shift(output, 4)
    end

    # Assembles title ruler, properties, long properties and (optionally
    # filtered) children into the final display string.
    def to_s
      prop_width = (properties.split("\n").first || "").length
      children_width = (children.split("\n").first || "").length
      title_width = @title.length
      if prop_width > 0
        ruler = [((prop_width - title_width) / 2).to_i - 5, 0].max
        output = " #{'-' * ruler} #{@title} #{'-' * ruler}\n"
      elsif children_width > 0
        ruler = [((children_width - title_width) / 2).to_i - 5, 0].max
        output = " #{' ' * ruler} #{@title} #{' ' * ruler}\n"
      else
        # Both tables rendered empty: start from an empty string so the
        # concatenations below cannot raise NoMethodError on nil.
        output = ""
      end
      output += properties + "\n" if @prop_table
      output += long_properties + "\n" if @long_table
      if @matching
        # Keep the first three lines (border + header) and the last (border)
        # unconditionally; other rows must contain one of the match strings.
        child_lines = children.split("\n")
        total_lines = child_lines.length
        child_lines.each_with_index { |line, index|
          matched = index < 3 || index == total_lines - 1
          @matching.each { |item|
            matched ||= line.include?(item)
          } unless matched
          output += "#{line}\n" if matched
        }
      else
        output += children
      end
      output
    end
  end
end
|
data/lib/tasks/hdfs.thor
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
require "hodor/api/hdfs"
|
2
|
+
|
3
|
+
module Hodor
  module Cli

    # Thor task namespace exposing HDFS operations ("hodor hdfs:<command>").
    # Local paths given on the command line are mapped to their corresponding
    # HDFS locations via the Hodor::Hdfs singleton.
    class Hdfs < ::Hodor::Command

      no_tasks do

        # Facade for HDFS path mapping and file transfer operations.
        def hdfs
          ::Hodor::Hdfs.instance
        end

        # The :fs task is a pure pass-through: its trailing arguments are
        # forwarded to 'hadoop fs' on the remote master rather than being
        # dispatched to the (empty) method body.
        def intercept_dispatch(command, trailing)
          hadoop_command("hadoop fs", trailing) if command == :fs
        end

        # Prepends the hdfs overview topic and appends a "more help" footer
        # around Thor's standard help output.
        # NOTE(review): the footer's internal indentation was lost in
        # extraction; reconstructed to match the unindent(10) margin.
        def self.help(shell, subcommand = false)
          shell.print_wrapped(load_topic('overview'), indent: 0)
          result = super

          more_help = %Q[Getting More Help:
          ------------------
          To get detailed help on specific Hdfs commands (i.e. put_dir), run:

             $ hodor help hdfs:put_dir
             $ hodor hdfs:help put_dir     # alternate, works the same

          ].unindent(10)
          shell.say more_help
          result
        end
      end

      desc "fs <arguments>", %q{
        Passes its arguments as-is to remote host, and runs 'hadoop fs <arguments>'
      }.gsub(/^\s+/, "").strip
      long_desc <<-LONGDESC
        Executes the hadoop fs command on the remote host configured as the master,
        using SSH. The arguments passed to this command are passed directly
        through to the ssh command and executed as-is on the remote host. Because
        this is a pass-through command, anything the remote tool can do, is available
        through this facility. So, refer to Apache's documentation on its 'hadoop fs'
        command line tool for details on the sub-commands and arguments it supports.

        Example:

        $ hodor hdfs:fs -ls -R /shared/jars
      LONGDESC
      def fs
        # handled by intercept_dispatch
      end

      desc "users", %q{
        Run an 'hdfs ls' command on the /user directory to list users on HDFS
      }.gsub(/^\s+/, "").strip
      def users
        env.ssh "hadoop fs -ls /user",
          echo: true, echo_cmd: true
      end

      desc "rm <filename>", "Removes <filename> from corresponding path on HDFS"
      def rm(filename)
        # Interpolation restored: the published text was garbled here.
        logger.info "Removing #{filename}"
        hdfs.rm(filename)
      end

      desc "rm_rf <directory>", "Recursively removes <directory> from corresponding path on HDFS"
      def rm_rf(path)
        logger.info "Removing directory #{path} recursively..."
        hdfs.rm_rf(path)
      end

      desc "ls [<paths> ...]", "Shows a directory listing of the corresponding path on HDFS"
      def ls(*paths)
        # Default to the current directory when no paths were given.
        paths << "." if paths.length == 0
        hdfs_paths = paths.map { |path| hdfs.path_on_hdfs(path) }
        env.ssh "hadoop fs -ls #{hdfs_paths.join(' ')}",
          echo: true, echo_cmd: true
      end

      desc "cat", "Dump contents of file at the corresponding path on HDFS to STDOUT"
      def cat(filename)
        env.ssh "hadoop fs -cat #{hdfs.path_on_hdfs(filename)}",
          echo: true, echo_cmd: true
      end

      desc "put_dir <path>", "Uploads (recursively) the directory at <path> to corresponding path on HDFS"
      method_option :dryrun, type: :boolean, aliases: "-d", default: false,
        desc: "Don't actually deploy the files, just show what would be deployed"
      method_option :clean, type: :boolean, aliases: "-c", default: false,
        desc: "Clean the hdfs target before deploying this directory"
      method_option :verbose, type: :boolean, aliases: "-v", default: false,
        desc: "Log verbose details about which files are deployed and to where"
      method_option :maxdepth, type: :string, aliases: "-m", default: nil,
        desc: "The maximum number of directories deep to copy to HDFS"
      def put_dir(dirpath)
        hdfs.put_dir dirpath, options
      end

      desc "put <filename>", "Uploads <filename> to the corresponding path on HDFS"
      def put(filename)
        hdfs.put_file(filename)
      end

      desc "get <filename>", "Downloads <filename> from the corresponding path on HDFS"
      method_option :diff, type: :boolean, aliases: "-d", default: false,
        desc: "After downloading <filename>, a diff is run between local and remote versions"
      def get(filename)
        hdfs.get_file(filename)
        if options[:diff]
          # Interpolations restored: the published text was garbled here.
          # Compares the local file against the freshly downloaded copy.
          env.run_local %Q[diff #{filename} #{filename}.hdfs_copy], echo: true, echo_cmd: true
        end
      end

      desc "touchz", "Creates a file of zero length at the corresponding path on HDFS"
      def touchz(filename)
        env.ssh "hadoop fs -touchz #{hdfs.path_on_hdfs(filename)}",
          echo: true, echo_cmd: true
      end

      desc "pwd", "Displays both your local and HDFS working directories, and how they correspond"
      def pwd
        logger.info "Path on localhost : [#{env.path_on_disc('.')}]"
        logger.info "Path on Git repo  : [#{env.path_on_github('.')}]"
        logger.info "Path on HDFS      : [#{hdfs.path_on_hdfs('.')}]"
      end

      desc "path_of", "Displays the path of the specified file or directory"
      def path_of(path)
        logger.info "Path on local disc: [#{env.path_on_disc(path)}]"
        logger.info "Path on GitHub: [#{env.path_on_github(path)}]"
        logger.info "Path on HDFS: [#{hdfs.path_on_hdfs(path)}]"
      end
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
|
2
|
+
module Hodor
  module Cli

    # Thor task namespace for commands that run against the remote master
    # host itself ("hodor master:<command>"): configuration inspection and
    # SSH pass-through execution.
    class Master < ::Hodor::Command

      no_tasks do
        # The :exec task forwards its trailing arguments to the remote master
        # over SSH (-T: no pseudo-terminal) rather than dispatching to the
        # (empty) method body.
        def intercept_dispatch(command, trailing)
          case command
          when :exec
            hadoop_command("-T", trailing)
          end
        end

        # Prepends the master overview topic and appends a "more help" footer
        # around Thor's standard help output.
        # NOTE(review): the footer's internal indentation was lost in
        # extraction; reconstructed to match the unindent(10) margin.
        def self.help(shell, subcommand = false)
          shell.print_wrapped(load_topic('overview'), indent: 0)
          result = super

          more_help = %Q[Getting More Help:
          ------------------
          To get detailed help on specific Master commands (i.e. config), run:

             $ hodor help master:config
             $ hodor master:help config     # alternate, works the same

          ].unindent(10)
          shell.say more_help
          result
        end
      end

      desc "config", "List all known variable expansions for the target Hadoop environment"
      def config
        env.settings.each_pair { |k, v|
          logger.info "#{k} : #{v}"
        }
      end

      desc "exec <arguments>", %q{
        Pass through command that executes its arguments on the remote master via ssh
      }.gsub(/^\s+/, "").strip
      long_desc <<-LONGDESC
        Executes the shell command on the remote host configured as the master,
        using SSH. The arguments passed to this command are passed directly
        through to the ssh command and executed as-is on the remote host.

        Example Usage:

        $ hodor master:exec hostname -I
      LONGDESC
      def exec
        # handled by intercept_dispatch
      end

      desc "ssh_config", "Echo the SSH connection string for the selected hadoop cluster"
      def ssh_config
        puts env.ssh_addr
      end
    end
  end
end
|
@@ -0,0 +1,399 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module Hodor
  module Cli

    class Appendix < Thor

    end

    # Thor task namespace for Oozie operations ("hodor oozie:<command>"):
    # navigating the coordinator/workflow/action job hierarchy, querying and
    # displaying jobs, and deploying/running/killing them.
    class Oozie < ::Hodor::Command
      no_tasks do

        # Facade module for Oozie session state and job navigation.
        def oozie
          ::Hodor::Oozie
        end

        # :jobs and :job are pure pass-throughs: forward trailing arguments
        # to the remote 'oozie jobs'/'oozie job' CLI instead of dispatching
        # to the (empty) method bodies.
        def intercept_dispatch(command, trailing)
          case command
          when :jobs
            hadoop_command("oozie jobs", trailing)
          when :job
            hadoop_command("oozie job", trailing)
          end
        end

        # Prepends the oozie overview topic and appends a "more help" footer
        # around Thor's standard help output.
        # NOTE(review): the footer's internal indentation was lost in
        # extraction; reconstructed to match the unindent(10) margin.
        def self.help(shell, subcommand = false)
          shell.print_wrapped(load_topic("overview"), indent: 0)
          result = super

          more_help = %Q[Getting More Help:
          ------------------
          To get detailed help on specific Oozie commands (i.e. display_job), run:

             $ hodor help oozie:display_job
             $ hodor oozie:help display_job     # alternate, works the same

          To view information on one of the Oozie topics (i.e. driver scenarios), run:

             $ hodor oozie:topic driver_scenarios

          And to see a list of Oozie topics that are available for display, run:

             $ hodor oozie:topics
          ].unindent(10)
          shell.say more_help
          result
        end
      end

      desc "jobs <arguments>", %q{
        Pass through command that executes its arguments on the remote master using 'oozie jobs <arguments>'
      }.gsub(/^\s+/, "").strip
      long_desc <<-LONGDESC
        Executes the 'oozie jobs' command on the remote master using SSH. The arguments
        passed to this command are pass through as-is to the SSH-based command-line.

        Example Usage:

        $ hodor oozie:jobs -oozie http://my.company.com:8080/oozie -localtime -len 2 -filter status=RUNNING
      LONGDESC
      def jobs
        # handled by intercept_dispatch
      end

      desc "job <arguments>", %q{
        Pass through command that executes its arguments on the remote master using 'oozie job <arguments>'
      }.gsub(/^\s+/, "").strip
      long_desc <<-LONGDESC
        Executes the 'oozie job' command on the remote master using SSH. The arguments
        passed to this command are pass through as-is to the SSH-based command-line.

        Example Usage:

        $ hodor oozie:job -oozie http://my.company.com:8080/oozie -start 14-20090525161321-oozie-joe
      LONGDESC
      def job
        # handled by intercept_dispatch
      end

      desc "change_job [JOB PATH]", %q{
        Changes to a different job within the hierarchy of Oozie jobs
      }.gsub(/^\s+/, "").strip
      long_desc %Q[
        The change_job command changes the "current_job" but does not display anything. Use
        the "display_job" command to display information about the job change_job
        just made current. The change_job command can of course take a job id as
        argument:
        \x5   $ hodor oozie:change_job 0004729-150629212824266-oozie-oozi-C

        However, other "special" arguments are also allowed:
        \x5   $ hodor oozie:change_job ..  # Change to parent of current job
          $ hodor oozie:change_job 3   # Change to the child with index 3
          $ hodor oozie:change_job /   # Change to list of topmost coordinators
                                       # same as oozie:coordinators

        Suggested Alias:
        \x5   $ alias cj='hodor oozie:change_job'
      ].unindent(8)
      def change_job(*job_path)
        oozie.session.verbose = options[:verbose]
        oozie.session.len = options[:len] if options[:len]
        oozie.session.offset = options[:offset] if options[:offset]
        oozie.change_job(job_path[0])
      end

      desc "display_job [JOB PATH] [attribute] [options]", %q{
        Display information about the 'current' job within the Oozie hierarchy of jobs
      }.gsub(/^\s+/, "").strip
      long_desc load_topic("display_job")
      method_option :query, type: :boolean, aliases: "-q", default: false,
        desc: "Only query the job for information, but do not change to it"
      method_option :verbose, type: :boolean, aliases: "-v",
        desc: "Display all available information"
      method_option :killed, type: :boolean, aliases: "-k",
        desc: "Only display killed coordinator materializations"
      # Fixed: was declared as :succeded (typo), so the -s flag never reached
      # the options[:succeeded] check below.
      method_option :succeeded, type: :boolean, aliases: "-s",
        desc: "Only display succeeded coordinator materializations"
      method_option :len, type: :numeric, aliases: "-l", default: nil,
        desc: "number of matching workflows to display"
      method_option :offset, type: :numeric, aliases: "-o", default: 0,
        desc: "The coordinator to start with in listing matching workflows"
      method_option :match, type: :array, aliases: "-m", default: nil,
        desc: "Array of words to match in config properties keys and values"
      method_option :write, type: :string, aliases: "-w", default: nil,
        desc: "Name of file to write the output of this command into"
      def display_job(*args)
        oozie.session.len = options[:len] if options[:len]
        oozie.session.offset = options[:offset] if options[:offset]
        query_mode = options[:query] || env.prefs[:display_job_query_mode]

        # Classify each argument as either a job id (several accepted forms)
        # or the "aspect" of the job to display (info, props, log, json, ...).
        job_id = "."
        aspect = "info"
        args.each { |arg|
          if arg =~ /^[0-9]{1,4}$/ ||                      # Index form of job id
             arg =~ /^[0-9]{5,8}\-[0-9]{10,18}\-oozie/ ||  # Oozie form
             arg =~ /job_[0-9]{5,20}/ ||                   # Hadoop mapred form
             arg.eql?('..') || arg.eql?('/')
            job_id = arg
          else
            aspect = arg
          end
        }

        if aspect.eql?("info")
          filter = []
          filter << :killed if options[:killed]
          filter << :succeeded if options[:succeeded]
          job = oozie.job_by_path(job_id, !query_mode, filter)
          table = ::Hodor::Table.new(job, options[:verbose], options[:match])
          doc = table.to_s
        else
          job = oozie.job_by_path(job_id, !query_mode)
        end

        if aspect.eql?("props") || aspect.eql?("conf")
          if options[:match]
            doc = job.conf_map.match(options[:match]).awesome_inspect(plain: !options[:write].nil?)
          else
            doc = job.conf_map.awesome_inspect(plain: !options[:write].nil?)
          end
        elsif aspect.eql?("log")
          doc = job.log
        elsif aspect.eql?("rest") || aspect.eql?("request") || aspect.eql?("call")
          say job.rest_call
        elsif aspect.eql?("json")
          json = job.json
          doc = "REST CALL = #{::Hodor::Oozie::Session.instance.last_query}"
          doc << ::JSON.pretty_generate(json)
        elsif aspect.eql?("def") || aspect.eql?("definition")
          doc = job.definition
        else
        end

        # Write to the requested file, or echo to the console.
        local_filename = options[:write]
        if !local_filename.nil?
          File.open(local_filename, 'w') { |f| f.write(doc) }
        else
          say doc
        end
      end

      desc "pwj", "Displays information about which job is 'current' within the hierarchy of Oozie jobs"
      def pwj
        say "Current Job ID: #{oozie.session.current_id || 'nil'}"
        say "Parent Job ID: #{oozie.session.parent_id || 'nil'}"
        say "Most Recent Job Query: #{oozie.session.root_query || 'nil'}"
      end

      desc "ssh_display_job [JOB_ID]", %q{
        Legacy version of display_job that is based on SSH, rather than REST
      }.gsub(/^\s+/, "").strip
      method_option :definition, type: :boolean, aliases: "-d",
        desc: "Display the definition of the specified job"
      method_option :info, type: :boolean, aliases: "-i",
        desc: "Display information about the specified job"
      method_option :log, type: :boolean, aliases: "-l",
        desc: "Display the log file for the specified job"
      method_option :configcontent, type: :boolean, aliases: "-c",
        desc: "Display the variable-expanded config for the specified job"
      def ssh_display_job(job_id)
        if job_id.start_with?('job_')
          # Hadoop mapred id: strip the prefix and any trailing non-numeric
          # garbage, then fetch the logs of the first map attempt.
          hadoop_id = job_id.sub('job_', '')
          trash = hadoop_id.index(/[^0-9_]/)
          hadoop_id = hadoop_id[0..trash - 1] if trash
          env.ssh "mapred job -logs job_#{hadoop_id} attempt_#{hadoop_id}_m_000000_0",
            echo: true, echo_cmd: true
        else
          # Normalize a workflow id unless an action suffix (-W@...) is given.
          job_id.sub!(/-W.*$/, '-W') unless job_id.include?('-W@')
          if options[:definition]
            logger.info "DEFINITION:"
            env.ssh "oozie job -oozie :oozie_url -definition #{job_id}",
              echo: true, echo_cmd: true
          elsif options[:log]
            logger.info "LOG:"
            env.ssh "oozie job -oozie :oozie_url -log #{job_id}",
              echo: true, echo_cmd: true
          elsif options[:configcontent]
            logger.info "CONFIG:"
            env.ssh "oozie job -oozie :oozie_url -configcontent #{job_id}",
              echo: true, echo_cmd: true
          else
            logger.info "INFO:"
            env.ssh "oozie job -oozie :oozie_url -info #{job_id}",
              echo: true, echo_cmd: true
          end
        end
      end

      desc "deploy_job", "Deploy to hdfs the directories that this job depends on"
      method_option :dryrun, type: :boolean, aliases: "-d", default: false,
        desc: "Don't actually deploy the files, just show what would be deployed"
      method_option :clean, type: :boolean, aliases: "-c", default: false,
        desc: "Clean the hdfs target before deploying this directory"
      method_option :verbose, type: :boolean, aliases: "-v", default: false,
        desc: "Log verbose details about which files are deployed and to where"
      method_option :maxdepth, type: :string, aliases: "-m", default: nil,
        desc: "The maximum number of directories deep to copy to HDFS"
      long_desc %Q[
        The deploy_job command reads the contents of the jobs.yml file located
        in your current directory, and deploys the paths specified by in the
        driver's "deploy" key. For a fuller explanation, view the "jobs.yml"
        topic, as follows:
        \x5   $ hodor oozie:topic jobs.yml
      ].unindent(8)
      def deploy_job(*driver)
        oozie.deploy_job(driver.length > 0 ? driver[0] : nil, options[:clean])
      end

      desc "run_job", "Run an oozie job on the target hadoop cluster"
      long_desc %Q[
        The run_job command reads the contents of the jobs.yml file located
        in your current directory, composes a job.properties file and submits
        the indicated driver workflow for execution by Oozie. If a job.properties
        or job.properties.erb file is provided, that file will be used to interpolate property values.
        For a fuller explanation, view the "jobs.yml" topic, as follows:
        \x5   $ hodor oozie:topic jobs.yml
      ].unindent(8)
      method_option :dry_run, type: :boolean, aliases: "-d", default: false,
        desc: "Generate computed job.properties file without running or deploying associated job."
      method_option :file_name_prefix, type: :string, aliases: '-p', default: '',
        desc: 'Add a prefix to job properties filename. This is primarily for use with :dry_run'
      def run_job(*args)
        outfile = oozie.run_job(args.length > 0 ? args[0] : nil, options[:dry_run], options[:file_name_prefix])
        logger.info "Dry run: the properties file is available for inspection at #{outfile}" if options[:dry_run]
      end

      desc "kill_job [JOB_ID]", "Kill the oozie job with the specified job id"
      def kill_job(*job_path)
        job = oozie.job_by_path(job_path[0])
        env.ssh "oozie job -oozie :oozie_url -kill #{job.id}",
          echo: true, echo_cmd: true
      end

      desc "reauth", "Remove cached auth tokens (sometimes necessary after an oozie restart)"
      def reauth
        ssh_command nil, "rm .oozie-auth-token"
      end

      desc "workflows", "List most recently run workflows, most recent first"
      method_option :verbose, type: :boolean, aliases: "-v",
        desc: "Display all available information"
      method_option :running, type: :boolean, aliases: "-r",
        desc: "Display running workflows"
      method_option :killed, type: :boolean, aliases: "-k",
        desc: "Display killed workflows"
      method_option :succeeded, type: :boolean, aliases: "-s",
        desc: "Display succeeded workflows"
      method_option :failed, type: :boolean, aliases: "-f",
        desc: "Display failed workflows"
      method_option :timedout, type: :boolean, aliases: "-t",
        desc: "Display timedout workflows"
      method_option :suspended, type: :boolean, aliases: "-p",
        desc: "Display suspended workflows"
      method_option :len, type: :numeric, aliases: "-l", default: 30,
        desc: "number of matching workflows to display"
      method_option :offset, type: :numeric, aliases: "-o", default: 0,
        desc: "The coordinator to start with in listing matching workflows"
      method_option :match, type: :string, aliases: "-m",
        desc: "Only display workflows that contain the given string as a substring"
      long_desc %Q[
        The workflows command uses its options to create a REST query for workflows
        that match your specification, and presents the results formatted as a table.

        Examples:
        \x5   $ hodor oozie:workflows                   # displays most recent workflows
          $ hodor oozie:workflows -v                # same as before, but verbose
          $ hodor oozie:workflows -r                # displays running workflows
          $ hodor oozie:workflows -r -s -k          # running, succeeded or killed
          $ hodor oozie:workflows -l 30 -o 30       # display second 30 most recent
          $ hodor oozie:workflows -m data_source    # display only matching workflows
      ].unindent(8)
      def workflows
        oozie.session.verbose = options[:verbose]
        # Build the status filter from the selected flags; default to
        # running-first ordering when no status flag was given.
        filter = {}
        filter[:jobtype] = "wf"
        filter[:status] = []
        filter[:status] << :running if options[:running]
        filter[:status] << :killed if options[:killed]
        filter[:status] << :succeeded if options[:succeeded]
        filter[:status] << :failed if options[:failed]
        filter[:status] << :timedout if options[:timedout]
        filter[:status] << :suspended if options[:suspended]
        filter[:status] << :running_first if filter[:status].empty?
        filter[:match] = options[:match] if options[:match]

        oozie.session.len = options[:len] if options[:len]
        oozie.session.offset = options[:offset] if options[:offset]

        result = ::Hodor::Oozie::Query.new(filter)
        table = ::Hodor::Table.new(result, options[:verbose])
        oozie.session.make_current(result)
        say table
      end

      desc "coordinators", "List most recently run coordinators, most recent first"
      method_option :verbose, type: :boolean, aliases: "-v",
        desc: "Display all available information"
      method_option :running, type: :boolean, aliases: "-r",
        desc: "Display running coordinators"
      method_option :killed, type: :boolean, aliases: "-k",
        desc: "Display killed coordinators"
      method_option :succeeded, type: :boolean, aliases: "-s",
        desc: "Display succeeded coordinators"
      method_option :failed, type: :boolean, aliases: "-f",
        desc: "Display failed coordinators"
      method_option :timedout, type: :boolean, aliases: "-t",
        desc: "Display timedout coordinators"
      method_option :suspended, type: :boolean, aliases: "-p",
        desc: "Display suspended coordinators"
      method_option :len, type: :numeric, aliases: "-l", default: 30,
        desc: "number of matching coordinators to display"
      method_option :offset, type: :numeric, aliases: "-o", default: 0,
        desc: "The coordinator to start with in listing matching coordinators"
      method_option :match, type: :string, aliases: "-m",
        desc: "Only display coordinators that contain the given string as a substring"
      long_desc %Q[
        The coordinators command uses its options to create a REST query for coordinators
        that match your specification, and presents the results formatted as a table.

        Examples:
        \x5   $ hodor oozie:coordinators                   # displays most recent coordinators
          $ hodor oozie:coordinators -v                # same as before, but verbose
          $ hodor oozie:coordinators -r                # displays running coordinators
          $ hodor oozie:coordinators -r -s -k          # running, succeeded or killed
          $ hodor oozie:coordinators -l 30 -o 30       # display second 30 most recent
          $ hodor oozie:coordinators -m data_source    # display only matching coordinators
      ].unindent(8)
      def coordinators
        oozie.session.verbose = options[:verbose]
        # Same filter construction as #workflows, but for coordinator jobs.
        filter = {}
        filter[:jobtype] = "coord"
        filter[:status] = []
        filter[:status] << :running if options[:running]
        filter[:status] << :killed if options[:killed]
        filter[:status] << :succeeded if options[:succeeded]
        filter[:status] << :failed if options[:failed]
        filter[:status] << :timedout if options[:timedout]
        filter[:status] << :suspended if options[:suspended]
        filter[:status] << :running_first if filter[:status].empty?
        filter[:match] = options[:match] if options[:match]

        oozie.session.len = options[:len] if options[:len]
        oozie.session.offset = options[:offset] if options[:offset]

        result = ::Hodor::Oozie::Query.new(filter)
        table = ::Hodor::Table.new(result, options[:verbose])
        oozie.session.make_current(result)
        say table
      end

      desc "bundles", "List most recently run bundles, most recent first"
      method_option :len, type: :numeric, aliases: "-l", default: 2,
        desc: "number of recent bundles to display"
      def bundles
        # Fixed: was "oozie:jobs ..." (hodor task syntax), which is not a
        # valid remote shell command; the remote CLI is 'oozie jobs'.
        env.ssh "oozie jobs -oozie :oozie_url -jobtype bundle -len #{options[:len]}",
          echo: true, echo_cmd: true
      end

    end
  end
end
|