hodor 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.gitmodules +3 -0
- data/.rspec +2 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/Guardfile +11 -0
- data/README.md +105 -0
- data/Rakefile +105 -0
- data/bin/hodor +18 -0
- data/hodor.gemspec +47 -0
- data/lib/config/log4r_config.xml +35 -0
- data/lib/hodor.rb +83 -0
- data/lib/hodor/api/hdfs.rb +222 -0
- data/lib/hodor/api/oozie.rb +215 -0
- data/lib/hodor/api/oozie/action.rb +52 -0
- data/lib/hodor/api/oozie/bundle.rb +27 -0
- data/lib/hodor/api/oozie/coordinator.rb +53 -0
- data/lib/hodor/api/oozie/hadoop_job.rb +29 -0
- data/lib/hodor/api/oozie/job.rb +192 -0
- data/lib/hodor/api/oozie/materialization.rb +56 -0
- data/lib/hodor/api/oozie/query.rb +115 -0
- data/lib/hodor/api/oozie/session.rb +170 -0
- data/lib/hodor/api/oozie/workflow.rb +58 -0
- data/lib/hodor/cli.rb +146 -0
- data/lib/hodor/command.rb +164 -0
- data/lib/hodor/configuration.rb +80 -0
- data/lib/hodor/environment.rb +437 -0
- data/lib/hodor/ui/table.rb +130 -0
- data/lib/hodor/version.rb +3 -0
- data/lib/tasks/hdfs.thor +138 -0
- data/lib/tasks/master.thor +61 -0
- data/lib/tasks/oozie.thor +399 -0
- data/lib/tasks/sandbox.thor +87 -0
- data/spec/integration/api/oozie/action_spec.rb +69 -0
- data/spec/integration/api/oozie/bundle_spec.rb +33 -0
- data/spec/integration/api/oozie/coordinator_spec.rb +66 -0
- data/spec/integration/api/oozie/hadoop_job_spec.rb +29 -0
- data/spec/integration/api/oozie/job_spec.rb +15 -0
- data/spec/integration/api/oozie/materialization_spec.rb +66 -0
- data/spec/integration/api/oozie/query_spec.rb +43 -0
- data/spec/integration/api/oozie/session_spec.rb +18 -0
- data/spec/integration/api/oozie/workflow_spec.rb +65 -0
- data/spec/integration/api/oozie_spec.rb +198 -0
- data/spec/integration/fixtures/api/running_coordinators/req_resp_00.memo +6 -0
- data/spec/integration/fixtures/api/sample_action/req_resp_00.memo +5 -0
- data/spec/integration/fixtures/api/sample_action/req_resp_01.memo +7 -0
- data/spec/integration/fixtures/api/sample_bundle/req_resp_00.memo +6 -0
- data/spec/integration/fixtures/api/sample_coordinator/req_resp_00.memo +5 -0
- data/spec/integration/fixtures/api/sample_materialization/req_resp_00.memo +5 -0
- data/spec/integration/fixtures/api/sample_materialization/req_resp_01.memo +7 -0
- data/spec/integration/fixtures/api/sample_workflow/req_resp_00.memo +5 -0
- data/spec/spec_helper.rb +92 -0
- data/spec/support/d_v_r.rb +125 -0
- data/spec/support/hodor_api.rb +15 -0
- data/spec/unit/hodor/api/hdfs_spec.rb +63 -0
- data/spec/unit/hodor/api/oozie_spec.rb +32 -0
- data/spec/unit/hodor/environment_spec.rb +52 -0
- data/topics/hdfs/corresponding_paths.txt +31 -0
- data/topics/hdfs/overview.txt +10 -0
- data/topics/master/clusters.yml.txt +36 -0
- data/topics/master/overview.txt +17 -0
- data/topics/oozie/blocking_coordinators.txt +46 -0
- data/topics/oozie/composing_job_properties.txt +68 -0
- data/topics/oozie/display_job.txt +52 -0
- data/topics/oozie/driver_scenarios.txt +42 -0
- data/topics/oozie/inspecting_jobs.txt +59 -0
- data/topics/oozie/jobs.yml.txt +185 -0
- data/topics/oozie/overview.txt +43 -0
- data/topics/oozie/workers_and_drivers.txt +40 -0
- metadata +455 -0
@@ -0,0 +1,130 @@
|
|
1
|
+
require 'terminal-table'

module Hodor

  # Formats an object for terminal display as a set of ASCII tables
  # (via the terminal-table gem): a "properties" table built from
  # object.display_properties, an optional "long properties" table for
  # values over 50 characters (verbose mode only), and a "children"
  # table built from object.display_children.
  class Table

    # object   - anything responding to #display_properties and
    #            #display_children (optionally #title and #session)
    # verbose  - include long/blank values; falls back to the session's
    #            verbosity when the object exposes a session
    # matching - optional array of substrings used by #to_s to filter
    #            which child rows are displayed
    def initialize(object, verbose = false, matching = nil)
      @verbose = verbose
      @verbose ||= object.session.verbose if object.respond_to?(:session)
      @matching = matching

      # Display properties first
      properties = object.display_properties
      @title = object.respond_to?(:title) ? object.title : "#{object.class.name} Properties"
      if properties
        rows = properties[:rows]
        if rows.length < 5
          # Few properties: a single two-column key/value table suffices.
          @prop_table = Terminal::Table.new(properties)
          @prop_table.align_column 0, :right
        else
          # Partition short ("terse") values from long ones (> 50 chars).
          # Non-verbose mode also drops nil/empty values.
          if @verbose
            terse_rows = rows.select { |row| row[1].length <= 50 }
          else
            terse_rows = rows.select { |row| !row[1].nil? && row[1].length > 0 && row[1].length <= 50 }
          end
          verbose_rows = rows.select { |row| row[1].length > 50 }.map { |row| normalize(row) }
          sorted_rows = terse_rows.sort_by { |row| -row[1].length }

          # Arrange terse rows into three side-by-side key/value column
          # pairs (six columns total) to keep the table compact.
          numrows = sorted_rows.length / 3
          arranged = []
          slen = terse_rows.length
          (0..numrows - 1).each { |rownum|
            mcol = []
            mcol += normalize(sorted_rows[rownum % numrows]) if slen > (rownum % numrows)
            mcol += normalize(sorted_rows[(rownum % numrows) + numrows]) if slen > (rownum % numrows) + numrows
            mcol += normalize(sorted_rows[(rownum % numrows) + 2 * numrows]) if slen > (rownum % numrows) + 2 * numrows
            arranged << mcol
          }

          if @verbose
            @long_table = Terminal::Table.new({ rows: verbose_rows })
            @long_table.align_column 0, :right
            @long_table.style = { border_y: ' ', border_x: " ", border_i: ' ' }
          end

          @prop_table = Terminal::Table.new({ rows: arranged })
          @prop_table.align_column 0, :right
          @prop_table.align_column 2, :right
          @prop_table.align_column 4, :right
          @prop_table.style = { border_y: ' ', border_x: " ", border_i: ' ' }
        end
      end

      # Next display the table of children
      rowcol = object.display_children
      if rowcol && rowcol[:rows] && rowcol[:rows].length > 0
        @child_table = Terminal::Table.new(rowcol)
        @child_table.align_column 0, :center
      else
        @child_table = Terminal::Table.new(rows: [[@prop_table ? "<< No Children >>" : "<< Empty Set >>"]])
        @child_table.align_column 0, :center
      end
    end

    # Converts a [key, value] row into display form: a snake_case key
    # becomes "Title Case:" and a blank value renders as '<nil>'.
    def normalize(row)
      [row[0].to_s.split('_').map { |word| word.capitalize }.join(' ') + ":", row[1].to_s.length > 0 ? row[1] : '<nil>']
    end

    # Returns the table text with every line indented by `count` spaces.
    def shift(table, count = 3)
      shifted = ""
      table.each_line { |line|
        shifted << " " * count + line
      }
      shifted
    end

    # Properties table as a string, shifted one space, with its first
    # line (the top border) stripped off.
    def properties
      output = shift(@prop_table ? @prop_table.to_s : '', 1)
      stripped_output = ""
      first_line = true
      output.each_line { |line|
        stripped_output << line unless first_line
        first_line = false
      }
      stripped_output.rstrip
    end

    # Long-value properties table (verbose mode only), shifted 4 spaces.
    def long_properties
      shift(@long_table ? @long_table.to_s : '', 4)
    end

    # Children table, shifted 4 spaces.
    def children
      shift(@child_table ? @child_table.to_s : '', 4)
    end

    # Full rendering: a centered title ruler, then properties, long
    # properties (verbose), then children. When @matching is set, child
    # rows are filtered to lines containing any match substring; the
    # first 3 lines and the last line (table frame/header) always show.
    def to_s
      prop_width = (properties.split("\n").first || "").length
      children_width = (children.split("\n").first || "").length
      title_width = @title.length
      # Fix: output was left undefined (nil) when both widths were 0,
      # which crashed on the `output +=` calls below.
      output = ""
      if prop_width > 0
        ruler = [((prop_width - title_width) / 2) - 5, 0].max
        output = " #{'-' * ruler} #{@title} #{'-' * ruler}\n"
      elsif children_width > 0
        ruler = [((children_width - title_width) / 2) - 5, 0].max
        output = " #{' ' * ruler} #{@title} #{' ' * ruler}\n"
      end
      output += properties + "\n" if @prop_table
      output += long_properties + "\n" if @long_table
      if @matching
        child_lines = children.split("\n")
        total_lines = child_lines.length
        child_lines.each_with_index { |line, index|
          matched = index < 3 || index == total_lines - 1
          @matching.each { |item|
            matched ||= line.include?(item)
          } unless matched
          output += "#{line}\n" if matched
        }
      else
        output += children
      end
      output
    end
  end
end
|
data/lib/tasks/hdfs.thor
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
require "hodor/api/hdfs"

module Hodor
  module Cli

    # Thor task namespace exposing HDFS operations (ls, put, get, rm, ...)
    # executed on the remote Hadoop master via SSH, mapping the local
    # working directory to its corresponding HDFS path.
    class Hdfs < ::Hodor::Command

      no_tasks do

        # Singleton HDFS API facade used by all tasks below.
        def hdfs
          ::Hodor::Hdfs.instance
        end

        # Routes the pass-through :fs command straight to 'hadoop fs' on
        # the remote master instead of dispatching to a Thor task.
        def intercept_dispatch(command, trailing)
          hadoop_command("hadoop fs", trailing) if command == :fs
        end

        def self.help(shell, subcommand = false)
          shell.print_wrapped(load_topic('overview'), indent: 0)
          result = super

          more_help = %Q[Getting More Help:
            ------------------
            To get detailed help on specific Hdfs commands (i.e. put_dir), run:

               $ hodor help hdfs:put_dir
               $ hodor hdfs:help put_dir    # alternate, works the same

          ].unindent(10)
          shell.say more_help
          result
        end
      end

      desc "fs <arguments>", %q{
        Passes its arguments as-is to remote host, and runs 'hadoop fs <arguments>'
      }.gsub(/^\s+/, "").strip
      long_desc <<-LONGDESC
        Executes the hadoop fs command on the remote host configured as the master,
        using SSH. The arguments passed to this command are passed directly
        through to the ssh command and executed as-is on the remote host. Because
        this a pass-through command, anything the remote tool can do, is available
        through this facility. So, refer to Apache's documentation on its 'hadoop fs'
        command line tool for details on the sub-commands and arguments it supports.

        Example:

           $ hodor hdfs:fs -ls -R /shared/jars

      LONGDESC
      def fs
        # handled by intercept_dispatch
      end

      desc "users", %q{
        Run an 'hdfs ls' command on the /user directory to list users on HDFS
      }.gsub(/^\s+/, "").strip
      def users
        env.ssh "hadoop fs -ls /user",
          echo: true, echo_cmd: true
      end

      desc "rm <filename>", "Removes <filename> from corresponding path on HDFS"
      def rm(filename)
        # Fix: the interpolation was garbled; log which file is removed.
        logger.info "Removing #{filename}"
        hdfs.rm(filename)
      end

      desc "rm_rf <directory>", "Recursively removes <directory> from corresponding path on HDFS"
      def rm_rf(path)
        logger.info "Removing directory #{path} recursively..."
        hdfs.rm_rf(path)
      end

      desc "ls [<paths> ...]", "Shows a directory listing of the corresponding path on HDFS"
      def ls(*paths)
        paths << "." if paths.length == 0
        hdfs_paths = paths.inject([]) { |memo, path|
          memo << hdfs.path_on_hdfs(path)
        }
        env.ssh "hadoop fs -ls #{hdfs_paths.join(' ')}",
          echo: true, echo_cmd: true
      end

      desc "cat", "Dump contents of file at the corresponding path on HDFS to STDOUT"
      def cat(filename)
        env.ssh "hadoop fs -cat #{hdfs.path_on_hdfs(filename)}",
          echo: true, echo_cmd: true
      end

      desc "put_dir <path>", "Uploads (recursively) the directory at <path> to corresponding path on HDFS"
      method_option :dryrun, type: :boolean, aliases: "-d", default: false,
        desc: "Don't actually deploy the files, just show what would be deployed"
      method_option :clean, type: :boolean, aliases: "-c", default: false,
        desc: "Clean the hdfs target before deploying this directory"
      method_option :verbose, type: :boolean, aliases: "-v", default: false,
        desc: "Log verbose details about which files are deployed and to where"
      method_option :maxdepth, type: :string, aliases: "-m", default: nil,
        desc: "The maximum number of directories deep to copy to HDFS"
      def put_dir(dirpath)
        hdfs.put_dir dirpath, options
      end

      desc "put <filename>", "Uploads <filename> to the corresponding path on HDFS"
      def put(filename)
        hdfs.put_file(filename)
      end

      desc "get <filename>", "Downloads <filename> from the corresponding path on HDFS"
      method_option :diff, type: :boolean, aliases: "-d", default: false,
        desc: "After downloading <filename>, a diff is run between local and remote versions"
      def get(filename)
        hdfs.get_file(filename)
        if options[:diff]
          # Fix: interpolations were garbled; compare the local file with
          # the freshly downloaded .hdfs_copy version.
          env.run_local %Q[diff #{filename} #{filename}.hdfs_copy], echo: true, echo_cmd: true
        end
      end

      desc "touchz", "Creates a file of zero length at the corresponding path on HDFS"
      def touchz(filename)
        env.ssh "hadoop fs -touchz #{hdfs.path_on_hdfs(filename)}",
          echo: true, echo_cmd: true
      end

      desc "pwd", "Displays both your local and HDFS working directories, and how they correspond"
      def pwd
        logger.info "Path on localhost : [#{env.path_on_disc('.')}]"
        logger.info "Path on Git repo  : [#{env.path_on_github('.')}]"
        logger.info "Path on HDFS      : [#{hdfs.path_on_hdfs('.')}]"
      end

      desc "path_of", "Displays the path of the specified file or directory"
      def path_of(path)
        logger.info "Path on local disc: [#{env.path_on_disc(path)}]"
        logger.info "Path on GitHub: [#{env.path_on_github(path)}]"
        logger.info "Path on HDFS: [#{hdfs.path_on_hdfs(path)}]"
      end
    end
  end
end
@@ -0,0 +1,61 @@
|
|
1
|
+
|
2
|
+
module Hodor
  module Cli

    # Thor task namespace for commands aimed at the Hadoop master host
    # itself: configuration inspection, SSH pass-through execution, and
    # connection info.
    class Master < ::Hodor::Command

      no_tasks do
        # Routes the pass-through :exec command to the remote master via
        # SSH instead of dispatching to a Thor task.
        def intercept_dispatch(command, trailing)
          case command
          when :exec
            hadoop_command("-T", trailing)
          end
        end

        def self.help(shell, subcommand = false)
          shell.print_wrapped(load_topic('overview'), indent: 0)
          result = super

          more_help = %Q[Getting More Help:
            ------------------
            To get detailed help on specific Master commands (i.e. config), run:

               $ hodor help master:config
               $ hodor master:help config    # alternate, works the same

          ].unindent(10)
          shell.say more_help
          result
        end
      end

      desc "config", "List all known variable expansions for the target Hadoop environment"
      def config
        env.settings.each_pair { |k, v|
          logger.info "#{k} : #{v}"
        }
      end

      desc "exec <arguments>", %q{
        Pass through command that executes its arguments on the remote master via ssh
      }.gsub(/^\s+/, "").strip
      long_desc <<-LONGDESC
        Executes the shell command on the remote host configured as the master,
        using SSH. The arguments passed to this command are passed directly
        through to the ssh command and executed as-is on the remote host.

        Example Usage:

           $ hodor master:exec hostname -I

      LONGDESC
      def exec
        # handled by intercept_dispatch
      end

      desc "ssh_config", "Echo the SSH connection string for the selected hadoop cluster"
      def ssh_config
        puts env.ssh_addr
      end
    end
  end
end
|
@@ -0,0 +1,399 @@
|
|
1
|
+
require 'json'

module Hodor
  module Cli

    # Placeholder Thor namespace referenced by the CLI's help system.
    class Appendix < Thor

    end

    # Thor task namespace for interacting with Oozie: navigating the job
    # hierarchy, displaying job details, and deploying/running jobs.
    class Oozie < ::Hodor::Command
      no_tasks do

        # Oozie API facade used by all tasks below.
        def oozie
          ::Hodor::Oozie
        end

        # Routes the pass-through :jobs/:job commands to the remote
        # 'oozie' CLI instead of dispatching to a Thor task.
        def intercept_dispatch(command, trailing)
          case command
          when :jobs
            hadoop_command("oozie jobs", trailing)
          when :job
            hadoop_command("oozie job", trailing)
          end
        end

        def self.help(shell, subcommand = false)
          shell.print_wrapped(load_topic("overview"), indent: 0)
          result = super

          more_help = %Q[Getting More Help:
            ------------------
            To get detailed help on specific Oozie commands (i.e. display_job), run:

               $ hodor help oozie:display_job
               $ hodor oozie:help display_job    # alternate, works the same

            To view information on one of the Oozie topics (i.e. driver scenarios), run:

               $ hodor oozie:topic driver_scenarios

            And to see a list of Oozie topics that are available for display, run:

               $ hodor oozie:topics
          ].unindent(10)
          shell.say more_help
          result
        end
      end

      desc "jobs <arguments>", %q{
        Pass through command that executes its arguments on the remote master using 'oozie jobs <arguments>'
      }.gsub(/^\s+/, "").strip
      long_desc <<-LONGDESC
        Executes the 'oozie jobs' command on the remote master using SSH. The arguments
        passed to this command are pass through as-is to the SSH-based command-line.

        Example Usage:

           $ hodor oozie:jobs -oozie http://my.company.com:8080/oozie -localtime -len 2 -filter status=RUNNING

      LONGDESC
      def jobs
        # handled by intercept_dispatch
      end

      desc "job <arguments>", %q{
        Pass through command that executes its arguments on the remote master using 'oozie job <arguments>'
      }.gsub(/^\s+/, "").strip
      long_desc <<-LONGDESC
        Executes the 'oozie job' command on the remote master using SSH. The arguments
        passed to this command are pass through as-is to the SSH-based command-line.

        Example Usage:

           $ hodor oozie:job -oozie http://my.company.com:8080/oozie -start 14-20090525161321-oozie-joe

      LONGDESC
      def job
        # handled by intercept_dispatch
      end

      desc "change_job [JOB PATH]", %q{
        Changes to a different job within the hierarchy of Oozie jobs
      }.gsub(/^\s+/, "").strip
      long_desc %Q[
        The change_job command changes the "current_job" but does not display anything. Use
        the "display_job" command to display information about the job change_job
        just made current. The change_job command can of course take a job id as
        argument:
        \x5   $ hodor oozie:change_job 0004729-150629212824266-oozie-oozi-C

        However, other "special" arguments are also allowed:
        \x5   $ hodor oozie:change_job ..   # Change to parent of current job
            $ hodor oozie:change_job 3    # Change to the child with index 3
            $ hodor oozie:change_job /    # Change to list of topmost coordinators
                                          #   same as oozie:coordinators

        Suggested Alias:
        \x5   $ alias cj='hodor oozie:change_job'
      ].unindent(8)
      def change_job(*job_path)
        oozie.session.verbose = options[:verbose]
        oozie.session.len = options[:len] if options[:len]
        oozie.session.offset = options[:offset] if options[:offset]
        oozie.change_job(job_path[0])
      end

      desc "display_job [JOB PATH] [attribute] [options]", %q{
        Display information about the 'current' job within the Oozie hierarchy of jobs
      }.gsub(/^\s+/, "").strip
      long_desc load_topic("display_job")
      method_option :query, type: :boolean, aliases: "-q", default: false,
        desc: "Only query the job for information, but do not change to it"
      method_option :verbose, type: :boolean, aliases: "-v",
        desc: "Display all available information"
      method_option :killed, type: :boolean, aliases: "-k",
        desc: "Only display killed coordinator materializations"
      # Fix: this option was misspelled :succeded, so the
      # options[:succeeded] check in the task body never saw it and the
      # -s flag silently had no effect.
      method_option :succeeded, type: :boolean, aliases: "-s",
        desc: "Only display succeeded coordinator materializations"
      method_option :len, type: :numeric, aliases: "-l", default: nil,
        desc: "number of matching workflows to display"
      method_option :offset, type: :numeric, aliases: "-o", default: 0,
        desc: "The coordinator to start with in listing matching workflows"
      method_option :match, type: :array, aliases: "-m", default: nil,
        desc: "Array of words to match in config properties keys and values"
      method_option :write, type: :string, aliases: "-w", default: nil,
        desc: "Name of file to write the output of this command into"
      def display_job(*args)
        oozie.session.len = options[:len] if options[:len]
        oozie.session.offset = options[:offset] if options[:offset]
        query_mode = options[:query] || env.prefs[:display_job_query_mode]

        # Arguments may arrive in any order: anything shaped like a job id
        # (child index, Oozie id, Hadoop mapred id, '..' or '/') selects
        # the job; any other word selects the aspect to display.
        job_id = "."
        aspect = "info"
        args.each { |arg|
          if arg =~ /^[0-9]{1,4}$/ ||                       # Index form of job id
             arg =~ /^[0-9]{5,8}\-[0-9]{10,18}\-oozie/ ||   # Oozie form
             arg =~ /job_[0-9]{5,20}/ ||                    # Hadoop mapred form
             arg.eql?('..') || arg.eql?('/')
            job_id = arg
          else
            aspect = arg
          end
        }

        if aspect.eql?("info")
          filter = []
          filter << :killed if options[:killed]
          filter << :succeeded if options[:succeeded]
          job = oozie.job_by_path(job_id, !query_mode, filter)
          table = ::Hodor::Table.new(job, options[:verbose], options[:match])
          doc = table.to_s
        else
          job = oozie.job_by_path(job_id, !query_mode)
        end

        if aspect.eql?("props") || aspect.eql?("conf")
          if options[:match]
            doc = job.conf_map.match(options[:match]).awesome_inspect(plain: !options[:write].nil?)
          else
            doc = job.conf_map.awesome_inspect(plain: !options[:write].nil?)
          end
        elsif aspect.eql?("log")
          doc = job.log
        elsif aspect.eql?("rest") || aspect.eql?("request") || aspect.eql?("call")
          say job.rest_call
        elsif aspect.eql?("json")
          json = job.json
          doc = "REST CALL = #{::Hodor::Oozie::Session.instance.last_query}"
          doc << ::JSON.pretty_generate(json)
        elsif aspect.eql?("def") || aspect.eql?("definition")
          doc = job.definition
        end

        local_filename = options[:write]
        if !local_filename.nil?
          File.open(local_filename, 'w') { |f| f.write(doc) }
        else
          say doc
        end
      end

      desc "pwj", "Displays information about which job is 'current' within the hierarchy of Oozie jobs"
      def pwj
        say "Current Job ID: #{oozie.session.current_id || 'nil'}"
        say "Parent Job ID: #{oozie.session.parent_id || 'nil'}"
        say "Most Recent Job Query: #{oozie.session.root_query || 'nil'}"
      end

      desc "ssh_display_job [JOB_ID]", %q{
        Legacy version of display_job that is based on SSH, rather than REST
      }.gsub(/^\s+/, "").strip
      method_option :definition, type: :boolean, aliases: "-d",
        desc: "Display the definition of the specified job"
      method_option :info, type: :boolean, aliases: "-i",
        desc: "Display information about the specified job"
      method_option :log, type: :boolean, aliases: "-l",
        desc: "Display the log file for the specified job"
      method_option :configcontent, type: :boolean, aliases: "-c",
        desc: "Display the variable-expanded config for the specified job"
      def ssh_display_job(job_id)
        if job_id.start_with?('job_')
          # Hadoop mapred id: strip the 'job_' prefix and any trailing
          # non-numeric garbage, then fetch the first map attempt's logs.
          hadoop_id = job_id.sub('job_', '')
          trash = hadoop_id.index(/[^0-9_]/)
          hadoop_id = hadoop_id[0..trash - 1] if trash
          env.ssh "mapred job -logs job_#{hadoop_id} attempt_#{hadoop_id}_m_000000_0",
            echo: true, echo_cmd: true
        else
          # Truncate workflow ids down to the '-W' root unless a specific
          # action ('-W@...') was requested.
          job_id.sub!(/-W.*$/, '-W') unless job_id.include?('-W@')
          if options[:definition]
            logger.info "DEFINITION:"
            env.ssh "oozie job -oozie :oozie_url -definition #{job_id}",
              echo: true, echo_cmd: true
          elsif options[:log]
            logger.info "LOG:"
            env.ssh "oozie job -oozie :oozie_url -log #{job_id}",
              echo: true, echo_cmd: true
          elsif options[:configcontent]
            logger.info "CONFIG:"
            env.ssh "oozie job -oozie :oozie_url -configcontent #{job_id}",
              echo: true, echo_cmd: true
          else
            logger.info "INFO:"
            env.ssh "oozie job -oozie :oozie_url -info #{job_id}",
              echo: true, echo_cmd: true
          end
        end
      end

      desc "deploy_job", "Deploy to hdfs the directories that this job depends on"
      method_option :dryrun, type: :boolean, aliases: "-d", default: false,
        desc: "Don't actually deploy the files, just show what would be deployed"
      method_option :clean, type: :boolean, aliases: "-c", default: false,
        desc: "Clean the hdfs target before deploying this directory"
      method_option :verbose, type: :boolean, aliases: "-v", default: false,
        desc: "Log verbose details about which files are deployed and to where"
      method_option :maxdepth, type: :string, aliases: "-m", default: nil,
        desc: "The maximum number of directories deep to copy to HDFS"
      long_desc %Q[
        The deploy_job command reads the contents of the jobs.yml file located
        in your current directory, and deploys the paths specified by in the
        driver's "deploy" key. For a fuller explanation, view the "jobs.yml"
        topic, as follows:
        \x5   $ hodor oozie:topic jobs.yml
      ].unindent(8)
      def deploy_job(*driver)
        oozie.deploy_job(driver.length > 0 ? driver[0] : nil, options[:clean])
      end

      desc "run_job", "Run an oozie job on the target hadoop cluster"
      long_desc %Q[
        The run_job command reads the contents of the jobs.yml file located
        in your current directory, composes a job.properties file and submits
        the indicated driver workflow for execution by Oozie. If a job.properties
        or job.properties.erb file is provided, that file will be used to interpolate property values.
        For a fuller explanation, view the "jobs.yml" topic, as follows:
        \x5   $ hodor oozie:topic jobs.yml
      ].unindent(8)
      method_option :dry_run, type: :boolean, aliases: "-d", default: false,
        desc: "Generate computed job.properties file without running or deploying associated job."
      method_option :file_name_prefix, type: :string, aliases: '-p', default: '',
        desc: 'Add a prefix to job properties filename. This is primarily for use with :dry_run'
      def run_job(*args)
        outfile = oozie.run_job(args.length > 0 ? args[0] : nil, options[:dry_run], options[:file_name_prefix])
        logger.info "Dry run: the properties file is available for inspection at #{outfile}" if options[:dry_run]
      end

      desc "kill_job [JOB_ID]", "Kill the oozie job with the specified job id"
      def kill_job(*job_path)
        job = oozie.job_by_path(job_path[0])
        env.ssh "oozie job -oozie :oozie_url -kill #{job.id}",
          echo: true, echo_cmd: true
      end

      desc "reauth", "Remove cached auth tokens (sometimes necessary after an oozie restart)"
      def reauth
        ssh_command nil, "rm .oozie-auth-token"
      end

      desc "workflows", "List most recently run workflows, most recent first"
      method_option :verbose, type: :boolean, aliases: "-v",
        desc: "Display all available information"
      method_option :running, type: :boolean, aliases: "-r",
        desc: "Display running workflows"
      method_option :killed, type: :boolean, aliases: "-k",
        desc: "Display killed workflows"
      method_option :succeeded, type: :boolean, aliases: "-s",
        desc: "Display succeeded workflows"
      method_option :failed, type: :boolean, aliases: "-f",
        desc: "Display failed workflows"
      method_option :timedout, type: :boolean, aliases: "-t",
        desc: "Display timedout workflows"
      method_option :suspended, type: :boolean, aliases: "-p",
        desc: "Display suspended workflows"
      method_option :len, type: :numeric, aliases: "-l", default: 30,
        desc: "number of matching workflows to display"
      method_option :offset, type: :numeric, aliases: "-o", default: 0,
        desc: "The coordinator to start with in listing matching workflows"
      method_option :match, type: :string, aliases: "-m",
        desc: "Only display workflows that contain the given string as a substring"
      long_desc %Q[
        The workflows command uses its options to create a REST query for workflows
        that match your specification, and presents the results formatted as a table.

        Examples:
        \x5   $ hodor oozie:workflows                   # displays most recent workflows
            $ hodor oozie:workflows -v                # same as before, but verbose
            $ hodor oozie:workflows -r                # displays running workflows
            $ hodor oozie:workflows -r -s -k          # running, succeeded or killed
            $ hodor oozie:workflows -l 30 -o 30       # display second 30 most recent
            $ hodor oozie:workflows -m data_source    # display only matching workflows
      ].unindent(8)
      def workflows
        oozie.session.verbose = options[:verbose]
        # Build the REST query filter from the status flags; default to
        # running-first ordering when no status flag was given.
        filter = {}
        filter[:jobtype] = "wf"
        filter[:status] = []
        filter[:status] << :running if options[:running]
        filter[:status] << :killed if options[:killed]
        filter[:status] << :succeeded if options[:succeeded]
        filter[:status] << :failed if options[:failed]
        filter[:status] << :timedout if options[:timedout]
        filter[:status] << :suspended if options[:suspended]
        filter[:status] << :running_first if filter[:status].empty?
        filter[:match] = options[:match] if options[:match]

        oozie.session.len = options[:len] if options[:len]
        oozie.session.offset = options[:offset] if options[:offset]

        result = ::Hodor::Oozie::Query.new(filter)
        table = ::Hodor::Table.new(result, options[:verbose])
        oozie.session.make_current(result)
        say table
      end

      desc "coordinators", "List most recently run coordinators, most recent first"
      method_option :verbose, type: :boolean, aliases: "-v",
        desc: "Display all available information"
      method_option :running, type: :boolean, aliases: "-r",
        desc: "Display running coordinators"
      method_option :killed, type: :boolean, aliases: "-k",
        desc: "Display killed coordinators"
      method_option :succeeded, type: :boolean, aliases: "-s",
        desc: "Display succeeded coordinators"
      method_option :failed, type: :boolean, aliases: "-f",
        desc: "Display failed coordinators"
      method_option :timedout, type: :boolean, aliases: "-t",
        desc: "Display timedout coordinators"
      method_option :suspended, type: :boolean, aliases: "-p",
        desc: "Display suspended coordinators"
      method_option :len, type: :numeric, aliases: "-l", default: 30,
        desc: "number of matching coordinators to display"
      method_option :offset, type: :numeric, aliases: "-o", default: 0,
        desc: "The coordinator to start with in listing matching coordinators"
      method_option :match, type: :string, aliases: "-m",
        desc: "Only display coordinators that contain the given string as a substring"
      long_desc %Q[
        The coordinators command uses its options to create a REST query for coordinators
        that match your specification, and presents the results formatted as a table.

        Examples:
        \x5   $ hodor oozie:coordinators                   # displays most recent coordinators
            $ hodor oozie:coordinators -v                # same as before, but verbose
            $ hodor oozie:coordinators -r                # displays running coordinators
            $ hodor oozie:coordinators -r -s -k          # running, succeeded or killed
            $ hodor oozie:coordinators -l 30 -o 30       # display second 30 most recent
            $ hodor oozie:coordinators -m data_source    # display only matching coordinators
      ].unindent(8)
      def coordinators
        oozie.session.verbose = options[:verbose]
        # Same filter construction as #workflows, but for coordinators.
        filter = {}
        filter[:jobtype] = "coord"
        filter[:status] = []
        filter[:status] << :running if options[:running]
        filter[:status] << :killed if options[:killed]
        filter[:status] << :succeeded if options[:succeeded]
        filter[:status] << :failed if options[:failed]
        filter[:status] << :timedout if options[:timedout]
        filter[:status] << :suspended if options[:suspended]
        filter[:status] << :running_first if filter[:status].empty?
        filter[:match] = options[:match] if options[:match]

        oozie.session.len = options[:len] if options[:len]
        oozie.session.offset = options[:offset] if options[:offset]

        result = ::Hodor::Oozie::Query.new(filter)
        table = ::Hodor::Table.new(result, options[:verbose])
        oozie.session.make_current(result)
        say table
      end

      desc "bundles", "List most recently run bundles, most recent first"
      method_option :len, type: :numeric, aliases: "-l", default: 2,
        desc: "number of recent bundles to display"
      def bundles
        # Fix: the remote shell command must be 'oozie jobs' (the Oozie
        # CLI, as used by the :jobs pass-through above); 'oozie:jobs' is
        # Hodor's own task syntax and is not a valid remote command.
        env.ssh "oozie jobs -oozie :oozie_url -jobtype bundle -len #{options[:len]}",
          echo: true, echo_cmd: true
      end

    end
  end
end
|