hodor 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +5 -0
  8. data/Gemfile +4 -0
  9. data/Guardfile +11 -0
  10. data/README.md +105 -0
  11. data/Rakefile +105 -0
  12. data/bin/hodor +18 -0
  13. data/hodor.gemspec +47 -0
  14. data/lib/config/log4r_config.xml +35 -0
  15. data/lib/hodor.rb +83 -0
  16. data/lib/hodor/api/hdfs.rb +222 -0
  17. data/lib/hodor/api/oozie.rb +215 -0
  18. data/lib/hodor/api/oozie/action.rb +52 -0
  19. data/lib/hodor/api/oozie/bundle.rb +27 -0
  20. data/lib/hodor/api/oozie/coordinator.rb +53 -0
  21. data/lib/hodor/api/oozie/hadoop_job.rb +29 -0
  22. data/lib/hodor/api/oozie/job.rb +192 -0
  23. data/lib/hodor/api/oozie/materialization.rb +56 -0
  24. data/lib/hodor/api/oozie/query.rb +115 -0
  25. data/lib/hodor/api/oozie/session.rb +170 -0
  26. data/lib/hodor/api/oozie/workflow.rb +58 -0
  27. data/lib/hodor/cli.rb +146 -0
  28. data/lib/hodor/command.rb +164 -0
  29. data/lib/hodor/configuration.rb +80 -0
  30. data/lib/hodor/environment.rb +437 -0
  31. data/lib/hodor/ui/table.rb +130 -0
  32. data/lib/hodor/version.rb +3 -0
  33. data/lib/tasks/hdfs.thor +138 -0
  34. data/lib/tasks/master.thor +61 -0
  35. data/lib/tasks/oozie.thor +399 -0
  36. data/lib/tasks/sandbox.thor +87 -0
  37. data/spec/integration/api/oozie/action_spec.rb +69 -0
  38. data/spec/integration/api/oozie/bundle_spec.rb +33 -0
  39. data/spec/integration/api/oozie/coordinator_spec.rb +66 -0
  40. data/spec/integration/api/oozie/hadoop_job_spec.rb +29 -0
  41. data/spec/integration/api/oozie/job_spec.rb +15 -0
  42. data/spec/integration/api/oozie/materialization_spec.rb +66 -0
  43. data/spec/integration/api/oozie/query_spec.rb +43 -0
  44. data/spec/integration/api/oozie/session_spec.rb +18 -0
  45. data/spec/integration/api/oozie/workflow_spec.rb +65 -0
  46. data/spec/integration/api/oozie_spec.rb +198 -0
  47. data/spec/integration/fixtures/api/running_coordinators/req_resp_00.memo +6 -0
  48. data/spec/integration/fixtures/api/sample_action/req_resp_00.memo +5 -0
  49. data/spec/integration/fixtures/api/sample_action/req_resp_01.memo +7 -0
  50. data/spec/integration/fixtures/api/sample_bundle/req_resp_00.memo +6 -0
  51. data/spec/integration/fixtures/api/sample_coordinator/req_resp_00.memo +5 -0
  52. data/spec/integration/fixtures/api/sample_materialization/req_resp_00.memo +5 -0
  53. data/spec/integration/fixtures/api/sample_materialization/req_resp_01.memo +7 -0
  54. data/spec/integration/fixtures/api/sample_workflow/req_resp_00.memo +5 -0
  55. data/spec/spec_helper.rb +92 -0
  56. data/spec/support/d_v_r.rb +125 -0
  57. data/spec/support/hodor_api.rb +15 -0
  58. data/spec/unit/hodor/api/hdfs_spec.rb +63 -0
  59. data/spec/unit/hodor/api/oozie_spec.rb +32 -0
  60. data/spec/unit/hodor/environment_spec.rb +52 -0
  61. data/topics/hdfs/corresponding_paths.txt +31 -0
  62. data/topics/hdfs/overview.txt +10 -0
  63. data/topics/master/clusters.yml.txt +36 -0
  64. data/topics/master/overview.txt +17 -0
  65. data/topics/oozie/blocking_coordinators.txt +46 -0
  66. data/topics/oozie/composing_job_properties.txt +68 -0
  67. data/topics/oozie/display_job.txt +52 -0
  68. data/topics/oozie/driver_scenarios.txt +42 -0
  69. data/topics/oozie/inspecting_jobs.txt +59 -0
  70. data/topics/oozie/jobs.yml.txt +185 -0
  71. data/topics/oozie/overview.txt +43 -0
  72. data/topics/oozie/workers_and_drivers.txt +40 -0
  73. metadata +455 -0
@@ -0,0 +1,130 @@
1
+ require 'terminal-table'
2
+
3
module Hodor
  # Renders a displayable object as plain text: a title line, a table of the
  # object's properties and a table of its children.
  #
  # The object must respond to +display_properties+ and +display_children+.
  # Each may return nil or a hash that is handed to Terminal::Table.new; this
  # class itself only reads the hash's :rows entry (an array of [key, value]
  # pairs for properties). The object may optionally respond to +title+ and
  # +session+.
  class Table
    # Property values longer than this are left out of the compact table and
    # shown in the separate "long" table instead (verbose mode only).
    MAX_TERSE_LENGTH = 50

    # Number of key/value pairs placed side by side in the compact table.
    PROPERTY_COLUMNS = 3

    # @param object   the object to render (see class comment)
    # @param verbose  when truthy, empty values and long values are shown too;
    #                 falls back to object.session.verbose when not set
    # @param matching optional array of strings; when given, #to_s only prints
    #                 child lines that contain one of them
    def initialize(object, verbose = false, matching = nil)
      @verbose = verbose
      @verbose ||= object.session.verbose if object.respond_to?(:session)
      @matching = matching

      # Display properties first
      properties = object.display_properties
      @title = object.respond_to?(:title) ? object.title : "#{object.class.name} Properties"
      build_property_tables(properties) if properties

      # Next display the table of children
      build_child_table(object.display_children)
    end

    # Turns a raw [key, value] row into a display row: the key is humanized
    # ("job_id" -> "Job Id:") and a blank value is replaced by '<nil>'.
    def normalize(row)
      label = row[0].to_s.split('_').map { |word| word.capitalize }.join(' ') + ":"
      [label, row[1].to_s.length > 0 ? row[1] : '<nil>']
    end

    # Returns +table+ with every line prefixed by +count+ spaces.
    def shift(table, count = 3)
      table.each_line.map { |line| " " * count + line }.join
    end

    # The compact property table as text, indented by one space, without its
    # first line (the table's top border) and without trailing whitespace.
    # Returns an empty string when there is no property table.
    def properties
      text = @prop_table ? @prop_table.to_s : ''
      shift(text, 1).lines.drop(1).join.rstrip
    end

    # The table of long property values, indented by four spaces
    # (empty string when not in verbose mode).
    def long_properties
      shift(@long_table ? @long_table.to_s : '', 4)
    end

    # The table of children, indented by four spaces.
    def children
      shift(@child_table ? @child_table.to_s : '', 4)
    end

    # Full textual rendering: title line, properties, long properties and
    # children. With a +matching+ list, a child line is kept only when it is
    # one of the first three lines, the last line, or contains a match.
    def to_s
      prop_width = (properties.split("\n").first || "").length
      children_width = (children.split("\n").first || "").length

      output = title_line(prop_width, children_width)
      output += properties + "\n" if @prop_table
      output += long_properties + "\n" if @long_table
      return output + children unless @matching

      child_lines = children.split("\n")
      last_index = child_lines.length - 1
      child_lines.each_with_index do |line, index|
        # presumably the first three lines are the top border, heading and
        # separator of the child table, and the last line its bottom border
        keep = index < 3 || index == last_index ||
               @matching.any? { |item| line.include?(item) }
        output += "#{line}\n" if keep
      end
      output
    end

    private

    # Length of a row's value as displayed; nil and non-String values are
    # measured through to_s so they cannot raise.
    def value_length(row)
      row[1].to_s.length
    end

    # Builds @prop_table (and @long_table in verbose mode) from the
    # properties hash. Fewer than five rows are shown as given; otherwise the
    # short values are packed into a multi-column table, longest value first.
    def build_property_tables(properties)
      rows = properties[:rows]
      if rows.length < 5
        @prop_table = Terminal::Table.new(properties)
        @prop_table.align_column 0, :right
        return
      end

      # Blank values are only worth showing in verbose mode.
      terse_rows = rows.select do |row|
        length = value_length(row)
        length <= MAX_TERSE_LENGTH && (@verbose || length > 0)
      end
      sorted_rows = terse_rows.sort_by { |row| -value_length(row) }

      if @verbose
        verbose_rows = rows.select { |row| value_length(row) > MAX_TERSE_LENGTH }
                           .map { |row| normalize(row) }
        @long_table = Terminal::Table.new({ rows: verbose_rows })
        @long_table.align_column 0, :right
        @long_table.style = {border_y: ' ', border_x: " ", border_i: ' ' }
      end

      @prop_table = Terminal::Table.new({ rows: arrange_in_columns(sorted_rows) })
      @prop_table.align_column 0, :right
      @prop_table.align_column 2, :right
      @prop_table.align_column 4, :right
      @prop_table.style = {border_y: ' ', border_x: " ", border_i: ' ' }
    end

    # Lays +rows+ out column by column over PROPERTY_COLUMNS key/value
    # columns. The row count is rounded up so no property is dropped, and
    # short columns are padded with blank cells so every table row has the
    # same number of cells.
    def arrange_in_columns(rows)
      return [] if rows.empty?

      per_column = (rows.length + PROPERTY_COLUMNS - 1) / PROPERTY_COLUMNS
      (0...per_column).map do |rownum|
        (0...PROPERTY_COLUMNS).flat_map do |colnum|
          row = rows[rownum + colnum * per_column]
          row ? normalize(row) : ['', '']
        end
      end
    end

    # Builds @child_table from the children hash, or a one-cell placeholder
    # table when there are no child rows.
    def build_child_table(rowcol)
      if rowcol && rowcol[:rows] && rowcol[:rows].length > 0
        @child_table = Terminal::Table.new(rowcol)
      else
        placeholder = @prop_table ? "<< No Children >>" : "<< Empty Set >>"
        @child_table = Terminal::Table.new({ rows: [[placeholder]] })
      end
      @child_table.align_column 0, :center
    end

    # Title line roughly centred over whichever table is present: dashes over
    # the property table, spaces over the child table. Empty when neither
    # table has any width.
    def title_line(prop_width, children_width)
      if prop_width > 0
        ruler = [((prop_width - @title.length) / 2).to_i - 5, 0].max
        " #{'-' * ruler} #{@title} #{'-' * ruler}\n"
      elsif children_width > 0
        ruler = [((children_width - @title.length) / 2).to_i - 5, 0].max
        " #{' ' * ruler} #{@title} #{' ' * ruler}\n"
      else
        ""
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module Hodor
2
+ VERSION = "1.0.2"
3
+ end
@@ -0,0 +1,138 @@
1
+ require "hodor/api/hdfs"
2
+
3
+ module Hodor
4
+ module Cli
5
+ class Hdfs < ::Hodor::Command
6
+
7
+ no_tasks do
8
+
9
+ def hdfs
10
+ ::Hodor::Hdfs.instance
11
+ end
12
+
13
+ def intercept_dispatch(command, trailing)
14
+ hadoop_command("hadoop fs", trailing) if command == :fs
15
+ end
16
+
17
+ def self.help(shell, subcommand = false)
18
+ shell.print_wrapped(load_topic('overview'), indent: 0)
19
+ result = super
20
+
21
+ more_help = %Q[Getting More Help:
22
+ ------------------
23
+ To get detailed help on specific Hdfs commands (i.e. put_dir), run:
24
+
25
+ $ hodor help hdfs:put_dir
26
+ $ hodor hdfs:help put_dir # alternate, works the same
27
+
28
+ ].unindent(10)
29
+ shell.say more_help
30
+ result
31
+ end
32
+ end
33
+
34
desc "fs <arguments>", %q{
  Passes its arguments as-is to remote host, and runs 'hadoop fs <arguments>'
}.gsub(/^\s+/, "").strip
long_desc <<-LONGDESC
  Executes the hadoop fs command on the remote host configured as the master,
  using SSH. The arguments passed to this command are passed directly
  through to the ssh command and executed as-is on the remote host. Because
  this is a pass-through command, anything the remote tool can do, is available
  through this facility. So, refer to Apache's documentation on its 'hadoop fs'
  command line tool for details on the sub-commands and arguments it supports.

  Example:

  $ hodor hdfs:fs -ls -R /shared/jars
LONGDESC
# Intentionally empty: this task exists so Thor lists and documents it.
# intercept_dispatch in this class forwards the :fs command and its trailing
# arguments to hadoop_command("hadoop fs", ...).
def fs
  # handled by intercept_dispatch
end
52
+
53
+ desc "users", %q{
54
+ Run an 'hdfs ls' command on the /user directory to list users on HDFS
55
+ }.gsub(/^\s+/, "").strip
56
+ def users
57
+ env.ssh "hadoop fs -ls /user",
58
+ echo: true, echo_cmd: true
59
+ end
60
+
61
+ desc "rm <filename>", "Removes <filename> from corresponding path on HDFS"
62
+ def rm(filename)
63
+ logger.info "Removing #{filename}"
64
+ hdfs.rm(filename)
65
+ end
66
+
67
+ desc "rm_rf <directory>", "Recursively removes <directory> from corresponding path on HDFS"
68
+ def rm_rf(path)
69
+ logger.info "Removing directory #{path} recursively..."
70
+ hdfs.rm_rf(path)
71
+ end
72
+
73
desc "ls [<paths> ...]", "Shows a directory listing of the corresponding path on HDFS"
# Translates each given path (or "." when none is given) with
# hdfs.path_on_hdfs and runs a single 'hadoop fs -ls' over ssh for all of
# them, echoing both the command and its output.
def ls(*paths)
  paths.push(".") if paths.empty?
  remote_paths = paths.map { |local_path| hdfs.path_on_hdfs(local_path) }
  env.ssh "hadoop fs -ls #{remote_paths.join(' ')}",
    echo: true, echo_cmd: true
end
82
+
83
+ desc "cat", "Dump contents of file at the corresponding path on HDFS to STDOUT"
84
+ def cat(filename)
85
+ env.ssh "hadoop fs -cat #{hdfs.path_on_hdfs(filename)}",
86
+ echo: true, echo_cmd: true
87
+ end
88
+
89
+ desc "put_dir <path>", "Uploads (recursively) the directory at <path> to corresponding path on HDFS"
90
+ method_option :dryrun, type: :boolean, aliases: "-d", default: false,
91
+ desc: "Don't actually deploy the files, just show what would be deployed"
92
+ method_option :clean, type: :boolean, aliases: "-c", default: false,
93
+ desc: "Clean the hdfs target before deploying this directory"
94
+ method_option :verbose, type: :boolean, aliases: "-v", default: false,
95
+ desc: "Log verbose details about which files are deployed and to where"
96
+ method_option :maxdepth, type: :string, aliases: "-m", default: nil,
97
+ desc: "The maximum number of directories deep to copy to HDFS"
98
+ def put_dir(dirpath)
99
+ hdfs.put_dir dirpath, options
100
+ end
101
+
102
+ desc "put <filename>", "Uploads <filename> to the corresponding path on HDFS"
103
+ def put(filename)
104
+ hdfs.put_file(filename)
105
+ end
106
+
107
+ desc "get <filename>", "Downloads <filename> from the corresponding path on HDFS"
108
+ method_option :diff, type: :boolean, aliases: "-d", default: false,
109
+ desc: "After downloading <filename>, a diff is run between local and remote versions"
110
+ def get(filename)
111
+ hdfs.get_file(filename)
112
+ if options[:diff]
113
+ env.run_local %Q[diff #{filename} #{filename}.hdfs_copy], echo: true, echo_cmd: true
114
+ end
115
+ end
116
+
117
+ desc "touchz", "Creates a file of zero length at the corresponding path on HDFS"
118
+ def touchz(filename)
119
+ env.ssh "hadoop fs -touchz #{hdfs.path_on_hdfs(filename)}",
120
+ echo: true, echo_cmd: true
121
+ end
122
+
123
+ desc "pwd", "Displays both your local and HDFS working directories, and how they correspond"
124
+ def pwd
125
+ logger.info "Path on localhost : [#{env.path_on_disc('.')}]"
126
+ logger.info "Path on Git repo : [#{env.path_on_github('.')}]"
127
+ logger.info "Path on HDFS : [#{hdfs.path_on_hdfs('.')}]"
128
+ end
129
+
130
+ desc "path_of", "Displays the path of the specified file or directory"
131
+ def path_of(path)
132
+ logger.info "Path on local disc: [#{env.path_on_disc(path)}]"
133
+ logger.info "Path on GitHub: [#{env.path_on_github(path)}]"
134
+ logger.info "Path on HDFS: [#{hdfs.path_on_hdfs(path)}]"
135
+ end
136
+ end
137
+ end
138
+ end
@@ -0,0 +1,61 @@
1
+
2
+ module Hodor
3
+ module Cli
4
+
5
+ class Master < ::Hodor::Command
6
+
7
+ no_tasks do
8
+ def intercept_dispatch(command, trailing)
9
+ case command
10
+ when :exec
11
+ hadoop_command("-T", trailing)
12
+ end
13
+ end
14
+
15
+ def self.help(shell, subcommand = false)
16
+ shell.print_wrapped(load_topic('overview'), indent: 0)
17
+ result = super
18
+
19
+ more_help = %Q[Getting More Help:
20
+ ------------------
21
+ To get detailed help on specific Master commands (i.e. config), run:
22
+
23
+ $ hodor help master:config
24
+ $ hodor master:help config # alternate, works the same
25
+
26
+ ].unindent(10)
27
+ shell.say more_help
28
+ result
29
+ end
30
+ end
31
+
32
+ desc "config", "List all known variable expansions for the target Hadoop environment"
33
+ def config
34
+ env.settings.each_pair { |k,v|
35
+ logger.info "#{k} : #{v}"
36
+ }
37
+ end
38
+
39
desc "exec <arguments>", %q{
  Pass through command that executes its arguments on the remote master via ssh
}.gsub(/^\s+/, "").strip
long_desc <<-LONGDESC
  Executes the shell command on the remote host configured as the master,
  using SSH. The arguments passed to this command are passed directly
  through to the ssh command and executed as-is on the remote host.

  Example Usage:

  $ hodor master:exec hostname -I
LONGDESC
# Intentionally empty: this task exists so Thor lists and documents it.
# intercept_dispatch in this class forwards the :exec command and its
# trailing arguments to hadoop_command("-T", ...).
def exec
  # handled by intercept_dispatch
end
54
+
55
+ desc "ssh_config", "Echo the SSH connection string for the selected hadoop cluster"
56
+ def ssh_config
57
+ puts env.ssh_addr
58
+ end
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,399 @@
1
+ require 'json'
2
+
3
+ module Hodor
4
+ module Cli
5
+
6
+ class Appendix < Thor
7
+
8
+ end
9
+
10
+ class Oozie < ::Hodor::Command
11
+ no_tasks do
12
+
13
+ def oozie
14
+ ::Hodor::Oozie
15
+ end
16
+
17
+ def intercept_dispatch(command, trailing)
18
+ case command
19
+ when :jobs
20
+ hadoop_command("oozie jobs", trailing)
21
+ when :job
22
+ hadoop_command("oozie job", trailing)
23
+ end
24
+ end
25
+
26
+ def self.help(shell, subcommand = false)
27
+ shell.print_wrapped(load_topic("overview"), indent: 0)
28
+ result = super
29
+
30
+ more_help = %Q[Getting More Help:
31
+ ------------------
32
+ To get detailed help on specific Oozie commands (i.e. display_job), run:
33
+
34
+ $ hodor help oozie:display_job
35
+ $ hodor oozie:help display_job # alternate, works the same
36
+
37
+ To view information on one of the Oozie topics (i.e. driver scenarios), run:
38
+
39
+ $ hodor oozie:topic driver_scenarios
40
+
41
+ And to see a list of Oozie topics that are available for display, run:
42
+
43
+ $ hodor oozie:topics
44
+ ].unindent(10)
45
+ shell.say more_help
46
+ result
47
+ end
48
+ end
49
+
50
desc "jobs <arguments>", %q{
  Pass through command that executes its arguments on the remote master using 'oozie jobs <arguments>'
}.gsub(/^\s+/, "").strip
long_desc <<-LONGDESC
  Executes the 'oozie jobs' command on the remote master using SSH. The arguments
  passed to this command are passed through as-is to the SSH-based command-line.

  Example Usage:

  $ hodor oozie:jobs -oozie http://my.company.com:8080/oozie -localtime -len 2 -filter status=RUNNING
LONGDESC
# Intentionally empty: this task exists so Thor lists and documents it.
# intercept_dispatch in this class forwards the :jobs command and its
# trailing arguments to hadoop_command("oozie jobs", ...).
def jobs
  # handled by intercept_dispatch
end
64
+
65
desc "job <arguments>", %q{
  Pass through command that executes its arguments on the remote master using 'oozie job <arguments>'
}.gsub(/^\s+/, "").strip
long_desc <<-LONGDESC
  Executes the 'oozie job' command on the remote master using SSH. The arguments
  passed to this command are passed through as-is to the SSH-based command-line.

  Example Usage:

  $ hodor oozie:job -oozie http://my.company.com:8080/oozie -start 14-20090525161321-oozie-joe
LONGDESC
# Intentionally empty: this task exists so Thor lists and documents it.
# intercept_dispatch in this class forwards the :job command and its
# trailing arguments to hadoop_command("oozie job", ...).
def job
  # handled by intercept_dispatch
end
79
+
80
+ desc "change_job [JOB PATH]", %q{
81
+ Changes to a different job within the hierarhcy of Oozie jobs
82
+ }.gsub(/^\s+/, "").strip
83
+ long_desc %Q[
84
+ The change_job command changes the "current_job" but does not display anything. Use
85
+ the "display_job" command to display information about the job change_job
86
+ just made current. The change_job command can of course take a job id as
87
+ argument:
88
+ \x5 $ hodor oozie:change_job 0004729-150629212824266-oozie-oozi-C
89
+
90
+ However, other "special" arguments are also allowed:
91
+ \x5 $ hodor oozie:change_job .. # Change to parent of current job
92
+ $ hodor oozie:change_job 3 # Change to the child with index 3
93
+ $ hodor oozie:change_job / # Change to list of topmost coordinators
94
+ # same as oozie:coordinators
95
+
96
+ Suggested Alias:
97
+ \x5 $ alias cj='hodor oozie:change_job'
98
+ ].unindent(8)
99
+ def change_job(*job_path)
100
+ oozie.session.verbose = options[:verbose]
101
+ oozie.session.len = options[:len] if options[:len]
102
+ oozie.session.offset = options[:offset] if options[:offset]
103
+ oozie.change_job(job_path[0])
104
+ end
105
+
106
desc "display_job [JOB PATH] [attribute] [options]", %q{
  Display information about the 'current' job within the Oozie hierarchy of jobs
}.gsub(/^\s+/, "").strip
long_desc load_topic("display_job")
method_option :query, type: :boolean, aliases: "-q", default: false,
  desc: "Only query the job for information, but do not change to it"
method_option :verbose, type: :boolean, aliases: "-v",
  desc: "Display all available information"
method_option :killed, type: :boolean, aliases: "-k",
  desc: "Only display killed coordinator materializations"
method_option :succeeded, type: :boolean, aliases: "-s",
  desc: "Only display succeeded coordinator materializations"
# The option used to be declared under this misspelled name while the code
# read options[:succeeded], so it never took effect. The old long form is
# still accepted so existing command lines keep working.
method_option :succeded, type: :boolean, hide: true,
  desc: "Deprecated misspelling of --succeeded"
method_option :len, type: :numeric, aliases: "-l", default: nil,
  desc: "number of matching workflows to display"
method_option :offset, type: :numeric, aliases: "-o", default: 0,
  desc: "The coordinator to start with in listing matching workflows"
method_option :match, type: :array, aliases: "-m", default: nil,
  desc: "Array of words to match in config properties keys and values"
method_option :write, type: :string, aliases: "-w", default: nil,
  desc: "Name of file to write the output of this command into"
# Displays one aspect of a job.
#
# Each positional argument is classified as either a job id (a 1-4 digit
# child index, an Oozie id, a Hadoop "job_..." id, ".." or "/") or an aspect
# name; the last one of each kind wins. Defaults: job id ".", aspect "info".
#
# Recognised aspects: info, props/conf, log, rest/request/call, json,
# def/definition. The resulting text is written to the --write file when
# given, otherwise printed with `say`.
def display_job(*args)
  oozie.session.len = options[:len] if options[:len]
  oozie.session.offset = options[:offset] if options[:offset]
  query_mode = options[:query] || env.prefs[:display_job_query_mode]

  job_id = "."
  aspect = "info"
  args.each do |arg|
    if arg =~ /^[0-9]{1,4}$/ ||                         # Index form of job id
       arg =~ /^[0-9]{5,8}\-[0-9]{10,18}\-oozie/ ||     # Oozie form
       arg =~ /job_[0-9]{5,20}/ ||                      # Hadoop mapred form
       arg.eql?('..') || arg.eql?('/')
      job_id = arg
    else
      aspect = arg
    end
  end

  plain = !options[:write].nil? # no colour codes when writing to a file
  doc = nil
  if aspect == "info"
    filter = []
    filter << :killed if options[:killed]
    filter << :succeeded if options[:succeeded] || options[:succeded]
    job = oozie.job_by_path(job_id, !query_mode, filter)
    doc = ::Hodor::Table.new(job, options[:verbose], options[:match]).to_s
  else
    job = oozie.job_by_path(job_id, !query_mode)
    case aspect
    when "props", "conf"
      conf = options[:match] ? job.conf_map.match(options[:match]) : job.conf_map
      doc = conf.awesome_inspect(plain: plain)
    when "log"
      doc = job.log
    when "rest", "request", "call"
      # Routed through doc (instead of an immediate `say`) so that --write is
      # honoured and no stray blank line is printed afterwards.
      doc = job.rest_call
    when "json"
      # job.json must run before last_query is read
      json = job.json
      doc = "REST CALL = #{::Hodor::Oozie::Session.instance.last_query}"
      doc << ::JSON.pretty_generate(json)
    when "def", "definition"
      doc = job.definition
    end
  end

  local_filename = options[:write]
  if local_filename.nil?
    say doc
  else
    File.open(local_filename, 'w') { |f| f.write(doc) }
  end
end
179
+
180
+ desc "pwj", "Displays information about which job is 'current' within the hierarchy of Oozie jobs"
181
+ def pwj
182
+ say "Current Job ID: #{oozie.session.current_id || 'nil'}"
183
+ say "Parent Job ID: #{oozie.session.parent_id || 'nil'}"
184
+ say "Most Recent Job Query: #{oozie.session.root_query || 'nil'}"
185
+ end
186
+
187
desc "ssh_display_job [JOB_ID]", %q{
  Legacy version of display_job that is based on SSH, rather than REST
}.gsub(/^\s+/, "").strip
method_option :definition, type: :boolean, aliases: "-d",
  desc: "Display the definition of the specified job"
method_option :info, type: :boolean, aliases: "-i",
  desc: "Display information about the specified job"
method_option :log, type: :boolean, aliases: "-l",
  desc: "Display the log file for the specified job"
method_option :configcontent, type: :boolean, aliases: "-c",
  desc: "Display the variable-expanded config for the specified job"
# Shows a job by running a command on the remote host over ssh.
#
# Ids starting with "job_" are treated as Hadoop jobs and their logs are
# fetched with 'mapred job -logs'. Any other id is passed to 'oozie job'
# with the first selected of -definition, -log, -configcontent, falling
# back to -info. The job_id argument is not modified.
def ssh_display_job(job_id)
  if job_id.start_with?('job_')
    # Keep only the leading run of digits/underscores after the prefix;
    # anything from the first other character onwards is discarded.
    hadoop_id = job_id[/\Ajob_([0-9_]*)/, 1]
    env.ssh "mapred job -logs job_#{hadoop_id} attempt_#{hadoop_id}_m_000000_0",
      echo: true, echo_cmd: true
  else
    # Unless the id addresses a workflow action ("-W@..."), cut everything
    # that follows the "-W" marker.
    oozie_id = job_id.include?('-W@') ? job_id : job_id.sub(/-W.*$/, '-W')
    heading, flag =
      if options[:definition]
        ["DEFINITION:", "-definition"]
      elsif options[:log]
        ["LOG:", "-log"]
      elsif options[:configcontent]
        ["CONFIG:", "-configcontent"]
      else
        ["INFO:", "-info"]
      end
    logger.info heading
    env.ssh "oozie job -oozie :oozie_url #{flag} #{oozie_id}",
      echo: true, echo_cmd: true
  end
end
226
+
227
+ desc "deploy_job", "Deploy to hdfs the directories that this job depends on"
228
+ method_option :dryrun, type: :boolean, aliases: "-d", default: false,
229
+ desc: "Don't actually deploy the files, just show what would be deployed"
230
+ method_option :clean, type: :boolean, aliases: "-c", default: false,
231
+ desc: "Clean the hdfs target before deploying this directory"
232
+ method_option :verbose, type: :boolean, aliases: "-v", default: false,
233
+ desc: "Log verbose details about which files are deployed and to where"
234
+ method_option :maxdepth, type: :string, aliases: "-m", default: nil,
235
+ desc: "The maximum number of directories deep to copy to HDFS"
236
+ long_desc %Q[
237
+ The deploy_job command reads the contents of the jobs.yml file located
238
+ in your current directory, and deploys the paths specified by in the
239
+ driver's "deploy" key. For a fuller explanation, view the "jobs.yml"
240
+ topic, as follows:
241
+ \x5 $ hodor oozie:topic jobs.yml
242
+ ].unindent(8)
243
+ def deploy_job(*driver)
244
+ oozie.deploy_job(driver.length > 0 ? driver[0] : nil, options[:clean])
245
+ end
246
+
247
+ desc "run_job", "Run an oozie job on the target hadoop cluster"
248
+ long_desc %Q[
249
+ The run_job command reads the contents of the jobs.yml file located
250
+ in your current directory, composes a job.properties file and submits
251
+ the indicated driver workflow for execution by Oozie. If a job.properties
252
+ or job.properties.erb file is provided, that file will be used to interpolate property values.
253
+ For a fuller explanation, view the "jobs.yml" topic, as follows:
254
+ \x5 $ hodor oozie:topic jobs.yml
255
+ ].unindent(8)
256
+ method_option :dry_run, type: :boolean, aliases: "-d", default: false,
257
+ desc: "Generate computed job.properties file without running or deploying associated job."
258
+ method_option :file_name_prefix, type: :string, aliases: '-p', default: '',
259
+ desc: 'Add a prefix to job properties filename. This is primarily for use with :dry_run'
260
+ def run_job(*args)
261
+ outfile = oozie.run_job(args.length > 0 ? args[0] : nil, options[:dry_run], options[:file_name_prefix])
262
+ logger.info "Dry run: the properties file is available for inspection at #{outfile}" if options[:dry_run]
263
+ end
264
+
265
+ desc "kill_job [JOB_ID]", "Kill the oozie job with the specified job id"
266
+ def kill_job(*job_path)
267
+ job = oozie.job_by_path(job_path[0])
268
+ env.ssh "oozie job -oozie :oozie_url -kill #{job.id}",
269
+ echo: true, echo_cmd: true
270
+ end
271
+
272
+ desc "reauth", "Remove cached auth tokens (sometimes necessary after an oozie restart)"
273
+ def reauth
274
+ ssh_command nil, "rm .oozie-auth-token"
275
+ end
276
+
277
+ desc "workflows", "List most recently run workflows, most recent first"
278
+ method_option :verbose, type: :boolean, aliases: "-v",
279
+ desc: "Display all available information"
280
+ method_option :running, type: :boolean, aliases: "-r",
281
+ desc: "Display running workflows"
282
+ method_option :killed, type: :boolean, aliases: "-k",
283
+ desc: "Display killed workflows"
284
+ method_option :succeeded, type: :boolean, aliases: "-s",
285
+ desc: "Display succeeded workflows"
286
+ method_option :failed, type: :boolean, aliases: "-f",
287
+ desc: "Display failed workflows"
288
+ method_option :timedout, type: :boolean, aliases: "-t",
289
+ desc: "Display timedout workflows"
290
+ method_option :suspended, type: :boolean, aliases: "-p",
291
+ desc: "Display suspended workflows"
292
+ method_option :len, type: :numeric, aliases: "-l", default: 30,
293
+ desc: "number of matching workflows to display"
294
+ method_option :offset, type: :numeric, aliases: "-o", default: 0,
295
+ desc: "The coordinator to start with in listing matching workflows"
296
+ method_option :match, type: :string, aliases: "-m",
297
+ desc: "Only display workflows that contain the given string as a substring"
298
+ long_desc %Q[
299
+ The workflows command uses its options to create a REST query for workflows
300
+ that match your specification, and presents the results formated as a table.
301
+
302
+ Examples:
303
+ \x5 $ hodor oozie:workflows # displays most recent workflows
304
+ $ hodor oozie:workflows -v # same as before, but verbose
305
+ $ hodor oozie:workflows -r # displays running workflows
306
+ $ hodor oozie:workflows -r -s -k # running, succeeded or killed
307
+ $ hodor oozie:workflows -l 30 -o 30 # display second 30 most recent
308
+ $ hodor oozie:workflows -m data_source # display only matching workflows
309
+ ].unindent(8)
310
+ def workflows
311
+ oozie.session.verbose = options[:verbose]
312
+ filter = {}
313
+ filter[:jobtype] = "wf"
314
+ filter[:status] = []
315
+ filter[:status] << :running if options[:running]
316
+ filter[:status] << :killed if options[:killed]
317
+ filter[:status] << :succeeded if options[:succeeded]
318
+ filter[:status] << :failed if options[:failed]
319
+ filter[:status] << :timedout if options[:timedout]
320
+ filter[:status] << :suspended if options[:suspended]
321
+ filter[:status] << :running_first if filter[:status].empty?
322
+ filter[:match] = options[:match] if options[:match]
323
+
324
+ oozie.session.len = options[:len] if options[:len]
325
+ oozie.session.offset = options[:offset] if options[:offset]
326
+
327
+ result = ::Hodor::Oozie::Query.new(filter)
328
+ table = ::Hodor::Table.new(result, options[:verbose])
329
+ oozie.session.make_current(result)
330
+ say table
331
+ end
332
+
333
+ desc "coordinators", "List most recently run coordinators, most recent first"
334
+ method_option :verbose, type: :boolean, aliases: "-v",
335
+ desc: "Display all available information"
336
+ method_option :running, type: :boolean, aliases: "-r",
337
+ desc: "Display running coordinators"
338
+ method_option :killed, type: :boolean, aliases: "-k",
339
+ desc: "Display killed coordinators"
340
+ method_option :succeeded, type: :boolean, aliases: "-s",
341
+ desc: "Display succeeded coordinators"
342
+ method_option :failed, type: :boolean, aliases: "-f",
343
+ desc: "Display failed coordinators"
344
+ method_option :timedout, type: :boolean, aliases: "-t",
345
+ desc: "Display timedout coordinators"
346
+ method_option :suspended, type: :boolean, aliases: "-p",
347
+ desc: "Display suspended coordinators"
348
+ method_option :len, type: :numeric, aliases: "-l", default: 30,
349
+ desc: "number of matching coordinators to display"
350
+ method_option :offset, type: :numeric, aliases: "-o", default: 0,
351
+ desc: "The coordinator to start with in listing matching coordinators"
352
+ method_option :match, type: :string, aliases: "-m",
353
+ desc: "Only display coordinators that contain the given string as a substring"
354
+ long_desc %Q[
355
+ The coordinators command uses its options to create a REST query for coordinators
356
+ that match your specification, and presents the results formated as a table.
357
+
358
+ Examples:
359
+ \x5 $ hodor oozie:coordinators # displays most recent coordinators
360
+ $ hodor oozie:coordinators -v # same as before, but verbose
361
+ $ hodor oozie:coordinators -r # displays running coordinators
362
+ $ hodor oozie:coordinators -r -s -k # running, succeeded or killed
363
+ $ hodor oozie:coordinators -l 30 -o 30 # display second 30 most recent
364
+ $ hodor oozie:coordinators -m data_source # display only matching coordinators
365
+ ].unindent(8)
366
+ def coordinators
367
+ oozie.session.verbose = options[:verbose]
368
+ filter = {}
369
+ filter[:jobtype] = "coord"
370
+ filter[:status] = []
371
+ filter[:status] << :running if options[:running]
372
+ filter[:status] << :killed if options[:killed]
373
+ filter[:status] << :succeeded if options[:succeeded]
374
+ filter[:status] << :failed if options[:failed]
375
+ filter[:status] << :timedout if options[:timedout]
376
+ filter[:status] << :suspended if options[:suspended]
377
+ filter[:status] << :running_first if filter[:status].empty?
378
+ filter[:match] = options[:match] if options[:match]
379
+
380
+ oozie.session.len = options[:len] if options[:len]
381
+ oozie.session.offset = options[:offset] if options[:offset]
382
+
383
+ result = ::Hodor::Oozie::Query.new(filter)
384
+ table = ::Hodor::Table.new(result, options[:verbose])
385
+ oozie.session.make_current(result)
386
+ say table
387
+ end
388
+
389
desc "bundles", "List most recently run bundles, most recent first"
method_option :len, type: :numeric, aliases: "-l", default: 2,
  desc: "number of recent bundles to display"
# Lists bundle jobs by running the Oozie command line client on the remote
# host over ssh. The remote command is 'oozie jobs' (the CLI invocation used
# by the other ssh-based tasks in this class), not the hodor task name
# 'oozie:jobs'.
def bundles
  env.ssh "oozie jobs -oozie :oozie_url -jobtype bundle -len #{options[:len]}",
    echo: true, echo_cmd: true
end
396
+
397
+ end
398
+ end
399
+ end