hodor 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +5 -0
  8. data/Gemfile +4 -0
  9. data/Guardfile +11 -0
  10. data/README.md +105 -0
  11. data/Rakefile +105 -0
  12. data/bin/hodor +18 -0
  13. data/hodor.gemspec +47 -0
  14. data/lib/config/log4r_config.xml +35 -0
  15. data/lib/hodor.rb +83 -0
  16. data/lib/hodor/api/hdfs.rb +222 -0
  17. data/lib/hodor/api/oozie.rb +215 -0
  18. data/lib/hodor/api/oozie/action.rb +52 -0
  19. data/lib/hodor/api/oozie/bundle.rb +27 -0
  20. data/lib/hodor/api/oozie/coordinator.rb +53 -0
  21. data/lib/hodor/api/oozie/hadoop_job.rb +29 -0
  22. data/lib/hodor/api/oozie/job.rb +192 -0
  23. data/lib/hodor/api/oozie/materialization.rb +56 -0
  24. data/lib/hodor/api/oozie/query.rb +115 -0
  25. data/lib/hodor/api/oozie/session.rb +170 -0
  26. data/lib/hodor/api/oozie/workflow.rb +58 -0
  27. data/lib/hodor/cli.rb +146 -0
  28. data/lib/hodor/command.rb +164 -0
  29. data/lib/hodor/configuration.rb +80 -0
  30. data/lib/hodor/environment.rb +437 -0
  31. data/lib/hodor/ui/table.rb +130 -0
  32. data/lib/hodor/version.rb +3 -0
  33. data/lib/tasks/hdfs.thor +138 -0
  34. data/lib/tasks/master.thor +61 -0
  35. data/lib/tasks/oozie.thor +399 -0
  36. data/lib/tasks/sandbox.thor +87 -0
  37. data/spec/integration/api/oozie/action_spec.rb +69 -0
  38. data/spec/integration/api/oozie/bundle_spec.rb +33 -0
  39. data/spec/integration/api/oozie/coordinator_spec.rb +66 -0
  40. data/spec/integration/api/oozie/hadoop_job_spec.rb +29 -0
  41. data/spec/integration/api/oozie/job_spec.rb +15 -0
  42. data/spec/integration/api/oozie/materialization_spec.rb +66 -0
  43. data/spec/integration/api/oozie/query_spec.rb +43 -0
  44. data/spec/integration/api/oozie/session_spec.rb +18 -0
  45. data/spec/integration/api/oozie/workflow_spec.rb +65 -0
  46. data/spec/integration/api/oozie_spec.rb +198 -0
  47. data/spec/integration/fixtures/api/running_coordinators/req_resp_00.memo +6 -0
  48. data/spec/integration/fixtures/api/sample_action/req_resp_00.memo +5 -0
  49. data/spec/integration/fixtures/api/sample_action/req_resp_01.memo +7 -0
  50. data/spec/integration/fixtures/api/sample_bundle/req_resp_00.memo +6 -0
  51. data/spec/integration/fixtures/api/sample_coordinator/req_resp_00.memo +5 -0
  52. data/spec/integration/fixtures/api/sample_materialization/req_resp_00.memo +5 -0
  53. data/spec/integration/fixtures/api/sample_materialization/req_resp_01.memo +7 -0
  54. data/spec/integration/fixtures/api/sample_workflow/req_resp_00.memo +5 -0
  55. data/spec/spec_helper.rb +92 -0
  56. data/spec/support/d_v_r.rb +125 -0
  57. data/spec/support/hodor_api.rb +15 -0
  58. data/spec/unit/hodor/api/hdfs_spec.rb +63 -0
  59. data/spec/unit/hodor/api/oozie_spec.rb +32 -0
  60. data/spec/unit/hodor/environment_spec.rb +52 -0
  61. data/topics/hdfs/corresponding_paths.txt +31 -0
  62. data/topics/hdfs/overview.txt +10 -0
  63. data/topics/master/clusters.yml.txt +36 -0
  64. data/topics/master/overview.txt +17 -0
  65. data/topics/oozie/blocking_coordinators.txt +46 -0
  66. data/topics/oozie/composing_job_properties.txt +68 -0
  67. data/topics/oozie/display_job.txt +52 -0
  68. data/topics/oozie/driver_scenarios.txt +42 -0
  69. data/topics/oozie/inspecting_jobs.txt +59 -0
  70. data/topics/oozie/jobs.yml.txt +185 -0
  71. data/topics/oozie/overview.txt +43 -0
  72. data/topics/oozie/workers_and_drivers.txt +40 -0
  73. metadata +455 -0
@@ -0,0 +1,130 @@
1
+ require 'terminal-table'
2
+
3
module Hodor
  # Text rendering of an object's properties and children, built on
  # Terminal::Table.
  #
  # The wrapped object must respond to +display_properties+ and
  # +display_children+. Each may return nil; otherwise it is treated as a hash
  # that is handed to Terminal::Table, and whose +:rows+ entry is read here.
  # Property rows are read as [key, value] pairs.
  class Table
    # Property sets with fewer rows than this are rendered as one plain table.
    COMPACT_THRESHOLD = 5

    # Values longer than this are kept out of the compact grid and only shown
    # (in a separate table) in verbose mode.
    MAX_TERSE_LENGTH = 50

    # Number of key/value pairs placed side by side in the compact grid.
    GRID_COLUMNS = 3

    # @param object   the object to render (see class comment for its contract)
    # @param verbose  when truthy, also shows empty values and the long-value
    #                 table; falls back to object.session.verbose when the
    #                 object responds to +session+
    # @param matching optional list of substrings; when given, #to_s only
    #                 prints child lines containing one of them (plus the
    #                 first three lines and the last line)
    def initialize(object, verbose = false, matching = nil)
      @verbose = verbose
      @verbose ||= object.session.verbose if object.respond_to?(:session)
      @matching = matching

      # Display properties first
      properties = object.display_properties
      @title = object.respond_to?(:title) ? object.title : "#{object.class.name} Properties"
      build_property_tables(properties) if properties

      # Next display the table of children
      build_child_table(object.display_children)
    end

    # Turns a [key, value] row into ["Humanized Key:", value], substituting
    # '<nil>' for values whose string form is empty.
    def normalize(row)
      label = row[0].to_s.split('_').map { |word| word.capitalize }.join(' ') + ":"
      value = row[1].to_s.length > 0 ? row[1] : '<nil>'
      [label, value]
    end

    # Returns +table+ with every line indented by +count+ spaces.
    def shift(table, count = 3)
      indent = " " * count
      table.each_line.map { |line| indent + line }.join
    end

    # The property table indented by one space, without its first line and
    # without trailing whitespace. Empty string when there is no property table.
    def properties
      output = shift(@prop_table ? @prop_table.to_s : '', 1)
      output.lines.drop(1).join.rstrip
    end

    # The long-value table indented by four spaces ('' when absent).
    def long_properties
      shift(@long_table ? @long_table.to_s : '', 4)
    end

    # The children table indented by four spaces ('' when absent).
    def children
      shift(@child_table ? @child_table.to_s : '', 4)
    end

    # Full rendering: a title line, the property table(s), then the children
    # (filtered by the +matching+ list when one was supplied).
    def to_s
      prop_width = (properties.split("\n").first || "").length
      children_width = (children.split("\n").first || "").length

      # Start from an empty string so the appends below are safe even when
      # neither table yields a width to centre the title on.
      output = ""
      if prop_width > 0
        output << title_line(prop_width, '-')
      elsif children_width > 0
        output << title_line(children_width, ' ')
      end

      output << properties << "\n" if @prop_table
      output << long_properties << "\n" if @long_table

      if @matching
        child_lines = children.split("\n")
        last_index = child_lines.length - 1
        child_lines.each_with_index do |line, index|
          # The first three lines and the last line are always kept.
          keep = index < 3 || index == last_index ||
                 @matching.any? { |item| line.include?(item) }
          output << line << "\n" if keep
        end
      else
        output << children
      end
      output
    end

    private

    # Length of a row's value as displayed; nil and non-String values are
    # measured through to_s so they cannot raise.
    def value_length(row)
      row[1].to_s.length
    end

    # Style hash used for the grid and long tables (all borders are spaces).
    def borderless_style
      { border_y: ' ', border_x: " ", border_i: ' ' }
    end

    # Builds @prop_table (and @long_table in verbose mode) from the hash
    # returned by display_properties.
    def build_property_tables(properties)
      rows = properties[:rows]
      if rows.length < COMPACT_THRESHOLD
        @prop_table = Terminal::Table.new(properties)
        @prop_table.align_column 0, :right
        return
      end

      long_rows, short_rows = rows.partition { |row| value_length(row) > MAX_TERSE_LENGTH }
      # Empty values are only worth showing in verbose mode.
      short_rows = short_rows.reject { |row| value_length(row).zero? } unless @verbose
      sorted_rows = short_rows.sort_by { |row| -value_length(row) }

      if @verbose
        @long_table = Terminal::Table.new({ rows: long_rows.map { |row| normalize(row) } })
        @long_table.align_column 0, :right
        @long_table.style = borderless_style
      end

      @prop_table = Terminal::Table.new({ rows: arrange_in_grid(sorted_rows) })
      # Label cells sit at even column indexes (0, 2, 4).
      GRID_COLUMNS.times { |column| @prop_table.align_column column * 2, :right }
      @prop_table.style = borderless_style
    end

    # Lays the sorted rows out column-major in GRID_COLUMNS label/value pairs
    # per line. The line count is rounded up so no row is dropped; missing
    # trailing cells are padded with empty strings to keep every line the same
    # width.
    def arrange_in_grid(sorted_rows)
      line_count = (sorted_rows.length + GRID_COLUMNS - 1) / GRID_COLUMNS
      Array.new(line_count) do |line|
        (0...GRID_COLUMNS).flat_map do |column|
          row = sorted_rows[line + column * line_count]
          row ? normalize(row) : ['', '']
        end
      end
    end

    # Builds @child_table from the hash returned by display_children, or a
    # one-cell placeholder table when there are no child rows.
    def build_child_table(rowcol)
      if rowcol && rowcol[:rows] && rowcol[:rows].length > 0
        @child_table = Terminal::Table.new(rowcol)
      else
        placeholder = @prop_table ? "<< No Children >>" : "<< Empty Set >>"
        @child_table = Terminal::Table.new(rows: [[placeholder]])
      end
      @child_table.align_column 0, :center
    end

    # Title line centred over a table of the given width using +fill+ as the
    # ruler character.
    def title_line(width, fill)
      ruler = [((width - @title.length) / 2).to_i - 5, 0].max
      " #{fill * ruler} #{@title} #{fill * ruler}\n"
    end
  end
end
@@ -0,0 +1,3 @@
1
module Hodor
  # Gem version string. Frozen so the shared constant cannot be mutated in place.
  VERSION = "1.0.2".freeze
end
@@ -0,0 +1,138 @@
1
+ require "hodor/api/hdfs"
2
+
3
+ module Hodor
4
+ module Cli
5
+ class Hdfs < ::Hodor::Command
6
+
7
+ no_tasks do
8
+
9
+ def hdfs
10
+ ::Hodor::Hdfs.instance
11
+ end
12
+
13
+ def intercept_dispatch(command, trailing)
14
+ hadoop_command("hadoop fs", trailing) if command == :fs
15
+ end
16
+
17
+ def self.help(shell, subcommand = false)
18
+ shell.print_wrapped(load_topic('overview'), indent: 0)
19
+ result = super
20
+
21
+ more_help = %Q[Getting More Help:
22
+ ------------------
23
+ To get detailed help on specific Hdfs commands (i.e. put_dir), run:
24
+
25
+ $ hodor help hdfs:put_dir
26
+ $ hodor hdfs:help put_dir # alternate, works the same
27
+
28
+ ].unindent(10)
29
+ shell.say more_help
30
+ result
31
+ end
32
+ end
33
+
34
+ desc "fs <arguments>", %q{
35
+ Passes its arguments as-is to the remote host, and runs 'hadoop fs <arguments>'
36
+ }.gsub(/^\s+/, "").strip
37
+ long_desc <<-LONGDESC
38
+ Executes the hadoop fs command on the remote host configured as the master,
39
+ using SSH. The arguments passed to this command are passed directly
40
+ through to the ssh command and executed as-is on the remote host. Because
41
+ this is a pass-through command, anything the remote tool can do is available
42
+ through this facility. So, refer to Apache's documentation on its 'hadoop fs'
43
+ command line tool for details on the sub-commands and arguments it supports.
44
+
45
+ Example:
46
+
47
+ $ hodor hdfs:fs -ls -R /shared/jars
48
+ LONGDESC
49
+ def fs
50
+ # handled by intercept_dispatch
51
+ end
52
+
53
+ desc "users", %q{
54
+ Run an 'hdfs ls' command on the /user directory to list users on HDFS
55
+ }.gsub(/^\s+/, "").strip
56
+ def users
57
+ env.ssh "hadoop fs -ls /user",
58
+ echo: true, echo_cmd: true
59
+ end
60
+
61
+ desc "rm <filename>", "Removes <filename> from corresponding path on HDFS"
62
+ def rm(filename)
63
+ logger.info "Removing #{filename}"
64
+ hdfs.rm(filename)
65
+ end
66
+
67
+ desc "rm_rf <directory>", "Recursively removes <directory> from corresponding path on HDFS"
68
+ def rm_rf(path)
69
+ logger.info "Removing directory #{path} recursively..."
70
+ hdfs.rm_rf(path)
71
+ end
72
+
73
+ desc "ls [<paths> ...]", "Shows a directory listing of the corresponding path on HDFS"
74
# Lists the HDFS locations corresponding to the given paths, defaulting to
# "." when no path is supplied. Each path is translated with
# hdfs.path_on_hdfs and the resulting list is passed to a single
# 'hadoop fs -ls' invocation run through env.ssh.
def ls(*paths)
  # Fall back to "." without mutating the argument array.
  targets = paths.empty? ? ["."] : paths
  hdfs_paths = targets.map { |path| hdfs.path_on_hdfs(path) }
  env.ssh "hadoop fs -ls #{hdfs_paths.join(' ')}",
          echo: true, echo_cmd: true
end
82
+
83
+ desc "cat", "Dump contents of file at the corresponding path on HDFS to STDOUT"
84
+ def cat(filename)
85
+ env.ssh "hadoop fs -cat #{hdfs.path_on_hdfs(filename)}",
86
+ echo: true, echo_cmd: true
87
+ end
88
+
89
+ desc "put_dir <path>", "Uploads (recursively) the directory at <path> to corresponding path on HDFS"
90
+ method_option :dryrun, type: :boolean, aliases: "-d", default: false,
91
+ desc: "Don't actually deploy the files, just show what would be deployed"
92
+ method_option :clean, type: :boolean, aliases: "-c", default: false,
93
+ desc: "Clean the hdfs target before deploying this directory"
94
+ method_option :verbose, type: :boolean, aliases: "-v", default: false,
95
+ desc: "Log verbose details about which files are deployed and to where"
96
+ method_option :maxdepth, type: :string, aliases: "-m", default: nil,
97
+ desc: "The maximum number of directories deep to copy to HDFS"
98
+ def put_dir(dirpath)
99
+ hdfs.put_dir dirpath, options
100
+ end
101
+
102
+ desc "put <filename>", "Uploads <filename> to the corresponding path on HDFS"
103
+ def put(filename)
104
+ hdfs.put_file(filename)
105
+ end
106
+
107
+ desc "get <filename>", "Downloads <filename> from the corresponding path on HDFS"
108
+ method_option :diff, type: :boolean, aliases: "-d", default: false,
109
+ desc: "After downloading <filename>, a diff is run between local and remote versions"
110
+ def get(filename)
111
+ hdfs.get_file(filename)
112
+ if options[:diff]
113
+ env.run_local %Q[diff #{filename} #{filename}.hdfs_copy], echo: true, echo_cmd: true
114
+ end
115
+ end
116
+
117
+ desc "touchz", "Creates a file of zero length at the corresponding path on HDFS"
118
+ def touchz(filename)
119
+ env.ssh "hadoop fs -touchz #{hdfs.path_on_hdfs(filename)}",
120
+ echo: true, echo_cmd: true
121
+ end
122
+
123
+ desc "pwd", "Displays both your local and HDFS working directories, and how they correspond"
124
+ def pwd
125
+ logger.info "Path on localhost : [#{env.path_on_disc('.')}]"
126
+ logger.info "Path on Git repo : [#{env.path_on_github('.')}]"
127
+ logger.info "Path on HDFS : [#{hdfs.path_on_hdfs('.')}]"
128
+ end
129
+
130
+ desc "path_of", "Displays the path of the specified file or directory"
131
+ def path_of(path)
132
+ logger.info "Path on local disc: [#{env.path_on_disc(path)}]"
133
+ logger.info "Path on GitHub: [#{env.path_on_github(path)}]"
134
+ logger.info "Path on HDFS: [#{hdfs.path_on_hdfs(path)}]"
135
+ end
136
+ end
137
+ end
138
+ end
@@ -0,0 +1,61 @@
1
+
2
+ module Hodor
3
+ module Cli
4
+
5
+ class Master < ::Hodor::Command
6
+
7
+ no_tasks do
8
# Handles the pass-through :exec command by forwarding the trailing
# arguments to hadoop_command with "-T"; every other command is ignored
# here and nil is returned.
def intercept_dispatch(command, trailing)
  return unless command == :exec

  hadoop_command("-T", trailing)
end
14
+
15
+ def self.help(shell, subcommand = false)
16
+ shell.print_wrapped(load_topic('overview'), indent: 0)
17
+ result = super
18
+
19
+ more_help = %Q[Getting More Help:
20
+ ------------------
21
+ To get detailed help on specific Master commands (i.e. config), run:
22
+
23
+ $ hodor help master:config
24
+ $ hodor master:help config # alternate, works the same
25
+
26
+ ].unindent(10)
27
+ shell.say more_help
28
+ result
29
+ end
30
+ end
31
+
32
+ desc "config", "List all known variable expansions for the target Hadoop environment"
33
+ def config
34
+ env.settings.each_pair { |k,v|
35
+ logger.info "#{k} : #{v}"
36
+ }
37
+ end
38
+
39
+ desc "exec <arguments>", %q{
40
+ Pass through command that executes its arguments on the remote master via ssh
41
+ }.gsub(/^\s+/, "").strip
42
+ long_desc <<-LONGDESC
43
+ Executes the shell command on the remote host configured as the master,
44
+ using SSH. The arguments passed to this command are passed directly
45
+ through to the ssh command and executed as-is on the remote host.
46
+
47
+ Example Usage:
48
+
49
+ $ hodor master:exec hostname -I
50
+ LONGDESC
51
+ def exec
52
+ # handled by intercept_dispatch
53
+ end
54
+
55
+ desc "ssh_config", "Echo the SSH connection string for the selected hadoop cluster"
56
+ def ssh_config
57
+ puts env.ssh_addr
58
+ end
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,399 @@
1
+ require 'json'
2
+
3
+ module Hodor
4
+ module Cli
5
+
6
+ class Appendix < Thor
7
+
8
+ end
9
+
10
+ class Oozie < ::Hodor::Command
11
+ no_tasks do
12
+
13
+ def oozie
14
+ ::Hodor::Oozie
15
+ end
16
+
17
+ def intercept_dispatch(command, trailing)
18
+ case command
19
+ when :jobs
20
+ hadoop_command("oozie jobs", trailing)
21
+ when :job
22
+ hadoop_command("oozie job", trailing)
23
+ end
24
+ end
25
+
26
+ def self.help(shell, subcommand = false)
27
+ shell.print_wrapped(load_topic("overview"), indent: 0)
28
+ result = super
29
+
30
+ more_help = %Q[Getting More Help:
31
+ ------------------
32
+ To get detailed help on specific Oozie commands (i.e. display_job), run:
33
+
34
+ $ hodor help oozie:display_job
35
+ $ hodor oozie:help display_job # alternate, works the same
36
+
37
+ To view information on one of the Oozie topics (i.e. driver scenarios), run:
38
+
39
+ $ hodor oozie:topic driver_scenarios
40
+
41
+ And to see a list of Oozie topics that are available for display, run:
42
+
43
+ $ hodor oozie:topics
44
+ ].unindent(10)
45
+ shell.say more_help
46
+ result
47
+ end
48
+ end
49
+
50
+ desc "jobs <arguments>", %q{
51
+ Pass through command that executes its arguments on the remote master using 'oozie jobs <arguments>'
52
+ }.gsub(/^\s+/, "").strip
53
+ long_desc <<-LONGDESC
54
+ Executes the 'oozie jobs' command on the remote master using SSH. The arguments
55
+ passed to this command are passed through as-is to the SSH-based command-line.
56
+
57
+ Example Usage:
58
+
59
+ $ hodor oozie:jobs -oozie http://my.company.com:8080/oozie -localtime -len 2 -filter status=RUNNING
60
+ LONGDESC
61
+ def jobs
62
+ # handled by intercept_dispatch
63
+ end
64
+
65
+ desc "job <arguments>", %q{
66
+ Pass through command that executes its arguments on the remote master using 'oozie job <arguments>'
67
+ }.gsub(/^\s+/, "").strip
68
+ long_desc <<-LONGDESC
69
+ Executes the 'oozie job' command on the remote master using SSH. The arguments
70
+ passed to this command are passed through as-is to the SSH-based command-line.
71
+
72
+ Example Usage:
73
+
74
+ $ hodor oozie:job -oozie http://my.company.com:8080/oozie -start 14-20090525161321-oozie-joe
75
+ LONGDESC
76
+ def job
77
+ # handled by intercept_dispatch
78
+ end
79
+
80
+ desc "change_job [JOB PATH]", %q{
81
+ Changes to a different job within the hierarchy of Oozie jobs
82
+ }.gsub(/^\s+/, "").strip
83
+ long_desc %Q[
84
+ The change_job command changes the "current_job" but does not display anything. Use
85
+ the "display_job" command to display information about the job change_job
86
+ just made current. The change_job command can of course take a job id as
87
+ argument:
88
+ \x5 $ hodor oozie:change_job 0004729-150629212824266-oozie-oozi-C
89
+
90
+ However, other "special" arguments are also allowed:
91
+ \x5 $ hodor oozie:change_job .. # Change to parent of current job
92
+ $ hodor oozie:change_job 3 # Change to the child with index 3
93
+ $ hodor oozie:change_job / # Change to list of topmost coordinators
94
+ # same as oozie:coordinators
95
+
96
+ Suggested Alias:
97
+ \x5 $ alias cj='hodor oozie:change_job'
98
+ ].unindent(8)
99
+ def change_job(*job_path)
100
+ oozie.session.verbose = options[:verbose]
101
+ oozie.session.len = options[:len] if options[:len]
102
+ oozie.session.offset = options[:offset] if options[:offset]
103
+ oozie.change_job(job_path[0])
104
+ end
105
+
106
desc "display_job [JOB PATH] [attribute] [options]", %q{
  Display information about the 'current' job within the Oozie hierarchy of jobs
}.gsub(/^\s+/, "").strip
long_desc load_topic("display_job")
method_option :query, type: :boolean, aliases: "-q", default: false,
  desc: "Only query the job for information, but do not change to it"
method_option :verbose, type: :boolean, aliases: "-v",
  desc: "Display all available information"
method_option :killed, type: :boolean, aliases: "-k",
  desc: "Only display killed coordinator materializations"
# The option used to be declared as :succeded while the method body read
# options[:succeeded], so the filter could never be enabled. The misspelled
# long flag is kept as an alias so existing invocations are still accepted.
method_option :succeeded, type: :boolean, aliases: %w[-s --succeded],
  desc: "Only display succeeded coordinator materializations"
method_option :len, type: :numeric, aliases: "-l", default: nil,
  desc: "number of matching workflows to display"
method_option :offset, type: :numeric, aliases: "-o", default: 0,
  desc: "The coordinator to start with in listing matching workflows"
method_option :match, type: :array, aliases: "-m", default: nil,
  desc: "Array of words to match in config properties keys and values"
method_option :write, type: :string, aliases: "-w", default: nil,
  desc: "Name of file to write the output of this command into"
# Displays one aspect of a job.
#
# Each positional argument is classified as either a job reference (child
# index, Oozie job id, Hadoop "job_..." id, ".." or "/") or an aspect name;
# the last of each kind wins. Defaults are job "." and aspect "info".
#
# Recognised aspects: info, props/conf, log, rest/request/call, json,
# def/definition. Any other aspect still looks the job up but produces no
# document. The resulting document is written to the --write file when
# given, otherwise printed with +say+.
def display_job(*args)
  oozie.session.len = options[:len] if options[:len]
  oozie.session.offset = options[:offset] if options[:offset]
  query_mode = options[:query] || env.prefs[:display_job_query_mode]

  job_id = "."
  aspect = "info"
  args.each do |arg|
    if arg =~ /^[0-9]{1,4}$/ ||                        # Index form of job id
       arg =~ /^[0-9]{5,8}\-[0-9]{10,18}\-oozie/ ||    # Oozie form
       arg =~ /job_[0-9]{5,20}/ ||                     # Hadoop mapred form
       arg.eql?('..') || arg.eql?('/')
      job_id = arg
    else
      aspect = arg
    end
  end

  if aspect.eql?("info")
    filter = []
    filter << :killed if options[:killed]
    filter << :succeeded if options[:succeeded]
    job = oozie.job_by_path(job_id, !query_mode, filter)
    doc = ::Hodor::Table.new(job, options[:verbose], options[:match]).to_s
  else
    job = oozie.job_by_path(job_id, !query_mode)
    doc =
      case aspect
      when "props", "conf"
        conf = options[:match] ? job.conf_map.match(options[:match]) : job.conf_map
        conf.awesome_inspect(plain: !options[:write].nil?)
      when "log"
        job.log
      when "rest", "request", "call"
        # Route the REST call through the common output path below so it is
        # emitted exactly once and honours --write, instead of being printed
        # and then followed by an empty document.
        job.rest_call
      when "json"
        # Fetch the JSON first; last_query is read only afterwards, matching
        # the original call order.
        json = job.json
        "REST CALL = #{::Hodor::Oozie::Session.instance.last_query}" + ::JSON.pretty_generate(json)
      when "def", "definition"
        job.definition
      end
  end

  local_filename = options[:write]
  if local_filename.nil?
    say doc
  else
    File.open(local_filename, 'w') { |f| f.write(doc) }
  end
end
179
+
180
+ desc "pwj", "Displays information about which job is 'current' within the hierarchy of Oozie jobs"
181
+ def pwj
182
+ say "Current Job ID: #{oozie.session.current_id || 'nil'}"
183
+ say "Parent Job ID: #{oozie.session.parent_id || 'nil'}"
184
+ say "Most Recent Job Query: #{oozie.session.root_query || 'nil'}"
185
+ end
186
+
187
+ desc "ssh_display_job [JOB_ID]", %q{
188
+ Legacy version of display_job that is based on SSH, rather than REST
189
+ }.gsub(/^\s+/, "").strip
190
+ method_option :definition, type: :boolean, aliases: "-d",
191
+ desc: "Display the definition of the specified job"
192
+ method_option :info, type: :boolean, aliases: "-i",
193
+ desc: "Display information about the specified job"
194
+ method_option :log, type: :boolean, aliases: "-l",
195
+ desc: "Display the log file for the specified job"
196
+ method_option :configcontent, type: :boolean, aliases: "-c",
197
+ desc: "Display the variable-expanded config for the specified job"
198
# Shows information about a job by running a command through env.ssh.
#
# Ids starting with "job_" are treated as Hadoop job ids: everything after
# the leading run of digits/underscores is discarded and the logs of the
# first map attempt are requested with 'mapred job -logs'.
#
# Any other id is passed to 'oozie job'. Unless the id contains "-W@", text
# following the first "-W" is stripped. The first enabled option among
# :definition, :log and :configcontent selects the sub-command; with none
# set, '-info' is used.
def ssh_display_job(job_id)
  if job_id.start_with?('job_')
    # Leading digits/underscores only. A slice ending at "first bad index - 1"
    # would become 0..-1 (the whole string) when the very first character is
    # already invalid, so match the prefix directly instead.
    hadoop_id = job_id.sub('job_', '')[/\A[0-9_]*/]
    env.ssh "mapred job -logs job_#{hadoop_id} attempt_#{hadoop_id}_m_000000_0",
            echo: true, echo_cmd: true
  else
    # Non-destructive sub: leave the caller's string untouched.
    oozie_id = job_id.include?('-W@') ? job_id : job_id.sub(/-W.*$/, '-W')

    label, flag =
      if options[:definition]
        %w[DEFINITION definition]
      elsif options[:log]
        %w[LOG log]
      elsif options[:configcontent]
        %w[CONFIG configcontent]
      else
        %w[INFO info]
      end

    logger.info "#{label}:"
    env.ssh "oozie job -oozie :oozie_url -#{flag} #{oozie_id}",
            echo: true, echo_cmd: true
  end
end
226
+
227
+ desc "deploy_job", "Deploy to hdfs the directories that this job depends on"
228
+ method_option :dryrun, type: :boolean, aliases: "-d", default: false,
229
+ desc: "Don't actually deploy the files, just show what would be deployed"
230
+ method_option :clean, type: :boolean, aliases: "-c", default: false,
231
+ desc: "Clean the hdfs target before deploying this directory"
232
+ method_option :verbose, type: :boolean, aliases: "-v", default: false,
233
+ desc: "Log verbose details about which files are deployed and to where"
234
+ method_option :maxdepth, type: :string, aliases: "-m", default: nil,
235
+ desc: "The maximum number of directories deep to copy to HDFS"
236
+ long_desc %Q[
237
+ The deploy_job command reads the contents of the jobs.yml file located
238
+ in your current directory, and deploys the paths specified in the
239
+ driver's "deploy" key. For a fuller explanation, view the "jobs.yml"
240
+ topic, as follows:
241
+ \x5 $ hodor oozie:topic jobs.yml
242
+ ].unindent(8)
243
+ def deploy_job(*driver)
244
+ oozie.deploy_job(driver.length > 0 ? driver[0] : nil, options[:clean])
245
+ end
246
+
247
+ desc "run_job", "Run an oozie job on the target hadoop cluster"
248
+ long_desc %Q[
249
+ The run_job command reads the contents of the jobs.yml file located
250
+ in your current directory, composes a job.properties file and submits
251
+ the indicated driver workflow for execution by Oozie. If a job.properties
252
+ or job.properties.erb file is provided, that file will be used to interpolate property values.
253
+ For a fuller explanation, view the "jobs.yml" topic, as follows:
254
+ \x5 $ hodor oozie:topic jobs.yml
255
+ ].unindent(8)
256
+ method_option :dry_run, type: :boolean, aliases: "-d", default: false,
257
+ desc: "Generate computed job.properties file without running or deploying associated job."
258
+ method_option :file_name_prefix, type: :string, aliases: '-p', default: '',
259
+ desc: 'Add a prefix to job properties filename. This is primarily for use with :dry_run'
260
+ def run_job(*args)
261
+ outfile = oozie.run_job(args.length > 0 ? args[0] : nil, options[:dry_run], options[:file_name_prefix])
262
+ logger.info "Dry run: the properties file is available for inspection at #{outfile}" if options[:dry_run]
263
+ end
264
+
265
+ desc "kill_job [JOB_ID]", "Kill the oozie job with the specified job id"
266
+ def kill_job(*job_path)
267
+ job = oozie.job_by_path(job_path[0])
268
+ env.ssh "oozie job -oozie :oozie_url -kill #{job.id}",
269
+ echo: true, echo_cmd: true
270
+ end
271
+
272
+ desc "reauth", "Remove cached auth tokens (sometimes necessary after an oozie restart)"
273
+ def reauth
274
+ ssh_command nil, "rm .oozie-auth-token"
275
+ end
276
+
277
+ desc "workflows", "List most recently run workflows, most recent first"
278
+ method_option :verbose, type: :boolean, aliases: "-v",
279
+ desc: "Display all available information"
280
+ method_option :running, type: :boolean, aliases: "-r",
281
+ desc: "Display running workflows"
282
+ method_option :killed, type: :boolean, aliases: "-k",
283
+ desc: "Display killed workflows"
284
+ method_option :succeeded, type: :boolean, aliases: "-s",
285
+ desc: "Display succeeded workflows"
286
+ method_option :failed, type: :boolean, aliases: "-f",
287
+ desc: "Display failed workflows"
288
+ method_option :timedout, type: :boolean, aliases: "-t",
289
+ desc: "Display timedout workflows"
290
+ method_option :suspended, type: :boolean, aliases: "-p",
291
+ desc: "Display suspended workflows"
292
+ method_option :len, type: :numeric, aliases: "-l", default: 30,
293
+ desc: "number of matching workflows to display"
294
+ method_option :offset, type: :numeric, aliases: "-o", default: 0,
295
+ desc: "The coordinator to start with in listing matching workflows"
296
+ method_option :match, type: :string, aliases: "-m",
297
+ desc: "Only display workflows that contain the given string as a substring"
298
+ long_desc %Q[
299
+ The workflows command uses its options to create a REST query for workflows
300
+ that match your specification, and presents the results formatted as a table.
301
+
302
+ Examples:
303
+ \x5 $ hodor oozie:workflows # displays most recent workflows
304
+ $ hodor oozie:workflows -v # same as before, but verbose
305
+ $ hodor oozie:workflows -r # displays running workflows
306
+ $ hodor oozie:workflows -r -s -k # running, succeeded or killed
307
+ $ hodor oozie:workflows -l 30 -o 30 # display second 30 most recent
308
+ $ hodor oozie:workflows -m data_source # display only matching workflows
309
+ ].unindent(8)
310
# Builds a workflow ("wf") query filter from the command options, runs it
# through ::Hodor::Oozie::Query, makes the result the session's current
# job and prints it as a table.
#
# Status flags are collected in a fixed order (running, killed, succeeded,
# failed, timedout, suspended); when none is set, :running_first is used.
def workflows
  oozie.session.verbose = options[:verbose]

  statuses = [:running, :killed, :succeeded, :failed, :timedout, :suspended]
             .select { |status| options[status] }
  statuses << :running_first if statuses.empty?

  filter = { jobtype: "wf", status: statuses }
  filter[:match] = options[:match] if options[:match]

  oozie.session.len = options[:len] if options[:len]
  oozie.session.offset = options[:offset] if options[:offset]

  result = ::Hodor::Oozie::Query.new(filter)
  table = ::Hodor::Table.new(result, options[:verbose])
  oozie.session.make_current(result)
  say table
end
332
+
333
+ desc "coordinators", "List most recently run coordinators, most recent first"
334
+ method_option :verbose, type: :boolean, aliases: "-v",
335
+ desc: "Display all available information"
336
+ method_option :running, type: :boolean, aliases: "-r",
337
+ desc: "Display running coordinators"
338
+ method_option :killed, type: :boolean, aliases: "-k",
339
+ desc: "Display killed coordinators"
340
+ method_option :succeeded, type: :boolean, aliases: "-s",
341
+ desc: "Display succeeded coordinators"
342
+ method_option :failed, type: :boolean, aliases: "-f",
343
+ desc: "Display failed coordinators"
344
+ method_option :timedout, type: :boolean, aliases: "-t",
345
+ desc: "Display timedout coordinators"
346
+ method_option :suspended, type: :boolean, aliases: "-p",
347
+ desc: "Display suspended coordinators"
348
+ method_option :len, type: :numeric, aliases: "-l", default: 30,
349
+ desc: "number of matching coordinators to display"
350
+ method_option :offset, type: :numeric, aliases: "-o", default: 0,
351
+ desc: "The coordinator to start with in listing matching coordinators"
352
+ method_option :match, type: :string, aliases: "-m",
353
+ desc: "Only display coordinators that contain the given string as a substring"
354
+ long_desc %Q[
355
+ The coordinators command uses its options to create a REST query for coordinators
356
+ that match your specification, and presents the results formatted as a table.
357
+
358
+ Examples:
359
+ \x5 $ hodor oozie:coordinators # displays most recent coordinators
360
+ $ hodor oozie:coordinators -v # same as before, but verbose
361
+ $ hodor oozie:coordinators -r # displays running coordinators
362
+ $ hodor oozie:coordinators -r -s -k # running, succeeded or killed
363
+ $ hodor oozie:coordinators -l 30 -o 30 # display second 30 most recent
364
+ $ hodor oozie:coordinators -m data_source # display only matching coordinators
365
+ ].unindent(8)
366
# Builds a coordinator ("coord") query filter from the command options,
# runs it through ::Hodor::Oozie::Query, makes the result the session's
# current job and prints it as a table.
#
# Status flags are collected in a fixed order (running, killed, succeeded,
# failed, timedout, suspended); when none is set, :running_first is used.
def coordinators
  oozie.session.verbose = options[:verbose]

  statuses = [:running, :killed, :succeeded, :failed, :timedout, :suspended]
             .select { |status| options[status] }
  statuses << :running_first if statuses.empty?

  filter = { jobtype: "coord", status: statuses }
  filter[:match] = options[:match] if options[:match]

  oozie.session.len = options[:len] if options[:len]
  oozie.session.offset = options[:offset] if options[:offset]

  result = ::Hodor::Oozie::Query.new(filter)
  table = ::Hodor::Table.new(result, options[:verbose])
  oozie.session.make_current(result)
  say table
end
388
+
389
+ desc "bundles", "List most recently run bundles, most recent first"
390
+ method_option :len, type: :numeric, aliases: "-l", default: 2,
391
+ desc: "number of recent bundles to display"
392
# Lists recent bundle jobs by running the 'oozie jobs' command-line tool
# through env.ssh, limited to options[:len] entries.
def bundles
  # The remote binary is invoked as "oozie jobs"; "oozie:jobs" is hodor's
  # own task syntax and is not a command on the remote host.
  env.ssh "oozie jobs -oozie :oozie_url -jobtype bundle -len #{options[:len]}",
          echo: true, echo_cmd: true
end
396
+
397
+ end
398
+ end
399
+ end