hodor 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +5 -0
  8. data/Gemfile +4 -0
  9. data/Guardfile +11 -0
  10. data/README.md +105 -0
  11. data/Rakefile +105 -0
  12. data/bin/hodor +18 -0
  13. data/hodor.gemspec +47 -0
  14. data/lib/config/log4r_config.xml +35 -0
  15. data/lib/hodor.rb +83 -0
  16. data/lib/hodor/api/hdfs.rb +222 -0
  17. data/lib/hodor/api/oozie.rb +215 -0
  18. data/lib/hodor/api/oozie/action.rb +52 -0
  19. data/lib/hodor/api/oozie/bundle.rb +27 -0
  20. data/lib/hodor/api/oozie/coordinator.rb +53 -0
  21. data/lib/hodor/api/oozie/hadoop_job.rb +29 -0
  22. data/lib/hodor/api/oozie/job.rb +192 -0
  23. data/lib/hodor/api/oozie/materialization.rb +56 -0
  24. data/lib/hodor/api/oozie/query.rb +115 -0
  25. data/lib/hodor/api/oozie/session.rb +170 -0
  26. data/lib/hodor/api/oozie/workflow.rb +58 -0
  27. data/lib/hodor/cli.rb +146 -0
  28. data/lib/hodor/command.rb +164 -0
  29. data/lib/hodor/configuration.rb +80 -0
  30. data/lib/hodor/environment.rb +437 -0
  31. data/lib/hodor/ui/table.rb +130 -0
  32. data/lib/hodor/version.rb +3 -0
  33. data/lib/tasks/hdfs.thor +138 -0
  34. data/lib/tasks/master.thor +61 -0
  35. data/lib/tasks/oozie.thor +399 -0
  36. data/lib/tasks/sandbox.thor +87 -0
  37. data/spec/integration/api/oozie/action_spec.rb +69 -0
  38. data/spec/integration/api/oozie/bundle_spec.rb +33 -0
  39. data/spec/integration/api/oozie/coordinator_spec.rb +66 -0
  40. data/spec/integration/api/oozie/hadoop_job_spec.rb +29 -0
  41. data/spec/integration/api/oozie/job_spec.rb +15 -0
  42. data/spec/integration/api/oozie/materialization_spec.rb +66 -0
  43. data/spec/integration/api/oozie/query_spec.rb +43 -0
  44. data/spec/integration/api/oozie/session_spec.rb +18 -0
  45. data/spec/integration/api/oozie/workflow_spec.rb +65 -0
  46. data/spec/integration/api/oozie_spec.rb +198 -0
  47. data/spec/integration/fixtures/api/running_coordinators/req_resp_00.memo +6 -0
  48. data/spec/integration/fixtures/api/sample_action/req_resp_00.memo +5 -0
  49. data/spec/integration/fixtures/api/sample_action/req_resp_01.memo +7 -0
  50. data/spec/integration/fixtures/api/sample_bundle/req_resp_00.memo +6 -0
  51. data/spec/integration/fixtures/api/sample_coordinator/req_resp_00.memo +5 -0
  52. data/spec/integration/fixtures/api/sample_materialization/req_resp_00.memo +5 -0
  53. data/spec/integration/fixtures/api/sample_materialization/req_resp_01.memo +7 -0
  54. data/spec/integration/fixtures/api/sample_workflow/req_resp_00.memo +5 -0
  55. data/spec/spec_helper.rb +92 -0
  56. data/spec/support/d_v_r.rb +125 -0
  57. data/spec/support/hodor_api.rb +15 -0
  58. data/spec/unit/hodor/api/hdfs_spec.rb +63 -0
  59. data/spec/unit/hodor/api/oozie_spec.rb +32 -0
  60. data/spec/unit/hodor/environment_spec.rb +52 -0
  61. data/topics/hdfs/corresponding_paths.txt +31 -0
  62. data/topics/hdfs/overview.txt +10 -0
  63. data/topics/master/clusters.yml.txt +36 -0
  64. data/topics/master/overview.txt +17 -0
  65. data/topics/oozie/blocking_coordinators.txt +46 -0
  66. data/topics/oozie/composing_job_properties.txt +68 -0
  67. data/topics/oozie/display_job.txt +52 -0
  68. data/topics/oozie/driver_scenarios.txt +42 -0
  69. data/topics/oozie/inspecting_jobs.txt +59 -0
  70. data/topics/oozie/jobs.yml.txt +185 -0
  71. data/topics/oozie/overview.txt +43 -0
  72. data/topics/oozie/workers_and_drivers.txt +40 -0
  73. metadata +455 -0
@@ -0,0 +1,80 @@
1
+ require 'yaml'
2
+ require 'erb'
3
+
4
module Hodor
  # Helper for YAML-backed configuration trees. It can expand "${...}"
  # references between values of nested hashes (yml_expand) and flatten a
  # nested hash into "dotted.key = value" lines (yml_flatten).
  class Configuration

    # yml_file - path of the YAML file this configuration is associated with.
    #            It is only stored here; nothing in this class reads the file.
    def initialize(yml_file)
      @yml_file = yml_file
      @kvp = {}
    end

    # The process-wide Hodor::Environment singleton.
    def env
      Environment.instance
    end

    # The value stored under :target in the environment's settings.
    def target
      env.settings[:target]
    end

    # The logger owned by the environment.
    def logger
      env.logger
    end

    # Marks the configuration as loaded and expands references in the target
    # cluster hash, using the clusters hash as its outermost parent.
    #
    # NOTE(review): @target_cluster and @clusters are never assigned anywhere
    # in this class, so as written this expands nil and returns nil. Confirm
    # whether they are expected to be set by a subclass or a reopened class.
    def load
      @loaded = true

      yml_expand(@target_cluster, [@clusters])
    end

    # Recursively expands "${...}" references.
    #
    # val     - the value to expand. Strings have their references replaced,
    #           hashes are expanded in place (value by value), and anything
    #           else is returned untouched.
    # parents - array of the hashes enclosing val, outermost first. It is
    #           never modified.
    #
    # Reference syntax:
    #   ${key}    - looks up key in the innermost enclosing hash
    #   ${^key}   - looks up key one level further out (one '^' per level)
    #   ${:key}   - a leading ':' is ignored
    # Keys are tried as a String first and then as a Symbol. A reference that
    # cannot be resolved is replaced by the bare key name.
    #
    # Returns the expanded value (for a Hash, the same, now mutated, Hash).
    def yml_expand(val, parents)
      case val
      when String
        val.gsub(/\$\{(.+?)\}/) do
          reference = Regexp.last_match(0)
          levels = Regexp.last_match(1).split('^')
          key = levels.last
          if key.nil?
            # "${^}" names no key at all; leave the text as it was.
            reference
          else
            key = key[1..-1] if key.start_with?(':')
            # One path segment per level: "${^key}" has two segments and
            # therefore addresses the second hash from the end of the chain.
            depth = parents.length - levels.length
            scope = depth >= 0 ? parents[depth] : nil
            if !scope.respond_to?(:has_key?)
              # More '^' than enclosing hashes (or a nil parent): unresolved.
              key
            elsif scope.has_key?(key)
              scope[key]
            elsif scope.has_key?(key.to_sym)
              scope[key.to_sym]
            else
              key
            end
          end
        end
      when Hash
        # Build a fresh chain for the children. Appending to `parents` itself
        # would leave this hash on the chain while later siblings of an
        # enclosing hash are expanded, resolving them against the wrong level.
        chain = parents + [val]
        val.each_pair { |k, v| val[k] = yml_expand(v, chain) }
      else
        val
      end
    end

    # Flattens a nested hash into "a.b.c = value" strings.
    #
    # parent_key - dotted prefix accumulated so far ('' at the top level)
    # val        - a Hash to descend into, or a leaf value to render
    #
    # Returns an Array of Strings, one per leaf, in hash iteration order.
    def yml_flatten(parent_key, val)
      if val.is_a?(Hash)
        val.flat_map { |k, v| yml_flatten("#{parent_key}.#{k}", v) }
      else
        # The top-level prefix is '', so composed keys begin with a stray '.'.
        key = parent_key.start_with?('.') ? parent_key[1..-1] : parent_key
        ["#{key} = #{val}"]
      end
    end

    # Renders the flattened form of `egress_to` as newline-separated
    # "key = value" lines.
    #
    # NOTE(review): `egress_to` is not defined in this class; presumably it is
    # supplied by a subclass - confirm before relying on this method.
    def render_flattened
      yml_flatten('', egress_to).join("\n")
    end

  end
end
80
+
@@ -0,0 +1,437 @@
1
+ require 'singleton'
2
+
3
+ require 'yaml'
4
+ require 'erb'
5
+ require 'log4r'
6
+ require 'log4r/configurator'
7
+ require 'tmpdir'
8
+ require 'open4'
9
+ require 'socket'
10
+ require 'etc'
11
+
12
+ include Log4r
13
+
14
module Hodor
  # Process-wide (Singleton) description of where Hodor is running: the Git
  # working tree it was started in, the target cluster settings loaded from
  # config/clusters.yml, command-line options, and helpers for running local
  # and remote (ssh) shell commands.
  class Environment
    include Singleton

    # Hash of command-line options (see terse?, silent?, verbose?, ...).
    attr_accessor :options

    def initialize
      @options = {}
    end

    # Top-level directory of the enclosing Git working tree, as reported by
    # `git rev-parse --show-toplevel`. The value is computed once and cached.
    # When git reports a failure, a message is printed and the process exits.
    def root
      if @root.nil?
        begin
          @root = run_local "git rev-parse --show-toplevel", raise_on_error: true
        rescue Hodor::Cli::AbnormalExitStatus => ex
          puts "#{ex.message.strip}\nHodor must be run inside a Git working tree. Aborting..."
          Kernel.exit
        end
      end
      @root
    end

    # Name under which the main logger is registered in the log4r config.
    def logger_id
      'MainLogger'
    end

    # Lazily configures log4r from <root>/config/log4r_config.xml and returns
    # the logger registered as logger_id. If configuration fails the error is
    # printed and nil is returned (and configuration is retried on next call).
    def logger
      if @logger.nil?
        begin
          ::Configurator.load_xml_file(File.join(root, 'config', 'log4r_config.xml'))
          @logger = Log4r::Logger[logger_id]
        rescue => ex
          puts "Error: #{ex.message}"
        end
      end
      @logger
    end

    # Evaluates erb_body as an ERB template with this object as `self`.
    def erb_sub(erb_body)
      ERB.new(erb_body).result(self.instance_eval { binding })
    end

    # Reads a file and (unless suppress_erb is true) runs it through ERB.
    #
    # filename     - path to read. If it does not exist as given and does not
    #                already start with the repo root, it is retried relative
    #                to the repo root.
    # suppress_erb - true to return the raw file contents
    #
    # Returns the contents as a String, or nil when the file is not found.
    def erb_load(filename, suppress_erb = false)
      if File.exist?(filename)
        contents = File.read(filename)
        suppress_erb ? contents : erb_sub(contents)
      elsif !filename.start_with?(root)
        # Carry suppress_erb through to the retry so raw loads stay raw.
        erb_load(File.join(root, filename), suppress_erb)
      end
    end

    # Loads a YAML file after ERB expansion (see erb_load for path lookup).
    #
    # NOTE(review): YAML.load is used on purpose so that symbol keys load;
    # only point this at trusted, repo-owned files.
    #
    # Raises TypeError when the file cannot be found.
    def yml_load(filename)
      content = erb_load(filename, false)
      if content.nil?
        raise TypeError, "Unable to load YAML: '#{filename}' was not found (also tried relative to '#{root}')"
      end
      YAML.load(content)
    end

    def terse?
      options[:terse]
    end

    def silent?
      options[:silent]
    end

    def verbose?
      options[:verbose]
    end

    def dryrun?
      options[:dryrun]
    end

    def clean?
      options[:clean]
    end

    # Name of the target environment: $HADOOP_ENV, or 'sandbox' when unset.
    def hadoop_env
      ENV['HADOOP_ENV'] || 'sandbox'
    end

    # Loads config/clusters.yml, selects the entry for hadoop_env, merges
    # config/local.yml over it when that file exists, and records the target
    # name under :target.
    #
    # Raises RuntimeError when clusters.yml has no entry for the target.
    def load_settings
      target_env = hadoop_env.to_sym
      # An empty clusters.yml parses to a non-hash; treat it as "no clusters"
      # so the descriptive error below is raised.
      @clusters = yml_load('config/clusters.yml') || {}

      @target_cluster = @clusters[target_env]
      if @target_cluster.nil?
        raise "The target environment '#{target_env}' was not defined in the config/clusters.yml file. Aborting..."
      end

      # Look for the overrides where yml_load will look for them: relative to
      # the current directory first, then relative to the repo root.
      local_file = 'config/local.yml'
      if File.exist?(local_file) || File.exist?(File.join(root, local_file))
        overrides = yml_load(local_file)
        # An empty local.yml does not parse to a Hash; nothing to merge then.
        @target_cluster.merge!(overrides) if overrides.is_a?(Hash)
      end

      @target_cluster[:target] = target_env

      @loaded = true
    end

    # Per-user preferences read from ~/.hodor.yml ({} when the file is
    # missing or empty). Cached after the first call.
    def prefs
      if @prefs.nil?
        preffile = "#{Etc.getpwuid.dir}/.hodor.yml"
        @prefs = yml_load(preffile) if File.exist?(preffile)
        @prefs ||= {}
      end
      @prefs
    end

    # Forgets the loaded cluster settings so the next access reloads them.
    def reset
      @clusters = nil
      @target_cluster = nil
      @loaded = false
      @jobs = nil
      @run = nil
    end

    # Converts a path into its repo-relative form with a leading '/'.
    #
    # path - nil (use the current directory), an absolute path, or a path
    #        relative to the current directory
    #
    # Redundant '//', '/./', trailing '/.' and trailing '/' are removed (first
    # occurrence of each), and a trailing '..' is resolved.
    def path_on_github(path = nil)
      relative = !path.nil? && !path.start_with?('/')
      base = path && !relative ? path : FileUtils.pwd

      base = base.sub(root, '')
      git_path = relative ? "#{base}/#{path}" : base
      git_path = git_path.sub(/\/\//, '/').sub(/\/\.\//, '/').sub(/\/\.$/, '').sub(/\/$/, '')

      if git_path.end_with?('..')
        # Last position where a real (non-dot) path component ends.
        up_index = git_path.rindex(/[^\.]\//)
        # rindex returns nil when there is no such component (e.g. "/..").
        if up_index && up_index > 0
          last_path = git_path[0..up_index]
          up_path = git_path[up_index+2..-1]
          up_paths = up_path.split('/')
          abs_path = File.expand_path(File.join(up_paths), "#{root}/#{last_path}".sub(/\/\//, '/'))
          git_path = path_on_github(abs_path)
        end
      end

      git_path
    end

    # Absolute on-disk path corresponding to path (see path_on_github).
    def path_on_disc(path = nil)
      relpath = path_on_github(path)
      "#{root}/#{relpath}".sub(/\/\//, '/').sub(/\/$/, '')
    end

    # Repo-relative working directory, without a leading '/'.
    #
    # path - nil (use the current directory), an absolute path, or a relative
    #        path that is appended to the repo-relative current directory
    #
    # NOTE(review): when the directory is the repo root itself, the absolute
    # root path is returned unchanged; this matches the previous behaviour.
    def pwd(path = nil)
      relative = !path.nil? && !path.start_with?('/')
      lpwd = path && !relative ? path : FileUtils.pwd

      rpwd = lpwd.sub(root, '')
      if rpwd.length < lpwd.length
        lpwd = rpwd[1..-1] if rpwd.start_with?('/')
      else
        lpwd = rpwd
      end
      relative ? "#{lpwd}/#{path}" : lpwd
    end

    # Joins path onto the repo root.
    def abs_path(path)
      "#{root}/#{path}"
    end

    # Returns an array of paths from the root of the repo down to end_path
    # (outermost first). Walking stops at the repo root, or as soon as the
    # current path is shorter than the root path or empty.
    def paths_from_root(end_path)
      paths = []
      curpath = end_path
      loop do
        paths << curpath
        break if curpath == root || curpath.length < root.length || curpath.length == 0
        curpath = File.dirname(curpath)
      end
      paths.reverse
    end

    # Settings hash of the target cluster, loading it on first use.
    #
    # Raises RuntimeError when no settings could be loaded.
    def target_cluster
      load_settings if !@loaded || !@target_cluster
      raise "No settings for target cluster '#{hadoop_env}' were loaded" if !@loaded || !@target_cluster
      @target_cluster
    end

    # Looks up a single setting of the target cluster.
    def [](key)
      target_cluster[key]
    end

    # Alias of target_cluster.
    def env
      target_cluster
    end

    # Remembers job as the currently selected job.
    def select_job(job)
      @job = job
    end

    # The currently selected job, or {} when none was selected.
    def job
      @job || {}
    end

    def has_key?(key)
      target_cluster.has_key? key
    end

    # Alias of target_cluster.
    def settings
      target_cluster
    end

    def ssh_user
      env[:ssh_user]
    end

    def hostname
      Socket.gethostname
    end

    def username
      Etc.getpwuid(Process.uid).name
    end

    # Compute SSH command (user, machine and port part). The port defaults
    # to 22 when :ssh_port is not configured.
    def ssh_addr
      "#{ssh_user}@#{settings[:ssh_host]} -p #{settings[:ssh_port] || 22}"
    end

    # Performs variable expansion on a command line.
    #
    # The following variable expansions are supported:
    #    env.ssh %Q[ssh ${ssh_addr} ...]         # calls "ssh_addr" function
    #    env.ssh %Q[ssh ${env[:ssh_user]} ...]   # retrieves value from hash
    #    env.ssh %Q[ssh :ssh_user ...]           # retrieves value from hash
    #    env.ssh %Q[ssh #{env.ssh_addr} ...]     # skip variable expansion.
    #                                              Use normal string interpolation instead
    #
    # Tokens that cannot be resolved are left exactly as written.
    #
    # script - the command line. It is expanded in place; a frozen string is
    #          copied first so that literals can be passed safely.
    #
    # Returns the expanded string.
    def kvp_expand(script)
      script = script.dup if script.frozen?
      script.gsub!(/:[^\s]+|\$\{.+?\}/) { |token| expand_token(token) }
      script
    end

    # Strips the "-u <username>" argument, which hadoop commands don't
    # understand. The username has to be set using an environment variable
    # instead. This is a convenience method to facilitate the swapping around
    # that is necessary in several hadoop commands (fs, oozie etc.).
    #
    # trailing - the arguments, as an Array of Strings or as a single
    #            whitespace-separated String
    #
    # Returns [username, args]: username is nil when no "-u <name>" pair was
    # present, and args is the Array of remaining arguments.
    def extract_sudoer(trailing)
      tokens = trailing.is_a?(String) ? trailing.split(' ') : trailing
      username = nil
      expect_username = false
      args = []
      tokens.each do |arg|
        if arg.eql?("-u")
          expect_username = true
        elsif expect_username
          username = arg
          expect_username = false
        else
          args << arg
        end
      end

      [username, args]
    end

    # Runs script on the configured ssh host (see run_local for opts). The
    # caller's opts hash is left untouched.
    def ssh(script, opts = {})
      run_local script, opts.merge(ssh: true)
    end

    # Copies local_file to /tmp on the ssh host via scp, using a name made
    # unique by the local user and host names.
    #
    # opts is accepted for interface compatibility and is not used.
    #
    # Returns the remote path the file was copied to.
    def deploy_tmp_file(local_file, opts = {})
      deploy_path = "/tmp/#{File.basename(local_file, ".*")}-#{username}-#{hostname}#{File.extname(local_file)}"
      # scp must reach the same port that ssh commands use (scp spells it -P).
      port_flag = settings[:ssh_port].nil? ? '' : "-P #{settings[:ssh_port]} "
      run_local %Q[scp #{port_flag}#{local_file} #{settings[:ssh_user]}@#{settings[:ssh_host]}:#{deploy_path}],
                echo: true, echo_cmd: true
      deploy_path
    end

    # Alternative to system() that (optionally) echos STDOUT as it is
    # appended, rather than after the command completes.
    #
    # command_line - the shell command and arguments to execute. With
    #                opts[:sudo] it may also be an Array of arguments.
    #       --terse => if --terse appears on the command line, only
    #                  the native output of the command is printed.
    #                  I.e. the extra output of log4r is suppressed.
    # opts - options to the function, that include:
    #    [:echo]             => true - append stdout and stderr as it is generated
    #                        => false - execute the command silently
    #    [:echo_cmd]         => true - log the command to be executed
    #                        => false - remain silent
    #    [:raise_on_error]   => true - failed commands raise an exception
    #                        => false - remain silent
    #    [:suppress_expansion]
    #                        => true - don't expand key-value pairs in the command line
    #                        => false - expand key-value pairs
    #    [:sudo]
    #                        => true - invoke with sudo, extracting username from -u argument
    #                        => false - run without sudo
    #    [:ssh]
    #                        => true - prefix the command with ssh to run remotely
    #                        => false - don't prefix command line with ssh
    #
    # Neither command_line nor opts is modified.
    #
    # Returns stdout/stderr as a string (nil when the command could not be
    # found and :raise_on_error is not set).
    def run_local(command_line, opts = {})
      opts = opts.dup

      if opts[:sudo]
        username, args = extract_sudoer(command_line)
        if username
          command_line = "sudo -u #{username} #{args.join(' ')}"
        elsif command_line.is_a?(Array)
          command_line = args.join(' ')
        end
      end

      # Work on a private copy: expansion and --terse removal edit in place.
      command_line = command_line.dup

      if opts[:ssh]
        ssh_prefix = "ssh #{settings[:ssh_user]}@#{settings[:ssh_host]} "
        ssh_prefix << "-p #{settings[:ssh_port]} -T " unless settings[:ssh_port].nil?
        command_line = ssh_prefix + command_line
      end

      command_line = kvp_expand(command_line) unless opts[:suppress_expansion]
      native_output_only = command_line.include?('--terse')
      if native_output_only
        command_line.sub!(' --terse', '')
        opts[:echo] = true
        opts[:echo_cmd] = false
      end
      echo_command_output = opts[:echo] || false
      logger.sshcmd "$ #{command_line}" if opts[:echo_cmd]
      command_output = ""
      status = Open4::popen4(command_line) do |pid, stdin, stdout, stderr|
        command_output = capture_output(stdout, stderr, echo_command_output, native_output_only)
      end
      if status.exitstatus != 0 && opts[:raise_on_error]
        raise Hodor::Cli::AbnormalExitStatus.new(status.exitstatus, command_output)
      end
      command_output.strip
    rescue Hodor::Cli::AbnormalExitStatus
      raise
    rescue Errno::ENOENT
      raise Hodor::Cli::CommandNotFound, "Bash Error. Command or file arguments not found." if opts[:raise_on_error]
    end

    private

    # Resolves one token matched by kvp_expand: either ":key" (looked up in
    # settings) or "${method}" / "${method[key]}" (calls the public method of
    # that name on this object and optionally indexes its result). Anything
    # that cannot be resolved, or raises while resolving, yields the token
    # unchanged.
    def expand_token(token)
      if token.start_with?(':')
        key = token[1..-1].to_sym
        settings.has_key?(key) ? settings[key] : token
      else
        parts = token.split(/\{|\}/)[1].split(/\[|\]/)
        fn = parts[0].to_sym
        return token unless respond_to?(fn)

        result = send(fn)
        return result if parts.size == 1

        key = parts[1]
        key = key[1..-1].to_sym if key.start_with?(':')
        result[key]
      end
    rescue StandardError
      token
    end

    # Drains stdout and stderr of a child process until both reach EOF.
    #
    # stdout, stderr      - readable IO objects
    # echo_command_output - true to echo each line as soon as it is complete
    # native_output_only  - true to echo with puts rather than via the logger
    #
    # Every complete (\n-terminated) line is appended to the result as soon
    # as it has been read; an unterminated final line is appended when its
    # stream reaches EOF.
    #
    # Returns everything read from both streams as one String.
    def capture_output(stdout, stderr, echo_command_output, native_output_only)
      command_output = ''.dup
      pending = { stdout => ''.dup, stderr => ''.dup }
      open_streams = [stdout, stderr]

      emit = lambda do |io, text|
        command_output << text
        if echo_command_output
          if native_output_only
            puts text.strip
          elsif io.equal?(stdout)
            logger.stdout text.strip
          else
            logger.stderr text.strip
          end
        end
      end

      until open_streams.empty?
        # Wait until at least one stream has data (or has reached EOF).
        readable, = IO.select(open_streams)
        readable.each do |io|
          buffer = pending[io]
          begin
            buffer << io.readpartial(1024)
          rescue EOFError
            # Stop polling this stream; keep draining the other one.
            open_streams.delete(io)
            unless buffer.empty?
              emit.call(io, buffer.dup)
              buffer.clear
            end
            next
          end
          # Hand over every line completed by this read.
          while (line = buffer.slice!(/\A.*\n/))
            emit.call(io, line)
          end
        end
      end
      command_output
    end

  end
end