hodor 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +5 -0
  8. data/Gemfile +4 -0
  9. data/Guardfile +11 -0
  10. data/README.md +105 -0
  11. data/Rakefile +105 -0
  12. data/bin/hodor +18 -0
  13. data/hodor.gemspec +47 -0
  14. data/lib/config/log4r_config.xml +35 -0
  15. data/lib/hodor.rb +83 -0
  16. data/lib/hodor/api/hdfs.rb +222 -0
  17. data/lib/hodor/api/oozie.rb +215 -0
  18. data/lib/hodor/api/oozie/action.rb +52 -0
  19. data/lib/hodor/api/oozie/bundle.rb +27 -0
  20. data/lib/hodor/api/oozie/coordinator.rb +53 -0
  21. data/lib/hodor/api/oozie/hadoop_job.rb +29 -0
  22. data/lib/hodor/api/oozie/job.rb +192 -0
  23. data/lib/hodor/api/oozie/materialization.rb +56 -0
  24. data/lib/hodor/api/oozie/query.rb +115 -0
  25. data/lib/hodor/api/oozie/session.rb +170 -0
  26. data/lib/hodor/api/oozie/workflow.rb +58 -0
  27. data/lib/hodor/cli.rb +146 -0
  28. data/lib/hodor/command.rb +164 -0
  29. data/lib/hodor/configuration.rb +80 -0
  30. data/lib/hodor/environment.rb +437 -0
  31. data/lib/hodor/ui/table.rb +130 -0
  32. data/lib/hodor/version.rb +3 -0
  33. data/lib/tasks/hdfs.thor +138 -0
  34. data/lib/tasks/master.thor +61 -0
  35. data/lib/tasks/oozie.thor +399 -0
  36. data/lib/tasks/sandbox.thor +87 -0
  37. data/spec/integration/api/oozie/action_spec.rb +69 -0
  38. data/spec/integration/api/oozie/bundle_spec.rb +33 -0
  39. data/spec/integration/api/oozie/coordinator_spec.rb +66 -0
  40. data/spec/integration/api/oozie/hadoop_job_spec.rb +29 -0
  41. data/spec/integration/api/oozie/job_spec.rb +15 -0
  42. data/spec/integration/api/oozie/materialization_spec.rb +66 -0
  43. data/spec/integration/api/oozie/query_spec.rb +43 -0
  44. data/spec/integration/api/oozie/session_spec.rb +18 -0
  45. data/spec/integration/api/oozie/workflow_spec.rb +65 -0
  46. data/spec/integration/api/oozie_spec.rb +198 -0
  47. data/spec/integration/fixtures/api/running_coordinators/req_resp_00.memo +6 -0
  48. data/spec/integration/fixtures/api/sample_action/req_resp_00.memo +5 -0
  49. data/spec/integration/fixtures/api/sample_action/req_resp_01.memo +7 -0
  50. data/spec/integration/fixtures/api/sample_bundle/req_resp_00.memo +6 -0
  51. data/spec/integration/fixtures/api/sample_coordinator/req_resp_00.memo +5 -0
  52. data/spec/integration/fixtures/api/sample_materialization/req_resp_00.memo +5 -0
  53. data/spec/integration/fixtures/api/sample_materialization/req_resp_01.memo +7 -0
  54. data/spec/integration/fixtures/api/sample_workflow/req_resp_00.memo +5 -0
  55. data/spec/spec_helper.rb +92 -0
  56. data/spec/support/d_v_r.rb +125 -0
  57. data/spec/support/hodor_api.rb +15 -0
  58. data/spec/unit/hodor/api/hdfs_spec.rb +63 -0
  59. data/spec/unit/hodor/api/oozie_spec.rb +32 -0
  60. data/spec/unit/hodor/environment_spec.rb +52 -0
  61. data/topics/hdfs/corresponding_paths.txt +31 -0
  62. data/topics/hdfs/overview.txt +10 -0
  63. data/topics/master/clusters.yml.txt +36 -0
  64. data/topics/master/overview.txt +17 -0
  65. data/topics/oozie/blocking_coordinators.txt +46 -0
  66. data/topics/oozie/composing_job_properties.txt +68 -0
  67. data/topics/oozie/display_job.txt +52 -0
  68. data/topics/oozie/driver_scenarios.txt +42 -0
  69. data/topics/oozie/inspecting_jobs.txt +59 -0
  70. data/topics/oozie/jobs.yml.txt +185 -0
  71. data/topics/oozie/overview.txt +43 -0
  72. data/topics/oozie/workers_and_drivers.txt +40 -0
  73. metadata +455 -0
data/lib/hodor/configuration.rb
@@ -0,0 +1,80 @@
+require 'yaml'
+require 'erb'
+
+module Hodor
+  class Configuration
+
+    def env
+      Environment.instance
+    end
+
+    def target
+      env.settings[:target]
+    end
+
+    def logger
+      env.logger
+    end
+
+    def initialize(yml_file)
+      @yml_file = yml_file
+      @kvp = {}
+    end
+
+    def load
+
+      @loaded = true
+
+      yml_expand(@target_cluster, [@clusters])
+    end
+
+    def yml_expand(val, parents)
+      if val.is_a? String
+        val.gsub(/\$\{.+?\}/) { |match|
+          cv = match.split(/\${|}/)
+          expr = cv[1]
+          ups = expr.split('^')
+          parent_index = parents.length - ups.length
+          parent = parents[parent_index]
+          parent_key = ups[-1]
+          parent_key = parent_key[1..-1] if parent_key.start_with?(':')
+          if parent.has_key?(parent_key)
+            parent[parent_key]
+          elsif parent.has_key?(parent_key.to_sym)
+            parent[parent_key.to_sym]
+          else
+            parent_key
+          end
+        }
+      elsif val.is_a? Hash
+        more_parents = parents << val
+        val.each_pair { |k, v|
+          exp_val = yml_expand(v, more_parents)
+          val[k] = exp_val
+        }
+      else
+        val
+      end
+    end
+
+    def yml_flatten(parent_key, val)
+      flat_vals = []
+      if val.is_a? Hash
+        val.each_pair { |k, v|
+          flat_vals += yml_flatten("#{parent_key}.#{k}", v)
+        }
+      else
+        parent_key = parent_key[1..-1] if parent_key.start_with?('.')
+        flat_vals = ["#{parent_key} = #{val}"]
+      end
+      flat_vals
+    end
+
+    def render_flattened
+      flat_vals = yml_flatten('', egress_to)
+      flat_vals.join("\n")
+    end
+
+  end
+end
+
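Configuration#yml_expand resolves ${...} references against a chain of parent hashes: each leading '^' in the expression moves one level up before the final key is looked up (as a string first, then as a symbol), and yml_flatten renders a nested hash as "key = value" lines. A minimal sketch of both, with hypothetical cluster keys and values, assuming the gem's lib directory is on the load path:

require 'hodor/configuration'

# Hypothetical nested settings of the kind config/clusters.yml would hold.
clusters = {
  nameNode: 'hdfs://prod-nn:8020',
  production: {
    ssh_host: 'prod-gateway',
    job_root: '${^nameNode}/jobs'   # '^' resolves against the hash one level up
  }
}

conf = Hodor::Configuration.new('config/clusters.yml')   # the filename is stored here, not read
conf.yml_expand(clusters[:production], [clusters])
clusters[:production][:job_root]
# => "hdfs://prod-nn:8020/jobs"

puts conf.yml_flatten('', clusters[:production]).join("\n")
# ssh_host = prod-gateway
# job_root = hdfs://prod-nn:8020/jobs
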
data/lib/hodor/environment.rb
@@ -0,0 +1,437 @@
+require 'singleton'
+
+require 'yaml'
+require 'erb'
+require 'log4r'
+require 'log4r/configurator'
+require 'tmpdir'
+require 'open4'
+require 'socket'
+require 'etc'
+
+include Log4r
+
+module Hodor
+  class Environment
+    include Singleton
+
+    attr_reader :logger
+    attr_accessor :options
+
+    def root
+      begin
+        @root = run_local "git rev-parse --show-toplevel", raise_on_error: true
+      rescue Hodor::Cli::AbnormalExitStatus => ex
+        puts "#{ex.message.strip}\nHodor must be run inside a Git working tree. Aborting..."
+        Kernel.exit
+      end if @root.nil?
+      @root
+    end
+
+    def logger_id
+      'MainLogger'
+    end
+
+    def logger
+      begin
+        ::Configurator.load_xml_file(File.join(root, 'config', 'log4r_config.xml'))
+        @logger = Log4r::Logger[logger_id]
+      rescue => ex
+        puts "Error: #{ex.message}"
+      end if @logger.nil?
+      @logger
+    end
+
+    def erb_sub(erb_body)
+      ERB.new(erb_body).result(self.instance_eval { binding })
+    end
+
+    def erb_load(filename, suppress_erb=false)
+      if File.exists?(filename)
+        file_contents = File.read(filename)
+        sub_content = suppress_erb ? file_contents : erb_sub(file_contents)
+        sub_content
+      elsif !filename.start_with?(root)
+        erb_load(File.join(root, filename))
+      end
+    end
+
+    def yml_load(filename) #, suppress_erb=false)
+      YAML.load(erb_load(filename, false)) # suppress_erb))
+    end
+
+    def terse?
+      options[:terse]
+    end
+
+    def silent?
+      options[:silent]
+    end
+
+    def verbose?
+      options[:verbose]
+    end
+
+    def dryrun?
+      options[:dryrun]
+    end
+
+    def clean?
+      options[:clean]
+    end
+
+    def hadoop_env
+      ENV['HADOOP_ENV'] || 'sandbox'
+    end
+
+    def initialize
+      @options = {}
+    end
+
+    def load_settings
+      target_env = hadoop_env.to_sym
+      @clusters = yml_load('config/clusters.yml')
+
+      @target_cluster = @clusters[target_env]
+      if @target_cluster.nil?
+        raise "The target environment '#{target_env}' was not defined in the config/clusters.yml file. Aborting..."
+      end
+
+      if File.exist?('config/local.yml')
+        @target_cluster.merge! yml_load('config/local.yml')
+      end
+
+      @target_cluster[:target] = target_env
+
+      @loaded = true
+    end
+
+    def prefs
+      if @prefs.nil?
+        preffile = "#{Etc.getpwuid.dir}/.hodor.yml"
+        @prefs = yml_load(preffile) if File.exists?(preffile)
+        @prefs ||= {}
+      end
+      @prefs
+    end
+
+    def reset
+      @clusters = nil
+      @target_cluster = nil
+      @loaded = false
+      @jobs = nil
+      @run = nil
+    end
+
+    def path_on_github(path = nil)
+      if path
+        if path.start_with?('/')
+          abspath = true
+          lpath = path
+        else
+          relpath = true
+        end
+      end
+      lpath ||= FileUtils.pwd
+
+      lpath = lpath.sub(root, '')
+      git_path = relpath ? "#{lpath}/#{path}" : lpath
+      git_path = git_path.sub(/\/\//, '/').sub(/\/\.\//, '/').sub(/\/\.$/, '').sub(/\/$/, '')
+
+      if git_path.end_with?('..')
+        up_index = git_path.rindex(/[^\.]\//)
+        if up_index > 0
+          last_path = git_path[0..up_index]
+          up_path = git_path[up_index+2..-1]
+          up_paths = up_path.split('/')
+          abs_path = File.expand_path(File.join(up_paths), "#{root}/#{last_path}".sub(/\/\//, '/'))
+          git_path = path_on_github(abs_path)
+        end
+      end
+
+      git_path
+    end
+
+    def path_on_disc(path = nil)
+      relpath = path_on_github(path)
+      "#{root}/#{relpath}".sub(/\/\//, '/').sub(/\/$/, '')
+    end
+
+    def pwd(path = nil)
+      if path
+        if path.start_with?('/')
+          abspath = true
+          lpwd = path
+        else
+          relpath = true
+        end
+      end
+      lpwd ||= FileUtils.pwd
+      rpwd = lpwd.sub(root, '')
+      if rpwd.length < lpwd.length
+        lpwd = rpwd[1..-1] if rpwd.start_with?('/')
+      else
+        lpwd = rpwd
+      end
+      relpath ? "#{lpwd}/#{path}" : lpwd
+    end
+
+    def abs_path(path)
+      "#{root}/#{path}"
+    end
+
+    def paths_from_root(end_path)
+      # returns an array of paths from the root of the repo
+      paths = []
+      curpath = end_path
+      loop do
+        paths << curpath
+        break if curpath == root || curpath.length < root.length || curpath.length == 0
+        curpath = File.dirname(curpath)
+      end
+      paths.reverse
+    end
+
+    def target_cluster
+      load_settings if !@loaded || !@target_cluster
+      raise "No settings for target cluster '#{hadoop_env}' were loaded" if !@loaded || !@target_cluster
+      @target_cluster
+    end
+
+    def [](key)
+      target_cluster[key]
+    end
+
+    def env
+      target_cluster
+    end
+
+    def select_job(job)
+      @job = job
+    end
+
+    def job
+      @job || {}
+    end
+
+    def has_key? key
+      target_cluster.has_key? key
+    end
+
+    def settings
+      target_cluster
+    end
+
+    def ssh_user
+      env[:ssh_user]
+    end
+
+    def hostname
+      Socket.gethostname
+    end
+
+    def username
+      Etc.getpwuid(Process.uid).name
+    end
+
+    # Compute SSH command (user, machine and port part)
+    def ssh_addr
+      va = "#{ssh_user}@#{settings[:ssh_host]}"
+      va << " -p #{settings[:ssh_port] || 22}"
+    end
+
+    # Run an ssh command, performing any optional variable expansion
+    # on the command line that might be necessary.
+    #
+    # The following variable expansions are supported:
+    #    env.ssh %Q[ssh ${ssh_addr} ...]         # calls "ssh_addr" function
+    #    env.ssh %Q[ssh ${env[:ssh_user]} ...]   # retrieves value from hash
+    #    env.ssh %Q[ssh :ssh_user ...]           # retrieves value from hash
+    #    env.ssh %Q[ssh #{env.ssh_addr} ...]     # skip variable expansion.
+    #                                              Use normal string interpolation instead
+    def kvp_expand(script)
+      script.gsub!(/:[^\s]+|\$\{.+?\}/) { |match|
+        begin
+          if match.start_with?(':')
+            k = match[1..-1].to_sym
+            if settings.has_key?(k)
+              val = settings[k]
+            else
+              val = match
+            end
+          else
+            cv = match.split(/\{|\}/)
+            cv = cv[1].split(/\[|\]/)
+            fn = cv[0].to_sym
+            if self.respond_to?(fn)
+              rtn = self.send(fn)
+              if cv.size == 1
+                val = rtn
+              else
+                k = cv[1]
+                k = k[1..-1].to_sym if k.start_with?(':')
+                val = rtn[k]
+              end
+              val
+            else
+              match
+            end
+          end
+        rescue StandardError
+          match
+        end
+      }
+      script
+    end
+
+    # user_args
+    #   strip off the "-u <username>" argument, which hadoop commands don't understand.
+    #   The username has to be set using an environment variable instead. This is a
+    #   convience method to facilitate this swapping around that is necessary in several
+    #   hadoop commands (fs, oozie etc.).
+    def extract_sudoer(trailing)
+      username_next = false
+      username = nil # nil assignment avoids "unused variable" warning
+      args = []
+      trailing.each { |arg|
+        if arg.eql?("-u")
+          username_next = arg.eql?("-u")
+        elsif username_next
+          username = arg
+          username_next = false
+        else
+          args << arg
+        end
+      }
+
+      return [username, args]
+    end
+
+    def ssh script, opts = {}
+      opts[:ssh] = true
+      run_local script, opts
+    end
+
+    def deploy_tmp_file local_file, opts = {}
+      deploy_path = "/tmp/#{File.basename(local_file, ".*")}-#{username}-#{hostname}#{File.extname(local_file)}"
+      run_local %Q[scp #{local_file} #{settings[:ssh_user]}@#{settings[:ssh_host]}:#{deploy_path}],
+        echo: true, echo_cmd: true
+      deploy_path
+    end
+
+
+    # Alternative to system() that (optionally) echos STDOUT as it is
+    # appended, rather than after the command completes.
+    #
+    # command_line - the shell command and arguments to execute
+    #     --terse => if --terse appears on the command line, only
+    #                the native output of the command is printed.
+    #                I.e. the extra output of log4r is suppressed.
+    # opts - options to the function, that include:
+    #     [:echo]     => true - append stdout and stderr as it is generated
+    #                 => false - execute the command silently
+    #     [:echo_cmd] => true - log the command to be executed
+    #                 => false - remain silent
+    #     [:raise_on_error] => true - failed commands raise an exception
+    #                       => false - remain silent
+    #     [:suppress_expansion]
+    #                 => true - don't expand key-value pairs in the command line
+    #                 => false - expand key-value pairs
+    #     [:sudo]
+    #                 => true - invoke with sudo, extracting username from -u argument
+    #                 => false - run without sudo
+    #     [:ssh]
+    #                 => true - prefix the command with ssh to run remotely
+    #                 => false - don't prefix command line with ssh
+    #
+    # Returns stdout/stderr as a string
+    def run_local command_line, opts = {}
+      if opts[:sudo]
+        username, args = extract_sudoer(command_line)
+        command_line = "sudo -u #{username} #{args}" if username
+      end
+
+      if opts[:ssh]
+        ssh_prefix = "ssh #{settings[:ssh_user]}@#{settings[:ssh_host]} "
+        ssh_prefix << "-p #{settings[:ssh_port]} -T " unless settings[:ssh_port].nil?
+        command_line = ssh_prefix + command_line
+      end
+
+      command_line = kvp_expand(command_line) unless opts[:suppress_expansion]
+      native_output_only = command_line.include?('--terse')
+      if native_output_only
+        command_line.sub!(' --terse', '')
+        opts[:echo] = true
+        opts[:echo_cmd] = false
+      end
+      echo_command_output = opts[:echo] || false
+      command_line = "#{command_line}"
+      logger.sshcmd "$ #{command_line}" if opts[:echo_cmd]
+      command_output = ""
+      status = Open4::popen4(command_line) do |pid, stdin, stdout, stderr|
+        command_output = capture_output(stdout, stderr, echo_command_output, native_output_only)
+      end
+      if status.exitstatus != 0
+        raise Hodor::Cli::AbnormalExitStatus.new(status.exitstatus, command_output) if opts[:raise_on_error]
+      end
+      command_output.strip
+    rescue Hodor::Cli::AbnormalExitStatus
+      raise
+    rescue Errno::ENOENT
+      raise Hodor::Cli::CommandNotFound, "Bash Error. Command or file arguments not found." if opts[:raise_on_error]
+    end
+
+    private
+
+    def capture_output stdout, stderr, echo_command_output, native_output_only
+      stdout_lines = ""
+      stderr_lines = ""
+      command_output = ""
+      loop do
+        begin
+          # check whether stdout, stderr or both are
+          # ready to be read from without blocking
+          IO.select([stdout,stderr]).flatten.compact.each { |io|
+            # stdout, if ready, goes to stdout_lines
+            stdout_lines += io.readpartial(1024) if io.fileno == stdout.fileno
+            # stderr, if ready, goes to stdout_lines
+            stderr_lines += io.readpartial(1024) if io.fileno == stderr.fileno
+          }
+          break if stdout.closed? && stderr.closed?
+        rescue EOFError
+          # Note, readpartial triggers the EOFError too soon. Continue to flush the
+          # pending io (via readpartial) until we have received all characters
+          # out from the IO socket.
+          break if stdout_lines.length == 0 && stderr_lines.length == 0
+        ensure
+          # if we acumulated any complete lines (\n-terminated)
+          # in either stdout/err_lines, output them now
+          stdout_lines.sub!(/.*\n/) {
+            command_output << $&
+            if echo_command_output
+              if native_output_only
+                puts $&.strip
+              else
+                logger.stdout $&.strip
+              end
+            end
+            ''
+          }
+          stderr_lines.sub!(/.*\n/) {
+            command_output << $&
+            if echo_command_output
+              if native_output_only
+                puts $&.strip
+              else
+                logger.stderr $&.strip
+              end
+            end
+            ''
+          }
+        end
+      end
+      command_output
+    end
+
+  end
+end
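
As the comments above kvp_expand spell out, command lines passed through env.ssh and run_local may reference settings either as :key tokens, which are looked up in the target cluster's settings, or as ${method[:key]} expressions, which call an Environment method and optionally index its result. A minimal sketch, assuming a checkout whose config/clusters.yml defines ssh_user and ssh_host for the current HADOOP_ENV (the values hadoop and gateway.example.com, and the :oozie_url key, are hypothetical):

require 'hodor'

env = Hodor::Environment.instance

# ':key' tokens come from the target cluster's settings hash.
env.kvp_expand("hadoop fs -ls /user/:ssh_user")
# => "hadoop fs -ls /user/hadoop"

# '${method[:key]}' calls the named Environment method and indexes its result.
env.kvp_expand("scp run.properties ${env[:ssh_user]}@${env[:ssh_host]}:/tmp")
# => "scp run.properties hadoop@gateway.example.com:/tmp"

# env.ssh sets opts[:ssh] and delegates to run_local, which prefixes
# "ssh <ssh_user>@<ssh_host>" and expands the remaining tokens before executing.
env.ssh "oozie jobs -oozie :oozie_url -jobtype coordinator", echo: true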
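
extract_sudoer and run_local carry most of the weight behind the Thor tasks: the former strips a "-u <username>" pair from trailing arguments (hadoop commands don't understand -u, so run_local re-applies it as "sudo -u ..." when the :sudo option is set), and the latter executes a command, echoing output as it arrives when asked to, and returns the combined stdout/stderr. A short sketch with illustrative arguments, assuming it is run from inside a Hodor repository:

require 'hodor'

env = Hodor::Environment.instance

# Split the sudo user off a task's trailing arguments.
username, args = env.extract_sudoer(%w[-u hdfs -rm -r /tmp/scratch])
# username => "hdfs"
# args     => ["-rm", "-r", "/tmp/scratch"]

# Mirrors what Environment#root does internally: capture stdout, return it
# stripped, and raise Hodor::Cli::AbnormalExitStatus on a non-zero exit
# because :raise_on_error is set.
repo_root = env.run_local "git rev-parse --show-toplevel", raise_on_error: true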