hodor 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +5 -0
  8. data/Gemfile +4 -0
  9. data/Guardfile +11 -0
  10. data/README.md +105 -0
  11. data/Rakefile +105 -0
  12. data/bin/hodor +18 -0
  13. data/hodor.gemspec +47 -0
  14. data/lib/config/log4r_config.xml +35 -0
  15. data/lib/hodor.rb +83 -0
  16. data/lib/hodor/api/hdfs.rb +222 -0
  17. data/lib/hodor/api/oozie.rb +215 -0
  18. data/lib/hodor/api/oozie/action.rb +52 -0
  19. data/lib/hodor/api/oozie/bundle.rb +27 -0
  20. data/lib/hodor/api/oozie/coordinator.rb +53 -0
  21. data/lib/hodor/api/oozie/hadoop_job.rb +29 -0
  22. data/lib/hodor/api/oozie/job.rb +192 -0
  23. data/lib/hodor/api/oozie/materialization.rb +56 -0
  24. data/lib/hodor/api/oozie/query.rb +115 -0
  25. data/lib/hodor/api/oozie/session.rb +170 -0
  26. data/lib/hodor/api/oozie/workflow.rb +58 -0
  27. data/lib/hodor/cli.rb +146 -0
  28. data/lib/hodor/command.rb +164 -0
  29. data/lib/hodor/configuration.rb +80 -0
  30. data/lib/hodor/environment.rb +437 -0
  31. data/lib/hodor/ui/table.rb +130 -0
  32. data/lib/hodor/version.rb +3 -0
  33. data/lib/tasks/hdfs.thor +138 -0
  34. data/lib/tasks/master.thor +61 -0
  35. data/lib/tasks/oozie.thor +399 -0
  36. data/lib/tasks/sandbox.thor +87 -0
  37. data/spec/integration/api/oozie/action_spec.rb +69 -0
  38. data/spec/integration/api/oozie/bundle_spec.rb +33 -0
  39. data/spec/integration/api/oozie/coordinator_spec.rb +66 -0
  40. data/spec/integration/api/oozie/hadoop_job_spec.rb +29 -0
  41. data/spec/integration/api/oozie/job_spec.rb +15 -0
  42. data/spec/integration/api/oozie/materialization_spec.rb +66 -0
  43. data/spec/integration/api/oozie/query_spec.rb +43 -0
  44. data/spec/integration/api/oozie/session_spec.rb +18 -0
  45. data/spec/integration/api/oozie/workflow_spec.rb +65 -0
  46. data/spec/integration/api/oozie_spec.rb +198 -0
  47. data/spec/integration/fixtures/api/running_coordinators/req_resp_00.memo +6 -0
  48. data/spec/integration/fixtures/api/sample_action/req_resp_00.memo +5 -0
  49. data/spec/integration/fixtures/api/sample_action/req_resp_01.memo +7 -0
  50. data/spec/integration/fixtures/api/sample_bundle/req_resp_00.memo +6 -0
  51. data/spec/integration/fixtures/api/sample_coordinator/req_resp_00.memo +5 -0
  52. data/spec/integration/fixtures/api/sample_materialization/req_resp_00.memo +5 -0
  53. data/spec/integration/fixtures/api/sample_materialization/req_resp_01.memo +7 -0
  54. data/spec/integration/fixtures/api/sample_workflow/req_resp_00.memo +5 -0
  55. data/spec/spec_helper.rb +92 -0
  56. data/spec/support/d_v_r.rb +125 -0
  57. data/spec/support/hodor_api.rb +15 -0
  58. data/spec/unit/hodor/api/hdfs_spec.rb +63 -0
  59. data/spec/unit/hodor/api/oozie_spec.rb +32 -0
  60. data/spec/unit/hodor/environment_spec.rb +52 -0
  61. data/topics/hdfs/corresponding_paths.txt +31 -0
  62. data/topics/hdfs/overview.txt +10 -0
  63. data/topics/master/clusters.yml.txt +36 -0
  64. data/topics/master/overview.txt +17 -0
  65. data/topics/oozie/blocking_coordinators.txt +46 -0
  66. data/topics/oozie/composing_job_properties.txt +68 -0
  67. data/topics/oozie/display_job.txt +52 -0
  68. data/topics/oozie/driver_scenarios.txt +42 -0
  69. data/topics/oozie/inspecting_jobs.txt +59 -0
  70. data/topics/oozie/jobs.yml.txt +185 -0
  71. data/topics/oozie/overview.txt +43 -0
  72. data/topics/oozie/workers_and_drivers.txt +40 -0
  73. metadata +455 -0
@@ -0,0 +1,170 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'json'
4
+ require 'singleton'
5
+
6
+ module Hodor::Oozie
7
+ class Session
8
+ include Singleton
9
+
10
+ attr_accessor :mode, :verbose, :filter, :len, :offset
11
+ attr_reader :last_query
12
+
13
+ def env
14
+ Hodor::Environment.instance
15
+ end
16
+
17
+ def logger
18
+ env.logger
19
+ end
20
+
21
+ def initialize
22
+ @len = env.prefs[:default_list_length] || 30
23
+ @offset = 0
24
+ end
25
+
26
# Issues an HTTP(S) GET against the configured Oozie endpoint and returns the
# raw response body.
#
# The URL is env[:oozie_url] with +api+ appended; a doubled slash directly
# after "oozie" is collapsed to a single one. The resulting URL is remembered
# in @last_query.
#
# api - String path (and query string) appended to the configured Oozie URL.
#
# Returns the response body String, or nil once opening the connection has
# timed out on the first attempt and on every one of the 5 retries.
# Errors other than Net::OpenTimeout are not rescued here.
def rest_call(api)
  max_retries = 5
  num_retries = 0
  begin
    url = "#{env[:oozie_url]}#{api}".gsub(/oozie\/\//,'oozie/')
    @last_query = url
    uri = URI.parse(url)

    http = Net::HTTP.new(uri.host, uri.port)
    # Net::HTTP.new never turns TLS on by itself; without this an https://
    # Oozie URL would be spoken to in plain HTTP.
    http.use_ssl = (uri.scheme == 'https')
    http.read_timeout = 10
    http.open_timeout = 10

    http.start { |conn| conn.get(uri.request_uri).body }
  rescue Net::OpenTimeout
    if num_retries < max_retries
      num_retries += 1
      logger.error "Network connection timed out! Make sure you are connected to the Internet or VPN. Retrying..."
      retry
    end
    # Only announce a retry when one actually follows.
    logger.error "Network connection timed out! Giving up after #{max_retries} retries."
    nil
  end
end
52
+
53
# Queries the "/v2/jobs" endpoint through rest_call.
#
# args - query-string fragments; nil and zero-size entries are dropped and the
#        rest are joined with '&'.
#
# Records the URL of this call (@last_query) as @root_query and returns
# whatever rest_call returned.
def search_jobs(*args)
  query = args.reject { |param| param.nil? || param.size == 0 }.join('&')
  result = rest_call("/v2/jobs?#{query}")
  @root_query = @last_query
  result
end
58
+
59
# Queries "/v1/job/<job_id>" through rest_call.
#
# job_id - identifier interpolated into the request path.
# args   - query-string fragments; nil and zero-size entries are dropped and
#          the rest are joined with '&'.
#
# Returns whatever rest_call returned.
def get_job_state(job_id, *args)
  params = args.select { |param| !param.nil? && param.size != 0 }
  rest_call("/v1/job/#{job_id}?#{params.join('&')}")
end
62
+
63
# Renumbers the given children and overwrites the on-disk index file.
#
# children   - collection of objects responding to set_index, skip_to and id,
#              or nil when there are none.
# current_id - value stored under :current_id.
# parent_id  - value stored under :parent_id.
#
# Each child is told its position via set_index; the id stored for it is its
# skip_to when that is set, otherwise its id. The hash (together with
# @root_query) is marshalled into cache_file.
#
# Returns children unchanged.
def refresh_index(children, current_id, parent_id)
  child_ids = nil
  if children
    children.each_with_index do |child, position|
      child.set_index(position)
    end
    child_ids = children.map { |child| child.skip_to || child.id }
  end

  File.open(cache_file, 'wb') do |io|
    io.write(::Marshal.dump(children: child_ids,
                            current_id: current_id,
                            parent_id: parent_id,
                            root_query: @root_query))
  end
  children
end
79
+
80
+ def index
81
+ if @index.nil?
82
+ @index = load_index
83
+ end
84
+ @index
85
+ end
86
+
87
# Looks up the id stored at position child_index in the index's :children list.
#
# Returns nil when the index holds no children list.
# Raises RuntimeError when child_index is not below the number of children.
def child_id(child_index)
  known = index[:children]
  return nil unless known

  unless child_index < known.length
    raise "No child with index '#{child_index}' was found"
  end
  known[child_index]
end
99
+
100
+ def current_id
101
+ index[:current_id]
102
+ end
103
+
104
+ def parent_id
105
+ index[:parent_id]
106
+ end
107
+
108
+ def root_query
109
+ @root_query || index[:root_query]
110
+ end
111
+
112
# Path of the file that holds the persisted index (memoized in @cache_file).
#
# The file name embeds an id chosen in this order:
#   1. the HODOR_INDEX_ID environment variable, when set;
#   2. 'default' when env[:display_job_query_mode] is truthy;
#   3. the pid of this process's parent.
#
# Returns the path String.
def cache_file
  @cache_file ||= begin
    # Process.ppid yields the same value the former `ps -p <pid> -o ppid=`
    # call printed, without spawning a subprocess.
    index_id = ENV['HODOR_INDEX_ID'] ||
               (env[:display_job_query_mode] ? 'default' : Process.ppid.to_s)
    "/tmp/hodor-#{index_id}.index"
  end
end
124
+
125
# Reads the persisted index from cache_file.
#
# Returns the Hash stored in the file, or a Hash with nil :children,
# :current_id, :parent_id and :root_query when the file does not exist or
# holds a nil/false value. When the stored hash has a :root_query key and
# @root_query is not yet set, @root_query is taken from it.
#
# Raises RuntimeError ("Failed to load Hodor cache file. ...") when reading or
# decoding the file fails.
def load_index
  empty_index = { children: nil,
                  current_id: nil,
                  parent_id: nil,
                  root_query: nil }
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  return empty_index unless File.exist?(cache_file)

  # NOTE(review): Marshal.load on a file under /tmp trusts whoever can write
  # that path; consider a safer format or a per-user directory.
  index_read = File.open(cache_file, 'rb') { |f| ::Marshal.load(f) }
  return empty_index unless index_read

  @root_query ||= index_read[:root_query] if index_read.has_key?(:root_query)
  index_read
rescue => ex
  raise "Failed to load Hodor cache file. #{ex.message}"
end
143
+
144
# Summarises the session's current position as a Hash with the keys
# :current_id, :parent_id and :root_query.
#
# The values come from this object's own readers. The previous body went
# through an undefined `session` receiver, which raises NameError here.
def pwj
  { current_id: current_id,
    parent_id: parent_id,
    root_query: root_query }
end
149
+
150
# Resolves a navigation request to a job id.
#
# movement - one of :root, :up, :down, :none or :jump.
# request  - for :down, the child position (converted with to_i);
#            for :jump, the value returned as-is.
#
# Returns nil for :root, parent_id for :up, child_id(request.to_i) for :down,
# current_id for :none, request for :jump, and nil for any other movement.
def job_relative(movement, request = nil)
  case movement
  when :root then nil
  when :up   then parent_id
  when :down then child_id(request.to_i)
  when :none then current_id
  when :jump then request
  end
end
164
+
165
+ def make_current(job)
166
+ refresh_index(job.children, job.id, job.parent_id) if job
167
+ job
168
+ end
169
+ end
170
+ end
@@ -0,0 +1,58 @@
1
+ require_relative 'job'
2
+
3
+ module Hodor::Oozie
4
+
5
+ class Workflow < Job
6
+
7
+ attr_reader :json, :app_path, :acl, :status, :created_at, :conf, :last_mod_time, :run,
8
+ :end_time, :external_id, :name, :app_name, :id, :start_time, :materialization_id, :parent_id,
9
+ :materialization, :to_string, :group, :console_url, :user
10
+
11
+ class << self
12
+ def default_columns
13
+ [:index, :id, :status, :created_at, :last_mod_time, :app_name]
14
+ end
15
+ end
16
+
17
# Builds a Workflow from a parsed JSON hash.
#
# json - Hash with string keys ("appPath", "status", "parentId", ...). Time
#        fields are passed through parse_time; all other fields are copied
#        verbatim.
#
# "parentId" is stored as materialization_id. When it contains an '@' past
# the first character, parent_id is the text before that '@'; otherwise
# parent_id equals materialization_id.
def initialize(json)
  super()
  @json = json

  @app_path = json["appPath"]
  @acl = json["acl"]
  @status = json["status"]
  @created_at = parse_time json["createdTime"]
  @conf = json["conf"]
  @last_mod_time = parse_time json["lastModTime"]
  @run = json["run"]
  @end_time = parse_time json["endTime"]
  @external_id = json["externalId"]
  @name = @app_name = json["appName"]
  @id = json["id"]
  @start_time = parse_time json["startTime"]
  @materialization_id = json["parentId"]
  @materialization = nil

  # The original tested a misspelled ivar (@materializeation_id), which is
  # always nil, so the '@' split below could never happen.
  ati = @materialization_id.nil? ? nil : @materialization_id.index('@')
  if ati && ati > 0
    @parent_id = @materialization_id[0..ati-1]
  else
    @parent_id = @materialization_id
  end

  @to_string = json["toString"]
  @group = json["group"]
  @console_url = json["consoleUrl"]
  @user = json["user"]
end
47
+
48
# Expands the workflow's immediate children: wraps every entry of
# json["actions"] in a Hodor::Oozie::Action and stores the list in @actions.
#
# A missing "actions" entry is treated as an empty list.
#
# Returns the Array of actions.
def expand
  # Loaded here rather than at file top, as in the original; hoisted out of
  # the per-item loop since one require is enough.
  require_relative 'action'
  @actions = (json["actions"] || []).map do |item|
    Hodor::Oozie::Action.new(item)
  end
end
55
+
56
+ end
57
+
58
+ end
@@ -0,0 +1,146 @@
1
+ require "thor/runner"
2
+ require_relative 'environment'
3
+
4
+ module Hodor
5
+
6
+ module Cli
7
+ class Usage < StandardError
8
+ end
9
+
10
+ class CommandNotFound < StandardError
11
+ end
12
+
13
# Error that carries an exit status alongside its message.
class AbnormalExitStatus < StandardError
  # The exit status handed to the constructor.
  attr_reader :exit_status

  # exit_status - value exposed through #exit_status.
  # error_lines - passed to StandardError as the exception message.
  def initialize(exit_status, error_lines)
    super(error_lines)
    @exit_status = exit_status
  end
end
20
+
21
+ class Runner < ::Thor::Runner
22
+
23
+ def help(meth = nil)
24
+ if meth && !self.respond_to?(meth)
25
+ super
26
+ else
27
+ overview = %Q[Hodor is an object-oriented scripting toolkit and Ruby-based API that automates and simplifies the way you
28
+ specify, deploy, test, inspect and administer your hadoop cluster and Oozie workflows. Hodor commands follow
29
+ the convention of:
30
+
31
+ $ hodor [namespace]:[command] [arguments] [options]
32
+
33
+ To get more information about the namespaces and commands available in Hodor, run:
34
+
35
+ $ hodor -T
36
+
37
+ WARNING! Hodor must be run via 'bundle exec'. For example:
38
+
39
+ $ bundle exec hodor -T
40
+
41
+ Note: examples shown in help pages don't show the 'bundle exec' prefix because they assume you have the following alias in place:
42
+
43
+ $ alias hodor='bundle exec hodor'
44
+ ].unindent(10)
45
+ say overview
46
+ end
47
+ end
48
+
49
+ desc "list [SEARCH]", "List the available thor commands (--substring means .*SEARCH)"
50
+ method_options :substring => :boolean, :group => :string, :all => :boolean, :debug => :boolean
51
+ def list(search = "")
52
+ overview = %Q[
53
+ Hodor's Namespaces & Commands
54
+ ======================================================================================================
55
+ Hodor divides its command set into the namespaces shown below (e.g. 'oozie', 'hdfs', 'master' etc.) Each
56
+ namespace contains a set of commands that support the overall purpose of its parent namespace. For example, the
57
+ hdfs namespace includes commands to list, put and get files to/from a remote HDFS volume. The following table shows
58
+ all the namespaces Hodor supports, along with a short description of the commands that fall within each namespace.
59
+
60
+ ].unindent(8)
61
+
62
+ say overview
63
+ super
64
+
65
+ more_help = %Q[Getting More Help:
66
+ ------------------
67
+ Each Hodor namespace offers full help, including an overview of the namespace itself, references to "topic
68
+ pages" that explain core concepts implemented by the namespace and detailed help for each command that falls
69
+ within the namespace. To access help for a Hodor namespace, run hodor passing <namespace> as the sole
70
+ argument. For example, to see help for Hodor's Oozie namespace, run:
71
+
72
+ $ hodor oozie
73
+ $ hodor help oozie # alternate, works the same
74
+
75
+ Furthermore, to see detailed help for the oozie:display_job command, run:
76
+
77
+ $ hodor help oozie:display_job
78
+ $ hodor oozie:help display_job # alternate, works the same
79
+
80
+ Lastly, to see the topic page that explains the "corresponding paths" concept, that is central to the
81
+ Hdfs namespace, run:
82
+
83
+ $ hodor hdfs:topic corresponding_paths
84
+
85
+ And to obtain a list of all topics available within the oozie namespace, for example, run:
86
+
87
+ $ hodor oozie:topics
88
+ ].unindent(8)
89
+ say more_help
90
+ end
91
+
92
+
93
+ def method_missing(meth, *args)
94
+ if args[0].eql?('nocorrect')
95
+ fail %Q[You are using a shell alias with an improper trailing space. For example:
96
+ alias dj='bundle exec hodor oozie:display_job' (works)
97
+ alias dj='bundle exec hodor oozie:display_job ' (fails)]
98
+ end
99
+ super meth, *args
100
+ rescue
101
+ raise
102
+ end
103
+
104
# Reports an unknown command by raising CommandNotFound whose message embeds
# the inspected command name. The second argument is not used.
def self.handle_no_command_error(command, bv)
  message = "No Such Command: #{command.inspect}"
  raise CommandNotFound, message
end
107
+
108
+ no_tasks do
109
+ def thorfiles(*args)
110
+ Dir[File.join(File.dirname(__FILE__), '..', 'tasks/**/*.thor')]
111
+ end
112
+ end
113
+
114
+ end
115
+ end
116
+ end
117
+
118
+
119
+ class Thor
120
+ module Shell
121
+ class Basic # rubocop:disable ClassLength
122
+ def print_wrapped(message, options = {})
123
+ indent = options[:indent] || 0
124
+ width = terminal_width - indent - 5
125
+ paras = message.split("\n\n")
126
+
127
+ paras.map! do |unwrapped|
128
+ unwrapped.strip.gsub(/\n([^\s\-\005])/, ' \1').gsub(/.{1,#{width}}(?:\s|\Z)/) {
129
+ ($& + 5.chr).gsub(/\n\005/, "\n").gsub(/\005/, "\n")
130
+ }
131
+ end
132
+
133
+ paras.each do |para|
134
+ para.split("\n").each do |line|
135
+ stdout.puts line.insert(0, " " * indent)
136
+ end
137
+ stdout.puts unless para == paras.last
138
+ end
139
+ end
140
+ end
141
+ end
142
+ end
143
+
144
+ require_relative "command"
145
+ require_relative "ui/table"
146
+ require_relative "api/oozie"
@@ -0,0 +1,164 @@
1
+ require 'thor'
2
+
3
+ module Hodor
4
+ class Command < ::Thor
5
+
6
+ no_tasks do
7
+
8
+ def env
9
+ Environment.instance
10
+ end
11
+
12
+ def target
13
+ env.settings[:target]
14
+ end
15
+
16
+ def logger
17
+ env.logger
18
+ end
19
+
20
+ # Part of workaround to prevent parent command arguments from being appended
21
+ # to child commands
22
+ # NOTE: the args argument below should actually be *args.
23
+ def invoke(name=nil, *args)
24
+
25
+ name.sub!(/^Hodor:/, '') if name && $hodor_runner
26
+ super(name, args + ["-EOLSTOP"])
27
+ end
28
+
29
+ def invoke_command(command, trailing)
30
+ env.options = options
31
+ @invoking_command = command.name
32
+ workaround_thor_trailing_bug(trailing)
33
+ erb_expand_command_line(trailing)
34
+ @trailing = trailing
35
+
36
+ if self.respond_to?(:intercept_dispatch)
37
+ @was_intercepted = false
38
+ intercept_dispatch(command.name.to_sym, trailing)
39
+ super unless @was_intercepted
40
+ else
41
+ super
42
+ end
43
+ rescue Hodor::Cli::Usage => ex
44
+ logger.error "CLI Usage: #{ex.message}"
45
+ rescue SystemExit, Interrupt
46
+ rescue => ex
47
+ if env.prefs[:debug_mode]
48
+ logger.error "EXCEPTION! #{ex.class.name} :: #{ex.message}\nBACKTRACE:\n\t#{ex.backtrace.join("\n\t")}"
49
+ else
50
+ logger.error "#{ex.message}\nException Class: '#{ex.class.name}'"
51
+ end
52
+ end
53
+
54
+ # This function works around a bug in thor. Basically, when one thor command
55
+ # calls another (ie. via "invoke"), the parent command's last argument is
56
+ # appended to the arguments array of the invoked command. This function
57
+ # just chops off the extra arguments that shouldn't be in the trailing string.
58
# Truncates +trailing+ in place at the first "-EOLSTOP" marker, removing the
# marker and everything after it.
#
# Returns trailing when something was removed, nil when no marker was present.
def workaround_thor_trailing_bug(trailing)
  cut = trailing.index { |element| element.eql?("-EOLSTOP") } || trailing.length
  dropped = trailing.slice!(cut..-1)
  dropped.empty? ? nil : trailing
end
65
+
66
+ # Expand any ERB variables on the command line against the loaded environment. If
67
+ # the environment has no value for the specified key, leave the command line unchanged.
68
+ #
69
+ # Examples:
70
+ # $ bthor sandbox:oozie --oozie "<%= env[:oozie_url] %>"
71
+ # $ bthor sandbox:oozie --oozie :oozie_url
72
+ #
73
+ # Note: Either of above works, since :oozie_url is gsub'd to <%= env[:oozie_url] %>
74
+ #
75
# Rewrites every element of +trailing+ in place.
#
# Each ":name" token (not directly preceded by '[') whose name is a key of
# env.settings is replaced by "<%= env[:name] %>"; other tokens are left as
# they are. The resulting string is then passed through env.erb_sub.
#
# Returns trailing.
def erb_expand_command_line(trailing)
  trailing.map! do |subarg|
    templated = subarg.gsub(/(?<!\[):[a-zA-Z][_0-9a-zA-Z~]+/) do |token|
      env.settings.has_key?(token[1..-1].to_sym) ? "<%= env[#{token}] %>" : token
    end
    env.erb_sub(templated)
  end
end
88
+
89
+ def hadoop_command(cmd, trailing)
90
+ @was_intercepted = true
91
+ cmdline = cmd ? "#{cmd} " : ""
92
+ cmdline << trailing.join(' ')
93
+ env.ssh cmdline, echo: true, echo_cmd: true
94
+ end
95
+
96
+ def dest_path
97
+ options[:to] || "."
98
+ end
99
+
100
+ def scp_file(file)
101
+ # If the file has .erb extension, perform ERB expansion of the file first
102
+ if file.end_with?('.erb')
103
+ dest_file = file.sub(/\.erb$/,'')
104
+ erb_expanded = env.erb_load(file)
105
+ src_file = "/tmp/#{File.basename(dest_file)}"
106
+ File.open(src_file, 'w') { |f| f.write(erb_expanded) }
107
+ else
108
+ dest_file = "#{options[:parent] || ''}#{file}"
109
+ src_file = file
110
+ end
111
+
112
+ file_path = "#{dest_path}/#{File.basename(src_file)}"
113
+ env.run_local %Q[scp #{src_file} #{env.ssh_user}@#{env[:ssh_host]}:#{file_path}],
114
+ echo: true, echo_cmd: true
115
+ return file_path
116
+ end
117
+
118
+ def self.load_topic(title)
119
+ topics = File.join(File.dirname(__FILE__), '..', '..', 'topics', name.split('::').last.downcase)
120
+ contents = File.open( File.join(topics, "#{title}.txt"), 'rt') { |f| f.read }
121
+ contents.gsub(/^\\x5/, "\x5")
122
+ end
123
+
124
# Lists the topic files available for this command's namespace.
#
# The directory is "<this file's dir>/../../topics/<name>", where <name> is
# the last segment of the receiver's class name, lower-cased so that it
# matches the directory load_topic reads from.
#
# Returns an Array of paths to the "*.txt" files found there.
def load_topics
  namespace_dir = self.class.name.split('::').last.downcase
  topics = File.join(File.dirname(__FILE__), '..', '..', 'topics', namespace_dir)
  Dir.glob(File.join(topics, '*.txt'))
end
128
+ end
129
+
130
+ desc "topic [title]", "Display named help topic [title]"
131
+ def topic(title)
132
+ say self.class.load_topic(title)
133
+ end
134
+
135
+ desc "topics", "Display a list of topic discussions available for the namespace"
136
+ def topics
137
+ say "The following topics (in no particular order) are available within the namespace:"
138
+ load_topics.each_with_index { |topic, i|
139
+ say " Topic: #{File.basename(topic).sub(/.txt$/, '')}"
140
+ }
141
+ end
142
+
143
+ class << self
144
+ def inherited(base) #:nodoc:
145
+ base.send :extend, ClassMethods
146
+ end
147
+ end
148
+
149
+ module ClassMethods
150
+ def namespace(name=nil)
151
+ case name
152
+ when nil
153
+ constant = self.to_s.gsub(/^Thor::Sandbox::/, "")
154
+ strip = $hodor_runner ? /^Hodor::Cli::/ : /(?<=Hodor::)Cli::/
155
+ constant = constant.gsub(strip, "")
156
+ constant = ::Thor::Util.snake_case(constant).squeeze(":")
157
+ @namespace ||= constant
158
+ else
159
+ super
160
+ end
161
+ end
162
+ end
163
+ end
164
+ end