hodor 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +5 -0
  8. data/Gemfile +4 -0
  9. data/Guardfile +11 -0
  10. data/README.md +105 -0
  11. data/Rakefile +105 -0
  12. data/bin/hodor +18 -0
  13. data/hodor.gemspec +47 -0
  14. data/lib/config/log4r_config.xml +35 -0
  15. data/lib/hodor.rb +83 -0
  16. data/lib/hodor/api/hdfs.rb +222 -0
  17. data/lib/hodor/api/oozie.rb +215 -0
  18. data/lib/hodor/api/oozie/action.rb +52 -0
  19. data/lib/hodor/api/oozie/bundle.rb +27 -0
  20. data/lib/hodor/api/oozie/coordinator.rb +53 -0
  21. data/lib/hodor/api/oozie/hadoop_job.rb +29 -0
  22. data/lib/hodor/api/oozie/job.rb +192 -0
  23. data/lib/hodor/api/oozie/materialization.rb +56 -0
  24. data/lib/hodor/api/oozie/query.rb +115 -0
  25. data/lib/hodor/api/oozie/session.rb +170 -0
  26. data/lib/hodor/api/oozie/workflow.rb +58 -0
  27. data/lib/hodor/cli.rb +146 -0
  28. data/lib/hodor/command.rb +164 -0
  29. data/lib/hodor/configuration.rb +80 -0
  30. data/lib/hodor/environment.rb +437 -0
  31. data/lib/hodor/ui/table.rb +130 -0
  32. data/lib/hodor/version.rb +3 -0
  33. data/lib/tasks/hdfs.thor +138 -0
  34. data/lib/tasks/master.thor +61 -0
  35. data/lib/tasks/oozie.thor +399 -0
  36. data/lib/tasks/sandbox.thor +87 -0
  37. data/spec/integration/api/oozie/action_spec.rb +69 -0
  38. data/spec/integration/api/oozie/bundle_spec.rb +33 -0
  39. data/spec/integration/api/oozie/coordinator_spec.rb +66 -0
  40. data/spec/integration/api/oozie/hadoop_job_spec.rb +29 -0
  41. data/spec/integration/api/oozie/job_spec.rb +15 -0
  42. data/spec/integration/api/oozie/materialization_spec.rb +66 -0
  43. data/spec/integration/api/oozie/query_spec.rb +43 -0
  44. data/spec/integration/api/oozie/session_spec.rb +18 -0
  45. data/spec/integration/api/oozie/workflow_spec.rb +65 -0
  46. data/spec/integration/api/oozie_spec.rb +198 -0
  47. data/spec/integration/fixtures/api/running_coordinators/req_resp_00.memo +6 -0
  48. data/spec/integration/fixtures/api/sample_action/req_resp_00.memo +5 -0
  49. data/spec/integration/fixtures/api/sample_action/req_resp_01.memo +7 -0
  50. data/spec/integration/fixtures/api/sample_bundle/req_resp_00.memo +6 -0
  51. data/spec/integration/fixtures/api/sample_coordinator/req_resp_00.memo +5 -0
  52. data/spec/integration/fixtures/api/sample_materialization/req_resp_00.memo +5 -0
  53. data/spec/integration/fixtures/api/sample_materialization/req_resp_01.memo +7 -0
  54. data/spec/integration/fixtures/api/sample_workflow/req_resp_00.memo +5 -0
  55. data/spec/spec_helper.rb +92 -0
  56. data/spec/support/d_v_r.rb +125 -0
  57. data/spec/support/hodor_api.rb +15 -0
  58. data/spec/unit/hodor/api/hdfs_spec.rb +63 -0
  59. data/spec/unit/hodor/api/oozie_spec.rb +32 -0
  60. data/spec/unit/hodor/environment_spec.rb +52 -0
  61. data/topics/hdfs/corresponding_paths.txt +31 -0
  62. data/topics/hdfs/overview.txt +10 -0
  63. data/topics/master/clusters.yml.txt +36 -0
  64. data/topics/master/overview.txt +17 -0
  65. data/topics/oozie/blocking_coordinators.txt +46 -0
  66. data/topics/oozie/composing_job_properties.txt +68 -0
  67. data/topics/oozie/display_job.txt +52 -0
  68. data/topics/oozie/driver_scenarios.txt +42 -0
  69. data/topics/oozie/inspecting_jobs.txt +59 -0
  70. data/topics/oozie/jobs.yml.txt +185 -0
  71. data/topics/oozie/overview.txt +43 -0
  72. data/topics/oozie/workers_and_drivers.txt +40 -0
  73. metadata +455 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 084614a22bcc209a5af059347fd77a82c16a9c09
4
+ data.tar.gz: ff9dc14dfe18285bb32c3c58bf72dfe4537117f8
5
+ SHA512:
6
+ metadata.gz: 3402deea656434bf1099015af5e0c8e3d5a0d0a94b2c164c0cd18e7bbc077db86c8527f93d9af5a47e3be21379779e812f9d4ad5bea9a0fb8e809dec37ed795a
7
+ data.tar.gz: a6dc9c33adaf9079a72b5fd30c9419aed906194304935580da0f70b7ba6a7b700b0c8ddd206539ab4d899f5e551b25e60db3ae4c553a72b12057245c293e6ff2
@@ -0,0 +1,16 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ /.idea/
11
+ **/.tmp
12
+ *.bundle
13
+ *.so
14
+ *.o
15
+ *.a
16
+ mkmf.log
@@ -0,0 +1,3 @@
1
+ [submodule "spec/test_repo"]
2
+ path = spec/test_repo
3
+ url = https://github.com/data-wranglers/hadoop-refrepo.git
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
@@ -0,0 +1 @@
1
+ hodor
@@ -0,0 +1 @@
1
+ ruby-2.2-head
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - "2.2.1"
4
+ script: bundle exec rspec spec
5
+
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in hodor.gemspec
4
+ gemspec
@@ -0,0 +1,11 @@
1
# Guard configuration: re-run RSpec whenever a library file or a spec changes.
guard 'rspec', cmd: "bundle exec rspec" do
  # watch /lib/ files
  # The dot before "rb" is escaped so only real ".rb" files match, and the
  # target points at spec/unit/, which is where this gem keeps the specs that
  # mirror the lib/ tree (e.g. lib/hodor/environment.rb ->
  # spec/unit/hodor/environment_spec.rb).
  watch(%r{^lib/(.+)\.rb$}) do |m|
    "spec/unit/#{m[1]}_spec.rb"
  end

  # watch /spec/ files
  # A changed spec (or spec support file) maps back onto itself.
  watch(%r{^spec/(.+)\.rb$}) do |m|
    "spec/#{m[1]}.rb"
  end
end
@@ -0,0 +1,105 @@
1
+
2
+ Introduction
3
+ ===============
4
+ Hodor is a ruby-based framework, API and Command Line Interface that automates and simplifies the way you specify, deploy, debug and administer Hadoop and Oozie solutions. Why did we write Hodor? Because we recognized that the Hadoop ecosystem was missing something essential. While the creators of the Hadoop ecosystem have provided a cutting-edge data ingestion, storage, and transformation engine that performs well in production when provided a specific, fully debugged workflow, getting there is hard. Hadoop lacks a mature toolchain to manage a codebase with modern software development discipline. Enter Hodor: a combination of tools and conventions to address these shortcomings, by enabling in the Hadoop environment many of the software development practices, and deployment facilities we take for granted in normal software development.
5
+
6
+ Background
7
+ ===============
8
+ The Hodor framework and CLI grew out of developer necessity: we needed a developer-oriented commandline alternative to the analyst-oriented Hue web interface tool. After working with Apache's Hue web interface, we quickly realized this tool was not designed to facilitate construction of the kind of production data systems we are building. We found no good alternative to Hue compatible with modern software development practice. Dumping Hue forced us to answer several important process questions. Without Hue, how would we manage production deployments? How should we build solutions that could be committed and reviewed in a version control system? How should we specify and structure Oozie workflows conforming to Oozie's data dependency architecture? How should we submit and monitor job status? As we began to answer these questions for ourselves, Hodor began to take shape.
9
+
10
+ Getting Help
11
+ ============
12
+ Hodor includes a rich help system that can be separated into overview, detail and topic help pages. For example, here are a few help and topic commands that provide a spectrum of depth, from overview to conceptual tutorials.
13
+
14
+ ```bash
15
+ $ hodor # Prints general information about Hodor
16
+ $ hodor -T # Prints a more detailed overview of Hodor's namespaces and commands
17
+ $ hodor help oozie # Prints an overview of the Oozie namespace, its purpose and commands
18
+ $ hodor help oozie:display_job # Prints detailed information about the display_job command, arguments and options
19
+ $ hodor oozie:topic drivers.yml # Provides an overview of a key concept (drivers.yml) utilized by the namespace
20
+ $ hodor master:topic clusters.yml # Provides an overview of the clusters.yml file and its central role in Hodor
21
+ ```
22
+
23
+ Hodor Setup and Aliasing
24
+ ===========================
25
+
26
+ This section demonstrates how one might use the hodor commands during a Hadoop development effort. Before using `hodor`, you'll need to build and install the `hodor` gem locally:
27
+
28
+ ```bash
29
+ gem build hodor.gemspec
30
+ gem install hodor-1.0.2.gem
31
+ ```
32
+
33
+ To specify the Hadoop cluster a Hodor command should target, you have two options:
34
+
35
+ ```bash
36
+ $ HADOOP_ENV=target_env hodor oozie:display_job /
37
+ ```
38
+
39
+ Note, the name 'target_env' indicates which section in your config/clusters.yml file to use when defining your Hadoop cluster. Alternatively, you can avoid having to type the HADOOP_ENV prefix for every command, by adding the following to your `~/.bashrc`:
40
+
41
+ ```bash
42
+ export HADOOP_ENV=target_env # name of target hadoop cluster defined in config/clusters.yml
43
+ ```
44
+
45
+ You may also wish to consider adding a few aliases for the more frequently used Hodor commands. For example, the following
46
+ aliases can (optionally) be defined in your '~/.bashrc':
47
+
48
+ ```bash
49
+ alias cj='hodor oozie:change_job' # avoid trailing space with aliases. Can cause parse problems with zsh
50
+ alias dj='hodor oozie:display_job' # avoid trailing space with aliases. Can cause parse problems with zsh
51
+ alias rj='hodor oozie:run_job'
52
+ ```
53
+
54
+ Hodor Core Concepts & Assumptions:
55
+ ==================================
56
+ Hodor's commands can be studied individually, but there are some larger principles that will shed light on
57
+ the assumptions those commands are making, and the concepts they are implementing. These topics are broken
58
+ down by Hodor Namespace as follows:
59
+
60
+ Master Topics
61
+ -------------
62
+ * [The "clusters.yml" file](topics/master/clusters.yml.txt)
63
+
64
+ Oozie Topics
65
+ ------------
66
+ * [Inspecting Oozie Jobs](topics/oozie/inspecting_jobs.txt)
67
+ * [Composing Job Properties](topics/oozie/composing_job_properties.txt)
68
+ * [Workers And Drivers](topics/oozie/workers_and_drivers.txt)
69
+ * [Driver Run Scenarios](topics/oozie/driver_scenarios.txt)
70
+ * [Blocking Coordinators](topics/oozie/blocking_coordinators.txt)
71
+ * [Deploying & Running Jobs](topics/oozie/jobs.yml.txt)
72
+
73
+ Hdfs Topics
74
+ -----------
75
+ * [Corresponding Hdfs Paths](topics/hdfs/corresponding_paths.txt)
76
+
77
+ ## Future / Enhancements
78
+
79
+ Pull requests will be very happily considered.
80
+
81
+ __Maintained by Dean Hallman__
82
+
83
+ ## License
84
+
85
+ The MIT License (MIT)
86
+
87
+ Copyright (c) 2015 Dean Hallman
88
+
89
+ Permission is hereby granted, free of charge, to any person obtaining a copy
90
+ of this software and associated documentation files (the "Software"), to deal
91
+ in the Software without restriction, including without limitation the rights
92
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
93
+ copies of the Software, and to permit persons to whom the Software is
94
+ furnished to do so, subject to the following conditions:
95
+
96
+ The above copyright notice and this permission notice shall be included in
97
+ all copies or substantial portions of the Software.
98
+
99
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
100
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
101
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
102
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
103
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
104
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
105
+ THE SOFTWARE.
@@ -0,0 +1,105 @@
1
+
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "hodor/version"
4
+
5
+ task :default => :run_specs
6
+
7
+ ##################
8
+ # Build & Release Tasks
9
+ #
10
+
11
+ require 'bundler/gem_tasks'
12
+
13
+ ##################
14
+ # RuboCop Tasks
15
+ #
16
namespace :cop do

  # Loads RuboCop's Rake integration only when a cop task actually runs, so
  # the rest of the Rakefile does not need rubocop to be loadable.
  task :depends do
    require 'rubocop/rake_task'
  end

  desc "Run rubocop on cli checks"
  task :cli => :depends do
    # Registers a "rubocop" task (RuboCop::RakeTask's default name) covering
    # lib/ and spec/, then invokes it.
    RuboCop::RakeTask.new do |cop|
      cop.formatters = ["s"]
      cop.patterns = ["lib/", "spec/"]
    end
    Rake::Task["rubocop"].invoke
  end

  task :all => [:cli]
end

desc "Run rubocop on all sections"
task :cop => 'cop:all'
36
+
37
+ ##################
38
+ # Ruby-Lint Tasks
39
+ #
40
namespace :lint do

  # Pulls in ruby-lint's Rake integration on demand rather than at load time.
  task :depends do
    require 'ruby-lint/rake_task'
  end

  desc "Run lint on cli checks"
  task cli: :depends do
    # Registers a task named 'lint' that analyses lib/, then invokes it.
    RubyLint::RakeTask.new do |lint_task|
      lint_task.name = 'lint'
      lint_task.files = ['lib/']
    end
    Rake::Task["lint"].invoke
  end

  task all: [:cli]
end

desc "Run lint on all sections"
task lint: 'lint:all'
60
+
61
+ ##################
62
+ # Rspec Tasks
63
+ #
64
namespace :spec do

  desc "Ensure dependencies load once"
  task :depends do
    require 'rspec/core/rake_task'
  end

  # Each task below builds its RSpec::Core::RakeTask inside the task body,
  # i.e. only after :depends has required rspec's Rake integration.

  desc "Run unit tests"
  task :unit => :depends do
    RSpec::Core::RakeTask.new("spec:unit") do |t|
      t.pattern = 'spec/unit/**/*_spec.rb'
      t.fail_on_error = true
      t.rspec_opts = %w(--color --require spec_helper --format progress)
    end
  end

  desc "Run integration tests"
  task :integration => :depends do
    RSpec::Core::RakeTask.new("spec:integration") do |t|
      t.pattern = 'spec/integration/**/*_spec.rb'
      t.fail_on_error = true
      t.rspec_opts = %w(--color --require spec_helper --format progress)
    end
  end

  # Convenience task pinned to one spec file; edit the pattern to focus on a
  # different spec while debugging.
  desc "Run a single targeted spec"
  task :target => :depends do
    RSpec::Core::RakeTask.new("spec:target") do |t|
      t.pattern = 'spec/integration/api/oozie/action_spec.rb'
      t.fail_on_error = true
      t.rspec_opts = %w(--color --require spec_helper --format progress)
    end
  end

  desc "Run all specs"
  task :all => [:integration, :unit]

  task :default => :all
end

desc "Run rspec on all sections"
task :run_specs => ['spec:all']
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require_relative '../lib/hodor/cli'
5
+
6
+ $thor_runner = true
7
+ $hodor_runner = true
8
+
9
# Run the requested command through Hodor's own CLI. If Hodor does not know
# the namespace, fall back to an external plugin executable named
# "hodor-<namespace>", handing it the remaining arguments.
begin
  Hodor::Cli::Runner.start(ARGV)
rescue Hodor::Cli::CommandNotFound
  namespace = ARGV.shift
  plugin = "hodor-#{namespace}"
  begin
    # The [command, argv0] form plus separate arguments makes Ruby exec the
    # program directly and never via a shell. Arguments are passed through
    # verbatim (no re-splitting on spaces, no shell metacharacter expansion),
    # and a missing plugin reliably raises here instead of being reported by
    # the shell.
    Kernel.exec([plugin, plugin], *ARGV)
  rescue StandardError
    puts "HODOR! '#{namespace}' is unknown.\nNOTE: If this is a Hodor plugin, then perhaps you need to install the gem 'hodor-#{namespace}'..."
  end
end
@@ -0,0 +1,47 @@
1
+ # coding: utf-8
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require 'hodor/version'
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "hodor"
7
+ spec.version = Hodor::VERSION
8
+ spec.authors = ["Dean Hallman"]
9
+ spec.email = ["rdhallman@gmail.com"]
10
+ spec.summary = %q{Manages Hadoop and Oozie data pipelines, through development, testing, deployment and monitoring}
11
+ spec.description = %q{Hodor is a ruby-based framework, API and Command Line Interface that automates and simplifies the way you specify, deploy, debug and administer Hadoop and Oozie solutions. Hadoop lacks a mature toolchain to manage a codebase with modern software development discipline. To address this need, Hodor comprises a combination of tools and conventions that enable in the Hadoop environment many of the modern software development practices, and deployment facilities we take for granted in normal software development.}
12
+ spec.homepage = ""
13
+ spec.license = "MIT"
14
+
15
+ spec.files = `git ls-files -z`.split("\x0")
16
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
+ spec.require_paths = ["lib"]
19
+
20
+ spec.add_runtime_dependency "thor", ">= 0.19.1"
21
+ spec.add_runtime_dependency "log4r", "~> 1.1"
22
+ spec.add_runtime_dependency "open4"
23
+ spec.add_runtime_dependency "terminal-table"
24
+ spec.add_runtime_dependency "awesome_print"
25
+
26
+ spec.add_development_dependency "bundler", "~> 1.7"
27
+ spec.add_development_dependency "rake", "~> 10.0"
28
+ spec.add_development_dependency "byebug"
29
+
30
+ # Gems needed by Hodor's Hadoop/Oozie API
31
+ spec.add_runtime_dependency "rest-client"
32
+ spec.add_runtime_dependency "chronic"
33
+ spec.add_runtime_dependency "ox"
34
+
35
+ spec.add_development_dependency "rspec"
36
+ spec.add_development_dependency "wrong"
37
+ spec.add_development_dependency "simplecov"
38
+ spec.add_development_dependency "ruby-lint"
39
+ spec.add_development_dependency "rubocop"
40
+ spec.add_development_dependency "rspec-nc"
41
+ spec.add_development_dependency "guard"
42
+ spec.add_development_dependency "guard-rspec"
43
+ spec.add_development_dependency "pry"
44
+ spec.add_development_dependency "pry-remote"
45
+ spec.add_development_dependency "pry-nav"
46
+ end
47
+
@@ -0,0 +1,35 @@
1
+ <!--
2
+ Log Configuration
3
+ -->
4
+
5
+ <log4r_config>
6
+
7
+ <!-- Logging Levels -->
8
+ <pre_config>
9
+ <custom_levels>DEBUG, INFO, SSHCMD, STDOUT, STDERR, WARN, ERROR, FATAL</custom_levels>
10
+ <global level="DEBUG"/>
11
+ </pre_config>
12
+
13
+ <!-- Outputters -->
14
+ <outputter name="logconsole" type="StdoutOutputter" level="DEBUG" >
15
+ </outputter>
16
+
17
+ <outputter name="console" type="StdoutOutputter" level="DEBUG" >
18
+ <formatter type="Log4r::PatternFormatter">
19
+ <pattern>%5l|%M</pattern>
20
+ </formatter>
21
+ </outputter>
22
+
23
+ <!-- Loggers -->
24
+ <logger name="MainLogger"
25
+ level="INFO" additive="false" trace="true">
26
+ <outputter>console</outputter>
27
+ </logger>
28
+
29
+ <!-- Rspec Loggers -->
30
+ <logger name="RspecLogger"
31
+ level="WARN" additive="false" trace="true">
32
+ <outputter>console</outputter>
33
+ </logger>
34
+
35
+ </log4r_config>
@@ -0,0 +1,83 @@
1
+ require "hodor/version"
2
+
3
module Hodor

  # Hodor Exception Classes

  # Wraps a lower-level error (the "cause") together with an optional hash of
  # context. The :msg entry of that hash, when present, is used as the leading
  # message; every other entry is rendered as a "key => value" context line.
  class NestedError < StandardError
    attr_reader :cause

    # @param cause [Exception, Object] the underlying error being wrapped
    # @param kvp [Hash] optional context; :msg supplies the message text
    def initialize(cause, kvp = {})
      @cause = cause
      @kvp = kvp
    end

    alias :orig_to_s :to_s

    # Renders the message, the context entries (if any), the root cause and
    # up to six lines of the cause's backtrace.
    #
    # The text is built in a fresh string: appending in place to @kvp[:msg]
    # (or to the default exception text) would mutate the caller's string,
    # fail when it is frozen, and grow the message on every call.
    #
    # @return [String]
    def to_s
      msg = (@kvp[:msg] || orig_to_s).to_s.dup
      context = @kvp.reject { |key, _value| key == :msg }
      unless context.empty?
        msg << " Exception Context:\n"
        context.each_pair do |key, value|
          msg << " #{format_context_item(key)} => #{format_context_item(value)}\n"
        end
      end
      msg << "Root cause: #{@cause}"
      msg << "\nBacktrace:\n "
      msg << cause_backtrace.first(6).join("\n ")
    end

    private

    # nil and Symbols are shown as Ruby literals; anything else through to_s,
    # so non-String values (Integers, Arrays, ...) are rendered as text rather
    # than raising or being appended as a character code.
    def format_context_item(item)
      case item
      when nil then "nil"
      when Symbol then ":#{item}"
      else item.to_s
      end
    end

    # Backtrace of the cause, or an empty list when the cause was never raised
    # (backtrace is nil) or is not an exception-like object at all.
    def cause_backtrace
      return [] unless @cause.respond_to?(:backtrace)
      Array(@cause.backtrace)
    end
  end

  # Raised with the exit status of a failed command; the error lines passed in
  # become the exception message.
  class AbnormalExitStatus < StandardError
    attr_reader :exit_status

    # @param exit_status [Object] exit status being reported
    # @param error_lines [String] text used as the exception message
    def initialize(exit_status, error_lines)
      @exit_status = exit_status
      super error_lines
    end
  end

end
53
+
54
class Hash
  # Returns a new hash in which every entry is reachable both under its
  # original key and under that key converted to a Symbol; nested hashes are
  # converted the same way.
  #
  # Each nested hash is converted once and the result stored under both keys
  # (converting it separately per key doubles the work at every nesting
  # level). Keys that cannot be converted with to_sym (e.g. Integers) are kept
  # under their original key only.
  #
  # @return [Hash]
  def symbolize_keys
    each_with_object({}) do |(key, value), memo|
      converted = value.is_a?(Hash) ? value.symbolize_keys : value
      memo[key] = converted
      memo[key.to_sym] = converted if key.respond_to?(:to_sym)
    end
  end

  # Selects the entries whose key or value contains any of the given search
  # strings. Keys and values are lower-cased before comparison; the search
  # strings are used as given, so callers pass them lower-case.
  #
  # Keys and values are compared via to_s, so Symbol keys and non-String
  # values (nil, numbers, ...) are searchable instead of raising.
  #
  # @param strings [Array<String>] substrings to look for
  # @return [Hash] the matching entries
  def match(strings)
    select do |key, val|
      haystacks = [key.to_s.downcase, val.to_s.downcase]
      strings.any? { |findstr| haystacks.any? { |text| text.include?(findstr) } }
    end
  end
end
73
+
74
class String
  # Strips up to +count+ leading spaces or tabs from the start of every line
  # and returns the result as a new string. Lines with less leading whitespace
  # lose only what they have.
  def unindent(count)
    leading_whitespace = /^[ \t]{1,#{count}}/
    gsub(leading_whitespace, "")
  end
end
80
+
81
+ require "hodor/environment"
82
+
83
+