hodor 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +5 -0
  8. data/Gemfile +4 -0
  9. data/Guardfile +11 -0
  10. data/README.md +105 -0
  11. data/Rakefile +105 -0
  12. data/bin/hodor +18 -0
  13. data/hodor.gemspec +47 -0
  14. data/lib/config/log4r_config.xml +35 -0
  15. data/lib/hodor.rb +83 -0
  16. data/lib/hodor/api/hdfs.rb +222 -0
  17. data/lib/hodor/api/oozie.rb +215 -0
  18. data/lib/hodor/api/oozie/action.rb +52 -0
  19. data/lib/hodor/api/oozie/bundle.rb +27 -0
  20. data/lib/hodor/api/oozie/coordinator.rb +53 -0
  21. data/lib/hodor/api/oozie/hadoop_job.rb +29 -0
  22. data/lib/hodor/api/oozie/job.rb +192 -0
  23. data/lib/hodor/api/oozie/materialization.rb +56 -0
  24. data/lib/hodor/api/oozie/query.rb +115 -0
  25. data/lib/hodor/api/oozie/session.rb +170 -0
  26. data/lib/hodor/api/oozie/workflow.rb +58 -0
  27. data/lib/hodor/cli.rb +146 -0
  28. data/lib/hodor/command.rb +164 -0
  29. data/lib/hodor/configuration.rb +80 -0
  30. data/lib/hodor/environment.rb +437 -0
  31. data/lib/hodor/ui/table.rb +130 -0
  32. data/lib/hodor/version.rb +3 -0
  33. data/lib/tasks/hdfs.thor +138 -0
  34. data/lib/tasks/master.thor +61 -0
  35. data/lib/tasks/oozie.thor +399 -0
  36. data/lib/tasks/sandbox.thor +87 -0
  37. data/spec/integration/api/oozie/action_spec.rb +69 -0
  38. data/spec/integration/api/oozie/bundle_spec.rb +33 -0
  39. data/spec/integration/api/oozie/coordinator_spec.rb +66 -0
  40. data/spec/integration/api/oozie/hadoop_job_spec.rb +29 -0
  41. data/spec/integration/api/oozie/job_spec.rb +15 -0
  42. data/spec/integration/api/oozie/materialization_spec.rb +66 -0
  43. data/spec/integration/api/oozie/query_spec.rb +43 -0
  44. data/spec/integration/api/oozie/session_spec.rb +18 -0
  45. data/spec/integration/api/oozie/workflow_spec.rb +65 -0
  46. data/spec/integration/api/oozie_spec.rb +198 -0
  47. data/spec/integration/fixtures/api/running_coordinators/req_resp_00.memo +6 -0
  48. data/spec/integration/fixtures/api/sample_action/req_resp_00.memo +5 -0
  49. data/spec/integration/fixtures/api/sample_action/req_resp_01.memo +7 -0
  50. data/spec/integration/fixtures/api/sample_bundle/req_resp_00.memo +6 -0
  51. data/spec/integration/fixtures/api/sample_coordinator/req_resp_00.memo +5 -0
  52. data/spec/integration/fixtures/api/sample_materialization/req_resp_00.memo +5 -0
  53. data/spec/integration/fixtures/api/sample_materialization/req_resp_01.memo +7 -0
  54. data/spec/integration/fixtures/api/sample_workflow/req_resp_00.memo +5 -0
  55. data/spec/spec_helper.rb +92 -0
  56. data/spec/support/d_v_r.rb +125 -0
  57. data/spec/support/hodor_api.rb +15 -0
  58. data/spec/unit/hodor/api/hdfs_spec.rb +63 -0
  59. data/spec/unit/hodor/api/oozie_spec.rb +32 -0
  60. data/spec/unit/hodor/environment_spec.rb +52 -0
  61. data/topics/hdfs/corresponding_paths.txt +31 -0
  62. data/topics/hdfs/overview.txt +10 -0
  63. data/topics/master/clusters.yml.txt +36 -0
  64. data/topics/master/overview.txt +17 -0
  65. data/topics/oozie/blocking_coordinators.txt +46 -0
  66. data/topics/oozie/composing_job_properties.txt +68 -0
  67. data/topics/oozie/display_job.txt +52 -0
  68. data/topics/oozie/driver_scenarios.txt +42 -0
  69. data/topics/oozie/inspecting_jobs.txt +59 -0
  70. data/topics/oozie/jobs.yml.txt +185 -0
  71. data/topics/oozie/overview.txt +43 -0
  72. data/topics/oozie/workers_and_drivers.txt +40 -0
  73. metadata +455 -0
@@ -0,0 +1,87 @@
1
+
2
module Hodor
  module Cli
    # Command namespace for one-time initialization of a Hortonworks "HDP Sandbox"
    # virtual machine: deploying SSH keys for the sandbox users and creating the
    # HDFS directories Hodor expects.
    #
    # Manual change required:
    #
    #   edit /etc/hadoop/conf/core-site.xml
    #
    # Change the following sections to agree with:
    #
    #   <property>
    #     <name>hadoop.proxyuser.oozie.hosts</name>
    #     <value>*</value>
    #   </property>
    #
    #   <property>
    #     <name>hadoop.proxyuser.oozie.groups</name>
    #     <value>*</value>
    #   </property>
    #
    class Sandbox < ::Hodor::Command

      no_tasks do
        # Builds the "user@host -p port" argument string handed to ssh.
        #
        # user_key - key into env naming the account to use (e.g. :ssh_user).
        #
        # Returns a String; the port falls back to 22 when env[:ssh_port] is unset.
        def ssh_user_addr(user_key)
          va = "#{env[user_key]}@#{env[:ssh_host]}"
          va << " -p #{env[:ssh_port] || 22}"
        end

        # Appends the local ~/.ssh/id_rsa.pub to the remote account's
        # ~/.ssh/authorized_keys by piping it through ssh.
        #
        # user_key - key into env naming the remote account.
        def deploy_ssh_key(user_key)
          logger.info "Preventing future password prompts for '#{env[user_key]}' sandbox user."
          logger.info "Note: this may require you to enter the password for '#{env[user_key]}'."
          # umask 077 keeps a freshly created ~/.ssh and authorized_keys private to the user.
          remote_cmd = %q['umask 077; mkdir -p ~/.ssh; touch ~/.ssh/authorized_keys; cat >> ~/.ssh/authorized_keys']
          env.run_local %Q[cat ~/.ssh/id_rsa.pub | ssh #{ssh_user_addr(user_key)} #{remote_cmd}], echo: true, echo_cmd: true
        end

        # Prints a namespace overview, then the inherited help output, then a
        # pointer to per-command help.
        #
        # Returns whatever the superclass help returns.
        def self.help(shell, subcommand = false)
          # Continuation lines are indented 10 columns to pair with .unindent(10);
          # unindent is defined elsewhere and presumably strips that many leading spaces.
          overview = %Q[Hodor's Sandbox namespace functions as a local proxy for Hortonworks "HDP Sandbox" that you may have
          running in a virtual machine. The commands in this namespace are responsible for performing one-time
          initialization tasks on the sandbox virtual cluster, so that expected user accounts are created etc.
          To be clear, Hodor generally does not require that you run Hortonworks' Sandbox. Only this particular
          namespace expects that. So, if you aren't running one, just avoid use of this namespace.

          Note: this namespace has not been used in well over a year and is probably broken right now. It needs
          to be reviewed and updated or overhauled. Meanwhile, use at your own risk.

          ].unindent(10)
          shell.say overview
          result = super

          more_help = %Q[Getting More Help:
          ------------------
          To get detailed help on specific Sandbox commands (e.g. setup_ssh), run:

          $ hodor help sandbox:setup_ssh
          $ hodor sandbox:help setup_ssh # alternate, works the same

          ].unindent(10)
          shell.say more_help
          result
        end
      end

      # Set up a hortonworks sandbox. Currently, all this does is copy your SSH key
      # to avoid password prompting. In the future, we may want to install components
      # we expect to be available, etc.
      desc "setup_ssh", "Set up a new sandbox to include required components and SSH keys"
      def setup_ssh
        deploy_ssh_key(:ssh_user)
      end

      # Same key deployment as setup_ssh, but for the account named by env[:oozie_user].
      # The description previously duplicated setup_ssh's, making the two tasks
      # indistinguishable in the task listing.
      desc "setup_users", "Deploy your SSH key to the sandbox's oozie user to avoid password prompts"
      def setup_users
        deploy_ssh_key(:oozie_user)
      end

      # Creates /shared and /<oozie_root> (default 'pipeline') through the hdfs:fs
      # task and changes their group to hadoop.
      desc "setup_hdfs", "Set up a new sandbox to include hdfs directories with required group settings"
      def setup_hdfs
        oozie_root = env[:oozie_root] || 'pipeline'
        invoke "hdfs:fs", %w[-u hdfs -mkdir /shared]
        invoke "hdfs:fs", %W[-u hdfs -mkdir /#{oozie_root}]

        invoke "hdfs:fs", %w[-u hdfs -chgrp hadoop /shared]
        invoke "hdfs:fs", %W[-u hdfs -chgrp hadoop /#{oozie_root}]
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
module Hodor::Oozie
  # Integration spec for Hodor::Oozie::Action: checks the public interface and
  # replays recorded REST responses for a sub-workflow action and its child.
  describe Action do
    describe "Required Public Interface" do
      # The original examples read `should respond_to? :id`, which calls Ruby's
      # built-in respond_to? predicate on the example itself and hands the resulting
      # boolean to `should`, so no matcher was ever applied to Action. Inspecting
      # instance_methods with `include` matches the other Oozie specs.
      subject { Hodor::Oozie::Action.instance_methods }

      # Public fields
      it { should include :id }
      it { should include :json }
      it { should include :status }
      it { should include :parent_id }
      it { should include :error_message }
      it { should include :data }
      it { should include :transition }
      it { should include :external_status }
      it { should include :cred }
      it { should include :type }
      it { should include :end_time }
      it { should include :external_id }
      it { should include :start_time }
      it { should include :external_child_ids }
      it { should include :name }
      it { should include :error_code }
      it { should include :tracker_url }
      it { should include :retries }
      it { should include :to_string }
      it { should include :console_url }

      # Public methods
      it { should include :children }
    end

    context "Request action by job id" do
      include_context "hodor api" do
        let(:playback) { :sample_action }
      end

      # Request for the action itself.
      let(:request_details) {
        /v1\/job\/0025060-151002103648730-oozie-oozi-W@run_worker/
      }

      # Request issued when the action's children are resolved.
      let(:request_children) {
        /v1\/job\/0025062-151002103648730-oozie-oozi-W/
      }

      before(:each) do
        # Each REST call must happen exactly once and is answered from the memo.
        expect(session).to receive(:rest_call).with(request_details).once.and_mimic_original(memo)
        expect(session).to receive(:rest_call).with(request_children).once.and_mimic_original(memo)
        @action = oozie.job_by_id "0025060-151002103648730-oozie-oozi-W@run_worker"
        @children = @action.children
      end

      it "should have the correct type" do
        expect(@action.class).to eql(Hodor::Oozie::Action)
      end

      # Was labelled "should have correct count" although it checks the action type.
      it "should be a sub-workflow action" do
        expect(@action.type).to match(/sub-workflow/)
      end

      it "should have 1 child" do
        expect(@children.size).to eql(1)
      end

      it "should have example_business_W as only child" do
        expect(@children[0].app_name).to match(/example_business_W/)
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
module Hodor::Oozie
  # Integration spec for Hodor::Oozie::Bundle: checks the public interface and
  # the failure raised when a requested bundle cannot be parsed from the replayed response.
  describe Bundle do
    describe "Required Public Interface" do
      subject { Hodor::Oozie::Bundle.instance_methods }

      # Public fields
      it { should include :status }

      # Public methods
      it { should include :children }
    end

    context "Request bundle by job id that does not exist" do
      include_context "hodor api" do
        let(:playback) { :sample_bundle }
      end

      let(:request_details) {
        /v1\/job\/0023753-151002103648730-oozie-oozi-B/
      }

      before(:each) do
        # The lookup must hit the REST layer exactly once, answered from the memo.
        expect(session).to receive(:rest_call).with(request_details).once.and_mimic_original(memo)
      end

      # Description previously read "should should throw ...".
      it "should throw exception when searching for a bundle that does not exist" do
        expect {
          oozie.job_by_id "0023753-151002103648730-oozie-oozi-B"
        }.to raise_error JSON::ParserError
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
module Hodor::Oozie
  # Integration spec for Hodor::Oozie::Coordinator: checks the public interface
  # and replays a recorded coordinator lookup together with its children.
  describe Coordinator do
    describe "Required Public Interface" do
      subject { Hodor::Oozie::Coordinator.instance_methods }

      # Public fields (:acl was listed twice in the original; the duplicate is dropped)
      it { should include :status }
      it { should include :id }
      it { should include :json }
      it { should include :name }
      it { should include :path }
      it { should include :timezone }
      it { should include :frequency }
      it { should include :conf }
      it { should include :end_time }
      it { should include :execution_policy }
      it { should include :start_time }
      it { should include :time_unit }
      it { should include :concurrency }
      it { should include :last_action }
      it { should include :acl }
      it { should include :mat_throttling }
      it { should include :timeout }
      it { should include :next_materialized_time }
      it { should include :parent_id }
      it { should include :external_id }
      it { should include :group }
      it { should include :user }
      it { should include :console_url }
      it { should include :actions }
      it { should include :materializations }

      # Public methods
      it { should include :children }
    end

    context "Request coordinator by job id" do
      include_context "hodor api" do
        let(:playback) { :sample_coordinator }
      end

      let(:request_details) {
        /v1\/job\/0023753-151002103648730-oozie-oozi-C/
      }

      before(:each) do
        # A single REST call is expected even though children are read as well.
        expect(session).to receive(:rest_call).with(request_details).once.and_mimic_original(memo)
        @coord = oozie.job_by_id "0023753-151002103648730-oozie-oozi-C"
        @children = @coord.children
      end

      it "should have the correct type" do
        expect(@coord.class).to eql(Hodor::Oozie::Coordinator)
      end

      it "should have 6 children" do
        expect(@children.size).to eql(6)
      end

      it "should show success status for child 2" do
        expect(@children[2].status).to eql("SUCCEEDED")
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module Hodor::Oozie
  # Integration spec for Hodor::Oozie::HadoopJob: checks the public interface and
  # that a "job_..." id resolves to a HadoopJob without any REST traffic.
  describe HadoopJob do
    describe "Required Public Interface" do
      subject { Hodor::Oozie::HadoopJob.instance_methods }

      # Public fields
      it { should include :id }
      it { should include :parent_id }

      # Public methods
      it { should include :children }
    end

    # Was labelled "Request action by job id", copied from the Action spec.
    context "Request hadoop job by job id" do
      include_context "hodor api" do
        # NOTE(review): no REST call is expected below, so this playback memo is
        # presumably never read - confirm.
        let(:playback) { :sample_hadoop_job }
      end

      before(:each) do
        # Resolving a Hadoop job id must not touch the REST layer.
        expect(session).not_to receive(:rest_call)
        @job = oozie.job_by_id "job_1443733596356_96843"
      end

      it "should have the correct type" do
        expect(@job.class).to eql(Hodor::Oozie::HadoopJob)
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Hodor::Oozie
  # Interface spec for Hodor::Oozie::Job: verifies that the expected instance
  # methods are defined.
  describe Job do
    describe "Required Public Interface" do
      subject { Hodor::Oozie::Job.instance_methods }

      # Public fields first, then public methods; one example is generated per name.
      %i[
        id
        children
        display_properties
        display_children
      ].each do |method_name|
        it { should include method_name }
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
module Hodor::Oozie
  # Integration spec for Hodor::Oozie::Materialization: verifies the expected
  # instance methods and replays a recorded lookup of materialization "@3" of a
  # coordinator together with its child.
  describe Materialization do
    describe "Required Public Interface" do
      subject { Hodor::Oozie::Materialization.instance_methods }

      # Public fields, followed by the public :children method; one example per name.
      %i[
        id
        json
        error_message
        last_modified_time
        created_at
        status
        push_missing_dependencies
        external_status
        type
        nominal_time
        external_id
        created_conf
        missing_dependencies
        run_conf
        action_number
        error_code
        tracker_uri
        to_string
        parent_id
        coord_job_id
        console_url
        children
      ].each do |method_name|
        it { should include method_name }
      end
    end

    context "Request materialization by job id" do
      include_context "hodor api" do
        let(:playback) { :sample_materialization }
      end

      # Request for the materialization itself.
      let(:request_details) do
        /v1\/job\/0023753-151002103648730-oozie-oozi-C/
      end

      # Request issued when the children are resolved.
      let(:request_children) do
        /v1\/job\/0025060-151002103648730-oozie-oozi-W/
      end

      before(:each) do
        # Both REST calls must occur exactly once; each is answered from the memo.
        [request_details, request_children].each do |request|
          expect(session).to receive(:rest_call).with(request).once.and_mimic_original(memo)
        end
        @materialization = oozie.job_by_id "0023753-151002103648730-oozie-oozi-C@3"
        @children = @materialization.children
      end

      it "should have the correct type" do
        expect(@materialization.class).to eql(Hodor::Oozie::Materialization)
      end

      it "should have 1 child" do
        expect(@children.size).to eql(1)
      end

      it "should show success status for child 0" do
        expect(@children[0].status).to eql("SUCCEEDED")
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
module Hodor::Oozie
  # Integration spec for Hodor::Oozie::Query: verifies the expected instance
  # methods and replays a recorded query for running coordinators.
  describe Query do
    describe "Required Public Interface" do
      subject { Hodor::Oozie::Query.instance_methods }

      # Public fields, followed by the public :children method; one example per name.
      %i[
        id
        request
        json
        children
      ].each do |method_name|
        it { should include method_name }
      end
    end

    context "List all running coordinators" do
      include_context "hodor api" do
        let(:playback) { :running_coordinators }
      end

      let(:parent_request) do
        /v2\/jobs\?jobtype=coord&filter=status%3DRUNNING/
      end

      before(:each) do
        # The query must reach the REST layer exactly once, answered from the memo.
        expect(session).to receive(:rest_call).with(parent_request).once.and_mimic_original(memo)
        @query = Hodor::Oozie::Query.new status: [:running]
        @matches = @query.children
      end

      it "should have correct count" do
        expect(@matches.size).to eql(4)
      end

      # NOTE(review): the description names a different coordinator than the
      # pattern asserted below - confirm which one is intended.
      it "should include worker_data_source\/business_C coordinator" do
        expect(@matches[1].name).to match(/driver_example_workflows_master_workflow.xml_C/)
      end

      it "should include hourly_master incremental coordinator" do
        expect(@matches[0].name).to match(/example_workflows\/hourly_master_hourly_incremental-C/)
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module Hodor::Oozie
  # Interface spec for Hodor::Oozie::Session.
  describe Session do
    describe "Required Public Interface" do
      # The original examples read `should respond_to? :pwj`, which calls Ruby's
      # built-in respond_to? predicate on the example itself and hands the resulting
      # boolean to `should`, so no matcher was ever applied to Session. Inspecting
      # instance_methods with `include` matches the other Oozie specs.
      # NOTE(review): assumes these are instance methods of Session rather than
      # class-level methods - confirm against session.rb.
      subject { Hodor::Oozie::Session.instance_methods }

      # Public methods (:current_id was listed twice in the original; the duplicate is dropped)
      it { should include :pwj }
      it { should include :make_current }
      it { should include :current_id }
      it { should include :get_job_state }
      it { should include :search_jobs }
      it { should include :len }
      it { should include :offset }
    end
  end
end
18
+
@@ -0,0 +1,65 @@
1
module Hodor::Oozie
  # Integration spec for Hodor::Oozie::Workflow: checks the public interface and
  # replays a recorded workflow lookup together with its children.
  describe Workflow do
    describe "Required Public Interface" do
      subject { Hodor::Oozie::Workflow.instance_methods }

      # Public fields
      it { should include :id }
      it { should include :json }
      it { should include :app_path }
      it { should include :acl }
      it { should include :status }
      it { should include :created_at }
      it { should include :conf }
      it { should include :last_mod_time }
      it { should include :run }
      it { should include :end_time }
      it { should include :external_id }
      it { should include :app_name }
      it { should include :start_time }
      it { should include :materialization_id }
      it { should include :parent_id }
      it { should include :materialization }
      it { should include :to_string }
      it { should include :group }
      it { should include :console_url }
      it { should include :user }

      # Public methods
      it { should include :children }
    end

    # Was labelled "List all running coordinators", copied from the Query spec.
    context "Request workflow by job id" do
      include_context "hodor api" do
        let(:playback) { :sample_workflow }
      end

      let(:request) {
        /v1\/job\/0025062-151002103648730-oozie-oozi-W/
      }

      before(:each) do
        # A single REST call is expected even though children are read as well.
        expect(session).to receive(:rest_call).with(request).once.and_mimic_original(memo)
        @workflow = oozie.job_by_id "0025062-151002103648730-oozie-oozi-W"
        @children = @workflow.children
      end

      it "should have the correct type" do
        expect(@workflow.class).to eql(Hodor::Oozie::Workflow)
      end

      # Was labelled "should have correct count" although it checks app_name.
      it "should have the correct app name" do
        expect(@workflow.app_name).to match(/example_business_W/)
      end

      it "should have 3 children" do
        expect(@children.size).to eql(3)
      end

      # Previously shared the description "should have 3 children" with the example above.
      it "should have data_workflow as second child" do
        expect(@children[1].name).to match(/data_workflow/)
      end
    end
  end
end