hodor 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +5 -0
  8. data/Gemfile +4 -0
  9. data/Guardfile +11 -0
  10. data/README.md +105 -0
  11. data/Rakefile +105 -0
  12. data/bin/hodor +18 -0
  13. data/hodor.gemspec +47 -0
  14. data/lib/config/log4r_config.xml +35 -0
  15. data/lib/hodor.rb +83 -0
  16. data/lib/hodor/api/hdfs.rb +222 -0
  17. data/lib/hodor/api/oozie.rb +215 -0
  18. data/lib/hodor/api/oozie/action.rb +52 -0
  19. data/lib/hodor/api/oozie/bundle.rb +27 -0
  20. data/lib/hodor/api/oozie/coordinator.rb +53 -0
  21. data/lib/hodor/api/oozie/hadoop_job.rb +29 -0
  22. data/lib/hodor/api/oozie/job.rb +192 -0
  23. data/lib/hodor/api/oozie/materialization.rb +56 -0
  24. data/lib/hodor/api/oozie/query.rb +115 -0
  25. data/lib/hodor/api/oozie/session.rb +170 -0
  26. data/lib/hodor/api/oozie/workflow.rb +58 -0
  27. data/lib/hodor/cli.rb +146 -0
  28. data/lib/hodor/command.rb +164 -0
  29. data/lib/hodor/configuration.rb +80 -0
  30. data/lib/hodor/environment.rb +437 -0
  31. data/lib/hodor/ui/table.rb +130 -0
  32. data/lib/hodor/version.rb +3 -0
  33. data/lib/tasks/hdfs.thor +138 -0
  34. data/lib/tasks/master.thor +61 -0
  35. data/lib/tasks/oozie.thor +399 -0
  36. data/lib/tasks/sandbox.thor +87 -0
  37. data/spec/integration/api/oozie/action_spec.rb +69 -0
  38. data/spec/integration/api/oozie/bundle_spec.rb +33 -0
  39. data/spec/integration/api/oozie/coordinator_spec.rb +66 -0
  40. data/spec/integration/api/oozie/hadoop_job_spec.rb +29 -0
  41. data/spec/integration/api/oozie/job_spec.rb +15 -0
  42. data/spec/integration/api/oozie/materialization_spec.rb +66 -0
  43. data/spec/integration/api/oozie/query_spec.rb +43 -0
  44. data/spec/integration/api/oozie/session_spec.rb +18 -0
  45. data/spec/integration/api/oozie/workflow_spec.rb +65 -0
  46. data/spec/integration/api/oozie_spec.rb +198 -0
  47. data/spec/integration/fixtures/api/running_coordinators/req_resp_00.memo +6 -0
  48. data/spec/integration/fixtures/api/sample_action/req_resp_00.memo +5 -0
  49. data/spec/integration/fixtures/api/sample_action/req_resp_01.memo +7 -0
  50. data/spec/integration/fixtures/api/sample_bundle/req_resp_00.memo +6 -0
  51. data/spec/integration/fixtures/api/sample_coordinator/req_resp_00.memo +5 -0
  52. data/spec/integration/fixtures/api/sample_materialization/req_resp_00.memo +5 -0
  53. data/spec/integration/fixtures/api/sample_materialization/req_resp_01.memo +7 -0
  54. data/spec/integration/fixtures/api/sample_workflow/req_resp_00.memo +5 -0
  55. data/spec/spec_helper.rb +92 -0
  56. data/spec/support/d_v_r.rb +125 -0
  57. data/spec/support/hodor_api.rb +15 -0
  58. data/spec/unit/hodor/api/hdfs_spec.rb +63 -0
  59. data/spec/unit/hodor/api/oozie_spec.rb +32 -0
  60. data/spec/unit/hodor/environment_spec.rb +52 -0
  61. data/topics/hdfs/corresponding_paths.txt +31 -0
  62. data/topics/hdfs/overview.txt +10 -0
  63. data/topics/master/clusters.yml.txt +36 -0
  64. data/topics/master/overview.txt +17 -0
  65. data/topics/oozie/blocking_coordinators.txt +46 -0
  66. data/topics/oozie/composing_job_properties.txt +68 -0
  67. data/topics/oozie/display_job.txt +52 -0
  68. data/topics/oozie/driver_scenarios.txt +42 -0
  69. data/topics/oozie/inspecting_jobs.txt +59 -0
  70. data/topics/oozie/jobs.yml.txt +185 -0
  71. data/topics/oozie/overview.txt +43 -0
  72. data/topics/oozie/workers_and_drivers.txt +40 -0
  73. metadata +455 -0
@@ -0,0 +1,87 @@
module Hodor
  module Cli
    # Thor namespace holding one-time initialization tasks for a Hortonworks
    # "HDP Sandbox" virtual machine: deploying SSH keys for the configured
    # users and creating the HDFS directories the pipeline expects.
    class Sandbox < ::Hodor::Command
      #
      # Manual change required:
      #
      #   edit /etc/hadoop/conf/core-site.xml
      #
      #   Change the following sections to agree with:
      #
      #   <property>
      #     <name>hadoop.proxyuser.oozie.hosts</name>
      #     <value>*</value>
      #   </property>
      #
      #   <property>
      #     <name>hadoop.proxyuser.oozie.groups</name>
      #     <value>*</value>
      #   </property>
      #

      no_tasks do
        # Builds the "user@host -p port" fragment handed to the ssh command line.
        #
        # user_key - environment key whose value is the remote user name
        #            (:ssh_user and :oozie_user are the keys used in this class).
        #
        # Returns a String; the port falls back to 22 when env[:ssh_port] is unset.
        def ssh_user_addr(user_key)
          va = "#{env[user_key]}@#{env[:ssh_host]}"
          va << " -p #{env[:ssh_port] || 22}"
        end

        # Appends the local ~/.ssh/id_rsa.pub to the remote user's
        # ~/.ssh/authorized_keys by piping it through ssh.
        #
        # user_key - environment key naming the remote user (see ssh_user_addr).
        def deploy_ssh_key(user_key)
          logger.info "Preventing future password prompts for '#{env[user_key]}' sandbox user."
          logger.info "Note: this may require you to enter the password for '#{env[user_key]}'."
          # umask 077 keeps a freshly created ~/.ssh and authorized_keys private to the user.
          remote_cmd = %q['umask 077; mkdir -p ~/.ssh; touch ~/.ssh/authorized_keys; cat >> ~/.ssh/authorized_keys']
          env.run_local %Q[cat ~/.ssh/id_rsa.pub | ssh #{ssh_user_addr(user_key)} #{remote_cmd}], echo: true, echo_cmd: true
        end

        # Prints a namespace overview, then the inherited help output, then a
        # "getting more help" footer. Returns whatever the inherited help returned.
        def self.help(shell, subcommand = false)
          overview = %Q[Hodor's Sandbox namespace functions as a local proxy for Hortonworks "HDP Sandbox" that you may have
            running in a virtual machine. The commands in this namespace are responsible for performing one-time
            initialization tasks on the sandbox virtual cluster, so that expected user accounts are created etc.
            To be clear, Hodor generally does not require that you run Hortonworks' Sandbox. Only this particular
            namespace expects that. So, if you aren't running one, just avoid use of this namespace.

            Note: this namespace has not been used in well over a year and is probably broken right now. It needs
            to be reviewed and updated or overhauled. Meanwhile, use at your own risk.

          ].unindent(10)
          shell.say overview
          result = super

          more_help = %Q[Getting More Help:
            ------------------
            To get detailed help on specific Sandbox commands (i.e. setup_ssh), run:

              $ hodor help sandbox:setup_ssh
              $ hodor sandbox:help setup_ssh # alternate, works the same

          ].unindent(10)
          shell.say more_help
          result
        end
      end

      # Set up a hortonworks sandbox. Currently, all this does is copy your SSH key
      # to avoid password prompting. In the future, we may want to install components
      # we expect to be available, etc.
      desc "setup_ssh", "Set up a new sandbox to include required components and SSH keys"
      def setup_ssh
        deploy_ssh_key(:ssh_user)
      end

      # Copies your SSH key to the account named by env[:oozie_user] so later
      # commands run as that user do not prompt for a password.
      desc "setup_users", "Deploy your SSH key to the sandbox's oozie user to avoid password prompts"
      def setup_users
        deploy_ssh_key(:oozie_user)
      end

      # Creates /shared and /<oozie_root> on HDFS as the hdfs user and assigns
      # both to the hadoop group. oozie_root defaults to 'pipeline'.
      #
      # NOTE(review): stock Thor#invoke runs a given command at most once per
      # invocation chain; confirm Hodor::Command (or hdfs:fs) allows the
      # repeated "hdfs:fs" invocations below to all execute.
      desc "setup_hdfs", "Set up a new sandbox to include hdfs directories with required group settings"
      def setup_hdfs
        oozie_root = env[:oozie_root] || 'pipeline'
        invoke "hdfs:fs", %w[-u hdfs -mkdir /shared]
        invoke "hdfs:fs", %W[-u hdfs -mkdir /#{oozie_root}]

        invoke "hdfs:fs", %w[-u hdfs -chgrp hadoop /shared]
        invoke "hdfs:fs", %W[-u hdfs -chgrp hadoop /#{oozie_root}]
      end
    end
  end
end
@@ -0,0 +1,69 @@
module Hodor::Oozie
  # Integration spec for Hodor::Oozie::Action: checks the public interface and
  # replays a recorded Oozie session to load an action and its children.
  describe Action do
    describe "Required Public Interface" do
      # Assert against the instance method list, as the sibling specs do.
      # `should respond_to? :x` evaluates Kernel#respond_to? on the example
      # object and hands the resulting boolean to `should`, so Action itself
      # was never inspected.
      subject { Hodor::Oozie::Action.instance_methods }

      # Public fields
      it { should include :id }
      it { should include :json }
      it { should include :status }
      it { should include :parent_id }
      it { should include :error_message }
      it { should include :data }
      it { should include :transition }
      it { should include :external_status }
      it { should include :cred }
      it { should include :type }
      it { should include :end_time }
      it { should include :external_id }
      it { should include :start_time }
      it { should include :external_child_ids }
      it { should include :name }
      it { should include :error_code }
      it { should include :tracker_url }
      it { should include :retries }
      it { should include :to_string }
      it { should include :console_url }

      # Public methods
      it { should include :children }
    end

    context "Request action by job id" do
      include_context "hodor api" do
        let(:playback) { :sample_action }
      end

      # REST path expected when the action itself is fetched.
      let(:request_details) {
        /v1\/job\/0025060-151002103648730-oozie-oozi-W@run_worker/
      }

      # REST path expected when the action's children are fetched.
      let(:request_children) {
        /v1\/job\/0025062-151002103648730-oozie-oozi-W/
      }

      before(:each) do
        expect(session).to receive(:rest_call).with(request_details).once.and_mimic_original(memo)
        expect(session).to receive(:rest_call).with(request_children).once.and_mimic_original(memo)
        @action = oozie.job_by_id "0025060-151002103648730-oozie-oozi-W@run_worker"
        @children = @action.children
      end

      it "should have the correct type" do
        expect(@action.class).to eql(Hodor::Oozie::Action)
      end

      it "should be a sub-workflow action" do
        expect(@action.type).to match(/sub-workflow/)
      end

      it "should have 1 child" do
        expect(@children.size).to eql(1)
      end

      it "should have example_business_W as only child" do
        expect(@children[0].app_name).to match(/example_business_W/)
      end
    end
  end
end
@@ -0,0 +1,33 @@
module Hodor::Oozie
  # Integration spec for Hodor::Oozie::Bundle: checks the public interface and
  # the error raised when a replayed lookup targets a bundle id.
  describe Bundle do
    describe "Required Public Interface" do
      subject { Hodor::Oozie::Bundle.instance_methods }

      # Public fields
      it { should include :status }

      # Public methods
      it { should include :children }
    end

    context "Request bundle by job id that does not exist" do
      include_context "hodor api" do
        let(:playback) { :sample_bundle }
      end

      # REST path expected for the bundle lookup.
      let(:request_details) {
        /v1\/job\/0023753-151002103648730-oozie-oozi-B/
      }

      before(:each) do
        expect(session).to receive(:rest_call).with(request_details).once.and_mimic_original(memo)
      end

      it "should throw exception when searching for a bundle that does not exist" do
        expect {
          oozie.job_by_id "0023753-151002103648730-oozie-oozi-B"
        }.to raise_error JSON::ParserError
      end
    end
  end
end
@@ -0,0 +1,66 @@
module Hodor::Oozie
  # Integration spec for Hodor::Oozie::Coordinator: checks the public interface
  # and replays a recorded Oozie session to load a coordinator and its children.
  describe Coordinator do
    describe "Required Public Interface" do
      subject { Hodor::Oozie::Coordinator.instance_methods }

      # Public fields (each listed once; :acl was previously asserted twice)
      it { should include :status }
      it { should include :id }
      it { should include :json }
      it { should include :name }
      it { should include :path }
      it { should include :timezone }
      it { should include :frequency }
      it { should include :conf }
      it { should include :end_time }
      it { should include :execution_policy }
      it { should include :start_time }
      it { should include :time_unit }
      it { should include :concurrency }
      it { should include :last_action }
      it { should include :acl }
      it { should include :mat_throttling }
      it { should include :timeout }
      it { should include :next_materialized_time }
      it { should include :parent_id }
      it { should include :external_id }
      it { should include :group }
      it { should include :user }
      it { should include :console_url }
      it { should include :actions }
      it { should include :materializations }

      # Public methods
      it { should include :children }
    end

    context "Request coordinator by job id" do
      include_context "hodor api" do
        let(:playback) { :sample_coordinator }
      end

      # REST path expected for the coordinator lookup.
      let(:request_details) {
        /v1\/job\/0023753-151002103648730-oozie-oozi-C/
      }

      before(:each) do
        expect(session).to receive(:rest_call).with(request_details).once.and_mimic_original(memo)
        @coord = oozie.job_by_id "0023753-151002103648730-oozie-oozi-C"
        @children = @coord.children
      end

      it "should have the correct type" do
        expect(@coord.class).to eql(Hodor::Oozie::Coordinator)
      end

      it "should have 6 children" do
        expect(@children.size).to eql(6)
      end

      it "should show success status for child 2" do
        expect(@children[2].status).to eql("SUCCEEDED")
      end
    end
  end
end
@@ -0,0 +1,29 @@
module Hodor::Oozie
  # Integration spec for Hodor::Oozie::HadoopJob: verifies the public interface
  # and that looking up a hadoop job id issues no REST call.
  describe HadoopJob do
    describe "Required Public Interface" do
      subject { Hodor::Oozie::HadoopJob.instance_methods }

      # Public fields first (:id, :parent_id), then public methods (:children).
      [:id, :parent_id, :children].each do |member|
        it { should include member }
      end
    end

    context "Request action by job id" do
      include_context "hodor api" do
        let(:playback) { :sample_hadoop_job }
      end

      before(:each) do
        # The lookup must be resolved without touching the Oozie REST API.
        expect(session).not_to receive(:rest_call)
        @hadoop_job = oozie.job_by_id("job_1443733596356_96843")
      end

      it "should have the correct type" do
        job_class = @hadoop_job.class
        expect(job_class).to eql(Hodor::Oozie::HadoopJob)
      end
    end
  end
end
@@ -0,0 +1,15 @@
module Hodor::Oozie
  # Interface spec for the Hodor::Oozie::Job base class.
  describe Job do
    describe "Required Public Interface" do
      subject { Hodor::Oozie::Job.instance_methods }

      # Public fields first (:id), then the public methods.
      [:id, :children, :display_properties, :display_children].each do |member|
        it { should include member }
      end
    end
  end
end
@@ -0,0 +1,66 @@
module Hodor::Oozie
  # Integration spec for Hodor::Oozie::Materialization: verifies the public
  # interface and replays a recorded session to load a materialization and
  # its children.
  describe Materialization do
    describe "Required Public Interface" do
      subject { Hodor::Oozie::Materialization.instance_methods }

      # Public fields, in the order the interface lists them.
      public_fields = [
        :id, :json, :error_message, :last_modified_time, :created_at, :status,
        :push_missing_dependencies, :external_status, :type, :nominal_time,
        :external_id, :created_conf, :missing_dependencies, :run_conf,
        :action_number, :error_code, :tracker_uri, :to_string, :parent_id,
        :coord_job_id, :console_url
      ]

      # Public methods
      public_methods = [:children]

      (public_fields + public_methods).each do |member|
        it { should include member }
      end
    end

    context "Request materialization by job id" do
      include_context "hodor api" do
        let(:playback) { :sample_materialization }
      end

      # REST path expected when the owning coordinator is fetched.
      let(:request_details) do
        /v1\/job\/0023753-151002103648730-oozie-oozi-C/
      end

      # REST path expected when the materialization's children are fetched.
      let(:request_children) do
        /v1\/job\/0025060-151002103648730-oozie-oozi-W/
      end

      before(:each) do
        expect(session).to receive(:rest_call).with(request_details).once.and_mimic_original(memo)
        expect(session).to receive(:rest_call).with(request_children).once.and_mimic_original(memo)
        @mat = oozie.job_by_id("0023753-151002103648730-oozie-oozi-C@3")
        @kids = @mat.children
      end

      it "should have the correct type" do
        mat_class = @mat.class
        expect(mat_class).to eql(Hodor::Oozie::Materialization)
      end

      it "should have 1 child" do
        child_count = @kids.size
        expect(child_count).to eql(1)
      end

      it "should show success status for child 0" do
        first_child = @kids[0]
        expect(first_child.status).to eql("SUCCEEDED")
      end
    end
  end
end
@@ -0,0 +1,43 @@
module Hodor::Oozie
  # Integration spec for Hodor::Oozie::Query: checks the public interface and
  # replays a recorded session listing the running coordinators.
  describe Query do
    describe "Required Public Interface" do
      subject { Hodor::Oozie::Query.instance_methods }

      # Public fields
      it { should include :id }
      it { should include :request }
      it { should include :json }

      # Public methods
      it { should include :children }
    end

    context "List all running coordinators" do
      include_context "hodor api" do
        let(:playback) { :running_coordinators }
      end

      # REST path expected for the running-coordinators query.
      let(:parent_request) {
        /v2\/jobs\?jobtype=coord&filter=status%3DRUNNING/
      }

      before(:each) do
        expect(session).to receive(:rest_call).with(parent_request).once.and_mimic_original(memo)
        @query = Hodor::Oozie::Query.new status: [:running]
        @matches = @query.children
      end

      it "should have correct count" do
        expect(@matches.size).to eql(4)
      end

      # Description now names the coordinator the assertion actually matches.
      it "should include driver_example_workflows_master_workflow.xml_C coordinator" do
        expect(@matches[1].name).to match(/driver_example_workflows_master_workflow.xml_C/)
      end

      it "should include hourly_master incremental coordinator" do
        expect(@matches[0].name).to match(/example_workflows\/hourly_master_hourly_incremental-C/)
      end
    end
  end
end
@@ -0,0 +1,18 @@
module Hodor::Oozie
  # Interface spec for Hodor::Oozie::Session.
  describe Session do
    describe "Required Public Interface" do
      # Assert against the instance method list, as the sibling specs do.
      # `should respond_to? :x` evaluates Kernel#respond_to? on the example
      # object and hands the resulting boolean to `should`, so Session itself
      # was never inspected.
      # NOTE(review): assumes these are instance methods of Session; if they
      # are class-level methods, use `subject { Hodor::Oozie::Session }` with
      # the `respond_to(:name)` matcher instead.
      subject { Hodor::Oozie::Session.instance_methods }

      # Public methods (:current_id was previously listed twice)
      it { should include :pwj }
      it { should include :make_current }
      it { should include :current_id }
      it { should include :get_job_state }
      it { should include :search_jobs }
      it { should include :len }
      it { should include :offset }
    end
  end
end

@@ -0,0 +1,65 @@
module Hodor::Oozie
  # Integration spec for Hodor::Oozie::Workflow: checks the public interface
  # and replays a recorded session to load a workflow and its children.
  describe Workflow do
    describe "Required Public Interface" do
      subject { Hodor::Oozie::Workflow.instance_methods }

      # Public fields
      it { should include :id }
      it { should include :json }
      it { should include :app_path }
      it { should include :acl }
      it { should include :status }
      it { should include :created_at }
      it { should include :conf }
      it { should include :last_mod_time }
      it { should include :run }
      it { should include :end_time }
      it { should include :external_id }
      it { should include :app_name }
      it { should include :start_time }
      it { should include :materialization_id }
      it { should include :parent_id }
      it { should include :materialization }
      it { should include :to_string }
      it { should include :group }
      it { should include :console_url }
      it { should include :user }

      # Public methods
      it { should include :children }
    end

    context "Request workflow by job id" do
      include_context "hodor api" do
        let(:playback) { :sample_workflow }
      end

      # REST path expected for the workflow lookup.
      let(:request) {
        /v1\/job\/0025062-151002103648730-oozie-oozi-W/
      }

      before(:each) do
        expect(session).to receive(:rest_call).with(request).once.and_mimic_original(memo)
        @workflow = oozie.job_by_id "0025062-151002103648730-oozie-oozi-W"
        @children = @workflow.children
      end

      it "should have the correct type" do
        expect(@workflow.class).to eql(Hodor::Oozie::Workflow)
      end

      it "should have the correct app name" do
        expect(@workflow.app_name).to match(/example_business_W/)
      end

      it "should have 3 children" do
        expect(@children.size).to eql(3)
      end

      it "should have data_workflow as child 1" do
        expect(@children[1].name).to match(/data_workflow/)
      end
    end
  end
end