tengine_job 0.6.13 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (97) hide show
  1. data/Gemfile +1 -20
  2. data/Gemfile.lock +70 -72
  3. data/README.rdoc +2 -2
  4. data/bin/create_indexes_for_tengine_job +18 -0
  5. data/lib/tengine/job/connectable.rb +4 -4
  6. data/lib/tengine/job/drivers/job_execution_driver.rb +2 -2
  7. data/lib/tengine/job/drivers/jobnet_control_driver.rb +3 -3
  8. data/lib/tengine/job/drivers/schedule_driver.rb +12 -12
  9. data/lib/tengine/job/edge.rb +2 -2
  10. data/lib/tengine/job/expansion.rb +1 -1
  11. data/lib/tengine/job/jobnet.rb +12 -2
  12. data/lib/tengine/job/jobnet_actual.rb +29 -0
  13. data/lib/tengine/job/root_jobnet_actual.rb +20 -1
  14. data/lib/tengine/job/root_jobnet_template.rb +4 -4
  15. data/lib/tengine/job/signal.rb +1 -1
  16. data/lib/tengine/job/vertex.rb +10 -1
  17. metadata +130 -126
  18. data/.document +0 -5
  19. data/.rspec +0 -1
  20. data/Rakefile +0 -42
  21. data/VERSION +0 -1
  22. data/spec/fixtures/rjn_0001_simple_jobnet_builder.rb +0 -42
  23. data/spec/fixtures/rjn_0002_simple_parallel_jobnet_builder.rb +0 -42
  24. data/spec/fixtures/rjn_0003_fork_join_jobnet_builder.rb +0 -61
  25. data/spec/fixtures/rjn_0004_parallel_jobnet_with_finally_fixture.rb +0 -62
  26. data/spec/fixtures/rjn_0005_retry_two_layer_fixture.rb +0 -153
  27. data/spec/fixtures/rjn_0008_expansion_fixture.rb +0 -32
  28. data/spec/fixtures/rjn_0009_tree_sequential_jobnet_builder.rb +0 -174
  29. data/spec/fixtures/rjn_0010_2jobs_and_1job_parallel_jobnet_builder.rb +0 -39
  30. data/spec/fixtures/rjn_0011_nested_fork_jobnet_builder.rb +0 -96
  31. data/spec/fixtures/rjn_0012_nested_and_finally_builder.rb +0 -157
  32. data/spec/fixtures/rjn_1004_hadoop_job_in_jobnet_fixture.rb +0 -105
  33. data/spec/fixtures/rjn_means_root_jobnet +0 -0
  34. data/spec/fixtures/test_credential_fixture.rb +0 -12
  35. data/spec/fixtures/test_server_fixture.rb +0 -28
  36. data/spec/mongoid.yml +0 -35
  37. data/spec/spec_helper.rb +0 -58
  38. data/spec/sshd/.gitignore +0 -1
  39. data/spec/sshd/id_rsa +0 -51
  40. data/spec/sshd/id_rsa.pub +0 -1
  41. data/spec/sshd/ssh_host_rsa_key +0 -51
  42. data/spec/sshd/ssh_host_rsa_key.pub +0 -1
  43. data/spec/sshd/sshd_config +0 -10
  44. data/spec/sshd/sshd_config.erb +0 -11
  45. data/spec/sshd/tengine_job_test.sh +0 -6
  46. data/spec/support/jobnet_fixture_builder.rb +0 -145
  47. data/spec/support/mongo_index_key_log.rb +0 -91
  48. data/spec/tengine/job/category_spec.rb +0 -193
  49. data/spec/tengine/job/connectable_spec.rb +0 -94
  50. data/spec/tengine/job/drivers/job_controll_driver/connection_error_spec.rb +0 -236
  51. data/spec/tengine/job/drivers/job_controll_driver/duplicated_job_start_spec.rb +0 -302
  52. data/spec/tengine/job/drivers/job_controll_driver/expansion_spec.rb +0 -120
  53. data/spec/tengine/job/drivers/job_controll_driver/stop_spec.rb +0 -159
  54. data/spec/tengine/job/drivers/job_controll_driver_spec.rb +0 -740
  55. data/spec/tengine/job/drivers/job_execution_driver_spec.rb +0 -138
  56. data/spec/tengine/job/drivers/jobnet_control_driver/nested_and_finally_spec.rb +0 -472
  57. data/spec/tengine/job/drivers/jobnet_control_driver/nested_jobnet_spec.rb +0 -231
  58. data/spec/tengine/job/drivers/jobnet_control_driver/stop_jobnet_spec.rb +0 -202
  59. data/spec/tengine/job/drivers/jobnet_control_driver_spec.rb +0 -608
  60. data/spec/tengine/job/drivers/schedule_driver_spec.rb +0 -241
  61. data/spec/tengine/job/dsl_binder_spec.rb +0 -36
  62. data/spec/tengine/job/dsl_loader_spec.rb +0 -437
  63. data/spec/tengine/job/dsls/0013_hadoop_job_run.rb +0 -29
  64. data/spec/tengine/job/dsls/0014_join_and_join.rb +0 -19
  65. data/spec/tengine/job/dsls/0015_fork_and_fork.rb +0 -18
  66. data/spec/tengine/job/dsls/0016_complex_fork_and_join.rb +0 -20
  67. data/spec/tengine/job/dsls/0017_finally.rb +0 -15
  68. data/spec/tengine/job/dsls/0018_expansion.rb +0 -23
  69. data/spec/tengine/job/dsls/0019_execute_job_on_event.rb +0 -16
  70. data/spec/tengine/job/dsls/0020_duplicated_jobnet_name.rb +0 -16
  71. data/spec/tengine/job/dsls/0021_caption.rb +0 -13
  72. data/spec/tengine/job/dsls/1060_test_dir1/1060_test_dir2/0013_hadoop_job_run.rb +0 -29
  73. data/spec/tengine/job/dsls/2003_expansion/expansion_5.rb +0 -11
  74. data/spec/tengine/job/dsls/VERSION +0 -1
  75. data/spec/tengine/job/dynamic_env_spec.rb +0 -95
  76. data/spec/tengine/job/edge_spec.rb +0 -241
  77. data/spec/tengine/job/element_selector_notation_spec.rb +0 -354
  78. data/spec/tengine/job/examples_spec.rb +0 -62
  79. data/spec/tengine/job/execution_spec.rb +0 -100
  80. data/spec/tengine/job/expansion_spec.rb +0 -116
  81. data/spec/tengine/job/hadoop_job_run_spec.rb +0 -65
  82. data/spec/tengine/job/job_spec.rb +0 -4
  83. data/spec/tengine/job/jobnet/1015_complecated_jobnet_spec.rb +0 -72
  84. data/spec/tengine/job/jobnet_actual_spec.rb +0 -175
  85. data/spec/tengine/job/jobnet_spec.rb +0 -399
  86. data/spec/tengine/job/jobnet_template_spec.rb +0 -240
  87. data/spec/tengine/job/killing_spec.rb +0 -91
  88. data/spec/tengine/job/reset_spec.rb +0 -958
  89. data/spec/tengine/job/reset_spec/4056_1_dump.txt +0 -1
  90. data/spec/tengine/job/root_jobnet_actual_spec.rb +0 -89
  91. data/spec/tengine/job/root_jobnet_template_spec.rb +0 -248
  92. data/spec/tengine/job/script_executable_spec.rb +0 -132
  93. data/spec/tengine/job/stoppable_spec.rb +0 -176
  94. data/spec/tengine/job/vertex_spec.rb +0 -25
  95. data/spec/tengine_job_spec.rb +0 -4
  96. data/tengine_job.gemspec +0 -198
  97. data/tmp/log/.gitignore +0 -1
@@ -1,740 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- require 'spec_helper'
3
- require 'tengine/rspec'
4
-
5
- require 'net/ssh'
6
-
7
- describe 'job_control_driver' do
8
- include Tengine::RSpec::Extension
9
-
10
- target_dsl File.expand_path("../../../../lib/tengine/job/drivers/job_control_driver.rb", File.dirname(__FILE__))
11
- driver :job_control_driver
12
-
13
- context "rjn0001" do
14
- before do
15
- Tengine::Job::Vertex.delete_all
16
- builder = Rjn0001SimpleJobnetBuilder.new
17
- @jobnet = builder.create_actual
18
- @ctx = builder.context
19
- @execution = Tengine::Job::Execution.create!({
20
- :root_jobnet_id => @jobnet.id,
21
- })
22
- end
23
-
24
- context "ジョブの起動イベントを受け取ったら" do
25
- it "通常の場合" do
26
- @jobnet.phase_key = :starting
27
- @ctx.edge(:e1).phase_key = :transmitting
28
- @ctx.vertex(:j11).phase_key = :ready
29
- @jobnet.save!
30
- @jobnet.reload
31
- tengine.should_not_fire
32
- mock_ssh = mock(:ssh)
33
- mock_channel = mock(:channel)
34
- Net::SSH.should_receive(:start).
35
- with("localhost", an_instance_of(Tengine::Resource::Credential), an_instance_of(Hash)).and_yield(mock_ssh)
36
- mock_ssh.should_receive(:open_channel).and_yield(mock_channel)
37
- mock_channel.should_receive(:exec) do |*args|
38
- args.length.should == 1
39
- # args.first.should =~ %r<source \/etc\/profile && export MM_ACTUAL_JOB_ID=[0-9a-f]{24} MM_ACTUAL_JOB_ANCESTOR_IDS=\\"[0-9a-f]{24}\\" MM_FULL_ACTUAL_JOB_ANCESTOR_IDS=\\"[0-9a-f]{24}\\" MM_ACTUAL_JOB_NAME_PATH=\\"/rjn0001/j11\\" MM_ACTUAL_JOB_SECURITY_TOKEN= MM_SCHEDULE_ID=[0-9a-f]{24} MM_SCHEDULE_ESTIMATED_TIME= MM_TEMPLATE_JOB_ID=[0-9a-f]{24} MM_TEMPLATE_JOB_ANCESTOR_IDS=\\"[0-9a-f]{24}\\" && tengine_job_agent_run -- \$HOME/j11\.sh>
40
- args.first.should =~ %r<source \/etc\/profile>
41
- args.first.should =~ %r<MM_ACTUAL_JOB_ID=[0-9a-f]{24} MM_ACTUAL_JOB_ANCESTOR_IDS=\"[0-9a-f]{24}\" MM_FULL_ACTUAL_JOB_ANCESTOR_IDS=\"[0-9a-f]{24}\" MM_ACTUAL_JOB_NAME_PATH=\"/rjn0001/j11\" MM_ACTUAL_JOB_SECURITY_TOKEN= MM_SCHEDULE_ID=[0-9a-f]{24} MM_SCHEDULE_ESTIMATED_TIME= MM_TEMPLATE_JOB_ID=[0-9a-f]{24} MM_TEMPLATE_JOB_ANCESTOR_IDS=\"[0-9a-f]{24}\">
42
- args.first.should =~ %r<job_test j11>
43
- end
44
- tengine.receive("start.job.job.tengine", :properties => {
45
- :execution_id => @execution.id.to_s,
46
- :root_jobnet_id => @jobnet.id.to_s,
47
- :root_jobnet_name_path => @jobnet.name_path,
48
- :target_jobnet_id => @jobnet.id.to_s,
49
- :target_jobnet_name_path => @jobnet.name_path,
50
- :target_job_id => @ctx.vertex(:j11).id.to_s,
51
- :target_job_name_path => @ctx.vertex(:j11).name_path,
52
- })
53
- @jobnet.reload
54
- @ctx.edge(:e1).phase_key.should == :transmitted
55
- @ctx.edge(:e2).phase_key.should == :active
56
- @ctx.vertex(:j11).phase_key.should == :starting
57
- end
58
-
59
- context "starting直前stopによってinitializedになっている場合" do
60
- [:starting, :running].each do |root_phase_key|
61
-
62
- it "ルートが#{root_phase_key}" do
63
- @jobnet.phase_key = root_phase_key
64
- @ctx[:e1].phase_key = :closing
65
- @ctx[:e2].phase_key = :closing
66
- @ctx[:e3].phase_key = :closing
67
- @ctx[:j11].phase_key = :initialized
68
- @jobnet.save!
69
- @jobnet.reload
70
- tengine.should_fire(:"error.jobnet.job.tengine", {
71
- :source_name => @ctx[:root].name_as_resource,
72
- :properties=>{
73
- :execution_id => @execution.id.to_s,
74
- :root_jobnet_id => @jobnet.id.to_s,
75
- :root_jobnet_name_path => @jobnet.name_path,
76
- :target_jobnet_id => @jobnet.id.to_s,
77
- :target_jobnet_name_path => @jobnet.name_path,
78
- }
79
- })
80
- tengine.receive("start.job.job.tengine", :properties => {
81
- :execution_id => @execution.id.to_s,
82
- :root_jobnet_id => @jobnet.id.to_s,
83
- :root_jobnet_name_path => @jobnet.name_path,
84
- :target_jobnet_id => @jobnet.id.to_s,
85
- :target_jobnet_name_path => @jobnet.name_path,
86
- :target_job_id => @ctx.vertex(:j11).id.to_s,
87
- :target_job_name_path => @ctx.vertex(:j11).name_path,
88
- })
89
- @jobnet.reload
90
- @ctx.edge(:e1).phase_key.should == :closing
91
- @ctx.edge(:e2).phase_key.should == :closed
92
- @ctx.edge(:e3).phase_key.should == :closed
93
- @ctx.vertex(:j11).phase_key.should == :initialized
94
- @jobnet.phase_key.should == :error
95
- end
96
- end
97
-
98
- end
99
-
100
- it "存在しないスクリプトを実行しようとした場合、標準エラー出力にエラーメッセージが返されるので、それを保持する" do
101
- @jobnet.phase_key = :starting
102
- @ctx.edge(:e1).phase_key = :transmitting
103
- @ctx.vertex(:j11).phase_key = :ready
104
- @jobnet.save!
105
- @jobnet.reload
106
- mock_ssh = mock(:ssh)
107
- Net::SSH.stub(:start).with(any_args).and_yield(mock_ssh)
108
- mock_channel = mock(:channel)
109
- mock_ssh.stub(:open_channel).and_yield(mock_channel)
110
- mock_channel.stub(:exec).with(any_args).and_yield(mock_channel, true)
111
- mock_channel.stub(:on_data)
112
- mock_channel.should_receive(:on_extended_data).and_yield(mock_channel,
113
- "session", "[Errno::ENOENT] No such file or directory - /home/goku/unexist_script.sh")
114
- mock_channel.stub(:on_close)
115
- tengine.should_fire(:"error.job.job.tengine", {
116
- :source_name => @ctx[:j11].name_as_resource,
117
- :properties=>{
118
- :execution_id => @execution.id.to_s,
119
- :root_jobnet_id => @jobnet.id.to_s,
120
- :root_jobnet_name_path => @jobnet.name_path,
121
- :target_jobnet_id => @jobnet.id.to_s,
122
- :target_jobnet_name_path => @jobnet.name_path,
123
- :target_job_id => @ctx.vertex(:j11).id.to_s,
124
- :target_job_name_path => @ctx.vertex(:j11).name_path,
125
- :exit_status=>nil,
126
- :message=>"Failure to execute /rjn0001/j11 via SSH: [Errno::ENOENT] No such file or directory - /home/goku/unexist_script.sh"
127
- }
128
- })
129
- tengine.receive("start.job.job.tengine", :properties => {
130
- :execution_id => @execution.id.to_s,
131
- :root_jobnet_id => @jobnet.id.to_s,
132
- :root_jobnet_name_path => @jobnet.name_path,
133
- :target_jobnet_id => @jobnet.id.to_s,
134
- :target_jobnet_name_path => @jobnet.name_path,
135
- :target_job_id => @ctx.vertex(:j11).id.to_s,
136
- :target_job_name_path => @ctx.vertex(:j11).name_path,
137
- })
138
- @jobnet.reload
139
- @ctx.edge(:e1).phase_key.should == :transmitted
140
- @ctx.edge(:e2).phase_key.should == :active
141
- @ctx.vertex(:j11).tap do |job|
142
- job.phase_key.should == :error
143
- job.error_messages.should == [
144
- "[Errno::ENOENT] No such file or directory - /home/goku/unexist_script.sh",
145
- "Failure to execute /rjn0001/j11 via SSH: [Errno::ENOENT] No such file or directory - /home/goku/unexist_script.sh"
146
- ]
147
- end
148
- @jobnet.phase_key.should == :running
149
- end
150
-
151
- end
152
-
153
-
154
- it "PIDを取得できたら" do
155
- @ctx.edge(:e1).phase_key = :transmitted
156
- @ctx.edge(:e2).phase_key = :active
157
- @ctx.vertex(:j11).phase_key = :starting
158
- @jobnet.save!
159
- @jobnet.reload
160
- tengine.should_not_fire
161
- mock_event = mock(:event)
162
- @pid = "123"
163
- signal = Tengine::Job::Signal.new(mock_event)
164
- signal.data = {:executing_pid => @pid}
165
- @ctx.vertex(:j11).ack(signal) # このメソッド内ではsaveされないので、ここでreloadもしません。
166
- @ctx.vertex(:j11).executing_pid.should == @pid
167
- @ctx.edge(:e1).phase_key.should == :transmitted
168
- @ctx.edge(:e2).phase_key.should == :active
169
- @ctx.vertex(:j11).phase_key.should == :running
170
- end
171
-
172
- test_error_message1 = "Job process failed. STDOUT and STDERR were redirected to files. You can see them at /home/goku/stdout-1234.log and /home/goku/stderr-1234.log on the server test_server1"
173
- {
174
- :success => ["0", {}],
175
- :error => ["1", {
176
- :stdout_log => "/home/goku/stdout-1234.log",
177
- :stderr_log => "/home/goku/stderr-1234.log",
178
- :message => test_error_message1
179
- }]
180
- }.each do |phase_key, (exit_status, extra_props)|
181
- it "ジョブ実行#{phase_key}の通知" do
182
- test_key = "test_key.finished.process.job.tengine"
183
- Tengine::Core::Event.delete_all(:conditions => {:key => test_key})
184
- Tengine::Core::Event.create!(:event_type_name => "job.heartbeat.tengine", :key => test_key)
185
- @jobnet.reload
186
- j11 = @jobnet.find_descendant_by_name_path("/rjn0001/j11")
187
- j11.executing_pid = "123"
188
- j11.phase_key = :running
189
- j11.previous_edges.length.should == 1
190
- j11.previous_edges.first.phase_key = :transmitted
191
- @ctx[:root].save!
192
- tengine.should_fire(:"#{phase_key}.job.job.tengine",
193
- :source_name => @ctx[:j11].name_as_resource,
194
- :properties => {
195
- :execution_id => @execution.id.to_s,
196
- :root_jobnet_id => @jobnet.id.to_s,
197
- :root_jobnet_name_path => @jobnet.name_path,
198
- :target_jobnet_id => @jobnet.id.to_s,
199
- :target_jobnet_name_path => @jobnet.name_path,
200
- :target_job_id => @ctx[:j11].id.to_s,
201
- :target_job_name_path => @ctx[:j11].name_path,
202
- :exit_status => exit_status
203
- })
204
- tengine.receive(:"finished.process.job.tengine",
205
- :key => test_key,
206
- :source_name => @ctx[:j11].name_as_resource,
207
- :properties => {
208
- :execution_id => @execution.id.to_s,
209
- :root_jobnet_id => @jobnet.id.to_s,
210
- :root_jobnet_name_path => @jobnet.name_path,
211
- :target_jobnet_id => @jobnet.id.to_s,
212
- :target_jobnet_name_path => @jobnet.name_path,
213
- :target_job_id => @ctx[:j11].id.to_s,
214
- :target_job_name_path => @ctx[:j11].name_path,
215
- :exit_status => exit_status
216
- }.merge(extra_props))
217
- @jobnet.reload
218
- @ctx.edge(:e1).phase_key.should == :transmitted
219
- @ctx.edge(:e2).phase_key.should == :active
220
- @ctx.vertex(:j11).tap do |j|
221
- j.phase_key.should == phase_key
222
- j.exit_status.should == exit_status
223
- if phase_key == :error
224
- j.error_messages.should == [test_error_message1]
225
- end
226
- end
227
- end
228
- end
229
-
230
- it "stuckからのfinished.process.job.tengine" do
231
- @jobnet.reload
232
- j11 = @jobnet.find_descendant_by_name_path("/rjn0001/j11")
233
- j11.phase_key = :stuck
234
- j11.previous_edges.first.phase_key = :transmitted
235
- @ctx[:root].save!
236
- tengine.receive(:"finished.process.job.tengine",
237
- :properties => {
238
- :execution_id => @execution.id.to_s,
239
- :root_jobnet_id => @jobnet.id.to_s,
240
- :root_jobnet_name_path => @jobnet.name_path,
241
- :target_jobnet_id => @jobnet.id.to_s,
242
- :target_jobnet_name_path => @jobnet.name_path,
243
- :target_job_id => @ctx[:j11].id.to_s,
244
- :target_job_name_path => @ctx[:j11].name_path,
245
- :exit_status => 0
246
- })
247
- @jobnet.reload
248
- @ctx.vertex(:j11).phase_key.should == :stuck
249
- end
250
-
251
- it "強制停止" do
252
- @pid = "123"
253
- @jobnet.reload
254
- j11 = @jobnet.find_descendant_by_name_path("/rjn0001/j11")
255
- j11.executing_pid = @pid
256
- j11.phase_key = :running
257
- j11.previous_edges.length.should == 1
258
- j11.previous_edges.first.phase_key = :transmitted
259
- @ctx[:root].save!
260
-
261
- tengine.should_not_fire
262
- mock_ssh = mock(:ssh)
263
- mock_channel = mock(:channel)
264
- Net::SSH.should_receive(:start).
265
- with("localhost", an_instance_of(Tengine::Resource::Credential), an_instance_of(Hash)).and_yield(mock_ssh)
266
- mock_ssh.should_receive(:open_channel).and_yield(mock_channel)
267
- mock_channel.should_receive(:exec) do |*args|
268
- interval = Tengine::Job::Killing::DEFAULT_KILLING_SIGNAL_INTERVAL
269
- args.length.should == 1
270
- args.first.should =~ %r<source \/etc\/profile>
271
- args.first.should =~ %r<tengine_job_agent_kill #{@pid} #{interval} KILL$>
272
- end
273
- tengine.receive(:"stop.job.job.tengine",
274
- :source_name => @ctx[:j11].name_as_resource,
275
- :properties => {
276
- :execution_id => @execution.id.to_s,
277
- :root_jobnet_id => @jobnet.id.to_s,
278
- :target_jobnet_id => @jobnet.id.to_s,
279
- :target_job_id => @ctx[:j11].id.to_s,
280
- })
281
- @jobnet.reload
282
- @ctx.edge(:e1).phase_key.should == :transmitted
283
- @ctx.edge(:e2).phase_key.should == :active
284
- @ctx.vertex(:j11).tap do |j|
285
- j.phase_key.should == :dying
286
- j.exit_status.should == nil
287
- end
288
- end
289
-
290
- it "強制停止(ジョブネット)" do
291
- @pid11 = "11"
292
- @pid12 = "12"
293
- @jobnet.reload
294
- j11 = @jobnet.find_descendant_by_name_path("/rjn0001/j11")
295
- j11.executing_pid = @pid11
296
- j11.phase_key = :success
297
- j11.previous_edges.length.should == 1
298
- j11.previous_edges.first.phase_key = :transmitted
299
- j12 = @jobnet.find_descendant_by_name_path("/rjn0001/j12")
300
- j12.executing_pid = @pid12
301
- j12.phase_key = :running
302
- j12.previous_edges.length.should == 1
303
- j12.previous_edges.first.phase_key = :transmitted
304
- @ctx[:root].save!
305
-
306
- # phase_key が success の j11 は fireされない
307
- tengine.should_not_fire(:"stop.job.job.tengine")
308
- # phase_key が running の j12 は fireされる
309
- tengine.should_fire(:"stop.job.job.tengine",
310
- :source_name => @ctx[:j12].name_as_resource,
311
- :properties => {
312
- :stop_reason => "user_stop",
313
- :target_jobnet_id => @jobnet.id.to_s,
314
- :target_jobnet_name_path => "/rjn0001",
315
- :target_job_id => @ctx[:j12].id.to_s,
316
- :target_job_name_path => "/rjn0001/j12",
317
- :execution_id => @execution.id.to_s,
318
- :root_jobnet_id => @jobnet.id.to_s,
319
- :root_jobnet_name_path => "/rjn0001",
320
- })
321
- # jobnet に対して強制停止された
322
- tengine.receive(:"stop.jobnet.job.tengine",
323
- :source_name => @jobnet.name_as_resource,
324
- :properties => {
325
- :stop_reason => "user_stop",
326
- :target_jobnet_id => @jobnet.id.to_s,
327
- :target_jobnet_name_path => "/rjn0001",
328
- :execution_id => @execution.id.to_s,
329
- :root_jobnet_id => @jobnet.id.to_s,
330
- :root_jobnet_name_path => "/rjn0001",
331
- })
332
- end
333
-
334
- it "強制停止(後続のジョブ)" do
335
- @pid11 = "11"
336
- @pid12 = "12"
337
- @jobnet.reload
338
- j11 = @jobnet.find_descendant_by_name_path("/rjn0001/j11")
339
- j11.executing_pid = @pid11
340
- j11.phase_key = :success
341
- j11.previous_edges.length.should == 1
342
- j11.previous_edges.first.phase_key = :transmitted
343
- j12 = @jobnet.find_descendant_by_name_path("/rjn0001/j12")
344
- j12.executing_pid = @pid12
345
- j12.phase_key = :running
346
- j12.previous_edges.length.should == 1
347
- j12.previous_edges.first.phase_key = :transmitted
348
- @ctx[:root].save!
349
-
350
- mock_ssh = mock(:ssh)
351
- mock_channel = mock(:channel)
352
- Net::SSH.should_receive(:start).
353
- with("localhost", an_instance_of(Tengine::Resource::Credential), an_instance_of(Hash)).and_yield(mock_ssh)
354
- mock_ssh.should_receive(:open_channel).and_yield(mock_channel)
355
- mock_channel.should_receive(:exec) do |*args|
356
- interval = Tengine::Job::Killing::DEFAULT_KILLING_SIGNAL_INTERVAL
357
- args.length.should == 1
358
- args.first.should =~ %r<source \/etc\/profile>
359
- args.first.should =~ %r<tengine_job_agent_kill #{@pid12} #{interval} KILL$>
360
- end
361
-
362
- # job12 に対して強制停止
363
- tengine.receive(:"stop.job.job.tengine",
364
- :source_name => @ctx[:j12].name_as_resource,
365
- :properties => {
366
- :stop_reason => "user_stop",
367
- :target_jobnet_id => @jobnet.id.to_s,
368
- :target_jobnet_name_path => "/rjn0001",
369
- :target_job_id => @ctx[:j12].id.to_s,
370
- :target_job_name_path => "/rjn0001/j12",
371
- :execution_id => @execution.id.to_s,
372
- :root_jobnet_id => @jobnet.id.to_s,
373
- :root_jobnet_name_path => "/rjn0001",
374
- })
375
- @jobnet.reload
376
- @ctx.edge(:e1).phase_key.should == :transmitted
377
- @ctx.edge(:e2).phase_key.should == :transmitted
378
- @ctx.edge(:e3).phase_key.should == :active
379
- @ctx.vertex(:j11).tap do |j|
380
- j.phase_key.should == :success
381
- j.stop_reason.should == nil
382
- end
383
- @ctx.vertex(:j12).tap do |j|
384
- j.phase_key.should == :dying
385
- j.stop_reason.should == "user_stop"
386
- end
387
- end
388
-
389
-
390
- if ENV['PASSWORD']
391
- context "実際にSSHで接続", :ssh_actual => true do
392
- before do
393
- resource_fixture = GokuAtEc2ApNortheast.new
394
- credential = resource_fixture.goku_ssh_pw
395
- credential.auth_values = {:username => ENV['USER'], :password => ENV['PASSWORD']}
396
- credential.save!
397
- server = resource_fixture.hadoop_master_node
398
- server.local_ipv4 = "127.0.0.1"
399
- server.save!
400
- end
401
-
402
- it do
403
- tengine.should_not_fire
404
- tengine.receive("start.job.job.tengine", :properties => {
405
- :execution_id => @execution.id.to_s,
406
- :root_jobnet_id => @jobnet.id.to_s,
407
- :target_jobnet_id => @jobnet.id.to_s,
408
- })
409
- @jobnet.reload
410
- j11 = @jobnet.find_descendant_by_name_path("/rjn0001/j11")
411
- j11.executing_pid.should_not be_nil
412
- j11.exit_status.should == nil
413
- j11.phase_key.should == :running
414
- j11.previous_edges.length.should == 1
415
- j11.previous_edges.first.phase_key.should == :transmitted
416
- end
417
-
418
- end
419
- end
420
- end
421
-
422
- context "再実行" do
423
- context "ジョブを再実行" do
424
- {
425
- false => "後続も実行",
426
- true => "スポット再実行"
427
- }.each do |spot, caption|
428
- context(caption) do
429
-
430
- before do
431
- Tengine::Job::Vertex.delete_all
432
- builder = Rjn0001SimpleJobnetBuilder.new
433
- @root = builder.create_actual
434
- @ctx = builder.context
435
- @execution = Tengine::Job::Execution.create!({
436
- :root_jobnet_id => @root.id,
437
- :spot => spot, :retry => true,
438
- :target_actual_ids => [@ctx[:j11].id.to_s]
439
- })
440
- @root.phase_key = :running
441
- @ctx[:j11].phase_key = :success
442
- @ctx[:j12].phase_key = :error
443
- @ctx[:e1].phase_key = :transmitted
444
- @ctx[:e2].phase_key = :transmitted
445
- @ctx[:e3].phase_key = :active
446
- end
447
-
448
- [:initialized, :success, :error, :stuck].each do |phase_key|
449
- it "phase_keyが#{phase_key}ならば再実行できるので、startのイベントを発火する" do
450
- @ctx[:j11].phase_key = phase_key
451
- @root.save!
452
- tengine.should_fire(:"start.job.job.tengine", {
453
- :source_name => @ctx[:j11].name_as_resource,
454
- :properties=>{
455
- :execution_id => @execution.id.to_s,
456
- :root_jobnet_name_path => @root.name_path,
457
- :root_jobnet_id => @root.id.to_s,
458
- :target_jobnet_name_path => @root.name_path,
459
- :target_jobnet_id => @root.id.to_s,
460
- :target_job_name_path => @ctx.vertex(:j11).name_path,
461
- :target_job_id => @ctx.vertex(:j11).id.to_s,
462
- }
463
- })
464
- tengine.receive("restart.job.job.tengine", :properties => {
465
- :execution_id => @execution.id.to_s,
466
- :root_jobnet_id => @root.id.to_s,
467
- :root_jobnet_name_path => @root.name_path,
468
- :target_jobnet_id => @root.id.to_s,
469
- :target_jobnet_name_path => @root.name_path,
470
- :target_job_id => @ctx.vertex(:j11).id.to_s,
471
- :target_job_name_path => @ctx.vertex(:j11).name_path,
472
- })
473
- @root.reload
474
- @root.phase_key.should == :running
475
- @ctx.edge(:e1).phase_key.should == :transmitted
476
- @ctx.vertex(:j11).phase_key.should == :ready
477
- if spot
478
- @ctx.vertex(:j12).phase_key.should == :error
479
- @ctx.edge(:e2).phase_key.should == :transmitted
480
- @ctx.edge(:e3).phase_key.should == :active
481
- else
482
- @ctx.vertex(:j12).phase_key.should == :initialized
483
- @ctx.edge(:e2).phase_key.should == :active
484
- @ctx.edge(:e3).phase_key.should == :active
485
- end
486
- end
487
- end
488
-
489
- [:ready, :starting, :running, :dying].each do |phase_key|
490
- it "phase_keyが#{phase_key}ならば再実行できず、エラーのイベントを発火する" do
491
- @ctx[:j11].phase_key = phase_key
492
- @root.save!
493
- tengine.should_fire("restart.job.job.tengine.error.tengined").with(any_args)
494
- Tengine::Core::Kernel.temp_exception_reporter(:except_test) do
495
- tengine.receive("restart.job.job.tengine", :properties => {
496
- :execution_id => @execution.id.to_s,
497
- :root_jobnet_id => @root.id.to_s,
498
- :target_jobnet_id => @root.id.to_s,
499
- :target_job_id => @ctx.vertex(:j11).id.to_s,
500
- })
501
- end
502
- # 再実行に失敗したのでルートジョブネット以下何も状態は変更されません
503
- @root.reload
504
- @root.phase_key.should == :running
505
- @ctx.edge(:e1).phase_key.should == :transmitted
506
- @ctx.vertex(:j11).phase_key.should == phase_key
507
- end
508
-
509
- end
510
- end
511
- end
512
-
513
- end
514
-
515
- end
516
-
517
-
518
- context "<BUG>同じジョブネットが複数バージョン存在する際、ジョブ実行時にスクリプトに渡される環境変数の「MM_TEMPLATE_JOB_ID」「MM_TEMPLATE_JOB_ANCESTOR_IDS」が実行しているバージョン以外のものがセットされている" do
519
- shared_examples_for "最新のバージョンのルートジョブネットを参照する" do |dsl_version|
520
-
521
- it do
522
- @root.phase_key = :starting
523
- @root.element("prev!j11").phase_key = :transmitting
524
- @root.element('j11').phase_key = :ready
525
- @root.save!
526
- @root.reload
527
- tengine.should_not_fire
528
- mock_ssh = mock(:ssh)
529
- mock_channel = mock(:channel)
530
- Net::SSH.should_receive(:start).
531
- with("localhost", an_instance_of(Tengine::Resource::Credential), an_instance_of(Hash)).and_yield(mock_ssh)
532
- mock_ssh.should_receive(:open_channel).and_yield(mock_channel)
533
- mock_channel.should_receive(:exec) do |*args|
534
- args.length.should == 1
535
- # args.first.should =~ %r<source \/etc\/profile && export MM_ACTUAL_JOB_ID=[0-9a-f]{24} MM_ACTUAL_JOB_ANCESTOR_IDS=\\"[0-9a-f]{24}\\" MM_FULL_ACTUAL_JOB_ANCESTOR_IDS=\\"[0-9a-f]{24}\\" MM_ACTUAL_JOB_NAME_PATH=\\"/rjn0001/j11\\" MM_ACTUAL_JOB_SECURITY_TOKEN= MM_SCHEDULE_ID=[0-9a-f]{24} MM_SCHEDULE_ESTIMATED_TIME= MM_TEMPLATE_JOB_ID=[0-9a-f]{24} MM_TEMPLATE_JOB_ANCESTOR_IDS=\\"[0-9a-f]{24}\\" && tengine_job_agent_run -- \$HOME/j11\.sh>
536
- args.first.should =~ %r<source \/etc\/profile>
537
- args.first.should =~ %r<MM_ACTUAL_JOB_ID=[0-9a-f]{24} MM_ACTUAL_JOB_ANCESTOR_IDS=\"[0-9a-f]{24}\" MM_FULL_ACTUAL_JOB_ANCESTOR_IDS=\"[0-9a-f]{24}\" MM_ACTUAL_JOB_NAME_PATH=\"/rjn0001/j11\" MM_ACTUAL_JOB_SECURITY_TOKEN= MM_SCHEDULE_ID=[0-9a-f]{24} MM_SCHEDULE_ESTIMATED_TIME= MM_TEMPLATE_JOB_ID=[0-9a-f]{24} MM_TEMPLATE_JOB_ANCESTOR_IDS=\"[0-9a-f]{24}\">
538
- @template.dsl_version.should == dsl_version
539
- template_job = @template.element("/rjn0001/j11")
540
- args.first.should =~ %r<MM_TEMPLATE_JOB_ID=#{template_job.id.to_s}>
541
- args.first.should =~ %r<MM_TEMPLATE_JOB_ANCESTOR_IDS=\"#{@template.id.to_s}\">
542
- args.first.should =~ %r<job_test j11>
543
- end
544
- tengine.receive("start.job.job.tengine", :properties => {
545
- :execution_id => @execution.id.to_s,
546
- :root_jobnet_id => @root.id.to_s,
547
- :root_jobnet_name_path => @root.name_path,
548
- :target_jobnet_id => @root.id.to_s,
549
- :target_jobnet_name_path => @root.name_path,
550
- :target_job_id => @root.element('j11').id.to_s,
551
- :target_job_name_path => @root.element('j11').name_path,
552
- })
553
- @root.reload
554
- @root.element('prev!j11').phase_key.should == :transmitted
555
- @root.element('next!j11').phase_key.should == :active
556
- @root.element('j11').phase_key.should == :starting
557
- end
558
- end
559
-
560
- context "バージョン1つだけ" do
561
- before do
562
- Tengine::Core::Setting.delete_all
563
- Tengine::Core::Setting.create!(:name => "dsl_version", :value => "1")
564
- Tengine::Job::Vertex.delete_all
565
- Rjn0001SimpleJobnetBuilder.new.tap do |builder|
566
- @template = builder.create_template(:dsl_version => "1")
567
- @root = @template.generate
568
- @ctx = builder.context
569
- end
570
- @execution = Tengine::Job::Execution.create!({
571
- :root_jobnet_id => @root.id,
572
- })
573
- end
574
- it{ @root.template.dsl_version.should == "1" }
575
- it_should_behave_like "最新のバージョンのルートジョブネットを参照する", "1"
576
- end
577
-
578
- context "バージョン2つ" do
579
- before do
580
- Tengine::Core::Setting.delete_all
581
- Tengine::Core::Setting.create!(:name => "dsl_version", :value => "2")
582
- Tengine::Job::Vertex.delete_all
583
- Rjn0001SimpleJobnetBuilder.new.tap do |builder|
584
- builder.create_template(:dsl_version => "1")
585
- @template = builder.create_template(:dsl_version => "2")
586
- @root = @template.generate
587
- @ctx = builder.context
588
- end
589
- @execution = Tengine::Job::Execution.create!({
590
- :root_jobnet_id => @root.id,
591
- })
592
- end
593
- it{ @root.template.dsl_version.should == "2" }
594
- it_should_behave_like "最新のバージョンのルートジョブネットを参照する", "2"
595
- end
596
-
597
- context "バージョン10個" do
598
- before do
599
- Tengine::Core::Setting.delete_all
600
- Tengine::Core::Setting.create!(:name => "dsl_version", :value => "10")
601
- Tengine::Job::Vertex.delete_all
602
- Rjn0001SimpleJobnetBuilder.new.tap do |builder|
603
- (1..9).each do |idx|
604
- builder.create_template(:dsl_version => idx.to_s)
605
- end
606
- @template = builder.create_template(:dsl_version => "10")
607
- @root = @template.generate
608
- @ctx = builder.context
609
- end
610
- @execution = Tengine::Job::Execution.create!({
611
- :root_jobnet_id => @root.id,
612
- })
613
- end
614
- it{ @root.template.dsl_version.should == "10" }
615
- it_should_behave_like "最新のバージョンのルートジョブネットを参照する", "10"
616
- end
617
- end
618
-
619
- context "https://www.pivotaltracker.com/story/show/22624209" do
620
- it "stuckにする" do
621
- Tengine::Core::Schedule.delete_all
622
- Tengine::Job::Vertex.delete_all
623
- builder = Rjn0001SimpleJobnetBuilder.new
624
- @root = builder.create_actual
625
- @ctx = builder.context
626
- @execution = Tengine::Job::Execution.create!({
627
- :root_jobnet_id => @root.id,
628
- })
629
- @root.phase_key = :initialized
630
- @root.save!
631
- EM.run_block do
632
- tengine.receive("expired.job.heartbeat.tengine", :properties => {
633
- :execution_id => @execution.id.to_s,
634
- :root_jobnet_id => @root.id.to_s,
635
- :target_job_id => @root.children[1].id.to_s,
636
- })
637
- end
638
- @root.reload
639
- @root.children[1].phase_key.should == :stuck
640
- @root.phase_key.should_not == :stuck # initialized
641
- end
642
- end
643
-
644
- context "start.job.job.tengine.failed.tengined" do
645
- it "stuckにする" do
646
- Tengine::Core::Schedule.delete_all
647
- Tengine::Job::Vertex.delete_all
648
- builder = Rjn0001SimpleJobnetBuilder.new
649
- @root = builder.create_actual
650
- @ctx = builder.context
651
- @execution = Tengine::Job::Execution.create!({
652
- :root_jobnet_id => @root.id,
653
- })
654
- @root.phase_key = :initialized
655
- @root.save!
656
- EM.run_block do
657
- tengine.receive("start.job.job.tengine.failed.tengined", :properties => {
658
- :original_event => {
659
- :event_type_name => "start.job.job.tengine",
660
- :properties => {
661
- :execution_id => @execution.id.to_s,
662
- :root_jobnet_id => @root.id.to_s,
663
- :root_jobnet_name_path => @root.name_path,
664
- :target_jobnet_id => @root.id.to_s,
665
- :target_jobnet_name_path => @root.name_path,
666
- :target_job_id => @root.children[1].id.to_s,
667
- }}})
668
- end
669
- @root.reload
670
- @root.children[1].phase_key.should == :stuck
671
- @root.phase_key.should_not == :stuck # initialized
672
- end
673
-
674
- it "broken event" do
675
- Tengine::Core::Schedule.delete_all
676
- Tengine::Job::Vertex.delete_all
677
- builder = Rjn0001SimpleJobnetBuilder.new
678
- @root = builder.create_actual
679
- @ctx = builder.context
680
- @execution = Tengine::Job::Execution.create!({
681
- :root_jobnet_id => @root.id,
682
- })
683
- @root.phase_key = :initialized
684
- @root.save!
685
- EM.run_block do
686
- tengine.receive("start.job.job.tengine.failed.tengined", :properties => {
687
- :original_event => {
688
- :event_type_name => "start.job.job.tengine",
689
- :properties => {
690
- :execution_id => @execution.id.to_s,
691
- :root_jobnet_id => @root.id.to_s,
692
- :root_jobnet_name_path => @root.name_path,
693
- :target_jobnet_id => @root.id.to_s,
694
- :target_jobnet_name_path => @root.name_path,
695
- }}})
696
- end
697
- @root.reload
698
- @root.children[1].phase_key.should == :initialized
699
- @root.phase_key.should_not == :stuck # initialized
700
- end
701
- end
702
-
703
- %w[
704
- stop.job.job.tengine.failed.tengined
705
- finished.process.job.tengine.failed.tengined
706
- expired.job.heartbeat.tengine.failed.tengined
707
- restart.job.job.tengine.failed.tengined
708
- ].each do |i|
709
- describe i do
710
- it "stuckにする" do
711
- Tengine::Core::Schedule.delete_all
712
- Tengine::Job::Vertex.delete_all
713
- builder = Rjn0001SimpleJobnetBuilder.new
714
- @root = builder.create_actual
715
- @ctx = builder.context
716
- @execution = Tengine::Job::Execution.create!({
717
- :root_jobnet_id => @root.id,
718
- })
719
- @root.phase_key = :running
720
- @root.save!
721
- EM.run_block do
722
- tengine.receive(i, :properties => {
723
- :original_event => {
724
- :event_type_name => "start.job.job.tengine",
725
- :properties => {
726
- :execution_id => @execution.id.to_s,
727
- :root_jobnet_id => @root.id.to_s,
728
- :root_jobnet_name_path => @root.name_path,
729
- :target_jobnet_id => @root.id.to_s,
730
- :target_jobnet_name_path => @root.name_path,
731
- :target_job_id => @root.children[1].id.to_s,
732
- }}})
733
- end
734
- @root.reload
735
- @root.children[1].phase_key.should == :stuck
736
- @root.phase_key.should_not == :stuck # running
737
- end
738
- end
739
- end
740
- end