tengine_job 0.6.13 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -20
- data/Gemfile.lock +70 -72
- data/README.rdoc +2 -2
- data/bin/create_indexes_for_tengine_job +18 -0
- data/lib/tengine/job/connectable.rb +4 -4
- data/lib/tengine/job/drivers/job_execution_driver.rb +2 -2
- data/lib/tengine/job/drivers/jobnet_control_driver.rb +3 -3
- data/lib/tengine/job/drivers/schedule_driver.rb +12 -12
- data/lib/tengine/job/edge.rb +2 -2
- data/lib/tengine/job/expansion.rb +1 -1
- data/lib/tengine/job/jobnet.rb +12 -2
- data/lib/tengine/job/jobnet_actual.rb +29 -0
- data/lib/tengine/job/root_jobnet_actual.rb +20 -1
- data/lib/tengine/job/root_jobnet_template.rb +4 -4
- data/lib/tengine/job/signal.rb +1 -1
- data/lib/tengine/job/vertex.rb +10 -1
- metadata +130 -126
- data/.document +0 -5
- data/.rspec +0 -1
- data/Rakefile +0 -42
- data/VERSION +0 -1
- data/spec/fixtures/rjn_0001_simple_jobnet_builder.rb +0 -42
- data/spec/fixtures/rjn_0002_simple_parallel_jobnet_builder.rb +0 -42
- data/spec/fixtures/rjn_0003_fork_join_jobnet_builder.rb +0 -61
- data/spec/fixtures/rjn_0004_parallel_jobnet_with_finally_fixture.rb +0 -62
- data/spec/fixtures/rjn_0005_retry_two_layer_fixture.rb +0 -153
- data/spec/fixtures/rjn_0008_expansion_fixture.rb +0 -32
- data/spec/fixtures/rjn_0009_tree_sequential_jobnet_builder.rb +0 -174
- data/spec/fixtures/rjn_0010_2jobs_and_1job_parallel_jobnet_builder.rb +0 -39
- data/spec/fixtures/rjn_0011_nested_fork_jobnet_builder.rb +0 -96
- data/spec/fixtures/rjn_0012_nested_and_finally_builder.rb +0 -157
- data/spec/fixtures/rjn_1004_hadoop_job_in_jobnet_fixture.rb +0 -105
- data/spec/fixtures/rjn_means_root_jobnet +0 -0
- data/spec/fixtures/test_credential_fixture.rb +0 -12
- data/spec/fixtures/test_server_fixture.rb +0 -28
- data/spec/mongoid.yml +0 -35
- data/spec/spec_helper.rb +0 -58
- data/spec/sshd/.gitignore +0 -1
- data/spec/sshd/id_rsa +0 -51
- data/spec/sshd/id_rsa.pub +0 -1
- data/spec/sshd/ssh_host_rsa_key +0 -51
- data/spec/sshd/ssh_host_rsa_key.pub +0 -1
- data/spec/sshd/sshd_config +0 -10
- data/spec/sshd/sshd_config.erb +0 -11
- data/spec/sshd/tengine_job_test.sh +0 -6
- data/spec/support/jobnet_fixture_builder.rb +0 -145
- data/spec/support/mongo_index_key_log.rb +0 -91
- data/spec/tengine/job/category_spec.rb +0 -193
- data/spec/tengine/job/connectable_spec.rb +0 -94
- data/spec/tengine/job/drivers/job_controll_driver/connection_error_spec.rb +0 -236
- data/spec/tengine/job/drivers/job_controll_driver/duplicated_job_start_spec.rb +0 -302
- data/spec/tengine/job/drivers/job_controll_driver/expansion_spec.rb +0 -120
- data/spec/tengine/job/drivers/job_controll_driver/stop_spec.rb +0 -159
- data/spec/tengine/job/drivers/job_controll_driver_spec.rb +0 -740
- data/spec/tengine/job/drivers/job_execution_driver_spec.rb +0 -138
- data/spec/tengine/job/drivers/jobnet_control_driver/nested_and_finally_spec.rb +0 -472
- data/spec/tengine/job/drivers/jobnet_control_driver/nested_jobnet_spec.rb +0 -231
- data/spec/tengine/job/drivers/jobnet_control_driver/stop_jobnet_spec.rb +0 -202
- data/spec/tengine/job/drivers/jobnet_control_driver_spec.rb +0 -608
- data/spec/tengine/job/drivers/schedule_driver_spec.rb +0 -241
- data/spec/tengine/job/dsl_binder_spec.rb +0 -36
- data/spec/tengine/job/dsl_loader_spec.rb +0 -437
- data/spec/tengine/job/dsls/0013_hadoop_job_run.rb +0 -29
- data/spec/tengine/job/dsls/0014_join_and_join.rb +0 -19
- data/spec/tengine/job/dsls/0015_fork_and_fork.rb +0 -18
- data/spec/tengine/job/dsls/0016_complex_fork_and_join.rb +0 -20
- data/spec/tengine/job/dsls/0017_finally.rb +0 -15
- data/spec/tengine/job/dsls/0018_expansion.rb +0 -23
- data/spec/tengine/job/dsls/0019_execute_job_on_event.rb +0 -16
- data/spec/tengine/job/dsls/0020_duplicated_jobnet_name.rb +0 -16
- data/spec/tengine/job/dsls/0021_caption.rb +0 -13
- data/spec/tengine/job/dsls/1060_test_dir1/1060_test_dir2/0013_hadoop_job_run.rb +0 -29
- data/spec/tengine/job/dsls/2003_expansion/expansion_5.rb +0 -11
- data/spec/tengine/job/dsls/VERSION +0 -1
- data/spec/tengine/job/dynamic_env_spec.rb +0 -95
- data/spec/tengine/job/edge_spec.rb +0 -241
- data/spec/tengine/job/element_selector_notation_spec.rb +0 -354
- data/spec/tengine/job/examples_spec.rb +0 -62
- data/spec/tengine/job/execution_spec.rb +0 -100
- data/spec/tengine/job/expansion_spec.rb +0 -116
- data/spec/tengine/job/hadoop_job_run_spec.rb +0 -65
- data/spec/tengine/job/job_spec.rb +0 -4
- data/spec/tengine/job/jobnet/1015_complecated_jobnet_spec.rb +0 -72
- data/spec/tengine/job/jobnet_actual_spec.rb +0 -175
- data/spec/tengine/job/jobnet_spec.rb +0 -399
- data/spec/tengine/job/jobnet_template_spec.rb +0 -240
- data/spec/tengine/job/killing_spec.rb +0 -91
- data/spec/tengine/job/reset_spec.rb +0 -958
- data/spec/tengine/job/reset_spec/4056_1_dump.txt +0 -1
- data/spec/tengine/job/root_jobnet_actual_spec.rb +0 -89
- data/spec/tengine/job/root_jobnet_template_spec.rb +0 -248
- data/spec/tengine/job/script_executable_spec.rb +0 -132
- data/spec/tengine/job/stoppable_spec.rb +0 -176
- data/spec/tengine/job/vertex_spec.rb +0 -25
- data/spec/tengine_job_spec.rb +0 -4
- data/tengine_job.gemspec +0 -198
- data/tmp/log/.gitignore +0 -1
@@ -1,302 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
require 'spec_helper'
|
3
|
-
require 'tengine/rspec'
|
4
|
-
|
5
|
-
require 'net/ssh'
|
6
|
-
|
7
|
-
|
8
|
-
# 背景
|
9
|
-
# 以下の2つの条件が満たされた場合
|
10
|
-
# * 2つのtenginedプロセスが動いている
|
11
|
-
# * 並列で実行されるジョブを持つジョブネットが実行される(例えばrjn0002)
|
12
|
-
#
|
13
|
-
# 問題の詳細
|
14
|
-
# プロセス1がstart.job.job.tengineイベントによって起動したj11のプロセスのPIDを得る前に、
|
15
|
-
# プロセス2がstart.job.job.tengineイベントによってj12を起動することで、それらのルートジョブネットの
|
16
|
-
# versionが更新されてしまい、j11のPIDを得てルートジョブネットを更新する際にversionが
|
17
|
-
# 異なってしまっているため、update_with_lockメソッドによって実行に失敗したものと見なされて、
|
18
|
-
# 再度update_with_lockのブロックが実行されて、j11のプロセスが実行されてしまう。
|
19
|
-
#
|
20
|
-
# 本来どうあるべきか?
|
21
|
-
# update_with_lock内ではSSHなどの繰り返し実行することを想定していない処理や、
|
22
|
-
# イベントの送信を行ってはいけないので、それらの重複が起こらない仕組みになっているべき。
|
23
|
-
#
|
24
|
-
describe "<BUG>tengindのプロセスを二つ起動した際に並列ジョブがある際にジョブが2度実行される" do
|
25
|
-
include Tengine::RSpec::Extension
|
26
|
-
|
27
|
-
driver_path = File.expand_path("../../../../../lib/tengine/job/drivers/job_control_driver.rb", File.dirname(__FILE__))
|
28
|
-
|
29
|
-
# in [rjn0002]
|
30
|
-
# |--e2-->(j11)--e4-->|
|
31
|
-
# (S1)--e1-->[F1] [J1]--e6-->(E1)
|
32
|
-
# |--e3-->(j12)--e5-->|
|
33
|
-
context "rjn0002" do
|
34
|
-
before do
|
35
|
-
Tengine::Resource::Server.delete_all
|
36
|
-
Tengine::Job::Execution.delete_all
|
37
|
-
Tengine::Job::Vertex.delete_all
|
38
|
-
TestCredentialFixture.test_credential1
|
39
|
-
TestServerFixture.test_server1
|
40
|
-
TestServerFixture.test_server2
|
41
|
-
builder = Rjn0002SimpleParallelJobnetBuilder.new
|
42
|
-
@root = builder.create_actual
|
43
|
-
j12 = @root.element("j12")
|
44
|
-
j12.server_name = "test_server2"
|
45
|
-
@root.save!
|
46
|
-
|
47
|
-
@ctx = builder.context
|
48
|
-
@execution = Tengine::Job::Execution.create!({
|
49
|
-
:root_jobnet_id => @root.id,
|
50
|
-
})
|
51
|
-
@base_props = {
|
52
|
-
:execution_id => @execution.id.to_s,
|
53
|
-
:root_jobnet_id => @root.id.to_s,
|
54
|
-
:root_jobnet_name_path => @root.name_path,
|
55
|
-
:target_jobnet_id => @root.id.to_s,
|
56
|
-
:target_jobnet_name_path => @root.name_path,
|
57
|
-
}
|
58
|
-
|
59
|
-
# 2つのプロセスの代わりに、2つのカーネルを別のFiberで動かす
|
60
|
-
@bootstrap1 = Tengine::Core::Bootstrap.new(:tengined => { :load_path => driver_path })
|
61
|
-
@bootstrap1.kernel.tap{|k| k.bind; k.evaluate}
|
62
|
-
@tengine1 = Tengine::RSpec::ContextWrapper.new(@bootstrap1.kernel)
|
63
|
-
#
|
64
|
-
@bootstrap2 = Tengine::Core::Bootstrap.new(:tengined => { :load_path => driver_path })
|
65
|
-
@bootstrap2.kernel.tap{|k| k.bind; k.evaluate}
|
66
|
-
@tengine2 = Tengine::RSpec::ContextWrapper.new(@bootstrap2.kernel)
|
67
|
-
end
|
68
|
-
|
69
|
-
# tengine1が起動したプロセスのPIDを得る前にtengine2がプロセスを起動することはできません。
|
70
|
-
#
|
71
|
-
# job_control_driverでのstart.job.job.tengineの処理の概略は以下の通りです
|
72
|
-
#
|
73
|
-
# start.job.job.tengine
|
74
|
-
# 1. be starting
|
75
|
-
# 2. root_jobnet.update_with_lock
|
76
|
-
# 3. execute job with SSH
|
77
|
-
# 4. be running
|
78
|
-
# 5. root_jobnet.update_with_lock
|
79
|
-
#
|
80
|
-
# パターン1 (ほぼ同時に1に突入する)
|
81
|
-
# ||f1 ||f2 ||DB |
|
82
|
-
# ||ver|step||ver|step||ver|
|
83
|
-
# ---------------------------------------------------------
|
84
|
-
# || 0 | 1 || - | - || 0| f1 starting
|
85
|
-
# || 0 | 1 || 0 | 1 || 0| f2 starting 1st
|
86
|
-
# || 1 | 2 || 0 | 1 || 1| f1 update_with_lock success
|
87
|
-
# || 1 | 2 || 0 | 2 || 1| f2 update_with_lock fail & retry
|
88
|
-
# || 1 | 2 || 1 | 1 || 1| f2 starting 2nd
|
89
|
-
# || 1 | 2 || 2 | 2 || 2| f2 update_with_lock success
|
90
|
-
# || 2 | 3 || 2 | 2 || 2| f1 refresh & SSH starting
|
91
|
-
# || 2 | 3 || 2 | 3 || 2| f2 refresh & SSH starting
|
92
|
-
# || 2 | 4 || 2 | 3 || 2| f1 running
|
93
|
-
# || 3 | 5 || 2 | 3 || 3| f1 update_with_lock success
|
94
|
-
# || 3 | 5 || 2 | 4 || 3| f2 running 1st
|
95
|
-
# || 3 | 5 || 2 | 5 || 3| f2 update_with_lock fail & retry
|
96
|
-
# || 3 | 5 || 3 | 4 || 3| f2 running 2nd
|
97
|
-
# || 3 | 5 || 4 | 5 || 4| f2 update_with_lock success
|
98
|
-
|
99
|
-
before do
|
100
|
-
@ctx[:e1].phase_key = :transmitted
|
101
|
-
@ctx[:e2].phase_key = :transmitting
|
102
|
-
@ctx[:e3].phase_key = :transmitting
|
103
|
-
@ctx[:j11].phase_key = :ready
|
104
|
-
@ctx[:j12].phase_key = :ready
|
105
|
-
@root.phase_key = :starting
|
106
|
-
@root.version = 0
|
107
|
-
@root.save!
|
108
|
-
|
109
|
-
@pid = Process.pid.to_s
|
110
|
-
|
111
|
-
@f1 = Fiber.new do
|
112
|
-
ssh1 = mock(:ssh1)
|
113
|
-
Net::SSH.should_receive(:start).with("localhost",
|
114
|
-
an_instance_of(Tengine::Resource::Credential),
|
115
|
-
an_instance_of(Hash)).once.and_yield(ssh1)
|
116
|
-
channel1 = mock(:channel1)
|
117
|
-
ssh1.stub(:open_channel).and_yield(channel1)
|
118
|
-
channel1.stub(:exec).with(any_args).and_yield(channel1, true)
|
119
|
-
channel1.should_receive(:on_close) do
|
120
|
-
Tengine.logger.debug( ("!" * 100) << "\non_close: Fiber.yield #{Process.pid} #{__FILE__}##{__LINE__}")
|
121
|
-
Fiber.yield
|
122
|
-
end # on_dataが呼び出される前に止める
|
123
|
-
channel1.should_receive(:on_data).and_yield(channel1, @pid)
|
124
|
-
channel1.stub(:on_extended_data)
|
125
|
-
@tengine1.receive("start.job.job.tengine", :properties => {
|
126
|
-
:target_job_id => @ctx.vertex(:j11).id.to_s,
|
127
|
-
:target_job_name_path => @ctx.vertex(:j11).name_path,
|
128
|
-
}.update(@base_props))
|
129
|
-
:end
|
130
|
-
end
|
131
|
-
|
132
|
-
@f2 = Fiber.new do
|
133
|
-
ssh2 = mock(:ssh2)
|
134
|
-
Net::SSH.should_receive(:start).with("192.168.1.2",
|
135
|
-
an_instance_of(Tengine::Resource::Credential),
|
136
|
-
an_instance_of(Hash)).once.and_yield(ssh2)
|
137
|
-
channel2 = mock(:channel2)
|
138
|
-
ssh2.stub(:open_channel).and_yield(channel2)
|
139
|
-
channel2.stub(:exec).with(any_args).and_yield(channel2, true)
|
140
|
-
channel2.should_receive(:on_close) do
|
141
|
-
Tengine.logger.debug( ("!" * 100) << "\non_close: Fiber.yield #{Process.pid} #{__FILE__}##{__LINE__}")
|
142
|
-
Fiber.yield
|
143
|
-
end # on_dataが呼び出される前に止める
|
144
|
-
channel2.should_receive(:on_data).and_yield(channel2, @pid)
|
145
|
-
channel2.stub(:on_extended_data)
|
146
|
-
@tengine2.receive("start.job.job.tengine", :properties => {
|
147
|
-
:target_job_id => @ctx.vertex(:j12).id.to_s,
|
148
|
-
:target_job_name_path => @ctx.vertex(:j12).name_path,
|
149
|
-
}.update(@base_props))
|
150
|
-
:end
|
151
|
-
end
|
152
|
-
|
153
|
-
@j11 = @root.element("j11")
|
154
|
-
@j12 = @root.element("j12")
|
155
|
-
|
156
|
-
@root.reload
|
157
|
-
@root.version.should == 0
|
158
|
-
Tengine::Job.test_harness_clear
|
159
|
-
end
|
160
|
-
|
161
|
-
it "パターン1" do
|
162
|
-
# f1-1.
|
163
|
-
Tengine.logger.info("1" * 100)
|
164
|
-
Tengine::Job.should_receive(:test_harness).with(1, "before yield in update_with_lock").once
|
165
|
-
Tengine::Job.should_receive(:test_harness).with(2, "after yield in update_with_lock").once{ Fiber.yield}
|
166
|
-
@f1.resume.should_not == :end
|
167
|
-
@root.reload
|
168
|
-
@root.version.should == 0
|
169
|
-
@root.element("j11").phase_key.should == :ready
|
170
|
-
@root.element("j12").phase_key.should == :ready
|
171
|
-
|
172
|
-
# f2-1.
|
173
|
-
Tengine.logger.info("2" * 100)
|
174
|
-
Tengine::Job.should_receive(:test_harness).with(3, "before yield in update_with_lock").once
|
175
|
-
Tengine::Job.should_receive(:test_harness).with(4, "after yield in update_with_lock").once{ Fiber.yield}
|
176
|
-
@f2.resume.should_not == :end
|
177
|
-
@root.reload
|
178
|
-
@root.version.should == 0
|
179
|
-
@root.element("j11").phase_key.should == :ready
|
180
|
-
@root.element("j12").phase_key.should == :ready
|
181
|
-
|
182
|
-
# f1-2.
|
183
|
-
Tengine.logger.info("3" * 100)
|
184
|
-
Tengine::Job.should_receive(:test_harness).with(5, "after update_with_lock").once{ Fiber.yield}
|
185
|
-
@f1.resume.should_not == :end
|
186
|
-
@root.reload
|
187
|
-
@root.version.should == 1
|
188
|
-
@root.element("j11").phase_key.should == :starting
|
189
|
-
@root.element("j12").phase_key.should == :ready
|
190
|
-
|
191
|
-
# f2-1.
|
192
|
-
Tengine.logger.info("4" * 100)
|
193
|
-
Tengine::Job.should_receive(:test_harness).with(6, "before yield in update_with_lock").once.once
|
194
|
-
Tengine::Job.should_receive(:test_harness).with(7, "after yield in update_with_lock").once.once{ Fiber.yield}
|
195
|
-
@f2.resume.should_not == :end
|
196
|
-
@root.reload
|
197
|
-
@root.version.should == 1
|
198
|
-
@root.element("j11").phase_key.should == :starting
|
199
|
-
@root.element("j12").phase_key.should == :ready
|
200
|
-
|
201
|
-
# f2-2.
|
202
|
-
Tengine.logger.info("5" * 100)
|
203
|
-
Tengine::Job.should_receive(:test_harness).with(8, "after update_with_lock").once{ Fiber.yield}
|
204
|
-
@f2.resume.should_not == :end
|
205
|
-
@root.reload
|
206
|
-
@root.version.should == 2
|
207
|
-
@root.element("j11").phase_key.should == :starting
|
208
|
-
@root.element("j12").phase_key.should == :starting
|
209
|
-
|
210
|
-
# f1-3.
|
211
|
-
Tengine.logger.info("6" * 100)
|
212
|
-
@f1.resume.should_not == :end
|
213
|
-
@root.reload
|
214
|
-
@root.version.should == 2
|
215
|
-
@root.element("j11").phase_key.should == :starting
|
216
|
-
@root.element("j12").phase_key.should == :starting
|
217
|
-
|
218
|
-
# f2-3.
|
219
|
-
Tengine.logger.info("7" * 100)
|
220
|
-
@f2.resume.should_not == :end
|
221
|
-
@root.reload
|
222
|
-
@root.version.should == 2
|
223
|
-
@root.element("j11").phase_key.should == :starting
|
224
|
-
@root.element("j12").phase_key.should == :starting
|
225
|
-
|
226
|
-
# f1-4.
|
227
|
-
Tengine.logger.info("8" * 100)
|
228
|
-
Tengine::Job.should_receive(:test_harness).with(9, "before yield in update_with_lock").once
|
229
|
-
Tengine::Job.should_receive(:test_harness).with(10, "after yield in update_with_lock").once{ Fiber.yield }
|
230
|
-
@f1.resume.should_not == :end
|
231
|
-
@root.reload
|
232
|
-
@root.version.should == 2
|
233
|
-
@root.element("j11").phase_key.should == :starting
|
234
|
-
@root.element("j12").phase_key.should == :starting
|
235
|
-
|
236
|
-
# f1-5.
|
237
|
-
Tengine.logger.info("9" * 100)
|
238
|
-
Tengine::Job.should_receive(:test_harness).with(11, "after update_with_lock").once
|
239
|
-
@f1.resume.should == :end
|
240
|
-
@root.reload
|
241
|
-
@root.version.should == 3
|
242
|
-
@root.element("j11").tap do |j|
|
243
|
-
j.phase_key.should == :running
|
244
|
-
j.executing_pid.should_not be_nil
|
245
|
-
end
|
246
|
-
@root.element("j12").phase_key.should == :starting
|
247
|
-
|
248
|
-
# f2-4. 1st
|
249
|
-
Tengine.logger.info("a" * 100)
|
250
|
-
Tengine::Job.should_receive(:test_harness).with(12, "before yield in update_with_lock").once
|
251
|
-
Tengine::Job.should_receive(:test_harness).with(13, "after yield in update_with_lock").once{ Fiber.yield }
|
252
|
-
@f2.resume.should_not == :end
|
253
|
-
@root.reload
|
254
|
-
@root.version.should == 3
|
255
|
-
@root.element("j11").tap do |j|
|
256
|
-
j.phase_key.should == :running
|
257
|
-
j.executing_pid.should_not be_nil
|
258
|
-
end
|
259
|
-
@root.element("j12").phase_key.should == :starting
|
260
|
-
|
261
|
-
# f2-5.
|
262
|
-
Tengine.logger.info("b" * 100)
|
263
|
-
Tengine::Job.should_receive(:test_harness).with(14, "before yield in update_with_lock").once{ Fiber.yield }
|
264
|
-
@f2.resume.should_not == :end
|
265
|
-
@root.reload
|
266
|
-
@root.version.should == 3
|
267
|
-
@root.element("j11").tap do |j|
|
268
|
-
j.phase_key.should == :running
|
269
|
-
j.executing_pid.should_not be_nil
|
270
|
-
end
|
271
|
-
@root.element("j12").phase_key.should == :starting
|
272
|
-
|
273
|
-
# f2-4. 2nd
|
274
|
-
Tengine.logger.info("c" * 100)
|
275
|
-
Tengine::Job.should_receive(:test_harness).with(15, "after yield in update_with_lock").once{ Fiber.yield }
|
276
|
-
@f2.resume.should_not == :end
|
277
|
-
@root.reload
|
278
|
-
@root.version.should == 3
|
279
|
-
@root.element("j11").tap do |j|
|
280
|
-
j.phase_key.should == :running
|
281
|
-
j.executing_pid.should_not be_nil
|
282
|
-
end
|
283
|
-
@root.element("j12").phase_key.should == :starting
|
284
|
-
|
285
|
-
# f2-5.
|
286
|
-
Tengine.logger.info("d" * 100)
|
287
|
-
Tengine::Job.should_receive(:test_harness).with(16, "after update_with_lock").once
|
288
|
-
@f2.resume.should == :end
|
289
|
-
@root.reload
|
290
|
-
@root.version.should == 4
|
291
|
-
@root.element("j11").tap do |j|
|
292
|
-
j.phase_key.should == :running
|
293
|
-
j.executing_pid.should_not be_nil
|
294
|
-
end
|
295
|
-
@root.element("j12").tap do |j|
|
296
|
-
j.executing_pid.should_not be_nil
|
297
|
-
j.phase_key.should == :running
|
298
|
-
end
|
299
|
-
end
|
300
|
-
|
301
|
-
end
|
302
|
-
end
|
@@ -1,120 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
require 'spec_helper'
|
3
|
-
require 'tengine/rspec'
|
4
|
-
|
5
|
-
describe 'job_control_driver' do
|
6
|
-
include Tengine::RSpec::Extension
|
7
|
-
|
8
|
-
target_dsl File.expand_path("../../../../../lib/tengine/job/drivers/job_control_driver.rb", File.dirname(__FILE__))
|
9
|
-
driver :job_control_driver
|
10
|
-
|
11
|
-
shared_examples_for "/rjn0008/rjn0001/j11を実行する際の環境変数" do |dsl_version|
|
12
|
-
it "expansionだったジョブネットよりも上位のジョブの情報は出力されない" do
|
13
|
-
@rjn0001 = @root.vertex_by_name_path("/rjn0008/rjn0001")
|
14
|
-
@j11 = @root.vertex_by_name_path("/rjn0008/rjn0001/j11")
|
15
|
-
@root.phase_key = :running
|
16
|
-
@rjn0001.phase_key = :running
|
17
|
-
@j11.phase_key = :ready
|
18
|
-
@j11.prev_edges.each{|edge| edge.phase_key = :transmitting}
|
19
|
-
@root.save!
|
20
|
-
@root.reload
|
21
|
-
tengine.should_not_fire
|
22
|
-
mock_ssh = mock(:ssh)
|
23
|
-
mock_channel = mock(:channel)
|
24
|
-
Net::SSH.should_receive(:start).
|
25
|
-
with("localhost", an_instance_of(Tengine::Resource::Credential), an_instance_of(Hash)).and_yield(mock_ssh)
|
26
|
-
mock_ssh.should_receive(:open_channel).and_yield(mock_channel)
|
27
|
-
mock_channel.should_receive(:exec) do |*args|
|
28
|
-
args.length.should == 1
|
29
|
-
# args.first.should =~ %r<source \/etc\/profile && export MM_ACTUAL_JOB_ID=[0-9a-f]{24} MM_ACTUAL_JOB_ANCESTOR_IDS=\\"[0-9a-f]{24}\\" MM_FULL_ACTUAL_JOB_ANCESTOR_IDS=\\"[0-9a-f]{24}\\" MM_ACTUAL_JOB_NAME_PATH=\\"/rjn0001/j11\\" MM_ACTUAL_JOB_SECURITY_TOKEN= MM_SCHEDULE_ID=[0-9a-f]{24} MM_SCHEDULE_ESTIMATED_TIME= MM_TEMPLATE_JOB_ID=[0-9a-f]{24} MM_TEMPLATE_JOB_ANCESTOR_IDS=\\"[0-9a-f]{24}\\" && tengine_job_agent_run -- \$HOME/j11\.sh>
|
30
|
-
args.first.should =~ %r<source \/etc\/profile>
|
31
|
-
t_rjn1001 = Tengine::Job::RootJobnetTemplate.find_by_name("rjn0001")
|
32
|
-
t_rjn1001.dsl_version.should == dsl_version
|
33
|
-
t_j11 = t_rjn1001.vertex_by_name_path("/rjn0001/j11")
|
34
|
-
args.first.should =~ %r<MM_TEMPLATE_JOB_ID=#{t_j11.id.to_s}>
|
35
|
-
args.first.should_not =~ %r<MM_TEMPLATE_JOB_ANCESTOR_IDS=\"#{@template.id.to_s};#{t_rjn1001.id.to_s}\">
|
36
|
-
args.first.should =~ %r<MM_TEMPLATE_JOB_ANCESTOR_IDS=\"#{t_rjn1001.id.to_s}\">
|
37
|
-
args.first.should =~ %r<job_test j11>
|
38
|
-
end
|
39
|
-
tengine.receive("start.job.job.tengine", :properties => {
|
40
|
-
:execution_id => @execution.id.to_s,
|
41
|
-
:root_jobnet_id => @root.id.to_s,
|
42
|
-
:target_jobnet_id => @rjn0001.id.to_s,
|
43
|
-
:target_job_id => @j11.id.to_s,
|
44
|
-
})
|
45
|
-
@root.reload
|
46
|
-
@rjn0001 = @root.vertex_by_name_path("/rjn0008/rjn0001")
|
47
|
-
@j11 = @root.vertex_by_name_path("/rjn0008/rjn0001/j11")
|
48
|
-
@root.phase_key = :running
|
49
|
-
@rjn0001.phase_key = :running
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
# in [rjn0008]
|
54
|
-
# (S1) --e1-->(rjn0001)--e2-->(rjn0002)--e3-->(E1)
|
55
|
-
#
|
56
|
-
# in [rjn0001]
|
57
|
-
# (S1) --e1-->(j11)--e2-->(j12)--e3-->(E1)
|
58
|
-
#
|
59
|
-
# in [rjn0002]
|
60
|
-
# |--e2-->(j11)--e4-->|
|
61
|
-
# (S1)--e1-->[F1] [J1]--e6-->(E1)
|
62
|
-
# |--e3-->(j12)--e5-->|
|
63
|
-
context "rjn0008" do
|
64
|
-
before do
|
65
|
-
Tengine::Core::Setting.delete_all
|
66
|
-
Tengine::Core::Setting.create!(:name => "dsl_version", :value => "1")
|
67
|
-
Tengine::Job::Vertex.delete_all
|
68
|
-
Rjn0001SimpleJobnetBuilder.new.create_template
|
69
|
-
Rjn0002SimpleParallelJobnetBuilder.new.create_template
|
70
|
-
builder = Rjn0008ExpansionFixture.new
|
71
|
-
@template = builder.create_template
|
72
|
-
@root = @template.generate
|
73
|
-
@ctx = builder.context
|
74
|
-
@execution = Tengine::Job::Execution.create!({
|
75
|
-
:root_jobnet_id => @root.id,
|
76
|
-
})
|
77
|
-
@base_props = {
|
78
|
-
:execution_id => @execution.id.to_s,
|
79
|
-
:root_jobnet_id => @root.id.to_s,
|
80
|
-
:target_jobnet_id => @root.id.to_s,
|
81
|
-
}
|
82
|
-
end
|
83
|
-
|
84
|
-
it_should_behave_like "/rjn0008/rjn0001/j11を実行する際の環境変数", "1"
|
85
|
-
end
|
86
|
-
|
87
|
-
context "複数のバージョンのデータがある場合" do
|
88
|
-
before do
|
89
|
-
Tengine::Core::Setting.delete_all
|
90
|
-
Tengine::Core::Setting.create!(:name => "dsl_version", :value => "2")
|
91
|
-
Tengine::Job::Vertex.delete_all
|
92
|
-
Rjn0001SimpleJobnetBuilder.new.tap do |f|
|
93
|
-
f.create_template(:dsl_version => "1")
|
94
|
-
f.create_template(:dsl_version => "2")
|
95
|
-
end
|
96
|
-
Rjn0002SimpleParallelJobnetBuilder.new.tap do |f|
|
97
|
-
f.create_template(:dsl_version => "1")
|
98
|
-
f.create_template(:dsl_version => "2")
|
99
|
-
end
|
100
|
-
builder = Rjn0008ExpansionFixture.new
|
101
|
-
builder.create_template(:dsl_version => "1")
|
102
|
-
@template = builder.create_template(:dsl_version => "2")
|
103
|
-
@root = @template.generate
|
104
|
-
@ctx = builder.context
|
105
|
-
@execution = Tengine::Job::Execution.create!({
|
106
|
-
:root_jobnet_id => @root.id,
|
107
|
-
})
|
108
|
-
@base_props = {
|
109
|
-
:execution_id => @execution.id.to_s,
|
110
|
-
:root_jobnet_id => @root.id.to_s,
|
111
|
-
:target_jobnet_id => @root.id.to_s,
|
112
|
-
}
|
113
|
-
end
|
114
|
-
|
115
|
-
it_should_behave_like "/rjn0008/rjn0001/j11を実行する際の環境変数", '2'
|
116
|
-
end
|
117
|
-
|
118
|
-
end
|
119
|
-
|
120
|
-
|
@@ -1,159 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
require 'spec_helper'
|
3
|
-
require 'tengine/rspec'
|
4
|
-
|
5
|
-
require 'net/ssh'
|
6
|
-
|
7
|
-
describe "<BUG>(tengined複数起動)強制停止すると、ステータスが「強制停止済」ではなく「エラー終了」になる" do
|
8
|
-
include Tengine::RSpec::Extension
|
9
|
-
|
10
|
-
driver_path = File.expand_path("../../../../../lib/tengine/job/drivers/job_control_driver.rb", File.dirname(__FILE__))
|
11
|
-
|
12
|
-
#
|
13
|
-
# in [jn0004]
|
14
|
-
# |--e3-->(j2)--e5-->|
|
15
|
-
# (S1)--e1-->(j1)--e2-->[F1] [J1]--e7-->(j4)--e8-->(E1)
|
16
|
-
# |--e4-->(j3)--e6-->|
|
17
|
-
#
|
18
|
-
# in [jn0004/finally]
|
19
|
-
# (S2) --e9-->(jn0004_f)-e10-->(E2)
|
20
|
-
#
|
21
|
-
# 現象:
|
22
|
-
# j1を強制停止した際に、プロセスが2つ動いている場合、その片方のプロセスAが
|
23
|
-
# stop.job.job.tengineイベントを受け取りSSHでtengine_job_agent_killを実行します。
|
24
|
-
# その実行の戻り値を得るまでの間に、もう片方のプロセスBが、プロセスが終了して発火される
|
25
|
-
# finished.process.job.tengineイベントを処理すると、stop_reasonがuser_stopでなくなってしまっていました。
|
26
|
-
#
|
27
|
-
context "jn0004" do
|
28
|
-
before do
|
29
|
-
Tengine::Resource::Server.delete_all
|
30
|
-
Tengine::Job::Execution.delete_all
|
31
|
-
Tengine::Job::Vertex.delete_all
|
32
|
-
TestCredentialFixture.test_credential1
|
33
|
-
TestServerFixture.test_server1
|
34
|
-
TestServerFixture.test_server2
|
35
|
-
builder = Rjn0004ParallelJobnetWithFinally.new
|
36
|
-
@root = builder.create_actual
|
37
|
-
@ctx = builder.context
|
38
|
-
@execution = Tengine::Job::Execution.create!({
|
39
|
-
:root_jobnet_id => @root.id,
|
40
|
-
})
|
41
|
-
@base_props = {
|
42
|
-
:execution_id => @execution.id.to_s,
|
43
|
-
:root_jobnet_id => @root.id.to_s,
|
44
|
-
:root_jobnet_name_path => @root.name_path,
|
45
|
-
:target_jobnet_id => @root.id.to_s,
|
46
|
-
:target_jobnet_name_path => @root.name_path,
|
47
|
-
}
|
48
|
-
|
49
|
-
# 2つのプロセスの代わりに、2つのカーネルを別のFiberで動かす
|
50
|
-
@bootstrap1 = Tengine::Core::Bootstrap.new(:tengined => { :load_path => driver_path })
|
51
|
-
@bootstrap1.kernel.tap{|k| k.bind; k.evaluate}
|
52
|
-
@tengine1 = Tengine::RSpec::ContextWrapper.new(@bootstrap1.kernel)
|
53
|
-
#
|
54
|
-
@bootstrap2 = Tengine::Core::Bootstrap.new(:tengined => { :load_path => driver_path })
|
55
|
-
@bootstrap2.kernel.tap{|k| k.bind; k.evaluate}
|
56
|
-
@tengine2 = Tengine::RSpec::ContextWrapper.new(@bootstrap2.kernel)
|
57
|
-
end
|
58
|
-
|
59
|
-
before do
|
60
|
-
@pid = "123"
|
61
|
-
@ctx[:e1].phase_key = :transmitted
|
62
|
-
@ctx[:j1].tap do |j|
|
63
|
-
j.phase_key = :running
|
64
|
-
j.executing_pid = @pid
|
65
|
-
end
|
66
|
-
@root.phase_key = :running
|
67
|
-
@root.version = 4
|
68
|
-
@root.save!
|
69
|
-
|
70
|
-
@f1 = Fiber.new do
|
71
|
-
@tengine1.should_not_fire
|
72
|
-
ssh1 = mock(:ssh1)
|
73
|
-
Net::SSH.should_receive(:start).with("localhost",
|
74
|
-
an_instance_of(Tengine::Resource::Credential),
|
75
|
-
an_instance_of(Hash)).once.and_yield(ssh1)
|
76
|
-
channel1 = mock(:channel1)
|
77
|
-
ssh1.stub(:open_channel).and_yield(channel1)
|
78
|
-
channel1.stub(:exec).with(any_args).and_yield(channel1, true)
|
79
|
-
channel1.should_receive(:on_close) do
|
80
|
-
Tengine.logger.debug( ("!" * 100) << "\non_close: Fiber.yield #{Process.pid} #{__FILE__}##{__LINE__}")
|
81
|
-
Fiber.yield
|
82
|
-
end # on_dataが呼び出される前に止める
|
83
|
-
channel1.stub(:on_data)
|
84
|
-
channel1.stub(:on_extended_data)
|
85
|
-
@tengine1.receive("stop.job.job.tengine", :properties => {
|
86
|
-
:stop_reason => "user_stop",
|
87
|
-
:target_job_id => @ctx.vertex(:j1).id.to_s,
|
88
|
-
:target_job_name_path => @ctx.vertex(:j1).name_path,
|
89
|
-
}.update(@base_props))
|
90
|
-
:end
|
91
|
-
end
|
92
|
-
|
93
|
-
@f2 = Fiber.new do
|
94
|
-
@tengine2.should_fire(:"error.job.job.tengine", {
|
95
|
-
:source_name => @ctx[:j1].name_as_resource,
|
96
|
-
:properties=>{
|
97
|
-
:target_job_id => @ctx.vertex(:j1).id.to_s,
|
98
|
-
:target_job_name_path => @ctx.vertex(:j1).name_path,
|
99
|
-
:exit_status=>nil,
|
100
|
-
}.update(@base_props)
|
101
|
-
})
|
102
|
-
@tengine2.receive("finished.process.job.tengine", :properties => {
|
103
|
-
:pid=>17485,
|
104
|
-
:exit_status=>nil,
|
105
|
-
:target_job_id => @ctx.vertex(:j1).id.to_s,
|
106
|
-
:target_job_name_path => @ctx.vertex(:j1).name_path,
|
107
|
-
}.update(@base_props))
|
108
|
-
:end
|
109
|
-
end
|
110
|
-
|
111
|
-
@j1 = @root.element("j1")
|
112
|
-
|
113
|
-
@root.reload
|
114
|
-
@root.version.should == 4
|
115
|
-
Tengine::Job.test_harness_clear
|
116
|
-
end
|
117
|
-
|
118
|
-
it "tengine_job_agent_killの戻り値の前にfinished.process.job.tengineが来ても強制終了となるべき" do
|
119
|
-
# f1-1.
|
120
|
-
Tengine.logger.info("1" * 100)
|
121
|
-
# Tengine::Job.should_receive(:test_harness).with(1, "before yield in update_with_lock").once
|
122
|
-
# Tengine::Job.should_receive(:test_harness).with(2, "after yield in update_with_lock").once{ Fiber.yield}
|
123
|
-
@f1.resume.should_not == :end
|
124
|
-
@root.reload
|
125
|
-
@root.version.should == 5
|
126
|
-
@root.element("j1").tap do |j|
|
127
|
-
j.phase_key.should == :dying
|
128
|
-
j.executing_pid.should == @pid
|
129
|
-
j.stop_reason.should == "user_stop"
|
130
|
-
end
|
131
|
-
|
132
|
-
# f2
|
133
|
-
Tengine.logger.info("2" * 100)
|
134
|
-
# Tengine::Job.should_receive(:test_harness).with(3, "before yield in update_with_lock").once
|
135
|
-
# Tengine::Job.should_receive(:test_harness).with(4, "after yield in update_with_lock").once{ Fiber.yield}
|
136
|
-
@f2.resume.should == :end
|
137
|
-
@root.reload
|
138
|
-
@root.version.should == 6
|
139
|
-
@root.element("j1").tap do |j|
|
140
|
-
j.phase_key.should == :error
|
141
|
-
j.executing_pid.should == @pid
|
142
|
-
j.stop_reason.should == "user_stop"
|
143
|
-
end
|
144
|
-
|
145
|
-
# f1-2.
|
146
|
-
Tengine.logger.info("3" * 100)
|
147
|
-
# Tengine::Job.should_receive(:test_harness).with(5, "after update_with_lock").once{ Fiber.yield}
|
148
|
-
@f1.resume.should == :end
|
149
|
-
@root.reload
|
150
|
-
@root.version.should == 6
|
151
|
-
@root.element("j1").tap do |j|
|
152
|
-
j.phase_key.should == :error
|
153
|
-
j.executing_pid.should == @pid
|
154
|
-
j.stop_reason.should == "user_stop"
|
155
|
-
end
|
156
|
-
end
|
157
|
-
|
158
|
-
end
|
159
|
-
end
|