tengine_job 0.6.9
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +23 -0
- data/Gemfile.lock +109 -0
- data/README.rdoc +20 -0
- data/Rakefile +42 -0
- data/VERSION +1 -0
- data/examples/0004_retry_one_layer.rb +24 -0
- data/examples/0004_retry_one_layer.sh +38 -0
- data/examples/0005_retry_two_layer.rb +54 -0
- data/examples/0005_retry_two_layer.sh +80 -0
- data/examples/0006_retry_three_layer.rb +58 -0
- data/examples/0006_retry_three_layer.sh +74 -0
- data/examples/0007_simple_jobnet.rb +7 -0
- data/examples/0021_dynamic_env.rb +20 -0
- data/examples/VERSION +1 -0
- data/examples/tengine_job_test.sh +10 -0
- data/lib/tengine/job.rb +94 -0
- data/lib/tengine/job/category.rb +54 -0
- data/lib/tengine/job/connectable.rb +43 -0
- data/lib/tengine/job/drivers/job_control_driver.rb +82 -0
- data/lib/tengine/job/drivers/job_execution_driver.rb +30 -0
- data/lib/tengine/job/drivers/jobnet_control_driver.rb +117 -0
- data/lib/tengine/job/drivers/schedule_driver.rb +30 -0
- data/lib/tengine/job/dsl_binder.rb +12 -0
- data/lib/tengine/job/dsl_evaluator.rb +18 -0
- data/lib/tengine/job/dsl_loader.rb +180 -0
- data/lib/tengine/job/edge.rb +150 -0
- data/lib/tengine/job/element_selector_notation.rb +169 -0
- data/lib/tengine/job/end.rb +32 -0
- data/lib/tengine/job/executable.rb +74 -0
- data/lib/tengine/job/execution.rb +141 -0
- data/lib/tengine/job/expansion.rb +37 -0
- data/lib/tengine/job/fork.rb +6 -0
- data/lib/tengine/job/job.rb +23 -0
- data/lib/tengine/job/jobnet.rb +173 -0
- data/lib/tengine/job/jobnet/builder.rb +150 -0
- data/lib/tengine/job/jobnet/job_state_transition.rb +167 -0
- data/lib/tengine/job/jobnet/jobnet_state_transition.rb +110 -0
- data/lib/tengine/job/jobnet/state_transition.rb +37 -0
- data/lib/tengine/job/jobnet_actual.rb +55 -0
- data/lib/tengine/job/jobnet_template.rb +10 -0
- data/lib/tengine/job/join.rb +6 -0
- data/lib/tengine/job/junction.rb +29 -0
- data/lib/tengine/job/killing.rb +30 -0
- data/lib/tengine/job/mm_compatibility.rb +6 -0
- data/lib/tengine/job/mm_compatibility/connectable.rb +13 -0
- data/lib/tengine/job/name_path.rb +31 -0
- data/lib/tengine/job/root.rb +16 -0
- data/lib/tengine/job/root_jobnet_actual.rb +39 -0
- data/lib/tengine/job/root_jobnet_template.rb +49 -0
- data/lib/tengine/job/script_executable.rb +235 -0
- data/lib/tengine/job/signal.rb +121 -0
- data/lib/tengine/job/start.rb +20 -0
- data/lib/tengine/job/stoppable.rb +15 -0
- data/lib/tengine/job/vertex.rb +172 -0
- data/lib/tengine_job.rb +3 -0
- data/spec/fixtures/rjn_0001_simple_jobnet_builder.rb +42 -0
- data/spec/fixtures/rjn_0002_simple_parallel_jobnet_builder.rb +42 -0
- data/spec/fixtures/rjn_0003_fork_join_jobnet_builder.rb +61 -0
- data/spec/fixtures/rjn_0004_parallel_jobnet_with_finally_fixture.rb +62 -0
- data/spec/fixtures/rjn_0005_retry_two_layer_fixture.rb +153 -0
- data/spec/fixtures/rjn_0008_expansion_fixture.rb +32 -0
- data/spec/fixtures/rjn_0009_tree_sequential_jobnet_builder.rb +174 -0
- data/spec/fixtures/rjn_0010_2jobs_and_1job_parallel_jobnet_builder.rb +39 -0
- data/spec/fixtures/rjn_0011_nested_fork_jobnet_builder.rb +96 -0
- data/spec/fixtures/rjn_0012_nested_and_finally_builder.rb +157 -0
- data/spec/fixtures/rjn_1004_hadoop_job_in_jobnet_fixture.rb +105 -0
- data/spec/fixtures/rjn_means_root_jobnet +0 -0
- data/spec/fixtures/test_credential_fixture.rb +12 -0
- data/spec/fixtures/test_server_fixture.rb +28 -0
- data/spec/mongoid.yml +35 -0
- data/spec/spec_helper.rb +56 -0
- data/spec/sshd/.gitignore +1 -0
- data/spec/sshd/id_rsa +51 -0
- data/spec/sshd/id_rsa.pub +1 -0
- data/spec/sshd/ssh_host_rsa_key +51 -0
- data/spec/sshd/ssh_host_rsa_key.pub +1 -0
- data/spec/sshd/sshd_config +10 -0
- data/spec/sshd/sshd_config.erb +11 -0
- data/spec/sshd/tengine_job_test.sh +6 -0
- data/spec/support/jobnet_fixture_builder.rb +145 -0
- data/spec/support/mongo_index_key_log.rb +91 -0
- data/spec/tengine/job/category_spec.rb +193 -0
- data/spec/tengine/job/connectable_spec.rb +94 -0
- data/spec/tengine/job/drivers/job_controll_driver/connection_error_spec.rb +236 -0
- data/spec/tengine/job/drivers/job_controll_driver/duplicated_job_start_spec.rb +302 -0
- data/spec/tengine/job/drivers/job_controll_driver/expansion_spec.rb +120 -0
- data/spec/tengine/job/drivers/job_controll_driver/stop_spec.rb +159 -0
- data/spec/tengine/job/drivers/job_controll_driver_spec.rb +623 -0
- data/spec/tengine/job/drivers/job_execution_driver_spec.rb +88 -0
- data/spec/tengine/job/drivers/jobnet_control_driver/nested_and_finally_spec.rb +472 -0
- data/spec/tengine/job/drivers/jobnet_control_driver/nested_jobnet_spec.rb +231 -0
- data/spec/tengine/job/drivers/jobnet_control_driver/stop_jobnet_spec.rb +202 -0
- data/spec/tengine/job/drivers/jobnet_control_driver_spec.rb +446 -0
- data/spec/tengine/job/drivers/schedule_driver_spec.rb +202 -0
- data/spec/tengine/job/dsl_binder_spec.rb +36 -0
- data/spec/tengine/job/dsl_loader_spec.rb +403 -0
- data/spec/tengine/job/dsls/0013_hadoop_job_run.rb +29 -0
- data/spec/tengine/job/dsls/0014_join_and_join.rb +19 -0
- data/spec/tengine/job/dsls/0015_fork_and_fork.rb +18 -0
- data/spec/tengine/job/dsls/0016_complex_fork_and_join.rb +20 -0
- data/spec/tengine/job/dsls/0017_finally.rb +15 -0
- data/spec/tengine/job/dsls/0018_expansion.rb +23 -0
- data/spec/tengine/job/dsls/0019_execute_job_on_event.rb +16 -0
- data/spec/tengine/job/dsls/0020_duplicated_jobnet_name.rb +16 -0
- data/spec/tengine/job/dsls/1060_test_dir1/1060_test_dir2/0013_hadoop_job_run.rb +29 -0
- data/spec/tengine/job/dsls/2003_expansion/expansion_5.rb +11 -0
- data/spec/tengine/job/dsls/VERSION +1 -0
- data/spec/tengine/job/dynamic_env_spec.rb +95 -0
- data/spec/tengine/job/edge_spec.rb +241 -0
- data/spec/tengine/job/element_selector_notation_spec.rb +354 -0
- data/spec/tengine/job/examples_spec.rb +62 -0
- data/spec/tengine/job/execution_spec.rb +100 -0
- data/spec/tengine/job/expansion_spec.rb +116 -0
- data/spec/tengine/job/hadoop_job_run_spec.rb +65 -0
- data/spec/tengine/job/job_spec.rb +4 -0
- data/spec/tengine/job/jobnet/1015_complecated_jobnet_spec.rb +72 -0
- data/spec/tengine/job/jobnet_actual_spec.rb +175 -0
- data/spec/tengine/job/jobnet_spec.rb +399 -0
- data/spec/tengine/job/jobnet_template_spec.rb +240 -0
- data/spec/tengine/job/killing_spec.rb +91 -0
- data/spec/tengine/job/reset_spec.rb +958 -0
- data/spec/tengine/job/reset_spec/4056_1_dump.txt +1 -0
- data/spec/tengine/job/root_jobnet_actual_spec.rb +89 -0
- data/spec/tengine/job/root_jobnet_template_spec.rb +248 -0
- data/spec/tengine/job/script_executable_spec.rb +132 -0
- data/spec/tengine/job/stoppable_spec.rb +176 -0
- data/spec/tengine/job/vertex_spec.rb +25 -0
- data/spec/tengine_job_spec.rb +4 -0
- data/tengine_job.gemspec +197 -0
- data/tmp/log/.gitignore +1 -0
- metadata +296 -0
@@ -0,0 +1,94 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Tengine::Job::Connectable do
|
5
|
+
|
6
|
+
context "Rjn0001SimpleJobnetBuilderを使う場合" do
|
7
|
+
[:actual, :template].each do |jobnet_type|
|
8
|
+
context "#{jobnet_type}の場合" do
|
9
|
+
|
10
|
+
before(:all) do
|
11
|
+
builder = Rjn0009TreeSequentialJobnetBuilder.new
|
12
|
+
builder.send(:"create_#{jobnet_type}")
|
13
|
+
@ctx = builder.context
|
14
|
+
end
|
15
|
+
|
16
|
+
{
|
17
|
+
"rjn0009" => [nil, nil],
|
18
|
+
"j1100" => ["test_credential1" , "test_server1"],
|
19
|
+
"j1110" => ["test_credential1" , "test_server1"],
|
20
|
+
"j1120" => ["test_credential1" , "test_server1"],
|
21
|
+
"j1200" => ["test_credential1" , nil ],
|
22
|
+
"j1210" => ["test_credential1" , "mysql_master"],
|
23
|
+
"j1300" => [nil , "mysql_master"],
|
24
|
+
"j1310" => ["test_credential1" , "mysql_master"],
|
25
|
+
"j1400" => [nil , nil ],
|
26
|
+
"j1410" => ["test_credential1" , "mysql_master"],
|
27
|
+
"j1500" => ["test_credential1" , "mysql_master"],
|
28
|
+
"j1510" => ["test_credential1" , "mysql_master"],
|
29
|
+
"j1511" => ["test_credential1" , "mysql_master"],
|
30
|
+
"j1600" => ["test_credential1" , "mysql_master"],
|
31
|
+
"j1610" => ["test_credential1" , "mysql_master"],
|
32
|
+
"j1611" => ["test_credential1" , "test_server1"],
|
33
|
+
"j1612" => ["gohan_ssh_pk" , "mysql_master"],
|
34
|
+
"j1620" => ["test_credential1" , "test_server1"],
|
35
|
+
"j1621" => ["test_credential1" , "test_server1"],
|
36
|
+
"j1630" => ["gohan_ssh_pk", "mysql_master" ],
|
37
|
+
"j1631" => ["gohan_ssh_pk", "mysql_master" ],
|
38
|
+
}.each do |job_name, (credential_name, server_name)|
|
39
|
+
context job_name do
|
40
|
+
subject{ @ctx[job_name.to_sym] }
|
41
|
+
its(:actual_credential_name){ should == credential_name }
|
42
|
+
its(:actual_server_name){ should == server_name }
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
50
|
+
|
51
|
+
describe :actual_credential do
|
52
|
+
before do
|
53
|
+
resource_fixture = GokuAtEc2ApNortheast.new
|
54
|
+
resource_fixture.goku_ssh_pw
|
55
|
+
end
|
56
|
+
|
57
|
+
it "存在するCredentialの場合" do
|
58
|
+
jobnet = Tengine::Job::JobnetTemplate.new(:credential_name => "test_credential1")
|
59
|
+
credential = jobnet.actual_credential
|
60
|
+
credential.should be_a(Tengine::Resource::Credential)
|
61
|
+
credential.name.should == "test_credential1"
|
62
|
+
end
|
63
|
+
|
64
|
+
it "存在しないCredentialの場合" do
|
65
|
+
jobnet = Tengine::Job::JobnetTemplate.new(:credential_name => "unexist_credential")
|
66
|
+
expect{
|
67
|
+
jobnet.actual_credential
|
68
|
+
}.to raise_error(Mongoid::Errors::DocumentNotFound)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
describe :actual_server do
|
74
|
+
before do
|
75
|
+
resource_fixture = GokuAtEc2ApNortheast.new
|
76
|
+
resource_fixture.hadoop_master_node
|
77
|
+
end
|
78
|
+
|
79
|
+
it "存在するServerの場合" do
|
80
|
+
jobnet = Tengine::Job::JobnetTemplate.new(:server_name => "test_server1")
|
81
|
+
server = jobnet.actual_server
|
82
|
+
server.should be_a(Tengine::Resource::Server)
|
83
|
+
server.name.should == "test_server1"
|
84
|
+
end
|
85
|
+
|
86
|
+
it "存在しないServerの場合" do
|
87
|
+
jobnet = Tengine::Job::JobnetTemplate.new(:server_name => "unexist_server")
|
88
|
+
expect{
|
89
|
+
jobnet.actual_server
|
90
|
+
}.to raise_error(Mongoid::Errors::DocumentNotFound)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
end
|
@@ -0,0 +1,236 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'tengine/rspec'
|
4
|
+
|
5
|
+
describe 'connection error' do
|
6
|
+
include Tengine::RSpec::Extension
|
7
|
+
|
8
|
+
target_dsl File.expand_path("../../../../../lib/tengine/job/drivers/job_control_driver.rb", File.dirname(__FILE__))
|
9
|
+
driver :job_control_driver
|
10
|
+
|
11
|
+
let :ssh_dir do
|
12
|
+
File.expand_path("../../../../../sshd", __FILE__)
|
13
|
+
end
|
14
|
+
|
15
|
+
before :all do
|
16
|
+
raise "WRONG" if $_pid
|
17
|
+
|
18
|
+
uid = Etc.getlogin
|
19
|
+
case uid
|
20
|
+
when "root"
|
21
|
+
pending "rootは危険なのでこのテストを実行できません"
|
22
|
+
when NilClass
|
23
|
+
raise "who am i?"
|
24
|
+
end
|
25
|
+
|
26
|
+
# 1. sshdをさがす
|
27
|
+
sshd = nil
|
28
|
+
ENV["PATH"].split(/:/).find do |dir|
|
29
|
+
Dir.glob("#{dir}/sshd") do |path|
|
30
|
+
if File.executable?(path)
|
31
|
+
sshd = path
|
32
|
+
break
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
raise "sshd not found" unless sshd
|
38
|
+
|
39
|
+
# 2. sshd_configの生成
|
40
|
+
template = File.expand_path("sshd_config.erb", ssh_dir)
|
41
|
+
hostkey = File.expand_path("ssh_host_rsa_key", ssh_dir)
|
42
|
+
clientkey = File.expand_path("id_rsa", ssh_dir)
|
43
|
+
File.chmod(0400, hostkey, clientkey)
|
44
|
+
File.chmod(0700, ssh_dir)
|
45
|
+
$_port = nil
|
46
|
+
|
47
|
+
# 指定したポートはもう使われているかもしれないので、その際は
|
48
|
+
# sshdが起動に失敗するので、何回かポートを変えて試す。
|
49
|
+
catch(:return) do
|
50
|
+
n = 0
|
51
|
+
@port = rand(32768)
|
52
|
+
begin
|
53
|
+
Tempfile.open("sshd_config", ssh_dir) do |conf|
|
54
|
+
File.open(template, "rb") do |tmpl|
|
55
|
+
conf.write ERB.new(tmpl.read).result(binding)
|
56
|
+
end
|
57
|
+
conf.flush
|
58
|
+
conf.close(false) # no unlink
|
59
|
+
argv = [sshd, "-Def", conf.path, "-h", hostkey]
|
60
|
+
$_pid = Process.spawn(*argv)
|
61
|
+
x = Time.now
|
62
|
+
while Time.now < x + 16.0 do # まあこんくらい待てばいいでしょ
|
63
|
+
sleep 0.1
|
64
|
+
Process.waitpid2($_pid, Process::WNOHANG)
|
65
|
+
Process.kill 0, $_pid
|
66
|
+
# netstat -an は Linux / BSD ともに有効
|
67
|
+
# どちらかに限ればもう少し効率的な探し方はある。たとえば Linux 限定でよければ netstat -lnt ...
|
68
|
+
y = `netstat -an | fgrep LISTEN | fgrep #{@port}`
|
69
|
+
if y.lines.to_a.size > 1
|
70
|
+
$_port = @port
|
71
|
+
throw :return
|
72
|
+
end
|
73
|
+
end
|
74
|
+
pending "failed to invoke sshd in 16 secs."
|
75
|
+
end
|
76
|
+
rescue Errno::ECHILD, Errno::ESRCH
|
77
|
+
if (n += 1) > 10
|
78
|
+
pending "10 attempts to invoke sshd failed."
|
79
|
+
else
|
80
|
+
@port = rand(32768)
|
81
|
+
retry
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
after :all do
|
88
|
+
if $_pid
|
89
|
+
begin
|
90
|
+
Process.kill "INT", $_pid
|
91
|
+
Process.waitpid $_pid
|
92
|
+
rescue Errno::ECHILD
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
# in [rjn0001]
|
98
|
+
# (S1) --e1-->(j11)--e2-->(j12)--e3-->(E1)
|
99
|
+
#
|
100
|
+
context "rjn0001" do
|
101
|
+
before do
|
102
|
+
Tengine::Job::Vertex.delete_all
|
103
|
+
builder = Rjn0001SimpleJobnetBuilder.new
|
104
|
+
@root = builder.create_actual
|
105
|
+
@ctx = builder.context
|
106
|
+
@execution = Tengine::Job::Execution.create!({
|
107
|
+
:root_jobnet_id => @root.id,
|
108
|
+
})
|
109
|
+
@base_props = {
|
110
|
+
:execution_id => @execution.id.to_s,
|
111
|
+
:root_jobnet_id => @root.id.to_s,
|
112
|
+
:target_jobnet_id => @root.id.to_s,
|
113
|
+
}
|
114
|
+
Tengine::Resource::Server.find_by_name("test_server1").update_attributes :properties => { :ssh_port => $_port }
|
115
|
+
end
|
116
|
+
|
117
|
+
after do
|
118
|
+
# 中身を書き換えてしまうので他のテストに影響しないように削除します
|
119
|
+
Tengine::Resource::Credential.delete_all
|
120
|
+
Tengine::Resource::Server.delete_all
|
121
|
+
end
|
122
|
+
|
123
|
+
context "credential not found" do
|
124
|
+
it "対象のジョブはerrorになりエラーイベントが発火される" do
|
125
|
+
Tengine::Resource::Credential.delete_all
|
126
|
+
@root.phase_key = :starting
|
127
|
+
@ctx.edge(:e1).phase_key = :transmitting
|
128
|
+
@ctx.vertex(:j11).phase_key = :ready
|
129
|
+
@root.save!
|
130
|
+
@root.reload
|
131
|
+
tengine.should_fire(:"error.job.job.tengine", an_instance_of(Hash))
|
132
|
+
tengine.receive("start.job.job.tengine", :properties => {
|
133
|
+
:execution_id => @execution.id.to_s,
|
134
|
+
:root_jobnet_id => @root.id.to_s,
|
135
|
+
:root_jobnet_name_path => @root.name_path,
|
136
|
+
:target_jobnet_id => @root.id.to_s,
|
137
|
+
:target_jobnet_name_path => @root.name_path,
|
138
|
+
:target_job_id => @ctx.vertex(:j11).id.to_s,
|
139
|
+
:target_job_name_path => @ctx.vertex(:j11).name_path,
|
140
|
+
})
|
141
|
+
@root.reload
|
142
|
+
@ctx.edge(:e1).phase_key.should == :transmitted
|
143
|
+
@ctx.edge(:e2).phase_key.should == :active
|
144
|
+
@ctx.vertex(:j11).phase_key.should == :error
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
|
149
|
+
context "wrong credential" do
|
150
|
+
it "対象のジョブはerrorになりエラーイベントが発火される" do
|
151
|
+
credential = Tengine::Resource::Credential.find_by_name("test_credential1")
|
152
|
+
hash = credential.auth_values.dup
|
153
|
+
hash['username'] = "piccolo"
|
154
|
+
credential.auth_values = hash
|
155
|
+
credential.save!
|
156
|
+
@root.phase_key = :starting
|
157
|
+
@ctx.edge(:e1).phase_key = :transmitting
|
158
|
+
@ctx.vertex(:j11).phase_key = :ready
|
159
|
+
@root.save!
|
160
|
+
@root.reload
|
161
|
+
tengine.should_fire(:"error.job.job.tengine", an_instance_of(Hash))
|
162
|
+
tengine.receive("start.job.job.tengine", :properties => {
|
163
|
+
:execution_id => @execution.id.to_s,
|
164
|
+
:root_jobnet_id => @root.id.to_s,
|
165
|
+
:root_jobnet_name_path => @root.name_path,
|
166
|
+
:target_jobnet_id => @root.id.to_s,
|
167
|
+
:target_jobnet_name_path => @root.name_path,
|
168
|
+
:target_job_id => @ctx.vertex(:j11).id.to_s,
|
169
|
+
:target_job_name_path => @ctx.vertex(:j11).name_path,
|
170
|
+
})
|
171
|
+
@root.reload
|
172
|
+
@ctx.edge(:e1).phase_key.should == :transmitted
|
173
|
+
@ctx.edge(:e2).phase_key.should == :active
|
174
|
+
@ctx.vertex(:j11).phase_key.should == :error
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
context "server not found" do
|
179
|
+
it "対象のジョブはerrorになりエラーイベントが発火される" do
|
180
|
+
Tengine::Resource::Server.delete_all
|
181
|
+
@root.phase_key = :starting
|
182
|
+
@ctx.edge(:e1).phase_key = :transmitting
|
183
|
+
@ctx.vertex(:j11).phase_key = :ready
|
184
|
+
@root.save!
|
185
|
+
@root.reload
|
186
|
+
tengine.should_fire(:"error.job.job.tengine", an_instance_of(Hash))
|
187
|
+
tengine.receive("start.job.job.tengine", :properties => {
|
188
|
+
:execution_id => @execution.id.to_s,
|
189
|
+
:root_jobnet_id => @root.id.to_s,
|
190
|
+
:root_jobnet_name_path => @root.name_path,
|
191
|
+
:target_jobnet_id => @root.id.to_s,
|
192
|
+
:target_jobnet_name_path => @root.name_path,
|
193
|
+
:target_job_id => @ctx.vertex(:j11).id.to_s,
|
194
|
+
:target_job_name_path => @ctx.vertex(:j11).name_path,
|
195
|
+
})
|
196
|
+
@root.reload
|
197
|
+
@ctx.edge(:e1).phase_key.should == :transmitted
|
198
|
+
@ctx.edge(:e2).phase_key.should == :active
|
199
|
+
@ctx.vertex(:j11).phase_key.should == :error
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
|
204
|
+
context "wrong server IP" do
|
205
|
+
it "対象のジョブはerrorになりエラーイベントが発火される" do
|
206
|
+
server = Tengine::Resource::Server.find_by_name("test_server1")
|
207
|
+
server.addresses = {'private_ip_address' => "unexist_ip"}
|
208
|
+
server.save!
|
209
|
+
@root.phase_key = :starting
|
210
|
+
@ctx.edge(:e1).phase_key = :transmitting
|
211
|
+
@ctx.vertex(:j11).phase_key = :ready
|
212
|
+
@root.save!
|
213
|
+
@root.reload
|
214
|
+
tengine.should_fire(:"error.job.job.tengine", an_instance_of(Hash))
|
215
|
+
tengine.receive("start.job.job.tengine", :properties => {
|
216
|
+
:execution_id => @execution.id.to_s,
|
217
|
+
:root_jobnet_id => @root.id.to_s,
|
218
|
+
:root_jobnet_name_path => @root.name_path,
|
219
|
+
:target_jobnet_id => @root.id.to_s,
|
220
|
+
:target_jobnet_name_path => @root.name_path,
|
221
|
+
:target_job_id => @ctx.vertex(:j11).id.to_s,
|
222
|
+
:target_job_name_path => @ctx.vertex(:j11).name_path,
|
223
|
+
})
|
224
|
+
@root.reload
|
225
|
+
@ctx.edge(:e1).phase_key.should == :transmitted
|
226
|
+
@ctx.edge(:e2).phase_key.should == :active
|
227
|
+
@ctx.vertex(:j11).phase_key.should == :error
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
|
232
|
+
end
|
233
|
+
|
234
|
+
end
|
235
|
+
|
236
|
+
|
@@ -0,0 +1,302 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'tengine/rspec'
|
4
|
+
|
5
|
+
require 'net/ssh'
|
6
|
+
|
7
|
+
|
8
|
+
# 背景
|
9
|
+
# 以下の2つの条件が満たされた場合
|
10
|
+
# * 2つのtenginedプロセスが動いている
|
11
|
+
# * 並列で実行されるジョブを持つジョブネットが実行される(例えばrjn0002)
|
12
|
+
#
|
13
|
+
# 問題の詳細
|
14
|
+
# プロセス1がstart.job.job.tengineイベントによって起動したj11のプロセスのPIDを得る前に、
|
15
|
+
# プロセス2がstart.job.job.tengineイベントによってj12を起動することで、それらのルートジョブネットの
|
16
|
+
# versionが更新されてしまい、j11のPIDを得てルートジョブネットを更新する際にversionが
|
17
|
+
# 異なってしまっているため、update_with_lockメソッドによって実行に失敗したものと見なされて、
|
18
|
+
# 再度update_with_lockのブロックが実行されて、j11のプロセスが実行されてしまう。
|
19
|
+
#
|
20
|
+
# 本来どうあるべきか?
|
21
|
+
# update_with_lock内ではSSHなどの繰り返し実行することを想定していない処理や、
|
22
|
+
# イベントの送信を行ってはいけないので、それらの重複が起こらない仕組みになっているべき。
|
23
|
+
#
|
24
|
+
describe "<BUG>tengindのプロセスを二つ起動した際に並列ジョブがある際にジョブが2度実行される" do
|
25
|
+
include Tengine::RSpec::Extension
|
26
|
+
|
27
|
+
driver_path = File.expand_path("../../../../../lib/tengine/job/drivers/job_control_driver.rb", File.dirname(__FILE__))
|
28
|
+
|
29
|
+
# in [rjn0002]
|
30
|
+
# |--e2-->(j11)--e4-->|
|
31
|
+
# (S1)--e1-->[F1] [J1]--e6-->(E1)
|
32
|
+
# |--e3-->(j12)--e5-->|
|
33
|
+
context "rjn0002" do
|
34
|
+
before do
|
35
|
+
Tengine::Resource::Server.delete_all
|
36
|
+
Tengine::Job::Execution.delete_all
|
37
|
+
Tengine::Job::Vertex.delete_all
|
38
|
+
TestCredentialFixture.test_credential1
|
39
|
+
TestServerFixture.test_server1
|
40
|
+
TestServerFixture.test_server2
|
41
|
+
builder = Rjn0002SimpleParallelJobnetBuilder.new
|
42
|
+
@root = builder.create_actual
|
43
|
+
j12 = @root.element("j12")
|
44
|
+
j12.server_name = "test_server2"
|
45
|
+
@root.save!
|
46
|
+
|
47
|
+
@ctx = builder.context
|
48
|
+
@execution = Tengine::Job::Execution.create!({
|
49
|
+
:root_jobnet_id => @root.id,
|
50
|
+
})
|
51
|
+
@base_props = {
|
52
|
+
:execution_id => @execution.id.to_s,
|
53
|
+
:root_jobnet_id => @root.id.to_s,
|
54
|
+
:root_jobnet_name_path => @root.name_path,
|
55
|
+
:target_jobnet_id => @root.id.to_s,
|
56
|
+
:target_jobnet_name_path => @root.name_path,
|
57
|
+
}
|
58
|
+
|
59
|
+
# 2つのプロセスの代わりに、2つのカーネルを別のFiberで動かす
|
60
|
+
@bootstrap1 = Tengine::Core::Bootstrap.new(:tengined => { :load_path => driver_path })
|
61
|
+
@bootstrap1.kernel.tap{|k| k.bind; k.evaluate}
|
62
|
+
@tengine1 = Tengine::RSpec::ContextWrapper.new(@bootstrap1.kernel)
|
63
|
+
#
|
64
|
+
@bootstrap2 = Tengine::Core::Bootstrap.new(:tengined => { :load_path => driver_path })
|
65
|
+
@bootstrap2.kernel.tap{|k| k.bind; k.evaluate}
|
66
|
+
@tengine2 = Tengine::RSpec::ContextWrapper.new(@bootstrap2.kernel)
|
67
|
+
end
|
68
|
+
|
69
|
+
# tengine1が起動したプロセスのPIDを得る前にtengine2がプロセスを起動することはできません。
|
70
|
+
#
|
71
|
+
# job_control_driverでのstart.job.job.tengineの処理の概略は以下の通りです
|
72
|
+
#
|
73
|
+
# start.job.job.tengine
|
74
|
+
# 1. be starting
|
75
|
+
# 2. root_jobnet.update_with_lock
|
76
|
+
# 3. execute job with SSH
|
77
|
+
# 4. be running
|
78
|
+
# 5. root_jobnet.update_with_lock
|
79
|
+
#
|
80
|
+
# パターン1 (ほぼ同時に1に突入する)
|
81
|
+
# ||f1 ||f2 ||DB |
|
82
|
+
# ||ver|step||ver|step||ver|
|
83
|
+
# ---------------------------------------------------------
|
84
|
+
# || 0 | 1 || - | - || 0| f1 starting
|
85
|
+
# || 0 | 1 || 0 | 1 || 0| f2 starting 1st
|
86
|
+
# || 1 | 2 || 0 | 1 || 1| f1 update_with_lock success
|
87
|
+
# || 1 | 2 || 0 | 2 || 1| f2 update_with_lock fail & retry
|
88
|
+
# || 1 | 2 || 1 | 1 || 1| f2 starting 2nd
|
89
|
+
# || 1 | 2 || 2 | 2 || 2| f2 update_with_lock success
|
90
|
+
# || 2 | 3 || 2 | 2 || 2| f1 refresh & SSH starting
|
91
|
+
# || 2 | 3 || 2 | 3 || 2| f2 refresh & SSH starting
|
92
|
+
# || 2 | 4 || 2 | 3 || 2| f1 running
|
93
|
+
# || 3 | 5 || 2 | 3 || 3| f1 update_with_lock success
|
94
|
+
# || 3 | 5 || 2 | 4 || 3| f2 running 1st
|
95
|
+
# || 3 | 5 || 2 | 5 || 3| f2 update_with_lock fail & retry
|
96
|
+
# || 3 | 5 || 3 | 4 || 3| f2 running 2nd
|
97
|
+
# || 3 | 5 || 4 | 5 || 4| f2 update_with_lock success
|
98
|
+
|
99
|
+
before do
|
100
|
+
@ctx[:e1].phase_key = :transmitted
|
101
|
+
@ctx[:e2].phase_key = :transmitting
|
102
|
+
@ctx[:e3].phase_key = :transmitting
|
103
|
+
@ctx[:j11].phase_key = :ready
|
104
|
+
@ctx[:j12].phase_key = :ready
|
105
|
+
@root.phase_key = :starting
|
106
|
+
@root.version = 0
|
107
|
+
@root.save!
|
108
|
+
|
109
|
+
@pid = Process.pid.to_s
|
110
|
+
|
111
|
+
@f1 = Fiber.new do
|
112
|
+
ssh1 = mock(:ssh1)
|
113
|
+
Net::SSH.should_receive(:start).with("localhost",
|
114
|
+
an_instance_of(Tengine::Resource::Credential),
|
115
|
+
an_instance_of(Hash)).once.and_yield(ssh1)
|
116
|
+
channel1 = mock(:channel1)
|
117
|
+
ssh1.stub(:open_channel).and_yield(channel1)
|
118
|
+
channel1.stub(:exec).with(any_args).and_yield(channel1, true)
|
119
|
+
channel1.should_receive(:on_close) do
|
120
|
+
Tengine.logger.debug( ("!" * 100) << "\non_close: Fiber.yield #{Process.pid} #{__FILE__}##{__LINE__}")
|
121
|
+
Fiber.yield
|
122
|
+
end # on_dataが呼び出される前に止める
|
123
|
+
channel1.should_receive(:on_data).and_yield(channel1, @pid)
|
124
|
+
channel1.stub(:on_extended_data)
|
125
|
+
@tengine1.receive("start.job.job.tengine", :properties => {
|
126
|
+
:target_job_id => @ctx.vertex(:j11).id.to_s,
|
127
|
+
:target_job_name_path => @ctx.vertex(:j11).name_path,
|
128
|
+
}.update(@base_props))
|
129
|
+
:end
|
130
|
+
end
|
131
|
+
|
132
|
+
@f2 = Fiber.new do
|
133
|
+
ssh2 = mock(:ssh2)
|
134
|
+
Net::SSH.should_receive(:start).with("192.168.1.2",
|
135
|
+
an_instance_of(Tengine::Resource::Credential),
|
136
|
+
an_instance_of(Hash)).once.and_yield(ssh2)
|
137
|
+
channel2 = mock(:channel2)
|
138
|
+
ssh2.stub(:open_channel).and_yield(channel2)
|
139
|
+
channel2.stub(:exec).with(any_args).and_yield(channel2, true)
|
140
|
+
channel2.should_receive(:on_close) do
|
141
|
+
Tengine.logger.debug( ("!" * 100) << "\non_close: Fiber.yield #{Process.pid} #{__FILE__}##{__LINE__}")
|
142
|
+
Fiber.yield
|
143
|
+
end # on_dataが呼び出される前に止める
|
144
|
+
channel2.should_receive(:on_data).and_yield(channel2, @pid)
|
145
|
+
channel2.stub(:on_extended_data)
|
146
|
+
@tengine2.receive("start.job.job.tengine", :properties => {
|
147
|
+
:target_job_id => @ctx.vertex(:j12).id.to_s,
|
148
|
+
:target_job_name_path => @ctx.vertex(:j12).name_path,
|
149
|
+
}.update(@base_props))
|
150
|
+
:end
|
151
|
+
end
|
152
|
+
|
153
|
+
@j11 = @root.element("j11")
|
154
|
+
@j12 = @root.element("j12")
|
155
|
+
|
156
|
+
@root.reload
|
157
|
+
@root.version.should == 0
|
158
|
+
Tengine::Job.test_harness_clear
|
159
|
+
end
|
160
|
+
|
161
|
+
it "パターン1" do
|
162
|
+
# f1-1.
|
163
|
+
Tengine.logger.info("1" * 100)
|
164
|
+
Tengine::Job.should_receive(:test_harness).with(1, "before yield in update_with_lock").once
|
165
|
+
Tengine::Job.should_receive(:test_harness).with(2, "after yield in update_with_lock").once{ Fiber.yield}
|
166
|
+
@f1.resume.should_not == :end
|
167
|
+
@root.reload
|
168
|
+
@root.version.should == 0
|
169
|
+
@root.element("j11").phase_key.should == :ready
|
170
|
+
@root.element("j12").phase_key.should == :ready
|
171
|
+
|
172
|
+
# f2-1.
|
173
|
+
Tengine.logger.info("2" * 100)
|
174
|
+
Tengine::Job.should_receive(:test_harness).with(3, "before yield in update_with_lock").once
|
175
|
+
Tengine::Job.should_receive(:test_harness).with(4, "after yield in update_with_lock").once{ Fiber.yield}
|
176
|
+
@f2.resume.should_not == :end
|
177
|
+
@root.reload
|
178
|
+
@root.version.should == 0
|
179
|
+
@root.element("j11").phase_key.should == :ready
|
180
|
+
@root.element("j12").phase_key.should == :ready
|
181
|
+
|
182
|
+
# f1-2.
|
183
|
+
Tengine.logger.info("3" * 100)
|
184
|
+
Tengine::Job.should_receive(:test_harness).with(5, "after update_with_lock").once{ Fiber.yield}
|
185
|
+
@f1.resume.should_not == :end
|
186
|
+
@root.reload
|
187
|
+
@root.version.should == 1
|
188
|
+
@root.element("j11").phase_key.should == :starting
|
189
|
+
@root.element("j12").phase_key.should == :ready
|
190
|
+
|
191
|
+
# f2-1. 2nd
|
192
|
+
Tengine.logger.info("4" * 100)
|
193
|
+
Tengine::Job.should_receive(:test_harness).with(6, "before yield in update_with_lock").once.once
|
194
|
+
Tengine::Job.should_receive(:test_harness).with(7, "after yield in update_with_lock").once.once{ Fiber.yield}
|
195
|
+
@f2.resume.should_not == :end
|
196
|
+
@root.reload
|
197
|
+
@root.version.should == 1
|
198
|
+
@root.element("j11").phase_key.should == :starting
|
199
|
+
@root.element("j12").phase_key.should == :ready
|
200
|
+
|
201
|
+
# f2-2.
|
202
|
+
Tengine.logger.info("5" * 100)
|
203
|
+
Tengine::Job.should_receive(:test_harness).with(8, "after update_with_lock").once{ Fiber.yield}
|
204
|
+
@f2.resume.should_not == :end
|
205
|
+
@root.reload
|
206
|
+
@root.version.should == 2
|
207
|
+
@root.element("j11").phase_key.should == :starting
|
208
|
+
@root.element("j12").phase_key.should == :starting
|
209
|
+
|
210
|
+
# f1-3.
|
211
|
+
Tengine.logger.info("6" * 100)
|
212
|
+
@f1.resume.should_not == :end
|
213
|
+
@root.reload
|
214
|
+
@root.version.should == 2
|
215
|
+
@root.element("j11").phase_key.should == :starting
|
216
|
+
@root.element("j12").phase_key.should == :starting
|
217
|
+
|
218
|
+
# f2-3.
|
219
|
+
Tengine.logger.info("7" * 100)
|
220
|
+
@f2.resume.should_not == :end
|
221
|
+
@root.reload
|
222
|
+
@root.version.should == 2
|
223
|
+
@root.element("j11").phase_key.should == :starting
|
224
|
+
@root.element("j12").phase_key.should == :starting
|
225
|
+
|
226
|
+
# f1-4.
|
227
|
+
Tengine.logger.info("8" * 100)
|
228
|
+
Tengine::Job.should_receive(:test_harness).with(9, "before yield in update_with_lock").once
|
229
|
+
Tengine::Job.should_receive(:test_harness).with(10, "after yield in update_with_lock").once{ Fiber.yield }
|
230
|
+
@f1.resume.should_not == :end
|
231
|
+
@root.reload
|
232
|
+
@root.version.should == 2
|
233
|
+
@root.element("j11").phase_key.should == :starting
|
234
|
+
@root.element("j12").phase_key.should == :starting
|
235
|
+
|
236
|
+
# f1-5.
|
237
|
+
Tengine.logger.info("9" * 100)
|
238
|
+
Tengine::Job.should_receive(:test_harness).with(11, "after update_with_lock").once
|
239
|
+
@f1.resume.should == :end
|
240
|
+
@root.reload
|
241
|
+
@root.version.should == 3
|
242
|
+
@root.element("j11").tap do |j|
|
243
|
+
j.phase_key.should == :running
|
244
|
+
j.executing_pid.should_not be_nil
|
245
|
+
end
|
246
|
+
@root.element("j12").phase_key.should == :starting
|
247
|
+
|
248
|
+
# f2-4. 1st
|
249
|
+
Tengine.logger.info("a" * 100)
|
250
|
+
Tengine::Job.should_receive(:test_harness).with(12, "before yield in update_with_lock").once
|
251
|
+
Tengine::Job.should_receive(:test_harness).with(13, "after yield in update_with_lock").once{ Fiber.yield }
|
252
|
+
@f2.resume.should_not == :end
|
253
|
+
@root.reload
|
254
|
+
@root.version.should == 3
|
255
|
+
@root.element("j11").tap do |j|
|
256
|
+
j.phase_key.should == :running
|
257
|
+
j.executing_pid.should_not be_nil
|
258
|
+
end
|
259
|
+
@root.element("j12").phase_key.should == :starting
|
260
|
+
|
261
|
+
# f2-5.
|
262
|
+
Tengine.logger.info("b" * 100)
|
263
|
+
Tengine::Job.should_receive(:test_harness).with(14, "before yield in update_with_lock").once{ Fiber.yield }
|
264
|
+
@f2.resume.should_not == :end
|
265
|
+
@root.reload
|
266
|
+
@root.version.should == 3
|
267
|
+
@root.element("j11").tap do |j|
|
268
|
+
j.phase_key.should == :running
|
269
|
+
j.executing_pid.should_not be_nil
|
270
|
+
end
|
271
|
+
@root.element("j12").phase_key.should == :starting
|
272
|
+
|
273
|
+
# f2-4. 2nd
|
274
|
+
Tengine.logger.info("c" * 100)
|
275
|
+
Tengine::Job.should_receive(:test_harness).with(15, "after yield in update_with_lock").once{ Fiber.yield }
|
276
|
+
@f2.resume.should_not == :end
|
277
|
+
@root.reload
|
278
|
+
@root.version.should == 3
|
279
|
+
@root.element("j11").tap do |j|
|
280
|
+
j.phase_key.should == :running
|
281
|
+
j.executing_pid.should_not be_nil
|
282
|
+
end
|
283
|
+
@root.element("j12").phase_key.should == :starting
|
284
|
+
|
285
|
+
# f2-5.
|
286
|
+
Tengine.logger.info("d" * 100)
|
287
|
+
Tengine::Job.should_receive(:test_harness).with(16, "after update_with_lock").once
|
288
|
+
@f2.resume.should == :end
|
289
|
+
@root.reload
|
290
|
+
@root.version.should == 4
|
291
|
+
@root.element("j11").tap do |j|
|
292
|
+
j.phase_key.should == :running
|
293
|
+
j.executing_pid.should_not be_nil
|
294
|
+
end
|
295
|
+
@root.element("j12").tap do |j|
|
296
|
+
j.executing_pid.should_not be_nil
|
297
|
+
j.phase_key.should == :running
|
298
|
+
end
|
299
|
+
end
|
300
|
+
|
301
|
+
end
|
302
|
+
end
|