tiny_ge 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/README.org +19 -153
- data/README_J.org +236 -0
- data/exe/qstat +1 -1
- data/lib/.#tiny_ge.rb +1 -1
- data/lib/tiny_ge.rb +54 -43
- data/lib/tiny_ge/child_process.rb +30 -0
- data/lib/tiny_ge/version.rb +1 -1
- metadata +4 -15
- data/exe/qdel~ +0 -6
- data/exe/qfinish~ +0 -6
- data/exe/qstat~ +0 -6
- data/exe/qsub~ +0 -16
- data/exe/tge~ +0 -8
- data/lib/#tiny_ge.rb# +0 -121
- data/lib/check_ve_lock~ +0 -30
- data/lib/unlock_ve_lock~ +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f7a82a96859e47c3fd2b9c5791153d702f3ba1985c4bb6f239b90baf9fa508f0
|
4
|
+
data.tar.gz: 425c1e7fc169bcf8a6657597b931f053ec9db5d3aebce1bf34d5f9106b431101
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b542034953484ee9d0940b253166314f988e5b99518430942fdea53910c8b0dc4f7bdfdbea95b638c991691ef7aa1fbd52aed85a9e71991161380b0de972c295
|
7
|
+
data.tar.gz: abe0e4e19fc6a2c1ca2b81d2663de5bb9d4e20816ff7a34bbea1caf76441cd8a65bdc0586b75ea00dbf9e5c899f559a5948bea0ca5adb4c08969b07df243d74d
|
data/.gitignore
CHANGED
data/README.org
CHANGED
@@ -1,108 +1,19 @@
|
|
1
|
-
#+qiita_private: 79491472592ae821dcdc
|
2
1
|
#+OPTIONS: ^:{}
|
3
2
|
#+STARTUP: indent nolineimages
|
4
|
-
#+TITLE:
|
3
|
+
#+TITLE: tiny_ge
|
5
4
|
#+AUTHOR: Shigeto R. Nishitani
|
6
5
|
#+EMAIL: (concat "shigeto_nishitani@mac.com")
|
7
6
|
#+LANGUAGE: jp
|
8
7
|
# +OPTIONS: H:4 toc:t num:2
|
9
8
|
#+OPTIONS: toc:nil
|
10
|
-
#+TAG: Linux,
|
9
|
+
#+TAG: Linux, exclusive_processing
|
11
10
|
#+TWITTER: off
|
12
11
|
# +SETUPFILE: ~/.emacs.d/org-mode/theme-readtheorg.setup
|
13
12
|
|
14
|
-
|
15
|
-
necのvector engineでprocessを排他処理する.
|
16
|
-
|
17
|
-
necのVector Engineでmpirunを投げるとそのまま行っちゃう.
|
18
|
-
そこでlock fileによる排他処理を提案されたが,そのままだと順序が保証できない.
|
19
|
-
そこんとこちょっと改善
|
20
|
-
|
21
|
-
* 方針
|
22
|
-
統一ファイルを用意して,そこにveにsubmitしたjobのstatusを記録し,
|
23
|
-
そこから排他処理と実行を行う.
|
24
|
-
|
25
|
-
* 実装
|
26
|
-
** 最初の提案
|
27
|
-
例えば、ロックファイルを作るのはどうでしょうか?
|
28
|
-
#+begin_src shell
|
29
|
-
#!/bin/sh
|
30
|
-
|
31
|
-
while [ -f "${HOME}/.running" ]; do
|
32
|
-
sleep 10
|
33
|
-
done
|
34
|
-
touch "${HOME}/.running"
|
35
|
-
|
36
|
-
[...]
|
37
|
-
mpirun -np 8 vasp_std 1> stdout 2> stderr
|
38
|
-
|
39
|
-
rm "${HOME}/.running"
|
40
|
-
#+end_src
|
41
|
-
これをバックグラウンドで実行すればVEに複数ジョブが一度に入ることはなくなります。
|
42
|
-
|
43
|
-
- 実行順は保証されません。
|
44
|
-
|
45
|
-
** 最初の実装
|
46
|
-
home directory配下にファイルを用意してそこにstatus([finished, running, waiting])を
|
47
|
-
書き込んでそれを参照してjobを実行する.
|
48
|
-
|
49
|
-
それぞれの投入ジョブのshellは次の通り.
|
50
|
-
#+name: check_ve.sh
|
51
|
-
#+include: "./test/check_ve.sh" src sh
|
52
|
-
|
53
|
-
実際の稼働shellは以下の二つ.
|
54
|
-
#+name: check_ve_lock
|
55
|
-
#+include: "./lib/check_ve_lock" src ruby
|
56
|
-
|
57
|
-
#+name: unlock_ve_lock
|
58
|
-
#+include: "./lib/unlock_ve_lock" src ruby
|
59
|
-
動いた.
|
60
|
-
#+begin_example
|
61
|
-
12753: finished: /home/bob/bin: 2021-01-26 20:45:56 +0900
|
62
|
-
13209: finished: /home/bob/bin: 2021-01-26 20:46:05 +0900
|
63
|
-
13407: finished: /home/bob/bin: 2021-01-26 20:46:14 +0900
|
64
|
-
8512: finished: /home/bob/frenkel_aurora/al_110_lambda_05: 2021-01-26 23:37:06 +0900
|
65
|
-
20683: running: /home/bob/frenkel_aurora/al_110_lambda_075: 2021-01-26 23:48:25 +0900
|
66
|
-
#+end_example
|
67
|
-
** rubyからbackgroundでの実行
|
68
|
-
- [[https://stackoverflow.com/questions/11982057/how-can-i-trigger-a-shell-script-and-run-in-background-async-in-ruby][How can I trigger a shell script and run in background (async) in Ruby?]]
|
69
|
-
|
70
|
-
に書かれている手法で,
|
71
|
-
#+begin_src ruby
|
72
|
-
shell_file = "./test.sh"
|
73
|
-
File.write(shell_file, "sleep 10\necho \"hoge\"\n")
|
74
|
-
command_line("chmod u+x #{shell_file}")
|
75
|
-
p pid = spawn(shell_file, :out => "test.out", :err => "test.err")
|
76
|
-
Process.detach(pid)
|
77
|
-
#+end_src
|
78
|
-
にて実装.
|
79
|
-
|
80
|
-
結果は,
|
81
|
-
#+begin_example
|
82
|
-
> ls -la --time-style=full-iso test*
|
83
|
-
-rw-r--r--. 1 bob bob 0 2021-01-29 12:12:29.572812565 +0900 test.err
|
84
|
-
-rw-r--r--. 1 bob bob 5 2021-01-29 12:12:39.575812241 +0900 test.out
|
85
|
-
-rwxrw-r--. 1 bob bob 21 2021-01-29 12:12:29.571812565 +0900 test.sh*
|
86
|
-
-rw-rw-r--. 1 bob bob 155 2021-01-28 10:01:57.380865004 +0900 test_helper.rb
|
87
|
-
#+end_example
|
88
|
-
となり,10秒後に書き込まれているのを確認.outファイルは実行直後に出来てたみたい.
|
89
|
-
chmodが嫌ですね.
|
90
|
-
でも,変なpermissionいらないからuser directoryで実行するshellを生成するのが良さそう.
|
91
|
-
|
92
|
-
前回これを実装しようとして,child processとかで悩んだ.
|
93
|
-
|
94
|
-
この検索過程で,gemでqueueシステムをいくつも発見.
|
95
|
-
railsとかtest用にいくつも開発されている.
|
96
|
-
- [[https://www.ruby-toolbox.com/categories/Background_Jobs]]
|
97
|
-
- [[https://blog.appsignal.com/2019/04/02/background-processing-system-in-ruby.html][Ruby Magic Learning by building, a Background Processing System in Ruby]]
|
98
|
-
ただ,難しそう...
|
99
|
-
|
100
|
-
|
101
|
-
* Tiny GE
|
102
|
-
次節の改善案にしたがってgemで実装.
|
103
|
-
SGEのコマンドに似せて作成.
|
104
|
-
|
13
|
+
tiny_ge is a tiny grid engine like SGE.
|
105
14
|
|
15
|
+
* Usage
|
16
|
+
tiny_ge has an interface similar to that of SGE.
|
106
17
|
#+begin_example
|
107
18
|
> tge --help
|
108
19
|
|
@@ -112,9 +23,10 @@ qdel [pid] # delete job
|
|
112
23
|
qfinish [pid] # finish forcely
|
113
24
|
#+end_example
|
114
25
|
|
115
|
-
shell
|
26
|
+
When you qsub a shell script, a wrapper script such as test.s23520 will be created.
|
116
27
|
#+begin_src shell
|
117
|
-
>
|
28
|
+
> qsub ve_lock_vasp.sh
|
29
|
+
> cat test.s23520
|
118
30
|
#!/bin/sh
|
119
31
|
while ! qsub 23520; do
|
120
32
|
sleep 10
|
@@ -124,66 +36,20 @@ sh /home/.../ve_lock_vasp.sh
|
|
124
36
|
|
125
37
|
qfinish 23520
|
126
38
|
#+end_src
|
39
|
+
The job is queued in ~/.tge_test_jobs.txt in YAML format.
|
127
40
|
|
128
|
-
|
129
|
-
|
130
|
-
qstatで'running'などの状況を確認.
|
41
|
+
qstat shows the status of each job, such as 'running'.
|
131
42
|
#+begin_example shell
|
132
43
|
> qstat
|
133
|
-
11670: 11702: finished: /home/
|
134
|
-
14735: 14764: finished: /home/
|
135
|
-
18515: 18545: finished: /home/
|
136
|
-
29533: 29562: finished: /home/
|
137
|
-
18648: 18678: running: /home/
|
44
|
+
11670: 11702: finished: /home/test/hello_world.sh
|
45
|
+
14735: 14764: finished: /home/test/hello_world.sh
|
46
|
+
18515: 18545: finished: /home/test2/ve_lock_vasp.sh
|
47
|
+
29533: 29562: finished: /home/test2/ve_lock_vasp.sh
|
48
|
+
18648: 18678: running: /home/test2/ve_lock_vasp.sh
|
49
|
+
27624: 27654: waiting: /home/test/hello_world.sh
|
138
50
|
#+end_example
|
139
51
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
- [X] qsubを常駐させて,そこにsubmitするという手はないか?
|
144
|
-
1. 常駐させる必要はなくて,qsubに対してve_lock用のshellを常駐させればいい.
|
145
|
-
1. /tmpにおく?
|
146
|
-
- [ ] 走ってないゾンビをチェックする必要あり
|
147
|
-
- [X] qstat, qdelが必要
|
148
|
-
|
149
|
-
- [X] gem化するのが良さそう.そこでは
|
150
|
-
: exeに個別のコマンドを用意するが,実体は同じclassの違う振る舞い
|
151
|
-
とすればいい.
|
152
|
-
|
153
|
-
- [X] qsub -> TGE.qsub(pid, shell_path)
|
154
|
-
- [X] qfinish -> TGE.qfinish(pid)
|
155
|
-
- [X] qstat -> TGE.qstat(line = 10)
|
156
|
-
- [X] qdel -> TGE.qdel(pid)
|
157
|
-
なんかのmodule methodを用意して,それぞれのコマンドを実行させればいい.
|
158
|
-
それにはThorなんかのCLIはいらない.
|
159
|
-
|
160
|
-
* テスト
|
161
|
-
- [[https://github.com/minitest-reporters/minitest-reporters][minitest-reporters]]
|
162
|
-
|
163
|
-
** test_helperの呼び方
|
164
|
-
rake testでやるときと,
|
165
|
-
: ruby tiny_ge_test.rb -n test_qsub
|
166
|
-
とかでやるときでtest_helperが呼ばれない時がある.
|
167
|
-
|
168
|
-
: require_relative "./test_helper"
|
169
|
-
|
170
|
-
とすると両方で呼ばれる.
|
171
|
-
|
172
|
-
#+name: test_helper.rb
|
173
|
-
#+begin_ruby
|
174
|
-
$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
|
175
|
-
#require 'test/unit'
|
176
|
-
require "tiny_ge"
|
177
|
-
|
178
|
-
require "minitest/autorun"
|
179
|
-
require "minitest/reporters"
|
180
|
-
Minitest::Reporters.use!
|
181
|
-
#+end_ruby
|
182
|
-
- qconf
|
183
|
-
- かきこ
|
184
|
-
** kill zombie
|
185
|
-
開発の段階でzombie processの大量発生が起こった.
|
52
|
+
You may write the job in ve_lock_vasp.sh,
|
53
|
+
then the queue system will wait for the previous job to finish
|
54
|
+
and will run the waiting job.
|
186
55
|
|
187
|
-
: ps -xal |grep test.sh
|
188
|
-
とかで親プロセスまで見ることができて.殺せる.
|
189
|
-
第4出力が親プロセス.
|
data/README_J.org
ADDED
@@ -0,0 +1,236 @@
|
|
1
|
+
#+qiita_private: 79491472592ae821dcdc
|
2
|
+
#+OPTIONS: ^:{}
|
3
|
+
#+STARTUP: indent nolineimages
|
4
|
+
#+TITLE: linuxでprocessの排他処理
|
5
|
+
#+AUTHOR: Shigeto R. Nishitani
|
6
|
+
#+EMAIL: (concat "shigeto_nishitani@mac.com")
|
7
|
+
#+LANGUAGE: jp
|
8
|
+
# +OPTIONS: H:4 toc:t num:2
|
9
|
+
#+OPTIONS: toc:nil
|
10
|
+
#+TAG: Linux, 排他処理
|
11
|
+
#+TWITTER: off
|
12
|
+
# +SETUPFILE: ~/.emacs.d/org-mode/theme-readtheorg.setup
|
13
|
+
|
14
|
+
* intro
|
15
|
+
necのvector engineでprocessを排他処理する.
|
16
|
+
|
17
|
+
necのVector Engineでmpirunを投げるとそのまま行っちゃう.
|
18
|
+
そこでlock fileによる排他処理を提案されたが,そのままだと順序が保証できない.
|
19
|
+
そこんとこちょっと改善
|
20
|
+
|
21
|
+
* 方針
|
22
|
+
統一ファイルを用意して,そこにveにsubmitしたjobのstatusを記録し,
|
23
|
+
そこから排他処理と実行を行う.
|
24
|
+
|
25
|
+
* Tiny GE
|
26
|
+
次節の改善案にしたがってgemで実装.
|
27
|
+
- [[https://rubygems.org/gems/tiny_ge]]
|
28
|
+
- [[https://github.com/daddygongon/tiny_ge]]
|
29
|
+
|
30
|
+
SGEのコマンドに似せたCLI.
|
31
|
+
#+begin_example
|
32
|
+
> tge --help
|
33
|
+
|
34
|
+
qsub [shell] # submit shell job
|
35
|
+
qstat # show queue status
|
36
|
+
qdel [pid] # delete job
|
37
|
+
qfinish [pid] # finish forcely
|
38
|
+
#+end_example
|
39
|
+
|
40
|
+
shellを指定してqsubすると以下のtest.shが作成される.
|
41
|
+
#+begin_src shell
|
42
|
+
> qsub ve_lock_vasp.sh
|
43
|
+
> cat test.s23520
|
44
|
+
#!/bin/sh
|
45
|
+
while ! qsub 23520; do
|
46
|
+
sleep 10
|
47
|
+
done
|
48
|
+
|
49
|
+
sh /home/.../ve_lock_vasp.sh
|
50
|
+
|
51
|
+
qfinish 23520
|
52
|
+
#+end_src
|
53
|
+
|
54
|
+
defaultでは~/.tge_test_jobs.txtにqueueがYAML形式で保存されている.
|
55
|
+
|
56
|
+
qstatで'running'などの状況を確認.
|
57
|
+
#+begin_example shell
|
58
|
+
> qstat
|
59
|
+
11670: 11702: finished: /home/test/hello_world.sh
|
60
|
+
14735: 14764: finished: /home/test/hello_world.sh
|
61
|
+
18515: 18545: finished: /home/test2/ve_lock_vasp.sh
|
62
|
+
29533: 29562: finished: /home/test2/ve_lock_vasp.sh
|
63
|
+
18648: 18678: running: /home/test2/ve_lock_vasp.sh
|
64
|
+
#+end_example
|
65
|
+
|
66
|
+
|
67
|
+
* 実装の途中段階
|
68
|
+
** 最初の提案
|
69
|
+
例えば、ロックファイルを作るのはどうでしょうか?
|
70
|
+
#+begin_src shell
|
71
|
+
#!/bin/sh
|
72
|
+
|
73
|
+
while [ -f "${HOME}/.running" ]; do
|
74
|
+
sleep 10
|
75
|
+
done
|
76
|
+
touch "${HOME}/.running"
|
77
|
+
|
78
|
+
[...]
|
79
|
+
mpirun -np 8 vasp_std 1> stdout 2> stderr
|
80
|
+
|
81
|
+
rm "${HOME}/.running"
|
82
|
+
#+end_src
|
83
|
+
これをバックグラウンドで実行すればVEに複数ジョブが一度に入ることはなくなります。
|
84
|
+
|
85
|
+
- 実行順は保証されません。
|
86
|
+
** 最初の実装
|
87
|
+
home directory配下にファイルを用意してそこにstatus([finished, running, waiting])を
|
88
|
+
書き込んでそれを参照してjobを実行する.
|
89
|
+
|
90
|
+
それぞれの投入ジョブのshellは次の通り.
|
91
|
+
#+name: check_ve.sh
|
92
|
+
#+include: "./test/check_ve.sh" src sh
|
93
|
+
|
94
|
+
実際の稼働shellは以下の二つ.
|
95
|
+
#+name: check_ve_lock
|
96
|
+
#+include: "./lib/check_ve_lock" src ruby
|
97
|
+
|
98
|
+
#+name: unlock_ve_lock
|
99
|
+
#+include: "./lib/unlock_ve_lock" src ruby
|
100
|
+
動いた.
|
101
|
+
#+begin_example
|
102
|
+
12753: finished: /home/bob/bin: 2021-01-26 20:45:56 +0900
|
103
|
+
13209: finished: /home/bob/bin: 2021-01-26 20:46:05 +0900
|
104
|
+
13407: finished: /home/bob/bin: 2021-01-26 20:46:14 +0900
|
105
|
+
8512: finished: /home/bob/frenkel_aurora/al_110_lambda_05: 2021-01-26 23:37:06 +0900
|
106
|
+
20683: running: /home/bob/frenkel_aurora/al_110_lambda_075: 2021-01-26 23:48:25 +0900
|
107
|
+
#+end_example
|
108
|
+
** rubyからbackgroundでの実行
|
109
|
+
- [[https://stackoverflow.com/questions/11982057/how-can-i-trigger-a-shell-script-and-run-in-background-async-in-ruby][How can I trigger a shell script and run in background (async) in Ruby?]]
|
110
|
+
|
111
|
+
に書かれている手法で,
|
112
|
+
#+begin_src ruby
|
113
|
+
shell_file = "./test.sh"
|
114
|
+
File.write(shell_file, "sleep 10\necho \"hoge\"\n")
|
115
|
+
command_line("chmod u+x #{shell_file}")
|
116
|
+
p pid = spawn(shell_file, :out => "test.out", :err => "test.err")
|
117
|
+
Process.detach(pid)
|
118
|
+
#+end_src
|
119
|
+
にて実装.
|
120
|
+
|
121
|
+
結果は,
|
122
|
+
#+begin_example
|
123
|
+
> ls -la --time-style=full-iso test*
|
124
|
+
-rw-r--r--. 1 bob bob 0 2021-01-29 12:12:29.572812565 +0900 test.err
|
125
|
+
-rw-r--r--. 1 bob bob 5 2021-01-29 12:12:39.575812241 +0900 test.out
|
126
|
+
-rwxrw-r--. 1 bob bob 21 2021-01-29 12:12:29.571812565 +0900 test.sh*
|
127
|
+
-rw-rw-r--. 1 bob bob 155 2021-01-28 10:01:57.380865004 +0900 test_helper.rb
|
128
|
+
#+end_example
|
129
|
+
となり,10秒後に書き込まれているのを確認.outファイルは実行直後に出来てたみたい.
|
130
|
+
chmodが嫌ですね.
|
131
|
+
でも,変なpermissionいらないからuser directoryで実行するshellを生成するのが良さそう.
|
132
|
+
|
133
|
+
前回これを実装しようとして,child processとかで悩んだ.
|
134
|
+
|
135
|
+
この検索過程で,gemでqueueシステムをいくつも発見.
|
136
|
+
railsとかtest用にいくつも開発されている.
|
137
|
+
- [[https://www.ruby-toolbox.com/categories/Background_Jobs]]
|
138
|
+
- [[https://blog.appsignal.com/2019/04/02/background-processing-system-in-ruby.html][Ruby Magic Learning by building, a Background Processing System in Ruby]]
|
139
|
+
ただ,難しそう...
|
140
|
+
|
141
|
+
** find child process
|
142
|
+
親プロセスを殺しただけでは,子プロセスは動いたまま.
|
143
|
+
そこでそれらを再帰的に見つけて殺すプログラムを実装.
|
144
|
+
|
145
|
+
#+name: lib/tiny_ge/child_process.rb
|
146
|
+
#+include: "lib/tiny_ge/child_process.rb" src ruby
|
147
|
+
|
148
|
+
#+begin_example shell
|
149
|
+
> ruby kill_child_process.rb
|
150
|
+
0 1000 13961 13931 20 0 11004 3116 - S ? 0:00 sh /home/bob/frenkel_aurora/lambda_10/ve_lock_vasp.sh
|
151
|
+
0 1000 13998 13961 20 0 302264 15372 x64_sy Sl ? 0:00 ruby ../bin/frenkel.rb vasp 8 1.0 1000 3 1.0 513
|
152
|
+
0 1000 19301 13998 20 0 4488 768 - S ? 0:00 mpirun -np 8 /home/nec/release20210125/vasp.5.4.4/bin/vasp_std
|
153
|
+
0 1000 19302 19301 20 0 186784 5256 core_s Ss ? 0:00 mpid
|
154
|
+
0 1000 19308 19302 20 0 1090611052 189368 - S ? 0:00 /opt/nec/ve/libexec/ve_exec -d /dev/veslot0 -s /var/
|
155
|
+
0 1000 19309 19302 20 0 1090596356 221076 - S ? 0:00 /opt/nec/ve/libexec/ve_exec -d /dev/veslot0 -s /var/
|
156
|
+
...
|
157
|
+
kill -9 13931
|
158
|
+
kill -9 13961
|
159
|
+
kill -9 13998
|
160
|
+
kill -9 19301
|
161
|
+
...
|
162
|
+
#+end_example
|
163
|
+
|
164
|
+
これをqdelに入れるか,qfinishに入れるかで悩む.
|
165
|
+
- qdelは'wait' -> delete
|
166
|
+
- qfinishは'running' -> 'finished'
|
167
|
+
なんで,'deleted'を作るか.そうすればqfinishはqdelと合体できる.
|
168
|
+
|
169
|
+
** pidをqueueの連番に変える
|
170
|
+
pidを親プロセスのpidからqueueの番号に変更.
|
171
|
+
|
172
|
+
それに伴って,idのuniquenessを保証するため,
|
173
|
+
deleteでデータを直接消すのをやめて,
|
174
|
+
'deleted'状態として保持することにした.
|
175
|
+
|
176
|
+
+それに伴ってfinished あるいはdeletedがnext processの判断基準とした.+
|
177
|
+
|
178
|
+
** qsubのロジック変更
|
179
|
+
finished, deletedの次としていたらすぐにrunningが始まって失敗.
|
180
|
+
他のrunning, waitingがあるかないかで判断.
|
181
|
+
|
182
|
+
- shell jobならばqueueに登録
|
183
|
+
- そうでなければpidとみなして,
|
184
|
+
- running, waitingが他に一つでもあれば待て(return false).
|
185
|
+
- なければ進め(change status and return true)
|
186
|
+
|
187
|
+
** 改善案
|
188
|
+
- [X] テキストにして置いておくより,yamlかjsonが良さそう.
|
189
|
+
見にくいけれど,間違いがないだろうから.
|
190
|
+
- [X] qsubを常駐させて,そこにsubmitするという手はないか?
|
191
|
+
1. 常駐させる必要はなくて,qsubに対してve_lock用のshellを常駐させればいい.
|
192
|
+
1. /tmpにおく?
|
193
|
+
- [ ] 走ってないゾンビをチェックする必要あり
|
194
|
+
- [X] qstat, qdelが必要
|
195
|
+
|
196
|
+
- [X] gem化するのが良さそう.そこでは
|
197
|
+
: exeに個別のコマンドを用意するが,実体は同じclassの違う振る舞い
|
198
|
+
とすればいい.
|
199
|
+
|
200
|
+
- [X] qsub -> TGE.qsub(pid, shell_path)
|
201
|
+
- [X] qfinish -> TGE.qfinish(pid)
|
202
|
+
- [X] qstat -> TGE.qstat(line = 10)
|
203
|
+
- [X] qdel -> TGE.qdel(pid)
|
204
|
+
なんかのmodule methodを用意して,それぞれのコマンドを実行させればいい.
|
205
|
+
それにはThorなんかのCLIはいらない.
|
206
|
+
|
207
|
+
* テスト
|
208
|
+
- [[https://github.com/minitest-reporters/minitest-reporters][minitest-reporters]]
|
209
|
+
|
210
|
+
** test_helperの呼び方
|
211
|
+
rake testでやるときと,
|
212
|
+
: ruby tiny_ge_test.rb -n test_qsub
|
213
|
+
とかでやるときでtest_helperが呼ばれない時がある.
|
214
|
+
|
215
|
+
: require_relative "./test_helper"
|
216
|
+
|
217
|
+
とすると両方で呼ばれる.
|
218
|
+
|
219
|
+
#+name: test_helper.rb
|
220
|
+
#+begin_ruby
|
221
|
+
$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
|
222
|
+
#require 'test/unit'
|
223
|
+
require "tiny_ge"
|
224
|
+
|
225
|
+
require "minitest/autorun"
|
226
|
+
require "minitest/reporters"
|
227
|
+
Minitest::Reporters.use!
|
228
|
+
#+end_ruby
|
229
|
+
- qconf
|
230
|
+
- かきこ
|
231
|
+
** kill zombie
|
232
|
+
開発の段階でzombie processの大量発生が起こった.
|
233
|
+
|
234
|
+
: ps -xal |grep test.sh
|
235
|
+
とかで親プロセスまで見ることができて.殺せる.
|
236
|
+
第4出力が親プロセス.
|
data/exe/qstat
CHANGED
data/lib/.#tiny_ge.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
lib/bob@aurora0.
|
1
|
+
lib/bob@aurora0.21626:1611034391
|
data/lib/tiny_ge.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "tiny_ge/version"
|
2
|
+
require "tiny_ge/child_process"
|
2
3
|
require 'yaml'
|
3
4
|
require 'thor'
|
4
5
|
require 'command_line/global'
|
@@ -7,27 +8,16 @@ require 'command_line/global'
|
|
7
8
|
VE_TEST_FILE = File.join(ENV['HOME'],".tge_test_jobs.txt")
|
8
9
|
|
9
10
|
class TGE
|
10
|
-
|
11
|
-
|
11
|
+
include ChildProcess
|
12
|
+
def initialize(line=0)
|
13
|
+
@q_file =VE_TEST_FILE
|
14
|
+
command_line("touch #{@q_file}") unless File.exist?(@q_file)
|
12
15
|
@data = YAML.load(File.read(@q_file))
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
File.write(shell_file, shell_script)
|
19
|
-
|
20
|
-
p pid0 = spawn("sh #{shell_file}", :out => "test.o#{pid}", :err => "test.e#{pid}")
|
21
|
-
Process.detach(pid0)
|
22
|
-
puts "#{pid} is added on the queue."
|
23
|
-
|
24
|
-
@data << {pid: pid, status: 'waiting', shell_path: shell_path,
|
25
|
-
real_pid: pid0,
|
26
|
-
submit: Time.now,
|
27
|
-
start: nil,
|
28
|
-
finish: nil
|
29
|
-
}
|
30
|
-
File.write(VE_TEST_FILE, YAML.dump(@data))
|
16
|
+
unless @data
|
17
|
+
@data = []
|
18
|
+
puts 'no data'
|
19
|
+
return
|
20
|
+
end
|
31
21
|
end
|
32
22
|
|
33
23
|
def change_job_status(pid, status)
|
@@ -37,6 +27,7 @@ class TGE
|
|
37
27
|
case status
|
38
28
|
when 'running' ; job[:start] = Time.now
|
39
29
|
when 'finished'; job[:finish] = Time.now
|
30
|
+
when 'deleted' ; job[:finish] = Time.now
|
40
31
|
end
|
41
32
|
File.write(VE_TEST_FILE, YAML.dump(@data))
|
42
33
|
break
|
@@ -49,24 +40,40 @@ class TGE
|
|
49
40
|
return true
|
50
41
|
end
|
51
42
|
|
43
|
+
def add_job(pid, shell_path)
|
44
|
+
shell_name = File.basename(shell_path,'.sh')
|
45
|
+
shell_file = "./#{shell_name}.s#{pid}"
|
46
|
+
shell_script = mk_shell_script(pid, shell_path)
|
47
|
+
File.write(shell_file, shell_script)
|
48
|
+
|
49
|
+
p pid0 = spawn("sh #{shell_file}",
|
50
|
+
:out => "#{shell_name}.o#{pid}",
|
51
|
+
:err => "#{shell_name}.e#{pid}")
|
52
|
+
Process.detach(pid0)
|
53
|
+
puts "#{pid} is added on the queue."
|
54
|
+
|
55
|
+
@data << {pid: pid, status: 'waiting',
|
56
|
+
shell_path: shell_path,
|
57
|
+
real_pid: pid0,
|
58
|
+
submit: Time.now,
|
59
|
+
start: nil,
|
60
|
+
finish: nil
|
61
|
+
}
|
62
|
+
File.write(VE_TEST_FILE, YAML.dump(@data))
|
63
|
+
end
|
64
|
+
|
52
65
|
def qsub(pid, shell_path=Dir.pwd)
|
53
66
|
unless pid_on_file(pid)
|
54
|
-
add_job(
|
67
|
+
add_job(@data.size, shell_path)
|
55
68
|
return false
|
56
69
|
end
|
57
|
-
|
58
|
-
@data.
|
59
|
-
if job[:pid] == pid
|
60
|
-
|
61
|
-
|
62
|
-
return true
|
63
|
-
end
|
64
|
-
if job[:status] == 'running'
|
65
|
-
return true
|
66
|
-
end
|
67
|
-
return false
|
70
|
+
|
71
|
+
@data.each do |job|
|
72
|
+
if job[:pid] == pid and job[:status] == 'waiting'
|
73
|
+
change_job_status(pid, 'running')
|
74
|
+
return true
|
68
75
|
end
|
69
|
-
|
76
|
+
return false if job[:status] == 'waiting' or job[:status] == 'running'
|
70
77
|
end
|
71
78
|
end
|
72
79
|
|
@@ -84,18 +91,22 @@ class TGE
|
|
84
91
|
end
|
85
92
|
@data.each_with_index do |job, i|
|
86
93
|
if job[:pid] == pid
|
87
|
-
res = command_line
|
88
|
-
|
89
|
-
|
94
|
+
res = command_line "ps -xal |grep #{job[:real_pid]}"
|
95
|
+
puts res.stdout
|
96
|
+
kill_all_child_process(job[:real_pid].to_i)
|
97
|
+
# res = command_line("kill -9 #{job[:real_pid]}")
|
98
|
+
# p res
|
99
|
+
# @data.delete_at(i)
|
100
|
+
change_job_status(pid, 'deleted')
|
90
101
|
File.write(VE_TEST_FILE, YAML.dump(@data))
|
91
102
|
puts "#{pid} is deleted from the qeueu."
|
103
|
+
|
92
104
|
return true
|
93
105
|
end
|
94
106
|
end
|
95
107
|
end
|
96
108
|
|
97
109
|
def qstat(item_num=0)
|
98
|
-
@data = YAML.load(File.read(VE_TEST_FILE))
|
99
110
|
@data[item_num..-1].each do |job, i|
|
100
111
|
real_pid = job[:real_pid] || 0
|
101
112
|
puts "%5d: %5d: %10s: %s" % [job[:pid], real_pid, job[:status], job[:shell_path]]
|
@@ -107,13 +118,13 @@ class TGE
|
|
107
118
|
#!/bin/sh
|
108
119
|
while ! qsub #{pid}; do
|
109
120
|
sleep 10
|
110
|
-
|
121
|
+
done
|
111
122
|
|
112
|
-
|
123
|
+
sh #{shell_path}
|
113
124
|
|
114
|
-
|
115
|
-
EOS
|
116
|
-
|
125
|
+
qfinish #{pid}
|
126
|
+
EOS
|
127
|
+
end
|
117
128
|
|
118
|
-
end
|
129
|
+
end
|
119
130
|
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'command_line/global'
|
2
|
+
|
3
|
+
module ChildProcess
|
4
|
+
def find_child_process_recursively(pid)
|
5
|
+
$ps_data.each do |item|
|
6
|
+
next if item==nil
|
7
|
+
if item[4].to_i == pid.to_i
|
8
|
+
$pids << item[3].to_i
|
9
|
+
find_child_process_recursively(item[3].to_i)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
return nil
|
13
|
+
end
|
14
|
+
|
15
|
+
def kill_all_child_process(pid)
|
16
|
+
p $pids = [pid]
|
17
|
+
# store jobs in $data
|
18
|
+
$ps_data = command_line('ps -xal').stdout.split("\n").inject([]) do |dd, line|
|
19
|
+
dd << line.match(/^(\d+)\s+(\d+)\s+(\d+)\s+(\d+)(.+)/)
|
20
|
+
# stored in String
|
21
|
+
end
|
22
|
+
|
23
|
+
find_child_process_recursively(pid)
|
24
|
+
p $pids
|
25
|
+
$pids.uniq.each do |pid|
|
26
|
+
p command = "kill -9 #{pid}"
|
27
|
+
command_line command
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/lib/tiny_ge/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiny_ge
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shigeto R. Nishitani
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-02-
|
11
|
+
date: 2021-02-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -85,15 +85,10 @@ email:
|
|
85
85
|
- shigeto_nishitani@me.com
|
86
86
|
executables:
|
87
87
|
- qdel
|
88
|
-
- qdel~
|
89
88
|
- qfinish
|
90
|
-
- qfinish~
|
91
89
|
- qstat
|
92
|
-
- qstat~
|
93
90
|
- qsub
|
94
|
-
- qsub~
|
95
91
|
- tge
|
96
|
-
- tge~
|
97
92
|
extensions: []
|
98
93
|
extra_rdoc_files: []
|
99
94
|
files:
|
@@ -104,25 +99,19 @@ files:
|
|
104
99
|
- Gemfile.lock
|
105
100
|
- LICENSE.txt
|
106
101
|
- README.org
|
102
|
+
- README_J.org
|
107
103
|
- Rakefile
|
108
104
|
- exe/qdel
|
109
|
-
- exe/qdel~
|
110
105
|
- exe/qfinish
|
111
|
-
- exe/qfinish~
|
112
106
|
- exe/qstat
|
113
|
-
- exe/qstat~
|
114
107
|
- exe/qsub
|
115
|
-
- exe/qsub~
|
116
108
|
- exe/tge
|
117
|
-
- exe/tge~
|
118
|
-
- lib/#tiny_ge.rb#
|
119
109
|
- lib/.#tiny_ge.rb
|
120
110
|
- lib/check_ve_lock
|
121
|
-
- lib/check_ve_lock~
|
122
111
|
- lib/tiny_ge.rb
|
112
|
+
- lib/tiny_ge/child_process.rb
|
123
113
|
- lib/tiny_ge/version.rb
|
124
114
|
- lib/unlock_ve_lock
|
125
|
-
- lib/unlock_ve_lock~
|
126
115
|
- tiny_ge.gemspec
|
127
116
|
homepage: https://github.com/daddygongon/tiny_ge
|
128
117
|
licenses:
|
data/exe/qdel~
DELETED
data/exe/qfinish~
DELETED
data/exe/qstat~
DELETED
data/exe/qsub~
DELETED
data/exe/tge~
DELETED
data/lib/#tiny_ge.rb#
DELETED
@@ -1,121 +0,0 @@
|
|
1
|
-
require "tiny_ge/version"
|
2
|
-
require 'yaml'
|
3
|
-
require 'thor'
|
4
|
-
require 'command_line/global'
|
5
|
-
|
6
|
-
#VE_SUBMIT_JOBS_FILE = File.join(ENV['HOME'],".ve_submit_jobs.txt")
|
7
|
-
VE_TEST_FILE = File.join(ENV['HOME'],".tge_test_jobs.txt")
|
8
|
-
|
9
|
-
class TGE
|
10
|
-
def initialize(q_file=VE_TEST_FILE)
|
11
|
-
@q_file = VE_TEST_FILE
|
12
|
-
@data = YAML.load(File.read(@q_file))
|
13
|
-
end
|
14
|
-
|
15
|
-
def add_job(pid, shell_path)
|
16
|
-
shell_file = "./test.s#{pid}"
|
17
|
-
shell_script = mk_shell_script(pid, shell_path)
|
18
|
-
File.write(shell_file, shell_script)
|
19
|
-
|
20
|
-
p pid0 = spawn("sh #{shell_file}", :out => "test.o#{pid}", :err => "test.e#{pid}")
|
21
|
-
Process.detach(pid0)
|
22
|
-
puts "#{pid} is added on the queue."
|
23
|
-
|
24
|
-
@data << {pid: pid, status: 'waiting', shell_path: shell_path,
|
25
|
-
real_pid: pid0,
|
26
|
-
submit: Time.now,
|
27
|
-
start: nil,
|
28
|
-
finish: nil
|
29
|
-
}
|
30
|
-
File.write(VE_TEST_FILE, YAML.dump(@data))
|
31
|
-
end
|
32
|
-
|
33
|
-
def change_job_status(pid, status)
|
34
|
-
@data.each do |job, i|
|
35
|
-
if job[:pid] == pid
|
36
|
-
job[:status] = status
|
37
|
-
case status
|
38
|
-
when 'running' ; job[:start] = Time.now
|
39
|
-
when 'finished'; job[:finish] = Time.now
|
40
|
-
end
|
41
|
-
File.write(VE_TEST_FILE, YAML.dump(@data))
|
42
|
-
break
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def qfinish(pid)
|
48
|
-
change_job_status(pid, 'finished')
|
49
|
-
return true
|
50
|
-
end
|
51
|
-
|
52
|
-
def qsub(pid, shell_path=Dir.pwd)
|
53
|
-
unless pid_on_file(pid)
|
54
|
-
add_job(pid, shell_path)
|
55
|
-
return false
|
56
|
-
end
|
57
|
-
last_finished = -1
|
58
|
-
@data.each_with_index do |job, i|
|
59
|
-
if job[:pid] == pid
|
60
|
-
if job[:status] == 'waiting' and i == last_finished + 1
|
61
|
-
change_job_status(pid, 'running')
|
62
|
-
return true
|
63
|
-
end
|
64
|
-
if job[:status] == 'running'
|
65
|
-
return true
|
66
|
-
end
|
67
|
-
return false
|
68
|
-
end
|
69
|
-
last_finished = i if job[:status] == 'finished'
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
def pid_on_file(pid)
|
74
|
-
@data.each do |job, i|
|
75
|
-
return job[:status] if job[:pid] == pid
|
76
|
-
end
|
77
|
-
return false
|
78
|
-
end
|
79
|
-
|
80
|
-
def qdel(pid)
|
81
|
-
unless pid_on_file(pid)
|
82
|
-
puts "#{pid} is not on the qeueu."
|
83
|
-
return false
|
84
|
-
end
|
85
|
-
@data.each_with_index do |job, i|
|
86
|
-
if job[:pid] == pid
|
87
|
-
res = command_line("kill -9 #{job[:real_pid]}")
|
88
|
-
p res
|
89
|
-
@data.delete_at(i)
|
90
|
-
File.write(VE_TEST_FILE, YAML.dump(@data))
|
91
|
-
puts "#{pid} is deleted from the qeueu."
|
92
|
-
return true
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
def qstat(item_num=0)
|
98
|
-
if item_num == '-f'
|
99
|
-
item_num
|
100
|
-
@data = YAML.load(File.read(VE_TEST_FILE))
|
101
|
-
@data[item_num..-1].each do |job, i|
|
102
|
-
real_pid = job[:real_pid] || 0
|
103
|
-
puts "%5d: %5d: %10s: %s" % [job[:pid], real_pid, job[:status], job[:shell_path]]
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
def mk_shell_script(pid, shell_path)
|
108
|
-
return <<~EOS
|
109
|
-
#!/bin/sh
|
110
|
-
while ! qsub #{pid}; do
|
111
|
-
sleep 10
|
112
|
-
done
|
113
|
-
|
114
|
-
sh #{shell_path}
|
115
|
-
|
116
|
-
qfinish #{pid}
|
117
|
-
EOS
|
118
|
-
end
|
119
|
-
|
120
|
-
end
|
121
|
-
|
data/lib/check_ve_lock~
DELETED
@@ -1,30 +0,0 @@
|
|
1
|
-
#!/bin/env ruby
|
2
|
-
#VE_SUBMIT_JOBS_FILE = File.join(ENV['HOME'],".ve_submit_jobs.txt")
|
3
|
-
VE_SUBMIT_JOBS_FILE = File.join(ENV['HOME'],".tge_test_jobs.txt")
|
4
|
-
pid = ARGV[0] || 1234
|
5
|
-
pid = pid.to_i
|
6
|
-
|
7
|
-
status = ['not_on_file', -1]
|
8
|
-
lines = File.readlines(VE_SUBMIT_JOBS_FILE)
|
9
|
-
finished_job = -1
|
10
|
-
lines.each_with_index do |line, i|
|
11
|
-
data = line.match(/\s+(\d+):\s+(\w+):\s+(.+):/)
|
12
|
-
if data[1].to_i == pid
|
13
|
-
status = [data[2], i]
|
14
|
-
if status[1] == finished_job + 1
|
15
|
-
line.gsub!('waiting','running')
|
16
|
-
File.write(VE_SUBMIT_JOBS_FILE, lines.join)
|
17
|
-
exit 0
|
18
|
-
end
|
19
|
-
break
|
20
|
-
end
|
21
|
-
finished_job = i if data[2] == 'finished'
|
22
|
-
end
|
23
|
-
|
24
|
-
if status[0] == 'not_on_file'
|
25
|
-
line = "%8d: %10s: %s: %s\n" % [pid, 'waiting', Dir.pwd(), Time.now]
|
26
|
-
File.open(VE_SUBMIT_JOBS_FILE, 'a'){ |f| f.write line }
|
27
|
-
end
|
28
|
-
|
29
|
-
exit 1 # return false
|
30
|
-
#exit 0 # return true
|
data/lib/unlock_ve_lock~
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
#!/bin/env ruby
|
2
|
-
VE_SUBMIT_JOBS_FILE = File.join(ENV['HOME'],".ve_submit_jobs.txt")
|
3
|
-
pid = ARGV[0] || 1234
|
4
|
-
pid = pid.to_i
|
5
|
-
lines = File.readlines(VE_SUBMIT_JOBS_FILE)
|
6
|
-
lines.each_with_index do |line, i|
|
7
|
-
data = line.match(/\s+(\d+):\s+(\w+):\s+(.+)/)
|
8
|
-
if data[1].to_i == pid
|
9
|
-
line.gsub!(' running', 'finished')
|
10
|
-
File.write(VE_SUBMIT_JOBS_FILE, lines.join)
|
11
|
-
break
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
exit 0 # return true
|