comana 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES +52 -19
- data/Gemfile +1 -0
- data/README.rdoc +6 -1
- data/VERSION +1 -1
- data/bin/genqsub +38 -30
- data/bin/scpall +1 -1
- data/comana.gemspec +19 -15
- data/example/dot.clustersetting +24 -9
- data/lib/comana/clustersetting.rb +10 -0
- data/lib/comana/computationmanager.rb +134 -10
- data/lib/comana/gridengine.rb +173 -0
- data/lib/comana/hostinspector.rb +5 -59
- data/lib/comana.rb +4 -7
- data/test/gridengine/qconfsql.dat +13 -0
- data/test/gridengine/qstatf0.xml +1275 -0
- data/test/gridengine/qstatf1.xml +39 -0
- data/test/gridengine/qstatq0.xml +247 -0
- data/test/gridengine/qstatq1.xml +37 -0
- data/test/gridengine/qstatu0.xml +437 -0
- data/test/gridengine/qstatu1.xml +47 -0
- data/test/hostselector/dot.clustersetting +15 -0
- data/test/test_clustersetting.rb +20 -23
- data/test/test_computationmanager.rb +49 -15
- data/test/test_gridengine.rb +198 -0
- data/test/test_hostinspector.rb +11 -29
- data/test/test_hostselector.rb +2 -13
- metadata +27 -14
- data/bin/machinestatus +0 -192
- data/bin/queueinfo +0 -28
- data/lib/comana/gridenginescript.rb +0 -68
- data/lib/comana/queuemanager.rb +0 -34
- data/lib/comana/queuesubmitter.rb +0 -156
- data/memo.txt +0 -34
- data/test/test_gridenginescript.rb +0 -16
- data/test/test_queuemanager.rb +0 -19
- data/test/test_queuesubmitter.rb +0 -214
@@ -0,0 +1,173 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
#
|
5
|
+
#
|
6
|
+
#
|
7
|
+
class Comana::GridEngine
|
8
|
+
|
9
|
+
# Write a Grid Engine job script that runs +command+ to +io+.
#
# q_name::          queue name, emitted on the "#$ -q" line.
# pe_name::         parallel environment name, emitted with +ppn+ on the
#                   "#$ -pe" line.
# ppn::             slot count written after +pe_name+.
# ld_library_path:: value assigned to and exported as LD_LIBRARY_PATH inside
#                   the script; both lines are omitted when nil or false
#                   (the default).
# command::         written as the last line of the script (via +to_s+).
# io::              any object responding to +puts+; it receives the script.
def self.write_qsub_script(q_name:, pe_name:, ppn:, ld_library_path: nil, command:, io:)
  # Lines starting with "#$" are single-quoted (or the '#' is escaped) so
  # that Ruby can never treat them as "#$gvar" string interpolation.
  io.puts '#! /bin/sh'
  io.puts '#$ -S /bin/sh'
  io.puts '#$ -cwd'
  io.puts '#$ -o stdout'
  io.puts '#$ -e stderr'
  io.puts "\#$ -q #{q_name}"
  io.puts "\#$ -pe #{pe_name} #{ppn}"
  io.puts "MACHINE_FILE='machines'"
  if ld_library_path
    io.puts "LD_LIBRARY_PATH=#{ld_library_path}"
    io.puts 'export LD_LIBRARY_PATH'
  end
  io.puts 'cd $SGE_O_WORKDIR'
  io.puts 'printenv | sort > printenv.log'
  # Turn the first two columns of $PE_HOSTFILE into "<col1> cpu=<col2>" lines.
  io.puts "cut -d ' ' -f 1,2 $PE_HOSTFILE | sed 's/ / cpu=/' > $MACHINE_FILE"
  io.puts command.to_s
end
|
26
|
+
|
27
|
+
# Parse the XML produced by "qstat -f -xml" into one Hash per
# /job_info/queue_info/Queue-List element.
#
# io:: optional IO-like object holding the XML. When nil (the default) the
#      command "qstat -f -xml" is run and its pipe is closed as soon as the
#      document has been parsed. An +io+ supplied by the caller is not closed.
#
# Returns an Array of Hash. Keys are the names of the child elements; values
# are the serialized children of each element as String, converted with
# +to_i+ when String#integer? is true.
def self.qstat_f(io = nil)
  doc =
    if io
      Nokogiri::XML(io)
    else
      # Block form guarantees the pipe (and the child process) is released.
      IO.popen("qstat -f -xml") { |pipe| Nokogiri::XML(pipe) }
    end

  doc.xpath("/job_info/queue_info/Queue-List").map do |queue|
    queue.children.each_with_object({}) do |node, hash|
      next if node.name == 'text' # whitespace between elements
      val = node.children.to_s
      val = val.to_i if val.integer?
      hash[node.name] = val
    end
  end
end
|
42
|
+
|
43
|
+
# Group the hosts reported by qstat_f by queue name.
#
# io:: optional IO-like object forwarded to qstat_f. When nil (the default)
#      qstat_f is called without arguments and obtains the data itself.
#
# Entries whose 'state' is exactly 'au' are skipped (presumably Grid Engine's
# marker for an unreachable host -- confirm against the qstat documentation).
# Each remaining 'name' is split at its last '@' into queue and host; names
# without an '@' are ignored instead of being collected under a nil key.
#
# Returns a Hash of queue name => Array of host names.
def self.queue_alive_hosts(io = nil)
  entries = io ? self.qstat_f(io) : self.qstat_f
  entries.each_with_object({}) do |entry, results|
    next if entry['state'] == 'au'
    matched = /(.*)\@(.*)/.match(entry['name'].to_s)
    next unless matched
    (results[matched[1]] ||= []) << matched[2]
  end
end
|
56
|
+
|
57
|
+
# Count the hosts per queue as reported by queue_alive_hosts.
#
# io:: optional IO-like object forwarded to queue_alive_hosts. When nil (the
#      default) queue_alive_hosts is called without arguments, so this method
#      never opens a qstat pipe of its own.
#
# Returns a Hash of queue name => number of hosts.
def self.queue_alive_nums(io = nil)
  hosts_by_queue = io ? self.queue_alive_hosts(io) : self.queue_alive_hosts
  hosts_by_queue.each_with_object({}) do |(queue, hosts), counts|
    counts[queue] = hosts.size
  end
end
|
65
|
+
|
66
|
+
# Parse the XML produced by "qstat -u '*' -xml" into one Hash per job_list
# element.
#
# io:: optional IO-like object holding the XML. When nil (the default) the
#      command "qstat -u '*' -xml" is run and its pipe is closed as soon as
#      the document has been parsed. An +io+ supplied by the caller is not
#      closed.
#
# Elements under /job_info/queue_info/job_list come first in the result,
# followed by those under /job_info/job_info/job_list.
#
# Returns an Array of Hash. 'job_list_state' holds the first attribute of the
# job_list element as String; the remaining keys are child element names whose
# values are converted with +to_i+ / +to_f+ when String#integer? /
# String#float? is true, and left as String otherwise.
def self.qstat_u(io = nil)
  doc =
    if io
      Nokogiri::XML(io)
    else
      # Block form guarantees the pipe (and the child process) is released.
      IO.popen("qstat -u '*' -xml") { |pipe| Nokogiri::XML(pipe) }
    end

  results = []
  ["/job_info/queue_info/job_list", "/job_info/job_info/job_list"].each do |path|
    doc.xpath(path).each do |job|
      hash = {}
      hash['job_list_state'] = job.attributes.values[0].to_s
      job.children.each do |node|
        next if node.name == 'text' # whitespace between elements
        val = node.children.to_s
        if val.integer?
          val = val.to_i
        elsif val.float?
          val = val.to_f
        end
        hash[node.name] = val
      end
      results << hash
    end
  end
  results
end
|
107
|
+
|
108
|
+
#def self.qconf_sql(str = `qconf -sql`)
|
109
|
+
# List queue names, one per line of +str+.
#
# str:: text to split; defaults to the output of the `qconf -sql` command.
#
# Surrounding whitespace of the whole text is stripped before it is split on
# newlines. Returns an Array of String (empty for blank input).
def self.queues(str = `qconf -sql`)
  trimmed = str.strip
  trimmed.split("\n")
end
|
112
|
+
|
113
|
+
|
114
|
+
#def self.queue_jobs(qname, str = `qconf -sql`)
|
115
|
+
# List the jobs of queue +qname+ for all users.
#
# qname:: queue name passed to "qstat -q". NOTE(review): it is interpolated
#         into a shell command line unescaped, so it must come from a trusted
#         source.
# io::    optional IO-like object holding "qstat ... -xml" output. When given,
#         no command is run and +qname+ is not used. When nil,
#         "qstat -q <qname> -u '*' -xml" is run and its pipe is closed once
#         the output has been parsed.
#
# The XML is parsed by qstat_u, which this method previously duplicated line
# for line. Returns whatever qstat_u returns for that input.
def self.queue_jobs(qname, io = nil)
  return self.qstat_u(io) if io

  IO.popen("qstat -q #{qname} -u '*' -xml") { |pipe| self.qstat_u(pipe) }
end
|
158
|
+
|
159
|
+
#nj: number of jobs
|
160
|
+
#nh: number of hosts
|
161
|
+
#bench: benchmark time
|
162
|
+
# Estimate when a job would finish: (rounds + 1) * bench, where rounds is 0
# while there are fewer jobs than hosts and nj / nh (as Float) otherwise.
def self.guess_end_time(nj:, nh:, bench:)
  jobs = nj.to_f
  hosts = nh.to_f
  rounds = jobs < hosts ? 0 : jobs / hosts
  (rounds + 1) * bench
end
|
172
|
+
|
173
|
+
end
|
data/lib/comana/hostinspector.rb
CHANGED
@@ -12,16 +12,15 @@ class Comana::HostInspector
|
|
12
12
|
attr_reader :hostname
|
13
13
|
|
14
14
|
#
|
15
|
-
def initialize(hostname)
|
15
|
+
def initialize(hostname, cache_dir)
|
16
16
|
@hostname = hostname
|
17
|
-
@cache_dir = "#{
|
17
|
+
@cache_dir = "#{cache_dir}/#{@hostname}"
|
18
18
|
end
|
19
19
|
|
20
20
|
|
21
|
-
##ping
|
22
|
-
|
23
|
-
|
24
|
-
#def ping3
|
21
|
+
##Try ping three times.
|
22
|
+
##Return true if at least one time responds.
|
23
|
+
##def ping3
|
25
24
|
def update_ping
|
26
25
|
result = false
|
27
26
|
3.times do
|
@@ -39,11 +38,9 @@ class Comana::HostInspector
|
|
39
38
|
##cwd
|
40
39
|
##readlink コマンドが使えるかとも思ったが、シムリンク自体の名前が不明瞭になる。
|
41
40
|
def update_cwd
|
42
|
-
#str = `ssh #{@hostname} 'ls -l /proc/*/cwd'`
|
43
41
|
str = ssh_str('ls -l /proc/\*/cwd 2> /dev/null')
|
44
42
|
results = {}
|
45
43
|
str.split("\n").each do |line|
|
46
|
-
#pp line
|
47
44
|
items = line.split
|
48
45
|
pid = items[8].sub(/^\/proc\//, '').sub(/\/cwd$/, '')
|
49
46
|
results[pid] = items[10]
|
@@ -69,26 +66,6 @@ class Comana::HostInspector
|
|
69
66
|
#auxw だと、
|
70
67
|
#ippei 2948 198 11.8 4495708 3884740 pts/3 Rl Apr01 173494:26 /opt/bin/vasp5212openmpi で桁が崩れることがある。
|
71
68
|
def update_ps
|
72
|
-
#str = ssh_str('ps -eo "user pid %cpu %mem command"')
|
73
|
-
#results = {}
|
74
|
-
#lines = str.split("\n")
|
75
|
-
#lines.shift # titles of items
|
76
|
-
#lines.each do |line|
|
77
|
-
# user = line[0..7]
|
78
|
-
# pid = line[9..13]
|
79
|
-
# cpu = line[15..18]
|
80
|
-
# mem = line[20..23]
|
81
|
-
# command = line[25..-1]
|
82
|
-
# results[pid] = {
|
83
|
-
# "user" => user,
|
84
|
-
# "cpu" => cpu,
|
85
|
-
# "mem" => mem,
|
86
|
-
# "command" => command
|
87
|
-
# }
|
88
|
-
#end
|
89
|
-
#write_cache('ps', results)
|
90
|
-
|
91
|
-
#str = `ssh #{@hostname} 'ps auxw'`
|
92
69
|
str = ssh_str('ps auxw')
|
93
70
|
results = {}
|
94
71
|
lines = str.split("\n")
|
@@ -109,32 +86,6 @@ class Comana::HostInspector
|
|
109
86
|
}
|
110
87
|
end
|
111
88
|
write_cache('ps', results)
|
112
|
-
|
113
|
-
##str = `ssh #{@hostname} 'ps auxw'`
|
114
|
-
#str = ssh_str('ps auxw')
|
115
|
-
#results = {}
|
116
|
-
#lines = str.split("\n")
|
117
|
-
#lines.shift # titles of items
|
118
|
-
#lines.each do |line|
|
119
|
-
# user = line[0..7]
|
120
|
-
# pid = line[9..13]
|
121
|
-
# cpu = line[15..18]
|
122
|
-
# mem = line[20..23]
|
123
|
-
# #vsz = line[25..30]
|
124
|
-
# #rss = line[32..36]
|
125
|
-
# #tty = line[38..45]
|
126
|
-
# #stat = line[47..50]
|
127
|
-
# #start = line[52..56]
|
128
|
-
# #time = line[58..63]
|
129
|
-
# command = line[65..-1]
|
130
|
-
# results[pid] = {
|
131
|
-
# "user" => user,
|
132
|
-
# "cpu" => cpu,
|
133
|
-
# "mem" => mem,
|
134
|
-
# "command" => command
|
135
|
-
# }
|
136
|
-
#end
|
137
|
-
#write_cache('ps', results)
|
138
89
|
end
|
139
90
|
|
140
91
|
# dmesg ログ形式でつらい。
|
@@ -162,7 +113,6 @@ class Comana::HostInspector
|
|
162
113
|
end
|
163
114
|
|
164
115
|
def update_meminfo
|
165
|
-
#str = `ssh #{@hostname} 'cat /proc/meminfo'`
|
166
116
|
str = ssh_str('cat /proc/meminfo')
|
167
117
|
results = {}
|
168
118
|
lines = str.split("\n")
|
@@ -173,8 +123,6 @@ class Comana::HostInspector
|
|
173
123
|
write_cache('meminfo', results)
|
174
124
|
end
|
175
125
|
|
176
|
-
############################################################
|
177
|
-
## common
|
178
126
|
#Return cached data for the given name.
|
179
127
|
def fetch(name)
|
180
128
|
load_cache(name)
|
@@ -193,7 +141,6 @@ class Comana::HostInspector
|
|
193
141
|
|
194
142
|
# 先に ping を打ち、返事がなければ 空文字列 を返す。
|
195
143
|
def ssh_str(command)
|
196
|
-
#pp command
|
197
144
|
update_ping
|
198
145
|
if fetch('ping')
|
199
146
|
return `ssh #{@hostname} #{command}`
|
@@ -210,7 +157,6 @@ class Comana::HostInspector
|
|
210
157
|
end
|
211
158
|
|
212
159
|
def load_cache(name)
|
213
|
-
#return nil unless File.exist? "#{@cache_dir}/#{name}.yaml"
|
214
160
|
cache_file = "#{@cache_dir}/#{name}.yaml"
|
215
161
|
unless File.exist? cache_file
|
216
162
|
raise NoUpdateFile, "#{cache_file} not found."
|
data/lib/comana.rb
CHANGED
@@ -1,13 +1,10 @@
|
|
1
1
|
module Comana; end
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'string/integer'
|
5
|
+
require 'string/float'
|
7
6
|
require "comana/computationmanager.rb"
|
8
7
|
require "comana/clustersetting.rb"
|
9
8
|
require "comana/hostselector.rb"
|
10
|
-
require "comana/queuesubmitter.rb"
|
11
9
|
require "comana/hostinspector.rb"
|
12
|
-
require "comana/
|
13
|
-
require "comana/gridenginescript.rb"
|
10
|
+
require "comana/gridengine.rb"
|