server_scripts 0.1 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +34 -0
- data/bin/mem_monitor +56 -0
- data/lib/server_scripts.rb +4 -1
- data/lib/server_scripts/memory_monitor.rb +56 -0
- data/lib/server_scripts/parser.rb +1 -0
- data/lib/server_scripts/parser/starpu_profile.rb +15 -3
- data/lib/server_scripts/parser/vtune.rb +1 -0
- data/lib/server_scripts/parser/vtune/hotspots.rb +3 -0
- data/lib/server_scripts/parser/vtune/hotspots/base.rb +20 -0
- data/lib/server_scripts/parser/vtune/hotspots/threads.rb +72 -0
- data/lib/server_scripts/parser/vtune/hotspots/threads/starpu.rb +22 -0
- data/lib/server_scripts/version.rb +1 -1
- data/server_scripts.gemspec +2 -0
- metadata +11 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b72b201ae506a4e955c8b19ce8e0f3b1f3880cb
|
4
|
+
data.tar.gz: 3644cad87a968c5032e6265d371ac299a001f39d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b72fdf3f6c2340e4599326191c7e37033d04ff4a7a93716401c21fe13bf4143aba8a214f1a63300874d7283335eaf3615abdd81d6fb61546b983ff2f0d405848
|
7
|
+
data.tar.gz: c29c3f522a6325caf5ee70e8ebde1a7edfd9ae62954871a4301b564ab9174a939b87fc3642e0f0204c909e60eb0f1622a0d7a3da16dffe8262f712f0e057a1b0
|
data/README.md
CHANGED
@@ -6,6 +6,21 @@ The following functionality is provided:
|
|
6
6
|
* Generate job scripts and run batch jobs on TSUBAME 3.0, ABCI and reedbush machines.
|
7
7
|
* Parse various kinds of profiling files and generate meaningful output.
|
8
8
|
|
9
|
+
<!-- markdown-toc start - Don't edit this section. Run M-x markdown-toc-generate-toc again -->
|
10
|
+
**Table of Contents**
|
11
|
+
|
12
|
+
- [server-scripts](#server-scripts)
|
13
|
+
- [Usage](#usage)
|
14
|
+
- [ENV variables](#env-variables)
|
15
|
+
- [Writing job scripts](#writing-job-scripts)
|
16
|
+
- [Simple openMPI job script](#simple-openmpi-job-script)
|
17
|
+
- [Intel MPI profiling job script](#intel-mpi-profiling-job-script)
|
18
|
+
- [Parse intel ITAC output](#parse-intel-itac-output)
|
19
|
+
- [Parse starpu worker info](#parse-starpu-worker-info)
|
20
|
+
- [Usage](#usage)
|
21
|
+
|
22
|
+
<!-- markdown-toc end -->
|
23
|
+
|
9
24
|
# Usage
|
10
25
|
|
11
26
|
## ENV variables
|
@@ -55,6 +70,8 @@ The intel ITAC tool can be helpful for generating traces of parallel MPI program
|
|
55
70
|
This class can be used for converting an ITAC file to an ideal trace and then generating
|
56
71
|
the function profile for obtaining things like the MPI wait time.
|
57
72
|
|
73
|
+
### Usage
|
74
|
+
|
58
75
|
For extracting the MPI wait time from an ITAC trace, do the following:
|
59
76
|
``` ruby
|
60
77
|
require 'server_scripts'
|
@@ -72,4 +89,21 @@ puts itac.event_time("getrf_start", how: :per_proc, kind: :real)
|
|
72
89
|
|
73
90
|
## Parse starpu worker info
|
74
91
|
|
92
|
+
The `ServerScripts::Parser::StarpuProfile` class has various functions for parsing the
|
93
|
+
`*.starpu_profile` files that are generated by starpu with per-worker CPU execution info.
|
94
|
+
These can be batch-processed using server\_scripts by specifying a regex that will match the
|
95
|
+
profile for each process that produces it. You can either get per-worker or per-process
|
96
|
+
information from this.
|
75
97
|
|
98
|
+
### Usage
|
99
|
+
|
100
|
+
``` ruby
|
101
|
+
parser = ServerScripts::Parser::StarpuProfile.new("test/artifacts/4_proc_profile_8_*.starpu_profile")
|
102
|
+
|
103
|
+
puts parser.total_time
|
104
|
+
puts parser.total_exec_time
|
105
|
+
puts parser.total_sleep_time
|
106
|
+
puts parser.total_overhead_time
|
107
|
+
puts parser.time(event: :total_time, proc_id: 0, worker_id: 4)
|
108
|
+
puts parser.proc_time event: :exec_time, proc_id: 2
|
109
|
+
```
|
data/bin/mem_monitor
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line front end for ServerScripts::MemoryMonitor. Samples the VmRSS
# and VmSize of one process and either prints the samples or writes them to
# the file given with --output.

require 'server_scripts'

if ARGV[0] == "--help"
  puts "Usage:"
  puts "\tmem_monitor [--pname=pname|--pid=pid] --duration=HH:MM:SS --interval=HH:MM:SS [--output=FILE]"
  exit
end

pname = nil
pid = nil
duration = nil
interval = nil
ofile = nil

# Options are only recognised in their full `--name=value` form. Anything
# else is ignored instead of crashing on a failed regex match.
ARGV.each do |arg|
  case arg
  when /--pname=(\w+)/
    pname = Regexp.last_match(1)
  when /--pid=(\d+)/
    pid = Regexp.last_match(1).to_i
  when /--duration=(\w+:\w+:\w+)/
    duration = Regexp.last_match(1)
  when /--interval=(\w+:\w+:\w+)/
    interval = Regexp.last_match(1)
  when /--output=(.*)/
    ofile = Regexp.last_match(1)
  end
end

raise ArgumentError, "Specify the process with --pname or --pid." unless pname || pid

# Only the PID was given: look up the command name for the report below.
pname = `ps -p #{pid} -o comm=`.strip if !pname && pid

# Only the name was given: resolve it to a PID.
if !pid && pname
  # pidof may print several PIDs on one line; use the first one it lists.
  found = `pidof #{pname}`.split.first
  raise ArgumentError, "Process #{pname} does not exist." unless found

  pid = found.to_i
end

duration ||= "00:00:30"
interval ||= "00:00:01"

puts "Using params:"
puts "\tPID: #{pid}"
puts "\tPNAME: #{pname}"
puts "\tDURATION: #{duration}"
puts "\tINTERVAL: #{interval}"
puts "\tOFILE: #{ofile}" if ofile

monitor = ServerScripts::MemoryMonitor.new(pid: pid, duration: duration,
                                           interval: interval)
monitor.start!

if ofile
  # Block form closes the file even if writing raises.
  File.open(ofile, "w") do |file|
    file << "VmRSS (MB): #{monitor.vmrss}\n"
    file << "VmSize (MB): #{monitor.vmsize}\n"
  end
else
  puts "VmRSS (MB): #{monitor.vmrss}"
  puts "VmSize (MB): #{monitor.vmsize}"
end
|
56
|
+
|
data/lib/server_scripts.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
require 'ptools'
|
3
|
+
require 'csv'
|
4
|
+
|
5
|
+
require 'server_scripts/version'
|
3
6
|
|
4
7
|
require 'server_scripts/node_type'
|
5
8
|
require 'server_scripts/executor'
|
@@ -7,7 +10,7 @@ require 'server_scripts/computer'
|
|
7
10
|
require 'server_scripts/parser'
|
8
11
|
require 'server_scripts/experiment'
|
9
12
|
require 'server_scripts/batch_job'
|
10
|
-
require 'server_scripts/
|
13
|
+
require 'server_scripts/memory_monitor'
|
11
14
|
|
12
15
|
module ServerScripts
|
13
16
|
class << self
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module ServerScripts
  # Records the memory usage of a running process by repeatedly reading
  # /proc/<pid>/status and collecting its VmRSS and VmSize values.
  class MemoryMonitor
    # HH:MM:SS pattern accepted for the duration and the interval.
    HMS_PATTERN = /(\d+):(\d+):(\d+)/.freeze

    # PID of the monitored process.
    attr_reader :pid
    # Command name of the monitored process.
    attr_reader :pname
    # Total monitoring time in seconds.
    attr_reader :duration_sec
    # Time between two samples in seconds.
    attr_reader :interval_sec
    # VmRSS samples (kB value from /proc divided by 1e3), in sampling order.
    attr_reader :vmrss
    # VmSize samples (kB value from /proc divided by 1e3), in sampling order.
    attr_reader :vmsize

    # :pid and :pname identify the process; if only one of them is given the
    # other is looked up with `ps` / `pidof`.
    # :duration and :interval are "HH:MM:SS" strings.
    #
    # Raises ArgumentError if :duration or :interval is not in HH:MM:SS form.
    def initialize(pid: nil, pname: nil, duration: "00:00:30", interval: "00:00:01")
      @vmrss = []
      @vmsize = []
      # Keep whatever the caller supplied, so passing both values works too.
      @pid = pid
      @pname = pname

      # The argv form of IO.popen bypasses the shell, so the values are never
      # interpreted as shell syntax.
      if @pname.nil? && @pid
        @pname = IO.popen(["ps", "-p", @pid.to_s, "-o", "comm="], &:read).strip
      elsif @pid.nil? && @pname
        # pidof may list several PIDs; String#to_i keeps the first one.
        @pid = IO.popen(["pidof", @pname.to_s], &:read).strip.to_i
      end

      parse_time_intervals duration, interval
    end

    # Take one sample every interval_sec seconds until duration_sec seconds
    # have been covered, appending the results to #vmrss and #vmsize.
    def start!
      sample_count.times do
        record_sample
        sleep @interval_sec
      end
    end

    private

    # Number of samples needed to cover the duration at the given interval.
    # A zero interval falls back to one sample per second of duration.
    def sample_count
      return @duration_sec if @interval_sec.zero?

      @duration_sec.fdiv(@interval_sec).ceil
    end

    # Read the status file once and store the memory figures found in it.
    # File.foreach closes the file when the block finishes.
    def record_sample
      File.foreach("/proc/#{@pid}/status") do |line|
        rss = line.match(/VmRSS:\s+(\d+)\s+kB/)
        @vmrss << (rss[1].to_i / 1e3) if rss

        size = line.match(/VmSize:\s+(\d+)\s+kB/)
        @vmsize << (size[1].to_i / 1e3) if size
      end
    end

    # Convert both HH:MM:SS strings to seconds.
    def parse_time_intervals dur, inter
      @duration_sec = to_seconds(dur)
      @interval_sec = to_seconds(inter)
    end

    def to_seconds(time)
      hours, min, sec = get_hms(time)
      hours * 3600 + min * 60 + sec
    end

    # Split an HH:MM:SS string into [hours, minutes, seconds] integers.
    def get_hms(time)
      matcher = HMS_PATTERN.match(time.to_s)
      raise ArgumentError, "Expected time as HH:MM:SS, got #{time.inspect}." unless matcher

      matcher.captures.map(&:to_i)
    end
  end # class MemoryMonitor
end # module ServerScripts
|
@@ -2,7 +2,9 @@ module ServerScripts
|
|
2
2
|
module Parser
|
3
3
|
class StarpuProfile
|
4
4
|
|
5
|
+
# The Hash containing the time records of various workers and processes.
|
5
6
|
attr_reader :time_hash
|
7
|
+
|
6
8
|
# Specify the regex that will allow finding the profile files for the given starpu
|
7
9
|
# processes. Each process will output one file.
|
8
10
|
#
|
@@ -13,24 +15,30 @@ module ServerScripts
|
|
13
15
|
@regex = regex
|
14
16
|
@time_hash = {}
|
15
17
|
extract_data_from_profiles
|
18
|
+
raise ArgumentError, "could not find any starpu profiles." if @time_hash.empty?
|
16
19
|
end
|
17
20
|
|
21
|
+
# Get the total time in seconds, summed over all processes and workers.
|
18
22
|
def total_time
|
19
23
|
extract_from_time_hash :total_time
|
20
24
|
end
|
21
25
|
|
26
|
+
# Get the total exec time in seconds, summed over all processes and workers.
|
22
27
|
def total_exec_time
|
23
28
|
extract_from_time_hash :exec_time
|
24
29
|
end
|
25
|
-
|
30
|
+
|
31
|
+
# Get the total sleep time in seconds, summed over all processes and workers.
|
26
32
|
def total_sleep_time
|
27
33
|
extract_from_time_hash :sleep_time
|
28
34
|
end
|
29
35
|
|
36
|
+
# Get the total overhead time in seconds, summed over all processes and workers.
|
30
37
|
def total_overhead_time
|
31
38
|
extract_from_time_hash :overhead_time
|
32
39
|
end
|
33
40
|
|
41
|
+
# Get the total time for an event summed over all the workers in a given process.
|
34
42
|
def proc_time event:, proc_id:
|
35
43
|
time = 0.0
|
36
44
|
@time_hash[proc_id].each_value do |thread_info|
|
@@ -40,6 +48,11 @@ module ServerScripts
|
|
40
48
|
time
|
41
49
|
end
|
42
50
|
|
51
|
+
# Get the time in seconds for the given event, worker and process.
|
52
|
+
#
|
53
|
+
# :event can be one of :total_time, :exec_time, :sleep_time or :overhead_time.
|
54
|
+
# :proc_id should be a number specifying process number.
|
55
|
+
# :worker_id should be a number specifying the worker ID.
|
43
56
|
def time event:, proc_id:, worker_id:
|
44
57
|
@time_hash[proc_id][worker_id][event]
|
45
58
|
end
|
@@ -68,8 +81,7 @@ module ServerScripts
|
|
68
81
|
if ServerScripts.verbose
|
69
82
|
puts "--------------------------------------"
|
70
83
|
puts "Reading file #{fname}..."
|
71
|
-
end
|
72
|
-
|
84
|
+
end
|
73
85
|
proc_id = fname.match(@regex.gsub("*", "(\\d+)"))[1].to_i
|
74
86
|
@time_hash[proc_id] = {}
|
75
87
|
output = File.read(fname).split("\n")
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'server_scripts/parser/vtune/hotspots'
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module ServerScripts
  module Parser
    module VTune
      module Hotspots
        # Shared base for hotspot report parsers. Holds the CSV column names
        # and runs the subclass's parse_csv! on construction.
        class Base
          # Header names of the CSV columns read by the parsers.
          CPU_TIME = "CPU Time".freeze
          CPU_EFFECTIVE_TIME = "CPU Time:Effective Time".freeze
          CPU_OVERHEAD_TIME = "CPU Time:Overhead Time".freeze
          CPU_SPIN_TIME = "CPU Time:Spin Time".freeze
          WAIT_TIME = "Wait Time".freeze

          # fname is handed to parse_csv!, which fills @threads.
          def initialize(fname)
            @threads = {}
            parse_csv! fname
          end

          private

          # Hook that subclasses override to populate @threads from +fname+.
          def parse_csv!(_fname)
            raise NotImplementedError, "#{self.class} must implement parse_csv!"
          end
        end # class Base
      end # module Hotspots
    end # module VTune
  end # module Parser
end # module ServerScripts
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module ServerScripts
  module Parser
    module VTune
      # Classes for analysing files showing hotspot analysis.
      module Hotspots
        # Parse a file with a hotspots report and grouped by threads. This class
        # is made for parsing things from a single node, multi threaded execution.
        # CSV delimiter should be a comma.
        #
        # Example command:
        #   vtune -collect threading -report hotspots -group-by thread -csv-delimiter=, a.out 65536
        class Threads < Base
          # Get time for a particular event in a particular thread.
          #
          # :event is one of :cpu_time, :cpu_effective_time, :cpu_overhead_time
          # or :wait_time. :tid is the zero-based row index of the thread in
          # the CSV report.
          def time event:, tid:
            @threads[tid][event]
          end

          # Sum of total CPU and wait time.
          def total_time
            total_cpu_time + total_wait_time
          end

          # Total CPU time of all the threads. Does not include the wait time.
          def total_cpu_time
            @total_cpu_time ||= parse_for_event(:cpu_time)
          end

          # Total Effective CPU time.
          def total_cpu_effective_time
            @total_cpu_effective_time ||= parse_for_event(:cpu_effective_time)
          end

          # Total CPU overhead: sum of CPU Spin Time + CPU Overhead Time
          def total_cpu_overhead_time
            @total_cpu_overhead_time ||= parse_for_event(:cpu_overhead_time)
          end

          # Total Wait Time.
          def total_wait_time
            @total_wait_time ||= parse_for_event(:wait_time)
          end

          private

          # Add up one event over every parsed thread; 0.0 when there are none.
          def parse_for_event(event)
            @threads.each_value.sum(0.0) { |thread| thread[event] }
          end

          # Read the report row by row, one thread per row. Fields come from
          # the row itself, so no whole column is rebuilt per field.
          def parse_csv!(fname)
            CSV.foreach(fname, headers: true).with_index do |row, i|
              @threads[i] = {
                cpu_time: row[CPU_TIME].to_f,
                cpu_effective_time: row[CPU_EFFECTIVE_TIME].to_f,
                # Overhead and spin time are reported together as overhead.
                cpu_overhead_time: row[CPU_OVERHEAD_TIME].to_f + row[CPU_SPIN_TIME].to_f,
                wait_time: row[WAIT_TIME].to_f
              }
            end
          end
        end # class Threads
      end # module Hotspots
    end # module VTune
  end # module Parser
end # module ServerScripts
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module ServerScripts
  module Parser
    module VTune
      module Hotspots
        # Hotspots parser intended for StarPU runs.
        # NOTE(review): both methods below are empty stubs, so this class
        # does not parse or report anything yet.
        class Starpu < Base
          # Get time for a particular event of a particular worker, master thread
          # or MPI thread. Specify :tid as :CPU_#ID for worker, :MPI for MPI thread,
          # and :master for the task submission thread.
          #
          # NOTE(review): the body is empty, so this currently returns nil for
          # every event and tid.
          def time event:, tid:

          end

          private

          # Empty override of the parsing hook: nothing is read from +fname+.
          def parse_csv! fname

          end
        end # class Starpu
      end # module Hotspots
    end # module VTune
  end # module Parser
end # module ServerScripts
|
data/server_scripts.gemspec
CHANGED
@@ -34,6 +34,8 @@ Gem::Specification.new do |spec|
|
|
34
34
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
35
35
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
36
36
|
spec.require_paths = ["lib"]
|
37
|
+
spec.bindir = "bin"
|
38
|
+
spec.executables << "mem_monitor"
|
37
39
|
|
38
40
|
spec.add_runtime_dependency 'ptools'
|
39
41
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: server_scripts
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sameer Deshmukh
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ptools
|
@@ -71,13 +71,15 @@ description: 'Easily write scripts for submitted jobs to various machines.
|
|
71
71
|
'
|
72
72
|
email:
|
73
73
|
- sameer.deshmukh93@gmail.com
|
74
|
-
executables:
|
74
|
+
executables:
|
75
|
+
- mem_monitor
|
75
76
|
extensions: []
|
76
77
|
extra_rdoc_files: []
|
77
78
|
files:
|
78
79
|
- Gemfile
|
79
80
|
- README.md
|
80
81
|
- Rakefile
|
82
|
+
- bin/mem_monitor
|
81
83
|
- lib/server_scripts.rb
|
82
84
|
- lib/server_scripts/batch_job.rb
|
83
85
|
- lib/server_scripts/computer.rb
|
@@ -91,10 +93,16 @@ files:
|
|
91
93
|
- lib/server_scripts/executor/valgrind.rb
|
92
94
|
- lib/server_scripts/executor/vanilla.rb
|
93
95
|
- lib/server_scripts/experiment.rb
|
96
|
+
- lib/server_scripts/memory_monitor.rb
|
94
97
|
- lib/server_scripts/node_type.rb
|
95
98
|
- lib/server_scripts/parser.rb
|
96
99
|
- lib/server_scripts/parser/itac.rb
|
97
100
|
- lib/server_scripts/parser/starpu_profile.rb
|
101
|
+
- lib/server_scripts/parser/vtune.rb
|
102
|
+
- lib/server_scripts/parser/vtune/hotspots.rb
|
103
|
+
- lib/server_scripts/parser/vtune/hotspots/base.rb
|
104
|
+
- lib/server_scripts/parser/vtune/hotspots/threads.rb
|
105
|
+
- lib/server_scripts/parser/vtune/hotspots/threads/starpu.rb
|
98
106
|
- lib/server_scripts/version.rb
|
99
107
|
- server_scripts.gemspec
|
100
108
|
homepage: https://github.com/v0dro/server-scripts
|