server_scripts 0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +34 -0
- data/bin/mem_monitor +56 -0
- data/lib/server_scripts.rb +4 -1
- data/lib/server_scripts/memory_monitor.rb +56 -0
- data/lib/server_scripts/parser.rb +1 -0
- data/lib/server_scripts/parser/starpu_profile.rb +15 -3
- data/lib/server_scripts/parser/vtune.rb +1 -0
- data/lib/server_scripts/parser/vtune/hotspots.rb +3 -0
- data/lib/server_scripts/parser/vtune/hotspots/base.rb +20 -0
- data/lib/server_scripts/parser/vtune/hotspots/threads.rb +72 -0
- data/lib/server_scripts/parser/vtune/hotspots/threads/starpu.rb +22 -0
- data/lib/server_scripts/version.rb +1 -1
- data/server_scripts.gemspec +2 -0
- metadata +11 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b72b201ae506a4e955c8b19ce8e0f3b1f3880cb
|
4
|
+
data.tar.gz: 3644cad87a968c5032e6265d371ac299a001f39d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b72fdf3f6c2340e4599326191c7e37033d04ff4a7a93716401c21fe13bf4143aba8a214f1a63300874d7283335eaf3615abdd81d6fb61546b983ff2f0d405848
|
7
|
+
data.tar.gz: c29c3f522a6325caf5ee70e8ebde1a7edfd9ae62954871a4301b564ab9174a939b87fc3642e0f0204c909e60eb0f1622a0d7a3da16dffe8262f712f0e057a1b0
|
data/README.md
CHANGED
@@ -6,6 +6,21 @@ The following functionality is provided:
|
|
6
6
|
* Generate job scripts and run batch jobs on TSUBAME 3.0, ABCI and reedbush machines.
|
7
7
|
* Parse various kinds of profiling files and generate meaningful output.
|
8
8
|
|
9
|
+
<!-- markdown-toc start - Don't edit this section. Run M-x markdown-toc-generate-toc again -->
|
10
|
+
**Table of Contents**
|
11
|
+
|
12
|
+
- [server-scripts](#server-scripts)
|
13
|
+
- [Usage](#usage)
|
14
|
+
- [ENV variables](#env-variables)
|
15
|
+
- [Writing job scripts](#writing-job-scripts)
|
16
|
+
- [Simple openMPI job script](#simple-openmpi-job-script)
|
17
|
+
- [Intel MPI profiling job script](#intel-mpi-profiling-job-script)
|
18
|
+
- [Parse intel ITAC output](#parse-intel-itac-output)
|
19
|
+
- [Parse starpu worker info](#parse-starpu-worker-info)
|
20
|
+
- [Usage](#usage)
|
21
|
+
|
22
|
+
<!-- markdown-toc end -->
|
23
|
+
|
9
24
|
# Usage
|
10
25
|
|
11
26
|
## ENV variables
|
@@ -55,6 +70,8 @@ The intel ITAC tool can be helpful for generating traces of parallel MPI program
|
|
55
70
|
This class can be used for converting an ITAC file to an ideal trace and then generating
|
56
71
|
the function profile for obtaining things like the MPI wait time.
|
57
72
|
|
73
|
+
### Usage
|
74
|
+
|
58
75
|
For extracting the MPI wait time from an ITAC trace, do the following:
|
59
76
|
``` ruby
|
60
77
|
require 'server_scripts'
|
@@ -72,4 +89,21 @@ puts itac.event_time("getrf_start", how: :per_proc, kind: :real)
|
|
72
89
|
|
73
90
|
## Parse starpu worker info
|
74
91
|
|
92
|
+
The `ServerScripts::Parser::StarpuProfile` class has various functions for parsing the
|
93
|
+
`*.starpu_profile` files that are generated by starpu with per-worker CPU execution info.
|
94
|
+
These can be batch-processed using server\_scripts by specifying a regex that will match the
|
95
|
+
profile for each process that produces it. You can either get per-worker or per-process
|
96
|
+
information from this.
|
75
97
|
|
98
|
+
### Usage
|
99
|
+
|
100
|
+
``` ruby
|
101
|
+
parser = Parser::StarpuProfile.new("test/artifacts/4_proc_profile_8_*.starpu_profile")
|
102
|
+
|
103
|
+
puts parser.total_time
|
104
|
+
puts parser.total_exec_time
|
105
|
+
puts parser.total_sleep_time
|
106
|
+
puts parser.total_overhead_time
|
107
|
+
puts parser.time(event: :total_time, proc_id: 0, worker_id: 4)
|
108
|
+
puts parser.proc_time event: :exec_time, proc_id: 2
|
109
|
+
```
|
data/bin/mem_monitor
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line front-end for ServerScripts::MemoryMonitor.
#
# Samples the memory usage of a process (given by name or by PID) and either
# prints the collected VmRSS/VmSize samples or writes them to a file.

require 'server_scripts'

if ARGV[0] == "--help"
  puts "Usage:"
  puts "\tmem_monitor [--pname=pname|--pid=pid] --duration=HH:MM:SS --interval=HH:MM:SS [--output=file]"
  exit
end

pname = nil
pid = nil
duration = nil
interval = nil
ofile = nil

# String#[] with a capture index returns nil when the flag is absent or
# malformed (e.g. "--pid" without "="), so such arguments are ignored instead
# of crashing with NoMethodError on nil.
ARGV.each do |arg|
  pname_arg = arg[/--pname=(\w+)/, 1]
  pname = pname_arg if pname_arg

  pid_arg = arg[/--pid=(\d+)/, 1]
  pid = pid_arg.to_i if pid_arg

  duration_arg = arg[/--duration=(\w+:\w+:\w+)/, 1]
  duration = duration_arg if duration_arg

  interval_arg = arg[/--interval=(\w+:\w+:\w+)/, 1]
  interval = interval_arg if interval_arg

  ofile_arg = arg[/--output=(.*)/, 1]
  ofile = ofile_arg if ofile_arg
end

raise ArgumentError, "Specify either --pname or --pid." if !pname && !pid

# pid is an Integer and pname only contains \w characters at this point, so
# interpolating them into the shell commands below is safe.
if !pname && pid
  pname = `ps -p #{pid} -o comm=`.strip
end

if !pid && pname
  pids = `pidof #{pname}`.split
  # Only fail when pidof found nothing.
  raise ArgumentError, "Process #{pname} does not exist." if pids.empty?

  # pidof lists every matching PID separated by spaces; monitor the first one.
  pid = pids.first.to_i
end

duration = "00:00:30" if !duration
interval = "00:00:01" if !interval

puts "Using params:"
puts "\tPID: #{pid}"
puts "\tPNAME: #{pname}"
puts "\tDURATION: #{duration}"
puts "\tINTERVAL: #{interval}"
puts "\tOFILE: #{ofile}" if ofile

monitor = ServerScripts::MemoryMonitor.new(pid: pid, duration: duration,
                                           interval: interval)
monitor.start!

if ofile
  # Block form guarantees the file is closed even if a write fails.
  File.open(ofile, "w+") do |file|
    file << "VmRSS (MB): #{monitor.vmrss}\n"
    file << "VmSize (MB): #{monitor.vmsize}\n"
  end
else
  puts "VmRSS (MB): #{monitor.vmrss}"
  puts "VmSize (MB): #{monitor.vmsize}"
end
|
56
|
+
|
data/lib/server_scripts.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
require 'ptools'
|
3
|
+
require 'csv'
|
4
|
+
|
5
|
+
require 'server_scripts/version'
|
3
6
|
|
4
7
|
require 'server_scripts/node_type'
|
5
8
|
require 'server_scripts/executor'
|
@@ -7,7 +10,7 @@ require 'server_scripts/computer'
|
|
7
10
|
require 'server_scripts/parser'
|
8
11
|
require 'server_scripts/experiment'
|
9
12
|
require 'server_scripts/batch_job'
|
10
|
-
require 'server_scripts/
|
13
|
+
require 'server_scripts/memory_monitor'
|
11
14
|
|
12
15
|
module ServerScripts
|
13
16
|
class << self
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module ServerScripts
  # Periodically samples the memory usage of a running process by reading
  # /proc/<pid>/status, collecting the VmRSS and VmSize values.
  #
  # Samples are stored as the kB figure reported by /proc divided by 1000.
  class MemoryMonitor
    # PID of the monitored process.
    attr_reader :pid
    # Command name of the monitored process.
    attr_reader :pname
    # Total monitoring time in seconds.
    attr_reader :duration_sec
    # Time between two samples in seconds.
    attr_reader :interval_sec
    # Array of VmRSS samples.
    attr_reader :vmrss
    # Array of VmSize samples.
    attr_reader :vmsize

    # Create a monitor for a process identified by :pid and/or :pname.
    #
    # :pid is the process ID (anything responding to #to_i).
    # :pname is the process name; it is resolved with `pidof` when :pid is
    #   not given.
    # :duration and :interval are "HH:MM:SS" strings.
    #
    # Raises ArgumentError when neither :pid nor :pname is given, when the
    # named process cannot be found, or when a time string is malformed.
    def initialize(pid: nil, pname: nil, duration: "00:00:30", interval: "00:00:01")
      @vmrss = []
      @vmsize = []

      if pid.nil? && pname.nil?
        raise ArgumentError, "either :pid or :pname must be specified."
      end

      # Both attributes are always populated, including when the caller
      # supplies pid and pname together.
      @pid = pid.nil? ? pid_of(pname) : pid.to_i
      @pname = pname || `ps -p #{@pid} -o comm=`.strip

      parse_time_intervals duration, interval
    end

    # Take one sample every interval_sec seconds for a total of duration_sec
    # seconds. Sampling stops early if the process disappears.
    def start!
      # Number of samples that fit in the duration, e.g. 30s at 5s => 6.
      samples = (@duration_sec.to_f / @interval_sec).ceil

      samples.times do
        begin
          # File.foreach closes the file once all lines are read.
          File.foreach("/proc/#{@pid}/status") do |line|
            rss = line.match(/VmRSS:\s+(\d+)\s+kB/)
            @vmrss << (rss[1].to_i / 1e3) if rss

            size = line.match(/VmSize:\s+(\d+)\s+kB/)
            @vmsize << (size[1].to_i / 1e3) if size
          end
        rescue Errno::ENOENT, Errno::ESRCH
          # The process exited; keep the samples collected so far.
          break
        end

        sleep @interval_sec
      end
    end

    private

    # Resolve a process name to a PID. The array form of IO.popen bypasses
    # the shell, so the name is never interpreted as shell syntax. When
    # several processes match, the first PID listed by pidof is used.
    def pid_of(name)
      found = IO.popen(["pidof", name.to_s], &:read).split.first
      raise ArgumentError, "Process #{name} does not exist." if found.nil?

      found.to_i
    end

    # Convert the "HH:MM:SS" duration and interval into seconds.
    def parse_time_intervals dur, inter
      hours, min, sec = get_hms(dur)
      @duration_sec = hours * 3600 + min * 60 + sec
      hours, min, sec = get_hms(inter)
      @interval_sec = hours * 3600 + min * 60 + sec

      # A zero interval would make the sample count undefined.
      raise ArgumentError, "interval must be greater than 00:00:00." if @interval_sec.zero?
    end

    # Split an "HH:MM:SS" string into [hours, minutes, seconds] integers.
    def get_hms(time)
      matcher = time.to_s.match(/(\d+):(\d+):(\d+)/)
      if matcher.nil?
        raise ArgumentError, "time must be given as HH:MM:SS, got #{time.inspect}."
      end

      matcher.captures.map(&:to_i)
    end
  end # class MemoryMonitor
end # module ServerScripts
|
@@ -2,7 +2,9 @@ module ServerScripts
|
|
2
2
|
module Parser
|
3
3
|
class StarpuProfile
|
4
4
|
|
5
|
+
# The Hash containing the time records of various workers and processes.
|
5
6
|
attr_reader :time_hash
|
7
|
+
|
6
8
|
# Specify the regex that will allow finding the profile files for the given starpu
|
7
9
|
# processes. Each process will output one file.
|
8
10
|
#
|
@@ -13,24 +15,30 @@ module ServerScripts
|
|
13
15
|
@regex = regex
|
14
16
|
@time_hash = {}
|
15
17
|
extract_data_from_profiles
|
18
|
+
raise ArgumentError, "could not find any starpu profiles." if @time_hash.empty?
|
16
19
|
end
|
17
20
|
|
21
|
+
# Get the total time in seconds, summed over all processes and workers.
|
18
22
|
def total_time
|
19
23
|
extract_from_time_hash :total_time
|
20
24
|
end
|
21
25
|
|
26
|
+
# Get the total exec time in seconds, summed over all processes and workers.
|
22
27
|
def total_exec_time
|
23
28
|
extract_from_time_hash :exec_time
|
24
29
|
end
|
25
|
-
|
30
|
+
|
31
|
+
# Get the total sleep time in seconds, summed over all processes and workers.
|
26
32
|
def total_sleep_time
|
27
33
|
extract_from_time_hash :sleep_time
|
28
34
|
end
|
29
35
|
|
36
|
+
# Get the total overhead time in seconds, summed over all processes and workers.
|
30
37
|
def total_overhead_time
|
31
38
|
extract_from_time_hash :overhead_time
|
32
39
|
end
|
33
40
|
|
41
|
+
# Get the total time for an event summed over all the workers in a given process.
|
34
42
|
def proc_time event:, proc_id:
|
35
43
|
time = 0.0
|
36
44
|
@time_hash[proc_id].each_value do |thread_info|
|
@@ -40,6 +48,11 @@ module ServerScripts
|
|
40
48
|
time
|
41
49
|
end
|
42
50
|
|
51
|
+
# Get the time in seconds for the given event, worker and process.
|
52
|
+
#
|
53
|
+
# :event can be one of :total_time, :exec_time, :sleep_time or :overhead_time.
|
54
|
+
# :proc_id should be a number specifying process number.
|
55
|
+
# :worker_id should be a number specifying the worker ID.
|
43
56
|
def time event:, proc_id:, worker_id:
|
44
57
|
@time_hash[proc_id][worker_id][event]
|
45
58
|
end
|
@@ -68,8 +81,7 @@ module ServerScripts
|
|
68
81
|
if ServerScripts.verbose
|
69
82
|
puts "--------------------------------------"
|
70
83
|
puts "Reading file #{fname}..."
|
71
|
-
end
|
72
|
-
|
84
|
+
end
|
73
85
|
proc_id = fname.match(@regex.gsub("*", "(\\d+)"))[1].to_i
|
74
86
|
@time_hash[proc_id] = {}
|
75
87
|
output = File.read(fname).split("\n")
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'server_scripts/parser/vtune/hotspots'
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module ServerScripts
  module Parser
    module VTune
      module Hotspots
        # Common parent of the hotspot report parsers. Holds the CSV column
        # names and drives parsing from the constructor; subclasses supply
        # the actual parsing in parse_csv!.
        class Base
          # Column headers looked up in the CSV report.
          CPU_TIME = "CPU Time".freeze
          CPU_EFFECTIVE_TIME = "CPU Time:Effective Time".freeze
          CPU_OVERHEAD_TIME = "CPU Time:Overhead Time".freeze
          CPU_SPIN_TIME = "CPU Time:Spin Time".freeze
          WAIT_TIME = "Wait Time".freeze

          # fname is the path of the report file; it is handed to parse_csv!
          # straight away, after @threads has been reset to an empty Hash.
          def initialize(fname)
            @threads = {}
            parse_csv! fname
          end

          private

          # Hook implemented by subclasses to fill @threads from the file.
          # Raises NotImplementedError when a subclass does not override it,
          # which is clearer than the NoMethodError raised otherwise.
          def parse_csv!(_fname)
            raise NotImplementedError, "#{self.class} must implement parse_csv!"
          end
        end # class Base
      end # module Hotspots
    end # module VTune
  end # module Parser
end # module ServerScripts
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module ServerScripts
  module Parser
    module VTune
      # Classes for analysing files showing hotspot analysis.
      module Hotspots
        # Parse a file with a hotspots report and grouped by threads. This class
        # is made for parsing things from a single node, multi threaded execution.
        # CSV delimiter should be a comma.
        #
        # Example command:
        #   vtune -collect threading -report hotspots -group-by thread -csv-delimiter=, a.out 65536
        class Threads < Base
          # Get time for a particular event in a particular thread.
          #
          # :event is one of :cpu_time, :cpu_effective_time, :cpu_overhead_time
          #   or :wait_time.
          # :tid is the zero-based index of the thread's row in the CSV file.
          def time(event:, tid:)
            @threads[tid][event]
          end

          # Sum of total CPU and wait time.
          def total_time
            total_cpu_time + total_wait_time
          end

          # Total CPU time of all the threads. Does not include the wait time.
          def total_cpu_time
            @total_cpu_time ||= parse_for_event(:cpu_time)
          end

          # Total Effective CPU time.
          def total_cpu_effective_time
            @total_cpu_effective_time ||= parse_for_event(:cpu_effective_time)
          end

          # Total CPU overhead: sum of CPU Spin Time + CPU Overhead Time
          def total_cpu_overhead_time
            @total_cpu_overhead_time ||= parse_for_event(:cpu_overhead_time)
          end

          # Total Wait Time.
          def total_wait_time
            @total_wait_time ||= parse_for_event(:wait_time)
          end

          private

          # Add up the given event over every parsed thread.
          def parse_for_event(event)
            @threads.each_value.reduce(0.0) { |total, thread| total + thread[event] }
          end

          # Read the CSV report and store one Hash of timings per row in
          # @threads, keyed by the row index.
          def parse_csv!(fname)
            data = CSV.parse(File.read(fname), headers: true)
            data.each_with_index do |row, i|
              # Read the fields from the row itself; extracting a whole
              # column per row made parsing quadratic in the row count.
              @threads[i] = {
                cpu_time: row[CPU_TIME].to_f,
                cpu_effective_time: row[CPU_EFFECTIVE_TIME].to_f,
                cpu_overhead_time: row[CPU_OVERHEAD_TIME].to_f + row[CPU_SPIN_TIME].to_f,
                wait_time: row[WAIT_TIME].to_f
              }
            end
          end
        end # class Threads
      end # module Hotspots
    end # module VTune
  end # module Parser
end # module ServerScripts
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module ServerScripts
  module Parser
    module VTune
      module Hotspots
        # Hotspots parser variant for StarPU runs. Both methods below are
        # still empty stubs.
        class Starpu < Base
          # Get time for a particular event of a particular worker, master thread
          # or MPI thread. Specify :tid as :CPU_#ID for worker, :MPI for MPI thread,
          # and :master for the task submission thread.
          #
          # TODO: not implemented; the body is empty, so this returns nil.
          def time(event:, tid:); end

          private

          # TODO: not implemented; the file is not read and nothing is stored.
          def parse_csv!(fname); end
        end # class Starpu
      end # module Hotspots
    end # module VTune
  end # module Parser
end # module ServerScripts
|
data/server_scripts.gemspec
CHANGED
@@ -34,6 +34,8 @@ Gem::Specification.new do |spec|
|
|
34
34
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
35
35
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
36
36
|
spec.require_paths = ["lib"]
|
37
|
+
spec.bindir = "bin"
|
38
|
+
spec.executables << "mem_monitor"
|
37
39
|
|
38
40
|
spec.add_runtime_dependency 'ptools'
|
39
41
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: server_scripts
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sameer Deshmukh
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ptools
|
@@ -71,13 +71,15 @@ description: 'Easily write scripts for submitted jobs to various machines.
|
|
71
71
|
'
|
72
72
|
email:
|
73
73
|
- sameer.deshmukh93@gmail.com
|
74
|
-
executables:
|
74
|
+
executables:
|
75
|
+
- mem_monitor
|
75
76
|
extensions: []
|
76
77
|
extra_rdoc_files: []
|
77
78
|
files:
|
78
79
|
- Gemfile
|
79
80
|
- README.md
|
80
81
|
- Rakefile
|
82
|
+
- bin/mem_monitor
|
81
83
|
- lib/server_scripts.rb
|
82
84
|
- lib/server_scripts/batch_job.rb
|
83
85
|
- lib/server_scripts/computer.rb
|
@@ -91,10 +93,16 @@ files:
|
|
91
93
|
- lib/server_scripts/executor/valgrind.rb
|
92
94
|
- lib/server_scripts/executor/vanilla.rb
|
93
95
|
- lib/server_scripts/experiment.rb
|
96
|
+
- lib/server_scripts/memory_monitor.rb
|
94
97
|
- lib/server_scripts/node_type.rb
|
95
98
|
- lib/server_scripts/parser.rb
|
96
99
|
- lib/server_scripts/parser/itac.rb
|
97
100
|
- lib/server_scripts/parser/starpu_profile.rb
|
101
|
+
- lib/server_scripts/parser/vtune.rb
|
102
|
+
- lib/server_scripts/parser/vtune/hotspots.rb
|
103
|
+
- lib/server_scripts/parser/vtune/hotspots/base.rb
|
104
|
+
- lib/server_scripts/parser/vtune/hotspots/threads.rb
|
105
|
+
- lib/server_scripts/parser/vtune/hotspots/threads/starpu.rb
|
98
106
|
- lib/server_scripts/version.rb
|
99
107
|
- server_scripts.gemspec
|
100
108
|
homepage: https://github.com/v0dro/server-scripts
|