server_scripts 0.1 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f1fcfa7acf474ee5e2995af309c00516764836c4
4
- data.tar.gz: 3e3567be1f67c495143ecabe583be364235cadf1
3
+ metadata.gz: 4b72b201ae506a4e955c8b19ce8e0f3b1f3880cb
4
+ data.tar.gz: 3644cad87a968c5032e6265d371ac299a001f39d
5
5
  SHA512:
6
- metadata.gz: 8996a1076526a5e44e58b649ae8daf11a520ed7cecbc2c26bd1eff94f2c6a7109e609f3ce995c00d333895303a9c9e6c47242b440306a551e2b803d88701176d
7
- data.tar.gz: 99e4065047cba125b88baf6a8f4f7391f926a7b9909ba7136d395f4bc584f1e671c9f65ae5caea0ef2aa5e74076c8ff6637124c0acd13f416c0a357ab57a627e
6
+ metadata.gz: b72fdf3f6c2340e4599326191c7e37033d04ff4a7a93716401c21fe13bf4143aba8a214f1a63300874d7283335eaf3615abdd81d6fb61546b983ff2f0d405848
7
+ data.tar.gz: c29c3f522a6325caf5ee70e8ebde1a7edfd9ae62954871a4301b564ab9174a939b87fc3642e0f0204c909e60eb0f1622a0d7a3da16dffe8262f712f0e057a1b0
data/README.md CHANGED
@@ -6,6 +6,21 @@ The following functionality is provided:
6
6
  * Generate job scripts and run batch jobs on TSUBAME 3.0, ABCI and reedbush machines.
7
7
  * Parse various kinds of profiling files and generate meaningful output.
8
8
 
9
+ <!-- markdown-toc start - Don't edit this section. Run M-x markdown-toc-generate-toc again -->
10
+ **Table of Contents**
11
+
12
+ - [server-scripts](#server-scripts)
13
+ - [Usage](#usage)
14
+ - [ENV variables](#env-variables)
15
+ - [Writing job scripts](#writing-job-scripts)
16
+ - [Simple openMPI job script](#simple-openmpi-job-script)
17
+ - [Intel MPI profiling job script](#intel-mpi-profiling-job-script)
18
+ - [Parse intel ITAC output](#parse-intel-itac-output)
19
+ - [Parse starpu worker info](#parse-starpu-worker-info)
20
+ - [Usage](#usage)
21
+
22
+ <!-- markdown-toc end -->
23
+
9
24
  # Usage
10
25
 
11
26
  ## ENV variables
@@ -55,6 +70,8 @@ The intel ITAC tool can be helpful for generating traces of parallel MPI program
55
70
  This class can be used for converting an ITAC file to an ideal trace and then generating
56
71
  the function profile for obtaining things like the MPI wait time.
57
72
 
73
+ ### Usage
74
+
58
75
  For extracting the MPI wait time from an ITAC trace, do the following:
59
76
  ``` ruby
60
77
  require 'server_scripts'
@@ -72,4 +89,21 @@ puts itac.event_time("getrf_start", how: :per_proc, kind: :real)
72
89
 
73
90
  ## Parse starpu worker info
74
91
 
92
+ The `ServerScripts::Parser::StarpuProfile` class has various functions for parsing the
93
+ `*.starpu_profile` files that are generated by starpu with per-worker CPU execution info.
94
+ These can be batch-processed using server\_scripts by specifying a regex that will match the
95
+ profile for each process that produces it. You can either get per-worker or per-process
96
+ information from this.
75
97
 
98
+ ### Usage
99
+
100
+ ``` ruby
101
+ parser = ServerScripts::Parser::StarpuProfile.new("test/artifacts/4_proc_profile_8_*.starpu_profile")
102
+
103
+ puts parser.total_time
104
+ puts parser.total_exec_time
105
+ puts parser.total_sleep_time
106
+ puts parser.total_overhead_time
107
+ puts parser.time(event: :total_time, proc_id: 0, worker_id: 4)
108
+ puts parser.proc_time event: :exec_time, proc_id: 2
109
+ ```
data/bin/mem_monitor ADDED
@@ -0,0 +1,56 @@
1
#!/usr/bin/env ruby

# Command line front end for ServerScripts::MemoryMonitor.
#
# Samples the memory usage of a process, identified either by name (--pname)
# or by PID (--pid), and reports the collected VmRSS and VmSize samples on
# stdout, or in the file given with --output.

require 'server_scripts'

if ARGV[0] == "--help"
  puts "Usage:"
  puts "\tmem_monitor [--pname=pname|--pid=pid] --duration=HH:MM:SS --interval=HH:MM:SS [--output=file]"
  exit
end

pname = nil
pid = nil
duration = nil
interval = nil
ofile = nil
ARGV.each do |arg|
  case arg
  when /\A--pname=(.+)\z/
    pname = Regexp.last_match(1)
  when /\A--pid=(\d+)\z/
    pid = Regexp.last_match(1).to_i
  when /\A--duration=(\d+:\d+:\d+)\z/
    duration = Regexp.last_match(1)
  when /\A--interval=(\d+:\d+:\d+)\z/
    interval = Regexp.last_match(1)
  when /\A--output=(.+)\z/
    ofile = Regexp.last_match(1)
  when /\A--(pname|pid|duration|interval|output)\b/
    # A known option whose value is missing or malformed: fail with a clear
    # message instead of a NoMethodError on a nil match.
    raise ArgumentError, "Malformed option '#{arg}'. Run with --help for usage."
  end
end

# Resolve whichever of PID / process name was not supplied. The commands are
# spawned with an argument vector (no shell), so the name is never interpreted
# by a shell.
if pid && !pname
  pname = IO.popen(["ps", "-p", pid.to_s, "-o", "comm="], &:read).strip
end

if pname && !pid
  pids = IO.popen(["pidof", pname], &:read).split
  raise ArgumentError, "Process #{pname} does not exist." if pids.empty?

  # pidof may report several PIDs for one name; monitor the first one listed.
  pid = pids.first.to_i
end

raise ArgumentError, "Specify either --pname or --pid. Run with --help for usage." unless pid

duration ||= "00:00:30"
interval ||= "00:00:01"

puts "Using params:"
puts "\tPID: #{pid}"
puts "\tPNAME: #{pname}"
puts "\tDURATION: #{duration}"
puts "\tINTERVAL: #{interval}"
puts "\tOFILE: #{ofile}" if ofile

monitor = ServerScripts::MemoryMonitor.new(pid: pid, duration: duration,
                                           interval: interval)
monitor.start!

report = "VmRSS (MB): #{monitor.vmrss}\nVmSize (MB): #{monitor.vmsize}\n"
if ofile
  # File.write opens, truncates, writes and closes the file in one call.
  File.write(ofile, report)
else
  puts report
end
56
+
@@ -1,5 +1,8 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  require 'ptools'
3
+ require 'csv'
4
+
5
+ require 'server_scripts/version'
3
6
 
4
7
  require 'server_scripts/node_type'
5
8
  require 'server_scripts/executor'
@@ -7,7 +10,7 @@ require 'server_scripts/computer'
7
10
  require 'server_scripts/parser'
8
11
  require 'server_scripts/experiment'
9
12
  require 'server_scripts/batch_job'
10
- require 'server_scripts/version'
13
+ require 'server_scripts/memory_monitor'
11
14
 
12
15
  module ServerScripts
13
16
  class << self
@@ -0,0 +1,56 @@
1
module ServerScripts
  # Samples the memory usage of a single process by reading the VmRSS and
  # VmSize fields of /proc/<pid>/status at a fixed interval for a fixed
  # duration. Every sample is divided by 1e3 and appended to #vmrss / #vmsize.
  class MemoryMonitor
    # PID of the monitored process.
    attr_reader :pid
    # Command name of the monitored process.
    attr_reader :pname
    # Total monitoring duration in seconds.
    attr_reader :duration_sec
    # Pause between two consecutive samples in seconds.
    attr_reader :interval_sec
    # Array of VmRSS samples (the kB value reported by the kernel / 1e3).
    attr_reader :vmrss
    # Array of VmSize samples (the kB value reported by the kernel / 1e3).
    attr_reader :vmsize

    # Create a monitor for a process given by +pid+ and/or +pname+.
    #
    # pid::      PID of the process. Looked up with `pidof` when only +pname+
    #            is given.
    # pname::    Command name of the process. Looked up with `ps` when only
    #            +pid+ is given.
    # duration:: Total monitoring time as "HH:MM:SS".
    # interval:: Time between samples as "HH:MM:SS".
    #
    # Raises ArgumentError if +duration+ or +interval+ contain no HH:MM:SS
    # time specification.
    def initialize(pid: nil, pname: nil, duration: "00:00:30", interval: "00:00:01")
      @vmrss = []
      @vmsize = []
      # Keep whatever the caller supplied, so that passing both pid and pname
      # no longer leaves both attributes unset.
      @pid = pid
      @pname = pname

      # Fill in the missing half. The commands are spawned with an argument
      # vector (no shell), so the values are never interpreted by a shell.
      if @pid && @pname.nil?
        @pname = IO.popen(["ps", "-p", @pid.to_s, "-o", "comm="], &:read).strip
      elsif @pname && @pid.nil?
        # pidof may print several PIDs; to_i keeps the first one (and yields 0
        # when nothing was found).
        @pid = IO.popen(["pidof", @pname.to_s], &:read).strip.to_i
      end

      parse_time_intervals duration, interval
    end

    # Collect the samples. Blocks for roughly #duration_sec seconds: one sample
    # is taken every #interval_sec seconds until the duration is used up.
    #
    # Raises a SystemCallError (e.g. Errno::ENOENT) if the status file of the
    # process cannot be read.
    def start!
      sample_count.times do
        # File.foreach closes the file once the block has seen every line.
        File.foreach("/proc/#{@pid}/status") do |line|
          if (rss = line.match(/VmRSS:\s+(\d+)\s+kB/))
            @vmrss << (rss[1].to_i / 1e3)
          elsif (size = line.match(/VmSize:\s+(\d+)\s+kB/))
            @vmsize << (size[1].to_i / 1e3)
          end
        end

        sleep @interval_sec
      end
    end

    private

    # Number of samples that fit into the duration at the configured interval.
    # A zero interval keeps the historical behaviour of one sample per second
    # of duration, taken back to back.
    def sample_count
      return @duration_sec if @interval_sec.zero?

      (@duration_sec.to_f / @interval_sec).ceil
    end

    # Convert the "HH:MM:SS" duration and interval strings into seconds.
    def parse_time_intervals dur, inter
      hours, min, sec = get_hms(dur)
      @duration_sec = hours * 3600 + min * 60 + sec
      hours, min, sec = get_hms(inter)
      @interval_sec = hours * 3600 + min * 60 + sec
    end

    # Split a "HH:MM:SS" string into [hours, minutes, seconds] integers.
    # Raises ArgumentError when no such pattern is found in +time+.
    def get_hms(time)
      matcher = time.to_s.match(/(\d+):(\d+):(\d+)/)
      raise ArgumentError, "Invalid time #{time.inspect}, expected HH:MM:SS." unless matcher

      matcher.captures.map(&:to_i)
    end
  end # class MemoryMonitor
end # module ServerScripts
@@ -1,2 +1,3 @@
1
1
  require 'server_scripts/parser/starpu_profile'
2
2
  require 'server_scripts/parser/itac'
3
+ require 'server_scripts/parser/vtune'
@@ -2,7 +2,9 @@ module ServerScripts
2
2
  module Parser
3
3
  class StarpuProfile
4
4
 
5
+ # The Hash containing the time records of various workers and processes.
5
6
  attr_reader :time_hash
7
+
6
8
  # Specify the regex that will allow finding the profile files for the given starpu
7
9
  # processes. Each process will output one file.
8
10
  #
@@ -13,24 +15,30 @@ module ServerScripts
13
15
  @regex = regex
14
16
  @time_hash = {}
15
17
  extract_data_from_profiles
18
+ raise ArgumentError, "could not find any starpu profiles." if @time_hash.empty?
16
19
  end
17
20
 
21
+ # Get the total time in seconds, summed over all processes and workers.
18
22
  def total_time
19
23
  extract_from_time_hash :total_time
20
24
  end
21
25
 
26
+ # Get the total exec time in seconds, summed over all processes and workers.
22
27
  def total_exec_time
23
28
  extract_from_time_hash :exec_time
24
29
  end
25
-
30
+
31
+ # Get the total sleep time in seconds, summed over all processes and workers.
26
32
  def total_sleep_time
27
33
  extract_from_time_hash :sleep_time
28
34
  end
29
35
 
36
+ # Get the total overhead time in seconds, summed over all processes and workers.
30
37
  def total_overhead_time
31
38
  extract_from_time_hash :overhead_time
32
39
  end
33
40
 
41
+ # Get the total time for an event summed over all the workers in a given process.
34
42
  def proc_time event:, proc_id:
35
43
  time = 0.0
36
44
  @time_hash[proc_id].each_value do |thread_info|
@@ -40,6 +48,11 @@ module ServerScripts
40
48
  time
41
49
  end
42
50
 
51
+ # Get the time in seconds for the given event, worker and process.
52
+ #
53
+ # :event can be one of :total_time, :exec_time, :sleep_time or :overhead_time.
54
+ # :proc_id should be a number specifying process number.
55
+ # :worker_id should be a number specifying the worker ID.
43
56
  def time event:, proc_id:, worker_id:
44
57
  @time_hash[proc_id][worker_id][event]
45
58
  end
@@ -68,8 +81,7 @@ module ServerScripts
68
81
  if ServerScripts.verbose
69
82
  puts "--------------------------------------"
70
83
  puts "Reading file #{fname}..."
71
- end
72
-
84
+ end
73
85
  proc_id = fname.match(@regex.gsub("*", "(\\d+)"))[1].to_i
74
86
  @time_hash[proc_id] = {}
75
87
  output = File.read(fname).split("\n")
@@ -0,0 +1 @@
1
+ require 'server_scripts/parser/vtune/hotspots'
@@ -0,0 +1,3 @@
1
+ require 'server_scripts/parser/vtune/hotspots/base'
2
+ require 'server_scripts/parser/vtune/hotspots/threads'
3
+ require 'server_scripts/parser/vtune/hotspots/threads/starpu'
@@ -0,0 +1,20 @@
1
module ServerScripts
  module Parser
    module VTune
      module Hotspots
        # Common ancestor of the VTune hotspots report parsers. It holds the
        # names of the report columns and sets up the per-thread record store
        # that subclasses fill in from a CSV report.
        class Base
          # Column headers looked up in the CSV report.
          CPU_TIME = "CPU Time".freeze
          CPU_EFFECTIVE_TIME = "CPU Time:Effective Time".freeze
          CPU_OVERHEAD_TIME = "CPU Time:Overhead Time".freeze
          CPU_SPIN_TIME = "CPU Time:Spin Time".freeze
          WAIT_TIME = "Wait Time".freeze

          # Parse the report stored in the file +fname+.
          #
          # The parsing itself is delegated to #parse_csv!, which every
          # subclass has to provide.
          def initialize fname
            @threads = {}
            parse_csv! fname
          end

          private

          # Template method: read +fname+ and populate @threads.
          # Raises NotImplementedError unless overridden by the subclass.
          def parse_csv! fname
            raise NotImplementedError, "#{self.class} must implement parse_csv!"
          end
        end # class Base
      end # module Hotspots
    end # module VTune
  end # module Parser
end # module ServerScripts
@@ -0,0 +1,72 @@
1
module ServerScripts
  module Parser
    module VTune
      # Classes for analysing files showing hotspot analysis.
      module Hotspots
        # Parse a file with a hotspots report and grouped by threads. This class
        # is made for parsing things from a single node, multi threaded execution.
        # CSV delimiter should be a comma.
        #
        # Example command:
        #   vtune -collect threading -report hotspots -group-by thread -csv-delimiter=, a.out 65536
        class Threads < Base
          # Get time for a particular event in a particular thread.
          #
          # :event is one of :cpu_time, :cpu_effective_time, :cpu_overhead_time
          # or :wait_time.
          # :tid is the zero-based position of the thread's row in the report.
          def time(event:, tid:)
            @threads[tid][event]
          end

          # Sum of total CPU and wait time.
          def total_time
            total_cpu_time + total_wait_time
          end

          # Total CPU time of all the threads. Does not include the wait time.
          def total_cpu_time
            @total_cpu_time ||= parse_for_event(:cpu_time)
          end

          # Total Effective CPU time.
          def total_cpu_effective_time
            @total_cpu_effective_time ||= parse_for_event(:cpu_effective_time)
          end

          # Total CPU overhead: sum of CPU Spin Time + CPU Overhead Time
          def total_cpu_overhead_time
            @total_cpu_overhead_time ||= parse_for_event(:cpu_overhead_time)
          end

          # Total Wait Time.
          def total_wait_time
            @total_wait_time ||= parse_for_event(:wait_time)
          end

          private

          # Add up the value recorded for +event+ over every thread.
          def parse_for_event(event)
            @threads.each_value.reduce(0.0) { |total, thread| total + thread[event] }
          end

          # Read the CSV report in +fname+ and store one record per row in
          # @threads, keyed by the row's zero-based position.
          def parse_csv!(fname)
            table = CSV.parse(File.read(fname), headers: true)
            table.each_with_index do |row, i|
              # Read each cell from the row itself; going through the table
              # (table[HEADER][i]) rebuilds the whole column for every cell.
              @threads[i] = {
                cpu_time: row[CPU_TIME].to_f,
                cpu_effective_time: row[CPU_EFFECTIVE_TIME].to_f,
                cpu_overhead_time: row[CPU_OVERHEAD_TIME].to_f + row[CPU_SPIN_TIME].to_f,
                wait_time: row[WAIT_TIME].to_f
              }
            end
          end
        end # class Threads
      end # module Hotspots
    end # module VTune
  end # module Parser
end # module ServerScripts
@@ -0,0 +1,22 @@
1
module ServerScripts
  module Parser
    module VTune
      module Hotspots
        # Placeholder for a hotspots parser aimed at StarPU runs. Neither
        # method has a body yet, so no report data is read or returned.
        class Starpu < Base
          # Intended to return the time of +event+ for one worker, the master
          # thread or the MPI thread. The planned convention for :tid is
          # :CPU_#ID for a worker, :MPI for the MPI thread and :master for the
          # task submission thread.
          #
          # Not implemented: always returns nil.
          def time(event:, tid:)
            nil
          end

          private

          # Not implemented: the report in +fname+ is not read.
          def parse_csv!(fname)
            nil
          end
        end # class Starpu
      end # module Hotspots
    end # module VTune
  end # module Parser
end # module ServerScripts
@@ -1,3 +1,3 @@
1
1
  module ServerScripts
2
- VERSION = "0.1"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -34,6 +34,8 @@ Gem::Specification.new do |spec|
34
34
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
35
35
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
36
36
  spec.require_paths = ["lib"]
37
+ spec.bindir = "bin"
38
+ spec.executables << "mem_monitor"
37
39
 
38
40
  spec.add_runtime_dependency 'ptools'
39
41
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: server_scripts
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sameer Deshmukh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-28 00:00:00.000000000 Z
11
+ date: 2020-03-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ptools
@@ -71,13 +71,15 @@ description: 'Easily write scripts for submitted jobs to various machines.
71
71
  '
72
72
  email:
73
73
  - sameer.deshmukh93@gmail.com
74
- executables: []
74
+ executables:
75
+ - mem_monitor
75
76
  extensions: []
76
77
  extra_rdoc_files: []
77
78
  files:
78
79
  - Gemfile
79
80
  - README.md
80
81
  - Rakefile
82
+ - bin/mem_monitor
81
83
  - lib/server_scripts.rb
82
84
  - lib/server_scripts/batch_job.rb
83
85
  - lib/server_scripts/computer.rb
@@ -91,10 +93,16 @@ files:
91
93
  - lib/server_scripts/executor/valgrind.rb
92
94
  - lib/server_scripts/executor/vanilla.rb
93
95
  - lib/server_scripts/experiment.rb
96
+ - lib/server_scripts/memory_monitor.rb
94
97
  - lib/server_scripts/node_type.rb
95
98
  - lib/server_scripts/parser.rb
96
99
  - lib/server_scripts/parser/itac.rb
97
100
  - lib/server_scripts/parser/starpu_profile.rb
101
+ - lib/server_scripts/parser/vtune.rb
102
+ - lib/server_scripts/parser/vtune/hotspots.rb
103
+ - lib/server_scripts/parser/vtune/hotspots/base.rb
104
+ - lib/server_scripts/parser/vtune/hotspots/threads.rb
105
+ - lib/server_scripts/parser/vtune/hotspots/threads/starpu.rb
98
106
  - lib/server_scripts/version.rb
99
107
  - server_scripts.gemspec
100
108
  homepage: https://github.com/v0dro/server-scripts