server_scripts 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 74bc6e9790b0f8f43c099ebbf2bb7097ba078f58
|
4
|
+
data.tar.gz: 507e795a3d6f9baefd62276f71d0a44b64484fd9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e9c5b973a951b906699ccfc51d59b72fed77658cf267c22d2cbbcf3e40e71cce27ded005948888ca89f1a878a7b1ed33774d551168e539adbe1d85a4fb8674dd
|
7
|
+
data.tar.gz: 3b052b8fbc0f94477ed790642bf3c2eb7a536370e8a9e506e13b1b688c2498e05ea022971b8e2468e4a301a47a7036ce941472a1f903fb1cd19922806dcf51f7
|
data/README.md
CHANGED
@@ -66,6 +66,47 @@ like setting the ITAC and VTUNE output file/folder names.
|
|
66
66
|
|
67
67
|
## Parse Intel VTune output
|
68
68
|
|
69
|
+
### Output chart of Intel VTune
|
70
|
+
|
71
|
+
The way VTune classifies the output in the CSV is a little funny and should be
|
72
|
+
understood properly unless you want to have a hard time. The output can be said
|
73
|
+
to be classified as a tree that looks like so:
|
74
|
+
```
|
75
|
+
CPU Time
|
76
|
+
- Effective Time
|
77
|
+
- Idle
|
78
|
+
- Poor
|
79
|
+
- Ok
|
80
|
+
- Ideal
|
81
|
+
- Spin Time
|
82
|
+
- Imbalance or Serial Spinning
|
83
|
+
- Lock Contention
|
84
|
+
- MPI Busy Wait Time
|
85
|
+
- Other
|
86
|
+
- Overhead Time
|
87
|
+
- Scheduling
|
88
|
+
- Reduction
|
89
|
+
- Atomics
|
90
|
+
- Other
|
91
|
+
Wait Time
|
92
|
+
- Idle
|
93
|
+
- Poor
|
94
|
+
- Ok
|
95
|
+
- Ideal
|
96
|
+
- Over
|
97
|
+
Wait Count
|
98
|
+
PID
|
99
|
+
TID
|
100
|
+
```
|
101
|
+
The total time is the sum of `CPU Time` and `Wait Time`.
|
102
|
+
|
103
|
+
### Usage
|
104
|
+
A sample program for parsing the first 16 threads reported by the vtune command:
|
105
|
+
```
|
106
|
+
vtune -report hotspots -group-by thread -result-dir result_file.vtune \
|
107
|
+
-report-output result_thread_res.csv -csv-delimiter=,
|
108
|
+
```
|
109
|
+
|
69
110
|
``` ruby
|
70
111
|
parser = Parser::VTune::Hotspots::SLATE.new(
|
71
112
|
"test/artifacts/slate-two-proc-p1.csv", nthreads: 16)
|
@@ -7,7 +7,7 @@ module ServerScripts
|
|
7
7
|
CPU_EFFECTIVE_TIME = "CPU Time:Effective Time"
|
8
8
|
CPU_OVERHEAD_TIME = "CPU Time:Overhead Time"
|
9
9
|
CPU_SPIN_TIME = "CPU Time:Spin Time"
|
10
|
-
|
10
|
+
MPI_BUSY_WAIT_TIME = "CPU Time:Spin Time:MPI Busy Wait Time"
|
11
11
|
WAIT_TIME = "Wait Time"
|
12
12
|
|
13
13
|
def initialize fname
|
@@ -8,8 +8,11 @@ module ServerScripts
|
|
8
8
|
super(fname)
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
|
-
|
11
|
+
# Get the total time for all threads under the header
|
12
|
+
# "CPU Time:Spin Time:MPI Busy Wait Time". This time
|
13
|
+
# is included within "CPU Time:Spin Time".
|
14
|
+
def total_mpi_busy_wait_time
|
15
|
+
@total_mpi_busy_time ||= parse_for_event(:mpi_busy_wait_time)
|
13
16
|
@total_mpi_busy_time
|
14
17
|
end
|
15
18
|
|
@@ -22,10 +25,10 @@ module ServerScripts
|
|
22
25
|
@threads[i] = {}
|
23
26
|
@threads[i][:cpu_time] = data[CPU_TIME][i].to_f
|
24
27
|
@threads[i][:cpu_effective_time] = data[CPU_EFFECTIVE_TIME][i].to_f
|
25
|
-
@threads[i][:cpu_overhead_time] = data[CPU_OVERHEAD_TIME][i].to_f
|
26
|
-
|
28
|
+
@threads[i][:cpu_overhead_time] = data[CPU_OVERHEAD_TIME][i].to_f
|
29
|
+
@threads[i][:cpu_spin_time] = data[CPU_SPIN_TIME][i].to_f
|
27
30
|
@threads[i][:wait_time] = data[WAIT_TIME][i].to_f
|
28
|
-
@threads[i][:
|
31
|
+
@threads[i][:mpi_busy_wait_time] = data[MPI_BUSY_WAIT_TIME][i].to_f
|
29
32
|
end
|
30
33
|
end
|
31
34
|
end # class SLATE
|
@@ -32,11 +32,18 @@ module ServerScripts
|
|
32
32
|
@total_cpu_effective_time
|
33
33
|
end
|
34
34
|
|
35
|
-
# Total CPU overhead:
|
35
|
+
# Total CPU overhead: "CPU Time:Overhead Time"
|
36
36
|
def total_cpu_overhead_time
|
37
37
|
@total_cpu_overhead_time ||= parse_for_event(:cpu_overhead_time)
|
38
38
|
@total_cpu_overhead_time
|
39
39
|
end
|
40
|
+
|
41
|
+
# Total CPU Spin time: "CPU Time:Spin Time". This includes the MPI busy
|
42
|
+
# wait time, which for some reason is classified under this banner by vtune.
|
43
|
+
def total_cpu_spin_time
|
44
|
+
@total_cpu_spin_time ||= parse_for_event(:cpu_spin_time)
|
45
|
+
@total_cpu_spin_time
|
46
|
+
end
|
40
47
|
|
41
48
|
# Total Wait Time.
|
42
49
|
def total_wait_time
|
@@ -52,8 +59,8 @@ module ServerScripts
|
|
52
59
|
@threads[i] = {}
|
53
60
|
@threads[i][:cpu_time] = data[CPU_TIME][i].to_f
|
54
61
|
@threads[i][:cpu_effective_time] = data[CPU_EFFECTIVE_TIME][i].to_f
|
55
|
-
@threads[i][:cpu_overhead_time] = data[CPU_OVERHEAD_TIME][i].to_f
|
56
|
-
|
62
|
+
@threads[i][:cpu_overhead_time] = data[CPU_OVERHEAD_TIME][i].to_f
|
63
|
+
@threads[i][:cpu_spin_time] = data[CPU_SPIN_TIME][i].to_f
|
57
64
|
@threads[i][:wait_time] = data[WAIT_TIME][i].to_f
|
58
65
|
end
|
59
66
|
end
|