server_scripts 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 74bc6e9790b0f8f43c099ebbf2bb7097ba078f58
|
4
|
+
data.tar.gz: 507e795a3d6f9baefd62276f71d0a44b64484fd9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e9c5b973a951b906699ccfc51d59b72fed77658cf267c22d2cbbcf3e40e71cce27ded005948888ca89f1a878a7b1ed33774d551168e539adbe1d85a4fb8674dd
|
7
|
+
data.tar.gz: 3b052b8fbc0f94477ed790642bf3c2eb7a536370e8a9e506e13b1b688c2498e05ea022971b8e2468e4a301a47a7036ce941472a1f903fb1cd19922806dcf51f7
|
data/README.md
CHANGED
@@ -66,6 +66,47 @@ like setting the ITAC and VTUNE output file/folder names.
|
|
66
66
|
|
67
67
|
## Parse Intel VTune output
|
68
68
|
|
69
|
+
### Output chart of Intel VTune
|
70
|
+
|
71
|
+
The way VTune classifies the output in the CSV is a little funny and should be
|
72
|
+
understood properly unless you want to have a hard time. The output can be said
|
73
|
+
to be classified as a tree that looks like so:
|
74
|
+
```
|
75
|
+
CPU Time
|
76
|
+
- Effective Time
|
77
|
+
- Idle
|
78
|
+
- Poor
|
79
|
+
- Ok
|
80
|
+
- Ideal
|
81
|
+
- Spin Time
|
82
|
+
- Imbalance or Serial Spinning
|
83
|
+
- Lock Contention
|
84
|
+
- MPI Busy Wait Time
|
85
|
+
- Other
|
86
|
+
- Overhead Time
|
87
|
+
- Scheduling
|
88
|
+
- Reduction
|
89
|
+
- Atomics
|
90
|
+
- Other
|
91
|
+
Wait Time
|
92
|
+
- Idle
|
93
|
+
- Poor
|
94
|
+
- Ok
|
95
|
+
- Ideal
|
96
|
+
- Over
|
97
|
+
Wait Count
|
98
|
+
PID
|
99
|
+
TID
|
100
|
+
```
|
101
|
+
The total time is the sum of `CPU Time` and `Wait Time`.
|
102
|
+
|
103
|
+
### Usage
|
104
|
+
A sample program for parsing the first 16 threads reported by the vtune command:
|
105
|
+
```
|
106
|
+
vtune -report hotspots -group-by thread -result-dir result_file.vtune \
|
107
|
+
-report-output result_thread_res.csv -csv-delimiter=,
|
108
|
+
```
|
109
|
+
|
69
110
|
``` ruby
|
70
111
|
parser = Parser::VTune::Hotspots::SLATE.new(
|
71
112
|
"test/artifacts/slate-two-proc-p1.csv", nthreads: 16)
|
@@ -7,7 +7,7 @@ module ServerScripts
|
|
7
7
|
CPU_EFFECTIVE_TIME = "CPU Time:Effective Time"
|
8
8
|
CPU_OVERHEAD_TIME = "CPU Time:Overhead Time"
|
9
9
|
CPU_SPIN_TIME = "CPU Time:Spin Time"
|
10
|
-
|
10
|
+
MPI_BUSY_WAIT_TIME = "CPU Time:Spin Time:MPI Busy Wait Time"
|
11
11
|
WAIT_TIME = "Wait Time"
|
12
12
|
|
13
13
|
def initialize fname
|
@@ -8,8 +8,11 @@ module ServerScripts
|
|
8
8
|
super(fname)
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
|
-
|
11
|
+
# Get the total time for all threads under the header
|
12
|
+
# "CPU Time:Spin Time:MPI Busy Wait Time". This time
|
13
|
+
# is included within "CPU Time:Spin Time".
|
14
|
+
def total_mpi_busy_wait_time
|
15
|
+
@total_mpi_busy_time ||= parse_for_event(:mpi_busy_wait_time)
|
13
16
|
@total_mpi_busy_time
|
14
17
|
end
|
15
18
|
|
@@ -22,10 +25,10 @@ module ServerScripts
|
|
22
25
|
@threads[i] = {}
|
23
26
|
@threads[i][:cpu_time] = data[CPU_TIME][i].to_f
|
24
27
|
@threads[i][:cpu_effective_time] = data[CPU_EFFECTIVE_TIME][i].to_f
|
25
|
-
@threads[i][:cpu_overhead_time] = data[CPU_OVERHEAD_TIME][i].to_f
|
26
|
-
|
28
|
+
@threads[i][:cpu_overhead_time] = data[CPU_OVERHEAD_TIME][i].to_f
|
29
|
+
@threads[i][:cpu_spin_time] = data[CPU_SPIN_TIME][i].to_f
|
27
30
|
@threads[i][:wait_time] = data[WAIT_TIME][i].to_f
|
28
|
-
@threads[i][:
|
31
|
+
@threads[i][:mpi_busy_wait_time] = data[MPI_BUSY_WAIT_TIME][i].to_f
|
29
32
|
end
|
30
33
|
end
|
31
34
|
end # class SLATE
|
@@ -32,11 +32,18 @@ module ServerScripts
|
|
32
32
|
@total_cpu_effective_time
|
33
33
|
end
|
34
34
|
|
35
|
-
# Total CPU overhead:
|
35
|
+
# Total CPU overhead: "CPU Time:Overhead Time"
|
36
36
|
def total_cpu_overhead_time
|
37
37
|
@total_cpu_overhead_time ||= parse_for_event(:cpu_overhead_time)
|
38
38
|
@total_cpu_overhead_time
|
39
39
|
end
|
40
|
+
|
41
|
+
# Total CPU Spin time: "CPU Time:Spin Time". This includes the MPI busy
|
42
|
+
# wait time, which for some reason is classified under this banner by vtune.
|
43
|
+
def total_cpu_spin_time
|
44
|
+
@total_cpu_spin_time ||= parse_for_event(:cpu_spin_time)
|
45
|
+
@total_cpu_spin_time
|
46
|
+
end
|
40
47
|
|
41
48
|
# Total Wait Time.
|
42
49
|
def total_wait_time
|
@@ -52,8 +59,8 @@ module ServerScripts
|
|
52
59
|
@threads[i] = {}
|
53
60
|
@threads[i][:cpu_time] = data[CPU_TIME][i].to_f
|
54
61
|
@threads[i][:cpu_effective_time] = data[CPU_EFFECTIVE_TIME][i].to_f
|
55
|
-
@threads[i][:cpu_overhead_time] = data[CPU_OVERHEAD_TIME][i].to_f
|
56
|
-
|
62
|
+
@threads[i][:cpu_overhead_time] = data[CPU_OVERHEAD_TIME][i].to_f
|
63
|
+
@threads[i][:cpu_spin_time] = data[CPU_SPIN_TIME][i].to_f
|
57
64
|
@threads[i][:wait_time] = data[WAIT_TIME][i].to_f
|
58
65
|
end
|
59
66
|
end
|