ruby-prof 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/bin/ruby-prof-check-trace +45 -45
- data/docs/advanced-usage.md +132 -132
- data/docs/best-practices.md +27 -27
- data/docs/getting-started.md +130 -130
- data/docs/index.md +45 -45
- data/docs/profiling-rails.md +64 -64
- data/docs/public/examples/generate_reports.rb +92 -92
- data/docs/public/examples/reports/call_stack.html +835 -835
- data/docs/public/examples/reports/graph.html +1319 -1319
- data/docs/reports.md +150 -150
- data/lib/ruby-prof/version.rb +1 -1
- data/ruby-prof.gemspec +66 -66
- data/test/call_tree_builder.rb +126 -126
- data/test/exceptions_test.rb +24 -24
- data/test/marshal_test.rb +144 -144
- data/test/printer_call_stack_test.rb +28 -28
- data/test/printer_flame_graph_test.rb +82 -82
- data/test/printer_flat_test.rb +99 -99
- data/test/printer_graph_html_test.rb +62 -62
- data/test/printer_graph_test.rb +42 -42
- data/test/printers_test.rb +162 -162
- data/test/printing_recursive_graph_test.rb +81 -81
- data/test/profile_test.rb +101 -101
- data/test/rack_test.rb +103 -103
- data/test/scheduler.rb +367 -367
- data/test/singleton_test.rb +39 -39
- data/test/thread_test.rb +229 -229
- data/test/yarv_test.rb +56 -56
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c521f29e398d026fbad89897f5582d24c37c022fe06271fa36d357d56afeff4c
|
|
4
|
+
data.tar.gz: a951daae0d9c766ab6be88ce4e9b4f5eb1b655f7d24609310ff49ca3d19d3404
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: bfa23414f2167fc919906f1d337d63ecec04fe90879b6c9276a91677eb256ccd2058fb2074350f02c8136001ad7ff7764ffcdf50526845f6d6d26912bc731e13
|
|
7
|
+
data.tar.gz: 94c62c4e9f1576bf8420d8759d0ebfa5b5c536b6bc0366bb2fa8227fc4777af529f111929c4537dd6193ff1fd7e09b0b7ae2ece424e6ddb24adca8ddeab5911c
|
data/CHANGELOG.md
CHANGED
data/bin/ruby-prof-check-trace
CHANGED
|
@@ -1,45 +1,45 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
|
|
3
|
-
stacks = Hash.new{|h,k| h[k] = Hash.new{|h,k| h[k] = []}}
|
|
4
|
-
i = 0
|
|
5
|
-
File.open(ARGV[0]).each_line do |l|
|
|
6
|
-
i += 1
|
|
7
|
-
unless l =~ /^(\d+):(\d+): *\d+ms *([^ ]+) *(.*): *(\d+) *(.+)$/
|
|
8
|
-
next if l =~/^ *$/
|
|
9
|
-
puts "line doesn't match: #{l}"
|
|
10
|
-
next
|
|
11
|
-
end
|
|
12
|
-
details = $1.to_i, $2.to_i, $3, $4, $5.to_i, $6
|
|
13
|
-
thread, fiber, event, file, line, method = *details
|
|
14
|
-
# puts method
|
|
15
|
-
stack = stacks[thread][fiber]
|
|
16
|
-
case event
|
|
17
|
-
when 'call', 'c-call'
|
|
18
|
-
stack << method
|
|
19
|
-
when 'return', 'c-return'
|
|
20
|
-
last_method = stack.pop
|
|
21
|
-
if last_method != method
|
|
22
|
-
puts "LINE #{i}: return event without call: #{method}"
|
|
23
|
-
puts "STACK: #{stack.inspect}"
|
|
24
|
-
if stack.find(method)
|
|
25
|
-
puts "fixing stack"
|
|
26
|
-
while (popped = stack.pop) && (popped != method)
|
|
27
|
-
puts "popped #{popped}"
|
|
28
|
-
end
|
|
29
|
-
else
|
|
30
|
-
raise "stack unfixable"
|
|
31
|
-
end
|
|
32
|
-
# stack << last_method
|
|
33
|
-
end
|
|
34
|
-
when 'line'
|
|
35
|
-
last_method = stack[-1]
|
|
36
|
-
if last_method != method
|
|
37
|
-
unless stack.find(method)
|
|
38
|
-
raise "LINE #{i}: line event without call: #{method}"
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
else
|
|
42
|
-
puts "unkown event"
|
|
43
|
-
end
|
|
44
|
-
end
|
|
45
|
-
puts stacks.inspect
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
stacks = Hash.new{|h,k| h[k] = Hash.new{|h,k| h[k] = []}}
|
|
4
|
+
i = 0
|
|
5
|
+
File.open(ARGV[0]).each_line do |l|
|
|
6
|
+
i += 1
|
|
7
|
+
unless l =~ /^(\d+):(\d+): *\d+ms *([^ ]+) *(.*): *(\d+) *(.+)$/
|
|
8
|
+
next if l =~/^ *$/
|
|
9
|
+
puts "line doesn't match: #{l}"
|
|
10
|
+
next
|
|
11
|
+
end
|
|
12
|
+
details = $1.to_i, $2.to_i, $3, $4, $5.to_i, $6
|
|
13
|
+
thread, fiber, event, file, line, method = *details
|
|
14
|
+
# puts method
|
|
15
|
+
stack = stacks[thread][fiber]
|
|
16
|
+
case event
|
|
17
|
+
when 'call', 'c-call'
|
|
18
|
+
stack << method
|
|
19
|
+
when 'return', 'c-return'
|
|
20
|
+
last_method = stack.pop
|
|
21
|
+
if last_method != method
|
|
22
|
+
puts "LINE #{i}: return event without call: #{method}"
|
|
23
|
+
puts "STACK: #{stack.inspect}"
|
|
24
|
+
if stack.find(method)
|
|
25
|
+
puts "fixing stack"
|
|
26
|
+
while (popped = stack.pop) && (popped != method)
|
|
27
|
+
puts "popped #{popped}"
|
|
28
|
+
end
|
|
29
|
+
else
|
|
30
|
+
raise "stack unfixable"
|
|
31
|
+
end
|
|
32
|
+
# stack << last_method
|
|
33
|
+
end
|
|
34
|
+
when 'line'
|
|
35
|
+
last_method = stack[-1]
|
|
36
|
+
if last_method != method
|
|
37
|
+
unless stack.find(method)
|
|
38
|
+
raise "LINE #{i}: line event without call: #{method}"
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
else
|
|
42
|
+
puts "unkown event"
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
puts stacks.inspect
|
data/docs/advanced-usage.md
CHANGED
|
@@ -1,132 +1,132 @@
|
|
|
1
|
-
# Advanced Usage
|
|
2
|
-
|
|
3
|
-
This section describes advanced usage of ruby-prof. Additional documentation for every class is also [available](index.md#api-documentation). For workflow guidance, see [Best Practices](best-practices.md).
|
|
4
|
-
|
|
5
|
-
## Profiling Options
|
|
6
|
-
|
|
7
|
-
ruby-prof understands the following options when profiling code:
|
|
8
|
-
|
|
9
|
-
**measure_mode** - What ruby-prof should measure. For more information see the [Measurement Mode](#measurement-mode) section.
|
|
10
|
-
|
|
11
|
-
**track_allocations** - Tracks each object location, including the object class and source file location. For more information see the [Allocation Tracking](#allocation-tracking) section.
|
|
12
|
-
|
|
13
|
-
**exclude_threads** - Array of threads which should not be profiled. For more information see the [Thread Inclusion/Exclusion](#thread-inclusionexclusion) section.
|
|
14
|
-
|
|
15
|
-
**include_threads** - Array of threads which should be profiled. All other threads will be ignored. For more information see the [Thread Inclusion/Exclusion](#thread-inclusionexclusion) section.
|
|
16
|
-
|
|
17
|
-
**allow_exceptions** - Whether to raise exceptions encountered during profiling, or to suppress them. Defaults to false.
|
|
18
|
-
|
|
19
|
-
**exclude_common** - Automatically calls `exclude_common_methods!` to exclude commonly cluttering methods. Defaults to false. For more information see the [Method Exclusion](#method-exclusion) section.
|
|
20
|
-
|
|
21
|
-
## Measurement Mode
|
|
22
|
-
|
|
23
|
-
The measurement mode determines what ruby-prof measures when profiling code. Supported measurements are:
|
|
24
|
-
|
|
25
|
-
### Wall Time
|
|
26
|
-
|
|
27
|
-
Wall time measures the real-world time elapsed between any two moments in seconds. If there are other processes concurrently running on the system that use significant CPU or disk time during a profiling run then the reported results will be larger than expected. On Windows, wall time is measured using `QueryPerformanceCounter` and on other platforms by `clock_gettime(CLOCK_MONOTONIC)`. Use `RubyProf::WALL_TIME` to select this mode.
|
|
28
|
-
|
|
29
|
-
### Process Time
|
|
30
|
-
|
|
31
|
-
Process time measures the time used by a process between any two moments in seconds. It is unaffected by other processes concurrently running on the system. Remember with process time that calls to methods like sleep will not be included in profiling results. On Windows, process time is measured using `GetProcessTimes` and on other platforms by `clock_gettime`. Use `RubyProf::PROCESS_TIME` to select this mode.
|
|
32
|
-
|
|
33
|
-
### Object Allocations
|
|
34
|
-
|
|
35
|
-
Object allocations measures how many objects each method in a program allocates. Measurements are done via Ruby's `RUBY_INTERNAL_EVENT_NEWOBJ` trace event, counting each new object created (excluding internal `T_IMEMO` objects). Use `RubyProf::ALLOCATIONS` to select this mode.
|
|
36
|
-
|
|
37
|
-
To set the measurement mode:
|
|
38
|
-
|
|
39
|
-
```ruby
|
|
40
|
-
profile = RubyProf::Profile.new(measure_mode: RubyProf::WALL_TIME)
|
|
41
|
-
profile = RubyProf::Profile.new(measure_mode: RubyProf::PROCESS_TIME)
|
|
42
|
-
profile = RubyProf::Profile.new(measure_mode: RubyProf::ALLOCATIONS)
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
The default value is `RubyProf::WALL_TIME`. You may also specify the measure mode by using the `RUBY_PROF_MEASURE_MODE` environment variable:
|
|
46
|
-
|
|
47
|
-
```
|
|
48
|
-
export RUBY_PROF_MEASURE_MODE=wall
|
|
49
|
-
export RUBY_PROF_MEASURE_MODE=process
|
|
50
|
-
export RUBY_PROF_MEASURE_MODE=allocations
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
## Allocation Tracking
|
|
54
|
-
|
|
55
|
-
ruby-prof also has the ability to track object allocations. This functionality can be turned on via the track_allocations option:
|
|
56
|
-
|
|
57
|
-
```ruby
|
|
58
|
-
require 'ruby-prof'
|
|
59
|
-
|
|
60
|
-
RubyProf::Profile.profile(track_allocations: true) do
|
|
61
|
-
...
|
|
62
|
-
end
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
Note the `RubyProf::ALLOCATIONS` measure mode is slightly different than tracking allocations. The measurement mode provides high level information about the number of allocations performed in each method. In contrast, tracking allocations provides detailed information about allocation type, count, and source location. Currently, to see allocations results you must use the `RubyProf::GraphHtmlPrinter`.
|
|
66
|
-
|
|
67
|
-
## Thread Inclusion/Exclusion
|
|
68
|
-
|
|
69
|
-
ruby-prof can profile multiple threads. Sometimes this can be overwhelming. For example, assume you want to determine why your tests are running slowly. If you are using minitest, it can run tests in parallel by spawning worker threads (to force a single worker, set `N=0` when running tests). Thus, ruby-prof provides two options to specify which threads should be profiled:
|
|
70
|
-
|
|
71
|
-
**exclude_threads** - Array of threads which should not be profiled.
|
|
72
|
-
|
|
73
|
-
**include_threads** - Array of threads which should be profiled. All other threads will be ignored.
|
|
74
|
-
|
|
75
|
-
## Method Exclusion
|
|
76
|
-
|
|
77
|
-
ruby-prof supports excluding specific methods and threads from profiling results. This is useful for reducing connectivity in the call graph, making it easier to identify the source of performance problems when using a graph printer. For example, consider `Integer#times`: it's hardly ever useful to know how much time is spent in the method itself. We are more interested in how much the passed in block contributes to the time spent in the method which contains the `Integer#times` call. The effect on collected metrics are identical to eliminating methods from the profiling result in a post process step.
|
|
78
|
-
|
|
79
|
-
```ruby
|
|
80
|
-
profile = RubyProf::Profile.new(...)
|
|
81
|
-
profile.exclude_methods!(Integer, :times, ...)
|
|
82
|
-
profile.start
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
A convenience method is provided to exclude a large number of methods which usually clutter up profiles:
|
|
86
|
-
|
|
87
|
-
```ruby
|
|
88
|
-
profile.exclude_common_methods!
|
|
89
|
-
```
|
|
90
|
-
|
|
91
|
-
However, this is a somewhat opinionated method collection. It's usually better to view it as an inspiration instead of using it directly (see [exclude_common_methods.rb](https://github.com/ruby-prof/ruby-prof/blob/e087b7d7ca11eecf1717d95a5c5fea1e36ea3136/lib/ruby-prof/profile/exclude_common_methods.rb)).
|
|
92
|
-
|
|
93
|
-
## Merging Threads and Fibers
|
|
94
|
-
|
|
95
|
-
ruby-prof profiles each thread and fiber separately. A common design pattern is to have a main thread delegate work to background threads or fibers. Examples include web servers such as Puma and Falcon, as well as code that uses `Enumerator`, `Fiber.new`, or async libraries.
|
|
96
|
-
|
|
97
|
-
Understanding profiling results can be very difficult when there are many threads or fibers because each one appears as a separate entry in the output. To help with this, ruby-prof includes the ability to merge results for threads and fibers that start with the same root method. In the best case, this can collapse results into just two entries - one for the parent thread and one for all workers.
|
|
98
|
-
|
|
99
|
-
Note the collapsed results show the sum of times for all merged threads/fibers. For example, assume there are 10 worker fibers that each took 5 seconds to run. The single merged entry will show a total time of 50 seconds.
|
|
100
|
-
|
|
101
|
-
To merge threads and fibers:
|
|
102
|
-
|
|
103
|
-
```ruby
|
|
104
|
-
profile = RubyProf::Profile.profile do
|
|
105
|
-
...
|
|
106
|
-
end
|
|
107
|
-
profile.merge!
|
|
108
|
-
```
|
|
109
|
-
|
|
110
|
-
This is also supported in the Rack adapter via the `merge_fibers` option:
|
|
111
|
-
|
|
112
|
-
```ruby
|
|
113
|
-
config.middleware.use Rack::RubyProf, path: Rails.root.join("tmp/profile"), merge_fibers: true
|
|
114
|
-
```
|
|
115
|
-
|
|
116
|
-
## Saving Results
|
|
117
|
-
|
|
118
|
-
It can be helpful to save the results of a profiling run for later analysis. Results can be saved using Ruby's [marshal](https://docs.ruby-lang.org/en/master/Marshal.html) library.
|
|
119
|
-
|
|
120
|
-
```ruby
|
|
121
|
-
profile_1 = RubyProf::Profile.profile do
|
|
122
|
-
...
|
|
123
|
-
end
|
|
124
|
-
|
|
125
|
-
# Save the results
|
|
126
|
-
data = Marshal.dump(profile_1)
|
|
127
|
-
|
|
128
|
-
# Sometime later load the results
|
|
129
|
-
profile_2 = Marshal.load(data)
|
|
130
|
-
```
|
|
131
|
-
|
|
132
|
-
**!!!WARNING!!!** - Only load ruby-prof profiles that you know are safe. Demarshaling data can lead to arbitrary code execution and thus can be [dangerous](https://docs.ruby-lang.org/en/master/Marshal.html#module-Marshal-label-Security+considerations).
|
|
1
|
+
# Advanced Usage
|
|
2
|
+
|
|
3
|
+
This section describes advanced usage of ruby-prof. Additional documentation for every class is also [available](index.md#api-documentation). For workflow guidance, see [Best Practices](best-practices.md).
|
|
4
|
+
|
|
5
|
+
## Profiling Options
|
|
6
|
+
|
|
7
|
+
ruby-prof understands the following options when profiling code:
|
|
8
|
+
|
|
9
|
+
**measure_mode** - What ruby-prof should measure. For more information see the [Measurement Mode](#measurement-mode) section.
|
|
10
|
+
|
|
11
|
+
**track_allocations** - Tracks each object allocation, including the object class and source file location. For more information see the [Allocation Tracking](#allocation-tracking) section.
|
|
12
|
+
|
|
13
|
+
**exclude_threads** - Array of threads which should not be profiled. For more information see the [Thread Inclusion/Exclusion](#thread-inclusionexclusion) section.
|
|
14
|
+
|
|
15
|
+
**include_threads** - Array of threads which should be profiled. All other threads will be ignored. For more information see the [Thread Inclusion/Exclusion](#thread-inclusionexclusion) section.
|
|
16
|
+
|
|
17
|
+
**allow_exceptions** - Whether to raise exceptions encountered during profiling, or to suppress them. Defaults to false.
|
|
18
|
+
|
|
19
|
+
**exclude_common** - Automatically calls `exclude_common_methods!` to exclude commonly cluttering methods. Defaults to false. For more information see the [Method Exclusion](#method-exclusion) section.
|
|
20
|
+
|
|
21
|
+
## Measurement Mode
|
|
22
|
+
|
|
23
|
+
The measurement mode determines what ruby-prof measures when profiling code. Supported measurements are:
|
|
24
|
+
|
|
25
|
+
### Wall Time
|
|
26
|
+
|
|
27
|
+
Wall time measures the real-world time elapsed between any two moments in seconds. If there are other processes concurrently running on the system that use significant CPU or disk time during a profiling run then the reported results will be larger than expected. On Windows, wall time is measured using `QueryPerformanceCounter` and on other platforms by `clock_gettime(CLOCK_MONOTONIC)`. Use `RubyProf::WALL_TIME` to select this mode.
|
|
28
|
+
|
|
29
|
+
### Process Time
|
|
30
|
+
|
|
31
|
+
Process time measures the time used by a process between any two moments in seconds. It is unaffected by other processes concurrently running on the system. Note that with process time, time spent in calls to methods like `sleep` is not included in profiling results. On Windows, process time is measured using `GetProcessTimes` and on other platforms by `clock_gettime`. Use `RubyProf::PROCESS_TIME` to select this mode.
|
|
32
|
+
|
|
33
|
+
### Object Allocations
|
|
34
|
+
|
|
35
|
+
Object allocations measures how many objects each method in a program allocates. Measurements are done via Ruby's `RUBY_INTERNAL_EVENT_NEWOBJ` trace event, counting each new object created (excluding internal `T_IMEMO` objects). Use `RubyProf::ALLOCATIONS` to select this mode.
|
|
36
|
+
|
|
37
|
+
To set the measurement mode:
|
|
38
|
+
|
|
39
|
+
```ruby
|
|
40
|
+
profile = RubyProf::Profile.new(measure_mode: RubyProf::WALL_TIME)
|
|
41
|
+
profile = RubyProf::Profile.new(measure_mode: RubyProf::PROCESS_TIME)
|
|
42
|
+
profile = RubyProf::Profile.new(measure_mode: RubyProf::ALLOCATIONS)
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
The default value is `RubyProf::WALL_TIME`. You may also specify the measure mode by using the `RUBY_PROF_MEASURE_MODE` environment variable:
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
export RUBY_PROF_MEASURE_MODE=wall
|
|
49
|
+
export RUBY_PROF_MEASURE_MODE=process
|
|
50
|
+
export RUBY_PROF_MEASURE_MODE=allocations
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Allocation Tracking
|
|
54
|
+
|
|
55
|
+
ruby-prof also has the ability to track object allocations. This functionality can be turned on via the track_allocations option:
|
|
56
|
+
|
|
57
|
+
```ruby
|
|
58
|
+
require 'ruby-prof'
|
|
59
|
+
|
|
60
|
+
RubyProf::Profile.profile(track_allocations: true) do
|
|
61
|
+
...
|
|
62
|
+
end
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Note that the `RubyProf::ALLOCATIONS` measure mode is slightly different from tracking allocations. The measurement mode provides high-level information about the number of allocations performed in each method. In contrast, tracking allocations provides detailed information about allocation type, count, and source location. Currently, to see allocation results you must use the `RubyProf::GraphHtmlPrinter`.
|
|
66
|
+
|
|
67
|
+
## Thread Inclusion/Exclusion
|
|
68
|
+
|
|
69
|
+
ruby-prof can profile multiple threads. Sometimes this can be overwhelming. For example, assume you want to determine why your tests are running slowly. If you are using minitest, it can run tests in parallel by spawning worker threads (to force a single worker, set `N=0` when running tests). Thus, ruby-prof provides two options to specify which threads should be profiled:
|
|
70
|
+
|
|
71
|
+
**exclude_threads** - Array of threads which should not be profiled.
|
|
72
|
+
|
|
73
|
+
**include_threads** - Array of threads which should be profiled. All other threads will be ignored.
|
|
74
|
+
|
|
75
|
+
## Method Exclusion
|
|
76
|
+
|
|
77
|
+
ruby-prof supports excluding specific methods and threads from profiling results. This is useful for reducing connectivity in the call graph, making it easier to identify the source of performance problems when using a graph printer. For example, consider `Integer#times`: it's hardly ever useful to know how much time is spent in the method itself. We are more interested in how much the passed-in block contributes to the time spent in the method which contains the `Integer#times` call. The effect on collected metrics is identical to eliminating methods from the profiling result in a post-processing step.
|
|
78
|
+
|
|
79
|
+
```ruby
|
|
80
|
+
profile = RubyProf::Profile.new(...)
|
|
81
|
+
profile.exclude_methods!(Integer, :times, ...)
|
|
82
|
+
profile.start
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
A convenience method is provided to exclude a large number of methods which usually clutter up profiles:
|
|
86
|
+
|
|
87
|
+
```ruby
|
|
88
|
+
profile.exclude_common_methods!
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
However, this is a somewhat opinionated method collection. It's usually better to view it as an inspiration instead of using it directly (see [exclude_common_methods.rb](https://github.com/ruby-prof/ruby-prof/blob/e087b7d7ca11eecf1717d95a5c5fea1e36ea3136/lib/ruby-prof/profile/exclude_common_methods.rb)).
|
|
92
|
+
|
|
93
|
+
## Merging Threads and Fibers
|
|
94
|
+
|
|
95
|
+
ruby-prof profiles each thread and fiber separately. A common design pattern is to have a main thread delegate work to background threads or fibers. Examples include web servers such as Puma and Falcon, as well as code that uses `Enumerator`, `Fiber.new`, or async libraries.
|
|
96
|
+
|
|
97
|
+
Understanding profiling results can be very difficult when there are many threads or fibers because each one appears as a separate entry in the output. To help with this, ruby-prof includes the ability to merge results for threads and fibers that start with the same root method. In the best case, this can collapse results into just two entries - one for the parent thread and one for all workers.
|
|
98
|
+
|
|
99
|
+
Note the collapsed results show the sum of times for all merged threads/fibers. For example, assume there are 10 worker fibers that each took 5 seconds to run. The single merged entry will show a total time of 50 seconds.
|
|
100
|
+
|
|
101
|
+
To merge threads and fibers:
|
|
102
|
+
|
|
103
|
+
```ruby
|
|
104
|
+
profile = RubyProf::Profile.profile do
|
|
105
|
+
...
|
|
106
|
+
end
|
|
107
|
+
profile.merge!
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
This is also supported in the Rack adapter via the `merge_fibers` option:
|
|
111
|
+
|
|
112
|
+
```ruby
|
|
113
|
+
config.middleware.use Rack::RubyProf, path: Rails.root.join("tmp/profile"), merge_fibers: true
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Saving Results
|
|
117
|
+
|
|
118
|
+
It can be helpful to save the results of a profiling run for later analysis. Results can be saved using Ruby's [marshal](https://docs.ruby-lang.org/en/master/Marshal.html) library.
|
|
119
|
+
|
|
120
|
+
```ruby
|
|
121
|
+
profile_1 = RubyProf::Profile.profile do
|
|
122
|
+
...
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# Save the results
|
|
126
|
+
data = Marshal.dump(profile_1)
|
|
127
|
+
|
|
128
|
+
# Sometime later load the results
|
|
129
|
+
profile_2 = Marshal.load(data)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
**!!!WARNING!!!** - Only load ruby-prof profiles that you know are safe. Demarshaling data can lead to arbitrary code execution and thus can be [dangerous](https://docs.ruby-lang.org/en/master/Marshal.html#module-Marshal-label-Security+considerations).
|
data/docs/best-practices.md
CHANGED
|
@@ -1,27 +1,27 @@
|
|
|
1
|
-
# Best Practices
|
|
2
|
-
|
|
3
|
-
Profiling gives you amazing insight into your program. What you think is slow is almost never what is actually slow. Below are some best practices to help unlock this power.
|
|
4
|
-
|
|
5
|
-
## Start With Realistic Runs
|
|
6
|
-
|
|
7
|
-
When profiling data-heavy work, start with a smaller sample of the data instead of the full dataset. Profile a portion first (for example 1% or 10%). It is faster, easier to understand, and often enough to find the main bottleneck. Once you have a likely fix, validate it with a larger and more realistic workload so you know the result still holds in context. Run the same profile more than once and warm up before you measure so one-time startup work does not dominate the report.
|
|
8
|
-
|
|
9
|
-
## Choose The Right Measurement Mode
|
|
10
|
-
|
|
11
|
-
Pick the measurement mode based on the question you are asking. Use `WALL_TIME` for end-to-end latency, `PROCESS_TIME` for CPU-focused work, and `ALLOCATIONS` when object churn is the concern. See [Measurement Mode](advanced-usage.md#measurement-mode) for details.
|
|
12
|
-
|
|
13
|
-
## Reduce Noise Before Deep Analysis
|
|
14
|
-
|
|
15
|
-
When framework internals or concurrency noise dominate output, narrow the scope first. Use `exclude_common` or explicit method exclusions, and use thread filtering (`include_threads` / `exclude_threads`) when needed. For highly concurrent workloads, merging worker results (`merge!` or Rack `merge_fibers: true`) can make trends much easier to read. See [Profiling Options](advanced-usage.md#profiling-options), [Method Exclusion](advanced-usage.md#method-exclusion), and [Merging Threads and Fibers](advanced-usage.md#merging-threads-and-fibers).
|
|
16
|
-
|
|
17
|
-
## Use Reports In A Sequence
|
|
18
|
-
|
|
19
|
-
Start with a quick summary, then drill down. In practice, this usually means using `FlatPrinter` to find hotspots, `GraphHtmlPrinter` (or `GraphPrinter`) to understand caller/callee relationships, and `FlameGraphPrinter` to validate dominant paths visually. See [Reports](reports.md), especially [Creating Reports](reports.md#creating-reports) and [Report Types](reports.md#report-types).
|
|
20
|
-
|
|
21
|
-
## Use Threshold Filters Early
|
|
22
|
-
|
|
23
|
-
Threshold filters are one of the fastest ways to make a large profile readable. Start with `min_percent` to hide low-impact methods in most printers. For `GraphHtmlPrinter`, use `min_time` when you want to drop methods below an absolute time cutoff. These filters help you focus on the code that actually moves total runtime.
|
|
24
|
-
|
|
25
|
-
## Compare Trends, Not Single Snapshots
|
|
26
|
-
|
|
27
|
-
Do not optimize based on one run unless the signal is overwhelming. Compare before/after profiles under the same workload, then prioritize repeated hot paths over one-off spikes.
|
|
1
|
+
# Best Practices
|
|
2
|
+
|
|
3
|
+
Profiling gives you amazing insight into your program. What you think is slow is almost never what is actually slow. Below are some best practices to help unlock this power.
|
|
4
|
+
|
|
5
|
+
## Start With Realistic Runs
|
|
6
|
+
|
|
7
|
+
When profiling data-heavy work, start with a smaller sample of the data instead of the full dataset. Profile a portion first (for example 1% or 10%). It is faster, easier to understand, and often enough to find the main bottleneck. Once you have a likely fix, validate it with a larger and more realistic workload so you know the result still holds in context. Run the same profile more than once and warm up before you measure so one-time startup work does not dominate the report.
|
|
8
|
+
|
|
9
|
+
## Choose The Right Measurement Mode
|
|
10
|
+
|
|
11
|
+
Pick the measurement mode based on the question you are asking. Use `WALL_TIME` for end-to-end latency, `PROCESS_TIME` for CPU-focused work, and `ALLOCATIONS` when object churn is the concern. See [Measurement Mode](advanced-usage.md#measurement-mode) for details.
|
|
12
|
+
|
|
13
|
+
## Reduce Noise Before Deep Analysis
|
|
14
|
+
|
|
15
|
+
When framework internals or concurrency noise dominate output, narrow the scope first. Use `exclude_common` or explicit method exclusions, and use thread filtering (`include_threads` / `exclude_threads`) when needed. For highly concurrent workloads, merging worker results (`merge!` or Rack `merge_fibers: true`) can make trends much easier to read. See [Profiling Options](advanced-usage.md#profiling-options), [Method Exclusion](advanced-usage.md#method-exclusion), and [Merging Threads and Fibers](advanced-usage.md#merging-threads-and-fibers).
|
|
16
|
+
|
|
17
|
+
## Use Reports In A Sequence
|
|
18
|
+
|
|
19
|
+
Start with a quick summary, then drill down. In practice, this usually means using `FlatPrinter` to find hotspots, `GraphHtmlPrinter` (or `GraphPrinter`) to understand caller/callee relationships, and `FlameGraphPrinter` to validate dominant paths visually. See [Reports](reports.md), especially [Creating Reports](reports.md#creating-reports) and [Report Types](reports.md#report-types).
|
|
20
|
+
|
|
21
|
+
## Use Threshold Filters Early
|
|
22
|
+
|
|
23
|
+
Threshold filters are one of the fastest ways to make a large profile readable. Start with `min_percent` to hide low-impact methods in most printers. For `GraphHtmlPrinter`, use `min_time` when you want to drop methods below an absolute time cutoff. These filters help you focus on the code that actually moves total runtime.
|
|
24
|
+
|
|
25
|
+
## Compare Trends, Not Single Snapshots
|
|
26
|
+
|
|
27
|
+
Do not optimize based on one run unless the signal is overwhelming. Compare before/after profiles under the same workload, then prioritize repeated hot paths over one-off spikes.
|