ruby-prof 1.7.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. checksums.yaml +4 -4
  2. data/{CHANGES → CHANGELOG.md} +112 -178
  3. data/README.md +5 -5
  4. data/bin/ruby-prof +1 -4
  5. data/docs/advanced-usage.md +132 -0
  6. data/docs/alternatives.md +98 -0
  7. data/docs/architecture.md +122 -0
  8. data/docs/best-practices.md +27 -0
  9. data/docs/getting-started.md +130 -0
  10. data/docs/history.md +11 -0
  11. data/docs/index.md +45 -0
  12. data/docs/profiling-rails.md +64 -0
  13. data/docs/public/examples/example.rb +33 -0
  14. data/docs/public/examples/generate_reports.rb +92 -0
  15. data/docs/public/examples/reports/call_info.txt +27 -0
  16. data/docs/public/examples/reports/call_stack.html +835 -0
  17. data/docs/public/examples/reports/callgrind.out +150 -0
  18. data/docs/public/examples/reports/flame_graph.html +408 -0
  19. data/docs/public/examples/reports/flat.txt +45 -0
  20. data/docs/public/examples/reports/graph.dot +129 -0
  21. data/docs/public/examples/reports/graph.html +1319 -0
  22. data/docs/public/examples/reports/graph.txt +100 -0
  23. data/docs/public/examples/reports/graphviz_viewer.html +1 -0
  24. data/docs/public/images/call_stack.png +0 -0
  25. data/docs/public/images/class_diagram.png +0 -0
  26. data/docs/public/images/dot_printer.png +0 -0
  27. data/docs/public/images/flame_graph.png +0 -0
  28. data/docs/public/images/flat.png +0 -0
  29. data/docs/public/images/graph.png +0 -0
  30. data/docs/public/images/graph_html.png +0 -0
  31. data/docs/public/images/ruby-prof-logo.svg +1 -0
  32. data/docs/reports.md +150 -0
  33. data/docs/stylesheets/extra.css +80 -0
  34. data/ext/ruby_prof/rp_allocation.c +0 -15
  35. data/ext/ruby_prof/rp_allocation.h +29 -33
  36. data/ext/ruby_prof/rp_call_tree.c +3 -0
  37. data/ext/ruby_prof/rp_call_tree.h +1 -4
  38. data/ext/ruby_prof/rp_call_trees.h +1 -4
  39. data/ext/ruby_prof/rp_measurement.c +0 -5
  40. data/ext/ruby_prof/rp_measurement.h +49 -53
  41. data/ext/ruby_prof/rp_method.c +3 -0
  42. data/ext/ruby_prof/rp_method.h +1 -4
  43. data/ext/ruby_prof/rp_profile.c +1 -1
  44. data/ext/ruby_prof/rp_profile.h +1 -5
  45. data/ext/ruby_prof/rp_stack.h +50 -53
  46. data/ext/ruby_prof/rp_thread.h +1 -4
  47. data/ext/ruby_prof/ruby_prof.h +1 -4
  48. data/ext/ruby_prof/vc/ruby_prof.vcxproj +7 -8
  49. data/lib/ruby-prof/assets/call_stack_printer.html.erb +746 -711
  50. data/lib/ruby-prof/assets/flame_graph_printer.html.erb +412 -0
  51. data/lib/ruby-prof/assets/graph_printer.html.erb +355 -355
  52. data/lib/ruby-prof/call_tree.rb +57 -57
  53. data/lib/ruby-prof/call_tree_visitor.rb +36 -36
  54. data/lib/ruby-prof/measurement.rb +17 -17
  55. data/lib/ruby-prof/printers/abstract_printer.rb +19 -33
  56. data/lib/ruby-prof/printers/call_info_printer.rb +53 -53
  57. data/lib/ruby-prof/printers/call_stack_printer.rb +168 -180
  58. data/lib/ruby-prof/printers/call_tree_printer.rb +132 -145
  59. data/lib/ruby-prof/printers/dot_printer.rb +177 -132
  60. data/lib/ruby-prof/printers/flame_graph_printer.rb +79 -0
  61. data/lib/ruby-prof/printers/flat_printer.rb +52 -52
  62. data/lib/ruby-prof/printers/graph_html_printer.rb +62 -63
  63. data/lib/ruby-prof/printers/graph_printer.rb +112 -113
  64. data/lib/ruby-prof/printers/multi_printer.rb +134 -127
  65. data/lib/ruby-prof/profile.rb +13 -0
  66. data/lib/ruby-prof/rack.rb +114 -105
  67. data/lib/ruby-prof/task.rb +147 -147
  68. data/lib/ruby-prof/thread.rb +20 -20
  69. data/lib/ruby-prof/version.rb +1 -1
  70. data/lib/ruby-prof.rb +50 -52
  71. data/lib/unprof.rb +10 -10
  72. data/ruby-prof.gemspec +5 -5
  73. data/test/abstract_printer_test.rb +25 -27
  74. data/test/alias_test.rb +203 -117
  75. data/test/call_tree_builder.rb +126 -126
  76. data/test/call_tree_visitor_test.rb +27 -27
  77. data/test/call_trees_test.rb +66 -66
  78. data/test/duplicate_names_test.rb +32 -32
  79. data/test/dynamic_method_test.rb +50 -50
  80. data/test/exceptions_test.rb +24 -24
  81. data/test/exclude_threads_test.rb +48 -48
  82. data/test/fiber_test.rb +72 -72
  83. data/test/inverse_call_tree_test.rb +174 -174
  84. data/test/line_number_test.rb +138 -1
  85. data/test/marshal_test.rb +144 -145
  86. data/test/measure_allocations.rb +26 -26
  87. data/test/measure_allocations_test.rb +340 -1
  88. data/test/measure_process_time_test.rb +3098 -3142
  89. data/test/measure_times.rb +56 -56
  90. data/test/measure_wall_time_test.rb +511 -372
  91. data/test/measurement_test.rb +82 -82
  92. data/test/merge_test.rb +48 -48
  93. data/test/multi_printer_test.rb +52 -66
  94. data/test/no_method_class_test.rb +15 -15
  95. data/test/pause_resume_test.rb +171 -171
  96. data/test/prime.rb +54 -54
  97. data/test/prime_script.rb +5 -5
  98. data/test/printer_call_stack_test.rb +28 -27
  99. data/test/printer_call_tree_test.rb +30 -30
  100. data/test/printer_flame_graph_test.rb +82 -0
  101. data/test/printer_flat_test.rb +99 -99
  102. data/test/printer_graph_html_test.rb +62 -59
  103. data/test/printer_graph_test.rb +42 -40
  104. data/test/printers_test.rb +28 -44
  105. data/test/printing_recursive_graph_test.rb +81 -81
  106. data/test/profile_test.rb +101 -101
  107. data/test/rack_test.rb +103 -93
  108. data/test/recursive_test.rb +139 -139
  109. data/test/scheduler.rb +4 -0
  110. data/test/singleton_test.rb +39 -38
  111. data/test/stack_printer_test.rb +61 -61
  112. data/test/start_stop_test.rb +106 -106
  113. data/test/test_helper.rb +4 -0
  114. data/test/thread_test.rb +29 -29
  115. data/test/unique_call_path_test.rb +123 -123
  116. data/test/yarv_test.rb +56 -56
  117. metadata +53 -11
  118. data/ext/ruby_prof/rp_measure_memory.c +0 -46
  119. data/lib/ruby-prof/compatibility.rb +0 -113
  120. data/test/compatibility_test.rb +0 -49
  121. data/test/measure_memory_test.rb +0 -1193
@@ -0,0 +1,132 @@
1
+ # Advanced Usage
2
+
3
+ This section describes advanced usage of ruby-prof. Additional documentation for every class is also [available](index.md#api-documentation). For workflow guidance, see [Best Practices](best-practices.md).
4
+
5
+ ## Profiling Options
6
+
7
+ ruby-prof understands the following options when profiling code:
8
+
9
+ **measure_mode** - What ruby-prof should measure. For more information see the [Measurement Mode](#measurement-mode) section.
10
+
11
+ **track_allocations** - Tracks each object allocation, including the object class and source file location. For more information see the [Allocation Tracking](#allocation-tracking) section.
12
+
13
+ **exclude_threads** - Array of threads which should not be profiled. For more information see the [Thread Inclusion/Exclusion](#thread-inclusionexclusion) section.
14
+
15
+ **include_threads** - Array of threads which should be profiled. All other threads will be ignored. For more information see the [Thread Inclusion/Exclusion](#thread-inclusionexclusion) section.
16
+
17
+ **allow_exceptions** - Whether to raise exceptions encountered during profiling, or to suppress them. Defaults to false.
18
+
19
+ **exclude_common** - Automatically calls `exclude_common_methods!` to exclude commonly cluttering methods. Defaults to false. For more information see the [Method Exclusion](#method-exclusion) section.
20
+
21
+ ## Measurement Mode
22
+
23
+ The measurement mode determines what ruby-prof measures when profiling code. Supported measurements are:
24
+
25
+ ### Wall Time
26
+
27
+ Wall time measures the real-world time elapsed between any two moments in seconds. If there are other processes concurrently running on the system that use significant CPU or disk time during a profiling run then the reported results will be larger than expected. On Windows, wall time is measured using `QueryPerformanceCounter` and on other platforms by `clock_gettime(CLOCK_MONOTONIC)`. Use `RubyProf::WALL_TIME` to select this mode.
28
+
29
+ ### Process Time
30
+
31
+ Process time measures the time used by a process between any two moments in seconds. It is unaffected by other processes concurrently running on the system. Remember that with process time, calls to methods like `sleep` will not be included in profiling results. On Windows, process time is measured using `GetProcessTimes` and on other platforms by `clock_gettime`. Use `RubyProf::PROCESS_TIME` to select this mode.
32
+
33
+ ### Object Allocations
34
+
35
+ Object allocations measures how many objects each method in a program allocates. Measurements are done via Ruby's `RUBY_INTERNAL_EVENT_NEWOBJ` trace event, counting each new object created (excluding internal `T_IMEMO` objects). Use `RubyProf::ALLOCATIONS` to select this mode.
36
+
37
+ To set the measurement mode:
38
+
39
+ ```ruby
40
+ profile = RubyProf::Profile.new(measure_mode: RubyProf::WALL_TIME)
41
+ profile = RubyProf::Profile.new(measure_mode: RubyProf::PROCESS_TIME)
42
+ profile = RubyProf::Profile.new(measure_mode: RubyProf::ALLOCATIONS)
43
+ ```
44
+
45
+ The default value is `RubyProf::WALL_TIME`. You may also specify the measure mode by using the `RUBY_PROF_MEASURE_MODE` environment variable:
46
+
47
+ ```
48
+ export RUBY_PROF_MEASURE_MODE=wall
49
+ export RUBY_PROF_MEASURE_MODE=process
50
+ export RUBY_PROF_MEASURE_MODE=allocations
51
+ ```
52
+
53
+ ## Allocation Tracking
54
+
55
+ ruby-prof also has the ability to track object allocations. This functionality can be turned on via the track_allocations option:
56
+
57
+ ```ruby
58
+ require 'ruby-prof'
59
+
60
+ RubyProf::Profile.profile(track_allocations: true) do
61
+ ...
62
+ end
63
+ ```
64
+
65
+ Note the `RubyProf::ALLOCATIONS` measure mode is slightly different than tracking allocations. The measurement mode provides high level information about the number of allocations performed in each method. In contrast, tracking allocations provides detailed information about allocation type, count, and source location. Currently, to see allocations results you must use the `RubyProf::GraphHtmlPrinter`.
66
+
67
+ ## Thread Inclusion/Exclusion
68
+
69
+ ruby-prof can profile multiple threads. Sometimes this can be overwhelming. For example, assume you want to determine why your tests are running slowly. If you are using minitest, it can run tests in parallel by spawning worker threads (to force a single worker, set `N=0` when running tests). Thus, ruby-prof provides two options to specify which threads should be profiled:
70
+
71
+ **exclude_threads** - Array of threads which should not be profiled.
72
+
73
+ **include_threads** - Array of threads which should be profiled. All other threads will be ignored.
74
+
75
+ ## Method Exclusion
76
+
77
+ ruby-prof supports excluding specific methods and threads from profiling results. This is useful for reducing connectivity in the call graph, making it easier to identify the source of performance problems when using a graph printer. For example, consider `Integer#times`: it's hardly ever useful to know how much time is spent in the method itself. We are more interested in how much the passed in block contributes to the time spent in the method which contains the `Integer#times` call. The effect on collected metrics is identical to eliminating methods from the profiling result in a post-process step.
78
+
79
+ ```ruby
80
+ profile = RubyProf::Profile.new(...)
81
+ profile.exclude_methods!(Integer, :times, ...)
82
+ profile.start
83
+ ```
84
+
85
+ A convenience method is provided to exclude a large number of methods which usually clutter up profiles:
86
+
87
+ ```ruby
88
+ profile.exclude_common_methods!
89
+ ```
90
+
91
+ However, this is a somewhat opinionated method collection. It's usually better to view it as an inspiration instead of using it directly (see [exclude_common_methods.rb](https://github.com/ruby-prof/ruby-prof/blob/e087b7d7ca11eecf1717d95a5c5fea1e36ea3136/lib/ruby-prof/profile/exclude_common_methods.rb)).
92
+
93
+ ## Merging Threads and Fibers
94
+
95
+ ruby-prof profiles each thread and fiber separately. A common design pattern is to have a main thread delegate work to background threads or fibers. Examples include web servers such as Puma and Falcon, as well as code that uses `Enumerator`, `Fiber.new`, or async libraries.
96
+
97
+ Understanding profiling results can be very difficult when there are many threads or fibers because each one appears as a separate entry in the output. To help with this, ruby-prof includes the ability to merge results for threads and fibers that start with the same root method. In the best case, this can collapse results into just two entries - one for the parent thread and one for all workers.
98
+
99
+ Note the collapsed results show the sum of times for all merged threads/fibers. For example, assume there are 10 worker fibers that each took 5 seconds to run. The single merged entry will show a total time of 50 seconds.
100
+
101
+ To merge threads and fibers:
102
+
103
+ ```ruby
104
+ profile = RubyProf::Profile.profile do
105
+ ...
106
+ end
107
+ profile.merge!
108
+ ```
109
+
110
+ This is also supported in the Rack adapter via the `merge_fibers` option:
111
+
112
+ ```ruby
113
+ config.middleware.use Rack::RubyProf, path: Rails.root.join("tmp/profile"), merge_fibers: true
114
+ ```
115
+
116
+ ## Saving Results
117
+
118
+ It can be helpful to save the results of a profiling run for later analysis. Results can be saved using Ruby's [marshal](https://docs.ruby-lang.org/en/master/Marshal.html) library.
119
+
120
+ ```ruby
121
+ profile_1 = RubyProf::Profile.profile do
122
+ ...
123
+ end
124
+
125
+ # Save the results
126
+ data = Marshal.dump(profile_1)
127
+
128
+ # Sometime later load the results
129
+ profile_2 = Marshal.load(data)
130
+ ```
131
+
132
+ **!!!WARNING!!!** - Only load ruby-prof profiles that you know are safe. Demarshaling data can lead to arbitrary code execution and thus can be [dangerous](https://docs.ruby-lang.org/en/master/Marshal.html#module-Marshal-label-Security+considerations).
@@ -0,0 +1,98 @@
1
+ # Comparison with Other Profilers
2
+
3
+ Ruby has several excellent profiling tools, each with different strengths. This page compares ruby-prof with three popular alternatives to help you choose the right tool for your needs.
4
+
5
+ ## Tracing vs Sampling
6
+
7
+ The most important distinction between profilers is **tracing** vs **sampling**:
8
+
9
+ - **Tracing profilers** (ruby-prof) instrument every method call and return. This provides exact call counts and complete call graphs, but adds overhead to every method invocation.
10
+ - **Sampling profilers** (stackprof, rbspy, vernier) periodically capture stack snapshots. This has much lower overhead but may miss short-lived method calls.
11
+
12
+ ## Overview
13
+
14
+ The table below compares ruby-prof with [stackprof](https://github.com/tmm1/stackprof), [rbspy](https://github.com/rbspy/rbspy), and [vernier](https://github.com/jhawthorn/vernier) — the three most popular sampling profilers for Ruby.
15
+
16
+ | | ruby-prof | stackprof | rbspy | vernier |
17
+ |---|---|---|---|---|
18
+ | **Type** | Tracing | Sampling | Sampling | Sampling |
19
+ | **Implementation** | C extension (TracePoint API) | C extension (signals) | External Rust binary | C extension (signals) |
20
+ | **Code changes** | None ([CLI](getting-started.md#command-line)) or minimal | Minimal | None | Minimal |
21
+ | **Ruby versions** | All, since 2006 (currently 3.2+) | 2.2+ | 1.9.3+ | 3.2.1+ |
22
+ | **OS support** | Linux, macOS, Windows | Linux | Linux, macOS, Windows, FreeBSD | Linux, macOS |
23
+
24
+ ## Measurement Capabilities
25
+
26
+ | | ruby-prof | stackprof | rbspy | vernier |
27
+ |---|---|---|---|---|
28
+ | **Wall time** | Yes | Yes | Yes | Yes |
29
+ | **CPU/Process time** | Yes | Yes | No | No |
30
+ | **Allocations** | Yes | Yes | No | Yes |
31
+ | **GVL visibility** | No | No | No | Yes |
32
+ | **GC pauses** | No | No | No | Yes |
33
+ | **Retained memory** | No | No | No | Yes |
34
+ | **Multi-thread** | Yes | No | No | Yes |
35
+ | **Fibers** | Yes | No | No | No |
36
+
37
+ ## Report Formats
38
+
39
+ | | ruby-prof | stackprof | rbspy | vernier |
40
+ |---|---|---|---|---|
41
+ | **Flat/Summary** | Yes | Yes | Yes | No |
42
+ | **Call graph** | Yes (text + HTML) | No | No | No |
43
+ | **Flame graph** | Yes (HTML) | Yes | Yes (SVG) | Yes (Firefox Profiler) |
44
+ | **Call stack** | Yes (HTML) | No | No | No |
45
+ | **Callgrind** | Yes | No | Yes | No |
46
+ | **Graphviz dot** | Yes | Yes | No | No |
47
+
48
+ ## When to Use Each
49
+
50
+ ### ruby-prof
51
+
52
+ ruby-prof is the longest-standing Ruby profiler, with its [first](./history.md) release in 2005. It has been continuously maintained for nearly two decades, evolving alongside Ruby itself from 1.8 through 4.0. Over that time it has supported every major Ruby version and platform, including Windows — a rarity among Ruby C extensions.
53
+
54
+ Being a tracing profiler, ruby-prof provides *exact* information about your program. It tracks every thread, every fiber and every method call. It shines with its support for multiple measurement modes and excellent reporting capabilities.
55
+
56
+ ruby-prof can be used from the [command line](getting-started.md#command-line) with no code changes, or via an API for more control.
57
+
58
+ The biggest downsides of ruby-prof are:
59
+
60
+ * It adds significant overhead for running programs, so is not suitable for production use
61
+ * It must start a Ruby program; it cannot attach to an already running program
62
+
63
+ ### stackprof
64
+
65
+ [stackprof](https://github.com/tmm1/stackprof) is a low-overhead, sampling profiler that is good for development. It adds minimal overhead while still providing useful flame graphs and per-line hit counts. A good choice when you want something lightweight and well-established.
66
+
67
+ The biggest downsides of stackprof are:
68
+
69
+ * Single-thread only
70
+ * Linux only for time-based modes
71
+
72
+ ### rbspy
73
+
74
+ [rbspy](https://github.com/rbspy/rbspy) is a sampling profiler best for profiling in production or when you cannot modify the application code. As an external process, it attaches to a running Ruby process by PID with zero code changes. It is particularly useful for profiling third-party Ruby applications (Chef, Puppet, etc.), investigating slow test runs, or quick profiling of scripts via `rbspy record ruby my-script.rb`. Supports the widest range of Ruby versions.
75
+
76
+ The biggest downsides of rbspy are:
77
+
78
+ * No allocation profiling
79
+ * No call graph or caller/callee data
80
+
81
+ ### vernier
82
+
83
+ [vernier](https://github.com/jhawthorn/vernier) is a sampling profiler best for diagnosing concurrency issues and understanding GVL contention. It is the only Ruby profiler that reports GVL state, GC pauses and idle time. Its Firefox Profiler integration provides rich interactive visualizations with per-thread timelines.
84
+
85
+ The biggest downsides of vernier are:
86
+
87
+ * Requires Ruby 3.2.1+
88
+ * No Windows support
89
+
90
+ ### rack-mini-profiler
91
+
92
+ [rack-mini-profiler](https://github.com/MiniProfiler/rack-mini-profiler) is a "batteries-included" profiling tool for Rails and Rack applications. It uses stackprof under the hood for CPU profiling while also supporting memory profiling. It is a good choice if you want an integrated profiling solution that works directly in the browser during development.
93
+
94
+ ## Memory Profiling
95
+
96
+ [memory_profiler](https://github.com/SamSaffron/memory_profiler) is another profiler, but it focuses exclusively on memory usage. It uses Ruby's `ObjectSpace` API to track every object allocation during a block of code, recording the source file, line number, object type, and size via `ObjectSpace.memsize_of`. By snapshotting the GC generation before and after, it distinguishes between allocated objects (created during the block) and retained objects (still alive after GC). This makes it useful for finding memory leaks and identifying allocation-heavy code. It's pure Ruby with no C extension, so it works across Ruby versions and platforms.
97
+
98
+ ruby-prof can track allocation counts via its `RubyProf::ALLOCATIONS` mode, but memory_profiler gives deeper insight into memory specifically — object sizes, retained vs allocated, and per-gem breakdowns.
@@ -0,0 +1,122 @@
1
+ # Architecture
2
+
3
+ ## Overview
4
+
5
+ ruby-prof is a C extension that uses Ruby's [TracePoint](https://docs.ruby-lang.org/en/master/TracePoint.html) API to intercept method calls and returns. Every time a method is entered or exited, ruby-prof records timing and (optionally) allocation data. This tracing approach means ruby-prof captures every method invocation, giving exact call counts and complete call graphs.
6
+
7
+ The diagram below shows the main classes that make up ruby-prof:
8
+
9
+ ```mermaid
10
+ classDiagram
11
+ Profile "1" *-- "1" Measurer
12
+ Profile "1" *-- "*" Thread
13
+ Thread "1" *-- "1" Stack
14
+ Thread "1" *-- "*" MethodInfo
15
+ Thread "1" *-- "1" CallTree
16
+ Stack "1" o-- "*" Frame
17
+ Frame --> CallTree
18
+ CallTree "1" *-- "1" Measurement
19
+ CallTree --> MethodInfo : target
20
+ MethodInfo "1" *-- "1" CallTrees
21
+ MethodInfo "1" *-- "1" Measurement
22
+ MethodInfo "1" *-- "*" Allocation
23
+ CallTrees o-- "*" CallTree
24
+
25
+ class Profile {
26
+ +threads: Hash
27
+ +measurer: Measurer
28
+ }
29
+ class Measurer {
30
+ +mode: MeasurerMode
31
+ +track_allocations: boolean
32
+ +multiplier: double
33
+ +measure: function pointer
34
+ }
35
+ class Thread {
36
+ +methods: Hash
37
+ +stack: Stack
38
+ +callTree: CallTree
39
+ }
40
+ class Stack {
41
+ +frames: Array
42
+ }
43
+ class Frame {
44
+ +callTree: CallTree
45
+ }
46
+ class CallTree {
47
+ +parent: CallTree
48
+ +children: Hash
49
+ +target: MethodInfo
50
+ +measurement: Measurement
51
+ }
52
+ class MethodInfo {
53
+ +allocations: Hash
54
+ +callTrees: CallTrees
55
+ +measurement: Measurement
56
+ }
57
+ class Measurement {
58
+ +total_time: double
59
+ +self_time: double
60
+ +wait_time: double
61
+ +called: integer
62
+ }
63
+ class Allocation {
64
+ +count: integer
65
+ +source_file: string
66
+ +source_line: int
67
+ +klass: VALUE
68
+ }
69
+ class CallTrees {
70
+ +callTrees: Array
71
+ }
72
+ ```
73
+
74
+ ## Profile
75
+
76
+ Profile is the top-level object returned by a profiling run:
77
+
78
+ ```ruby
79
+ profile = RubyProf::Profile.profile do
80
+ ...
81
+ end
82
+ ```
83
+
84
+ A Profile owns a Measurer that determines what is being measured, and a collection of Threads representing each thread (or fiber) that was active during profiling.
85
+
86
+ ## Measurer and Measurement
87
+
88
+ The **Measurer** controls what ruby-prof measures. It holds a function pointer that is called on every method entry and exit to take a measurement. The three modes are:
89
+
90
+ - **Wall time** — elapsed real time
91
+ - **Process time** — CPU time consumed by the process (excludes time spent in sleep or I/O)
92
+ - **Allocations** — number of objects allocated
93
+
94
+ Each CallTree and MethodInfo holds a **Measurement** that accumulates the results: total time, self time (excluding children), wait time (time spent waiting on other threads), and call count.
95
+
96
+ ## Thread
97
+
98
+ Each Thread tracks the methods called on that thread and owns the root of a call tree. It also maintains an internal Stack of Frames used during profiling to track the current call depth.
99
+
100
+ **Stack** and **Frame** are transient — they exist only while profiling is active. A Frame records timing data for a single method invocation on the stack, including start time and time spent in child calls. When a method returns, its Frame is popped and the accumulated timing is transferred to the corresponding CallTree node.
101
+
102
+ ## CallTree and MethodInfo
103
+
104
+ These two classes are central to ruby-prof and represent two different views of the same profiling data:
105
+
106
+ - **CallTree** records the calling structure — which method called which, forming a graph. Each node has a parent, children, and a reference to its target MethodInfo. A method that is called from two different call sites will have two separate CallTree nodes, each with its own Measurement. Recursive methods create cycles in the graph.
107
+
108
+ - **MethodInfo** represents a single method regardless of where it was called from. It aggregates data across all call sites. Each MethodInfo holds a CallTrees collection that links back to every CallTree node that invoked that method, providing both caller and callee information.
109
+
110
+ This separation is what allows ruby-prof to generate both call graph reports (which show calling relationships) and flat reports (which show per-method totals).
111
+
112
+ ## Allocation
113
+
114
+ When allocation tracking is enabled, each MethodInfo records the objects it allocated. An Allocation tracks the class of object created, the source location, and the count.
115
+
116
+ ## Memory Management
117
+
118
+ The Profile object is responsible for managing the memory of its child objects, which are C structures. When a Profile is garbage collected, it recursively frees all its objects. In the class diagram, composition relationships (filled diamond) indicate ownership — a Profile frees its Threads, Threads free their CallTrees and MethodInfo instances, and so on.
119
+
120
+ ruby-prof keeps a Profile alive as long as there are live references to any of its MethodInfo or CallTree objects. This is done via Ruby's GC mark phase: CallTree instances mark their associated MethodInfo, and MethodInfo instances mark their owning Profile.
121
+
122
+ Starting with version 1.5, it is possible to create Thread, CallTree and MethodInfo instances from Ruby (this was added to support testing). These Ruby-created objects are owned by Ruby's garbage collector rather than the C extension. An internal ownership flag on each instance tracks who is responsible for freeing it.
@@ -0,0 +1,27 @@
1
+ # Best Practices
2
+
3
+ Profiling gives you amazing insight into your program. What you think is slow is almost never what is actually slow. Below are some best practices to help unlock this power.
4
+
5
+ ## Start With Realistic Runs
6
+
7
+ When profiling data-heavy work, start with a smaller sample of the data instead of the full dataset. Profile a portion first (for example 1% or 10%). It is faster, easier to understand, and often enough to find the main bottleneck. Once you have a likely fix, validate it with a larger and more realistic workload so you know the result still holds in context. Run the same profile more than once and warm up before you measure so one-time startup work does not dominate the report.
8
+
9
+ ## Choose The Right Measurement Mode
10
+
11
+ Pick the measurement mode based on the question you are asking. Use `WALL_TIME` for end-to-end latency, `PROCESS_TIME` for CPU-focused work, and `ALLOCATIONS` when object churn is the concern. See [Measurement Mode](advanced-usage.md#measurement-mode) for details.
12
+
13
+ ## Reduce Noise Before Deep Analysis
14
+
15
+ When framework internals or concurrency noise dominate output, narrow the scope first. Use `exclude_common` or explicit method exclusions, and use thread filtering (`include_threads` / `exclude_threads`) when needed. For highly concurrent workloads, merging worker results (`merge!` or Rack `merge_fibers: true`) can make trends much easier to read. See [Profiling Options](advanced-usage.md#profiling-options), [Method Exclusion](advanced-usage.md#method-exclusion), and [Merging Threads and Fibers](advanced-usage.md#merging-threads-and-fibers).
16
+
17
+ ## Use Reports In A Sequence
18
+
19
+ Start with a quick summary, then drill down. In practice, this usually means using `FlatPrinter` to find hotspots, `GraphHtmlPrinter` (or `GraphPrinter`) to understand caller/callee relationships, and `FlameGraphPrinter` to validate dominant paths visually. See [Reports](reports.md), especially [Creating Reports](reports.md#creating-reports) and [Report Types](reports.md#report-types).
20
+
21
+ ## Use Threshold Filters Early
22
+
23
+ Threshold filters are one of the fastest ways to make a large profile readable. Start with `min_percent` to hide low-impact methods in most printers. For `GraphHtmlPrinter`, use `min_time` when you want to drop methods below an absolute time cutoff. These filters help you focus on the code that actually moves total runtime.
24
+
25
+ ## Compare Trends, Not Single Snapshots
26
+
27
+ Do not optimize based on one run unless the signal is overwhelming. Compare before/after profiles under the same workload, then prioritize repeated hot paths over one-off spikes.
@@ -0,0 +1,130 @@
1
+ # Getting Started
2
+
3
+ There are three ways to use ruby-prof:
4
+
5
+ - command line
6
+ - convenience API
7
+ - core API
8
+
9
+ ## Command Line
10
+
11
+ The easiest way to use ruby-prof is via the command line, which requires no modifications to your program. The basic usage is:
12
+
13
+ ```
14
+ ruby-prof [options] <script.rb> [--] [script-options]
15
+ ```
16
+
17
+ Where script.rb is the program you want to profile.
18
+
19
+ For a full list of options, see the RubyProf::Cmd documentation or execute the following command:
20
+
21
+ ```
22
+ ruby-prof -h
23
+ ```
24
+
25
+ ## Convenience API
26
+
27
+ The second way to use ruby-prof is via its convenience API. This requires small modifications to the program you want to profile:
28
+
29
+ ```ruby
30
+ require 'ruby-prof'
31
+
32
+ profile = RubyProf::Profile.new
33
+
34
+ # profile the code
35
+ profile.start
36
+ # ... code to profile ...
37
+ result = profile.stop
38
+
39
+ # print a flat profile to text
40
+ printer = RubyProf::FlatPrinter.new(result)
41
+ printer.print(STDOUT)
42
+ ```
43
+
44
+ Alternatively, you can use a block to tell ruby-prof what to profile:
45
+
46
+ ```ruby
47
+ require 'ruby-prof'
48
+
49
+ # profile the code
50
+ result = RubyProf::Profile.profile do
51
+ # ... code to profile ...
52
+ end
53
+
54
+ # print a graph profile to text
55
+ printer = RubyProf::GraphPrinter.new(result)
56
+ printer.print(STDOUT)
57
+ ```
58
+
59
+ ruby-prof also supports pausing and resuming profiling runs.
60
+
61
+ ```ruby
62
+ require 'ruby-prof'
63
+
64
+ profile = RubyProf::Profile.new
65
+
66
+ # profile the code
67
+ profile.start
68
+ # ... code to profile ...
69
+
70
+ profile.pause
71
+ # ... other code ...
72
+
73
+ profile.resume
74
+ # ... code to profile ...
75
+
76
+ result = profile.stop
77
+ ```
78
+
79
+ Note that resume will only work if start has been called previously. In addition, resume can also take a block:
80
+
81
+ ```ruby
82
+ require 'ruby-prof'
83
+
84
+ profile = RubyProf::Profile.new
85
+
86
+ # profile the code
87
+ profile.start
88
+ # ... code to profile ...
89
+
90
+ profile.pause
91
+ # ... other code ...
92
+
93
+ profile.resume do
94
+ # ... code to profile...
95
+ end
96
+
97
+ result = profile.stop
98
+ ```
99
+
100
+ With this usage, resume will automatically call pause at the end of the block.
101
+
102
+ The `RubyProf::Profile.profile` method can take various options, which are described in [Profiling Options](advanced-usage.md#profiling-options).
103
+
104
+ ## Core API
105
+
106
+ The convenience API is a wrapper around the `RubyProf::Profile` class. Using the Profile class directly provides additional functionality, such as [method exclusion](advanced-usage.md#method-exclusion).
107
+
108
+ To create a new profile:
109
+
110
+ ```ruby
111
+ require 'ruby-prof'
112
+
113
+ profile = RubyProf::Profile.new(measure_mode: RubyProf::WALL_TIME)
114
+ result = profile.profile do
115
+ ...
116
+ end
117
+ ```
118
+
119
+ Once a profile is completed, you can either generate a [report](reports.md) via a printer or [save](advanced-usage.md#saving-results) the results for later analysis. For a list of profiling options, please see the [Profiling Options](advanced-usage.md#profiling-options) section.
120
+ If you are unsure which report to generate first, see [Report Types](reports.md#report-types).
121
+
122
+
123
+ However, using ruby-prof also comes with two caveats:
124
+
125
+ - To use ruby-prof you generally need to include a few lines of extra code in your program (although see [command line usage](getting-started.md#command-line))
126
+ - Using ruby-prof will cause your program to run slower (see [Performance](index.md#performance) section)
127
+
128
+ Most of the time, these two caveats are acceptable. But if you need to determine why a program running in production is slow or hung, a sampling profiler will be a better choice. Excellent choices include [stackprof](https://github.com/tmm1/stackprof) or [rbspy](https://rbspy.github.io/).
129
+
130
+ If you are just interested in memory usage, you may also want to check out the [memory_profiler](https://github.com/SamSaffron/memory_profiler) gem (although ruby-prof provides similar information).
data/docs/history.md ADDED
@@ -0,0 +1,11 @@
1
+ # History
2
+
3
+ For a full list of changes between versions, see the [Changelog](changelog.md).
4
+
5
+ The first version of ruby-prof, 0.1.1, was released on March 22, 2005 by [Shugo Maeda](https://shugo.net/). The original [source](https://shugo.net/archive/ruby-prof/) code is still available on his website (it is not actually in the git history). ruby-prof was a vast improvement at the time, running 30 times faster than the original Ruby profiler.
6
+
7
+ Version [0.4.0](https://rubygems.org/gems/ruby-prof/versions/0.4.0) was the first version packaged as a Ruby gem. Version 0.4.0 also introduced Windows support, thread support and added a number of additional reports such as the graph report in HTML and the call graph report.
8
+
9
+ A number of versions were subsequently released, with a 1.0.0 [release](https://cfis.savagexi.com/2019/07/29/ruby-prof-1-0/) finally happening in July of 2019. Version 1.0.0 was a major rewrite that significantly improved performance, correctly profiled recursive methods, redesigned reports, added allocation/memory measurement support and introduced saving and reloading profiling results. Since then ruby-prof has continued to evolve along with Ruby with 19 releases.
10
+
11
+ Version 2.0.0 will mark the 20th release of ruby-prof since the 1.0.0 release. Version 2.0.0 supports Ruby 4 and includes new flame/icicle graph support, revamped reports and improved documentation. The reason for the 2.0.0 jump is that profiling memory sizes has been removed due to changes in Ruby 4.0.0. In addition, the old compatibility API was also removed.
data/docs/index.md ADDED
@@ -0,0 +1,45 @@
1
+ # ruby-prof
2
+
3
+ ruby-prof is a [tracing](./alternatives.md#tracing-vs-sampling) profiler for MRI Ruby with a long [history](./history.md) that dates back to 2005! Its features include:
4
+
5
+ - Measurement Modes - ruby-prof can measure program [wall time](advanced-usage.md#wall-time), [process time](advanced-usage.md#process-time) and [object allocations](advanced-usage.md#object-allocations).
6
+ - Reports - ruby-prof can generate [flat](reports.md#flat), [graph (text)](reports.md#graph-text), [graph (HTML)](reports.md#graph-html), [flame graph](reports.md#flame-graph), [call stack](reports.md#call-stack), [graphviz](reports.md#graphviz), [cachegrind](reports.md#cachegrind), and [call info](reports.md#call-info-report) reports.
7
+ - Threads - supports profiling multiple threads simultaneously.
8
+ - Fibers - supports profiling multiple fibers simultaneously.
9
+ - Merging - supports merging results across fibers or threads.
10
+ - Recursive - supports profiling recursive methods.
11
+
12
+ ![Flame Graph](../public/images/flame_graph.png)
13
+
14
+ ## Why ruby-prof?
15
+
16
+ ruby-prof is helpful if your program is slow and you want to know why! It can help you track down methods that are either slow or allocate a large number of objects. Oftentimes the results will surprise you - when profiling, what you think you know almost always turns out to be wrong.
17
+
18
+ ## Installation
19
+ To install ruby-prof:
20
+
21
+ ```
22
+ gem install ruby-prof
23
+ ```
24
+
25
+ If you are running Linux or Unix you'll need to have a C compiler installed so the extension can be built when it is installed. If you are running Windows, then you should install the Windows-specific gem or install [devkit](https://rubyinstaller.org/add-ons/devkit.html).
26
+
27
+ ruby-prof requires Ruby 3.2.0 or higher. If you need to work with older Ruby versions then you can download an older version of ruby-prof.
28
+
29
+ ## Performance
30
+ ruby-prof is a tracing profiler, not a sampling profiler, and thus will cause your program to run slower. Our tests show that the overhead varies considerably based on the code being profiled. Significant effort has been put into reducing this overhead, but most programs will run approximately twice as slow, while highly recursive programs (like the Fibonacci series test) may run up to five times slower.
31
+
32
+ ## History
33
+ ruby-prof has been under continuous development since 2005 — see the full [History](history.md) page.
34
+
35
+ ## API Documentation
36
+
37
+ API documentation for each class is available at the [ruby-prof API docs](https://ruby-prof.github.io/doc/index.html).
38
+
39
+ ## License
40
+
41
+ See [LICENSE](../LICENSE) for license information.
42
+
43
+ ## Development
44
+
45
+ Code is located at [github.com/ruby-prof/ruby-prof](https://github.com/ruby-prof/ruby-prof).
@@ -0,0 +1,64 @@
1
+ # Profiling Rails
2
+
3
+ To profile a Rails application it is vital to run it using production-like settings (cache classes, cache view lookups, etc.). Otherwise, Rails dependency loading code will overwhelm any time spent in the application itself (our tests show that Rails dependency loading causes a roughly 6x slowdown). The best way to do this is to create a new Rails environment, `profile`.
4
+
5
+ To profile Rails:
6
+
7
+ 1. Add ruby-prof to your Gemfile:
8
+
9
+ ```ruby
10
+ group :profile do
11
+ gem 'ruby-prof'
12
+ end
13
+ ```
14
+
15
+ Then install it:
16
+
17
+ ```bash
18
+ bundle install
19
+ ```
20
+
21
+ 2. Create `config/environments/profile.rb` with production-like settings and the ruby-prof middleware:
22
+
23
+ ```ruby
24
+ # config/environments/profile.rb
25
+ require_relative "production"
26
+
27
+ Rails.application.configure do
28
+ # Optional: reduce noise while profiling.
29
+ config.log_level = :warn
30
+
31
+ # Optional: disable controller/view caching if you want raw app execution timing.
32
+ config.action_controller.perform_caching = false
33
+
34
+ config.middleware.use Rack::RubyProf, path: Rails.root.join("tmp/profile")
35
+ end
36
+ ```
37
+
38
+ By default the rack adapter generates flat text, graph text, graph HTML, and call stack HTML reports.
39
+
40
+ 3. Start Rails in the profile environment:
41
+
42
+ ```bash
43
+ bin/rails server -e profile
44
+ ```
45
+
46
+ You can run a console in the same environment with:
47
+
48
+ ```bash
49
+ bin/rails console -e profile
50
+ ```
51
+
52
+ 4. Make a request to generate profile output:
53
+
54
+ ```bash
55
+ curl http://127.0.0.1:3000/
56
+ ```
57
+
58
+ 5. Inspect reports in `tmp/profile`:
59
+
60
+ ```bash
61
+ ls -1 tmp/profile
62
+ ```
63
+
64
+ Reports are generated per request path. Repeating the same request path overwrites the previous report files for that path.
@@ -0,0 +1,33 @@
1
+ # A small synthetic workload for demonstrating ruby-prof reports.
2
+ # example.rb
3
+
4
# Lowercases the text and strips every character that is not an ASCII
# lowercase letter or whitespace.
#
# @param text [String] raw input text
# @return [String] a new string containing only a-z and whitespace
def normalize(text)
  lowered = text.downcase
  lowered.gsub(/[^a-z\s]/, "")
end
7
+
8
# Splits text into whitespace-separated tokens.
#
# Whitespace-mode String#split is used instead of split(/\s+/) because the
# regexp form returns a leading "" token when the text starts with whitespace,
# which would then be counted as a word.
#
# @param text [String] text to split
# @return [Array<String>] the non-empty tokens, in order of appearance
def tokenize(text)
  text.split
end
11
+
12
# Counts how many times each word occurs.
#
# @param words [#each] the tokens to count
# @return [Hash] maps each word to its number of occurrences; the hash has a
#   default of 0, so looking up an unseen word yields 0
def count_words(words)
  occurrences = Hash.new(0)
  words.each do |word|
    occurrences[word] += 1
  end
  occurrences
end
17
+
18
# Returns the most frequent entries, highest count first.
#
# @param counts [Hash] maps each word to its numeric count
# @param n [Integer] maximum number of entries to return (defaults to 10)
# @return [Array<Array>] up to n [word, count] pairs sorted by descending count
def top_words(counts, n = 10)
  ranked = counts.sort_by { |_word, occurrences| -occurrences }
  ranked.take(n)
end
21
+
22
# Runs the full word-frequency pipeline (normalize, tokenize, count_words,
# top_words) over a sample paragraph and returns the result of top_words.
#
# @return [Array<Array>] the [word, count] pairs produced by top_words with
#   its default limit
def run_example
  # The paragraph is repeated 200 times; presumably this is to give the
  # profiler a measurable amount of work.
  text = <<~EOS * 200
    Ruby is a dynamic, open source programming language with a focus on
    simplicity and productivity. It has an elegant syntax that is natural
    to read and easy to write.
  EOS

  normalized = normalize(text)
  tokens = tokenize(normalized)
  counts = count_words(tokens)
  # Return the result directly; the original assigned it to an unused local,
  # which triggers Ruby's "assigned but unused variable" warning.
  top_words(counts)
end