ruby-prof 1.7.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/{CHANGES → CHANGELOG.md} +112 -178
- data/README.md +5 -5
- data/bin/ruby-prof +1 -4
- data/docs/advanced-usage.md +132 -0
- data/docs/alternatives.md +98 -0
- data/docs/architecture.md +122 -0
- data/docs/best-practices.md +27 -0
- data/docs/getting-started.md +130 -0
- data/docs/history.md +11 -0
- data/docs/index.md +45 -0
- data/docs/profiling-rails.md +64 -0
- data/docs/public/examples/example.rb +33 -0
- data/docs/public/examples/generate_reports.rb +92 -0
- data/docs/public/examples/reports/call_info.txt +27 -0
- data/docs/public/examples/reports/call_stack.html +835 -0
- data/docs/public/examples/reports/callgrind.out +150 -0
- data/docs/public/examples/reports/flame_graph.html +408 -0
- data/docs/public/examples/reports/flat.txt +45 -0
- data/docs/public/examples/reports/graph.dot +129 -0
- data/docs/public/examples/reports/graph.html +1319 -0
- data/docs/public/examples/reports/graph.txt +100 -0
- data/docs/public/examples/reports/graphviz_viewer.html +1 -0
- data/docs/public/images/call_stack.png +0 -0
- data/docs/public/images/class_diagram.png +0 -0
- data/docs/public/images/dot_printer.png +0 -0
- data/docs/public/images/flame_graph.png +0 -0
- data/docs/public/images/flat.png +0 -0
- data/docs/public/images/graph.png +0 -0
- data/docs/public/images/graph_html.png +0 -0
- data/docs/public/images/ruby-prof-logo.svg +1 -0
- data/docs/reports.md +150 -0
- data/docs/stylesheets/extra.css +80 -0
- data/ext/ruby_prof/rp_allocation.c +0 -15
- data/ext/ruby_prof/rp_allocation.h +29 -33
- data/ext/ruby_prof/rp_call_tree.c +3 -0
- data/ext/ruby_prof/rp_call_tree.h +1 -4
- data/ext/ruby_prof/rp_call_trees.h +1 -4
- data/ext/ruby_prof/rp_measurement.c +0 -5
- data/ext/ruby_prof/rp_measurement.h +49 -53
- data/ext/ruby_prof/rp_method.c +3 -0
- data/ext/ruby_prof/rp_method.h +1 -4
- data/ext/ruby_prof/rp_profile.c +1 -1
- data/ext/ruby_prof/rp_profile.h +1 -5
- data/ext/ruby_prof/rp_stack.h +50 -53
- data/ext/ruby_prof/rp_thread.h +1 -4
- data/ext/ruby_prof/ruby_prof.h +1 -4
- data/ext/ruby_prof/vc/ruby_prof.vcxproj +7 -8
- data/lib/ruby-prof/assets/call_stack_printer.html.erb +746 -711
- data/lib/ruby-prof/assets/flame_graph_printer.html.erb +412 -0
- data/lib/ruby-prof/assets/graph_printer.html.erb +355 -355
- data/lib/ruby-prof/call_tree.rb +57 -57
- data/lib/ruby-prof/call_tree_visitor.rb +36 -36
- data/lib/ruby-prof/measurement.rb +17 -17
- data/lib/ruby-prof/printers/abstract_printer.rb +19 -33
- data/lib/ruby-prof/printers/call_info_printer.rb +53 -53
- data/lib/ruby-prof/printers/call_stack_printer.rb +168 -180
- data/lib/ruby-prof/printers/call_tree_printer.rb +132 -145
- data/lib/ruby-prof/printers/dot_printer.rb +177 -132
- data/lib/ruby-prof/printers/flame_graph_printer.rb +79 -0
- data/lib/ruby-prof/printers/flat_printer.rb +52 -52
- data/lib/ruby-prof/printers/graph_html_printer.rb +62 -63
- data/lib/ruby-prof/printers/graph_printer.rb +112 -113
- data/lib/ruby-prof/printers/multi_printer.rb +134 -127
- data/lib/ruby-prof/profile.rb +13 -0
- data/lib/ruby-prof/rack.rb +114 -105
- data/lib/ruby-prof/task.rb +147 -147
- data/lib/ruby-prof/thread.rb +20 -20
- data/lib/ruby-prof/version.rb +1 -1
- data/lib/ruby-prof.rb +50 -52
- data/lib/unprof.rb +10 -10
- data/ruby-prof.gemspec +5 -5
- data/test/abstract_printer_test.rb +25 -27
- data/test/alias_test.rb +203 -117
- data/test/call_tree_builder.rb +126 -126
- data/test/call_tree_visitor_test.rb +27 -27
- data/test/call_trees_test.rb +66 -66
- data/test/duplicate_names_test.rb +32 -32
- data/test/dynamic_method_test.rb +50 -50
- data/test/exceptions_test.rb +24 -24
- data/test/exclude_threads_test.rb +48 -48
- data/test/fiber_test.rb +72 -72
- data/test/inverse_call_tree_test.rb +174 -174
- data/test/line_number_test.rb +138 -1
- data/test/marshal_test.rb +144 -145
- data/test/measure_allocations.rb +26 -26
- data/test/measure_allocations_test.rb +340 -1
- data/test/measure_process_time_test.rb +3098 -3142
- data/test/measure_times.rb +56 -56
- data/test/measure_wall_time_test.rb +511 -372
- data/test/measurement_test.rb +82 -82
- data/test/merge_test.rb +48 -48
- data/test/multi_printer_test.rb +52 -66
- data/test/no_method_class_test.rb +15 -15
- data/test/pause_resume_test.rb +171 -171
- data/test/prime.rb +54 -54
- data/test/prime_script.rb +5 -5
- data/test/printer_call_stack_test.rb +28 -27
- data/test/printer_call_tree_test.rb +30 -30
- data/test/printer_flame_graph_test.rb +82 -0
- data/test/printer_flat_test.rb +99 -99
- data/test/printer_graph_html_test.rb +62 -59
- data/test/printer_graph_test.rb +42 -40
- data/test/printers_test.rb +28 -44
- data/test/printing_recursive_graph_test.rb +81 -81
- data/test/profile_test.rb +101 -101
- data/test/rack_test.rb +103 -93
- data/test/recursive_test.rb +139 -139
- data/test/scheduler.rb +4 -0
- data/test/singleton_test.rb +39 -38
- data/test/stack_printer_test.rb +61 -61
- data/test/start_stop_test.rb +106 -106
- data/test/test_helper.rb +4 -0
- data/test/thread_test.rb +29 -29
- data/test/unique_call_path_test.rb +123 -123
- data/test/yarv_test.rb +56 -56
- metadata +53 -11
- data/ext/ruby_prof/rp_measure_memory.c +0 -46
- data/lib/ruby-prof/compatibility.rb +0 -113
- data/test/compatibility_test.rb +0 -49
- data/test/measure_memory_test.rb +0 -1193

data/docs/advanced-usage.md
ADDED
@@ -0,0 +1,132 @@

# Advanced Usage

This section describes advanced usage of ruby-prof. Additional documentation for every class is also [available](index.md#api-documentation). For workflow guidance, see [Best Practices](best-practices.md).

## Profiling Options

ruby-prof understands the following options when profiling code:

**measure_mode** - What ruby-prof should measure. For more information see the [Measurement Mode](#measurement-mode) section.

**track_allocations** - Tracks each object allocation, including the object class and source file location. For more information see the [Allocation Tracking](#allocation-tracking) section.

**exclude_threads** - Array of threads which should not be profiled. For more information see the [Thread Inclusion/Exclusion](#thread-inclusionexclusion) section.

**include_threads** - Array of threads which should be profiled. All other threads will be ignored. For more information see the [Thread Inclusion/Exclusion](#thread-inclusionexclusion) section.

**allow_exceptions** - Whether to raise exceptions encountered during profiling, or to suppress them. Defaults to false.

**exclude_common** - Automatically calls `exclude_common_methods!` to exclude methods that commonly clutter profiles. Defaults to false. For more information see the [Method Exclusion](#method-exclusion) section.

## Measurement Mode

The measurement mode determines what ruby-prof measures when profiling code. Supported measurements are:

### Wall Time

Wall time measures the real-world time elapsed between any two moments in seconds. If other processes running concurrently on the system use significant CPU or disk time during a profiling run, the reported results will be larger than expected. On Windows, wall time is measured using `QueryPerformanceCounter`; on other platforms it is measured by `clock_gettime(CLOCK_MONOTONIC)`. Use `RubyProf::WALL_TIME` to select this mode.

### Process Time

Process time measures the time used by a process between any two moments in seconds. It is unaffected by other processes running concurrently on the system. Remember that with process time, calls to methods like `sleep` will not be included in profiling results. On Windows, process time is measured using `GetProcessTimes`; on other platforms it is measured by `clock_gettime`. Use `RubyProf::PROCESS_TIME` to select this mode.

### Object Allocations

Object allocations measures how many objects each method in a program allocates. Measurements are done via Ruby's `RUBY_INTERNAL_EVENT_NEWOBJ` trace event, counting each new object created (excluding internal `T_IMEMO` objects). Use `RubyProf::ALLOCATIONS` to select this mode.

To set the measurement mode:

```ruby
profile = RubyProf::Profile.new(measure_mode: RubyProf::WALL_TIME)
profile = RubyProf::Profile.new(measure_mode: RubyProf::PROCESS_TIME)
profile = RubyProf::Profile.new(measure_mode: RubyProf::ALLOCATIONS)
```

The default value is `RubyProf::WALL_TIME`. You may also specify the measure mode by using the `RUBY_PROF_MEASURE_MODE` environment variable:

```
export RUBY_PROF_MEASURE_MODE=wall
export RUBY_PROF_MEASURE_MODE=process
export RUBY_PROF_MEASURE_MODE=allocations
```

## Allocation Tracking

ruby-prof also has the ability to track object allocations. This functionality can be turned on via the `track_allocations` option:

```ruby
require 'ruby-prof'

RubyProf::Profile.profile(track_allocations: true) do
  ...
end
```

Note the `RubyProf::ALLOCATIONS` measure mode is slightly different from tracking allocations. The measurement mode provides high-level information about the number of allocations performed in each method. In contrast, tracking allocations provides detailed information about allocation type, count, and source location. Currently, to see allocation results you must use the `RubyProf::GraphHtmlPrinter`.
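
For illustration, here is a minimal sketch that combines allocation tracking with the graph HTML report (the block body and output file name are arbitrary):

```ruby
require 'ruby-prof'

profile = RubyProf::Profile.profile(track_allocations: true) do
  1_000.times { { name: "ruby" }.to_s }   # allocates hashes and strings
end

# The graph HTML report includes the tracked allocation details.
printer = RubyProf::GraphHtmlPrinter.new(profile)
File.open("allocations.html", "w") { |file| printer.print(file) }
```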

## Thread Inclusion/Exclusion

ruby-prof can profile multiple threads. Sometimes this can be overwhelming. For example, assume you want to determine why your tests are running slowly. If you are using minitest, it can run tests in parallel by spawning worker threads (to force a single worker, set `N=0` when running tests). Thus, ruby-prof provides two options to specify which threads should be profiled:

**exclude_threads** - Array of threads which should not be profiled.

**include_threads** - Array of threads which should be profiled. All other threads will be ignored.
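
For example, a minimal sketch that profiles only the main thread (the worker thread here is purely illustrative):

```ruby
require 'ruby-prof'

worker = Thread.new { sleep 1 }   # background noise we do not want to profile

profile = RubyProf::Profile.new(include_threads: [Thread.current])
# Alternatively: RubyProf::Profile.new(exclude_threads: [worker])

profile.start
# ... code to profile ...
result = profile.stop
worker.join
```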

## Method Exclusion

ruby-prof supports excluding specific methods and threads from profiling results. This is useful for reducing connectivity in the call graph, making it easier to identify the source of performance problems when using a graph printer. For example, consider `Integer#times`: it's hardly ever useful to know how much time is spent in the method itself. We are more interested in how much the passed-in block contributes to the time spent in the method that contains the `Integer#times` call. The effect on the collected metrics is identical to eliminating these methods from the profiling result in a post-processing step.

```ruby
profile = RubyProf::Profile.new(...)
profile.exclude_methods!(Integer, :times, ...)
profile.start
```

A convenience method is provided to exclude a large number of methods which usually clutter up profiles:

```ruby
profile.exclude_common_methods!
```

However, this is a somewhat opinionated method collection. It's usually better to view it as inspiration rather than using it directly (see [exclude_common_methods.rb](https://github.com/ruby-prof/ruby-prof/blob/e087b7d7ca11eecf1717d95a5c5fea1e36ea3136/lib/ruby-prof/profile/exclude_common_methods.rb)).

## Merging Threads and Fibers

ruby-prof profiles each thread and fiber separately. A common design pattern is to have a main thread delegate work to background threads or fibers. Examples include web servers such as Puma and Falcon, as well as code that uses `Enumerator`, `Fiber.new`, or async libraries.

Understanding profiling results can be very difficult when there are many threads or fibers because each one appears as a separate entry in the output. To help with this, ruby-prof includes the ability to merge results for threads and fibers that start with the same root method. In the best case, this can collapse results into just two entries - one for the parent thread and one for all workers.

Note the collapsed results show the sum of times for all merged threads/fibers. For example, assume there are 10 worker fibers that each took 5 seconds to run. The single merged entry will show a total time of 50 seconds.

To merge threads and fibers:

```ruby
profile = RubyProf::Profile.profile do
  ...
end
profile.merge!
```

This is also supported in the Rack adapter via the `merge_fibers` option:

```ruby
config.middleware.use Rack::RubyProf, path: Rails.root.join("tmp/profile"), merge_fibers: true
```

## Saving Results

It can be helpful to save the results of a profiling run for later analysis. Results can be saved using Ruby's [marshal](https://docs.ruby-lang.org/en/master/Marshal.html) library.

```ruby
profile_1 = RubyProf::Profile.profile do
  ...
end

# Save the results
data = Marshal.dump(profile_1)

# Sometime later load the results
profile_2 = Marshal.load(data)
```
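
Since `Marshal.dump` returns a binary string, the results can also be written to and read back from disk directly (the file name below is illustrative):

```ruby
File.binwrite("profile.dat", Marshal.dump(profile_1))

# Later, possibly in another process:
profile_2 = Marshal.load(File.binread("profile.dat"))
```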

**!!!WARNING!!!** - Only load ruby-prof profiles that you know are safe. Demarshaling data can lead to arbitrary code execution and thus can be [dangerous](https://docs.ruby-lang.org/en/master/Marshal.html#module-Marshal-label-Security+considerations).

data/docs/alternatives.md
ADDED
@@ -0,0 +1,98 @@

# Comparison with Other Profilers

Ruby has several excellent profiling tools, each with different strengths. This page compares ruby-prof with three popular alternatives to help you choose the right tool for your needs.

## Tracing vs Sampling

The most important distinction between profilers is **tracing** vs **sampling**:

- **Tracing profilers** (ruby-prof) instrument every method call and return. This provides exact call counts and complete call graphs, but adds overhead to every method invocation.
- **Sampling profilers** (stackprof, rbspy, vernier) periodically capture stack snapshots. This has much lower overhead but may miss short-lived method calls.

## Overview

The table below compares ruby-prof with [stackprof](https://github.com/tmm1/stackprof), [rbspy](https://github.com/rbspy/rbspy), and [vernier](https://github.com/jhawthorn/vernier) — the three most popular sampling profilers for Ruby.

| | ruby-prof | stackprof | rbspy | vernier |
|---|---|---|---|---|
| **Type** | Tracing | Sampling | Sampling | Sampling |
| **Implementation** | C extension (TracePoint API) | C extension (signals) | External Rust binary | C extension (signals) |
| **Code changes** | None ([CLI](getting-started.md#command-line)) or minimal | Minimal | None | Minimal |
| **Ruby versions** | All, since 2006 (currently 3.2+) | 2.2+ | 1.9.3+ | 3.2.1+ |
| **OS support** | Linux, macOS, Windows | Linux | Linux, macOS, Windows, FreeBSD | Linux, macOS |

## Measurement Capabilities

| | ruby-prof | stackprof | rbspy | vernier |
|---|---|---|---|---|
| **Wall time** | Yes | Yes | Yes | Yes |
| **CPU/Process time** | Yes | Yes | No | No |
| **Allocations** | Yes | Yes | No | Yes |
| **GVL visibility** | No | No | No | Yes |
| **GC pauses** | No | No | No | Yes |
| **Retained memory** | No | No | No | Yes |
| **Multi-thread** | Yes | No | No | Yes |
| **Fibers** | Yes | No | No | No |

## Report Formats

| | ruby-prof | stackprof | rbspy | vernier |
|---|---|---|---|---|
| **Flat/Summary** | Yes | Yes | Yes | No |
| **Call graph** | Yes (text + HTML) | No | No | No |
| **Flame graph** | Yes (HTML) | Yes | Yes (SVG) | Yes (Firefox Profiler) |
| **Call stack** | Yes (HTML) | No | No | No |
| **Callgrind** | Yes | No | Yes | No |
| **Graphviz dot** | Yes | Yes | No | No |

## When to Use Each

### ruby-prof

ruby-prof is the longest-standing Ruby profiler, with its [first](./history.md) release in 2005. It has been continuously maintained for nearly two decades, evolving alongside Ruby itself from 1.8 through 4.0. Over that time it has supported every major Ruby version and platform, including Windows — a rarity among Ruby C extensions.

Being a tracing profiler, ruby-prof provides *exact* information about your program. It tracks every thread, every fiber and every method call. It shines with its support for multiple measurement modes and excellent reporting capabilities.

ruby-prof can be used from the [command line](getting-started.md#command-line) with no code changes, or via an API for more control.

The biggest downsides of ruby-prof are:

* It adds significant overhead to running programs, so it is not suitable for production use
* It must start the Ruby program itself; it cannot attach to an already running process

### stackprof

[stackprof](https://github.com/tmm1/stackprof) is a low-overhead, sampling profiler that is good for development. It adds minimal overhead while still providing useful flame graphs and per-line hit counts. A good choice when you want something lightweight and well-established.

The biggest downsides of stackprof are:

* Single-thread only
* Linux only for time-based modes

### rbspy

[rbspy](https://github.com/rbspy/rbspy) is a sampling profiler best for profiling in production or when you cannot modify the application code. As an external process, it attaches to a running Ruby process by PID with zero code changes. It is particularly useful for profiling third-party Ruby applications (Chef, Puppet, etc.), investigating slow test runs, or quick profiling of scripts via `rbspy record ruby my-script.rb`. It supports the widest range of Ruby versions.

The biggest downsides of rbspy are:

* No allocation profiling
* No call graph or caller/callee data

### vernier

[vernier](https://github.com/jhawthorn/vernier) is a sampling profiler best for diagnosing concurrency issues and understanding GVL contention. It is the only Ruby profiler that reports GVL state, GC pauses and idle time. Its Firefox Profiler integration provides rich interactive visualizations with per-thread timelines.

The biggest downsides of vernier are:

* Requires Ruby 3.2.1+
* No Windows support

### rack-mini-profiler

[rack-mini-profiler](https://github.com/MiniProfiler/rack-mini-profiler) is a "batteries-included" profiling tool for Rails and Rack applications. It uses stackprof under the hood for CPU profiling while also supporting memory profiling. It is a good choice if you want an integrated profiling solution that works directly in the browser during development.

## Memory Profiling

[memory_profiler](https://github.com/SamSaffron/memory_profiler) is another profiler, but it focuses exclusively on memory usage. It uses Ruby's `ObjectSpace` API to track every object allocation during a block of code, recording the source file, line number, object type, and size via `ObjectSpace.memsize_of`. By snapshotting the GC generation before and after, it distinguishes between allocated objects (created during the block) and retained objects (still alive after GC). This makes it useful for finding memory leaks and identifying allocation-heavy code. It's pure Ruby with no C extension, so it works across Ruby versions and platforms.
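
As an illustration, a minimal sketch of memory_profiler's block API (assuming the `MemoryProfiler.report` interface; check the gem's README for your version):

```ruby
require 'memory_profiler'

report = MemoryProfiler.report do
  # ... code to measure ...
  10_000.times { "ruby" * 2 }
end

# Prints allocated and retained totals broken down by gem, file, location and class.
report.pretty_print
```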

ruby-prof can track allocation counts via its `RubyProf::ALLOCATIONS` mode, but memory_profiler gives deeper insight into memory specifically — object sizes, retained vs allocated, and per-gem breakdowns.

data/docs/architecture.md
ADDED
@@ -0,0 +1,122 @@

# Architecture

## Overview

ruby-prof is a C extension that uses Ruby's [TracePoint](https://docs.ruby-lang.org/en/master/TracePoint.html) API to intercept method calls and returns. Every time a method is entered or exited, ruby-prof records timing and (optionally) allocation data. This tracing approach means ruby-prof captures every method invocation, giving exact call counts and complete call graphs.

The diagram below shows the main classes that make up ruby-prof:

```mermaid
classDiagram
    Profile "1" *-- "1" Measurer
    Profile "1" *-- "*" Thread
    Thread "1" *-- "1" Stack
    Thread "1" *-- "*" MethodInfo
    Thread "1" *-- "1" CallTree
    Stack "1" o-- "*" Frame
    Frame --> CallTree
    CallTree "1" *-- "1" Measurement
    CallTree --> MethodInfo : target
    MethodInfo "1" *-- "1" CallTrees
    MethodInfo "1" *-- "1" Measurement
    MethodInfo "1" *-- "*" Allocation
    CallTrees o-- "*" CallTree

    class Profile {
        +threads: Hash
        +measurer: Measurer
    }
    class Measurer {
        +mode: MeasurerMode
        +track_allocations: boolean
        +multiplier: double
        +measure: function pointer
    }
    class Thread {
        +methods: Hash
        +stack: Stack
        +callTree: CallTree
    }
    class Stack {
        +frames: Array
    }
    class Frame {
        +callTree: CallTree
    }
    class CallTree {
        +parent: CallTree
        +children: Hash
        +target: MethodInfo
        +measurement: Measurement
    }
    class MethodInfo {
        +allocations: Hash
        +callTrees: CallTrees
        +measurement: Measurement
    }
    class Measurement {
        +total_time: double
        +self_time: double
        +wait_time: double
        +called: integer
    }
    class Allocation {
        +count: integer
        +source_file: string
        +source_line: int
        +klass: VALUE
    }
    class CallTrees {
        +callTrees: Array
    }
```

## Profile

Profile is the top-level object returned by a profiling run:

```ruby
profile = RubyProf::Profile.profile do
  ...
end
```

A Profile owns a Measurer that determines what is being measured, and a collection of Threads representing each thread (or fiber) that was active during profiling.

## Measurer and Measurement

The **Measurer** controls what ruby-prof measures. It holds a function pointer that is called on every method entry and exit to take a measurement. The three modes are:

- **Wall time** — elapsed real time
- **Process time** — CPU time consumed by the process (excludes time spent in sleep or I/O)
- **Allocations** — number of objects allocated

Each CallTree and MethodInfo holds a **Measurement** that accumulates the results: total time, self time (excluding children), wait time (time spent waiting on other threads), and call count.

## Thread

Each Thread tracks the methods called on that thread and owns the root of a call tree. It also maintains an internal Stack of Frames used during profiling to track the current call depth.

**Stack** and **Frame** are transient — they exist only while profiling is active. A Frame records timing data for a single method invocation on the stack, including start time and time spent in child calls. When a method returns, its Frame is popped and the accumulated timing is transferred to the corresponding CallTree node.

## CallTree and MethodInfo

These two classes are central to ruby-prof and represent two different views of the same profiling data:

- **CallTree** records the calling structure — which method called which, forming a graph. Each node has a parent, children, and a reference to its target MethodInfo. A method that is called from two different call sites will have two separate CallTree nodes, each with its own Measurement. Recursive methods create cycles in the graph.

- **MethodInfo** represents a single method regardless of where it was called from. It aggregates data across all call sites. Each MethodInfo holds a CallTrees collection that links back to every CallTree node that invoked that method, providing both caller and callee information.

This separation is what allows ruby-prof to generate both call graph reports (which show calling relationships) and flat reports (which show per-method totals).
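
As an illustrative sketch, both views can be walked from Ruby after a run. The accessor names used below (`threads`, `call_tree`, `target`, `measurement`, `methods`, `full_name`) follow the class descriptions above; see the API documentation for the definitive interface:

```ruby
require 'ruby-prof'

profile = RubyProf::Profile.profile do
  1_000.times { "ruby".upcase }
end

profile.threads.each do |thread|
  root = thread.call_tree                        # root CallTree node for this thread/fiber
  puts "Root method: #{root.target.full_name}"   # CallTree -> MethodInfo via target
  puts "Total time:  #{root.measurement.total_time}"

  # MethodInfo aggregates measurements across every call site of a method.
  thread.methods.each do |method_info|
    puts "  #{method_info.full_name}: #{method_info.measurement.called} calls"
  end
end
```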

## Allocation

When allocation tracking is enabled, each MethodInfo records the objects it allocated. An Allocation tracks the class of object created, the source location, and the count.

## Memory Management

The Profile object is responsible for managing the memory of its child objects, which are C structures. When a Profile is garbage collected, it recursively frees all its objects. In the class diagram, composition relationships (filled diamond) indicate ownership — a Profile frees its Threads, Threads free their CallTrees and MethodInfo instances, and so on.

ruby-prof keeps a Profile alive as long as there are live references to any of its MethodInfo or CallTree objects. This is done via Ruby's GC mark phase: CallTree instances mark their associated MethodInfo, and MethodInfo instances mark their owning Profile.

Starting with version 1.5, it is possible to create Thread, CallTree and MethodInfo instances from Ruby (this was added to support testing). These Ruby-created objects are owned by Ruby's garbage collector rather than the C extension. An internal ownership flag on each instance tracks who is responsible for freeing it.

data/docs/best-practices.md
ADDED
@@ -0,0 +1,27 @@

# Best Practices

Profiling gives you amazing insight into your program. What you think is slow is almost never what is actually slow. Below are some best practices to help unlock this power.

## Start With Realistic Runs

When profiling data-heavy work, start with a smaller sample of the data instead of the full dataset. Profile a portion first (for example 1% or 10%). It is faster, easier to understand, and often enough to find the main bottleneck. Once you have a likely fix, validate it with a larger and more realistic workload so you know the result still holds in context. Run the same profile more than once and warm up before you measure so one-time startup work does not dominate the report.

## Choose The Right Measurement Mode

Pick the measurement mode based on the question you are asking. Use `WALL_TIME` for end-to-end latency, `PROCESS_TIME` for CPU-focused work, and `ALLOCATIONS` when object churn is the concern. See [Measurement Mode](advanced-usage.md#measurement-mode) for details.

## Reduce Noise Before Deep Analysis

When framework internals or concurrency noise dominate output, narrow the scope first. Use `exclude_common` or explicit method exclusions, and use thread filtering (`include_threads` / `exclude_threads`) when needed. For highly concurrent workloads, merging worker results (`merge!` or Rack `merge_fibers: true`) can make trends much easier to read. See [Profiling Options](advanced-usage.md#profiling-options), [Method Exclusion](advanced-usage.md#method-exclusion), and [Merging Threads and Fibers](advanced-usage.md#merging-threads-and-fibers).

## Use Reports In A Sequence

Start with a quick summary, then drill down. In practice, this usually means using `FlatPrinter` to find hotspots, `GraphHtmlPrinter` (or `GraphPrinter`) to understand caller/callee relationships, and `FlameGraphPrinter` to validate dominant paths visually. See [Reports](reports.md), especially [Creating Reports](reports.md#creating-reports) and [Report Types](reports.md#report-types).
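
For illustration, a minimal run-through of that sequence from a single profiling run (output file names are arbitrary):

```ruby
require 'ruby-prof'

result = RubyProf::Profile.profile do
  # ... code to profile ...
end

# 1. Quick summary: where is the time going?
RubyProf::FlatPrinter.new(result).print(STDOUT, min_percent: 1)

# 2. Drill into caller/callee relationships.
File.open("graph.html", "w") { |f| RubyProf::GraphHtmlPrinter.new(result).print(f) }

# 3. Visually confirm the dominant call paths.
File.open("flame_graph.html", "w") { |f| RubyProf::FlameGraphPrinter.new(result).print(f) }
```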

## Use Threshold Filters Early

Threshold filters are one of the fastest ways to make a large profile readable. Start with `min_percent` to hide low-impact methods in most printers. For `GraphHtmlPrinter`, use `min_time` when you want to drop methods below an absolute time cutoff. These filters help you focus on the code that actually moves total runtime.
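
For example, reusing the `result` profile from the sketch above (the threshold values are arbitrary):

```ruby
# Hide methods that account for less than 2% of total time.
RubyProf::FlatPrinter.new(result).print(STDOUT, min_percent: 2)

# For the HTML graph report, also drop methods below an absolute time cutoff.
File.open("graph.html", "w") do |f|
  RubyProf::GraphHtmlPrinter.new(result).print(f, min_percent: 2, min_time: 0.01)
end
```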

## Compare Trends, Not Single Snapshots

Do not optimize based on one run unless the signal is overwhelming. Compare before/after profiles under the same workload, then prioritize repeated hot paths over one-off spikes.

data/docs/getting-started.md
ADDED
@@ -0,0 +1,130 @@

# Getting Started

There are three ways to use ruby-prof:

- command line
- convenience API
- core API

## Command Line

The easiest way to use ruby-prof is via the command line, which requires no modifications to your program. The basic usage is:

```
ruby-prof [options] <script.rb> [--] [script-options]
```

Where `script.rb` is the program you want to profile.

For a full list of options, see the `RubyProf::Cmd` documentation or execute the following command:

```
ruby-prof -h
```
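
For example, assuming the default flat printer and the printer and file options listed by `ruby-prof -h` (confirm the exact flags against your installed version):

```
# Flat report printed to the console
ruby-prof my_script.rb

# Graph HTML report written to a file
ruby-prof --printer=graph_html --file=profile.html my_script.rb
```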

## Convenience API

The second way to use ruby-prof is via its convenience API. This requires small modifications to the program you want to profile:

```ruby
require 'ruby-prof'

profile = RubyProf::Profile.new

# profile the code
profile.start
# ... code to profile ...
result = profile.stop

# print a flat profile to text
printer = RubyProf::FlatPrinter.new(result)
printer.print(STDOUT)
```

Alternatively, you can use a block to tell ruby-prof what to profile:

```ruby
require 'ruby-prof'

# profile the code
result = RubyProf::Profile.profile do
  # ... code to profile ...
end

# print a graph profile to text
printer = RubyProf::GraphPrinter.new(result)
printer.print(STDOUT)
```

ruby-prof also supports pausing and resuming profiling runs.

```ruby
require 'ruby-prof'

profile = RubyProf::Profile.new

# profile the code
profile.start
# ... code to profile ...

profile.pause
# ... other code ...

profile.resume
# ... code to profile ...

result = profile.stop
```

Note that `resume` will only work if `start` has been called previously. In addition, `resume` can also take a block:

```ruby
require 'ruby-prof'

profile = RubyProf::Profile.new

# profile the code
profile.start
# ... code to profile ...

profile.pause
# ... other code ...

profile.resume do
  # ... code to profile ...
end

result = profile.stop
```

With this usage, `resume` will automatically call `pause` at the end of the block.

The `RubyProf::Profile.profile` method can take various options, which are described in [Profiling Options](advanced-usage.md#profiling-options).

## Core API

The convenience API is a wrapper around the `RubyProf::Profile` class. Using the Profile class directly provides additional functionality, such as [method exclusion](advanced-usage.md#method-exclusion).

To create a new profile:

```ruby
require 'ruby-prof'

profile = RubyProf::Profile.new(measure_mode: RubyProf::WALL_TIME)
result = profile.profile do
  ...
end
```

Once a profile is completed, you can either generate a [report](reports.md) via a printer or [save](advanced-usage.md#saving-results) the results for later analysis. For a list of profiling options, please see the [Profiling Options](advanced-usage.md#profiling-options) section.
If you are unsure which report to generate first, see [Report Types](reports.md#report-types).

However, using ruby-prof also comes with two caveats:

- To use ruby-prof you generally need to include a few lines of extra code in your program (although see [command line usage](getting-started.md#command-line))
- Using ruby-prof will cause your program to run slower (see the [Performance](index.md#performance) section)

Most of the time, these two caveats are acceptable. But if you need to determine why a program running in production is slow or hung, a sampling profiler will be a better choice. Excellent choices include [stackprof](https://github.com/tmm1/stackprof) or [rbspy](https://rbspy.github.io/).

If you are just interested in memory usage, you may also want to check out the [memory_profiler](https://github.com/SamSaffron/memory_profiler) gem (although ruby-prof provides similar information).

data/docs/history.md
ADDED
@@ -0,0 +1,11 @@

# History

For a full list of changes between versions, see the [Changelog](changelog.md).

The first version of ruby-prof, 0.1.1, was released on March 22, 2005 by [Shugo Maeda](https://shugo.net/). The original [source](https://shugo.net/archive/ruby-prof/) code is still available on his website (it is not actually in the git history). ruby-prof was a vast improvement at the time, running 30 times faster than the original Ruby profiler.

Version [0.4.0](https://rubygems.org/gems/ruby-prof/versions/0.4.0) was the first version packaged as a Ruby gem. Version 0.4.0 also introduced Windows support, thread support and added a number of additional reports such as the graph report in HTML and the call graph report.

A number of versions were subsequently released, with a 1.0.0 [release](https://cfis.savagexi.com/2019/07/29/ruby-prof-1-0/) finally happening in July of 2019. Version 1.0.0 was a major rewrite that significantly improved performance, correctly profiled recursive methods, redesigned reports, added allocation/memory measurement support and introduced saving and reloading profiling results. Since then ruby-prof has continued to evolve alongside Ruby, with 19 releases.

Version 2.0.0 will mark the 20th release of ruby-prof since the 1.0.0 release. Version 2.0.0 supports Ruby 4 and includes new flame/icicle graph support, revamped reports and improved documentation. The reason for the jump to 2.0.0 is that support for profiling memory sizes has been removed due to changes in Ruby 4.0.0. In addition, the old compatibility API was also removed.

data/docs/index.md
ADDED
@@ -0,0 +1,45 @@

# ruby-prof

ruby-prof is a [tracing](./alternatives.md#tracing-vs-sampling) profiler for MRI Ruby with a long [history](./history.md) that dates back to 2005! Its features include:

- Measurement Modes - ruby-prof can measure program [wall time](advanced-usage.md#wall-time), [process time](advanced-usage.md#process-time) and [object allocations](advanced-usage.md#object-allocations).
- Reports - ruby-prof can generate [flat](reports.md#flat), [graph (text)](reports.md#graph-text), [graph (HTML)](reports.md#graph-html), [flame graph](reports.md#flame-graph), [call stack](reports.md#call-stack), [graphviz](reports.md#graphviz), [cachegrind](reports.md#cachegrind), and [call info](reports.md#call-info-report) reports.
- Threads - supports profiling multiple threads simultaneously.
- Fibers - supports profiling multiple fibers simultaneously.
- Merging - supports merging results across fibers or threads.
- Recursive - supports profiling recursive methods.



## Why ruby-prof?

ruby-prof is helpful if your program is slow and you want to know why! It can help you track down methods that are either slow or allocate a large number of objects. Oftentimes the results will surprise you - when profiling, what you think you know almost always turns out to be wrong.

## Installation

To install ruby-prof:

```
gem install ruby-prof
```

If you are running Linux or Unix you'll need to have a C compiler installed so the extension can be built when the gem is installed. If you are running Windows, then you should install the Windows-specific gem or install the [devkit](https://rubyinstaller.org/add-ons/devkit.html).

ruby-prof requires Ruby 3.2.0 or higher. If you need to work with older Ruby versions then you can download an older version of ruby-prof.

## Performance

ruby-prof is a tracing profiler, not a sampling profiler, and thus will cause your program to run slower. Our tests show that the overhead varies considerably based on the code being profiled. Significant effort has been put into reducing this overhead, but most programs will run approximately twice as slow while highly recursive programs (like the fibonacci series test) may run up to five times slower.

## History

ruby-prof has been under continuous development since 2005 — see the full [History](history.md) page.

## API Documentation

API documentation for each class is available at the [ruby-prof API docs](https://ruby-prof.github.io/doc/index.html).

## License

See [LICENSE](../LICENSE) for license information.

## Development

Code is located at [github.com/ruby-prof/ruby-prof](https://github.com/ruby-prof/ruby-prof).

data/docs/profiling-rails.md
ADDED
@@ -0,0 +1,64 @@

# Profiling Rails

To profile a Rails application it is vital to run it using production-like settings (cache classes, cache view lookups, etc.). Otherwise, Rails dependency loading code will overwhelm any time spent in the application itself (our tests show that Rails dependency loading causes a roughly 6x slowdown). The best way to do this is to create a new Rails environment, `profile`.

To profile Rails:

1. Add ruby-prof to your Gemfile:

    ```ruby
    group :profile do
      gem 'ruby-prof'
    end
    ```

    Then install it:

    ```bash
    bundle install
    ```

2. Create `config/environments/profile.rb` with production-like settings and the ruby-prof middleware:

    ```ruby
    # config/environments/profile.rb
    require_relative "production"

    Rails.application.configure do
      # Optional: reduce noise while profiling.
      config.log_level = :warn

      # Optional: disable controller/view caching if you want raw app execution timing.
      config.action_controller.perform_caching = false

      config.middleware.use Rack::RubyProf, path: Rails.root.join("tmp/profile")
    end
    ```

    By default the rack adapter generates flat text, graph text, graph HTML, and call stack HTML reports (see the sketch after these steps for customizing the reports).

3. Start Rails in the profile environment:

    ```bash
    bin/rails server -e profile
    ```

    You can run a console in the same environment with:

    ```bash
    bin/rails console -e profile
    ```

4. Make a request to generate profile output:

    ```bash
    curl http://127.0.0.1:3000/
    ```

5. Inspect reports in `tmp/profile`:

    ```bash
    ls -1 tmp/profile
    ```

Reports are generated per request path. Repeating the same request path overwrites the previous report files for that path.
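
To customize which reports are written, the Rack adapter has historically accepted a `printers` option mapping printer classes to output file names. This is a sketch only; the option name is an assumption here, so confirm it against the `Rack::RubyProf` documentation for your installed version:

```ruby
# Sketch (verify the :printers option against your ruby-prof version).
config.middleware.use Rack::RubyProf,
  path: Rails.root.join("tmp/profile"),
  printers: {
    RubyProf::FlatPrinter => "flat.txt",
    RubyProf::FlameGraphPrinter => "flame_graph.html"
  }
```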

data/docs/public/examples/example.rb
ADDED
@@ -0,0 +1,33 @@

# A small synthetic workload for demonstrating ruby-prof reports.
# word_freq.rb

def normalize(text)
  text.downcase.gsub(/[^a-z\s]/, "")
end

def tokenize(text)
  text.split(/\s+/)
end

def count_words(words)
  counts = Hash.new(0)
  words.each { |w| counts[w] += 1 }
  counts
end

def top_words(counts, n = 10)
  counts.sort_by { |_, v| -v }.take(n)
end

def run_example
  text = <<~EOS * 200
    Ruby is a dynamic, open source programming language with a focus on
    simplicity and productivity. It has an elegant syntax that is natural
    to read and easy to write.
  EOS

  normalized = normalize(text)
  tokens = tokenize(normalized)
  counts = count_words(tokens)
  top = top_words(counts)
end