pf2 0.13.0 → 1.0.0.alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +0 -85
- data/README.md +15 -33
- data/Rakefile +0 -6
- data/ext/pf2/configuration.c +0 -14
- data/ext/pf2/configuration.h +0 -3
- data/ext/pf2/extconf.rb +3 -37
- data/ext/pf2/pf2.c +8 -8
- data/ext/pf2/sample.c +3 -1
- data/ext/pf2/sample.h +4 -5
- data/ext/pf2/serializer.c +38 -121
- data/ext/pf2/serializer.h +4 -8
- data/ext/pf2/session.c +115 -292
- data/ext/pf2/session.h +5 -157
- data/lib/pf2/cli.rb +1 -1
- data/lib/pf2/reporter/stack_weaver.rb +1 -1
- data/lib/pf2/serve.rb +2 -1
- data/lib/pf2/session.rb +9 -0
- data/lib/pf2/version.rb +1 -1
- data/lib/pf2.rb +4 -16
- data/vendor/libbacktrace/atomic.c +1 -1
- data/vendor/libbacktrace/configure +4 -12
- data/vendor/libbacktrace/configure.ac +1 -6
- data/vendor/libbacktrace/elf.c +4 -4
- data/vendor/libbacktrace/fileline.c +1 -35
- data/vendor/libbacktrace/filetype.awk +0 -1
- metadata +3 -35
- data/.document +0 -3
- data/.rdoc_options +0 -6
- data/THIRD_PARTY_LICENSES.txt +0 -59
- data/ext/patches/libbacktrace/0001-Support-MACH_O_MH_BUNDLE.patch +0 -32
- data/ext/pf2/khashl.h +0 -506
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9f219b8aa5b5281a6ed662a085e00f82636ab52365d2b0f7a8d61a3e06bfc9c6
|
|
4
|
+
data.tar.gz: 162e1eae488afe17e33291f63f807642aa304d7df8069cb165e3a25ce6f10895
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 37b6a1aa4f6ab0753d86983d76cbdfae4b3dfbb7464afb0388aa113d728725f09eabcabb2ae7c2ffadc5ab797be75675b5b27b465c20565d5d69f30291d4837f
|
|
7
|
+
data.tar.gz: 7b69aea55c8873cfd6e28fba3bdddd09325e4c73e1e3e44a5be48fcb9ad776b03e811c707fc049ad8ccc1590e7c011032be7f4053e3d15a5ba5b50c4344db656
|
data/CHANGELOG.md
CHANGED
|
@@ -1,87 +1,5 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
-
## [0.13.0] - 2026-01-18
|
|
4
|
-
|
|
5
|
-
### Added
|
|
6
|
-
|
|
7
|
-
- Pf2 should now have a dramatically lower memory footprint.
|
|
8
|
-
- Samples are now stored in a compact hashmap internally.
|
|
9
|
-
- See https://github.com/osyoyu/pf2/pull/85 for details.
|
|
10
|
-
|
|
11
|
-
### Fixed
|
|
12
|
-
|
|
13
|
-
- `pf2 serve` command now properly works. (Thanks @hanazuki)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
## [0.12.0] - 2026-01-09
|
|
17
|
-
|
|
18
|
-
### Added
|
|
19
|
-
|
|
20
|
-
- `Pf2.profile` now accepts the same options as `Pf2.start`.
|
|
21
|
-
- The resulting profile now has `collected_sample_count` and `dropped_sample_count` fields.
|
|
22
|
-
|
|
23
|
-
### Fixed
|
|
24
|
-
|
|
25
|
-
- Samples captured after the collector thread was stopped now get included in the profile.
|
|
26
|
-
- This shouldn't matter in practice (this all happens after `Pf2.stop` is called).
|
|
27
|
-
|
|
28
|
-
### Changed
|
|
29
|
-
|
|
30
|
-
- Accepted max stack depth is expanded to 1024 for Ruby (was 200) and 512 for native (was 300).
|
|
31
|
-
- This is not configurable, but should be sufficient for most use cases. Please open an issue if you need higher limits.
|
|
32
|
-
- Pf2.profile now accepts the same parameters as Pf2.start.
|
|
33
|
-
- Internal changes
|
|
34
|
-
- Updated libbacktrace to the latest version as of 2026/1/8.
|
|
35
|
-
- Tests are now much more stabilized.
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
## [0.11.3] - 2025-12-28
|
|
39
|
-
|
|
40
|
-
This version is for testing the new release process through [Trusted Publishing](https://guides.rubygems.org/trusted-publishing/). All code is identical to 0.11.2.
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
## [0.11.2] - 2025-12-28
|
|
44
|
-
|
|
45
|
-
0.11.1 was yanked since it was accidentally published without libbacktrace vendored. Use 0.11.2.
|
|
46
|
-
|
|
47
|
-
### Fixed
|
|
48
|
-
|
|
49
|
-
- Fixed issues preventing builds on macOS.
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
## [0.11.0] - 2025-12-27
|
|
53
|
-
|
|
54
|
-
### Added
|
|
55
|
-
|
|
56
|
-
- RDoc documentation is now online - https://osyoyu.github.io/pf2/
|
|
57
|
-
- Native stack consolidation now supports LTO-ed binaries (@hanazuki)
|
|
58
|
-
|
|
59
|
-
### Changed
|
|
60
|
-
|
|
61
|
-
- `Pf2c` module is now completely removed. `Pf2c::Session` has been merged as `Pf2::Session`.
|
|
62
|
-
|
|
63
|
-
### Fixed
|
|
64
|
-
|
|
65
|
-
- Fixed a bug where the program crashes when a `Pf2::Session` is GC'd before profiling starts.
|
|
66
|
-
- Fixed a bug where the program crashes when the native stack was more than 200 frames deep.
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
## [0.10.0] - 2025-12-26
|
|
70
|
-
|
|
71
|
-
### Added
|
|
72
|
-
|
|
73
|
-
**This version contains a complete rewrite of the profiler!**
|
|
74
|
-
|
|
75
|
-
- The default sample collection backend has been switched to the new C-based backend.
|
|
76
|
-
- The previous Rust-based backend has been removed. Use v0.9.0 if you need it.
|
|
77
|
-
- macOS / non-Linux platform support!
|
|
78
|
-
- On platforms which lack `timer_create(3)` such as macOS, Pf2 now falls back to `setitimer(3)`-based sampling. This mode does not support per-thread CPU time sampling.
|
|
79
|
-
|
|
80
|
-
### Changed
|
|
81
|
-
|
|
82
|
-
- `logger` is now declared as a dependency (Ruby 4.0 compat).
|
|
83
|
-
|
|
84
|
-
|
|
85
3
|
## [0.9.0] - 2025-03-22
|
|
86
4
|
|
|
87
5
|
## Added
|
|
@@ -93,7 +11,6 @@ This version is for testing the new release process through [Trusted Publishing]
|
|
|
93
11
|
|
|
94
12
|
- Set SA_RESTART flag to reduce EINTRs in profiled code
|
|
95
13
|
|
|
96
|
-
|
|
97
14
|
## [0.8.0] - 2025-01-27
|
|
98
15
|
|
|
99
16
|
## Added
|
|
@@ -102,14 +19,12 @@ This version is for testing the new release process through [Trusted Publishing]
|
|
|
102
19
|
- This serializer is more efficient and has a smaller memory footprint than the default serializer.
|
|
103
20
|
- Ser2 still lacks some features, such as weaving of native stacks.
|
|
104
21
|
|
|
105
|
-
|
|
106
22
|
## [0.7.1] - 2025-01-02
|
|
107
23
|
|
|
108
24
|
### Fixed
|
|
109
25
|
|
|
110
26
|
- Reverted Cargo.lock version to 3 to support older versions of Rust (<1.78).
|
|
111
27
|
|
|
112
|
-
|
|
113
28
|
## [0.7.0] - 2025-01-03
|
|
114
29
|
|
|
115
30
|
### Changed
|
data/README.md
CHANGED
|
@@ -3,10 +3,6 @@ Pf2
|
|
|
3
3
|
|
|
4
4
|
An experimental sampling-based profiler for Ruby 3.3+.
|
|
5
5
|
|
|
6
|
-
- GitHub: https://github.com/osyoyu/pf2
|
|
7
|
-
- Documentation: https://osyoyu.github.io/pf2/
|
|
8
|
-
|
|
9
|
-
|
|
10
6
|
Notable Capabilities
|
|
11
7
|
--------
|
|
12
8
|
|
|
@@ -17,25 +13,6 @@ Notable Capabilites
|
|
|
17
13
|
Usage
|
|
18
14
|
--------
|
|
19
15
|
|
|
20
|
-
### Installation
|
|
21
|
-
|
|
22
|
-
You will need a C compiler to build the native extension.
|
|
23
|
-
|
|
24
|
-
Add this line to your application's Gemfile:
|
|
25
|
-
|
|
26
|
-
```ruby
|
|
27
|
-
gem 'pf2'
|
|
28
|
-
|
|
29
|
-
# When using the main branch, specify submodules: true
|
|
30
|
-
gem 'pf2', git: 'https://github.com/osyoyu/pf2.git', submodules: true
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
Pf2 can be installed as a standalone CLI tool as well.
|
|
34
|
-
|
|
35
|
-
```console
|
|
36
|
-
gem install pf2
|
|
37
|
-
```
|
|
38
|
-
|
|
39
16
|
### Quickstart
|
|
40
17
|
|
|
41
18
|
Run your Ruby program through `pf2 serve`.
|
|
@@ -97,7 +74,10 @@ Pf2 accepts the following configuration keys:
|
|
|
97
74
|
```rb
|
|
98
75
|
Pf2.start(
|
|
99
76
|
interval_ms: 9, # Integer: The sampling interval in milliseconds (default: 9)
|
|
100
|
-
time_mode: :cpu,
|
|
77
|
+
time_mode: :cpu, # `:cpu` or `:wall`: The sampling timer's mode
|
|
78
|
+
# (default: `:cpu` for SignalScheduler, `:wall` for TimerThreadScheduler)
|
|
79
|
+
threads: [th1, th2], # `Array<Thread>` | `:all`: A list of Ruby Threads to be tracked.
|
|
80
|
+
# When `:all` or unspecified, Pf2 will track all active Threads.
|
|
101
81
|
)
|
|
102
82
|
```
|
|
103
83
|
|
|
@@ -128,33 +108,36 @@ Pf2 is a _sampling profiler_. This means that Pf2 collects _samples_ of program
|
|
|
128
108
|
|
|
129
109
|
Pf2 uses the `rb_profile_thread_frames()` API for sampling. When to do so is controlled by _Schedulers_, described in the following section.
|
|
130
110
|
|
|
131
|
-
###
|
|
111
|
+
### Schedulers
|
|
132
112
|
|
|
133
|
-
Schedulers determine when to execute sample collection, based on configuration (time mode and interval).
|
|
113
|
+
Schedulers determine when to execute sample collection, based on configuration (time mode and interval). Pf2 has two schedulers available.
|
|
134
114
|
|
|
135
|
-
####
|
|
115
|
+
#### SignalScheduler (Linux-only)
|
|
136
116
|
|
|
137
|
-
|
|
117
|
+
The first is the `SignalScheduler`, based on POSIX timers. Pf2 will use this scheduler when possible. SignalScheduler creates a POSIX timer for each Ruby Thread (the underlying pthread to be more accurate) using `timer_create(2)`. This leaves the actual time-keeping to the OS, which is capable of tracking accurate per-thread CPU time usage.
|
|
138
118
|
|
|
139
|
-
When the specified interval has arrived (the timer has _expired_), the OS delivers us a SIGPROF signal.
|
|
119
|
+
When the specified interval has arrived (the timer has _expired_), the OS delivers us a SIGPROF signal. This is why the scheduler is named SignalScheduler.
|
|
140
120
|
|
|
141
121
|
Signals are directed to Ruby Threads' underlying pthread, effectively "pausing" the Thread's activity. This routing is done using `SIGEV_THREAD_ID`, which is a Linux-only feature. Sample collection is done in the signal handler, which is expected to be more _accurate_, capturing the paused Thread's activity.
|
|
142
122
|
|
|
143
123
|
This scheduler heavily relies on Ruby's 1:1 Thread model (each Ruby Thread is strongly tied to a native pthread). It will not work properly in MaNy (`RUBY_MN_THREADS=1`).
|
|
144
124
|
|
|
145
|
-
####
|
|
146
|
-
|
|
147
|
-
Note: Timer thread-based scheduling has been removed in v0.10.0, when the profiling backend has been rewritten in C. This may come back in the future if needed.
|
|
125
|
+
#### TimerThreadScheduler
|
|
148
126
|
|
|
149
127
|
Another scheduler is the `TimerThreadScheduler`, which maintains a time-keeping thread by itself. A new native thread (pthread on Linux/macOS) will be created, and an infinite loop will be run inside. After `sleep(2)`-ing for the specified interval time, sampling will be queued using Ruby's Postponed Job API.
|
|
150
128
|
|
|
151
129
|
This scheduler is wall-time only, and does not support CPU-time based profiling.
|
|
152
130
|
|
|
131
|
+
#### macOS Support
|
|
132
|
+
|
|
133
|
+
On platforms where `timer_create()` is not supported (namely macOS), Pf2 falls back to `setitimer()`.
|
|
134
|
+
|
|
153
135
|
|
|
154
136
|
Wishlist
|
|
155
137
|
--------
|
|
156
138
|
|
|
157
139
|
- [Flame Scopes](https://www.brendangregg.com/flamescope.html)
|
|
140
|
+
- More unit/e2e tests
|
|
158
141
|
- more
|
|
159
142
|
|
|
160
143
|
Development
|
|
@@ -167,4 +150,3 @@ License
|
|
|
167
150
|
--------
|
|
168
151
|
|
|
169
152
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
|
170
|
-
See [LICENSE.txt](/LICENSE.txt) and [THIRD_PARTY_LICENSES.txt](/THIRD_PARTY_LICENSES.txt) for details.
|
data/Rakefile
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
require 'bundler/gem_tasks'
|
|
2
2
|
require 'rake/extensiontask'
|
|
3
3
|
require 'minitest/test_task'
|
|
4
|
-
require 'rdoc/task'
|
|
5
4
|
|
|
6
5
|
task default: %i[]
|
|
7
6
|
|
|
@@ -15,9 +14,4 @@ Minitest::TestTask.create(:test) do |t|
|
|
|
15
14
|
t.libs << "lib"
|
|
16
15
|
t.warning = false
|
|
17
16
|
t.test_globs = ["test/**/*_test.rb"]
|
|
18
|
-
t.extra_args << "--verbose"
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
RDoc::Task.new do |doc|
|
|
22
|
-
doc.rdoc_dir = "_site" # for GitHub pages
|
|
23
17
|
end
|
data/ext/pf2/configuration.c
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
#include <ruby.h>
|
|
2
2
|
#include <stdlib.h>
|
|
3
|
-
#include <stdbool.h>
|
|
4
3
|
|
|
5
4
|
#include "configuration.h"
|
|
6
5
|
|
|
7
6
|
static int extract_interval_ms(VALUE options_hash);
|
|
8
7
|
static enum pf2_time_mode extract_time_mode(VALUE options_hash);
|
|
9
|
-
static bool extract__test_no_install_timer(VALUE options_hash);
|
|
10
8
|
|
|
11
9
|
struct pf2_configuration *
|
|
12
10
|
pf2_configuration_new_from_options_hash(VALUE options_hash)
|
|
@@ -18,7 +16,6 @@ pf2_configuration_new_from_options_hash(VALUE options_hash)
|
|
|
18
16
|
|
|
19
17
|
config->interval_ms = extract_interval_ms(options_hash);
|
|
20
18
|
config->time_mode = extract_time_mode(options_hash);
|
|
21
|
-
config->_test_no_install_timer = extract__test_no_install_timer(options_hash);
|
|
22
19
|
|
|
23
20
|
return config;
|
|
24
21
|
}
|
|
@@ -60,17 +57,6 @@ extract_time_mode(VALUE options_hash)
|
|
|
60
57
|
}
|
|
61
58
|
}
|
|
62
59
|
|
|
63
|
-
static bool
|
|
64
|
-
extract__test_no_install_timer(VALUE options_hash)
|
|
65
|
-
{
|
|
66
|
-
if (options_hash == Qnil) {
|
|
67
|
-
return PF2_DEFAULT__TEST_NO_INSTALL_TIMER;
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
VALUE _test_no_install_timer = rb_hash_aref(options_hash, ID2SYM(rb_intern("_test_no_install_timer")));
|
|
71
|
-
return RTEST(_test_no_install_timer);
|
|
72
|
-
}
|
|
73
|
-
|
|
74
60
|
void
|
|
75
61
|
pf2_configuration_free(struct pf2_configuration *config)
|
|
76
62
|
{
|
data/ext/pf2/configuration.h
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
#define PF2_CONFIGURATION_H
|
|
3
3
|
|
|
4
4
|
#include <ruby.h>
|
|
5
|
-
#include <stdbool.h>
|
|
6
5
|
|
|
7
6
|
enum pf2_time_mode {
|
|
8
7
|
PF2_TIME_MODE_CPU_TIME,
|
|
@@ -12,12 +11,10 @@ enum pf2_time_mode {
|
|
|
12
11
|
struct pf2_configuration {
|
|
13
12
|
int interval_ms;
|
|
14
13
|
enum pf2_time_mode time_mode;
|
|
15
|
-
bool _test_no_install_timer; // for testing only
|
|
16
14
|
};
|
|
17
15
|
|
|
18
16
|
#define PF2_DEFAULT_INTERVAL_MS 9
|
|
19
17
|
#define PF2_DEFAULT_TIME_MODE PF2_TIME_MODE_CPU_TIME
|
|
20
|
-
#define PF2_DEFAULT__TEST_NO_INSTALL_TIMER false
|
|
21
18
|
|
|
22
19
|
struct pf2_configuration *pf2_configuration_new_from_options_hash(VALUE options_hash);
|
|
23
20
|
void pf2_configuration_free(struct pf2_configuration *config);
|
data/ext/pf2/extconf.rb
CHANGED
|
@@ -1,44 +1,10 @@
|
|
|
1
1
|
require 'mkmf'
|
|
2
2
|
require 'mini_portile2'
|
|
3
|
-
require 'fileutils'
|
|
4
|
-
require 'optparse'
|
|
5
|
-
|
|
6
|
-
gem_root = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
|
|
7
|
-
|
|
8
|
-
options = {
|
|
9
|
-
debug: ENV['PF2_DEBUG'] == '1',
|
|
10
|
-
}
|
|
11
|
-
option_parser = OptionParser.new do |opts|
|
|
12
|
-
opts.on('--debug[=BOOL]') do |debug|
|
|
13
|
-
options[:debug] =
|
|
14
|
-
case debug
|
|
15
|
-
when nil, "true"
|
|
16
|
-
true
|
|
17
|
-
when "false"
|
|
18
|
-
false
|
|
19
|
-
else
|
|
20
|
-
raise OptionParser::InvalidArgument, "Expected true or false for --debug"
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
option_parser.parse!(ARGV)
|
|
25
3
|
|
|
26
4
|
libbacktrace = MiniPortile.new('libbacktrace', '1.0.0')
|
|
27
|
-
libbacktrace.source_directory = File.join(
|
|
28
|
-
libbacktrace.patch_files = Dir.glob(File.join(gem_root, 'ext', 'patches', 'libbacktrace', '*.patch'))
|
|
5
|
+
libbacktrace.source_directory = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'vendor', 'libbacktrace'))
|
|
29
6
|
libbacktrace.configure_options << 'CFLAGS=-fPIC'
|
|
30
|
-
|
|
31
|
-
# Expand 'libbacktrace.cook' to call #patch on source_directory files
|
|
32
|
-
libbacktrace.prepare_build_directory
|
|
33
|
-
# Added: Copy source to build_directory
|
|
34
|
-
build_directory = libbacktrace.send(:work_path)
|
|
35
|
-
FileUtils.cp_r(File.join(libbacktrace.source_directory, '.'), build_directory)
|
|
36
|
-
libbacktrace.patch
|
|
37
|
-
libbacktrace.configure unless libbacktrace.configured?
|
|
38
|
-
libbacktrace.compile
|
|
39
|
-
libbacktrace.install unless libbacktrace.installed?
|
|
40
|
-
# END expand 'libbacktrace.cook'
|
|
41
|
-
|
|
7
|
+
libbacktrace.cook
|
|
42
8
|
libbacktrace.mkmf_config
|
|
43
9
|
|
|
44
10
|
if !have_func('backtrace_full', 'backtrace.h')
|
|
@@ -47,7 +13,7 @@ end
|
|
|
47
13
|
|
|
48
14
|
append_ldflags('-lrt') # for timer_create
|
|
49
15
|
append_cflags('-fvisibility=hidden')
|
|
50
|
-
append_cflags('-DPF2_DEBUG') if
|
|
16
|
+
append_cflags('-DPF2_DEBUG') if ENV['PF2_DEBUG'] == '1'
|
|
51
17
|
|
|
52
18
|
# Check for timer functions
|
|
53
19
|
have_timer_create = have_func('timer_create')
|
data/ext/pf2/pf2.c
CHANGED
|
@@ -2,16 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
#include "session.h"
|
|
4
4
|
|
|
5
|
-
VALUE
|
|
5
|
+
VALUE rb_mPf2c;
|
|
6
6
|
|
|
7
7
|
RUBY_FUNC_EXPORTED void
|
|
8
8
|
Init_pf2(void)
|
|
9
9
|
{
|
|
10
|
-
|
|
11
|
-
VALUE
|
|
12
|
-
rb_define_alloc_func(
|
|
13
|
-
rb_define_method(
|
|
14
|
-
rb_define_method(
|
|
15
|
-
rb_define_method(
|
|
16
|
-
rb_define_method(
|
|
10
|
+
rb_mPf2c = rb_define_module("Pf2c");
|
|
11
|
+
VALUE rb_mPf2c_cSession = rb_define_class_under(rb_mPf2c, "Session", rb_cObject);
|
|
12
|
+
rb_define_alloc_func(rb_mPf2c_cSession, pf2_session_alloc);
|
|
13
|
+
rb_define_method(rb_mPf2c_cSession, "initialize", rb_pf2_session_initialize, -1);
|
|
14
|
+
rb_define_method(rb_mPf2c_cSession, "start", rb_pf2_session_start, 0);
|
|
15
|
+
rb_define_method(rb_mPf2c_cSession, "stop", rb_pf2_session_stop, 0);
|
|
16
|
+
rb_define_method(rb_mPf2c_cSession, "configuration", rb_pf2_session_configuration, 0);
|
|
17
17
|
}
|
data/ext/pf2/sample.c
CHANGED
|
@@ -9,6 +9,8 @@
|
|
|
9
9
|
#include "backtrace_state.h"
|
|
10
10
|
#include "sample.h"
|
|
11
11
|
|
|
12
|
+
const int PF2_SAMPLE_MAX_NATIVE_DEPTH = 300;
|
|
13
|
+
|
|
12
14
|
static int capture_native_backtrace(struct pf2_sample *sample);
|
|
13
15
|
static int backtrace_on_ok(void *data, uintptr_t pc);
|
|
14
16
|
|
|
@@ -27,7 +29,7 @@ pf2_sample_capture(struct pf2_sample *sample)
|
|
|
27
29
|
sample->context_pthread = pthread_self();
|
|
28
30
|
|
|
29
31
|
// Obtain the current stack from Ruby
|
|
30
|
-
sample->depth = rb_profile_frames(0,
|
|
32
|
+
sample->depth = rb_profile_frames(0, 200, sample->cmes, sample->linenos);
|
|
31
33
|
|
|
32
34
|
// Capture C-level backtrace
|
|
33
35
|
sample->native_stack_depth = capture_native_backtrace(sample);
|
data/ext/pf2/sample.h
CHANGED
|
@@ -5,18 +5,17 @@
|
|
|
5
5
|
|
|
6
6
|
#include <ruby.h>
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
#define PF2_SAMPLE_MAX_NATIVE_DEPTH 512
|
|
8
|
+
extern const int PF2_SAMPLE_MAX_NATIVE_DEPTH;
|
|
10
9
|
|
|
11
10
|
struct pf2_sample {
|
|
12
11
|
pthread_t context_pthread;
|
|
13
12
|
|
|
14
13
|
int depth;
|
|
15
|
-
VALUE cmes[
|
|
16
|
-
int linenos[
|
|
14
|
+
VALUE cmes[200];
|
|
15
|
+
int linenos[200];
|
|
17
16
|
|
|
18
17
|
size_t native_stack_depth;
|
|
19
|
-
uintptr_t native_stack[
|
|
18
|
+
uintptr_t native_stack[200];
|
|
20
19
|
|
|
21
20
|
uint64_t consumed_time_ns;
|
|
22
21
|
uint64_t timestamp_ns;
|
data/ext/pf2/serializer.c
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
#include <time.h>
|
|
2
2
|
#include <stdint.h>
|
|
3
3
|
#include <string.h>
|
|
4
|
-
#include <stdatomic.h>
|
|
5
4
|
|
|
6
5
|
#include <ruby.h>
|
|
7
6
|
#include <ruby/debug.h>
|
|
@@ -17,8 +16,8 @@ static struct pf2_ser_function extract_function_from_ruby_frame(VALUE frame);
|
|
|
17
16
|
static struct pf2_ser_function extract_function_from_native_pc(uintptr_t pc);
|
|
18
17
|
// static int backtrace_pcinfo_callback(void *data, uintptr_t pc, const char *filename, int lineno, const char *function);
|
|
19
18
|
static void pf2_backtrace_syminfo_callback(void *data, uintptr_t pc, const char *symname, uintptr_t symval, uintptr_t symsize);
|
|
20
|
-
static
|
|
21
|
-
static
|
|
19
|
+
static int function_index_for(struct pf2_ser *serializer, struct pf2_ser_function *function);
|
|
20
|
+
static int location_index_for(struct pf2_ser *serializer, int function_index, int32_t lineno);
|
|
22
21
|
static void ensure_samples_capacity(struct pf2_ser *serializer);
|
|
23
22
|
static void ensure_locations_capacity(struct pf2_ser *serializer);
|
|
24
23
|
static void ensure_functions_capacity(struct pf2_ser *serializer);
|
|
@@ -30,9 +29,6 @@ pf2_ser_new(void) {
|
|
|
30
29
|
ser->start_timestamp_ns = 0;
|
|
31
30
|
ser->duration_ns = 0;
|
|
32
31
|
|
|
33
|
-
ser->collected_sample_count = 0;
|
|
34
|
-
ser->dropped_sample_count = 0;
|
|
35
|
-
|
|
36
32
|
ser->samples = NULL;
|
|
37
33
|
ser->samples_count = 0;
|
|
38
34
|
ser->samples_capacity = 0;
|
|
@@ -80,125 +76,48 @@ pf2_ser_prepare(struct pf2_ser *serializer, struct pf2_session *session) {
|
|
|
80
76
|
(uint64_t)session->start_time_realtime.tv_sec * 1000000000ULL +
|
|
81
77
|
(uint64_t)session->start_time_realtime.tv_nsec;
|
|
82
78
|
serializer->duration_ns = session->duration_ns;
|
|
83
|
-
serializer->collected_sample_count =
|
|
84
|
-
atomic_load_explicit(&session->collected_sample_count, memory_order_relaxed);
|
|
85
|
-
serializer->dropped_sample_count =
|
|
86
|
-
atomic_load_explicit(&session->dropped_sample_count, memory_order_relaxed);
|
|
87
|
-
|
|
88
|
-
// ---------------------------------------------------------------------
|
|
89
|
-
// Build locations/functions from the session's interning tables
|
|
90
|
-
// ---------------------------------------------------------------------
|
|
91
|
-
size_t location_table_size = kh_size(session->location_table);
|
|
92
|
-
if (location_table_size > serializer->locations_capacity) {
|
|
93
|
-
serializer->locations_capacity = location_table_size;
|
|
94
|
-
serializer->locations = realloc(
|
|
95
|
-
serializer->locations,
|
|
96
|
-
serializer->locations_capacity * sizeof(struct pf2_ser_location)
|
|
97
|
-
);
|
|
98
|
-
}
|
|
99
79
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
int lineno = kh_key(session->location_table, k).lineno;
|
|
80
|
+
// Process samples
|
|
81
|
+
for (size_t i = 0; i < session->samples_index; i++) {
|
|
82
|
+
struct pf2_sample *sample = &session->samples[i];
|
|
83
|
+
ensure_samples_capacity(serializer);
|
|
105
84
|
|
|
106
|
-
struct
|
|
107
|
-
|
|
85
|
+
struct pf2_ser_sample *ser_sample = &serializer->samples[serializer->samples_count++];
|
|
86
|
+
ser_sample->ruby_thread_id = sample->context_pthread;
|
|
87
|
+
ser_sample->elapsed_ns = sample->timestamp_ns - serializer->start_timestamp_ns;
|
|
108
88
|
|
|
109
|
-
//
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
serializer->locations_count = location_table_size;
|
|
116
|
-
|
|
117
|
-
// ---------------------------------------------------------------------
|
|
118
|
-
// Precompute stack/native stack lookups by id for fast access
|
|
119
|
-
// ---------------------------------------------------------------------
|
|
120
|
-
size_t ruby_stack_count = kh_size(session->stack_table);
|
|
121
|
-
struct pf2_stack_key *ruby_stacks = NULL;
|
|
122
|
-
if (ruby_stack_count > 0) {
|
|
123
|
-
ruby_stacks = malloc(sizeof(struct pf2_stack_key) * ruby_stack_count);
|
|
124
|
-
kh_foreach(session->stack_table, k) {
|
|
125
|
-
size_t stack_id = kh_val(session->stack_table, k);
|
|
126
|
-
ruby_stacks[stack_id] = kh_key(session->stack_table, k);
|
|
127
|
-
}
|
|
128
|
-
}
|
|
89
|
+
// Copy and process Ruby stack frames
|
|
90
|
+
ser_sample->stack = malloc(sizeof(size_t) * sample->depth);
|
|
91
|
+
ser_sample->stack_count = sample->depth;
|
|
92
|
+
for (int j = 0; j < sample->depth; j++) {
|
|
93
|
+
VALUE frame = sample->cmes[j];
|
|
94
|
+
int32_t lineno = sample->linenos[j];
|
|
129
95
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
native_stacks = malloc(sizeof(struct pf2_native_stack_key) * native_stack_count);
|
|
134
|
-
kh_foreach(session->native_stack_table, k) {
|
|
135
|
-
size_t stack_id = kh_val(session->native_stack_table, k);
|
|
136
|
-
native_stacks[stack_id] = kh_key(session->native_stack_table, k);
|
|
137
|
-
}
|
|
138
|
-
}
|
|
96
|
+
struct pf2_ser_function func = extract_function_from_ruby_frame(frame);
|
|
97
|
+
size_t function_index = function_index_for(serializer, &func);
|
|
98
|
+
size_t location_index = location_index_for(serializer, function_index, lineno);
|
|
139
99
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
// ---------------------------------------------------------------------
|
|
143
|
-
size_t total_samples = 0;
|
|
144
|
-
kh_foreach(session->sample_table, k) {
|
|
145
|
-
total_samples += kh_val(session->sample_table, k).timestamps_count;
|
|
146
|
-
}
|
|
147
|
-
if (total_samples > serializer->samples_capacity) {
|
|
148
|
-
serializer->samples_capacity = total_samples;
|
|
149
|
-
serializer->samples = realloc(
|
|
150
|
-
serializer->samples,
|
|
151
|
-
serializer->samples_capacity * sizeof(struct pf2_ser_sample)
|
|
152
|
-
);
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
kh_foreach(session->sample_table, k) {
|
|
156
|
-
struct pf2_combined_stack_key ckey = kh_key(session->sample_table, k);
|
|
157
|
-
struct pf2_sample_stats *stats = &kh_val(session->sample_table, k);
|
|
158
|
-
|
|
159
|
-
const struct pf2_stack_key *ruby_stack = ruby_stacks ? &ruby_stacks[ckey.ruby_stack_id] : NULL;
|
|
160
|
-
const struct pf2_native_stack_key *native_stack = native_stacks ? &native_stacks[ckey.native_stack_id] : NULL;
|
|
161
|
-
|
|
162
|
-
for (size_t t = 0; t < stats->timestamps_count; t++) {
|
|
163
|
-
ensure_samples_capacity(serializer);
|
|
164
|
-
struct pf2_ser_sample *ser_sample = &serializer->samples[serializer->samples_count++];
|
|
165
|
-
|
|
166
|
-
ser_sample->ruby_thread_id = stats->thread_ids ? stats->thread_ids[t] : 0;
|
|
167
|
-
ser_sample->elapsed_ns = stats->timestamps[t] - serializer->start_timestamp_ns;
|
|
100
|
+
ser_sample->stack[j] = location_index;
|
|
101
|
+
}
|
|
168
102
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
for (size_t j = 0; j < ruby_stack->depth; j++) {
|
|
174
|
-
// location ids map directly to indices in serializer->locations
|
|
175
|
-
ser_sample->stack[j] = ruby_stack->frames[j];
|
|
176
|
-
}
|
|
177
|
-
} else {
|
|
178
|
-
ser_sample->stack = NULL;
|
|
179
|
-
ser_sample->stack_count = 0;
|
|
180
|
-
}
|
|
103
|
+
// Copy and process native stack frames, if any
|
|
104
|
+
if (sample->native_stack_depth > 0) {
|
|
105
|
+
ser_sample->native_stack = malloc(sizeof(size_t) * sample->native_stack_depth);
|
|
106
|
+
ser_sample->native_stack_count = sample->native_stack_depth;
|
|
181
107
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
108
|
+
for (size_t j = 0; j < sample->native_stack_depth; j++) {
|
|
109
|
+
struct pf2_ser_function func = extract_function_from_native_pc(sample->native_stack[j]);
|
|
110
|
+
size_t function_index = function_index_for(serializer, &func);
|
|
111
|
+
size_t location_index = location_index_for(serializer, function_index, 0);
|
|
186
112
|
|
|
187
|
-
|
|
188
|
-
struct pf2_ser_function func = extract_function_from_native_pc(native_stack->frames[j]);
|
|
189
|
-
size_t function_index = function_index_for(serializer, &func);
|
|
190
|
-
size_t location_index = location_index_for(serializer, function_index, 0);
|
|
191
|
-
ser_sample->native_stack[j] = location_index;
|
|
192
|
-
}
|
|
193
|
-
} else {
|
|
194
|
-
ser_sample->native_stack = NULL;
|
|
195
|
-
ser_sample->native_stack_count = 0;
|
|
113
|
+
ser_sample->native_stack[j] = location_index;
|
|
196
114
|
}
|
|
115
|
+
} else {
|
|
116
|
+
ser_sample->native_stack = NULL;
|
|
117
|
+
ser_sample->native_stack_count = 0;
|
|
197
118
|
}
|
|
198
|
-
}
|
|
199
119
|
|
|
200
|
-
|
|
201
|
-
free(native_stacks);
|
|
120
|
+
}
|
|
202
121
|
}
|
|
203
122
|
|
|
204
123
|
VALUE
|
|
@@ -208,8 +127,6 @@ pf2_ser_to_ruby_hash(struct pf2_ser *serializer) {
|
|
|
208
127
|
// Add metadata
|
|
209
128
|
rb_hash_aset(hash, ID2SYM(rb_intern("start_timestamp_ns")), ULL2NUM(serializer->start_timestamp_ns));
|
|
210
129
|
rb_hash_aset(hash, ID2SYM(rb_intern("duration_ns")), ULL2NUM(serializer->duration_ns));
|
|
211
|
-
rb_hash_aset(hash, ID2SYM(rb_intern("collected_sample_count")), ULL2NUM(serializer->collected_sample_count));
|
|
212
|
-
rb_hash_aset(hash, ID2SYM(rb_intern("dropped_sample_count")), ULL2NUM(serializer->dropped_sample_count));
|
|
213
130
|
|
|
214
131
|
// Add samples
|
|
215
132
|
VALUE samples = rb_ary_new_capa(serializer->samples_count);
|
|
@@ -228,7 +145,7 @@ pf2_ser_to_ruby_hash(struct pf2_ser *serializer) {
|
|
|
228
145
|
VALUE native_stack = rb_ary_new_capa(sample->native_stack_count);
|
|
229
146
|
if (sample->native_stack != NULL) {
|
|
230
147
|
for (size_t j = 0; j < sample->native_stack_count; j++) {
|
|
231
|
-
rb_ary_push(native_stack,
|
|
148
|
+
rb_ary_push(native_stack, ULL2NUM(sample->native_stack[j]));
|
|
232
149
|
}
|
|
233
150
|
}
|
|
234
151
|
rb_hash_aset(sample_hash, ID2SYM(rb_intern("native_stack")), native_stack);
|
|
@@ -237,7 +154,7 @@ pf2_ser_to_ruby_hash(struct pf2_ser *serializer) {
|
|
|
237
154
|
rb_hash_aset(
|
|
238
155
|
sample_hash,
|
|
239
156
|
ID2SYM(rb_intern("ruby_thread_id")),
|
|
240
|
-
|
|
157
|
+
sample->ruby_thread_id ? SIZET2NUM(sample->ruby_thread_id) : Qnil
|
|
241
158
|
);
|
|
242
159
|
rb_hash_aset(sample_hash, ID2SYM(rb_intern("elapsed_ns")), ULL2NUM(sample->elapsed_ns));
|
|
243
160
|
|
|
@@ -387,7 +304,7 @@ pf2_backtrace_syminfo_callback(void *data, uintptr_t pc, const char *symname, ui
|
|
|
387
304
|
|
|
388
305
|
// Returns the index of the function in `functions`.
|
|
389
306
|
// Calling this method will modify `serializer->profile` in place.
|
|
390
|
-
static
|
|
307
|
+
static int
|
|
391
308
|
function_index_for(struct pf2_ser *serializer, struct pf2_ser_function *function) {
|
|
392
309
|
for (size_t i = 0; i < serializer->functions_count; i++) {
|
|
393
310
|
struct pf2_ser_function *existing = &serializer->functions[i];
|
|
@@ -415,8 +332,8 @@ function_index_for(struct pf2_ser *serializer, struct pf2_ser_function *function
|
|
|
415
332
|
|
|
416
333
|
// Returns the index of the location in `locations`.
|
|
417
334
|
// Calling this method will modify `self.profile` in place.
|
|
418
|
-
static
|
|
419
|
-
location_index_for(struct pf2_ser *serializer,
|
|
335
|
+
static int
|
|
336
|
+
location_index_for(struct pf2_ser *serializer, int function_index, int32_t lineno) {
|
|
420
337
|
for (size_t i = 0; i < serializer->locations_count; i++) {
|
|
421
338
|
struct pf2_ser_location *existing = &serializer->locations[i];
|
|
422
339
|
if (existing->function_index == function_index && existing->lineno == lineno) {
|