pf2 0.7.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/README.md +11 -0
- data/Rakefile +9 -2
- data/doc/development.md +11 -0
- data/examples/mandelbrot.rb +69 -0
- data/examples/mandelbrot_ractor.rb +77 -0
- data/ext/pf2/build.rs +7 -0
- data/ext/pf2/src/ruby_c_api_helper.c +6 -0
- data/ext/pf2/src/serialization/profile.rs +1 -0
- data/ext/pf2/src/serialization/serializer.rs +4 -0
- data/ext/pf2/src/signal_scheduler.rs +1 -1
- data/ext/pf2/src/util.rs +2 -1
- data/ext/pf2c/backtrace_state.c +10 -0
- data/ext/pf2c/backtrace_state.h +10 -0
- data/ext/pf2c/configuration.c +90 -0
- data/ext/pf2c/configuration.h +23 -0
- data/ext/pf2c/extconf.rb +21 -0
- data/ext/pf2c/pf2.c +17 -0
- data/ext/pf2c/pf2.h +8 -0
- data/ext/pf2c/ringbuffer.c +74 -0
- data/ext/pf2c/ringbuffer.h +24 -0
- data/ext/pf2c/sample.c +70 -0
- data/ext/pf2c/sample.h +22 -0
- data/ext/pf2c/serializer.c +377 -0
- data/ext/pf2c/serializer.h +58 -0
- data/ext/pf2c/session.c +344 -0
- data/ext/pf2c/session.h +51 -0
- data/lib/pf2/cli.rb +33 -2
- data/lib/pf2/reporter/annotate.rb +101 -0
- data/lib/pf2/reporter/firefox_profiler.rb +1 -1
- data/lib/pf2/reporter/firefox_profiler_ser2.rb +308 -0
- data/lib/pf2/reporter.rb +2 -0
- data/lib/pf2/version.rb +1 -1
- data/vendor/libbacktrace/.gitignore +5 -0
- data/vendor/libbacktrace/Isaac.Newton-Opticks.txt +9286 -0
- data/vendor/libbacktrace/LICENSE +29 -0
- data/vendor/libbacktrace/Makefile.am +708 -0
- data/vendor/libbacktrace/Makefile.in +2820 -0
- data/vendor/libbacktrace/README.md +46 -0
- data/vendor/libbacktrace/aclocal.m4 +864 -0
- data/vendor/libbacktrace/alloc.c +167 -0
- data/vendor/libbacktrace/allocfail.c +136 -0
- data/vendor/libbacktrace/allocfail.sh +104 -0
- data/vendor/libbacktrace/atomic.c +113 -0
- data/vendor/libbacktrace/backtrace-supported.h.in +66 -0
- data/vendor/libbacktrace/backtrace.c +129 -0
- data/vendor/libbacktrace/backtrace.h +189 -0
- data/vendor/libbacktrace/btest.c +517 -0
- data/vendor/libbacktrace/compile +348 -0
- data/vendor/libbacktrace/config/enable.m4 +38 -0
- data/vendor/libbacktrace/config/lead-dot.m4 +31 -0
- data/vendor/libbacktrace/config/libtool.m4 +7545 -0
- data/vendor/libbacktrace/config/ltoptions.m4 +369 -0
- data/vendor/libbacktrace/config/ltsugar.m4 +123 -0
- data/vendor/libbacktrace/config/ltversion.m4 +23 -0
- data/vendor/libbacktrace/config/lt~obsolete.m4 +98 -0
- data/vendor/libbacktrace/config/multi.m4 +68 -0
- data/vendor/libbacktrace/config/override.m4 +117 -0
- data/vendor/libbacktrace/config/unwind_ipinfo.m4 +37 -0
- data/vendor/libbacktrace/config/warnings.m4 +227 -0
- data/vendor/libbacktrace/config.guess +1700 -0
- data/vendor/libbacktrace/config.h.in +185 -0
- data/vendor/libbacktrace/config.sub +1885 -0
- data/vendor/libbacktrace/configure +15952 -0
- data/vendor/libbacktrace/configure.ac +642 -0
- data/vendor/libbacktrace/dwarf.c +4593 -0
- data/vendor/libbacktrace/edtest.c +120 -0
- data/vendor/libbacktrace/edtest2.c +43 -0
- data/vendor/libbacktrace/elf.c +7471 -0
- data/vendor/libbacktrace/fileline.c +407 -0
- data/vendor/libbacktrace/filenames.h +52 -0
- data/vendor/libbacktrace/filetype.awk +13 -0
- data/vendor/libbacktrace/install-debuginfo-for-buildid.sh.in +65 -0
- data/vendor/libbacktrace/install-sh +501 -0
- data/vendor/libbacktrace/instrumented_alloc.c +114 -0
- data/vendor/libbacktrace/internal.h +428 -0
- data/vendor/libbacktrace/ltmain.sh +8636 -0
- data/vendor/libbacktrace/macho.c +1361 -0
- data/vendor/libbacktrace/missing +215 -0
- data/vendor/libbacktrace/mmap.c +331 -0
- data/vendor/libbacktrace/mmapio.c +110 -0
- data/vendor/libbacktrace/move-if-change +83 -0
- data/vendor/libbacktrace/mtest.c +410 -0
- data/vendor/libbacktrace/nounwind.c +66 -0
- data/vendor/libbacktrace/pecoff.c +1123 -0
- data/vendor/libbacktrace/posix.c +104 -0
- data/vendor/libbacktrace/print.c +117 -0
- data/vendor/libbacktrace/read.c +110 -0
- data/vendor/libbacktrace/simple.c +108 -0
- data/vendor/libbacktrace/sort.c +108 -0
- data/vendor/libbacktrace/state.c +72 -0
- data/vendor/libbacktrace/stest.c +137 -0
- data/vendor/libbacktrace/test-driver +148 -0
- data/vendor/libbacktrace/test_format.c +55 -0
- data/vendor/libbacktrace/testlib.c +234 -0
- data/vendor/libbacktrace/testlib.h +110 -0
- data/vendor/libbacktrace/ttest.c +161 -0
- data/vendor/libbacktrace/unittest.c +92 -0
- data/vendor/libbacktrace/unknown.c +65 -0
- data/vendor/libbacktrace/xcoff.c +1617 -0
- data/vendor/libbacktrace/xztest.c +508 -0
- data/vendor/libbacktrace/zstdtest.c +523 -0
- data/vendor/libbacktrace/ztest.c +541 -0
- metadata +122 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6a0dc2d338a482c56472ec6279cc4fd111c4263a540ab00872d57439a14244af
|
4
|
+
data.tar.gz: 56bdc8e81a1d4bccee07d7785eec713329c8ec4b9e659f2b494e12e8e016e60a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 43449c0433cfdc390aecf5335f2a8631d11a0a41ac3afc6fc469909e5fd28c45bde16de8fbcff5229c16551d26a2b47210ff2342dea5b190a47ca128e9656c80
|
7
|
+
data.tar.gz: e6f5f2481932100cd82504dcb3b24e71b123b1263a2588b584cf5e72e53817f2004c7ffe6e96ff12f9815975ddfeac679219468a1af9223f6b5a367b3ec0b5e6
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,24 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.9.0] - 2025-03-22
|
4
|
+
|
5
|
+
### Added
|
6
|
+
|
7
|
+
- `pf2 annotate` command
|
8
|
+
- A new sample collection backend implemented in C
|
9
|
+
|
10
|
+
### Changed
|
11
|
+
|
12
|
+
- Set SA_RESTART flag to reduce EINTRs in profiled code
|
13
|
+
|
14
|
+
## [0.8.0] - 2025-01-27
|
15
|
+
|
16
|
+
### Added
|
17
|
+
|
18
|
+
- The new serializer (Ser2) is now available in `Pf2::Session#start` through the `use_experimental_serializer` option.
|
19
|
+
- This serializer is more efficient and has a smaller memory footprint than the default serializer.
|
20
|
+
- Ser2 still lacks some features, such as weaving of native stacks.
|
21
|
+
|
3
22
|
## [0.7.1] - 2025-01-02
|
4
23
|
|
5
24
|
### Fixed
|
data/README.md
CHANGED
@@ -61,6 +61,12 @@ Profiles can be visualized using the [Firefox Profiler](https://profiler.firefox
|
|
61
61
|
$ pf2 report -o report.json my_program.pf2profile
|
62
62
|
```
|
63
63
|
|
64
|
+
Alternatively, `pf2 annotate` can be used to display hit counts side-by-side with source code.
|
65
|
+
|
66
|
+
```console
|
67
|
+
$ pf2 annotate my_program.pf2prof
|
68
|
+
```
|
69
|
+
|
64
70
|
### Configuration
|
65
71
|
|
66
72
|
Pf2 accepts the following configuration keys:
|
@@ -129,6 +135,11 @@ Future Plans
|
|
129
135
|
- Implement a "tracing" scheduler, using the C TracePoint API
|
130
136
|
- more
|
131
137
|
|
138
|
+
Development
|
139
|
+
--------
|
140
|
+
|
141
|
+
See [doc/development.md](doc/development.md).
|
142
|
+
|
132
143
|
|
133
144
|
License
|
134
145
|
--------
|
data/Rakefile
CHANGED
@@ -4,8 +4,15 @@ require 'minitest/test_task'
|
|
4
4
|
|
5
5
|
task default: %i[]
|
6
6
|
|
7
|
-
|
8
|
-
|
7
|
+
if ENV['PF2_PF2C'] == '1'
|
8
|
+
Rake::ExtensionTask.new 'pf2c' do |ext|
|
9
|
+
ext.name = 'pf2'
|
10
|
+
ext.lib_dir = 'lib/pf2'
|
11
|
+
end
|
12
|
+
else
|
13
|
+
Rake::ExtensionTask.new 'pf2' do |ext|
|
14
|
+
ext.lib_dir = 'lib/pf2'
|
15
|
+
end
|
9
16
|
end
|
10
17
|
|
11
18
|
Minitest::TestTask.create(:test) do |t|
|
data/doc/development.md
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# mandelbrot
|
2
|
+
#
|
3
|
+
# Generate a Mandelbrot set image using multiple threads.
|
4
|
+
|
5
|
+
require 'bundler/inline'
|
6
|
+
|
7
|
+
gemfile do
|
8
|
+
source 'https://rubygems.org'
|
9
|
+
gem 'chunky_png'
|
10
|
+
end
|
11
|
+
|
12
|
+
require 'pf2'
|
13
|
+
|
14
|
+
# Escape-time iteration count for the pixel at (x, y).
#
# The pixel is mapped onto the complex plane so that the image spans
# [-2, 2) on both axes, then z = z^2 + c is iterated from z = 0.
#
# Returns the number of iterations performed before |z| exceeded 2,
# or max_iter if that never happened.
def mandelbrot_pixel(x, y, width, height, max_iter)
  re = (x - width / 2.0) * 4.0 / width
  im = (y - height / 2.0) * 4.0 / height
  c = Complex(re, im)

  z = 0
  steps = 0
  loop do
    break unless steps < max_iter && z.magnitude <= 2

    z = z * z + c
    steps += 1
  end

  steps
end
|
29
|
+
|
30
|
+
# Render a width x height grayscale Mandelbrot image using num_threads threads.
#
# The image is split into horizontal bands, one per thread. All threads write
# into the same ChunkyPNG::Image; every pixel is written by exactly one thread.
#
# Returns the rendered ChunkyPNG::Image.
def generate_mandelbrot_image(width, height, max_iter, num_threads)
  image = ChunkyPNG::Image.new(width, height, ChunkyPNG::Color::TRANSPARENT)

  threads = Array.new(num_threads) do |thread_id|
    Thread.new(thread_id) do |tid|
      rows_per_thread = height / num_threads
      start_row = tid * rows_per_thread
      # The last thread also renders the rows left over when height is not
      # divisible by num_threads; otherwise those rows would stay transparent.
      end_row = tid == num_threads - 1 ? height : start_row + rows_per_thread

      (start_row...end_row).each do |y|
        width.times do |x|
          color_value = mandelbrot_pixel(x, y, width, height, max_iter)
          image[x, y] = ChunkyPNG::Color.grayscale(color_value * 255 / max_iter)
        end
      end
    end
  end

  threads.each(&:join)
  image
end
|
50
|
+
|
51
|
+
# Parameters
width = 800
height = 800
max_iter = 1000
thread_count = 16

puts "width: #{width}, height: #{height}, max_iter: #{max_iter}, threads: #{thread_count}"

# Profile only the rendering; the elapsed time is measured around the same call.
Pf2.start

started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
generate_mandelbrot_image(width, height, max_iter, thread_count)
finished_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

# Stop profiling and persist the marshalled profile next to the script's cwd.
File.binwrite("mandelbrot.pf2prof", Marshal.dump(Pf2.stop))

elapsed = finished_at - started_at
puts "Complete in #{elapsed} seconds"
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# mandelbrot_ractor
|
2
|
+
#
|
3
|
+
# This script demonstrates how to profile a Ruby program that uses Ractors.
|
4
|
+
|
5
|
+
require 'bundler/inline'
|
6
|
+
|
7
|
+
gemfile do
|
8
|
+
source 'https://rubygems.org'
|
9
|
+
gem 'chunky_png'
|
10
|
+
end
|
11
|
+
|
12
|
+
# Escape-time iteration count for the pixel at (x, y).
#
# The pixel is mapped onto the complex plane so that the image spans
# [-2, 2) on both axes, then z = z^2 + c is iterated from z = 0.
#
# Returns the number of iterations performed before |z| exceeded 2,
# or max_iter if that never happened.
def mandelbrot_pixel(x, y, width, height, max_iter)
  re = (x - width / 2.0) * 4.0 / width
  im = (y - height / 2.0) * 4.0 / height
  c = Complex(re, im)

  z = 0
  steps = 0
  loop do
    break unless steps < max_iter && z.magnitude <= 2

    z = z * z + c
    steps += 1
  end

  steps
end
|
27
|
+
|
28
|
+
# Render a width x height grayscale Mandelbrot image using num_ractors Ractors.
#
# Each Ractor renders one horizontal band into its own full-size image and
# yields it; the bands are then merged into a single image.
#
# Returns the merged ChunkyPNG::Image.
def generate_mandelbrot_image(width, height, max_iter, num_ractors)
  # Row band owned by each ractor. The last band also takes the rows left over
  # when height is not divisible by num_ractors.
  row_ranges = Array.new(num_ractors) do |rid|
    rows_per_ractor = height / num_ractors
    start_row = rid * rows_per_ractor
    end_row = rid == num_ractors - 1 ? height : start_row + rows_per_ractor
    (start_row...end_row)
  end

  ractors = row_ranges.map do |rows|
    # Ractor blocks cannot capture outer locals, so everything is passed in.
    Ractor.new(width, height, max_iter, rows.begin, rows.end) do |width, height, max_iter, start_row, end_row|
      image = ChunkyPNG::Image.new(width, height, ChunkyPNG::Color::TRANSPARENT)

      (start_row...end_row).each do |y|
        width.times do |x|
          color_value = mandelbrot_pixel(x, y, width, height, max_iter)
          color = ChunkyPNG::Color.grayscale(color_value * 255 / max_iter)
          image[x, y] = color
        end
      end

      Ractor.yield image
    end
  end
  image_parts = ractors.map(&:take)

  # Merge image_parts into a single image.
  # Only the rows a ractor actually rendered are copied: pixels outside its
  # band are transparent (never nil), so copying a whole part would wipe the
  # rows contributed by the parts merged before it.
  image = ChunkyPNG::Image.new(width, height, ChunkyPNG::Color::TRANSPARENT)
  image_parts.zip(row_ranges).each do |image_part, rows|
    rows.each do |y|
      width.times do |x|
        image[x, y] = image_part[x, y]
      end
    end
  end
  image
end
|
63
|
+
|
64
|
+
# Parameters
width = 800
height = 800
max_iter = 1000
ractor_count = 4

puts "width: #{width}, height: #{height}, max_iter: #{max_iter}, ractors: #{ractor_count}"

# Time the rendering with a monotonic clock.
started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
generate_mandelbrot_image(width, height, max_iter, ractor_count)
finished_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

elapsed = finished_at - started_at
puts "Complete in #{elapsed} seconds"
|
data/ext/pf2/build.rs
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
use std::env;
|
2
|
+
|
1
3
|
fn main() {
|
2
4
|
cc::Build::new().file("src/siginfo_t.c").compile("ccode");
|
5
|
+
cc::Build::new()
|
6
|
+
.flag(format!("-I{}", env::var("DEP_RB_RBCONFIG_RUBYHDRDIR").unwrap()).as_str())
|
7
|
+
.flag(format!("-I{}", env::var("DEP_RB_RBCONFIG_RUBYARCHHDRDIR").unwrap()).as_str())
|
8
|
+
.file("src/ruby_c_api_helper.c")
|
9
|
+
.compile("rubyhelper");
|
3
10
|
}
|
@@ -63,6 +63,7 @@ impl ProfileSerializer2 {
|
|
63
63
|
stack,
|
64
64
|
native_stack,
|
65
65
|
ruby_thread_id: Some(sample.ruby_thread),
|
66
|
+
elapsed_ns: sample.timestamp.duration_since(source.start_instant).as_nanos() as u64,
|
66
67
|
});
|
67
68
|
}
|
68
69
|
}
|
@@ -215,6 +216,8 @@ impl ProfileSerializer2 {
|
|
215
216
|
} else {
|
216
217
|
Qnil as VALUE
|
217
218
|
};
|
219
|
+
// sample[:elapsed_ns]
|
220
|
+
let elapsed_ns = rb_ull2inum(sample.elapsed_ns);
|
218
221
|
|
219
222
|
let sample_hash: VALUE = rb_hash_new();
|
220
223
|
rb_hash_aset(sample_hash, rb_id2sym(rb_intern(cstr!("stack"))), stack);
|
@@ -228,6 +231,7 @@ impl ProfileSerializer2 {
|
|
228
231
|
rb_id2sym(rb_intern(cstr!("ruby_thread_id"))),
|
229
232
|
ruby_thread_id,
|
230
233
|
);
|
234
|
+
rb_hash_aset(sample_hash, rb_id2sym(rb_intern(cstr!("elapsed_ns"))), elapsed_ns);
|
231
235
|
|
232
236
|
rb_ary_push(samples, sample_hash);
|
233
237
|
}
|
@@ -103,7 +103,7 @@ impl SignalScheduler {
|
|
103
103
|
fn install_signal_handler(&self) {
|
104
104
|
let mut sa: libc::sigaction = unsafe { mem::zeroed() };
|
105
105
|
sa.sa_sigaction = Self::signal_handler as usize;
|
106
|
-
sa.sa_flags = libc::SA_SIGINFO;
|
106
|
+
sa.sa_flags = libc::SA_SIGINFO | libc::SA_RESTART;
|
107
107
|
let err = unsafe { libc::sigaction(libc::SIGALRM, &sa, null_mut()) };
|
108
108
|
if err != 0 {
|
109
109
|
panic!("sigaction failed: {}", err);
|
data/ext/pf2/src/util.rs
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
use core::mem::transmute;
|
2
2
|
use rb_sys::*;
|
3
|
-
use std::ffi::c_void;
|
3
|
+
use std::{ffi::c_void, u128};
|
4
4
|
|
5
5
|
// Convert str literal to C string literal
|
6
6
|
macro_rules! cstr {
|
@@ -27,4 +27,5 @@ pub fn RTEST(v: VALUE) -> bool {
|
|
27
27
|
|
28
28
|
extern "C" {
|
29
29
|
pub fn extract_si_value_sival_ptr(info: *mut libc::siginfo_t) -> *mut c_void;
|
30
|
+
pub fn rb_ull2num(n: u128) -> VALUE;
|
30
31
|
}
|
@@ -0,0 +1,10 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include "backtrace_state.h"
|
3
|
+
|
4
|
+
struct backtrace_state *global_backtrace_state = NULL;
|
5
|
+
|
6
|
+
// Error callback handed to libbacktrace.
//
// Reports the failure on stderr so diagnostics never mix with whatever the
// profiled program writes to stdout.
//
// data:   opaque pointer supplied by the libbacktrace caller (unused here)
// msg:    error description provided by libbacktrace
// errnum: error number provided by libbacktrace
void
pf2_backtrace_print_error(void *data, const char *msg, int errnum)
{
    (void)data;
    fprintf(stderr, "libbacktrace error callback: %s (errnum %d)\n", msg, errnum);
}
|
@@ -0,0 +1,90 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <stdlib.h>
|
3
|
+
|
4
|
+
#include "configuration.h"
|
5
|
+
|
6
|
+
static int extract_interval_ms(VALUE options_hash);
|
7
|
+
static enum pf2_time_mode extract_time_mode(VALUE options_hash);
|
8
|
+
|
9
|
+
struct pf2_configuration *
|
10
|
+
pf2_configuration_new_from_options_hash(VALUE options_hash)
|
11
|
+
{
|
12
|
+
struct pf2_configuration *config = malloc(sizeof(struct pf2_configuration));
|
13
|
+
if (!config) {
|
14
|
+
rb_raise(rb_eRuntimeError, "Failed to allocate configuration");
|
15
|
+
}
|
16
|
+
|
17
|
+
config->interval_ms = extract_interval_ms(options_hash);
|
18
|
+
config->time_mode = extract_time_mode(options_hash);
|
19
|
+
|
20
|
+
return config;
|
21
|
+
}
|
22
|
+
|
23
|
+
static int
|
24
|
+
extract_interval_ms(VALUE options_hash)
|
25
|
+
{
|
26
|
+
if (options_hash == Qnil) {
|
27
|
+
return PF2_DEFAULT_INTERVAL_MS;
|
28
|
+
}
|
29
|
+
|
30
|
+
VALUE interval_ms = rb_hash_aref(options_hash, ID2SYM(rb_intern("interval_ms")));
|
31
|
+
if (interval_ms == Qundef || interval_ms == Qnil) {
|
32
|
+
return PF2_DEFAULT_INTERVAL_MS;
|
33
|
+
}
|
34
|
+
|
35
|
+
return NUM2INT(interval_ms);
|
36
|
+
}
|
37
|
+
|
38
|
+
static enum pf2_time_mode
|
39
|
+
extract_time_mode(VALUE options_hash)
|
40
|
+
{
|
41
|
+
if (options_hash == Qnil) {
|
42
|
+
return PF2_DEFAULT_TIME_MODE;
|
43
|
+
}
|
44
|
+
|
45
|
+
VALUE time_mode = rb_hash_aref(options_hash, ID2SYM(rb_intern("time_mode")));
|
46
|
+
if (time_mode == Qundef || time_mode == Qnil) {
|
47
|
+
return PF2_DEFAULT_TIME_MODE;
|
48
|
+
}
|
49
|
+
|
50
|
+
if (time_mode == ID2SYM(rb_intern("cpu"))) {
|
51
|
+
return PF2_TIME_MODE_CPU_TIME;
|
52
|
+
} else if (time_mode == ID2SYM(rb_intern("wall"))) {
|
53
|
+
return PF2_TIME_MODE_WALL_TIME;
|
54
|
+
} else {
|
55
|
+
VALUE time_mode_str = rb_obj_as_string(time_mode);
|
56
|
+
rb_raise(rb_eArgError, "Invalid time mode: %s", StringValueCStr(time_mode_str));
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
// Release a configuration obtained from pf2_configuration_new_from_options_hash().
// The struct owns no other allocations, so a single free() suffices.
void
pf2_configuration_free(struct pf2_configuration *config)
{
    free(config);
}
|
65
|
+
|
66
|
+
VALUE
|
67
|
+
pf2_configuration_to_ruby_hash(struct pf2_configuration *config)
|
68
|
+
{
|
69
|
+
VALUE hash = rb_hash_new();
|
70
|
+
|
71
|
+
// interval_ms
|
72
|
+
rb_hash_aset(hash, ID2SYM(rb_intern("interval_ms")), INT2NUM(config->interval_ms));
|
73
|
+
|
74
|
+
// time_mode
|
75
|
+
VALUE time_mode_sym;
|
76
|
+
switch (config->time_mode) {
|
77
|
+
case PF2_TIME_MODE_CPU_TIME:
|
78
|
+
time_mode_sym = ID2SYM(rb_intern("cpu"));
|
79
|
+
break;
|
80
|
+
case PF2_TIME_MODE_WALL_TIME:
|
81
|
+
time_mode_sym = ID2SYM(rb_intern("wall"));
|
82
|
+
break;
|
83
|
+
default:
|
84
|
+
rb_raise(rb_eRuntimeError, "Invalid time mode");
|
85
|
+
break;
|
86
|
+
}
|
87
|
+
rb_hash_aset(hash, ID2SYM(rb_intern("time_mode")), time_mode_sym);
|
88
|
+
|
89
|
+
return hash;
|
90
|
+
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#ifndef PF2_CONFIGURATION_H
|
2
|
+
#define PF2_CONFIGURATION_H
|
3
|
+
|
4
|
+
#include <ruby.h>
|
5
|
+
|
6
|
+
// Time mode selected through the :time_mode option.
enum pf2_time_mode {
    PF2_TIME_MODE_CPU_TIME,  // selected by time_mode: :cpu
    PF2_TIME_MODE_WALL_TIME, // selected by time_mode: :wall
};
|
10
|
+
|
11
|
+
struct pf2_configuration {
|
12
|
+
int interval_ms;
|
13
|
+
enum pf2_time_mode time_mode;
|
14
|
+
};
|
15
|
+
|
16
|
+
#define PF2_DEFAULT_INTERVAL_MS 9
|
17
|
+
#define PF2_DEFAULT_TIME_MODE PF2_TIME_MODE_CPU_TIME
|
18
|
+
|
19
|
+
struct pf2_configuration *pf2_configuration_new_from_options_hash(VALUE options_hash);
|
20
|
+
void pf2_configuration_free(struct pf2_configuration *config);
|
21
|
+
VALUE pf2_configuration_to_ruby_hash(struct pf2_configuration *config);
|
22
|
+
|
23
|
+
#endif // PF2_CONFIGURATION_H
|
data/ext/pf2c/extconf.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'mkmf'
|
2
|
+
require 'mini_portile2'
|
3
|
+
|
4
|
+
# Build the vendored copy of libbacktrace through MiniPortile and expose it to mkmf.
vendored_source = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'vendor', 'libbacktrace'))

recipe = MiniPortile.new('libbacktrace', '1.0.0')
recipe.source_directory = vendored_source
recipe.configure_options << 'CFLAGS=-fPIC'
recipe.cook
recipe.mkmf_config

# Abort early if the freshly built library cannot be found by mkmf.
raise 'libbacktrace has not been properly configured' unless have_func('backtrace_full', 'backtrace.h')

append_ldflags('-lrt') # for timer_create
append_cflags('-fvisibility=hidden')
append_cflags('-DPF2_DEBUG') # TODO: make this conditional

# The Makefile is generated only when timer_create is available.
if have_func('timer_create')
  $srcs = Dir.glob(File.join(File.dirname(__FILE__), '*.c'))
  create_makefile 'pf2/pf2'
end
|
data/ext/pf2c/pf2.c
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
|
3
|
+
#include "session.h"
|
4
|
+
|
5
|
+
VALUE rb_mPf2c;
|
6
|
+
|
7
|
+
RUBY_FUNC_EXPORTED void
|
8
|
+
Init_pf2(void)
|
9
|
+
{
|
10
|
+
rb_mPf2c = rb_define_module("Pf2c");
|
11
|
+
VALUE rb_mPf2c_cSession = rb_define_class_under(rb_mPf2c, "Session", rb_cObject);
|
12
|
+
rb_define_alloc_func(rb_mPf2c_cSession, pf2_session_alloc);
|
13
|
+
rb_define_method(rb_mPf2c_cSession, "initialize", rb_pf2_session_initialize, -1);
|
14
|
+
rb_define_method(rb_mPf2c_cSession, "start", rb_pf2_session_start, 0);
|
15
|
+
rb_define_method(rb_mPf2c_cSession, "stop", rb_pf2_session_stop, 0);
|
16
|
+
rb_define_method(rb_mPf2c_cSession, "configuration", rb_pf2_session_configuration, 0);
|
17
|
+
}
|
data/ext/pf2c/pf2.h
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>

#include "ringbuffer.h"
|
5
|
+
|
6
|
+
struct pf2_ringbuffer *
|
7
|
+
pf2_ringbuffer_new(int size) {
|
8
|
+
if (size <= 0) { return NULL; }
|
9
|
+
|
10
|
+
struct pf2_ringbuffer *ringbuf = malloc(sizeof(struct pf2_ringbuffer));
|
11
|
+
if (!ringbuf) { goto err; }
|
12
|
+
ringbuf->size = size + 1; // One extra slot is required to distinguish full from empty
|
13
|
+
ringbuf->head = 0;
|
14
|
+
ringbuf->tail = 0;
|
15
|
+
ringbuf->samples = malloc(ringbuf->size * sizeof(struct pf2_sample));
|
16
|
+
if (!ringbuf->samples) { goto err_free_ringbuf; }
|
17
|
+
return ringbuf;
|
18
|
+
|
19
|
+
err_free_ringbuf:
|
20
|
+
free(ringbuf);
|
21
|
+
err:
|
22
|
+
return NULL;
|
23
|
+
}
|
24
|
+
|
25
|
+
void
|
26
|
+
pf2_ringbuffer_free(struct pf2_ringbuffer *ringbuf) {
|
27
|
+
free(ringbuf->samples);
|
28
|
+
free(ringbuf);
|
29
|
+
}
|
30
|
+
|
31
|
+
// Returns 0 on success, 1 on failure (buffer full).
|
32
|
+
bool
|
33
|
+
pf2_ringbuffer_push(struct pf2_ringbuffer *ringbuf, struct pf2_sample *sample) {
|
34
|
+
// Tail is only modified by the producer thread (us), so relaxed ordering is sufficient
|
35
|
+
const int current_tail = atomic_load_explicit(&ringbuf->tail, memory_order_relaxed);
|
36
|
+
const int next_tail = (current_tail + 1) % ringbuf->size;
|
37
|
+
|
38
|
+
// Check head to see if buffer is full. If next_tail == head, the buffer is full.
|
39
|
+
// Use acquire ordering to synchronize with the head update in pf2_ringbuffer_pop().
|
40
|
+
// This ensures we see the latest head value.
|
41
|
+
if (next_tail == atomic_load_explicit(&ringbuf->head, memory_order_acquire)) {
|
42
|
+
return false; // Buffer full
|
43
|
+
}
|
44
|
+
|
45
|
+
// Copy the sample from the provided input pointer to the buffer.
|
46
|
+
ringbuf->samples[current_tail] = *sample;
|
47
|
+
|
48
|
+
// Use release ordering when updating tail to ensure the sample write is visible
|
49
|
+
// to the consumer before they see the new tail value
|
50
|
+
atomic_store_explicit(&ringbuf->tail, next_tail, memory_order_release);
|
51
|
+
return true;
|
52
|
+
}
|
53
|
+
|
54
|
+
// Returns 0 on success, 1 on failure (buffer empty).
|
55
|
+
bool
|
56
|
+
pf2_ringbuffer_pop(struct pf2_ringbuffer *ringbuf, struct pf2_sample *out) {
|
57
|
+
// Head won't be modifed by the producer thread. It is safe to use relaxed ordering.
|
58
|
+
const int current_head = atomic_load_explicit(&ringbuf->head, memory_order_relaxed);
|
59
|
+
|
60
|
+
// Check tail to see if buffer is empty. If head == tail, the buffer is empty.
|
61
|
+
// Use acquire ordering to synchronize with the tail update in pf2_ringbuffer_push().
|
62
|
+
// This ensures we see the latest tail value.
|
63
|
+
if (current_head == atomic_load_explicit(&ringbuf->tail, memory_order_acquire)) {
|
64
|
+
return false; // Buffer empty
|
65
|
+
}
|
66
|
+
|
67
|
+
// Copy the sample from the buffer to the provided output pointer.
|
68
|
+
*out = ringbuf->samples[current_head];
|
69
|
+
|
70
|
+
// Use release ordering when updating head to ensure the sample read is complete
|
71
|
+
// before the producer sees the new head value
|
72
|
+
atomic_store_explicit(&ringbuf->head, (current_head + 1) % ringbuf->size, memory_order_release);
|
73
|
+
return true;
|
74
|
+
}
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#ifndef PF2_RINGBUFFER_H
|
2
|
+
#define PF2_RINGBUFFER_H
|
3
|
+
|
4
|
+
#include <stdatomic.h>
|
5
|
+
#include <stdbool.h>
|
6
|
+
|
7
|
+
#include "sample.h"
|
8
|
+
|
9
|
+
// Lock-free ring buffer of pf2_sample structs.
// Thread safe for single-producer single-consumer (SPSC) use only.
struct pf2_ringbuffer {
    int size;                   // number of slots, including the one spare slot
    atomic_int head;            // index of the next slot to pop (advanced by the consumer)
    atomic_int tail;            // index of the next slot to fill (advanced by the producer)
    struct pf2_sample *samples; // storage for `size` samples
};
|
17
|
+
|
18
|
+
struct pf2_ringbuffer * pf2_ringbuffer_new(int size);
|
19
|
+
void pf2_ringbuffer_free(struct pf2_ringbuffer *ringbuf);
|
20
|
+
// async-signal-safe
|
21
|
+
bool pf2_ringbuffer_push(struct pf2_ringbuffer *ringbuf, struct pf2_sample *sample);
|
22
|
+
bool pf2_ringbuffer_pop(struct pf2_ringbuffer *ringbuf, struct pf2_sample *out);
|
23
|
+
|
24
|
+
#endif // PF2_RINGBUFFER_H
|
data/ext/pf2c/sample.c
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
#include <stdbool.h>
|
2
|
+
#include <time.h>
|
3
|
+
|
4
|
+
#include <backtrace.h>
|
5
|
+
#include <ruby.h>
|
6
|
+
#include <ruby/debug.h>
|
7
|
+
|
8
|
+
#include "backtrace_state.h"
|
9
|
+
#include "sample.h"
|
10
|
+
|
11
|
+
const int PF2_SAMPLE_MAX_NATIVE_DEPTH = 300;
|
12
|
+
|
13
|
+
static int capture_native_backtrace(struct pf2_sample *sample);
|
14
|
+
static int backtrace_on_ok(void *data, uintptr_t pc);
|
15
|
+
|
16
|
+
// Capture a sample from the current thread.
|
17
|
+
bool
|
18
|
+
pf2_sample_capture(struct pf2_sample *sample)
|
19
|
+
{
|
20
|
+
// Record the current time
|
21
|
+
struct timespec now;
|
22
|
+
clock_gettime(CLOCK_MONOTONIC, &now);
|
23
|
+
sample->timestamp_ns = (uint64_t)now.tv_sec * 1000000000ULL + (uint64_t)now.tv_nsec;
|
24
|
+
|
25
|
+
// Obtain the current stack from Ruby
|
26
|
+
sample->depth = rb_profile_frames(0, 200, sample->cmes, sample->linenos);
|
27
|
+
|
28
|
+
// Capture C-level backtrace
|
29
|
+
sample->native_stack_depth = capture_native_backtrace(sample);
|
30
|
+
|
31
|
+
return true;
|
32
|
+
}
|
33
|
+
|
34
|
+
// Cursor handed to backtrace_on_ok through libbacktrace's opaque data pointer.
struct bt_data {
    struct pf2_sample *pf2_sample; // sample whose native_stack is being filled
    int index;                     // number of PCs stored so far (next free slot)
};
|
39
|
+
|
40
|
+
static int
|
41
|
+
capture_native_backtrace(struct pf2_sample *sample)
|
42
|
+
{
|
43
|
+
struct backtrace_state *state = global_backtrace_state;
|
44
|
+
assert(state != NULL);
|
45
|
+
|
46
|
+
struct bt_data data;
|
47
|
+
data.pf2_sample = sample;
|
48
|
+
data.index = 0;
|
49
|
+
|
50
|
+
// Capture the current PC
|
51
|
+
// Skip the first 2 frames (capture_native_backtrace, sigprof_handler)
|
52
|
+
backtrace_simple(state, 2, backtrace_on_ok, pf2_backtrace_print_error, &data);
|
53
|
+
|
54
|
+
return data.index;
|
55
|
+
}
|
56
|
+
|
57
|
+
static int
|
58
|
+
backtrace_on_ok(void *data, uintptr_t pc)
|
59
|
+
{
|
60
|
+
struct bt_data *bt_data = (struct bt_data *)data;
|
61
|
+
struct pf2_sample *sample = bt_data->pf2_sample;
|
62
|
+
|
63
|
+
// Store the PC value
|
64
|
+
if (bt_data->index < PF2_SAMPLE_MAX_NATIVE_DEPTH) {
|
65
|
+
sample->native_stack[bt_data->index] = pc;
|
66
|
+
bt_data->index++;
|
67
|
+
}
|
68
|
+
|
69
|
+
return 0; // Continue backtrace
|
70
|
+
}
|