pf2 0.11.3 → 0.13.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/ext/pf2/serializer.c CHANGED
@@ -1,6 +1,7 @@
1
1
  #include <time.h>
2
2
  #include <stdint.h>
3
3
  #include <string.h>
4
+ #include <stdatomic.h>
4
5
 
5
6
  #include <ruby.h>
6
7
  #include <ruby/debug.h>
@@ -29,6 +30,9 @@ pf2_ser_new(void) {
29
30
  ser->start_timestamp_ns = 0;
30
31
  ser->duration_ns = 0;
31
32
 
33
+ ser->collected_sample_count = 0;
34
+ ser->dropped_sample_count = 0;
35
+
32
36
  ser->samples = NULL;
33
37
  ser->samples_count = 0;
34
38
  ser->samples_capacity = 0;
@@ -76,48 +80,125 @@ pf2_ser_prepare(struct pf2_ser *serializer, struct pf2_session *session) {
76
80
  (uint64_t)session->start_time_realtime.tv_sec * 1000000000ULL +
77
81
  (uint64_t)session->start_time_realtime.tv_nsec;
78
82
  serializer->duration_ns = session->duration_ns;
83
+ serializer->collected_sample_count =
84
+ atomic_load_explicit(&session->collected_sample_count, memory_order_relaxed);
85
+ serializer->dropped_sample_count =
86
+ atomic_load_explicit(&session->dropped_sample_count, memory_order_relaxed);
87
+
88
+ // ---------------------------------------------------------------------
89
+ // Build locations/functions from the session's interning tables
90
+ // ---------------------------------------------------------------------
91
+ size_t location_table_size = kh_size(session->location_table);
92
+ if (location_table_size > serializer->locations_capacity) {
93
+ serializer->locations_capacity = location_table_size;
94
+ serializer->locations = realloc(
95
+ serializer->locations,
96
+ serializer->locations_capacity * sizeof(struct pf2_ser_location)
97
+ );
98
+ }
79
99
 
80
- // Process samples
81
- for (size_t i = 0; i < session->samples_index; i++) {
82
- struct pf2_sample *sample = &session->samples[i];
83
- ensure_samples_capacity(serializer);
84
-
85
- struct pf2_ser_sample *ser_sample = &serializer->samples[serializer->samples_count++];
86
- ser_sample->ruby_thread_id = (uintptr_t)sample->context_pthread;
87
- ser_sample->elapsed_ns = sample->timestamp_ns - serializer->start_timestamp_ns;
100
+ khint_t k;
101
+ kh_foreach(session->location_table, k) {
102
+ size_t location_id = kh_val(session->location_table, k);
103
+ VALUE cme = kh_key(session->location_table, k).cme;
104
+ int lineno = kh_key(session->location_table, k).lineno;
88
105
 
89
- // Copy and process Ruby stack frames
90
- ser_sample->stack = malloc(sizeof(size_t) * sample->depth);
91
- ser_sample->stack_count = sample->depth;
92
- for (int j = 0; j < sample->depth; j++) {
93
- VALUE frame = sample->cmes[j];
94
- int32_t lineno = sample->linenos[j];
106
+ struct pf2_ser_function func = extract_function_from_ruby_frame(cme);
107
+ size_t function_index = function_index_for(serializer, &func);
95
108
 
96
- struct pf2_ser_function func = extract_function_from_ruby_frame(frame);
97
- size_t function_index = function_index_for(serializer, &func);
98
- size_t location_index = location_index_for(serializer, function_index, lineno);
109
+ // location ids are assigned sequentially in intern_location, so we can
110
+ // place them directly by id.
111
+ serializer->locations[location_id].function_index = function_index;
112
+ serializer->locations[location_id].lineno = lineno;
113
+ serializer->locations[location_id].address = 0;
114
+ }
115
+ serializer->locations_count = location_table_size;
116
+
117
+ // ---------------------------------------------------------------------
118
+ // Precompute stack/native stack lookups by id for fast access
119
+ // ---------------------------------------------------------------------
120
+ size_t ruby_stack_count = kh_size(session->stack_table);
121
+ struct pf2_stack_key *ruby_stacks = NULL;
122
+ if (ruby_stack_count > 0) {
123
+ ruby_stacks = malloc(sizeof(struct pf2_stack_key) * ruby_stack_count);
124
+ kh_foreach(session->stack_table, k) {
125
+ size_t stack_id = kh_val(session->stack_table, k);
126
+ ruby_stacks[stack_id] = kh_key(session->stack_table, k);
127
+ }
128
+ }
99
129
 
100
- ser_sample->stack[j] = location_index;
130
+ size_t native_stack_count = kh_size(session->native_stack_table);
131
+ struct pf2_native_stack_key *native_stacks = NULL;
132
+ if (native_stack_count > 0) {
133
+ native_stacks = malloc(sizeof(struct pf2_native_stack_key) * native_stack_count);
134
+ kh_foreach(session->native_stack_table, k) {
135
+ size_t stack_id = kh_val(session->native_stack_table, k);
136
+ native_stacks[stack_id] = kh_key(session->native_stack_table, k);
101
137
  }
138
+ }
102
139
 
103
- // Copy and process native stack frames, if any
104
- if (sample->native_stack_depth > 0) {
105
- ser_sample->native_stack = malloc(sizeof(size_t) * sample->native_stack_depth);
106
- ser_sample->native_stack_count = sample->native_stack_depth;
140
+ // ---------------------------------------------------------------------
141
+ // Process aggregated sample_table entries into serializer samples
142
+ // ---------------------------------------------------------------------
143
+ size_t total_samples = 0;
144
+ kh_foreach(session->sample_table, k) {
145
+ total_samples += kh_val(session->sample_table, k).timestamps_count;
146
+ }
147
+ if (total_samples > serializer->samples_capacity) {
148
+ serializer->samples_capacity = total_samples;
149
+ serializer->samples = realloc(
150
+ serializer->samples,
151
+ serializer->samples_capacity * sizeof(struct pf2_ser_sample)
152
+ );
153
+ }
107
154
 
108
- for (size_t j = 0; j < sample->native_stack_depth; j++) {
109
- struct pf2_ser_function func = extract_function_from_native_pc(sample->native_stack[j]);
110
- size_t function_index = function_index_for(serializer, &func);
111
- size_t location_index = location_index_for(serializer, function_index, 0);
155
+ kh_foreach(session->sample_table, k) {
156
+ struct pf2_combined_stack_key ckey = kh_key(session->sample_table, k);
157
+ struct pf2_sample_stats *stats = &kh_val(session->sample_table, k);
112
158
 
113
- ser_sample->native_stack[j] = location_index;
159
+ const struct pf2_stack_key *ruby_stack = ruby_stacks ? &ruby_stacks[ckey.ruby_stack_id] : NULL;
160
+ const struct pf2_native_stack_key *native_stack = native_stacks ? &native_stacks[ckey.native_stack_id] : NULL;
161
+
162
+ for (size_t t = 0; t < stats->timestamps_count; t++) {
163
+ ensure_samples_capacity(serializer);
164
+ struct pf2_ser_sample *ser_sample = &serializer->samples[serializer->samples_count++];
165
+
166
+ ser_sample->ruby_thread_id = stats->thread_ids ? stats->thread_ids[t] : 0;
167
+ ser_sample->elapsed_ns = stats->timestamps[t] - serializer->start_timestamp_ns;
168
+
169
+ // Ruby stack
170
+ if (ruby_stack && ruby_stack->depth > 0) {
171
+ ser_sample->stack = malloc(sizeof(size_t) * ruby_stack->depth);
172
+ ser_sample->stack_count = ruby_stack->depth;
173
+ for (size_t j = 0; j < ruby_stack->depth; j++) {
174
+ // location ids map directly to indices in serializer->locations
175
+ ser_sample->stack[j] = ruby_stack->frames[j];
176
+ }
177
+ } else {
178
+ ser_sample->stack = NULL;
179
+ ser_sample->stack_count = 0;
114
180
  }
115
- } else {
116
- ser_sample->native_stack = NULL;
117
- ser_sample->native_stack_count = 0;
118
- }
119
181
 
182
+ // Native stack
183
+ if (native_stack && native_stack->depth > 0) {
184
+ ser_sample->native_stack = malloc(sizeof(size_t) * native_stack->depth);
185
+ ser_sample->native_stack_count = native_stack->depth;
186
+
187
+ for (size_t j = 0; j < native_stack->depth; j++) {
188
+ struct pf2_ser_function func = extract_function_from_native_pc(native_stack->frames[j]);
189
+ size_t function_index = function_index_for(serializer, &func);
190
+ size_t location_index = location_index_for(serializer, function_index, 0);
191
+ ser_sample->native_stack[j] = location_index;
192
+ }
193
+ } else {
194
+ ser_sample->native_stack = NULL;
195
+ ser_sample->native_stack_count = 0;
196
+ }
197
+ }
120
198
  }
199
+
200
+ free(ruby_stacks);
201
+ free(native_stacks);
121
202
  }
122
203
 
123
204
  VALUE
@@ -127,6 +208,8 @@ pf2_ser_to_ruby_hash(struct pf2_ser *serializer) {
127
208
  // Add metadata
128
209
  rb_hash_aset(hash, ID2SYM(rb_intern("start_timestamp_ns")), ULL2NUM(serializer->start_timestamp_ns));
129
210
  rb_hash_aset(hash, ID2SYM(rb_intern("duration_ns")), ULL2NUM(serializer->duration_ns));
211
+ rb_hash_aset(hash, ID2SYM(rb_intern("collected_sample_count")), ULL2NUM(serializer->collected_sample_count));
212
+ rb_hash_aset(hash, ID2SYM(rb_intern("dropped_sample_count")), ULL2NUM(serializer->dropped_sample_count));
130
213
 
131
214
  // Add samples
132
215
  VALUE samples = rb_ary_new_capa(serializer->samples_count);
@@ -154,7 +237,7 @@ pf2_ser_to_ruby_hash(struct pf2_ser *serializer) {
154
237
  rb_hash_aset(
155
238
  sample_hash,
156
239
  ID2SYM(rb_intern("ruby_thread_id")),
157
- sample->ruby_thread_id ? ULL2NUM(sample->ruby_thread_id) : Qnil
240
+ ULL2NUM(sample->ruby_thread_id)
158
241
  );
159
242
  rb_hash_aset(sample_hash, ID2SYM(rb_intern("elapsed_ns")), ULL2NUM(sample->elapsed_ns));
160
243
 
data/ext/pf2/serializer.h CHANGED
@@ -38,6 +38,8 @@ struct pf2_ser_function {
38
38
  struct pf2_ser {
39
39
  uint64_t start_timestamp_ns;
40
40
  uint64_t duration_ns;
41
+ uint64_t collected_sample_count;
42
+ uint64_t dropped_sample_count;
41
43
 
42
44
  struct pf2_ser_sample *samples;
43
45
  size_t samples_count;