datadog 2.17.0 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +90 -1
  3. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +63 -56
  4. data/ext/datadog_profiling_native_extension/collectors_stack.c +263 -76
  5. data/ext/datadog_profiling_native_extension/collectors_stack.h +20 -3
  6. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +62 -12
  7. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +1 -0
  8. data/ext/datadog_profiling_native_extension/extconf.rb +7 -0
  9. data/ext/datadog_profiling_native_extension/heap_recorder.c +239 -363
  10. data/ext/datadog_profiling_native_extension/heap_recorder.h +4 -6
  11. data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +22 -0
  12. data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +8 -5
  13. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +38 -26
  14. data/ext/datadog_profiling_native_extension/private_vm_api_access.h +6 -4
  15. data/ext/datadog_profiling_native_extension/ruby_helpers.c +1 -13
  16. data/ext/datadog_profiling_native_extension/ruby_helpers.h +3 -11
  17. data/ext/datadog_profiling_native_extension/stack_recorder.c +154 -57
  18. data/ext/libdatadog_api/extconf.rb +2 -2
  19. data/ext/libdatadog_api/library_config.c +54 -12
  20. data/ext/libdatadog_api/library_config.h +6 -0
  21. data/ext/libdatadog_api/process_discovery.c +2 -7
  22. data/ext/libdatadog_extconf_helpers.rb +1 -1
  23. data/lib/datadog/appsec/api_security/lru_cache.rb +9 -2
  24. data/lib/datadog/appsec/api_security/route_extractor.rb +71 -0
  25. data/lib/datadog/appsec/api_security/sampler.rb +59 -0
  26. data/lib/datadog/appsec/api_security.rb +14 -0
  27. data/lib/datadog/appsec/assets/waf_rules/recommended.json +257 -85
  28. data/lib/datadog/appsec/assets/waf_rules/strict.json +10 -78
  29. data/lib/datadog/appsec/component.rb +30 -54
  30. data/lib/datadog/appsec/configuration/settings.rb +60 -2
  31. data/lib/datadog/appsec/context.rb +6 -6
  32. data/lib/datadog/appsec/contrib/devise/tracking_middleware.rb +1 -1
  33. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +27 -16
  34. data/lib/datadog/appsec/instrumentation/gateway/argument.rb +1 -1
  35. data/lib/datadog/appsec/processor/rule_loader.rb +5 -6
  36. data/lib/datadog/appsec/remote.rb +15 -55
  37. data/lib/datadog/appsec/security_engine/engine.rb +194 -0
  38. data/lib/datadog/appsec/security_engine/runner.rb +10 -11
  39. data/lib/datadog/appsec.rb +4 -7
  40. data/lib/datadog/core/configuration/agent_settings.rb +52 -0
  41. data/lib/datadog/core/configuration/agent_settings_resolver.rb +1 -43
  42. data/lib/datadog/core/configuration/components.rb +2 -4
  43. data/lib/datadog/core/configuration/option.rb +9 -9
  44. data/lib/datadog/core/configuration/settings.rb +42 -10
  45. data/lib/datadog/core/configuration/stable_config.rb +1 -2
  46. data/lib/datadog/core/crashtracking/tag_builder.rb +4 -22
  47. data/lib/datadog/core/process_discovery/tracer_memfd.rb +15 -0
  48. data/lib/datadog/core/process_discovery.rb +5 -1
  49. data/lib/datadog/core/remote/configuration/repository.rb +12 -0
  50. data/lib/datadog/core/tag_builder.rb +56 -0
  51. data/lib/datadog/core/telemetry/component.rb +8 -4
  52. data/lib/datadog/core/telemetry/event/app_client_configuration_change.rb +1 -0
  53. data/lib/datadog/core/telemetry/event/app_started.rb +148 -40
  54. data/lib/datadog/core/telemetry/logger.rb +5 -4
  55. data/lib/datadog/core/telemetry/logging.rb +11 -5
  56. data/lib/datadog/core/transport/http/adapters/net.rb +17 -2
  57. data/lib/datadog/core/transport/http/builder.rb +2 -2
  58. data/lib/datadog/core/transport/http/env.rb +8 -0
  59. data/lib/datadog/core/utils.rb +7 -0
  60. data/lib/datadog/di/instrumenter.rb +48 -5
  61. data/lib/datadog/di/probe_notification_builder.rb +37 -42
  62. data/lib/datadog/di/probe_notifier_worker.rb +9 -1
  63. data/lib/datadog/di/serializer.rb +10 -2
  64. data/lib/datadog/di/transport/http/input.rb +10 -0
  65. data/lib/datadog/di/transport/input.rb +10 -2
  66. data/lib/datadog/di.rb +0 -6
  67. data/lib/datadog/kit/appsec/events/v2.rb +195 -0
  68. data/lib/datadog/profiling/collectors/code_provenance.rb +17 -8
  69. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +6 -0
  70. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +1 -0
  71. data/lib/datadog/profiling/collectors/info.rb +41 -0
  72. data/lib/datadog/profiling/collectors/thread_context.rb +16 -1
  73. data/lib/datadog/profiling/component.rb +8 -9
  74. data/lib/datadog/profiling/exporter.rb +9 -3
  75. data/lib/datadog/profiling/ext.rb +0 -12
  76. data/lib/datadog/profiling/http_transport.rb +2 -2
  77. data/lib/datadog/profiling/profiler.rb +2 -0
  78. data/lib/datadog/profiling/scheduler.rb +2 -1
  79. data/lib/datadog/profiling/sequence_tracker.rb +44 -0
  80. data/lib/datadog/profiling/stack_recorder.rb +5 -5
  81. data/lib/datadog/profiling/tag_builder.rb +7 -37
  82. data/lib/datadog/profiling/tasks/setup.rb +2 -0
  83. data/lib/datadog/profiling.rb +1 -0
  84. data/lib/datadog/single_step_instrument.rb +9 -0
  85. data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +15 -0
  86. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +19 -12
  87. data/lib/datadog/tracing/contrib/action_pack/ext.rb +2 -0
  88. data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +7 -1
  89. data/lib/datadog/tracing/contrib/active_support/configuration/settings.rb +13 -0
  90. data/lib/datadog/tracing/contrib/lograge/patcher.rb +4 -2
  91. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +16 -6
  92. data/lib/datadog/tracing/contrib/rails/patcher.rb +4 -1
  93. data/lib/datadog/tracing/contrib/rails/runner.rb +61 -40
  94. data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
  95. data/lib/datadog/tracing/contrib/sidekiq/server_tracer.rb +5 -2
  96. data/lib/datadog/tracing/diagnostics/environment_logger.rb +3 -1
  97. data/lib/datadog/tracing/span_event.rb +1 -1
  98. data/lib/datadog/tracing/span_operation.rb +22 -0
  99. data/lib/datadog/tracing/sync_writer.rb +1 -1
  100. data/lib/datadog/tracing/trace_operation.rb +12 -4
  101. data/lib/datadog/tracing/tracer.rb +6 -2
  102. data/lib/datadog/version.rb +1 -1
  103. data/lib/datadog.rb +7 -0
  104. metadata +14 -10
  105. data/lib/datadog/appsec/assets/waf_rules/processors.json +0 -321
  106. data/lib/datadog/appsec/assets/waf_rules/scanners.json +0 -1023
  107. data/lib/datadog/appsec/processor/rule_merger.rb +0 -171
  108. data/lib/datadog/appsec/processor.rb +0 -107
@@ -1,9 +1,21 @@
1
1
  #include <ruby.h>
2
2
  #include <ruby/debug.h>
3
- #include "extconf.h"
4
- #include "helpers.h"
5
- #include "libdatadog_helpers.h"
6
- #include "ruby_helpers.h"
3
+ #include <ruby/st.h>
4
+ #include <stdatomic.h>
5
+
6
+ #include "extconf.h" // This is needed for the HAVE_DLADDR and friends below
7
+
8
+ #if (defined(HAVE_DLADDR1) && HAVE_DLADDR1) || (defined(HAVE_DLADDR) && HAVE_DLADDR)
9
+ #ifndef _GNU_SOURCE
10
+ #define _GNU_SOURCE
11
+ #endif
12
+ #include <dlfcn.h>
13
+ #if defined(HAVE_DLADDR1) && HAVE_DLADDR1
14
+ #include <link.h>
15
+ #endif
16
+ #endif
17
+
18
+ #include "datadog_ruby_common.h"
7
19
  #include "private_vm_api_access.h"
8
20
  #include "stack_recorder.h"
9
21
  #include "collectors_stack.h"
@@ -11,19 +23,23 @@
11
23
  // Gathers stack traces from running threads, storing them in a StackRecorder instance
12
24
  // This file implements the native bits of the Datadog::Profiling::Collectors::Stack class
13
25
 
14
- static VALUE missing_string = Qnil;
15
-
16
- // Used as scratch space during sampling
17
- struct sampling_buffer { // Note: typedef'd in the header to sampling_buffer
18
- uint16_t max_frames;
19
- ddog_prof_Location *locations;
20
- frame_info *stack_buffer;
21
- };
22
-
26
+ static VALUE _native_filenames_available(DDTRACE_UNUSED VALUE self);
27
+ static VALUE _native_ruby_native_filename(DDTRACE_UNUSED VALUE self);
23
28
  static VALUE _native_sample(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self);
24
29
  static VALUE native_sample_do(VALUE args);
25
30
  static VALUE native_sample_ensure(VALUE args);
26
- static void maybe_add_placeholder_frames_omitted(VALUE thread, sampling_buffer* buffer, char *frames_omitted_message, int frames_omitted_message_size);
31
+ static void set_file_info_for_cfunc(
32
+ ddog_CharSlice *filename_slice,
33
+ int *line,
34
+ ddog_CharSlice last_ruby_frame_filename,
35
+ int last_ruby_line,
36
+ void *function,
37
+ bool top_of_the_stack,
38
+ bool native_filenames_enabled,
39
+ st_table *native_filenames_cache
40
+ );
41
+ static const char *get_or_compute_native_filename(void *function, st_table *native_filenames_cache);
42
+ static void add_truncated_frames_placeholder(sampling_buffer* buffer);
27
43
  static void record_placeholder_stack_in_native_code(VALUE recorder_instance, sample_values values, sample_labels labels);
28
44
  static void maybe_trim_template_random_ids(ddog_CharSlice *name_slice, ddog_CharSlice *filename_slice);
29
45
 
@@ -32,16 +48,47 @@ static void maybe_trim_template_random_ids(ddog_CharSlice *name_slice, ddog_Char
32
48
  extern VALUE rb_iseq_path(const VALUE);
33
49
  extern VALUE rb_iseq_base_label(const VALUE);
34
50
 
51
+ // NULL if dladdr is not available or we weren't able to get the native filename for the Ruby VM
52
+ static const char *ruby_native_filename = NULL;
53
+
35
54
  void collectors_stack_init(VALUE profiling_module) {
36
55
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
37
56
  VALUE collectors_stack_class = rb_define_class_under(collectors_module, "Stack", rb_cObject);
57
+
58
+ rb_define_singleton_method(collectors_stack_class, "_native_filenames_available?", _native_filenames_available, 0);
59
+ rb_define_singleton_method(collectors_stack_class, "_native_ruby_native_filename", _native_ruby_native_filename, 0);
60
+
38
61
  // Hosts methods used for testing the native code using RSpec
39
62
  VALUE testing_module = rb_define_module_under(collectors_stack_class, "Testing");
40
63
 
41
64
  rb_define_singleton_method(testing_module, "_native_sample", _native_sample, -1);
42
65
 
43
- missing_string = rb_str_new2("");
44
- rb_global_variable(&missing_string);
66
+ #if (defined(HAVE_DLADDR1) && HAVE_DLADDR1) || (defined(HAVE_DLADDR) && HAVE_DLADDR)
67
+ // To be able to detect when a frame is coming from Ruby, we record here its filename as returned by dladdr.
68
+ // We expect this same pointer to be returned by dladdr for all frames coming from Ruby.
69
+ //
70
+ // Small note: Creating/deleting the cache is a bit awkward here, but it seems like a bigger footgun to allow
71
+ // `get_or_compute_native_filename` to run without a cache, since we never expect that to happen during sampling. So it seems
72
+ // like a reasonable trade-off to force callers to always figure that out.
73
+ st_table *temporary_cache = st_init_numtable();
74
+ const char *native_filename = get_or_compute_native_filename(rb_ary_new, temporary_cache);
75
+ if (native_filename != NULL && native_filename[0] != '\0') {
76
+ ruby_native_filename = native_filename;
77
+ }
78
+ st_free_table(temporary_cache);
79
+ #endif
80
+ }
81
+
82
+ static VALUE _native_filenames_available(DDTRACE_UNUSED VALUE self) {
83
+ #if (defined(HAVE_DLADDR1) && HAVE_DLADDR1) || (defined(HAVE_DLADDR) && HAVE_DLADDR)
84
+ return ruby_native_filename != NULL ? Qtrue : Qfalse;
85
+ #else
86
+ return Qfalse;
87
+ #endif
88
+ }
89
+
90
+ static VALUE _native_ruby_native_filename(DDTRACE_UNUSED VALUE self) {
91
+ return ruby_native_filename != NULL ? rb_utf8_str_new_cstr(ruby_native_filename) : Qnil;
45
92
  }
46
93
 
47
94
  typedef struct {
@@ -52,6 +99,8 @@ typedef struct {
52
99
  VALUE thread;
53
100
  ddog_prof_Location *locations;
54
101
  sampling_buffer *buffer;
102
+ bool native_filenames_enabled;
103
+ st_table *native_filenames_cache;
55
104
  } native_sample_args;
56
105
 
57
106
  // This method exists only to enable testing Datadog::Profiling::Collectors::Stack behavior using RSpec.
@@ -73,10 +122,15 @@ static VALUE _native_sample(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self) {
73
122
  VALUE max_frames = rb_hash_lookup2(options, ID2SYM(rb_intern("max_frames")), INT2NUM(400));
74
123
  VALUE in_gc = rb_hash_lookup2(options, ID2SYM(rb_intern("in_gc")), Qfalse);
75
124
  VALUE is_gvl_waiting_state = rb_hash_lookup2(options, ID2SYM(rb_intern("is_gvl_waiting_state")), Qfalse);
125
+ VALUE native_filenames_enabled = rb_hash_lookup2(options, ID2SYM(rb_intern("native_filenames_enabled")), Qfalse);
76
126
 
77
127
  ENFORCE_TYPE(metric_values_hash, T_HASH);
78
128
  ENFORCE_TYPE(labels_array, T_ARRAY);
79
129
  ENFORCE_TYPE(numeric_labels_array, T_ARRAY);
130
+ ENFORCE_TYPE(max_frames, T_FIXNUM);
131
+ ENFORCE_BOOLEAN(in_gc);
132
+ ENFORCE_BOOLEAN(is_gvl_waiting_state);
133
+ ENFORCE_BOOLEAN(native_filenames_enabled);
80
134
 
81
135
  VALUE zero = INT2NUM(0);
82
136
  VALUE heap_sample = rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("heap_sample"), Qfalse);
@@ -119,7 +173,8 @@ static VALUE _native_sample(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self) {
119
173
  int max_frames_requested = sampling_buffer_check_max_frames(NUM2INT(max_frames));
120
174
 
121
175
  ddog_prof_Location *locations = ruby_xcalloc(max_frames_requested, sizeof(ddog_prof_Location));
122
- sampling_buffer *buffer = sampling_buffer_new(max_frames_requested, locations);
176
+ sampling_buffer buffer;
177
+ sampling_buffer_initialize(&buffer, max_frames_requested, locations);
123
178
 
124
179
  ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = labels_count};
125
180
 
@@ -130,7 +185,9 @@ static VALUE _native_sample(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self) {
130
185
  .labels = (sample_labels) {.labels = slice_labels, .state_label = state_label, .is_gvl_waiting_state = is_gvl_waiting_state == Qtrue},
131
186
  .thread = thread,
132
187
  .locations = locations,
133
- .buffer = buffer,
188
+ .buffer = &buffer,
189
+ .native_filenames_enabled = native_filenames_enabled == Qtrue,
190
+ .native_filenames_cache = st_init_numtable(),
134
191
  };
135
192
 
136
193
  return rb_ensure(native_sample_do, (VALUE) &args_struct, native_sample_ensure, (VALUE) &args_struct);
@@ -152,7 +209,9 @@ static VALUE native_sample_do(VALUE args) {
152
209
  args_struct->buffer,
153
210
  args_struct->recorder_instance,
154
211
  args_struct->values,
155
- args_struct->labels
212
+ args_struct->labels,
213
+ args_struct->native_filenames_enabled,
214
+ args_struct->native_filenames_cache
156
215
  );
157
216
  }
158
217
 
@@ -164,6 +223,7 @@ static VALUE native_sample_ensure(VALUE args) {
164
223
 
165
224
  ruby_xfree(args_struct->locations);
166
225
  sampling_buffer_free(args_struct->buffer);
226
+ st_free_table(args_struct->native_filenames_cache);
167
227
 
168
228
  return Qtrue;
169
229
  }
@@ -184,14 +244,15 @@ void sample_thread(
184
244
  sampling_buffer* buffer,
185
245
  VALUE recorder_instance,
186
246
  sample_values values,
187
- sample_labels labels
247
+ sample_labels labels,
248
+ bool native_filenames_enabled,
249
+ st_table *native_filenames_cache
188
250
  ) {
189
- int captured_frames = ddtrace_rb_profile_frames(
190
- thread,
191
- 0 /* stack starting depth */,
192
- buffer->max_frames,
193
- buffer->stack_buffer
194
- );
251
+ // If we already prepared a sample, we use it below; if not, we prepare it now.
252
+ if (!buffer->pending_sample) prepare_sample_thread(thread, buffer);
253
+
254
+ buffer->pending_sample = false;
255
+ int captured_frames = buffer->pending_sample_result;
195
256
 
196
257
  if (captured_frames == PLACEHOLDER_STACK_IN_NATIVE_CODE) {
197
258
  record_placeholder_stack_in_native_code(recorder_instance, values, labels);
@@ -210,8 +271,9 @@ void sample_thread(
210
271
  // The convention in Kernel#caller_locations is to instead use the path and line number of the first Ruby frame
211
272
  // on the stack that is below (e.g. directly or indirectly has called) the native method.
212
273
  // Thus, we keep that frame here to be able to replicate that behavior.
213
- // (This is why we also iterate the sampling buffers backwards below -- so that it's easier to keep the last_ruby_frame_filename)
214
- VALUE last_ruby_frame_filename = Qnil;
274
+ // (This is why we also iterate the sampling buffers backwards from what libdatadog uses below -- so that it's easier
275
+ // to keep the last_ruby_frame_filename)
276
+ ddog_CharSlice last_ruby_frame_filename = DDOG_CHARSLICE_C("");
215
277
  int last_ruby_line = 0;
216
278
 
217
279
  ddog_prof_Label *state_label = labels.state_label;
@@ -229,33 +291,42 @@ void sample_thread(
229
291
  if (labels.is_gvl_waiting_state) rb_raise(rb_eRuntimeError, "BUG: Unexpected combination of cpu-time with is_gvl_waiting");
230
292
  }
231
293
 
232
- for (int i = captured_frames - 1; i >= 0; i--) {
233
- VALUE name, filename;
294
+ int top_of_stack_position = captured_frames - 1;
295
+
296
+ for (int i = 0; i <= top_of_stack_position; i++) {
297
+ ddog_CharSlice name_slice, filename_slice;
234
298
  int line;
299
+ bool top_of_the_stack = i == top_of_stack_position;
235
300
 
236
301
  if (buffer->stack_buffer[i].is_ruby_frame) {
237
- name = rb_iseq_base_label(buffer->stack_buffer[i].as.ruby_frame.iseq);
238
- filename = rb_iseq_path(buffer->stack_buffer[i].as.ruby_frame.iseq);
302
+ VALUE name = rb_iseq_base_label(buffer->stack_buffer[i].as.ruby_frame.iseq);
303
+ VALUE filename = rb_iseq_path(buffer->stack_buffer[i].as.ruby_frame.iseq);
304
+
305
+ name_slice = NIL_P(name) ? DDOG_CHARSLICE_C("") : char_slice_from_ruby_string(name);
306
+ filename_slice = NIL_P(filename) ? DDOG_CHARSLICE_C("") : char_slice_from_ruby_string(filename);
239
307
  line = buffer->stack_buffer[i].as.ruby_frame.line;
240
308
 
241
- last_ruby_frame_filename = filename;
309
+ last_ruby_frame_filename = filename_slice;
242
310
  last_ruby_line = line;
243
311
  } else {
244
- name = rb_id2str(buffer->stack_buffer[i].as.native_frame.method_id);
245
- filename = last_ruby_frame_filename;
246
- line = last_ruby_line;
312
+ VALUE name = rb_id2str(buffer->stack_buffer[i].as.native_frame.method_id);
313
+
314
+ name_slice = NIL_P(name) ? DDOG_CHARSLICE_C("") : char_slice_from_ruby_string(name);
315
+
316
+ set_file_info_for_cfunc(
317
+ &filename_slice,
318
+ &line,
319
+ last_ruby_frame_filename,
320
+ last_ruby_line,
321
+ buffer->stack_buffer[i].as.native_frame.function,
322
+ top_of_the_stack,
323
+ native_filenames_enabled,
324
+ native_filenames_cache
325
+ );
247
326
  }
248
327
 
249
- name = NIL_P(name) ? missing_string : name;
250
- filename = NIL_P(filename) ? missing_string : filename;
251
-
252
- ddog_CharSlice name_slice = char_slice_from_ruby_string(name);
253
- ddog_CharSlice filename_slice = char_slice_from_ruby_string(filename);
254
-
255
328
  maybe_trim_template_random_ids(&name_slice, &filename_slice);
256
329
 
257
- bool top_of_the_stack = i == 0;
258
-
259
330
  // When there's only wall-time in a sample, this means that the thread was not active in the sampled period.
260
331
  if (top_of_the_stack && only_wall_time) {
261
332
  // Did the caller already provide the state?
@@ -299,21 +370,19 @@ void sample_thread(
299
370
  }
300
371
  }
301
372
 
302
- buffer->locations[i] = (ddog_prof_Location) {
373
+ int libdatadog_stores_stacks_flipped_from_rb_profile_frames_index = top_of_stack_position - i;
374
+
375
+ buffer->locations[libdatadog_stores_stacks_flipped_from_rb_profile_frames_index] = (ddog_prof_Location) {
303
376
  .mapping = {.filename = DDOG_CHARSLICE_C(""), .build_id = DDOG_CHARSLICE_C(""), .build_id_id = {}},
304
377
  .function = (ddog_prof_Function) {.name = name_slice, .filename = filename_slice},
305
378
  .line = line,
306
379
  };
307
380
  }
308
381
 
309
- // Used below; since we want to stack-allocate this, we must do it here rather than in maybe_add_placeholder_frames_omitted
310
- const int frames_omitted_message_size = sizeof(MAX_FRAMES_LIMIT_AS_STRING " frames omitted");
311
- char frames_omitted_message[frames_omitted_message_size];
312
-
313
382
  // If we filled up the buffer, some frames may have been omitted. In that case, we'll add a placeholder frame
314
383
  // with that info.
315
384
  if (captured_frames == (long) buffer->max_frames) {
316
- maybe_add_placeholder_frames_omitted(thread, buffer, frames_omitted_message, frames_omitted_message_size);
385
+ add_truncated_frames_placeholder(buffer);
317
386
  }
318
387
 
319
388
  record_sample(
@@ -324,6 +393,94 @@ void sample_thread(
324
393
  );
325
394
  }
326
395
 
396
+ #if (defined(HAVE_DLADDR1) && HAVE_DLADDR1) || (defined(HAVE_DLADDR) && HAVE_DLADDR)
397
+ static void set_file_info_for_cfunc(
398
+ ddog_CharSlice *filename_slice,
399
+ int *line,
400
+ ddog_CharSlice last_ruby_frame_filename,
401
+ int last_ruby_line,
402
+ void *function,
403
+ bool top_of_the_stack,
404
+ bool native_filenames_enabled,
405
+ st_table *native_filenames_cache
406
+ ) {
407
+ if (native_filenames_enabled) {
408
+ const char *native_filename = get_or_compute_native_filename(function, native_filenames_cache);
409
+ if (native_filename && native_filename[0] != '\0' &&
410
+ // Using the ruby_native_filename at the top of the stack has a weird effect on the "top methods" table because
411
+ // e.g. we don't have classnames for methods. This is especially visible in the allocations profile, e.g.
412
+ // what a surprise, you're telling me "libruby.so:new" is the top method always?
413
+ //
414
+ // Until we have a better way of dealing with that, we don't do this replacement for the top frame.
415
+ //
416
+ // Also, dladdr is expected to always return the same pointer to the ruby_native_filename, so that's why we're
417
+ // comparing only pointer values and not the string contents.
418
+ (native_filename != ruby_native_filename || !top_of_the_stack)
419
+ ) {
420
+ *filename_slice = (ddog_CharSlice) {.ptr = native_filename, .len = strlen(native_filename)};
421
+ // Explicitly set the line to 0 as it has no meaning on a native library (e.g. an .so is built of many source files)
422
+ // and anyway often that debug info is not available.
423
+ *line = 0;
424
+ return;
425
+ }
426
+ }
427
+
428
+ *filename_slice = last_ruby_frame_filename;
429
+ *line = last_ruby_line;
430
+ }
431
+
432
+ // `native_filenames_cache` is used to cache native filename lookup results (Map[void *function_pointer, char *filename])
433
+ //
434
+ // Caching this information is safe because there's no API in Ruby to "unrequire" a native extension. Thus, if we see a
435
+ // frame on the **Ruby** stack with a given `function`, then that `function` was registered with the Ruby VM and
436
+ // belongs to a Ruby extension, so a lot of other bad things would happen if it was dlclosed.
437
+ static const char *get_or_compute_native_filename(void *function, st_table *native_filenames_cache) {
438
+ const char *cached_filename = NULL;
439
+ st_lookup(native_filenames_cache, (st_data_t) function, (st_data_t *) &cached_filename);
440
+ if (cached_filename != NULL) return cached_filename;
441
+
442
+ Dl_info info;
443
+ const char *native_filename = NULL;
444
+ #if defined(HAVE_DLADDR1) && HAVE_DLADDR1
445
+ struct link_map *extra_info = NULL;
446
+ if (dladdr1(function, &info, (void **) &extra_info, RTLD_DL_LINKMAP) != 0 && extra_info != NULL) {
447
+ native_filename = extra_info->l_name != NULL ? extra_info->l_name : info.dli_fname;
448
+ }
449
+ #elif defined(HAVE_DLADDR) && HAVE_DLADDR
450
+ if (dladdr(function, &info) != 0) {
451
+ native_filename = info.dli_fname;
452
+ }
453
+ #endif
454
+
455
+ // We explicitly use an empty string here so as to cache lookups that somehow "failed". Otherwise we would keep trying them every time.
456
+ if (native_filename == NULL) native_filename = "";
457
+
458
+ // An st_table is what Ruby uses for its own hashtables. This allows us to get an easy estimate of the size of the cache:
459
+ // `ObjectSpace.memsize_of((0..100000).map { |it| [it, nil] }.to_h)` => 4194400 bytes as of Ruby 3.2 so that seems reasonable?
460
+ // Note: `st_table_size()` is available from Ruby 3.2+ but not before
461
+ if (native_filenames_cache->num_entries >= 100000) {
462
+ st_clear(native_filenames_cache);
463
+ }
464
+
465
+ st_insert(native_filenames_cache, (st_data_t) function, (st_data_t) native_filename);
466
+ return native_filename;
467
+ }
468
+ #else
469
+ static void set_file_info_for_cfunc(
470
+ ddog_CharSlice *filename_slice,
471
+ int *line,
472
+ ddog_CharSlice last_ruby_frame_filename,
473
+ int last_ruby_line,
474
+ DDTRACE_UNUSED void *function,
475
+ DDTRACE_UNUSED bool top_of_the_stack,
476
+ DDTRACE_UNUSED bool native_filenames_enabled,
477
+ DDTRACE_UNUSED st_table *native_filenames_cache
478
+ ) {
479
+ *filename_slice = last_ruby_frame_filename;
480
+ *line = last_ruby_line;
481
+ }
482
+ #endif
483
+
327
484
  // Rails's ActionView likes to dynamically generate method names with suffixed hashes/ids, resulting in methods with
328
485
  // names such as:
329
486
  // * "_app_views_layouts_explore_html_haml__2304485752546535910_211320" (__number_number suffix -- two underscores)
@@ -340,6 +497,7 @@ static void maybe_trim_template_random_ids(ddog_CharSlice *name_slice, ddog_Char
340
497
  if (filename_slice->len < 3 || memcmp(filename_slice->ptr + filename_slice->len - 3, ".rb", 3) == 0) return;
341
498
 
342
499
  if (name_slice->len > 1024) return;
500
+ if (name_slice->len == 0) return;
343
501
 
344
502
  int pos = ((int) name_slice->len) - 1;
345
503
 
@@ -363,24 +521,12 @@ static void maybe_trim_template_random_ids(ddog_CharSlice *name_slice, ddog_Char
363
521
  name_slice->len = pos;
364
522
  }
365
523
 
366
- static void maybe_add_placeholder_frames_omitted(VALUE thread, sampling_buffer* buffer, char *frames_omitted_message, int frames_omitted_message_size) {
367
- ptrdiff_t frames_omitted = stack_depth_for(thread) - buffer->max_frames;
368
-
369
- if (frames_omitted == 0) return; // Perfect fit!
370
-
371
- // The placeholder frame takes over a space, so if 10 frames were left out and we consume one other space for the
372
- // placeholder, then 11 frames are omitted in total
373
- frames_omitted++;
374
-
375
- snprintf(frames_omitted_message, frames_omitted_message_size, "%td frames omitted", frames_omitted);
376
-
377
- // Important note: `frames_omitted_message` MUST have a lifetime that is at least as long as the call to
378
- // `record_sample`. So be careful where it gets allocated. (We do have tests for this, at least!)
379
- ddog_CharSlice function_name = DDOG_CHARSLICE_C("");
380
- ddog_CharSlice function_filename = {.ptr = frames_omitted_message, .len = strlen(frames_omitted_message)};
381
- buffer->locations[buffer->max_frames - 1] = (ddog_prof_Location) {
524
+ static void add_truncated_frames_placeholder(sampling_buffer* buffer) {
525
+ // Important note: The strings below are static so we don't need to worry about their lifetime. If we ever want to change
526
+ // this to non-static strings, don't forget to check that lifetimes are properly respected.
527
+ buffer->locations[0] = (ddog_prof_Location) {
382
528
  .mapping = {.filename = DDOG_CHARSLICE_C(""), .build_id = DDOG_CHARSLICE_C(""), .build_id_id = {}},
383
- .function = (ddog_prof_Function) {.name = function_name, .filename = function_filename},
529
+ .function = {.name = DDOG_CHARSLICE_C("Truncated Frames"), .filename = DDOG_CHARSLICE_C(""), .filename_id = {}},
384
530
  .line = 0,
385
531
  };
386
532
  }
@@ -439,30 +585,71 @@ void record_placeholder_stack(
439
585
  );
440
586
  }
441
587
 
588
+ bool prepare_sample_thread(VALUE thread, sampling_buffer *buffer) {
589
+ // Since this can get called from inside a signal handler, we don't want to touch the buffer if
590
+ // the thread was actually in the middle of marking it.
591
+ if (buffer->is_marking) return false;
592
+
593
+ buffer->pending_sample = true;
594
+ buffer->pending_sample_result = ddtrace_rb_profile_frames(thread, 0, buffer->max_frames, buffer->stack_buffer);
595
+ return true;
596
+ }
597
+
442
598
  uint16_t sampling_buffer_check_max_frames(int max_frames) {
443
599
  if (max_frames < 5) rb_raise(rb_eArgError, "Invalid max_frames: value must be >= 5");
444
600
  if (max_frames > MAX_FRAMES_LIMIT) rb_raise(rb_eArgError, "Invalid max_frames: value must be <= " MAX_FRAMES_LIMIT_AS_STRING);
445
601
  return max_frames;
446
602
  }
447
603
 
448
- sampling_buffer *sampling_buffer_new(uint16_t max_frames, ddog_prof_Location *locations) {
604
+ void sampling_buffer_initialize(sampling_buffer *buffer, uint16_t max_frames, ddog_prof_Location *locations) {
449
605
  sampling_buffer_check_max_frames(max_frames);
450
606
 
451
- // Note: never returns NULL; if out of memory, it calls the Ruby out-of-memory handlers
452
- sampling_buffer* buffer = ruby_xcalloc(1, sizeof(sampling_buffer));
453
-
454
607
  buffer->max_frames = max_frames;
455
608
  buffer->locations = locations;
456
609
  buffer->stack_buffer = ruby_xcalloc(max_frames, sizeof(frame_info));
457
-
458
- return buffer;
610
+ buffer->pending_sample = false;
611
+ buffer->is_marking = false;
612
+ buffer->pending_sample_result = 0;
459
613
  }
460
614
 
461
615
  void sampling_buffer_free(sampling_buffer *buffer) {
462
- if (buffer == NULL) rb_raise(rb_eArgError, "sampling_buffer_free called with NULL buffer");
616
+ if (buffer->max_frames == 0 || buffer->locations == NULL || buffer->stack_buffer == NULL) {
617
+ rb_raise(rb_eArgError, "sampling_buffer_free called with invalid buffer");
618
+ }
463
619
 
464
- // buffer->locations are owned by whoever called sampling_buffer_new, not us
465
620
  ruby_xfree(buffer->stack_buffer);
621
+ // Note: buffer->locations are owned by whoever called sampling_buffer_initialize, not by the buffer itself
622
+
623
+ buffer->max_frames = 0;
624
+ buffer->locations = NULL;
625
+ buffer->stack_buffer = NULL;
626
+ buffer->pending_sample = false;
627
+ buffer->is_marking = false;
628
+ buffer->pending_sample_result = 0;
629
+ }
630
+
631
+ void sampling_buffer_mark(sampling_buffer *buffer) {
632
+ if (!sampling_buffer_needs_marking(buffer)) {
633
+ rb_bug("sampling_buffer_mark called with no pending sample. `sampling_buffer_needs_marking` should be used before calling mark.");
634
+ }
635
+
636
+ buffer->is_marking = true;
637
+ // Tell the compiler it's not allowed to reorder the `is_marking` flag with the iteration below.
638
+ //
639
+ // Specifically, in the middle of `sampling_buffer_mark` a signal handler may execute and call
640
+ // `prepare_sample_thread` to add a new sample to the buffer. This flag is here to prevent that BUT we need to
641
+ // make sure the signal handler actually sees the flag being set.
642
+ //
643
+ // See https://github.com/ruby/ruby/pull/11036 for a similar change made to the Ruby VM with more context.
644
+ atomic_signal_fence(memory_order_seq_cst);
645
+
646
+ for (int i = 0; i < buffer->pending_sample_result; i++) {
647
+ if (buffer->stack_buffer[i].is_ruby_frame) {
648
+ rb_gc_mark(buffer->stack_buffer[i].as.ruby_frame.iseq);
649
+ }
650
+ }
466
651
 
467
- ruby_xfree(buffer);
652
+ // Make sure iteration completes before `is_marking` is unset...
653
+ atomic_signal_fence(memory_order_seq_cst);
654
+ buffer->is_marking = false;
468
655
  }
@@ -2,19 +2,30 @@
2
2
 
3
3
  #include <datadog/profiling.h>
4
4
 
5
+ #include "private_vm_api_access.h"
5
6
  #include "stack_recorder.h"
6
7
 
7
8
  #define MAX_FRAMES_LIMIT 3000
8
9
  #define MAX_FRAMES_LIMIT_AS_STRING "3000"
9
10
 
10
- typedef struct sampling_buffer sampling_buffer;
11
+ // Used as scratch space during sampling
12
+ typedef struct {
13
+ uint16_t max_frames;
14
+ ddog_prof_Location *locations;
15
+ frame_info *stack_buffer;
16
+ bool pending_sample;
17
+ bool is_marking; // Used to avoid recording a sample when marking
18
+ int pending_sample_result;
19
+ } sampling_buffer;
11
20
 
12
21
  void sample_thread(
13
22
  VALUE thread,
14
23
  sampling_buffer* buffer,
15
24
  VALUE recorder_instance,
16
25
  sample_values values,
17
- sample_labels labels
26
+ sample_labels labels,
27
+ bool native_filenames_enabled,
28
+ st_table *native_filenames_cache
18
29
  );
19
30
  void record_placeholder_stack(
20
31
  VALUE recorder_instance,
@@ -22,6 +33,12 @@ void record_placeholder_stack(
22
33
  sample_labels labels,
23
34
  ddog_CharSlice placeholder_stack
24
35
  );
36
+ bool prepare_sample_thread(VALUE thread, sampling_buffer *buffer);
37
+
25
38
  uint16_t sampling_buffer_check_max_frames(int max_frames);
26
- sampling_buffer *sampling_buffer_new(uint16_t max_frames, ddog_prof_Location *locations);
39
+ void sampling_buffer_initialize(sampling_buffer *buffer, uint16_t max_frames, ddog_prof_Location *locations);
27
40
  void sampling_buffer_free(sampling_buffer *buffer);
41
+ void sampling_buffer_mark(sampling_buffer *buffer);
42
+ static inline bool sampling_buffer_needs_marking(sampling_buffer *buffer) {
43
+ return buffer->pending_sample && buffer->pending_sample_result > 0;
44
+ }