ddtrace 1.5.1 → 1.6.0

Files changed (117)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +63 -1
  3. data/ext/ddtrace_profiling_loader/ddtrace_profiling_loader.c +9 -2
  4. data/ext/ddtrace_profiling_loader/extconf.rb +17 -0
  5. data/ext/ddtrace_profiling_native_extension/NativeExtensionDesign.md +38 -2
  6. data/ext/ddtrace_profiling_native_extension/clock_id.h +1 -0
  7. data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +1 -0
  8. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c +517 -42
  9. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.h +3 -0
  10. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +208 -30
  11. data/ext/ddtrace_profiling_native_extension/collectors_stack.c +156 -46
  12. data/ext/ddtrace_profiling_native_extension/collectors_stack.h +11 -2
  13. data/ext/ddtrace_profiling_native_extension/extconf.rb +11 -1
  14. data/ext/ddtrace_profiling_native_extension/http_transport.c +83 -64
  15. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +4 -4
  16. data/ext/ddtrace_profiling_native_extension/native_extension_helpers.rb +3 -2
  17. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +59 -0
  18. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +3 -0
  19. data/ext/ddtrace_profiling_native_extension/profiling.c +10 -0
  20. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +0 -1
  21. data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +4 -2
  22. data/ext/ddtrace_profiling_native_extension/stack_recorder.c +45 -29
  23. data/ext/ddtrace_profiling_native_extension/stack_recorder.h +7 -7
  24. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +44 -18
  25. data/lib/datadog/appsec/event.rb +8 -4
  26. data/lib/datadog/core/configuration/components.rb +20 -14
  27. data/lib/datadog/core/configuration/settings.rb +59 -7
  28. data/lib/datadog/core/diagnostics/environment_logger.rb +5 -1
  29. data/lib/datadog/core/utils/compression.rb +5 -1
  30. data/lib/datadog/core.rb +0 -54
  31. data/lib/datadog/profiling/collectors/cpu_and_wall_time.rb +12 -2
  32. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +5 -3
  33. data/lib/datadog/profiling/exporter.rb +2 -4
  34. data/lib/datadog/profiling/http_transport.rb +1 -1
  35. data/lib/datadog/tracing/client_ip.rb +11 -0
  36. data/lib/datadog/tracing/configuration/ext.rb +3 -1
  37. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +2 -0
  38. data/lib/datadog/tracing/contrib/dalli/ext.rb +1 -0
  39. data/lib/datadog/tracing/contrib/dalli/instrumentation.rb +4 -0
  40. data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +2 -0
  41. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +3 -0
  42. data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +2 -0
  43. data/lib/datadog/tracing/contrib/ethon/multi_patch.rb +2 -0
  44. data/lib/datadog/tracing/contrib/excon/middleware.rb +2 -0
  45. data/lib/datadog/tracing/contrib/ext.rb +6 -0
  46. data/lib/datadog/tracing/contrib/faraday/middleware.rb +2 -0
  47. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/client.rb +5 -0
  48. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/server.rb +7 -1
  49. data/lib/datadog/tracing/contrib/grpc/ext.rb +2 -0
  50. data/lib/datadog/tracing/contrib/hanami/action_tracer.rb +47 -0
  51. data/lib/datadog/tracing/contrib/hanami/configuration/settings.rb +22 -0
  52. data/lib/datadog/tracing/contrib/hanami/ext.rb +24 -0
  53. data/lib/datadog/tracing/contrib/hanami/integration.rb +44 -0
  54. data/lib/datadog/tracing/contrib/hanami/patcher.rb +33 -0
  55. data/lib/datadog/tracing/contrib/hanami/plugin.rb +23 -0
  56. data/lib/datadog/tracing/contrib/hanami/renderer_policy_tracing.rb +41 -0
  57. data/lib/datadog/tracing/contrib/hanami/router_tracing.rb +44 -0
  58. data/lib/datadog/tracing/contrib/http/instrumentation.rb +2 -0
  59. data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +2 -0
  60. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +2 -0
  61. data/lib/datadog/tracing/contrib/mongodb/ext.rb +7 -0
  62. data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +4 -0
  63. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +12 -0
  64. data/lib/datadog/tracing/contrib/mysql2/ext.rb +1 -0
  65. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +16 -0
  66. data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +12 -0
  67. data/lib/datadog/tracing/contrib/pg/ext.rb +2 -1
  68. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +34 -18
  69. data/lib/datadog/tracing/contrib/propagation/sql_comment/comment.rb +43 -0
  70. data/lib/datadog/tracing/contrib/propagation/sql_comment/ext.rb +32 -0
  71. data/lib/datadog/tracing/contrib/propagation/sql_comment/mode.rb +28 -0
  72. data/lib/datadog/tracing/contrib/propagation/sql_comment.rb +49 -0
  73. data/lib/datadog/tracing/contrib/rack/middlewares.rb +15 -7
  74. data/lib/datadog/tracing/contrib/redis/ext.rb +2 -0
  75. data/lib/datadog/tracing/contrib/redis/instrumentation.rb +4 -2
  76. data/lib/datadog/tracing/contrib/redis/patcher.rb +41 -0
  77. data/lib/datadog/tracing/contrib/redis/tags.rb +5 -0
  78. data/lib/datadog/tracing/contrib/rest_client/request_patch.rb +2 -0
  79. data/lib/datadog/tracing/contrib/sinatra/env.rb +12 -23
  80. data/lib/datadog/tracing/contrib/sinatra/ext.rb +7 -3
  81. data/lib/datadog/tracing/contrib/sinatra/patcher.rb +2 -2
  82. data/lib/datadog/tracing/contrib/sinatra/tracer.rb +8 -80
  83. data/lib/datadog/tracing/contrib/sinatra/tracer_middleware.rb +14 -9
  84. data/lib/datadog/tracing/contrib.rb +1 -0
  85. data/lib/datadog/tracing/distributed/datadog_tags_codec.rb +84 -0
  86. data/lib/datadog/tracing/distributed/headers/datadog.rb +122 -30
  87. data/lib/datadog/tracing/distributed/headers/ext.rb +2 -0
  88. data/lib/datadog/tracing/flush.rb +1 -1
  89. data/lib/datadog/tracing/metadata/ext.rb +8 -0
  90. data/lib/datadog/tracing/propagation/http.rb +9 -1
  91. data/lib/datadog/tracing/sampling/ext.rb +31 -0
  92. data/lib/datadog/tracing/sampling/priority_sampler.rb +46 -4
  93. data/lib/datadog/tracing/sampling/rate_by_key_sampler.rb +8 -9
  94. data/lib/datadog/tracing/sampling/rate_by_service_sampler.rb +29 -5
  95. data/lib/datadog/tracing/sampling/rate_sampler.rb +10 -3
  96. data/lib/datadog/tracing/sampling/rule_sampler.rb +4 -3
  97. data/lib/datadog/tracing/sampling/span/ext.rb +0 -4
  98. data/lib/datadog/tracing/sampling/span/rule.rb +1 -1
  99. data/lib/datadog/tracing/sampling/span/sampler.rb +14 -3
  100. data/lib/datadog/tracing/trace_digest.rb +3 -0
  101. data/lib/datadog/tracing/trace_operation.rb +10 -0
  102. data/lib/datadog/tracing/trace_segment.rb +6 -0
  103. data/lib/datadog/tracing/tracer.rb +3 -1
  104. data/lib/datadog/tracing/writer.rb +7 -0
  105. data/lib/ddtrace/transport/trace_formatter.rb +7 -0
  106. data/lib/ddtrace/transport/traces.rb +1 -1
  107. data/lib/ddtrace/version.rb +2 -2
  108. metadata +18 -14
  109. data/lib/datadog/profiling/old_ext.rb +0 -42
  110. data/lib/datadog/profiling/transport/http/api/endpoint.rb +0 -85
  111. data/lib/datadog/profiling/transport/http/api/instance.rb +0 -38
  112. data/lib/datadog/profiling/transport/http/api/spec.rb +0 -42
  113. data/lib/datadog/profiling/transport/http/api.rb +0 -45
  114. data/lib/datadog/profiling/transport/http/builder.rb +0 -30
  115. data/lib/datadog/profiling/transport/http/client.rb +0 -37
  116. data/lib/datadog/profiling/transport/http/response.rb +0 -21
  117. data/lib/datadog/profiling/transport/http.rb +0 -118
data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c
@@ -13,8 +13,63 @@
  //
  // Triggering of this component (e.g. deciding when to take a sample) is implemented in Collectors::CpuAndWallTimeWorker.

+ // ---
+ // ## Tracking of cpu-time and wall-time spent during garbage collection
+ //
+ // This feature works by having an implicit state that a thread can be in: doing garbage collection. This state is
+ // tracked inside the thread's `per_thread_context.gc_tracking` data and is managed by the three functions listed below,
+ // which get called by the `Collectors::CpuAndWallTimeWorker` at very specific times in the VM lifetime.
+ //
+ // * `cpu_and_wall_time_collector_on_gc_start`: Called at the very beginning of the garbage collection process.
+ //   The internal VM `during_gc` flag is set to `true`, but Ruby has not done any work yet.
+ // * `cpu_and_wall_time_collector_on_gc_finish`: Called at the very end of the garbage collection process.
+ //   The internal VM `during_gc` flag is still set to `true`, but all the work has been done.
+ // * `cpu_and_wall_time_collector_sample_after_gc`: Called shortly after the garbage collection process.
+ //   The internal VM `during_gc` flag is set to `false`.
+ //
+ // Inside this component, here's what happens inside those three functions:
+ //
+ // When `cpu_and_wall_time_collector_on_gc_start` gets called, the current cpu and wall-time get recorded to the thread
+ // context: `cpu_time_at_gc_start_ns` and `wall_time_at_gc_start_ns`.
+ //
+ // While these fields are set, regular samples (if any) do not account for any time that passes after these two
+ // timestamps.
+ //
+ // (Regular samples can still account for the time between the previous sample and the start of GC.)
+ //
+ // When `cpu_and_wall_time_collector_on_gc_finish` gets called, the current cpu and wall-time again get recorded to the
+ // thread context: `cpu_time_at_gc_finish_ns` and `wall_time_at_gc_finish_ns`.
+ //
+ // Finally, when `cpu_and_wall_time_collector_sample_after_gc` gets called, the following happens:
+ //
+ // 1. A sample gets taken, using the special `SAMPLE_IN_GC` sample type, which produces a stack with a placeholder
+ //    `Garbage Collection` frame as the latest frame. This sample gets assigned the cpu-time and wall-time period that was
+ //    recorded between calls to `on_gc_start` and `on_gc_finish`.
+ //
+ // 2. The thread is no longer marked as being in gc (all gc tracking fields get reset back to `INVALID_TIME`).
+ //
+ // 3. The `cpu_time_at_previous_sample_ns` and `wall_time_at_previous_sample_ns` get updated with the elapsed time in
+ //    GC, so that all time is accounted for -- i.e. the next sample will not get "blamed" for time spent in GC.
+ //
+ // In an earlier attempt at implementing this functionality (https://github.com/DataDog/dd-trace-rb/pull/2308), we
+ // discovered that we needed to factor the sampling work away from `cpu_and_wall_time_collector_on_gc_finish` and into a
+ // separate `cpu_and_wall_time_collector_sample_after_gc` because (as documented in more detail below)
+ // `sample_after_gc` could trigger memory allocation on rare occasions (usually exceptions), which is not allowed to
+ // happen during Ruby's garbage collection start/finish hooks.
+ // ---
+
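To make the three-phase protocol described in the comment above concrete, here is a minimal, standalone C sketch (not part of the gem; names and timings are hypothetical, and only the wall-time fields are modeled) of the `gc_tracking` window, including how back-to-back GC cycles get collapsed into a single period:

```c
#include <stdio.h>

#define INVALID_TIME -1

// Stand-in for the per_thread_context.gc_tracking struct (wall-time fields only)
struct gc_tracking { long wall_time_at_start_ns; long wall_time_at_finish_ns; };

static void on_gc_start(struct gc_tracking *gc, long now_ns) {
  // An unprocessed GC sample is still pending: collapse this cycle into it by
  // keeping the original start time (mirrors the early return in the real code)
  if (gc->wall_time_at_finish_ns != INVALID_TIME) return;
  gc->wall_time_at_start_ns = now_ns;
}

static void on_gc_finish(struct gc_tracking *gc, long now_ns) {
  if (gc->wall_time_at_start_ns == INVALID_TIME) return; // never saw a start
  gc->wall_time_at_finish_ns = now_ns;
}

static long sample_after_gc(struct gc_tracking *gc) {
  long elapsed_ns = gc->wall_time_at_finish_ns - gc->wall_time_at_start_ns;
  // Mark the thread as no longer being in GC
  gc->wall_time_at_start_ns = gc->wall_time_at_finish_ns = INVALID_TIME;
  return elapsed_ns;
}

int main(void) {
  struct gc_tracking gc = { INVALID_TIME, INVALID_TIME };
  on_gc_start(&gc, 0);  // first GC cycle: t=0..1
  on_gc_finish(&gc, 1);
  on_gc_start(&gc, 2);  // second cycle starts before sample_after_gc could run
  on_gc_finish(&gc, 3);
  // Both cycles get attributed as one longer GC period: t=0..3
  printf("GC sample covers %ld ns\n", sample_after_gc(&gc)); // => 3
  return 0;
}
```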
  #define INVALID_TIME -1
- #define THREAD_ID_LIMIT_CHARS 20
+ #define THREAD_ID_LIMIT_CHARS 44 // Why 44? "#{2**64} (#{2**64})".size + 1 for \0
+ #define RAISE_ON_FAILURE true
+ #define DO_NOT_RAISE_ON_FAILURE false
+ #define IS_WALL_TIME true
+ #define IS_NOT_WALL_TIME false
+ #define MISSING_TRACER_CONTEXT_KEY 0
+
+ static ID at_active_trace_id; // id of :@active_trace in Ruby
+ static ID at_root_span_id;    // id of :@root_span in Ruby
+ static ID at_active_span_id;  // id of :@active_span in Ruby
+ static ID at_id_id;           // id of :@id in Ruby

  // Contains state for a single CpuAndWallTime instance
  struct cpu_and_wall_time_collector_state {
@@ -27,17 +82,53 @@ struct cpu_and_wall_time_collector_state {
    st_table *hash_map_per_thread_context;
    // Datadog::Profiling::StackRecorder instance
    VALUE recorder_instance;
-   // Track how many samples we've taken.
+   // If the tracer is available and enabled, this will be the fiber-local symbol for accessing its running context,
+   // to enable code hotspots and endpoint aggregation.
+   // When not available, this is set to MISSING_TRACER_CONTEXT_KEY.
+   ID tracer_context_key;
+   // Track how many regular samples we've taken. Does not include garbage collection samples.
+   // Currently **outside** of stats struct because we also use it to decide when to clean the contexts, and thus this
+   // is not (just) a stat.
    unsigned int sample_count;
+
+   struct {
+     // Track how many garbage collection samples we've taken.
+     unsigned int gc_samples;
+     // See cpu_and_wall_time_collector_on_gc_start for details
+     unsigned int gc_samples_missed_due_to_missing_context;
+   } stats;
  };

  // Tracks per-thread state
  struct per_thread_context {
    char thread_id[THREAD_ID_LIMIT_CHARS];
-   ddprof_ffi_CharSlice thread_id_char_slice;
+   ddog_CharSlice thread_id_char_slice;
    thread_cpu_time_id thread_cpu_time_id;
    long cpu_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized or if getting it fails for another reason
    long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized
+
+   struct {
+     // Both of these fields are set by on_gc_start and kept until sample_after_gc is called.
+     // Outside of this window, they will be INVALID_TIME.
+     long cpu_time_at_start_ns;
+     long wall_time_at_start_ns;
+
+     // Both of these fields are set by on_gc_finish and kept until sample_after_gc is called.
+     // Outside of this window, they will be INVALID_TIME.
+     long cpu_time_at_finish_ns;
+     long wall_time_at_finish_ns;
+   } gc_tracking;
+ };
+
+ // Used to correlate profiles with traces
+ struct trace_identifiers {
+   #define MAXIMUM_LENGTH_64_BIT_IDENTIFIER 21 // Why 21? 2^64 => 20 digits + 1 for \0
+
+   bool valid;
+   ddog_CharSlice local_root_span_id;
+   ddog_CharSlice span_id;
+   char local_root_span_id_buffer[MAXIMUM_LENGTH_64_BIT_IDENTIFIER];
+   char span_id_buffer[MAXIMUM_LENGTH_64_BIT_IDENTIFIER];
  };
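As a quick aside on the two buffer sizes above (`THREAD_ID_LIMIT_CHARS` of 44 and `MAXIMUM_LENGTH_64_BIT_IDENTIFIER` of 21): a 64-bit integer needs at most 20 decimal digits, so the worst cases are the `"#{2**64} (#{2**64})"` thread id string (20 + 3 + 20 characters + 1 for the terminator = 44) and a lone id (20 + 1 = 21). A standalone check (not part of the gem) confirming the arithmetic:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  char thread_id[44]; // THREAD_ID_LIMIT_CHARS
  char span_id[21];   // MAXIMUM_LENGTH_64_BIT_IDENTIFIER

  // UINT64_MAX (18446744073709551615) is the widest 64-bit value: 20 digits
  int thread_id_len = snprintf(thread_id, sizeof(thread_id), "%"PRIu64" (%"PRIu64")", UINT64_MAX, UINT64_MAX);
  int span_id_len = snprintf(span_id, sizeof(span_id), "%"PRIu64, UINT64_MAX);

  // snprintf returns the length excluding the trailing '\0', so both must be
  // strictly smaller than their buffer sizes for the strings to fit untruncated
  printf("%d fits in 44: %s\n", thread_id_len, thread_id_len < 44 ? "yes" : "no"); // 43 -> yes
  printf("%d fits in 21: %s\n", span_id_len, span_id_len < 21 ? "yes" : "no");     // 20 -> yes
  return 0;
}
```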

  static void cpu_and_wall_time_collector_typed_data_mark(void *state_ptr);
@@ -45,21 +136,35 @@ static void cpu_and_wall_time_collector_typed_data_free(void *state_ptr);
  static int hash_map_per_thread_context_mark(st_data_t key_thread, st_data_t _value, st_data_t _argument);
  static int hash_map_per_thread_context_free_values(st_data_t _thread, st_data_t value_per_thread_context, st_data_t _argument);
  static VALUE _native_new(VALUE klass);
- static VALUE _native_initialize(VALUE self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames);
+ static VALUE _native_initialize(VALUE self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames, VALUE tracer_context_key);
  static VALUE _native_sample(VALUE self, VALUE collector_instance);
+ static VALUE _native_on_gc_start(VALUE self, VALUE collector_instance);
+ static VALUE _native_on_gc_finish(VALUE self, VALUE collector_instance);
+ static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
+ static void trigger_sample_for_thread(
+   struct cpu_and_wall_time_collector_state *state,
+   VALUE thread,
+   struct per_thread_context *thread_context,
+   ddog_Slice_i64 metric_values_slice,
+   sample_type type
+ );
  static VALUE _native_thread_list(VALUE self);
  static struct per_thread_context *get_or_create_context_for(VALUE thread, struct cpu_and_wall_time_collector_state *state);
+ static struct per_thread_context *get_context_for(VALUE thread, struct cpu_and_wall_time_collector_state *state);
  static void initialize_context(VALUE thread, struct per_thread_context *thread_context);
  static VALUE _native_inspect(VALUE self, VALUE collector_instance);
  static VALUE per_thread_context_st_table_as_ruby_hash(struct cpu_and_wall_time_collector_state *state);
  static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
+ static VALUE stats_as_ruby_hash(struct cpu_and_wall_time_collector_state *state);
  static void remove_context_for_dead_threads(struct cpu_and_wall_time_collector_state *state);
  static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument);
  static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance);
- static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns);
+ static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns, bool is_wall_time);
  static long cpu_time_now_ns(struct per_thread_context *thread_context);
- static long wall_time_now_ns();
+ static long wall_time_now_ns(bool raise_on_failure);
  static long thread_id_for(VALUE thread);
+ static VALUE _native_stats(VALUE self, VALUE collector_instance);
+ static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);

  void collectors_cpu_and_wall_time_init(VALUE profiling_module) {
    VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -77,11 +182,20 @@ void collectors_cpu_and_wall_time_init(VALUE profiling_module) {
    // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
    rb_define_alloc_func(collectors_cpu_and_wall_time_class, _native_new);

-   rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_initialize", _native_initialize, 3);
+   rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_initialize", _native_initialize, 4);
    rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_inspect", _native_inspect, 1);
    rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 1);
+   rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1);
+   rb_define_singleton_method(testing_module, "_native_on_gc_finish", _native_on_gc_finish, 1);
+   rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 1);
    rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
    rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
+   rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
+
+   at_active_trace_id = rb_intern_const("@active_trace");
+   at_root_span_id = rb_intern_const("@root_span");
+   at_active_span_id = rb_intern_const("@active_span");
+   at_id_id = rb_intern_const("@id");
  }

  // This structure is used to define a Ruby object that stores a pointer to a struct cpu_and_wall_time_collector_state
@@ -147,12 +261,12 @@ static VALUE _native_new(VALUE klass) {
      // "numtable" is an awful name, but TL;DR it's what should be used when keys are `VALUE`s.
      st_init_numtable();
    state->recorder_instance = Qnil;
-   state->sample_count = 0;
+   state->tracer_context_key = MISSING_TRACER_CONTEXT_KEY;

    return TypedData_Wrap_Struct(klass, &cpu_and_wall_time_collector_typed_data, state);
  }

- static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames) {
+ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames, VALUE tracer_context_key) {
    struct cpu_and_wall_time_collector_state *state;
    TypedData_Get_Struct(collector_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);

@@ -164,6 +278,14 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_inst
    // hash_map_per_thread_context is already initialized, nothing to do here
    state->recorder_instance = enforce_recorder_instance(recorder_instance);

+   if (RTEST(tracer_context_key)) {
+     ENFORCE_TYPE(tracer_context_key, T_SYMBOL);
+     // Note about rb_to_id and dynamic symbols: calling `rb_to_id` prevents symbols from ever being garbage collected.
+     // In this case, we can't really escape this because as of this writing, ruby master still calls `rb_to_id` inside
+     // the implementation of Thread#[]= so any symbol that gets used as a key there will already be prevented from GC.
+     state->tracer_context_key = rb_to_id(tracer_context_key);
+   }
+
    return Qtrue;
  }

@@ -174,18 +296,40 @@ static VALUE _native_sample(DDTRACE_UNUSED VALUE _self, VALUE collector_instance
    return Qtrue;
  }

+ // This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
+ // It SHOULD NOT be used for other purposes.
+ static VALUE _native_on_gc_start(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
+   cpu_and_wall_time_collector_on_gc_start(collector_instance);
+   return Qtrue;
+ }
+
+ // This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
+ // It SHOULD NOT be used for other purposes.
+ static VALUE _native_on_gc_finish(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
+   cpu_and_wall_time_collector_on_gc_finish(collector_instance);
+   return Qtrue;
+ }
+
+ // This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
+ // It SHOULD NOT be used for other purposes.
+ static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
+   cpu_and_wall_time_collector_sample_after_gc(collector_instance);
+   return Qtrue;
+ }
+
  // This function gets called from the Collectors::CpuAndWallTimeWorker to trigger the actual sampling.
  //
  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
  // Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
  // Assumption 3: This function IS NOT called from a signal handler. This function is not async-signal-safe.
  // Assumption 4: This function IS NOT called in a reentrant way.
+ // Assumption 5: This function is called from the main Ractor (if Ruby has support for Ractors).
  VALUE cpu_and_wall_time_collector_sample(VALUE self_instance) {
    struct cpu_and_wall_time_collector_state *state;
    TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);

    VALUE threads = ddtrace_thread_list();
-   long current_wall_time_ns = wall_time_now_ns();
+   long current_wall_time_ns = wall_time_now_ns(RAISE_ON_FAILURE);

    const long thread_count = RARRAY_LEN(threads);
    for (long i = 0; i < thread_count; i++) {
@@ -194,10 +338,18 @@ VALUE cpu_and_wall_time_collector_sample(VALUE self_instance) {

      long current_cpu_time_ns = cpu_time_now_ns(thread_context);

-     long cpu_time_elapsed_ns =
-       update_time_since_previous_sample(&thread_context->cpu_time_at_previous_sample_ns, current_cpu_time_ns);
-     long wall_time_elapsed_ns =
-       update_time_since_previous_sample(&thread_context->wall_time_at_previous_sample_ns, current_wall_time_ns);
+     long cpu_time_elapsed_ns = update_time_since_previous_sample(
+       &thread_context->cpu_time_at_previous_sample_ns,
+       current_cpu_time_ns,
+       thread_context->gc_tracking.cpu_time_at_start_ns,
+       IS_NOT_WALL_TIME
+     );
+     long wall_time_elapsed_ns = update_time_since_previous_sample(
+       &thread_context->wall_time_at_previous_sample_ns,
+       current_wall_time_ns,
+       thread_context->gc_tracking.wall_time_at_start_ns,
+       IS_WALL_TIME
+     );

      int64_t metric_values[ENABLED_VALUE_TYPES_COUNT] = {0};

@@ -205,26 +357,12 @@ VALUE cpu_and_wall_time_collector_sample(VALUE self_instance) {
      metric_values[CPU_SAMPLES_VALUE_POS] = 1;
      metric_values[WALL_TIME_VALUE_POS] = wall_time_elapsed_ns;

-     VALUE thread_name = thread_name_for(thread);
-     bool have_thread_name = thread_name != Qnil;
-
-     int label_count = 1 + (have_thread_name ? 1 : 0);
-     ddprof_ffi_Label labels[label_count];
-
-     labels[0] = (ddprof_ffi_Label) {.key = DDPROF_FFI_CHARSLICE_C("thread id"), .str = thread_context->thread_id_char_slice};
-     if (have_thread_name) {
-       labels[1] = (ddprof_ffi_Label) {
-         .key = DDPROF_FFI_CHARSLICE_C("thread name"),
-         .str = char_slice_from_ruby_string(thread_name)
-       };
-     }
-
-     sample_thread(
+     trigger_sample_for_thread(
+       state,
        thread,
-       state->sampling_buffer,
-       state->recorder_instance,
-       (ddprof_ffi_Slice_i64) {.ptr = metric_values, .len = ENABLED_VALUE_TYPES_COUNT},
-       (ddprof_ffi_Slice_label) {.ptr = labels, .len = label_count}
+       thread_context,
+       (ddog_Slice_i64) {.ptr = metric_values, .len = ENABLED_VALUE_TYPES_COUNT},
+       SAMPLE_REGULAR
      );
    }

@@ -238,6 +376,226 @@ VALUE cpu_and_wall_time_collector_sample(VALUE self_instance) {
    return Qnil;
  }

+ // This function gets called when Ruby is about to start running the Garbage Collector on the current thread.
+ // It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
+ // create a stack sample that blames the cpu/wall time spent from now until the end of the garbage collector work.
+ //
+ // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
+ // *NO ALLOCATION* is allowed. This function, and any functions it calls, must never trigger memory or object allocation.
+ // This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)!
+ //
+ // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
+ // Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
+ void cpu_and_wall_time_collector_on_gc_start(VALUE self_instance) {
+   struct cpu_and_wall_time_collector_state *state;
+   if (!rb_typeddata_is_kind_of(self_instance, &cpu_and_wall_time_collector_typed_data)) return;
+   // This should never fail if the above check passes
+   TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
+
+   struct per_thread_context *thread_context = get_context_for(rb_thread_current(), state);
+
+   // If there was no previously-existing context for this thread, we won't allocate one (see safety). For now we just drop
+   // the GC sample, under the assumption that "a thread that is so new that we never sampled it even once before it triggers
+   // GC" is a rare enough case that we can just ignore it.
+   // We can always improve this later if we find that this happens often (and we have the counter to help us figure that out)!
+   if (thread_context == NULL) {
+     state->stats.gc_samples_missed_due_to_missing_context++;
+     return;
+   }
+
+   // If these fields are set, there's an existing GC sample that still needs to be written out by `sample_after_gc`.
+   //
+   // When can this happen? Because we don't have precise control over when `sample_after_gc` gets called (it will be
+   // called sometime after GC finishes), there is no way to guarantee that Ruby will not trigger more than one GC cycle
+   // before we can actually run that method.
+   //
+   // We handle this by collapsing multiple GC cycles into one. That is, if the following happens:
+   // `on_gc_start` (time=0) -> `on_gc_finish` (time=1) -> `on_gc_start` (time=2) -> `on_gc_finish` (time=3) -> `sample_after_gc`
+   // then we just use time=0 from the first on_gc_start and time=3 from the last on_gc_finish, i.e. we behave as if
+   // there was a single, longer GC period.
+   if (thread_context->gc_tracking.cpu_time_at_finish_ns != INVALID_TIME &&
+     thread_context->gc_tracking.wall_time_at_finish_ns != INVALID_TIME) return;
+
+   // Here we record the wall-time first and in on_gc_finish we record it second to avoid having wall-time be slightly < cpu-time
+   thread_context->gc_tracking.wall_time_at_start_ns = wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+   thread_context->gc_tracking.cpu_time_at_start_ns = cpu_time_now_ns(thread_context);
+ }
+
+ // This function gets called when Ruby has finished running the Garbage Collector on the current thread.
+ // It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
+ // create a stack sample that blames the cpu/wall time spent from the start of garbage collector work until now.
+ //
+ // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
+ // *NO ALLOCATION* is allowed. This function, and any functions it calls, must never trigger memory or object allocation.
+ // This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)!
+ //
+ // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
+ // Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
+ void cpu_and_wall_time_collector_on_gc_finish(VALUE self_instance) {
+   struct cpu_and_wall_time_collector_state *state;
+   if (!rb_typeddata_is_kind_of(self_instance, &cpu_and_wall_time_collector_typed_data)) return;
+   // This should never fail if the above check passes
+   TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
+
+   struct per_thread_context *thread_context = get_context_for(rb_thread_current(), state);
+
+   // If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for
+   // how often this happens -- see on_gc_start.
+   if (thread_context == NULL) return;
+
+   if (thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME &&
+     thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME) {
+     // If this happened, it means that on_gc_start was either never called for the thread OR it was called but no thread
+     // context existed at the time. The former can be the result of a bug, but since we can't distinguish them, we just
+     // do nothing.
+     return;
+   }
+
+   // Here we record the wall-time second and in on_gc_start we record it first to avoid having wall-time be slightly < cpu-time
+   thread_context->gc_tracking.cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
+   thread_context->gc_tracking.wall_time_at_finish_ns = wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+ }
+
+ // This function gets called shortly after Ruby has finished running the Garbage Collector.
+ // It creates a new sample including the cpu and wall-time spent by the garbage collector work, and resets any
+ // GC-related tracking.
+ //
+ // Specifically, it will search for thread(s) which have gone through a cycle of on_gc_start/on_gc_finish
+ // and thus have cpu_time_at_start_ns, cpu_time_at_finish_ns, wall_time_at_start_ns, wall_time_at_finish_ns
+ // set on their context.
+ //
+ // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
+ // Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
+ // Assumption 3: Unlike `on_gc_start` and `on_gc_finish`, this method is allowed to allocate memory as needed.
+ // Assumption 4: This function is called from the main Ractor (if Ruby has support for Ractors).
+ VALUE cpu_and_wall_time_collector_sample_after_gc(VALUE self_instance) {
+   struct cpu_and_wall_time_collector_state *state;
+   TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
+
+   VALUE threads = ddtrace_thread_list();
+   bool sampled_any_thread = false;
+
+   const long thread_count = RARRAY_LEN(threads);
+   for (long i = 0; i < thread_count; i++) {
+     VALUE thread = RARRAY_AREF(threads, i);
+     struct per_thread_context *thread_context = get_or_create_context_for(thread, state);
+
+     if (
+       thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME ||
+       thread_context->gc_tracking.cpu_time_at_finish_ns == INVALID_TIME ||
+       thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME ||
+       thread_context->gc_tracking.wall_time_at_finish_ns == INVALID_TIME
+     ) continue; // Ignore threads with no/incomplete garbage collection data
+
+     sampled_any_thread = true;
+
+     long gc_cpu_time_elapsed_ns =
+       thread_context->gc_tracking.cpu_time_at_finish_ns - thread_context->gc_tracking.cpu_time_at_start_ns;
+     long gc_wall_time_elapsed_ns =
+       thread_context->gc_tracking.wall_time_at_finish_ns - thread_context->gc_tracking.wall_time_at_start_ns;
+
+     // We don't expect non-wall time to go backwards, so let's flag this as a bug
+     if (gc_cpu_time_elapsed_ns < 0) rb_raise(rb_eRuntimeError, "BUG: Unexpected negative gc_cpu_time_elapsed_ns between samples");
+     // Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
+     // was a bug.
+     // @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
+     // https://github.com/DataDog/dd-trace-rb/pull/2336.
+     if (gc_wall_time_elapsed_ns < 0) gc_wall_time_elapsed_ns = 0;
+
+     if (thread_context->gc_tracking.wall_time_at_start_ns == 0 && thread_context->gc_tracking.wall_time_at_finish_ns != 0) {
+       // Avoid using wall-clock if we got 0 for a start (meaning there was an error) but not 0 for finish so we don't
+       // come up with a crazy value for the frame
+       rb_raise(rb_eRuntimeError, "BUG: Unexpected zero value for gc_tracking.wall_time_at_start_ns");
+     }
+
+     int64_t metric_values[ENABLED_VALUE_TYPES_COUNT] = {0};
+
+     metric_values[CPU_TIME_VALUE_POS] = gc_cpu_time_elapsed_ns;
+     metric_values[CPU_SAMPLES_VALUE_POS] = 1;
+     metric_values[WALL_TIME_VALUE_POS] = gc_wall_time_elapsed_ns;
+
+     trigger_sample_for_thread(
+       state,
+       thread,
+       thread_context,
+       (ddog_Slice_i64) {.ptr = metric_values, .len = ENABLED_VALUE_TYPES_COUNT},
+       SAMPLE_IN_GC
+     );
+
+     // Mark thread as no longer in GC
+     thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
+     thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
+     thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
+     thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
+
+     // Update counters so that they won't include the time in GC during the next sample
+     if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
+       thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
+     }
+     if (thread_context->wall_time_at_previous_sample_ns != INVALID_TIME) {
+       thread_context->wall_time_at_previous_sample_ns += gc_wall_time_elapsed_ns;
+     }
+   }
+
+   if (sampled_any_thread) state->stats.gc_samples++;
+
+   // Return a VALUE to make it easier to call this function from Ruby APIs that expect a return value (such as rb_rescue2)
+   return Qnil;
+ }
+
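To see what the "update counters" step at the end of `sample_after_gc` buys us, here is a small standalone numeric sketch (not part of the gem; all values hypothetical, wall-time only): advancing the previous-sample marker past the GC period means the next regular sample is not also charged for it.

```c
#include <stdio.h>

int main(void) {
  // update_time_since_previous_sample already moved this marker to the GC start
  long wall_time_at_previous_sample_ns = 15;
  long gc_start_ns = 15, gc_finish_ns = 18;

  long gc_wall_time_elapsed_ns = gc_finish_ns - gc_start_ns; // 3ns, attributed to the GC sample

  // What sample_after_gc does: skip the GC period for future regular samples
  wall_time_at_previous_sample_ns += gc_wall_time_elapsed_ns;

  long next_sample_ns = 25;
  // The next regular sample accounts only for the 7ns after GC, not the full 10ns
  printf("next sample gets %ld ns\n", next_sample_ns - wall_time_at_previous_sample_ns);
  return 0;
}
```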
+ static void trigger_sample_for_thread(
+   struct cpu_and_wall_time_collector_state *state,
+   VALUE thread,
+   struct per_thread_context *thread_context,
+   ddog_Slice_i64 metric_values_slice,
+   sample_type type
+ ) {
+   int max_label_count =
+     1 + // thread id
+     1 + // thread name
+     2;  // local root span id and span id
+   ddog_Label labels[max_label_count];
+   int label_pos = 0;
+
+   labels[label_pos++] = (ddog_Label) {
+     .key = DDOG_CHARSLICE_C("thread id"),
+     .str = thread_context->thread_id_char_slice
+   };
+
+   VALUE thread_name = thread_name_for(thread);
+   if (thread_name != Qnil) {
+     labels[label_pos++] = (ddog_Label) {
+       .key = DDOG_CHARSLICE_C("thread name"),
+       .str = char_slice_from_ruby_string(thread_name)
+     };
+   }
+
+   struct trace_identifiers trace_identifiers_result = {.valid = false};
+   trace_identifiers_for(state, thread, &trace_identifiers_result);
+
+   if (trace_identifiers_result.valid) {
+     labels[label_pos++] = (ddog_Label) {.key = DDOG_CHARSLICE_C("local root span id"), .str = trace_identifiers_result.local_root_span_id};
+     labels[label_pos++] = (ddog_Label) {.key = DDOG_CHARSLICE_C("span id"), .str = trace_identifiers_result.span_id};
+   }
+
+   // The number of times `label_pos++` shows up in this function needs to match `max_label_count`. To avoid "oops I
+   // forgot to update max_label_count" in the future, we've also added this validation.
+   // @ivoanjo: I wonder if C compilers are smart enough to statically prove that this check never triggers, and
+   // remove it entirely.
+   if (label_pos > max_label_count) {
+     rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
+   }
+
+   sample_thread(
+     thread,
+     state->sampling_buffer,
+     state->recorder_instance,
+     metric_values_slice,
+     (ddog_Slice_label) {.ptr = labels, .len = label_pos},
+     type
+   );
+ }
+
  // This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
  // It SHOULD NOT be used for other purposes.
  static VALUE _native_thread_list(DDTRACE_UNUSED VALUE _self) {
@@ -259,15 +617,32 @@ static struct per_thread_context *get_or_create_context_for(VALUE thread, struct
    return thread_context;
  }

+ static struct per_thread_context *get_context_for(VALUE thread, struct cpu_and_wall_time_collector_state *state) {
+   struct per_thread_context* thread_context = NULL;
+   st_data_t value_context = 0;
+
+   if (st_lookup(state->hash_map_per_thread_context, (st_data_t) thread, &value_context)) {
+     thread_context = (struct per_thread_context*) value_context;
+   }
+
+   return thread_context;
+ }
+
  static void initialize_context(VALUE thread, struct per_thread_context *thread_context) {
-   snprintf(thread_context->thread_id, THREAD_ID_LIMIT_CHARS, "%ld", thread_id_for(thread));
-   thread_context->thread_id_char_slice = (ddprof_ffi_CharSlice) {.ptr = thread_context->thread_id, .len = strlen(thread_context->thread_id)};
+   snprintf(thread_context->thread_id, THREAD_ID_LIMIT_CHARS, "%"PRIu64" (%lu)", native_thread_id_for(thread), (unsigned long) thread_id_for(thread));
+   thread_context->thread_id_char_slice = (ddog_CharSlice) {.ptr = thread_context->thread_id, .len = strlen(thread_context->thread_id)};

    thread_context->thread_cpu_time_id = thread_cpu_time_id_for(thread);

    // These will get initialized during actual sampling
    thread_context->cpu_time_at_previous_sample_ns = INVALID_TIME;
    thread_context->wall_time_at_previous_sample_ns = INVALID_TIME;
+
+   // These will only be used during a GC operation
+   thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
+   thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
+   thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
+   thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
  }

  static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
@@ -279,7 +654,10 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
    // Update this when modifying state struct
    rb_str_concat(result, rb_sprintf(" hash_map_per_thread_context=%"PRIsVALUE, per_thread_context_st_table_as_ruby_hash(state)));
    rb_str_concat(result, rb_sprintf(" recorder_instance=%"PRIsVALUE, state->recorder_instance));
+   VALUE tracer_context_key = state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY ? Qnil : ID2SYM(state->tracer_context_key);
+   rb_str_concat(result, rb_sprintf(" tracer_context_key=%+"PRIsVALUE, tracer_context_key));
    rb_str_concat(result, rb_sprintf(" sample_count=%u", state->sample_count));
+   rb_str_concat(result, rb_sprintf(" stats=%"PRIsVALUE, stats_as_ruby_hash(state)));

    return result;
  }
@@ -304,13 +682,29 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value
      ID2SYM(rb_intern("thread_cpu_time_id_valid?")), /* => */ thread_context->thread_cpu_time_id.valid ? Qtrue : Qfalse,
      ID2SYM(rb_intern("thread_cpu_time_id")), /* => */ CLOCKID2NUM(thread_context->thread_cpu_time_id.clock_id),
      ID2SYM(rb_intern("cpu_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->cpu_time_at_previous_sample_ns),
-     ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns)
+     ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns),
+
+     ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
+     ID2SYM(rb_intern("gc_tracking.cpu_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_finish_ns),
+     ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
+     ID2SYM(rb_intern("gc_tracking.wall_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_finish_ns)
    };
    for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);

    return ST_CONTINUE;
  }

+ static VALUE stats_as_ruby_hash(struct cpu_and_wall_time_collector_state *state) {
+   // Update this when modifying state struct (stats inner struct)
+   VALUE stats_as_hash = rb_hash_new();
+   VALUE arguments[] = {
+     ID2SYM(rb_intern("gc_samples")), /* => */ INT2NUM(state->stats.gc_samples),
+     ID2SYM(rb_intern("gc_samples_missed_due_to_missing_context")), /* => */ INT2NUM(state->stats.gc_samples_missed_due_to_missing_context),
+   };
+   for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
+   return stats_as_hash;
+ }
+
  static void remove_context_for_dead_threads(struct cpu_and_wall_time_collector_state *state) {
    st_foreach(state->hash_map_per_thread_context, remove_if_dead_thread, 0 /* unused */);
  }
@@ -336,24 +730,58 @@ static VALUE _native_per_thread_context(DDTRACE_UNUSED VALUE _self, VALUE collec
    return per_thread_context_st_table_as_ruby_hash(state);
  }

- static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns) {
+ static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns, bool is_wall_time) {
    // If we didn't have a time for the previous sample, we use the current one
    if (*time_at_previous_sample_ns == INVALID_TIME) *time_at_previous_sample_ns = current_time_ns;

-   long elapsed_time_ns = current_time_ns - *time_at_previous_sample_ns;
-   *time_at_previous_sample_ns = current_time_ns;
+   bool is_thread_doing_gc = gc_start_time_ns != INVALID_TIME;
+   long elapsed_time_ns = -1;
+
+   if (is_thread_doing_gc) {
+     bool previous_sample_was_during_gc = gc_start_time_ns <= *time_at_previous_sample_ns;
+
+     if (previous_sample_was_during_gc) {
+       elapsed_time_ns = 0; // No time to account for -- any time since the last sample is going to get assigned to GC separately
+     } else {
+       elapsed_time_ns = gc_start_time_ns - *time_at_previous_sample_ns; // Capture time between previous sample and start of GC only
+     }
+
+     // Remaining time (from gc_start_time to current_time_ns) will be accounted for inside `sample_after_gc`
+     *time_at_previous_sample_ns = gc_start_time_ns;
+   } else {
+     elapsed_time_ns = current_time_ns - *time_at_previous_sample_ns; // Capture all time since previous sample
+     *time_at_previous_sample_ns = current_time_ns;
+   }
+
+   if (elapsed_time_ns < 0) {
+     if (is_wall_time) {
+       // Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
+       // was a bug.
+       // @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
+       // https://github.com/DataDog/dd-trace-rb/pull/2336.
+       elapsed_time_ns = 0;
+     } else {
+       // We don't expect non-wall time to go backwards, so let's flag this as a bug
+       rb_raise(rb_eRuntimeError, "BUG: Unexpected negative elapsed_time_ns between samples");
+     }
+   }

-   return elapsed_time_ns >= 0 ? elapsed_time_ns : 0 /* In case something really weird happened */;
+   return elapsed_time_ns;
  }
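A worked example of the GC branch above, as a standalone sketch (not part of the gem; wall-time case only, values hypothetical): with the previous sample at t=10ns and a GC that began at t=15ns, a regular sample taken at t=20ns only gets the 5ns before GC started, and the marker moves to the GC start so `sample_after_gc` can claim the rest.

```c
#include <stdio.h>

#define INVALID_TIME -1

// Simplified wall-time-only rendition of update_time_since_previous_sample
static long update_wall_time(long *prev_ns, long now_ns, long gc_start_ns) {
  if (*prev_ns == INVALID_TIME) *prev_ns = now_ns;
  long elapsed_ns;
  if (gc_start_ns != INVALID_TIME) {                  // thread is inside a GC window
    elapsed_ns = (gc_start_ns <= *prev_ns) ? 0        // previous sample already in GC
               : gc_start_ns - *prev_ns;              // only the time up to GC start
    *prev_ns = gc_start_ns;                           // rest is claimed by sample_after_gc
  } else {
    elapsed_ns = now_ns - *prev_ns;
    *prev_ns = now_ns;
  }
  return elapsed_ns < 0 ? 0 : elapsed_ns;             // wall time may go backwards
}

int main(void) {
  long prev_ns = 10;
  long elapsed_ns = update_wall_time(&prev_ns, /* now: */ 20, /* gc start: */ 15);
  printf("regular sample gets %ld ns, marker now at %ld ns\n", elapsed_ns, prev_ns); // 5, 15
  return 0;
}
```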

- static long wall_time_now_ns() {
+ // Safety: when raise_on_failure == false, callers rely on this function never raising exceptions
+ static long wall_time_now_ns(bool raise_on_failure) {
    struct timespec current_monotonic;

-   if (clock_gettime(CLOCK_MONOTONIC, &current_monotonic) != 0) rb_sys_fail("Failed to read CLOCK_MONOTONIC");
+   if (clock_gettime(CLOCK_MONOTONIC, &current_monotonic) != 0) {
+     if (raise_on_failure) rb_sys_fail("Failed to read CLOCK_MONOTONIC");
+     else return 0;
+   }

    return current_monotonic.tv_nsec + (current_monotonic.tv_sec * 1000 * 1000 * 1000);
  }

+ // Safety: callers rely on this function never raising exceptions
  static long cpu_time_now_ns(struct per_thread_context *thread_context) {
    thread_cpu_time cpu_time = thread_cpu_time_for(thread_context->thread_cpu_time_id);

@@ -388,3 +816,50 @@ VALUE enforce_cpu_and_wall_time_collector_instance(VALUE object) {
    Check_TypedStruct(object, &cpu_and_wall_time_collector_typed_data);
    return object;
  }
+
+ // This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
+ // It SHOULD NOT be used for other purposes.
+ //
+ // Returns the stats counters (the `stats` struct) being tracked.
+ static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
+   struct cpu_and_wall_time_collector_state *state;
+   TypedData_Get_Struct(collector_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
+
+   return stats_as_ruby_hash(state);
+ }
+
+ // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
+ static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
+   if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;
+
+   VALUE current_context = rb_thread_local_aref(thread, state->tracer_context_key);
+   if (current_context == Qnil) return;
+
+   VALUE active_trace = rb_ivar_get(current_context, at_active_trace_id /* @active_trace */);
+   if (active_trace == Qnil) return;
+
+   VALUE root_span = rb_ivar_get(active_trace, at_root_span_id /* @root_span */);
+   VALUE active_span = rb_ivar_get(active_trace, at_active_span_id /* @active_span */);
+   if (root_span == Qnil || active_span == Qnil) return;
+
+   VALUE numeric_local_root_span_id = rb_ivar_get(root_span, at_id_id /* @id */);
+   VALUE numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
+   if (numeric_local_root_span_id == Qnil || numeric_span_id == Qnil) return;
+
+   unsigned long long local_root_span_id = NUM2ULL(numeric_local_root_span_id);
+   unsigned long long span_id = NUM2ULL(numeric_span_id);
+
+   snprintf(trace_identifiers_result->local_root_span_id_buffer, MAXIMUM_LENGTH_64_BIT_IDENTIFIER, "%llu", local_root_span_id);
+   snprintf(trace_identifiers_result->span_id_buffer, MAXIMUM_LENGTH_64_BIT_IDENTIFIER, "%llu", span_id);
+
+   trace_identifiers_result->local_root_span_id = (ddog_CharSlice) {
+     .ptr = trace_identifiers_result->local_root_span_id_buffer,
+     .len = strlen(trace_identifiers_result->local_root_span_id_buffer)
+   };
+   trace_identifiers_result->span_id = (ddog_CharSlice) {
+     .ptr = trace_identifiers_result->span_id_buffer,
+     .len = strlen(trace_identifiers_result->span_id_buffer)
+   };
+
+   trace_identifiers_result->valid = true;
+ }