ddtrace 1.5.2 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +56 -2
  3. data/ext/ddtrace_profiling_loader/ddtrace_profiling_loader.c +9 -2
  4. data/ext/ddtrace_profiling_loader/extconf.rb +17 -0
  5. data/ext/ddtrace_profiling_native_extension/NativeExtensionDesign.md +38 -2
  6. data/ext/ddtrace_profiling_native_extension/clock_id.h +1 -0
  7. data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +1 -0
  8. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c +517 -42
  9. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.h +3 -0
  10. data/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +208 -30
  11. data/ext/ddtrace_profiling_native_extension/collectors_stack.c +156 -46
  12. data/ext/ddtrace_profiling_native_extension/collectors_stack.h +11 -2
  13. data/ext/ddtrace_profiling_native_extension/extconf.rb +11 -1
  14. data/ext/ddtrace_profiling_native_extension/http_transport.c +83 -64
  15. data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +4 -4
  16. data/ext/ddtrace_profiling_native_extension/native_extension_helpers.rb +3 -2
  17. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +59 -0
  18. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +3 -0
  19. data/ext/ddtrace_profiling_native_extension/profiling.c +10 -0
  20. data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +0 -1
  21. data/ext/ddtrace_profiling_native_extension/ruby_helpers.h +4 -2
  22. data/ext/ddtrace_profiling_native_extension/stack_recorder.c +45 -29
  23. data/ext/ddtrace_profiling_native_extension/stack_recorder.h +7 -7
  24. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +4 -0
  25. data/lib/datadog/appsec/event.rb +6 -0
  26. data/lib/datadog/core/configuration/components.rb +20 -14
  27. data/lib/datadog/core/configuration/settings.rb +42 -4
  28. data/lib/datadog/core/diagnostics/environment_logger.rb +5 -1
  29. data/lib/datadog/core/utils/compression.rb +5 -1
  30. data/lib/datadog/core.rb +0 -54
  31. data/lib/datadog/profiling/collectors/cpu_and_wall_time.rb +12 -2
  32. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +5 -3
  33. data/lib/datadog/profiling/exporter.rb +2 -4
  34. data/lib/datadog/profiling/http_transport.rb +1 -1
  35. data/lib/datadog/tracing/configuration/ext.rb +1 -0
  36. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +2 -0
  37. data/lib/datadog/tracing/contrib/dalli/ext.rb +1 -0
  38. data/lib/datadog/tracing/contrib/dalli/instrumentation.rb +4 -0
  39. data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +2 -0
  40. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +3 -0
  41. data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +2 -0
  42. data/lib/datadog/tracing/contrib/ethon/multi_patch.rb +2 -0
  43. data/lib/datadog/tracing/contrib/excon/middleware.rb +2 -0
  44. data/lib/datadog/tracing/contrib/ext.rb +6 -0
  45. data/lib/datadog/tracing/contrib/faraday/middleware.rb +2 -0
  46. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/client.rb +5 -0
  47. data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/server.rb +7 -1
  48. data/lib/datadog/tracing/contrib/grpc/ext.rb +2 -0
  49. data/lib/datadog/tracing/contrib/hanami/action_tracer.rb +47 -0
  50. data/lib/datadog/tracing/contrib/hanami/configuration/settings.rb +22 -0
  51. data/lib/datadog/tracing/contrib/hanami/ext.rb +24 -0
  52. data/lib/datadog/tracing/contrib/hanami/integration.rb +44 -0
  53. data/lib/datadog/tracing/contrib/hanami/patcher.rb +33 -0
  54. data/lib/datadog/tracing/contrib/hanami/plugin.rb +23 -0
  55. data/lib/datadog/tracing/contrib/hanami/renderer_policy_tracing.rb +41 -0
  56. data/lib/datadog/tracing/contrib/hanami/router_tracing.rb +44 -0
  57. data/lib/datadog/tracing/contrib/http/instrumentation.rb +2 -0
  58. data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +2 -0
  59. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +2 -0
  60. data/lib/datadog/tracing/contrib/mongodb/ext.rb +7 -0
  61. data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +4 -0
  62. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +12 -0
  63. data/lib/datadog/tracing/contrib/mysql2/ext.rb +1 -0
  64. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +16 -0
  65. data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +12 -0
  66. data/lib/datadog/tracing/contrib/pg/ext.rb +2 -1
  67. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +34 -18
  68. data/lib/datadog/tracing/contrib/propagation/sql_comment/comment.rb +43 -0
  69. data/lib/datadog/tracing/contrib/propagation/sql_comment/ext.rb +32 -0
  70. data/lib/datadog/tracing/contrib/propagation/sql_comment/mode.rb +28 -0
  71. data/lib/datadog/tracing/contrib/propagation/sql_comment.rb +49 -0
  72. data/lib/datadog/tracing/contrib/rack/middlewares.rb +11 -5
  73. data/lib/datadog/tracing/contrib/redis/ext.rb +2 -0
  74. data/lib/datadog/tracing/contrib/redis/instrumentation.rb +4 -2
  75. data/lib/datadog/tracing/contrib/redis/integration.rb +2 -1
  76. data/lib/datadog/tracing/contrib/redis/patcher.rb +40 -0
  77. data/lib/datadog/tracing/contrib/redis/tags.rb +5 -0
  78. data/lib/datadog/tracing/contrib/rest_client/request_patch.rb +2 -0
  79. data/lib/datadog/tracing/contrib/sinatra/env.rb +12 -23
  80. data/lib/datadog/tracing/contrib/sinatra/ext.rb +7 -3
  81. data/lib/datadog/tracing/contrib/sinatra/patcher.rb +2 -2
  82. data/lib/datadog/tracing/contrib/sinatra/tracer.rb +8 -80
  83. data/lib/datadog/tracing/contrib/sinatra/tracer_middleware.rb +14 -9
  84. data/lib/datadog/tracing/contrib.rb +1 -0
  85. data/lib/datadog/tracing/distributed/datadog_tags_codec.rb +84 -0
  86. data/lib/datadog/tracing/distributed/headers/datadog.rb +122 -30
  87. data/lib/datadog/tracing/distributed/headers/ext.rb +2 -0
  88. data/lib/datadog/tracing/flush.rb +1 -1
  89. data/lib/datadog/tracing/metadata/ext.rb +8 -0
  90. data/lib/datadog/tracing/propagation/http.rb +9 -1
  91. data/lib/datadog/tracing/sampling/ext.rb +31 -0
  92. data/lib/datadog/tracing/sampling/priority_sampler.rb +46 -4
  93. data/lib/datadog/tracing/sampling/rate_by_key_sampler.rb +8 -9
  94. data/lib/datadog/tracing/sampling/rate_by_service_sampler.rb +29 -5
  95. data/lib/datadog/tracing/sampling/rate_sampler.rb +10 -3
  96. data/lib/datadog/tracing/sampling/rule_sampler.rb +4 -3
  97. data/lib/datadog/tracing/sampling/span/ext.rb +0 -4
  98. data/lib/datadog/tracing/sampling/span/rule.rb +1 -1
  99. data/lib/datadog/tracing/sampling/span/sampler.rb +14 -3
  100. data/lib/datadog/tracing/trace_digest.rb +3 -0
  101. data/lib/datadog/tracing/trace_operation.rb +10 -0
  102. data/lib/datadog/tracing/trace_segment.rb +6 -0
  103. data/lib/datadog/tracing/tracer.rb +3 -1
  104. data/lib/datadog/tracing/writer.rb +7 -0
  105. data/lib/ddtrace/transport/trace_formatter.rb +7 -0
  106. data/lib/ddtrace/transport/traces.rb +1 -1
  107. data/lib/ddtrace/version.rb +2 -2
  108. metadata +18 -14
  109. data/lib/datadog/profiling/old_ext.rb +0 -42
  110. data/lib/datadog/profiling/transport/http/api/endpoint.rb +0 -85
  111. data/lib/datadog/profiling/transport/http/api/instance.rb +0 -38
  112. data/lib/datadog/profiling/transport/http/api/spec.rb +0 -42
  113. data/lib/datadog/profiling/transport/http/api.rb +0 -45
  114. data/lib/datadog/profiling/transport/http/builder.rb +0 -30
  115. data/lib/datadog/profiling/transport/http/client.rb +0 -37
  116. data/lib/datadog/profiling/transport/http/response.rb +0 -21
  117. data/lib/datadog/profiling/transport/http.rb +0 -118
@@ -13,8 +13,63 @@
13
13
  //
14
14
  // Triggering of this component (e.g. deciding when to take a sample) is implemented in Collectors::CpuAndWallTimeWorker.
15
15
 
16
+ // ---
17
+ // ## Tracking of cpu-time and wall-time spent during garbage collection
18
+ //
19
+ // This feature works by having an implicit state that a thread can be in: doing garbage collection. This state is
20
+ // tracked inside the thread's `per_thread_context.gc_tracking` data, and three functions, listed below. The functions
21
+ // will get called by the `Collectors::CpuAndWallTimeWorker` at very specific times in the VM lifetime.
22
+ //
23
+ // * `cpu_and_wall_time_collector_on_gc_start`: Called at the very beginning of the garbage collection process.
24
+ // The internal VM `during_gc` flag is set to `true`, but Ruby has not done any work yet.
25
+ // * `cpu_and_wall_time_collector_on_gc_finish`: Called at the very end of the garbage collection process.
26
+ // The internal VM `during_gc` flag is still set to `true`, but all the work has been done.
27
+ // * `cpu_and_wall_time_collector_sample_after_gc`: Called shortly after the garbage collection process.
28
+ // The internal VM `during_gc` flag is set to `false`.
29
+ //
30
+ // Inside this component, here's what happens inside those three functions:
31
+ //
32
+ // When `cpu_and_wall_time_collector_on_gc_start` gets called, the current cpu and wall-time get recorded to the thread
33
+ // context: `cpu_time_at_gc_start_ns` and `wall_time_at_gc_start_ns`.
34
+ //
35
+ // While these fields are set, regular samples (if any) do not account for any time that passes after these two
36
+ // timestamps.
37
+ //
38
+ // (Regular samples can still account for the time between the previous sample and the start of GC.)
39
+ //
40
+ // When `cpu_and_wall_time_collector_on_gc_finish` gets called, the current cpu and wall-time again get recorded to the
41
+ // thread context: `cpu_time_at_gc_finish_ns` and `wall_time_at_gc_finish_ns`.
42
+ //
43
+ // Finally, when `cpu_and_wall_time_collector_sample_after_gc` gets called, the following happens:
44
+ //
45
+ // 1. A sample gets taken, using the special `SAMPLE_IN_GC` sample type, which produces a stack with a placeholder
46
+ // `Garbage Collection` frame as the latest frame. This sample gets assigned the cpu-time and wall-time period that was
47
+ // recorded between calls to `on_gc_start` and `on_gc_finish`.
48
+ //
49
+ // 2. The thread is no longer marked as being in gc (all gc tracking fields get reset back to `INVALID_TIME`).
50
+ //
51
+ // 3. The `cpu_time_at_previous_sample_ns` and `wall_time_at_previous_sample_ns` get updated with the elapsed time in
52
+ // GC, so that all time is accounted for -- e.g. the next sample will not get "blamed" by time spent in GC.
53
+ //
54
+ // In an earlier attempt at implementing this functionality (https://github.com/DataDog/dd-trace-rb/pull/2308), we
55
+ // discovered that we needed to factor the sampling work away from `cpu_and_wall_time_collector_on_gc_finish` and into a
56
+ // separate `cpu_and_wall_time_collector_sample_after_gc` because (as documented in more detail below),
57
+ // `sample_after_gc` could trigger memory allocation on rare occasions (usually exceptions), which is actually not
58
+ // allowed to happen during Ruby's garbage collection start/finish hooks.
59
+ // ---
60
+
16
61
  #define INVALID_TIME -1
17
- #define THREAD_ID_LIMIT_CHARS 20
62
+ #define THREAD_ID_LIMIT_CHARS 44 // Why 44? "#{2**64} (#{2**64})".size + 1 for \0
63
+ #define RAISE_ON_FAILURE true
64
+ #define DO_NOT_RAISE_ON_FAILURE false
65
+ #define IS_WALL_TIME true
66
+ #define IS_NOT_WALL_TIME false
67
+ #define MISSING_TRACER_CONTEXT_KEY 0
68
+
69
+ static ID at_active_trace_id; // id of :@active_trace in Ruby
70
+ static ID at_root_span_id; // id of :@root_span in Ruby
71
+ static ID at_active_span_id; // id of :@active_span in Ruby
72
+ static ID at_id_id; // id of :@id in Ruby
18
73
 
19
74
  // Contains state for a single CpuAndWallTime instance
20
75
  struct cpu_and_wall_time_collector_state {
@@ -27,17 +82,53 @@ struct cpu_and_wall_time_collector_state {
27
82
  st_table *hash_map_per_thread_context;
28
83
  // Datadog::Profiling::StackRecorder instance
29
84
  VALUE recorder_instance;
30
- // Track how many samples we've taken.
85
+ // If the tracer is available and enabled, this will be the fiber-local symbol for accessing its running context,
86
+ // to enable code hotspots and endpoint aggregation.
87
+ // When not available, this is set to MISSING_TRACER_CONTEXT_KEY.
88
+ ID tracer_context_key;
89
+ // Track how many regular samples we've taken. Does not include garbage collection samples.
90
+ // Currently **outside** of stats struct because we also use it to decide when to clean the contexts, and thus this
91
+ // is not (just) a stat.
31
92
  unsigned int sample_count;
93
+
94
+ struct {
95
+ // Track how many garbage collection samples we've taken.
96
+ unsigned int gc_samples;
97
+ // See cpu_and_wall_time_collector_on_gc_start for details
98
+ unsigned int gc_samples_missed_due_to_missing_context;
99
+ } stats;
32
100
  };
33
101
 
34
102
  // Tracks per-thread state
35
103
  struct per_thread_context {
36
104
  char thread_id[THREAD_ID_LIMIT_CHARS];
37
- ddprof_ffi_CharSlice thread_id_char_slice;
105
+ ddog_CharSlice thread_id_char_slice;
38
106
  thread_cpu_time_id thread_cpu_time_id;
39
107
  long cpu_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized or if getting it fails for another reason
40
108
  long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized
109
+
110
+ struct {
111
+ // Both of these fields are set by on_gc_start and kept until sample_after_gc is called.
112
+ // Outside of this window, they will be INVALID_TIME.
113
+ long cpu_time_at_start_ns;
114
+ long wall_time_at_start_ns;
115
+
116
+ // Both of these fields are set by on_gc_finish and kept until sample_after_gc is called.
117
+ // Outside of this window, they will be INVALID_TIME.
118
+ long cpu_time_at_finish_ns;
119
+ long wall_time_at_finish_ns;
120
+ } gc_tracking;
121
+ };
122
+
123
+ // Used to correlate profiles with traces
124
+ struct trace_identifiers {
125
+ #define MAXIMUM_LENGTH_64_BIT_IDENTIFIER 21 // Why 21? 2^64 => 20 digits + 1 for \0
126
+
127
+ bool valid;
128
+ ddog_CharSlice local_root_span_id;
129
+ ddog_CharSlice span_id;
130
+ char local_root_span_id_buffer[MAXIMUM_LENGTH_64_BIT_IDENTIFIER];
131
+ char span_id_buffer[MAXIMUM_LENGTH_64_BIT_IDENTIFIER];
41
132
  };
42
133
 
43
134
  static void cpu_and_wall_time_collector_typed_data_mark(void *state_ptr);
@@ -45,21 +136,35 @@ static void cpu_and_wall_time_collector_typed_data_free(void *state_ptr);
45
136
  static int hash_map_per_thread_context_mark(st_data_t key_thread, st_data_t _value, st_data_t _argument);
46
137
  static int hash_map_per_thread_context_free_values(st_data_t _thread, st_data_t value_per_thread_context, st_data_t _argument);
47
138
  static VALUE _native_new(VALUE klass);
48
- static VALUE _native_initialize(VALUE self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames);
139
+ static VALUE _native_initialize(VALUE self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames, VALUE tracer_context_key);
49
140
  static VALUE _native_sample(VALUE self, VALUE collector_instance);
141
+ static VALUE _native_on_gc_start(VALUE self, VALUE collector_instance);
142
+ static VALUE _native_on_gc_finish(VALUE self, VALUE collector_instance);
143
+ static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
144
+ static void trigger_sample_for_thread(
145
+ struct cpu_and_wall_time_collector_state *state,
146
+ VALUE thread,
147
+ struct per_thread_context *thread_context,
148
+ ddog_Slice_i64 metric_values_slice,
149
+ sample_type type
150
+ );
50
151
  static VALUE _native_thread_list(VALUE self);
51
152
  static struct per_thread_context *get_or_create_context_for(VALUE thread, struct cpu_and_wall_time_collector_state *state);
153
+ static struct per_thread_context *get_context_for(VALUE thread, struct cpu_and_wall_time_collector_state *state);
52
154
  static void initialize_context(VALUE thread, struct per_thread_context *thread_context);
53
155
  static VALUE _native_inspect(VALUE self, VALUE collector_instance);
54
156
  static VALUE per_thread_context_st_table_as_ruby_hash(struct cpu_and_wall_time_collector_state *state);
55
157
  static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
158
+ static VALUE stats_as_ruby_hash(struct cpu_and_wall_time_collector_state *state);
56
159
  static void remove_context_for_dead_threads(struct cpu_and_wall_time_collector_state *state);
57
160
  static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument);
58
161
  static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance);
59
- static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns);
162
+ static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns, bool is_wall_time);
60
163
  static long cpu_time_now_ns(struct per_thread_context *thread_context);
61
- static long wall_time_now_ns();
164
+ static long wall_time_now_ns(bool raise_on_failure);
62
165
  static long thread_id_for(VALUE thread);
166
+ static VALUE _native_stats(VALUE self, VALUE collector_instance);
167
+ static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);
63
168
 
64
169
  void collectors_cpu_and_wall_time_init(VALUE profiling_module) {
65
170
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -77,11 +182,20 @@ void collectors_cpu_and_wall_time_init(VALUE profiling_module) {
77
182
  // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
78
183
  rb_define_alloc_func(collectors_cpu_and_wall_time_class, _native_new);
79
184
 
80
- rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_initialize", _native_initialize, 3);
185
+ rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_initialize", _native_initialize, 4);
81
186
  rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_inspect", _native_inspect, 1);
82
187
  rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 1);
188
+ rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1);
189
+ rb_define_singleton_method(testing_module, "_native_on_gc_finish", _native_on_gc_finish, 1);
190
+ rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 1);
83
191
  rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
84
192
  rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
193
+ rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
194
+
195
+ at_active_trace_id = rb_intern_const("@active_trace");
196
+ at_root_span_id = rb_intern_const("@root_span");
197
+ at_active_span_id = rb_intern_const("@active_span");
198
+ at_id_id = rb_intern_const("@id");
85
199
  }
86
200
 
87
201
  // This structure is used to define a Ruby object that stores a pointer to a struct cpu_and_wall_time_collector_state
@@ -147,12 +261,12 @@ static VALUE _native_new(VALUE klass) {
147
261
  // "numtable" is an awful name, but TL;DR it's what should be used when keys are `VALUE`s.
148
262
  st_init_numtable();
149
263
  state->recorder_instance = Qnil;
150
- state->sample_count = 0;
264
+ state->tracer_context_key = MISSING_TRACER_CONTEXT_KEY;
151
265
 
152
266
  return TypedData_Wrap_Struct(klass, &cpu_and_wall_time_collector_typed_data, state);
153
267
  }
154
268
 
155
- static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames) {
269
+ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames, VALUE tracer_context_key) {
156
270
  struct cpu_and_wall_time_collector_state *state;
157
271
  TypedData_Get_Struct(collector_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
158
272
 
@@ -164,6 +278,14 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_inst
164
278
  // hash_map_per_thread_context is already initialized, nothing to do here
165
279
  state->recorder_instance = enforce_recorder_instance(recorder_instance);
166
280
 
281
+ if (RTEST(tracer_context_key)) {
282
+ ENFORCE_TYPE(tracer_context_key, T_SYMBOL);
283
+ // Note about rb_to_id and dynamic symbols: calling `rb_to_id` prevents symbols from ever being garbage collected.
284
+ // In this case, we can't really escape this because as of this writing, ruby master still calls `rb_to_id` inside
285
+ // the implementation of Thread#[]= so any symbol that gets used as a key there will already be prevented from GC.
286
+ state->tracer_context_key = rb_to_id(tracer_context_key);
287
+ }
288
+
167
289
  return Qtrue;
168
290
  }
169
291
 
@@ -174,18 +296,40 @@ static VALUE _native_sample(DDTRACE_UNUSED VALUE _self, VALUE collector_instance
174
296
  return Qtrue;
175
297
  }
176
298
 
299
+ // This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
300
+ // It SHOULD NOT be used for other purposes.
301
+ static VALUE _native_on_gc_start(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
302
+ cpu_and_wall_time_collector_on_gc_start(collector_instance);
303
+ return Qtrue;
304
+ }
305
+
306
+ // This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
307
+ // It SHOULD NOT be used for other purposes.
308
+ static VALUE _native_on_gc_finish(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
309
+ cpu_and_wall_time_collector_on_gc_finish(collector_instance);
310
+ return Qtrue;
311
+ }
312
+
313
+ // This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
314
+ // It SHOULD NOT be used for other purposes.
315
+ static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
316
+ cpu_and_wall_time_collector_sample_after_gc(collector_instance);
317
+ return Qtrue;
318
+ }
319
+
177
320
  // This function gets called from the Collectors::CpuAndWallTimeWorker to trigger the actual sampling.
178
321
  //
179
322
  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
180
323
  // Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
181
324
  // Assumption 3: This function IS NOT called from a signal handler. This function is not async-signal-safe.
182
325
  // Assumption 4: This function IS NOT called in a reentrant way.
326
+ // Assumption 5: This function is called from the main Ractor (if Ruby has support for Ractors).
183
327
  VALUE cpu_and_wall_time_collector_sample(VALUE self_instance) {
184
328
  struct cpu_and_wall_time_collector_state *state;
185
329
  TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
186
330
 
187
331
  VALUE threads = ddtrace_thread_list();
188
- long current_wall_time_ns = wall_time_now_ns();
332
+ long current_wall_time_ns = wall_time_now_ns(RAISE_ON_FAILURE);
189
333
 
190
334
  const long thread_count = RARRAY_LEN(threads);
191
335
  for (long i = 0; i < thread_count; i++) {
@@ -194,10 +338,18 @@ VALUE cpu_and_wall_time_collector_sample(VALUE self_instance) {
194
338
 
195
339
  long current_cpu_time_ns = cpu_time_now_ns(thread_context);
196
340
 
197
- long cpu_time_elapsed_ns =
198
- update_time_since_previous_sample(&thread_context->cpu_time_at_previous_sample_ns, current_cpu_time_ns);
199
- long wall_time_elapsed_ns =
200
- update_time_since_previous_sample(&thread_context->wall_time_at_previous_sample_ns, current_wall_time_ns);
341
+ long cpu_time_elapsed_ns = update_time_since_previous_sample(
342
+ &thread_context->cpu_time_at_previous_sample_ns,
343
+ current_cpu_time_ns,
344
+ thread_context->gc_tracking.cpu_time_at_start_ns,
345
+ IS_NOT_WALL_TIME
346
+ );
347
+ long wall_time_elapsed_ns = update_time_since_previous_sample(
348
+ &thread_context->wall_time_at_previous_sample_ns,
349
+ current_wall_time_ns,
350
+ thread_context->gc_tracking.wall_time_at_start_ns,
351
+ IS_WALL_TIME
352
+ );
201
353
 
202
354
  int64_t metric_values[ENABLED_VALUE_TYPES_COUNT] = {0};
203
355
 
@@ -205,26 +357,12 @@ VALUE cpu_and_wall_time_collector_sample(VALUE self_instance) {
205
357
  metric_values[CPU_SAMPLES_VALUE_POS] = 1;
206
358
  metric_values[WALL_TIME_VALUE_POS] = wall_time_elapsed_ns;
207
359
 
208
- VALUE thread_name = thread_name_for(thread);
209
- bool have_thread_name = thread_name != Qnil;
210
-
211
- int label_count = 1 + (have_thread_name ? 1 : 0);
212
- ddprof_ffi_Label labels[label_count];
213
-
214
- labels[0] = (ddprof_ffi_Label) {.key = DDPROF_FFI_CHARSLICE_C("thread id"), .str = thread_context->thread_id_char_slice};
215
- if (have_thread_name) {
216
- labels[1] = (ddprof_ffi_Label) {
217
- .key = DDPROF_FFI_CHARSLICE_C("thread name"),
218
- .str = char_slice_from_ruby_string(thread_name)
219
- };
220
- }
221
-
222
- sample_thread(
360
+ trigger_sample_for_thread(
361
+ state,
223
362
  thread,
224
- state->sampling_buffer,
225
- state->recorder_instance,
226
- (ddprof_ffi_Slice_i64) {.ptr = metric_values, .len = ENABLED_VALUE_TYPES_COUNT},
227
- (ddprof_ffi_Slice_label) {.ptr = labels, .len = label_count}
363
+ thread_context,
364
+ (ddog_Slice_i64) {.ptr = metric_values, .len = ENABLED_VALUE_TYPES_COUNT},
365
+ SAMPLE_REGULAR
228
366
  );
229
367
  }
230
368
 
@@ -238,6 +376,226 @@ VALUE cpu_and_wall_time_collector_sample(VALUE self_instance) {
238
376
  return Qnil;
239
377
  }
240
378
 
379
+ // This function gets called when Ruby is about to start running the Garbage Collector on the current thread.
380
+ // It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
381
+ // create a stack sample that blames the cpu/wall time spent from now until the end of the garbage collector work.
382
+ //
383
+ // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
384
+ // *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
385
+ // This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)!
386
+ //
387
+ // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
388
+ // Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
389
+ void cpu_and_wall_time_collector_on_gc_start(VALUE self_instance) {
390
+ struct cpu_and_wall_time_collector_state *state;
391
+ if (!rb_typeddata_is_kind_of(self_instance, &cpu_and_wall_time_collector_typed_data)) return;
392
+ // This should never fail if the above check passes
393
+ TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
394
+
395
+ struct per_thread_context *thread_context = get_context_for(rb_thread_current(), state);
396
+
397
+ // If there was no previously-existing context for this thread, we won't allocate one (see safety). For now we just drop
398
+ // the GC sample, under the assumption that "a thread that is so new that we never sampled it even once before it triggers
399
+ // GC" is a rare enough case that we can just ignore it.
400
+ // We can always improve this later if we find that this happens often (and we have the counter to help us figure that out)!
401
+ if (thread_context == NULL) {
402
+ state->stats.gc_samples_missed_due_to_missing_context++;
403
+ return;
404
+ }
405
+
406
+ // If these fields are set, there's an existing GC sample that still needs to be written out by `sample_after_gc`.
407
+ //
408
+ // When can this happen? Because we don't have precise control over when `sample_after_gc` gets called (it will be
409
+ // called sometime after GC finishes), there is no way to guarantee that Ruby will not trigger more than one GC cycle
410
+ // before we can actually run that method.
411
+ //
412
+ // We handle this by collapsing multiple GC cycles into one. That is, if the following happens:
413
+ // `on_gc_start` (time=0) -> `on_gc_finish` (time=1) -> `on_gc_start` (time=2) -> `on_gc_finish` (time=3) -> `sample_after_gc`
414
+ // then we just use time=0 from the first on_gc_start and time=3 from the last on_gc_finish, e.g. we behave as if
415
+ // there was a single, longer GC period.
416
+ if (thread_context->gc_tracking.cpu_time_at_finish_ns != INVALID_TIME &&
417
+ thread_context->gc_tracking.wall_time_at_finish_ns != INVALID_TIME) return;
418
+
419
+ // Here we record the wall-time first and in on_gc_finish we record it second to avoid having wall-time be slightly < cpu-time
420
+ thread_context->gc_tracking.wall_time_at_start_ns = wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
421
+ thread_context->gc_tracking.cpu_time_at_start_ns = cpu_time_now_ns(thread_context);
422
+ }
423
+
424
+ // This function gets called when Ruby has finished running the Garbage Collector on the current thread.
425
+ // It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
426
+ // create a stack sample that blames the cpu/wall time spent from the start of garbage collector work until now.
427
+ //
428
+ // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
429
+ // *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
430
+ // This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)!
431
+ //
432
+ // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
433
+ // Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
434
+ void cpu_and_wall_time_collector_on_gc_finish(VALUE self_instance) {
435
+ struct cpu_and_wall_time_collector_state *state;
436
+ if (!rb_typeddata_is_kind_of(self_instance, &cpu_and_wall_time_collector_typed_data)) return;
437
+ // This should never fail if the above check passes
438
+ TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
439
+
440
+ struct per_thread_context *thread_context = get_context_for(rb_thread_current(), state);
441
+
442
+ // If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for
443
+ // how often this happens -- see on_gc_start.
444
+ if (thread_context == NULL) return;
445
+
446
+ if (thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME &&
447
+ thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME) {
448
+ // If this happened, it means that on_gc_start was either never called for the thread OR it was called but no thread
449
+ // context existed at the time. The former can be the result of a bug, but since we can't distinguish them, we just
450
+ // do nothing.
451
+ return;
452
+ }
453
+
454
+ // Here we record the wall-time second and in on_gc_start we record it first to avoid having wall-time be slightly < cpu-time
455
+ thread_context->gc_tracking.cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
456
+ thread_context->gc_tracking.wall_time_at_finish_ns = wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
457
+ }
458
+
459
+ // This function gets called shortly after Ruby has finished running the Garbage Collector.
460
+ // It creates a new sample including the cpu and wall-time spent by the garbage collector work, and resets any
461
+ // GC-related tracking.
462
+ //
463
+ // Specifically, it will search for thread(s) which have gone through a cycle of on_gc_start/on_gc_finish
464
+ // and thus have cpu_time_at_start_ns, cpu_time_at_finish_ns, wall_time_at_start_ns, wall_time_at_finish_ns
465
+ // set on their context.
466
+ //
467
+ // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
468
+ // Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
469
+ // Assumption 3: Unlike `on_gc_start` and `on_gc_finish`, this method is allowed to allocate memory as needed.
470
+ // Assumption 4: This function is called from the main Ractor (if Ruby has support for Ractors).
471
+ VALUE cpu_and_wall_time_collector_sample_after_gc(VALUE self_instance) {
472
+ struct cpu_and_wall_time_collector_state *state;
473
+ TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
474
+
475
+ VALUE threads = ddtrace_thread_list();
476
+ bool sampled_any_thread = false;
477
+
478
+ const long thread_count = RARRAY_LEN(threads);
479
+ for (long i = 0; i < thread_count; i++) {
480
+ VALUE thread = RARRAY_AREF(threads, i);
481
+ struct per_thread_context *thread_context = get_or_create_context_for(thread, state);
482
+
483
+ if (
484
+ thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME ||
485
+ thread_context->gc_tracking.cpu_time_at_finish_ns == INVALID_TIME ||
486
+ thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME ||
487
+ thread_context->gc_tracking.wall_time_at_finish_ns == INVALID_TIME
488
+ ) continue; // Ignore threads with no/incomplete garbage collection data
489
+
490
+ sampled_any_thread = true;
491
+
492
+ long gc_cpu_time_elapsed_ns =
493
+ thread_context->gc_tracking.cpu_time_at_finish_ns - thread_context->gc_tracking.cpu_time_at_start_ns;
494
+ long gc_wall_time_elapsed_ns =
495
+ thread_context->gc_tracking.wall_time_at_finish_ns - thread_context->gc_tracking.wall_time_at_start_ns;
496
+
497
+ // We don't expect non-wall time to go backwards, so let's flag this as a bug
498
+ if (gc_cpu_time_elapsed_ns < 0) rb_raise(rb_eRuntimeError, "BUG: Unexpected negative gc_cpu_time_elapsed_ns between samples");
499
+ // Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
500
+ // was a bug.
501
+ // @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
502
+ // https://github.com/DataDog/dd-trace-rb/pull/2336.
503
+ if (gc_wall_time_elapsed_ns < 0) gc_wall_time_elapsed_ns = 0;
504
+
505
+ if (thread_context->gc_tracking.wall_time_at_start_ns == 0 && thread_context->gc_tracking.wall_time_at_finish_ns != 0) {
506
+ // Avoid using wall-clock if we got 0 for a start (meaning there was an error) but not 0 for finish so we don't
507
+ // come up with a crazy value for the frame
508
+ rb_raise(rb_eRuntimeError, "BUG: Unexpected zero value for gc_tracking.wall_time_at_start_ns");
509
+ }
510
+
511
+ int64_t metric_values[ENABLED_VALUE_TYPES_COUNT] = {0};
512
+
513
+ metric_values[CPU_TIME_VALUE_POS] = gc_cpu_time_elapsed_ns;
514
+ metric_values[CPU_SAMPLES_VALUE_POS] = 1;
515
+ metric_values[WALL_TIME_VALUE_POS] = gc_wall_time_elapsed_ns;
516
+
517
+ trigger_sample_for_thread(
518
+ state,
519
+ thread,
520
+ thread_context,
521
+ (ddog_Slice_i64) {.ptr = metric_values, .len = ENABLED_VALUE_TYPES_COUNT},
522
+ SAMPLE_IN_GC
523
+ );
524
+
525
+ // Mark thread as no longer in GC
526
+ thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
527
+ thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
528
+ thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
529
+ thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
530
+
531
+ // Update counters so that they won't include the time in GC during the next sample
532
+ if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
533
+ thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
534
+ }
535
+ if (thread_context->wall_time_at_previous_sample_ns != INVALID_TIME) {
536
+ thread_context->wall_time_at_previous_sample_ns += gc_wall_time_elapsed_ns;
537
+ }
538
+ }
539
+
540
+ if (sampled_any_thread) state->stats.gc_samples++;
541
+
542
+ // Return a VALUE to make it easier to call this function from Ruby APIs that expect a return value (such as rb_rescue2)
543
+ return Qnil;
544
+ }
545
+
546
+ static void trigger_sample_for_thread(
547
+ struct cpu_and_wall_time_collector_state *state,
548
+ VALUE thread,
549
+ struct per_thread_context *thread_context,
550
+ ddog_Slice_i64 metric_values_slice,
551
+ sample_type type
552
+ ) {
553
+ int max_label_count =
554
+ 1 + // thread id
555
+ 1 + // thread name
556
+ 2; // local root span id and span id
557
+ ddog_Label labels[max_label_count];
558
+ int label_pos = 0;
559
+
560
+ labels[label_pos++] = (ddog_Label) {
561
+ .key = DDOG_CHARSLICE_C("thread id"),
562
+ .str = thread_context->thread_id_char_slice
563
+ };
564
+
565
+ VALUE thread_name = thread_name_for(thread);
566
+ if (thread_name != Qnil) {
567
+ labels[label_pos++] = (ddog_Label) {
568
+ .key = DDOG_CHARSLICE_C("thread name"),
569
+ .str = char_slice_from_ruby_string(thread_name)
570
+ };
571
+ }
572
+
573
+ struct trace_identifiers trace_identifiers_result = {.valid = false};
574
+ trace_identifiers_for(state, thread, &trace_identifiers_result);
575
+
576
+ if (trace_identifiers_result.valid) {
577
+ labels[label_pos++] = (ddog_Label) {.key = DDOG_CHARSLICE_C("local root span id"), .str = trace_identifiers_result.local_root_span_id};
578
+ labels[label_pos++] = (ddog_Label) {.key = DDOG_CHARSLICE_C("span id"), .str = trace_identifiers_result.span_id};
579
+ }
580
+
581
+ // The number of times `label_pos++` shows up in this function needs to match `max_label_count`. To avoid "oops I
582
+ // forgot to update max_label_count" in the future, we've also added this validation.
583
+ // @ivoanjo: I wonder if C compilers are smart enough to statically prove when this check never triggers and
584
+ // remove it entirely.
585
+ if (label_pos > max_label_count) {
586
+ rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
587
+ }
588
+
589
+ sample_thread(
590
+ thread,
591
+ state->sampling_buffer,
592
+ state->recorder_instance,
593
+ metric_values_slice,
594
+ (ddog_Slice_label) {.ptr = labels, .len = label_pos},
595
+ type
596
+ );
597
+ }
598
+
241
599
  // This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
242
600
  // It SHOULD NOT be used for other purposes.
243
601
  static VALUE _native_thread_list(DDTRACE_UNUSED VALUE _self) {
@@ -259,15 +617,32 @@ static struct per_thread_context *get_or_create_context_for(VALUE thread, struct
259
617
  return thread_context;
260
618
  }
261
619
 
620
+ static struct per_thread_context *get_context_for(VALUE thread, struct cpu_and_wall_time_collector_state *state) {
621
+ struct per_thread_context* thread_context = NULL;
622
+ st_data_t value_context = 0;
623
+
624
+ if (st_lookup(state->hash_map_per_thread_context, (st_data_t) thread, &value_context)) {
625
+ thread_context = (struct per_thread_context*) value_context;
626
+ }
627
+
628
+ return thread_context;
629
+ }
630
+
262
631
  static void initialize_context(VALUE thread, struct per_thread_context *thread_context) {
263
- snprintf(thread_context->thread_id, THREAD_ID_LIMIT_CHARS, "%ld", thread_id_for(thread));
264
- thread_context->thread_id_char_slice = (ddprof_ffi_CharSlice) {.ptr = thread_context->thread_id, .len = strlen(thread_context->thread_id)};
632
+ snprintf(thread_context->thread_id, THREAD_ID_LIMIT_CHARS, "%"PRIu64" (%lu)", native_thread_id_for(thread), (unsigned long) thread_id_for(thread));
633
+ thread_context->thread_id_char_slice = (ddog_CharSlice) {.ptr = thread_context->thread_id, .len = strlen(thread_context->thread_id)};
265
634
 
266
635
  thread_context->thread_cpu_time_id = thread_cpu_time_id_for(thread);
267
636
 
268
637
  // These will get initialized during actual sampling
269
638
  thread_context->cpu_time_at_previous_sample_ns = INVALID_TIME;
270
639
  thread_context->wall_time_at_previous_sample_ns = INVALID_TIME;
640
+
641
+ // These will only be used during a GC operation
642
+ thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
643
+ thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME;
644
+ thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
645
+ thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME;
271
646
  }
272
647
 
273
648
  static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
@@ -279,7 +654,10 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
279
654
  // Update this when modifying state struct
280
655
  rb_str_concat(result, rb_sprintf(" hash_map_per_thread_context=%"PRIsVALUE, per_thread_context_st_table_as_ruby_hash(state)));
281
656
  rb_str_concat(result, rb_sprintf(" recorder_instance=%"PRIsVALUE, state->recorder_instance));
657
+ VALUE tracer_context_key = state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY ? Qnil : ID2SYM(state->tracer_context_key);
658
+ rb_str_concat(result, rb_sprintf(" tracer_context_key=%+"PRIsVALUE, tracer_context_key));
282
659
  rb_str_concat(result, rb_sprintf(" sample_count=%u", state->sample_count));
660
+ rb_str_concat(result, rb_sprintf(" stats=%"PRIsVALUE, stats_as_ruby_hash(state)));
283
661
 
284
662
  return result;
285
663
  }
@@ -304,13 +682,29 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value
304
682
  ID2SYM(rb_intern("thread_cpu_time_id_valid?")), /* => */ thread_context->thread_cpu_time_id.valid ? Qtrue : Qfalse,
305
683
  ID2SYM(rb_intern("thread_cpu_time_id")), /* => */ CLOCKID2NUM(thread_context->thread_cpu_time_id.clock_id),
306
684
  ID2SYM(rb_intern("cpu_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->cpu_time_at_previous_sample_ns),
307
- ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns)
685
+ ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns),
686
+
687
+ ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns),
688
+ ID2SYM(rb_intern("gc_tracking.cpu_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_finish_ns),
689
+ ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns),
690
+ ID2SYM(rb_intern("gc_tracking.wall_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_finish_ns)
308
691
  };
309
692
  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]);
310
693
 
311
694
  return ST_CONTINUE;
312
695
  }
313
696
 
697
+ static VALUE stats_as_ruby_hash(struct cpu_and_wall_time_collector_state *state) {
698
+ // Update this when modifying state struct (stats inner struct)
699
+ VALUE stats_as_hash = rb_hash_new();
700
+ VALUE arguments[] = {
701
+ ID2SYM(rb_intern("gc_samples")), /* => */ INT2NUM(state->stats.gc_samples),
702
+ ID2SYM(rb_intern("gc_samples_missed_due_to_missing_context")), /* => */ INT2NUM(state->stats.gc_samples_missed_due_to_missing_context),
703
+ };
704
+ for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
705
+ return stats_as_hash;
706
+ }
707
+
314
708
  static void remove_context_for_dead_threads(struct cpu_and_wall_time_collector_state *state) {
315
709
  st_foreach(state->hash_map_per_thread_context, remove_if_dead_thread, 0 /* unused */);
316
710
  }
@@ -336,24 +730,58 @@ static VALUE _native_per_thread_context(DDTRACE_UNUSED VALUE _self, VALUE collec
336
730
  return per_thread_context_st_table_as_ruby_hash(state);
337
731
  }
338
732
 
339
- static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns) {
733
+ static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns, bool is_wall_time) {
340
734
  // If we didn't have a time for the previous sample, we use the current one
341
735
  if (*time_at_previous_sample_ns == INVALID_TIME) *time_at_previous_sample_ns = current_time_ns;
342
736
 
343
- long elapsed_time_ns = current_time_ns - *time_at_previous_sample_ns;
344
- *time_at_previous_sample_ns = current_time_ns;
737
+ bool is_thread_doing_gc = gc_start_time_ns != INVALID_TIME;
738
+ long elapsed_time_ns = -1;
739
+
740
+ if (is_thread_doing_gc) {
741
+ bool previous_sample_was_during_gc = gc_start_time_ns <= *time_at_previous_sample_ns;
742
+
743
+ if (previous_sample_was_during_gc) {
744
+ elapsed_time_ns = 0; // No time to account for -- any time since the last sample is going to get assigned to GC separately
745
+ } else {
746
+ elapsed_time_ns = gc_start_time_ns - *time_at_previous_sample_ns; // Capture time between previous sample and start of GC only
747
+ }
748
+
749
+ // Remaining time (from gc_start_time to current_time_ns) will be accounted for inside `sample_after_gc`
750
+ *time_at_previous_sample_ns = gc_start_time_ns;
751
+ } else {
752
+ elapsed_time_ns = current_time_ns - *time_at_previous_sample_ns; // Capture all time since previous sample
753
+ *time_at_previous_sample_ns = current_time_ns;
754
+ }
755
+
756
+ if (elapsed_time_ns < 0) {
757
+ if (is_wall_time) {
758
+ // Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
759
+ // was a bug.
760
+ // @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
761
+ // https://github.com/DataDog/dd-trace-rb/pull/2336.
762
+ elapsed_time_ns = 0;
763
+ } else {
764
+ // We don't expect non-wall time to go backwards, so let's flag this as a bug
765
+ rb_raise(rb_eRuntimeError, "BUG: Unexpected negative elapsed_time_ns between samples");
766
+ }
767
+ }
345
768
 
346
- return elapsed_time_ns >= 0 ? elapsed_time_ns : 0 /* In case something really weird happened */;
769
+ return elapsed_time_ns;
347
770
  }
348
771
 
349
- static long wall_time_now_ns() {
772
+ // Safety: This function is assumed never to raise exceptions by callers when raise_on_failure == false
773
+ static long wall_time_now_ns(bool raise_on_failure) {
350
774
  struct timespec current_monotonic;
351
775
 
352
- if (clock_gettime(CLOCK_MONOTONIC, &current_monotonic) != 0) rb_sys_fail("Failed to read CLOCK_MONOTONIC");
776
+ if (clock_gettime(CLOCK_MONOTONIC, &current_monotonic) != 0) {
777
+ if (raise_on_failure) rb_sys_fail("Failed to read CLOCK_MONOTONIC");
778
+ else return 0;
779
+ }
353
780
 
354
781
  return current_monotonic.tv_nsec + (current_monotonic.tv_sec * 1000 * 1000 * 1000);
355
782
  }
356
783
 
784
+ // Safety: This function is assumed never to raise exceptions by callers
357
785
  static long cpu_time_now_ns(struct per_thread_context *thread_context) {
358
786
  thread_cpu_time cpu_time = thread_cpu_time_for(thread_context->thread_cpu_time_id);
359
787
 
@@ -388,3 +816,50 @@ VALUE enforce_cpu_and_wall_time_collector_instance(VALUE object) {
388
816
  Check_TypedStruct(object, &cpu_and_wall_time_collector_typed_data);
389
817
  return object;
390
818
  }
819
+
820
+ // This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
821
+ // It SHOULD NOT be used for other purposes.
822
+ //
823
+ // Returns the stats being tracked by the collector (see stats_as_ruby_hash).
824
+ static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
825
+ struct cpu_and_wall_time_collector_state *state;
826
+ TypedData_Get_Struct(collector_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);
827
+
828
+ return stats_as_ruby_hash(state);
829
+ }
830
+
831
+ // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
832
+ static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
833
+ if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;
834
+
835
+ VALUE current_context = rb_thread_local_aref(thread, state->tracer_context_key);
836
+ if (current_context == Qnil) return;
837
+
838
+ VALUE active_trace = rb_ivar_get(current_context, at_active_trace_id /* @active_trace */);
839
+ if (active_trace == Qnil) return;
840
+
841
+ VALUE root_span = rb_ivar_get(active_trace, at_root_span_id /* @root_span */);
842
+ VALUE active_span = rb_ivar_get(active_trace, at_active_span_id /* @active_span */);
843
+ if (root_span == Qnil || active_span == Qnil) return;
844
+
845
+ VALUE numeric_local_root_span_id = rb_ivar_get(root_span, at_id_id /* @id */);
846
+ VALUE numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
847
+ if (numeric_local_root_span_id == Qnil || numeric_span_id == Qnil) return;
848
+
849
+ unsigned long long local_root_span_id = NUM2ULL(numeric_local_root_span_id);
850
+ unsigned long long span_id = NUM2ULL(numeric_span_id);
851
+
852
+ snprintf(trace_identifiers_result->local_root_span_id_buffer, MAXIMUM_LENGTH_64_BIT_IDENTIFIER, "%llu", local_root_span_id);
853
+ snprintf(trace_identifiers_result->span_id_buffer, MAXIMUM_LENGTH_64_BIT_IDENTIFIER, "%llu", span_id);
854
+
855
+ trace_identifiers_result->local_root_span_id = (ddog_CharSlice) {
856
+ .ptr = trace_identifiers_result->local_root_span_id_buffer,
857
+ .len = strlen(trace_identifiers_result->local_root_span_id_buffer)
858
+ };
859
+ trace_identifiers_result->span_id = (ddog_CharSlice) {
860
+ .ptr = trace_identifiers_result->span_id_buffer,
861
+ .len = strlen(trace_identifiers_result->span_id_buffer)
862
+ };
863
+
864
+ trace_identifiers_result->valid = true;
865
+ }