ddtrace 1.20.0 → 1.22.0

Files changed (113)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +115 -1
  3. data/LICENSE-3rdparty.csv +1 -1
  4. data/bin/ddprofrb +15 -0
  5. data/bin/ddtracerb +3 -1
  6. data/ext/{ddtrace_profiling_loader/ddtrace_profiling_loader.c → datadog_profiling_loader/datadog_profiling_loader.c} +2 -2
  7. data/ext/{ddtrace_profiling_loader → datadog_profiling_loader}/extconf.rb +3 -3
  8. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_cpu_and_wall_time_worker.c +238 -61
  9. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.c +145 -72
  10. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.h +17 -5
  11. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c +97 -4
  12. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/extconf.rb +2 -2
  13. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/heap_recorder.c +45 -3
  14. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/heap_recorder.h +7 -1
  15. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/http_transport.c +15 -19
  16. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/native_extension_helpers.rb +4 -4
  17. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.c +14 -0
  18. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.h +4 -0
  19. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/profiling.c +1 -1
  20. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.c +10 -0
  21. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.h +2 -0
  22. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.c +7 -9
  23. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +43 -13
  24. data/lib/datadog/appsec/event.rb +1 -1
  25. data/lib/datadog/auto_instrument.rb +3 -0
  26. data/lib/datadog/core/configuration/components.rb +7 -6
  27. data/lib/datadog/core/configuration/option.rb +8 -6
  28. data/lib/datadog/core/configuration/settings.rb +130 -63
  29. data/lib/datadog/core/configuration.rb +20 -4
  30. data/lib/datadog/core/diagnostics/environment_logger.rb +4 -3
  31. data/lib/datadog/core/environment/git.rb +25 -0
  32. data/lib/datadog/core/environment/identity.rb +18 -48
  33. data/lib/datadog/core/environment/platform.rb +7 -1
  34. data/lib/datadog/core/git/ext.rb +2 -23
  35. data/lib/datadog/core/remote/client/capabilities.rb +1 -1
  36. data/lib/datadog/core/remote/negotiation.rb +2 -2
  37. data/lib/datadog/core/remote/transport/http/config.rb +1 -1
  38. data/lib/datadog/core/remote/worker.rb +7 -4
  39. data/lib/datadog/core/telemetry/client.rb +18 -10
  40. data/lib/datadog/core/telemetry/emitter.rb +9 -13
  41. data/lib/datadog/core/telemetry/event.rb +247 -57
  42. data/lib/datadog/core/telemetry/ext.rb +1 -0
  43. data/lib/datadog/core/telemetry/heartbeat.rb +1 -3
  44. data/lib/datadog/core/telemetry/http/ext.rb +4 -1
  45. data/lib/datadog/core/telemetry/http/transport.rb +9 -4
  46. data/lib/datadog/core/telemetry/request.rb +59 -0
  47. data/lib/datadog/core/transport/ext.rb +2 -0
  48. data/lib/datadog/core/utils/url.rb +25 -0
  49. data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
  50. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +31 -0
  51. data/lib/datadog/profiling/collectors/info.rb +101 -0
  52. data/lib/datadog/profiling/component.rb +34 -28
  53. data/lib/datadog/profiling/exporter.rb +19 -5
  54. data/lib/datadog/profiling/ext.rb +2 -0
  55. data/lib/datadog/profiling/flush.rb +6 -3
  56. data/lib/datadog/profiling/http_transport.rb +5 -1
  57. data/lib/datadog/profiling/load_native_extension.rb +19 -6
  58. data/lib/datadog/profiling/native_extension.rb +1 -1
  59. data/lib/datadog/profiling/tag_builder.rb +5 -0
  60. data/lib/datadog/profiling/tasks/exec.rb +3 -3
  61. data/lib/datadog/profiling/tasks/help.rb +3 -3
  62. data/lib/datadog/profiling.rb +13 -2
  63. data/lib/datadog/tracing/contrib/action_mailer/events/deliver.rb +1 -1
  64. data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +11 -4
  65. data/lib/datadog/tracing/contrib/concurrent_ruby/async_patch.rb +20 -0
  66. data/lib/datadog/tracing/contrib/concurrent_ruby/patcher.rb +11 -1
  67. data/lib/datadog/tracing/contrib/configurable.rb +1 -1
  68. data/lib/datadog/tracing/contrib/extensions.rb +6 -2
  69. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +11 -4
  70. data/lib/datadog/tracing/sampling/matcher.rb +23 -3
  71. data/lib/datadog/tracing/sampling/rule.rb +7 -2
  72. data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -0
  73. data/lib/datadog/tracing/trace_operation.rb +1 -2
  74. data/lib/datadog/tracing/transport/http.rb +1 -0
  75. data/lib/datadog/tracing/transport/trace_formatter.rb +31 -0
  76. data/lib/ddtrace/version.rb +1 -1
  77. metadata +55 -62
  78. data/ext/ddtrace_profiling_native_extension/pid_controller.c +0 -57
  79. data/ext/ddtrace_profiling_native_extension/pid_controller.h +0 -45
  80. data/lib/datadog/core/telemetry/collector.rb +0 -250
  81. data/lib/datadog/core/telemetry/v1/app_event.rb +0 -59
  82. data/lib/datadog/core/telemetry/v1/application.rb +0 -92
  83. data/lib/datadog/core/telemetry/v1/configuration.rb +0 -25
  84. data/lib/datadog/core/telemetry/v1/dependency.rb +0 -43
  85. data/lib/datadog/core/telemetry/v1/host.rb +0 -59
  86. data/lib/datadog/core/telemetry/v1/install_signature.rb +0 -38
  87. data/lib/datadog/core/telemetry/v1/integration.rb +0 -64
  88. data/lib/datadog/core/telemetry/v1/product.rb +0 -36
  89. data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -106
  90. data/lib/datadog/core/telemetry/v2/app_client_configuration_change.rb +0 -41
  91. data/lib/datadog/core/telemetry/v2/request.rb +0 -29
  92. data/lib/datadog/profiling/diagnostics/environment_logger.rb +0 -39
  93. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/NativeExtensionDesign.md +0 -0
  94. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id.h +0 -0
  95. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_from_pthread.c +0 -0
  96. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_noop.c +0 -0
  97. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.c +0 -0
  98. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.h +0 -0
  99. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_gc_profiling_helper.c +0 -0
  100. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_gc_profiling_helper.h +0 -0
  101. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.c +0 -0
  102. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.h +0 -0
  103. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.c +0 -0
  104. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.h +0 -0
  105. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.h +0 -0
  106. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/helpers.h +0 -0
  107. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.c +0 -0
  108. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.h +0 -0
  109. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.c +0 -0
  110. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.h +0 -0
  111. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.h +0 -0
  112. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.c +0 -0
  113. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.h +0 -0
data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.c

@@ -9,16 +9,23 @@
 #define BASE_SAMPLING_INTERVAL 50
 
 #define ADJUSTMENT_WINDOW_NS SECONDS_AS_NS(1)
+#define ADJUSTMENT_WINDOW_SAMPLES 100
+// Any average sampling times above this value will be clamped to this value.
+// In practice, this limits the budget consumption of a single sample to that of an adjustment window,
+// thus aiming for a minimum sample rate of once per adjustment window (dependent on actual event rate).
+// NOTE: This is our main strategy to deal with timing hiccups such as those that can be caused by
+//       suspensions, system overloads and other things that could lead to arbitrarily big sampling
+//       time measurements.
+#define MAX_ALLOWED_SAMPLING_NS(target_overhead) (long) (ADJUSTMENT_WINDOW_NS * target_overhead / 100.)
 
 #define EMA_SMOOTHING_FACTOR 0.6
-#define EXP_MOVING_AVERAGE(last, avg, first) first ? last : (1-EMA_SMOOTHING_FACTOR) * avg + EMA_SMOOTHING_FACTOR * last
 
-void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name) {
+void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name, long now_ns) {
   sampler->debug_name = debug_name;
-  discrete_dynamic_sampler_set_overhead_target_percentage(sampler, BASE_OVERHEAD_PCT);
+  discrete_dynamic_sampler_set_overhead_target_percentage(sampler, BASE_OVERHEAD_PCT, now_ns);
 }
 
-static void _discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns) {
+void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns) {
   const char *debug_name = sampler->debug_name;
   double target_overhead = sampler->target_overhead;
   (*sampler) = (discrete_dynamic_sampler) {
@@ -31,6 +38,7 @@ static void _discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns) {
     // This fake readjustment will use a hardcoded sampling interval
     .sampling_interval = BASE_SAMPLING_INTERVAL,
     .sampling_probability = 1.0 / BASE_SAMPLING_INTERVAL,
+    .max_sampling_time_ns = MAX_ALLOWED_SAMPLING_NS(target_overhead),
     // But we want to make sure we sample at least once in the next window so that our first
     // real readjustment has some notion of how heavy sampling is. Therefore, we'll make it so that
     // the next event is automatically sampled by artificially locating it in the interval threshold.
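For a concrete sense of the clamp that MAX_ALLOWED_SAMPLING_NS introduces, here is a small standalone sketch; the SECONDS_AS_NS shape is assumed to match the real helper, and the numbers are made up:

#include <stdio.h>

// Assumed to match the shape of the real SECONDS_AS_NS helper.
#define SECONDS_AS_NS(value) ((long) (value) * 1000L * 1000L * 1000L)
#define ADJUSTMENT_WINDOW_NS SECONDS_AS_NS(1)
#define MAX_ALLOWED_SAMPLING_NS(target_overhead) (long) (ADJUSTMENT_WINDOW_NS * target_overhead / 100.)

int main(void) {
  // With a 2% overhead target, a single sample measurement can contribute at
  // most 20ms (one full adjustment window's sampling budget) to the averages:
  printf("%ld\n", MAX_ALLOWED_SAMPLING_NS(2.0)); // 20000000
  // Even a pathological 5s reading (e.g. the process was suspended mid-sample)
  // gets clamped to that value rather than learned as the typical cost.
  return 0;
}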
@@ -38,27 +46,17 @@ static void _discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns) {
   };
 }
 
-void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler) {
-  long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
-  _discrete_dynamic_sampler_reset(sampler, now);
-}
-
-static void _discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns) {
+void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns) {
   if (target_overhead <= 0 || target_overhead > 100) {
     rb_raise(rb_eArgError, "Target overhead must be a double between ]0,100] was %f", target_overhead);
   }
   sampler->target_overhead = target_overhead;
-  _discrete_dynamic_sampler_reset(sampler, now_ns);
-}
-
-void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead) {
-  long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
-  _discrete_dynamic_sampler_set_overhead_target_percentage(sampler, target_overhead, now);
+  return discrete_dynamic_sampler_reset(sampler, now_ns);
 }
 
 static void maybe_readjust(discrete_dynamic_sampler *sampler, long now);
 
-static bool _discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns) {
+bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns) {
   // For efficiency reasons we don't do true random sampling but rather systematic
   // sampling following a sample interval/skip. This can be biased and hide patterns
   // but the dynamic interval and rather indeterministic pattern of allocations in
@@ -77,12 +75,7 @@ static bool _discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns) {
   return should_sample;
 }
 
-bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler) {
-  long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
-  return _discrete_dynamic_sampler_should_sample(sampler, now);
-}
-
-static long _discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns) {
+long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns) {
   long last_sampling_time_ns = sampler->sample_start_time_ns == 0 ? 0 : long_max_of(0, now_ns - sampler->sample_start_time_ns);
   sampler->samples_since_last_readjustment++;
   sampler->sampling_time_since_last_readjustment_ns += last_sampling_time_ns;
@@ -94,11 +87,6 @@ static long _discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns) {
   return last_sampling_time_ns;
 }
 
-long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler) {
-  long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
-  return _discrete_dynamic_sampler_after_sample(sampler, now);
-}
-
 double discrete_dynamic_sampler_probability(discrete_dynamic_sampler *sampler) {
   return sampler->sampling_probability * 100.;
 }
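The pattern across these changes is that the sampler no longer reads the clock itself; callers fetch a timestamp and pass it in. A minimal caller-side sketch, where on_allocation and record_sample are illustrative names rather than real call sites from the patch:

// Hypothetical call site showing the new explicit-clock API.
extern void record_sample(void); // stand-in for the real, expensive sampling work

void on_allocation(discrete_dynamic_sampler *sampler) {
  long now_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
  if (!discrete_dynamic_sampler_should_sample(sampler, now_ns)) return;

  record_sample();

  // Passing timestamps in, instead of reading the clock inside every sampler
  // function, trims clock reads from the hot path and lets the RSpec test
  // bindings below drive the sampler with simulated time.
  discrete_dynamic_sampler_after_sample(sampler, monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE));
}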
@@ -107,35 +95,66 @@ size_t discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sampler *sampler) {
   return sampler->events_since_last_sample;
 }
 
+static double ewma_adj_window(double latest_value, double avg, long current_window_time_ns, bool is_first) {
+  if (is_first) {
+    return latest_value;
+  }
+
+  // We don't want samples coming from partial adjustment windows (e.g. preempted due to number of samples)
+  // to lead to quick "forgetting" of the past. Thus, we'll tweak the weight of this new value based on the
+  // size of the time window from which we gathered it in relation to our standard adjustment window time.
+  double fraction_of_full_window = double_min_of((double) current_window_time_ns / ADJUSTMENT_WINDOW_NS, 1);
+  double alpha = EMA_SMOOTHING_FACTOR * fraction_of_full_window;
+
+  return (1-alpha) * avg + alpha * latest_value;
+}
+
 static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
-  long window_time_ns = sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now - sampler->last_readjust_time_ns;
+  long this_window_time_ns = sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now - sampler->last_readjust_time_ns;
+
+  bool should_readjust_based_on_time = this_window_time_ns >= ADJUSTMENT_WINDOW_NS;
+  bool should_readjust_based_on_samples = sampler->samples_since_last_readjustment >= ADJUSTMENT_WINDOW_SAMPLES;
+
+  if (!should_readjust_based_on_time && !should_readjust_based_on_samples) {
+    // not enough time or samples have passed to perform a readjustment
+    return;
+  }
 
-  if (window_time_ns < ADJUSTMENT_WINDOW_NS) {
-    // not enough time has passed to perform a readjustment
+  if (this_window_time_ns == 0) {
+    // should not be possible given previous condition but lets protect against div by 0 below.
     return;
   }
 
   // If we got this far, lets recalculate our sampling params based on new observations
   bool first_readjustment = !sampler->has_completed_full_adjustment_window;
 
-  // Update our running average of events/sec with latest observation
-  sampler->events_per_ns = EXP_MOVING_AVERAGE(
-    (double) sampler->events_since_last_readjustment / window_time_ns,
+  // Update our running average of events/sec with latest observation.
+  sampler->events_per_ns = ewma_adj_window(
+    (double) sampler->events_since_last_readjustment / this_window_time_ns,
     sampler->events_per_ns,
+    this_window_time_ns,
     first_readjustment
   );
 
   // Update our running average of sampling time for a specific event
-  long sampling_window_time_ns = sampler->sampling_time_since_last_readjustment_ns;
-  long sampling_overshoot_time_ns = -1;
   if (sampler->samples_since_last_readjustment > 0) {
     // We can only update sampling-related stats if we actually sampled on the last window...
 
     // Lets update our average sampling time per event
-    long avg_sampling_time_in_window_ns = sampler->samples_since_last_readjustment == 0 ? 0 : sampling_window_time_ns / sampler->samples_since_last_readjustment;
-    sampler->sampling_time_ns = EXP_MOVING_AVERAGE(
+    long avg_sampling_time_in_window_ns = sampler->samples_since_last_readjustment == 0 ? 0 : sampler->sampling_time_since_last_readjustment_ns / sampler->samples_since_last_readjustment;
+    if (avg_sampling_time_in_window_ns > sampler->max_sampling_time_ns) {
+      // If the average sampling time in the previous window was deemed unnacceptable, clamp it to the
+      // maximum acceptable value and register this operation in our counter.
+      // NOTE: This is important so that events like suspensions or system overloads do not lead us to
+      //       learn arbitrarily big sampling times which may then result in us not sampling anything
+      //       for very long periods of time.
+      avg_sampling_time_in_window_ns = sampler->max_sampling_time_ns;
+      sampler->sampling_time_clamps++;
+    }
+    sampler->sampling_time_ns = ewma_adj_window(
       avg_sampling_time_in_window_ns,
       sampler->sampling_time_ns,
+      this_window_time_ns,
       first_readjustment
     );
   }
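To see how the partial-window weighting behaves, here is a self-contained sketch; the constants are copied from this file, the inputs are made up:

#include <stdbool.h>
#include <stdio.h>

#define EMA_SMOOTHING_FACTOR 0.6
#define ADJUSTMENT_WINDOW_NS 1000000000L // 1s, i.e. SECONDS_AS_NS(1)

// Standalone copy of ewma_adj_window from the patch, for experimentation.
static double ewma_adj_window(double latest, double avg, long window_ns, bool is_first) {
  if (is_first) return latest;
  double fraction = (double) window_ns / ADJUSTMENT_WINDOW_NS;
  if (fraction > 1) fraction = 1;
  double alpha = EMA_SMOOTHING_FACTOR * fraction;
  return (1 - alpha) * avg + alpha * latest;
}

int main(void) {
  double avg = 100.0; // previously learned events/sec
  // A spike to 1000 events/sec observed over a full 1s window moves the
  // average a lot (alpha = 0.6)...
  printf("full window:    %.1f\n", ewma_adj_window(1000, avg, 1000000000L, false)); // 640.0
  // ...but the same spike seen over a 0.1s partial window (a readjustment
  // forced early by ADJUSTMENT_WINDOW_SAMPLES) barely moves it (alpha = 0.06).
  printf("partial window: %.1f\n", ewma_adj_window(1000, avg, 100000000L, false)); // 154.0
  return 0;
}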
@@ -145,21 +164,21 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
   // NOTE: Updating this even when no samples occur is a conscious choice which enables us to cooldown extreme adjustments over time.
   //       If we didn't do this, whenever a big spike caused target_overhead_adjustment to equal target_overhead, we'd get stuck
   //       in a "probability = 0" state.
-  long reference_target_sampling_time_ns = window_time_ns * (sampler->target_overhead / 100.);
+  long this_window_sampling_target_time_ns = this_window_time_ns * (sampler->target_overhead / 100.);
   // Overshoot by definition is always >= 0. < 0 would be undershooting!
-  sampling_overshoot_time_ns = long_max_of(0, sampler->sampling_time_since_last_readjustment_ns - reference_target_sampling_time_ns);
+  long this_window_sampling_overshoot_time_ns = long_max_of(0, sampler->sampling_time_since_last_readjustment_ns - this_window_sampling_target_time_ns);
   // Our overhead adjustment should always be between [-target_overhead, 0]. Higher adjustments would lead to negative overhead targets
   // which don't make much sense.
-  double last_target_overhead_adjustment = -double_min_of(sampler->target_overhead, sampling_overshoot_time_ns * 100. / window_time_ns);
-  sampler->target_overhead_adjustment = EXP_MOVING_AVERAGE(
+  double last_target_overhead_adjustment = -double_min_of(sampler->target_overhead, this_window_sampling_overshoot_time_ns * 100. / this_window_time_ns);
+  sampler->target_overhead_adjustment = ewma_adj_window(
     last_target_overhead_adjustment,
     sampler->target_overhead_adjustment,
+    this_window_time_ns,
     first_readjustment
   );
 
   // Apply our overhead adjustment to figure out our real targets for this readjustment.
   double target_overhead = double_max_of(0, sampler->target_overhead + sampler->target_overhead_adjustment);
-  long target_sampling_time_ns = window_time_ns * (target_overhead / 100.);
 
   // Recalculate target sampling probability so that the following 2 hold:
   //   * window_time_ns = working_window_time_ns + sampling_window_time_ns
@@ -175,11 +194,13 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
   //                  ┌─ assuming no events will be emitted during sampling
   //                  │
   //   = events_per_ns * working_window_time_ns * sampling_probability * sampling_time_ns
+  //   = events_per_ns * (window_time_ns - sampling_window_time_ns) * sampling_probability * sampling_time_ns
   //
   // Re-ordering for sampling_probability and solving for the upper-bound of sampling_window_time_ns:
   //
   //   sampling_window_time_ns = window_time_ns * target_overhead / 100
-  //   sampling_probability = window_time_ns * target_overhead / 100 / (events_per_ns * working_window_time_ns * sampling_time_ns) =
+  //   sampling_probability = (sampling_window_time_ns) / (events_per_ns * sampling_time_ns * (window_time_ns - sampling_window_time_ns))
+  //                        = (window_time_ns * target_overhead / 100) / (events_per_ns * sampling_time_ns * window_time_ns * (1 - target_overhead / 100))
   //
   // Which you can intuitively understand as:
   //
@@ -190,16 +211,22 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
   //   then probability will be > 1 (but we should clamp to 1 since probabilities higher than 1 don't make sense).
   // * If app is eventing a lot or our sampling overhead is big, then as time_to_sample_all_events_ns grows, sampling_probability will
   //   tend to 0.
-  long working_window_time_ns = long_max_of(0, window_time_ns - sampling_window_time_ns);
-  double max_allowed_time_for_sampling_ns = target_sampling_time_ns;
-  long time_to_sample_all_events_ns = sampler->events_per_ns * working_window_time_ns * sampler->sampling_time_ns;
-  if (max_allowed_time_for_sampling_ns == 0) {
-    // if we aren't allowed any sampling time at all, probability has to be 0
+  //
+  // In fact, we can simplify the equation further since the `window_time_ns` components cancel each other out:
+  //
+  //   sampling_probability = (target_overhead / 100) / (events_per_ns * sampling_time_ns * (1 - target_overhead / 100))
+  //                        = max_sampling_overhead / avg_sampling_overhead
+
+  double max_sampling_overhead = target_overhead / 100.;
+  double avg_sampling_overhead = sampler->events_per_ns * sampler->sampling_time_ns * (1 - max_sampling_overhead);
+
+  if (max_sampling_overhead == 0) {
+    // if we aren't allowed any sampling overhead at all, probability has to be 0
     sampler->sampling_probability = 0;
   } else {
     // otherwise apply the formula described above (protecting against div by 0)
-    sampler->sampling_probability = time_to_sample_all_events_ns == 0 ? 1. :
-      double_min_of(1., max_allowed_time_for_sampling_ns / time_to_sample_all_events_ns);
+    sampler->sampling_probability = avg_sampling_overhead == 0 ? 1. :
+      double_min_of(1., max_sampling_overhead / avg_sampling_overhead);
   }
 
   // Doing true random selection would involve "tossing a coin" on every allocation. Lets do systematic sampling instead so that our
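A worked example of the simplified formula, with made-up event rates and per-sample costs:

#include <stdio.h>

// Plugs illustrative numbers into the patch's simplified probability formula.
static double probability(double target_overhead, double events_per_sec, double sampling_time_ns) {
  double max_sampling_overhead = target_overhead / 100.;
  double events_per_ns = events_per_sec / 1e9;
  double avg_sampling_overhead = events_per_ns * sampling_time_ns * (1 - max_sampling_overhead);
  double p = max_sampling_overhead / avg_sampling_overhead;
  return p > 1. ? 1. : p;
}

int main(void) {
  // 1k allocations/sec at 10us each would cost ~1% if all were sampled:
  // under the 2% budget, so the probability clamps to 1 (sample everything).
  printf("light load: %.3f\n", probability(2.0, 1000, 10000));  // 1.000
  // 50k allocations/sec at 10us each would cost ~50% if all were sampled:
  // the sampler backs off to ~4% of events to respect the 2% budget.
  printf("heavy load: %.3f\n", probability(2.0, 50000, 10000)); // 0.041
  return 0;
}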
@@ -225,26 +252,34 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
   double samples_in_60s = allocs_in_60s * sampler->sampling_probability;
   double expected_total_sampling_time_in_60s =
     samples_in_60s * sampler->sampling_time_ns / 1e9;
-  double real_total_sampling_time_in_60s = sampling_window_time_ns / 1e9 * 60 / (window_time_ns / 1e9);
+  double num_this_windows_in_60s = 60 * 1e9 / this_window_time_ns;
+  double real_total_sampling_time_in_60s = sampler->sampling_time_since_last_readjustment_ns * num_this_windows_in_60s / 1e9;
 
-  fprintf(stderr, "[dds.%s] readjusting...\n", sampler->debug_name);
+  const char* readjustment_reason = should_readjust_based_on_time ? "time" : "samples";
+
+  fprintf(stderr, "[dds.%s] readjusting due to %s...\n", sampler->debug_name, readjustment_reason);
+  fprintf(stderr, "events_since_last_readjustment=%ld\n", sampler->events_since_last_readjustment);
   fprintf(stderr, "samples_since_last_readjustment=%ld\n", sampler->samples_since_last_readjustment);
-  fprintf(stderr, "window_time=%ld\n", window_time_ns);
+  fprintf(stderr, "this_window_time=%ld\n", this_window_time_ns);
+  fprintf(stderr, "this_window_sampling_time=%ld\n", sampler->sampling_time_since_last_readjustment_ns);
+  fprintf(stderr, "this_working_window_time=%ld\n", this_window_time_ns - sampler->sampling_time_since_last_readjustment_ns);
+  fprintf(stderr, "this_window_sampling_target_time=%ld\n", this_window_sampling_target_time_ns);
+  fprintf(stderr, "this_window_sampling_overshoot_time=%ld\n", this_window_sampling_overshoot_time_ns);
+  fprintf(stderr, "\n");
+  fprintf(stderr, "target_overhead=%f\n", sampler->target_overhead);
+  fprintf(stderr, "target_overhead_adjustment=%f\n", sampler->target_overhead_adjustment);
   fprintf(stderr, "events_per_sec=%f\n", sampler->events_per_ns * 1e9);
   fprintf(stderr, "sampling_time=%ld\n", sampler->sampling_time_ns);
-  fprintf(stderr, "sampling_window_time=%ld\n", sampling_window_time_ns);
-  fprintf(stderr, "sampling_target_time=%ld\n", reference_target_sampling_time_ns);
-  fprintf(stderr, "sampling_overshoot_time=%ld\n", sampling_overshoot_time_ns);
-  fprintf(stderr, "working_window_time=%ld\n", working_window_time_ns);
+  fprintf(stderr, "avg_sampling_overhead=%f\n", avg_sampling_overhead * 100);
   fprintf(stderr, "sampling_interval=%zu\n", sampler->sampling_interval);
-  fprintf(stderr, "sampling_probability=%f\n", sampler->sampling_probability);
+  fprintf(stderr, "sampling_probability=%f\n", sampler->sampling_probability * 100);
+  fprintf(stderr, "\n");
   fprintf(stderr, "expected allocs in 60s=%f\n", allocs_in_60s);
   fprintf(stderr, "expected samples in 60s=%f\n", samples_in_60s);
   fprintf(stderr, "expected sampling time in 60s=%f (previous real=%f)\n", expected_total_sampling_time_in_60s, real_total_sampling_time_in_60s);
-  fprintf(stderr, "target_overhead=%f\n", sampler->target_overhead);
-  fprintf(stderr, "target_overhead_adjustment=%f\n", sampler->target_overhead_adjustment);
-  fprintf(stderr, "target_sampling_time=%ld\n", target_sampling_time_ns);
   fprintf(stderr, "expected max overhead in 60s=%f\n", target_overhead / 100.0 * 60);
+  fprintf(stderr, "\n");
+  fprintf(stderr, "sampling_time_clamps=%zu\n", sampler->sampling_time_clamps);
   fprintf(stderr, "-------\n");
   #endif
 
@@ -255,15 +290,34 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
   sampler->has_completed_full_adjustment_window = true;
 }
 
+VALUE discrete_dynamic_sampler_state_snapshot(discrete_dynamic_sampler *sampler) {
+  VALUE arguments[] = {
+    ID2SYM(rb_intern("target_overhead")),                 /* => */ DBL2NUM(sampler->target_overhead),
+    ID2SYM(rb_intern("target_overhead_adjustment")),      /* => */ DBL2NUM(sampler->target_overhead_adjustment),
+    ID2SYM(rb_intern("events_per_sec")),                  /* => */ DBL2NUM(sampler->events_per_ns * 1e9),
+    ID2SYM(rb_intern("sampling_time_ns")),                /* => */ LONG2NUM(sampler->sampling_time_ns),
+    ID2SYM(rb_intern("sampling_interval")),               /* => */ ULONG2NUM(sampler->sampling_interval),
+    ID2SYM(rb_intern("sampling_probability")),            /* => */ DBL2NUM(sampler->sampling_probability * 100),
+    ID2SYM(rb_intern("events_since_last_readjustment")),  /* => */ ULONG2NUM(sampler->events_since_last_readjustment),
+    ID2SYM(rb_intern("samples_since_last_readjustment")), /* => */ ULONG2NUM(sampler->samples_since_last_readjustment),
+    ID2SYM(rb_intern("max_sampling_time_ns")),            /* => */ LONG2NUM(sampler->max_sampling_time_ns),
+    ID2SYM(rb_intern("sampling_time_clamps")),            /* => */ ULONG2NUM(sampler->sampling_time_clamps),
+  };
+  VALUE hash = rb_hash_new();
+  for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(hash, arguments[i], arguments[i+1]);
+  return hash;
+}
+
 // ---
 // Below here is boilerplate to expose the above code to Ruby so that we can test it with RSpec as usual.
 
 static VALUE _native_new(VALUE klass);
+static VALUE _native_initialize(VALUE self, VALUE now);
 static VALUE _native_reset(VALUE self, VALUE now);
 static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_overhead, VALUE now);
 static VALUE _native_should_sample(VALUE self, VALUE now);
 static VALUE _native_after_sample(VALUE self, VALUE now);
-static VALUE _native_probability(VALUE self);
+static VALUE _native_state_snapshot(VALUE self);
 
 typedef struct sampler_state {
   discrete_dynamic_sampler sampler;
@@ -276,12 +330,15 @@ void collectors_discrete_dynamic_sampler_init(VALUE profiling_module) {
   VALUE sampler_class = rb_define_class_under(testing_module, "Sampler", rb_cObject);
 
   rb_define_alloc_func(sampler_class, _native_new);
+  // NOTE: Despite being native, we're using the normal ruby keyword to prevent having to write a whole
+  //       new ruby file to simply proxy the initialization call.
+  rb_define_method(sampler_class, "initialize", _native_initialize, 1);
 
   rb_define_method(sampler_class, "_native_reset", _native_reset, 1);
   rb_define_method(sampler_class, "_native_set_overhead_target_percentage", _native_set_overhead_target_percentage, 2);
   rb_define_method(sampler_class, "_native_should_sample", _native_should_sample, 1);
   rb_define_method(sampler_class, "_native_after_sample", _native_after_sample, 1);
-  rb_define_method(sampler_class, "_native_probability", _native_probability, 0);
+  rb_define_method(sampler_class, "_native_state_snapshot", _native_state_snapshot, 0);
 }
 
 static const rb_data_type_t sampler_typed_data = {
@@ -296,21 +353,37 @@ static const rb_data_type_t sampler_typed_data = {
 static VALUE _native_new(VALUE klass) {
   sampler_state *state = ruby_xcalloc(sizeof(sampler_state), 1);
 
-  discrete_dynamic_sampler_init(&state->sampler, "test sampler");
+  long now_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
+  if (now_ns == 0) {
+    rb_raise(rb_eRuntimeError, "failed to get clock time");
+  }
+  discrete_dynamic_sampler_init(&state->sampler, "test sampler", now_ns);
 
   return TypedData_Wrap_Struct(klass, &sampler_typed_data, state);
 }
 
-static VALUE _native_reset(VALUE self, VALUE now_ns) {
+static VALUE _native_initialize(VALUE self, VALUE now_ns) {
   ENFORCE_TYPE(now_ns, T_FIXNUM);
 
   sampler_state *state;
   TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
 
-  _discrete_dynamic_sampler_reset(&state->sampler, NUM2LONG(now_ns));
+  discrete_dynamic_sampler_init(&state->sampler, "test sampler", NUM2LONG(now_ns));
+
   return Qtrue;
 }
 
+static VALUE _native_reset(VALUE self, VALUE now_ns) {
+  ENFORCE_TYPE(now_ns, T_FIXNUM);
+
+  sampler_state *state;
+  TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
+
+  discrete_dynamic_sampler_reset(&state->sampler, NUM2LONG(now_ns));
+
+  return Qnil;
+}
+
 static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_overhead, VALUE now_ns) {
   ENFORCE_TYPE(target_overhead, T_FLOAT);
   ENFORCE_TYPE(now_ns, T_FIXNUM);
@@ -318,7 +391,7 @@ static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_overhead, VALUE now_ns) {
   sampler_state *state;
   TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
 
-  _discrete_dynamic_sampler_set_overhead_target_percentage(&state->sampler, NUM2DBL(target_overhead), NUM2LONG(now_ns));
+  discrete_dynamic_sampler_set_overhead_target_percentage(&state->sampler, NUM2DBL(target_overhead), NUM2LONG(now_ns));
 
   return Qnil;
 }
@@ -329,7 +402,7 @@ VALUE _native_should_sample(VALUE self, VALUE now_ns) {
   sampler_state *state;
   TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
 
-  return _discrete_dynamic_sampler_should_sample(&state->sampler, NUM2LONG(now_ns)) ? Qtrue : Qfalse;
+  return discrete_dynamic_sampler_should_sample(&state->sampler, NUM2LONG(now_ns)) ? Qtrue : Qfalse;
}
 
 VALUE _native_after_sample(VALUE self, VALUE now_ns) {
@@ -338,12 +411,12 @@ VALUE _native_after_sample(VALUE self, VALUE now_ns) {
   sampler_state *state;
   TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
 
-  return LONG2NUM(_discrete_dynamic_sampler_after_sample(&state->sampler, NUM2LONG(now_ns)));
+  return LONG2NUM(discrete_dynamic_sampler_after_sample(&state->sampler, NUM2LONG(now_ns)));
 }
 
-VALUE _native_probability(VALUE self) {
+VALUE _native_state_snapshot(VALUE self) {
   sampler_state *state;
   TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
 
-  return DBL2NUM(discrete_dynamic_sampler_probability(&state->sampler));
+  return discrete_dynamic_sampler_state_snapshot(&state->sampler);
 }
data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.h

@@ -3,6 +3,8 @@
 #include <stdbool.h>
 #include <stddef.h>
 
+#include <ruby.h>
+
 // A sampler that will sample discrete events based on the overhead of their
 // sampling.
 //
@@ -31,6 +33,8 @@ typedef struct discrete_dynamic_sampler {
   // NOTE: This is an inverted view of the probability.
   // NOTE: A value of 0 works as +inf, effectively disabling sampling (to align with probability=0)
   unsigned long sampling_interval;
+  // Max allowed value for an individual sampling time measurement.
+  long max_sampling_time_ns;
 
   // -- Sampling State --
   // How many events have we seen since we last decided to sample.
@@ -53,20 +57,23 @@ typedef struct discrete_dynamic_sampler {
   // A negative number that we add to target_overhead to serve as extra padding to
   // try and mitigate observed overshooting of max sampling time.
   double target_overhead_adjustment;
+
+  // -- Interesting stats --
+  unsigned long sampling_time_clamps;
 } discrete_dynamic_sampler;
 
 
 // Init a new sampler with sane defaults.
-void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name);
+void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name, long now_ns);
 
 // Reset a sampler, clearing all stored state.
-void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler);
+void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns);
 
 // Sets a new target_overhead for the provided sampler, resetting it in the process.
 // @param target_overhead A double representing the percentage of total time we are
 //        willing to use as overhead for the resulting sampling. Values are expected
 //        to be in the range ]0.0, 100.0].
-void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead);
+void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns);
 
 // Make a sampling decision.
 //
@@ -75,15 +82,20 @@ void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead);
 //
 // NOTE: If true is returned we implicitly assume the start of a sampling operation
 //       and it is expected that a follow-up after_sample call is issued.
-bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler);
+bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns);
 
 // Signal the end of a sampling operation.
 //
 // @return Sampling time in nanoseconds for the sample operation we just finished.
-long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler);
+long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns);
 
 // Retrieve the current sampling probability ([0.0, 100.0]) being applied by this sampler.
 double discrete_dynamic_sampler_probability(discrete_dynamic_sampler *sampler);
 
 // Retrieve the current number of events seen since last sample.
 unsigned long discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sampler *sampler);
+
+// Return a Ruby hash containing a snapshot of this sampler's interesting state at calling time.
+// WARN: This allocates in the Ruby VM and therefore should not be called without the
+//       VM lock or during GC.
+VALUE discrete_dynamic_sampler_state_snapshot(discrete_dynamic_sampler *sampler);
data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c

@@ -82,6 +82,9 @@ static ID at_id_id; // id of :@id in Ruby
 static ID at_resource_id; // id of :@resource in Ruby
 static ID at_root_span_id; // id of :@root_span in Ruby
 static ID at_type_id; // id of :@type in Ruby
+static ID at_otel_values_id; // id of :@otel_values in Ruby
+static ID at_parent_span_id_id; // id of :@parent_span_id in Ruby
+static ID at_datadog_trace_id; // id of :@datadog_trace in Ruby
 
 // Contains state for a single ThreadContext instance
 struct thread_context_collector_state {
@@ -114,6 +117,8 @@ struct thread_context_collector_state {
   monotonic_to_system_epoch_state time_converter_state;
   // Used to identify the main thread, to give it a fallback name
   VALUE main_thread;
+  // Used when extracting trace identifiers from otel spans. Lazily initialized.
+  VALUE otel_current_span_key;
 
   struct stats {
     // Track how many garbage collection samples we've taken.
@@ -218,6 +223,14 @@ static VALUE thread_list(struct thread_context_collector_state *state);
 static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
 static VALUE _native_new_empty_thread(VALUE self);
 static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
+static void ddtrace_otel_trace_identifiers_for(
+  struct thread_context_collector_state *state,
+  VALUE *active_trace,
+  VALUE *root_span,
+  VALUE *numeric_span_id,
+  VALUE active_span,
+  VALUE otel_values
+);
 
 void collectors_thread_context_init(VALUE profiling_module) {
   VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -255,6 +268,9 @@ void collectors_thread_context_init(VALUE profiling_module) {
   at_resource_id = rb_intern_const("@resource");
   at_root_span_id = rb_intern_const("@root_span");
   at_type_id = rb_intern_const("@type");
+  at_otel_values_id = rb_intern_const("@otel_values");
+  at_parent_span_id_id = rb_intern_const("@parent_span_id");
+  at_datadog_trace_id = rb_intern_const("@datadog_trace");
 
   gc_profiling_init();
 }
@@ -282,6 +298,7 @@ static void thread_context_collector_typed_data_mark(void *state_ptr) {
   st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_mark, 0 /* unused */);
   rb_gc_mark(state->thread_list_buffer);
   rb_gc_mark(state->main_thread);
+  rb_gc_mark(state->otel_current_span_key);
 }
 
 static void thread_context_collector_typed_data_free(void *state_ptr) {
@@ -334,6 +351,7 @@ static VALUE _native_new(VALUE klass) {
   state->allocation_type_enabled = true;
   state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
   state->main_thread = rb_thread_main();
+  state->otel_current_span_key = Qnil;
   state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
   state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;
 
@@ -603,11 +621,14 @@ bool thread_context_collector_on_gc_finish(VALUE self_instance) {
   // Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
   // on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
   // samples first.
-  bool finished_major_gc = gc_profiling_has_major_gc_finished();
   bool over_flush_time_treshold =
     (wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
 
-  return finished_major_gc || over_flush_time_treshold;
+  if (over_flush_time_treshold) {
+    return true;
+  } else {
+    return gc_profiling_has_major_gc_finished();
+  }
 }
 
 // This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
@@ -917,6 +938,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) {
   ));
   rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
   rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));
+  rb_str_concat(result, rb_sprintf(" otel_current_span_key=%"PRIsVALUE, state->otel_current_span_key));
 
   return result;
 }
@@ -1104,10 +1126,19 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
 
   VALUE root_span = rb_ivar_get(active_trace, at_root_span_id /* @root_span */);
   VALUE active_span = rb_ivar_get(active_trace, at_active_span_id /* @active_span */);
-  if (root_span == Qnil || active_span == Qnil) return;
+  // Note: On Ruby 3.x `rb_attr_get` is exactly the same as `rb_ivar_get`. For Ruby 2.x, the difference is that
+  // `rb_ivar_get` can trigger "warning: instance variable @otel_values not initialized" if warnings are enabled and
+  // opentelemetry is not in use, whereas `rb_attr_get` does the lookup without generating the warning.
+  VALUE otel_values = rb_attr_get(active_trace, at_otel_values_id /* @otel_values */);
+
+  VALUE numeric_span_id = Qnil;
+
+  if (otel_values != Qnil) ddtrace_otel_trace_identifiers_for(state, &active_trace, &root_span, &numeric_span_id, active_span, otel_values);
+
+  if (root_span == Qnil || (active_span == Qnil && numeric_span_id == Qnil)) return;
 
   VALUE numeric_local_root_span_id = rb_ivar_get(root_span, at_id_id /* @id */);
-  VALUE numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
+  if (active_span != Qnil && numeric_span_id == Qnil) numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
   if (numeric_local_root_span_id == Qnil || numeric_span_id == Qnil) return;
 
   trace_identifiers_result->local_root_span_id = NUM2ULL(numeric_local_root_span_id);
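A tiny sketch (not part of the patch) of the rb_attr_get vs rb_ivar_get behavior the comment above describes:

#include <ruby.h>

// obj is any Ruby object that never assigned @never_assigned (hypothetical name).
static void compare_ivar_reads(VALUE obj) {
  ID unset = rb_intern("@never_assigned");
  VALUE a = rb_ivar_get(obj, unset); // Ruby 2.x: may print "warning: instance variable @never_assigned not initialized"
  VALUE b = rb_attr_get(obj, unset); // silent lookup, no warning
  // Both return Qnil; only the warning behavior differs.
  (void) a; (void) b;
}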
@@ -1299,3 +1330,65 @@ static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
     default: return DDOG_CHARSLICE_C("(VM Internal, Missing class)");
   }
 }
+
+static VALUE get_otel_current_span_key(struct thread_context_collector_state *state) {
+  if (state->otel_current_span_key == Qnil) {
+    VALUE datadog_module = rb_const_get(rb_cObject, rb_intern("Datadog"));
+    VALUE opentelemetry_module = rb_const_get(datadog_module, rb_intern("OpenTelemetry"));
+    VALUE api_module = rb_const_get(opentelemetry_module, rb_intern("API"));
+    VALUE context_module = rb_const_get(api_module, rb_intern_const("Context"));
+    VALUE current_span_key = rb_const_get(context_module, rb_intern_const("CURRENT_SPAN_KEY"));
+
+    if (current_span_key == Qnil) {
+      rb_raise(rb_eRuntimeError, "Unexpected: Missing Datadog::OpenTelemetry::API::Context::CURRENT_SPAN_KEY");
+    }
+
+    state->otel_current_span_key = current_span_key;
+  }
+
+  return state->otel_current_span_key;
+}
+
+// This method gets used when ddtrace is being used indirectly via the otel APIs. Information gets stored slightly
+// differently, and this codepath handles it.
+static void ddtrace_otel_trace_identifiers_for(
+  struct thread_context_collector_state *state,
+  VALUE *active_trace,
+  VALUE *root_span,
+  VALUE *numeric_span_id,
+  VALUE active_span,
+  VALUE otel_values
+) {
+  VALUE resolved_numeric_span_id =
+    active_span == Qnil ?
+      // For traces started from otel spans, the span id will be empty, and the @parent_span_id has the right value
+      rb_ivar_get(*active_trace, at_parent_span_id_id /* @parent_span_id */) :
+      // Regular span created by ddtrace
+      rb_ivar_get(active_span, at_id_id /* @id */);
+
+  if (resolved_numeric_span_id == Qnil) return;
+
+  VALUE otel_current_span_key = get_otel_current_span_key(state);
+  VALUE current_trace = *active_trace;
+
+  // ddtrace uses a different structure when spans are created from otel, where each otel span will have a unique ddtrace
+  // trace and span representing it. Each ddtrace trace is then connected to the previous otel span, forming a linked
+  // list. The local root span is going to be the trace/span we find at the end of this linked list.
+  while (otel_values != Qnil) {
+    VALUE otel_span = rb_hash_lookup(otel_values, otel_current_span_key);
+    if (otel_span == Qnil) break;
+    VALUE next_trace = rb_ivar_get(otel_span, at_datadog_trace_id);
+    if (next_trace == Qnil) break;
+
+    current_trace = next_trace;
+    otel_values = rb_ivar_get(current_trace, at_otel_values_id /* @otel_values */);
+  }
+
+  // We found the last trace in the linked list. This contains the local root span
+  VALUE resolved_root_span = rb_ivar_get(current_trace, at_root_span_id /* @root_span */);
+  if (resolved_root_span == Qnil) return;
+
+  *root_span = resolved_root_span;
+  *active_trace = current_trace;
+  *numeric_span_id = resolved_numeric_span_id;
+}
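The linked list that loop walks can be pictured like this (a sketch; trace_A/B/C and otel_span_1/2 are hypothetical names, not real objects from the patch):

// trace_A.@otel_values = { CURRENT_SPAN_KEY => otel_span_1 }
// otel_span_1.@datadog_trace  -->  trace_B
// trace_B.@otel_values = { CURRENT_SPAN_KEY => otel_span_2 }
// otel_span_2.@datadog_trace  -->  trace_C
// trace_C.@otel_values = Qnil         // loop stops here
// trace_C.@root_span           -->  local root span reported by the profiler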