ddtrace 1.20.0 → 1.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +115 -1
  3. data/LICENSE-3rdparty.csv +1 -1
  4. data/bin/ddprofrb +15 -0
  5. data/bin/ddtracerb +3 -1
  6. data/ext/{ddtrace_profiling_loader/ddtrace_profiling_loader.c → datadog_profiling_loader/datadog_profiling_loader.c} +2 -2
  7. data/ext/{ddtrace_profiling_loader → datadog_profiling_loader}/extconf.rb +3 -3
  8. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_cpu_and_wall_time_worker.c +238 -61
  9. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.c +145 -72
  10. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_discrete_dynamic_sampler.h +17 -5
  11. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c +97 -4
  12. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/extconf.rb +2 -2
  13. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/heap_recorder.c +45 -3
  14. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/heap_recorder.h +7 -1
  15. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/http_transport.c +15 -19
  16. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/native_extension_helpers.rb +4 -4
  17. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.c +14 -0
  18. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.h +4 -0
  19. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/profiling.c +1 -1
  20. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.c +10 -0
  21. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.h +2 -0
  22. data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.c +7 -9
  23. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +43 -13
  24. data/lib/datadog/appsec/event.rb +1 -1
  25. data/lib/datadog/auto_instrument.rb +3 -0
  26. data/lib/datadog/core/configuration/components.rb +7 -6
  27. data/lib/datadog/core/configuration/option.rb +8 -6
  28. data/lib/datadog/core/configuration/settings.rb +130 -63
  29. data/lib/datadog/core/configuration.rb +20 -4
  30. data/lib/datadog/core/diagnostics/environment_logger.rb +4 -3
  31. data/lib/datadog/core/environment/git.rb +25 -0
  32. data/lib/datadog/core/environment/identity.rb +18 -48
  33. data/lib/datadog/core/environment/platform.rb +7 -1
  34. data/lib/datadog/core/git/ext.rb +2 -23
  35. data/lib/datadog/core/remote/client/capabilities.rb +1 -1
  36. data/lib/datadog/core/remote/negotiation.rb +2 -2
  37. data/lib/datadog/core/remote/transport/http/config.rb +1 -1
  38. data/lib/datadog/core/remote/worker.rb +7 -4
  39. data/lib/datadog/core/telemetry/client.rb +18 -10
  40. data/lib/datadog/core/telemetry/emitter.rb +9 -13
  41. data/lib/datadog/core/telemetry/event.rb +247 -57
  42. data/lib/datadog/core/telemetry/ext.rb +1 -0
  43. data/lib/datadog/core/telemetry/heartbeat.rb +1 -3
  44. data/lib/datadog/core/telemetry/http/ext.rb +4 -1
  45. data/lib/datadog/core/telemetry/http/transport.rb +9 -4
  46. data/lib/datadog/core/telemetry/request.rb +59 -0
  47. data/lib/datadog/core/transport/ext.rb +2 -0
  48. data/lib/datadog/core/utils/url.rb +25 -0
  49. data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
  50. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +31 -0
  51. data/lib/datadog/profiling/collectors/info.rb +101 -0
  52. data/lib/datadog/profiling/component.rb +34 -28
  53. data/lib/datadog/profiling/exporter.rb +19 -5
  54. data/lib/datadog/profiling/ext.rb +2 -0
  55. data/lib/datadog/profiling/flush.rb +6 -3
  56. data/lib/datadog/profiling/http_transport.rb +5 -1
  57. data/lib/datadog/profiling/load_native_extension.rb +19 -6
  58. data/lib/datadog/profiling/native_extension.rb +1 -1
  59. data/lib/datadog/profiling/tag_builder.rb +5 -0
  60. data/lib/datadog/profiling/tasks/exec.rb +3 -3
  61. data/lib/datadog/profiling/tasks/help.rb +3 -3
  62. data/lib/datadog/profiling.rb +13 -2
  63. data/lib/datadog/tracing/contrib/action_mailer/events/deliver.rb +1 -1
  64. data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +11 -4
  65. data/lib/datadog/tracing/contrib/concurrent_ruby/async_patch.rb +20 -0
  66. data/lib/datadog/tracing/contrib/concurrent_ruby/patcher.rb +11 -1
  67. data/lib/datadog/tracing/contrib/configurable.rb +1 -1
  68. data/lib/datadog/tracing/contrib/extensions.rb +6 -2
  69. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +11 -4
  70. data/lib/datadog/tracing/sampling/matcher.rb +23 -3
  71. data/lib/datadog/tracing/sampling/rule.rb +7 -2
  72. data/lib/datadog/tracing/sampling/rule_sampler.rb +2 -0
  73. data/lib/datadog/tracing/trace_operation.rb +1 -2
  74. data/lib/datadog/tracing/transport/http.rb +1 -0
  75. data/lib/datadog/tracing/transport/trace_formatter.rb +31 -0
  76. data/lib/ddtrace/version.rb +1 -1
  77. metadata +55 -62
  78. data/ext/ddtrace_profiling_native_extension/pid_controller.c +0 -57
  79. data/ext/ddtrace_profiling_native_extension/pid_controller.h +0 -45
  80. data/lib/datadog/core/telemetry/collector.rb +0 -250
  81. data/lib/datadog/core/telemetry/v1/app_event.rb +0 -59
  82. data/lib/datadog/core/telemetry/v1/application.rb +0 -92
  83. data/lib/datadog/core/telemetry/v1/configuration.rb +0 -25
  84. data/lib/datadog/core/telemetry/v1/dependency.rb +0 -43
  85. data/lib/datadog/core/telemetry/v1/host.rb +0 -59
  86. data/lib/datadog/core/telemetry/v1/install_signature.rb +0 -38
  87. data/lib/datadog/core/telemetry/v1/integration.rb +0 -64
  88. data/lib/datadog/core/telemetry/v1/product.rb +0 -36
  89. data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -106
  90. data/lib/datadog/core/telemetry/v2/app_client_configuration_change.rb +0 -41
  91. data/lib/datadog/core/telemetry/v2/request.rb +0 -29
  92. data/lib/datadog/profiling/diagnostics/environment_logger.rb +0 -39
  93. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/NativeExtensionDesign.md +0 -0
  94. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id.h +0 -0
  95. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_from_pthread.c +0 -0
  96. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_noop.c +0 -0
  97. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.c +0 -0
  98. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.h +0 -0
  99. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_gc_profiling_helper.c +0 -0
  100. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_gc_profiling_helper.h +0 -0
  101. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.c +0 -0
  102. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.h +0 -0
  103. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.c +0 -0
  104. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.h +0 -0
  105. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.h +0 -0
  106. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/helpers.h +0 -0
  107. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.c +0 -0
  108. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/libdatadog_helpers.h +0 -0
  109. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.c +0 -0
  110. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.h +0 -0
  111. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/stack_recorder.h +0 -0
  112. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.c +0 -0
  113. /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/time_helpers.h +0 -0
@@ -9,16 +9,23 @@
9
9
  #define BASE_SAMPLING_INTERVAL 50
10
10
 
11
11
  #define ADJUSTMENT_WINDOW_NS SECONDS_AS_NS(1)
12
+ #define ADJUSTMENT_WINDOW_SAMPLES 100
13
+ // Any average sampling times above this value will be clamped to this value.
14
+ // In practice, this limits the budget consumption of a single sample to that of an adjustment window,
15
+ // thus aiming for a minimum sample rate of once per adjustment window (dependent on actual event rate).
16
+ // NOTE: This is our main strategy to deal with timing hiccups such as those that can be caused by
17
+ // suspensions, system overloads and other things that could lead to arbitrarily big sampling
18
+ // time measurements.
19
+ #define MAX_ALLOWED_SAMPLING_NS(target_overhead) (long) (ADJUSTMENT_WINDOW_NS * target_overhead / 100.)
12
20
 
13
21
  #define EMA_SMOOTHING_FACTOR 0.6
14
- #define EXP_MOVING_AVERAGE(last, avg, first) first ? last : (1-EMA_SMOOTHING_FACTOR) * avg + EMA_SMOOTHING_FACTOR * last
15
22
 
16
- void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name) {
23
+ void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name, long now_ns) {
17
24
  sampler->debug_name = debug_name;
18
- discrete_dynamic_sampler_set_overhead_target_percentage(sampler, BASE_OVERHEAD_PCT);
25
+ discrete_dynamic_sampler_set_overhead_target_percentage(sampler, BASE_OVERHEAD_PCT, now_ns);
19
26
  }
20
27
 
21
- static void _discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns) {
28
+ void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns) {
22
29
  const char *debug_name = sampler->debug_name;
23
30
  double target_overhead = sampler->target_overhead;
24
31
  (*sampler) = (discrete_dynamic_sampler) {
@@ -31,6 +38,7 @@ static void _discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, l
31
38
  // This fake readjustment will use a hardcoded sampling interval
32
39
  .sampling_interval = BASE_SAMPLING_INTERVAL,
33
40
  .sampling_probability = 1.0 / BASE_SAMPLING_INTERVAL,
41
+ .max_sampling_time_ns = MAX_ALLOWED_SAMPLING_NS(target_overhead),
34
42
  // But we want to make sure we sample at least once in the next window so that our first
35
43
  // real readjustment has some notion of how heavy sampling is. Therefore, we'll make it so that
36
44
  // the next event is automatically sampled by artificially locating it in the interval threshold.
@@ -38,27 +46,17 @@ static void _discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, l
38
46
  };
39
47
  }
40
48
 
41
- void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler) {
42
- long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
43
- _discrete_dynamic_sampler_reset(sampler, now);
44
- }
45
-
46
- static void _discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns) {
49
+ void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns) {
47
50
  if (target_overhead <= 0 || target_overhead > 100) {
48
51
  rb_raise(rb_eArgError, "Target overhead must be a double between ]0,100] was %f", target_overhead);
49
52
  }
50
53
  sampler->target_overhead = target_overhead;
51
- _discrete_dynamic_sampler_reset(sampler, now_ns);
52
- }
53
-
54
- void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead) {
55
- long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
56
- _discrete_dynamic_sampler_set_overhead_target_percentage(sampler, target_overhead, now);
54
+ return discrete_dynamic_sampler_reset(sampler, now_ns);
57
55
  }
58
56
 
59
57
  static void maybe_readjust(discrete_dynamic_sampler *sampler, long now);
60
58
 
61
- static bool _discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns) {
59
+ bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns) {
62
60
  // For efficiency reasons we don't do true random sampling but rather systematic
63
61
  // sampling following a sample interval/skip. This can be biased and hide patterns
64
62
  // but the dynamic interval and rather indeterministic pattern of allocations in
@@ -77,12 +75,7 @@ static bool _discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sa
77
75
  return should_sample;
78
76
  }
79
77
 
80
- bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler) {
81
- long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
82
- return _discrete_dynamic_sampler_should_sample(sampler, now);
83
- }
84
-
85
- static long _discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns) {
78
+ long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns) {
86
79
  long last_sampling_time_ns = sampler->sample_start_time_ns == 0 ? 0 : long_max_of(0, now_ns - sampler->sample_start_time_ns);
87
80
  sampler->samples_since_last_readjustment++;
88
81
  sampler->sampling_time_since_last_readjustment_ns += last_sampling_time_ns;
@@ -94,11 +87,6 @@ static long _discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sam
94
87
  return last_sampling_time_ns;
95
88
  }
96
89
 
97
- long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler) {
98
- long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
99
- return _discrete_dynamic_sampler_after_sample(sampler, now);
100
- }
101
-
102
90
  double discrete_dynamic_sampler_probability(discrete_dynamic_sampler *sampler) {
103
91
  return sampler->sampling_probability * 100.;
104
92
  }
@@ -107,35 +95,66 @@ size_t discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sample
107
95
  return sampler->events_since_last_sample;
108
96
  }
109
97
 
98
+ static double ewma_adj_window(double latest_value, double avg, long current_window_time_ns, bool is_first) {
99
+ if (is_first) {
100
+ return latest_value;
101
+ }
102
+
103
+ // We don't want samples coming from partial adjustment windows (e.g. preempted due to number of samples)
104
+ // to lead to quick "forgetting" of the past. Thus, we'll tweak the weight of this new value based on the
105
+ // size of the time window from which we gathered it in relation to our standard adjustment window time.
106
+ double fraction_of_full_window = double_min_of((double) current_window_time_ns / ADJUSTMENT_WINDOW_NS, 1);
107
+ double alpha = EMA_SMOOTHING_FACTOR * fraction_of_full_window;
108
+
109
+ return (1-alpha) * avg + alpha * latest_value;
110
+ }
111
+
110
112
  static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
111
- long window_time_ns = sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now - sampler->last_readjust_time_ns;
113
+ long this_window_time_ns = sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now - sampler->last_readjust_time_ns;
114
+
115
+ bool should_readjust_based_on_time = this_window_time_ns >= ADJUSTMENT_WINDOW_NS;
116
+ bool should_readjust_based_on_samples = sampler->samples_since_last_readjustment >= ADJUSTMENT_WINDOW_SAMPLES;
117
+
118
+ if (!should_readjust_based_on_time && !should_readjust_based_on_samples) {
119
+ // not enough time or samples have passed to perform a readjustment
120
+ return;
121
+ }
112
122
 
113
- if (window_time_ns < ADJUSTMENT_WINDOW_NS) {
114
- // not enough time has passed to perform a readjustment
123
+ if (this_window_time_ns == 0) {
124
+ // should not be possible given the previous condition, but let's protect against div by 0 below.
115
125
  return;
116
126
  }
117
127
 
118
128
  // If we got this far, lets recalculate our sampling params based on new observations
119
129
  bool first_readjustment = !sampler->has_completed_full_adjustment_window;
120
130
 
121
- // Update our running average of events/sec with latest observation
122
- sampler->events_per_ns = EXP_MOVING_AVERAGE(
123
- (double) sampler->events_since_last_readjustment / window_time_ns,
131
+ // Update our running average of events/sec with latest observation.
132
+ sampler->events_per_ns = ewma_adj_window(
133
+ (double) sampler->events_since_last_readjustment / this_window_time_ns,
124
134
  sampler->events_per_ns,
135
+ this_window_time_ns,
125
136
  first_readjustment
126
137
  );
127
138
 
128
139
  // Update our running average of sampling time for a specific event
129
- long sampling_window_time_ns = sampler->sampling_time_since_last_readjustment_ns;
130
- long sampling_overshoot_time_ns = -1;
131
140
  if (sampler->samples_since_last_readjustment > 0) {
132
141
  // We can only update sampling-related stats if we actually sampled on the last window...
133
142
 
134
143
  // Lets update our average sampling time per event
135
- long avg_sampling_time_in_window_ns = sampler->samples_since_last_readjustment == 0 ? 0 : sampling_window_time_ns / sampler->samples_since_last_readjustment;
136
- sampler->sampling_time_ns = EXP_MOVING_AVERAGE(
144
+ long avg_sampling_time_in_window_ns = sampler->samples_since_last_readjustment == 0 ? 0 : sampler->sampling_time_since_last_readjustment_ns / sampler->samples_since_last_readjustment;
145
+ if (avg_sampling_time_in_window_ns > sampler->max_sampling_time_ns) {
146
+ // If the average sampling time in the previous window was deemed unacceptable, clamp it to the
147
+ // maximum acceptable value and register this operation in our counter.
148
+ // NOTE: This is important so that events like suspensions or system overloads do not lead us to
149
+ // learn arbitrarily big sampling times which may then result in us not sampling anything
150
+ // for very long periods of time.
151
+ avg_sampling_time_in_window_ns = sampler->max_sampling_time_ns;
152
+ sampler->sampling_time_clamps++;
153
+ }
154
+ sampler->sampling_time_ns = ewma_adj_window(
137
155
  avg_sampling_time_in_window_ns,
138
156
  sampler->sampling_time_ns,
157
+ this_window_time_ns,
139
158
  first_readjustment
140
159
  );
141
160
  }
@@ -145,21 +164,21 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
145
164
  // NOTE: Updating this even when no samples occur is a conscious choice which enables us to cooldown extreme adjustments over time.
146
165
  // If we didn't do this, whenever a big spike caused target_overhead_adjustment to equal target_overhead, we'd get stuck
147
166
  // in a "probability = 0" state.
148
- long reference_target_sampling_time_ns = window_time_ns * (sampler->target_overhead / 100.);
167
+ long this_window_sampling_target_time_ns = this_window_time_ns * (sampler->target_overhead / 100.);
149
168
  // Overshoot by definition is always >= 0. < 0 would be undershooting!
150
- sampling_overshoot_time_ns = long_max_of(0, sampler->sampling_time_since_last_readjustment_ns - reference_target_sampling_time_ns);
169
+ long this_window_sampling_overshoot_time_ns = long_max_of(0, sampler->sampling_time_since_last_readjustment_ns - this_window_sampling_target_time_ns);
151
170
  // Our overhead adjustment should always be between [-target_overhead, 0]. Higher adjustments would lead to negative overhead targets
152
171
  // which don't make much sense.
153
- double last_target_overhead_adjustment = -double_min_of(sampler->target_overhead, sampling_overshoot_time_ns * 100. / window_time_ns);
154
- sampler->target_overhead_adjustment = EXP_MOVING_AVERAGE(
172
+ double last_target_overhead_adjustment = -double_min_of(sampler->target_overhead, this_window_sampling_overshoot_time_ns * 100. / this_window_time_ns);
173
+ sampler->target_overhead_adjustment = ewma_adj_window(
155
174
  last_target_overhead_adjustment,
156
175
  sampler->target_overhead_adjustment,
176
+ this_window_time_ns,
157
177
  first_readjustment
158
178
  );
159
179
 
160
180
  // Apply our overhead adjustment to figure out our real targets for this readjustment.
161
181
  double target_overhead = double_max_of(0, sampler->target_overhead + sampler->target_overhead_adjustment);
162
- long target_sampling_time_ns = window_time_ns * (target_overhead / 100.);
163
182
 
164
183
  // Recalculate target sampling probability so that the following 2 hold:
165
184
  // * window_time_ns = working_window_time_ns + sampling_window_time_ns
@@ -175,11 +194,13 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
175
194
  // ┌─ assuming no events will be emitted during sampling
176
195
  // │
177
196
  // = events_per_ns * working_window_time_ns * sampling_probability * sampling_time_ns
197
+ // = events_per_ns * (window_time_ns - sampling_window_time_ns) * sampling_probability * sampling_time_ns
178
198
  //
179
199
  // Re-ordering for sampling_probability and solving for the upper-bound of sampling_window_time_ns:
180
200
  //
181
201
  // sampling_window_time_ns = window_time_ns * target_overhead / 100
182
- // sampling_probability = window_time_ns * target_overhead / 100 / (events_per_ns * working_window_time_ns * sampling_time_ns) =
202
+ // sampling_probability = (sampling_window_time_ns) / (events_per_ns * sampling_time_ns * (window_time_ns - sampling_window_time_ns))
203
+ // = (window_time_ns * target_overhead / 100) / (events_per_ns * sampling_time_ns * window_time_ns * (1 - target_overhead / 100))
183
204
  //
184
205
  // Which you can intuitively understand as:
185
206
  //
@@ -190,16 +211,22 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
190
211
  // then probability will be > 1 (but we should clamp to 1 since probabilities higher than 1 don't make sense).
191
212
  // * If app is eventing a lot or our sampling overhead is big, then as time_to_sample_all_events_ns grows, sampling_probability will
192
213
  // tend to 0.
193
- long working_window_time_ns = long_max_of(0, window_time_ns - sampling_window_time_ns);
194
- double max_allowed_time_for_sampling_ns = target_sampling_time_ns;
195
- long time_to_sample_all_events_ns = sampler->events_per_ns * working_window_time_ns * sampler->sampling_time_ns;
196
- if (max_allowed_time_for_sampling_ns == 0) {
197
- // if we aren't allowed any sampling time at all, probability has to be 0
214
+ //
215
+ // In fact, we can simplify the equation further since the `window_time_ns` components cancel each other out:
216
+ //
217
+ // sampling_probability = (target_overhead / 100) / (events_per_ns * sampling_time_ns * (1 - target_overhead / 100))
218
+ // = max_sampling_overhead / avg_sampling_overhead
219
+
220
+ double max_sampling_overhead = target_overhead / 100.;
221
+ double avg_sampling_overhead = sampler->events_per_ns * sampler->sampling_time_ns * (1 - max_sampling_overhead);
222
+
223
+ if (max_sampling_overhead == 0) {
224
+ // if we aren't allowed any sampling overhead at all, probability has to be 0
198
225
  sampler->sampling_probability = 0;
199
226
  } else {
200
227
  // otherwise apply the formula described above (protecting against div by 0)
201
- sampler->sampling_probability = time_to_sample_all_events_ns == 0 ? 1. :
202
- double_min_of(1., max_allowed_time_for_sampling_ns / time_to_sample_all_events_ns);
228
+ sampler->sampling_probability = avg_sampling_overhead == 0 ? 1. :
229
+ double_min_of(1., max_sampling_overhead / avg_sampling_overhead);
203
230
  }
204
231
 
205
232
  // Doing true random selection would involve "tossing a coin" on every allocation. Lets do systematic sampling instead so that our
@@ -225,26 +252,34 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
225
252
  double samples_in_60s = allocs_in_60s * sampler->sampling_probability;
226
253
  double expected_total_sampling_time_in_60s =
227
254
  samples_in_60s * sampler->sampling_time_ns / 1e9;
228
- double real_total_sampling_time_in_60s = sampling_window_time_ns / 1e9 * 60 / (window_time_ns / 1e9);
255
+ double num_this_windows_in_60s = 60 * 1e9 / this_window_time_ns;
256
+ double real_total_sampling_time_in_60s = sampler->sampling_time_since_last_readjustment_ns * num_this_windows_in_60s / 1e9;
229
257
 
230
- fprintf(stderr, "[dds.%s] readjusting...\n", sampler->debug_name);
258
+ const char* readjustment_reason = should_readjust_based_on_time ? "time" : "samples";
259
+
260
+ fprintf(stderr, "[dds.%s] readjusting due to %s...\n", sampler->debug_name, readjustment_reason);
261
+ fprintf(stderr, "events_since_last_readjustment=%ld\n", sampler->events_since_last_readjustment);
231
262
  fprintf(stderr, "samples_since_last_readjustment=%ld\n", sampler->samples_since_last_readjustment);
232
- fprintf(stderr, "window_time=%ld\n", window_time_ns);
263
+ fprintf(stderr, "this_window_time=%ld\n", this_window_time_ns);
264
+ fprintf(stderr, "this_window_sampling_time=%ld\n", sampler->sampling_time_since_last_readjustment_ns);
265
+ fprintf(stderr, "this_working_window_time=%ld\n", this_window_time_ns - sampler->sampling_time_since_last_readjustment_ns);
266
+ fprintf(stderr, "this_window_sampling_target_time=%ld\n", this_window_sampling_target_time_ns);
267
+ fprintf(stderr, "this_window_sampling_overshoot_time=%ld\n", this_window_sampling_overshoot_time_ns);
268
+ fprintf(stderr, "\n");
269
+ fprintf(stderr, "target_overhead=%f\n", sampler->target_overhead);
270
+ fprintf(stderr, "target_overhead_adjustment=%f\n", sampler->target_overhead_adjustment);
233
271
  fprintf(stderr, "events_per_sec=%f\n", sampler->events_per_ns * 1e9);
234
272
  fprintf(stderr, "sampling_time=%ld\n", sampler->sampling_time_ns);
235
- fprintf(stderr, "sampling_window_time=%ld\n", sampling_window_time_ns);
236
- fprintf(stderr, "sampling_target_time=%ld\n", reference_target_sampling_time_ns);
237
- fprintf(stderr, "sampling_overshoot_time=%ld\n", sampling_overshoot_time_ns);
238
- fprintf(stderr, "working_window_time=%ld\n", working_window_time_ns);
273
+ fprintf(stderr, "avg_sampling_overhead=%f\n", avg_sampling_overhead * 100);
239
274
  fprintf(stderr, "sampling_interval=%zu\n", sampler->sampling_interval);
240
- fprintf(stderr, "sampling_probability=%f\n", sampler->sampling_probability);
275
+ fprintf(stderr, "sampling_probability=%f\n", sampler->sampling_probability * 100);
276
+ fprintf(stderr, "\n");
241
277
  fprintf(stderr, "expected allocs in 60s=%f\n", allocs_in_60s);
242
278
  fprintf(stderr, "expected samples in 60s=%f\n", samples_in_60s);
243
279
  fprintf(stderr, "expected sampling time in 60s=%f (previous real=%f)\n", expected_total_sampling_time_in_60s, real_total_sampling_time_in_60s);
244
- fprintf(stderr, "target_overhead=%f\n", sampler->target_overhead);
245
- fprintf(stderr, "target_overhead_adjustment=%f\n", sampler->target_overhead_adjustment);
246
- fprintf(stderr, "target_sampling_time=%ld\n", target_sampling_time_ns);
247
280
  fprintf(stderr, "expected max overhead in 60s=%f\n", target_overhead / 100.0 * 60);
281
+ fprintf(stderr, "\n");
282
+ fprintf(stderr, "sampling_time_clamps=%zu\n", sampler->sampling_time_clamps);
248
283
  fprintf(stderr, "-------\n");
249
284
  #endif
250
285
 
@@ -255,15 +290,34 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
255
290
  sampler->has_completed_full_adjustment_window = true;
256
291
  }
257
292
 
293
+ VALUE discrete_dynamic_sampler_state_snapshot(discrete_dynamic_sampler *sampler) {
294
+ VALUE arguments[] = {
295
+ ID2SYM(rb_intern("target_overhead")), /* => */ DBL2NUM(sampler->target_overhead),
296
+ ID2SYM(rb_intern("target_overhead_adjustment")), /* => */ DBL2NUM(sampler->target_overhead_adjustment),
297
+ ID2SYM(rb_intern("events_per_sec")), /* => */ DBL2NUM(sampler->events_per_ns * 1e9),
298
+ ID2SYM(rb_intern("sampling_time_ns")), /* => */ LONG2NUM(sampler->sampling_time_ns),
299
+ ID2SYM(rb_intern("sampling_interval")), /* => */ ULONG2NUM(sampler->sampling_interval),
300
+ ID2SYM(rb_intern("sampling_probability")), /* => */ DBL2NUM(sampler->sampling_probability * 100),
301
+ ID2SYM(rb_intern("events_since_last_readjustment")), /* => */ ULONG2NUM(sampler->events_since_last_readjustment),
302
+ ID2SYM(rb_intern("samples_since_last_readjustment")), /* => */ ULONG2NUM(sampler->samples_since_last_readjustment),
303
+ ID2SYM(rb_intern("max_sampling_time_ns")), /* => */ LONG2NUM(sampler->max_sampling_time_ns),
304
+ ID2SYM(rb_intern("sampling_time_clamps")), /* => */ ULONG2NUM(sampler->sampling_time_clamps),
305
+ };
306
+ VALUE hash = rb_hash_new();
307
+ for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(hash, arguments[i], arguments[i+1]);
308
+ return hash;
309
+ }
310
+
258
311
  // ---
259
312
  // Below here is boilerplate to expose the above code to Ruby so that we can test it with RSpec as usual.
260
313
 
261
314
  static VALUE _native_new(VALUE klass);
315
+ static VALUE _native_initialize(VALUE self, VALUE now);
262
316
  static VALUE _native_reset(VALUE self, VALUE now);
263
317
  static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_overhead, VALUE now);
264
318
  static VALUE _native_should_sample(VALUE self, VALUE now);
265
319
  static VALUE _native_after_sample(VALUE self, VALUE now);
266
- static VALUE _native_probability(VALUE self);
320
+ static VALUE _native_state_snapshot(VALUE self);
267
321
 
268
322
  typedef struct sampler_state {
269
323
  discrete_dynamic_sampler sampler;
@@ -276,12 +330,15 @@ void collectors_discrete_dynamic_sampler_init(VALUE profiling_module) {
276
330
  VALUE sampler_class = rb_define_class_under(testing_module, "Sampler", rb_cObject);
277
331
 
278
332
  rb_define_alloc_func(sampler_class, _native_new);
333
+ // NOTE: Despite being native, we're using the normal ruby keyword to prevent having to write a whole
334
+ // new ruby file to simply proxy the initialization call.
335
+ rb_define_method(sampler_class, "initialize", _native_initialize, 1);
279
336
 
280
337
  rb_define_method(sampler_class, "_native_reset", _native_reset, 1);
281
338
  rb_define_method(sampler_class, "_native_set_overhead_target_percentage", _native_set_overhead_target_percentage, 2);
282
339
  rb_define_method(sampler_class, "_native_should_sample", _native_should_sample, 1);
283
340
  rb_define_method(sampler_class, "_native_after_sample", _native_after_sample, 1);
284
- rb_define_method(sampler_class, "_native_probability", _native_probability, 0);
341
+ rb_define_method(sampler_class, "_native_state_snapshot", _native_state_snapshot, 0);
285
342
  }
286
343
 
287
344
  static const rb_data_type_t sampler_typed_data = {
@@ -296,21 +353,37 @@ static const rb_data_type_t sampler_typed_data = {
296
353
  // Allocator for the testing-only Sampler class (registered through rb_define_alloc_func in
  // collectors_discrete_dynamic_sampler_init).
  //
  // Allocates a zeroed sampler_state, initializes the embedded sampler under the debug name "test sampler" using the
  // current monotonic wall time, and returns the state wrapped in a TypedData object of class `klass`.
  //
  // Raises RuntimeError when the clock read returns 0 -- presumably the helper's failure value when
  // DO_NOT_RAISE_ON_FAILURE is passed; confirm against monotonic_wall_time_now_ns.
  //
  // NOTE(review): `state` is allocated before the clock is read and is not yet wrapped when rb_raise runs, so that
  // path looks like it leaks the allocation -- confirm, and consider reading the clock before allocating.
  // NOTE(review): ruby_xcalloc is declared as (n, size); the arguments are swapped here, which still requests the
  // same number of bytes.
  static VALUE _native_new(VALUE klass) {
297
354
  sampler_state *state = ruby_xcalloc(sizeof(sampler_state), 1);
298
355
 
299
- discrete_dynamic_sampler_init(&state->sampler, "test sampler");
356
+ long now_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
357
+ if (now_ns == 0) {
358
+ rb_raise(rb_eRuntimeError, "failed to get clock time");
359
+ }
360
+ discrete_dynamic_sampler_init(&state->sampler, "test sampler", now_ns);
300
361
 
301
362
  return TypedData_Wrap_Struct(klass, &sampler_typed_data, state);
302
363
  }
303
364
 
304
- static VALUE _native_reset(VALUE self, VALUE now_ns) {
365
// Ruby-visible `initialize` for the testing-only Sampler class (registered with arity 1).
//
// @param self   Sampler instance wrapping a sampler_state.
// @param now_ns Fixnum timestamp in nanoseconds supplied by the caller (checked with ENFORCE_TYPE).
// @return Qtrue.
//
// The allocator already initialized the sampler using the real monotonic clock; this call initializes it again
// with the caller-provided `now_ns`, so the timestamp the sampler starts from is the one chosen by the caller.
+ static VALUE _native_initialize(VALUE self, VALUE now_ns) {
305
366
  ENFORCE_TYPE(now_ns, T_FIXNUM);
306
367
 
307
368
  sampler_state *state;
308
369
  TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
309
370
 
310
- _discrete_dynamic_sampler_reset(&state->sampler, NUM2LONG(now_ns));
371
+ discrete_dynamic_sampler_init(&state->sampler, "test sampler", NUM2LONG(now_ns));
372
+
311
373
  return Qtrue;
312
374
  }
313
375
 
376
// Ruby-visible wrapper around discrete_dynamic_sampler_reset for the testing-only Sampler class.
//
// @param self   Sampler instance wrapping a sampler_state.
// @param now_ns Fixnum timestamp in nanoseconds (checked with ENFORCE_TYPE), forwarded to the reset call as a long.
// @return Qnil.
+ static VALUE _native_reset(VALUE self, VALUE now_ns) {
377
+ ENFORCE_TYPE(now_ns, T_FIXNUM);
378
+
379
+ sampler_state *state;
380
+ TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
381
+
382
+ discrete_dynamic_sampler_reset(&state->sampler, NUM2LONG(now_ns));
383
+
384
+ return Qnil;
385
+ }
386
+
314
387
  static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_overhead, VALUE now_ns) {
315
388
  ENFORCE_TYPE(target_overhead, T_FLOAT);
316
389
  ENFORCE_TYPE(now_ns, T_FIXNUM);
@@ -318,7 +391,7 @@ static VALUE _native_set_overhead_target_percentage(VALUE self, VALUE target_ove
318
391
  sampler_state *state;
319
392
  TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
320
393
 
321
- _discrete_dynamic_sampler_set_overhead_target_percentage(&state->sampler, NUM2DBL(target_overhead), NUM2LONG(now_ns));
394
+ discrete_dynamic_sampler_set_overhead_target_percentage(&state->sampler, NUM2DBL(target_overhead), NUM2LONG(now_ns));
322
395
 
323
396
  return Qnil;
324
397
  }
@@ -329,7 +402,7 @@ VALUE _native_should_sample(VALUE self, VALUE now_ns) {
329
402
  sampler_state *state;
330
403
  TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
331
404
 
332
- return _discrete_dynamic_sampler_should_sample(&state->sampler, NUM2LONG(now_ns)) ? Qtrue : Qfalse;
405
+ return discrete_dynamic_sampler_should_sample(&state->sampler, NUM2LONG(now_ns)) ? Qtrue : Qfalse;
333
406
  }
334
407
 
335
408
  VALUE _native_after_sample(VALUE self, VALUE now_ns) {
@@ -338,12 +411,12 @@ VALUE _native_after_sample(VALUE self, VALUE now_ns) {
338
411
  sampler_state *state;
339
412
  TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
340
413
 
341
- return LONG2NUM(_discrete_dynamic_sampler_after_sample(&state->sampler, NUM2LONG(now_ns)));
414
+ return LONG2NUM(discrete_dynamic_sampler_after_sample(&state->sampler, NUM2LONG(now_ns)));
342
415
  }
343
416
 
344
- VALUE _native_probability(VALUE self) {
417
// Ruby-visible wrapper (arity 0) that returns whatever discrete_dynamic_sampler_state_snapshot produces for the
// sampler wrapped by `self`. The header documents that value as a Ruby hash with the sampler's interesting state.
//
// NOTE(review): the forward declaration of this function is `static`, but the definition omits the keyword. C keeps
// the internal linkage from the earlier declaration, so this is legal; adding `static` here would only be for
// consistency with the sibling wrappers.
+ VALUE _native_state_snapshot(VALUE self) {
345
418
  sampler_state *state;
346
419
  TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
347
420
 
348
- return DBL2NUM(discrete_dynamic_sampler_probability(&state->sampler));
421
+ return discrete_dynamic_sampler_state_snapshot(&state->sampler);
349
422
  }
@@ -3,6 +3,8 @@
3
3
  #include <stdbool.h>
4
4
  #include <stddef.h>
5
5
 
6
+ #include <ruby.h>
7
+
6
8
  // A sampler that will sample discrete events based on the overhead of their
7
9
  // sampling.
8
10
  //
@@ -31,6 +33,8 @@ typedef struct discrete_dynamic_sampler {
31
33
  // NOTE: This is an inverted view of the probability.
32
34
  // NOTE: A value of 0 works as +inf, effectively disabling sampling (to align with probability=0)
33
35
  unsigned long sampling_interval;
36
+ // Max allowed value for an individual sampling time measurement.
37
+ long max_sampling_time_ns;
34
38
 
35
39
  // -- Sampling State --
36
40
  // How many events have we seen since we last decided to sample.
@@ -53,20 +57,23 @@ typedef struct discrete_dynamic_sampler {
53
57
  // A negative number that we add to target_overhead to serve as extra padding to
54
58
  // try and mitigate observed overshooting of max sampling time.
55
59
  double target_overhead_adjustment;
60
+
61
+ // -- Interesting stats --
62
+ unsigned long sampling_time_clamps;
56
63
  } discrete_dynamic_sampler;
57
64
 
58
65
 
59
66
  // Init a new sampler with sane defaults.
60
- void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name);
67
+ void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name, long now_ns);
61
68
 
62
69
  // Reset a sampler, clearing all stored state.
63
- void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler);
70
+ void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_ns);
64
71
 
65
72
  // Sets a new target_overhead for the provided sampler, resetting it in the process.
66
73
  // @param target_overhead A double representing the percentage of total time we are
67
74
  // willing to use as overhead for the resulting sampling. Values are expected
68
75
  // to be in the range ]0.0, 100.0].
69
- void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead);
76
+ void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns);
70
77
 
71
78
  // Make a sampling decision.
72
79
  //
@@ -75,15 +82,20 @@ void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sa
75
82
  //
76
83
  // NOTE: If true is returned we implicitly assume the start of a sampling operation
77
84
  // and it is expected that a follow-up after_sample call is issued.
78
- bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler);
85
+ bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns);
79
86
 
80
87
  // Signal the end of a sampling operation.
81
88
  //
82
89
  // @return Sampling time in nanoseconds for the sample operation we just finished.
83
- long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler);
90
+ long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns);
84
91
 
85
92
  // Retrieve the current sampling probability ([0.0, 100.0]) being applied by this sampler.
86
93
  double discrete_dynamic_sampler_probability(discrete_dynamic_sampler *sampler);
87
94
 
88
95
  // Retrieve the current number of events seen since last sample.
89
96
  unsigned long discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sampler *sampler);
97
+
98
+ // Return a Ruby hash containing a snapshot of this sampler's interesting state at calling time.
99
+ // WARN: This allocates in the Ruby VM and therefore should not be called without the
100
+ // VM lock or during GC.
101
+ VALUE discrete_dynamic_sampler_state_snapshot(discrete_dynamic_sampler *sampler);
@@ -82,6 +82,9 @@ static ID at_id_id; // id of :@id in Ruby
82
82
  static ID at_resource_id; // id of :@resource in Ruby
83
83
  static ID at_root_span_id; // id of :@root_span in Ruby
84
84
  static ID at_type_id; // id of :@type in Ruby
85
+ static ID at_otel_values_id; // id of :@otel_values in Ruby
86
+ static ID at_parent_span_id_id; // id of :@parent_span_id in Ruby
87
+ static ID at_datadog_trace_id; // id of :@datadog_trace in Ruby
85
88
 
86
89
  // Contains state for a single ThreadContext instance
87
90
  struct thread_context_collector_state {
@@ -114,6 +117,8 @@ struct thread_context_collector_state {
114
117
  monotonic_to_system_epoch_state time_converter_state;
115
118
  // Used to identify the main thread, to give it a fallback name
116
119
  VALUE main_thread;
120
+ // Used when extracting trace identifiers from otel spans. Lazily initialized.
121
+ VALUE otel_current_span_key;
117
122
 
118
123
  struct stats {
119
124
  // Track how many garbage collection samples we've taken.
@@ -218,6 +223,14 @@ static VALUE thread_list(struct thread_context_collector_state *state);
218
223
  static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
219
224
  static VALUE _native_new_empty_thread(VALUE self);
220
225
  static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
226
+ static void ddtrace_otel_trace_identifiers_for(
227
+ struct thread_context_collector_state *state,
228
+ VALUE *active_trace,
229
+ VALUE *root_span,
230
+ VALUE *numeric_span_id,
231
+ VALUE active_span,
232
+ VALUE otel_values
233
+ );
221
234
 
222
235
  void collectors_thread_context_init(VALUE profiling_module) {
223
236
  VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -255,6 +268,9 @@ void collectors_thread_context_init(VALUE profiling_module) {
255
268
  at_resource_id = rb_intern_const("@resource");
256
269
  at_root_span_id = rb_intern_const("@root_span");
257
270
  at_type_id = rb_intern_const("@type");
271
+ at_otel_values_id = rb_intern_const("@otel_values");
272
+ at_parent_span_id_id = rb_intern_const("@parent_span_id");
273
+ at_datadog_trace_id = rb_intern_const("@datadog_trace");
258
274
 
259
275
  gc_profiling_init();
260
276
  }
@@ -282,6 +298,7 @@ static void thread_context_collector_typed_data_mark(void *state_ptr) {
282
298
  st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_mark, 0 /* unused */);
283
299
  rb_gc_mark(state->thread_list_buffer);
284
300
  rb_gc_mark(state->main_thread);
301
+ rb_gc_mark(state->otel_current_span_key);
285
302
  }
286
303
 
287
304
  static void thread_context_collector_typed_data_free(void *state_ptr) {
@@ -334,6 +351,7 @@ static VALUE _native_new(VALUE klass) {
334
351
  state->allocation_type_enabled = true;
335
352
  state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
336
353
  state->main_thread = rb_thread_main();
354
+ state->otel_current_span_key = Qnil;
337
355
  state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
338
356
  state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;
339
357
 
@@ -603,11 +621,14 @@ bool thread_context_collector_on_gc_finish(VALUE self_instance) {
603
621
  // Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
604
622
  // on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
605
623
  // samples first.
606
- bool finished_major_gc = gc_profiling_has_major_gc_finished();
607
624
  bool over_flush_time_treshold =
608
625
  (wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
609
626
 
610
- return finished_major_gc || over_flush_time_treshold;
627
+ if (over_flush_time_treshold) {
628
+ return true;
629
+ } else {
630
+ return gc_profiling_has_major_gc_finished();
631
+ }
611
632
  }
612
633
 
613
634
  // This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
@@ -917,6 +938,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc
917
938
  ));
918
939
  rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread));
919
940
  rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state)));
941
+ rb_str_concat(result, rb_sprintf(" otel_current_span_key=%"PRIsVALUE, state->otel_current_span_key));
920
942
 
921
943
  return result;
922
944
  }
@@ -1104,10 +1126,19 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
1104
1126
 
1105
1127
  VALUE root_span = rb_ivar_get(active_trace, at_root_span_id /* @root_span */);
1106
1128
  VALUE active_span = rb_ivar_get(active_trace, at_active_span_id /* @active_span */);
1107
- if (root_span == Qnil || active_span == Qnil) return;
1129
+ // Note: On Ruby 3.x `rb_attr_get` is exactly the same as `rb_ivar_get`. For Ruby 2.x, the difference is that
1130
+ // `rb_ivar_get` can trigger "warning: instance variable @otel_values not initialized" if warnings are enabled and
1131
+ // opentelemetry is not in use, whereas `rb_attr_get` does the lookup without generating the warning.
1132
+ VALUE otel_values = rb_attr_get(active_trace, at_otel_values_id /* @otel_values */);
1133
+
1134
+ VALUE numeric_span_id = Qnil;
1135
+
1136
+ if (otel_values != Qnil) ddtrace_otel_trace_identifiers_for(state, &active_trace, &root_span, &numeric_span_id, active_span, otel_values);
1137
+
1138
+ if (root_span == Qnil || (active_span == Qnil && numeric_span_id == Qnil)) return;
1108
1139
 
1109
1140
  VALUE numeric_local_root_span_id = rb_ivar_get(root_span, at_id_id /* @id */);
1110
- VALUE numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
1141
+ if (active_span != Qnil && numeric_span_id == Qnil) numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
1111
1142
  if (numeric_local_root_span_id == Qnil || numeric_span_id == Qnil) return;
1112
1143
 
1113
1144
  trace_identifiers_result->local_root_span_id = NUM2ULL(numeric_local_root_span_id);
@@ -1299,3 +1330,65 @@ static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) {
1299
1330
  default: return DDOG_CHARSLICE_C("(VM Internal, Missing class)");
1300
1331
  }
1301
1332
  }
1333
+
1334
// Returns the value of Datadog::OpenTelemetry::API::Context::CURRENT_SPAN_KEY, resolving it on first use and caching
// it in state->otel_current_span_key (which starts out as Qnil and is marked by the collector's GC mark function).
//
// Raises RuntimeError if the resolved constant is nil.
//
// NOTE(review): rb_const_get itself raises (NameError) when a constant along the path is missing, so the Qnil check
// below presumably only fires if the constant is explicitly defined as nil -- confirm that is the intended guard.
// NOTE(review): the lookups mix rb_intern and rb_intern_const; both yield the same IDs for these literals.
+ static VALUE get_otel_current_span_key(struct thread_context_collector_state *state) {
1335
+ if (state->otel_current_span_key == Qnil) {
1336
+ VALUE datadog_module = rb_const_get(rb_cObject, rb_intern("Datadog"));
1337
+ VALUE opentelemetry_module = rb_const_get(datadog_module, rb_intern("OpenTelemetry"));
1338
+ VALUE api_module = rb_const_get(opentelemetry_module, rb_intern("API"));
1339
+ VALUE context_module = rb_const_get(api_module, rb_intern_const("Context"));
1340
+ VALUE current_span_key = rb_const_get(context_module, rb_intern_const("CURRENT_SPAN_KEY"));
1341
+
1342
+ if (current_span_key == Qnil) {
1343
+ rb_raise(rb_eRuntimeError, "Unexpected: Missing Datadog::OpenTelemetry::API::Context::CURRENT_SPAN_KEY");
1344
+ }
1345
+
1346
+ state->otel_current_span_key = current_span_key;
1347
+ }
1348
+
1349
+ return state->otel_current_span_key;
1350
+ }
1351
+
1352
+ // This method gets used when ddtrace is being used indirectly via the otel APIs. Information gets stored slightly
1353
+ // differently, and this codepath handles it.
//
// Parameters:
// * state: collector state; used to obtain the (lazily cached) otel current-span key.
// * active_trace (in/out): trace to start from; on success replaced by the trace found at the end of the otel chain.
// * root_span (out): on success, set to the @root_span of that final trace.
// * numeric_span_id (out): on success, set to the span id resolved from `active_span` (or from the starting trace's
//   @parent_span_id when `active_span` is nil).
// * active_span: the active span of the starting trace; may be Qnil.
// * otel_values: the @otel_values of the starting trace; the caller only invokes this when it is not nil.
//
// The three outputs are written together, and only when both a span id and a root span were found; on any early
// return they are left untouched.
//
// NOTE(review): inside the loop @otel_values and @datadog_trace are read with rb_ivar_get, whereas the caller reads
// @otel_values with rb_attr_get specifically to avoid "instance variable not initialized" warnings on Ruby 2.x --
// confirm whether the traces/spans reached here always have these ivars set.
// NOTE(review): `otel_values` is passed to rb_hash_lookup without a type check (assumed to be a Hash), and the walk
// has no guard against a cyclic chain -- presumably impossible by construction; verify.
1354
+ static void ddtrace_otel_trace_identifiers_for(
1355
+ struct thread_context_collector_state *state,
1356
+ VALUE *active_trace,
1357
+ VALUE *root_span,
1358
+ VALUE *numeric_span_id,
1359
+ VALUE active_span,
1360
+ VALUE otel_values
1361
+ ) {
1362
+ VALUE resolved_numeric_span_id =
1363
+ active_span == Qnil ?
1364
+ // For traces started from otel spans, the span id will be empty, and the @parent_span_id has the right value
1365
+ rb_ivar_get(*active_trace, at_parent_span_id_id /* @parent_span_id */) :
1366
+ // Regular span created by ddtrace
1367
+ rb_ivar_get(active_span, at_id_id /* @id */);
1368
+
1369
+ if (resolved_numeric_span_id == Qnil) return;
1370
+
1371
+ VALUE otel_current_span_key = get_otel_current_span_key(state);
1372
+ VALUE current_trace = *active_trace;
1373
+
1374
+ // ddtrace uses a different structure when spans are created from otel, where each otel span will have a unique ddtrace
1375
+ // trace and span representing it. Each ddtrace trace is then connected to the previous otel span, forming a linked
1376
+ // list. The local root span is going to be the trace/span we find at the end of this linked list.
1377
+ while (otel_values != Qnil) {
1378
+ VALUE otel_span = rb_hash_lookup(otel_values, otel_current_span_key);
1379
+ if (otel_span == Qnil) break;
1380
+ VALUE next_trace = rb_ivar_get(otel_span, at_datadog_trace_id);
1381
+ if (next_trace == Qnil) break;
1382
+
1383
+ current_trace = next_trace;
1384
+ otel_values = rb_ivar_get(current_trace, at_otel_values_id /* @otel_values */);
1385
+ }
1386
+
1387
+ // We found the last trace in the linked list. This contains the local root span
1388
+ VALUE resolved_root_span = rb_ivar_get(current_trace, at_root_span_id /* @root_span */);
1389
+ if (resolved_root_span == Qnil) return;
1390
+
1391
+ *root_span = resolved_root_span;
1392
+ *active_trace = current_trace;
1393
+ *numeric_span_id = resolved_numeric_span_id;
1394
+ }