ddtrace 0.53.0 → 0.54.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +77 -11
  3. data/ddtrace.gemspec +5 -2
  4. data/docs/GettingStarted.md +40 -3
  5. data/docs/ProfilingDevelopment.md +2 -2
  6. data/ext/ddtrace_profiling_native_extension/NativeExtensionDesign.md +86 -0
  7. data/ext/ddtrace_profiling_native_extension/clock_id.h +4 -0
  8. data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +52 -0
  9. data/ext/ddtrace_profiling_native_extension/clock_id_noop.c +14 -0
  10. data/ext/ddtrace_profiling_native_extension/extconf.rb +111 -3
  11. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +35 -0
  12. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +3 -0
  13. data/ext/ddtrace_profiling_native_extension/profiling.c +6 -1
  14. data/lib/datadog/ci/contrib/cucumber/formatter.rb +1 -0
  15. data/lib/datadog/ci/contrib/rspec/example.rb +1 -0
  16. data/lib/datadog/ci/ext/environment.rb +26 -21
  17. data/lib/datadog/ci/ext/test.rb +1 -0
  18. data/lib/datadog/ci/test.rb +5 -1
  19. data/lib/ddtrace/buffer.rb +28 -16
  20. data/lib/ddtrace/configuration/agent_settings_resolver.rb +27 -16
  21. data/lib/ddtrace/context.rb +10 -2
  22. data/lib/ddtrace/contrib/delayed_job/plugin.rb +2 -2
  23. data/lib/ddtrace/contrib/mongodb/instrumentation.rb +1 -1
  24. data/lib/ddtrace/contrib/mongodb/integration.rb +5 -0
  25. data/lib/ddtrace/contrib/rails/configuration/settings.rb +7 -0
  26. data/lib/ddtrace/contrib/rails/framework.rb +3 -2
  27. data/lib/ddtrace/contrib/redis/instrumentation.rb +90 -0
  28. data/lib/ddtrace/contrib/redis/patcher.rb +2 -84
  29. data/lib/ddtrace/contrib/resque/integration.rb +1 -5
  30. data/lib/ddtrace/ext/priority.rb +6 -4
  31. data/lib/ddtrace/ext/profiling.rb +1 -1
  32. data/lib/ddtrace/metrics.rb +2 -2
  33. data/lib/ddtrace/profiling/collectors/stack.rb +45 -45
  34. data/lib/ddtrace/profiling/encoding/profile.rb +1 -1
  35. data/lib/ddtrace/profiling/events/stack.rb +8 -8
  36. data/lib/ddtrace/profiling/native_extension.rb +23 -1
  37. data/lib/ddtrace/profiling/pprof/builder.rb +8 -2
  38. data/lib/ddtrace/profiling/pprof/stack_sample.rb +13 -16
  39. data/lib/ddtrace/profiling/pprof/template.rb +2 -2
  40. data/lib/ddtrace/profiling/tasks/setup.rb +21 -12
  41. data/lib/ddtrace/profiling/trace_identifiers/ddtrace.rb +9 -8
  42. data/lib/ddtrace/profiling/trace_identifiers/helper.rb +2 -2
  43. data/lib/ddtrace/profiling.rb +0 -2
  44. data/lib/ddtrace/sampler.rb +18 -8
  45. data/lib/ddtrace/sampling/rule_sampler.rb +13 -1
  46. data/lib/ddtrace/utils/time.rb +6 -0
  47. data/lib/ddtrace/version.rb +1 -1
  48. metadata +14 -9
  49. data/lib/ddtrace/profiling/ext/cpu.rb +0 -67
  50. data/lib/ddtrace/profiling/ext/cthread.rb +0 -156
@@ -31,7 +31,7 @@ module Datadog
31
31
  !version.nil? && version >= Gem::Version.new('3.3.0') &&
32
32
  # dogstatsd-ruby >= 5.0 & < 5.2.0 has known issues with process forks
33
33
  # and do not support the single thread mode we use to avoid this problem.
34
- !(version >= Gem::Version.new('5.0') && version < Gem::Version.new('5.2'))
34
+ !(version >= Gem::Version.new('5.0') && version < Gem::Version.new('5.3'))
35
35
  end
36
36
 
37
37
  def enabled?
@@ -274,7 +274,7 @@ module Datadog
274
274
  IGNORED_STATSD_ONLY_ONCE.run do
275
275
  Datadog.logger.warn(
276
276
  'Ignoring user-supplied statsd instance as currently-installed version of dogstatsd-ruby is incompatible. ' \
277
- "To fix this, ensure that you have `gem 'dogstatsd-ruby', '~> 5.2'` on your Gemfile or gems.rb file."
277
+ "To fix this, ensure that you have `gem 'dogstatsd-ruby', '~> 5.3'` on your Gemfile or gems.rb file."
278
278
  )
279
279
  end
280
280
  end
@@ -1,4 +1,6 @@
1
1
  # typed: true
2
+
3
+ require 'ddtrace/profiling/native_extension'
2
4
  require 'ddtrace/profiling/backtrace_location'
3
5
  require 'ddtrace/profiling/events/stack'
4
6
  require 'ddtrace/utils/only_once'
@@ -19,6 +21,7 @@ module Datadog
19
21
  MIN_INTERVAL = 0.01
20
22
  THREAD_LAST_CPU_TIME_KEY = :datadog_profiler_last_cpu_time
21
23
  THREAD_LAST_WALL_CLOCK_KEY = :datadog_profiler_last_wall_clock
24
+ SYNTHETIC_STACK_IN_NATIVE_CODE = [BacktraceLocation.new('', 0, 'In native code').freeze].freeze
22
25
 
23
26
  # This default was picked based on the current sampling performance and on expected concurrency on an average
24
27
  # Ruby MRI application. Lowering this optimizes for latency (less impact each time we sample), and raising
@@ -31,7 +34,8 @@ module Datadog
31
34
  :trace_identifiers_helper,
32
35
  :ignore_thread,
33
36
  :max_time_usage_pct,
34
- :thread_api
37
+ :thread_api,
38
+ :cpu_time_provider
35
39
 
36
40
  def initialize(
37
41
  recorder,
@@ -41,6 +45,7 @@ module Datadog
41
45
  max_time_usage_pct: DEFAULT_MAX_TIME_USAGE_PCT,
42
46
  max_threads_sampled: DEFAULT_MAX_THREADS_SAMPLED,
43
47
  thread_api: Thread,
48
+ cpu_time_provider: Datadog::Profiling::NativeExtension,
44
49
  fork_policy: Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default
45
50
  interval: MIN_INTERVAL,
46
51
  enabled: true
@@ -52,6 +57,8 @@ module Datadog
52
57
  @max_time_usage_pct = max_time_usage_pct
53
58
  @max_threads_sampled = max_threads_sampled
54
59
  @thread_api = thread_api
60
+ # Only set the provider if it's able to work in the current Ruby/OS combo
61
+ @cpu_time_provider = cpu_time_provider unless cpu_time_provider.cpu_time_ns_for(thread_api.current).nil?
55
62
 
56
63
  # Workers::Async::Thread settings
57
64
  self.fork_policy = fork_policy
@@ -62,8 +69,6 @@ module Datadog
62
69
  # Workers::Polling settings
63
70
  self.enabled = enabled
64
71
 
65
- @warn_about_missing_cpu_time_instrumentation_only_once = Datadog::Utils::OnlyOnce.new
66
-
67
72
  # Cache this proc, since it's pretty expensive to keep recreating it
68
73
  @build_backtrace_location = method(:build_backtrace_location).to_proc
69
74
  # Cache this buffer, since it's pretty expensive to keep accessing it
@@ -119,6 +124,26 @@ module Datadog
119
124
  locations = thread.backtrace_locations
120
125
  return if locations.nil?
121
126
 
127
+ # Having empty locations means that the thread is alive, but we don't know what it's doing:
128
+ #
129
+ # 1. It can be starting up
130
+ # ```
131
+ # > Thread.new { sleep }.backtrace
132
+ # => [] # <-- note the thread hasn't actually started running sleep yet, we got there first
133
+ # ```
134
+ # 2. It can be running native code
135
+ # ```
136
+ # > t = Process.detach(fork { sleep })
137
+ # => #<Process::Waiter:0x00007ffe7285f7a0 run>
138
+ # > t.backtrace
139
+ # => [] # <-- this can happen even minutes later, e.g. it's not a race as in 1.
140
+ # ```
141
+ # This effect has been observed in threads created by the Iodine web server and the ffi gem
142
+ #
143
+ # To give customers visibility into these threads, we replace the empty stack with one containing a
144
+ # synthetic placeholder frame, so that these threads are properly represented in the UX.
145
+ locations = SYNTHETIC_STACK_IN_NATIVE_CODE if locations.empty?
146
+
122
147
  # Get actual stack size then trim the stack
123
148
  stack_size = locations.length
124
149
  locations = locations[0..(max_frames - 1)]
@@ -126,8 +151,8 @@ module Datadog
126
151
  # Convert backtrace locations into structs
127
152
  locations = convert_backtrace_locations(locations)
128
153
 
129
- thread_id = thread.respond_to?(:pthread_thread_id) ? thread.pthread_thread_id : thread.object_id
130
- trace_id, span_id, trace_resource = trace_identifiers_helper.trace_identifiers_for(thread)
154
+ thread_id = thread.object_id
155
+ root_span_id, span_id, trace_resource = trace_identifiers_helper.trace_identifiers_for(thread)
131
156
  cpu_time = get_cpu_time_interval!(thread)
132
157
  wall_time_interval_ns =
133
158
  get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_WALL_CLOCK_KEY, current_wall_time_ns)
@@ -137,7 +162,7 @@ module Datadog
137
162
  locations,
138
163
  stack_size,
139
164
  thread_id,
140
- trace_id,
165
+ root_span_id,
141
166
  span_id,
142
167
  trace_resource,
143
168
  cpu_time,
@@ -146,17 +171,10 @@ module Datadog
146
171
  end
147
172
 
148
173
  def get_cpu_time_interval!(thread)
149
- # Return if we can't get the current CPU time
150
- unless thread.respond_to?(:cpu_time_instrumentation_installed?) && thread.cpu_time_instrumentation_installed?
151
- warn_about_missing_cpu_time_instrumentation(thread)
152
- return
153
- end
174
+ return unless cpu_time_provider
154
175
 
155
- current_cpu_time_ns = thread.cpu_time(:nanosecond)
176
+ current_cpu_time_ns = cpu_time_provider.cpu_time_ns_for(thread)
156
177
 
157
- # NOTE: This can still be nil even when all of the checks above passed because of a race: there's a bit of
158
- # initialization that needs to be done by the thread itself, and it's possible for us to try to sample
159
- # *before* the thread had time to finish the initialization
160
178
  return unless current_cpu_time_ns
161
179
 
162
180
  get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_CPU_TIME_KEY, current_cpu_time_ns)
@@ -205,33 +223,6 @@ module Datadog
205
223
 
206
224
  private
207
225
 
208
- def warn_about_missing_cpu_time_instrumentation(thread)
209
- @warn_about_missing_cpu_time_instrumentation_only_once.run do
210
- # Is the profiler thread instrumented? If it is, then we know instrumentation is available, but seems to be
211
- # missing on this thread we just found.
212
- #
213
- # As far as we know, it can be missing due to one the following:
214
- #
215
- # a) The thread was started before we installed our instrumentation.
216
- # In this case, the fix is to make sure ddtrace gets loaded before any other parts of the application.
217
- #
218
- # b) The thread was started using the Ruby native APIs (e.g. from a C extension such as ffi).
219
- # Known cases right now that trigger this are the ethon/typhoeus gems.
220
- # We currently have no solution for this case; these threads will always be missing our CPU instrumentation.
221
- #
222
- # c) The thread was started with `Thread.start`/`Thread.fork` and hasn't yet enabled the instrumentation.
223
- # When threads are started using these APIs, there's a small time window during which the thread has started
224
- # but our code to apply the instrumentation hasn't run yet; in these cases it's just a matter of allowing
225
- # it to run and our instrumentation to be applied.
226
- #
227
- if thread_api.current.respond_to?(:cpu_time) && thread_api.current.cpu_time
228
- Datadog.logger.debug(
229
- "Thread ('#{thread}') is missing profiling instrumentation; other threads should be unaffected"
230
- )
231
- end
232
- end
233
- end
234
-
235
226
  # If the profiler is started for a while, stopped and then restarted OR whenever the process forks, we need to
236
227
  # clean up any leftover per-thread counters, so that the first sample after starting doesn't end up with:
237
228
  #
@@ -253,9 +244,18 @@ module Datadog
253
244
  end
254
245
 
255
246
  def get_elapsed_since_last_sample_and_set_value(thread, key, current_value)
256
- # See cthread.rb for more details, but this is a workaround for https://bugs.ruby-lang.org/issues/17807 ;
257
- # using all thread_variable related methods on these instances also triggers a crash and for now we just
258
- # skip it for the affected Rubies
247
+ # Process::Waiter crash workaround:
248
+ #
249
+ # This is a workaround for a Ruby VM segfault (usually something like
250
+ # "[BUG] Segmentation fault at 0x0000000000000008") in the affected Ruby versions.
251
+ # See https://bugs.ruby-lang.org/issues/17807 for details.
252
+ #
253
+ # In those Ruby versions, there's a very special subclass of `Thread` called `Process::Waiter` that causes VM
254
+ # crashes whenever something tries to read its instance or thread variables. This subclass of thread only
255
+ # shows up when the `Process.detach` API gets used.
256
+ # In the specs you'll find crash regression tests that include a way of reproducing it.
257
+ #
258
+ # As a workaround for now, we just skip it for the affected Rubies
259
259
  return 0 if @needs_process_waiter_workaround && thread.is_a?(::Process::Waiter)
260
260
 
261
261
  last_value = thread.thread_variable_get(key) || current_value
@@ -37,7 +37,7 @@ module Datadog
37
37
  end
38
38
 
39
39
  # Build the profile and encode it
40
- template.to_pprof
40
+ template.to_pprof(start: flush.start, finish: flush.finish)
41
41
  end
42
42
  end
43
43
  end
@@ -11,7 +11,7 @@ module Datadog
11
11
  :frames,
12
12
  :total_frame_count,
13
13
  :thread_id,
14
- :trace_id,
14
+ :root_span_id,
15
15
  :span_id,
16
16
  :trace_resource
17
17
 
@@ -20,7 +20,7 @@ module Datadog
20
20
  frames,
21
21
  total_frame_count,
22
22
  thread_id,
23
- trace_id,
23
+ root_span_id,
24
24
  span_id,
25
25
  trace_resource
26
26
  )
@@ -29,16 +29,16 @@ module Datadog
29
29
  @frames = frames
30
30
  @total_frame_count = total_frame_count
31
31
  @thread_id = thread_id
32
- @trace_id = trace_id
32
+ @root_span_id = root_span_id
33
33
  @span_id = span_id
34
34
  @trace_resource = trace_resource
35
35
 
36
36
  @hash = [
37
37
  thread_id,
38
- trace_id,
38
+ root_span_id,
39
39
  span_id,
40
- # trace_resource is deliberately not included -- events that share the same (trace_id, span_id)
41
- # trace_resource might not match between pairs, but they refer to the same trace.
40
+ # trace_resource is deliberately not included -- events that share the same (root_span_id, span_id) refer
41
+ # to the same trace
42
42
  frames.collect(&:hash),
43
43
  total_frame_count
44
44
  ].hash
@@ -56,7 +56,7 @@ module Datadog
56
56
  frames,
57
57
  total_frame_count,
58
58
  thread_id,
59
- trace_id,
59
+ root_span_id,
60
60
  span_id,
61
61
  trace_resource,
62
62
  cpu_time_interval_ns,
@@ -67,7 +67,7 @@ module Datadog
67
67
  frames,
68
68
  total_frame_count,
69
69
  thread_id,
70
- trace_id,
70
+ root_span_id,
71
71
  span_id,
72
72
  trace_resource
73
73
  )
@@ -2,7 +2,8 @@
2
2
  module Datadog
3
3
  module Profiling
4
4
  # This module contains classes and methods which are implemented using native code in the
5
- # ext/ddtrace_profiling_native_extension folder
5
+ # ext/ddtrace_profiling_native_extension folder, as well as some Ruby-level utilities that don't make sense to
6
+ # write using C
6
7
  module NativeExtension
7
8
  private_class_method def self.working?
8
9
  native_working?
@@ -13,6 +14,27 @@ module Datadog
13
14
  false
14
15
  end
15
16
  end
17
+
18
+ unless singleton_class.method_defined?(:clock_id_for)
19
+ def self.clock_id_for(_)
20
+ nil
21
+ end
22
+ end
23
+
24
+ def self.cpu_time_ns_for(thread)
25
+ clock_id =
26
+ begin
27
+ clock_id_for(thread)
28
+ rescue Errno::ESRCH
29
+ nil
30
+ end
31
+
32
+ begin
33
+ ::Process.clock_gettime(clock_id, :nanosecond) if clock_id
34
+ rescue Errno::EINVAL
35
+ nil
36
+ end
37
+ end
16
38
  end
17
39
  end
18
40
  end
@@ -4,6 +4,7 @@
4
4
  require 'ddtrace/profiling/flush'
5
5
  require 'ddtrace/profiling/pprof/message_set'
6
6
  require 'ddtrace/profiling/pprof/string_table'
7
+ require 'ddtrace/utils/time'
7
8
 
8
9
  module Datadog
9
10
  module Profiling
@@ -47,14 +48,19 @@ module Datadog
47
48
  Perftools::Profiles::Profile.encode(profile).force_encoding(DEFAULT_ENCODING)
48
49
  end
49
50
 
50
- def build_profile
51
+ def build_profile(start:, finish:)
52
+ start_ns = Datadog::Utils::Time.as_utc_epoch_ns(start)
53
+ finish_ns = Datadog::Utils::Time.as_utc_epoch_ns(finish)
54
+
51
55
  Perftools::Profiles::Profile.new(
52
56
  sample_type: @sample_types.messages,
53
57
  sample: @samples,
54
58
  mapping: @mappings.messages,
55
59
  location: @locations.values,
56
60
  function: @functions.messages,
57
- string_table: @string_table.strings
61
+ string_table: @string_table.strings,
62
+ time_nanos: start_ns,
63
+ duration_nanos: finish_ns - start_ns,
58
64
  )
59
65
  end
60
66
 
@@ -32,7 +32,7 @@ module Datadog
32
32
 
33
33
  @most_recent_trace_samples = {}
34
34
  @processed_unique_stacks = 0
35
- @processed_with_trace_ids = 0
35
+ @processed_with_trace = 0
36
36
  end
37
37
 
38
38
  def add_events!(stack_samples)
@@ -48,18 +48,18 @@ module Datadog
48
48
  stack_sample.hash
49
49
  end
50
50
 
51
- # Track the most recent sample for each trace
51
+ # Track the most recent sample for each trace (identified by root span id)
52
52
  def update_most_recent_trace_sample(stack_sample)
53
- return unless stack_sample.trace_id && stack_sample.trace_resource
53
+ return unless stack_sample.root_span_id && stack_sample.trace_resource
54
54
 
55
55
  # Update trace resource with most recent value
56
- if (most_recent_trace_sample = @most_recent_trace_samples[stack_sample.trace_id])
56
+ if (most_recent_trace_sample = @most_recent_trace_samples[stack_sample.root_span_id])
57
57
  if most_recent_trace_sample.timestamp < stack_sample.timestamp
58
- @most_recent_trace_samples[stack_sample.trace_id] = stack_sample
58
+ @most_recent_trace_samples[stack_sample.root_span_id] = stack_sample
59
59
  end
60
60
  else
61
61
  # Add trace resource
62
- @most_recent_trace_samples[stack_sample.trace_id] = stack_sample
62
+ @most_recent_trace_samples[stack_sample.root_span_id] = stack_sample
63
63
  end
64
64
  end
65
65
 
@@ -100,15 +100,15 @@ module Datadog
100
100
  )
101
101
  ]
102
102
 
103
- trace_id = stack_sample.trace_id || 0
103
+ root_span_id = stack_sample.root_span_id || 0
104
104
  span_id = stack_sample.span_id || 0
105
105
 
106
- if trace_id != 0 && span_id != 0
107
- @processed_with_trace_ids += 1
106
+ if root_span_id != 0 && span_id != 0
107
+ @processed_with_trace += 1
108
108
 
109
109
  labels << Perftools::Profiles::Label.new(
110
- key: builder.string_table.fetch(Datadog::Ext::Profiling::Pprof::LABEL_KEY_TRACE_ID),
111
- str: builder.string_table.fetch(trace_id.to_s)
110
+ key: builder.string_table.fetch(Datadog::Ext::Profiling::Pprof::LABEL_KEY_LOCAL_ROOT_SPAN_ID),
111
+ str: builder.string_table.fetch(root_span_id.to_s)
112
112
  )
113
113
 
114
114
  labels << Perftools::Profiles::Label.new(
@@ -118,10 +118,7 @@ module Datadog
118
118
 
119
119
  # Use most up-to-date trace resource, if available.
120
120
  # Otherwise, use the trace resource provided.
121
- trace_resource = (
122
- @most_recent_trace_samples[stack_sample.trace_id] \
123
- || stack_sample
124
- ).trace_resource
121
+ trace_resource = @most_recent_trace_samples.fetch(stack_sample.root_span_id, stack_sample).trace_resource
125
122
 
126
123
  if trace_resource && !trace_resource.empty?
127
124
  labels << Perftools::Profiles::Label.new(
@@ -135,7 +132,7 @@ module Datadog
135
132
  end
136
133
 
137
134
  def debug_statistics
138
- "unique stacks: #{@processed_unique_stacks}, of which had active traces: #{@processed_with_trace_ids}"
135
+ "unique stacks: #{@processed_unique_stacks}, of which had active traces: #{@processed_with_trace}"
139
136
  end
140
137
  end
141
138
  end
@@ -80,8 +80,8 @@ module Datadog
80
80
  converters.values.map(&:debug_statistics).join(', ')
81
81
  end
82
82
 
83
- def to_pprof
84
- profile = builder.build_profile
83
+ def to_pprof(start:, finish:)
84
+ profile = builder.build_profile(start: start, finish: finish)
85
85
  data = builder.encode_profile(profile)
86
86
  types = sample_type_mappings.keys
87
87
 
@@ -1,21 +1,20 @@
1
1
  # typed: false
2
2
  require 'ddtrace/utils/only_once'
3
3
  require 'ddtrace/profiling'
4
- require 'ddtrace/profiling/ext/cpu'
5
4
  require 'ddtrace/profiling/ext/forking'
6
5
 
7
6
  module Datadog
8
7
  module Profiling
9
8
  module Tasks
10
- # Takes care of loading our extensions/monkey patches to handle fork() and CPU profiling.
9
+ # Takes care of loading our extensions/monkey patches to handle fork() and validating if CPU-time profiling is usable
11
10
  class Setup
12
11
  ACTIVATE_EXTENSIONS_ONLY_ONCE = Datadog::Utils::OnlyOnce.new
13
12
 
14
13
  def run
15
14
  ACTIVATE_EXTENSIONS_ONLY_ONCE.run do
16
15
  begin
16
+ check_if_cpu_time_profiling_is_supported
17
17
  activate_forking_extensions
18
- activate_cpu_extensions
19
18
  setup_at_fork_hooks
20
19
  rescue StandardError, ScriptError => e
21
20
  Datadog.logger.warn do
@@ -39,19 +38,15 @@ module Datadog
39
38
  end
40
39
  end
41
40
 
42
- def activate_cpu_extensions
43
- if Ext::CPU.supported?
44
- Ext::CPU.apply!
45
- elsif Datadog.configuration.profiling.enabled
41
+ def check_if_cpu_time_profiling_is_supported
42
+ unsupported = cpu_time_profiling_unsupported_reason
43
+
44
+ if unsupported
46
45
  Datadog.logger.info do
47
46
  'CPU time profiling skipped because native CPU time is not supported: ' \
48
- "#{Ext::CPU.unsupported_reason}. Profiles containing Wall time will still be reported."
47
+ "#{unsupported}. Profiles containing 'Wall time' data will still be reported."
49
48
  end
50
49
  end
51
- rescue StandardError, ScriptError => e
52
- Datadog.logger.warn do
53
- "Profiler CPU profiling extensions unavailable. Cause: #{e.message} Location: #{Array(e.backtrace).first}"
54
- end
55
50
  end
56
51
 
57
52
  def setup_at_fork_hooks
@@ -75,6 +70,20 @@ module Datadog
75
70
  end
76
71
  end
77
72
  end
73
+
74
+ def cpu_time_profiling_unsupported_reason
75
+ # NOTE: Only the first matching reason is returned, so try to keep a nice order on reasons
76
+
77
+ if RUBY_ENGINE == 'jruby'
78
+ 'JRuby is not supported'
79
+ elsif RUBY_PLATFORM.include?('darwin')
80
+ 'Feature requires Linux; macOS is not supported'
81
+ elsif RUBY_PLATFORM =~ /(mswin|mingw)/
82
+ 'Feature requires Linux; Windows is not supported'
83
+ elsif !RUBY_PLATFORM.include?('linux')
84
+ "Feature requires Linux; #{RUBY_PLATFORM} is not supported"
85
+ end
86
+ end
78
87
  end
79
88
  end
80
89
  end
@@ -6,10 +6,10 @@ require 'ddtrace/ext/http'
6
6
  module Datadog
7
7
  module Profiling
8
8
  module TraceIdentifiers
9
- # Used by Datadog::Profiling::TraceIdentifiers::Helper to get the trace identifiers (trace id and span id) for a
10
- # given thread, if there is an active trace for that thread in Datadog.tracer.
9
+ # Used by Datadog::Profiling::TraceIdentifiers::Helper to get the trace identifiers (root span id and span id)
10
+ # for a given thread, if there is an active trace for that thread in the supplied tracer object.
11
11
  class Ddtrace
12
- def initialize(tracer: nil)
12
+ def initialize(tracer:)
13
13
  @tracer = (tracer if tracer.respond_to?(:call_context))
14
14
  end
15
15
 
@@ -19,10 +19,13 @@ module Datadog
19
19
  context = @tracer.call_context(thread)
20
20
  return unless context
21
21
 
22
- trace_id = context.trace_id || 0
23
- span_id = context.span_id || 0
22
+ span, root_span = context.current_span_and_root_span
23
+ return unless span && root_span
24
24
 
25
- [trace_id, span_id, maybe_extract_resource(context.current_root_span)] if trace_id != 0 && span_id != 0
25
+ root_span_id = root_span.span_id || 0
26
+ span_id = span.span_id || 0
27
+
28
+ [root_span_id, span_id, maybe_extract_resource(root_span)] if root_span_id != 0 && span_id != 0
26
29
  end
27
30
 
28
31
  private
@@ -31,8 +34,6 @@ module Datadog
31
34
  # Resources MUST NOT include personal identifiable information (PII); this should not be the case with
32
35
  # ddtrace integrations, but worth mentioning just in case :)
33
36
  def maybe_extract_resource(root_span)
34
- return unless root_span
35
-
36
37
  root_span.resource if root_span.span_type == Datadog::Ext::HTTP::TYPE_INBOUND
37
38
  end
38
39
  end
@@ -6,7 +6,7 @@ require 'ddtrace/profiling/trace_identifiers/ddtrace'
6
6
  module Datadog
7
7
  module Profiling
8
8
  module TraceIdentifiers
9
- # Helper used to retrieve the trace identifiers (trace id and span id) for a given thread,
9
+ # Helper used to retrieve the trace identifiers (root span id and span id) for a given thread,
10
10
  # if there is an active trace for that thread for the supported tracing APIs.
11
11
  #
12
12
  # This data is used to connect profiles to the traces -- samples in a profile will be tagged with this data and
@@ -28,7 +28,7 @@ module Datadog
28
28
  end
29
29
 
30
30
  # Expected output of the #trace_identifiers_for
31
- # duck type is [trace_id, span_id, (optional trace_resource_container)]
31
+ # duck type is [root_span_id, span_id, (optional trace_resource_container)]
32
32
  def trace_identifiers_for(thread)
33
33
  @supported_apis.each do |api|
34
34
  trace_identifiers = api.trace_identifiers_for(thread)
@@ -128,9 +128,7 @@ module Datadog
128
128
  private_class_method def self.load_profiling
129
129
  return false unless supported?
130
130
 
131
- require 'ddtrace/profiling/ext/cpu'
132
131
  require 'ddtrace/profiling/ext/forking'
133
-
134
132
  require 'ddtrace/profiling/collectors/stack'
135
133
  require 'ddtrace/profiling/exporter'
136
134
  require 'ddtrace/profiling/recorder'
@@ -194,6 +194,12 @@ module Datadog
194
194
  class PrioritySampler
195
195
  extend Forwardable
196
196
 
197
+ # NOTE: We do not advise using a pre-sampler. It can save resources,
198
+ # but pre-sampling at rates < 100% may result in partial traces, unless
199
+ # the pre-sampler knows exactly how to drop a span without dropping its ancestors.
200
+ #
201
+ # Additionally, as service metrics are calculated in the Datadog Agent,
202
+ # the service's throughput will be underestimated.
197
203
  attr_reader :pre_sampler, :priority_sampler
198
204
 
199
205
  SAMPLE_RATE_METRIC_KEY = '_sample_rate'.freeze
@@ -209,17 +215,21 @@ module Datadog
209
215
 
210
216
  def sample!(span)
211
217
  # If pre-sampling is configured, do it first. (By default, this will sample at 100%.)
212
- # NOTE: Pre-sampling at rates < 100% may result in partial traces; not recommended.
213
218
  span.sampled = pre_sample?(span) ? @pre_sampler.sample!(span) : true
214
219
 
215
220
  if span.sampled
216
- # If priority sampling has already been applied upstream, use that, otherwise...
217
- unless priority_assigned_upstream?(span)
218
- # Roll the dice and determine whether how we set the priority.
219
- priority = priority_sample!(span) ? Datadog::Ext::Priority::AUTO_KEEP : Datadog::Ext::Priority::AUTO_REJECT
221
+ # If priority sampling has already been applied upstream, use that value.
222
+ return true if priority_assigned?(span)
220
223
 
221
- assign_priority!(span, priority)
222
- end
224
+ # Check with post sampler how we set the priority.
225
+ sample = priority_sample!(span)
226
+
227
+ # Check if post sampler has already assigned a priority.
228
+ return true if priority_assigned?(span)
229
+
230
+ # If not, use agent priority values.
231
+ priority = sample ? Datadog::Ext::Priority::AUTO_KEEP : Datadog::Ext::Priority::AUTO_REJECT
232
+ assign_priority!(span, priority)
223
233
  else
224
234
  # If discarded by pre-sampling, set "reject" priority, so other
225
235
  # services for the same trace don't sample needlessly.
@@ -244,7 +254,7 @@ module Datadog
244
254
  end
245
255
  end
246
256
 
247
- def priority_assigned_upstream?(span)
257
+ def priority_assigned?(span)
248
258
  span.context && !span.context.sampling_priority.nil?
249
259
  end
250
260
 
@@ -97,11 +97,13 @@ module Datadog
97
97
  sampled = rule.sample?(span)
98
98
  sample_rate = rule.sample_rate(span)
99
99
 
100
+ set_priority(span, sampled)
100
101
  set_rule_metrics(span, sample_rate)
101
102
 
102
103
  return false unless sampled
103
104
 
104
- rate_limiter.allow?(1).tap do
105
+ rate_limiter.allow?(1).tap do |allowed|
106
+ set_priority(span, allowed)
105
107
  set_limiter_metrics(span, rate_limiter.effective_rate)
106
108
  end
107
109
  rescue StandardError => e
@@ -109,6 +111,16 @@ module Datadog
109
111
  yield(span)
110
112
  end
111
113
 
114
+ # Span priority should only be set when the {RuleSampler}
115
+ # was responsible for the sampling decision.
116
+ def set_priority(span, sampled)
117
+ if sampled
118
+ ForcedTracing.keep(span)
119
+ else
120
+ ForcedTracing.drop(span)
121
+ end
122
+ end
123
+
112
124
  def set_rule_metrics(span, sample_rate)
113
125
  span.set_metric(Ext::Sampling::RULE_SAMPLE_RATE, sample_rate)
114
126
  end
@@ -40,6 +40,12 @@ module Datadog
40
40
  after = get_time
41
41
  after - before
42
42
  end
43
+
44
+ def as_utc_epoch_ns(time)
45
+ # we use #to_r instead of #to_f because Float doesn't have enough precision to represent exact nanoseconds, see
46
+ # https://rubyapi.org/3.0/o/time#method-i-to_f
47
+ (time.to_r * 1_000_000_000).to_i
48
+ end
43
49
  end
44
50
  end
45
51
  end
@@ -2,7 +2,7 @@
2
2
  module Datadog
3
3
  module VERSION
4
4
  MAJOR = 0
5
- MINOR = 53
5
+ MINOR = 54
6
6
  PATCH = 0
7
7
  PRE = nil
8
8