ddtrace 0.53.0 → 0.54.1

Files changed (50)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +89 -11
  3. data/ddtrace.gemspec +5 -2
  4. data/docs/GettingStarted.md +40 -3
  5. data/docs/ProfilingDevelopment.md +2 -2
  6. data/ext/ddtrace_profiling_native_extension/NativeExtensionDesign.md +86 -0
  7. data/ext/ddtrace_profiling_native_extension/clock_id.h +4 -0
  8. data/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +52 -0
  9. data/ext/ddtrace_profiling_native_extension/clock_id_noop.c +14 -0
  10. data/ext/ddtrace_profiling_native_extension/extconf.rb +144 -6
  11. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +35 -0
  12. data/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +3 -0
  13. data/ext/ddtrace_profiling_native_extension/profiling.c +6 -1
  14. data/lib/datadog/ci/contrib/cucumber/formatter.rb +1 -0
  15. data/lib/datadog/ci/contrib/rspec/example.rb +1 -0
  16. data/lib/datadog/ci/ext/environment.rb +26 -21
  17. data/lib/datadog/ci/ext/test.rb +1 -0
  18. data/lib/datadog/ci/test.rb +5 -1
  19. data/lib/ddtrace/buffer.rb +28 -16
  20. data/lib/ddtrace/configuration/agent_settings_resolver.rb +27 -16
  21. data/lib/ddtrace/context.rb +10 -2
  22. data/lib/ddtrace/contrib/delayed_job/plugin.rb +2 -2
  23. data/lib/ddtrace/contrib/mongodb/instrumentation.rb +1 -1
  24. data/lib/ddtrace/contrib/mongodb/integration.rb +5 -0
  25. data/lib/ddtrace/contrib/rails/configuration/settings.rb +7 -0
  26. data/lib/ddtrace/contrib/rails/framework.rb +3 -2
  27. data/lib/ddtrace/contrib/redis/instrumentation.rb +90 -0
  28. data/lib/ddtrace/contrib/redis/patcher.rb +2 -84
  29. data/lib/ddtrace/contrib/resque/integration.rb +1 -5
  30. data/lib/ddtrace/ext/priority.rb +6 -4
  31. data/lib/ddtrace/ext/profiling.rb +1 -1
  32. data/lib/ddtrace/metrics.rb +2 -2
  33. data/lib/ddtrace/profiling/collectors/stack.rb +45 -45
  34. data/lib/ddtrace/profiling/encoding/profile.rb +1 -1
  35. data/lib/ddtrace/profiling/events/stack.rb +8 -8
  36. data/lib/ddtrace/profiling/native_extension.rb +23 -1
  37. data/lib/ddtrace/profiling/pprof/builder.rb +8 -2
  38. data/lib/ddtrace/profiling/pprof/stack_sample.rb +13 -16
  39. data/lib/ddtrace/profiling/pprof/template.rb +2 -2
  40. data/lib/ddtrace/profiling/tasks/setup.rb +21 -12
  41. data/lib/ddtrace/profiling/trace_identifiers/ddtrace.rb +9 -8
  42. data/lib/ddtrace/profiling/trace_identifiers/helper.rb +2 -2
  43. data/lib/ddtrace/profiling.rb +0 -2
  44. data/lib/ddtrace/sampler.rb +18 -8
  45. data/lib/ddtrace/sampling/rule_sampler.rb +13 -1
  46. data/lib/ddtrace/utils/time.rb +6 -0
  47. data/lib/ddtrace/version.rb +2 -2
  48. metadata +14 -9
  49. data/lib/ddtrace/profiling/ext/cpu.rb +0 -67
  50. data/lib/ddtrace/profiling/ext/cthread.rb +0 -156
@@ -4,13 +4,15 @@ module Datadog
  # Priority is a hint given to the backend so that it knows which traces to reject or kept.
  # In a distributed context, it should be set before any context propagation (fork, RPC calls) to be effective.
  module Priority
- # Use this to explicitely inform the backend that a trace should be rejected and not stored.
+ # Use this to explicitly inform the backend that a trace MUST be rejected and not stored.
+ # This includes rules and rate limits configured by the user through the {RuleSampler}.
  USER_REJECT = -1
- # Used by the builtin sampler to inform the backend that a trace should be rejected and not stored.
+ # Used by the {PrioritySampler} to inform the backend that a trace should be rejected and not stored.
  AUTO_REJECT = 0
- # Used by the builtin sampler to inform the backend that a trace should be kept and stored.
+ # Used by the {PrioritySampler} to inform the backend that a trace should be kept and stored.
  AUTO_KEEP = 1
- # Use this to explicitely inform the backend that a trace should be kept and stored.
+ # Use this to explicitly inform the backend that a trace MUST be kept and stored.
+ # This includes rules and rate limits configured by the user through the {RuleSampler}.
  USER_KEEP = 2
  end
  end
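For reference, these priorities are what an application sets when it wants to force a sampling decision. A minimal sketch (not part of this diff; it assumes the standard ddtrace 0.x tracer API and that the span has an active context):

```ruby
require 'ddtrace'

Datadog.tracer.trace('checkout.process') do |span|
  # Explicitly tell the backend this trace MUST be kept, overriding automatic sampling
  span.context.sampling_priority = Datadog::Ext::Priority::USER_KEEP
  # ...or, to guarantee it is dropped:
  # span.context.sampling_priority = Datadog::Ext::Priority::USER_REJECT
end
```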
@@ -9,9 +9,9 @@ module Datadog
  ENV_ENDPOINT_COLLECTION_ENABLED = 'DD_PROFILING_ENDPOINT_COLLECTION_ENABLED'.freeze

  module Pprof
+ LABEL_KEY_LOCAL_ROOT_SPAN_ID = 'local root span id'.freeze
  LABEL_KEY_SPAN_ID = 'span id'.freeze
  LABEL_KEY_THREAD_ID = 'thread id'.freeze
- LABEL_KEY_TRACE_ID = 'trace id'.freeze
  LABEL_KEY_TRACE_ENDPOINT = 'trace endpoint'.freeze
  SAMPLE_VALUE_NO_VALUE = 0
  VALUE_TYPE_CPU = 'cpu-time'.freeze
@@ -31,7 +31,7 @@ module Datadog
  !version.nil? && version >= Gem::Version.new('3.3.0') &&
  # dogstatsd-ruby >= 5.0 & < 5.2.0 has known issues with process forks
  # and do not support the single thread mode we use to avoid this problem.
- !(version >= Gem::Version.new('5.0') && version < Gem::Version.new('5.2'))
+ !(version >= Gem::Version.new('5.0') && version < Gem::Version.new('5.3'))
  end

  def enabled?
@@ -274,7 +274,7 @@ module Datadog
  IGNORED_STATSD_ONLY_ONCE.run do
  Datadog.logger.warn(
  'Ignoring user-supplied statsd instance as currently-installed version of dogstastd-ruby is incompatible. ' \
- "To fix this, ensure that you have `gem 'dogstatsd-ruby', '~> 5.2'` on your Gemfile or gems.rb file."
+ "To fix this, ensure that you have `gem 'dogstatsd-ruby', '~> 5.3'` on your Gemfile or gems.rb file."
  )
  end
  end
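The fix suggested by the warning above, in Gemfile form (illustrative; only the dogstatsd-ruby constraint comes from this change):

```ruby
# Gemfile
source 'https://rubygems.org'

gem 'ddtrace'
gem 'dogstatsd-ruby', '~> 5.3' # versions >= 5.0 and < 5.3 are skipped by the compatibility check above
```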
@@ -1,4 +1,6 @@
  # typed: true
+
+ require 'ddtrace/profiling/native_extension'
  require 'ddtrace/profiling/backtrace_location'
  require 'ddtrace/profiling/events/stack'
  require 'ddtrace/utils/only_once'
@@ -19,6 +21,7 @@ module Datadog
  MIN_INTERVAL = 0.01
  THREAD_LAST_CPU_TIME_KEY = :datadog_profiler_last_cpu_time
  THREAD_LAST_WALL_CLOCK_KEY = :datadog_profiler_last_wall_clock
+ SYNTHETIC_STACK_IN_NATIVE_CODE = [BacktraceLocation.new('', 0, 'In native code').freeze].freeze

  # This default was picked based on the current sampling performance and on expected concurrency on an average
  # Ruby MRI application. Lowering this optimizes for latency (less impact each time we sample), and raising
@@ -31,7 +34,8 @@ module Datadog
  :trace_identifiers_helper,
  :ignore_thread,
  :max_time_usage_pct,
- :thread_api
+ :thread_api,
+ :cpu_time_provider

  def initialize(
  recorder,
@@ -41,6 +45,7 @@ module Datadog
  max_time_usage_pct: DEFAULT_MAX_TIME_USAGE_PCT,
  max_threads_sampled: DEFAULT_MAX_THREADS_SAMPLED,
  thread_api: Thread,
+ cpu_time_provider: Datadog::Profiling::NativeExtension,
  fork_policy: Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default
  interval: MIN_INTERVAL,
  enabled: true
@@ -52,6 +57,8 @@ module Datadog
  @max_time_usage_pct = max_time_usage_pct
  @max_threads_sampled = max_threads_sampled
  @thread_api = thread_api
+ # Only set the provider if it's able to work in the current Ruby/OS combo
+ @cpu_time_provider = cpu_time_provider unless cpu_time_provider.cpu_time_ns_for(thread_api.current).nil?

  # Workers::Async::Thread settings
  self.fork_policy = fork_policy
@@ -62,8 +69,6 @@ module Datadog
  # Workers::Polling settings
  self.enabled = enabled

- @warn_about_missing_cpu_time_instrumentation_only_once = Datadog::Utils::OnlyOnce.new
-
  # Cache this proc, since it's pretty expensive to keep recreating it
  @build_backtrace_location = method(:build_backtrace_location).to_proc
  # Cache this buffer, since it's pretty expensive to keep accessing it
@@ -119,6 +124,26 @@ module Datadog
  locations = thread.backtrace_locations
  return if locations.nil?

+ # Having empty locations means that the thread is alive, but we don't know what it's doing:
+ #
+ # 1. It can be starting up
+ # ```
+ # > Thread.new { sleep }.backtrace
+ # => [] # <-- note the thread hasn't actually started running sleep yet, we got there first
+ # ```
+ # 2. It can be running native code
+ # ```
+ # > t = Process.detach(fork { sleep })
+ # => #<Process::Waiter:0x00007ffe7285f7a0 run>
+ # > t.backtrace
+ # => [] # <-- this can happen even minutes later, e.g. it's not a race as in 1.
+ # ```
+ # This effect has been observed in threads created by the Iodine web server and the ffi gem
+ #
+ # To give customers visibility into these threads, we replace the empty stack with one containing a
+ # synthetic placeholder frame, so that these threads are properly represented in the UX.
+ locations = SYNTHETIC_STACK_IN_NATIVE_CODE if locations.empty?
+
  # Get actual stack size then trim the stack
  stack_size = locations.length
  locations = locations[0..(max_frames - 1)]
@@ -126,8 +151,8 @@ module Datadog
  # Convert backtrace locations into structs
  locations = convert_backtrace_locations(locations)

- thread_id = thread.respond_to?(:pthread_thread_id) ? thread.pthread_thread_id : thread.object_id
- trace_id, span_id, trace_resource = trace_identifiers_helper.trace_identifiers_for(thread)
+ thread_id = thread.object_id
+ root_span_id, span_id, trace_resource = trace_identifiers_helper.trace_identifiers_for(thread)
  cpu_time = get_cpu_time_interval!(thread)
  wall_time_interval_ns =
  get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_WALL_CLOCK_KEY, current_wall_time_ns)
@@ -137,7 +162,7 @@ module Datadog
  locations,
  stack_size,
  thread_id,
- trace_id,
+ root_span_id,
  span_id,
  trace_resource,
  cpu_time,
@@ -146,17 +171,10 @@ module Datadog
  end

  def get_cpu_time_interval!(thread)
- # Return if we can't get the current CPU time
- unless thread.respond_to?(:cpu_time_instrumentation_installed?) && thread.cpu_time_instrumentation_installed?
- warn_about_missing_cpu_time_instrumentation(thread)
- return
- end
+ return unless cpu_time_provider

- current_cpu_time_ns = thread.cpu_time(:nanosecond)
+ current_cpu_time_ns = cpu_time_provider.cpu_time_ns_for(thread)

- # NOTE: This can still be nil even when all of the checks above passed because of a race: there's a bit of
- # initialization that needs to be done by the thread itself, and it's possible for us to try to sample
- # *before* the thread had time to finish the initialization
  return unless current_cpu_time_ns

  get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_CPU_TIME_KEY, current_cpu_time_ns)
@@ -205,33 +223,6 @@ module Datadog

  private

- def warn_about_missing_cpu_time_instrumentation(thread)
- @warn_about_missing_cpu_time_instrumentation_only_once.run do
- # Is the profiler thread instrumented? If it is, then we know instrumentation is available, but seems to be
- # missing on this thread we just found.
- #
- # As far as we know, it can be missing due to one the following:
- #
- # a) The thread was started before we installed our instrumentation.
- # In this case, the fix is to make sure ddtrace gets loaded before any other parts of the application.
- #
- # b) The thread was started using the Ruby native APIs (e.g. from a C extension such as ffi).
- # Known cases right now that trigger this are the ethon/typhoeus gems.
- # We currently have no solution for this case; these threads will always be missing our CPU instrumentation.
- #
- # c) The thread was started with `Thread.start`/`Thread.fork` and hasn't yet enabled the instrumentation.
- # When threads are started using these APIs, there's a small time window during which the thread has started
- # but our code to apply the instrumentation hasn't run yet; in these cases it's just a matter of allowing
- # it to run and our instrumentation to be applied.
- #
- if thread_api.current.respond_to?(:cpu_time) && thread_api.current.cpu_time
- Datadog.logger.debug(
- "Thread ('#{thread}') is missing profiling instrumentation; other threads should be unaffected"
- )
- end
- end
- end
-
  # If the profiler is started for a while, stopped and then restarted OR whenever the process forks, we need to
  # clean up any leftover per-thread counters, so that the first sample after starting doesn't end up with:
  #
@@ -253,9 +244,18 @@ module Datadog
  end

  def get_elapsed_since_last_sample_and_set_value(thread, key, current_value)
- # See cthread.rb for more details, but this is a workaround for https://bugs.ruby-lang.org/issues/17807 ;
- # using all thread_variable related methods on these instances also triggers a crash and for now we just
- # skip it for the affected Rubies
+ # Process::Waiter crash workaround:
+ #
+ # This is a workaround for a Ruby VM segfault (usually something like
+ # "[BUG] Segmentation fault at 0x0000000000000008") in the affected Ruby versions.
+ # See https://bugs.ruby-lang.org/issues/17807 for details.
+ #
+ # In those Ruby versions, there's a very special subclass of `Thread` called `Process::Waiter` that causes VM
+ # crashes whenever something tries to read its instance or thread variables. This subclass of thread only
+ # shows up when the `Process.detach` API gets used.
+ # In the specs you'll find crash regression tests that include a way of reproducing it.
+ #
+ # As workaround for now we just skip it for the affected Rubies
  return 0 if @needs_process_waiter_workaround && thread.is_a?(::Process::Waiter)

  last_value = thread.thread_variable_get(key) || current_value
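A quick way to see where a `Process::Waiter` instance comes from, based on the comment above (sketch only; it requires a platform where `Kernel#fork` is available, and the crash itself only reproduces on the affected Ruby versions):

```ruby
# Process.detach spawns a watcher thread that is an instance of the special Process::Waiter subclass
waiter = Process.detach(fork { sleep 1 })

waiter.class         # => Process::Waiter
waiter.is_a?(Thread) # => true
# On affected Rubies, reading this thread's instance or thread variables can segfault the VM,
# which is why the profiler returns 0 early for these threads instead of touching them.
```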
@@ -37,7 +37,7 @@ module Datadog
  end

  # Build the profile and encode it
- template.to_pprof
+ template.to_pprof(start: flush.start, finish: flush.finish)
  end
  end
  end
@@ -11,7 +11,7 @@ module Datadog
  :frames,
  :total_frame_count,
  :thread_id,
- :trace_id,
+ :root_span_id,
  :span_id,
  :trace_resource

@@ -20,7 +20,7 @@ module Datadog
  frames,
  total_frame_count,
  thread_id,
- trace_id,
+ root_span_id,
  span_id,
  trace_resource
  )
@@ -29,16 +29,16 @@ module Datadog
  @frames = frames
  @total_frame_count = total_frame_count
  @thread_id = thread_id
- @trace_id = trace_id
+ @root_span_id = root_span_id
  @span_id = span_id
  @trace_resource = trace_resource

  @hash = [
  thread_id,
- trace_id,
+ root_span_id,
  span_id,
- # trace_resource is deliberately not included -- events that share the same (trace_id, span_id)
- # trace_resource might not match between pairs, but they refer to the same trace.
+ # trace_resource is deliberately not included -- events that share the same (root_span_id, span_id) refer
+ # to the same trace
  frames.collect(&:hash),
  total_frame_count
  ].hash
@@ -56,7 +56,7 @@ module Datadog
  frames,
  total_frame_count,
  thread_id,
- trace_id,
+ root_span_id,
  span_id,
  trace_resource,
  cpu_time_interval_ns,
@@ -67,7 +67,7 @@ module Datadog
  frames,
  total_frame_count,
  thread_id,
- trace_id,
+ root_span_id,
  span_id,
  trace_resource
  )
@@ -2,7 +2,8 @@
  module Datadog
  module Profiling
  # This module contains classes and methods which are implemented using native code in the
- # ext/ddtrace_profiling_native_extension folder
+ # ext/ddtrace_profiling_native_extension folder, as well as some Ruby-level utilities that don't make sense to
+ # write using C
  module NativeExtension
  private_class_method def self.working?
  native_working?
@@ -13,6 +14,27 @@ module Datadog
  false
  end
  end
+
+ unless singleton_class.method_defined?(:clock_id_for)
+ def self.clock_id_for(_)
+ nil
+ end
+ end
+
+ def self.cpu_time_ns_for(thread)
+ clock_id =
+ begin
+ clock_id_for(thread)
+ rescue Errno::ESRCH
+ nil
+ end
+
+ begin
+ ::Process.clock_gettime(clock_id, :nanosecond) if clock_id
+ rescue Errno::EINVAL
+ nil
+ end
+ end
  end
  end
  end
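Usage sketch for the helper added above (illustrative output only; on platforms where the native extension can't resolve a per-thread clock id, `clock_id_for` returns nil and so does this helper):

```ruby
cpu_ns = Datadog::Profiling::NativeExtension.cpu_time_ns_for(Thread.current)
# => 4_523_000  (CPU time consumed by the thread so far, in nanoseconds)
# => nil        (JRuby, macOS, Windows, or a thread that has already terminated)

# The same idea for the current thread only, using plain Ruby where the OS defines the constant
# (shown for comparison; not what the profiler does, since it needs other threads' clocks too):
Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :nanosecond)
```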
@@ -4,6 +4,7 @@
  require 'ddtrace/profiling/flush'
  require 'ddtrace/profiling/pprof/message_set'
  require 'ddtrace/profiling/pprof/string_table'
+ require 'ddtrace/utils/time'

  module Datadog
  module Profiling
@@ -47,14 +48,19 @@ module Datadog
  Perftools::Profiles::Profile.encode(profile).force_encoding(DEFAULT_ENCODING)
  end

- def build_profile
+ def build_profile(start:, finish:)
+ start_ns = Datadog::Utils::Time.as_utc_epoch_ns(start)
+ finish_ns = Datadog::Utils::Time.as_utc_epoch_ns(finish)
+
  Perftools::Profiles::Profile.new(
  sample_type: @sample_types.messages,
  sample: @samples,
  mapping: @mappings.messages,
  location: @locations.values,
  function: @functions.messages,
- string_table: @string_table.strings
+ string_table: @string_table.strings,
+ time_nanos: start_ns,
+ duration_nanos: finish_ns - start_ns,
  )
  end
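The `start_ns`/`finish_ns` values feed pprof's `time_nanos` and `duration_nanos` fields. A rough sketch of the conversion the new `Datadog::Utils::Time.as_utc_epoch_ns` helper has to perform (an assumption; the helper itself lives in ddtrace/utils/time.rb, which is not shown in this diff):

```ruby
# Whole seconds since the Unix epoch plus the sub-second remainder, both expressed in nanoseconds
def as_utc_epoch_ns(time)
  time.to_i * 1_000_000_000 + time.nsec
end

start_ns  = as_utc_epoch_ns(Time.utc(2021, 9, 1, 12, 0, 0))
finish_ns = as_utc_epoch_ns(Time.utc(2021, 9, 1, 12, 1, 0))
finish_ns - start_ns # => 60_000_000_000, i.e. a 60 second profile duration
```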
@@ -32,7 +32,7 @@ module Datadog

  @most_recent_trace_samples = {}
  @processed_unique_stacks = 0
- @processed_with_trace_ids = 0
+ @processed_with_trace = 0
  end
  def add_events!(stack_samples)

@@ -48,18 +48,18 @@ module Datadog
  stack_sample.hash
  end

- # Track the most recent sample for each trace
+ # Track the most recent sample for each trace (identified by root span id)
  def update_most_recent_trace_sample(stack_sample)
- return unless stack_sample.trace_id && stack_sample.trace_resource
+ return unless stack_sample.root_span_id && stack_sample.trace_resource

  # Update trace resource with most recent value
- if (most_recent_trace_sample = @most_recent_trace_samples[stack_sample.trace_id])
+ if (most_recent_trace_sample = @most_recent_trace_samples[stack_sample.root_span_id])
  if most_recent_trace_sample.timestamp < stack_sample.timestamp
- @most_recent_trace_samples[stack_sample.trace_id] = stack_sample
+ @most_recent_trace_samples[stack_sample.root_span_id] = stack_sample
  end
  else
  # Add trace resource
- @most_recent_trace_samples[stack_sample.trace_id] = stack_sample
+ @most_recent_trace_samples[stack_sample.root_span_id] = stack_sample
  end
  end

@@ -100,15 +100,15 @@ module Datadog
  )
  ]

- trace_id = stack_sample.trace_id || 0
+ root_span_id = stack_sample.root_span_id || 0
  span_id = stack_sample.span_id || 0

- if trace_id != 0 && span_id != 0
- @processed_with_trace_ids += 1
+ if root_span_id != 0 && span_id != 0
+ @processed_with_trace += 1

  labels << Perftools::Profiles::Label.new(
- key: builder.string_table.fetch(Datadog::Ext::Profiling::Pprof::LABEL_KEY_TRACE_ID),
- str: builder.string_table.fetch(trace_id.to_s)
+ key: builder.string_table.fetch(Datadog::Ext::Profiling::Pprof::LABEL_KEY_LOCAL_ROOT_SPAN_ID),
+ str: builder.string_table.fetch(root_span_id.to_s)
  )

  labels << Perftools::Profiles::Label.new(
@@ -118,10 +118,7 @@ module Datadog

  # Use most up-to-date trace resource, if available.
  # Otherwise, use the trace resource provided.
- trace_resource = (
- @most_recent_trace_samples[stack_sample.trace_id] \
- || stack_sample
- ).trace_resource
+ trace_resource = @most_recent_trace_samples.fetch(stack_sample.root_span_id, stack_sample).trace_resource

  if trace_resource && !trace_resource.empty?
  labels << Perftools::Profiles::Label.new(
@@ -135,7 +132,7 @@ module Datadog
  end
  end
  def debug_statistics
- "unique stacks: #{@processed_unique_stacks}, of which had active traces: #{@processed_with_trace_ids}"
+ "unique stacks: #{@processed_unique_stacks}, of which had active traces: #{@processed_with_trace}"
  end
  end
  end
@@ -80,8 +80,8 @@ module Datadog
  converters.values.map(&:debug_statistics).join(', ')
  end

- def to_pprof
- profile = builder.build_profile
+ def to_pprof(start:, finish:)
+ profile = builder.build_profile(start: start, finish: finish)
  data = builder.encode_profile(profile)
  types = sample_type_mappings.keys

@@ -1,21 +1,20 @@
  # typed: false
  require 'ddtrace/utils/only_once'
  require 'ddtrace/profiling'
- require 'ddtrace/profiling/ext/cpu'
  require 'ddtrace/profiling/ext/forking'

  module Datadog
  module Profiling
  module Tasks
- # Takes care of loading our extensions/monkey patches to handle fork() and CPU profiling.
+ # Takes care of loading our extensions/monkey patches to handle fork() and validating if CPU-time profiling is usable
  class Setup
  ACTIVATE_EXTENSIONS_ONLY_ONCE = Datadog::Utils::OnlyOnce.new

  def run
  ACTIVATE_EXTENSIONS_ONLY_ONCE.run do
  begin
+ check_if_cpu_time_profiling_is_supported
  activate_forking_extensions
- activate_cpu_extensions
  setup_at_fork_hooks
  rescue StandardError, ScriptError => e
  Datadog.logger.warn do
@@ -39,19 +38,15 @@ module Datadog
  end
  end

- def activate_cpu_extensions
- if Ext::CPU.supported?
- Ext::CPU.apply!
- elsif Datadog.configuration.profiling.enabled
+ def check_if_cpu_time_profiling_is_supported
+ unsupported = cpu_time_profiling_unsupported_reason
+
+ if unsupported
  Datadog.logger.info do
  'CPU time profiling skipped because native CPU time is not supported: ' \
- "#{Ext::CPU.unsupported_reason}. Profiles containing Wall time will still be reported."
+ "#{unsupported}. Profiles containing 'Wall time' data will still be reported."
  end
  end
- rescue StandardError, ScriptError => e
- Datadog.logger.warn do
- "Profiler CPU profiling extensions unavailable. Cause: #{e.message} Location: #{Array(e.backtrace).first}"
- end
  end

  def setup_at_fork_hooks
@@ -75,6 +70,20 @@ module Datadog
  end
  end
  end
+
+ def cpu_time_profiling_unsupported_reason
+ # NOTE: Only the first matching reason is returned, so try to keep a nice order on reasons
+
+ if RUBY_ENGINE == 'jruby'
+ 'JRuby is not supported'
+ elsif RUBY_PLATFORM.include?('darwin')
+ 'Feature requires Linux; macOS is not supported'
+ elsif RUBY_PLATFORM =~ /(mswin|mingw)/
+ 'Feature requires Linux; Windows is not supported'
+ elsif !RUBY_PLATFORM.include?('linux')
+ "Feature requires Linux; #{RUBY_PLATFORM} is not supported"
+ end
+ end
  end
  end
  end
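Illustrative return values of the new helper above, mapped to the branches it checks:

```ruby
cpu_time_profiling_unsupported_reason
# => nil                                                 # Linux + CRuby: CPU-time profiling can be used
# => 'JRuby is not supported'                            # RUBY_ENGINE == 'jruby'
# => 'Feature requires Linux; macOS is not supported'    # RUBY_PLATFORM includes 'darwin'
# => 'Feature requires Linux; Windows is not supported'  # mswin/mingw platforms
```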
@@ -6,10 +6,10 @@ require 'ddtrace/ext/http'
  module Datadog
  module Profiling
  module TraceIdentifiers
- # Used by Datadog::Profiling::TraceIdentifiers::Helper to get the trace identifiers (trace id and span id) for a
- # given thread, if there is an active trace for that thread in Datadog.tracer.
+ # Used by Datadog::Profiling::TraceIdentifiers::Helper to get the trace identifiers (root span id and span id)
+ # for a given thread, if there is an active trace for that thread in the supplied tracer object.
  class Ddtrace
- def initialize(tracer: nil)
+ def initialize(tracer:)
  @tracer = (tracer if tracer.respond_to?(:call_context))
  end

@@ -19,10 +19,13 @@ module Datadog
  context = @tracer.call_context(thread)
  return unless context

- trace_id = context.trace_id || 0
- span_id = context.span_id || 0
+ span, root_span = context.current_span_and_root_span
+ return unless span && root_span

- [trace_id, span_id, maybe_extract_resource(context.current_root_span)] if trace_id != 0 && span_id != 0
+ root_span_id = root_span.span_id || 0
+ span_id = span.span_id || 0
+
+ [root_span_id, span_id, maybe_extract_resource(root_span)] if root_span_id != 0 && span_id != 0
  end

  private
@@ -31,8 +34,6 @@ module Datadog
  # Resources MUST NOT include personal identifiable information (PII); this should not be the case with
  # ddtrace integrations, but worth mentioning just in case :)
  def maybe_extract_resource(root_span)
- return unless root_span
-
  root_span.resource if root_span.span_type == Datadog::Ext::HTTP::TYPE_INBOUND
  end
  end
@@ -6,7 +6,7 @@ require 'ddtrace/profiling/trace_identifiers/ddtrace'
  module Datadog
  module Profiling
  module TraceIdentifiers
- # Helper used to retrieve the trace identifiers (trace id and span id) for a given thread,
+ # Helper used to retrieve the trace identifiers (root span id and span id) for a given thread,
  # if there is an active trace for that thread for the supported tracing APIs.
  #
  # This data is used to connect profiles to the traces -- samples in a profile will be tagged with this data and
@@ -28,7 +28,7 @@ module Datadog
  end

  # Expected output of the #trace_identifiers_for
- # duck type is [trace_id, span_id, (optional trace_resource_container)]
+ # duck type is [root_span_id, span_id, (optional trace_resource_container)]
  def trace_identifiers_for(thread)
  @supported_apis.each do |api|
  trace_identifiers = api.trace_identifiers_for(thread)
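Illustrative shape of that duck type (the values below are made up):

```ruby
trace_identifiers_helper.trace_identifiers_for(Thread.current)
# => [8742462031282439010, 6370738938504413989, "GET /users/:id"]
#     root_span_id,        span_id,             optional trace resource
# => nil  # when the thread has no active trace
```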
@@ -128,9 +128,7 @@ module Datadog
  private_class_method def self.load_profiling
  return false unless supported?

- require 'ddtrace/profiling/ext/cpu'
  require 'ddtrace/profiling/ext/forking'
-
  require 'ddtrace/profiling/collectors/stack'
  require 'ddtrace/profiling/exporter'
  require 'ddtrace/profiling/recorder'
@@ -194,6 +194,12 @@ module Datadog
  class PrioritySampler
  extend Forwardable

+ # NOTE: We do not advise using a pre-sampler. It can save resources,
+ # but pre-sampling at rates < 100% may result in partial traces, unless
+ # the pre-sampler knows exactly how to drop a span without dropping its ancestors.
+ #
+ # Additionally, as service metrics are calculated in the Datadog Agent,
+ # the service's throughput will be underestimated.
  attr_reader :pre_sampler, :priority_sampler

  SAMPLE_RATE_METRIC_KEY = '_sample_rate'.freeze
@@ -209,17 +215,21 @@ module Datadog

  def sample!(span)
  # If pre-sampling is configured, do it first. (By default, this will sample at 100%.)
- # NOTE: Pre-sampling at rates < 100% may result in partial traces; not recommended.
  span.sampled = pre_sample?(span) ? @pre_sampler.sample!(span) : true

  if span.sampled
- # If priority sampling has already been applied upstream, use that, otherwise...
- unless priority_assigned_upstream?(span)
- # Roll the dice and determine whether how we set the priority.
- priority = priority_sample!(span) ? Datadog::Ext::Priority::AUTO_KEEP : Datadog::Ext::Priority::AUTO_REJECT
+ # If priority sampling has already been applied upstream, use that value.
+ return true if priority_assigned?(span)

- assign_priority!(span, priority)
- end
+ # Check with post sampler how we set the priority.
+ sample = priority_sample!(span)
+
+ # Check if post sampler has already assigned a priority.
+ return true if priority_assigned?(span)
+
+ # If not, use agent priority values.
+ priority = sample ? Datadog::Ext::Priority::AUTO_KEEP : Datadog::Ext::Priority::AUTO_REJECT
+ assign_priority!(span, priority)
  else
  # If discarded by pre-sampling, set "reject" priority, so other
  # services for the same trace don't sample needlessly.
@@ -244,7 +254,7 @@ module Datadog
  end
  end

- def priority_assigned_upstream?(span)
+ def priority_assigned?(span)
  span.context && !span.context.sampling_priority.nil?
  end

@@ -97,11 +97,13 @@ module Datadog
  sampled = rule.sample?(span)
  sample_rate = rule.sample_rate(span)

+ set_priority(span, sampled)
  set_rule_metrics(span, sample_rate)

  return false unless sampled

- rate_limiter.allow?(1).tap do
+ rate_limiter.allow?(1).tap do |allowed|
+ set_priority(span, allowed)
  set_limiter_metrics(span, rate_limiter.effective_rate)
  end
  rescue StandardError => e
@@ -109,6 +111,16 @@ module Datadog
  yield(span)
  end

+ # Span priority should only be set when the {RuleSampler}
+ # was responsible for the sampling decision.
+ def set_priority(span, sampled)
+ if sampled
+ ForcedTracing.keep(span)
+ else
+ ForcedTracing.drop(span)
+ end
+ end
+
  def set_rule_metrics(span, sample_rate)
  span.set_metric(Ext::Sampling::RULE_SAMPLE_RATE, sample_rate)
  end
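For context on what `set_priority` achieves (an assumption based on ddtrace 0.x behavior, not shown in this diff): `ForcedTracing.keep`/`.drop` translate the rule decision into the `USER_KEEP`/`USER_REJECT` priorities documented in ext/priority.rb above, roughly:

```ruby
Datadog::ForcedTracing.keep(span)
span.context.sampling_priority # => Datadog::Ext::Priority::USER_KEEP (2)

Datadog::ForcedTracing.drop(span)
span.context.sampling_priority # => Datadog::Ext::Priority::USER_REJECT (-1)
```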