ddtrace 0.52.0 → 0.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +65 -1
  3. data/ddtrace.gemspec +1 -1
  4. data/docs/DevelopmentGuide.md +1 -6
  5. data/docs/GettingStarted.md +66 -16
  6. data/lib/datadog/ci/contrib/rspec/integration.rb +2 -2
  7. data/lib/datadog/ci/ext/environment.rb +41 -4
  8. data/lib/datadog/contrib.rb +2 -0
  9. data/lib/datadog/core/environment/vm_cache.rb +46 -0
  10. data/lib/ddtrace/configuration/agent_settings_resolver.rb +107 -40
  11. data/lib/ddtrace/configuration/components.rb +1 -1
  12. data/lib/ddtrace/configuration/settings.rb +13 -3
  13. data/lib/ddtrace/contrib/action_cable/instrumentation.rb +46 -0
  14. data/lib/ddtrace/contrib/action_cable/patcher.rb +1 -0
  15. data/lib/ddtrace/contrib/action_mailer/configuration/settings.rb +32 -0
  16. data/lib/ddtrace/contrib/action_mailer/event.rb +50 -0
  17. data/lib/ddtrace/contrib/action_mailer/events/deliver.rb +54 -0
  18. data/lib/ddtrace/contrib/action_mailer/events/process.rb +41 -0
  19. data/lib/ddtrace/contrib/action_mailer/events.rb +31 -0
  20. data/lib/ddtrace/contrib/action_mailer/ext.rb +32 -0
  21. data/lib/ddtrace/contrib/action_mailer/integration.rb +45 -0
  22. data/lib/ddtrace/contrib/action_mailer/patcher.rb +27 -0
  23. data/lib/ddtrace/contrib/active_job/configuration/settings.rb +33 -0
  24. data/lib/ddtrace/contrib/active_job/event.rb +54 -0
  25. data/lib/ddtrace/contrib/active_job/events/discard.rb +46 -0
  26. data/lib/ddtrace/contrib/active_job/events/enqueue.rb +45 -0
  27. data/lib/ddtrace/contrib/active_job/events/enqueue_at.rb +45 -0
  28. data/lib/ddtrace/contrib/active_job/events/enqueue_retry.rb +47 -0
  29. data/lib/ddtrace/contrib/active_job/events/perform.rb +45 -0
  30. data/lib/ddtrace/contrib/active_job/events/retry_stopped.rb +46 -0
  31. data/lib/ddtrace/contrib/active_job/events.rb +39 -0
  32. data/lib/ddtrace/contrib/active_job/ext.rb +32 -0
  33. data/lib/ddtrace/contrib/active_job/integration.rb +46 -0
  34. data/lib/ddtrace/contrib/active_job/log_injection.rb +21 -0
  35. data/lib/ddtrace/contrib/active_job/patcher.rb +33 -0
  36. data/lib/ddtrace/contrib/auto_instrument.rb +0 -1
  37. data/lib/ddtrace/contrib/rails/auto_instrument_railtie.rb +0 -1
  38. data/lib/ddtrace/contrib/rails/framework.rb +22 -0
  39. data/lib/ddtrace/contrib/rails/patcher.rb +19 -10
  40. data/lib/ddtrace/contrib/registerable.rb +0 -1
  41. data/lib/ddtrace/contrib/sidekiq/ext.rb +3 -0
  42. data/lib/ddtrace/contrib/sidekiq/integration.rb +10 -0
  43. data/lib/ddtrace/contrib/sidekiq/patcher.rb +26 -0
  44. data/lib/ddtrace/contrib/sidekiq/server_internal_tracer/heartbeat.rb +30 -0
  45. data/lib/ddtrace/contrib/sidekiq/server_internal_tracer/job_fetch.rb +30 -0
  46. data/lib/ddtrace/contrib/sidekiq/server_internal_tracer/scheduled_push.rb +29 -0
  47. data/lib/ddtrace/contrib/sinatra/env.rb +2 -1
  48. data/lib/ddtrace/contrib/sinatra/tracer.rb +15 -2
  49. data/lib/ddtrace/ext/git.rb +12 -0
  50. data/lib/ddtrace/ext/profiling.rb +1 -0
  51. data/lib/ddtrace/ext/runtime.rb +3 -0
  52. data/lib/ddtrace/ext/transport.rb +11 -0
  53. data/lib/ddtrace/profiling/collectors/stack.rb +71 -27
  54. data/lib/ddtrace/profiling/encoding/profile.rb +9 -1
  55. data/lib/ddtrace/profiling/events/stack.rb +7 -7
  56. data/lib/ddtrace/profiling/pprof/converter.rb +22 -9
  57. data/lib/ddtrace/profiling/pprof/stack_sample.rb +28 -2
  58. data/lib/ddtrace/profiling/tasks/setup.rb +0 -1
  59. data/lib/ddtrace/profiling/trace_identifiers/ddtrace.rb +1 -1
  60. data/lib/ddtrace/profiling/trace_identifiers/helper.rb +3 -3
  61. data/lib/ddtrace/profiling/transport/http.rb +8 -17
  62. data/lib/ddtrace/runtime/metrics.rb +14 -0
  63. data/lib/ddtrace/span.rb +7 -19
  64. data/lib/ddtrace/tracer.rb +1 -1
  65. data/lib/ddtrace/transport/http/adapters/net.rb +13 -3
  66. data/lib/ddtrace/transport/http/adapters/test.rb +4 -2
  67. data/lib/ddtrace/transport/http/adapters/unix_socket.rb +23 -12
  68. data/lib/ddtrace/transport/http/builder.rb +13 -6
  69. data/lib/ddtrace/transport/http.rb +5 -11
  70. data/lib/ddtrace/utils/time.rb +5 -6
  71. data/lib/ddtrace/version.rb +1 -1
  72. metadata +27 -2
data/lib/ddtrace/contrib/sidekiq/patcher.rb
@@ -34,8 +34,34 @@ module Datadog
             config.server_middleware do |chain|
               chain.add(Sidekiq::ServerTracer)
             end
+
+            patch_server_internals if Integration.compatible_with_server_internal_tracing?
           end
         end
+
+        def patch_server_internals
+          patch_server_heartbeat
+          patch_server_job_fetch
+          patch_server_scheduled_push
+        end
+
+        def patch_server_heartbeat
+          require 'ddtrace/contrib/sidekiq/server_internal_tracer/heartbeat'
+
+          ::Sidekiq::Launcher.prepend(ServerInternalTracer::Heartbeat)
+        end
+
+        def patch_server_job_fetch
+          require 'ddtrace/contrib/sidekiq/server_internal_tracer/job_fetch'
+
+          ::Sidekiq::Processor.prepend(ServerInternalTracer::JobFetch)
+        end
+
+        def patch_server_scheduled_push
+          require 'ddtrace/contrib/sidekiq/server_internal_tracer/scheduled_push'
+
+          ::Sidekiq::Scheduled::Poller.prepend(ServerInternalTracer::ScheduledPush)
+        end
       end
     end
   end
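For context, these server-internal spans only appear when the Sidekiq integration itself is active. A minimal sketch of that setup, using the standard 0.x configuration API (the service name is illustrative):

    require 'ddtrace'

    Datadog.configure do |c|
      # Enables the Sidekiq integration; on compatible Sidekiq versions this
      # also activates the new server-internal tracing patched in above.
      c.use :sidekiq, service_name: 'my-workers'
    end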
data/lib/ddtrace/contrib/sidekiq/server_internal_tracer/heartbeat.rb
@@ -0,0 +1,30 @@
+# typed: true
+
+module Datadog
+  module Contrib
+    module Sidekiq
+      module ServerInternalTracer
+        # Trace when a Sidekiq process has a heartbeat
+        module Heartbeat
+          private
+
+          def ❤ # rubocop:disable Naming/AsciiIdentifiers, Naming/MethodName
+            configuration = Datadog.configuration[:sidekiq]
+
+            configuration[:tracer].trace(Ext::SPAN_HEARTBEAT) do |span|
+              span.service = configuration[:service_name]
+              span.span_type = Datadog::Ext::AppTypes::WORKER
+
+              # Set analytics sample rate
+              if Contrib::Analytics.enabled?(configuration[:analytics_enabled])
+                Contrib::Analytics.set_sample_rate(span, configuration[:analytics_sample_rate])
+              end
+
+              super
+            end
+          end
+        end
+      end
+    end
+  end
+end
data/lib/ddtrace/contrib/sidekiq/server_internal_tracer/job_fetch.rb
@@ -0,0 +1,30 @@
+# typed: true
+
+module Datadog
+  module Contrib
+    module Sidekiq
+      module ServerInternalTracer
+        # Trace when Sidekiq looks for another job to work
+        module JobFetch
+          private
+
+          def fetch
+            configuration = Datadog.configuration[:sidekiq]
+
+            configuration[:tracer].trace(Ext::SPAN_JOB_FETCH) do |span|
+              span.service = configuration[:service_name]
+              span.span_type = Datadog::Ext::AppTypes::WORKER
+
+              # Set analytics sample rate
+              if Contrib::Analytics.enabled?(configuration[:analytics_enabled])
+                Contrib::Analytics.set_sample_rate(span, configuration[:analytics_sample_rate])
+              end
+
+              super
+            end
+          end
+        end
+      end
+    end
+  end
+end
data/lib/ddtrace/contrib/sidekiq/server_internal_tracer/scheduled_push.rb
@@ -0,0 +1,29 @@
+# typed: true
+
+module Datadog
+  module Contrib
+    module Sidekiq
+      module ServerInternalTracer
+        # Trace when Sidekiq checks to see if there are scheduled jobs that need to be worked
+        # https://github.com/mperham/sidekiq/wiki/Scheduled-Jobs
+        module ScheduledPush
+          def enqueue
+            configuration = Datadog.configuration[:sidekiq]
+
+            configuration[:tracer].trace(Ext::SPAN_SCHEDULED_PUSH) do |span|
+              span.service = configuration[:service_name]
+              span.span_type = Datadog::Ext::AppTypes::WORKER
+
+              # Set analytics sample rate
+              if Contrib::Analytics.enabled?(configuration[:analytics_enabled])
+                Contrib::Analytics.set_sample_rate(span, configuration[:analytics_sample_rate])
+              end
+
+              super
+            end
+          end
+        end
+      end
+    end
+  end
+end
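All three tracer modules use the same mechanism: `Module#prepend` places the module ahead of the patched class in the ancestor chain, so its method runs first, opens a span, and hands control to the original implementation via `super`. A self-contained sketch of the pattern (all names below are illustrative, not ddtrace API):

    module TimingTracer
      def work
        started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        super # invokes the original Worker#work
      ensure
        elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
        puts format('work took %.6fs', elapsed)
      end
    end

    class Worker
      def work
        sleep(0.01)
      end
    end

    Worker.prepend(TimingTracer)
    Worker.new.work # timed transparently; the caller sees no difference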
data/lib/ddtrace/contrib/sinatra/env.rb
@@ -10,7 +10,8 @@ module Datadog
         module_function

         def datadog_span(env, app)
-          env[Ext::RACK_ENV_REQUEST_SPAN][app]
+          request_span = env[Ext::RACK_ENV_REQUEST_SPAN]
+          request_span && request_span[app]
         end

         def set_datadog_span(env, app, span)
data/lib/ddtrace/contrib/sinatra/tracer.rb
@@ -5,7 +5,7 @@ require 'ddtrace/ext/app_types'
 require 'ddtrace/ext/errors'
 require 'ddtrace/ext/http'
 require 'ddtrace/propagation/http_propagator'
-
+require 'ddtrace/utils/only_once'
 require 'ddtrace/contrib/sinatra/ext'
 require 'ddtrace/contrib/sinatra/tracer_middleware'
 require 'ddtrace/contrib/sinatra/env'
@@ -77,6 +77,9 @@ module Datadog

       # Method overrides for Sinatra::Base
       module Base
+        MISSING_REQUEST_SPAN_ONLY_ONCE = Datadog::Utils::OnlyOnce.new
+        private_constant :MISSING_REQUEST_SPAN_ONLY_ONCE
+
         def render(engine, data, *)
           tracer = Datadog.configuration[:sinatra][:tracer]
           return super unless tracer.enabled
@@ -121,8 +124,18 @@ module Datadog
           else
             Sinatra::Env.datadog_span(env, self.class)
           end
-          if sinatra_request_span # DEV: Is it possible for sinatra_request_span to ever be nil here?
+          if sinatra_request_span
             sinatra_request_span.resource = span.resource
+          else
+            MISSING_REQUEST_SPAN_ONLY_ONCE.run do
+              Datadog.logger.warn do
+                'Sinatra integration is misconfigured, reported traces will be missing request metadata ' \
+                'such as path and HTTP status code. ' \
+                'Did you forget to add `register Datadog::Contrib::Sinatra::Tracer` to your ' \
+                '`Sinatra::Base` subclass? ' \
+                'See <https://docs.datadoghq.com/tracing/setup_overview/setup/ruby/#sinatra> for more details.'
+              end
+            end
           end

           Contrib::Analytics.set_measured(span)
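The new warning fires once per process and points at the missing registration step. For a modular app, the documented setup it asks for looks roughly like this:

    require 'sinatra/base'
    require 'ddtrace'

    Datadog.configure do |c|
      c.use :sinatra
    end

    class MyApp < Sinatra::Base
      # Without this, requests are handled but the request span is never
      # created, which is exactly what the warning above detects.
      register Datadog::Contrib::Sinatra::Tracer

      get '/' do
        'Hello'
      end
    end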
data/lib/ddtrace/ext/git.rb
@@ -15,6 +15,18 @@ module Datadog
       TAG_COMMIT_COMMITTER_NAME = 'git.commit.committer.name'.freeze
       TAG_COMMIT_MESSAGE = 'git.commit.message'.freeze
       TAG_COMMIT_SHA = 'git.commit.sha'.freeze
+
+      ENV_REPOSITORY_URL = 'DD_GIT_REPOSITORY_URL'.freeze
+      ENV_COMMIT_SHA = 'DD_GIT_COMMIT_SHA'.freeze
+      ENV_BRANCH = 'DD_GIT_BRANCH'.freeze
+      ENV_TAG = 'DD_GIT_TAG'.freeze
+      ENV_COMMIT_MESSAGE = 'DD_GIT_COMMIT_MESSAGE'.freeze
+      ENV_COMMIT_AUTHOR_NAME = 'DD_GIT_COMMIT_AUTHOR_NAME'.freeze
+      ENV_COMMIT_AUTHOR_EMAIL = 'DD_GIT_COMMIT_AUTHOR_EMAIL'.freeze
+      ENV_COMMIT_AUTHOR_DATE = 'DD_GIT_COMMIT_AUTHOR_DATE'.freeze
+      ENV_COMMIT_COMMITTER_NAME = 'DD_GIT_COMMIT_COMMITTER_NAME'.freeze
+      ENV_COMMIT_COMMITTER_EMAIL = 'DD_GIT_COMMIT_COMMITTER_EMAIL'.freeze
+      ENV_COMMIT_COMMITTER_DATE = 'DD_GIT_COMMIT_COMMITTER_DATE'.freeze
     end
   end
 end
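These variables let users supply git metadata for CI Visibility explicitly when it cannot be read from the checkout (for example, in shallow clones). A hypothetical setup with illustrative values, applied before the test process boots:

    # Normally exported by the CI system; shown in Ruby for illustration.
    ENV['DD_GIT_REPOSITORY_URL'] = 'https://github.com/example/app.git'
    ENV['DD_GIT_COMMIT_SHA'] = '0123456789abcdef0123456789abcdef01234567'
    ENV['DD_GIT_BRANCH'] = 'main'
    ENV['DD_GIT_COMMIT_MESSAGE'] = 'Fix flaky spec'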
data/lib/ddtrace/ext/profiling.rb
@@ -6,6 +6,7 @@ module Datadog
       ENV_UPLOAD_TIMEOUT = 'DD_PROFILING_UPLOAD_TIMEOUT'.freeze
       ENV_MAX_FRAMES = 'DD_PROFILING_MAX_FRAMES'.freeze
       ENV_AGENTLESS = 'DD_PROFILING_AGENTLESS'.freeze
+      ENV_ENDPOINT_COLLECTION_ENABLED = 'DD_PROFILING_ENDPOINT_COLLECTION_ENABLED'.freeze

       module Pprof
         LABEL_KEY_SPAN_ID = 'span id'.freeze
data/lib/ddtrace/ext/runtime.rb
@@ -6,6 +6,7 @@ module Datadog
     module Runtime
       TAG_ID = 'runtime-id'.freeze
       TAG_LANG = 'language'.freeze
+      TAG_PID = 'system.pid'.freeze

       # Metrics
       module Metrics
@@ -14,6 +15,8 @@ module Datadog
         METRIC_CLASS_COUNT = 'runtime.ruby.class_count'.freeze
         METRIC_GC_PREFIX = 'runtime.ruby.gc'.freeze
         METRIC_THREAD_COUNT = 'runtime.ruby.thread_count'.freeze
+        METRIC_GLOBAL_CONSTANT_STATE = 'runtime.ruby.global_constant_state'.freeze
+        METRIC_GLOBAL_METHOD_STATE = 'runtime.ruby.global_method_state'.freeze

         TAG_SERVICE = 'service'.freeze
       end
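These two counters track MRI's constant/method cache invalidations (note the new data/lib/datadog/core/environment/vm_cache.rb in the file list above). On CRuby they are exposed through `RubyVM.stat`; the available keys vary by Ruby version (`global_method_state`, for instance, is gone as of Ruby 3.0):

    # Prints the VM serial counters that presumably back these metrics (MRI only).
    p RubyVM.stat
    # e.g. {:global_method_state=>137, :global_constant_state=>1100, :class_serial=>5640}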
data/lib/ddtrace/ext/transport.rb
@@ -3,6 +3,7 @@ module Datadog
   module Ext
     module Transport
       module HTTP
+        ADAPTER = :net_http # DEV: Rename to simply `:http`, as Net::HTTP is an implementation detail.
         DEFAULT_HOST = '127.0.0.1'.freeze
         DEFAULT_PORT = 8126
         DEFAULT_TIMEOUT_SECONDS = 1
@@ -16,6 +17,16 @@ module Datadog
         HEADER_META_LANG_INTERPRETER = 'Datadog-Meta-Lang-Interpreter'.freeze
         HEADER_META_TRACER_VERSION = 'Datadog-Meta-Tracer-Version'.freeze
       end
+
+      module Test
+        ADAPTER = :test
+      end
+
+      module UnixSocket
+        ADAPTER = :unix
+        DEFAULT_PATH = '/var/run/datadog/apm.socket'.freeze
+        DEFAULT_TIMEOUT_SECONDS = 1
+      end
     end
   end
 end
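With `:unix` registered as an adapter, the tracer can talk to the agent over its Unix domain socket instead of TCP. A sketch using the 0.x `transport_options` hook, assuming the agent listens on the default path above:

    Datadog.configure do |c|
      c.tracer.transport_options = proc do |t|
        # :unix is the adapter constant added in this release
        t.adapter :unix, '/var/run/datadog/apm.socket'
      end
    end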
data/lib/ddtrace/profiling/collectors/stack.rb
@@ -18,6 +18,12 @@ module Datadog
       DEFAULT_MAX_TIME_USAGE_PCT = 2.0
       MIN_INTERVAL = 0.01
       THREAD_LAST_CPU_TIME_KEY = :datadog_profiler_last_cpu_time
+      THREAD_LAST_WALL_CLOCK_KEY = :datadog_profiler_last_wall_clock
+
+      # This default was picked based on the current sampling performance and on expected concurrency on an average
+      # Ruby MRI application. Lowering this optimizes for latency (less impact each time we sample), and raising
+      # optimizes for coverage (less chance to miss what a given thread is doing).
+      DEFAULT_MAX_THREADS_SAMPLED = 16

       attr_reader \
         :recorder,
@@ -33,6 +39,7 @@ module Datadog
         trace_identifiers_helper:, # Usually an instance of Datadog::Profiling::TraceIdentifiers::Helper
         ignore_thread: nil,
         max_time_usage_pct: DEFAULT_MAX_TIME_USAGE_PCT,
+        max_threads_sampled: DEFAULT_MAX_THREADS_SAMPLED,
         thread_api: Thread,
         fork_policy: Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default
         interval: MIN_INTERVAL,
@@ -43,6 +50,7 @@ module Datadog
         @trace_identifiers_helper = trace_identifiers_helper
         @ignore_thread = ignore_thread
         @max_time_usage_pct = max_time_usage_pct
+        @max_threads_sampled = max_threads_sampled
         @thread_api = thread_api

         # Workers::Async::Thread settings
@@ -60,10 +68,13 @@ module Datadog
         @build_backtrace_location = method(:build_backtrace_location).to_proc
         # Cache this buffer, since it's pretty expensive to keep accessing it
         @stack_sample_event_recorder = recorder[Events::StackSample]
+        # See below for details on why this is needed
+        @needs_process_waiter_workaround =
+          Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.3') &&
+          Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.7')
       end

       def start
-        @last_wall_time = Datadog::Utils::Time.get_time
         reset_cpu_time_tracking
         perform
       end
@@ -87,24 +98,14 @@

       def collect_events
         events = []
-
-        # Compute wall time interval
-        current_wall_time = Datadog::Utils::Time.get_time
-        last_wall_time = if instance_variable_defined?(:@last_wall_time)
-                           @last_wall_time
-                         else
-                           current_wall_time
-                         end
-
-        wall_time_interval_ns = ((current_wall_time - last_wall_time).round(9) * 1e9).to_i
-        @last_wall_time = current_wall_time
+        current_wall_time_ns = get_current_wall_time_timestamp_ns

         # Collect backtraces from each thread
-        thread_api.list.each do |thread|
+        threads_to_sample.each do |thread|
           next unless thread.alive?
           next if ignore_thread.is_a?(Proc) && ignore_thread.call(thread)

-          event = collect_thread_event(thread, wall_time_interval_ns)
+          event = collect_thread_event(thread, current_wall_time_ns)
           events << event unless event.nil?
         end

@@ -114,7 +115,7 @@
         events
       end

-      def collect_thread_event(thread, wall_time_interval_ns)
+      def collect_thread_event(thread, current_wall_time_ns)
         locations = thread.backtrace_locations
         return if locations.nil?

@@ -126,8 +127,10 @@
         locations = convert_backtrace_locations(locations)

         thread_id = thread.respond_to?(:pthread_thread_id) ? thread.pthread_thread_id : thread.object_id
-        trace_id, span_id, trace_resource_container = trace_identifiers_helper.trace_identifiers_for(thread)
+        trace_id, span_id, trace_resource = trace_identifiers_helper.trace_identifiers_for(thread)
         cpu_time = get_cpu_time_interval!(thread)
+        wall_time_interval_ns =
+          get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_WALL_CLOCK_KEY, current_wall_time_ns)

         Events::StackSample.new(
           nil,
@@ -136,7 +139,7 @@
           thread_id,
           trace_id,
           span_id,
-          trace_resource_container,
+          trace_resource,
           cpu_time,
           wall_time_interval_ns
         )
@@ -156,14 +159,7 @@
         # *before* the thread had time to finish the initialization
         return unless current_cpu_time_ns

-        last_cpu_time_ns = (thread.thread_variable_get(THREAD_LAST_CPU_TIME_KEY) || current_cpu_time_ns)
-        interval = current_cpu_time_ns - last_cpu_time_ns
-
-        # Update CPU time for thread
-        thread.thread_variable_set(THREAD_LAST_CPU_TIME_KEY, current_cpu_time_ns)
-
-        # Return interval
-        interval
+        get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_CPU_TIME_KEY, current_cpu_time_ns)
       end

       def compute_wait_time(used_time)
@@ -237,10 +233,10 @@
       end

       # If the profiler is started for a while, stopped and then restarted OR whenever the process forks, we need to
-      # clean up any leftover per-thread cpu time counters, so that the first sample after starting doesn't end up with:
+      # clean up any leftover per-thread counters, so that the first sample after starting doesn't end up with:
       #
       # a) negative time: At least on my test docker container, and on the reliability environment, after the process
-      # forks, the clock reference changes and (old cpu time - new cpu time) can be < 0
+      # forks, the cpu time reference changes and (old cpu time - new cpu time) can be < 0
       #
       # b) large amount of time: if the profiler was started, then stopped for some amount of time, and then
       # restarted, we don't want the first sample to be "blamed" for multiple minutes of CPU time
@@ -248,9 +244,57 @@
       # By resetting the last cpu time seen, we start with a clean slate every time we start the stack collector.
       def reset_cpu_time_tracking
         thread_api.list.each do |thread|
+          # See below for details on why this is needed
+          next if @needs_process_waiter_workaround && thread.is_a?(::Process::Waiter)
+
           thread.thread_variable_set(THREAD_LAST_CPU_TIME_KEY, nil)
+          thread.thread_variable_set(THREAD_LAST_WALL_CLOCK_KEY, nil)
+        end
+      end
+
+      def get_elapsed_since_last_sample_and_set_value(thread, key, current_value)
+        # See cthread.rb for more details, but this is a workaround for https://bugs.ruby-lang.org/issues/17807 ;
+        # using all thread_variable related methods on these instances also triggers a crash and for now we just
+        # skip it for the affected Rubies
+        return 0 if @needs_process_waiter_workaround && thread.is_a?(::Process::Waiter)
+
+        last_value = thread.thread_variable_get(key) || current_value
+        thread.thread_variable_set(key, current_value)
+
+        current_value - last_value
+      end
+
+      # Whenever there are more than max_threads_sampled active, we only sample a subset of them.
+      # We do this to avoid impacting the latency of the service being profiled. We want to avoid doing
+      # a big burst of work all at once (sample everything), and instead do a little work each time
+      # (sample a bit by bit).
+      #
+      # Because we pick the threads to sample randomly, we'll eventually sample all threads -- just not at once.
+      # Notice also that this will interact with our dynamic sampling mechanism -- if samples are faster, we take
+      # them more often, if they are slower, we take them less often -- which again means that over a longer period
+      # we should take roughly the same samples.
+      #
+      # One downside of this approach is that if there really are many threads, the resulting wall clock times
+      # in a one minute profile may "drift" around the 60 second mark, e.g. maybe we only sampled a thread once per
+      # second and only 59 times, so we'll report 59s, but on the next report we'll include the missing one, so
+      # then the result will be 61s. I've observed 60 +- 1.68 secs for an app with ~65 threads, given the
+      # default maximum of 16 threads. This seems a reasonable enough margin of error given the improvement to
+      # latency (especially on such a large application! -> even bigger latency impact if we tried to sample all
+      # threads).
+      #
+      def threads_to_sample
+        all_threads = thread_api.list
+
+        if all_threads.size > @max_threads_sampled
+          all_threads.sample(@max_threads_sampled)
+        else
+          all_threads
         end
       end
+
+      def get_current_wall_time_timestamp_ns
+        Datadog::Utils::Time.get_time(:nanosecond)
+      end
     end
   end
 end
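The switch to `Datadog::Utils::Time.get_time(:nanosecond)` (the wrapper itself is updated in data/lib/ddtrace/utils/time.rb, not shown here) means per-thread wall time is now derived from integer-nanosecond monotonic timestamps rather than float seconds. The underlying primitive is presumably Ruby's monotonic clock:

    # Monotonic readings are unaffected by NTP or wall-clock adjustments, so the
    # difference between two readings is a safe elapsed-time measurement.
    t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
    sleep(0.05)
    t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
    elapsed_ns = t1 - t0 # ~50_000_000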
data/lib/ddtrace/profiling/encoding/profile.rb
@@ -24,8 +24,16 @@
           flush.event_groups.each { |event_group| template.add_events!(event_group.event_class, event_group.events) }

           Datadog.logger.debug do
+            max_events = Datadog.configuration.profiling.advanced.max_events
+            events_sampled =
+              if flush.event_count == max_events
+                'max events limit hit, events were sampled [profile will be biased], '
+              else
+                ''
+              end
+
             "Encoding profile covering #{flush.start.iso8601} to #{flush.finish.iso8601}, " \
-            "events: #{flush.event_count} (#{template.debug_statistics})"
+            "events: #{flush.event_count} (#{events_sampled}#{template.debug_statistics})"
           end

           # Build the profile and encode it
data/lib/ddtrace/profiling/events/stack.rb
@@ -13,7 +13,7 @@
         :thread_id,
         :trace_id,
         :span_id,
-        :trace_resource_container
+        :trace_resource

       def initialize(
         timestamp,
@@ -22,7 +22,7 @@
         thread_id,
         trace_id,
         span_id,
-        trace_resource_container
+        trace_resource
       )
         super(timestamp)

@@ -31,14 +31,14 @@
         @thread_id = thread_id
         @trace_id = trace_id
         @span_id = span_id
-        @trace_resource_container = trace_resource_container
+        @trace_resource = trace_resource

         @hash = [
           thread_id,
           trace_id,
           span_id,
-          # trace_resource_container is deliberately not included -- events that share the same (trace_id, span_id)
-          # pair should also have the same trace_resource_container
+          # trace_resource is deliberately not included -- events that share the same (trace_id, span_id)
+          # trace_resource might not match between pairs, but they refer to the same trace.
           frames.collect(&:hash),
           total_frame_count
         ].hash
@@ -58,7 +58,7 @@
         thread_id,
         trace_id,
         span_id,
-        trace_resource_container,
+        trace_resource,
         cpu_time_interval_ns,
         wall_time_interval_ns
       )
@@ -69,7 +69,7 @@
         thread_id,
         trace_id,
         span_id,
-        trace_resource_container
+        trace_resource
       )

         @cpu_time_interval_ns = cpu_time_interval_ns
data/lib/ddtrace/profiling/pprof/converter.rb
@@ -25,20 +25,19 @@
         # [key, EventGroup]
         event_groups = {}

+        # Aggregate each event into a group
+        # with identical properties, but different values.
         events.each do |event|
           key = yield(event)
-          values = build_sample_values(event)
+          values = build_event_values(event)

           unless key.nil?
             if event_groups.key?(key)
-              # Update values for group
-              group_values = event_groups[key].values
-              group_values.each_with_index do |group_value, i|
-                group_values[i] = group_value + values[i]
-              end
+              # Update existing group from event
+              update_group(event_groups[key], event, values)
             else
               # Add new group
-              event_groups[key] = EventGroup.new(event, values)
+              event_groups[key] = new_group(event, values)
             end
           end
         end
@@ -57,7 +56,7 @@
         index
       end

-      def build_sample_values(stack_sample)
+      def build_event_values(event)
         # Build a value array that matches the length of the sample types
         # Populate all values with "no value" by default
         Array.new(@sample_type_mappings.length, Datadog::Ext::Profiling::Pprof::SAMPLE_VALUE_NO_VALUE)
@@ -69,7 +68,7 @@

       # Represents a grouped event
       # 'sample' is an example event object from the group.
-      # 'values' is the the summation of the group's sample values
+      # 'values' is the summation of the group's sample values
       EventGroup = Struct.new(:sample, :values)

       # Error when the mapping of a sample type to value index is unknown
@@ -84,6 +83,20 @@
           "Mapping for sample value type '#{type}' to index is unknown."
         end
       end
+
+      protected
+
+      def new_group(event, values)
+        EventGroup.new(event, values)
+      end
+
+      def update_group(event_group, event, values)
+        # Update values for group
+        group_values = event_group.values
+        group_values.each_with_index do |group_value, i|
+          group_values[i] = group_value + values[i]
+        end
+      end
     end
   end
 end
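The `new_group`/`update_group` extraction turns `group_events` into a template method that subclasses (such as the stack-sample converter below) can specialize. The aggregation itself is keyed element-wise summation; a standalone toy version:

    # Events with the same key have their value arrays summed element-wise;
    # the first event seen for a key seeds the group.
    events = [[:a, [1, 2]], [:b, [3, 4]], [:a, [10, 20]]]

    groups = {}
    events.each do |key, values|
      if (group = groups[key])
        values.each_with_index { |value, i| group[i] += value }
      else
        groups[key] = values.dup
      end
    end

    p groups # => {:a=>[11, 22], :b=>[3, 4]}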
data/lib/ddtrace/profiling/pprof/stack_sample.rb
@@ -30,6 +30,7 @@
       def initialize(*_)
         super

+        @most_recent_trace_samples = {}
         @processed_unique_stacks = 0
         @processed_with_trace_ids = 0
       end
@@ -40,9 +41,28 @@
       end

       def stack_sample_group_key(stack_sample)
+        # We want to make sure we have the most recent sample for any trace.
+        # (This is done here to save an iteration over all samples.)
+        update_most_recent_trace_sample(stack_sample)
+
         stack_sample.hash
       end

+      # Track the most recent sample for each trace
+      def update_most_recent_trace_sample(stack_sample)
+        return unless stack_sample.trace_id && stack_sample.trace_resource
+
+        # Update trace resource with most recent value
+        if (most_recent_trace_sample = @most_recent_trace_samples[stack_sample.trace_id])
+          if most_recent_trace_sample.timestamp < stack_sample.timestamp
+            @most_recent_trace_samples[stack_sample.trace_id] = stack_sample
+          end
+        else
+          # Add trace resource
+          @most_recent_trace_samples[stack_sample.trace_id] = stack_sample
+        end
+      end
+
       def build_samples(stack_samples)
         groups = group_events(stack_samples, &method(:stack_sample_group_key))
         groups.collect do |_group_key, group|
@@ -64,7 +84,7 @@
         )
       end

-      def build_sample_values(stack_sample)
+      def build_event_values(stack_sample)
         no_value = Datadog::Ext::Profiling::Pprof::SAMPLE_VALUE_NO_VALUE
         values = super(stack_sample)
         values[sample_value_index(:cpu_time_ns)] = stack_sample.cpu_time_interval_ns || no_value
@@ -96,7 +116,13 @@
           str: builder.string_table.fetch(span_id.to_s)
         )

-        trace_resource = stack_sample.trace_resource_container && stack_sample.trace_resource_container.latest
+        # Use most up-to-date trace resource, if available.
+        # Otherwise, use the trace resource provided.
+        trace_resource = (
+          @most_recent_trace_samples[stack_sample.trace_id] \
+          || stack_sample
+        ).trace_resource
+
         if trace_resource && !trace_resource.empty?
           labels << Perftools::Profiles::Label.new(
             key: builder.string_table.fetch(Datadog::Ext::Profiling::Pprof::LABEL_KEY_TRACE_ENDPOINT),
data/lib/ddtrace/profiling/tasks/setup.rb
@@ -1,5 +1,4 @@
 # typed: false
-require 'ddtrace'
 require 'ddtrace/utils/only_once'
 require 'ddtrace/profiling'
 require 'ddtrace/profiling/ext/cpu'
data/lib/ddtrace/profiling/trace_identifiers/ddtrace.rb
@@ -33,7 +33,7 @@
       def maybe_extract_resource(root_span)
         return unless root_span

-        root_span.resource_container if root_span.span_type == Datadog::Ext::HTTP::TYPE_INBOUND
+        root_span.resource if root_span.span_type == Datadog::Ext::HTTP::TYPE_INBOUND
       end
     end
   end
data/lib/ddtrace/profiling/trace_identifiers/helper.rb
@@ -20,10 +20,10 @@
       def initialize(
         tracer:,
         # If this is disabled, the helper will strip the optional trace_resource_container even if provided by the api
-        extract_trace_resource:,
+        endpoint_collection_enabled:,
         supported_apis: DEFAULT_SUPPORTED_APIS.map { |api| api.new(tracer: tracer) }
       )
-        @extract_trace_resource = extract_trace_resource
+        @endpoint_collection_enabled = endpoint_collection_enabled
         @supported_apis = supported_apis
       end

@@ -34,7 +34,7 @@
         trace_identifiers = api.trace_identifiers_for(thread)

         if trace_identifiers
-          return @extract_trace_resource ? trace_identifiers : trace_identifiers[0..1]
+          return @endpoint_collection_enabled ? trace_identifiers : trace_identifiers[0..1]
         end
       end
     end
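The rename matches the user-facing name of the feature: endpoint collection for profiles. Given the `DD_PROFILING_ENDPOINT_COLLECTION_ENABLED` variable added in ext/profiling.rb above, it can presumably be toggled from the environment before ddtrace loads; a sketch:

    # Illustrative: opt out of endpoint collection via the new environment
    # variable, set before the library is required.
    ENV['DD_PROFILING_ENDPOINT_COLLECTION_ENABLED'] = 'false'

    require 'ddtrace'

    Datadog.configure do |c|
      c.profiling.enabled = true
    end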