ddtrace 0.52.0 → 0.53.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +65 -1
- data/ddtrace.gemspec +1 -1
- data/docs/DevelopmentGuide.md +1 -6
- data/docs/GettingStarted.md +66 -16
- data/lib/datadog/ci/contrib/rspec/integration.rb +2 -2
- data/lib/datadog/ci/ext/environment.rb +41 -4
- data/lib/datadog/contrib.rb +2 -0
- data/lib/datadog/core/environment/vm_cache.rb +46 -0
- data/lib/ddtrace/configuration/agent_settings_resolver.rb +107 -40
- data/lib/ddtrace/configuration/components.rb +1 -1
- data/lib/ddtrace/configuration/settings.rb +13 -3
- data/lib/ddtrace/contrib/action_cable/instrumentation.rb +46 -0
- data/lib/ddtrace/contrib/action_cable/patcher.rb +1 -0
- data/lib/ddtrace/contrib/action_mailer/configuration/settings.rb +32 -0
- data/lib/ddtrace/contrib/action_mailer/event.rb +50 -0
- data/lib/ddtrace/contrib/action_mailer/events/deliver.rb +54 -0
- data/lib/ddtrace/contrib/action_mailer/events/process.rb +41 -0
- data/lib/ddtrace/contrib/action_mailer/events.rb +31 -0
- data/lib/ddtrace/contrib/action_mailer/ext.rb +32 -0
- data/lib/ddtrace/contrib/action_mailer/integration.rb +45 -0
- data/lib/ddtrace/contrib/action_mailer/patcher.rb +27 -0
- data/lib/ddtrace/contrib/active_job/configuration/settings.rb +33 -0
- data/lib/ddtrace/contrib/active_job/event.rb +54 -0
- data/lib/ddtrace/contrib/active_job/events/discard.rb +46 -0
- data/lib/ddtrace/contrib/active_job/events/enqueue.rb +45 -0
- data/lib/ddtrace/contrib/active_job/events/enqueue_at.rb +45 -0
- data/lib/ddtrace/contrib/active_job/events/enqueue_retry.rb +47 -0
- data/lib/ddtrace/contrib/active_job/events/perform.rb +45 -0
- data/lib/ddtrace/contrib/active_job/events/retry_stopped.rb +46 -0
- data/lib/ddtrace/contrib/active_job/events.rb +39 -0
- data/lib/ddtrace/contrib/active_job/ext.rb +32 -0
- data/lib/ddtrace/contrib/active_job/integration.rb +46 -0
- data/lib/ddtrace/contrib/active_job/log_injection.rb +21 -0
- data/lib/ddtrace/contrib/active_job/patcher.rb +33 -0
- data/lib/ddtrace/contrib/auto_instrument.rb +0 -1
- data/lib/ddtrace/contrib/rails/auto_instrument_railtie.rb +0 -1
- data/lib/ddtrace/contrib/rails/framework.rb +22 -0
- data/lib/ddtrace/contrib/rails/patcher.rb +19 -10
- data/lib/ddtrace/contrib/registerable.rb +0 -1
- data/lib/ddtrace/contrib/sidekiq/ext.rb +3 -0
- data/lib/ddtrace/contrib/sidekiq/integration.rb +10 -0
- data/lib/ddtrace/contrib/sidekiq/patcher.rb +26 -0
- data/lib/ddtrace/contrib/sidekiq/server_internal_tracer/heartbeat.rb +30 -0
- data/lib/ddtrace/contrib/sidekiq/server_internal_tracer/job_fetch.rb +30 -0
- data/lib/ddtrace/contrib/sidekiq/server_internal_tracer/scheduled_push.rb +29 -0
- data/lib/ddtrace/contrib/sinatra/env.rb +2 -1
- data/lib/ddtrace/contrib/sinatra/tracer.rb +15 -2
- data/lib/ddtrace/ext/git.rb +12 -0
- data/lib/ddtrace/ext/profiling.rb +1 -0
- data/lib/ddtrace/ext/runtime.rb +3 -0
- data/lib/ddtrace/ext/transport.rb +11 -0
- data/lib/ddtrace/profiling/collectors/stack.rb +71 -27
- data/lib/ddtrace/profiling/encoding/profile.rb +9 -1
- data/lib/ddtrace/profiling/events/stack.rb +7 -7
- data/lib/ddtrace/profiling/pprof/converter.rb +22 -9
- data/lib/ddtrace/profiling/pprof/stack_sample.rb +28 -2
- data/lib/ddtrace/profiling/tasks/setup.rb +0 -1
- data/lib/ddtrace/profiling/trace_identifiers/ddtrace.rb +1 -1
- data/lib/ddtrace/profiling/trace_identifiers/helper.rb +3 -3
- data/lib/ddtrace/profiling/transport/http.rb +8 -17
- data/lib/ddtrace/runtime/metrics.rb +14 -0
- data/lib/ddtrace/span.rb +7 -19
- data/lib/ddtrace/tracer.rb +1 -1
- data/lib/ddtrace/transport/http/adapters/net.rb +13 -3
- data/lib/ddtrace/transport/http/adapters/test.rb +4 -2
- data/lib/ddtrace/transport/http/adapters/unix_socket.rb +23 -12
- data/lib/ddtrace/transport/http/builder.rb +13 -6
- data/lib/ddtrace/transport/http.rb +5 -11
- data/lib/ddtrace/utils/time.rb +5 -6
- data/lib/ddtrace/version.rb +1 -1
- metadata +27 -2
data/lib/ddtrace/contrib/sidekiq/patcher.rb
CHANGED
@@ -34,8 +34,34 @@ module Datadog
           config.server_middleware do |chain|
             chain.add(Sidekiq::ServerTracer)
           end
+
+          patch_server_internals if Integration.compatible_with_server_internal_tracing?
         end
       end
+
+      def patch_server_internals
+        patch_server_heartbeat
+        patch_server_job_fetch
+        patch_server_scheduled_push
+      end
+
+      def patch_server_heartbeat
+        require 'ddtrace/contrib/sidekiq/server_internal_tracer/heartbeat'
+
+        ::Sidekiq::Launcher.prepend(ServerInternalTracer::Heartbeat)
+      end
+
+      def patch_server_job_fetch
+        require 'ddtrace/contrib/sidekiq/server_internal_tracer/job_fetch'
+
+        ::Sidekiq::Processor.prepend(ServerInternalTracer::JobFetch)
+      end
+
+      def patch_server_scheduled_push
+        require 'ddtrace/contrib/sidekiq/server_internal_tracer/scheduled_push'
+
+        ::Sidekiq::Scheduled::Poller.prepend(ServerInternalTracer::ScheduledPush)
+      end
     end
   end
 end
data/lib/ddtrace/contrib/sidekiq/server_internal_tracer/heartbeat.rb
ADDED
@@ -0,0 +1,30 @@
+# typed: true
+
+module Datadog
+  module Contrib
+    module Sidekiq
+      module ServerInternalTracer
+        # Trace when a Sidekiq process has a heartbeat
+        module Heartbeat
+          private
+
+          def ❤ # rubocop:disable Naming/AsciiIdentifiers, Naming/MethodName
+            configuration = Datadog.configuration[:sidekiq]
+
+            configuration[:tracer].trace(Ext::SPAN_HEARTBEAT) do |span|
+              span.service = configuration[:service_name]
+              span.span_type = Datadog::Ext::AppTypes::WORKER
+
+              # Set analytics sample rate
+              if Contrib::Analytics.enabled?(configuration[:analytics_enabled])
+                Contrib::Analytics.set_sample_rate(span, configuration[:analytics_sample_rate])
+              end
+
+              super
+            end
+          end
+        end
+      end
+    end
+  end
+end
data/lib/ddtrace/contrib/sidekiq/server_internal_tracer/job_fetch.rb
ADDED
@@ -0,0 +1,30 @@
+# typed: true
+
+module Datadog
+  module Contrib
+    module Sidekiq
+      module ServerInternalTracer
+        # Trace when Sidekiq looks for another job to work
+        module JobFetch
+          private
+
+          def fetch
+            configuration = Datadog.configuration[:sidekiq]
+
+            configuration[:tracer].trace(Ext::SPAN_JOB_FETCH) do |span|
+              span.service = configuration[:service_name]
+              span.span_type = Datadog::Ext::AppTypes::WORKER
+
+              # Set analytics sample rate
+              if Contrib::Analytics.enabled?(configuration[:analytics_enabled])
+                Contrib::Analytics.set_sample_rate(span, configuration[:analytics_sample_rate])
+              end
+
+              super
+            end
+          end
+        end
+      end
+    end
+  end
+end
data/lib/ddtrace/contrib/sidekiq/server_internal_tracer/scheduled_push.rb
ADDED
@@ -0,0 +1,29 @@
+# typed: true
+
+module Datadog
+  module Contrib
+    module Sidekiq
+      module ServerInternalTracer
+        # Trace when Sidekiq checks to see if there are scheduled jobs that need to be worked
+        # https://github.com/mperham/sidekiq/wiki/Scheduled-Jobs
+        module ScheduledPush
+          def enqueue
+            configuration = Datadog.configuration[:sidekiq]
+
+            configuration[:tracer].trace(Ext::SPAN_SCHEDULED_PUSH) do |span|
+              span.service = configuration[:service_name]
+              span.span_type = Datadog::Ext::AppTypes::WORKER
+
+              # Set analytics sample rate
+              if Contrib::Analytics.enabled?(configuration[:analytics_enabled])
+                Contrib::Analytics.set_sample_rate(span, configuration[:analytics_sample_rate])
+              end
+
+              super
+            end
+          end
+        end
+      end
+    end
+  end
+end
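All three server-internal tracer modules rely on the same Module#prepend technique: each defines a method with the same name as Sidekiq's internal one, opens a span, and calls `super` so the original implementation runs inside the span. A minimal standalone sketch of that interception pattern (the Worker and Interceptor names are illustrative, not part of ddtrace):

# Stand-in for a library class with an internal method we want to trace.
class Worker
  def fetch
    :job
  end
end

# Prepended module: same method name, do our own work,
# then call `super` so the original implementation still runs.
module Interceptor
  def fetch
    puts 'span started'
    result = super
    puts 'span finished'
    result
  end
end

Worker.prepend(Interceptor)
Worker.new.fetch # prints the two messages, then returns :job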
data/lib/ddtrace/contrib/sinatra/tracer.rb
CHANGED
@@ -5,7 +5,7 @@ require 'ddtrace/ext/app_types'
 require 'ddtrace/ext/errors'
 require 'ddtrace/ext/http'
 require 'ddtrace/propagation/http_propagator'
-
+require 'ddtrace/utils/only_once'
 require 'ddtrace/contrib/sinatra/ext'
 require 'ddtrace/contrib/sinatra/tracer_middleware'
 require 'ddtrace/contrib/sinatra/env'
@@ -77,6 +77,9 @@ module Datadog

       # Method overrides for Sinatra::Base
       module Base
+        MISSING_REQUEST_SPAN_ONLY_ONCE = Datadog::Utils::OnlyOnce.new
+        private_constant :MISSING_REQUEST_SPAN_ONLY_ONCE
+
         def render(engine, data, *)
           tracer = Datadog.configuration[:sinatra][:tracer]
           return super unless tracer.enabled
@@ -121,8 +124,18 @@ module Datadog
         else
           Sinatra::Env.datadog_span(env, self.class)
         end
-        if sinatra_request_span
+        if sinatra_request_span
           sinatra_request_span.resource = span.resource
+        else
+          MISSING_REQUEST_SPAN_ONLY_ONCE.run do
+            Datadog.logger.warn do
+              'Sinatra integration is misconfigured, reported traces will be missing request metadata ' \
+              'such as path and HTTP status code. ' \
+              'Did you forget to add `register Datadog::Contrib::Sinatra::Tracer` to your ' \
+              '`Sinatra::Base` subclass? ' \
+              'See <https://docs.datadoghq.com/tracing/setup_overview/setup/ruby/#sinatra> for more details.'
+            end
+          end
         end

         Contrib::Analytics.set_measured(span)
data/lib/ddtrace/ext/git.rb
CHANGED
@@ -15,6 +15,18 @@ module Datadog
       TAG_COMMIT_COMMITTER_NAME = 'git.commit.committer.name'.freeze
       TAG_COMMIT_MESSAGE = 'git.commit.message'.freeze
       TAG_COMMIT_SHA = 'git.commit.sha'.freeze
+
+      ENV_REPOSITORY_URL = 'DD_GIT_REPOSITORY_URL'.freeze
+      ENV_COMMIT_SHA = 'DD_GIT_COMMIT_SHA'.freeze
+      ENV_BRANCH = 'DD_GIT_BRANCH'.freeze
+      ENV_TAG = 'DD_GIT_TAG'.freeze
+      ENV_COMMIT_MESSAGE = 'DD_GIT_COMMIT_MESSAGE'.freeze
+      ENV_COMMIT_AUTHOR_NAME = 'DD_GIT_COMMIT_AUTHOR_NAME'.freeze
+      ENV_COMMIT_AUTHOR_EMAIL = 'DD_GIT_COMMIT_AUTHOR_EMAIL'.freeze
+      ENV_COMMIT_AUTHOR_DATE = 'DD_GIT_COMMIT_AUTHOR_DATE'.freeze
+      ENV_COMMIT_COMMITTER_NAME = 'DD_GIT_COMMIT_COMMITTER_NAME'.freeze
+      ENV_COMMIT_COMMITTER_EMAIL = 'DD_GIT_COMMIT_COMMITTER_EMAIL'.freeze
+      ENV_COMMIT_COMMITTER_DATE = 'DD_GIT_COMMIT_COMMITTER_DATE'.freeze
     end
   end
 end
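These ENV_* constants let git metadata for CI visibility be supplied explicitly through environment variables when it cannot be detected from the CI provider. A hypothetical example (values invented; the variable names are the ones defined above):

# Hypothetical: export git metadata in the CI job's environment
# before running the instrumented test suite.
ENV['DD_GIT_REPOSITORY_URL'] = 'https://example.com/my-org/my-repo.git'
ENV['DD_GIT_BRANCH'] = 'main'
ENV['DD_GIT_COMMIT_SHA'] = 'b9f0fb3fdbca94164a3d1c9111c11cba9bf5ce62'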
data/lib/ddtrace/ext/profiling.rb
CHANGED
@@ -6,6 +6,7 @@ module Datadog
       ENV_UPLOAD_TIMEOUT = 'DD_PROFILING_UPLOAD_TIMEOUT'.freeze
       ENV_MAX_FRAMES = 'DD_PROFILING_MAX_FRAMES'.freeze
       ENV_AGENTLESS = 'DD_PROFILING_AGENTLESS'.freeze
+      ENV_ENDPOINT_COLLECTION_ENABLED = 'DD_PROFILING_ENDPOINT_COLLECTION_ENABLED'.freeze

       module Pprof
         LABEL_KEY_SPAN_ID = 'span id'.freeze
data/lib/ddtrace/ext/runtime.rb
CHANGED
@@ -6,6 +6,7 @@ module Datadog
   module Runtime
     TAG_ID = 'runtime-id'.freeze
     TAG_LANG = 'language'.freeze
+    TAG_PID = 'system.pid'.freeze

     # Metrics
     module Metrics
@@ -14,6 +15,8 @@ module Datadog
       METRIC_CLASS_COUNT = 'runtime.ruby.class_count'.freeze
       METRIC_GC_PREFIX = 'runtime.ruby.gc'.freeze
       METRIC_THREAD_COUNT = 'runtime.ruby.thread_count'.freeze
+      METRIC_GLOBAL_CONSTANT_STATE = 'runtime.ruby.global_constant_state'.freeze
+      METRIC_GLOBAL_METHOD_STATE = 'runtime.ruby.global_method_state'.freeze

       TAG_SERVICE = 'service'.freeze
     end
data/lib/ddtrace/ext/transport.rb
CHANGED
@@ -3,6 +3,7 @@ module Datadog
   module Ext
     module Transport
       module HTTP
+        ADAPTER = :net_http # DEV: Rename to simply `:http`, as Net::HTTP is an implementation detail.
         DEFAULT_HOST = '127.0.0.1'.freeze
         DEFAULT_PORT = 8126
         DEFAULT_TIMEOUT_SECONDS = 1
@@ -16,6 +17,16 @@ module Datadog
         HEADER_META_LANG_INTERPRETER = 'Datadog-Meta-Lang-Interpreter'.freeze
         HEADER_META_TRACER_VERSION = 'Datadog-Meta-Tracer-Version'.freeze
       end
+
+      module Test
+        ADAPTER = :test
+      end
+
+      module UnixSocket
+        ADAPTER = :unix
+        DEFAULT_PATH = '/var/run/datadog/apm.socket'.freeze
+        DEFAULT_TIMEOUT_SECONDS = 1
+      end
     end
   end
 end
data/lib/ddtrace/profiling/collectors/stack.rb
CHANGED
@@ -18,6 +18,12 @@ module Datadog
      DEFAULT_MAX_TIME_USAGE_PCT = 2.0
      MIN_INTERVAL = 0.01
      THREAD_LAST_CPU_TIME_KEY = :datadog_profiler_last_cpu_time
+     THREAD_LAST_WALL_CLOCK_KEY = :datadog_profiler_last_wall_clock
+
+     # This default was picked based on the current sampling performance and on expected concurrency on an average
+     # Ruby MRI application. Lowering this optimizes for latency (less impact each time we sample), and raising
+     # optimizes for coverage (less chance to miss what a given thread is doing).
+     DEFAULT_MAX_THREADS_SAMPLED = 16

      attr_reader \
        :recorder,
@@ -33,6 +39,7 @@ module Datadog
        trace_identifiers_helper:, # Usually an instance of Datadog::Profiling::TraceIdentifiers::Helper
        ignore_thread: nil,
        max_time_usage_pct: DEFAULT_MAX_TIME_USAGE_PCT,
+       max_threads_sampled: DEFAULT_MAX_THREADS_SAMPLED,
        thread_api: Thread,
        fork_policy: Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default
        interval: MIN_INTERVAL,
@@ -43,6 +50,7 @@ module Datadog
        @trace_identifiers_helper = trace_identifiers_helper
        @ignore_thread = ignore_thread
        @max_time_usage_pct = max_time_usage_pct
+       @max_threads_sampled = max_threads_sampled
        @thread_api = thread_api

        # Workers::Async::Thread settings
@@ -60,10 +68,13 @@ module Datadog
        @build_backtrace_location = method(:build_backtrace_location).to_proc
        # Cache this buffer, since it's pretty expensive to keep accessing it
        @stack_sample_event_recorder = recorder[Events::StackSample]
+       # See below for details on why this is needed
+       @needs_process_waiter_workaround =
+         Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.3') &&
+         Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.7')
      end

      def start
-       @last_wall_time = Datadog::Utils::Time.get_time
        reset_cpu_time_tracking
        perform
      end
@@ -87,24 +98,14 @@ module Datadog

      def collect_events
        events = []
-
-       # Compute wall time interval
-       current_wall_time = Datadog::Utils::Time.get_time
-       last_wall_time = if instance_variable_defined?(:@last_wall_time)
-                          @last_wall_time
-                        else
-                          current_wall_time
-                        end
-
-       wall_time_interval_ns = ((current_wall_time - last_wall_time).round(9) * 1e9).to_i
-       @last_wall_time = current_wall_time
+       current_wall_time_ns = get_current_wall_time_timestamp_ns

        # Collect backtraces from each thread
-
+       threads_to_sample.each do |thread|
         next unless thread.alive?
         next if ignore_thread.is_a?(Proc) && ignore_thread.call(thread)

-        event = collect_thread_event(thread,
+        event = collect_thread_event(thread, current_wall_time_ns)
         events << event unless event.nil?
       end

@@ -114,7 +115,7 @@ module Datadog
        events
      end

-     def collect_thread_event(thread,
+     def collect_thread_event(thread, current_wall_time_ns)
        locations = thread.backtrace_locations
        return if locations.nil?

@@ -126,8 +127,10 @@ module Datadog
        locations = convert_backtrace_locations(locations)

        thread_id = thread.respond_to?(:pthread_thread_id) ? thread.pthread_thread_id : thread.object_id
-       trace_id, span_id,
+       trace_id, span_id, trace_resource = trace_identifiers_helper.trace_identifiers_for(thread)
        cpu_time = get_cpu_time_interval!(thread)
+       wall_time_interval_ns =
+         get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_WALL_CLOCK_KEY, current_wall_time_ns)

        Events::StackSample.new(
          nil,
@@ -136,7 +139,7 @@ module Datadog
          thread_id,
          trace_id,
          span_id,
-
+         trace_resource,
          cpu_time,
          wall_time_interval_ns
        )
@@ -156,14 +159,7 @@ module Datadog
        # *before* the thread had time to finish the initialization
        return unless current_cpu_time_ns

-
-       interval = current_cpu_time_ns - last_cpu_time_ns
-
-       # Update CPU time for thread
-       thread.thread_variable_set(THREAD_LAST_CPU_TIME_KEY, current_cpu_time_ns)
-
-       # Return interval
-       interval
+       get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_CPU_TIME_KEY, current_cpu_time_ns)
      end

      def compute_wait_time(used_time)
@@ -237,10 +233,10 @@ module Datadog
      end

      # If the profiler is started for a while, stopped and then restarted OR whenever the process forks, we need to
-     # clean up any leftover per-thread
+     # clean up any leftover per-thread counters, so that the first sample after starting doesn't end up with:
      #
      # a) negative time: At least on my test docker container, and on the reliability environment, after the process
-     # forks, the
+     # forks, the cpu time reference changes and (old cpu time - new cpu time) can be < 0
      #
      # b) large amount of time: if the profiler was started, then stopped for some amount of time, and then
      # restarted, we don't want the first sample to be "blamed" for multiple minutes of CPU time
@@ -248,9 +244,57 @@ module Datadog
      # By resetting the last cpu time seen, we start with a clean slate every time we start the stack collector.
      def reset_cpu_time_tracking
        thread_api.list.each do |thread|
+         # See below for details on why this is needed
+         next if @needs_process_waiter_workaround && thread.is_a?(::Process::Waiter)
+
          thread.thread_variable_set(THREAD_LAST_CPU_TIME_KEY, nil)
+         thread.thread_variable_set(THREAD_LAST_WALL_CLOCK_KEY, nil)
+       end
+     end
+
+     def get_elapsed_since_last_sample_and_set_value(thread, key, current_value)
+       # See cthread.rb for more details, but this is a workaround for https://bugs.ruby-lang.org/issues/17807 ;
+       # using all thread_variable related methods on these instances also triggers a crash and for now we just
+       # skip it for the affected Rubies
+       return 0 if @needs_process_waiter_workaround && thread.is_a?(::Process::Waiter)
+
+       last_value = thread.thread_variable_get(key) || current_value
+       thread.thread_variable_set(key, current_value)
+
+       current_value - last_value
+     end
+
+     # Whenever there are more than max_threads_sampled active, we only sample a subset of them.
+     # We do this to avoid impacting the latency of the service being profiled. We want to avoid doing
+     # a big burst of work all at once (sample everything), and instead do a little work each time
+     # (sample a bit by bit).
+     #
+     # Because we pick the threads to sample randomly, we'll eventually sample all threads -- just not at once.
+     # Notice also that this will interact with our dynamic sampling mechanism -- if samples are faster, we take
+     # them more often, if they are slower, we take them less often -- which again means that over a longer period
+     # we should take sample roughly the same samples.
+     #
+     # One downside of this approach is that if there really are many threads, the resulting wall clock times
+     # in a one minute profile may "drift" around the 60 second mark, e.g. maybe we only sampled a thread once per
+     # second and only 59 times, so we'll report 59s, but on the next report we'll include the missing one, so
+     # then the result will be 61s. I've observed 60 +- 1.68 secs for an app with ~65 threads, given the
+     # default maximum of 16 threads. This seems a reasonable enough margin of error given the improvement to
+     # latency (especially on such a large application! -> even bigger latency impact if we tried to sample all
+     # threads).
+     #
+     def threads_to_sample
+       all_threads = thread_api.list
+
+       if all_threads.size > @max_threads_sampled
+         all_threads.sample(@max_threads_sampled)
+       else
+         all_threads
        end
      end
+
+     def get_current_wall_time_timestamp_ns
+       Datadog::Utils::Time.get_time(:nanosecond)
+     end
    end
  end
end
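The central change in this file: the collector now records the last wall-clock timestamp per thread (via get_elapsed_since_last_sample_and_set_value) instead of keeping one collector-wide @last_wall_time, so a thread skipped in some iterations (now possible with the max_threads_sampled cap) is still credited with all wall time since its own previous sample. A simplified sketch of that bookkeeping, using a plain Hash in place of thread-local variables:

# Per-key elapsed-time bookkeeping, mirroring
# get_elapsed_since_last_sample_and_set_value above.
def elapsed_and_update(last_seen, key, now_ns)
  last = last_seen[key] || now_ns # a first sample contributes 0, not "time since epoch"
  last_seen[key] = now_ns
  now_ns - last
end

last_seen = {}
elapsed_and_update(last_seen, :thread_a, 1_000) # => 0 (first sample)
elapsed_and_update(last_seen, :thread_a, 5_000) # => 4000
elapsed_and_update(last_seen, :thread_b, 5_000) # => 0 (independent of :thread_a)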
data/lib/ddtrace/profiling/encoding/profile.rb
CHANGED
@@ -24,8 +24,16 @@ module Datadog
        flush.event_groups.each { |event_group| template.add_events!(event_group.event_class, event_group.events) }

        Datadog.logger.debug do
+         max_events = Datadog.configuration.profiling.advanced.max_events
+         events_sampled =
+           if flush.event_count == max_events
+             'max events limit hit, events were sampled [profile will be biased], '
+           else
+             ''
+           end
+
          "Encoding profile covering #{flush.start.iso8601} to #{flush.finish.iso8601}, " \
-         "events: #{flush.event_count} (#{template.debug_statistics})"
+         "events: #{flush.event_count} (#{events_sampled}#{template.debug_statistics})"
        end

        # Build the profile and encode it
data/lib/ddtrace/profiling/events/stack.rb
CHANGED
@@ -13,7 +13,7 @@ module Datadog
        :thread_id,
        :trace_id,
        :span_id,
-       :
+       :trace_resource

      def initialize(
        timestamp,
@@ -22,7 +22,7 @@ module Datadog
        thread_id,
        trace_id,
        span_id,
-
+       trace_resource
      )
        super(timestamp)

@@ -31,14 +31,14 @@ module Datadog
        @thread_id = thread_id
        @trace_id = trace_id
        @span_id = span_id
-       @
+       @trace_resource = trace_resource

        @hash = [
          thread_id,
          trace_id,
          span_id,
-         #
-         #
+         # trace_resource is deliberately not included -- events that share the same (trace_id, span_id)
+         # trace_resource might not match between pairs, but they refer to the same trace.
          frames.collect(&:hash),
          total_frame_count
        ].hash
@@ -58,7 +58,7 @@ module Datadog
        thread_id,
        trace_id,
        span_id,
-
+       trace_resource,
        cpu_time_interval_ns,
        wall_time_interval_ns
      )
@@ -69,7 +69,7 @@ module Datadog
        thread_id,
        trace_id,
        span_id,
-
+       trace_resource
      )

      @cpu_time_interval_ns = cpu_time_interval_ns
data/lib/ddtrace/profiling/pprof/converter.rb
CHANGED
@@ -25,20 +25,19 @@ module Datadog
        # [key, EventGroup]
        event_groups = {}

+       # Aggregate each event into a group
+       # with identical properties, but different values.
        events.each do |event|
          key = yield(event)
-         values =
+         values = build_event_values(event)

          unless key.nil?
            if event_groups.key?(key)
-             # Update
-
-             group_values.each_with_index do |group_value, i|
-               group_values[i] = group_value + values[i]
-             end
+             # Update existing group from event
+             update_group(event_groups[key], event, values)
            else
              # Add new group
-             event_groups[key] =
+             event_groups[key] = new_group(event, values)
            end
          end
        end
@@ -57,7 +56,7 @@ module Datadog
        index
      end

-     def
+     def build_event_values(event)
        # Build a value array that matches the length of the sample types
        # Populate all values with "no value" by default
        Array.new(@sample_type_mappings.length, Datadog::Ext::Profiling::Pprof::SAMPLE_VALUE_NO_VALUE)
@@ -69,7 +68,7 @@ module Datadog

      # Represents a grouped event
      # 'sample' is an example event object from the group.
-     # 'values' is the
+     # 'values' is the summation of the group's sample values
      EventGroup = Struct.new(:sample, :values)

      # Error when the mapping of a sample type to value index is unknown
@@ -84,6 +83,20 @@ module Datadog
          "Mapping for sample value type '#{type}' to index is unknown."
        end
      end
+
+     protected
+
+     def new_group(event, values)
+       EventGroup.new(event, values)
+     end
+
+     def update_group(event_group, event, values)
+       # Update values for group
+       group_values = event_group.values
+       group_values.each_with_index do |group_value, i|
+         group_values[i] = group_value + values[i]
+       end
+     end
    end
  end
end
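build_event_values, new_group, and update_group are the extension points subclasses override (stack_sample.rb below overrides build_event_values): the converter maps each event to a group key, and events sharing a key have their value arrays summed element-wise. A toy illustration of that aggregation (data and names invented):

# Element-wise summation of value arrays for events sharing a group key,
# mirroring new_group/update_group above.
events = [
  { key: 'stack_a', values: [10, 1] },
  { key: 'stack_b', values: [5, 1] },
  { key: 'stack_a', values: [20, 1] },
]

groups = {}
events.each do |event|
  if (group_values = groups[event[:key]])
    group_values.each_index { |i| group_values[i] += event[:values][i] }
  else
    groups[event[:key]] = event[:values].dup
  end
end

groups # => {"stack_a"=>[30, 2], "stack_b"=>[5, 1]}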
data/lib/ddtrace/profiling/pprof/stack_sample.rb
CHANGED
@@ -30,6 +30,7 @@ module Datadog
      def initialize(*_)
        super

+       @most_recent_trace_samples = {}
        @processed_unique_stacks = 0
        @processed_with_trace_ids = 0
      end
@@ -40,9 +41,28 @@ module Datadog
      end

      def stack_sample_group_key(stack_sample)
+       # We want to make sure we have the most recent sample for any trace.
+       # (This is done here to save an iteration over all samples.)
+       update_most_recent_trace_sample(stack_sample)
+
        stack_sample.hash
      end

+     # Track the most recent sample for each trace
+     def update_most_recent_trace_sample(stack_sample)
+       return unless stack_sample.trace_id && stack_sample.trace_resource
+
+       # Update trace resource with most recent value
+       if (most_recent_trace_sample = @most_recent_trace_samples[stack_sample.trace_id])
+         if most_recent_trace_sample.timestamp < stack_sample.timestamp
+           @most_recent_trace_samples[stack_sample.trace_id] = stack_sample
+         end
+       else
+         # Add trace resource
+         @most_recent_trace_samples[stack_sample.trace_id] = stack_sample
+       end
+     end
+
      def build_samples(stack_samples)
        groups = group_events(stack_samples, &method(:stack_sample_group_key))
        groups.collect do |_group_key, group|
@@ -64,7 +84,7 @@ module Datadog
        )
      end

-     def
+     def build_event_values(stack_sample)
        no_value = Datadog::Ext::Profiling::Pprof::SAMPLE_VALUE_NO_VALUE
        values = super(stack_sample)
        values[sample_value_index(:cpu_time_ns)] = stack_sample.cpu_time_interval_ns || no_value
@@ -96,7 +116,13 @@ module Datadog
          str: builder.string_table.fetch(span_id.to_s)
        )

-
+       # Use most up-to-date trace resource, if available.
+       # Otherwise, use the trace resource provided.
+       trace_resource = (
+         @most_recent_trace_samples[stack_sample.trace_id] \
+         || stack_sample
+       ).trace_resource
+
        if trace_resource && !trace_resource.empty?
          labels << Perftools::Profiles::Label.new(
            key: builder.string_table.fetch(Datadog::Ext::Profiling::Pprof::LABEL_KEY_TRACE_ENDPOINT),
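The converter keeps, for each trace_id, the sample with the newest timestamp, so the pprof trace endpoint label reflects the trace's final resource name (a trace's resource may be updated while it is still in flight). A compact sketch of that "latest sample wins" bookkeeping (example values invented):

# Keep the most recent sample seen for each trace id,
# mirroring update_most_recent_trace_sample above.
Sample = Struct.new(:trace_id, :timestamp, :trace_resource)

def track(most_recent, sample)
  return unless sample.trace_id && sample.trace_resource

  current = most_recent[sample.trace_id]
  most_recent[sample.trace_id] = sample if current.nil? || current.timestamp < sample.timestamp
end

most_recent = {}
track(most_recent, Sample.new(42, 1.0, 'ProcessingController#run'))
track(most_recent, Sample.new(42, 2.0, 'GET /users/:id'))
most_recent[42].trace_resource # => "GET /users/:id"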
data/lib/ddtrace/profiling/trace_identifiers/ddtrace.rb
CHANGED
@@ -33,7 +33,7 @@ module Datadog
      def maybe_extract_resource(root_span)
        return unless root_span

-       root_span.
+       root_span.resource if root_span.span_type == Datadog::Ext::HTTP::TYPE_INBOUND
      end
    end
  end
data/lib/ddtrace/profiling/trace_identifiers/helper.rb
CHANGED
@@ -20,10 +20,10 @@ module Datadog
      def initialize(
        tracer:,
        # If this is disabled, the helper will strip the optional trace_resource_container even if provided by the api
-
+       endpoint_collection_enabled:,
        supported_apis: DEFAULT_SUPPORTED_APIS.map { |api| api.new(tracer: tracer) }
      )
-       @
+       @endpoint_collection_enabled = endpoint_collection_enabled
        @supported_apis = supported_apis
      end

@@ -34,7 +34,7 @@ module Datadog
        trace_identifiers = api.trace_identifiers_for(thread)

        if trace_identifiers
-         return @
+         return @endpoint_collection_enabled ? trace_identifiers : trace_identifiers[0..1]
        end
      end
