ddtrace 0.47.0 → 0.48.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.circleci/config.yml +4 -2
- data/.circleci/images/primary/Dockerfile-2.0.0 +11 -1
- data/.circleci/images/primary/Dockerfile-2.1.10 +11 -1
- data/.circleci/images/primary/Dockerfile-2.2.10 +11 -1
- data/.circleci/images/primary/Dockerfile-2.3.8 +10 -0
- data/.circleci/images/primary/Dockerfile-2.4.6 +10 -0
- data/.circleci/images/primary/Dockerfile-2.5.6 +10 -0
- data/.circleci/images/primary/Dockerfile-2.6.4 +10 -0
- data/.circleci/images/primary/Dockerfile-2.7.0 +10 -0
- data/.circleci/images/primary/Dockerfile-jruby-9.2-latest +10 -0
- data/.gitlab-ci.yml +18 -18
- data/.rubocop.yml +19 -0
- data/.rubocop_todo.yml +44 -3
- data/Appraisals +55 -1
- data/CHANGELOG.md +47 -1
- data/Gemfile +10 -0
- data/Rakefile +9 -0
- data/bin/ddtracerb +15 -0
- data/ddtrace.gemspec +4 -2
- data/docs/GettingStarted.md +36 -53
- data/docs/ProfilingDevelopment.md +88 -0
- data/integration/README.md +1 -2
- data/integration/apps/rack/Dockerfile +3 -0
- data/integration/apps/rack/script/build-images +1 -1
- data/integration/apps/rack/script/ci +1 -1
- data/integration/apps/rails-five/script/build-images +1 -1
- data/integration/apps/rails-five/script/ci +1 -1
- data/integration/apps/ruby/script/build-images +1 -1
- data/integration/apps/ruby/script/ci +1 -1
- data/integration/images/include/http-health-check +1 -1
- data/integration/images/wrk/scripts/entrypoint.sh +1 -1
- data/integration/script/build-images +1 -1
- data/lib/ddtrace.rb +1 -0
- data/lib/ddtrace/configuration.rb +39 -13
- data/lib/ddtrace/configuration/components.rb +85 -3
- data/lib/ddtrace/configuration/settings.rb +31 -0
- data/lib/ddtrace/contrib/active_record/configuration/makara_resolver.rb +30 -0
- data/lib/ddtrace/contrib/active_record/configuration/resolver.rb +9 -3
- data/lib/ddtrace/contrib/resque/configuration/settings.rb +17 -1
- data/lib/ddtrace/contrib/resque/patcher.rb +4 -4
- data/lib/ddtrace/contrib/resque/resque_job.rb +22 -1
- data/lib/ddtrace/contrib/shoryuken/configuration/settings.rb +1 -0
- data/lib/ddtrace/contrib/shoryuken/tracer.rb +7 -3
- data/lib/ddtrace/diagnostics/environment_logger.rb +1 -1
- data/lib/ddtrace/error.rb +2 -0
- data/lib/ddtrace/ext/profiling.rb +52 -0
- data/lib/ddtrace/ext/transport.rb +1 -0
- data/lib/ddtrace/metrics.rb +4 -0
- data/lib/ddtrace/profiling.rb +54 -0
- data/lib/ddtrace/profiling/backtrace_location.rb +32 -0
- data/lib/ddtrace/profiling/buffer.rb +41 -0
- data/lib/ddtrace/profiling/collectors/stack.rb +253 -0
- data/lib/ddtrace/profiling/encoding/profile.rb +31 -0
- data/lib/ddtrace/profiling/event.rb +13 -0
- data/lib/ddtrace/profiling/events/stack.rb +102 -0
- data/lib/ddtrace/profiling/exporter.rb +23 -0
- data/lib/ddtrace/profiling/ext/cpu.rb +54 -0
- data/lib/ddtrace/profiling/ext/cthread.rb +134 -0
- data/lib/ddtrace/profiling/ext/forking.rb +97 -0
- data/lib/ddtrace/profiling/flush.rb +41 -0
- data/lib/ddtrace/profiling/pprof/builder.rb +121 -0
- data/lib/ddtrace/profiling/pprof/converter.rb +85 -0
- data/lib/ddtrace/profiling/pprof/message_set.rb +12 -0
- data/lib/ddtrace/profiling/pprof/payload.rb +18 -0
- data/lib/ddtrace/profiling/pprof/pprof.proto +212 -0
- data/lib/ddtrace/profiling/pprof/pprof_pb.rb +81 -0
- data/lib/ddtrace/profiling/pprof/stack_sample.rb +90 -0
- data/lib/ddtrace/profiling/pprof/string_table.rb +10 -0
- data/lib/ddtrace/profiling/pprof/template.rb +114 -0
- data/lib/ddtrace/profiling/preload.rb +3 -0
- data/lib/ddtrace/profiling/profiler.rb +28 -0
- data/lib/ddtrace/profiling/recorder.rb +87 -0
- data/lib/ddtrace/profiling/scheduler.rb +84 -0
- data/lib/ddtrace/profiling/tasks/setup.rb +77 -0
- data/lib/ddtrace/profiling/transport/client.rb +12 -0
- data/lib/ddtrace/profiling/transport/http.rb +122 -0
- data/lib/ddtrace/profiling/transport/http/api.rb +43 -0
- data/lib/ddtrace/profiling/transport/http/api/endpoint.rb +90 -0
- data/lib/ddtrace/profiling/transport/http/api/instance.rb +36 -0
- data/lib/ddtrace/profiling/transport/http/api/spec.rb +40 -0
- data/lib/ddtrace/profiling/transport/http/builder.rb +28 -0
- data/lib/ddtrace/profiling/transport/http/client.rb +33 -0
- data/lib/ddtrace/profiling/transport/http/response.rb +21 -0
- data/lib/ddtrace/profiling/transport/io.rb +30 -0
- data/lib/ddtrace/profiling/transport/io/client.rb +27 -0
- data/lib/ddtrace/profiling/transport/io/response.rb +16 -0
- data/lib/ddtrace/profiling/transport/parcel.rb +17 -0
- data/lib/ddtrace/profiling/transport/request.rb +15 -0
- data/lib/ddtrace/profiling/transport/response.rb +8 -0
- data/lib/ddtrace/runtime/container.rb +11 -3
- data/lib/ddtrace/sampling/rule_sampler.rb +3 -9
- data/lib/ddtrace/tasks/exec.rb +48 -0
- data/lib/ddtrace/tasks/help.rb +14 -0
- data/lib/ddtrace/tracer.rb +21 -0
- data/lib/ddtrace/transport/io/client.rb +15 -8
- data/lib/ddtrace/transport/parcel.rb +4 -0
- data/lib/ddtrace/version.rb +3 -1
- data/lib/ddtrace/workers/runtime_metrics.rb +14 -1
- metadata +70 -9
@@ -0,0 +1,30 @@
|
|
1
|
+
module Datadog
|
2
|
+
module Contrib
|
3
|
+
module ActiveRecord
|
4
|
+
module Configuration
|
5
|
+
# The `makara` gem has the concept of **role**, which can be
|
6
|
+
# inferred from the configuration `name`, in the form of:
|
7
|
+
# `master/0`, `replica/0`, `replica/1`, etc.
|
8
|
+
# The first part of this string is the database role.
|
9
|
+
#
|
10
|
+
# This allows the matching of a connection based on its role,
|
11
|
+
# instead of connection-specific information.
|
12
|
+
module MakaraResolver
|
13
|
+
def normalize_for_config(active_record_config)
|
14
|
+
hash = super
|
15
|
+
hash[:makara_role] = active_record_config[:makara_role]
|
16
|
+
hash
|
17
|
+
end
|
18
|
+
|
19
|
+
def normalize_for_resolve(active_record_config)
|
20
|
+
hash = super
|
21
|
+
|
22
|
+
hash[:makara_role] = active_record_config[:name].split('/')[0].to_s if active_record_config[:name].is_a?(String)
|
23
|
+
|
24
|
+
hash
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'ddtrace/contrib/configuration/resolver'
|
2
2
|
require 'ddtrace/vendor/active_record/connection_specification'
|
3
|
+
require_relative 'makara_resolver'
|
3
4
|
|
4
5
|
module Datadog
|
5
6
|
module Contrib
|
@@ -28,6 +29,8 @@ module Datadog
|
|
28
29
|
# When more than one configuration could be matched, the last one to match is selected,
|
29
30
|
# based on addition order (`#add`).
|
30
31
|
class Resolver < Contrib::Configuration::Resolver
|
32
|
+
prepend MakaraResolver
|
33
|
+
|
31
34
|
def initialize(active_record_configuration = nil)
|
32
35
|
super()
|
33
36
|
|
@@ -50,7 +53,7 @@ module Datadog
|
|
50
53
|
def resolve(db_config)
|
51
54
|
active_record_config = resolve_connection_key(db_config).symbolize_keys
|
52
55
|
|
53
|
-
hash =
|
56
|
+
hash = normalize_for_resolve(active_record_config)
|
54
57
|
|
55
58
|
# Hashes in Ruby maintain insertion order
|
56
59
|
_, config = @configurations.reverse_each.find do |matcher, _|
|
@@ -73,7 +76,7 @@ module Datadog
|
|
73
76
|
|
74
77
|
def parse_matcher(matcher)
|
75
78
|
resolved_pattern = resolve_connection_key(matcher).symbolize_keys
|
76
|
-
normalized =
|
79
|
+
normalized = normalize_for_config(resolved_pattern)
|
77
80
|
|
78
81
|
# Remove empty fields to allow for partial matching
|
79
82
|
normalized.reject! { |_, v| v.nil? }
|
@@ -112,7 +115,7 @@ module Datadog
|
|
112
115
|
|
113
116
|
# Extract only fields we'd like to match
|
114
117
|
# from the ActiveRecord configuration.
|
115
|
-
def
|
118
|
+
def normalize_for_config(active_record_config)
|
116
119
|
{
|
117
120
|
adapter: active_record_config[:adapter],
|
118
121
|
host: active_record_config[:host],
|
@@ -121,6 +124,9 @@ module Datadog
|
|
121
124
|
username: active_record_config[:username]
|
122
125
|
}
|
123
126
|
end
|
127
|
+
|
128
|
+
# Both resolvers perform the same operations for this implementation, but can be specialized
|
129
|
+
alias_method :normalize_for_resolve, :normalize_for_config
|
124
130
|
end
|
125
131
|
end
|
126
132
|
end
|
@@ -23,7 +23,23 @@ module Datadog
|
|
23
23
|
end
|
24
24
|
|
25
25
|
option :service_name, default: Ext::SERVICE_NAME
|
26
|
-
|
26
|
+
|
27
|
+
# A list of Ruby worker classes to be instrumented.
|
28
|
+
# The value of `nil` has special semantics: it instruments all workers dynamically.
|
29
|
+
#
|
30
|
+
# TODO: 1.0: Automatic patching should be the default behavior.
|
31
|
+
# We should not provide this option in the future,
|
32
|
+
# as our integrations should always instrument all possible scenarios when feasible.
|
33
|
+
option :workers, default: nil do |o|
|
34
|
+
o.on_set do |value|
|
35
|
+
unless value.nil?
|
36
|
+
Datadog.logger.warn(
|
37
|
+
"DEPRECATED: Resque integration now instruments all workers. \n" \
|
38
|
+
'The `workers:` option is unnecessary and will be removed in the future.'
|
39
|
+
)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
27
43
|
option :error_handler, default: Datadog::Tracer::DEFAULT_ON_ERROR
|
28
44
|
end
|
29
45
|
end
|
@@ -17,11 +17,11 @@ module Datadog
|
|
17
17
|
|
18
18
|
def patch
|
19
19
|
require_relative 'resque_job'
|
20
|
-
get_option(:workers).each { |worker| worker.extend(ResqueJob) }
|
21
|
-
end
|
22
20
|
|
23
|
-
|
24
|
-
|
21
|
+
::Resque::Job.send(:prepend, Resque::Job)
|
22
|
+
|
23
|
+
workers = Datadog.configuration[:resque][:workers] || []
|
24
|
+
workers.each { |worker| worker.extend(ResqueJob) }
|
25
25
|
end
|
26
26
|
end
|
27
27
|
end
|
@@ -7,9 +7,30 @@ require 'resque'
|
|
7
7
|
module Datadog
|
8
8
|
module Contrib
|
9
9
|
module Resque
|
10
|
+
# Automatically configures jobs with {ResqueJob} plugin.
|
11
|
+
module Job
|
12
|
+
def perform
|
13
|
+
if Datadog.configuration[:resque][:workers].nil?
|
14
|
+
job = payload_class
|
15
|
+
job.extend(Datadog::Contrib::Resque::ResqueJob) unless job.is_a? Datadog::Contrib::Resque::ResqueJob
|
16
|
+
end
|
17
|
+
ensure
|
18
|
+
super
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
10
22
|
# Uses Resque job hooks to create traces
|
11
23
|
module ResqueJob
|
12
|
-
|
24
|
+
# `around_perform` hooks are executed in alphabetical order.
|
25
|
+
# We use the lowest printable character that allows for an inline
|
26
|
+
# method definition ('0'), alongside our naming prefix for identification.
|
27
|
+
#
|
28
|
+
# We could, in theory, use any character (e.g. "\x00"), but this will lead
|
29
|
+
# to unreadable stack traces that contain this method call.
|
30
|
+
#
|
31
|
+
# We could also just use `around_perform` but this might override the user's
|
32
|
+
# own method.
|
33
|
+
def around_perform0_ddtrace(*args)
|
13
34
|
return yield unless datadog_configuration && tracer
|
14
35
|
|
15
36
|
tracer.trace(Ext::SPAN_JOB, span_options) do |span|
|
@@ -12,8 +12,12 @@ module Datadog
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def call(worker_instance, queue, sqs_msg, body)
|
15
|
-
@tracer.trace(
|
16
|
-
|
15
|
+
@tracer.trace(
|
16
|
+
Ext::SPAN_JOB,
|
17
|
+
service: @shoryuken_service,
|
18
|
+
span_type: Datadog::Ext::AppTypes::WORKER,
|
19
|
+
on_error: @error_handler
|
20
|
+
) do |span|
|
17
21
|
# Set analytics sample rate
|
18
22
|
if Contrib::Analytics.enabled?(configuration[:analytics_enabled])
|
19
23
|
Contrib::Analytics.set_sample_rate(span, configuration[:analytics_sample_rate])
|
@@ -26,7 +30,7 @@ module Datadog
|
|
26
30
|
span.set_tag(Ext::TAG_JOB_ID, sqs_msg.message_id)
|
27
31
|
span.set_tag(Ext::TAG_JOB_QUEUE, queue)
|
28
32
|
span.set_tag(Ext::TAG_JOB_ATTRIBUTES, sqs_msg.attributes) if sqs_msg.respond_to?(:attributes)
|
29
|
-
span.set_tag(Ext::TAG_JOB_BODY, body)
|
33
|
+
span.set_tag(Ext::TAG_JOB_BODY, body) if configuration[:tag_body]
|
30
34
|
|
31
35
|
yield
|
32
36
|
end
|
@@ -12,7 +12,7 @@ module Datadog
|
|
12
12
|
# Outputs environment information to {Datadog.logger}.
|
13
13
|
# Executes only for the lifetime of the program.
|
14
14
|
def log!(transport_responses)
|
15
|
-
return if @executed || !log?
|
15
|
+
return if (defined?(@executed) && @executed) || !log?
|
16
16
|
|
17
17
|
@executed = true
|
18
18
|
|
data/lib/ddtrace/error.rb
CHANGED
@@ -16,6 +16,8 @@ module Datadog
|
|
16
16
|
|
17
17
|
def initialize(type = nil, message = nil, backtrace = nil)
|
18
18
|
backtrace = Array(backtrace).join("\n")
|
19
|
+
|
20
|
+
# DEV: We should measure if `Utils.utf8_encode` is still needed in practice.
|
19
21
|
@type = Utils.utf8_encode(type)
|
20
22
|
@message = Utils.utf8_encode(message)
|
21
23
|
@backtrace = Utils.utf8_encode(backtrace)
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Datadog
|
2
|
+
module Ext
|
3
|
+
module Profiling
|
4
|
+
ENV_ENABLED = 'DD_PROFILING_ENABLED'.freeze
|
5
|
+
ENV_UPLOAD_TIMEOUT = 'DD_PROFILING_UPLOAD_TIMEOUT'.freeze
|
6
|
+
ENV_MAX_FRAMES = 'DD_PROFILING_MAX_FRAMES'.freeze
|
7
|
+
|
8
|
+
module Pprof
|
9
|
+
LABEL_KEY_SPAN_ID = 'span id'.freeze
|
10
|
+
LABEL_KEY_THREAD_ID = 'thread id'.freeze
|
11
|
+
LABEL_KEY_TRACE_ID = 'trace id'.freeze
|
12
|
+
SAMPLE_VALUE_NO_VALUE = 0
|
13
|
+
VALUE_TYPE_CPU = 'cpu-time'.freeze
|
14
|
+
VALUE_TYPE_WALL = 'wall-time'.freeze
|
15
|
+
VALUE_UNIT_NANOSECONDS = 'nanoseconds'.freeze
|
16
|
+
end
|
17
|
+
|
18
|
+
module Transport
|
19
|
+
module HTTP
|
20
|
+
URI_TEMPLATE_DD_API = 'https://intake.profile.%s/'.freeze
|
21
|
+
|
22
|
+
FORM_FIELD_DATA = 'data[0]'.freeze
|
23
|
+
FORM_FIELD_FORMAT = 'format'.freeze
|
24
|
+
FORM_FIELD_FORMAT_PPROF = 'pprof'.freeze
|
25
|
+
FORM_FIELD_RECORDING_END = 'recording-end'.freeze
|
26
|
+
FORM_FIELD_RECORDING_START = 'recording-start'.freeze
|
27
|
+
FORM_FIELD_RUNTIME = 'runtime'.freeze
|
28
|
+
FORM_FIELD_RUNTIME_ID = 'runtime-id'.freeze
|
29
|
+
FORM_FIELD_TAG_ENV = 'env'.freeze
|
30
|
+
FORM_FIELD_TAG_HOST = 'host'.freeze
|
31
|
+
FORM_FIELD_TAG_LANGUAGE = 'language'.freeze
|
32
|
+
FORM_FIELD_TAG_PROFILER_VERSION = 'profiler_version'.freeze
|
33
|
+
FORM_FIELD_TAG_RUNTIME = 'runtime'.freeze
|
34
|
+
FORM_FIELD_TAG_RUNTIME_ENGINE = 'runtime_engine'.freeze
|
35
|
+
FORM_FIELD_TAG_RUNTIME_ID = 'runtime-id'.freeze
|
36
|
+
FORM_FIELD_TAG_RUNTIME_PLATFORM = 'runtime_platform'.freeze
|
37
|
+
FORM_FIELD_TAG_RUNTIME_VERSION = 'runtime_version'.freeze
|
38
|
+
FORM_FIELD_TAG_SERVICE = 'service'.freeze
|
39
|
+
FORM_FIELD_TAG_VERSION = 'version'.freeze
|
40
|
+
FORM_FIELD_TAGS = 'tags'.freeze
|
41
|
+
FORM_FIELD_TYPES = 'types[0]'.freeze
|
42
|
+
FORM_FIELD_TYPES_AUTO = 'auto'.freeze
|
43
|
+
|
44
|
+
HEADER_CONTENT_TYPE = 'Content-Type'.freeze
|
45
|
+
HEADER_CONTENT_TYPE_OCTET_STREAM = 'application/octet-stream'.freeze
|
46
|
+
|
47
|
+
PPROF_DEFAULT_FILENAME = 'profile.pb.gz'.freeze
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
@@ -8,6 +8,7 @@ module Datadog
|
|
8
8
|
ENV_DEFAULT_PORT = 'DD_TRACE_AGENT_PORT'.freeze
|
9
9
|
ENV_DEFAULT_URL = 'DD_TRACE_AGENT_URL'.freeze
|
10
10
|
HEADER_CONTAINER_ID = 'Datadog-Container-ID'.freeze
|
11
|
+
HEADER_DD_API_KEY = 'DD-API-KEY'.freeze
|
11
12
|
HEADER_META_LANG = 'Datadog-Meta-Lang'.freeze
|
12
13
|
HEADER_META_LANG_VERSION = 'Datadog-Meta-Lang-Version'.freeze
|
13
14
|
HEADER_META_LANG_INTERPRETER = 'Datadog-Meta-Lang-Interpreter'.freeze
|
data/lib/ddtrace/metrics.rb
CHANGED
@@ -120,6 +120,10 @@ module Datadog
|
|
120
120
|
metrics.each { |m| send(m.type, *[m.name, m.value, m.options].compact) }
|
121
121
|
end
|
122
122
|
|
123
|
+
def close
|
124
|
+
@statsd.close if @statsd && @statsd.respond_to?(:close)
|
125
|
+
end
|
126
|
+
|
123
127
|
Metric = Struct.new(:type, :name, :value, :options) do
|
124
128
|
def initialize(*args)
|
125
129
|
super
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Datadog
|
2
|
+
# Contains profiler for generating stack profiles, etc.
|
3
|
+
module Profiling
|
4
|
+
module_function
|
5
|
+
|
6
|
+
GOOGLE_PROTOBUF_MINIMUM_VERSION = Gem::Version.new('3.0')
|
7
|
+
|
8
|
+
def supported?
|
9
|
+
google_protobuf_supported?
|
10
|
+
end
|
11
|
+
|
12
|
+
def native_cpu_time_supported?
|
13
|
+
require 'ddtrace/profiling/ext/cpu'
|
14
|
+
Ext::CPU.supported?
|
15
|
+
end
|
16
|
+
|
17
|
+
def google_protobuf_supported?
|
18
|
+
RUBY_PLATFORM != 'java' \
|
19
|
+
&& !Gem.loaded_specs['google-protobuf'].nil? \
|
20
|
+
&& Gem.loaded_specs['google-protobuf'].version >= GOOGLE_PROTOBUF_MINIMUM_VERSION \
|
21
|
+
&& !defined?(@failed_to_load_protobuf)
|
22
|
+
end
|
23
|
+
|
24
|
+
def load_profiling
|
25
|
+
require 'ddtrace/profiling/ext/cpu'
|
26
|
+
require 'ddtrace/profiling/ext/forking'
|
27
|
+
|
28
|
+
require 'ddtrace/profiling/collectors/stack'
|
29
|
+
require 'ddtrace/profiling/exporter'
|
30
|
+
require 'ddtrace/profiling/recorder'
|
31
|
+
require 'ddtrace/profiling/scheduler'
|
32
|
+
require 'ddtrace/profiling/tasks/setup'
|
33
|
+
require 'ddtrace/profiling/transport/io'
|
34
|
+
require 'ddtrace/profiling/transport/http'
|
35
|
+
require 'ddtrace/profiling/profiler'
|
36
|
+
|
37
|
+
begin
|
38
|
+
require 'ddtrace/profiling/pprof/pprof_pb' if google_protobuf_supported?
|
39
|
+
rescue LoadError => e
|
40
|
+
@failed_to_load_protobuf = true
|
41
|
+
Kernel.warn(
|
42
|
+
"[DDTRACE] Error while loading google-protobuf gem. Cause: '#{e.message}' Location: '#{e.backtrace.first}'. " \
|
43
|
+
'This can happen when google-protobuf is missing its native components. ' \
|
44
|
+
'To fix this, try removing and reinstalling the gem, forcing it to recompile the components: ' \
|
45
|
+
'`gem uninstall google-protobuf -a; BUNDLE_FORCE_RUBY_PLATFORM=true bundle install`. ' \
|
46
|
+
'If the error persists, please contact support via <https://docs.datadoghq.com/help/> or ' \
|
47
|
+
'file a bug at <https://github.com/DataDog/dd-trace-rb/blob/master/CONTRIBUTING.md#found-a-bug>.'
|
48
|
+
)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
load_profiling if supported?
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Datadog
|
2
|
+
module Profiling
|
3
|
+
# Entity class used to represent an entry in a stack trace.
|
4
|
+
# Its fields are a simplified struct version of `Thread::Backtrace::Location`.
|
5
|
+
class BacktraceLocation
|
6
|
+
attr_reader \
|
7
|
+
:base_label,
|
8
|
+
:lineno,
|
9
|
+
:path,
|
10
|
+
:hash
|
11
|
+
|
12
|
+
def initialize(
|
13
|
+
base_label,
|
14
|
+
lineno,
|
15
|
+
path
|
16
|
+
)
|
17
|
+
@base_label = base_label
|
18
|
+
@lineno = lineno
|
19
|
+
@path = path
|
20
|
+
@hash = [base_label, lineno, path].hash
|
21
|
+
end
|
22
|
+
|
23
|
+
def ==(other)
|
24
|
+
hash == other.hash
|
25
|
+
end
|
26
|
+
|
27
|
+
def eql?(other)
|
28
|
+
hash == other.hash
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'ddtrace/buffer'
|
2
|
+
require 'ddtrace/utils/string_table'
|
3
|
+
require 'ddtrace/utils/object_set'
|
4
|
+
|
5
|
+
module Datadog
|
6
|
+
module Profiling
|
7
|
+
# Profiling buffer that stores profiling events. The buffer has a maximum size and when
|
8
|
+
# the buffer is full, a random event is discarded. This class is thread-safe.
|
9
|
+
class Buffer < Datadog::ThreadSafeBuffer
|
10
|
+
def initialize(*args)
|
11
|
+
super
|
12
|
+
@caches = {}
|
13
|
+
@string_table = Utils::StringTable.new
|
14
|
+
end
|
15
|
+
|
16
|
+
def cache(cache_name)
|
17
|
+
synchronize do
|
18
|
+
@caches[cache_name] ||= Utils::ObjectSet.new
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def string_table
|
23
|
+
synchronize do
|
24
|
+
@string_table
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
protected
|
29
|
+
|
30
|
+
def drain!
|
31
|
+
items = super
|
32
|
+
|
33
|
+
# Clear caches
|
34
|
+
@caches = {}
|
35
|
+
@string_table = Utils::StringTable.new
|
36
|
+
|
37
|
+
items
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,253 @@
|
|
1
|
+
require 'ddtrace/profiling/backtrace_location'
|
2
|
+
require 'ddtrace/profiling/events/stack'
|
3
|
+
require 'ddtrace/utils/only_once'
|
4
|
+
require 'ddtrace/utils/time'
|
5
|
+
require 'ddtrace/worker'
|
6
|
+
require 'ddtrace/workers/polling'
|
7
|
+
|
8
|
+
module Datadog
|
9
|
+
module Profiling
|
10
|
+
module Collectors
|
11
|
+
# Collects stack trace samples from Ruby threads for both CPU-time (if available) and wall-clock.
|
12
|
+
# Runs on its own background thread.
|
13
|
+
#
|
14
|
+
class Stack < Worker
|
15
|
+
include Workers::Polling
|
16
|
+
|
17
|
+
DEFAULT_MAX_TIME_USAGE_PCT = 2.0
|
18
|
+
MIN_INTERVAL = 0.01
|
19
|
+
THREAD_LAST_CPU_TIME_KEY = :datadog_profiler_last_cpu_time
|
20
|
+
|
21
|
+
attr_reader \
|
22
|
+
:recorder,
|
23
|
+
:max_frames,
|
24
|
+
:ignore_thread,
|
25
|
+
:max_time_usage_pct,
|
26
|
+
:thread_api
|
27
|
+
|
28
|
+
def initialize(
|
29
|
+
recorder,
|
30
|
+
max_frames: nil,
|
31
|
+
ignore_thread: nil,
|
32
|
+
max_time_usage_pct: DEFAULT_MAX_TIME_USAGE_PCT,
|
33
|
+
thread_api: Thread,
|
34
|
+
fork_policy: Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default
|
35
|
+
interval: MIN_INTERVAL,
|
36
|
+
enabled: true
|
37
|
+
)
|
38
|
+
@recorder = recorder
|
39
|
+
# TODO: Make this a required named argument after we drop support for Ruby 2.0
|
40
|
+
@max_frames = max_frames || raise(ArgumentError, 'missing keyword :max_frames')
|
41
|
+
@ignore_thread = ignore_thread
|
42
|
+
@max_time_usage_pct = max_time_usage_pct
|
43
|
+
@thread_api = thread_api
|
44
|
+
|
45
|
+
# Workers::Async::Thread settings
|
46
|
+
self.fork_policy = fork_policy
|
47
|
+
|
48
|
+
# Workers::IntervalLoop settings
|
49
|
+
self.loop_base_interval = interval
|
50
|
+
|
51
|
+
# Workers::Polling settings
|
52
|
+
self.enabled = enabled
|
53
|
+
|
54
|
+
@warn_about_missing_cpu_time_instrumentation_only_once = Datadog::Utils::OnlyOnce.new
|
55
|
+
end
|
56
|
+
|
57
|
+
def start
|
58
|
+
@last_wall_time = Datadog::Utils::Time.get_time
|
59
|
+
reset_cpu_time_tracking
|
60
|
+
perform
|
61
|
+
end
|
62
|
+
|
63
|
+
def perform
|
64
|
+
collect_and_wait
|
65
|
+
end
|
66
|
+
|
67
|
+
def loop_back_off?
|
68
|
+
false
|
69
|
+
end
|
70
|
+
|
71
|
+
def collect_and_wait
|
72
|
+
run_time = Datadog::Utils::Time.measure do
|
73
|
+
collect_events
|
74
|
+
end
|
75
|
+
|
76
|
+
# Update wait time to throttle profiling
|
77
|
+
self.loop_wait_time = compute_wait_time(run_time)
|
78
|
+
end
|
79
|
+
|
80
|
+
def collect_events
|
81
|
+
events = []
|
82
|
+
|
83
|
+
# Compute wall time interval
|
84
|
+
current_wall_time = Datadog::Utils::Time.get_time
|
85
|
+
last_wall_time = if instance_variable_defined?(:@last_wall_time)
|
86
|
+
@last_wall_time
|
87
|
+
else
|
88
|
+
current_wall_time
|
89
|
+
end
|
90
|
+
|
91
|
+
wall_time_interval_ns = ((current_wall_time - last_wall_time).round(9) * 1e9).to_i
|
92
|
+
@last_wall_time = current_wall_time
|
93
|
+
|
94
|
+
# Collect backtraces from each thread
|
95
|
+
thread_api.list.each do |thread|
|
96
|
+
next unless thread.alive?
|
97
|
+
next if ignore_thread.is_a?(Proc) && ignore_thread.call(thread)
|
98
|
+
|
99
|
+
event = collect_thread_event(thread, wall_time_interval_ns)
|
100
|
+
events << event unless event.nil?
|
101
|
+
end
|
102
|
+
|
103
|
+
# Send events to recorder
|
104
|
+
recorder.push(events) unless events.empty?
|
105
|
+
|
106
|
+
events
|
107
|
+
end
|
108
|
+
|
109
|
+
def collect_thread_event(thread, wall_time_interval_ns)
|
110
|
+
locations = thread.backtrace_locations
|
111
|
+
return if locations.nil?
|
112
|
+
|
113
|
+
# Get actual stack size then trim the stack
|
114
|
+
stack_size = locations.length
|
115
|
+
locations = locations[0..(max_frames - 1)]
|
116
|
+
|
117
|
+
# Convert backtrace locations into structs
|
118
|
+
locations = convert_backtrace_locations(locations)
|
119
|
+
|
120
|
+
thread_id = thread.respond_to?(:native_thread_id) ? thread.native_thread_id : thread.object_id
|
121
|
+
trace_id, span_id = get_trace_identifiers(thread)
|
122
|
+
cpu_time = get_cpu_time_interval!(thread)
|
123
|
+
|
124
|
+
Events::StackSample.new(
|
125
|
+
nil,
|
126
|
+
locations,
|
127
|
+
stack_size,
|
128
|
+
thread_id,
|
129
|
+
trace_id,
|
130
|
+
span_id,
|
131
|
+
cpu_time,
|
132
|
+
wall_time_interval_ns
|
133
|
+
)
|
134
|
+
end
|
135
|
+
|
136
|
+
def get_cpu_time_interval!(thread)
|
137
|
+
# Return if we can't get the current CPU time
|
138
|
+
unless thread.respond_to?(:cpu_time_instrumentation_installed?) && thread.cpu_time_instrumentation_installed?
|
139
|
+
warn_about_missing_cpu_time_instrumentation(thread)
|
140
|
+
return
|
141
|
+
end
|
142
|
+
|
143
|
+
current_cpu_time_ns = thread.cpu_time(:nanosecond)
|
144
|
+
|
145
|
+
# NOTE: This can still be nil even when all of the checks above passed because of a race: there's a bit of
|
146
|
+
# initialization that needs to be done by the thread itself, and it's possible for us to try to sample
|
147
|
+
# *before* the thread had time to finish the initialization
|
148
|
+
return unless current_cpu_time_ns
|
149
|
+
|
150
|
+
last_cpu_time_ns = (thread[THREAD_LAST_CPU_TIME_KEY] || current_cpu_time_ns)
|
151
|
+
interval = current_cpu_time_ns - last_cpu_time_ns
|
152
|
+
|
153
|
+
# Update CPU time for thread
|
154
|
+
thread[THREAD_LAST_CPU_TIME_KEY] = current_cpu_time_ns
|
155
|
+
|
156
|
+
# Return interval
|
157
|
+
interval
|
158
|
+
end
|
159
|
+
|
160
|
+
def get_trace_identifiers(thread)
|
161
|
+
return unless thread.is_a?(::Thread)
|
162
|
+
return unless Datadog.respond_to?(:tracer) && Datadog.tracer.respond_to?(:active_correlation)
|
163
|
+
|
164
|
+
identifier = Datadog.tracer.active_correlation(thread)
|
165
|
+
[identifier.trace_id, identifier.span_id]
|
166
|
+
end
|
167
|
+
|
168
|
+
def compute_wait_time(used_time)
|
169
|
+
# We took used_time to get the last sample.
|
170
|
+
#
|
171
|
+
# What we're computing here is -- if used_time corresponds to max_time_usage_pct of the time we should
|
172
|
+
# spend working, how much is (100% - max_time_usage_pct) of the time?
|
173
|
+
#
|
174
|
+
# For instance, if we took 10ms to sample, and max_time_usage_pct is 1%, then the other 99% is 990ms, which
|
175
|
+
# means we need to sleep for 990ms to guarantee that we don't spend more than 1% of the time working.
|
176
|
+
used_time_ns = used_time * 1e9
|
177
|
+
interval = (used_time_ns / (max_time_usage_pct / 100.0)) - used_time_ns
|
178
|
+
[interval / 1e9, MIN_INTERVAL].max
|
179
|
+
end
|
180
|
+
|
181
|
+
# Convert backtrace locations into structs
|
182
|
+
# Re-use old backtrace location objects if they already exist in the buffer
|
183
|
+
def convert_backtrace_locations(locations)
|
184
|
+
locations.collect do |location|
|
185
|
+
# Re-use existing BacktraceLocation if identical copy, otherwise build a new one.
|
186
|
+
recorder[Events::StackSample].cache(:backtrace_locations).fetch(
|
187
|
+
# Function name
|
188
|
+
location.base_label,
|
189
|
+
# Line number
|
190
|
+
location.lineno,
|
191
|
+
# Filename
|
192
|
+
location.path,
|
193
|
+
# Build function
|
194
|
+
&method(:build_backtrace_location)
|
195
|
+
)
|
196
|
+
end
|
197
|
+
end
|
198
|
+
|
199
|
+
def build_backtrace_location(_id, base_label, lineno, path)
|
200
|
+
string_table = recorder[Events::StackSample].string_table
|
201
|
+
|
202
|
+
Profiling::BacktraceLocation.new(
|
203
|
+
string_table.fetch_string(base_label),
|
204
|
+
lineno,
|
205
|
+
string_table.fetch_string(path)
|
206
|
+
)
|
207
|
+
end
|
208
|
+
|
209
|
+
private
|
210
|
+
|
211
|
+
def warn_about_missing_cpu_time_instrumentation(thread)
|
212
|
+
@warn_about_missing_cpu_time_instrumentation_only_once.run do
|
213
|
+
# Is the profiler thread instrumented? If it is, then we know instrumentation is available, but seems to be
|
214
|
+
# missing on this thread we just found.
|
215
|
+
#
|
216
|
+
# As far as we know, it can be missing due to one of the following:
|
217
|
+
#
|
218
|
+
# a) The thread was started before we installed our instrumentation.
|
219
|
+
# In this case, the fix is to make sure ddtrace gets loaded before any other parts of the application.
|
220
|
+
#
|
221
|
+
# b) The thread was started using the Ruby native APIs (e.g. from a C extension such as ffi).
|
222
|
+
# We currently have no solution for this case; these threads will always be missing our CPU instrumentation.
|
223
|
+
#
|
224
|
+
# c) The thread was started with `Thread.start`/`Thread.fork` and hasn't yet enabled the instrumentation.
|
225
|
+
# When threads are started using these APIs, there's a small time window during which the thread has started
|
226
|
+
# but our code to apply the instrumentation hasn't run yet; in these cases it's just a matter of allowing
|
227
|
+
# it to run and our instrumentation to be applied.
|
228
|
+
#
|
229
|
+
if thread_api.current.respond_to?(:cpu_time) && thread_api.current.cpu_time
|
230
|
+
Datadog.logger.debug("Detected thread ('#{thread}') with missing CPU profiling instrumentation.")
|
231
|
+
end
|
232
|
+
end
|
233
|
+
end
|
234
|
+
|
235
|
+
# If the profiler is started for a while, stopped and then restarted OR whenever the process forks, we need to
|
236
|
+
# clean up the per-thread cpu time counters we keep, so that the first sample after starting doesn't end up with:
|
237
|
+
#
|
238
|
+
# a) negative time: At least on my test docker container, and on the reliability environment, after the process
|
239
|
+
# forks, the clock reference changes and (new cpu time - old cpu time) can be < 0
|
240
|
+
#
|
241
|
+
# b) large amount of time: if the profiler was started, then stopped for some amount of time, and then
|
242
|
+
# restarted, we don't want the first sample to be "blamed" for multiple minutes of CPU time
|
243
|
+
#
|
244
|
+
# By resetting the last cpu time seen, we start with a clean slate every time we start the stack collector.
|
245
|
+
def reset_cpu_time_tracking
|
246
|
+
thread_api.list.each do |thread|
|
247
|
+
thread[THREAD_LAST_CPU_TIME_KEY] = nil if thread[THREAD_LAST_CPU_TIME_KEY]
|
248
|
+
end
|
249
|
+
end
|
250
|
+
end
|
251
|
+
end
|
252
|
+
end
|
253
|
+
end
|