ddtrace 0.47.0 → 0.48.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.circleci/config.yml +4 -2
- data/.circleci/images/primary/Dockerfile-2.0.0 +11 -1
- data/.circleci/images/primary/Dockerfile-2.1.10 +11 -1
- data/.circleci/images/primary/Dockerfile-2.2.10 +11 -1
- data/.circleci/images/primary/Dockerfile-2.3.8 +10 -0
- data/.circleci/images/primary/Dockerfile-2.4.6 +10 -0
- data/.circleci/images/primary/Dockerfile-2.5.6 +10 -0
- data/.circleci/images/primary/Dockerfile-2.6.4 +10 -0
- data/.circleci/images/primary/Dockerfile-2.7.0 +10 -0
- data/.circleci/images/primary/Dockerfile-jruby-9.2-latest +10 -0
- data/.gitlab-ci.yml +18 -18
- data/.rubocop.yml +19 -0
- data/.rubocop_todo.yml +44 -3
- data/Appraisals +55 -1
- data/CHANGELOG.md +47 -1
- data/Gemfile +10 -0
- data/Rakefile +9 -0
- data/bin/ddtracerb +15 -0
- data/ddtrace.gemspec +4 -2
- data/docs/GettingStarted.md +36 -53
- data/docs/ProfilingDevelopment.md +88 -0
- data/integration/README.md +1 -2
- data/integration/apps/rack/Dockerfile +3 -0
- data/integration/apps/rack/script/build-images +1 -1
- data/integration/apps/rack/script/ci +1 -1
- data/integration/apps/rails-five/script/build-images +1 -1
- data/integration/apps/rails-five/script/ci +1 -1
- data/integration/apps/ruby/script/build-images +1 -1
- data/integration/apps/ruby/script/ci +1 -1
- data/integration/images/include/http-health-check +1 -1
- data/integration/images/wrk/scripts/entrypoint.sh +1 -1
- data/integration/script/build-images +1 -1
- data/lib/ddtrace.rb +1 -0
- data/lib/ddtrace/configuration.rb +39 -13
- data/lib/ddtrace/configuration/components.rb +85 -3
- data/lib/ddtrace/configuration/settings.rb +31 -0
- data/lib/ddtrace/contrib/active_record/configuration/makara_resolver.rb +30 -0
- data/lib/ddtrace/contrib/active_record/configuration/resolver.rb +9 -3
- data/lib/ddtrace/contrib/resque/configuration/settings.rb +17 -1
- data/lib/ddtrace/contrib/resque/patcher.rb +4 -4
- data/lib/ddtrace/contrib/resque/resque_job.rb +22 -1
- data/lib/ddtrace/contrib/shoryuken/configuration/settings.rb +1 -0
- data/lib/ddtrace/contrib/shoryuken/tracer.rb +7 -3
- data/lib/ddtrace/diagnostics/environment_logger.rb +1 -1
- data/lib/ddtrace/error.rb +2 -0
- data/lib/ddtrace/ext/profiling.rb +52 -0
- data/lib/ddtrace/ext/transport.rb +1 -0
- data/lib/ddtrace/metrics.rb +4 -0
- data/lib/ddtrace/profiling.rb +54 -0
- data/lib/ddtrace/profiling/backtrace_location.rb +32 -0
- data/lib/ddtrace/profiling/buffer.rb +41 -0
- data/lib/ddtrace/profiling/collectors/stack.rb +253 -0
- data/lib/ddtrace/profiling/encoding/profile.rb +31 -0
- data/lib/ddtrace/profiling/event.rb +13 -0
- data/lib/ddtrace/profiling/events/stack.rb +102 -0
- data/lib/ddtrace/profiling/exporter.rb +23 -0
- data/lib/ddtrace/profiling/ext/cpu.rb +54 -0
- data/lib/ddtrace/profiling/ext/cthread.rb +134 -0
- data/lib/ddtrace/profiling/ext/forking.rb +97 -0
- data/lib/ddtrace/profiling/flush.rb +41 -0
- data/lib/ddtrace/profiling/pprof/builder.rb +121 -0
- data/lib/ddtrace/profiling/pprof/converter.rb +85 -0
- data/lib/ddtrace/profiling/pprof/message_set.rb +12 -0
- data/lib/ddtrace/profiling/pprof/payload.rb +18 -0
- data/lib/ddtrace/profiling/pprof/pprof.proto +212 -0
- data/lib/ddtrace/profiling/pprof/pprof_pb.rb +81 -0
- data/lib/ddtrace/profiling/pprof/stack_sample.rb +90 -0
- data/lib/ddtrace/profiling/pprof/string_table.rb +10 -0
- data/lib/ddtrace/profiling/pprof/template.rb +114 -0
- data/lib/ddtrace/profiling/preload.rb +3 -0
- data/lib/ddtrace/profiling/profiler.rb +28 -0
- data/lib/ddtrace/profiling/recorder.rb +87 -0
- data/lib/ddtrace/profiling/scheduler.rb +84 -0
- data/lib/ddtrace/profiling/tasks/setup.rb +77 -0
- data/lib/ddtrace/profiling/transport/client.rb +12 -0
- data/lib/ddtrace/profiling/transport/http.rb +122 -0
- data/lib/ddtrace/profiling/transport/http/api.rb +43 -0
- data/lib/ddtrace/profiling/transport/http/api/endpoint.rb +90 -0
- data/lib/ddtrace/profiling/transport/http/api/instance.rb +36 -0
- data/lib/ddtrace/profiling/transport/http/api/spec.rb +40 -0
- data/lib/ddtrace/profiling/transport/http/builder.rb +28 -0
- data/lib/ddtrace/profiling/transport/http/client.rb +33 -0
- data/lib/ddtrace/profiling/transport/http/response.rb +21 -0
- data/lib/ddtrace/profiling/transport/io.rb +30 -0
- data/lib/ddtrace/profiling/transport/io/client.rb +27 -0
- data/lib/ddtrace/profiling/transport/io/response.rb +16 -0
- data/lib/ddtrace/profiling/transport/parcel.rb +17 -0
- data/lib/ddtrace/profiling/transport/request.rb +15 -0
- data/lib/ddtrace/profiling/transport/response.rb +8 -0
- data/lib/ddtrace/runtime/container.rb +11 -3
- data/lib/ddtrace/sampling/rule_sampler.rb +3 -9
- data/lib/ddtrace/tasks/exec.rb +48 -0
- data/lib/ddtrace/tasks/help.rb +14 -0
- data/lib/ddtrace/tracer.rb +21 -0
- data/lib/ddtrace/transport/io/client.rb +15 -8
- data/lib/ddtrace/transport/parcel.rb +4 -0
- data/lib/ddtrace/version.rb +3 -1
- data/lib/ddtrace/workers/runtime_metrics.rb +14 -1
- metadata +70 -9
data/lib/ddtrace/contrib/active_record/configuration/makara_resolver.rb
ADDED
@@ -0,0 +1,30 @@
+module Datadog
+  module Contrib
+    module ActiveRecord
+      module Configuration
+        # The `makara` gem has the concept of **role**, which can be
+        # inferred from the configuration `name`, in the form of:
+        # `master/0`, `replica/0`, `replica/1`, etc.
+        # The first part of this string is the database role.
+        #
+        # This allows the matching of a connection based on its role,
+        # instead of connection-specific information.
+        module MakaraResolver
+          def normalize_for_config(active_record_config)
+            hash = super
+            hash[:makara_role] = active_record_config[:makara_role]
+            hash
+          end
+
+          def normalize_for_resolve(active_record_config)
+            hash = super
+
+            hash[:makara_role] = active_record_config[:name].split('/')[0].to_s if active_record_config[:name].is_a?(String)
+
+            hash
+          end
+        end
+      end
+    end
+  end
+end
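The role extraction above is a plain string split on the connection `name`. A minimal illustration with a makara-style name (the values are made up for the example):

    active_record_config = { name: 'replica/0', adapter: 'mysql2' }
    active_record_config[:name].split('/')[0].to_s
    # => "replica"   # stored under :makara_role and used for matching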
data/lib/ddtrace/contrib/active_record/configuration/resolver.rb
CHANGED
@@ -1,5 +1,6 @@
 require 'ddtrace/contrib/configuration/resolver'
 require 'ddtrace/vendor/active_record/connection_specification'
+require_relative 'makara_resolver'
 
 module Datadog
   module Contrib
@@ -28,6 +29,8 @@ module Datadog
         # When more than one configuration could be matched, the last one to match is selected,
         # based on addition order (`#add`).
         class Resolver < Contrib::Configuration::Resolver
+          prepend MakaraResolver
+
           def initialize(active_record_configuration = nil)
             super()
 
@@ -50,7 +53,7 @@ module Datadog
           def resolve(db_config)
             active_record_config = resolve_connection_key(db_config).symbolize_keys
 
-            hash =
+            hash = normalize_for_resolve(active_record_config)
 
             # Hashes in Ruby maintain insertion order
             _, config = @configurations.reverse_each.find do |matcher, _|
@@ -73,7 +76,7 @@ module Datadog
 
           def parse_matcher(matcher)
             resolved_pattern = resolve_connection_key(matcher).symbolize_keys
-            normalized =
+            normalized = normalize_for_config(resolved_pattern)
 
             # Remove empty fields to allow for partial matching
             normalized.reject! { |_, v| v.nil? }
@@ -112,7 +115,7 @@ module Datadog
 
           # Extract only fields we'd like to match
           # from the ActiveRecord configuration.
-          def
+          def normalize_for_config(active_record_config)
            {
              adapter: active_record_config[:adapter],
              host: active_record_config[:host],
@@ -121,6 +124,9 @@ module Datadog
              username: active_record_config[:username]
            }
           end
+
+          # Both resolvers perform the same operations for this implementation, but can be specialized
+          alias_method :normalize_for_resolve, :normalize_for_config
         end
       end
     end
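With `MakaraResolver` prepended into the resolver, connection matching can be keyed on the database role. A hedged sketch of what that enables, following the integration's existing `describes:`-based matching (the service names here are invented for the example):

    # Sketch only: route spans by makara role instead of per-connection details.
    Datadog.configure do |c|
      c.use :active_record, describes: { makara_role: 'master' },  service_name: 'primary-db'
      c.use :active_record, describes: { makara_role: 'replica' }, service_name: 'replica-db'
    end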
data/lib/ddtrace/contrib/resque/configuration/settings.rb
CHANGED
@@ -23,7 +23,23 @@ module Datadog
          end
 
          option :service_name, default: Ext::SERVICE_NAME
-
+
+          # A list of Ruby worker classes to be instrumented.
+          # The value of `nil` has special semantics: it instruments all workers dynamically.
+          #
+          # TODO: 1.0: Automatic patching should be the default behavior.
+          # We should not provide this option in the future,
+          # as our integrations should always instrument all possible scenarios when feasible.
+          option :workers, default: nil do |o|
+            o.on_set do |value|
+              unless value.nil?
+                Datadog.logger.warn(
+                  "DEPRECATED: Resque integration now instruments all workers. \n" \
+                  'The `workers:` option is unnecessary and will be removed in the future.'
+                )
+              end
+            end
+          end
          option :error_handler, default: Datadog::Tracer::DEFAULT_ON_ERROR
        end
      end
data/lib/ddtrace/contrib/resque/patcher.rb
CHANGED
@@ -17,11 +17,11 @@ module Datadog
 
        def patch
          require_relative 'resque_job'
-         get_option(:workers).each { |worker| worker.extend(ResqueJob) }
-       end
 
-
-
+         ::Resque::Job.send(:prepend, Resque::Job)
+
+         workers = Datadog.configuration[:resque][:workers] || []
+         workers.each { |worker| worker.extend(ResqueJob) }
        end
      end
    end
data/lib/ddtrace/contrib/resque/resque_job.rb
CHANGED
@@ -7,9 +7,30 @@ require 'resque'
 module Datadog
   module Contrib
     module Resque
+      # Automatically configures jobs with {ResqueJob} plugin.
+      module Job
+        def perform
+          if Datadog.configuration[:resque][:workers].nil?
+            job = payload_class
+            job.extend(Datadog::Contrib::Resque::ResqueJob) unless job.is_a? Datadog::Contrib::Resque::ResqueJob
+          end
+        ensure
+          super
+        end
+      end
+
       # Uses Resque job hooks to create traces
       module ResqueJob
-
+        # `around_perform` hooks are executed in alphabetical order.
+        # We use the lowest printable character that allows for an inline
+        # method definition ('0'), alongside our naming prefix for identification.
+        #
+        # We could, in theory, use any character (e.g "\x00"), but this will lead
+        # to unreadable stack traces that contain this method call.
+        #
+        # We could also just use `around_perform` but this might override the user's
+        # own method.
+        def around_perform0_ddtrace(*args)
           return yield unless datadog_configuration && tracer
 
           tracer.trace(Ext::SPAN_JOB, span_options) do |span|
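Because `Resque::Job#perform` is now prepended and extends job classes on the fly, listing worker classes is no longer necessary. A sketch of the before/after configuration (`MyJob` is a hypothetical job class, not from the diff):

    # Previously, every job class had to be listed explicitly:
    # Datadog.configure { |c| c.use :resque, workers: [MyJob] }

    # With 0.48.0, all workers are instrumented dynamically:
    Datadog.configure do |c|
      c.use :resque
    end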
data/lib/ddtrace/contrib/shoryuken/tracer.rb
CHANGED
@@ -12,8 +12,12 @@ module Datadog
        end
 
        def call(worker_instance, queue, sqs_msg, body)
-         @tracer.trace(
-
+         @tracer.trace(
+           Ext::SPAN_JOB,
+           service: @shoryuken_service,
+           span_type: Datadog::Ext::AppTypes::WORKER,
+           on_error: @error_handler
+         ) do |span|
            # Set analytics sample rate
            if Contrib::Analytics.enabled?(configuration[:analytics_enabled])
              Contrib::Analytics.set_sample_rate(span, configuration[:analytics_sample_rate])
@@ -26,7 +30,7 @@ module Datadog
            span.set_tag(Ext::TAG_JOB_ID, sqs_msg.message_id)
            span.set_tag(Ext::TAG_JOB_QUEUE, queue)
            span.set_tag(Ext::TAG_JOB_ATTRIBUTES, sqs_msg.attributes) if sqs_msg.respond_to?(:attributes)
-           span.set_tag(Ext::TAG_JOB_BODY, body)
+           span.set_tag(Ext::TAG_JOB_BODY, body) if configuration[:tag_body]
 
            yield
          end
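The `if configuration[:tag_body]` guard means SQS message bodies are only attached to job spans when the new option (added in `data/lib/ddtrace/contrib/shoryuken/configuration/settings.rb`, +1) is turned on. A hedged sketch of opting in; the exact default is assumed here to be off:

    Datadog.configure do |c|
      # Opt in to recording the raw SQS message body on Shoryuken job spans.
      c.use :shoryuken, tag_body: true
    end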
data/lib/ddtrace/diagnostics/environment_logger.rb
CHANGED
@@ -12,7 +12,7 @@ module Datadog
      # Outputs environment information to {Datadog.logger}.
      # Executes only for the lifetime of the program.
      def log!(transport_responses)
-       return if @executed || !log?
+       return if (defined?(@executed) && @executed) || !log?
 
        @executed = true
 
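The `defined?(@executed)` guard avoids reading an instance variable before its first assignment, which older Rubies report when run with warnings enabled. A small illustration (class and method names invented for the example):

    # ruby -w example.rb
    class Example
      def bad?
        @executed || false                             # may warn: instance variable @executed not initialized
      end

      def good?
        (defined?(@executed) && @executed) || false    # no warning
      end
    end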
data/lib/ddtrace/error.rb
CHANGED
@@ -16,6 +16,8 @@ module Datadog
 
     def initialize(type = nil, message = nil, backtrace = nil)
       backtrace = Array(backtrace).join("\n")
+
+      # DEV: We should measure if `Utils.utf8_encode` is still needed in practice.
       @type = Utils.utf8_encode(type)
       @message = Utils.utf8_encode(message)
       @backtrace = Utils.utf8_encode(backtrace)
data/lib/ddtrace/ext/profiling.rb
ADDED
@@ -0,0 +1,52 @@
+module Datadog
+  module Ext
+    module Profiling
+      ENV_ENABLED = 'DD_PROFILING_ENABLED'.freeze
+      ENV_UPLOAD_TIMEOUT = 'DD_PROFILING_UPLOAD_TIMEOUT'.freeze
+      ENV_MAX_FRAMES = 'DD_PROFILING_MAX_FRAMES'.freeze
+
+      module Pprof
+        LABEL_KEY_SPAN_ID = 'span id'.freeze
+        LABEL_KEY_THREAD_ID = 'thread id'.freeze
+        LABEL_KEY_TRACE_ID = 'trace id'.freeze
+        SAMPLE_VALUE_NO_VALUE = 0
+        VALUE_TYPE_CPU = 'cpu-time'.freeze
+        VALUE_TYPE_WALL = 'wall-time'.freeze
+        VALUE_UNIT_NANOSECONDS = 'nanoseconds'.freeze
+      end
+
+      module Transport
+        module HTTP
+          URI_TEMPLATE_DD_API = 'https://intake.profile.%s/'.freeze
+
+          FORM_FIELD_DATA = 'data[0]'.freeze
+          FORM_FIELD_FORMAT = 'format'.freeze
+          FORM_FIELD_FORMAT_PPROF = 'pprof'.freeze
+          FORM_FIELD_RECORDING_END = 'recording-end'.freeze
+          FORM_FIELD_RECORDING_START = 'recording-start'.freeze
+          FORM_FIELD_RUNTIME = 'runtime'.freeze
+          FORM_FIELD_RUNTIME_ID = 'runtime-id'.freeze
+          FORM_FIELD_TAG_ENV = 'env'.freeze
+          FORM_FIELD_TAG_HOST = 'host'.freeze
+          FORM_FIELD_TAG_LANGUAGE = 'language'.freeze
+          FORM_FIELD_TAG_PROFILER_VERSION = 'profiler_version'.freeze
+          FORM_FIELD_TAG_RUNTIME = 'runtime'.freeze
+          FORM_FIELD_TAG_RUNTIME_ENGINE = 'runtime_engine'.freeze
+          FORM_FIELD_TAG_RUNTIME_ID = 'runtime-id'.freeze
+          FORM_FIELD_TAG_RUNTIME_PLATFORM = 'runtime_platform'.freeze
+          FORM_FIELD_TAG_RUNTIME_VERSION = 'runtime_version'.freeze
+          FORM_FIELD_TAG_SERVICE = 'service'.freeze
+          FORM_FIELD_TAG_VERSION = 'version'.freeze
+          FORM_FIELD_TAGS = 'tags'.freeze
+          FORM_FIELD_TYPES = 'types[0]'.freeze
+          FORM_FIELD_TYPES_AUTO = 'auto'.freeze
+
+          HEADER_CONTENT_TYPE = 'Content-Type'.freeze
+          HEADER_CONTENT_TYPE_OCTET_STREAM = 'application/octet-stream'.freeze
+
+          PPROF_DEFAULT_FILENAME = 'profile.pb.gz'.freeze
+        end
+      end
+    end
+  end
+end
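The three `ENV_*` constants name the environment variables the profiler reads for its main knobs. A hypothetical sketch of consulting them directly; this is not the library's actual wiring, and the fallback values shown are assumptions, not documented defaults:

    # Sketch only: how these variables might be read (illustrative values).
    enabled    = ENV[Datadog::Ext::Profiling::ENV_ENABLED] == 'true'
    timeout_s  = Float(ENV[Datadog::Ext::Profiling::ENV_UPLOAD_TIMEOUT] || 30)   # assumed fallback
    max_frames = Integer(ENV[Datadog::Ext::Profiling::ENV_MAX_FRAMES] || 400)    # assumed fallback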
data/lib/ddtrace/ext/transport.rb
CHANGED
@@ -8,6 +8,7 @@ module Datadog
        ENV_DEFAULT_PORT = 'DD_TRACE_AGENT_PORT'.freeze
        ENV_DEFAULT_URL = 'DD_TRACE_AGENT_URL'.freeze
        HEADER_CONTAINER_ID = 'Datadog-Container-ID'.freeze
+       HEADER_DD_API_KEY = 'DD-API-KEY'.freeze
        HEADER_META_LANG = 'Datadog-Meta-Lang'.freeze
        HEADER_META_LANG_VERSION = 'Datadog-Meta-Lang-Version'.freeze
        HEADER_META_LANG_INTERPRETER = 'Datadog-Meta-Lang-Interpreter'.freeze
data/lib/ddtrace/metrics.rb
CHANGED
@@ -120,6 +120,10 @@ module Datadog
      metrics.each { |m| send(m.type, *[m.name, m.value, m.options].compact) }
    end
 
+    def close
+      @statsd.close if @statsd && @statsd.respond_to?(:close)
+    end
+
    Metric = Struct.new(:type, :name, :value, :options) do
      def initialize(*args)
        super
data/lib/ddtrace/profiling.rb
ADDED
@@ -0,0 +1,54 @@
+module Datadog
+  # Contains profiler for generating stack profiles, etc.
+  module Profiling
+    module_function
+
+    GOOGLE_PROTOBUF_MINIMUM_VERSION = Gem::Version.new('3.0')
+
+    def supported?
+      google_protobuf_supported?
+    end
+
+    def native_cpu_time_supported?
+      require 'ddtrace/profiling/ext/cpu'
+      Ext::CPU.supported?
+    end
+
+    def google_protobuf_supported?
+      RUBY_PLATFORM != 'java' \
+        && !Gem.loaded_specs['google-protobuf'].nil? \
+        && Gem.loaded_specs['google-protobuf'].version >= GOOGLE_PROTOBUF_MINIMUM_VERSION \
+        && !defined?(@failed_to_load_protobuf)
+    end
+
+    def load_profiling
+      require 'ddtrace/profiling/ext/cpu'
+      require 'ddtrace/profiling/ext/forking'
+
+      require 'ddtrace/profiling/collectors/stack'
+      require 'ddtrace/profiling/exporter'
+      require 'ddtrace/profiling/recorder'
+      require 'ddtrace/profiling/scheduler'
+      require 'ddtrace/profiling/tasks/setup'
+      require 'ddtrace/profiling/transport/io'
+      require 'ddtrace/profiling/transport/http'
+      require 'ddtrace/profiling/profiler'
+
+      begin
+        require 'ddtrace/profiling/pprof/pprof_pb' if google_protobuf_supported?
+      rescue LoadError => e
+        @failed_to_load_protobuf = true
+        Kernel.warn(
+          "[DDTRACE] Error while loading google-protobuf gem. Cause: '#{e.message}' Location: '#{e.backtrace.first}'. " \
+          'This can happen when google-protobuf is missing its native components. ' \
+          'To fix this, try removing and reinstalling the gem, forcing it to recompile the components: ' \
+          '`gem uninstall google-protobuf -a; BUNDLE_FORCE_RUBY_PLATFORM=true bundle install`. ' \
+          'If the error persists, please contact support via <https://docs.datadoghq.com/help/> or ' \
+          'file a bug at <https://github.com/DataDog/dd-trace-rb/blob/master/CONTRIBUTING.md#found-a-bug>.'
+        )
+      end
+    end
+
+    load_profiling if supported?
+  end
+end
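Since `load_profiling` only runs when `supported?` is true, application code can perform the same availability check before relying on the profiling classes. A small sketch using the methods defined in the file above:

    # Sketch: guard optional profiling wiring on the same check the library uses.
    if Datadog::Profiling.supported?
      puts 'google-protobuf >= 3.0 is available; profiling classes have been loaded'
    else
      puts 'Profiling is not supported here (JRuby, or google-protobuf missing/too old)'
    end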
data/lib/ddtrace/profiling/backtrace_location.rb
ADDED
@@ -0,0 +1,32 @@
+module Datadog
+  module Profiling
+    # Entity class used to represent an entry in a stack trace.
+    # Its fields are a simplified struct version of `Thread::Backtrace::Location`.
+    class BacktraceLocation
+      attr_reader \
+        :base_label,
+        :lineno,
+        :path,
+        :hash
+
+      def initialize(
+        base_label,
+        lineno,
+        path
+      )
+        @base_label = base_label
+        @lineno = lineno
+        @path = path
+        @hash = [base_label, lineno, path].hash
+      end
+
+      def ==(other)
+        hash == other.hash
+      end
+
+      def eql?(other)
+        hash == other.hash
+      end
+    end
+  end
+end
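Because `#hash`, `#==` and `#eql?` are all derived from `(base_label, lineno, path)`, two locations built from the same fields compare equal and collapse to a single Hash/Set key, which is what the collector's caching relies on. A short illustration (field values invented):

    a = Datadog::Profiling::BacktraceLocation.new('run', 42, 'app/worker.rb')
    b = Datadog::Profiling::BacktraceLocation.new('run', 42, 'app/worker.rb')

    a == b              # => true
    a.eql?(b)           # => true
    { a => 1 }.key?(b)  # => true (same #hash, so treated as the same key)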
data/lib/ddtrace/profiling/buffer.rb
ADDED
@@ -0,0 +1,41 @@
+require 'ddtrace/buffer'
+require 'ddtrace/utils/string_table'
+require 'ddtrace/utils/object_set'
+
+module Datadog
+  module Profiling
+    # Profiling buffer that stores profiling events. The buffer has a maximum size and when
+    # the buffer is full, a random event is discarded. This class is thread-safe.
+    class Buffer < Datadog::ThreadSafeBuffer
+      def initialize(*args)
+        super
+        @caches = {}
+        @string_table = Utils::StringTable.new
+      end
+
+      def cache(cache_name)
+        synchronize do
+          @caches[cache_name] ||= Utils::ObjectSet.new
+        end
+      end
+
+      def string_table
+        synchronize do
+          @string_table
+        end
+      end
+
+      protected
+
+      def drain!
+        items = super
+
+        # Clear caches
+        @caches = {}
+        @string_table = Utils::StringTable.new
+
+        items
+      end
+    end
+  end
+end
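The per-buffer `cache` and `string_table` exist so the stack collector (next file) can deduplicate backtrace entries between samples. A sketch of the pattern, mirroring the `fetch` call that appears in `collectors/stack.rb` below; the capacity and key values are illustrative:

    buffer = Datadog::Profiling::Buffer.new(1024)        # capacity value is illustrative
    location_cache = buffer.cache(:backtrace_locations)

    # Keyed on (base_label, lineno, path); the block only runs on a cache miss.
    location = location_cache.fetch('run', 42, 'app/worker.rb') do |_id, base_label, lineno, path|
      Datadog::Profiling::BacktraceLocation.new(base_label, lineno, path)
    end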
data/lib/ddtrace/profiling/collectors/stack.rb
ADDED
@@ -0,0 +1,253 @@
+require 'ddtrace/profiling/backtrace_location'
+require 'ddtrace/profiling/events/stack'
+require 'ddtrace/utils/only_once'
+require 'ddtrace/utils/time'
+require 'ddtrace/worker'
+require 'ddtrace/workers/polling'
+
+module Datadog
+  module Profiling
+    module Collectors
+      # Collects stack trace samples from Ruby threads for both CPU-time (if available) and wall-clock.
+      # Runs on its own background thread.
+      #
+      class Stack < Worker
+        include Workers::Polling
+
+        DEFAULT_MAX_TIME_USAGE_PCT = 2.0
+        MIN_INTERVAL = 0.01
+        THREAD_LAST_CPU_TIME_KEY = :datadog_profiler_last_cpu_time
+
+        attr_reader \
+          :recorder,
+          :max_frames,
+          :ignore_thread,
+          :max_time_usage_pct,
+          :thread_api
+
+        def initialize(
+          recorder,
+          max_frames: nil,
+          ignore_thread: nil,
+          max_time_usage_pct: DEFAULT_MAX_TIME_USAGE_PCT,
+          thread_api: Thread,
+          fork_policy: Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default
+          interval: MIN_INTERVAL,
+          enabled: true
+        )
+          @recorder = recorder
+          # TODO: Make this a required named argument after we drop support for Ruby 2.0
+          @max_frames = max_frames || raise(ArgumentError, 'missing keyword :max_frames')
+          @ignore_thread = ignore_thread
+          @max_time_usage_pct = max_time_usage_pct
+          @thread_api = thread_api
+
+          # Workers::Async::Thread settings
+          self.fork_policy = fork_policy
+
+          # Workers::IntervalLoop settings
+          self.loop_base_interval = interval
+
+          # Workers::Polling settings
+          self.enabled = enabled
+
+          @warn_about_missing_cpu_time_instrumentation_only_once = Datadog::Utils::OnlyOnce.new
+        end
+
+        def start
+          @last_wall_time = Datadog::Utils::Time.get_time
+          reset_cpu_time_tracking
+          perform
+        end
+
+        def perform
+          collect_and_wait
+        end
+
+        def loop_back_off?
+          false
+        end
+
+        def collect_and_wait
+          run_time = Datadog::Utils::Time.measure do
+            collect_events
+          end
+
+          # Update wait time to throttle profiling
+          self.loop_wait_time = compute_wait_time(run_time)
+        end
+
+        def collect_events
+          events = []
+
+          # Compute wall time interval
+          current_wall_time = Datadog::Utils::Time.get_time
+          last_wall_time = if instance_variable_defined?(:@last_wall_time)
+                             @last_wall_time
+                           else
+                             current_wall_time
+                           end
+
+          wall_time_interval_ns = ((current_wall_time - last_wall_time).round(9) * 1e9).to_i
+          @last_wall_time = current_wall_time
+
+          # Collect backtraces from each thread
+          thread_api.list.each do |thread|
+            next unless thread.alive?
+            next if ignore_thread.is_a?(Proc) && ignore_thread.call(thread)
+
+            event = collect_thread_event(thread, wall_time_interval_ns)
+            events << event unless event.nil?
+          end
+
+          # Send events to recorder
+          recorder.push(events) unless events.empty?
+
+          events
+        end
+
+        def collect_thread_event(thread, wall_time_interval_ns)
+          locations = thread.backtrace_locations
+          return if locations.nil?
+
+          # Get actual stack size then trim the stack
+          stack_size = locations.length
+          locations = locations[0..(max_frames - 1)]
+
+          # Convert backtrace locations into structs
+          locations = convert_backtrace_locations(locations)
+
+          thread_id = thread.respond_to?(:native_thread_id) ? thread.native_thread_id : thread.object_id
+          trace_id, span_id = get_trace_identifiers(thread)
+          cpu_time = get_cpu_time_interval!(thread)
+
+          Events::StackSample.new(
+            nil,
+            locations,
+            stack_size,
+            thread_id,
+            trace_id,
+            span_id,
+            cpu_time,
+            wall_time_interval_ns
+          )
+        end
+
+        def get_cpu_time_interval!(thread)
+          # Return if we can't get the current CPU time
+          unless thread.respond_to?(:cpu_time_instrumentation_installed?) && thread.cpu_time_instrumentation_installed?
+            warn_about_missing_cpu_time_instrumentation(thread)
+            return
+          end
+
+          current_cpu_time_ns = thread.cpu_time(:nanosecond)
+
+          # NOTE: This can still be nil even when all of the checks above passed because of a race: there's a bit of
+          # initialization that needs to be done by the thread itself, and it's possible for us to try to sample
+          # *before* the thread had time to finish the initialization
+          return unless current_cpu_time_ns
+
+          last_cpu_time_ns = (thread[THREAD_LAST_CPU_TIME_KEY] || current_cpu_time_ns)
+          interval = current_cpu_time_ns - last_cpu_time_ns
+
+          # Update CPU time for thread
+          thread[THREAD_LAST_CPU_TIME_KEY] = current_cpu_time_ns
+
+          # Return interval
+          interval
+        end
+
+        def get_trace_identifiers(thread)
+          return unless thread.is_a?(::Thread)
+          return unless Datadog.respond_to?(:tracer) && Datadog.tracer.respond_to?(:active_correlation)
+
+          identifier = Datadog.tracer.active_correlation(thread)
+          [identifier.trace_id, identifier.span_id]
+        end
+
+        def compute_wait_time(used_time)
+          # We took used_time to get the last sample.
+          #
+          # What we're computing here is -- if used_time corresponds to max_time_usage_pct of the time we should
+          # spend working, how much is (100% - max_time_usage_pct) of the time?
+          #
+          # For instance, if we took 10ms to sample, and max_time_usage_pct is 1%, then the other 99% is 990ms, which
+          # means we need to sleep for 990ms to guarantee that we don't spend more than 1% of the time working.
+          used_time_ns = used_time * 1e9
+          interval = (used_time_ns / (max_time_usage_pct / 100.0)) - used_time_ns
+          [interval / 1e9, MIN_INTERVAL].max
+        end
+
+        # Convert backtrace locations into structs
+        # Re-use old backtrace location objects if they already exist in the buffer
+        def convert_backtrace_locations(locations)
+          locations.collect do |location|
+            # Re-use existing BacktraceLocation if identical copy, otherwise build a new one.
+            recorder[Events::StackSample].cache(:backtrace_locations).fetch(
+              # Function name
+              location.base_label,
+              # Line number
+              location.lineno,
+              # Filename
+              location.path,
+              # Build function
+              &method(:build_backtrace_location)
+            )
+          end
+        end
+
+        def build_backtrace_location(_id, base_label, lineno, path)
+          string_table = recorder[Events::StackSample].string_table
+
+          Profiling::BacktraceLocation.new(
+            string_table.fetch_string(base_label),
+            lineno,
+            string_table.fetch_string(path)
+          )
+        end
+
+        private
+
+        def warn_about_missing_cpu_time_instrumentation(thread)
+          @warn_about_missing_cpu_time_instrumentation_only_once.run do
+            # Is the profiler thread instrumented? If it is, then we know instrumentation is available, but seems to be
+            # missing on this thread we just found.
+            #
+            # As far as we know, it can be missing due to one of the following:
+            #
+            # a) The thread was started before we installed our instrumentation.
+            #    In this case, the fix is to make sure ddtrace gets loaded before any other parts of the application.
+            #
+            # b) The thread was started using the Ruby native APIs (e.g. from a C extension such as ffi).
+            #    We currently have no solution for this case; these threads will always be missing our CPU instrumentation.
+            #
+            # c) The thread was started with `Thread.start`/`Thread.fork` and hasn't yet enabled the instrumentation.
+            #    When threads are started using these APIs, there's a small time window during which the thread has started
+            #    but our code to apply the instrumentation hasn't run yet; in these cases it's just a matter of allowing
+            #    it to run and our instrumentation to be applied.
+            #
+            if thread_api.current.respond_to?(:cpu_time) && thread_api.current.cpu_time
+              Datadog.logger.debug("Detected thread ('#{thread}') with missing CPU profiling instrumentation.")
+            end
+          end
+        end
+
+        # If the profiler is started for a while, stopped and then restarted OR whenever the process forks, we need to
+        # clean up the per-thread cpu time counters we keep, so that the first sample after starting doesn't end up with:
+        #
+        # a) negative time: At least on my test docker container, and on the reliability environment, after the process
+        #    forks, the clock reference changes and (old cpu time - new cpu time) can be < 0
+        #
+        # b) large amount of time: if the profiler was started, then stopped for some amount of time, and then
+        #    restarted, we don't want the first sample to be "blamed" for multiple minutes of CPU time
+        #
+        # By resetting the last cpu time seen, we start with a clean slate every time we start the stack collector.
+        def reset_cpu_time_tracking
+          thread_api.list.each do |thread|
+            thread[THREAD_LAST_CPU_TIME_KEY] = nil if thread[THREAD_LAST_CPU_TIME_KEY]
+          end
+        end
+      end
+    end
+  end
+end
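To make the throttling comment in `compute_wait_time` concrete, here is the arithmetic with the constants defined above (2% maximum usage) and a hypothetical 10 ms sampling run; the numbers are only a worked example:

    used_time_ns = 0.010 * 1e9                               # sampling took 10 ms
    interval     = (used_time_ns / (2.0 / 100.0)) - used_time_ns
    #            = 500_000_000 - 10_000_000 = 490_000_000 ns
    [interval / 1e9, 0.01].max                               # => 0.49, so the collector sleeps ~490 ms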