ddtrace 0.47.0 → 0.48.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. checksums.yaml +5 -5
  2. data/.circleci/config.yml +4 -2
  3. data/.circleci/images/primary/Dockerfile-2.0.0 +11 -1
  4. data/.circleci/images/primary/Dockerfile-2.1.10 +11 -1
  5. data/.circleci/images/primary/Dockerfile-2.2.10 +11 -1
  6. data/.circleci/images/primary/Dockerfile-2.3.8 +10 -0
  7. data/.circleci/images/primary/Dockerfile-2.4.6 +10 -0
  8. data/.circleci/images/primary/Dockerfile-2.5.6 +10 -0
  9. data/.circleci/images/primary/Dockerfile-2.6.4 +10 -0
  10. data/.circleci/images/primary/Dockerfile-2.7.0 +10 -0
  11. data/.circleci/images/primary/Dockerfile-jruby-9.2-latest +10 -0
  12. data/.gitlab-ci.yml +18 -18
  13. data/.rubocop.yml +19 -0
  14. data/.rubocop_todo.yml +44 -3
  15. data/Appraisals +55 -1
  16. data/CHANGELOG.md +47 -1
  17. data/Gemfile +10 -0
  18. data/Rakefile +9 -0
  19. data/bin/ddtracerb +15 -0
  20. data/ddtrace.gemspec +4 -2
  21. data/docs/GettingStarted.md +36 -53
  22. data/docs/ProfilingDevelopment.md +88 -0
  23. data/integration/README.md +1 -2
  24. data/integration/apps/rack/Dockerfile +3 -0
  25. data/integration/apps/rack/script/build-images +1 -1
  26. data/integration/apps/rack/script/ci +1 -1
  27. data/integration/apps/rails-five/script/build-images +1 -1
  28. data/integration/apps/rails-five/script/ci +1 -1
  29. data/integration/apps/ruby/script/build-images +1 -1
  30. data/integration/apps/ruby/script/ci +1 -1
  31. data/integration/images/include/http-health-check +1 -1
  32. data/integration/images/wrk/scripts/entrypoint.sh +1 -1
  33. data/integration/script/build-images +1 -1
  34. data/lib/ddtrace.rb +1 -0
  35. data/lib/ddtrace/configuration.rb +39 -13
  36. data/lib/ddtrace/configuration/components.rb +85 -3
  37. data/lib/ddtrace/configuration/settings.rb +31 -0
  38. data/lib/ddtrace/contrib/active_record/configuration/makara_resolver.rb +30 -0
  39. data/lib/ddtrace/contrib/active_record/configuration/resolver.rb +9 -3
  40. data/lib/ddtrace/contrib/resque/configuration/settings.rb +17 -1
  41. data/lib/ddtrace/contrib/resque/patcher.rb +4 -4
  42. data/lib/ddtrace/contrib/resque/resque_job.rb +22 -1
  43. data/lib/ddtrace/contrib/shoryuken/configuration/settings.rb +1 -0
  44. data/lib/ddtrace/contrib/shoryuken/tracer.rb +7 -3
  45. data/lib/ddtrace/diagnostics/environment_logger.rb +1 -1
  46. data/lib/ddtrace/error.rb +2 -0
  47. data/lib/ddtrace/ext/profiling.rb +52 -0
  48. data/lib/ddtrace/ext/transport.rb +1 -0
  49. data/lib/ddtrace/metrics.rb +4 -0
  50. data/lib/ddtrace/profiling.rb +54 -0
  51. data/lib/ddtrace/profiling/backtrace_location.rb +32 -0
  52. data/lib/ddtrace/profiling/buffer.rb +41 -0
  53. data/lib/ddtrace/profiling/collectors/stack.rb +253 -0
  54. data/lib/ddtrace/profiling/encoding/profile.rb +31 -0
  55. data/lib/ddtrace/profiling/event.rb +13 -0
  56. data/lib/ddtrace/profiling/events/stack.rb +102 -0
  57. data/lib/ddtrace/profiling/exporter.rb +23 -0
  58. data/lib/ddtrace/profiling/ext/cpu.rb +54 -0
  59. data/lib/ddtrace/profiling/ext/cthread.rb +134 -0
  60. data/lib/ddtrace/profiling/ext/forking.rb +97 -0
  61. data/lib/ddtrace/profiling/flush.rb +41 -0
  62. data/lib/ddtrace/profiling/pprof/builder.rb +121 -0
  63. data/lib/ddtrace/profiling/pprof/converter.rb +85 -0
  64. data/lib/ddtrace/profiling/pprof/message_set.rb +12 -0
  65. data/lib/ddtrace/profiling/pprof/payload.rb +18 -0
  66. data/lib/ddtrace/profiling/pprof/pprof.proto +212 -0
  67. data/lib/ddtrace/profiling/pprof/pprof_pb.rb +81 -0
  68. data/lib/ddtrace/profiling/pprof/stack_sample.rb +90 -0
  69. data/lib/ddtrace/profiling/pprof/string_table.rb +10 -0
  70. data/lib/ddtrace/profiling/pprof/template.rb +114 -0
  71. data/lib/ddtrace/profiling/preload.rb +3 -0
  72. data/lib/ddtrace/profiling/profiler.rb +28 -0
  73. data/lib/ddtrace/profiling/recorder.rb +87 -0
  74. data/lib/ddtrace/profiling/scheduler.rb +84 -0
  75. data/lib/ddtrace/profiling/tasks/setup.rb +77 -0
  76. data/lib/ddtrace/profiling/transport/client.rb +12 -0
  77. data/lib/ddtrace/profiling/transport/http.rb +122 -0
  78. data/lib/ddtrace/profiling/transport/http/api.rb +43 -0
  79. data/lib/ddtrace/profiling/transport/http/api/endpoint.rb +90 -0
  80. data/lib/ddtrace/profiling/transport/http/api/instance.rb +36 -0
  81. data/lib/ddtrace/profiling/transport/http/api/spec.rb +40 -0
  82. data/lib/ddtrace/profiling/transport/http/builder.rb +28 -0
  83. data/lib/ddtrace/profiling/transport/http/client.rb +33 -0
  84. data/lib/ddtrace/profiling/transport/http/response.rb +21 -0
  85. data/lib/ddtrace/profiling/transport/io.rb +30 -0
  86. data/lib/ddtrace/profiling/transport/io/client.rb +27 -0
  87. data/lib/ddtrace/profiling/transport/io/response.rb +16 -0
  88. data/lib/ddtrace/profiling/transport/parcel.rb +17 -0
  89. data/lib/ddtrace/profiling/transport/request.rb +15 -0
  90. data/lib/ddtrace/profiling/transport/response.rb +8 -0
  91. data/lib/ddtrace/runtime/container.rb +11 -3
  92. data/lib/ddtrace/sampling/rule_sampler.rb +3 -9
  93. data/lib/ddtrace/tasks/exec.rb +48 -0
  94. data/lib/ddtrace/tasks/help.rb +14 -0
  95. data/lib/ddtrace/tracer.rb +21 -0
  96. data/lib/ddtrace/transport/io/client.rb +15 -8
  97. data/lib/ddtrace/transport/parcel.rb +4 -0
  98. data/lib/ddtrace/version.rb +3 -1
  99. data/lib/ddtrace/workers/runtime_metrics.rb +14 -1
  100. metadata +70 -9
@@ -0,0 +1,30 @@
1
module Datadog
  module Contrib
    module ActiveRecord
      module Configuration
        # The `makara` gem has the concept of **role**, which can be
        # inferred from the configuration `name`, in the form of:
        # `master/0`, `replica/0`, `replica/1`, etc.
        # The first part of this string is the database role.
        #
        # This allows the matching of a connection based on its role,
        # instead of connection-specific information.
        #
        # Both methods decorate the hash returned by `super`, so this module
        # must sit in front of an implementation of the same methods.
        module MakaraResolver
          # Adds the `:makara_role` given in the configuration (possibly `nil`)
          # to the hash produced by `super`.
          def normalize_for_config(active_record_config)
            super.tap do |normalized|
              normalized[:makara_role] = active_record_config[:makara_role]
            end
          end

          # Adds a `:makara_role` derived from the connection `:name` to the hash
          # produced by `super`. The role is the text before the first `/`.
          # Nothing is added when `:name` is not a String.
          def normalize_for_resolve(active_record_config)
            normalized = super

            connection_name = active_record_config[:name]
            return normalized unless connection_name.is_a?(String)

            normalized[:makara_role] = connection_name.split('/').first.to_s
            normalized
          end
        end
      end
    end
  end
end
@@ -1,5 +1,6 @@
1
1
  require 'ddtrace/contrib/configuration/resolver'
2
2
  require 'ddtrace/vendor/active_record/connection_specification'
3
+ require_relative 'makara_resolver'
3
4
 
4
5
  module Datadog
5
6
  module Contrib
@@ -28,6 +29,8 @@ module Datadog
28
29
  # When more than one configuration could be matched, the last one to match is selected,
29
30
  # based on addition order (`#add`).
30
31
  class Resolver < Contrib::Configuration::Resolver
32
+ prepend MakaraResolver
33
+
31
34
  def initialize(active_record_configuration = nil)
32
35
  super()
33
36
 
@@ -50,7 +53,7 @@ module Datadog
50
53
  def resolve(db_config)
51
54
  active_record_config = resolve_connection_key(db_config).symbolize_keys
52
55
 
53
- hash = normalize(active_record_config)
56
+ hash = normalize_for_resolve(active_record_config)
54
57
 
55
58
  # Hashes in Ruby maintain insertion order
56
59
  _, config = @configurations.reverse_each.find do |matcher, _|
@@ -73,7 +76,7 @@ module Datadog
73
76
 
74
77
  def parse_matcher(matcher)
75
78
  resolved_pattern = resolve_connection_key(matcher).symbolize_keys
76
- normalized = normalize(resolved_pattern)
79
+ normalized = normalize_for_config(resolved_pattern)
77
80
 
78
81
  # Remove empty fields to allow for partial matching
79
82
  normalized.reject! { |_, v| v.nil? }
@@ -112,7 +115,7 @@ module Datadog
112
115
 
113
116
  # Extract only fields we'd like to match
114
117
  # from the ActiveRecord configuration.
115
- def normalize(active_record_config)
118
+ def normalize_for_config(active_record_config)
116
119
  {
117
120
  adapter: active_record_config[:adapter],
118
121
  host: active_record_config[:host],
@@ -121,6 +124,9 @@ module Datadog
121
124
  username: active_record_config[:username]
122
125
  }
123
126
  end
127
+
128
+ # Both resolvers perform the same operations for this implementation, but can be specialized
129
+ alias_method :normalize_for_resolve, :normalize_for_config
124
130
  end
125
131
  end
126
132
  end
@@ -23,7 +23,23 @@ module Datadog
23
23
  end
24
24
 
25
25
  option :service_name, default: Ext::SERVICE_NAME
26
- option :workers, default: []
26
+
27
+ # A list of Ruby worker classes to be instrumented.
28
+ # The value of `nil` has special semantics: it instruments all workers dynamically.
29
+ #
30
+ # TODO: 1.0: Automatic patching should be the default behavior.
31
+ # We should not provide this option in the future,
32
+ # as our integrations should always instrument all possible scenarios when feasible.
33
+ option :workers, default: nil do |o|
34
+ o.on_set do |value|
35
+ unless value.nil?
36
+ Datadog.logger.warn(
37
+ "DEPRECATED: Resque integration now instruments all workers. \n" \
38
+ 'The `workers:` option is unnecessary and will be removed in the future.'
39
+ )
40
+ end
41
+ end
42
+ end
27
43
  option :error_handler, default: Datadog::Tracer::DEFAULT_ON_ERROR
28
44
  end
29
45
  end
@@ -17,11 +17,11 @@ module Datadog
17
17
 
18
18
  def patch
19
19
  require_relative 'resque_job'
20
- get_option(:workers).each { |worker| worker.extend(ResqueJob) }
21
- end
22
20
 
23
- def get_option(option)
24
- Datadog.configuration[:resque].get_option(option)
21
+ ::Resque::Job.send(:prepend, Resque::Job)
22
+
23
+ workers = Datadog.configuration[:resque][:workers] || []
24
+ workers.each { |worker| worker.extend(ResqueJob) }
25
25
  end
26
26
  end
27
27
  end
@@ -7,9 +7,30 @@ require 'resque'
7
7
  module Datadog
8
8
  module Contrib
9
9
  module Resque
10
# Automatically configures jobs with {ResqueJob} plugin.
module Job
  # Extends the job's payload class with {ResqueJob} (only when the `workers`
  # option is `nil`, and only if it is not already extended), then delegates
  # to the next `perform` in the ancestor chain.
  #
  # The wrapped `perform` always runs, even if the instrumentation step
  # raises; in that case the instrumentation error is re-raised afterwards.
  #
  # @return [Object] the value returned by the wrapped `perform`
  def perform
    result = nil

    begin
      if Datadog.configuration[:resque][:workers].nil?
        job = payload_class
        job.extend(Datadog::Contrib::Resque::ResqueJob) unless job.is_a? Datadog::Contrib::Resque::ResqueJob
      end
    ensure
      # The value of an `ensure` clause is discarded by Ruby, so the result of
      # `super` has to be captured explicitly to be handed back to the caller.
      result = super
    end

    result
  end
end
21
+
10
22
  # Uses Resque job hooks to create traces
11
23
  module ResqueJob
12
- def around_perform(*args)
24
+ # `around_perform` hooks are executed in alphabetical order.
25
+ # we use the lowest printable character that allows for an inline
26
+ # method definition ('0'), alongside our naming prefix for identification.
27
+ #
28
+ # We could, in theory, use any character (e.g "\x00"), but this will lead
29
+ # to unreadable stack traces that contain this method call.
30
+ #
31
+ # We could also just use `around_perform` but this might override the user's
32
+ # own method.
33
+ def around_perform0_ddtrace(*args)
13
34
  return yield unless datadog_configuration && tracer
14
35
 
15
36
  tracer.trace(Ext::SPAN_JOB, span_options) do |span|
@@ -23,6 +23,7 @@ module Datadog
23
23
 
24
24
  option :service_name, default: Ext::SERVICE_NAME
25
25
  option :error_handler, default: Datadog::Tracer::DEFAULT_ON_ERROR
26
+ option :tag_body, default: false
26
27
  end
27
28
  end
28
29
  end
@@ -12,8 +12,12 @@ module Datadog
12
12
  end
13
13
 
14
14
  def call(worker_instance, queue, sqs_msg, body)
15
- @tracer.trace(Ext::SPAN_JOB, service: @shoryuken_service, span_type: Datadog::Ext::AppTypes::WORKER,
16
- on_error: @error_handler) do |span|
15
+ @tracer.trace(
16
+ Ext::SPAN_JOB,
17
+ service: @shoryuken_service,
18
+ span_type: Datadog::Ext::AppTypes::WORKER,
19
+ on_error: @error_handler
20
+ ) do |span|
17
21
  # Set analytics sample rate
18
22
  if Contrib::Analytics.enabled?(configuration[:analytics_enabled])
19
23
  Contrib::Analytics.set_sample_rate(span, configuration[:analytics_sample_rate])
@@ -26,7 +30,7 @@ module Datadog
26
30
  span.set_tag(Ext::TAG_JOB_ID, sqs_msg.message_id)
27
31
  span.set_tag(Ext::TAG_JOB_QUEUE, queue)
28
32
  span.set_tag(Ext::TAG_JOB_ATTRIBUTES, sqs_msg.attributes) if sqs_msg.respond_to?(:attributes)
29
- span.set_tag(Ext::TAG_JOB_BODY, body)
33
+ span.set_tag(Ext::TAG_JOB_BODY, body) if configuration[:tag_body]
30
34
 
31
35
  yield
32
36
  end
@@ -12,7 +12,7 @@ module Datadog
12
12
  # Outputs environment information to {Datadog.logger}.
13
13
  # Executes only for the lifetime of the program.
14
14
  def log!(transport_responses)
15
- return if @executed || !log?
15
+ return if (defined?(@executed) && @executed) || !log?
16
16
 
17
17
  @executed = true
18
18
 
data/lib/ddtrace/error.rb CHANGED
@@ -16,6 +16,8 @@ module Datadog
16
16
 
17
17
  def initialize(type = nil, message = nil, backtrace = nil)
18
18
  backtrace = Array(backtrace).join("\n")
19
+
20
+ # DEV: We should measure if `Utils.utf8_encode` is still needed in practice.
19
21
  @type = Utils.utf8_encode(type)
20
22
  @message = Utils.utf8_encode(message)
21
23
  @backtrace = Utils.utf8_encode(backtrace)
@@ -0,0 +1,52 @@
1
module Datadog
  module Ext
    # Frozen string (and one integer) constants used by the profiling feature.
    module Profiling
      # NOTE(review): the `ENV_` prefix and the `DD_*` values suggest these are
      # environment variable names — confirm against the settings that read them.
      ENV_ENABLED = 'DD_PROFILING_ENABLED'.freeze
      ENV_UPLOAD_TIMEOUT = 'DD_PROFILING_UPLOAD_TIMEOUT'.freeze
      ENV_MAX_FRAMES = 'DD_PROFILING_MAX_FRAMES'.freeze

      # Label keys, value type names and unit names for pprof output.
      module Pprof
        LABEL_KEY_SPAN_ID = 'span id'.freeze
        LABEL_KEY_THREAD_ID = 'thread id'.freeze
        LABEL_KEY_TRACE_ID = 'trace id'.freeze
        SAMPLE_VALUE_NO_VALUE = 0
        VALUE_TYPE_CPU = 'cpu-time'.freeze
        VALUE_TYPE_WALL = 'wall-time'.freeze
        VALUE_UNIT_NANOSECONDS = 'nanoseconds'.freeze
      end

      module Transport
        # Form field names, header values and defaults for the HTTP transport.
        module HTTP
          # Contains a single `%s` format placeholder.
          # NOTE(review): presumably filled with the Datadog site/domain — confirm against caller.
          URI_TEMPLATE_DD_API = 'https://intake.profile.%s/'.freeze

          # NOTE: FORM_FIELD_RUNTIME and FORM_FIELD_TAG_RUNTIME share the value 'runtime',
          # and FORM_FIELD_RUNTIME_ID and FORM_FIELD_TAG_RUNTIME_ID share 'runtime-id'.
          FORM_FIELD_DATA = 'data[0]'.freeze
          FORM_FIELD_FORMAT = 'format'.freeze
          FORM_FIELD_FORMAT_PPROF = 'pprof'.freeze
          FORM_FIELD_RECORDING_END = 'recording-end'.freeze
          FORM_FIELD_RECORDING_START = 'recording-start'.freeze
          FORM_FIELD_RUNTIME = 'runtime'.freeze
          FORM_FIELD_RUNTIME_ID = 'runtime-id'.freeze
          FORM_FIELD_TAG_ENV = 'env'.freeze
          FORM_FIELD_TAG_HOST = 'host'.freeze
          FORM_FIELD_TAG_LANGUAGE = 'language'.freeze
          FORM_FIELD_TAG_PROFILER_VERSION = 'profiler_version'.freeze
          FORM_FIELD_TAG_RUNTIME = 'runtime'.freeze
          FORM_FIELD_TAG_RUNTIME_ENGINE = 'runtime_engine'.freeze
          FORM_FIELD_TAG_RUNTIME_ID = 'runtime-id'.freeze
          FORM_FIELD_TAG_RUNTIME_PLATFORM = 'runtime_platform'.freeze
          FORM_FIELD_TAG_RUNTIME_VERSION = 'runtime_version'.freeze
          FORM_FIELD_TAG_SERVICE = 'service'.freeze
          FORM_FIELD_TAG_VERSION = 'version'.freeze
          FORM_FIELD_TAGS = 'tags'.freeze
          FORM_FIELD_TYPES = 'types[0]'.freeze
          FORM_FIELD_TYPES_AUTO = 'auto'.freeze

          HEADER_CONTENT_TYPE = 'Content-Type'.freeze
          HEADER_CONTENT_TYPE_OCTET_STREAM = 'application/octet-stream'.freeze

          PPROF_DEFAULT_FILENAME = 'profile.pb.gz'.freeze
        end
      end
    end
  end
end
@@ -8,6 +8,7 @@ module Datadog
8
8
  ENV_DEFAULT_PORT = 'DD_TRACE_AGENT_PORT'.freeze
9
9
  ENV_DEFAULT_URL = 'DD_TRACE_AGENT_URL'.freeze
10
10
  HEADER_CONTAINER_ID = 'Datadog-Container-ID'.freeze
11
+ HEADER_DD_API_KEY = 'DD-API-KEY'.freeze
11
12
  HEADER_META_LANG = 'Datadog-Meta-Lang'.freeze
12
13
  HEADER_META_LANG_VERSION = 'Datadog-Meta-Lang-Version'.freeze
13
14
  HEADER_META_LANG_INTERPRETER = 'Datadog-Meta-Lang-Interpreter'.freeze
@@ -120,6 +120,10 @@ module Datadog
120
120
  metrics.each { |m| send(m.type, *[m.name, m.value, m.options].compact) }
121
121
  end
122
122
 
123
+ def close
124
+ @statsd.close if @statsd && @statsd.respond_to?(:close)
125
+ end
126
+
123
127
  Metric = Struct.new(:type, :name, :value, :options) do
124
128
  def initialize(*args)
125
129
  super
@@ -0,0 +1,54 @@
1
module Datadog
  # Contains profiler for generating stack profiles, etc.
  module Profiling
    module_function

    GOOGLE_PROTOBUF_MINIMUM_VERSION = Gem::Version.new('3.0')

    # Whether profiling can be used; currently this only depends on google-protobuf.
    def supported?
      google_protobuf_supported?
    end

    # Loads the CPU extension and asks it whether it is supported.
    def native_cpu_time_supported?
      require 'ddtrace/profiling/ext/cpu'
      Ext::CPU.supported?
    end

    # True only when not running on the 'java' platform, a google-protobuf gem
    # of at least GOOGLE_PROTOBUF_MINIMUM_VERSION is loaded, and no earlier
    # attempt to load the protobuf definitions has failed.
    def google_protobuf_supported?
      return false if RUBY_PLATFORM == 'java'

      protobuf_spec = Gem.loaded_specs['google-protobuf']
      return false if protobuf_spec.nil?
      return false unless protobuf_spec.version >= GOOGLE_PROTOBUF_MINIMUM_VERSION

      !defined?(@failed_to_load_protobuf)
    end

    # Requires the profiling components, in order, and then the protobuf
    # definitions. Only a LoadError from the protobuf definitions is rescued:
    # it is remembered (which makes `supported?` false) and reported via `Kernel.warn`.
    def load_profiling
      %w[
        ddtrace/profiling/ext/cpu
        ddtrace/profiling/ext/forking
        ddtrace/profiling/collectors/stack
        ddtrace/profiling/exporter
        ddtrace/profiling/recorder
        ddtrace/profiling/scheduler
        ddtrace/profiling/tasks/setup
        ddtrace/profiling/transport/io
        ddtrace/profiling/transport/http
        ddtrace/profiling/profiler
      ].each { |component| require component }

      begin
        require 'ddtrace/profiling/pprof/pprof_pb' if google_protobuf_supported?
      rescue LoadError => error
        @failed_to_load_protobuf = true
        Kernel.warn(
          "[DDTRACE] Error while loading google-protobuf gem. Cause: '#{error.message}' Location: '#{error.backtrace.first}'. " \
          'This can happen when google-protobuf is missing its native components. ' \
          'To fix this, try removing and reinstalling the gem, forcing it to recompile the components: ' \
          '`gem uninstall google-protobuf -a; BUNDLE_FORCE_RUBY_PLATFORM=true bundle install`. ' \
          'If the error persists, please contact support via <https://docs.datadoghq.com/help/> or ' \
          'file a bug at <https://github.com/DataDog/dd-trace-rb/blob/master/CONTRIBUTING.md#found-a-bug>.'
        )
      end
    end

    # Profiling components are loaded eagerly when this file is required.
    load_profiling if supported?
  end
end
@@ -0,0 +1,32 @@
1
module Datadog
  module Profiling
    # Entity class used to represent an entry in a stack trace.
    # Its fields are a simplified struct version of `Thread::Backtrace::Location`.
    #
    # Instances are value objects: two locations are equal when they have the
    # same `base_label`, `lineno` and `path`. The hash is computed once at
    # construction time, so instances are cheap to use as Hash keys.
    class BacktraceLocation
      attr_reader \
        :base_label,
        :lineno,
        :path,
        :hash

      # @param base_label [Object] value stored as `base_label`
      # @param lineno [Object] value stored as `lineno`
      # @param path [Object] value stored as `path`
      def initialize(
        base_label,
        lineno,
        path
      )
        @base_label = base_label
        @lineno = lineno
        @path = path
        @hash = [base_label, lineno, path].hash
      end

      # Value equality.
      #
      # The precomputed hash is compared first as a cheap rejection test, but it
      # is not sufficient on its own: unrelated objects (e.g. the plain Array
      # `[base_label, lineno, path]`) or colliding locations can share the same
      # hash, so the type and the individual fields are checked as well.
      def ==(other)
        other.is_a?(BacktraceLocation) &&
          hash == other.hash &&
          lineno == other.lineno &&
          base_label == other.base_label &&
          path == other.path
      end

      # Same semantics as `==`, so instances behave consistently as Hash keys.
      def eql?(other)
        self == other
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'ddtrace/buffer'
2
+ require 'ddtrace/utils/string_table'
3
+ require 'ddtrace/utils/object_set'
4
+
5
module Datadog
  module Profiling
    # Profiling buffer that stores profiling events. The buffer has a maximum size and when
    # the buffer is full, a random event is discarded. This class is thread-safe.
    #
    # Besides the events themselves, the buffer owns a set of named object caches
    # and a string table; both are replaced with fresh instances whenever the
    # buffer is drained.
    class Buffer < Datadog::ThreadSafeBuffer
      def initialize(*args)
        super
        @caches = {}
        @string_table = Utils::StringTable.new
      end

      # Returns the object cache registered under +cache_name+, creating it on first use.
      def cache(cache_name)
        synchronize { @caches[cache_name] ||= Utils::ObjectSet.new }
      end

      # Returns the string table currently associated with this buffer.
      def string_table
        synchronize { @string_table }
      end

      protected

      # Drains the buffer via the parent implementation, then discards the
      # caches and string table. The drained items are returned.
      def drain!
        super.tap do
          # Clear caches
          @caches = {}
          @string_table = Utils::StringTable.new
        end
      end
    end
  end
end
@@ -0,0 +1,253 @@
1
+ require 'ddtrace/profiling/backtrace_location'
2
+ require 'ddtrace/profiling/events/stack'
3
+ require 'ddtrace/utils/only_once'
4
+ require 'ddtrace/utils/time'
5
+ require 'ddtrace/worker'
6
+ require 'ddtrace/workers/polling'
7
+
8
module Datadog
  module Profiling
    module Collectors
      # Collects stack trace samples from Ruby threads for both CPU-time (if available) and wall-clock.
      # Runs on its own background thread.
      #
      class Stack < Worker
        include Workers::Polling

        # Default for `max_time_usage_pct`: the percentage of time that sampling is allowed
        # to take, as used by #compute_wait_time.
        DEFAULT_MAX_TIME_USAGE_PCT = 2.0
        # Default loop base interval, and the lower bound for the wait time returned by #compute_wait_time.
        # NOTE(review): presumably expressed in seconds — confirm against Workers::IntervalLoop.
        MIN_INTERVAL = 0.01
        # Thread-local key under which the last observed CPU time of each thread is stored.
        THREAD_LAST_CPU_TIME_KEY = :datadog_profiler_last_cpu_time

        attr_reader \
          :recorder,
          :max_frames,
          :ignore_thread,
          :max_time_usage_pct,
          :thread_api

        # @param recorder destination for the collected events; must respond to `push` and `[]`
        # @param max_frames maximum number of frames kept per sampled stack (required; see TODO below)
        # @param ignore_thread optional Proc; threads for which it returns truthy are not sampled
        # @param max_time_usage_pct percentage of time sampling is allowed to take (see #compute_wait_time)
        # @param thread_api object used to list threads and access the current one (`list`, `current`)
        # @param fork_policy forwarded to `fork_policy=`
        # @param interval forwarded to `loop_base_interval=`
        # @param enabled forwarded to `enabled=`
        # @raise [ArgumentError] when `max_frames` is not provided
        def initialize(
          recorder,
          max_frames: nil,
          ignore_thread: nil,
          max_time_usage_pct: DEFAULT_MAX_TIME_USAGE_PCT,
          thread_api: Thread,
          fork_policy: Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default
          interval: MIN_INTERVAL,
          enabled: true
        )
          @recorder = recorder
          # TODO: Make this a required named argument after we drop support for Ruby 2.0
          @max_frames = max_frames || raise(ArgumentError, 'missing keyword :max_frames')
          @ignore_thread = ignore_thread
          @max_time_usage_pct = max_time_usage_pct
          @thread_api = thread_api

          # Workers::Async::Thread settings
          self.fork_policy = fork_policy

          # Workers::IntervalLoop settings
          self.loop_base_interval = interval

          # Workers::Polling settings
          self.enabled = enabled

          @warn_about_missing_cpu_time_instrumentation_only_once = Datadog::Utils::OnlyOnce.new
        end

        # Records the starting wall time, clears per-thread CPU time tracking and
        # then calls #perform.
        def start
          @last_wall_time = Datadog::Utils::Time.get_time
          reset_cpu_time_tracking
          perform
        end

        # Runs one sampling pass and updates the wait time (see #collect_and_wait).
        def perform
          collect_and_wait
        end

        # Always false.
        # NOTE(review): presumably disables back-off in the polling loop — confirm against Workers::IntervalLoop.
        def loop_back_off?
          false
        end

        # Samples all threads, measures how long that took, and adjusts the loop
        # wait time accordingly.
        def collect_and_wait
          run_time = Datadog::Utils::Time.measure do
            collect_events
          end

          # Update wait time to throttle profiling
          self.loop_wait_time = compute_wait_time(run_time)
        end

        # Builds one event per sampled thread and pushes them to the recorder
        # (nothing is pushed when no event was collected).
        #
        # @return [Array] the collected events
        def collect_events
          events = []

          # Compute wall time interval
          current_wall_time = Datadog::Utils::Time.get_time
          # On the very first pass (when #start has not set it) the interval is zero.
          last_wall_time = if instance_variable_defined?(:@last_wall_time)
                             @last_wall_time
                           else
                             current_wall_time
                           end

          wall_time_interval_ns = ((current_wall_time - last_wall_time).round(9) * 1e9).to_i
          @last_wall_time = current_wall_time

          # Collect backtraces from each thread
          thread_api.list.each do |thread|
            next unless thread.alive?
            next if ignore_thread.is_a?(Proc) && ignore_thread.call(thread)

            event = collect_thread_event(thread, wall_time_interval_ns)
            events << event unless event.nil?
          end

          # Send events to recorder
          recorder.push(events) unless events.empty?

          events
        end

        # Builds a stack sample event for a single thread.
        #
        # @return [Events::StackSample, nil] nil when the thread has no backtrace locations
        def collect_thread_event(thread, wall_time_interval_ns)
          locations = thread.backtrace_locations
          return if locations.nil?

          # Get actual stack size then trim the stack
          stack_size = locations.length
          locations = locations[0..(max_frames - 1)]

          # Convert backtrace locations into structs
          locations = convert_backtrace_locations(locations)

          thread_id = thread.respond_to?(:native_thread_id) ? thread.native_thread_id : thread.object_id
          # Both identifiers are nil when #get_trace_identifiers returns nil.
          trace_id, span_id = get_trace_identifiers(thread)
          cpu_time = get_cpu_time_interval!(thread)

          # NOTE(review): the first positional argument is passed as nil — presumably the
          # event timestamp; confirm against Events::StackSample.
          Events::StackSample.new(
            nil,
            locations,
            stack_size,
            thread_id,
            trace_id,
            span_id,
            cpu_time,
            wall_time_interval_ns
          )
        end

        # Returns the CPU time consumed by the thread since the previous call for that
        # thread, and stores the current CPU time on the thread for the next call.
        # The first successful call for a thread returns 0.
        #
        # @return [Numeric, nil] nil when the CPU time cannot be obtained
        def get_cpu_time_interval!(thread)
          # Return if we can't get the current CPU time
          unless thread.respond_to?(:cpu_time_instrumentation_installed?) && thread.cpu_time_instrumentation_installed?
            warn_about_missing_cpu_time_instrumentation(thread)
            return
          end

          current_cpu_time_ns = thread.cpu_time(:nanosecond)

          # NOTE: This can still be nil even when all of the checks above passed because of a race: there's a bit of
          # initialization that needs to be done by the thread itself, and it's possible for us to try to sample
          # *before* the thread had time to finish the initialization
          return unless current_cpu_time_ns

          last_cpu_time_ns = (thread[THREAD_LAST_CPU_TIME_KEY] || current_cpu_time_ns)
          interval = current_cpu_time_ns - last_cpu_time_ns

          # Update CPU time for thread
          thread[THREAD_LAST_CPU_TIME_KEY] = current_cpu_time_ns

          # Return interval
          interval
        end

        # Looks up the trace and span identifiers of the correlation active on the given thread.
        #
        # @return [Array, nil] `[trace_id, span_id]`, or nil when the thread is not a `::Thread`
        #   or the tracer does not offer `active_correlation`
        def get_trace_identifiers(thread)
          return unless thread.is_a?(::Thread)
          return unless Datadog.respond_to?(:tracer) && Datadog.tracer.respond_to?(:active_correlation)

          identifier = Datadog.tracer.active_correlation(thread)
          [identifier.trace_id, identifier.span_id]
        end

        # Computes how long to wait before the next sample, given the time the last one took.
        # The result is in the same unit as `used_time` and is never lower than MIN_INTERVAL.
        def compute_wait_time(used_time)
          # We took used_time to get the last sample.
          #
          # What we're computing here is -- if used_time corresponds to max_time_usage_pct of the time we should
          # spend working, how much is (100% - max_time_usage_pct) of the time?
          #
          # For instance, if we took 10ms to sample, and max_time_usage_pct is 1%, then the other 99% is 990ms, which
          # means we need to sleep for 990ms to guarantee that we don't spend more than 1% of the time working.
          used_time_ns = used_time * 1e9
          interval = (used_time_ns / (max_time_usage_pct / 100.0)) - used_time_ns
          [interval / 1e9, MIN_INTERVAL].max
        end

        # Convert backtrace locations into structs
        # Re-use old backtrace location objects if they already exist in the buffer
        def convert_backtrace_locations(locations)
          locations.collect do |location|
            # Re-use existing BacktraceLocation if identical copy, otherwise build a new one.
            recorder[Events::StackSample].cache(:backtrace_locations).fetch(
              # Function name
              location.base_label,
              # Line number
              location.lineno,
              # Filename
              location.path,
              # Build function
              &method(:build_backtrace_location)
            )
          end
        end

        # Builds a new BacktraceLocation whose label and path strings are obtained
        # from the string table of the stack sample buffer. `_id` is unused.
        def build_backtrace_location(_id, base_label, lineno, path)
          string_table = recorder[Events::StackSample].string_table

          Profiling::BacktraceLocation.new(
            string_table.fetch_string(base_label),
            lineno,
            string_table.fetch_string(path)
          )
        end

        private

        # Logs, through an OnlyOnce guard, a debug message about a thread that lacks CPU
        # time instrumentation; the message is only logged if the current thread reports a CPU time.
        def warn_about_missing_cpu_time_instrumentation(thread)
          @warn_about_missing_cpu_time_instrumentation_only_once.run do
            # Is the profiler thread instrumented? If it is, then we know instrumentation is available, but seems to be
            # missing on this thread we just found.
            #
            # As far as we know, it can be missing due to one of the following:
            #
            # a) The thread was started before we installed our instrumentation.
            #    In this case, the fix is to make sure ddtrace gets loaded before any other parts of the application.
            #
            # b) The thread was started using the Ruby native APIs (e.g. from a C extension such as ffi).
            #    We currently have no solution for this case; these threads will always be missing our CPU instrumentation.
            #
            # c) The thread was started with `Thread.start`/`Thread.fork` and hasn't yet enabled the instrumentation.
            #    When threads are started using these APIs, there's a small time window during which the thread has started
            #    but our code to apply the instrumentation hasn't run yet; in these cases it's just a matter of allowing
            #    it to run and our instrumentation to be applied.
            #
            if thread_api.current.respond_to?(:cpu_time) && thread_api.current.cpu_time
              Datadog.logger.debug("Detected thread ('#{thread}') with missing CPU profiling instrumentation.")
            end
          end
        end

        # If the profiler is started for a while, stopped and then restarted OR whenever the process forks, we need to
        # clean up the per-thread cpu time counters we keep, so that the first sample after starting doesn't end up with:
        #
        # a) negative time: At least on my test docker container, and on the reliability environment, after the process
        #    forks, the clock reference changes and (old cpu time - new cpu time) can be < 0
        #
        # b) large amount of time: if the profiler was started, then stopped for some amount of time, and then
        #    restarted, we don't want the first sample to be "blamed" for multiple minutes of CPU time
        #
        # By resetting the last cpu time seen, we start with a clean slate every time we start the stack collector.
        def reset_cpu_time_tracking
          thread_api.list.each do |thread|
            thread[THREAD_LAST_CPU_TIME_KEY] = nil if thread[THREAD_LAST_CPU_TIME_KEY]
          end
        end
      end
    end
  end
end