ddtrace 0.47.0 → 0.48.0

Files changed (100)
  1. checksums.yaml +5 -5
  2. data/.circleci/config.yml +4 -2
  3. data/.circleci/images/primary/Dockerfile-2.0.0 +11 -1
  4. data/.circleci/images/primary/Dockerfile-2.1.10 +11 -1
  5. data/.circleci/images/primary/Dockerfile-2.2.10 +11 -1
  6. data/.circleci/images/primary/Dockerfile-2.3.8 +10 -0
  7. data/.circleci/images/primary/Dockerfile-2.4.6 +10 -0
  8. data/.circleci/images/primary/Dockerfile-2.5.6 +10 -0
  9. data/.circleci/images/primary/Dockerfile-2.6.4 +10 -0
  10. data/.circleci/images/primary/Dockerfile-2.7.0 +10 -0
  11. data/.circleci/images/primary/Dockerfile-jruby-9.2-latest +10 -0
  12. data/.gitlab-ci.yml +18 -18
  13. data/.rubocop.yml +19 -0
  14. data/.rubocop_todo.yml +44 -3
  15. data/Appraisals +55 -1
  16. data/CHANGELOG.md +47 -1
  17. data/Gemfile +10 -0
  18. data/Rakefile +9 -0
  19. data/bin/ddtracerb +15 -0
  20. data/ddtrace.gemspec +4 -2
  21. data/docs/GettingStarted.md +36 -53
  22. data/docs/ProfilingDevelopment.md +88 -0
  23. data/integration/README.md +1 -2
  24. data/integration/apps/rack/Dockerfile +3 -0
  25. data/integration/apps/rack/script/build-images +1 -1
  26. data/integration/apps/rack/script/ci +1 -1
  27. data/integration/apps/rails-five/script/build-images +1 -1
  28. data/integration/apps/rails-five/script/ci +1 -1
  29. data/integration/apps/ruby/script/build-images +1 -1
  30. data/integration/apps/ruby/script/ci +1 -1
  31. data/integration/images/include/http-health-check +1 -1
  32. data/integration/images/wrk/scripts/entrypoint.sh +1 -1
  33. data/integration/script/build-images +1 -1
  34. data/lib/ddtrace.rb +1 -0
  35. data/lib/ddtrace/configuration.rb +39 -13
  36. data/lib/ddtrace/configuration/components.rb +85 -3
  37. data/lib/ddtrace/configuration/settings.rb +31 -0
  38. data/lib/ddtrace/contrib/active_record/configuration/makara_resolver.rb +30 -0
  39. data/lib/ddtrace/contrib/active_record/configuration/resolver.rb +9 -3
  40. data/lib/ddtrace/contrib/resque/configuration/settings.rb +17 -1
  41. data/lib/ddtrace/contrib/resque/patcher.rb +4 -4
  42. data/lib/ddtrace/contrib/resque/resque_job.rb +22 -1
  43. data/lib/ddtrace/contrib/shoryuken/configuration/settings.rb +1 -0
  44. data/lib/ddtrace/contrib/shoryuken/tracer.rb +7 -3
  45. data/lib/ddtrace/diagnostics/environment_logger.rb +1 -1
  46. data/lib/ddtrace/error.rb +2 -0
  47. data/lib/ddtrace/ext/profiling.rb +52 -0
  48. data/lib/ddtrace/ext/transport.rb +1 -0
  49. data/lib/ddtrace/metrics.rb +4 -0
  50. data/lib/ddtrace/profiling.rb +54 -0
  51. data/lib/ddtrace/profiling/backtrace_location.rb +32 -0
  52. data/lib/ddtrace/profiling/buffer.rb +41 -0
  53. data/lib/ddtrace/profiling/collectors/stack.rb +253 -0
  54. data/lib/ddtrace/profiling/encoding/profile.rb +31 -0
  55. data/lib/ddtrace/profiling/event.rb +13 -0
  56. data/lib/ddtrace/profiling/events/stack.rb +102 -0
  57. data/lib/ddtrace/profiling/exporter.rb +23 -0
  58. data/lib/ddtrace/profiling/ext/cpu.rb +54 -0
  59. data/lib/ddtrace/profiling/ext/cthread.rb +134 -0
  60. data/lib/ddtrace/profiling/ext/forking.rb +97 -0
  61. data/lib/ddtrace/profiling/flush.rb +41 -0
  62. data/lib/ddtrace/profiling/pprof/builder.rb +121 -0
  63. data/lib/ddtrace/profiling/pprof/converter.rb +85 -0
  64. data/lib/ddtrace/profiling/pprof/message_set.rb +12 -0
  65. data/lib/ddtrace/profiling/pprof/payload.rb +18 -0
  66. data/lib/ddtrace/profiling/pprof/pprof.proto +212 -0
  67. data/lib/ddtrace/profiling/pprof/pprof_pb.rb +81 -0
  68. data/lib/ddtrace/profiling/pprof/stack_sample.rb +90 -0
  69. data/lib/ddtrace/profiling/pprof/string_table.rb +10 -0
  70. data/lib/ddtrace/profiling/pprof/template.rb +114 -0
  71. data/lib/ddtrace/profiling/preload.rb +3 -0
  72. data/lib/ddtrace/profiling/profiler.rb +28 -0
  73. data/lib/ddtrace/profiling/recorder.rb +87 -0
  74. data/lib/ddtrace/profiling/scheduler.rb +84 -0
  75. data/lib/ddtrace/profiling/tasks/setup.rb +77 -0
  76. data/lib/ddtrace/profiling/transport/client.rb +12 -0
  77. data/lib/ddtrace/profiling/transport/http.rb +122 -0
  78. data/lib/ddtrace/profiling/transport/http/api.rb +43 -0
  79. data/lib/ddtrace/profiling/transport/http/api/endpoint.rb +90 -0
  80. data/lib/ddtrace/profiling/transport/http/api/instance.rb +36 -0
  81. data/lib/ddtrace/profiling/transport/http/api/spec.rb +40 -0
  82. data/lib/ddtrace/profiling/transport/http/builder.rb +28 -0
  83. data/lib/ddtrace/profiling/transport/http/client.rb +33 -0
  84. data/lib/ddtrace/profiling/transport/http/response.rb +21 -0
  85. data/lib/ddtrace/profiling/transport/io.rb +30 -0
  86. data/lib/ddtrace/profiling/transport/io/client.rb +27 -0
  87. data/lib/ddtrace/profiling/transport/io/response.rb +16 -0
  88. data/lib/ddtrace/profiling/transport/parcel.rb +17 -0
  89. data/lib/ddtrace/profiling/transport/request.rb +15 -0
  90. data/lib/ddtrace/profiling/transport/response.rb +8 -0
  91. data/lib/ddtrace/runtime/container.rb +11 -3
  92. data/lib/ddtrace/sampling/rule_sampler.rb +3 -9
  93. data/lib/ddtrace/tasks/exec.rb +48 -0
  94. data/lib/ddtrace/tasks/help.rb +14 -0
  95. data/lib/ddtrace/tracer.rb +21 -0
  96. data/lib/ddtrace/transport/io/client.rb +15 -8
  97. data/lib/ddtrace/transport/parcel.rb +4 -0
  98. data/lib/ddtrace/version.rb +3 -1
  99. data/lib/ddtrace/workers/runtime_metrics.rb +14 -1
  100. metadata +70 -9
data/lib/ddtrace/contrib/active_record/configuration/makara_resolver.rb ADDED
@@ -0,0 +1,30 @@
+module Datadog
+  module Contrib
+    module ActiveRecord
+      module Configuration
+        # The `makara` gem has the concept of **role**, which can be
+        # inferred from the configuration `name`, in the form of:
+        # `master/0`, `replica/0`, `replica/1`, etc.
+        # The first part of this string is the database role.
+        #
+        # This allows the matching of a connection based on its role,
+        # instead of connection-specific information.
+        module MakaraResolver
+          def normalize_for_config(active_record_config)
+            hash = super
+            hash[:makara_role] = active_record_config[:makara_role]
+            hash
+          end
+
+          def normalize_for_resolve(active_record_config)
+            hash = super
+
+            hash[:makara_role] = active_record_config[:name].split('/')[0].to_s if active_record_config[:name].is_a?(String)
+
+            hash
+          end
+        end
+      end
+    end
+  end
+end
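With `makara_role` available as a matching field, connections can be described by role rather than by host or adapter. A sketch of the resulting configuration (the `describes:` option is the integration's standard matcher; the role values come from this diff, the service names are made up):

Datadog.configure do |c|
  c.use :active_record, describes: { makara_role: 'master' }, service_name: 'primary-db'
  c.use :active_record, describes: { makara_role: 'replica' }, service_name: 'replica-db'
end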
data/lib/ddtrace/contrib/active_record/configuration/resolver.rb CHANGED
@@ -1,5 +1,6 @@
 require 'ddtrace/contrib/configuration/resolver'
 require 'ddtrace/vendor/active_record/connection_specification'
+require_relative 'makara_resolver'

 module Datadog
   module Contrib
@@ -28,6 +29,8 @@ module Datadog
         # When more than one configuration could be matched, the last one to match is selected,
         # based on addition order (`#add`).
         class Resolver < Contrib::Configuration::Resolver
+          prepend MakaraResolver
+
           def initialize(active_record_configuration = nil)
             super()

@@ -50,7 +53,7 @@ module Datadog
           def resolve(db_config)
             active_record_config = resolve_connection_key(db_config).symbolize_keys

-            hash = normalize(active_record_config)
+            hash = normalize_for_resolve(active_record_config)

             # Hashes in Ruby maintain insertion order
             _, config = @configurations.reverse_each.find do |matcher, _|
@@ -73,7 +76,7 @@ module Datadog

           def parse_matcher(matcher)
             resolved_pattern = resolve_connection_key(matcher).symbolize_keys
-            normalized = normalize(resolved_pattern)
+            normalized = normalize_for_config(resolved_pattern)

             # Remove empty fields to allow for partial matching
             normalized.reject! { |_, v| v.nil? }
@@ -112,7 +115,7 @@ module Datadog

           # Extract only fields we'd like to match
           # from the ActiveRecord configuration.
-          def normalize(active_record_config)
+          def normalize_for_config(active_record_config)
             {
               adapter: active_record_config[:adapter],
               host: active_record_config[:host],
@@ -121,6 +124,9 @@ module Datadog
               username: active_record_config[:username]
             }
           end
+
+          # Both resolvers perform the same operations for this implementation, but can be specialized
+          alias_method :normalize_for_resolve, :normalize_for_config
         end
       end
     end
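The `prepend MakaraResolver` plus `super` combination is plain Ruby method-resolution layering: the prepended module's method runs first, and `super` falls through to the class's own implementation. A self-contained sketch of the pattern (all names are illustrative, not from ddtrace):

module RoleAware
  def normalize(config)
    hash = super                                  # run the base normalizer first
    hash[:role] = config[:name].to_s.split('/').first
    hash
  end
end

class Normalizer
  prepend RoleAware                               # RoleAware precedes Normalizer in the ancestor chain

  def normalize(config)
    { adapter: config[:adapter] }
  end
end

Normalizer.new.normalize(adapter: 'mysql2', name: 'replica/0')
# => {:adapter=>"mysql2", :role=>"replica"}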
data/lib/ddtrace/contrib/resque/configuration/settings.rb CHANGED
@@ -23,7 +23,23 @@ module Datadog
         end

         option :service_name, default: Ext::SERVICE_NAME
-        option :workers, default: []
+
+        # A list of Ruby worker classes to be instrumented.
+        # The value `nil` has special semantics: it instruments all workers dynamically.
+        #
+        # TODO: 1.0: Automatic patching should be the default behavior.
+        # We should not provide this option in the future,
+        # as our integrations should always instrument all possible scenarios when feasible.
+        option :workers, default: nil do |o|
+          o.on_set do |value|
+            unless value.nil?
+              Datadog.logger.warn(
+                "DEPRECATED: Resque integration now instruments all workers. \n" \
+                'The `workers:` option is unnecessary and will be removed in the future.'
+              )
+            end
+          end
+        end
         option :error_handler, default: Datadog::Tracer::DEFAULT_ON_ERROR
       end
     end
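In practice the deprecation boils down to dropping one option. A hedged before/after sketch (the worker class name is hypothetical):

# Before 0.48: worker classes had to be listed explicitly
Datadog.configure do |c|
  c.use :resque, workers: [MyJob]   # still works, but now logs the deprecation warning
end

# From 0.48 on: all workers are instrumented automatically
Datadog.configure do |c|
  c.use :resque
end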
data/lib/ddtrace/contrib/resque/patcher.rb CHANGED
@@ -17,11 +17,11 @@ module Datadog

         def patch
           require_relative 'resque_job'
-          get_option(:workers).each { |worker| worker.extend(ResqueJob) }
-        end

-        def get_option(option)
-          Datadog.configuration[:resque].get_option(option)
+          ::Resque::Job.send(:prepend, Resque::Job)
+
+          workers = Datadog.configuration[:resque][:workers] || []
+          workers.each { |worker| worker.extend(ResqueJob) }
         end
       end
     end
data/lib/ddtrace/contrib/resque/resque_job.rb CHANGED
@@ -7,9 +7,30 @@ require 'resque'
 module Datadog
   module Contrib
     module Resque
+      # Automatically configures jobs with the {ResqueJob} plugin.
+      module Job
+        def perform
+          if Datadog.configuration[:resque][:workers].nil?
+            job = payload_class
+            job.extend(Datadog::Contrib::Resque::ResqueJob) unless job.is_a? Datadog::Contrib::Resque::ResqueJob
+          end
+        ensure
+          super
+        end
+      end
+
       # Uses Resque job hooks to create traces
       module ResqueJob
-        def around_perform(*args)
+        # `around_perform` hooks are executed in alphabetical order.
+        # We use the lowest printable character that allows for an inline
+        # method definition ('0'), alongside our naming prefix for identification.
+        #
+        # We could, in theory, use any character (e.g. "\x00"), but that would lead
+        # to unreadable stack traces that contain this method call.
+        #
+        # We could also just use `around_perform`, but that might override the user's
+        # own method.
+        def around_perform0_ddtrace(*args)
           return yield unless datadog_configuration && tracer

           tracer.trace(Ext::SPAN_JOB, span_options) do |span|
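Resque discovers `around_perform*` hooks by grepping a job's method names and sorting them, so the name picked above controls where ddtrace lands in the chain. The string ordering is easy to verify in plain Ruby (the second hook name is hypothetical):

['around_perform_retry', 'around_perform0_ddtrace'].sort
# => ["around_perform0_ddtrace", "around_perform_retry"]
# '0' (0x30) sorts before '_' (0x5F) and all ASCII letters, so the ddtrace
# hook wraps user hooks while keeping a readable name in stack traces.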
data/lib/ddtrace/contrib/shoryuken/configuration/settings.rb CHANGED
@@ -23,6 +23,7 @@ module Datadog

         option :service_name, default: Ext::SERVICE_NAME
         option :error_handler, default: Datadog::Tracer::DEFAULT_ON_ERROR
+        option :tag_body, default: false
       end
     end
   end
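SQS message bodies can carry sensitive payloads, so tagging them on spans is now opt-in. Restoring the previous behavior is a one-line setting (a sketch using the option added above):

Datadog.configure do |c|
  c.use :shoryuken, tag_body: true  # re-enables the pre-0.48 job body tag
end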
data/lib/ddtrace/contrib/shoryuken/tracer.rb CHANGED
@@ -12,8 +12,12 @@ module Datadog
        end

        def call(worker_instance, queue, sqs_msg, body)
-          @tracer.trace(Ext::SPAN_JOB, service: @shoryuken_service, span_type: Datadog::Ext::AppTypes::WORKER,
-                        on_error: @error_handler) do |span|
+          @tracer.trace(
+            Ext::SPAN_JOB,
+            service: @shoryuken_service,
+            span_type: Datadog::Ext::AppTypes::WORKER,
+            on_error: @error_handler
+          ) do |span|
            # Set analytics sample rate
            if Contrib::Analytics.enabled?(configuration[:analytics_enabled])
              Contrib::Analytics.set_sample_rate(span, configuration[:analytics_sample_rate])
@@ -26,7 +30,7 @@ module Datadog
            span.set_tag(Ext::TAG_JOB_ID, sqs_msg.message_id)
            span.set_tag(Ext::TAG_JOB_QUEUE, queue)
            span.set_tag(Ext::TAG_JOB_ATTRIBUTES, sqs_msg.attributes) if sqs_msg.respond_to?(:attributes)
-            span.set_tag(Ext::TAG_JOB_BODY, body)
+            span.set_tag(Ext::TAG_JOB_BODY, body) if configuration[:tag_body]

            yield
          end
data/lib/ddtrace/diagnostics/environment_logger.rb CHANGED
@@ -12,7 +12,7 @@ module Datadog
      # Outputs environment information to {Datadog.logger}.
      # Executes only once for the lifetime of the program.
      def log!(transport_responses)
-        return if @executed || !log?
+        return if (defined?(@executed) && @executed) || !log?

        @executed = true

data/lib/ddtrace/error.rb CHANGED
@@ -16,6 +16,8 @@ module Datadog

    def initialize(type = nil, message = nil, backtrace = nil)
      backtrace = Array(backtrace).join("\n")
+
+      # DEV: We should measure if `Utils.utf8_encode` is still needed in practice.
      @type = Utils.utf8_encode(type)
      @message = Utils.utf8_encode(message)
      @backtrace = Utils.utf8_encode(backtrace)
data/lib/ddtrace/ext/profiling.rb ADDED
@@ -0,0 +1,52 @@
+module Datadog
+  module Ext
+    module Profiling
+      ENV_ENABLED = 'DD_PROFILING_ENABLED'.freeze
+      ENV_UPLOAD_TIMEOUT = 'DD_PROFILING_UPLOAD_TIMEOUT'.freeze
+      ENV_MAX_FRAMES = 'DD_PROFILING_MAX_FRAMES'.freeze
+
+      module Pprof
+        LABEL_KEY_SPAN_ID = 'span id'.freeze
+        LABEL_KEY_THREAD_ID = 'thread id'.freeze
+        LABEL_KEY_TRACE_ID = 'trace id'.freeze
+        SAMPLE_VALUE_NO_VALUE = 0
+        VALUE_TYPE_CPU = 'cpu-time'.freeze
+        VALUE_TYPE_WALL = 'wall-time'.freeze
+        VALUE_UNIT_NANOSECONDS = 'nanoseconds'.freeze
+      end
+
+      module Transport
+        module HTTP
+          URI_TEMPLATE_DD_API = 'https://intake.profile.%s/'.freeze
+
+          FORM_FIELD_DATA = 'data[0]'.freeze
+          FORM_FIELD_FORMAT = 'format'.freeze
+          FORM_FIELD_FORMAT_PPROF = 'pprof'.freeze
+          FORM_FIELD_RECORDING_END = 'recording-end'.freeze
+          FORM_FIELD_RECORDING_START = 'recording-start'.freeze
+          FORM_FIELD_RUNTIME = 'runtime'.freeze
+          FORM_FIELD_RUNTIME_ID = 'runtime-id'.freeze
+          FORM_FIELD_TAG_ENV = 'env'.freeze
+          FORM_FIELD_TAG_HOST = 'host'.freeze
+          FORM_FIELD_TAG_LANGUAGE = 'language'.freeze
+          FORM_FIELD_TAG_PROFILER_VERSION = 'profiler_version'.freeze
+          FORM_FIELD_TAG_RUNTIME = 'runtime'.freeze
+          FORM_FIELD_TAG_RUNTIME_ENGINE = 'runtime_engine'.freeze
+          FORM_FIELD_TAG_RUNTIME_ID = 'runtime-id'.freeze
+          FORM_FIELD_TAG_RUNTIME_PLATFORM = 'runtime_platform'.freeze
+          FORM_FIELD_TAG_RUNTIME_VERSION = 'runtime_version'.freeze
+          FORM_FIELD_TAG_SERVICE = 'service'.freeze
+          FORM_FIELD_TAG_VERSION = 'version'.freeze
+          FORM_FIELD_TAGS = 'tags'.freeze
+          FORM_FIELD_TYPES = 'types[0]'.freeze
+          FORM_FIELD_TYPES_AUTO = 'auto'.freeze
+
+          HEADER_CONTENT_TYPE = 'Content-Type'.freeze
+          HEADER_CONTENT_TYPE_OCTET_STREAM = 'application/octet-stream'.freeze
+
+          PPROF_DEFAULT_FILENAME = 'profile.pb.gz'.freeze
+        end
+      end
+    end
+  end
+end
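`URI_TEMPLATE_DD_API` leaves the Datadog site as a `%s` placeholder; the final intake endpoint is composed with an ordinary `format` call (the site value here is illustrative):

format(Datadog::Ext::Profiling::Transport::HTTP::URI_TEMPLATE_DD_API, 'datadoghq.com')
# => "https://intake.profile.datadoghq.com/"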
data/lib/ddtrace/ext/transport.rb CHANGED
@@ -8,6 +8,7 @@ module Datadog
        ENV_DEFAULT_PORT = 'DD_TRACE_AGENT_PORT'.freeze
        ENV_DEFAULT_URL = 'DD_TRACE_AGENT_URL'.freeze
        HEADER_CONTAINER_ID = 'Datadog-Container-ID'.freeze
+        HEADER_DD_API_KEY = 'DD-API-KEY'.freeze
        HEADER_META_LANG = 'Datadog-Meta-Lang'.freeze
        HEADER_META_LANG_VERSION = 'Datadog-Meta-Lang-Version'.freeze
        HEADER_META_LANG_INTERPRETER = 'Datadog-Meta-Lang-Interpreter'.freeze
data/lib/ddtrace/metrics.rb CHANGED
@@ -120,6 +120,10 @@ module Datadog
      metrics.each { |m| send(m.type, *[m.name, m.value, m.options].compact) }
    end

+    def close
+      @statsd.close if @statsd && @statsd.respond_to?(:close)
+    end
+
    Metric = Struct.new(:type, :name, :value, :options) do
      def initialize(*args)
        super
data/lib/ddtrace/profiling.rb ADDED
@@ -0,0 +1,54 @@
+module Datadog
+  # Contains profiler for generating stack profiles, etc.
+  module Profiling
+    module_function

+    GOOGLE_PROTOBUF_MINIMUM_VERSION = Gem::Version.new('3.0')
+
+    def supported?
+      google_protobuf_supported?
+    end
+
+    def native_cpu_time_supported?
+      require 'ddtrace/profiling/ext/cpu'
+      Ext::CPU.supported?
+    end
+
+    def google_protobuf_supported?
+      RUBY_PLATFORM != 'java' \
+        && !Gem.loaded_specs['google-protobuf'].nil? \
+        && Gem.loaded_specs['google-protobuf'].version >= GOOGLE_PROTOBUF_MINIMUM_VERSION \
+        && !defined?(@failed_to_load_protobuf)
+    end
+
+    def load_profiling
+      require 'ddtrace/profiling/ext/cpu'
+      require 'ddtrace/profiling/ext/forking'
+
+      require 'ddtrace/profiling/collectors/stack'
+      require 'ddtrace/profiling/exporter'
+      require 'ddtrace/profiling/recorder'
+      require 'ddtrace/profiling/scheduler'
+      require 'ddtrace/profiling/tasks/setup'
+      require 'ddtrace/profiling/transport/io'
+      require 'ddtrace/profiling/transport/http'
+      require 'ddtrace/profiling/profiler'
+
+      begin
+        require 'ddtrace/profiling/pprof/pprof_pb' if google_protobuf_supported?
+      rescue LoadError => e
+        @failed_to_load_protobuf = true
+        Kernel.warn(
+          "[DDTRACE] Error while loading google-protobuf gem. Cause: '#{e.message}' Location: '#{e.backtrace.first}'. " \
+          'This can happen when google-protobuf is missing its native components. ' \
+          'To fix this, try removing and reinstalling the gem, forcing it to recompile the components: ' \
+          '`gem uninstall google-protobuf -a; BUNDLE_FORCE_RUBY_PLATFORM=true bundle install`. ' \
+          'If the error persists, please contact support via <https://docs.datadoghq.com/help/> or ' \
+          'file a bug at <https://github.com/DataDog/dd-trace-rb/blob/master/CONTRIBUTING.md#found-a-bug>.'
+        )
+      end
+    end
+
+    load_profiling if supported?
+  end
+end
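Since everything above is gated on `supported?`, a boot-time check makes the dependency requirements visible instead of silently skipping the profiler (a sketch; the message wording is mine):

require 'ddtrace'

unless Datadog::Profiling.supported?
  warn 'Profiling unavailable: requires google-protobuf >= 3.0 on a non-JRuby runtime'
end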
data/lib/ddtrace/profiling/backtrace_location.rb ADDED
@@ -0,0 +1,32 @@
+module Datadog
+  module Profiling
+    # Entity class used to represent an entry in a stack trace.
+    # Its fields are a simplified struct version of `Thread::Backtrace::Location`.
+    class BacktraceLocation
+      attr_reader \
+        :base_label,
+        :lineno,
+        :path,
+        :hash
+
+      def initialize(
+        base_label,
+        lineno,
+        path
+      )
+        @base_label = base_label
+        @lineno = lineno
+        @path = path
+        @hash = [base_label, lineno, path].hash
+      end
+
+      def ==(other)
+        hash == other.hash
+      end
+
+      def eql?(other)
+        hash == other.hash
+      end
+    end
+  end
+end
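Because `#hash` is precomputed from the three fields and both `#==` and `#eql?` compare it, identical frames collapse to one object in Hash-based collections (a sketch with made-up frame values):

a = Datadog::Profiling::BacktraceLocation.new('render', 10, 'app/views/home.rb')
b = Datadog::Profiling::BacktraceLocation.new('render', 10, 'app/views/home.rb')

a == b           # => true
[a, b].uniq.size # => 1; repeated frames across samples can be stored once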
data/lib/ddtrace/profiling/buffer.rb ADDED
@@ -0,0 +1,41 @@
+require 'ddtrace/buffer'
+require 'ddtrace/utils/string_table'
+require 'ddtrace/utils/object_set'
+
+module Datadog
+  module Profiling
+    # Profiling buffer that stores profiling events. The buffer has a maximum size and when
+    # the buffer is full, a random event is discarded. This class is thread-safe.
+    class Buffer < Datadog::ThreadSafeBuffer
+      def initialize(*args)
+        super
+        @caches = {}
+        @string_table = Utils::StringTable.new
+      end
+
+      def cache(cache_name)
+        synchronize do
+          @caches[cache_name] ||= Utils::ObjectSet.new
+        end
+      end
+
+      def string_table
+        synchronize do
+          @string_table
+        end
+      end
+
+      protected
+
+      def drain!
+        items = super
+
+        # Clear caches
+        @caches = {}
+        @string_table = Utils::StringTable.new
+
+        items
+      end
+    end
+  end
+end
data/lib/ddtrace/profiling/collectors/stack.rb ADDED
@@ -0,0 +1,253 @@
+require 'ddtrace/profiling/backtrace_location'
+require 'ddtrace/profiling/events/stack'
+require 'ddtrace/utils/only_once'
+require 'ddtrace/utils/time'
+require 'ddtrace/worker'
+require 'ddtrace/workers/polling'
+
+module Datadog
+  module Profiling
+    module Collectors
+      # Collects stack trace samples from Ruby threads for both CPU-time (if available) and wall-clock.
+      # Runs on its own background thread.
+      #
+      class Stack < Worker
+        include Workers::Polling
+
+        DEFAULT_MAX_TIME_USAGE_PCT = 2.0
+        MIN_INTERVAL = 0.01
+        THREAD_LAST_CPU_TIME_KEY = :datadog_profiler_last_cpu_time
+
+        attr_reader \
+          :recorder,
+          :max_frames,
+          :ignore_thread,
+          :max_time_usage_pct,
+          :thread_api
+
+        def initialize(
+          recorder,
+          max_frames: nil,
+          ignore_thread: nil,
+          max_time_usage_pct: DEFAULT_MAX_TIME_USAGE_PCT,
+          thread_api: Thread,
+          fork_policy: Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default
+          interval: MIN_INTERVAL,
+          enabled: true
+        )
+          @recorder = recorder
+          # TODO: Make this a required named argument after we drop support for Ruby 2.0
+          @max_frames = max_frames || raise(ArgumentError, 'missing keyword :max_frames')
+          @ignore_thread = ignore_thread
+          @max_time_usage_pct = max_time_usage_pct
+          @thread_api = thread_api
+
+          # Workers::Async::Thread settings
+          self.fork_policy = fork_policy
+
+          # Workers::IntervalLoop settings
+          self.loop_base_interval = interval
+
+          # Workers::Polling settings
+          self.enabled = enabled
+
+          @warn_about_missing_cpu_time_instrumentation_only_once = Datadog::Utils::OnlyOnce.new
+        end
+
+        def start
+          @last_wall_time = Datadog::Utils::Time.get_time
+          reset_cpu_time_tracking
+          perform
+        end
+
+        def perform
+          collect_and_wait
+        end
+
+        def loop_back_off?
+          false
+        end
+
+        def collect_and_wait
+          run_time = Datadog::Utils::Time.measure do
+            collect_events
+          end
+
+          # Update wait time to throttle profiling
+          self.loop_wait_time = compute_wait_time(run_time)
+        end
+
+        def collect_events
+          events = []
+
+          # Compute wall time interval
+          current_wall_time = Datadog::Utils::Time.get_time
+          last_wall_time = if instance_variable_defined?(:@last_wall_time)
+                             @last_wall_time
+                           else
+                             current_wall_time
+                           end
+
+          wall_time_interval_ns = ((current_wall_time - last_wall_time).round(9) * 1e9).to_i
+          @last_wall_time = current_wall_time
+
+          # Collect backtraces from each thread
+          thread_api.list.each do |thread|
+            next unless thread.alive?
+            next if ignore_thread.is_a?(Proc) && ignore_thread.call(thread)
+
+            event = collect_thread_event(thread, wall_time_interval_ns)
+            events << event unless event.nil?
+          end
+
+          # Send events to recorder
+          recorder.push(events) unless events.empty?
+
+          events
+        end
+
+        def collect_thread_event(thread, wall_time_interval_ns)
+          locations = thread.backtrace_locations
+          return if locations.nil?
+
+          # Get actual stack size then trim the stack
+          stack_size = locations.length
+          locations = locations[0..(max_frames - 1)]
+
+          # Convert backtrace locations into structs
+          locations = convert_backtrace_locations(locations)
+
+          thread_id = thread.respond_to?(:native_thread_id) ? thread.native_thread_id : thread.object_id
+          trace_id, span_id = get_trace_identifiers(thread)
+          cpu_time = get_cpu_time_interval!(thread)
+
+          Events::StackSample.new(
+            nil,
+            locations,
+            stack_size,
+            thread_id,
+            trace_id,
+            span_id,
+            cpu_time,
+            wall_time_interval_ns
+          )
+        end
+
+        def get_cpu_time_interval!(thread)
+          # Return if we can't get the current CPU time
+          unless thread.respond_to?(:cpu_time_instrumentation_installed?) && thread.cpu_time_instrumentation_installed?
+            warn_about_missing_cpu_time_instrumentation(thread)
+            return
+          end
+
+          current_cpu_time_ns = thread.cpu_time(:nanosecond)
+
+          # NOTE: This can still be nil even when all of the checks above passed because of a race: there's a bit of
+          # initialization that needs to be done by the thread itself, and it's possible for us to try to sample
+          # *before* the thread had time to finish the initialization
+          return unless current_cpu_time_ns
+
+          last_cpu_time_ns = (thread[THREAD_LAST_CPU_TIME_KEY] || current_cpu_time_ns)
+          interval = current_cpu_time_ns - last_cpu_time_ns
+
+          # Update CPU time for thread
+          thread[THREAD_LAST_CPU_TIME_KEY] = current_cpu_time_ns
+
+          # Return interval
+          interval
+        end
+
+        def get_trace_identifiers(thread)
+          return unless thread.is_a?(::Thread)
+          return unless Datadog.respond_to?(:tracer) && Datadog.tracer.respond_to?(:active_correlation)
+
+          identifier = Datadog.tracer.active_correlation(thread)
+          [identifier.trace_id, identifier.span_id]
+        end
+
+        def compute_wait_time(used_time)
+          # We took used_time to get the last sample.
+          #
+          # What we're computing here is -- if used_time corresponds to max_time_usage_pct of the time we should
+          # spend working, how much is (100% - max_time_usage_pct) of the time?
+          #
+          # For instance, if we took 10ms to sample, and max_time_usage_pct is 1%, then the other 99% is 990ms, which
+          # means we need to sleep for 990ms to guarantee that we don't spend more than 1% of the time working.
+          used_time_ns = used_time * 1e9
+          interval = (used_time_ns / (max_time_usage_pct / 100.0)) - used_time_ns
+          [interval / 1e9, MIN_INTERVAL].max
+        end
+
+        # Convert backtrace locations into structs
+        # Re-use old backtrace location objects if they already exist in the buffer
+        def convert_backtrace_locations(locations)
+          locations.collect do |location|
+            # Re-use existing BacktraceLocation if identical copy, otherwise build a new one.
+            recorder[Events::StackSample].cache(:backtrace_locations).fetch(
+              # Function name
+              location.base_label,
+              # Line number
+              location.lineno,
+              # Filename
+              location.path,
+              # Build function
+              &method(:build_backtrace_location)
+            )
+          end
+        end
+
+        def build_backtrace_location(_id, base_label, lineno, path)
+          string_table = recorder[Events::StackSample].string_table
+
+          Profiling::BacktraceLocation.new(
+            string_table.fetch_string(base_label),
+            lineno,
+            string_table.fetch_string(path)
+          )
+        end
+
+        private
+
+        def warn_about_missing_cpu_time_instrumentation(thread)
+          @warn_about_missing_cpu_time_instrumentation_only_once.run do
+            # Is the profiler thread instrumented? If it is, then we know instrumentation is available, but it seems
+            # to be missing on this thread we just found.
+            #
+            # As far as we know, it can be missing due to one of the following:
+            #
+            # a) The thread was started before we installed our instrumentation.
+            #    In this case, the fix is to make sure ddtrace gets loaded before any other parts of the application.
+            #
+            # b) The thread was started using the Ruby native APIs (e.g. from a C extension such as ffi).
+            #    We currently have no solution for this case; these threads will always be missing our CPU
+            #    instrumentation.
+            #
+            # c) The thread was started with `Thread.start`/`Thread.fork` and hasn't yet enabled the instrumentation.
+            #    When threads are started using these APIs, there's a small time window during which the thread has
+            #    started but our code to apply the instrumentation hasn't run yet; in these cases it's just a matter
+            #    of allowing it to run and our instrumentation to be applied.
+            #
+            if thread_api.current.respond_to?(:cpu_time) && thread_api.current.cpu_time
+              Datadog.logger.debug("Detected thread ('#{thread}') with missing CPU profiling instrumentation.")
+            end
+          end
+        end
+
+        # If the profiler is started for a while, stopped and then restarted OR whenever the process forks, we need to
+        # clean up the per-thread cpu time counters we keep, so that the first sample after starting doesn't end up
+        # with:
+        #
+        # a) negative time: At least on my test docker container, and on the reliability environment, after the
+        #    process forks, the clock reference changes and (old cpu time - new cpu time) can be < 0
+        #
+        # b) large amount of time: if the profiler was started, then stopped for some amount of time, and then
+        #    restarted, we don't want the first sample to be "blamed" for multiple minutes of CPU time
+        #
+        # By resetting the last cpu time seen, we start with a clean slate every time we start the stack collector.
+        def reset_cpu_time_tracking
+          thread_api.list.each do |thread|
+            thread[THREAD_LAST_CPU_TIME_KEY] = nil if thread[THREAD_LAST_CPU_TIME_KEY]
+          end
+        end
+      end
+    end
+  end
+end
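The throttling formula in `compute_wait_time` can be sanity-checked by hand. With the default `max_time_usage_pct` of 2.0 and a hypothetical 10ms sampling pass:

used_time_ns = 0.010 * 1e9                               # sampling took 10ms
interval = (used_time_ns / (2.0 / 100.0)) - used_time_ns # 10ms is 2% of 500ms
[interval / 1e9, 0.01].max                               # => 0.49
# The collector sleeps ~490ms before the next pass, capping its share of
# wall-clock time at roughly 2% (and never sampling faster than MIN_INTERVAL).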