datadog 2.0.0.beta1 → 2.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +181 -1
  3. data/ext/datadog_profiling_native_extension/NativeExtensionDesign.md +1 -1
  4. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +40 -32
  5. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +23 -12
  6. data/ext/datadog_profiling_native_extension/crashtracker.c +108 -0
  7. data/ext/datadog_profiling_native_extension/extconf.rb +9 -23
  8. data/ext/datadog_profiling_native_extension/heap_recorder.c +81 -4
  9. data/ext/datadog_profiling_native_extension/heap_recorder.h +12 -1
  10. data/ext/datadog_profiling_native_extension/http_transport.c +1 -94
  11. data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +86 -0
  12. data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +4 -0
  13. data/ext/datadog_profiling_native_extension/native_extension_helpers.rb +2 -12
  14. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +25 -86
  15. data/ext/datadog_profiling_native_extension/profiling.c +2 -0
  16. data/ext/datadog_profiling_native_extension/ruby_helpers.h +3 -5
  17. data/ext/datadog_profiling_native_extension/stack_recorder.c +161 -62
  18. data/lib/datadog/appsec/contrib/devise/tracking.rb +8 -0
  19. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +43 -13
  20. data/lib/datadog/appsec/event.rb +2 -2
  21. data/lib/datadog/core/configuration/components.rb +2 -1
  22. data/lib/datadog/core/configuration/option.rb +7 -5
  23. data/lib/datadog/core/configuration/settings.rb +34 -79
  24. data/lib/datadog/core/configuration.rb +20 -4
  25. data/lib/datadog/core/environment/platform.rb +7 -1
  26. data/lib/datadog/core/remote/client/capabilities.rb +2 -1
  27. data/lib/datadog/core/remote/client.rb +1 -5
  28. data/lib/datadog/core/remote/configuration/repository.rb +1 -1
  29. data/lib/datadog/core/remote/dispatcher.rb +3 -3
  30. data/lib/datadog/core/remote/transport/http/config.rb +5 -5
  31. data/lib/datadog/core/telemetry/client.rb +18 -10
  32. data/lib/datadog/core/telemetry/emitter.rb +9 -13
  33. data/lib/datadog/core/telemetry/event.rb +247 -57
  34. data/lib/datadog/core/telemetry/ext.rb +1 -0
  35. data/lib/datadog/core/telemetry/heartbeat.rb +1 -3
  36. data/lib/datadog/core/telemetry/http/ext.rb +4 -1
  37. data/lib/datadog/core/telemetry/http/response.rb +4 -0
  38. data/lib/datadog/core/telemetry/http/transport.rb +9 -4
  39. data/lib/datadog/core/telemetry/request.rb +59 -0
  40. data/lib/datadog/core/utils/base64.rb +22 -0
  41. data/lib/datadog/opentelemetry/sdk/span_processor.rb +19 -2
  42. data/lib/datadog/opentelemetry/sdk/trace/span.rb +3 -17
  43. data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
  44. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +25 -0
  45. data/lib/datadog/profiling/component.rb +49 -17
  46. data/lib/datadog/profiling/crashtracker.rb +91 -0
  47. data/lib/datadog/profiling/exporter.rb +6 -3
  48. data/lib/datadog/profiling/http_transport.rb +7 -11
  49. data/lib/datadog/profiling/load_native_extension.rb +14 -1
  50. data/lib/datadog/profiling/profiler.rb +9 -2
  51. data/lib/datadog/profiling/stack_recorder.rb +6 -2
  52. data/lib/datadog/profiling.rb +12 -0
  53. data/lib/datadog/tracing/component.rb +5 -1
  54. data/lib/datadog/tracing/configuration/dynamic.rb +39 -1
  55. data/lib/datadog/tracing/configuration/settings.rb +1 -0
  56. data/lib/datadog/tracing/contrib/action_pack/integration.rb +1 -1
  57. data/lib/datadog/tracing/contrib/action_view/integration.rb +1 -1
  58. data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +1 -0
  59. data/lib/datadog/tracing/contrib/active_record/integration.rb +11 -1
  60. data/lib/datadog/tracing/contrib/active_support/integration.rb +1 -1
  61. data/lib/datadog/tracing/contrib/configuration/resolver.rb +43 -0
  62. data/lib/datadog/tracing/contrib/grape/endpoint.rb +43 -5
  63. data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +1 -1
  64. data/lib/datadog/tracing/correlation.rb +3 -4
  65. data/lib/datadog/tracing/remote.rb +5 -1
  66. data/lib/datadog/tracing/sampling/ext.rb +5 -1
  67. data/lib/datadog/tracing/sampling/matcher.rb +75 -26
  68. data/lib/datadog/tracing/sampling/rule.rb +27 -4
  69. data/lib/datadog/tracing/sampling/rule_sampler.rb +19 -1
  70. data/lib/datadog/tracing/sampling/span/matcher.rb +13 -41
  71. data/lib/datadog/tracing/span.rb +7 -2
  72. data/lib/datadog/tracing/span_link.rb +92 -0
  73. data/lib/datadog/tracing/span_operation.rb +6 -4
  74. data/lib/datadog/tracing/trace_operation.rb +12 -0
  75. data/lib/datadog/tracing/tracer.rb +4 -3
  76. data/lib/datadog/tracing/transport/serializable_trace.rb +3 -1
  77. data/lib/datadog/tracing/utils.rb +16 -0
  78. data/lib/datadog/version.rb +1 -1
  79. metadata +10 -31
  80. data/lib/datadog/core/telemetry/collector.rb +0 -248
  81. data/lib/datadog/core/telemetry/v1/app_event.rb +0 -59
  82. data/lib/datadog/core/telemetry/v1/application.rb +0 -94
  83. data/lib/datadog/core/telemetry/v1/configuration.rb +0 -27
  84. data/lib/datadog/core/telemetry/v1/dependency.rb +0 -45
  85. data/lib/datadog/core/telemetry/v1/host.rb +0 -59
  86. data/lib/datadog/core/telemetry/v1/install_signature.rb +0 -38
  87. data/lib/datadog/core/telemetry/v1/integration.rb +0 -66
  88. data/lib/datadog/core/telemetry/v1/product.rb +0 -36
  89. data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -108
  90. data/lib/datadog/core/telemetry/v2/app_client_configuration_change.rb +0 -41
  91. data/lib/datadog/core/telemetry/v2/request.rb +0 -29
@@ -72,8 +72,10 @@ module Datadog
72
72
  exporter = build_profiler_exporter(settings, recorder, worker, internal_metadata: internal_metadata)
73
73
  transport = build_profiler_transport(settings, agent_settings)
74
74
  scheduler = Profiling::Scheduler.new(exporter: exporter, transport: transport, interval: upload_period_seconds)
75
+ crashtracker = build_crashtracker(settings, transport)
76
+ profiler = Profiling::Profiler.new(worker: worker, scheduler: scheduler, optional_crashtracker: crashtracker)
75
77
 
76
- [Profiling::Profiler.new(worker: worker, scheduler: scheduler), { profiling_enabled: true }]
78
+ [profiler, { profiling_enabled: true }]
77
79
  end
78
80
 
79
81
  private_class_method def self.build_thread_context_collector(settings, recorder, optional_tracer, timeline_enabled)
@@ -110,20 +112,53 @@ module Datadog
110
112
  )
111
113
  end
112
114
 
115
+ private_class_method def self.build_crashtracker(settings, transport)
116
+ return unless settings.profiling.advanced.experimental_crash_tracking_enabled
117
+
118
+ # By default, the transport is an instance of HttpTransport, which validates the configuration and makes
119
+ # it available for us to use here.
120
+ # But we support overriding the transport with a user-specific one, which may e.g. write stuff to a file,
121
+ # and thus can't really provide a valid configuration to talk to a Datadog agent. Thus, in this situation,
122
+ # we can't use the crashtracker, even if enabled.
123
+ unless transport.respond_to?(:exporter_configuration)
124
+ Datadog.logger.warn(
125
+ 'Cannot enable profiling crash tracking as a custom settings.profiling.exporter.transport is configured'
126
+ )
127
+ return
128
+ end
129
+
130
+ Datadog::Profiling::Crashtracker.new(
131
+ exporter_configuration: transport.exporter_configuration,
132
+ tags: Datadog::Profiling::TagBuilder.call(settings: settings),
133
+ upload_timeout_seconds: settings.profiling.upload.timeout_seconds,
134
+ )
135
+ end
136
+
113
137
  private_class_method def self.enable_gc_profiling?(settings)
114
- # See comments on the setting definition for more context on why it exists.
115
- if settings.profiling.advanced.force_enable_gc_profiling
116
- if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('3')
117
- Datadog.logger.debug(
118
- 'Profiling time/resources spent in Garbage Collection force enabled. Do not use Ractors in combination ' \
119
- 'with this option as profiles will be incomplete.'
120
- )
121
- end
138
+ return false unless settings.profiling.advanced.gc_enabled
122
139
 
123
- true
124
- else
125
- false
140
+ # SEVERE - Only with Ractors
141
+ # On Ruby versions 3.0 (all), 3.1.0 to 3.1.3, and 3.2.0 to 3.2.2 gc profiling can trigger a VM bug
142
+ # that causes a segmentation fault during garbage collection of Ractors
143
+ # (https://bugs.ruby-lang.org/issues/18464). We don't allow enabling gc profiling on such Rubies.
144
+ # This bug is fixed on Ruby versions 3.1.4, 3.2.3 and 3.3.0.
145
+ if RUBY_VERSION.start_with?('3.0.') ||
146
+ (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION < '3.1.4') ||
147
+ (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION < '3.2.3')
148
+ Datadog.logger.warn(
149
+ "Current Ruby version (#{RUBY_VERSION}) has a VM bug where enabling GC profiling would cause "\
150
+ 'crashes (https://bugs.ruby-lang.org/issues/18464). GC profiling has been disabled.'
151
+ )
152
+ return false
153
+ elsif RUBY_VERSION.start_with?('3.')
154
+ Datadog.logger.debug(
155
+ 'In all known versions of Ruby 3.x, using Ractors may result in GC profiling unexpectedly ' \
156
+ 'stopping (https://bugs.ruby-lang.org/issues/19112). Note that this stop has no impact in your ' \
157
+ 'application stability or performance. This does not happen if Ractors are not used.'
158
+ )
126
159
  end
160
+
161
+ true
127
162
  end
128
163
 
129
164
  private_class_method def self.get_heap_sample_every(settings)
@@ -135,10 +170,7 @@ module Datadog
135
170
  end
136
171
 
137
172
  private_class_method def self.enable_allocation_profiling?(settings)
138
- unless settings.profiling.allocation_enabled
139
- # Allocation profiling disabled, short-circuit out
140
- return false
141
- end
173
+ return false unless settings.profiling.allocation_enabled
142
174
 
143
175
  # Allocation sampling is safe and supported on Ruby 2.x, but has a few caveats on Ruby 3.x.
144
176
 
@@ -235,7 +267,7 @@ module Datadog
235
267
 
236
268
  private_class_method def self.no_signals_workaround_enabled?(settings) # rubocop:disable Metrics/MethodLength
237
269
  setting_value = settings.profiling.advanced.no_signals_workaround_enabled
238
- legacy_ruby_that_should_use_workaround = RUBY_VERSION.start_with?('2.3.', '2.4.', '2.5.')
270
+ legacy_ruby_that_should_use_workaround = RUBY_VERSION.start_with?('2.5.')
239
271
 
240
272
  unless [true, false, :auto].include?(setting_value)
241
273
  Datadog.logger.error(
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'libdatadog'
4
+
5
+ module Datadog
6
+ module Profiling
7
+ # Used to report Ruby VM crashes.
8
+ # The interesting bits are implemented as native code and using libdatadog.
9
+ #
10
+ # NOTE: The crashtracker native state is a singleton; so even if you create multiple instances of `Crashtracker`
11
+ # and start them, it only works as "last writer wins". Same for stop -- there's only one state, so calling stop
12
+ # on it will stop the crash tracker, regardless of which instance started it.
13
+ #
14
+ # Methods prefixed with _native_ are implemented in `crashtracker.c`
15
+ class Crashtracker
16
+ private
17
+
18
+ attr_reader \
19
+ :exporter_configuration,
20
+ :tags_as_array,
21
+ :path_to_crashtracking_receiver_binary,
22
+ :ld_library_path,
23
+ :upload_timeout_seconds
24
+
25
+ public
26
+
27
+ def initialize(
28
+ exporter_configuration:,
29
+ tags:,
30
+ upload_timeout_seconds:,
31
+ path_to_crashtracking_receiver_binary: Libdatadog.path_to_crashtracking_receiver_binary,
32
+ ld_library_path: Libdatadog.ld_library_path
33
+ )
34
+ @exporter_configuration = exporter_configuration
35
+ @tags_as_array = tags.to_a
36
+ @upload_timeout_seconds = upload_timeout_seconds
37
+ @path_to_crashtracking_receiver_binary = path_to_crashtracking_receiver_binary
38
+ @ld_library_path = ld_library_path
39
+ end
40
+
41
+ def start
42
+ start_or_update_on_fork(action: :start)
43
+ end
44
+
45
+ def reset_after_fork
46
+ start_or_update_on_fork(action: :update_on_fork)
47
+ end
48
+
49
+ def stop
50
+ begin
51
+ self.class._native_stop
52
+ Datadog.logger.debug('Crash tracking stopped successfully')
53
+ rescue => e
54
+ Datadog.logger.error("Failed to stop crash tracking: #{e.message}")
55
+ end
56
+ end
57
+
58
+ private
59
+
60
+ def start_or_update_on_fork(action:)
61
+ unless path_to_crashtracking_receiver_binary
62
+ Datadog.logger.warn(
63
+ "Cannot #{action} profiling crash tracking as no path_to_crashtracking_receiver_binary was found"
64
+ )
65
+ return
66
+ end
67
+
68
+ unless ld_library_path
69
+ Datadog.logger.warn(
70
+ "Cannot #{action} profiling crash tracking as no ld_library_path was found"
71
+ )
72
+ return
73
+ end
74
+
75
+ begin
76
+ self.class._native_start_or_update_on_fork(
77
+ action: action,
78
+ exporter_configuration: exporter_configuration,
79
+ path_to_crashtracking_receiver_binary: path_to_crashtracking_receiver_binary,
80
+ ld_library_path: ld_library_path,
81
+ tags_as_array: tags_as_array,
82
+ upload_timeout_seconds: Integer(upload_timeout_seconds),
83
+ )
84
+ Datadog.logger.debug("Crash tracking #{action} successful")
85
+ rescue => e
86
+ Datadog.logger.error("Failed to #{action} crash tracking: #{e.message}")
87
+ end
88
+ end
89
+ end
90
+ end
91
+ end
@@ -54,10 +54,11 @@ module Datadog
54
54
 
55
55
  def flush
56
56
  worker_stats = @worker.stats_and_reset_not_thread_safe
57
- start, finish, compressed_pprof = pprof_recorder.serialize
58
- @last_flush_finish_at = finish
57
+ serialization_result = pprof_recorder.serialize
58
+ return if serialization_result.nil?
59
59
 
60
- return if compressed_pprof.nil? # We don't want to report empty profiles
60
+ start, finish, compressed_pprof, profile_stats = serialization_result
61
+ @last_flush_finish_at = finish
61
62
 
62
63
  if duration_below_threshold?(start, finish)
63
64
  Datadog.logger.debug('Skipped exporting profiling events as profile duration is below minimum')
@@ -77,6 +78,8 @@ module Datadog
77
78
  internal_metadata: internal_metadata.merge(
78
79
  {
79
80
  worker_stats: worker_stats,
81
+ profile_stats: profile_stats,
82
+ recorder_stats: pprof_recorder.stats,
80
83
  gc: GC.stat,
81
84
  }
82
85
  ),
@@ -7,6 +7,8 @@ module Datadog
7
7
  # Used to report profiling data to Datadog.
8
8
  # Methods prefixed with _native_ are implemented in `http_transport.c`
9
9
  class HttpTransport
10
+ attr_reader :exporter_configuration
11
+
10
12
  def initialize(agent_settings:, site:, api_key:, upload_timeout_seconds:)
11
13
  @upload_timeout_milliseconds = (upload_timeout_seconds * 1_000).to_i
12
14
 
@@ -14,19 +16,19 @@ module Datadog
14
16
 
15
17
  @exporter_configuration =
16
18
  if agentless?(site, api_key)
17
- [:agentless, site, api_key]
19
+ [:agentless, site, api_key].freeze
18
20
  else
19
- [:agent, base_url_from(agent_settings)]
21
+ [:agent, base_url_from(agent_settings)].freeze
20
22
  end
21
23
 
22
- status, result = validate_exporter(@exporter_configuration)
24
+ status, result = validate_exporter(exporter_configuration)
23
25
 
24
26
  raise(ArgumentError, "Failed to initialize transport: #{result}") if status == :error
25
27
  end
26
28
 
27
29
  def export(flush)
28
30
  status, result = do_export(
29
- exporter_configuration: @exporter_configuration,
31
+ exporter_configuration: exporter_configuration,
30
32
  upload_timeout_milliseconds: @upload_timeout_milliseconds,
31
33
 
32
34
  # why "timespec"?
@@ -66,12 +68,6 @@ module Datadog
66
68
  end
67
69
  end
68
70
 
69
- # Used to log soft failures in `ddog_Vec_tag_push` (e.g. we still report the profile in these cases)
70
- # Called from native code
71
- def self.log_failure_to_process_tag(failure_details)
72
- Datadog.logger.warn("Failed to add tag to profiling request: #{failure_details}")
73
- end
74
-
75
71
  private
76
72
 
77
73
  def base_url_from(agent_settings)
@@ -136,7 +132,7 @@ module Datadog
136
132
  end
137
133
 
138
134
  def config_without_api_key
139
- [@exporter_configuration[0..1]].to_h
135
+ [exporter_configuration[0..1]].to_h
140
136
  end
141
137
  end
142
138
  end
@@ -20,7 +20,20 @@ rescue LoadError => e
20
20
  end
21
21
 
22
22
  extension_name = "datadog_profiling_native_extension.#{RUBY_VERSION}_#{RUBY_PLATFORM}"
23
- full_file_path = "#{__dir__}/../../#{extension_name}.#{RbConfig::CONFIG['DLEXT']}"
23
+ file_name = "#{extension_name}.#{RbConfig::CONFIG['DLEXT']}"
24
+ full_file_path = "#{__dir__}/../../#{file_name}"
25
+
26
+ unless File.exist?(full_file_path)
27
+ extension_dir = Gem.loaded_specs['datadog'].extension_dir
28
+ candidate_path = "#{extension_dir}/#{file_name}"
29
+ if File.exist?(candidate_path)
30
+ full_file_path = candidate_path
31
+ else # rubocop:disable Style/EmptyElse
32
+ # We found none of the files. This is unexpected. Let's go ahead anyway, the error is going to be reported further
33
+ # down anyway.
34
+ end
35
+ end
36
+
24
37
  init_function_name = "Init_#{extension_name.split('.').first}"
25
38
 
26
39
  status, result = Datadog::Profiling::Loader._native_load(full_file_path, init_function_name)
@@ -8,21 +8,24 @@ module Datadog
8
8
 
9
9
  private
10
10
 
11
- attr_reader :worker, :scheduler
11
+ attr_reader :worker, :scheduler, :optional_crashtracker
12
12
 
13
13
  public
14
14
 
15
- def initialize(worker:, scheduler:)
15
+ def initialize(worker:, scheduler:, optional_crashtracker:)
16
16
  @worker = worker
17
17
  @scheduler = scheduler
18
+ @optional_crashtracker = optional_crashtracker
18
19
  end
19
20
 
20
21
  def start
21
22
  after_fork! do
23
+ optional_crashtracker.reset_after_fork if optional_crashtracker
22
24
  worker.reset_after_fork
23
25
  scheduler.reset_after_fork
24
26
  end
25
27
 
28
+ optional_crashtracker.start if optional_crashtracker
26
29
  worker.start(on_failure_proc: proc { component_failed(:worker) })
27
30
  scheduler.start(on_failure_proc: proc { component_failed(:scheduler) })
28
31
  end
@@ -32,6 +35,7 @@ module Datadog
32
35
 
33
36
  stop_worker
34
37
  stop_scheduler
38
+ optional_crashtracker.stop if optional_crashtracker
35
39
  end
36
40
 
37
41
  private
@@ -51,6 +55,9 @@ module Datadog
51
55
  'See previous log messages for details.'
52
56
  )
53
57
 
58
+ # We explicitly do not stop the crash tracker in this situation, under the assumption that, if a component failed,
59
+ # we're operating in a degraded state and crash tracking may still be helpful.
60
+
54
61
  if failed_component == :worker
55
62
  stop_scheduler
56
63
  elsif failed_component == :scheduler
@@ -33,11 +33,11 @@ module Datadog
33
33
  status, result = @no_concurrent_synchronize_mutex.synchronize { self.class._native_serialize(self) }
34
34
 
35
35
  if status == :ok
36
- start, finish, encoded_pprof = result
36
+ start, finish, encoded_pprof, profile_stats = result
37
37
 
38
38
  Datadog.logger.debug { "Encoded profile covering #{start.iso8601} to #{finish.iso8601}" }
39
39
 
40
- [start, finish, encoded_pprof]
40
+ [start, finish, encoded_pprof, profile_stats]
41
41
  else
42
42
  error_message = result
43
43
 
@@ -64,6 +64,10 @@ module Datadog
64
64
  def reset_after_fork
65
65
  self.class._native_reset_after_fork(self)
66
66
  end
67
+
68
+ def stats
69
+ self.class._native_stats(self)
70
+ end
67
71
  end
68
72
  end
69
73
  end
@@ -65,6 +65,17 @@ module Datadog
65
65
  !!(profiler.send(:scheduler).running? if profiler)
66
66
  end
67
67
 
68
+ def self.wait_until_running(timeout_seconds: 5)
69
+ profiler = Datadog.send(:components).profiler
70
+ if profiler
71
+ # Use .send(...) to avoid exposing the attr_reader as an API to the outside
72
+ worker = profiler.send(:worker)
73
+ worker.wait_until_running(timeout_seconds: timeout_seconds)
74
+ else
75
+ raise 'Profiler not enabled or available'
76
+ end
77
+ end
78
+
68
79
  private_class_method def self.replace_noop_allocation_count
69
80
  def self.allocation_count # rubocop:disable Lint/NestedMethodDefinition (On purpose!)
70
81
  Datadog::Profiling::Collectors::CpuAndWallTimeWorker._native_allocation_count
@@ -132,6 +143,7 @@ module Datadog
132
143
  require_relative 'profiling/collectors/idle_sampling_helper'
133
144
  require_relative 'profiling/collectors/stack'
134
145
  require_relative 'profiling/collectors/thread_context'
146
+ require_relative 'profiling/crashtracker'
135
147
  require_relative 'profiling/stack_recorder'
136
148
  require_relative 'profiling/exporter'
137
149
  require_relative 'profiling/flush'
@@ -124,9 +124,13 @@ module Datadog
124
124
  end
125
125
 
126
126
  WRITER_RECORD_ENVIRONMENT_INFORMATION_CALLBACK = lambda do |_, responses|
127
- Tracing::Diagnostics::EnvironmentLogger.collect_and_log!(responses: responses)
127
+ WRITER_RECORD_ENVIRONMENT_INFORMATION_ONLY_ONCE.run do
128
+ Tracing::Diagnostics::EnvironmentLogger.collect_and_log!(responses: responses)
129
+ end
128
130
  end
129
131
 
132
+ WRITER_RECORD_ENVIRONMENT_INFORMATION_ONLY_ONCE = Core::Utils::OnlyOnce.new
133
+
130
134
  # Create new lambda for writer callback,
131
135
  # capture the current sampler in the callback closure.
132
136
  def writer_update_priority_sampler_rates_callback(sampler)
@@ -53,8 +53,46 @@ module Datadog
53
53
  end
54
54
  end
55
55
 
56
+ # Dynamic configuration for `DD_TRACE_SAMPLING_RULES`.
57
+ class TracingSamplingRules < SimpleOption
58
+ def initialize
59
+ super('tracing_sampling_rules', 'DD_TRACE_SAMPLING_RULES', :rules)
60
+ end
61
+
62
+ # Ensures sampler is rebuilt and new configuration is applied
63
+ def call(tracing_sampling_rules)
64
+ # Modify the remote configuration value so that it matches the
65
+ # local environment variable it configures.
66
+ if tracing_sampling_rules
67
+ tracing_sampling_rules.each do |rule|
68
+ next unless (tags = rule['tags'])
69
+
70
+ # Tag maps come in as arrays of 'key' and `value_glob`.
71
+ # We need to convert them into a hash for local use.
72
+ tag_array = tags.map! do |tag|
73
+ [tag['key'], tag['value_glob']]
74
+ end
75
+
76
+ rule['tags'] = tag_array.to_h
77
+ end
78
+
79
+ # The configuration is stored as JSON, so we need to convert it back
80
+ tracing_sampling_rules = tracing_sampling_rules.to_json
81
+ end
82
+
83
+ super(tracing_sampling_rules)
84
+ Datadog.send(:components).reconfigure_live_sampler
85
+ end
86
+
87
+ protected
88
+
89
+ def configuration_object
90
+ Datadog.configuration.tracing.sampling
91
+ end
92
+ end
93
+
56
94
  # List of all tracing dynamic configuration options supported.
57
- OPTIONS = [LogInjectionEnabled, TracingHeaderTags, TracingSamplingRate].map do |option_class|
95
+ OPTIONS = [LogInjectionEnabled, TracingHeaderTags, TracingSamplingRate, TracingSamplingRules].map do |option_class|
58
96
  option = option_class.new
59
97
  [option.name, option.env_var, option]
60
98
  end
@@ -274,6 +274,7 @@ module Datadog
274
274
  # @return [String,nil]
275
275
  # @public_api
276
276
  option :rules do |o|
277
+ o.type :string, nilable: true
277
278
  o.default { ENV.fetch(Configuration::Ext::Sampling::ENV_RULES, nil) }
278
279
  end
279
280
 
@@ -3,7 +3,7 @@
3
3
  require_relative 'configuration/settings'
4
4
  require_relative 'patcher'
5
5
  require_relative '../integration'
6
- require_relative '../rails/integration'
6
+ require_relative '../rails/ext'
7
7
  require_relative '../rails/utils'
8
8
 
9
9
  module Datadog
@@ -3,7 +3,7 @@
3
3
  require_relative 'configuration/settings'
4
4
  require_relative 'patcher'
5
5
  require_relative '../integration'
6
- require_relative '../rails/integration'
6
+ require_relative '../rails/ext'
7
7
  require_relative '../rails/utils'
8
8
 
9
9
  module Datadog
@@ -32,6 +32,7 @@ module Datadog
32
32
  # based on addition order (`#add`).
33
33
  class Resolver < Contrib::Configuration::Resolver
34
34
  prepend MakaraResolver
35
+ prepend Contrib::Configuration::CachingResolver
35
36
 
36
37
  def initialize(active_record_configuration = nil)
37
38
  super()
@@ -4,8 +4,9 @@ require_relative 'configuration/resolver'
4
4
  require_relative 'configuration/settings'
5
5
  require_relative 'events'
6
6
  require_relative 'patcher'
7
+ require_relative '../component'
7
8
  require_relative '../integration'
8
- require_relative '../rails/integration'
9
+ require_relative '../rails/ext'
9
10
  require_relative '../rails/utils'
10
11
 
11
12
  module Datadog
@@ -50,6 +51,15 @@ module Datadog
50
51
  def resolver
51
52
  @resolver ||= Configuration::Resolver.new
52
53
  end
54
+
55
+ def reset_resolver_cache
56
+ @resolver&.reset_cache
57
+ end
58
+
59
+ Contrib::Component.register('activerecord') do |_config|
60
+ # Ensure resolver cache is reset on configuration change
61
+ Datadog.configuration.tracing.fetch_integration(:active_record).reset_resolver_cache
62
+ end
53
63
  end
54
64
  end
55
65
  end
@@ -4,7 +4,7 @@ require_relative '../integration'
4
4
  require_relative 'configuration/settings'
5
5
  require_relative 'patcher'
6
6
  require_relative 'cache/redis'
7
- require_relative '../rails/integration'
7
+ require_relative '../rails/ext'
8
8
  require_relative '../rails/utils'
9
9
 
10
10
  module Datadog
@@ -79,6 +79,49 @@ module Datadog
79
79
  matcher
80
80
  end
81
81
  end
82
+
83
+ # The {CachingResolver} is a mixin that provides caching functionality to the {Resolver} class.
84
+ # This is useful when {Resolver#resolve} returns values that are expensive to compute.
85
+ # This is a size-limited, FIFO cache.
86
+ #
87
+ # @example
88
+ # class MyResolver < Datadog::Tracing::Contrib::Configuration::Resolver
89
+ # prepend Datadog::Tracing::Contrib::Configuration::CachingResolver
90
+ # # ...
91
+ # end
92
+ module CachingResolver
93
+ # @param [Integer] cache_limit maximum number of entries to cache
94
+ def initialize(*args, cache_limit: 200)
95
+ super(*args)
96
+
97
+ @cache_limit = cache_limit
98
+ @cache = {}
99
+ end
100
+
101
+ # (see Resolver#resolve)
102
+ def resolve(value)
103
+ if @cache.key?(value)
104
+ @cache[value]
105
+ else
106
+ if @cache.size >= @cache_limit
107
+ @cache.shift # Remove the oldest entry if cache is full
108
+ end
109
+
110
+ @cache[value] = super
111
+ end
112
+ end
113
+
114
+ # (see Resolver#add)
115
+ def add(matcher, value)
116
+ reset_cache # Bust the cache when a new matcher is added
117
+ super
118
+ end
119
+
120
+ # Clears the internal cache.
121
+ def reset_cache
122
+ @cache.clear
123
+ end
124
+ end
82
125
  end
83
126
  end
84
127
  end
@@ -92,8 +92,7 @@ module Datadog
92
92
  # Measure service stats
93
93
  Contrib::Analytics.set_measured(span)
94
94
 
95
- # catch thrown exceptions
96
- handle_error(span, payload[:exception_object]) if payload[:exception_object]
95
+ handle_error_and_status_code(span, endpoint, payload)
97
96
 
98
97
  # override the current span with this notification values
99
98
  span.set_tag(Ext::TAG_ROUTE_ENDPOINT, api_view) unless api_view.nil?
@@ -110,6 +109,30 @@ module Datadog
110
109
  Datadog.logger.error(e.message)
111
110
  end
112
111
 
112
+ # Status code resolution is tied to the exception handling
113
+ def handle_error_and_status_code(span, endpoint, payload)
114
+ status = nil
115
+
116
+ # Handle exceptions and status code
117
+ if (exception_object = payload[:exception_object])
118
+ # If the exception is not an internal Grape error, we won't have a status code at this point.
119
+ status = exception_object.status if exception_object.respond_to?(:status)
120
+
121
+ handle_error(span, exception_object, status)
122
+ else
123
+ # Status code is unreliable in `endpoint_run.grape` if there was an exception.
124
+ # It is only after `Grape::Middleware::Error#run_rescue_handler` that the error status code of a request with a
125
+ # Ruby exception error is resolved. But that handler is called further down the Grape middleware stack.
126
+ # Rack span will then be the most reliable source for status codes.
127
+ # DEV: As a corollary, instrumenting Grape without Rack will provide incomplete
128
+ # DEV: status code information.
129
+ status = endpoint.status
130
+ span.set_error(endpoint) if error_status_codes.include?(status)
131
+ end
132
+
133
+ span.set_tag(Tracing::Metadata::Ext::HTTP::TAG_STATUS_CODE, status) if status
134
+ end
135
+
113
136
  def endpoint_start_render(*)
114
137
  return if Thread.current[KEY_RENDER]
115
138
  return unless enabled?
@@ -193,9 +216,10 @@ module Datadog
193
216
 
194
217
  private
195
218
 
196
- def handle_error(span, exception)
197
- if exception.respond_to?('status')
198
- span.set_error(exception) if error_status_codes.include?(exception.status)
219
+ def handle_error(span, exception, status = nil)
220
+ status ||= (exception.status if exception.respond_to?(:status))
221
+ if status
222
+ span.set_error(exception) if error_status_codes.include?(status)
199
223
  else
200
224
  on_error.call(span, exception)
201
225
  end
@@ -240,6 +264,20 @@ module Datadog
240
264
  datadog_configuration[:analytics_sample_rate]
241
265
  end
242
266
 
267
+ def exception_is_error?(exception)
268
+ return false unless exception
269
+ return true unless exception.respond_to?(:status)
270
+
271
+ error_status?(status.exception)
272
+ end
273
+
274
+ def error_status?(status)
275
+ matcher = datadog_configuration[:error_statuses]
276
+ return true unless matcher
277
+
278
+ matcher.include?(status) if matcher
279
+ end
280
+
243
281
  def enabled?
244
282
  Datadog.configuration.tracing.enabled && \
245
283
  datadog_configuration[:enabled] == true
@@ -24,7 +24,7 @@ module Datadog
24
24
 
25
25
  Tracing.trace(Ext::SPAN_QUERY, service: service) do |span, trace_op|
26
26
  span.resource = sql
27
- span.span_type = Tracing::Metadata::Ext::SQL::TYPE
27
+ span.type = Tracing::Metadata::Ext::SQL::TYPE
28
28
 
29
29
  if datadog_configuration[:peer_service]
30
30
  span.set_tag(