datadog 2.29.0 → 2.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +87 -2
  3. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +21 -12
  4. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +9 -7
  5. data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +18 -0
  6. data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +10 -0
  7. data/ext/datadog_profiling_native_extension/extconf.rb +6 -24
  8. data/ext/datadog_profiling_native_extension/heap_recorder.c +5 -6
  9. data/ext/datadog_profiling_native_extension/http_transport.c +51 -64
  10. data/ext/datadog_profiling_native_extension/native_extension_helpers.rb +0 -13
  11. data/ext/datadog_profiling_native_extension/profiling.c +3 -1
  12. data/ext/datadog_profiling_native_extension/setup_signal_handler.c +24 -8
  13. data/ext/datadog_profiling_native_extension/setup_signal_handler.h +1 -3
  14. data/ext/datadog_profiling_native_extension/stack_recorder.c +29 -43
  15. data/ext/libdatadog_api/crashtracker.c +5 -8
  16. data/ext/libdatadog_api/crashtracker_report_exception.c +34 -144
  17. data/ext/libdatadog_api/datadog_ruby_common.c +18 -0
  18. data/ext/libdatadog_api/datadog_ruby_common.h +10 -0
  19. data/ext/libdatadog_api/di.c +79 -0
  20. data/ext/libdatadog_api/extconf.rb +5 -20
  21. data/ext/libdatadog_api/init.c +5 -2
  22. data/ext/libdatadog_extconf_helpers.rb +57 -11
  23. data/lib/datadog/ai_guard/component.rb +2 -0
  24. data/lib/datadog/ai_guard/configuration/settings.rb +3 -0
  25. data/lib/datadog/ai_guard/contrib/ruby_llm/chat_instrumentation.rb +41 -3
  26. data/lib/datadog/ai_guard/evaluation/content_builder.rb +31 -0
  27. data/lib/datadog/ai_guard/evaluation/content_part.rb +36 -0
  28. data/lib/datadog/ai_guard/evaluation/no_op_result.rb +3 -1
  29. data/lib/datadog/ai_guard/evaluation/request.rb +14 -9
  30. data/lib/datadog/ai_guard/evaluation/result.rb +3 -1
  31. data/lib/datadog/ai_guard/evaluation.rb +36 -7
  32. data/lib/datadog/ai_guard.rb +26 -8
  33. data/lib/datadog/appsec/autoload.rb +1 -1
  34. data/lib/datadog/appsec/component.rb +11 -7
  35. data/lib/datadog/appsec/contrib/active_record/patcher.rb +3 -0
  36. data/lib/datadog/appsec/contrib/devise/integration.rb +1 -1
  37. data/lib/datadog/appsec/contrib/excon/patcher.rb +2 -0
  38. data/lib/datadog/appsec/contrib/excon/ssrf_detection_middleware.rb +1 -1
  39. data/lib/datadog/appsec/contrib/graphql/gateway/watcher.rb +1 -1
  40. data/lib/datadog/appsec/contrib/rack/gateway/request.rb +1 -1
  41. data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +10 -11
  42. data/lib/datadog/appsec/contrib/rack/integration.rb +1 -1
  43. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +25 -2
  44. data/lib/datadog/appsec/contrib/rack/response_body.rb +36 -0
  45. data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +2 -2
  46. data/lib/datadog/appsec/contrib/rails/integration.rb +1 -1
  47. data/lib/datadog/appsec/contrib/rails/patcher.rb +2 -2
  48. data/lib/datadog/appsec/contrib/rest_client/patcher.rb +2 -0
  49. data/lib/datadog/appsec/contrib/rest_client/request_ssrf_detection_patch.rb +2 -2
  50. data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +3 -3
  51. data/lib/datadog/appsec/event.rb +1 -17
  52. data/lib/datadog/appsec/instrumentation/gateway/middleware.rb +2 -3
  53. data/lib/datadog/appsec/instrumentation/gateway.rb +2 -15
  54. data/lib/datadog/appsec/monitor/gateway/watcher.rb +4 -2
  55. data/lib/datadog/appsec/utils/http/media_type.rb +1 -2
  56. data/lib/datadog/appsec/utils/http/url_encoded.rb +2 -2
  57. data/lib/datadog/appsec.rb +5 -9
  58. data/lib/datadog/core/configuration/base.rb +17 -5
  59. data/lib/datadog/core/configuration/components.rb +21 -8
  60. data/lib/datadog/core/configuration/config_helper.rb +9 -0
  61. data/lib/datadog/core/configuration/option.rb +32 -6
  62. data/lib/datadog/core/configuration/option_definition.rb +38 -12
  63. data/lib/datadog/core/configuration/options.rb +41 -7
  64. data/lib/datadog/core/configuration/settings.rb +42 -3
  65. data/lib/datadog/core/configuration/supported_configurations.rb +17 -0
  66. data/lib/datadog/core/contrib/rails/railtie.rb +32 -0
  67. data/lib/datadog/core/contrib/rails/utils.rb +7 -3
  68. data/lib/datadog/core/crashtracking/component.rb +7 -15
  69. data/lib/datadog/core/environment/container.rb +2 -2
  70. data/lib/datadog/core/environment/ext.rb +1 -0
  71. data/lib/datadog/core/environment/identity.rb +25 -3
  72. data/lib/datadog/core/environment/process.rb +12 -0
  73. data/lib/datadog/core/metrics/client.rb +5 -5
  74. data/lib/datadog/core/process_discovery.rb +5 -0
  75. data/lib/datadog/core/remote/component.rb +38 -21
  76. data/lib/datadog/core/runtime/metrics.rb +2 -3
  77. data/lib/datadog/core/telemetry/component.rb +3 -0
  78. data/lib/datadog/core/telemetry/event/app_client_configuration_change.rb +2 -3
  79. data/lib/datadog/core/telemetry/event/app_extended_heartbeat.rb +32 -0
  80. data/lib/datadog/core/telemetry/event/app_started.rb +151 -169
  81. data/lib/datadog/core/telemetry/event.rb +1 -7
  82. data/lib/datadog/core/telemetry/ext.rb +1 -0
  83. data/lib/datadog/core/telemetry/transport/http/telemetry.rb +5 -0
  84. data/lib/datadog/core/telemetry/worker.rb +20 -0
  85. data/lib/datadog/core/utils/base64.rb +1 -1
  86. data/lib/datadog/core/utils/only_once.rb +1 -1
  87. data/lib/datadog/core/utils/spawn_monkey_patch.rb +36 -0
  88. data/lib/datadog/core/workers/async.rb +1 -1
  89. data/lib/datadog/core/workers/interval_loop.rb +13 -6
  90. data/lib/datadog/core/workers/queue.rb +0 -4
  91. data/lib/datadog/core/workers/runtime_metrics.rb +9 -1
  92. data/lib/datadog/core.rb +0 -1
  93. data/lib/datadog/data_streams/pathway_context.rb +1 -1
  94. data/lib/datadog/data_streams/processor.rb +1 -0
  95. data/lib/datadog/di/boot.rb +3 -4
  96. data/lib/datadog/di/component.rb +20 -4
  97. data/lib/datadog/di/instrumenter.rb +20 -10
  98. data/lib/datadog/di/probe_manager.rb +79 -62
  99. data/lib/datadog/di/probe_notification_builder.rb +148 -33
  100. data/lib/datadog/di/probe_notifier_worker.rb +52 -6
  101. data/lib/datadog/di/probe_repository.rb +198 -0
  102. data/lib/datadog/di/remote.rb +5 -6
  103. data/lib/datadog/di/serializer.rb +127 -9
  104. data/lib/datadog/di/transport/http.rb +12 -3
  105. data/lib/datadog/di/transport/input.rb +46 -8
  106. data/lib/datadog/di.rb +81 -0
  107. data/lib/datadog/kit/enable_core_dumps.rb +1 -1
  108. data/lib/datadog/open_feature/configuration.rb +2 -0
  109. data/lib/datadog/open_feature/evaluation_engine.rb +1 -1
  110. data/lib/datadog/open_feature/exposures/reporter.rb +1 -1
  111. data/lib/datadog/open_feature/exposures/worker.rb +1 -1
  112. data/lib/datadog/open_feature/remote.rb +1 -1
  113. data/lib/datadog/open_feature/transport.rb +1 -1
  114. data/lib/datadog/opentelemetry/configuration/settings.rb +2 -0
  115. data/lib/datadog/profiling/collectors/code_provenance.rb +2 -3
  116. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +14 -1
  117. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +1 -1
  118. data/lib/datadog/profiling/component.rb +31 -1
  119. data/lib/datadog/profiling/http_transport.rb +5 -6
  120. data/lib/datadog/profiling/load_native_extension.rb +1 -1
  121. data/lib/datadog/profiling/profiler.rb +15 -12
  122. data/lib/datadog/profiling/scheduler.rb +2 -2
  123. data/lib/datadog/profiling/tasks/exec.rb +2 -2
  124. data/lib/datadog/profiling/tasks/setup.rb +2 -2
  125. data/lib/datadog/profiling.rb +1 -2
  126. data/lib/datadog/single_step_instrument.rb +1 -1
  127. data/lib/datadog/tracing/buffer.rb +3 -3
  128. data/lib/datadog/tracing/component.rb +11 -0
  129. data/lib/datadog/tracing/configuration/settings.rb +2 -1
  130. data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +2 -2
  131. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +20 -0
  132. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/patcher.rb +3 -1
  133. data/lib/datadog/tracing/contrib/action_view/events/render_template.rb +1 -1
  134. data/lib/datadog/tracing/contrib/active_job/events/discard.rb +1 -1
  135. data/lib/datadog/tracing/contrib/active_job/events/enqueue.rb +1 -1
  136. data/lib/datadog/tracing/contrib/active_job/events/enqueue_at.rb +1 -1
  137. data/lib/datadog/tracing/contrib/active_job/events/enqueue_retry.rb +1 -1
  138. data/lib/datadog/tracing/contrib/active_job/events/perform.rb +1 -1
  139. data/lib/datadog/tracing/contrib/active_job/events/retry_stopped.rb +1 -1
  140. data/lib/datadog/tracing/contrib/active_model_serializers/events/render.rb +1 -1
  141. data/lib/datadog/tracing/contrib/active_model_serializers/events/serialize.rb +1 -1
  142. data/lib/datadog/tracing/contrib/active_record/events/instantiation.rb +1 -1
  143. data/lib/datadog/tracing/contrib/active_record/events/sql.rb +1 -1
  144. data/lib/datadog/tracing/contrib/active_record/utils.rb +1 -1
  145. data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +1 -1
  146. data/lib/datadog/tracing/contrib/active_support/notifications/subscription.rb +2 -2
  147. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +1 -1
  148. data/lib/datadog/tracing/contrib/configurable.rb +18 -3
  149. data/lib/datadog/tracing/contrib/dalli/integration.rb +4 -1
  150. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +1 -1
  151. data/lib/datadog/tracing/contrib/ethon/configuration/settings.rb +5 -1
  152. data/lib/datadog/tracing/contrib/ethon/ext.rb +1 -0
  153. data/lib/datadog/tracing/contrib/excon/configuration/settings.rb +5 -2
  154. data/lib/datadog/tracing/contrib/excon/ext.rb +1 -0
  155. data/lib/datadog/tracing/contrib/excon/middleware.rb +2 -2
  156. data/lib/datadog/tracing/contrib/faraday/configuration/settings.rb +5 -2
  157. data/lib/datadog/tracing/contrib/faraday/ext.rb +1 -0
  158. data/lib/datadog/tracing/contrib/faraday/middleware.rb +2 -2
  159. data/lib/datadog/tracing/contrib/grape/endpoint.rb +7 -7
  160. data/lib/datadog/tracing/contrib/grape/instrumentation.rb +13 -8
  161. data/lib/datadog/tracing/contrib/grape/patcher.rb +6 -1
  162. data/lib/datadog/tracing/contrib/grpc/configuration/settings.rb +5 -2
  163. data/lib/datadog/tracing/contrib/grpc/ext.rb +1 -0
  164. data/lib/datadog/tracing/contrib/http/configuration/settings.rb +5 -2
  165. data/lib/datadog/tracing/contrib/http/ext.rb +1 -0
  166. data/lib/datadog/tracing/contrib/http/instrumentation.rb +1 -1
  167. data/lib/datadog/tracing/contrib/httpclient/configuration/settings.rb +5 -2
  168. data/lib/datadog/tracing/contrib/httpclient/ext.rb +1 -0
  169. data/lib/datadog/tracing/contrib/httprb/configuration/settings.rb +5 -2
  170. data/lib/datadog/tracing/contrib/httprb/ext.rb +1 -0
  171. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +1 -1
  172. data/lib/datadog/tracing/contrib/karafka/configuration/settings.rb +5 -1
  173. data/lib/datadog/tracing/contrib/karafka/ext.rb +1 -0
  174. data/lib/datadog/tracing/contrib/opensearch/patcher.rb +1 -1
  175. data/lib/datadog/tracing/contrib/que/configuration/settings.rb +5 -2
  176. data/lib/datadog/tracing/contrib/que/ext.rb +1 -0
  177. data/lib/datadog/tracing/contrib/rack/configuration/settings.rb +5 -1
  178. data/lib/datadog/tracing/contrib/rack/ext.rb +1 -0
  179. data/lib/datadog/tracing/contrib/rails/configuration/settings.rb +5 -2
  180. data/lib/datadog/tracing/contrib/rails/ext.rb +1 -0
  181. data/lib/datadog/tracing/contrib/rails/log_injection.rb +1 -1
  182. data/lib/datadog/tracing/contrib/rails/patcher.rb +0 -1
  183. data/lib/datadog/tracing/contrib/rails/runner.rb +1 -1
  184. data/lib/datadog/tracing/contrib/rake/instrumentation.rb +2 -2
  185. data/lib/datadog/tracing/contrib/redis/tags.rb +1 -1
  186. data/lib/datadog/tracing/contrib/rest_client/configuration/settings.rb +5 -2
  187. data/lib/datadog/tracing/contrib/rest_client/ext.rb +1 -0
  188. data/lib/datadog/tracing/contrib/sidekiq/configuration/settings.rb +5 -1
  189. data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
  190. data/lib/datadog/tracing/contrib/sinatra/configuration/settings.rb +5 -1
  191. data/lib/datadog/tracing/contrib/sinatra/ext.rb +1 -0
  192. data/lib/datadog/tracing/contrib/status_range_matcher.rb +4 -0
  193. data/lib/datadog/tracing/contrib/stripe/request.rb +1 -1
  194. data/lib/datadog/tracing/contrib/waterdrop/configuration/settings.rb +5 -1
  195. data/lib/datadog/tracing/contrib/waterdrop/ext.rb +1 -0
  196. data/lib/datadog/tracing/distributed/datadog.rb +4 -2
  197. data/lib/datadog/tracing/event.rb +1 -1
  198. data/lib/datadog/tracing/metadata/ext.rb +4 -0
  199. data/lib/datadog/tracing/remote.rb +1 -1
  200. data/lib/datadog/tracing/sampling/ext.rb +2 -0
  201. data/lib/datadog/tracing/sampling/priority_sampler.rb +13 -0
  202. data/lib/datadog/tracing/sampling/rule.rb +1 -1
  203. data/lib/datadog/tracing/sampling/rule_sampler.rb +54 -25
  204. data/lib/datadog/tracing/sampling/span/rule_parser.rb +1 -1
  205. data/lib/datadog/tracing/span_operation.rb +1 -1
  206. data/lib/datadog/tracing/sync_writer.rb +0 -1
  207. data/lib/datadog/tracing/trace_operation.rb +50 -6
  208. data/lib/datadog/tracing/tracer.rb +25 -0
  209. data/lib/datadog/tracing/transport/io/client.rb +1 -1
  210. data/lib/datadog/tracing/transport/trace_formatter.rb +11 -0
  211. data/lib/datadog/tracing/writer.rb +0 -1
  212. data/lib/datadog/version.rb +1 -1
  213. metadata +15 -8
  214. data/lib/datadog/tracing/workers/trace_writer.rb +0 -204
@@ -27,6 +27,9 @@ module Datadog
27
27
  metrics_manager:,
28
28
  dependency_collection:,
29
29
  logger:,
30
+ settings:,
31
+ agent_settings:,
32
+ extended_heartbeat_interval_seconds:,
30
33
  enabled: true,
31
34
  shutdown_timeout: Workers::Polling::DEFAULT_SHUTDOWN_TIMEOUT,
32
35
  buffer_size: DEFAULT_BUFFER_MAX_SIZE
@@ -35,8 +38,11 @@ module Datadog
35
38
  @metrics_manager = metrics_manager
36
39
  @dependency_collection = dependency_collection
37
40
  @logger = logger
41
+ @settings = settings
42
+ @agent_settings = agent_settings
38
43
 
39
44
  @ticks_per_heartbeat = (heartbeat_interval_seconds / metrics_aggregation_interval_seconds).to_i
45
+ @ticks_per_extended_heartbeat = (extended_heartbeat_interval_seconds / metrics_aggregation_interval_seconds).to_i
40
46
  @current_ticks = 0
41
47
 
42
48
  # Workers::Polling settings
@@ -63,6 +69,7 @@ module Datadog
63
69
  self.buffer = buffer_klass.new(@buffer_size)
64
70
 
65
71
  @initial_event_once = Utils::OnlyOnceSuccessful.new(APP_STARTED_EVENT_RETRIES)
72
+ @extended_heartbeat_ticks = 0
66
73
  end
67
74
 
68
75
  attr_reader :logger
@@ -151,6 +158,13 @@ module Datadog
151
158
  end
152
159
 
153
160
  @current_ticks += 1
161
+ @extended_heartbeat_ticks += 1
162
+
163
+ if @extended_heartbeat_ticks >= @ticks_per_extended_heartbeat
164
+ @extended_heartbeat_ticks = 0
165
+ extended_heartbeat!
166
+ end
167
+
154
168
  return if @current_ticks < @ticks_per_heartbeat
155
169
 
156
170
  @current_ticks = 0
@@ -170,6 +184,12 @@ module Datadog
170
184
  send_event(Event::AppHeartbeat.new)
171
185
  end
172
186
 
187
+ def extended_heartbeat!
188
+ return if !enabled? || !sent_initial_event?
189
+
190
+ send_event(Event::AppExtendedHeartbeat.new(settings: @settings, agent_settings: @agent_settings))
191
+ end
192
+
173
193
  def started!
174
194
  return unless enabled?
175
195
 
@@ -14,7 +14,7 @@ module Datadog
14
14
  end
15
15
 
16
16
  def self.strict_decode64(str)
17
- str.unpack1('m0')
17
+ str.unpack1('m0') #: String # 'm0' format always returns a String
18
18
  end
19
19
  end
20
20
  end
@@ -3,7 +3,7 @@
3
3
  module Datadog
4
4
  module Core
5
5
  module Utils
6
- # Helper class to execute something only once such as not repeating warning logs, and instrumenting classes
6
+ # Helper class to execute something only once, such as not repeating warning logs and instrumenting classes
7
7
  # only once.
8
8
  #
9
9
  # Thread-safe when used correctly (e.g. be careful of races when lazily initializing instances of this class).
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Datadog
4
+ module Core
5
+ module Utils
6
+ module SpawnMonkeyPatch
7
+ # @param lineage_envs_provider [#call] returns a Hash of env vars to merge into the child process
8
+ def self.apply!(lineage_envs_provider:)
9
+ @lineage_envs_provider = lineage_envs_provider
10
+ ::Process.singleton_class.prepend(ProcessSpawnPatch)
11
+ true
12
+ end
13
+
14
+ module ProcessSpawnPatch
15
+ def spawn(*args, **opts)
16
+ args.replace(SpawnMonkeyPatch.inject_lineage_envs(args))
17
+ super
18
+ end
19
+ end
20
+
21
+ # Process.spawn(env?, cmd, ...): env is optional first arg (Hash). When present, merge
22
+ # runtime_ids into it; when absent, prepend full ENV + runtime_ids so the child inherits both.
23
+ def self.inject_lineage_envs(args)
24
+ runtime_ids = @lineage_envs_provider.call
25
+ env_provided = Hash === args.first
26
+
27
+ base_env = env_provided ? args.first : DATADOG_ENV.to_h
28
+ env = base_env.merge(runtime_ids)
29
+ rest = env_provided ? args.drop(1) : args
30
+
31
+ [env, *rest]
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -168,7 +168,7 @@ module Datadog
168
168
  rescue Exception => e
169
169
  @error = e
170
170
  Datadog.logger.debug(
171
- "Worker thread error. Cause: #{e.class.name} #{e.message} Location: #{Array(e.backtrace).first}"
171
+ "Worker thread error. Cause: #{e.class}: #{e} Location: #{Array(e.backtrace).first}"
172
172
  )
173
173
  raise
174
174
 
@@ -70,16 +70,23 @@ module Datadog
70
70
  true
71
71
  end
72
72
 
73
- # TODO This overwrites Queue's +work_pending?+ method with an
74
- # implementation that, to me, is at leat questionable semantically:
75
- # the Queue's idea of pending work is if the buffer is not empty,
76
- # but this module says that work is pending if the work processing
77
- # loop is scheduled to run (in other words, as long as the background
78
- # thread is running, there is always pending work).
73
+ # TODO: This method carries two semantics today:
74
+ # 1. Is there work available to process?
75
+ # 2. Should the main loop keep running?
76
+ # Things get messy when `Queue` is mixed in,
77
+ # wanting to override semantic 1 but not 2.
78
+ # This should probably be split into two methods:
79
+ # `work_pending?` (semantic 1) and `continue_loop?` (semantic 2).
80
+ # Today, `run_loop?` performs semantic 2, but should probably be
81
+ # renamed to `continue_loop?`, since that would make it clear that
82
+ # we're just checking if we can "keep going inside the loop",
83
+ # and not checking if we should run the loop right now.
84
+ # Clean up {Workers::Queue} after this.
79
85
  def work_pending?
80
86
  run_loop?
81
87
  end
82
88
 
89
+ # TODO: Probably should be renamed to `continue_loop?`, see `work_pending?` TODO.
83
90
  def run_loop?
84
91
  return false unless instance_variable_defined?(:@run_loop)
85
92
 
@@ -130,10 +130,6 @@ module Datadog
130
130
  # they all override +perform+ and the correct behavior depends on
131
131
  # placing IntervalLoop after Queue.
132
132
  #
133
- # The TraceWriter worker then defines +work_pending?+ to be the
134
- # same as Queue implementation here... Essentially, it demands
135
- # the behavior that perhaps should be applied to all workers.
136
- #
137
133
  # Until this mess is untangled, call +buffer.empty?+ here.
138
134
  buffer.empty? && !in_iteration?
139
135
  end
@@ -21,7 +21,15 @@ module Datadog
21
21
  :metrics
22
22
 
23
23
  def initialize(telemetry:, **options)
24
- @metrics = options.fetch(:metrics) { Core::Runtime::Metrics.new(logger: options[:logger], telemetry: telemetry) }
24
+ @metrics = options.fetch(:metrics) do
25
+ Core::Runtime::Metrics.new(
26
+ logger: options[:logger],
27
+ telemetry: telemetry,
28
+ experimental_propagate_process_tags_enabled: options.fetch(:propagate_process_tags_enabled) do
29
+ options.fetch(:experimental_propagate_process_tags_enabled)
30
+ end
31
+ )
32
+ end
25
33
 
26
34
  # Workers::Async::Thread settings
27
35
  self.fork_policy = options.fetch(:fork_policy, Workers::Async::Thread::FORK_POLICY_STOP)
data/lib/datadog/core.rb CHANGED
@@ -22,7 +22,6 @@ module Datadog
22
22
  end
23
23
  end
24
24
 
25
- DATADOG_ENV = Core::Configuration::ConfigHelper.new
26
25
  extend Core::Extensions
27
26
 
28
27
  # Add shutdown hook:
@@ -46,7 +46,7 @@ module Datadog
46
46
  decode(binary_data)
47
47
  rescue ArgumentError => e
48
48
  # Invalid base64 encoding - may indicate version mismatch or corruption
49
- Datadog.logger.debug("Failed to decode DSM pathway context: #{e.message}")
49
+ Datadog.logger.debug { "Failed to decode DSM pathway context: #{e.class}: #{e}" }
50
50
  nil
51
51
  end
52
52
  end
@@ -316,6 +316,7 @@ module Datadog
316
316
  end
317
317
 
318
318
  payload = {
319
+ 'Env' => @settings.env || 'none',
319
320
  'Service' => @settings.service,
320
321
  'TracerVersion' => Datadog::VERSION::STRING,
321
322
  'Lang' => 'ruby',
@@ -12,13 +12,13 @@ require_relative 'probe_builder'
12
12
  require_relative 'probe_manager'
13
13
  require_relative 'probe_notification_builder'
14
14
  require_relative 'probe_notifier_worker'
15
+ require_relative 'probe_repository'
15
16
  require_relative 'redactor'
16
17
  require_relative 'serializer'
17
18
  require_relative 'transport/http'
18
19
  require_relative 'utils'
19
20
 
20
- # Steep: https://github.com/ruby/rbs/pull/2715
21
- if %w[1 true yes].include?(Datadog::DATADOG_ENV['DD_DYNAMIC_INSTRUMENTATION_ENABLED']) # steep:ignore ArgumentTypeMismatch
21
+ if %w[1 true yes].include?(Datadog::DATADOG_ENV['DD_DYNAMIC_INSTRUMENTATION_ENABLED'])
22
22
 
23
23
  # For initial release of Dynamic Instrumentation, activate code tracking
24
24
  # only if DI is explicitly requested in the environment.
@@ -36,8 +36,7 @@ require_relative 'contrib'
36
36
 
37
37
  Datadog::DI::Contrib.load_now_or_later
38
38
 
39
- # Steep: https://github.com/ruby/rbs/pull/2715
40
- if %w[1 true yes].include?(Datadog::DATADOG_ENV['DD_DYNAMIC_INSTRUMENTATION_ENABLED']) # steep:ignore ArgumentTypeMismatch
39
+ if %w[1 true yes].include?(Datadog::DATADOG_ENV['DD_DYNAMIC_INSTRUMENTATION_ENABLED'])
41
40
  if Datadog::DATADOG_ENV['DD_DYNAMIC_INSTRUMENTATION_PROBE_FILE']
42
41
  require_relative 'probe_file_loader'
43
42
  Datadog::DI::ProbeFileLoader.load_now_or_later
@@ -49,6 +49,10 @@ module Datadog
49
49
  logger.warn("di: cannot enable dynamic instrumentation: Ruby 2.6+ is required, but running on #{RUBY_VERSION}")
50
50
  return false
51
51
  end
52
+ unless DI.respond_to?(:exception_message)
53
+ logger.warn("di: cannot enable dynamic instrumentation: C extension is not available")
54
+ return false
55
+ end
52
56
  true
53
57
  end
54
58
  end
@@ -63,9 +67,19 @@ module Datadog
63
67
  @redactor = Redactor.new(settings)
64
68
  @serializer = Serializer.new(settings, redactor, telemetry: telemetry)
65
69
  @instrumenter = Instrumenter.new(settings, serializer, logger, code_tracker: code_tracker, telemetry: telemetry)
66
- @probe_notifier_worker = ProbeNotifierWorker.new(settings, logger, agent_settings: agent_settings, telemetry: telemetry)
70
+ @probe_repository = ProbeRepository.new
67
71
  @probe_notification_builder = ProbeNotificationBuilder.new(settings, serializer)
68
- @probe_manager = ProbeManager.new(settings, instrumenter, probe_notification_builder, probe_notifier_worker, logger, telemetry: telemetry)
72
+ @probe_notifier_worker = ProbeNotifierWorker.new(
73
+ settings, logger,
74
+ agent_settings: agent_settings,
75
+ probe_repository: probe_repository,
76
+ probe_notification_builder: probe_notification_builder,
77
+ telemetry: telemetry,
78
+ )
79
+ @probe_manager = ProbeManager.new(
80
+ settings, instrumenter, probe_notification_builder, probe_notifier_worker, logger, probe_repository,
81
+ telemetry: telemetry,
82
+ )
69
83
  probe_notifier_worker.start
70
84
  end
71
85
 
@@ -75,6 +89,7 @@ module Datadog
75
89
  attr_reader :telemetry
76
90
  attr_reader :code_tracker
77
91
  attr_reader :instrumenter
92
+ attr_reader :probe_repository
78
93
  attr_reader :probe_notifier_worker
79
94
  attr_reader :probe_notification_builder
80
95
  attr_reader :probe_manager
@@ -106,8 +121,9 @@ module Datadog
106
121
  )
107
122
  payload = probe_notification_builder.build_errored(probe, exc)
108
123
  probe_notifier_worker.add_status(payload)
109
- rescue # standard:disable Lint/UselessRescue
110
- # TODO report via instrumentation telemetry?
124
+ rescue => nested_exc
125
+ logger.debug { "di: failed to build error notification: #{nested_exc.class}: #{nested_exc}" }
126
+ telemetry&.report(nested_exc, description: 'Error building probe error notification')
111
127
  raise
112
128
  end
113
129
 
@@ -147,17 +147,19 @@ module Datadog
147
147
  # the probe notifier builder requires a context.
148
148
  begin
149
149
  responder.probe_condition_evaluation_failed_callback(context, exc)
150
- rescue
150
+ rescue => nested_exc
151
151
  raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
152
152
 
153
- # TODO log / report via telemetry?
153
+ instrumenter.logger.debug { "di: error in probe condition evaluation failed callback: #{nested_exc.class}: #{nested_exc}" }
154
+ instrumenter.telemetry&.report(nested_exc, description: "Error in probe condition evaluation failed callback")
154
155
  end
155
156
  else
156
157
  _ = 42 # stop standard from wrecking this code
157
158
 
158
159
  raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
159
160
 
160
- # TODO log / report via telemetry?
161
+ instrumenter.logger.debug { "di: error evaluating condition without context (tracer bug?): #{exc.class}: #{exc}" }
162
+ instrumenter.telemetry&.report(exc, description: "Error evaluating condition without context")
161
163
  # If execution gets here, there is probably a bug in the tracer.
162
164
  end
163
165
 
@@ -227,8 +229,7 @@ module Datadog
227
229
  # that location here.
228
230
  []
229
231
  end
230
- # Steep: https://github.com/ruby/rbs/pull/2745
231
- caller_locs = method_frame + caller_locations # steep:ignore ArgumentTypeMismatch
232
+ caller_locs = method_frame + caller_locations
232
233
  # TODO capture arguments at exit
233
234
 
234
235
  context = Context.new(locals: nil, target_self: self,
@@ -237,9 +238,16 @@ module Datadog
237
238
  caller_locations: caller_locs,
238
239
  return_value: rv, duration: duration, exception: exc,)
239
240
 
240
- responder.probe_executed_callback(context)
241
+ begin
242
+ responder.probe_executed_callback(context)
243
+
244
+ instrumenter.send(:check_and_disable_if_exceeded, probe, responder, di_start_time, di_duration)
245
+ rescue => di_exc
246
+ raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
241
247
 
242
- instrumenter.send(:check_and_disable_if_exceeded, probe, responder, di_start_time, di_duration)
248
+ instrumenter.logger.debug { "di: unhandled exception in method probe: #{di_exc.class}: #{di_exc}" }
249
+ instrumenter.telemetry&.report(di_exc, description: "Unhandled exception in method probe")
250
+ end
243
251
 
244
252
  if exc
245
253
  raise exc
@@ -525,10 +533,11 @@ module Datadog
525
533
  # the probe notifier builder requires a context.
526
534
  begin
527
535
  responder.probe_condition_evaluation_failed_callback(context, condition, exc)
528
- rescue
536
+ rescue => nested_exc
529
537
  raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
530
538
 
531
- # TODO log / report via telemetry?
539
+ logger.debug { "di: error in probe condition evaluation failed callback: #{nested_exc.class}: #{nested_exc}" }
540
+ telemetry&.report(nested_exc, description: "Error in probe condition evaluation failed callback")
532
541
  end
533
542
 
534
543
  return
@@ -537,7 +546,8 @@ module Datadog
537
546
 
538
547
  raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
539
548
 
540
- # TODO log / report via telemetry?
549
+ logger.debug { "di: error evaluating condition without context (tracer bug?): #{exc.class}: #{exc}" }
550
+ telemetry&.report(exc, description: "Error evaluating condition without context")
541
551
  # If execution gets here, there is probably a bug in the tracer.
542
552
  end
543
553
  end
@@ -2,10 +2,11 @@
2
2
 
3
3
  # rubocop:disable Lint/AssignmentInCondition
4
4
 
5
- require 'monitor'
6
-
7
5
  module Datadog
8
6
  module DI
7
+ # Orchestrates probe lifecycle: installation, removal, and execution callbacks.
8
+ # Delegates probe storage to ProbeRepository.
9
+ #
9
10
  # Stores probes received from remote config (that we can parse, in other
10
11
  # words, whose type/attributes we support), requests needed instrumentation
11
12
  # for the probes via Instrumenter, and stores pending probes (those which
@@ -16,17 +17,14 @@ module Datadog
16
17
  # @api private
17
18
  class ProbeManager
18
19
  def initialize(settings, instrumenter, probe_notification_builder,
19
- probe_notifier_worker, logger, telemetry: nil)
20
+ probe_notifier_worker, logger, probe_repository, telemetry: nil)
20
21
  @settings = settings
21
22
  @instrumenter = instrumenter
22
23
  @probe_notification_builder = probe_notification_builder
23
24
  @probe_notifier_worker = probe_notifier_worker
24
25
  @logger = logger
25
26
  @telemetry = telemetry
26
- @installed_probes = {}
27
- @pending_probes = {}
28
- @failed_probes = {}
29
- @lock = Monitor.new
27
+ @probe_repository = probe_repository
30
28
 
31
29
  @definition_trace_point = TracePoint.trace(:end) do |tp|
32
30
  install_pending_method_probes(tp.self)
@@ -40,7 +38,15 @@ module Datadog
40
38
 
41
39
  attr_reader :logger
42
40
  attr_reader :telemetry
41
+ attr_reader :probe_repository
43
42
 
43
+ # Shuts down the probe manager and releases all resources.
44
+ #
45
+ # Disables the class definition trace point and removes all installed
46
+ # probe instrumentation. Called during component teardown.
47
+ #
48
+ # @return [void]
49
+ #
44
50
  # TODO test that close is called during component teardown and
45
51
  # the trace point is cleared
46
52
  def close
@@ -48,13 +54,16 @@ module Datadog
48
54
  clear_hooks
49
55
  end
50
56
 
57
+ # Removes all installed probe instrumentation and clears the probe repository.
58
+ #
59
+ # Iterates through all installed probes, unhooks their instrumentation,
60
+ # and clears all probe collections (installed, pending, failed).
61
+ # Called during component shutdown to clean up resources.
62
+ #
63
+ # @return [void]
51
64
  def clear_hooks
52
- @lock.synchronize do
53
- @pending_probes.clear
54
- @installed_probes.each do |probe_id, probe|
55
- instrumenter.unhook(probe)
56
- end
57
- @installed_probes.clear
65
+ probe_repository.clear_all do |probe|
66
+ instrumenter.unhook(probe)
58
67
  end
59
68
  end
60
69
 
@@ -63,27 +72,6 @@ module Datadog
63
72
  attr_reader :probe_notification_builder
64
73
  attr_reader :probe_notifier_worker
65
74
 
66
- def installed_probes
67
- @lock.synchronize do
68
- @installed_probes
69
- end
70
- end
71
-
72
- def pending_probes
73
- @lock.synchronize do
74
- @pending_probes
75
- end
76
- end
77
-
78
- # Probes that failed to instrument for reasons other than the target is
79
- # not yet loaded are added to this collection, so that we do not try
80
- # to instrument them every time remote configuration is processed.
81
- def failed_probes
82
- @lock.synchronize do
83
- @failed_probes
84
- end
85
- end
86
-
87
75
  # Requests to install the specified probe.
88
76
  #
89
77
  # If the target of the probe does not exist, assume the relevant
@@ -92,9 +80,18 @@ module Datadog
92
80
  # defined, or files loaded, the probe will be checked against the
93
81
  # newly defined classes/loaded files, and will be installed if it
94
82
  # matches.
83
+ #
84
+ # On successful installation, sends INSTALLED status to the backend.
85
+ # On failure, sends ERROR status to the backend before re-raising.
86
+ #
87
+ # @param probe [Probe] the probe to install
88
+ # @return [Boolean] true if installed, false if pending
89
+ # @raise [Error::AlreadyInstrumented] if a probe with the same ID is already installed
90
+ # @raise [Error::ProbePreviouslyFailed] if a probe with the same ID previously failed
91
+ # @raise [StandardError] re-raises any other instrumentation error after recording failure
95
92
  def add_probe(probe)
96
- @lock.synchronize do
97
- if @installed_probes[probe.id]
93
+ probe_repository.synchronize do
94
+ if probe_repository.find_installed(probe.id)
98
95
  # Either this probe was already installed, or another probe was
99
96
  # installed with the same id (previous version perhaps?).
100
97
  # Since our state tracking is keyed by probe id, we cannot
@@ -108,7 +105,7 @@ module Datadog
108
105
  end
109
106
 
110
107
  # Probe failed to install previously, do not try to install it again.
111
- if msg = @failed_probes[probe.id]
108
+ if msg = probe_repository.find_failed(probe.id)
112
109
  # TODO test this path
113
110
  raise Error::ProbePreviouslyFailed, msg
114
111
  end
@@ -116,43 +113,41 @@ module Datadog
116
113
  begin
117
114
  instrumenter.hook(probe, self)
118
115
 
119
- @installed_probes[probe.id] = probe
116
+ probe_repository.add_installed(probe)
120
117
  payload = probe_notification_builder.build_installed(probe)
121
118
  probe_notifier_worker.add_status(payload, probe: probe)
122
119
  # The probe would only be in the pending probes list if it was
123
120
  # previously attempted to be installed and the target was not loaded.
124
121
  # Always remove from pending list here because it makes the
125
122
  # API smaller and shouldn't cause any actual problems.
126
- @pending_probes.delete(probe.id)
123
+ probe_repository.remove_pending(probe.id)
127
124
  logger.trace { "di: installed #{probe.type} probe at #{probe.location} (#{probe.id})" }
128
125
  true
129
126
  rescue Error::DITargetNotDefined
130
- @pending_probes[probe.id] = probe
127
+ probe_repository.add_pending(probe)
131
128
  logger.trace { "di: could not install #{probe.type} probe at #{probe.location} (#{probe.id}) because its target is not defined, adding it to pending list" }
132
129
  false
133
130
  end
134
- rescue => exc
135
- # In "propagate all exceptions" mode we will try to instrument again.
136
- raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
131
+ end
132
+ rescue => exc
133
+ # In "propagate all exceptions" mode we will try to instrument again.
134
+ raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
137
135
 
138
- logger.debug { "di: error processing probe configuration: #{exc.class}: #{exc}" }
139
- telemetry&.report(exc, description: "Error processing probe configuration")
140
- # TODO report probe as failed to agent since we won't attempt to
141
- # install it again.
136
+ logger.debug { "di: error processing probe configuration: #{exc.class}: #{exc}" }
137
+ telemetry&.report(exc, description: "Error processing probe configuration")
142
138
 
143
- # TODO add top stack frame to message
144
- @failed_probes[probe.id] = "#{exc.class}: #{exc}"
139
+ payload = probe_notification_builder.build_errored(probe, exc)
140
+ probe_notifier_worker.add_status(payload, probe: probe)
145
141
 
146
- raise
147
- end
142
+ probe_repository.add_failed(probe.id, "#{exc.class}: #{exc}")
143
+
144
+ raise
148
145
  end
149
146
 
150
147
  # Removes probe with specified id. The probe could be pending or
151
148
  # installed. Does nothing if there is no probe with the specified id.
152
149
  def remove_probe(probe_id)
153
- @lock.synchronize do
154
- @pending_probes.delete(probe_id)
155
- end
150
+ probe_repository.remove_pending(probe_id)
156
151
 
157
152
  # Do not delete the probe from the registry here in case
158
153
  # deinstrumentation fails - though I don't know why deinstrumentation
@@ -161,10 +156,10 @@ module Datadog
161
156
  # in the future, and if deinstrumentation fails I would want to
162
157
  # keep that probe as "installed" for the count, so that we can
163
158
  # investigate the situation.
164
- if probe = @installed_probes[probe_id]
159
+ if probe = probe_repository.find_installed(probe_id)
165
160
  begin
166
161
  instrumenter.unhook(probe)
167
- @installed_probes.delete(probe_id)
162
+ probe_repository.remove_installed(probe_id)
168
163
  rescue => exc
169
164
  raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
170
165
  # Silence all exceptions?
@@ -180,9 +175,9 @@ module Datadog
180
175
  # This method is meant to be called from the "end" trace point,
181
176
  # which is invoked for each class definition.
182
177
  private def install_pending_method_probes(cls)
183
- @lock.synchronize do
178
+ probe_repository.synchronize do
184
179
  # TODO search more efficiently than linearly
185
- @pending_probes.each do |probe_id, probe|
180
+ probe_repository.pending_probes.each do |probe_id, probe|
186
181
  if probe.method?
187
182
  # TODO move this stringification elsewhere
188
183
  if probe.type_name == cls.name
@@ -190,8 +185,8 @@ module Datadog
190
185
  # TODO is it OK to hook from trace point handler?
191
186
  # TODO the class is now defined, but can hooking still fail?
192
187
  instrumenter.hook(probe, self)
193
- @installed_probes[probe.id] = probe
194
- @pending_probes.delete(probe.id)
188
+ probe_repository.add_installed(probe)
189
+ probe_repository.remove_pending(probe.id)
195
190
  break
196
191
  rescue Error::DITargetNotDefined
197
192
  # This should not happen... try installing again later?
@@ -200,6 +195,11 @@ module Datadog
200
195
 
201
196
  logger.debug { "di: error installing #{probe.type} probe at #{probe.location} (#{probe.id}) after class is defined: #{exc.class}: #{exc}" }
202
197
  telemetry&.report(exc, description: "Error installing probe after class is defined")
198
+
199
+ payload = probe_notification_builder.build_errored(probe, exc)
200
+ probe_notifier_worker.add_status(payload, probe: probe)
201
+
202
+ probe_repository.add_failed(probe.id, "#{exc.class}: #{exc}")
203
203
  end
204
204
  end
205
205
  end
@@ -217,8 +217,8 @@ module Datadog
217
217
  if path.nil?
218
218
  raise ArgumentError, "path must not be nil"
219
219
  end
220
- @lock.synchronize do
221
- @pending_probes.values.each do |probe|
220
+ probe_repository.synchronize do
221
+ probe_repository.pending_probes.values.each do |probe|
222
222
  if probe.line?
223
223
  if probe.file_matches?(path)
224
224
  add_probe(probe)
@@ -248,6 +248,16 @@ module Datadog
248
248
  probe_notifier_worker.add_snapshot(payload)
249
249
  end
250
250
 
251
+ # Callback invoked when a probe's condition expression fails to evaluate.
252
+ #
253
+ # This can happen when the expression references undefined variables,
254
+ # has type mismatches, or encounters runtime errors during evaluation.
255
+ # Rate-limited to 1 notification per second per probe to avoid
256
+ # flooding the backend when conditions fail repeatedly.
257
+ #
258
+ # @param context [Context] The execution context containing probe and captured data
259
+ # @param expr [String] The condition expression that failed
260
+ # @param exc [Exception] The exception raised during condition evaluation
251
261
  def probe_condition_evaluation_failed_callback(context, expr, exc)
252
262
  probe = context.probe
253
263
  if probe.condition_evaluation_failed_rate_limiter&.allow?
@@ -256,6 +266,13 @@ module Datadog
256
266
  end
257
267
  end
258
268
 
269
+ # Callback invoked when a probe is disabled, for example due to
270
+ # exceeding the CPU consumption limit in DI processing.
271
+ #
272
+ # Sends ERROR status notification to the backend.
273
+ #
274
+ # @param probe [Probe] The probe that was disabled
275
+ # @param duration [Numeric] The CPU time consumed by probe processing, in seconds
259
276
  def probe_disabled_callback(probe, duration)
260
277
  payload = probe_notification_builder.build_disabled(probe, duration)
261
278
  probe_notifier_worker.add_status(payload, probe: probe)