datadog 2.29.0 → 2.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +87 -2
  3. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +21 -12
  4. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +9 -7
  5. data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +18 -0
  6. data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +10 -0
  7. data/ext/datadog_profiling_native_extension/extconf.rb +6 -24
  8. data/ext/datadog_profiling_native_extension/heap_recorder.c +5 -6
  9. data/ext/datadog_profiling_native_extension/http_transport.c +51 -64
  10. data/ext/datadog_profiling_native_extension/native_extension_helpers.rb +0 -13
  11. data/ext/datadog_profiling_native_extension/profiling.c +3 -1
  12. data/ext/datadog_profiling_native_extension/setup_signal_handler.c +24 -8
  13. data/ext/datadog_profiling_native_extension/setup_signal_handler.h +1 -3
  14. data/ext/datadog_profiling_native_extension/stack_recorder.c +29 -43
  15. data/ext/libdatadog_api/crashtracker.c +5 -8
  16. data/ext/libdatadog_api/crashtracker_report_exception.c +34 -144
  17. data/ext/libdatadog_api/datadog_ruby_common.c +18 -0
  18. data/ext/libdatadog_api/datadog_ruby_common.h +10 -0
  19. data/ext/libdatadog_api/di.c +79 -0
  20. data/ext/libdatadog_api/extconf.rb +5 -20
  21. data/ext/libdatadog_api/init.c +5 -2
  22. data/ext/libdatadog_extconf_helpers.rb +57 -11
  23. data/lib/datadog/ai_guard/component.rb +2 -0
  24. data/lib/datadog/ai_guard/configuration/settings.rb +3 -0
  25. data/lib/datadog/ai_guard/contrib/ruby_llm/chat_instrumentation.rb +41 -3
  26. data/lib/datadog/ai_guard/evaluation/content_builder.rb +31 -0
  27. data/lib/datadog/ai_guard/evaluation/content_part.rb +36 -0
  28. data/lib/datadog/ai_guard/evaluation/no_op_result.rb +3 -1
  29. data/lib/datadog/ai_guard/evaluation/request.rb +14 -9
  30. data/lib/datadog/ai_guard/evaluation/result.rb +3 -1
  31. data/lib/datadog/ai_guard/evaluation.rb +36 -7
  32. data/lib/datadog/ai_guard.rb +26 -8
  33. data/lib/datadog/appsec/autoload.rb +1 -1
  34. data/lib/datadog/appsec/component.rb +11 -7
  35. data/lib/datadog/appsec/contrib/active_record/patcher.rb +3 -0
  36. data/lib/datadog/appsec/contrib/devise/integration.rb +1 -1
  37. data/lib/datadog/appsec/contrib/excon/patcher.rb +2 -0
  38. data/lib/datadog/appsec/contrib/excon/ssrf_detection_middleware.rb +1 -1
  39. data/lib/datadog/appsec/contrib/graphql/gateway/watcher.rb +1 -1
  40. data/lib/datadog/appsec/contrib/rack/gateway/request.rb +1 -1
  41. data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +10 -11
  42. data/lib/datadog/appsec/contrib/rack/integration.rb +1 -1
  43. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +25 -2
  44. data/lib/datadog/appsec/contrib/rack/response_body.rb +36 -0
  45. data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +2 -2
  46. data/lib/datadog/appsec/contrib/rails/integration.rb +1 -1
  47. data/lib/datadog/appsec/contrib/rails/patcher.rb +2 -2
  48. data/lib/datadog/appsec/contrib/rest_client/patcher.rb +2 -0
  49. data/lib/datadog/appsec/contrib/rest_client/request_ssrf_detection_patch.rb +2 -2
  50. data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +3 -3
  51. data/lib/datadog/appsec/event.rb +1 -17
  52. data/lib/datadog/appsec/instrumentation/gateway/middleware.rb +2 -3
  53. data/lib/datadog/appsec/instrumentation/gateway.rb +2 -15
  54. data/lib/datadog/appsec/monitor/gateway/watcher.rb +4 -2
  55. data/lib/datadog/appsec/utils/http/media_type.rb +1 -2
  56. data/lib/datadog/appsec/utils/http/url_encoded.rb +2 -2
  57. data/lib/datadog/appsec.rb +5 -9
  58. data/lib/datadog/core/configuration/base.rb +17 -5
  59. data/lib/datadog/core/configuration/components.rb +21 -8
  60. data/lib/datadog/core/configuration/config_helper.rb +9 -0
  61. data/lib/datadog/core/configuration/option.rb +32 -6
  62. data/lib/datadog/core/configuration/option_definition.rb +38 -12
  63. data/lib/datadog/core/configuration/options.rb +41 -7
  64. data/lib/datadog/core/configuration/settings.rb +42 -3
  65. data/lib/datadog/core/configuration/supported_configurations.rb +17 -0
  66. data/lib/datadog/core/contrib/rails/railtie.rb +32 -0
  67. data/lib/datadog/core/contrib/rails/utils.rb +7 -3
  68. data/lib/datadog/core/crashtracking/component.rb +7 -15
  69. data/lib/datadog/core/environment/container.rb +2 -2
  70. data/lib/datadog/core/environment/ext.rb +1 -0
  71. data/lib/datadog/core/environment/identity.rb +25 -3
  72. data/lib/datadog/core/environment/process.rb +12 -0
  73. data/lib/datadog/core/metrics/client.rb +5 -5
  74. data/lib/datadog/core/process_discovery.rb +5 -0
  75. data/lib/datadog/core/remote/component.rb +38 -21
  76. data/lib/datadog/core/runtime/metrics.rb +2 -3
  77. data/lib/datadog/core/telemetry/component.rb +3 -0
  78. data/lib/datadog/core/telemetry/event/app_client_configuration_change.rb +2 -3
  79. data/lib/datadog/core/telemetry/event/app_extended_heartbeat.rb +32 -0
  80. data/lib/datadog/core/telemetry/event/app_started.rb +151 -169
  81. data/lib/datadog/core/telemetry/event.rb +1 -7
  82. data/lib/datadog/core/telemetry/ext.rb +1 -0
  83. data/lib/datadog/core/telemetry/transport/http/telemetry.rb +5 -0
  84. data/lib/datadog/core/telemetry/worker.rb +20 -0
  85. data/lib/datadog/core/utils/base64.rb +1 -1
  86. data/lib/datadog/core/utils/only_once.rb +1 -1
  87. data/lib/datadog/core/utils/spawn_monkey_patch.rb +36 -0
  88. data/lib/datadog/core/workers/async.rb +1 -1
  89. data/lib/datadog/core/workers/interval_loop.rb +13 -6
  90. data/lib/datadog/core/workers/queue.rb +0 -4
  91. data/lib/datadog/core/workers/runtime_metrics.rb +9 -1
  92. data/lib/datadog/core.rb +0 -1
  93. data/lib/datadog/data_streams/pathway_context.rb +1 -1
  94. data/lib/datadog/data_streams/processor.rb +1 -0
  95. data/lib/datadog/di/boot.rb +3 -4
  96. data/lib/datadog/di/component.rb +20 -4
  97. data/lib/datadog/di/instrumenter.rb +20 -10
  98. data/lib/datadog/di/probe_manager.rb +79 -62
  99. data/lib/datadog/di/probe_notification_builder.rb +148 -33
  100. data/lib/datadog/di/probe_notifier_worker.rb +52 -6
  101. data/lib/datadog/di/probe_repository.rb +198 -0
  102. data/lib/datadog/di/remote.rb +5 -6
  103. data/lib/datadog/di/serializer.rb +127 -9
  104. data/lib/datadog/di/transport/http.rb +12 -3
  105. data/lib/datadog/di/transport/input.rb +46 -8
  106. data/lib/datadog/di.rb +81 -0
  107. data/lib/datadog/kit/enable_core_dumps.rb +1 -1
  108. data/lib/datadog/open_feature/configuration.rb +2 -0
  109. data/lib/datadog/open_feature/evaluation_engine.rb +1 -1
  110. data/lib/datadog/open_feature/exposures/reporter.rb +1 -1
  111. data/lib/datadog/open_feature/exposures/worker.rb +1 -1
  112. data/lib/datadog/open_feature/remote.rb +1 -1
  113. data/lib/datadog/open_feature/transport.rb +1 -1
  114. data/lib/datadog/opentelemetry/configuration/settings.rb +2 -0
  115. data/lib/datadog/profiling/collectors/code_provenance.rb +2 -3
  116. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +14 -1
  117. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +1 -1
  118. data/lib/datadog/profiling/component.rb +31 -1
  119. data/lib/datadog/profiling/http_transport.rb +5 -6
  120. data/lib/datadog/profiling/load_native_extension.rb +1 -1
  121. data/lib/datadog/profiling/profiler.rb +15 -12
  122. data/lib/datadog/profiling/scheduler.rb +2 -2
  123. data/lib/datadog/profiling/tasks/exec.rb +2 -2
  124. data/lib/datadog/profiling/tasks/setup.rb +2 -2
  125. data/lib/datadog/profiling.rb +1 -2
  126. data/lib/datadog/single_step_instrument.rb +1 -1
  127. data/lib/datadog/tracing/buffer.rb +3 -3
  128. data/lib/datadog/tracing/component.rb +11 -0
  129. data/lib/datadog/tracing/configuration/settings.rb +2 -1
  130. data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +2 -2
  131. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +20 -0
  132. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/patcher.rb +3 -1
  133. data/lib/datadog/tracing/contrib/action_view/events/render_template.rb +1 -1
  134. data/lib/datadog/tracing/contrib/active_job/events/discard.rb +1 -1
  135. data/lib/datadog/tracing/contrib/active_job/events/enqueue.rb +1 -1
  136. data/lib/datadog/tracing/contrib/active_job/events/enqueue_at.rb +1 -1
  137. data/lib/datadog/tracing/contrib/active_job/events/enqueue_retry.rb +1 -1
  138. data/lib/datadog/tracing/contrib/active_job/events/perform.rb +1 -1
  139. data/lib/datadog/tracing/contrib/active_job/events/retry_stopped.rb +1 -1
  140. data/lib/datadog/tracing/contrib/active_model_serializers/events/render.rb +1 -1
  141. data/lib/datadog/tracing/contrib/active_model_serializers/events/serialize.rb +1 -1
  142. data/lib/datadog/tracing/contrib/active_record/events/instantiation.rb +1 -1
  143. data/lib/datadog/tracing/contrib/active_record/events/sql.rb +1 -1
  144. data/lib/datadog/tracing/contrib/active_record/utils.rb +1 -1
  145. data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +1 -1
  146. data/lib/datadog/tracing/contrib/active_support/notifications/subscription.rb +2 -2
  147. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +1 -1
  148. data/lib/datadog/tracing/contrib/configurable.rb +18 -3
  149. data/lib/datadog/tracing/contrib/dalli/integration.rb +4 -1
  150. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +1 -1
  151. data/lib/datadog/tracing/contrib/ethon/configuration/settings.rb +5 -1
  152. data/lib/datadog/tracing/contrib/ethon/ext.rb +1 -0
  153. data/lib/datadog/tracing/contrib/excon/configuration/settings.rb +5 -2
  154. data/lib/datadog/tracing/contrib/excon/ext.rb +1 -0
  155. data/lib/datadog/tracing/contrib/excon/middleware.rb +2 -2
  156. data/lib/datadog/tracing/contrib/faraday/configuration/settings.rb +5 -2
  157. data/lib/datadog/tracing/contrib/faraday/ext.rb +1 -0
  158. data/lib/datadog/tracing/contrib/faraday/middleware.rb +2 -2
  159. data/lib/datadog/tracing/contrib/grape/endpoint.rb +7 -7
  160. data/lib/datadog/tracing/contrib/grape/instrumentation.rb +13 -8
  161. data/lib/datadog/tracing/contrib/grape/patcher.rb +6 -1
  162. data/lib/datadog/tracing/contrib/grpc/configuration/settings.rb +5 -2
  163. data/lib/datadog/tracing/contrib/grpc/ext.rb +1 -0
  164. data/lib/datadog/tracing/contrib/http/configuration/settings.rb +5 -2
  165. data/lib/datadog/tracing/contrib/http/ext.rb +1 -0
  166. data/lib/datadog/tracing/contrib/http/instrumentation.rb +1 -1
  167. data/lib/datadog/tracing/contrib/httpclient/configuration/settings.rb +5 -2
  168. data/lib/datadog/tracing/contrib/httpclient/ext.rb +1 -0
  169. data/lib/datadog/tracing/contrib/httprb/configuration/settings.rb +5 -2
  170. data/lib/datadog/tracing/contrib/httprb/ext.rb +1 -0
  171. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +1 -1
  172. data/lib/datadog/tracing/contrib/karafka/configuration/settings.rb +5 -1
  173. data/lib/datadog/tracing/contrib/karafka/ext.rb +1 -0
  174. data/lib/datadog/tracing/contrib/opensearch/patcher.rb +1 -1
  175. data/lib/datadog/tracing/contrib/que/configuration/settings.rb +5 -2
  176. data/lib/datadog/tracing/contrib/que/ext.rb +1 -0
  177. data/lib/datadog/tracing/contrib/rack/configuration/settings.rb +5 -1
  178. data/lib/datadog/tracing/contrib/rack/ext.rb +1 -0
  179. data/lib/datadog/tracing/contrib/rails/configuration/settings.rb +5 -2
  180. data/lib/datadog/tracing/contrib/rails/ext.rb +1 -0
  181. data/lib/datadog/tracing/contrib/rails/log_injection.rb +1 -1
  182. data/lib/datadog/tracing/contrib/rails/patcher.rb +0 -1
  183. data/lib/datadog/tracing/contrib/rails/runner.rb +1 -1
  184. data/lib/datadog/tracing/contrib/rake/instrumentation.rb +2 -2
  185. data/lib/datadog/tracing/contrib/redis/tags.rb +1 -1
  186. data/lib/datadog/tracing/contrib/rest_client/configuration/settings.rb +5 -2
  187. data/lib/datadog/tracing/contrib/rest_client/ext.rb +1 -0
  188. data/lib/datadog/tracing/contrib/sidekiq/configuration/settings.rb +5 -1
  189. data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
  190. data/lib/datadog/tracing/contrib/sinatra/configuration/settings.rb +5 -1
  191. data/lib/datadog/tracing/contrib/sinatra/ext.rb +1 -0
  192. data/lib/datadog/tracing/contrib/status_range_matcher.rb +4 -0
  193. data/lib/datadog/tracing/contrib/stripe/request.rb +1 -1
  194. data/lib/datadog/tracing/contrib/waterdrop/configuration/settings.rb +5 -1
  195. data/lib/datadog/tracing/contrib/waterdrop/ext.rb +1 -0
  196. data/lib/datadog/tracing/distributed/datadog.rb +4 -2
  197. data/lib/datadog/tracing/event.rb +1 -1
  198. data/lib/datadog/tracing/metadata/ext.rb +4 -0
  199. data/lib/datadog/tracing/remote.rb +1 -1
  200. data/lib/datadog/tracing/sampling/ext.rb +2 -0
  201. data/lib/datadog/tracing/sampling/priority_sampler.rb +13 -0
  202. data/lib/datadog/tracing/sampling/rule.rb +1 -1
  203. data/lib/datadog/tracing/sampling/rule_sampler.rb +54 -25
  204. data/lib/datadog/tracing/sampling/span/rule_parser.rb +1 -1
  205. data/lib/datadog/tracing/span_operation.rb +1 -1
  206. data/lib/datadog/tracing/sync_writer.rb +0 -1
  207. data/lib/datadog/tracing/trace_operation.rb +50 -6
  208. data/lib/datadog/tracing/tracer.rb +25 -0
  209. data/lib/datadog/tracing/transport/io/client.rb +1 -1
  210. data/lib/datadog/tracing/transport/trace_formatter.rb +11 -0
  211. data/lib/datadog/tracing/writer.rb +0 -1
  212. data/lib/datadog/version.rb +1 -1
  213. metadata +15 -8
  214. data/lib/datadog/tracing/workers/trace_writer.rb +0 -204
@@ -79,7 +79,7 @@ module Datadog
79
79
  },
80
80
  return: {
81
81
  arguments: return_arguments,
82
- throwable: nil,
82
+ throwable: context.exception ? serialize_throwable(context.exception) : nil,
83
83
  },
84
84
  }
85
85
  elsif probe.line?
@@ -112,8 +112,155 @@ module Datadog
112
112
  build_snapshot_base(context, evaluation_errors: [error])
113
113
  end
114
114
 
115
+ # Builds a probe status notification payload.
116
+ #
117
+ # @param probe [Probe] the probe to build status for
118
+ # @param message [String] human-readable status message
119
+ # @param status [String] status value (RECEIVED, INSTALLED, EMITTING, ERROR)
120
+ # @param exception [Exception, nil] exception to include for ERROR status
121
+ # @return [Hash] the status payload
122
+ def build_status(probe, message:, status:, exception: nil)
123
+ diagnostics = {
124
+ probeId: probe.id,
125
+ probeVersion: 0,
126
+ runtimeId: Core::Environment::Identity.id,
127
+ parentId: nil,
128
+ status: status,
129
+ }
130
+
131
+ # Exception field is required by the backend for ERROR status.
132
+ # If the ERROR status is sent without the exception field, the status
133
+ # appears to be completely ignored by the backend.
134
+ # Note: The Go DI implementation does not send the top-level message
135
+ # field at all when sending error statuses.
136
+ if status == 'ERROR'
137
+ diagnostics[:exception] = { # steep:ignore
138
+ type: exception ? exception.class.name : 'Error',
139
+ message: exception ? exception.message : message
140
+ }
141
+ end
142
+
143
+ {
144
+ service: settings.service,
145
+ timestamp: timestamp_now,
146
+ message: message,
147
+ ddsource: 'dd_debugger',
148
+ debugger: {
149
+ diagnostics: diagnostics,
150
+ },
151
+ }
152
+ end
153
+
115
154
  private
116
155
 
156
+ # Serializes an exception for the throwable field in snapshot captures.
157
+ #
158
+ # Uses the C extension's exception_message to get the original message
159
+ # without invoking any Ruby-level message method override, which
160
+ # could be customer code.
161
+ #
162
+ # Caveats:
163
+ #
164
+ # 1. The value returned by exception_message is not guaranteed to be
165
+ # a string — it is whatever was passed to the Exception constructor.
166
+ # Calling .to_s on an arbitrary object would invoke customer code,
167
+ # violating DI's constraint of never executing customer methods
168
+ # during instrumentation. We only use the value directly when it
169
+ # is a String; for non-string values we return a redacted
170
+ # placeholder (reporting the class name would duplicate the
171
+ # exception type already present in the :type field).
172
+ #
173
+ # 2. Custom exception classes may not store a meaningful message via
174
+ # the constructor (e.g. they may compute it in an overridden
175
+ # +message+ method). In such cases exception_message may return
176
+ # nil or an unrelated constructor argument. This is acceptable:
177
+ # we still report the exception type, and a missing/wrong message
178
+ # is better than invoking customer code or reporting nothing.
179
+ #
180
+ # @param exception [Exception] the exception to serialize
181
+ # @return [Hash{Symbol => String?}] hash with :type and :message keys
182
+ def serialize_throwable(exception)
183
+ msg = DI.exception_message(exception)
184
+ message = if msg.nil? || String === msg
185
+ msg
186
+ else
187
+ # Non-string constructor argument — return a redacted placeholder
188
+ # rather than calling .to_s which could be customer code.
189
+ # The exception class is already reported via the :type field.
190
+ '<REDACTED: not a string value>'
191
+ end
192
+ # Prefer backtrace_locations (structured Location objects) over
193
+ # backtrace (formatted strings that need regex parsing).
194
+ #
195
+ # However, backtrace_locations returns nil when someone has called
196
+ # Exception#set_backtrace with Array<String> — the VM cannot
197
+ # reconstruct Location objects from formatted strings. This happens
198
+ # in exception wrapping patterns (catch, create new exception, copy
199
+ # original's string backtrace via set_backtrace, re-raise).
200
+ # In that case, fall back to backtrace strings.
201
+ #
202
+ # Both accessors use the UnboundMethod trick to bypass subclass
203
+ # overrides, consistent with the rest of this method.
204
+ #
205
+ # If a subclass overrides #backtrace, MRI's raise never stores
206
+ # the real backtrace — both backtrace_locations and backtrace
207
+ # return nil, and stacktrace is [].
208
+ # This is unrecoverable without calling customer code.
209
+ # See DI::EXCEPTION_BACKTRACE comment for details.
210
+ locations = DI::EXCEPTION_BACKTRACE_LOCATIONS.bind(exception).call
211
+ stacktrace = if locations
212
+ format_backtrace_locations(locations)
213
+ else
214
+ format_backtrace_strings(DI::EXCEPTION_BACKTRACE.bind(exception).call)
215
+ end
216
+ {
217
+ type: exception.class.name,
218
+ message: message,
219
+ stacktrace: stacktrace,
220
+ }
221
+ end
222
+
223
+ # Matches Ruby backtrace frame format: "/path/file.rb:42:in `method_name'"
224
+ # Captures: $1 = file path, $2 = line number, $3 = method name
225
+ BACKTRACE_FRAME_PATTERN = /\A(.+):(\d+):in\s+[`'](.+)'\z/
226
+
227
+ # Converts backtrace locations into the stack frame format
228
+ # expected by the Datadog UI.
229
+ #
230
+ # Uses Thread::Backtrace::Location objects which provide structured
231
+ # path/lineno/label directly, avoiding the round-trip of formatting
232
+ # to strings and regex-parsing back.
233
+ #
234
+ # @param locations [Array<Thread::Backtrace::Location>]
235
+ # @return [Array<Hash>]
236
+ def format_backtrace_locations(locations)
237
+ locations.map do |loc|
238
+ {fileName: loc.path, function: loc.label, lineNumber: loc.lineno}
239
+ end
240
+ end
241
+
242
+ # Parses Ruby backtrace strings into the stack frame format
243
+ # expected by the Datadog UI.
244
+ #
245
+ # Fallback for when backtrace_locations returns nil (see
246
+ # serialize_throwable for details on when this happens).
247
+ #
248
+ # Ruby backtrace format: "/path/file.rb:42:in `method_name'"
249
+ #
250
+ # @param backtrace [Array<String>, nil] from Exception#backtrace
251
+ # @return [Array<Hash>]
252
+ def format_backtrace_strings(backtrace)
253
+ return [] if backtrace.nil?
254
+
255
+ backtrace.map do |frame|
256
+ if frame =~ BACKTRACE_FRAME_PATTERN
257
+ {fileName: $1, function: $3, lineNumber: $2.to_i}
258
+ else
259
+ {fileName: frame, function: '', lineNumber: 0}
260
+ end
261
+ end
262
+ end
263
+
117
264
  def build_snapshot_base(context, evaluation_errors: [], captures: nil, message: nil)
118
265
  probe = context.probe
119
266
 
@@ -202,38 +349,6 @@ module Datadog
202
349
  payload
203
350
  end
204
351
 
205
- def build_status(probe, message:, status:, exception: nil)
206
- diagnostics = {
207
- probeId: probe.id,
208
- probeVersion: 0,
209
- runtimeId: Core::Environment::Identity.id,
210
- parentId: nil,
211
- status: status,
212
- }
213
-
214
- # Exception field is required by the backend for ERROR status.
215
- # If the ERROR status is sent without the exception field, the status
216
- # appears to be completely ignored by the backend.
217
- # Note: The Go DI implementation does not send the top-level message
218
- # field at all when sending error statuses.
219
- if status == 'ERROR'
220
- diagnostics[:exception] = { # steep:ignore
221
- type: exception ? exception.class.name : 'Error',
222
- message: exception ? exception.message : message
223
- }
224
- end
225
-
226
- {
227
- service: settings.service,
228
- timestamp: timestamp_now,
229
- message: message,
230
- ddsource: 'dd_debugger',
231
- debugger: {
232
- diagnostics: diagnostics,
233
- },
234
- }
235
- end
236
-
237
352
  def format_caller_locations(caller_locations)
238
353
  caller_locations.map do |loc|
239
354
  {fileName: loc.path, function: loc.label, lineNumber: loc.lineno}
@@ -23,7 +23,12 @@ module Datadog
23
23
  #
24
24
  # @api private
25
25
  class ProbeNotifierWorker
26
- def initialize(settings, logger, agent_settings:, telemetry: nil)
26
+ # @param probe_repository [ProbeRepository] Repository for looking up probes.
27
+ # Used for handling serialization errors (disabling affected probes).
28
+ # @param probe_notification_builder [ProbeNotificationBuilder] Builder for
29
+ # creating status notifications. Used for reporting ERROR status.
30
+ def initialize(settings, logger, agent_settings:,
31
+ probe_repository:, probe_notification_builder:, telemetry: nil)
27
32
  @settings = settings
28
33
  @telemetry = telemetry
29
34
  @status_queue = []
@@ -38,13 +43,23 @@ module Datadog
38
43
  @thread = nil
39
44
  @pid = nil
40
45
  @flush = 0
46
+ @probe_repository = probe_repository
47
+ @probe_notification_builder = probe_notification_builder
41
48
  end
42
49
 
43
50
  attr_reader :settings
44
51
  attr_reader :logger
45
52
  attr_reader :telemetry
46
53
  attr_reader :agent_settings
54
+ attr_reader :probe_repository
55
+ attr_reader :probe_notification_builder
47
56
 
57
+ # Starts the background worker thread.
58
+ #
59
+ # The thread batches and sends probe statuses and snapshots to the agent.
60
+ # If the process forks, the thread is automatically restarted in the child.
61
+ #
62
+ # @return [void]
48
63
  def start
49
64
  return if @thread && @pid == Process.pid
50
65
  logger.trace { "di: starting probe notifier: pid #{$$}" }
@@ -179,11 +194,44 @@ module Datadog
179
194
  end
180
195
 
181
196
  def snapshot_transport
182
- @snapshot_transport ||= DI::Transport::HTTP.input(agent_settings: agent_settings, logger: logger)
197
+ @snapshot_transport ||= DI::Transport::HTTP.input(agent_settings: agent_settings, logger: logger, telemetry: telemetry)
183
198
  end
184
199
 
200
+ # Sends a batch of snapshot payloads to the agent.
201
+ #
202
+ # The transport serializes each snapshot individually and reports
203
+ # serialization failures via callback. This allows healthy probes
204
+ # to continue working even when one probe produces un-serializable data.
205
+ #
206
+ # @param batch [Array<Hash>] Array of snapshot payload hashes
185
207
  def do_send_snapshot(batch)
186
- snapshot_transport.send_input(batch, tags)
208
+ snapshot_transport.send_input(batch, tags, on_serialization_error: method(:handle_serialization_error))
209
+ end
210
+
211
+ # Handles serialization errors reported by the transport.
212
+ #
213
+ # Disables the affected probe and sends ERROR status to the backend.
214
+ # Called by transport when a snapshot fails to serialize.
215
+ #
216
+ # @param probe_id [String] ID of the probe that produced bad data
217
+ # @param exception [Exception] The serialization exception
218
+ def handle_serialization_error(probe_id, exception)
219
+ # Only installed probes produce snapshots, so a serialization
220
+ # error can only come from an installed probe.
221
+ probe = probe_repository.find_installed(probe_id)
222
+ return unless probe
223
+
224
+ logger.debug { "di: disabling probe #{probe_id} due to serialization error: #{exception.class}: #{exception}" }
225
+
226
+ probe.disable!
227
+
228
+ payload = probe_notification_builder.build_status(
229
+ probe,
230
+ message: "Probe #{probe.id} disabled: snapshot JSON encoding failed (#{exception.class}: #{exception})",
231
+ status: 'ERROR',
232
+ exception: exception,
233
+ )
234
+ add_status(payload, probe: probe)
187
235
  end
188
236
 
189
237
  def tags
@@ -273,9 +321,7 @@ module Datadog
273
321
  rescue => exc
274
322
  raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
275
323
  logger.debug { "di: failed to send #{event_name}: #{exc.class}: #{exc} (at #{exc.backtrace.first})" }
276
- # Should we report this error to telemetry? Most likely failure
277
- # to send is due to a network issue, and trying to send a
278
- # telemetry message would also fail.
324
+ telemetry&.report(exc, description: "Error sending #{event_type}")
279
325
  end
280
326
  end
281
327
  batch.any?
@@ -0,0 +1,198 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'monitor'
4
+
5
+ module Datadog
6
+ module DI
7
+ # Thread-safe repository for storing probes in various states.
8
+ #
9
+ # Probes are stored in three collections based on their state:
10
+ # - installed_probes: Successfully instrumented probes
11
+ # - pending_probes: Probes waiting for their target to be defined
12
+ # - failed_probes: Probes that failed to instrument (stores error messages, not probes)
13
+ #
14
+ # This class is shared between ProbeManager and ProbeNotifierWorker,
15
+ # allowing ProbeNotifierWorker to look up probes for error handling.
16
+ #
17
+ # @api private
18
+ class ProbeRepository
19
+ def initialize
20
+ @installed_probes = {}
21
+ @pending_probes = {}
22
+ @failed_probes = {}
23
+ @lock = Monitor.new
24
+ end
25
+
26
+ # Executes the block while holding the repository lock.
27
+ #
28
+ # Use this for compound operations that need to atomically check
29
+ # and modify multiple collections (e.g., check-then-install).
30
+ # Individual methods already acquire the lock internally; since
31
+ # the lock is a Monitor (reentrant), calling them from within
32
+ # this block is safe.
33
+ #
34
+ # @yield Block to execute while holding the lock
35
+ # @return The return value of the block
36
+ def synchronize(&block)
37
+ @lock.synchronize(&block)
38
+ end
39
+
40
+ # Returns the installed probes hash.
41
+ # Note: Returns the actual hash for backward compatibility with existing code.
42
+ #
43
+ # @return [Hash<String, Probe>] map of probe ID to installed probe
44
+ def installed_probes
45
+ @lock.synchronize do
46
+ @installed_probes
47
+ end
48
+ end
49
+
50
+ # Finds an installed probe by ID.
51
+ #
52
+ # @param probe_id [String] The probe ID to look up
53
+ # @return [Probe, nil] The probe if found, nil otherwise
54
+ def find_installed(probe_id)
55
+ @lock.synchronize do
56
+ @installed_probes[probe_id]
57
+ end
58
+ end
59
+
60
+ # Adds a probe to the installed probes collection.
61
+ #
62
+ # @param probe [Probe] The probe to add
63
+ def add_installed(probe)
64
+ @lock.synchronize do
65
+ @installed_probes[probe.id] = probe
66
+ end
67
+ end
68
+
69
+ # Removes a probe from the installed probes collection.
70
+ #
71
+ # @param probe_id [String] The ID of the probe to remove
72
+ # @return [Probe, nil] The removed probe if found, nil otherwise
73
+ def remove_installed(probe_id)
74
+ @lock.synchronize do
75
+ @installed_probes.delete(probe_id)
76
+ end
77
+ end
78
+
79
+ # Returns the pending probes hash.
80
+ #
81
+ # @return [Hash<String, Probe>] map of probe ID to pending probe
82
+ def pending_probes
83
+ @lock.synchronize do
84
+ @pending_probes
85
+ end
86
+ end
87
+
88
+ # Finds a pending probe by ID.
89
+ #
90
+ # @param probe_id [String] The probe ID to look up
91
+ # @return [Probe, nil] The probe if found, nil otherwise
92
+ def find_pending(probe_id)
93
+ @lock.synchronize do
94
+ @pending_probes[probe_id]
95
+ end
96
+ end
97
+
98
+ # Adds a probe to the pending probes collection.
99
+ #
100
+ # @param probe [Probe] The probe to add
101
+ def add_pending(probe)
102
+ @lock.synchronize do
103
+ @pending_probes[probe.id] = probe
104
+ end
105
+ end
106
+
107
+ # Removes a probe from the pending probes collection.
108
+ #
109
+ # @param probe_id [String] The ID of the probe to remove
110
+ # @return [Probe, nil] The removed probe if found, nil otherwise
111
+ def remove_pending(probe_id)
112
+ @lock.synchronize do
113
+ @pending_probes.delete(probe_id)
114
+ end
115
+ end
116
+
117
+ # Clears all pending probes.
118
+ #
119
+ # @return [void]
120
+ def clear_pending
121
+ @lock.synchronize do
122
+ @pending_probes.clear
123
+ end
124
+ end
125
+
126
+ # Returns the failed probes hash.
127
+ # Values are error message strings, not Probe objects.
128
+ #
129
+ # @return [Hash<String, String>] map of probe ID to error message
130
+ def failed_probes
131
+ @lock.synchronize do
132
+ @failed_probes
133
+ end
134
+ end
135
+
136
+ # Finds a failed probe error message by probe ID.
137
+ #
138
+ # @param probe_id [String] The probe ID to look up
139
+ # @return [String, nil] The error message if found, nil otherwise
140
+ def find_failed(probe_id)
141
+ @lock.synchronize do
142
+ @failed_probes[probe_id]
143
+ end
144
+ end
145
+
146
+ # Records a probe installation failure.
147
+ #
148
+ # Failed probes are tracked by ID with their error message to prevent
149
+ # repeated installation attempts that would fail again.
150
+ #
151
+ # @param probe_id [String] The probe ID
152
+ # @param message [String] The error message describing why the probe failed
153
+ def add_failed(probe_id, message)
154
+ @lock.synchronize do
155
+ @failed_probes[probe_id] = message
156
+ end
157
+ end
158
+
159
+ # Removes a probe failure record from the collection.
160
+ #
161
+ # Called when remote configuration removes a probe that previously
162
+ # failed to install, cleaning up the failure tracking.
163
+ #
164
+ # @param probe_id [String] The ID of the probe to remove
165
+ # @return [String, nil] The removed error message if found, nil otherwise
166
+ def remove_failed(probe_id)
167
+ @lock.synchronize do
168
+ @failed_probes.delete(probe_id)
169
+ end
170
+ end
171
+
172
+ # Clears all probes from all collections.
173
+ #
174
+ # If a block is given, yields each installed probe after clearing
175
+ # to allow cleanup (e.g., unhooking instrumentation).
176
+ #
177
+ # The yield happens outside the lock to avoid blocking other operations
178
+ # if the cleanup callback is slow.
179
+ #
180
+ # @yield [probe] Yields each installed probe after clearing (for cleanup)
181
+ def clear_all
182
+ probes_to_cleanup = @lock.synchronize do
183
+ probes = @installed_probes.values
184
+ @installed_probes.clear
185
+ @pending_probes.clear
186
+ @failed_probes.clear
187
+ probes
188
+ end
189
+
190
+ if block_given?
191
+ probes_to_cleanup.each do |probe|
192
+ yield probe
193
+ end
194
+ end
195
+ end
196
+ end
197
+ end
198
+ end
@@ -77,14 +77,13 @@ module Datadog
77
77
  component.probe_manager.add_probe(probe)
78
78
  content.applied
79
79
  rescue DI::Error::DITargetNotInRegistry => exc
80
- component.telemetry&.report(exc, description: "Line probe is targeting a loaded file that is not in code tracker")
81
-
82
- payload = component.probe_notification_builder.build_errored(probe, exc)
83
- component.probe_notifier_worker.add_status(payload, probe: probe)
84
-
80
+ # Error status is already reported by probe_manager.add_probe,
81
+ # so we don't need to send another error payload here.
82
+ # Just mark the remote config content as errored.
83
+ #
85
84
  # If a probe fails to install, we will mark the content
86
85
  # as errored. On subsequent remote configuration application
87
- # attemps, probe manager will raise the "previously errored"
86
+ # attempts, probe manager will raise the "previously errored"
88
87
  # exception and we'll rescue it here, again marking the
89
88
  # content as errored but with a somewhat different exception
90
89
  # message.
@@ -42,6 +42,11 @@ module Datadog
42
42
  #
43
43
  # @api private
44
44
  class Serializer
45
+ # Exception classes that should never be caught during serialization.
46
+ # These represent fatal conditions (signals, interrupts, system exit)
47
+ # that must propagate to the caller.
48
+ FATAL_EXCEPTION_CLASSES = [SignalException, Interrupt, SystemExit].freeze
49
+
45
50
  # Third-party library integration / custom serializers.
46
51
  #
47
52
  # Dynamic instrumentation has limited payload sizes, and for efficiency
@@ -67,6 +72,22 @@ module Datadog
67
72
  #
68
73
  # Important: these serializers are NOT used in log messages.
69
74
  # They are only used for variables that are captured in the snapshots.
75
+ #
76
+ # Exception handling: If a custom serializer's condition lambda raises
77
+ # an exception (e.g., regex match against invalid UTF-8 strings), the
78
+ # exception will be logged at WARN level, then the serializer will be
79
+ # skipped and the next serializer will be tried. This prevents custom
80
+ # serializers from breaking the entire serialization process.
81
+ #
82
+ # IMPORTANT: Custom serializers MUST produce data that can be JSON-encoded.
83
+ # Specifically, custom serializers MUST NOT produce strings with binary
84
+ # encoding (ASCII-8BIT) containing non-ASCII code points (bytes >= 0x80)
85
+ # that cannot be automatically transcoded to UTF-8. Such strings will
86
+ # cause JSON encoding to fail, which will result in the probe being
87
+ # disabled and an ERROR status being reported. If your data contains
88
+ # binary content, encode it to a text representation (e.g., Base64,
89
+ # hex string, or UTF-8 with replacement characters) before returning
90
+ # it from the custom serializer.
70
91
  @@flat_registry = []
71
92
  def self.register(condition: nil, &block)
72
93
  @@flat_registry << {condition: condition, proc: block}
@@ -152,9 +173,28 @@ module Datadog
152
173
  end
153
174
 
154
175
  @@flat_registry.each do |entry|
155
- if (condition = entry[:condition]) && condition.call(value)
156
- serializer_proc = entry.fetch(:proc)
157
- return serializer_proc.call(self, value, name: nil, depth: depth)
176
+ condition = entry[:condition]
177
+ if condition
178
+ begin
179
+ condition_result = condition.call(value)
180
+ rescue => e
181
+ # If a custom serializer condition raises an exception (e.g., regex match
182
+ # against invalid UTF-8), skip it and continue with the next serializer.
183
+ # We don't want custom serializer conditions to break the entire serialization.
184
+ #
185
+ # Custom serializers may be defined by customers (in which case we should
186
+ # surface errors so they can fix their serializers) or they may be defined
187
+ # internally by dd-trace-rb (in which case we need to fix them). We use
188
+ # WARN level to surface these errors in either case.
189
+ Datadog.logger.warn("DI: Custom serializer condition failed: #{e.class}: #{e}")
190
+ telemetry&.report(e, description: "Custom serializer condition failed")
191
+ next
192
+ end
193
+
194
+ if condition_result
195
+ serializer_proc = entry.fetch(:proc)
196
+ return serializer_proc.call(self, value, name: nil, depth: depth)
197
+ end
158
198
  end
159
199
  end
160
200
 
@@ -184,13 +224,35 @@ module Datadog
184
224
  else
185
225
  value.to_s
186
226
  end
227
+
228
+ # Handle binary strings and invalid UTF-8 by escaping to JSON-safe format.
229
+ # See escape_binary_string for details on the escaping format.
230
+ #
231
+ # Truncate binary data BEFORE escaping to avoid cutting mid-escape-sequence.
232
+ # For regular strings, the limit is applied to string length in characters.
187
233
  max = settings.dynamic_instrumentation.max_capture_string_length
188
- if value.length > max
189
- serialized.update(truncated: true, size: value.length)
190
- value = value[0...max]
191
- need_dup = false
234
+
235
+ if value.encoding == Encoding::BINARY || !value.valid_encoding?
236
+ # Truncate binary data BEFORE escaping to avoid cutting mid-escape-sequence
237
+ # For invalid encodings, use bytesize instead of length to avoid encoding errors
238
+ original_size = value.bytesize
239
+ if original_size > max
240
+ serialized.update(truncated: true, size: original_size)
241
+ value = value.byteslice(0...max)
242
+ end
243
+ value = escape_binary_string(value) # steep:ignore ArgumentTypeMismatch
244
+ false # Already converted to a new string
245
+ else
246
+ # Truncate non-binary strings
247
+ if value.length > max
248
+ serialized.update(truncated: true, size: value.length)
249
+ value = value[0...max]
250
+ need_dup = false
251
+ end
252
+
253
+ value = value.dup if need_dup
192
254
  end
193
- value = value.dup if need_dup
255
+
194
256
  serialized.update(value: value)
195
257
  when Array
196
258
  if depth < 0
@@ -266,7 +328,16 @@ module Datadog
266
328
  end
267
329
  end
268
330
  serialized
269
- rescue => exc
331
+ rescue Exception => exc # standard:disable Lint/RescueException
332
+ # Re-raise fatal exceptions that should not be caught
333
+ # (signals, interrupts, system exit)
334
+ raise if FATAL_EXCEPTION_CLASSES.any? { |klass| exc.is_a?(klass) }
335
+
336
+ # Catch all other exceptions including SystemStackError and NoMemoryError.
337
+ # These inherit from Exception (not StandardError) but can occur during
338
+ # serialization (e.g., infinite recursion in custom serializers, memory
339
+ # exhaustion from large objects) and should return a safe structure
340
+ # rather than propagating to the transport layer.
270
341
  telemetry&.report(exc, description: "Error serializing")
271
342
  {type: class_name(cls), notSerializedReason: exc.to_s}
272
343
  end
@@ -417,6 +488,53 @@ module Datadog
417
488
  value
418
489
  end
419
490
  end
491
+
492
+ # Escapes a binary string or invalid UTF-8 string to a JSON-safe format.
493
+ #
494
+ # IMPORTANT: This method should ONLY be called with either:
495
+ # 1. True binary strings (encoding == Encoding::BINARY / ASCII-8BIT)
496
+ # 2. Strings with invalid encoding (!value.valid_encoding?)
497
+ #
498
+ # Calling this method with valid UTF-8 strings will produce incorrect output.
499
+ #
500
+ # Binary data (ASCII-8BIT encoding) or strings with invalid encoding are
501
+ # converted to an escaped string in the format: b'...' with hex escapes
502
+ # for non-printable bytes.
503
+ #
504
+ # The output format matches other Datadog tracer libraries for consistency
505
+ # across language implementations. The output is JSON-serializable.
506
+ #
507
+ # Examples:
508
+ # "Hello".b -> "b'Hello'"
509
+ # "\x80\xFF".b -> "b'\\x80\\xff'"
510
+ # "\x80".force_encoding('UTF-8') -> "b'\\x80'" (invalid UTF-8)
511
+ #
512
+ # @param binary_string [String] A string with ASCII-8BIT encoding or invalid encoding
513
+ # @return [String] Escaped string with UTF-8 encoding
514
def escape_binary_string(binary_string)
  # Bytes that get a dedicated two-character escape instead of \xHH.
  short_escapes = {
    0x09 => '\\t',  # tab
    0x0A => '\\n',  # line feed
    0x0D => '\\r',  # carriage return
    0x27 => "\\'",  # single quote (would otherwise end the b'...' literal)
    0x5C => '\\\\', # backslash
  }

  escaped = "b'".dup
  binary_string.each_byte do |code|
    piece = short_escapes.fetch(code) do
      # Printable ASCII (space through ~) is copied verbatim; every other
      # byte is rendered as a lowercase \xHH hex escape.
      code.between?(0x20, 0x7E) ? code.chr : format('\\x%02x', code)
    end
    escaped << piece
  end

  # String#<< returns the receiver, so this is also the return value.
  escaped << "'"
end
420
538
  end
421
539
  end
422
540
  end