datadog 2.7.1 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +34 -1
  3. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +47 -17
  4. data/ext/datadog_profiling_native_extension/extconf.rb +0 -8
  5. data/ext/datadog_profiling_native_extension/heap_recorder.c +11 -89
  6. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +1 -1
  7. data/ext/datadog_profiling_native_extension/stack_recorder.c +0 -34
  8. data/ext/libdatadog_extconf_helpers.rb +1 -1
  9. data/lib/datadog/appsec/component.rb +1 -8
  10. data/lib/datadog/appsec/contrib/active_record/instrumentation.rb +73 -0
  11. data/lib/datadog/appsec/contrib/active_record/integration.rb +41 -0
  12. data/lib/datadog/appsec/contrib/active_record/patcher.rb +53 -0
  13. data/lib/datadog/appsec/event.rb +1 -1
  14. data/lib/datadog/appsec/processor/context.rb +2 -2
  15. data/lib/datadog/appsec/remote.rb +1 -3
  16. data/lib/datadog/appsec/response.rb +7 -11
  17. data/lib/datadog/appsec.rb +3 -2
  18. data/lib/datadog/core/configuration/components.rb +17 -1
  19. data/lib/datadog/core/configuration/settings.rb +10 -0
  20. data/lib/datadog/core/configuration.rb +9 -1
  21. data/lib/datadog/core/remote/client/capabilities.rb +6 -0
  22. data/lib/datadog/core/remote/client.rb +65 -59
  23. data/lib/datadog/core/telemetry/component.rb +9 -3
  24. data/lib/datadog/core/telemetry/ext.rb +1 -0
  25. data/lib/datadog/di/code_tracker.rb +5 -4
  26. data/lib/datadog/di/component.rb +5 -1
  27. data/lib/datadog/di/contrib/active_record.rb +1 -0
  28. data/lib/datadog/di/init.rb +20 -0
  29. data/lib/datadog/di/instrumenter.rb +81 -11
  30. data/lib/datadog/di/probe.rb +11 -1
  31. data/lib/datadog/di/probe_builder.rb +1 -0
  32. data/lib/datadog/di/probe_manager.rb +4 -1
  33. data/lib/datadog/di/probe_notification_builder.rb +13 -7
  34. data/lib/datadog/di/remote.rb +124 -0
  35. data/lib/datadog/di/serializer.rb +14 -7
  36. data/lib/datadog/di/transport.rb +1 -1
  37. data/lib/datadog/di/utils.rb +7 -0
  38. data/lib/datadog/di.rb +84 -20
  39. data/lib/datadog/profiling/component.rb +4 -16
  40. data/lib/datadog/tracing/configuration/settings.rb +4 -8
  41. data/lib/datadog/tracing/contrib/active_support/cache/redis.rb +16 -4
  42. data/lib/datadog/tracing/contrib/elasticsearch/configuration/settings.rb +4 -0
  43. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +6 -1
  44. data/lib/datadog/version.rb +2 -2
  45. data/lib/datadog.rb +3 -0
  46. metadata +17 -13
  47. data/lib/datadog/appsec/processor/actions.rb +0 -49
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '00617590b3381113b74dde6671802aad74e6ffee0a96737fbc04149515c6a79d'
4
- data.tar.gz: 5bed675aca238d308051ba0a728209e2a4a5ba17f2dc11a52eacee8aaf55a123
3
+ metadata.gz: 6f4b811f0c5014e6f325ac55406d225454a2101cc7576df15657ebc74cf47542
4
+ data.tar.gz: 20c6095b149238c31501bd0be8eb4363a9b22011f750d5f2d68e3f8b730bb970
5
5
  SHA512:
6
- metadata.gz: 4097896d2d8126418f0827b9c4ad916a003e71ead0919fae2b3586415540f869d58f140865d4c625d20fd4de6a76bcf667156893e7c01f0fee981b7fcb6cafe9
7
- data.tar.gz: ce91b73f91a97db31570bd92dab0ca26bf7a6b849d67b774a4efacea0ec93e19f19c27a3dc3f0f232f2adce6cf5ed13b510c30651c529a6b402137944a6b2e87
6
+ metadata.gz: af600463b83509c10417cc90fa808b4148baaa0961bde7aa2d1cdea98c6537a68fdde456af2d700a1e8d795a49fbc9aef3f61d68cc81949132abd8d9165ed19e
7
+ data.tar.gz: 28977b792b9f957e57bf8386b759a59257d0a74c161038ae7f2f4a1b0f8016b8c3828cf71092920c6fd14fd6640e5ab4dbf987dec257fc2ff54061f7342c20bf
data/CHANGELOG.md CHANGED
@@ -2,6 +2,28 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [2.8.0] - 2024-12-10
6
+
7
+ ### Added
8
+
9
+ * DI: Dynamic instrumentation is now available in Ruby as a Preview
10
+ * AppSec: Add SQL injection detection for ActiveRecord for the following adapters: `mysql2`, `postgresql`, and `sqlite3` ([#4167][])
11
+ * Telemetry: Add environment variable to disable logs ([#4153][])
12
+ * Integrations: Add configuration option `on_error` to Elasticsearch tracing ([#4066][])
13
+
14
+ ### Changed
15
+
16
+ * Upgrade libdatadog dependency to 14.3.1 ([#4196][])
17
+ * Profiling: Require Ruby 3.1+ for heap profiling ([#4178][])
18
+ * AppSec: Update libddwaf to 1.18.0.0.0 ([#4164][])
19
+ * Single-step: Lower SSI GLIBC requirements down to 2.17 ([#4137][])
20
+
21
+ ### Fixed
22
+
23
+ * Integrations: Avoid loading `ActiveSupport::Cache::RedisCacheStore`, which tries to load `redis >= 4.0.1` regardless of the version of Redis the host application has installed ([#4197][])
24
+ * Profiling: Fix unsafe initialization when using profiler with otel tracing ([#4195][])
25
+ * Single-step: Add safe NOOP injection script for very old rubies ([#4140][])
26
+
5
27
  ## [2.7.1] - 2024-11-28
6
28
 
7
29
  ### Fixed
@@ -3035,7 +3057,8 @@ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v0.3.1
3035
3057
  Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
3036
3058
 
3037
3059
 
3038
- [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.7.0...master
3060
+ [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.8.0...master
3061
+ [2.8.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.7.1...v2.8.0
3039
3062
  [2.7.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.6.0...v2.7.0
3040
3063
  [2.6.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.5.0...v2.6.0
3041
3064
  [2.5.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.4.0...v2.5.0
@@ -4480,12 +4503,22 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
4480
4503
  [#4027]: https://github.com/DataDog/dd-trace-rb/issues/4027
4481
4504
  [#4033]: https://github.com/DataDog/dd-trace-rb/issues/4033
4482
4505
  [#4065]: https://github.com/DataDog/dd-trace-rb/issues/4065
4506
+ [#4066]: https://github.com/DataDog/dd-trace-rb/issues/4066
4483
4507
  [#4075]: https://github.com/DataDog/dd-trace-rb/issues/4075
4484
4508
  [#4078]: https://github.com/DataDog/dd-trace-rb/issues/4078
4485
4509
  [#4082]: https://github.com/DataDog/dd-trace-rb/issues/4082
4486
4510
  [#4083]: https://github.com/DataDog/dd-trace-rb/issues/4083
4487
4511
  [#4085]: https://github.com/DataDog/dd-trace-rb/issues/4085
4512
+ [#4137]: https://github.com/DataDog/dd-trace-rb/issues/4137
4513
+ [#4140]: https://github.com/DataDog/dd-trace-rb/issues/4140
4514
+ [#4153]: https://github.com/DataDog/dd-trace-rb/issues/4153
4488
4515
  [#4161]: https://github.com/DataDog/dd-trace-rb/issues/4161
4516
+ [#4164]: https://github.com/DataDog/dd-trace-rb/issues/4164
4517
+ [#4167]: https://github.com/DataDog/dd-trace-rb/issues/4167
4518
+ [#4178]: https://github.com/DataDog/dd-trace-rb/issues/4178
4519
+ [#4195]: https://github.com/DataDog/dd-trace-rb/issues/4195
4520
+ [#4196]: https://github.com/DataDog/dd-trace-rb/issues/4196
4521
+ [#4197]: https://github.com/DataDog/dd-trace-rb/issues/4197
4489
4522
  [@AdrianLC]: https://github.com/AdrianLC
4490
4523
  [@Azure7111]: https://github.com/Azure7111
4491
4524
  [@BabyGroot]: https://github.com/BabyGroot
@@ -226,7 +226,8 @@ static void trigger_sample_for_thread(
226
226
  long current_monotonic_wall_time_ns,
227
227
  ddog_CharSlice *ruby_vm_type,
228
228
  ddog_CharSlice *class_name,
229
- bool is_gvl_waiting_state
229
+ bool is_gvl_waiting_state,
230
+ bool is_safe_to_allocate_objects
230
231
  );
231
232
  static VALUE _native_thread_list(VALUE self);
232
233
  static struct per_thread_context *get_or_create_context_for(VALUE thread, struct thread_context_collector_state *state);
@@ -246,7 +247,12 @@ static long cpu_time_now_ns(struct per_thread_context *thread_context);
246
247
  static long thread_id_for(VALUE thread);
247
248
  static VALUE _native_stats(VALUE self, VALUE collector_instance);
248
249
  static VALUE _native_gc_tracking(VALUE self, VALUE collector_instance);
249
- static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);
250
+ static void trace_identifiers_for(
251
+ struct thread_context_collector_state *state,
252
+ VALUE thread,
253
+ struct trace_identifiers *trace_identifiers_result,
254
+ bool is_safe_to_allocate_objects
255
+ );
250
256
  static bool should_collect_resource(VALUE root_span);
251
257
  static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
252
258
  static VALUE thread_list(struct thread_context_collector_state *state);
@@ -259,7 +265,8 @@ static void ddtrace_otel_trace_identifiers_for(
259
265
  VALUE *root_span,
260
266
  VALUE *numeric_span_id,
261
267
  VALUE active_span,
262
- VALUE otel_values
268
+ VALUE otel_values,
269
+ bool is_safe_to_allocate_objects
263
270
  );
264
271
  static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE skipped_samples);
265
272
  static bool handle_gvl_waiting(
@@ -278,7 +285,8 @@ static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSE
278
285
  static void otel_without_ddtrace_trace_identifiers_for(
279
286
  struct thread_context_collector_state *state,
280
287
  VALUE thread,
281
- struct trace_identifiers *trace_identifiers_result
288
+ struct trace_identifiers *trace_identifiers_result,
289
+ bool is_safe_to_allocate_objects
282
290
  );
283
291
  static struct otel_span otel_span_from(VALUE otel_context, VALUE otel_current_span_key);
284
292
  static uint64_t otel_span_id_to_uint(VALUE otel_span_id);
@@ -647,7 +655,8 @@ static void update_metrics_and_sample(
647
655
  current_monotonic_wall_time_ns,
648
656
  NULL,
649
657
  NULL,
650
- is_gvl_waiting_state
658
+ is_gvl_waiting_state,
659
+ /* is_safe_to_allocate_objects: */ true // We're called from a context that's safe to run any regular code, including allocations
651
660
  );
652
661
  }
653
662
 
@@ -833,7 +842,10 @@ static void trigger_sample_for_thread(
833
842
  // These two labels are only used for allocation profiling; @ivoanjo: may want to refactor this at some point?
834
843
  ddog_CharSlice *ruby_vm_type,
835
844
  ddog_CharSlice *class_name,
836
- bool is_gvl_waiting_state
845
+ bool is_gvl_waiting_state,
846
+ // Whether the Ruby VM is in a state where it can safely allocate objects. Added for allocation profiling: we're not
847
+ // allowed to allocate objects (or raise exceptions) when inside the NEWOBJ tracepoint.
848
+ bool is_safe_to_allocate_objects
837
849
  ) {
838
850
  int max_label_count =
839
851
  1 + // thread id
@@ -872,11 +884,11 @@ static void trigger_sample_for_thread(
872
884
  }
873
885
 
874
886
  struct trace_identifiers trace_identifiers_result = {.valid = false, .trace_endpoint = Qnil};
875
- trace_identifiers_for(state, thread, &trace_identifiers_result);
887
+ trace_identifiers_for(state, thread, &trace_identifiers_result, is_safe_to_allocate_objects);
876
888
 
877
889
  if (!trace_identifiers_result.valid && state->otel_context_enabled != OTEL_CONTEXT_ENABLED_FALSE) {
878
890
  // If we couldn't get something with ddtrace, let's see if we can get some trace identifiers from opentelemetry directly
879
- otel_without_ddtrace_trace_identifiers_for(state, thread, &trace_identifiers_result);
891
+ otel_without_ddtrace_trace_identifiers_for(state, thread, &trace_identifiers_result, is_safe_to_allocate_objects);
880
892
  }
881
893
 
882
894
  if (trace_identifiers_result.valid) {
@@ -1289,7 +1301,12 @@ static VALUE _native_gc_tracking(DDTRACE_UNUSED VALUE _self, VALUE collector_ins
1289
1301
  }
1290
1302
 
1291
1303
  // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
1292
- static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) {
1304
+ static void trace_identifiers_for(
1305
+ struct thread_context_collector_state *state,
1306
+ VALUE thread,
1307
+ struct trace_identifiers *trace_identifiers_result,
1308
+ bool is_safe_to_allocate_objects
1309
+ ) {
1293
1310
  if (state->otel_context_enabled == OTEL_CONTEXT_ENABLED_ONLY) return;
1294
1311
  if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;
1295
1312
 
@@ -1308,7 +1325,9 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
1308
1325
 
1309
1326
  VALUE numeric_span_id = Qnil;
1310
1327
 
1311
- if (otel_values != Qnil) ddtrace_otel_trace_identifiers_for(state, &active_trace, &root_span, &numeric_span_id, active_span, otel_values);
1328
+ if (otel_values != Qnil) {
1329
+ ddtrace_otel_trace_identifiers_for(state, &active_trace, &root_span, &numeric_span_id, active_span, otel_values, is_safe_to_allocate_objects);
1330
+ }
1312
1331
 
1313
1332
  if (root_span == Qnil || (active_span == Qnil && numeric_span_id == Qnil)) return;
1314
1333
 
@@ -1474,7 +1493,8 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
1474
1493
  INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
1475
1494
  &ruby_vm_type,
1476
1495
  optional_class_name,
1477
- false
1496
+ /* is_gvl_waiting_state: */ false,
1497
+ /* is_safe_to_allocate_objects: */ false // Not safe to allocate further inside the NEWOBJ tracepoint
1478
1498
  );
1479
1499
  }
1480
1500
 
@@ -1529,11 +1549,18 @@ static VALUE read_otel_current_span_key_const(DDTRACE_UNUSED VALUE _unused) {
1529
1549
  return rb_const_get(trace_module, rb_intern("CURRENT_SPAN_KEY"));
1530
1550
  }
1531
1551
 
1532
- static VALUE get_otel_current_span_key(struct thread_context_collector_state *state) {
1552
+ static VALUE get_otel_current_span_key(struct thread_context_collector_state *state, bool is_safe_to_allocate_objects) {
1533
1553
  if (state->otel_current_span_key == Qtrue) { // Qtrue means we haven't tried to extract it yet
1554
+ if (!is_safe_to_allocate_objects) {
1555
+ // Calling read_otel_current_span_key_const below can trigger exceptions and arbitrary Ruby code running (e.g.
1556
+ // `const_missing`, etc). Not safe to call in this situation, so we just skip otel info for this sample.
1557
+ return Qnil;
1558
+ }
1559
+
1534
1560
  // If this fails, we want to fail gracefully, rather than raise an exception (e.g. if the opentelemetry gem
1535
1561
  // gets refactored, we should not fall on our face)
1536
1562
  VALUE span_key = rb_protect(read_otel_current_span_key_const, Qnil, NULL);
1563
+ rb_set_errinfo(Qnil); // **Clear any pending exception after ignoring it**
1537
1564
 
1538
1565
  // Note that this gets set to Qnil if we failed to extract the correct value, and thus we won't try to extract it again
1539
1566
  state->otel_current_span_key = span_key;
@@ -1550,7 +1577,8 @@ static void ddtrace_otel_trace_identifiers_for(
1550
1577
  VALUE *root_span,
1551
1578
  VALUE *numeric_span_id,
1552
1579
  VALUE active_span,
1553
- VALUE otel_values
1580
+ VALUE otel_values,
1581
+ bool is_safe_to_allocate_objects
1554
1582
  ) {
1555
1583
  VALUE resolved_numeric_span_id =
1556
1584
  active_span == Qnil ?
@@ -1561,7 +1589,7 @@ static void ddtrace_otel_trace_identifiers_for(
1561
1589
 
1562
1590
  if (resolved_numeric_span_id == Qnil) return;
1563
1591
 
1564
- VALUE otel_current_span_key = get_otel_current_span_key(state);
1592
+ VALUE otel_current_span_key = get_otel_current_span_key(state, is_safe_to_allocate_objects);
1565
1593
  if (otel_current_span_key == Qnil) return;
1566
1594
  VALUE current_trace = *active_trace;
1567
1595
 
@@ -1640,14 +1668,15 @@ static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self
1640
1668
  static void otel_without_ddtrace_trace_identifiers_for(
1641
1669
  struct thread_context_collector_state *state,
1642
1670
  VALUE thread,
1643
- struct trace_identifiers *trace_identifiers_result
1671
+ struct trace_identifiers *trace_identifiers_result,
1672
+ bool is_safe_to_allocate_objects
1644
1673
  ) {
1645
1674
  VALUE context_storage = rb_thread_local_aref(thread, otel_context_storage_id /* __opentelemetry_context_storage__ */);
1646
1675
 
1647
1676
  // If it exists, context_storage is expected to be an Array[OpenTelemetry::Context]
1648
1677
  if (context_storage == Qnil || !RB_TYPE_P(context_storage, T_ARRAY)) return;
1649
1678
 
1650
- VALUE otel_current_span_key = get_otel_current_span_key(state);
1679
+ VALUE otel_current_span_key = get_otel_current_span_key(state, is_safe_to_allocate_objects);
1651
1680
  if (otel_current_span_key == Qnil) return;
1652
1681
 
1653
1682
  int active_context_index = RARRAY_LEN(context_storage) - 1;
@@ -1939,7 +1968,8 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
1939
1968
  gvl_waiting_started_wall_time_ns,
1940
1969
  NULL,
1941
1970
  NULL,
1942
- false // This is the extra sample before the wait begun; only the next sample will be in the gvl waiting state
1971
+ /* is_gvl_waiting_state: */ false, // This is the extra sample before the wait began; only the next sample will be in the gvl waiting state
1972
+ /* is_safe_to_allocate_objects: */ true // This is similar to a regular cpu/wall sample, so it's also safe
1943
1973
  );
1944
1974
  }
1945
1975
 
@@ -170,11 +170,6 @@ $defs << "-DNO_THREAD_TID" if RUBY_VERSION < "3.1"
170
170
  # On older Rubies, there was no jit_return member on the rb_control_frame_t struct
171
171
  $defs << "-DNO_JIT_RETURN" if RUBY_VERSION < "3.1"
172
172
 
173
- # On older Rubies, rb_gc_force_recycle allowed to free objects in a way that
174
- # would be invisible to free tracepoints, finalizers and without cleaning
175
- # obj_to_id_tbl mappings.
176
- $defs << "-DHAVE_WORKING_RB_GC_FORCE_RECYCLE" if RUBY_VERSION < "3.1"
177
-
178
173
  # On older Rubies, there are no Ractors
179
174
  $defs << "-DNO_RACTORS" if RUBY_VERSION < "3"
180
175
 
@@ -184,9 +179,6 @@ $defs << "-DNO_IMEMO_NAME" if RUBY_VERSION < "3"
184
179
  # On older Rubies, objects would not move
185
180
  $defs << "-DNO_T_MOVED" if RUBY_VERSION < "2.7"
186
181
 
187
- # On older Rubies, there was no RUBY_SEEN_OBJ_ID flag
188
- $defs << "-DNO_SEEN_OBJ_ID_FLAG" if RUBY_VERSION < "2.7"
189
-
190
182
  # On older Rubies, rb_global_vm_lock_struct did not include the owner field
191
183
  $defs << "-DNO_GVL_OWNER" if RUBY_VERSION < "2.6"
192
184
 
@@ -7,10 +7,6 @@
7
7
  #include "libdatadog_helpers.h"
8
8
  #include "time_helpers.h"
9
9
 
10
- #if (defined(HAVE_WORKING_RB_GC_FORCE_RECYCLE) && ! defined(NO_SEEN_OBJ_ID_FLAG))
11
- #define CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
12
- #endif
13
-
14
10
  // Minimum age (in GC generations) of heap objects we want to include in heap
15
11
  // recorder iterations. Object with age 0 represent objects that have yet to undergo
16
12
  // a GC and, thus, may just be noise/trash at instant of iteration and are usually not
@@ -123,9 +119,6 @@ typedef struct {
123
119
  // Pointer to the (potentially partial) object_record containing metadata about an ongoing recording.
124
120
  // When NULL, this symbolizes an unstarted/invalid recording.
125
121
  object_record *object_record;
126
- // A flag to track whether we had to force set the RUBY_FL_SEEN_OBJ_ID flag on this object
127
- // as part of our workaround around rb_gc_force_recycle issues.
128
- bool did_recycle_workaround;
129
122
  } recording;
130
123
 
131
124
  struct heap_recorder {
@@ -342,46 +335,12 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj
342
335
  rb_raise(rb_eRuntimeError, "Detected a bignum object id. These are not supported by heap profiling.");
343
336
  }
344
337
 
345
- bool did_recycle_workaround = false;
346
-
347
- #ifdef CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
348
- // If we are in a ruby version that has a working rb_gc_force_recycle implementation,
349
- // its usage may lead to an object being re-used outside of the typical GC cycle.
350
- //
351
- // This re-use is in theory invisible to us unless we're lucky enough to sample both
352
- // the original object and the replacement that uses the recycled slot.
353
- //
354
- // In practice, we've observed (https://github.com/DataDog/dd-trace-rb/pull/3366)
355
- // that non-noop implementations of rb_gc_force_recycle have an implementation bug
356
- // which results in the object that re-used the recycled slot inheriting the same
357
- // object id without setting the FL_SEEN_OBJ_ID flag. We rely on this knowledge to
358
- // "observe" implicit frees when an object we are tracking is force-recycled.
359
- //
360
- // However, it may happen that we start tracking a new object and that object was
361
- // allocated on a recycled slot. Due to the bug, this object would be missing the
362
- // FL_SEEN_OBJ_ID flag even though it was not recycled itself. If we left it be,
363
- // when we're doing our liveness check, the absence of the flag would trigger our
364
- // implicit free workaround and the object would be inferred as recycled even though
365
- // it might still be alive.
366
- //
367
- // Thus, if we detect that this new allocation is already missing the flag at the start
368
- // of the heap allocation recording, we force-set it. This should be safe since we
369
- // just called rb_obj_id on it above and the expectation is that any flaggable object
370
- // that goes through it ends up with the flag set (as evidenced by the GC_ASSERT
371
- // lines in https://github.com/ruby/ruby/blob/4a8d7246d15b2054eacb20f8ab3d29d39a3e7856/gc.c#L4050C14-L4050C14).
372
- if (RB_FL_ABLE(new_obj) && !RB_FL_TEST(new_obj, RUBY_FL_SEEN_OBJ_ID)) {
373
- RB_FL_SET(new_obj, RUBY_FL_SEEN_OBJ_ID);
374
- did_recycle_workaround = true;
375
- }
376
- #endif
377
-
378
338
  heap_recorder->active_recording = (recording) {
379
339
  .object_record = object_record_new(FIX2LONG(ruby_obj_id), NULL, (live_object_data) {
380
340
  .weight = weight * heap_recorder->sample_rate,
381
341
  .class = alloc_class != NULL ? string_from_char_slice(*alloc_class) : NULL,
382
342
  .alloc_gen = rb_gc_count(),
383
- }),
384
- .did_recycle_workaround = did_recycle_workaround,
343
+ }),
385
344
  };
386
345
  }
387
346
 
@@ -685,41 +644,6 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext
685
644
 
686
645
  // If we got this far, then we found a valid live object for the tracked id.
687
646
 
688
- #ifdef CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
689
- // If we are in a ruby version that has a working rb_gc_force_recycle implementation,
690
- // its usage may lead to an object being re-used outside of the typical GC cycle.
691
- //
692
- // This re-use is in theory invisible to us and would mean that the ref from which we
693
- // collected the object_record metadata may not be the same as the current ref and
694
- // thus any further reporting would be innacurately attributed to stale metadata.
695
- //
696
- // In practice, there is a way for us to notice that this happened because of a bug
697
- // in the implementation of rb_gc_force_recycle. Our heap profiler relies on object
698
- // ids and id2ref to detect whether objects are still alive. Turns out that when an
699
- // object with an id is re-used via rb_gc_force_recycle, it will "inherit" the ID
700
- // of the old object but it will NOT have the FL_SEEN_OBJ_ID as per the experiment
701
- // in https://github.com/DataDog/dd-trace-rb/pull/3360#discussion_r1442823517
702
- //
703
- // Thus, if we detect that the ref we just resolved above is missing this flag, we can
704
- // safely say re-use happened and thus treat it as an implicit free of the object
705
- // we were tracking (the original one which got recycled).
706
- if (RB_FL_ABLE(ref) && !RB_FL_TEST(ref, RUBY_FL_SEEN_OBJ_ID)) {
707
-
708
- // NOTE: We don't really need to set this flag for heap recorder to work correctly
709
- // but doing so partially mitigates a bug in runtimes with working rb_gc_force_recycle
710
- // which leads to broken invariants and leaking of entries in obj_to_id and id_to_obj
711
- // tables in objspace. We already do the same thing when we sample a recycled object,
712
- // here we apply it as well to objects that replace recycled objects that were being
713
- // tracked. More details in https://github.com/DataDog/dd-trace-rb/pull/3366
714
- RB_FL_SET(ref, RUBY_FL_SEEN_OBJ_ID);
715
-
716
- on_committed_object_record_cleanup(recorder, record);
717
- recorder->stats_last_update.objects_dead++;
718
- return ST_DELETE;
719
- }
720
-
721
- #endif
722
-
723
647
  if (
724
648
  recorder->size_enabled &&
725
649
  recorder->update_include_old && // We only update sizes when doing a full update
@@ -732,6 +656,10 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext
732
656
  record->object_data.is_frozen = RB_OBJ_FROZEN(ref);
733
657
  }
734
658
 
659
+ // Ensure that ref is kept on the stack so the Ruby garbage collector does not try to clean up the object before this
660
+ // point.
661
+ RB_GC_GUARD(ref);
662
+
735
663
  recorder->stats_last_update.objects_alive++;
736
664
  if (record->object_data.is_frozen) {
737
665
  recorder->stats_last_update.objects_frozen++;
@@ -803,18 +731,12 @@ static int update_object_record_entry(DDTRACE_UNUSED st_data_t *key, st_data_t *
803
731
  object_record *new_object_record = recording.object_record;
804
732
  if (existing) {
805
733
  object_record *existing_record = (object_record*) (*value);
806
- if (recording.did_recycle_workaround) {
807
- // In this case, it's possible for an object id to be re-used and we were lucky enough to have
808
- // sampled both the original object and the replacement so cleanup the old one and replace it with
809
- // the new object_record (i.e. treat this as a combined free+allocation).
810
- on_committed_object_record_cleanup(update_data->heap_recorder, existing_record);
811
- } else {
812
- // This is not supposed to happen, raising...
813
- VALUE existing_inspect = object_record_inspect(existing_record);
814
- VALUE new_inspect = object_record_inspect(new_object_record);
815
- rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with "
816
- "the same id. previous=%"PRIsVALUE" new=%"PRIsVALUE, existing_inspect, new_inspect);
817
- }
734
+
735
+ // This is not supposed to happen, raising...
736
+ VALUE existing_inspect = object_record_inspect(existing_record);
737
+ VALUE new_inspect = object_record_inspect(new_object_record);
738
+ rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with "
739
+ "the same id. previous=%"PRIsVALUE" new=%"PRIsVALUE, existing_inspect, new_inspect);
818
740
  }
819
741
  // Always carry on with the update, we want the new record to be there at the end
820
742
  (*value) = (st_data_t) new_object_record;
@@ -158,7 +158,7 @@ bool is_current_thread_holding_the_gvl(void) {
158
158
  //
159
159
  // Thus an incorrect `is_current_thread_holding_the_gvl` result may lead to issues inside `rb_postponed_job_register_one`.
160
160
  //
161
- // For this reason we currently do not enable the new Ruby profiler on Ruby 2.5 by default, and we print a
161
+ // For this reason we use the "no signals workaround" on Ruby 2.5 by default, and we print a
162
162
  // warning when customers force-enable it.
163
163
  bool gvl_acquired = vm->gvl.acquired != 0;
164
164
  rb_thread_t *current_owner = vm->running_thread;
@@ -258,8 +258,6 @@ static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locatio
258
258
  static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
259
259
  static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
260
260
  static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
261
- static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj);
262
- static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj);
263
261
  static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance);
264
262
  static VALUE build_profile_stats(profile_slot *slot, long serialization_time_ns, long heap_iteration_prep_time_ns, long heap_profile_build_time_ns);
265
263
  static VALUE _native_is_object_recorded(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE object_id);
@@ -297,10 +295,6 @@ void stack_recorder_init(VALUE profiling_module) {
297
295
  _native_end_fake_slow_heap_serialization, 1);
298
296
  rb_define_singleton_method(testing_module, "_native_debug_heap_recorder",
299
297
  _native_debug_heap_recorder, 1);
300
- rb_define_singleton_method(testing_module, "_native_gc_force_recycle",
301
- _native_gc_force_recycle, 1);
302
- rb_define_singleton_method(testing_module, "_native_has_seen_id_flag",
303
- _native_has_seen_id_flag, 1);
304
298
  rb_define_singleton_method(testing_module, "_native_is_object_recorded?", _native_is_object_recorded, 2);
305
299
  rb_define_singleton_method(testing_module, "_native_heap_recorder_reset_last_update", _native_heap_recorder_reset_last_update, 1);
306
300
  rb_define_singleton_method(testing_module, "_native_recorder_after_gc_step", _native_recorder_after_gc_step, 1);
@@ -1006,34 +1000,6 @@ static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recor
1006
1000
  return heap_recorder_testonly_debug(state->heap_recorder);
1007
1001
  }
1008
1002
 
1009
- #pragma GCC diagnostic push
1010
- // rb_gc_force_recycle was deprecated in latest versions of Ruby and is a noop.
1011
- #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1012
- #pragma GCC diagnostic ignored "-Wunused-parameter"
1013
- // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
1014
- // It SHOULD NOT be used for other purposes.
1015
- static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj) {
1016
- #ifdef HAVE_WORKING_RB_GC_FORCE_RECYCLE
1017
- rb_gc_force_recycle(obj);
1018
- #endif
1019
- return Qnil;
1020
- }
1021
- #pragma GCC diagnostic pop
1022
-
1023
- // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
1024
- // It SHOULD NOT be used for other purposes.
1025
- static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj) {
1026
- #ifndef NO_SEEN_OBJ_ID_FLAG
1027
- if (RB_FL_TEST(obj, RUBY_FL_SEEN_OBJ_ID)) {
1028
- return Qtrue;
1029
- } else {
1030
- return Qfalse;
1031
- }
1032
- #else
1033
- return Qfalse;
1034
- #endif
1035
- }
1036
-
1037
1003
  static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE recorder_instance) {
1038
1004
  struct stack_recorder_state *state;
1039
1005
  TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
@@ -8,7 +8,7 @@ module Datadog
8
8
  module LibdatadogExtconfHelpers
9
9
  # Used to make sure the correct gem version gets loaded, as extconf.rb does not get run with "bundle exec" and thus
10
10
  # may see multiple libdatadog versions. See https://github.com/DataDog/dd-trace-rb/pull/2531 for the horror story.
11
- LIBDATADOG_VERSION = '~> 14.1.0.1.0'
11
+ LIBDATADOG_VERSION = '~> 14.3.1.1.0'
12
12
 
13
13
  # Used as a workaround for a limitation with how dynamic linking works in environments where the datadog gem and
14
14
  # libdatadog are moved after the extension gets compiled.
@@ -3,7 +3,6 @@
3
3
  require_relative 'processor'
4
4
  require_relative 'processor/rule_merger'
5
5
  require_relative 'processor/rule_loader'
6
- require_relative 'processor/actions'
7
6
 
8
7
  module Datadog
9
8
  module AppSec
@@ -52,10 +51,6 @@ module Datadog
52
51
  )
53
52
  return nil unless rules
54
53
 
55
- actions = rules['actions']
56
-
57
- AppSec::Processor::Actions.merge(actions) if actions
58
-
59
54
  data = AppSec::Processor::RuleLoader.load_data(
60
55
  ip_denylist: settings.appsec.ip_denylist,
61
56
  user_id_denylist: settings.appsec.user_id_denylist,
@@ -84,10 +79,8 @@ module Datadog
84
79
  @mutex = Mutex.new
85
80
  end
86
81
 
87
- def reconfigure(ruleset:, actions:, telemetry:)
82
+ def reconfigure(ruleset:, telemetry:)
88
83
  @mutex.synchronize do
89
- AppSec::Processor::Actions.merge(actions)
90
-
91
84
  new = Processor.new(ruleset: ruleset, telemetry: telemetry)
92
85
 
93
86
  if new && new.ready?
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Datadog
4
+ module AppSec
5
+ module Contrib
6
+ module ActiveRecord
7
+ # AppSec module that will be prepended to ActiveRecord adapter
8
+ module Instrumentation
9
+ module_function
10
+
11
+ def detect_sql_injection(sql, adapter_name)
12
+ scope = AppSec.active_scope
13
+ return unless scope
14
+
15
+ # libddwaf expects db system to be lowercase,
16
+ # in case of sqlite adapter, libddwaf expects 'sqlite' as db system
17
+ db_system = adapter_name.downcase
18
+ db_system = 'sqlite' if db_system == 'sqlite3'
19
+
20
+ ephemeral_data = {
21
+ 'server.db.statement' => sql,
22
+ 'server.db.system' => db_system
23
+ }
24
+
25
+ waf_timeout = Datadog.configuration.appsec.waf_timeout
26
+ result = scope.processor_context.run({}, ephemeral_data, waf_timeout)
27
+
28
+ if result.status == :match
29
+ Datadog::AppSec::Event.tag_and_keep!(scope, result)
30
+
31
+ event = {
32
+ waf_result: result,
33
+ trace: scope.trace,
34
+ span: scope.service_entry_span,
35
+ sql: sql,
36
+ actions: result.actions
37
+ }
38
+ scope.processor_context.events << event
39
+ end
40
+ end
41
+
42
+ # patch for all adapters in ActiveRecord >= 7.1
43
+ module InternalExecQueryAdapterPatch
44
+ def internal_exec_query(sql, *args, **rest)
45
+ Instrumentation.detect_sql_injection(sql, adapter_name)
46
+
47
+ super
48
+ end
49
+ end
50
+
51
+ # patch for postgres adapter in ActiveRecord < 7.1
52
+ module ExecuteAndClearAdapterPatch
53
+ def execute_and_clear(sql, *args, **rest)
54
+ Instrumentation.detect_sql_injection(sql, adapter_name)
55
+
56
+ super
57
+ end
58
+ end
59
+
60
+ # patch for mysql2 and sqlite3 adapters in ActiveRecord < 7.1
61
+ # this patch is also used when using JDBC adapter
62
+ module ExecQueryAdapterPatch
63
+ def exec_query(sql, *args, **rest)
64
+ Instrumentation.detect_sql_injection(sql, adapter_name)
65
+
66
+ super
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../integration'
4
+ require_relative 'patcher'
5
+
6
+ module Datadog
7
+ module AppSec
8
+ module Contrib
9
+ module ActiveRecord
10
+ # This class provides helper methods that are used when patching ActiveRecord
11
+ class Integration
12
+ include Datadog::AppSec::Contrib::Integration
13
+
14
+ MINIMUM_VERSION = Gem::Version.new('4')
15
+
16
+ register_as :active_record, auto_patch: false
17
+
18
+ def self.version
19
+ Gem.loaded_specs['activerecord'] && Gem.loaded_specs['activerecord'].version
20
+ end
21
+
22
+ def self.loaded?
23
+ !defined?(::ActiveRecord).nil?
24
+ end
25
+
26
+ def self.compatible?
27
+ super && version >= MINIMUM_VERSION
28
+ end
29
+
30
+ def self.auto_instrument?
31
+ true
32
+ end
33
+
34
+ def patcher
35
+ Patcher
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end