datadog 2.35.0 → 2.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +40 -1
  3. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +68 -31
  4. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +1 -1
  5. data/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c +1 -1
  6. data/ext/datadog_profiling_native_extension/collectors_stack.c +37 -18
  7. data/ext/datadog_profiling_native_extension/collectors_stack.h +8 -2
  8. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +434 -300
  9. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +9 -7
  10. data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +7 -8
  11. data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +0 -12
  12. data/ext/datadog_profiling_native_extension/extconf.rb +2 -2
  13. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.c +4 -43
  14. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.h +15 -47
  15. data/ext/datadog_profiling_native_extension/heap_recorder.c +44 -26
  16. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +14 -35
  17. data/ext/datadog_profiling_native_extension/profiling.c +41 -4
  18. data/ext/datadog_profiling_native_extension/ruby_helpers.c +33 -34
  19. data/ext/datadog_profiling_native_extension/stack_recorder.c +24 -3
  20. data/ext/datadog_profiling_native_extension/stack_recorder.h +1 -0
  21. data/ext/datadog_profiling_native_extension/unsafe_api_calls_check.h +4 -2
  22. data/ext/libdatadog_api/datadog_ruby_common.c +7 -8
  23. data/ext/libdatadog_api/datadog_ruby_common.h +0 -12
  24. data/ext/libdatadog_extconf_helpers.rb +1 -1
  25. data/lib/datadog/appsec/api_security/route_extractor.rb +6 -0
  26. data/lib/datadog/appsec/component.rb +1 -1
  27. data/lib/datadog/appsec/configuration.rb +7 -0
  28. data/lib/datadog/appsec/contrib/aws_lambda/waf_addresses.rb +37 -4
  29. data/lib/datadog/appsec/contrib/graphql/gateway/multiplex.rb +64 -19
  30. data/lib/datadog/appsec/contrib/graphql/integration.rb +1 -0
  31. data/lib/datadog/appsec/contrib/rack/buffered_input.rb +83 -0
  32. data/lib/datadog/appsec/contrib/rack/gateway/request.rb +41 -3
  33. data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +20 -7
  34. data/lib/datadog/appsec/contrib/rack/input_peeker.rb +92 -0
  35. data/lib/datadog/appsec/contrib/rails/gateway/request.rb +33 -0
  36. data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +17 -1
  37. data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +20 -3
  38. data/lib/datadog/appsec/default_header_tags.rb +10 -6
  39. data/lib/datadog/core/configuration/components.rb +1 -0
  40. data/lib/datadog/core/configuration/settings.rb +1 -2
  41. data/lib/datadog/core/configuration/supported_configurations.rb +2 -0
  42. data/lib/datadog/core/remote/component.rb +1 -1
  43. data/lib/datadog/core/telemetry/event/app_started.rb +0 -21
  44. data/lib/datadog/core/utils/at_fork_monkey_patch.rb +1 -1
  45. data/lib/datadog/core/utils/forking.rb +3 -1
  46. data/lib/datadog/core/utils/spawn_monkey_patch.rb +3 -1
  47. data/lib/datadog/core.rb +3 -0
  48. data/lib/datadog/di/base.rb +4 -1
  49. data/lib/datadog/di/component.rb +1 -1
  50. data/lib/datadog/error_tracking/collector.rb +2 -1
  51. data/lib/datadog/error_tracking/component.rb +2 -2
  52. data/lib/datadog/kit/tracing/method_tracer.rb +4 -1
  53. data/lib/datadog/opentelemetry/sdk/propagator.rb +9 -3
  54. data/lib/datadog/opentelemetry/sdk/span_processor.rb +4 -1
  55. data/lib/datadog/profiling/collectors/thread_context.rb +1 -0
  56. data/lib/datadog/profiling/component.rb +13 -15
  57. data/lib/datadog/profiling/ext/dir_monkey_patches.rb +3 -3
  58. data/lib/datadog/ruby_version.rb +25 -0
  59. data/lib/datadog/symbol_database/component.rb +306 -98
  60. data/lib/datadog/symbol_database/extractor.rb +223 -84
  61. data/lib/datadog/tracing/configuration/ext.rb +13 -0
  62. data/lib/datadog/tracing/configuration/settings.rb +17 -0
  63. data/lib/datadog/tracing/contrib/configuration/resolver.rb +7 -0
  64. data/lib/datadog/tracing/contrib/grpc/distributed/propagation.rb +2 -0
  65. data/lib/datadog/tracing/contrib/grpc.rb +1 -0
  66. data/lib/datadog/tracing/contrib/http/distributed/propagation.rb +2 -0
  67. data/lib/datadog/tracing/contrib/http.rb +1 -0
  68. data/lib/datadog/tracing/contrib/karafka/distributed/propagation.rb +2 -0
  69. data/lib/datadog/tracing/contrib/karafka.rb +1 -0
  70. data/lib/datadog/tracing/contrib/rack/middlewares.rb +3 -1
  71. data/lib/datadog/tracing/contrib/rack/route_inference.rb +3 -1
  72. data/lib/datadog/tracing/contrib/sidekiq/distributed/propagation.rb +2 -0
  73. data/lib/datadog/tracing/contrib/sidekiq.rb +1 -0
  74. data/lib/datadog/tracing/contrib/waterdrop/distributed/propagation.rb +2 -0
  75. data/lib/datadog/tracing/contrib/waterdrop.rb +1 -0
  76. data/lib/datadog/tracing/distributed/propagation.rb +33 -1
  77. data/lib/datadog/tracing/distributed/trace_context.rb +11 -2
  78. data/lib/datadog/tracing/trace_digest.rb +7 -0
  79. data/lib/datadog/tracing/trace_operation.rb +4 -1
  80. data/lib/datadog/tracing/tracer.rb +1 -0
  81. data/lib/datadog/version.rb +1 -1
  82. data/lib/datadog.rb +4 -1
  83. metadata +8 -5
@@ -21,42 +21,40 @@ void ruby_helpers_init(void) {
21
21
  to_s_id = rb_intern("to_s");
22
22
  }
23
23
 
24
- // Internal helper for raising pre-formatted syserr exceptions
25
- static NORETURN(void private_raise_syserr_formatted(int syserr_errno, const char *detailed_message, const char *static_message)) {
26
- VALUE exception = rb_syserr_new(syserr_errno, detailed_message);
27
- private_raise_exception(exception, static_message);
28
- }
29
-
30
24
  // Use `raise_syserr` the macro instead, as it provides additional argument checks.
31
25
  void private_raise_syserr(int syserr_errno, const char *fmt, ...) {
32
- FORMAT_VA_ERROR_MESSAGE(detailed_message, fmt);
33
- private_raise_syserr_formatted(syserr_errno, detailed_message, fmt);
26
+ va_list args;
27
+ va_start(args, fmt);
28
+ VALUE detailed_message = rb_vsprintf(fmt, args);
29
+ va_end(args);
30
+
31
+ VALUE exception = rb_syserr_new_str(syserr_errno, detailed_message);
32
+ private_raise_exception(exception, fmt);
34
33
  }
35
34
 
36
35
  typedef struct {
37
36
  VALUE exception_class;
38
37
  int syserr_errno;
39
- char exception_message[MAX_RAISE_MESSAGE_SIZE];
40
- char telemetry_message[MAX_RAISE_MESSAGE_SIZE];
38
+ const char *format_string;
39
+ va_list va_args;
41
40
  } raise_args;
42
41
 
42
+ // Called via rb_thread_call_with_gvl from private_grab_gvl_and_raise.
43
+ // Formats the message with rb_vsprintf (which requires the GVL) and raises.
43
44
  static void *trigger_raise(void *raise_arguments) {
44
45
  raise_args *args = (raise_args *) raise_arguments;
45
46
 
47
+ VALUE detailed_message = rb_vsprintf(args->format_string, args->va_args);
48
+
49
+ VALUE exception;
46
50
  if (args->syserr_errno) {
47
- private_raise_syserr_formatted(
48
- args->syserr_errno,
49
- args->exception_message,
50
- args->telemetry_message
51
- );
51
+ exception = rb_syserr_new_str(args->syserr_errno, detailed_message);
52
52
  } else {
53
- private_raise_error_formatted(
54
- args->exception_class,
55
- args->exception_message,
56
- args->telemetry_message
57
- );
53
+ exception = rb_exc_new_str(args->exception_class, detailed_message);
58
54
  }
59
55
 
56
+ private_raise_exception(exception, args->format_string);
57
+
60
58
  return NULL;
61
59
  }
62
60
 
@@ -71,11 +69,17 @@ void private_grab_gvl_and_raise(VALUE exception_class, int syserr_errno, const c
71
69
  args.syserr_errno = 0;
72
70
  }
73
71
 
74
- FORMAT_VA_ERROR_MESSAGE(formatted_exception_message, format_string);
75
- snprintf(args.exception_message, MAX_RAISE_MESSAGE_SIZE, "%s", formatted_exception_message);
76
- snprintf(args.telemetry_message, MAX_RAISE_MESSAGE_SIZE, "%s", format_string);
72
+ args.format_string = format_string;
73
+ va_start(args.va_args, format_string);
77
74
 
78
75
  if (is_current_thread_holding_the_gvl()) {
76
+ VALUE detailed_message = rb_vsprintf(format_string, args.va_args);
77
+ va_end(args.va_args);
78
+
79
+ VALUE wrapped_message = rb_sprintf(
80
+ "grab_gvl_and_raise called by thread holding the global VM lock: %"PRIsVALUE,
81
+ detailed_message
82
+ );
79
83
  char telemetry_message[MAX_RAISE_MESSAGE_SIZE];
80
84
  snprintf(
81
85
  telemetry_message,
@@ -83,20 +87,14 @@ void private_grab_gvl_and_raise(VALUE exception_class, int syserr_errno, const c
83
87
  "grab_gvl_and_raise called by thread holding the global VM lock: %s",
84
88
  format_string
85
89
  );
86
- char exception_message[MAX_RAISE_MESSAGE_SIZE];
87
- snprintf(
88
- exception_message,
89
- MAX_RAISE_MESSAGE_SIZE,
90
- "grab_gvl_and_raise called by thread holding the global VM lock: %s",
91
- args.exception_message
92
- );
93
- VALUE exception = rb_exc_new_cstr(rb_eRuntimeError, exception_message);
90
+ VALUE exception = rb_exc_new_str(rb_eRuntimeError, wrapped_message);
94
91
  private_raise_exception(exception, telemetry_message);
95
92
  }
96
93
 
97
94
  rb_thread_call_with_gvl(trigger_raise, &args);
98
95
 
99
- rb_bug("[ddtrace] Unexpected: Reached the end of grab_gvl_and_raise while raising '%s'\n", args.exception_message);
96
+ va_end(args.va_args);
97
+ rb_bug("[ddtrace] Unexpected: Reached the end of grab_gvl_and_raise while raising '%s'\n", format_string);
100
98
  }
101
99
 
102
100
  void private_raise_enforce_syserr(
@@ -107,10 +105,11 @@ void private_raise_enforce_syserr(
107
105
  int line,
108
106
  const char *function_name
109
107
  ) {
108
+ const char *format = "Failure returned by '%s' at %s:%d:in `%s'";
110
109
  if (have_gvl) {
111
- rb_exc_raise(rb_syserr_new_str(syserr_errno, rb_sprintf("Failure returned by '%s' at %s:%d:in `%s'", expression, file, line, function_name)));
110
+ private_raise_exception(rb_syserr_new_str(syserr_errno, rb_sprintf(format, expression, file, line, function_name)), format);
112
111
  } else {
113
- private_grab_gvl_and_raise(Qnil, syserr_errno, "Failure returned by '%s' at %s:%d:in `%s'", expression, file, line, function_name);
112
+ private_grab_gvl_and_raise(Qnil, syserr_errno, format, expression, file, line, function_name);
114
113
  }
115
114
  }
116
115
 
@@ -9,6 +9,7 @@
9
9
  #include "time_helpers.h"
10
10
  #include "heap_recorder.h"
11
11
  #include "encoded_profile.h"
12
+ #include "collectors_thread_context.h"
12
13
 
13
14
  // Used to wrap a ddog_prof_Profile in a Ruby object and expose Ruby-level serialization APIs
14
15
  // This file implements the native bits of the Datadog::Profiling::StackRecorder class
@@ -177,6 +178,10 @@ typedef struct {
177
178
  heap_recorder *heap_recorder;
178
179
  bool heap_clean_after_gc_enabled;
179
180
 
181
+ // When set, _native_serialize will call thread_context_collector_on_serialize on this instance
182
+ // before serializing, so that threads suspended across the whole profile period still get sampled.
183
+ VALUE thread_context_collector_instance;
184
+
180
185
  pthread_mutex_t mutex_slot_one;
181
186
  profile_slot profile_slot_one;
182
187
  pthread_mutex_t mutex_slot_two;
@@ -320,6 +325,7 @@ static VALUE _native_new(VALUE klass) {
320
325
  // being leaked.
321
326
 
322
327
  state->heap_clean_after_gc_enabled = false;
328
+ state->thread_context_collector_instance = Qnil;
323
329
 
324
330
  ddog_prof_Slice_SampleType sample_types = {.ptr = all_sample_types, .len = ALL_VALUE_TYPES_COUNT};
325
331
 
@@ -391,6 +397,7 @@ static void initialize_profiles(stack_recorder_state *state, ddog_prof_Slice_Sam
391
397
  static void stack_recorder_typed_data_mark(void *state_ptr) {
392
398
  stack_recorder_state *state = (stack_recorder_state *) state_ptr;
393
399
 
400
+ rb_gc_mark(state->thread_context_collector_instance);
394
401
  heap_recorder_mark_pending_recordings(state->heap_recorder);
395
402
  }
396
403
 
@@ -515,12 +522,16 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan
515
522
  TypedData_Get_Struct(recorder_instance, stack_recorder_state, &stack_recorder_typed_data, state);
516
523
 
517
524
  ddog_Timespec finish_timestamp = system_epoch_now_timespec();
518
- // Need to do this while still holding on to the Global VM Lock; see comments on method for why
525
+ // Need to do this while still holding the Global VM Lock; see comments on method for why
519
526
  serializer_set_start_timestamp_for_next_profile(state, finish_timestamp);
520
527
 
528
+ if (state->thread_context_collector_instance != Qnil) {
529
+ thread_context_collector_on_serialize(state->thread_context_collector_instance);
530
+ }
531
+
521
532
  long heap_iteration_prep_start_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
522
533
  // Prepare the iteration on heap recorder we'll be doing outside the GVL. The preparation needs to
523
- // happen while holding on to the GVL.
534
+ // happen while holding the GVL.
524
535
  // NOTE: While rare, it's possible for the GVL to be released inside this function (see comments on `heap_recorder_update`)
525
536
  // and thus don't assume this is an "atomic" step -- other threads may get some running time in the meanwhile.
526
537
  heap_recorder_prepare_iteration(state->heap_recorder);
@@ -548,7 +559,7 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan
548
559
  rb_thread_call_without_gvl2(call_serialize_without_gvl, &args, NULL /* No interruption function needed in this case */, NULL /* Not needed */);
549
560
  }
550
561
 
551
- // Cleanup after heap recorder iteration. This needs to happen while holding on to the GVL.
562
+ // Cleanup after heap recorder iteration. This needs to happen while holding the GVL.
552
563
  heap_recorder_finish_iteration(state->heap_recorder);
553
564
 
554
565
  // NOTE: We are focusing on the serialization time outside of the GVL in this stat here. This doesn't
@@ -894,6 +905,9 @@ static ddog_Timespec system_epoch_now_timespec(void) {
894
905
  //
895
906
  // Assumption: This method gets called BEFORE restarting profiling -- e.g. there are no components attempting to
896
907
  // trigger samples at the same time.
908
+ //
909
+ // Note that tests call this method directly in the same process without forking,
910
+ // and in such a case non-current Threads keep running.
897
911
  static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_instance) {
898
912
  stack_recorder_state *state;
899
913
  TypedData_Get_Struct(recorder_instance, stack_recorder_state, &stack_recorder_typed_data, state);
@@ -1147,3 +1161,10 @@ static VALUE _native_finalize_pending_heap_recordings(DDTRACE_UNUSED VALUE _self
1147
1161
 
1148
1162
  return Qtrue;
1149
1163
  }
1164
+
1165
+ void recorder_install_on_serialize(VALUE recorder_instance, VALUE thread_context_collector_instance) {
1166
+ stack_recorder_state *state;
1167
+ TypedData_Get_Struct(recorder_instance, stack_recorder_state, &stack_recorder_typed_data, state);
1168
+
1169
+ state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance);
1170
+ }
@@ -29,4 +29,5 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_
29
29
  __attribute__((warn_unused_result)) bool track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice alloc_class);
30
30
  void recorder_after_sample(VALUE recorder_instance);
31
31
  void recorder_after_gc_step(VALUE recorder_instance);
32
+ void recorder_install_on_serialize(VALUE recorder_instance, VALUE thread_context_collector_instance);
32
33
  VALUE enforce_recorder_instance(VALUE object);
@@ -4,7 +4,7 @@
4
4
  //
5
5
  // Specifically, when the profiler is sampling, we're never supposed to call into Ruby code (e.g. methods
6
6
  // implemented using Ruby code) or allocate Ruby objects.
7
- // That's because those events introduce thread switch points, and really we don't the VM switching between threads
7
+ // That's because those events introduce thread switch points, and really we don't want the VM switching between threads
8
8
  // in the middle of the profiler sampling. This includes raising exceptions.
9
9
  //
10
10
  // Raising exceptions as the very last operation, to stop the profiler is ok, but comes a caveat: raising exceptions
@@ -18,10 +18,12 @@
18
18
  // in most (all?) thread switch points, Ruby will check for interrupts and run the postponed jobs.
19
19
  //
20
20
  // Thus, if we set a flag while we're sampling (inside_unsafe_context), trigger the postponed job, and then only unset
21
- // the flag after sampling, he correct thing to happen is that the postponed job should never see the flag.
21
+ // the flag after sampling, the correct thing to happen is that the postponed job should never see the flag.
22
22
  //
23
23
  // If, however, we have a bug and there's a thread switch point, our postponed job will see the flag and immediately
24
24
  // stop the Ruby VM before further damage happens (and hopefully giving us a stack trace clearly pointing to the culprit).
25
+ //
26
+ // Note that this check currently does not detect Ruby object allocations, as those do not check for interrupts.
25
27
 
26
28
  void unsafe_api_calls_check_init(void);
27
29
 
@@ -29,16 +29,15 @@ void private_raise_exception(VALUE exception, const char *static_message) {
29
29
  rb_exc_raise(exception);
30
30
  }
31
31
 
32
- // Helper for raising pre-formatted exceptions
33
- void private_raise_error_formatted(VALUE exception_class, const char *detailed_message, const char *static_message) {
34
- VALUE exception = rb_exc_new_cstr(exception_class, detailed_message);
35
- private_raise_exception(exception, static_message);
36
- }
37
-
38
32
  // Use `raise_error` the macro instead, as it provides additional argument checks.
39
33
  void private_raise_error(VALUE exception_class, const char *fmt, ...) {
40
- FORMAT_VA_ERROR_MESSAGE(detailed_message, fmt);
41
- private_raise_error_formatted(exception_class, detailed_message, fmt);
34
+ va_list args;
35
+ va_start(args, fmt);
36
+ VALUE detailed_message = rb_vsprintf(fmt, args);
37
+ va_end(args);
38
+
39
+ VALUE exception = rb_exc_new_str(exception_class, detailed_message);
40
+ private_raise_exception(exception, fmt);
42
41
  }
43
42
 
44
43
  VALUE datadog_gem_version(void) {
@@ -47,11 +47,6 @@ NORETURN(
47
47
  __attribute__ ((format (printf, 2, 3)));
48
48
  );
49
49
 
50
- // Internal helper for raising pre-formatted exceptions
51
- NORETURN(
52
- void private_raise_error_formatted(VALUE exception_class, const char *detailed_message, const char *static_message)
53
- );
54
-
55
50
  // Raises an exception with separate telemetry-safe and detailed messages.
56
51
  // NOTE: Raising an exception always invokes Ruby code so it requires the GVL and is not compatible with "debug_enter_unsafe_context".
57
52
  // @see debug_enter_unsafe_context
@@ -61,13 +56,6 @@ NORETURN(
61
56
 
62
57
  #define MAX_RAISE_MESSAGE_SIZE 256
63
58
 
64
- #define FORMAT_VA_ERROR_MESSAGE(buf, fmt) \
65
- char buf[MAX_RAISE_MESSAGE_SIZE]; \
66
- va_list buf##_args; \
67
- va_start(buf##_args, fmt); \
68
- vsnprintf(buf, MAX_RAISE_MESSAGE_SIZE, fmt, buf##_args); \
69
- va_end(buf##_args);
70
-
71
59
  // Helper to retrieve Datadog::VERSION::STRING
72
60
  VALUE datadog_gem_version(void);
73
61
 
@@ -10,7 +10,7 @@ module Datadog
10
10
  module LibdatadogExtconfHelpers
11
11
  # Used to make sure the correct gem version gets loaded, as extconf.rb does not get run with "bundle exec" and thus
12
12
  # may see multiple libdatadog versions. See https://github.com/DataDog/dd-trace-rb/pull/2531 for the horror story.
13
- LIBDATADOG_VERSION = '~> 33.0.0.1.0'
13
+ LIBDATADOG_VERSION = '~> 35.0.0.1.0'
14
14
 
15
15
  # Used as an workaround for a limitation with how dynamic linking works in environments where the datadog gem and
16
16
  # libdatadog are moved after the extension gets compiled.
@@ -50,6 +50,12 @@ module Datadog
50
50
  # In Rails < 7.1 it also will not be set even if a route was found,
51
51
  # but in this case `action_dispatch.request.path_parameters` won't be empty.
52
52
  def self.route_pattern(request)
53
+ # NOTE: Requests from contribs like AWS Lambda don't provide a usable
54
+ # `::Rack::Request#env`, so infer the route from the path instead
55
+ unless request.respond_to?(:env)
56
+ return Tracing::Contrib::Rack::RouteInference.infer(request.path.to_s)
57
+ end
58
+
53
59
  if request.env.key?(GRAPE_ROUTE_KEY)
54
60
  pattern = request.env[GRAPE_ROUTE_KEY][:route_info]&.pattern&.origin
55
61
  "#{request.script_name}#{pattern}"
@@ -22,7 +22,7 @@ module Datadog
22
22
  return
23
23
  end
24
24
 
25
- if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('3.3') && ffi_version < Gem::Version.new('1.16.0')
25
+ if RubyVersion.is?('>= 3.3') && ffi_version < Gem::Version.new('1.16.0')
26
26
  Datadog.logger.warn(
27
27
  'AppSec is not supported in Ruby versions above 3.3.0 when using `ffi` versions older than 1.16.0, ' \
28
28
  'and will be forcibly disabled due to a memory leak in `ffi`. ' \
@@ -134,6 +134,13 @@ module Datadog
134
134
  end
135
135
  end
136
136
 
137
+ # NOTE: A value of 0 (or less) disables request body collection
138
+ option :body_parsing_size_limit do |o|
139
+ o.type :int
140
+ o.env 'DD_APPSEC_BODY_PARSING_SIZE_LIMIT' # bytes
141
+ o.default 10_485_760
142
+ end
143
+
137
144
  option :waf_debug do |o|
138
145
  o.env 'DD_APPSEC_WAF_DEBUG'
139
146
  o.default false
@@ -15,6 +15,10 @@ module Datadog
15
15
  # Extracts WAF input addresses from normalized AWS Lambda API Gateway event payloads.
16
16
  # @api private
17
17
  module WAFAddresses
18
+ BASE64_CHARS_PER_GROUP = 4
19
+ BASE64_BYTES_PER_GROUP = 3
20
+ BASE64_PADDING_BYTE = "=".ord
21
+
18
22
  module_function
19
23
 
20
24
  def from_request(payload)
@@ -27,10 +31,11 @@ module Datadog
27
31
  'server.request.uri.raw' => build_fullpath(payload),
28
32
  'server.request.headers' => headers,
29
33
  'server.request.headers.no_cookies' => headers.dup.tap { |h| h.delete('cookie') },
30
- 'http.client_ip' => extract_client_ip(payload['source_ip'], headers),
31
34
  'server.request.method' => payload['method'],
32
35
  'server.request.body' => parse_body(payload, headers),
33
- 'server.request.path_params' => payload['path_params']
36
+ 'server.request.body.byte_length' => body_byte_length(payload),
37
+ 'server.request.path_params' => payload['path_params'],
38
+ 'http.client_ip' => extract_client_ip(payload['source_ip'], headers)
34
39
  }
35
40
 
36
41
  data.compact!
@@ -44,7 +49,9 @@ module Datadog
44
49
  data = {
45
50
  'server.response.status' => payload['status_code']&.to_s,
46
51
  'server.response.headers' => headers,
47
- 'server.response.headers.no_cookies' => headers.dup.tap { |h| h.delete('set-cookie') }
52
+ 'server.response.headers.no_cookies' => headers.dup.tap { |h| h.delete('set-cookie') },
53
+ 'server.response.body' => parse_body(payload, headers),
54
+ 'server.response.body.byte_length' => body_byte_length(payload)
48
55
  }
49
56
 
50
57
  data.compact!
@@ -94,7 +101,9 @@ module Datadog
94
101
  body = payload['body']
95
102
  return unless body
96
103
 
97
- body = Core::Utils::Base64Codec.strict_decode64(body) if payload['base64_encoded']
104
+ if (byte_length = body_byte_length(payload))
105
+ return if byte_length > Datadog.configuration.appsec.body_parsing_size_limit
106
+ end
98
107
 
99
108
  content_type = headers['content-type']
100
109
  return unless content_type
@@ -102,7 +111,31 @@ module Datadog
102
111
  media_type = AppSec::Utils::HTTP::MediaType.parse(content_type)
103
112
  return unless media_type
104
113
 
114
+ body = Core::Utils::Base64Codec.strict_decode64(body) if payload['base64_encoded']
105
115
  AppSec::Utils::HTTP::Body.parse(body, media_type: media_type)
116
+ rescue ArgumentError => e
117
+ AppSec.telemetry.report(e, description: 'AppSec: Failed to decode base64 body')
118
+
119
+ nil
120
+ end
121
+
122
+ def body_byte_length(payload)
123
+ body = payload['body']
124
+
125
+ return unless body
126
+ return body.bytesize unless payload['base64_encoded']
127
+
128
+ # NOTE: Base64 packs every 3 bytes into 4 characters and pads the last
129
+ # group with up to two "=" bytes. The decoded length is therefore
130
+ # derivable from the encoded length, letting us measure the raw
131
+ # body size without allocating the decoded string.
132
+ padding = 0
133
+ if body.getbyte(-1) == BASE64_PADDING_BYTE
134
+ padding = 1
135
+ padding = 2 if body.getbyte(-2) == BASE64_PADDING_BYTE
136
+ end
137
+
138
+ body.bytesize / BASE64_CHARS_PER_GROUP * BASE64_BYTES_PER_GROUP - padding
106
139
  end
107
140
  end
108
141
  end
@@ -50,38 +50,83 @@ module Datadog
50
50
  selected_operation = query.selected_operation
51
51
  next unless selected_operation
52
52
 
53
- arguments_from_selections(selected_operation.selections, query.variables, args_hash)
53
+ arguments_from_selections(selected_operation.selections, query.variables, args_hash, query.fragments)
54
54
  end
55
55
  end
56
56
 
57
- def arguments_from_selections(selections, query_variables, args_hash)
57
+ def arguments_from_selections(selections, query_variables, args_hash, fragments, visited_fragments = {})
58
58
  selections.each do |selection|
59
- # rubocop:disable Style/ClassEqualityComparison
60
- next unless selection.class.name == Integration::AST_NODE_CLASS_NAMES[:field]
61
- # rubocop:enable Style/ClassEqualityComparison
62
-
63
- selection_name = selection.alias || selection.name
64
-
65
- if !selection.arguments.empty? || !selection.directives.empty?
66
- args_hash[selection_name] ||= []
67
- args_hash[selection_name] <<
68
- arguments_hash(selection.arguments, query_variables).merge!(
69
- arguments_from_directives(selection.directives, query_variables)
70
- )
59
+ case selection
60
+ when ::GraphQL::Language::Nodes::FragmentSpread
61
+ fragment_name = selection.name
62
+ append_arguments(
63
+ args_hash, fragment_name, nil, arguments_from_directives(selection.directives, query_variables)
64
+ )
65
+
66
+ next if visited_fragments[fragment_name]
67
+
68
+ fragment = fragments[fragment_name]
69
+ next unless fragment
70
+
71
+ append_arguments(
72
+ args_hash, fragment_name, nil, arguments_from_directives(fragment.directives, query_variables)
73
+ )
74
+
75
+ visited_fragments[fragment_name] = true
76
+ arguments_from_selections(
77
+ fragment.selections, query_variables, args_hash, fragments, visited_fragments
78
+ )
79
+ visited_fragments.delete(fragment_name)
80
+ when ::GraphQL::Language::Nodes::Field
81
+ selection_name = selection.alias || selection.name
82
+ field_arguments = arguments_hash(selection.arguments, query_variables) unless selection.arguments.empty?
83
+ append_arguments(
84
+ args_hash,
85
+ selection_name,
86
+ field_arguments,
87
+ arguments_from_directives(selection.directives, query_variables)
88
+ )
89
+
90
+ arguments_from_selections(
91
+ selection.selections, query_variables, args_hash, fragments, visited_fragments
92
+ )
93
+ when ::GraphQL::Language::Nodes::InlineFragment
94
+ append_arguments(
95
+ args_hash, selection.type.name, nil, arguments_from_directives(selection.directives, query_variables)
96
+ )
97
+
98
+ arguments_from_selections(
99
+ selection.selections, query_variables, args_hash, fragments, visited_fragments
100
+ )
71
101
  end
102
+ end
103
+ end
72
104
 
73
- arguments_from_selections(selection.selections, query_variables, args_hash)
105
+ def append_arguments(args_hash, selection_name, arguments, directive_arguments)
106
+ combined_arguments = if arguments
107
+ arguments.merge!(directive_arguments) if directive_arguments
108
+ arguments
109
+ else
110
+ directive_arguments
74
111
  end
112
+ return unless combined_arguments
113
+
114
+ args_hash[selection_name] ||= []
115
+ args_hash[selection_name] << combined_arguments
75
116
  end
76
117
 
77
118
  def arguments_from_directives(directives, query_variables)
78
- directives.each_with_object({}) do |directive, args_hash|
79
- # rubocop:disable Style/ClassEqualityComparison
80
- next unless directive.class.name == Integration::AST_NODE_CLASS_NAMES[:directive]
81
- # rubocop:enable Style/ClassEqualityComparison
119
+ return if directives.empty?
120
+
121
+ directive_arguments = directives.each_with_object({}) do |directive, args_hash|
122
+ next unless directive.is_a?(::GraphQL::Language::Nodes::Directive)
82
123
 
83
124
  args_hash[directive.name] = arguments_hash(directive.arguments, query_variables)
84
125
  end
126
+
127
+ return if directive_arguments.empty?
128
+
129
+ directive_arguments
85
130
  end
86
131
 
87
132
  def arguments_hash(arguments, query_variables)
@@ -16,6 +16,7 @@ module Datadog
16
16
  AST_NODE_CLASS_NAMES = {
17
17
  field: 'GraphQL::Language::Nodes::Field',
18
18
  directive: 'GraphQL::Language::Nodes::Directive',
19
+ fragment_spread: 'GraphQL::Language::Nodes::FragmentSpread',
19
20
  variable_identifier: 'GraphQL::Language::Nodes::VariableIdentifier',
20
21
  input_object: 'GraphQL::Language::Nodes::InputObject',
21
22
  }.freeze
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Datadog
4
+ module AppSec
5
+ module Contrib
6
+ module Rack
7
+ # Wraps a `rack.input` stream with a buffer placed in front of it.
8
+ # Every read drains the buffer first, then continues from the stream
9
+ #
10
+ # NOTE: Forward-only: no rewind, no seek
11
+ #
12
+ # NOTE: Rack 3 dropped the rewind requirement from the input stream contract
13
+ # @see https://github.com/rack/rack/blob/v3.2.6/SPEC.rdoc
14
+ class BufferedInput
15
+ # NOTE: Rack's multipart parser reads in 1 MiB chunks, used to bound
16
+ # {#each} the same way
17
+ # @see https://github.com/rack/rack/blob/v3.2.6/lib/rack/multipart/parser.rb#L54
18
+ READ_BUFSIZE_BYTES = 1_048_576
19
+
20
+ def initialize(stream, buffer:)
21
+ @stream = stream
22
+ @buffer = buffer
23
+ end
24
+
25
+ def read(length = nil, outbuf = nil)
26
+ if length.nil?
27
+ data = @buffer.read(nil, outbuf) || +''
28
+ more = @stream.read
29
+
30
+ data << more if more
31
+
32
+ return data
33
+ end
34
+
35
+ data = @buffer.read(length, outbuf)
36
+
37
+ if data.nil?
38
+ more = @stream.read(length, outbuf)
39
+ return more if more && !more.empty?
40
+
41
+ # NOTE: Match `IO#read(length, outbuf)` at EOF. Return nil and clear
42
+ # the caller's buffer so stale bytes are not mistaken for data
43
+ outbuf&.clear
44
+ return
45
+ end
46
+
47
+ remaining = length - data.bytesize
48
+ return data if remaining <= 0
49
+
50
+ more = @stream.read(remaining)
51
+ data << more if more
52
+
53
+ data
54
+ end
55
+
56
+ def gets
57
+ line = @buffer.gets
58
+
59
+ return @stream.gets if line.nil?
60
+ return line if line.end_with?("\n")
61
+
62
+ more = @stream.gets
63
+ more ? (line << more) : line
64
+ end
65
+
66
+ def each
67
+ while (chunk = read(READ_BUFSIZE_BYTES))
68
+ yield chunk
69
+ end
70
+
71
+ self
72
+ end
73
+
74
+ def close
75
+ @buffer.close
76
+ ensure
77
+ @stream.close
78
+ end
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end