ddtrace 1.22.0 → 1.23.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -169,28 +169,51 @@ static const uint8_t all_value_types_positions[] =
169
169
 
170
170
  #define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType))
171
171
 
172
+ // Struct for storing stats related to a profile in a particular slot.
173
+ // These stats will share the same lifetime as the data in that profile slot.
174
+ typedef struct slot_stats {
175
+ // How many individual samples were recorded into this slot (un-weighted)
176
+ uint64_t recorded_samples;
177
+ } stats_slot;
178
+
179
+ typedef struct profile_slot {
180
+ ddog_prof_Profile profile;
181
+ stats_slot stats;
182
+ } profile_slot;
183
+
172
184
  // Contains native state for each instance
173
185
  struct stack_recorder_state {
174
186
  // Heap recorder instance
175
187
  heap_recorder *heap_recorder;
176
188
 
177
- pthread_mutex_t slot_one_mutex;
178
- ddog_prof_Profile slot_one_profile;
179
-
180
- pthread_mutex_t slot_two_mutex;
181
- ddog_prof_Profile slot_two_profile;
189
+ pthread_mutex_t mutex_slot_one;
190
+ profile_slot profile_slot_one;
191
+ pthread_mutex_t mutex_slot_two;
192
+ profile_slot profile_slot_two;
182
193
 
183
194
  short active_slot; // MUST NEVER BE ACCESSED FROM record_sample; this is NOT for the sampler thread to use.
184
195
 
185
196
  uint8_t position_for[ALL_VALUE_TYPES_COUNT];
186
197
  uint8_t enabled_values_count;
198
+
199
+ // Struct for storing stats related to behaviour of a stack recorder instance during its entire lifetime.
200
+ struct lifetime_stats {
201
+ // How many profiles have we serialized successfully so far
202
+ uint64_t serialization_successes;
203
+ // How many profiles have we serialized unsuccessfully so far
204
+ uint64_t serialization_failures;
205
+ // Stats on profile serialization time
206
+ long serialization_time_ns_min;
207
+ long serialization_time_ns_max;
208
+ uint64_t serialization_time_ns_total;
209
+ } stats_lifetime;
187
210
  };
188
211
 
189
- // Used to return a pair of values from sampler_lock_active_profile()
190
- struct active_slot_pair {
212
+ // Used to group mutex and the corresponding profile slot for easy unlocking after work is done.
213
+ typedef struct locked_profile_slot {
191
214
  pthread_mutex_t *mutex;
192
- ddog_prof_Profile *profile;
193
- };
215
+ profile_slot *data;
216
+ } locked_profile_slot;
194
217
 
195
218
  struct call_serialize_without_gvl_arguments {
196
219
  // Set by caller
@@ -198,8 +221,10 @@ struct call_serialize_without_gvl_arguments {
198
221
  ddog_Timespec finish_timestamp;
199
222
 
200
223
  // Set by callee
201
- ddog_prof_Profile *profile;
224
+ profile_slot *slot;
202
225
  ddog_prof_Profile_SerializeResult result;
226
+ long heap_profile_build_time_ns;
227
+ long serialize_no_gvl_time_ns;
203
228
 
204
229
  // Set by both
205
230
  bool serialize_ran;
@@ -222,9 +247,9 @@ static VALUE _native_initialize(
222
247
  static VALUE _native_serialize(VALUE self, VALUE recorder_instance);
223
248
  static VALUE ruby_time_from(ddog_Timespec ddprof_time);
224
249
  static void *call_serialize_without_gvl(void *call_args);
225
- static struct active_slot_pair sampler_lock_active_profile(struct stack_recorder_state *state);
226
- static void sampler_unlock_active_profile(struct active_slot_pair active_slot);
227
- static ddog_prof_Profile *serializer_flip_active_and_inactive_slots(struct stack_recorder_state *state);
250
+ static locked_profile_slot sampler_lock_active_profile(struct stack_recorder_state *state);
251
+ static void sampler_unlock_active_profile(locked_profile_slot active_slot);
252
+ static profile_slot* serializer_flip_active_and_inactive_slots(struct stack_recorder_state *state);
228
253
  static VALUE _native_active_slot(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
229
254
  static VALUE _native_is_slot_one_mutex_locked(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
230
255
  static VALUE _native_is_slot_two_mutex_locked(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
@@ -233,7 +258,7 @@ static ddog_Timespec system_epoch_now_timespec(void);
233
258
  static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_instance);
234
259
  static void serializer_set_start_timestamp_for_next_profile(struct stack_recorder_state *state, ddog_Timespec start_time);
235
260
  static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint);
236
- static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */);
261
+ static void reset_profile_slot(profile_slot *slot, ddog_Timespec *start_time /* Can be null */);
237
262
  static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight, VALUE alloc_class);
238
263
  static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locations);
239
264
  static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
@@ -241,6 +266,8 @@ static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self
241
266
  static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
242
267
  static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj);
243
268
  static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj);
269
+ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance);
270
+ static VALUE build_profile_stats(profile_slot *slot, long serialization_time_ns, long heap_iteration_prep_time_ns, long heap_profile_build_time_ns);
244
271
 
245
272
 
246
273
  void stack_recorder_init(VALUE profiling_module) {
@@ -261,6 +288,7 @@ void stack_recorder_init(VALUE profiling_module) {
261
288
  rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 7);
262
289
  rb_define_singleton_method(stack_recorder_class, "_native_serialize", _native_serialize, 1);
263
290
  rb_define_singleton_method(stack_recorder_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
291
+ rb_define_singleton_method(stack_recorder_class, "_native_stats", _native_stats, 1);
264
292
  rb_define_singleton_method(testing_module, "_native_active_slot", _native_active_slot, 1);
265
293
  rb_define_singleton_method(testing_module, "_native_slot_one_mutex_locked?", _native_is_slot_one_mutex_locked, 1);
266
294
  rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1);
@@ -305,6 +333,9 @@ static VALUE _native_new(VALUE klass) {
305
333
  initialize_slot_concurrency_control(state);
306
334
  for (uint8_t i = 0; i < ALL_VALUE_TYPES_COUNT; i++) { state->position_for[i] = all_value_types_positions[i]; }
307
335
  state->enabled_values_count = ALL_VALUE_TYPES_COUNT;
336
+ state->stats_lifetime = (struct lifetime_stats) {
337
+ .serialization_time_ns_min = INT64_MAX,
338
+ };
308
339
 
309
340
  // Note: At this point, the profiles in profile_slot_one and profile_slot_two contain null pointers. Libdatadog validates pointers
310
341
  // before using them so it's ok for us to go ahead and create the StackRecorder object.
@@ -325,11 +356,11 @@ static VALUE _native_new(VALUE klass) {
325
356
  }
326
357
 
327
358
  static void initialize_slot_concurrency_control(struct stack_recorder_state *state) {
328
- state->slot_one_mutex = (pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER;
329
- state->slot_two_mutex = (pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER;
359
+ state->mutex_slot_one = (pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER;
360
+ state->mutex_slot_two = (pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER;
330
361
 
331
362
  // A newly-created StackRecorder starts with slot one being active for samples, so let's lock slot two
332
- ENFORCE_SUCCESS_GVL(pthread_mutex_lock(&state->slot_two_mutex));
363
+ ENFORCE_SUCCESS_GVL(pthread_mutex_lock(&state->mutex_slot_two));
333
364
 
334
365
  state->active_slot = 1;
335
366
  }
@@ -352,18 +383,22 @@ static void initialize_profiles(struct stack_recorder_state *state, ddog_prof_Sl
352
383
  rb_raise(rb_eRuntimeError, "Failed to initialize slot two profile: %"PRIsVALUE, get_error_details_and_drop(&slot_two_profile_result.err));
353
384
  }
354
385
 
355
- state->slot_one_profile = slot_one_profile_result.ok;
356
- state->slot_two_profile = slot_two_profile_result.ok;
386
+ state->profile_slot_one = (profile_slot) {
387
+ .profile = slot_one_profile_result.ok,
388
+ };
389
+ state->profile_slot_two = (profile_slot) {
390
+ .profile = slot_two_profile_result.ok,
391
+ };
357
392
  }
358
393
 
359
394
  static void stack_recorder_typed_data_free(void *state_ptr) {
360
395
  struct stack_recorder_state *state = (struct stack_recorder_state *) state_ptr;
361
396
 
362
- pthread_mutex_destroy(&state->slot_one_mutex);
363
- ddog_prof_Profile_drop(&state->slot_one_profile);
397
+ pthread_mutex_destroy(&state->mutex_slot_one);
398
+ ddog_prof_Profile_drop(&state->profile_slot_one.profile);
364
399
 
365
- pthread_mutex_destroy(&state->slot_two_mutex);
366
- ddog_prof_Profile_drop(&state->slot_two_profile);
400
+ pthread_mutex_destroy(&state->mutex_slot_two);
401
+ ddog_prof_Profile_drop(&state->profile_slot_two.profile);
367
402
 
368
403
  heap_recorder_free(state->heap_recorder);
369
404
 
@@ -462,8 +497,8 @@ static VALUE _native_initialize(
462
497
  state->position_for[TIMELINE_VALUE_ID] = next_disabled_pos++;
463
498
  }
464
499
 
465
- ddog_prof_Profile_drop(&state->slot_one_profile);
466
- ddog_prof_Profile_drop(&state->slot_two_profile);
500
+ ddog_prof_Profile_drop(&state->profile_slot_one.profile);
501
+ ddog_prof_Profile_drop(&state->profile_slot_two.profile);
467
502
 
468
503
  ddog_prof_Slice_ValueType sample_types = {.ptr = enabled_value_types, .len = state->enabled_values_count};
469
504
  initialize_profiles(state, sample_types);
@@ -479,9 +514,11 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan
479
514
  // Need to do this while still holding on to the Global VM Lock; see comments on method for why
480
515
  serializer_set_start_timestamp_for_next_profile(state, finish_timestamp);
481
516
 
517
+ long heap_iteration_prep_start_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
482
518
  // Prepare the iteration on heap recorder we'll be doing outside the GVL. The preparation needs to
483
519
  // happen while holding on to the GVL.
484
520
  heap_recorder_prepare_iteration(state->heap_recorder);
521
+ long heap_iteration_prep_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - heap_iteration_prep_start_time_ns;
485
522
 
486
523
  // We'll release the Global VM Lock while we're calling serialize, so that the Ruby VM can continue to work while this
487
524
  // is pending
@@ -508,12 +545,27 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan
508
545
  // Cleanup after heap recorder iteration. This needs to happen while holding on to the GVL.
509
546
  heap_recorder_finish_iteration(state->heap_recorder);
510
547
 
548
+ // NOTE: We are focusing on the serialization time outside of the GVL in this stat here. This doesn't
549
+ // really cover the full serialization process but it gives a more useful number since it bypasses
550
+ // the noise of acquiring GVLs and dealing with interruptions which is highly specific to runtime
551
+ // conditions and over which we really have no control.
552
+ long serialization_time_ns = args.serialize_no_gvl_time_ns;
553
+ if (serialization_time_ns >= 0) {
554
+ // Only update stats if our serialization time is valid.
555
+ state->stats_lifetime.serialization_time_ns_max = long_max_of(state->stats_lifetime.serialization_time_ns_max, serialization_time_ns);
556
+ state->stats_lifetime.serialization_time_ns_min = long_min_of(state->stats_lifetime.serialization_time_ns_min, serialization_time_ns);
557
+ state->stats_lifetime.serialization_time_ns_total += serialization_time_ns;
558
+ }
559
+
511
560
  ddog_prof_Profile_SerializeResult serialized_profile = args.result;
512
561
 
513
562
  if (serialized_profile.tag == DDOG_PROF_PROFILE_SERIALIZE_RESULT_ERR) {
563
+ state->stats_lifetime.serialization_failures++;
514
564
  return rb_ary_new_from_args(2, error_symbol, get_error_details_and_drop(&serialized_profile.err));
515
565
  }
516
566
 
567
+ state->stats_lifetime.serialization_successes++;
568
+
517
569
  VALUE encoded_pprof = ruby_string_from_vec_u8(serialized_profile.ok.buffer);
518
570
 
519
571
  ddog_Timespec ddprof_start = serialized_profile.ok.start;
@@ -523,8 +575,9 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan
523
575
 
524
576
  VALUE start = ruby_time_from(ddprof_start);
525
577
  VALUE finish = ruby_time_from(ddprof_finish);
578
+ VALUE profile_stats = build_profile_stats(args.slot, serialization_time_ns, heap_iteration_prep_time_ns, args.heap_profile_build_time_ns);
526
579
 
527
- return rb_ary_new_from_args(2, ok_symbol, rb_ary_new_from_args(3, start, finish, encoded_pprof));
580
+ return rb_ary_new_from_args(2, ok_symbol, rb_ary_new_from_args(4, start, finish, encoded_pprof, profile_stats));
528
581
  }
529
582
 
530
583
  static VALUE ruby_time_from(ddog_Timespec ddprof_time) {
@@ -537,7 +590,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
537
590
  struct stack_recorder_state *state;
538
591
  TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
539
592
 
540
- struct active_slot_pair active_slot = sampler_lock_active_profile(state);
593
+ locked_profile_slot active_slot = sampler_lock_active_profile(state);
541
594
 
542
595
  // Note: We initialize this array to have ALL_VALUE_TYPES_COUNT but only tell libdatadog to use the first
543
596
  // state->enabled_values_count values. This simplifies handling disabled value types -- we still put them on the
@@ -561,7 +614,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
561
614
  }
562
615
 
563
616
  ddog_prof_Profile_Result result = ddog_prof_Profile_add(
564
- active_slot.profile,
617
+ &active_slot.data->profile,
565
618
  (ddog_prof_Sample) {
566
619
  .locations = locations,
567
620
  .values = (ddog_Slice_I64) {.ptr = metric_values, .len = state->enabled_values_count},
@@ -570,6 +623,8 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
570
623
  labels.end_timestamp_ns
571
624
  );
572
625
 
626
+ active_slot.data->stats.recorded_samples++;
627
+
573
628
  sampler_unlock_active_profile(active_slot);
574
629
 
575
630
  if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) {
@@ -590,9 +645,9 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_
590
645
  struct stack_recorder_state *state;
591
646
  TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
592
647
 
593
- struct active_slot_pair active_slot = sampler_lock_active_profile(state);
648
+ locked_profile_slot active_slot = sampler_lock_active_profile(state);
594
649
 
595
- ddog_prof_Profile_Result result = ddog_prof_Profile_set_endpoint(active_slot.profile, local_root_span_id, endpoint);
650
+ ddog_prof_Profile_Result result = ddog_prof_Profile_set_endpoint(&active_slot.data->profile, local_root_span_id, endpoint);
596
651
 
597
652
  sampler_unlock_active_profile(active_slot);
598
653
 
@@ -607,7 +662,7 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_
607
662
  // during iteration of heap recorder live objects.
608
663
  typedef struct heap_recorder_iteration_context {
609
664
  struct stack_recorder_state *state;
610
- ddog_prof_Profile *profile;
665
+ profile_slot *slot;
611
666
 
612
667
  bool error;
613
668
  char error_msg[MAX_LEN_HEAP_ITERATION_ERROR_MSG];
@@ -643,7 +698,7 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
643
698
  };
644
699
 
645
700
  ddog_prof_Profile_Result result = ddog_prof_Profile_add(
646
- context->profile,
701
+ &context->slot->profile,
647
702
  (ddog_prof_Sample) {
648
703
  .locations = iteration_data.locations,
649
704
  .values = (ddog_Slice_I64) {.ptr = metric_values, .len = context->state->enabled_values_count},
@@ -655,6 +710,8 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
655
710
  0
656
711
  );
657
712
 
713
+ context->slot->stats.recorded_samples++;
714
+
658
715
  if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) {
659
716
  read_ddogerr_string_and_drop(&result.err, context->error_msg, MAX_LEN_HEAP_ITERATION_ERROR_MSG);
660
717
  context->error = true;
@@ -666,10 +723,10 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
666
723
  return true;
667
724
  }
668
725
 
669
- static void build_heap_profile_without_gvl(struct stack_recorder_state *state, ddog_prof_Profile *profile) {
726
+ static void build_heap_profile_without_gvl(struct stack_recorder_state *state, profile_slot *slot) {
670
727
  heap_recorder_iteration_context iteration_context = {
671
728
  .state = state,
672
- .profile = profile,
729
+ .slot = slot,
673
730
  .error = false,
674
731
  .error_msg = {0},
675
732
  };
@@ -689,15 +746,21 @@ static void build_heap_profile_without_gvl(struct stack_recorder_state *state, d
689
746
  static void *call_serialize_without_gvl(void *call_args) {
690
747
  struct call_serialize_without_gvl_arguments *args = (struct call_serialize_without_gvl_arguments *) call_args;
691
748
 
692
- args->profile = serializer_flip_active_and_inactive_slots(args->state);
749
+ long serialize_no_gvl_start_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
750
+
751
+ profile_slot *slot_now_inactive = serializer_flip_active_and_inactive_slots(args->state);
752
+
753
+ args->slot = slot_now_inactive;
693
754
 
694
755
  // Now that we have the inactive profile with all but heap samples, lets fill it with heap data
695
756
  // without needing to race with the active sampler
696
- build_heap_profile_without_gvl(args->state, args->profile);
757
+ build_heap_profile_without_gvl(args->state, args->slot);
758
+ args->heap_profile_build_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - serialize_no_gvl_start_time_ns;
697
759
 
698
760
  // Note: The profile gets reset by the serialize call
699
- args->result = ddog_prof_Profile_serialize(args->profile, &args->finish_timestamp, NULL /* duration_nanos is optional */, NULL /* start_time is optional */);
761
+ args->result = ddog_prof_Profile_serialize(&args->slot->profile, &args->finish_timestamp, NULL /* duration_nanos is optional */, NULL /* start_time is optional */);
700
762
  args->serialize_ran = true;
763
+ args->serialize_no_gvl_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - serialize_no_gvl_start_time_ns;
701
764
 
702
765
  return NULL; // Unused
703
766
  }
@@ -707,42 +770,42 @@ VALUE enforce_recorder_instance(VALUE object) {
707
770
  return object;
708
771
  }
709
772
 
710
- static struct active_slot_pair sampler_lock_active_profile(struct stack_recorder_state *state) {
773
+ static locked_profile_slot sampler_lock_active_profile(struct stack_recorder_state *state) {
711
774
  int error;
712
775
 
713
776
  for (int attempts = 0; attempts < 2; attempts++) {
714
- error = pthread_mutex_trylock(&state->slot_one_mutex);
777
+ error = pthread_mutex_trylock(&state->mutex_slot_one);
715
778
  if (error && error != EBUSY) ENFORCE_SUCCESS_GVL(error);
716
779
 
717
780
  // Slot one is active
718
- if (!error) return (struct active_slot_pair) {.mutex = &state->slot_one_mutex, .profile = &state->slot_one_profile};
781
+ if (!error) return (locked_profile_slot) {.mutex = &state->mutex_slot_one, .data = &state->profile_slot_one};
719
782
 
720
783
  // If we got here, slot one was not active, let's try slot two
721
784
 
722
- error = pthread_mutex_trylock(&state->slot_two_mutex);
785
+ error = pthread_mutex_trylock(&state->mutex_slot_two);
723
786
  if (error && error != EBUSY) ENFORCE_SUCCESS_GVL(error);
724
787
 
725
788
  // Slot two is active
726
- if (!error) return (struct active_slot_pair) {.mutex = &state->slot_two_mutex, .profile = &state->slot_two_profile};
789
+ if (!error) return (locked_profile_slot) {.mutex = &state->mutex_slot_two, .data = &state->profile_slot_two};
727
790
  }
728
791
 
729
792
  // We already tried both multiple times, and we did not succeed. This is not expected to happen. Let's stop sampling.
730
793
  rb_raise(rb_eRuntimeError, "Failed to grab either mutex in sampler_lock_active_profile");
731
794
  }
732
795
 
733
- static void sampler_unlock_active_profile(struct active_slot_pair active_slot) {
796
+ static void sampler_unlock_active_profile(locked_profile_slot active_slot) {
734
797
  ENFORCE_SUCCESS_GVL(pthread_mutex_unlock(active_slot.mutex));
735
798
  }
736
799
 
737
- static ddog_prof_Profile *serializer_flip_active_and_inactive_slots(struct stack_recorder_state *state) {
800
+ static profile_slot* serializer_flip_active_and_inactive_slots(struct stack_recorder_state *state) {
738
801
  int previously_active_slot = state->active_slot;
739
802
 
740
803
  if (previously_active_slot != 1 && previously_active_slot != 2) {
741
804
  grab_gvl_and_raise(rb_eRuntimeError, "Unexpected active_slot state %d in serializer_flip_active_and_inactive_slots", previously_active_slot);
742
805
  }
743
806
 
744
- pthread_mutex_t *previously_active = (previously_active_slot == 1) ? &state->slot_one_mutex : &state->slot_two_mutex;
745
- pthread_mutex_t *previously_inactive = (previously_active_slot == 1) ? &state->slot_two_mutex : &state->slot_one_mutex;
807
+ pthread_mutex_t *previously_active = (previously_active_slot == 1) ? &state->mutex_slot_one : &state->mutex_slot_two;
808
+ pthread_mutex_t *previously_inactive = (previously_active_slot == 1) ? &state->mutex_slot_two : &state->mutex_slot_one;
746
809
 
747
810
  // Release the lock, thus making this slot active
748
811
  ENFORCE_SUCCESS_NO_GVL(pthread_mutex_unlock(previously_inactive));
@@ -753,8 +816,8 @@ static ddog_prof_Profile *serializer_flip_active_and_inactive_slots(struct stack
753
816
  // Update active_slot
754
817
  state->active_slot = (previously_active_slot == 1) ? 2 : 1;
755
818
 
756
- // Return profile for previously active slot (now inactive)
757
- return (previously_active_slot == 1) ? &state->slot_one_profile : &state->slot_two_profile;
819
+ // Return pointer to previously active slot (now inactive)
820
+ return (previously_active_slot == 1) ? &state->profile_slot_one : &state->profile_slot_two;
758
821
  }
759
822
 
760
823
  // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
@@ -778,7 +841,7 @@ static VALUE test_slot_mutex_state(VALUE recorder_instance, int slot) {
778
841
  struct stack_recorder_state *state;
779
842
  TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
780
843
 
781
- pthread_mutex_t *slot_mutex = (slot == 1) ? &state->slot_one_mutex : &state->slot_two_mutex;
844
+ pthread_mutex_t *slot_mutex = (slot == 1) ? &state->mutex_slot_one : &state->mutex_slot_two;
782
845
 
783
846
  // Like Heisenberg's uncertainty principle, we can't observe without affecting...
784
847
  int error = pthread_mutex_trylock(slot_mutex);
@@ -813,8 +876,8 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_
813
876
  // resulting state is inconsistent, we make sure to reset it back to the initial state.
814
877
  initialize_slot_concurrency_control(state);
815
878
 
816
- reset_profile(&state->slot_one_profile, /* start_time: */ NULL);
817
- reset_profile(&state->slot_two_profile, /* start_time: */ NULL);
879
+ reset_profile_slot(&state->profile_slot_one, /* start_time: */ NULL);
880
+ reset_profile_slot(&state->profile_slot_two, /* start_time: */ NULL);
818
881
 
819
882
  heap_recorder_after_fork(state->heap_recorder);
820
883
 
@@ -825,8 +888,8 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_
825
888
  // not be interrupted part-way through by a VM fork.
826
889
  static void serializer_set_start_timestamp_for_next_profile(struct stack_recorder_state *state, ddog_Timespec start_time) {
827
890
  // Before making this profile active, we reset it so that it uses the correct start_time for its start
828
- ddog_prof_Profile *next_profile = (state->active_slot == 1) ? &state->slot_two_profile : &state->slot_one_profile;
829
- reset_profile(next_profile, &start_time);
891
+ profile_slot *next_profile_slot = (state->active_slot == 1) ? &state->profile_slot_two : &state->profile_slot_one;
892
+ reset_profile_slot(next_profile_slot, &start_time);
830
893
  }
831
894
 
832
895
  static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint) {
@@ -872,11 +935,12 @@ static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locatio
872
935
  return Qnil;
873
936
  }
874
937
 
875
- static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */) {
876
- ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(profile, start_time);
938
+ static void reset_profile_slot(profile_slot *slot, ddog_Timespec *start_time /* Can be null */) {
939
+ ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(&slot->profile, start_time);
877
940
  if (reset_result.tag == DDOG_PROF_PROFILE_RESULT_ERR) {
878
941
  rb_raise(rb_eRuntimeError, "Failed to reset profile: %"PRIsVALUE, get_error_details_and_drop(&reset_result.err));
879
942
  }
943
+ slot->stats = (stats_slot) {};
880
944
  }
881
945
 
882
946
  // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
@@ -937,3 +1001,40 @@ static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj) {
937
1001
  return Qfalse;
938
1002
  #endif
939
1003
  }
1004
+
1005
+ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE recorder_instance) {
1006
+ struct stack_recorder_state *state;
1007
+ TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
1008
+
1009
+ uint64_t total_serializations = state->stats_lifetime.serialization_successes + state->stats_lifetime.serialization_failures;
1010
+
1011
+ VALUE heap_recorder_snapshot = state->heap_recorder ?
1012
+ heap_recorder_state_snapshot(state->heap_recorder) : Qnil;
1013
+
1014
+ VALUE stats_as_hash = rb_hash_new();
1015
+ VALUE arguments[] = {
1016
+ ID2SYM(rb_intern("serialization_successes")), /* => */ ULL2NUM(state->stats_lifetime.serialization_successes),
1017
+ ID2SYM(rb_intern("serialization_failures")), /* => */ ULL2NUM(state->stats_lifetime.serialization_failures),
1018
+
1019
+ ID2SYM(rb_intern("serialization_time_ns_min")), /* => */ RUBY_NUM_OR_NIL(state->stats_lifetime.serialization_time_ns_min, != INT64_MAX, LONG2NUM),
1020
+ ID2SYM(rb_intern("serialization_time_ns_max")), /* => */ RUBY_NUM_OR_NIL(state->stats_lifetime.serialization_time_ns_max, > 0, LONG2NUM),
1021
+ ID2SYM(rb_intern("serialization_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats_lifetime.serialization_time_ns_total, > 0, LONG2NUM),
1022
+ ID2SYM(rb_intern("serialization_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats_lifetime.serialization_time_ns_total, total_serializations),
1023
+
1024
+ ID2SYM(rb_intern("heap_recorder_snapshot")), /* => */ heap_recorder_snapshot,
1025
+ };
1026
+ for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
1027
+ return stats_as_hash;
1028
+ }
1029
+
1030
+ static VALUE build_profile_stats(profile_slot *slot, long serialization_time_ns, long heap_iteration_prep_time_ns, long heap_profile_build_time_ns) {
1031
+ VALUE stats_as_hash = rb_hash_new();
1032
+ VALUE arguments[] = {
1033
+ ID2SYM(rb_intern("recorded_samples")), /* => */ ULL2NUM(slot->stats.recorded_samples),
1034
+ ID2SYM(rb_intern("serialization_time_ns")), /* => */ LONG2NUM(serialization_time_ns),
1035
+ ID2SYM(rb_intern("heap_iteration_prep_time_ns")), /* => */ LONG2NUM(heap_iteration_prep_time_ns),
1036
+ ID2SYM(rb_intern("heap_profile_build_time_ns")), /* => */ LONG2NUM(heap_profile_build_time_ns),
1037
+ };
1038
+ for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
1039
+ return stats_as_hash;
1040
+ }
@@ -13,12 +13,16 @@ module Datadog
13
13
  SIGNUP_EVENT = 'users.signup'
14
14
 
15
15
  def self.track_login_success(trace, span, user_id:, **others)
16
+ return if trace.nil? || span.nil?
17
+
16
18
  track(LOGIN_SUCCESS_EVENT, trace, span, **others)
17
19
 
18
20
  Kit::Identity.set_user(trace, span, id: user_id.to_s, **others) if user_id
19
21
  end
20
22
 
21
23
  def self.track_login_failure(trace, span, user_id:, user_exists:, **others)
24
+ return if trace.nil? || span.nil?
25
+
22
26
  track(LOGIN_FAILURE_EVENT, trace, span, **others)
23
27
 
24
28
  span.set_tag('appsec.events.users.login.failure.usr.id', user_id) if user_id
@@ -26,11 +30,15 @@ module Datadog
26
30
  end
27
31
 
28
32
  def self.track_signup(trace, span, user_id:, **others)
33
+ return if trace.nil? || span.nil?
34
+
29
35
  track(SIGNUP_EVENT, trace, span, **others)
30
36
  Kit::Identity.set_user(trace, id: user_id.to_s, **others) if user_id
31
37
  end
32
38
 
33
39
  def self.track(event, trace, span, **others)
40
+ return if trace.nil? || span.nil?
41
+
34
42
  span.set_tag("appsec.events.#{event}.track", 'true')
35
43
  span.set_tag("_dd.appsec.events.#{event}.auto.mode", Datadog.configuration.appsec.track_user_events.mode)
36
44
 
@@ -4,7 +4,7 @@ require_relative '../diagnostics/environment_logger'
4
4
  require_relative '../diagnostics/health'
5
5
  require_relative '../logger'
6
6
  require_relative '../runtime/metrics'
7
- require_relative '../telemetry/client'
7
+ require_relative '../telemetry/component'
8
8
  require_relative '../workers/runtime_metrics'
9
9
 
10
10
  require_relative '../remote/component'
@@ -60,7 +60,7 @@ module Datadog
60
60
  logger.debug { "Telemetry disabled. Agent network adapter not supported: #{agent_settings.adapter}" }
61
61
  end
62
62
 
63
- Telemetry::Client.new(
63
+ Telemetry::Component.new(
64
64
  enabled: enabled,
65
65
  heartbeat_interval_seconds: settings.telemetry.heartbeat_interval_seconds,
66
66
  dependency_collection: settings.telemetry.dependency_collection
@@ -165,8 +165,9 @@ module Datadog
165
165
  unused_statsd = (old_statsd - (old_statsd & new_statsd))
166
166
  unused_statsd.each(&:close)
167
167
 
168
- telemetry.stop!
168
+ # enqueue closing event before stopping telemetry so it will be sent out on shutdown
169
169
  telemetry.emit_closing! unless replacement
170
+ telemetry.stop!
170
171
  end
171
172
  end
172
173
  end
@@ -81,23 +81,16 @@ module Datadog
81
81
  configuration = self.configuration
82
82
  yield(configuration)
83
83
 
84
- built_components = false
85
-
86
- components = safely_synchronize do |write_components|
84
+ safely_synchronize do |write_components|
87
85
  write_components.call(
88
86
  if components?
89
87
  replace_components!(configuration, @components)
90
88
  else
91
- components = build_components(configuration)
92
- built_components = true
93
- components
89
+ build_components(configuration)
94
90
  end
95
91
  )
96
92
  end
97
93
 
98
- # Should only be called the first time components are built
99
- components.telemetry.started! if built_components
100
-
101
94
  configuration
102
95
  end
103
96
 
@@ -197,20 +190,13 @@ module Datadog
197
190
  current_components = COMPONENTS_READ_LOCK.synchronize { defined?(@components) && @components }
198
191
  return current_components if current_components || !allow_initialization
199
192
 
200
- built_components = false
201
-
202
- components = safely_synchronize do |write_components|
193
+ safely_synchronize do |write_components|
203
194
  if defined?(@components) && @components
204
195
  @components
205
196
  else
206
- built_components = true
207
197
  write_components.call(build_components(configuration))
208
198
  end
209
199
  end
210
-
211
- # Should only be called the first time components are built
212
- components.telemetry.started! if built_components && components && components.telemetry
213
- components
214
200
  end
215
201
 
216
202
  private
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'emitter'
4
+ require_relative 'event'
5
+ require_relative 'worker'
6
+ require_relative '../utils/forking'
7
+
8
+ module Datadog
9
+ module Core
10
+ module Telemetry
11
+ # Telemetry entrypoint, coordinates sending telemetry events at various points in app lifecycle.
12
+ class Component
13
+ attr_reader :enabled
14
+
15
+ include Core::Utils::Forking
16
+
17
+ # @param enabled [Boolean] Determines whether telemetry events should be sent to the API
18
+ # @param heartbeat_interval_seconds [Float] How frequently heartbeats will be reported, in seconds.
19
+ # @param [Boolean] dependency_collection Whether to send the `app-dependencies-loaded` event
20
+ def initialize(heartbeat_interval_seconds:, dependency_collection:, enabled: true)
21
+ @enabled = enabled
22
+ @stopped = false
23
+
24
+ @worker = Telemetry::Worker.new(
25
+ enabled: @enabled,
26
+ heartbeat_interval_seconds: heartbeat_interval_seconds,
27
+ emitter: Emitter.new,
28
+ dependency_collection: dependency_collection
29
+ )
30
+ @worker.start
31
+ end
32
+
33
+ def disable!
34
+ @enabled = false
35
+ @worker.enabled = false
36
+ end
37
+
38
+ def stop!
39
+ return if @stopped
40
+
41
+ @worker.stop(true)
42
+ @stopped = true
43
+ end
44
+
45
+ def emit_closing!
46
+ return if !@enabled || forked?
47
+
48
+ @worker.enqueue(Event::AppClosing.new)
49
+ end
50
+
51
+ def integrations_change!
52
+ return if !@enabled || forked?
53
+
54
+ @worker.enqueue(Event::AppIntegrationsChange.new)
55
+ end
56
+
57
+ # Report configuration changes caused by Remote Configuration.
58
+ def client_configuration_change!(changes)
59
+ return if !@enabled || forked?
60
+
61
+ @worker.enqueue(Event::AppClientConfigurationChange.new(changes, 'remote_config'))
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
@@ -24,7 +24,7 @@ module Datadog
24
24
  seq_id = self.class.sequence.next
25
25
  payload = Request.build_payload(event, seq_id)
26
26
  res = @http_transport.request(request_type: event.type, payload: payload.to_json)
27
- Datadog.logger.debug { "Telemetry sent for event `#{event.type}` (status code: #{res.code})" }
27
+ Datadog.logger.debug { "Telemetry sent for event `#{event.type}` (code: #{res.code.inspect})" }
28
28
  res
29
29
  rescue => e
30
30
  Datadog.logger.debug("Unable to send telemetry request for event `#{event.type rescue 'unknown'}`: #{e}")